Exemple #1
0
    def test_should_run_only_if_language_is_portuguese(self):
        """An English document must leave 'palavras_raw_ran' set to False.

        The check is only performed when
        ``palavras_raw.palavras_installed()`` reports True; otherwise the
        test body is a no-op.
        """
        if not palavras_raw.palavras_installed():
            return

        english_fields = {
            'text': 'There was a rock on the way.',
            'language': 'en',
        }
        self.document.update(english_fields)

        worker = palavras_raw.PalavrasRaw()
        worker.delay(self.fake_id)

        ran_flag = self.document['palavras_raw_ran']
        self.assertEqual(ran_flag, False)
 def test_should_run_only_if_language_is_portuguese(self):
     """Processing an English document must yield an empty result dict.

     The check is only performed when
     ``palavras_raw.palavras_installed()`` reports True; otherwise the
     test body is a no-op.
     """
     if not palavras_raw.palavras_installed():
         return

     english_document = {
         'text': 'There was a rock on the way.',
         'language': 'en',
     }
     worker = palavras_raw.PalavrasRaw()
     outcome = worker.process(english_document)
     self.assertEqual(outcome, {})
Exemple #3
0
#
# You should have received a copy of the GNU General Public License
# along with PyPLN.  If not, see <http://www.gnu.org/licenses/>.

from pypelinin import Worker

import en_nltk
import pt_palavras
from pypln.backend.workers.palavras_raw import palavras_installed


# Language code -> part-of-speech tagging callable.
MAPPING = dict(en=en_nltk.pos, pt=pt_palavras.pos)

# Drop the Portuguese entry when palavras_installed() reports False.
if not palavras_installed():
    MAPPING.pop('pt')

def put_offset(text, tagged_text):
    """Attach to every tagged token its character offset inside ``text``.

    Tokens are searched left to right: each search starts right after the
    end of the previously located token, so repeated tokens resolve to
    successive occurrences.

    :param text: the string the tokens were extracted from.
    :param tagged_text: iterable of ``(token, classification)`` pairs, in
        the order they appear in ``text``.
    :returns: list of ``(token, classification, offset)`` tuples; ``offset``
        is ``-1`` when the token cannot be found at or after the current
        search position.
    """
    result = []
    position = 0
    for token, classification in tagged_text:
        token_position = text.find(token, position)
        result.append((token, classification, token_position))
        # Only move the cursor on a hit. On a miss ``find`` returns -1 and
        # deriving the next start from it could yield a negative index
        # (interpreted relative to the end of the string) or rewind the
        # search, corrupting every following offset.
        if token_position != -1:
            # Resume just past the token so the next search cannot match
            # inside the token that was just located.
            position = token_position + len(token)
    return result

class POS(Worker):
    requires = ['text', 'tokens', 'language', 'palavras_raw']

    def process(self, document):
Exemple #4
0
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PyPLN.  If not, see <http://www.gnu.org/licenses/>.

import en_nltk
import pt_palavras
from pypln.backend.workers.palavras_raw import palavras_installed
from pypln.backend.celery_task import PyPLNTask

# Part-of-speech tagging callables, keyed by language code.
MAPPING = dict(
    en=en_nltk.pos,
    pt=pt_palavras.pos,
)

# Remove the Portuguese tagger when palavras_installed() reports False.
if not palavras_installed():
    MAPPING.pop('pt')


def put_offset(text, tagged_text):
    """Attach to every tagged token its character offset inside ``text``.

    Tokens are searched left to right: each search starts right after the
    end of the previously located token, so repeated tokens resolve to
    successive occurrences.

    :param text: the string the tokens were extracted from.
    :param tagged_text: iterable of ``(token, classification)`` pairs, in
        the order they appear in ``text``.
    :returns: list of ``(token, classification, offset)`` tuples; ``offset``
        is ``-1`` when the token cannot be found at or after the current
        search position.
    """
    result = []
    position = 0
    for token, classification in tagged_text:
        token_position = text.find(token, position)
        result.append((token, classification, token_position))
        # Only move the cursor on a hit. On a miss ``find`` returns -1 and
        # deriving the next start from it could yield a negative index
        # (interpreted relative to the end of the string) or rewind the
        # search, corrupting every following offset.
        if token_position != -1:
            # Resume just past the token so the next search cannot match
            # inside the token that was just located.
            position = token_position + len(token)
    return result


class POS(PyPLNTask):
    def process(self, document):