def test_should_run_only_if_language_is_portuguese(self):
    """Check that PalavrasRaw reports not having run on an English document.

    The check is only performed when ``palavras_raw.palavras_installed()``
    is truthy; otherwise the test body does nothing.
    """
    if not palavras_raw.palavras_installed():
        return

    english_fields = {
        'text': 'There was a rock on the way.',
        'language': 'en',
    }
    self.document.update(english_fields)

    worker = palavras_raw.PalavrasRaw()
    worker.delay(self.fake_id)

    # The document is tagged as 'en', so the worker is expected to flag
    # that it did not run.
    ran_flag = self.document['palavras_raw_ran']
    self.assertEqual(ran_flag, False)
def test_should_run_only_if_language_is_portuguese(self):
    """Check that PalavrasRaw.process returns an empty dict for English input.

    The check is only performed when ``palavras_raw.palavras_installed()``
    is truthy; otherwise the test body does nothing.
    """
    if not palavras_raw.palavras_installed():
        return

    english_document = {
        'text': 'There was a rock on the way.',
        'language': 'en',
    }
    worker = palavras_raw.PalavrasRaw()
    outcome = worker.process(english_document)

    # The document is tagged as 'en', so nothing should be produced.
    self.assertEqual(outcome, {})
# # You should have received a copy of the GNU General Public License # along with PyPLN. If not, see <http://www.gnu.org/licenses/>. from pypelinin import Worker import en_nltk import pt_palavras from pypln.backend.workers.palavras_raw import palavras_installed MAPPING = { 'en': en_nltk.pos, 'pt': pt_palavras.pos, } if not palavras_installed(): del(MAPPING['pt']) def put_offset(text, tagged_text): result = [] position = 0 for token, classification in tagged_text: token_position = text.find(token, position) result.append((token, classification, token_position)) position = token_position + len(token) - 1 return result class POS(Worker): requires = ['text', 'tokens', 'language', 'palavras_raw'] def process(self, document):
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with PyPLN. If not, see <http://www.gnu.org/licenses/>. import en_nltk import pt_palavras from pypln.backend.workers.palavras_raw import palavras_installed from pypln.backend.celery_task import PyPLNTask MAPPING = { 'en': en_nltk.pos, 'pt': pt_palavras.pos, } if not palavras_installed(): del (MAPPING['pt']) def put_offset(text, tagged_text): result = [] position = 0 for token, classification in tagged_text: token_position = text.find(token, position) result.append((token, classification, token_position)) position = token_position + len(token) - 1 return result class POS(PyPLNTask): def process(self, document):