예제 #1
0
 def _get_morph():
     """Create a Majka analyser for the ``majka.w-lt`` file under NLP_DATA_PATH.

     The analyser is configured with tag processing off, ``first_only`` on,
     ``negative`` set to ``"ne"``, and the IGNORE_CASE flag added to its flags.

     Returns:
         The configured ``majka.Majka`` instance.
     """
     dictionary_path = os.path.join(NLP_DATA_PATH, 'majka.w-lt')
     analyzer = majka.Majka(dictionary_path)

     analyzer.tags = False
     analyzer.first_only = True
     analyzer.negative = "ne"
     # Add to the existing flags rather than overwrite them: ignore word case.
     analyzer.flags |= majka.IGNORE_CASE

     return analyzer
예제 #2
0
def convert_words_into_lemmas(text, text_language):
    """Replace each whitespace-separated word of *text* with its lemma.

    A Majka dictionary is loaded from ``../majka/<text_language>.w-lt`` on
    every call. Each word is looked up; the ``'lemma'`` of the first match
    is used, and words with no match are kept as they are.

    Args:
        text: String to lemmatize; it is split on whitespace.
        text_language: Stem of the dictionary file name under ``../majka/``.

    Returns:
        *text* unchanged when the dictionary cannot be loaded. Otherwise a
        string in which every output word is preceded by a single space
        (so a non-empty result starts with a space), or ``""`` when *text*
        contains no words.
    """
    try:
        morph = majka.Majka("../majka/" + text_language + ".w-lt")
    except Exception:
        # Best-effort fallback: without a usable dictionary the text is
        # returned untouched. ``Exception`` (not a bare ``except``) so that
        # KeyboardInterrupt / SystemExit still propagate.
        return text

    pieces = []
    for word in text.split():
        matches = morph.find(word)
        # An empty result means the word is unknown to the dictionary.
        pieces.append(matches[0]['lemma'] if matches else word)

    # Keep the historical output format: one space before every word.
    return "".join(" " + piece for piece in pieces)
예제 #3
0
 def init_lemmatizer(self, majka_path):
     """Create a Majka lemmatizer from ``majka.w-lt`` inside *majka_path*.

     Args:
         majka_path: Directory that contains the ``majka.w-lt`` dictionary.

     Returns:
         A ``majka.Majka`` instance with ``first_only`` on, tag processing
         off and ``negative`` set to ``'ne'``.
     """
     # Function-scope import so this block does not rely on a file-level one.
     import os

     # os.path.join uses the platform separator; the previous hard-coded
     # backslash only produced a valid path on Windows.
     dictionary_path = os.path.join(majka_path, 'majka.w-lt')

     lemmatizer = majka.Majka(dictionary_path)
     lemmatizer.first_only = True
     lemmatizer.tags = False
     lemmatizer.negative = 'ne'
     return lemmatizer
예제 #4
0
import psycopg2
import os
import re
import sys
import majka
import json
import time

# Majka analysers, created once at import time from absolute dictionary paths.
# NOTE(review): the variable and file names suggest Czech, English and German
# dictionaries respectively — confirm against the actual files.
morph_cz = majka.Majka('/home/jhu/PycharmProjects/ConEv/nlp/majka.w-lt')
morph_en = majka.Majka('/home/jhu/PycharmProjects/ConEv/nlp/w-lt.en.fsa')
morph_de = majka.Majka('/home/jhu/PycharmProjects/ConEv/nlp/w-lt.ger.fsa')
# Reference: https://nlp.fi.muni.cz/czech-morphology-analyser/

# PostgreSQL connection to the "ConEv" database on 127.0.0.1 and one shared
# cursor, both opened at import time.
# NOTE(review): credentials are hard-coded in the connection string; consider
# moving them to configuration.
db_connection = psycopg2.connect(
    "dbname=ConEv user=postgres password=forward host=127.0.0.1")
db = db_connection.cursor()


def find_word_attrs(word, language):
    """Reconfigure the module-level ``morph_cz`` analyser.

    The statements visible here only assign settings on ``morph_cz``; they
    do not read ``word`` or ``language`` and nothing is returned.
    NOTE(review): the function may continue beyond this excerpt — confirm.

    Each setting is assigned twice in a row, so (assuming these are plain
    settings with no side effects on assignment) the last assignment wins:
    ``flags == 0``, ``tags is True`` and ``compact_tag is False``.
    """

    #-----------------------------------------------------------------------------------------------CZ
    # Three flags are OR-ed in and then all cleared again by the final line.
    morph_cz.flags |= majka.ADD_DIACRITICS  # find word forms with diacritics
    morph_cz.flags |= majka.DISALLOW_LOWERCASE  # do not enable to find lowercase variants
    morph_cz.flags |= majka.IGNORE_CASE  # ignore the word case whatsoever
    morph_cz.flags = 0  # unset all flags

    morph_cz.tags = False  # return just the lemma, do not process the tags
    morph_cz.tags = True  # turn tag processing back on (default)

    morph_cz.compact_tag = True  # return tag in compact form (as returned by Majka)
    morph_cz.compact_tag = False  # do not return compact tag (default)
예제 #5
0
import majka

# Shared Majka analyser, created at import time from 'majka.w-lt' (a relative
# path, so it is resolved against the current working directory).
lemmatizer = majka.Majka('majka.w-lt')
# NOTE(review): presumably limits lookups to the first match — confirm
# against the python-majka documentation.
lemmatizer.first_only = True
# Turn tag processing off.
lemmatizer.tags = False
# NOTE(review): presumably the negation prefix the analyser should recognise
# — confirm against the python-majka documentation.
lemmatizer.negative = 'ne'


def lemmatize(word):
    """Return the lemma of *word*, or *word* itself when no match is found.

    The lookup goes through the module-level ``lemmatizer``; the ``'lemma'``
    entry of the first result is returned.
    """
    matches = lemmatizer.find(word)
    if not matches:
        # Unknown word: hand it back unchanged.
        return word
    return matches[0]['lemma']