def __init__(self, dir_freeling=None):
    """Initialize the FreeLing environment and a language identifier.

    dir_freeling: optional path to the FreeLing data directory; when
    omitted, the system-wide "/usr/share/freeling/" install is used.
    """
    # Fall back to the default install location when no directory is given.
    self.data_dir = dir_freeling if dir_freeling is not None else "/usr/share/freeling/"
    self.data_dir_common = self.data_dir + "common/"
    freeling.util_init_locale("default")
    # Language identifier built from the shared identification model.
    self.la = freeling.lang_ident(self.data_dir_common + "lang_ident/ident.dat")
def __init__(self, text):
    """Build the full FreeLing pipeline for Spanish — tokenizer, splitter,
    morphological analyzer, tagger, sense annotator, chart parser and
    txala dependency parser — plus a language identifier.
    """
    super().__init__(text)
    freeling.util_init_locale("default")
    self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

    # Morphological-analyzer options: defaults are fine, only data files are set.
    lang_dir = DATA + LANG
    op = freeling.maco_options("es")
    op.set_data_files(
        "",
        DATA + "common/punct.dat",
        lang_dir + "/dicc.src",
        lang_dir + "/afixos.dat",
        "",
        lang_dir + "/locucions.dat",
        lang_dir + "/np.dat",
        lang_dir + "/quantities.dat",
        lang_dir + "/probabilitats.dat")

    # Analysis chain: tokenizer, session-based sentence splitter, morphology.
    self.tk = freeling.tokenizer(lang_dir + "/tokenizer.dat")
    self.sp = freeling.splitter(lang_dir + "/splitter.dat")
    self.sid = self.sp.open_session()
    self.mf = freeling.maco(op)

    # Select which morphological submodules run on the next analyze() call:
    # user map off; number, punctuation, date, dictionary, affixes on;
    # compounds off; retokenization, multiwords, NER, quantities and
    # probability/guesser on.
    self.mf.set_active_options(False, True, True, True,
                               True, True, False, True,
                               True, True, True, True)

    # Tagger, sense annotator and the two parsers.
    self.tg = freeling.hmm_tagger(lang_dir + "/tagger.dat", True, 2)
    self.sen = freeling.senses(lang_dir + "/senses.dat")
    self.parser = freeling.chart_parser(lang_dir + "/chunker/grammar-chunk.dat")
    self.dep = freeling.dep_txala(lang_dir + "/dep_txala/dependences.dat",
                                  self.parser.get_start_symbol())
def inicializa(self):
    """Set up the FreeLing analysis chain for ``self.lang`` and open the
    MySQL connection used by this instance."""
    DATA = "/usr/local/share/freeling/"
    lang_dir = DATA + self.lang
    freeling.util_init_locale("default")

    # Language identifier.
    self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

    # Morphological-analyzer options (note: analyzer language is hard-coded
    # to "es" while data files follow self.lang, as in the sibling clones).
    op = freeling.maco_options("es")
    op.set_active_modules(0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
    op.set_data_files("",
                      lang_dir + "/locucions.dat",
                      lang_dir + "/quantities.dat",
                      lang_dir + "/afixos.dat",
                      lang_dir + "/probabilitats.dat",
                      lang_dir + "/dicc.src",
                      lang_dir + "/np.dat",
                      DATA + "common/punct.dat",
                      lang_dir + "/corrector/corrector.dat")

    # Analyzer chain: tokenizer, splitter, morphology, tagger, senses,
    # named-entity classifier, chart parser, txala dependency parser.
    self.tk = freeling.tokenizer(lang_dir + "/tokenizer.dat")
    self.sp = freeling.splitter(lang_dir + "/splitter.dat")
    self.mf = freeling.maco(op)
    self.tg = freeling.hmm_tagger(lang_dir + "/tagger.dat", 1, 2)
    self.sen = freeling.senses(lang_dir + "/senses.dat")
    self.nec = freeling.nec(lang_dir + "/nerc/nec/nec-ab-rich.dat")
    # self.ner = freeling.nec(lang_dir + "/ner/ner-ab.dat")
    self.parser = freeling.chart_parser(lang_dir + "/chunker/grammar-chunk.dat")
    self.dep = freeling.dep_txala(lang_dir + "/dep/dependences.dat",
                                  self.parser.get_start_symbol())

    # Database connection (credentials are redacted placeholders in source).
    con_data = {'user': '******', 'password': '******', 'host': '127.0.0.1',
                'database': 'agiria', 'raise_on_warnings': True,
                'autocommit': True, 'buffered': True}
    self.con = my.connect(**con_data)
def inicializa(self):
    """Initialize the FreeLing analyzers for ``self.lang`` and connect to
    the MySQL database."""
    FREELINGDIR = "/usr/local"
    DATA = FREELINGDIR + "/share/freeling/"
    LANG = self.lang
    freeling.util_init_locale("default")

    # Language identifier.
    self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

    # Morphological-analyzer options (analyzer language hard-coded to "es").
    op = freeling.maco_options("es")
    op.set_active_modules(0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
    op.set_data_files("",
                      DATA + LANG + "/locucions.dat",
                      DATA + LANG + "/quantities.dat",
                      DATA + LANG + "/afixos.dat",
                      DATA + LANG + "/probabilitats.dat",
                      DATA + LANG + "/dicc.src",
                      DATA + LANG + "/np.dat",
                      DATA + "common/punct.dat",
                      DATA + LANG + "/corrector/corrector.dat")

    # Build the analyzer chain.
    self.tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
    self.sp = freeling.splitter(DATA + LANG + "/splitter.dat")
    self.mf = freeling.maco(op)
    self.tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", 1, 2)
    self.sen = freeling.senses(DATA + LANG + "/senses.dat")
    self.nec = freeling.nec(DATA + LANG + "/nerc/nec/nec-ab-rich.dat")
    # self.ner = freeling.nec(DATA + LANG + "/ner/ner-ab.dat")
    self.parser = freeling.chart_parser(DATA + LANG + "/chunker/grammar-chunk.dat")
    self.dep = freeling.dep_txala(DATA + LANG + "/dep/dependences.dat",
                                  self.parser.get_start_symbol())

    # MySQL connection (credentials redacted in source).
    con_data = dict(user='******', password='******', host='127.0.0.1',
                    database='agiria', raise_on_warnings=True,
                    autocommit=True, buffered=True)
    self.con = my.connect(**con_data)
# Load the list of common words (one per line, latin-1 encoded).
# BUG FIX: use a context manager so the file handle is closed; build the
# list with a comprehension instead of a manual append loop.
with open("lexicon_total.txt", encoding="latin-1") as lexicon:
    p_comunes = [line.replace("\n", "") for line in lexicon]

## Modify this line to be your FreeLing installation directory
FREELINGDIR = "/usr/local"
# sys.stdin = io.TextIOWrapper(sys.stdin.buffer, encoding='latin-1')
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')
DATA = FREELINGDIR + "/share/freeling/"
LANG = "es"

freeling.util_init_locale("default")

# create language analyzer
la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

# create options set for maco analyzer. Default values are Ok, except for data files.
op = freeling.maco_options("es")
op.set_data_files(
    "",
    DATA + "common/punct.dat",
    DATA + LANG + "/dicc.src",
    DATA + LANG + "/afixos.dat",
    "",
    DATA + LANG + "/locucions.dat",
    DATA + LANG + "/np.dat",
    DATA + LANG + "/quantities.dat",
    DATA + LANG + "/probabilitats.dat")

# create analyzers
tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
def __init__(self):
    """Set up the French ('fr') complexity analyzer: FreeLing pipeline,
    the Dale-Chall familiar-word list, and the metric configuration flags
    (KANDEL-MODELS, DALE CHALL, SOL plus the rare-word extensions)."""
    lang = 'fr'
    ComplexityLanguage.__init__(self, lang)

    ## Modify this line to be your FreeLing installation directory
    FREELINGDIR = "/home/garciacumbreras18/dist/freeling"
    DATA = FREELINGDIR + "/data/"
    CLASSDIR = ""
    self.lang = lang
    freeling.util_init_locale("default")

    # create language analyzer
    self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

    # create options set for maco analyzer. Default values are Ok, except for data files.
    op = freeling.maco_options(lang)
    op.set_data_files(
        "",
        DATA + "common/punct.dat",
        DATA + lang + "/dicc.src",
        DATA + lang + "/afixos.dat",
        "",
        DATA + lang + "/locucions.dat",
        DATA + lang + "/np.dat",
        DATA + lang + "/quantities.dat",
        DATA + lang + "/probabilitats.dat")

    # create analyzers
    self.tk = freeling.tokenizer(DATA + lang + "/tokenizer.dat")
    self.sp = freeling.splitter(DATA + lang + "/splitter.dat")
    self.mf = freeling.maco(op)

    # activate morpho modules to be used in next call
    self.mf.set_active_options(
        False, True, True, True,   # select which among created
        True, True, False, True,   # submodules are to be used.
        True, True, True, True)    # default: all created submodules are used

    # create tagger and sense annotator
    self.tg = freeling.hmm_tagger(DATA + lang + "/tagger.dat", True, 2)
    self.sen = freeling.senses(DATA + lang + "/senses.dat")

    # Dale-Chall familiar-word list, one or more words per line.
    # BUG FIX: use a context manager instead of open/readlines/close so the
    # handle is released even on error; flatten with a comprehension.
    with open(CLASSDIR + '/home/garciacumbreras18/DaleChall.txt') as f:
        self.listDaleChall = [word for line in f for word in line.strip().split()]

    # config is a list of booleans enabling/disabling each metric:
    #   [ KANDEL MODELS, DALE CHALL, SOL ]
    self.config += [True, True, True]
    self.metricsStr.extend(['KANDEL-MODELS', 'DALE CHALL', 'SOL'])
    self.configExtend += [True, True]
    self.metricsStrExtend.extend(['MEAN RARE WORDS', 'STD RARE WORDS'])
## ----------------------------------------------
## ------------- MAIN PROGRAM ---------------
## ----------------------------------------------

## Modify this line to be your FreeLing installation directory
FREELINGDIR = "/usr/local"
DATA = FREELINGDIR + "/share/freeling/"
LANG = "es"

freeling.util_init_locale("default")

# Language identifier built from the shared identification model.
la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

# Morphological-analyzer options: defaults are fine, only data files are set.
op = freeling.maco_options("es")
op.set_data_files("",
                  DATA + "common/punct.dat",
                  DATA + LANG + "/dicc.src",
                  DATA + LANG + "/afixos.dat",
                  "",
                  DATA + LANG + "/locucions.dat",
                  DATA + LANG + "/np.dat",
                  DATA + LANG + "/quantities.dat",
                  DATA + LANG + "/probabilitats.dat")

# Tokenizer, sentence splitter (with an open session) and morphology.
tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
sp = freeling.splitter(DATA + LANG + "/splitter.dat")
sid = sp.open_session()
mf = freeling.maco(op)
def __init__(self, lang='it'):
    """Set up the Italian complexity analyzer: FreeLing pipeline plus the
    metric configuration flags (punctuation, SCI, ARI, MU, Flesch-Vaca,
    Gulpease and the word/syllable extensions).

    lang: FreeLing language code (default 'it').
    """
    ## Modify this line to be your FreeLing installation directory
    FREELINGDIR = "/home/garciacumbreras18/dist/freeling"
    DATA = FREELINGDIR + "/data/"
    self.DATA = DATA
    self.lang = lang
    freeling.util_init_locale("default")

    # create language analyzer
    self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

    # create options set for maco analyzer. Default values are Ok, except for
    # data files. NOTE: the quantities file is deliberately left empty ("").
    # CONSISTENCY FIX: use the local DATA/lang throughout instead of mixing
    # them with the equivalent self.DATA/self.lang.
    op = freeling.maco_options(lang)
    op.set_data_files(
        "",
        DATA + "common/punct.dat",
        DATA + lang + "/dicc.src",
        DATA + lang + "/afixos.dat",
        "",
        DATA + lang + "/locucions.dat",
        DATA + lang + "/np.dat",
        "",
        DATA + lang + "/probabilitats.dat")

    # create analyzers
    self.tk = freeling.tokenizer(DATA + lang + "/tokenizer.dat")
    self.sp = freeling.splitter(DATA + lang + "/splitter.dat")
    self.mf = freeling.maco(op)

    # activate morpho modules to be used in next call
    self.mf.set_active_options(
        False, True, True, True,   # select which among created
        True, True, False, True,   # submodules are to be used.
        True, True, True, True)    # default: all created submodules are used

    # create tagger and sense annotator
    self.tg = freeling.hmm_tagger(DATA + lang + "/tagger.dat", True, 2)
    self.sen = freeling.senses(DATA + lang + "/senses.dat")

    # config is a list of booleans enabling/disabling each metric:
    #   [ PUNCTUATION MARKS, SCI, ARI, MU, Flesch-Vaca, Gulpease ]
    # If config == None, all supported complexity metrics are computed.
    self.config = [True, True, True, True, True, True]
    self.metricsIt = ['AVERAGE PUNCTUATION MARKS', 'SCI', 'ARI', 'MU',
                      'FLESCH-VACA', 'GULPEASE']
    self.configExtend = [True, True, True, True, True]
    self.metricsItExtend = ['MEAN WORDS', 'STD WORDS', 'COMPLEX SENTENCES',
                            'MEAN SYLLABLES', 'STD SYLLABLES']
# Initialize locale and build a FreeLing pipeline for Portuguese ("pt").
# DATA and LANG are expected to be defined elsewhere in this file.
freeling.util_init_locale("default")

# Morphological-analyzer options.
op = freeling.maco_options("pt")
# NOTE(review): positional flags per FreeLing's maco_options API; the last
# module is disabled — confirm which module each position maps to.
op.set_active_modules(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
op.set_data_files("usermap.dat",
                  DATA + LANG + "/locucions.dat",
                  DATA + LANG + "/quantities.dat",
                  DATA + LANG + "/afixos.dat",
                  DATA + LANG + "/probabilitats.dat",
                  DATA + LANG + "/dicc.src",
                  DATA + LANG + "/np.dat",
                  DATA + "common/punct.dat",
                  "")
# Disable retokenization of contractions.
op.set_retok_contractions(False)

# Language identifier and the analyzer chain.
lg = freeling.lang_ident(DATA + "common/lang_ident/ident-few.dat")
mf = freeling.maco(op)
tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
sp = freeling.splitter(DATA + LANG + "/splitter.dat")
tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", 1, 2)
sen = freeling.senses(DATA + LANG + "/senses.dat");
ukb = freeling.ukb(DATA + LANG + "/ukb.dat")


def tag(obj):
    # Tag the sentence in obj["text"]: identify its language, tokenize,
    # split into sentences, then run morphological analysis and PoS tagging.
    # NOTE(review): the function body appears truncated in this view — it
    # builds `out`/`lang`/`ls` but no return or further use is visible here.
    sent = obj["text"]
    out = obj
    lang = lg.identify_language(sent)
    l = tk.tokenize(sent)
    ls = sp.split(l, 1)  # old value 0
    ls = mf.analyze(ls)
    ls = tg.analyze(ls)
def fullParsing(self, text, sentimentText):
    """Dependency-parse ``text`` and locate the node matching the
    ``sentimentText`` target phrase.

    Builds a full FreeLing pipeline (Spanish), parses the target phrase
    into a flat token list, then parses the input text and walks each
    sentence's parse tree with ``self.getTypeNode`` until a match is found.

    Returns the (finalType, finalList) pair produced by ``getTypeNode`` for
    the first matching sentence; both are None when nothing matches.
    """
    ## Modify this line to be your FreeLing installation directory
    FREELINGDIR = "/usr/local"
    DATA = FREELINGDIR + "/share/freeling/"
    LANG = "es"
    freeling.util_init_locale("default")

    # create language analyzer
    la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

    # create options set for maco analyzer. Default values are Ok, except for data files.
    op = freeling.maco_options("es")
    op.set_data_files(
        "",
        DATA + "common/punct.dat",
        DATA + LANG + "/dicc.src",
        DATA + LANG + "/afixos.dat",
        "",
        DATA + LANG + "/locucions.dat",
        DATA + LANG + "/np.dat",
        DATA + LANG + "/quantities.dat",
        DATA + LANG + "/probabilitats.dat")

    # create analyzers
    tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
    sp = freeling.splitter(DATA + LANG + "/splitter.dat")
    sid = sp.open_session()
    mf = freeling.maco(op)

    # activate morpho modules to be used in next call
    mf.set_active_options(
        False, True, True, True,   # select which among created
        True, True, False, True,   # submodules are to be used.
        True, True, True, True)    # default: all created submodules are used

    # create tagger, sense annotator, and parsers
    # NOTE(review): tg and sen are created but never applied to the input
    # below (analysis goes tokenize -> split -> maco -> parser -> dep);
    # confirm this is intended.
    tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
    sen = freeling.senses(DATA + LANG + "/senses.dat")
    parser = freeling.chart_parser(DATA + LANG + "/chunker/grammar-chunk.dat")
    dep = freeling.dep_txala(DATA + LANG + "/dep_txala/dependences.dat",
                             parser.get_start_symbol())

    # split Target as a list
    #print(sentimentText)
    # Terminate the target with a period so the splitter sees a sentence.
    sentimentText += '.'
    # Drop a leading '@' (Twitter-style mention).
    if sentimentText[0] == '@':
        sentimentText = sentimentText[1:]
    target = tk.tokenize(sentimentText)
    targets = sp.split(sid, target, True)
    targets = mf.analyze(targets)
    targets = parser.analyze(targets)
    targets = dep.analyze(targets)
    # Flatten the target's parse tree; if the splitter returns several
    # sentences, only the last one's list survives the loop.
    for s in targets:
        targetr = s.get_parse_tree()
        targetList = self.getTreeAsList(targetr, 0)
        # Drop the final element — presumably the '.' appended above; confirm.
        del targetList[-1]
        #print(targetList)

    # process input text
    lin = text
    if lin[0] == '@':
        lin = lin[1:]
    #while (lin) :
    l = tk.tokenize(lin)
    ls = sp.split(sid, l, True)
    ls = mf.analyze(ls)
    ls = parser.analyze(ls)
    ls = dep.analyze(ls)

    finalType = None
    finalList = None

    ## output results
    # Keep the first non-None (type, list) found across the sentences.
    for s in ls:
        tr = s.get_parse_tree()
        #self.printTree(tr, 0);
        wordType, wordList = self.getTypeNode(tr, 0, targetList)
        if finalType is None:
            if wordType is not None:
                finalType = wordType
                finalList = wordList

    # clean up
    sp.close_session(sid)
    return finalType, finalList
def __init__(self, text):
    """Tokenize, tag and parse ``self.text`` with FreeLing (Spanish),
    keeping in ``self._cleaned_text`` a "word-TAG" entry for every token
    whose tag is not a stop category (adpositions, interjections,
    conjunctions, punctuation, determiners or pronouns)."""
    super().__init__(text)
    self.stop_words = set(stopwords.words('spanish') + list(punctuation))
    self._cleaned_text = list()

    freeling.util_init_locale("default")
    # (The unused lang_ident instance the original built here was removed.)

    # create options set for maco analyzer. Default values are Ok, except for data files.
    op = freeling.maco_options("es")
    op.set_data_files(
        "",
        DATA + "common/punct.dat",
        DATA + LANG + "/dicc.src",
        DATA + LANG + "/afixos.dat",
        "",
        DATA + LANG + "/locucions.dat",
        DATA + LANG + "/np.dat",
        DATA + LANG + "/quantities.dat",
        DATA + LANG + "/probabilitats.dat")

    # create analyzers
    tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
    sp = freeling.splitter(DATA + LANG + "/splitter.dat")
    sid = sp.open_session()
    # BUG FIX: the splitter session was never released in the original;
    # close it even if analysis raises.
    try:
        mf = freeling.maco(op)
        # activate all morpho submodules for the next analyze() call
        mf.set_active_options(
            True, True, True, True,
            True, True, True, True,
            True, True, True, True)

        # create tagger, sense annotator and parser
        tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
        sen = freeling.senses(DATA + LANG + "/senses.dat")
        parser = freeling.chart_parser(DATA + LANG + "/chunker/grammar-chunk.dat")

        tokens = tk.tokenize(self.text)
        ls = sp.split(sid, tokens, False)
        ls = mf.analyze(ls)
        ls = tg.analyze(ls)
        ls = sen.analyze(ls)
        ls = parser.analyze(ls)

        # Tag prefixes to discard: adpositions (S), interjections (I),
        # conjunctions (C), punctuation (F), determiners (D), pronouns (P).
        skipped = ("S", "I", "C", "F", "D", "P")
        for sentence in ls:
            for w in sentence.get_words():
                tag = w.get_tag()
                if not tag.startswith(skipped):
                    self._cleaned_text.append("{}-{}".format(w.get_form(), tag))
    finally:
        sp.close_session(sid)