    def __init__(self):
        ComplexityLanguage.__init__(self, 'en')

        # create parsers
        self.parser = freeling.chart_parser(self.DATA + self.lang +
                                            "/chunker/grammar-chunk.dat")
        self.dep = freeling.dep_txala(
            self.DATA + self.lang + "/dep_txala/dependences.dat",
            self.parser.get_start_symbol())
        """
        config es una lista de valores booleanos que activa o desactivan el cálculo de una medida
        config = [
            True|False,         # MAXIMUN EMBEDDING DEPTH OF SENTENCE (MaxDEPTH)
            True|False,         # MINIMUN EMBEDDING DEPTH OF SENTENCE (MinDEPTH)
            True|False,         # AVERAGE EMBEDDING DEPTH OF SENTENCE (MeanDEPTH)
            True|False,         # FOG
            True|False,         # FLESCH
            True|False,         # FLESCH-KINCAID
            True|False,         # SMOG
            ]
        """
        self.config += [True, True, True, True, True, True, True, True]
        self.metricsStr.extend([
            'MaxDEPTH', 'MinDEPTH', 'MeanDEPTH', 'StdDEPTH', 'FOG', 'FLESCH',
            'FLESCH-KINCAID', 'SMOG'
        ])
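        # A hedged sketch (assumed API, not from the original class) of how the
        # flags in self.config could gate metric computation elsewhere:
        #
        #   for enabled, name, value in zip(self.config, self.metricsStr, values):
        #       if enabled:
        #           metrics[name] = value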
Example #2
    def inicia(self):
        FREELINGDIR = "/usr/local"

        DATA = FREELINGDIR + "/share/freeling/"
        LANG = "es"

        freeling.util_init_locale("default")
        # create options set for maco analyzer. Default values are Ok, except for data files.
        op = freeling.maco_options("es")
        op.set_active_modules(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0)
        op.set_data_files("", DATA + LANG + "/locucions.dat",
                          DATA + LANG + "/quantities.dat",
                          DATA + LANG + "/afixos.dat",
                          DATA + LANG + "/probabilitats.dat",
                          DATA + LANG + "/dicc.src", DATA + LANG + "/np.dat",
                          DATA + "common/punct.dat",
                          DATA + LANG + "/corrector/corrector.dat")

        # create analyzers
        self.tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
        self.sp = freeling.splitter(DATA + LANG + "/splitter.dat")
        self.mf = freeling.maco(op)

        self.tg = freeling.hmm_tagger("es", DATA + LANG + "/tagger.dat", 1, 2)
        self.sen = freeling.senses(DATA + LANG + "/senses.dat")
        self.ner = freeling.ner(DATA + LANG + "/ner/ner-ab.dat")

        self.parser = freeling.chart_parser(DATA + LANG +
                                            "/chunker/grammar-chunk.dat")
        self.dep = freeling.dep_txala(DATA + LANG + "/dep/dependences.dat",
                                      self.parser.get_start_symbol())
Example #3
    def __init__(self, text):
        super().__init__(text)
        freeling.util_init_locale("default")
        self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")
        op = freeling.maco_options("es")
        op.set_data_files(
            "",
            DATA + "common/punct.dat",
            DATA + LANG + "/dicc.src",
            DATA + LANG + "/afixos.dat",
            "",
            DATA + LANG + "/locucions.dat",
            DATA + LANG + "/np.dat",
            DATA + LANG + "/quantities.dat",
            DATA + LANG + "/probabilitats.dat"
        )

        # create analyzers
        self.tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
        self.sp = freeling.splitter(DATA + LANG + "/splitter.dat")
        self.sid = self.sp.open_session()
        self.mf = freeling.maco(op)

        # activate morpho modules to be used in next call
        self.mf.set_active_options(
            False,  # umap User Map
            True,   # num Number Detection
            True,   # pun Punctuation Detection
            True,   # dat Date Detection
            True,   # dic Dictionary Search
            True,   # aff Affix Analysis
            False,  # com Compound Analysis
            True,   # rtk Retokenize Contractions
            True,   # mw Multiword Recognition
            True,   # ner Named Entity Recognition
            True,   # qt Quantity Recognition
            True    # prb Probability Assignment and Guesser
        )  # default: all created submodules are used

        # create tagger, sense annotator, and parsers
        self.tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
        self.sen = freeling.senses(DATA + LANG + "/senses.dat")
        self.parser = freeling.chart_parser(DATA + LANG + "/chunker/grammar-chunk.dat")
        self.dep = freeling.dep_txala(DATA + LANG + "/dep_txala/dependences.dat", self.parser.get_start_symbol())
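        # Hypothetical helper (an assumption, not part of the original class):
        # run the standard FreeLing chain with the analyzers created above, in
        # the same order the process_list/process_file examples below use.
    def analyze_text(self, text):
        lw = self.tk.tokenize(text)             # text -> word list
        ls = self.sp.split(self.sid, lw, True)  # words -> sentences (session split)
        ls = self.mf.analyze(ls)                # morphological analysis
        ls = self.tg.analyze(ls)                # PoS tagging
        ls = self.sen.analyze(ls)               # sense annotation
        ls = self.parser.analyze(ls)            # chunk parsing
        ls = self.dep.analyze(ls)               # dependency parsing
        return ls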
def process_list(student_list, prompt):
    # tk, sp, morfo, lpath and the essays_*_parsed lists are assumed to be
    # module-level objects created elsewhere in the original script
    for essay in student_list:
        level = essay[0]
        text = essay[1]

        # create tagger
        tagger = freeling.hmm_tagger(lpath + "tagger.dat", True, 2)

        # create sense annotator
        sen = freeling.senses(lpath + "senses.dat")

        # create sense disambiguator
        wsd = freeling.ukb(lpath + "ukb.dat")

        # create dependency parser
        parser = freeling.chart_parser(lpath + "/chunker/grammar-chunk.dat")
        dep = freeling.dep_txala(lpath + "/dep_txala/dependences.dat",
                                 parser.get_start_symbol())

        # tokenize input line into a list of words
        lw = tk.tokenize(text)
        # split list of words in sentences, return list of sentences
        ls = sp.split(lw)

        # perform morphosyntactic analysis and disambiguation
        ls = morfo.analyze(ls)
        ls = tagger.analyze(ls)

        # annotate and disambiguate senses
        ls = sen.analyze(ls)
        ls = wsd.analyze(ls)
        # parse sentences
        ls = parser.analyze(ls)
        ls = dep.analyze(ls)

        # get the parsed essay text
        essay_parse = ProcessSentences(ls)

        # append a (level, parsed_text) tuple to the appropriate essay list
        if prompt == "V":
            essays_vacation_parsed.append((level, essay_parse))

        elif prompt == "F":
            essays_famous_parsed.append((level, essay_parse))
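# Hypothetical driver (assumed shapes, not from the original script): each
# essay is a (level, text) tuple and the prompt code routes the parsed result
# to the matching list.
essays_vacation_parsed = []
essays_famous_parsed = []
process_list([("B1", "Mis vacaciones fueron estupendas.")], "V")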
    def __init__(self):

        ComplexityLanguage.__init__(self, 'es')
        ## Modify this line to be your FreeLing installation directory

        # create parsers
        self.parser = freeling.chart_parser(self.DATA + self.lang + "/chunker/grammar-chunk.dat")
        self.dep = freeling.dep_txala(self.DATA + self.lang + "/dep_txala/dependences.dat",
                                      self.parser.get_start_symbol())
     
        # Read the CREA frequency list
        CLASSDIR = "/home/garciacumbreras18/"

        f = open(CLASSDIR + 'CREA_total.txt')
        lines = f.readlines()
        f.close()
        crea = {}
        # words not among the 1000 most frequent in CREA count as low-frequency words
        for l in lines[1:1000]:
            data = l.strip().split()
            crea[data[1]] = float(data[2].replace(',', ''))
        self.crea = crea
     
     """
     config es una lista de valores booleanos que activa o desactivan el cálculo de una medida
     config = [
         True|False,         # MAXIMUN EMBEDDING DEPTH OF SENTENCE (MaxDEPTH)
         True|False,         # MINIMUN EMBEDDING DEPTH OF SENTENCE (MinDEPTH)
         True|False,         # AVERAGE EMBEDDING DEPTH OF SENTENCE (MeanDEPTH)
         True|False,         # LC
         True|False,         # SSR
         True|False,         # HUERTA
         True|False,         # IFSZ
         True|False,         # POLINI
         True|False,         # MINIMUN AGE
         True|False,         # SOL
         True|False,         # CRAWFORD
         ]
     """
     self.config += [True, True, True, True, True, True, True, True, True, True, True, True]
     self.metricsStr.extend(['MaxDEPTH','MinDEPTH', 'MeanDEPTH', 'StdDEPTH', 'LC','SSR', 'HUERTA', 'IFSZ', 'POLINI', 'MINIMUN AGE', 'SOL', 'CRAWFORD'])
     
     self.configExtend += [True, True]
     self.metricsStrExtend.extend(['MEAN RARE WORDS', 'STD RARE WORDS'])
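    # A hedged sketch (an assumed helper, not from the original class) of how
    # the MEAN/STD RARE WORDS metrics could be computed from self.crea: a word
    # absent from the 1000 most frequent CREA entries counts as rare.
    def rareWordStats(self, sentences):
        import statistics
        ratios = []
        for words in sentences:  # each sentence given as a list of word strings
            if words:
                rare = sum(1 for w in words if w.lower() not in self.crea)
                ratios.append(rare / len(words))
        mean = statistics.mean(ratios) if ratios else 0.0
        std = statistics.stdev(ratios) if len(ratios) > 1 else 0.0
        return mean, std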
Example #6
    def inicializa(self):

        FREELINGDIR = "/usr/local"

        DATA = FREELINGDIR + "/share/freeling/"
        LANG = self.lang

        freeling.util_init_locale("default")

        # create language analyzer
        self.la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

        # options for the maco analyzer
        op = freeling.maco_options("es")
        op.set_active_modules(0, 1, 1, 1, 1, 1, 1, 1, 1, 1)
        op.set_data_files("", DATA + LANG + "/locucions.dat",
                          DATA + LANG + "/quantities.dat",
                          DATA + LANG + "/afixos.dat",
                          DATA + LANG + "/probabilitats.dat",
                          DATA + LANG + "/dicc.src", DATA + LANG + "/np.dat",
                          DATA + "common/punct.dat",
                          DATA + LANG + "/corrector/corrector.dat")

        # create analyzers
        self.tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
        self.sp = freeling.splitter(DATA + LANG + "/splitter.dat")
        self.mf = freeling.maco(op)

        self.tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", 1, 2)
        self.sen = freeling.senses(DATA + LANG + "/senses.dat")
        self.nec = freeling.nec(DATA + LANG + "/nerc/nec/nec-ab-rich.dat")
        # self.ner=freeling.nec(DATA+LANG+"/ner/ner-ab.dat");

        self.parser = freeling.chart_parser(DATA + LANG +
                                            "/chunker/grammar-chunk.dat")
        self.dep = freeling.dep_txala(DATA + LANG + "/dep/dependences.dat",
                                      self.parser.get_start_symbol())

        # `my` is assumed to be mysql.connector, imported elsewhere in the
        # original script; the credentials are redacted in the source
        con_data = {'user': '******', 'password': '******', 'host': '127.0.0.1',
                    'database': 'agiria', 'raise_on_warnings': True,
                    'autocommit': True, 'buffered': True}

        self.con = my.connect(**con_data)
def process_file(essay_lst, x):
    # tk, sp, morfo, lpath and the essays_*_tagged lists are assumed to be
    # module-level objects created elsewhere in the original script
    for entry in essay_lst:
        essay_id = entry[0]
        essay = entry[1]

        # create tagger
        tagger = freeling.hmm_tagger(lpath + "tagger.dat", True, 2)

        # create sense annotator
        sen = freeling.senses(lpath + "senses.dat")

        # create sense disambiguator
        wsd = freeling.ukb(lpath + "ukb.dat")

        # create dependency parser
        parser = freeling.chart_parser(lpath + "/chunker/grammar-chunk.dat")
        dep = freeling.dep_txala(lpath + "/dep_txala/dependences.dat",
                                 parser.get_start_symbol())

        # tokenize input line into a list of words
        lw = tk.tokenize(essay)
        # split list of words in sentences, return list of sentences
        ls = sp.split(lw)

        # perform morphosyntactic analysis and disambiguation
        ls = morfo.analyze(ls)
        ls = tagger.analyze(ls)

        # annotate and disambiguate senses
        ls = sen.analyze(ls)
        ls = wsd.analyze(ls)
        # parse sentences
        ls = parser.analyze(ls)
        ls = dep.analyze(ls)

        # do whatever is needed with processed sentences
        if x == 2:
            essays_special_tagged.append((essay_id, ProcessSentences(ls)))
        elif x == 3:
            essays_terrible_tagged.append((essay_id, ProcessSentences(ls)))
Example #9
    def config_files(self, lang, data_dir, data_dir_common):

        data_dir += lang + "/"
        data_conf = data_dir + "nerc/nec/nec.cfg"

        opt = freeling.maco_options(lang)

        # (usr, pun, dic, aff, comp, loc, nps, qty, prb)
        opt.set_data_files("",
                           data_dir_common + "punct.dat",
                           data_dir + "dicc.src",
                           data_dir + "afixos.dat",
                           data_dir + "compounds.dat",
                           data_dir + "locucions.dat",
                           data_dir + "np.dat",
                           data_dir + "quantities.dat",
                           data_dir + "probabilitats.dat")

        self.mf = freeling.maco(opt)

        # (umap, num, pun, dat, dic, aff, comp, rtk, mw, ner, qt, prb)
        self.mf.set_active_options(False, True, True, True, False, True,
                                   True, True, True, True, True, True)

        self.tk = freeling.tokenizer(data_dir + "tokenizer.dat")
        self.sp = freeling.splitter(data_dir + "splitter.dat")

        self.tg = freeling.hmm_tagger(data_dir + "tagger.dat", True, 2)
        self.sen = freeling.senses(data_dir + "senses.dat")

        self.parser = freeling.chart_parser(data_dir + "chunker/grammar-chunk.dat")

        self.dep = freeling.dep_txala(data_dir + "dep_txala/dependences.dat",
                                      self.parser.get_start_symbol())

        self.nec = freeling.nec(data_conf)
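        # Hypothetical usage (an assumption, not part of the original class):
        # chain the modules configured above; nec is applied after tagging,
        # following the usual FreeLing ordering.
    def tag_entities(self, text):
        ls = self.sp.split(self.tk.tokenize(text))  # tokenize and split
        ls = self.mf.analyze(ls)                    # morphology (incl. NE detection)
        ls = self.tg.analyze(ls)                    # PoS tagging
        ls = self.sen.analyze(ls)                   # sense annotation
        ls = self.nec.analyze(ls)                   # NE classification
        return ls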
Example #10

# NOTE: the opening of this example is missing in the source; the preamble up
# to set_data_files is reconstructed from the identifiers used below and from
# the identical set_data_files call in Example #12.
import freeling

FREELINGDIR = "/usr/local"
DATA = FREELINGDIR + "/share/freeling/"
LANG = "es"

freeling.util_init_locale("default")

op = freeling.maco_options(LANG)
op.set_data_files("", DATA + "common/punct.dat", DATA + LANG + "/dicc.src",
                  DATA + LANG + "/afixos.dat", "", DATA + LANG + "/locucions.dat",
                  DATA + LANG + "/np.dat", DATA + LANG + "/quantities.dat",
                  DATA + LANG + "/probabilitats.dat")

tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
sp = freeling.splitter(DATA + LANG + "/splitter.dat")
sid = sp.open_session()
mf = freeling.maco(op)

# (umap, num, pun, dat, dic, aff, com, rtk, mw, ner, qt, prb)
mf.set_active_options(False, False, True, False,
                      True, True, False, True,
                      False, True, False, True)

tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
sen = freeling.senses(DATA + LANG + "/senses.dat")
parser = freeling.chart_parser(DATA + LANG + "/chunker/grammar-chunk.dat")
dep = freeling.dep_txala(DATA + LANG + "/dep_txala/dependences.dat", parser.get_start_symbol())


# process_file and the input/output file handles are assumed to be defined
# earlier in the original script (not shown here)
process_file(input_training_file, output_training_file, [sid, tk, sp, mf, tg, sen, parser, dep])
process_file(input_testing_file, output_testing_file, [sid, tk, sp, mf, tg, sen, parser, dep])
process_file(input_pruebas_file, output_pruebas_file, [sid, tk, sp, mf, tg, sen, parser, dep])

input_training_file.close()
input_pruebas_file.close()
input_testing_file.close()

output_pruebas_file.close()
output_testing_file.close()
output_training_file.close()
    
sp.close_session(sid)
Example #11
# The opening of this example is missing in the source. The preamble below is
# reconstructed from the identifiers used further down (sys, la, tk, sp, sid,
# mf) and from Example #12, where the same set_active_options call appears in
# full.
import sys
import freeling

FREELINGDIR = "/usr/local"
DATA = FREELINGDIR + "/share/freeling/"
LANG = "es"

freeling.util_init_locale("default")
la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

op = freeling.maco_options(LANG)
op.set_data_files(
    "", DATA + "common/punct.dat", DATA + LANG + "/dicc.src",
    DATA + LANG + "/afixos.dat", "", DATA + LANG + "/locucions.dat",
    DATA + LANG + "/np.dat", DATA + LANG + "/quantities.dat",
    DATA + LANG + "/probabilitats.dat")

tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
sp = freeling.splitter(DATA + LANG + "/splitter.dat")
sid = sp.open_session()
mf = freeling.maco(op)

# select which of the created submodules are to be used
mf.set_active_options(False, True, True, True,
                      True, True, False, True,
                      True, True, True, True)
# default: all created submodules are used

# create tagger, sense annotator, and parsers
tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
sen = freeling.senses(DATA + LANG + "/senses.dat")
parser = freeling.chart_parser(DATA + LANG + "/chunker/grammar-chunk.dat")
dep = freeling.dep_txala(DATA + LANG + "/dep_txala/dependences.dat",
                         parser.get_start_symbol())

# process input text
lin = sys.stdin.readline()

print("Text language is: " +
      la.identify_language(lin, ["es", "ca", "en", "it"]) + "\n")

while lin:

    l = tk.tokenize(lin)
    ls = sp.split(sid, l, False)

    ls = mf.analyze(ls)
    ls = tg.analyze(ls)
    ls = sen.analyze(ls)
    ls = parser.analyze(ls)
    ls = dep.analyze(ls)

    # the rest of the loop body was truncated in the source; reading the next
    # line is restored here so the loop can terminate
    lin = sys.stdin.readline()

sp.close_session(sid)
Example #12
    def fullParsing(self, text, sentimentText):

        ## Modify this line to be your FreeLing installation directory
        FREELINGDIR = "/usr/local"

        DATA = FREELINGDIR + "/share/freeling/"
        LANG = "es"

        freeling.util_init_locale("default")

        # create language analyzer
        la = freeling.lang_ident(DATA + "common/lang_ident/ident.dat")

        # create options set for maco analyzer. Default values are Ok, except for data files.
        op = freeling.maco_options("es")
        op.set_data_files(
            "", DATA + "common/punct.dat", DATA + LANG + "/dicc.src",
            DATA + LANG + "/afixos.dat", "", DATA + LANG + "/locucions.dat",
            DATA + LANG + "/np.dat", DATA + LANG + "/quantities.dat",
            DATA + LANG + "/probabilitats.dat")

        # create analyzers
        tk = freeling.tokenizer(DATA + LANG + "/tokenizer.dat")
        sp = freeling.splitter(DATA + LANG + "/splitter.dat")
        sid = sp.open_session()
        mf = freeling.maco(op)

        # activate morpho modules to be used in next call
        # (select which of the created submodules are to be used)
        mf.set_active_options(
            False,  # umap User Map
            True,   # num Number Detection
            True,   # pun Punctuation Detection
            True,   # dat Date Detection
            True,   # dic Dictionary Search
            True,   # aff Affix Analysis
            False,  # com Compound Analysis
            True,   # rtk Retokenize Contractions
            True,   # mw Multiword Recognition
            True,   # ner Named Entity Recognition
            True,   # qt Quantity Recognition
            True)   # prb Probability Assignment and Guesser
        # default: all created submodules are used

        # create tagger, sense annotator, and parsers
        tg = freeling.hmm_tagger(DATA + LANG + "/tagger.dat", True, 2)
        sen = freeling.senses(DATA + LANG + "/senses.dat")
        parser = freeling.chart_parser(DATA + LANG +
                                       "/chunker/grammar-chunk.dat")
        dep = freeling.dep_txala(DATA + LANG + "/dep_txala/dependences.dat",
                                 parser.get_start_symbol())

        # split the target expression into a token list
        #print(sentimentText)
        sentimentText += '.'
        if sentimentText[0] == '@':
            sentimentText = sentimentText[1:]
        target = tk.tokenize(sentimentText)
        targets = sp.split(sid, target, True)

        targets = mf.analyze(targets)
        targets = parser.analyze(targets)
        targets = dep.analyze(targets)

        targetList = []  # guard: stays empty if no parse tree is produced
        for s in targets:
            targetr = s.get_parse_tree()
            targetList = self.getTreeAsList(targetr, 0)
            del targetList[-1]  # drop the '.' appended to sentimentText above
        #print(targetList)

        # process input text
        lin = text
        if lin[0] == '@':
            lin = lin[1:]

        #while (lin) :

        l = tk.tokenize(lin)
        ls = sp.split(sid, l, True)

        ls = mf.analyze(ls)
        ls = parser.analyze(ls)
        ls = dep.analyze(ls)

        finalType = None
        finalList = None

        ## output results
        for s in ls:
            tr = s.get_parse_tree()
            #self.printTree(tr, 0);
            wordType, wordList = self.getTypeNode(tr, 0, targetList)
            if finalType is None and wordType is not None:
                finalType = wordType
                finalList = wordList
        # clean up
        sp.close_session(sid)

        return finalType, finalList
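# Hypothetical usage (names assumed, not from the original source):
#   analyzer = TweetParser()   # the class that defines fullParsing
#   node_type, node_words = analyzer.fullParsing(
#       "Me encanta la cámara del teléfono", "la cámara")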