Esempio n. 1
0
    def test_one(text, debug, outfile):
        """Analyze ``text`` and save the distinct analyses to ``outfile``.

        Builds a ``qanalex.Analex`` analyzer (cache directory ``cache/``, with
        ``disable_allow_cache_use()`` applied), runs ``check_text`` on the
        input, flattens the returned lists of analysis objects into a pandas
        DataFrame and writes a fixed subset of columns as tab-separated UTF-8.

        Args:
            text: Text passed to ``analyzer.check_text``.
            debug: Flag forwarded unchanged to ``analyzer.set_debug``.
            outfile: Destination given to ``DataFrame.to_csv``.

        Raises:
            KeyError: If any of the selected columns is missing from the
                analysis results (for example when nothing was analyzed).
        """
        analyzer = qanalex.Analex(cache_path="cache/")
        analyzer.disable_allow_cache_use()
        # Honour the caller's debug choice instead of forcing it on.
        analyzer.set_debug(debug)
        result = analyzer.check_text(text)

        # check_text returns a sequence of lists of analysis objects; each
        # object's attribute dict becomes one row of the table.
        adapted_result = [
            analyzed.__dict__
            for analyzed_list in result
            for analyzed in analyzed_list
        ]

        df = pd.DataFrame(adapted_result)
        # Function-call form prints the same on Python 2 and Python 3.
        print(df.columns.values)
        display = df[[
            'vocalized', 'unvocalized', 'word', 'stem', 'type', 'root',
            'original', "tags"
        ]]
        display = display.drop_duplicates()
        display.to_csv(outfile, sep='\t', encoding="utf8")
Esempio n. 2
0
    def test_quran(text, debug, outfile):
        """Analyze fully vocalized ``text`` and export the analyses as TSV.

        Builds a ``qanalex.Analex`` analyzer (cache directory ``cache/``, with
        ``disable_allow_cache_use()`` and ``enable_fully_vocalized_input()``
        applied), runs ``check_text`` and writes three tab-separated UTF-8
        files:

        * ``outfile`` -- all distinct rows of the selected columns;
        * ``outfile + ".unknown.csv"`` -- rows whose ``type`` is ``"unknown"``;
        * ``outfile + ".known.csv"`` -- every other row.

        The per-column non-null counts of both subsets are printed at the end.

        Args:
            text: Text passed to ``analyzer.check_text``.
            debug: Flag forwarded to ``analyzer.set_debug``.
            outfile: Path of the main output file; it must be a string because
                the suffixes for the two extra files are concatenated to it.

        Raises:
            KeyError: If any of the selected columns is missing from the
                analysis results.
        """
        analyzer = qanalex.Analex(cache_path="cache/")
        analyzer.disable_allow_cache_use()
        analyzer.enable_fully_vocalized_input()
        analyzer.set_debug(debug)
        result = analyzer.check_text(text)

        # check_text returns a sequence of lists of analysis objects; each
        # object's attribute dict becomes one row of the table.
        adapted_result = [
            analyzed.__dict__
            for analyzed_list in result
            for analyzed in analyzed_list
        ]

        df = pd.DataFrame(adapted_result)
        # Function-call form prints the same on Python 2 and Python 3.
        print(df.columns.values)
        display = df[[
            'vocalized', 'unvocalized', 'word', 'stem', 'type', 'root',
            'original', "tags"
        ]]
        display = display.drop_duplicates()
        display.to_csv(outfile, sep='\t', encoding="utf8")

        # Bracket access is used for the column: attribute access would
        # silently resolve to a DataFrame attribute if one were ever named
        # "type".
        display_unknown = display[display["type"] == "unknown"]
        display_unknown.to_csv(outfile + ".unknown.csv",
                               sep='\t',
                               encoding='utf8')
        display_known = display[display["type"] != "unknown"]
        display_known.to_csv(outfile + ".known.csv", sep='\t', encoding='utf8')
        print("Unknown ", display_unknown.count())
        print("known ", display_known.count())
Esempio n. 3
0
#!/usr/bin/env python

import componentes
import analex
import anasintac
import flujo
import string
import sys
import AST

from sys import argv
from sets import ImmutableSet

############################################################################
#
#  Function: __main__
#  Task: Main program -- runs the syntactic analyser over the file named
#        on the command line
#  Parameters: --
#  Returns: --
#
############################################################################
if __name__ == "__main__":
    # Exactly one argument is expected: the path of the file to analyse.
    if len(argv) != 2:
        sys.exit("Usage: %s <source-file>" % argv[0])
    script, filename = argv

    # The context manager closes the input file once the analysis is over,
    # including when the analysis raises.
    with open(filename) as txt:
        fl = flujo.Flujo(txt)
        # Bound to a new name so the imported `analex` module is not rebound.
        lexer = analex.Analex(fl)

        anasintac.Anasintac().Analiza(lexer)
Esempio n. 4
0
import io

import pandas as pd

# NOTE(review): `filename` is not assigned in this part of the script; it is
# expected to be defined earlier -- confirm.
try:
    # io.open decodes while reading and behaves the same on Python 2 and 3;
    # the context manager closes the handle.
    with io.open(filename, encoding='utf8') as myfile:
        text = myfile.read()
except (IOError, OSError, UnicodeError):
    # Unreadable or badly encoded file: fall back to the sample sentence.
    text = u"السلام عليكم يستعملونهم"
    print(" given text")
else:
    # An empty file falls back to the sample sentence as well.
    if not text:
        text = u"السلام عليكم يستعملونهم"

debug = False
limit = 500
analyzer = analex.Analex(cache_path="cache/")
analyzer.set_debug(debug)
result = analyzer.check_text(text)

# check_text returns a sequence of lists of analysis objects; each object's
# attribute dict becomes one row of the table.
adapted_result = [
    analyzed.__dict__
    for analyzed_list in result
    for analyzed in analyzed_list
]

df = pd.DataFrame(adapted_result)
print(df.columns.values)
print(df.head(12))
display = df[['word', 'stem', 'type', 'root']]
Esempio n. 5
0
                # <Signo> -> +
                pass
            elif self.componente.cat == "OpAdd":
                # <Signo> -> -
                pass
            else:
                raise errores.ErrorSintactico(
                    "Error sintáctico analizaTermino()")
        except errores.Error as err:
            sys.stderr.write("%s\n" % err)

    def sincroniza(self, sinc):
        """Advance until the current component's category is in ``sinc``.

        ``"eof"`` is always accepted as a stopping category in addition to the
        ones supplied by the caller.

        Args:
            sinc: Iterable of category names to stop at. It is copied, so the
                caller's collection is never modified.

        Note:
            Relies on ``self.avanza()`` updating ``self.componente``; the loop
            ends only once a component with one of the stopping categories is
            reached.
        """
        # A set cannot be OR-ed with a bare string (the operator requires a
        # set operand), so build a private copy and add the sentinel to it.
        categorias = set(sinc)
        categorias.add("eof")  # make sure eof is always a stopping category
        while self.componente.cat not in categorias:
            self.avanza()


if __name__ == "__main__":
    # Script entry point: parse the file named on the command line and
    # report any analysis error on stderr.
    # Exactly one argument is expected: the path of the file to analyse.
    if len(argv) != 2:
        sys.exit("Usage: %s <source-file>" % argv[0])
    script, filename = argv

    # The context manager closes the input file once parsing is over,
    # including when parsing raises.
    with open(filename) as txt:
        print("Este es tu fichero %r" % filename)
        # Bound to a new name so the imported `TS` module is not rebound.
        symbol_table = TS.TS()
        fl = flujo.Flujo(txt)
        analexi = analex.Analex(fl)
        anasint = Anasin(analexi, symbol_table)
        try:
            arbolPrograma = anasint.analizaPrograma()
            # After the program, the current component must be "eof".
            anasint.comprueba("eof")
        except errores.Error as err:
            sys.stderr.write("%s\n" % err)