Ejemplo n.º 1
0
def load_modules():
    """Create the FreeLing analyzers and store them in ``FREELING_MODULES``.

    Reads the module-level ``FREELING_DATA_DIR`` and ``FREELING_LANG``
    settings and fills the module-level ``FREELING_MODULES`` mapping with:

    * ``"tk"``     -- tokenizer
    * ``"sp"``     -- splitter
    * ``"sid"``    -- session opened on the splitter
    * ``"mf"``     -- morphological analyzer (maco)
    * ``"tg"``     -- HMM tagger
    * ``"parser"`` -- chart parser, only when the chunker grammar file exists
    """
    global FREELING_MODULES
    freeling.util_init_locale("default")

    # All language-specific data files live under <data dir><lang>/.
    lang_dir = FREELING_DATA_DIR + FREELING_LANG

    op = freeling.maco_options(FREELING_LANG)
    op.set_data_files(
        "",
        FREELING_DATA_DIR + "common/punct.dat",
        lang_dir + "/dicc.src",
        lang_dir + "/afixos.dat",
        "",
        lang_dir + "/locucions.dat",
        lang_dir + "/np.dat",
        lang_dir + "/quantities.dat",
        lang_dir + "/probabilitats.dat",
    )

    FREELING_MODULES["tk"] = freeling.tokenizer(lang_dir + "/tokenizer.dat")
    FREELING_MODULES["sp"] = freeling.splitter(lang_dir + "/splitter.dat")
    FREELING_MODULES["sid"] = FREELING_MODULES["sp"].open_session()
    FREELING_MODULES["mf"] = freeling.maco(op)
    FREELING_MODULES["mf"].set_active_options(
        False, True, True, True,
        True, True, False, True,
        True, True, True, True,
    )
    FREELING_MODULES["tg"] = freeling.hmm_tagger(lang_dir + "/tagger.dat", True, 2)

    # The chunker grammar is a regular file, so it must be tested with
    # isfile() (isdir() on a .dat file is always False), and the path tested
    # must be the same "chunker/" path that is subsequently loaded.
    grammar_path = lang_dir + "/chunker/grammar-chunk.dat"
    if os.path.isfile(grammar_path):
        FREELING_MODULES["parser"] = freeling.chart_parser(grammar_path)
def setup_freeling():
    """
    Build the FreeLing components used for pre-processing Spanish text.

    The data directory and language folder are taken from the ``Data`` and
    ``Lang`` keys of the ``FREELING`` section of ``config.ini``. Adjust those
    paths if FreeLing is not installed in its usual location.

    :return: tuple ``(tk, sp, umap, mf)`` -- tokenizer, splitter, user map
             (``RE_map``) and morphological analyzer
    """
    config = configparser.ConfigParser()
    config.read('config.ini')
    pyfreeling.util_init_locale('default')

    # Resolve the two configured path pieces once instead of on every use.
    settings = config['FREELING']
    data_dir = settings['Data']
    lang_dir = data_dir + settings['Lang']

    tokenizer = pyfreeling.tokenizer(lang_dir + '/twitter/tokenizer.dat')
    splitter = pyfreeling.splitter(lang_dir + '/splitter.dat')
    user_map = pyfreeling.RE_map(lang_dir + '/twitter/usermap.dat')

    # Morphological analyzer options; only the data files differ from defaults.
    options = pyfreeling.maco_options("es")
    options.set_data_files(
        "",
        data_dir + "common/punct.dat",
        lang_dir + "/dicc.src",
        lang_dir + "/afixos.dat",
        "",
        lang_dir + "/locucions.dat",
        lang_dir + "/np.dat",
        lang_dir + "/quantities.dat",
        lang_dir + "/probabilitats.dat",
    )

    morpho = pyfreeling.maco(options)
    morpho.set_active_options(
        False, True, True, True,
        True, True, False, True,
        True, True, True, True,
    )
    return tokenizer, splitter, user_map, morpho
Ejemplo n.º 3
0
def my_maco_options(lang, lpath):
    """Return a ``maco_options`` holder for *lang* with its data files set.

    *lpath* is the language data directory and is expected to end with a
    path separator, since file names are appended to it directly. Only the
    files of the submodules that will be used need to be provided.
    """
    options = pyfreeling.maco_options(lang)

    # (attribute name, file path) pairs, applied in order.
    data_files = (
        ("UserMapFile", ""),
        ("LocutionsFile", lpath + "locucions.dat"),
        ("AffixFile", lpath + "afixos.dat"),
        ("ProbabilityFile", lpath + "probabilitats.dat"),
        ("DictionaryFile", lpath + "dicc.src"),
        ("NPdataFile", lpath + "np.dat"),
        ("PunctuationFile", lpath + "../common/punct.dat"),
    )
    for attribute, path in data_files:
        setattr(options, attribute, path)
    return options
Ejemplo n.º 4
0
def my_maco_options(LANG, DATA):
    """Return a ``maco_options`` holder for *LANG* with its data files set.

    *DATA* is the FreeLing data root and is expected to end with a path
    separator; language files are looked up under ``DATA + LANG``. Only the
    files of the submodules that will be used need to be provided.
    """
    options = pyfreeling.maco_options(LANG)

    # Directory holding the language-specific data files.
    lang_dir = DATA + LANG

    options.UserMapFile = ""
    options.LocutionsFile = lang_dir + "/locucions.dat"
    options.AffixFile = lang_dir + "/afixos.dat"
    options.ProbabilityFile = lang_dir + "/probabilitats.dat"
    options.DictionaryFile = lang_dir + "/dicc.src"
    options.NPdataFile = lang_dir + "/np.dat"
    # Punctuation data is shared between languages.
    options.PunctuationFile = DATA + "common/punct.dat"

    return options
Ejemplo n.º 5
0
    def my_maco_options(self, lang, lpath):
        """Return a ``maco_options`` holder for *lang* with its data files set.

        *lpath* is the language data directory and is expected to end with a
        path separator, since file names are appended to it directly. Only
        the files of the submodules that will be used need to be provided.
        """
        options = freeling.maco_options(lang)

        # Attribute name -> file name relative to the language directory.
        relative_files = (
            ("LocutionsFile", "locucions.dat"),
            ("AffixFile", "afixos.dat"),
            ("ProbabilityFile", "probabilitats.dat"),
            ("DictionaryFile", "dicc.src"),
            ("NPdataFile", "np.dat"),
            ("PunctuationFile", "../common/punct.dat"),
        )

        # No user map is configured.
        options.UserMapFile = ""
        for attribute, file_name in relative_files:
            setattr(options, attribute, lpath + file_name)
        return options
Ejemplo n.º 6
0
        file=sys.stderr)
    sys.exit(1)

# Root of the FreeLing data files, below the installation directory.
DATA = os.environ["FREELINGDIR"] + "/share/freeling/"

# Initialise the locale before creating any analyzer.
pyfreeling.util_init_locale("default")

# Language identifier, created for demonstration purposes only: from here on
# the language is assumed to be LANG.
la = pyfreeling.lang_ident(DATA + "common/lang_ident/ident-few.dat")

# Options for the maco analyzer. Defaults are fine apart from the data files.
LANG = "es"
op = pyfreeling.maco_options(LANG)
op.set_data_files(
    "",
    DATA + "common/punct.dat",
    DATA + LANG + "/dicc.src",
    DATA + LANG + "/afixos.dat",
    "",
    DATA + LANG + "/locucions.dat",
    DATA + LANG + "/np.dat",
    DATA + LANG + "/quantities.dat",
    DATA + LANG + "/probabilitats.dat",
)

# Analyzers: tokenizer, splitter (plus a session on it) and maco.
tk = pyfreeling.tokenizer(DATA + LANG + "/tokenizer.dat")
sp = pyfreeling.splitter(DATA + LANG + "/splitter.dat")
sid = sp.open_session()
mf = pyfreeling.maco(op)

# activate morpho modules to be used in next call
mf.set_active_options(
    False,
   print("Folder",os.environ["FREELINGDIR"]+"/share/freeling",
         "not found.\nPlease set FREELINGDIR environment variable to FreeLing installation directory",
         file=sys.stderr)
   sys.exit(1)


# Root of the FreeLing data files, below the installation directory.
DATA = os.environ["FREELINGDIR"] + "/share/freeling/"

# Switch to a UTF-8 compatible locale.
freeling.util_init_locale("default")

# The language is fixed to Spanish (it is not read from the command line).
lang = "es"
LANG = "es"

# Options for the maco analyzer; only the data files need to be provided.
op = freeling.maco_options(LANG)
op.set_data_files(
    "",
    DATA + "common/punct.dat",
    DATA + LANG + "/dicc.src",
    DATA + LANG + "/afixos.dat",
    "",
    DATA + LANG + "/locucions.dat",
    DATA + LANG + "/np.dat",
    DATA + LANG + "/quantities.dat",
    DATA + LANG + "/probabilitats.dat",
)

# Installation path is hard-coded (it is not read from the command line).
# An alternative Homebrew location would be "/usr/local/Cellar/freeling/4.0_4".
ipath = "/usr/local/"

# path to language data
Ejemplo n.º 8
0
   sys.exit(1)


# Root of the FreeLing data files, below the installation directory.
DATA = os.environ["FREELINGDIR"] + "/share/freeling/"

# Initialise the locale before creating any analyzer.
pyfreeling.util_init_locale("default")

# Language identifier, created for demonstration purposes only: from here on
# the language is assumed to be LANG.
la = pyfreeling.lang_ident(DATA + "common/lang_ident/ident-few.dat")

# Options for the maco analyzer. Defaults are fine apart from the data files.
LANG = "es"
op = pyfreeling.maco_options(LANG)
op.set_data_files(
    "",
    DATA + "common/punct.dat",
    DATA + LANG + "/dicc.src",
    DATA + LANG + "/afixos.dat",
    "",
    DATA + LANG + "/locucions.dat",
    DATA + LANG + "/np.dat",
    DATA + LANG + "/quantities.dat",
    DATA + LANG + "/probabilitats.dat",
)

# Analyzers: tokenizer, splitter (plus a session on it) and maco.
tk = pyfreeling.tokenizer(DATA + LANG + "/tokenizer.dat")
sp = pyfreeling.splitter(DATA + LANG + "/splitter.dat")
sid = sp.open_session()
mf = pyfreeling.maco(op)
Ejemplo n.º 9
0
def set_up_analyzer():
    """Build the FreeLing analysis pipeline for the "ru" language data.

    The FreeLing installation is located through the ``FREELINGDIR``
    environment variable. When it is unset, a platform-dependent default is
    stored in ``os.environ``. In both cases the ``share/freeling`` folder
    below it must exist.

    :return: tuple ``(tk, sp, sid, mf, tg, sen)`` -- tokenizer, splitter,
             splitter session, morphological analyzer, HMM tagger and
             sense annotator
    :raises SystemExit: with status 1 (after a message on stderr) when the
             FreeLing data folder cannot be found
    """
    # Fall back to the usual installation prefix when none was configured.
    if "FREELINGDIR" not in os.environ:
        if sys.platform in ("win32", "win64"):
            os.environ["FREELINGDIR"] = "C:\\Program Files"
        else:
            os.environ["FREELINGDIR"] = "/usr/local"

    # Validate the data folder whether FREELINGDIR came from the environment
    # or from the fallback above; a wrong user-supplied value must be caught
    # here rather than surfacing later as a failure inside FreeLing.
    data_root = os.environ["FREELINGDIR"] + "/share/freeling"
    if not os.path.exists(data_root):
        print("Folder", data_root,
              "not found.\nPlease set FREELINGDIR environment variable to FreeLing installation directory",
              file=sys.stderr)
        sys.exit(1)

    # Location of FreeLing configuration files.
    data_dir = data_root + "/"

    # Init locales
    pyfreeling.util_init_locale("default")

    # Options for the maco analyzer. Defaults are fine apart from the data files.
    lang = "ru"
    lang_dir = data_dir + lang
    op = pyfreeling.maco_options(lang)
    op.set_data_files(
        "",
        data_dir + "common/punct.dat",
        lang_dir + "/dicc.src",
        lang_dir + "/afixos.dat",
        "",
        lang_dir + "/locucions.dat",
        lang_dir + "/np.dat",
        lang_dir + "/quantities.dat",
        lang_dir + "/probabilitats.dat",
    )

    # Create analyzers.
    tk = pyfreeling.tokenizer(lang_dir + "/tokenizer.dat")
    sp = pyfreeling.splitter(lang_dir + "/splitter.dat")
    sid = sp.open_session()
    mf = pyfreeling.maco(op)

    # Select which of the created morphological submodules are used in
    # subsequent calls (by default all created submodules are used).
    mf.set_active_options(
        False, True, True, True,
        True, True, False, True,
        True, True, True, True,
    )

    # Create the tagger and the sense annotator. The chunk parser and the
    # dependency parser are intentionally not created here.
    tg = pyfreeling.hmm_tagger(lang_dir + "/tagger.dat", True, 2)
    sen = pyfreeling.senses(lang_dir + "/senses.dat")

    return (tk, sp, sid, mf, tg, sen)