def load_modules():
    """Create the FreeLing analyzers and store them in ``FREELING_MODULES``.

    Every data file is looked up under ``FREELING_DATA_DIR + FREELING_LANG``
    (plus ``FREELING_DATA_DIR + "common/punct.dat"``), so ``FREELING_DATA_DIR``
    must end with a path separator.

    Keys written to ``FREELING_MODULES``:

    * ``"tk"``     -- tokenizer
    * ``"sp"``     -- sentence splitter
    * ``"sid"``    -- splitter session opened on ``"sp"``
    * ``"mf"``     -- morphological analyzer (maco)
    * ``"tg"``     -- HMM tagger
    * ``"parser"`` -- chart parser; only set when the language ships a
      ``chunker/grammar-chunk.dat`` grammar file
    """
    global FREELING_MODULES

    freeling.util_init_locale("default")

    lang_dir = FREELING_DATA_DIR + FREELING_LANG

    op = freeling.maco_options(FREELING_LANG)
    op.set_data_files(
        "",
        FREELING_DATA_DIR + "common/punct.dat",
        lang_dir + "/dicc.src",
        lang_dir + "/afixos.dat",
        "",
        lang_dir + "/locucions.dat",
        lang_dir + "/np.dat",
        lang_dir + "/quantities.dat",
        lang_dir + "/probabilitats.dat",
    )

    FREELING_MODULES["tk"] = freeling.tokenizer(lang_dir + "/tokenizer.dat")
    FREELING_MODULES["sp"] = freeling.splitter(lang_dir + "/splitter.dat")
    FREELING_MODULES["sid"] = FREELING_MODULES["sp"].open_session()

    FREELING_MODULES["mf"] = freeling.maco(op)
    FREELING_MODULES["mf"].set_active_options(
        False, True, True, True,
        True, True, False, True,
        True, True, True, True,
    )

    FREELING_MODULES["tg"] = freeling.hmm_tagger(lang_dir + "/tagger.dat", True, 2)

    # The chunk grammar is optional: not every language provides one. Test the
    # very same path that is handed to the parser, and test it as a file --
    # checking a differently spelled directory would never enable the parser.
    grammar_file = lang_dir + "/chunker/grammar-chunk.dat"
    if os.path.isfile(grammar_file):
        FREELING_MODULES["parser"] = freeling.chart_parser(grammar_file)
def setup_freeling():
    """
    Build the FreeLing pre-processing components described by ``config.ini``.

    The ``[FREELING]`` section of ``config.ini`` supplies ``Data`` (the
    FreeLing data directory) and ``Lang`` (the language sub-directory).
    Paths are formed by plain string concatenation, so ``Data`` has to end
    with a path separator. The morphological-analyzer options are always
    created for ``"es"``, whatever ``Lang`` says.

    If your FreeLing installation differs from the typical one, adjust the
    paths in ``config.ini``.

    :return: tuple ``(tk, sp, umap, mf)`` -- tokenizer, splitter, user map
             (``RE_map``) and morphological analyzer
    """
    config = configparser.ConfigParser()
    config.read('config.ini')

    pyfreeling.util_init_locale('default')

    settings = config['FREELING']
    data_dir = settings['Data']
    lang_dir = data_dir + settings['Lang']

    # Tokenizer and user map come from the Twitter-specific data files.
    tk = pyfreeling.tokenizer(lang_dir + '/twitter/tokenizer.dat')
    sp = pyfreeling.splitter(lang_dir + '/splitter.dat')
    umap = pyfreeling.RE_map(lang_dir + '/twitter/usermap.dat')

    # Positional data files for maco; the two empty strings leave those
    # slots without a file.
    data_files = (
        "",
        data_dir + "common/punct.dat",
        lang_dir + "/dicc.src",
        lang_dir + "/afixos.dat",
        "",
        lang_dir + "/locucions.dat",
        lang_dir + "/np.dat",
        lang_dir + "/quantities.dat",
        lang_dir + "/probabilitats.dat",
    )
    op = pyfreeling.maco_options("es")
    op.set_data_files(*data_files)

    mf = pyfreeling.maco(op)
    active_flags = (
        False, True, True, True,
        True, True, False, True,
        True, True, True, True,
    )
    mf.set_active_options(*active_flags)

    return tk, sp, umap, mf
def my_maco_options(lang, lpath):
    """Return a ``maco_options`` holder for ``lang`` with its data files set.

    ``lpath`` is the language data directory and must end with a path
    separator: file names are appended to it directly, and the punctuation
    file is reached through ``lpath + "../common/punct.dat"``.
    """
    # Attribute -> data file. Only the files of the submodules that will be
    # used need to be provided; the user map is left empty.
    data_files = (
        ("UserMapFile", ""),
        ("LocutionsFile", lpath + "locucions.dat"),
        ("AffixFile", lpath + "afixos.dat"),
        ("ProbabilityFile", lpath + "probabilitats.dat"),
        ("DictionaryFile", lpath + "dicc.src"),
        ("NPdataFile", lpath + "np.dat"),
        ("PunctuationFile", lpath + "../common/punct.dat"),
    )

    options = pyfreeling.maco_options(lang)
    for attribute, path in data_files:
        setattr(options, attribute, path)
    return options
def my_maco_options(LANG, DATA):
    """Return a ``maco_options`` holder for ``LANG`` with its data files set.

    ``DATA`` is the FreeLing data directory and must end with a path
    separator; language files are read from ``DATA + LANG`` and the
    punctuation file from ``DATA + "common/punct.dat"``.
    """
    lang_dir = DATA + LANG

    options = pyfreeling.maco_options(LANG)

    # Files for the morphological submodules. A submodule that will not be
    # used does not need its file set; the user map is left empty.
    options.UserMapFile = ""
    options.LocutionsFile = lang_dir + "/locucions.dat"
    options.AffixFile = lang_dir + "/afixos.dat"
    options.ProbabilityFile = lang_dir + "/probabilitats.dat"
    options.DictionaryFile = lang_dir + "/dicc.src"
    options.NPdataFile = lang_dir + "/np.dat"
    options.PunctuationFile = DATA + "common/punct.dat"

    return options
def my_maco_options(self, lang, lpath):
    """Return the morphological-analyzer options for ``lang``.

    ``lpath`` is the language data directory and must end with a path
    separator, since file names are appended to it directly. No instance
    state is read.
    """
    ## -----------------------------------------------
    ## Resolve the data files first, then fill in the
    ## options holder in one go
    ## -----------------------------------------------
    locutions = lpath + "locucions.dat"
    affixes = lpath + "afixos.dat"
    probabilities = lpath + "probabilitats.dat"
    dictionary = lpath + "dicc.src"
    proper_nouns = lpath + "np.dat"
    punctuation = lpath + "../common/punct.dat"

    holder = freeling.maco_options(lang)

    # Files for submodules that will not be used may be left unset; the user
    # map is explicitly empty.
    holder.UserMapFile = ""
    holder.LocutionsFile = locutions
    holder.AffixFile = affixes
    holder.ProbabilityFile = probabilities
    holder.DictionaryFile = dictionary
    holder.NPdataFile = proper_nouns
    holder.PunctuationFile = punctuation

    return holder
file=sys.stderr) sys.exit(1) # Location of FreeLing configuration files. DATA = os.environ["FREELINGDIR"] + "/share/freeling/" # Init locales pyfreeling.util_init_locale("default") # create language detector. Used just to show it. Results are printed # but ignored (after, it is assumed language is LANG) la = pyfreeling.lang_ident(DATA + "common/lang_ident/ident-few.dat") # create options set for maco analyzer. Default values are Ok, except for data files. LANG = "es" op = pyfreeling.maco_options(LANG) op.set_data_files("", DATA + "common/punct.dat", DATA + LANG + "/dicc.src", DATA + LANG + "/afixos.dat", "", DATA + LANG + "/locucions.dat", DATA + LANG + "/np.dat", DATA + LANG + "/quantities.dat", DATA + LANG + "/probabilitats.dat") # create analyzers tk = pyfreeling.tokenizer(DATA + LANG + "/tokenizer.dat") sp = pyfreeling.splitter(DATA + LANG + "/splitter.dat") sid = sp.open_session() mf = pyfreeling.maco(op) # activate mmorpho odules to be used in next call mf.set_active_options( False,
print("Folder",os.environ["FREELINGDIR"]+"/share/freeling", "not found.\nPlease set FREELINGDIR environment variable to FreeLing installation directory", file=sys.stderr) sys.exit(1) # Location of FreeLing configuration files. DATA = os.environ["FREELINGDIR"]+"/share/freeling/"; # set locale to an UTF8 compatible locale freeling.util_init_locale("default"); # get requested language from arg1, or English if not provided lang = "es" LANG = "es" op= freeling.maco_options(LANG); op.set_data_files( "", DATA + "common/punct.dat", DATA + LANG + "/dicc.src", DATA + LANG + "/afixos.dat", "", DATA + LANG + "/locucions.dat", DATA + LANG + "/np.dat", DATA + LANG + "/quantities.dat", DATA + LANG + "/probabilitats.dat"); # get installation path to use from arg2, or use /usr/local if not provided #ipath = "/usr/local/Cellar/freeling/4.0_4"; ipath = "/usr/local/"; # path to language data
sys.exit(1) # Location of FreeLing configuration files. DATA = os.environ["FREELINGDIR"]+"/share/freeling/"; # Init locales pyfreeling.util_init_locale("default"); # create language detector. Used just to show it. Results are printed # but ignored (after, it is assumed language is LANG) la=pyfreeling.lang_ident(DATA+"common/lang_ident/ident-few.dat"); # create options set for maco analyzer. Default values are Ok, except for data files. LANG="es"; op= pyfreeling.maco_options(LANG); op.set_data_files( "", DATA + "common/punct.dat", DATA + LANG + "/dicc.src", DATA + LANG + "/afixos.dat", "", DATA + LANG + "/locucions.dat", DATA + LANG + "/np.dat", DATA + LANG + "/quantities.dat", DATA + LANG + "/probabilitats.dat"); # create analyzers tk=pyfreeling.tokenizer(DATA+LANG+"/tokenizer.dat"); sp=pyfreeling.splitter(DATA+LANG+"/splitter.dat"); sid=sp.open_session(); mf=pyfreeling.maco(op);
def set_up_analyzer(lang="ru"):
    """Create the FreeLing analysis pipeline for one language.

    The FreeLing installation is taken from the ``FREELINGDIR`` environment
    variable. When the variable is not set it is assigned a platform default
    (``C:\\Program Files`` on Windows, ``/usr/local`` elsewhere), and that
    assignment stays in ``os.environ``. Data files are read from
    ``$FREELINGDIR/share/freeling/``.

    :param lang: name of the language sub-directory under the data folder,
        also passed to ``maco_options``. Defaults to ``"ru"``.
    :return: tuple ``(tk, sp, sid, mf, tg, sen)`` -- tokenizer, splitter,
        splitter session, morphological analyzer, HMM tagger and sense
        annotator.
    :raises SystemExit: with exit code 1, after writing a message to stderr,
        when ``$FREELINGDIR/share/freeling`` does not exist.
    """
    # Check whether we know where to find FreeLing data files.
    if "FREELINGDIR" not in os.environ:
        # sys.platform is "win32" on every Windows build, 64-bit included.
        if sys.platform == "win32":
            os.environ["FREELINGDIR"] = "C:\\Program Files"
        else:
            os.environ["FREELINGDIR"] = "/usr/local"

    data_root = os.environ["FREELINGDIR"] + "/share/freeling"
    if not os.path.exists(data_root):
        # Say why we are giving up instead of exiting silently.
        sys.stderr.write(
            "Folder " + data_root + " not found.\n"
            "Please set FREELINGDIR environment variable to FreeLing "
            "installation directory\n"
        )
        sys.exit(1)

    # Location of FreeLing configuration files.
    data = data_root + "/"
    lang_dir = data + lang

    # Init locales.
    pyfreeling.util_init_locale("default")

    # Create options set for maco analyzer. Default values are OK, except
    # for data files.
    op = pyfreeling.maco_options(lang)
    op.set_data_files(
        "",
        data + "common/punct.dat",
        lang_dir + "/dicc.src",
        lang_dir + "/afixos.dat",
        "",
        lang_dir + "/locucions.dat",
        lang_dir + "/np.dat",
        lang_dir + "/quantities.dat",
        lang_dir + "/probabilitats.dat",
    )

    # Create analyzers.
    tk = pyfreeling.tokenizer(lang_dir + "/tokenizer.dat")
    sp = pyfreeling.splitter(lang_dir + "/splitter.dat")
    sid = sp.open_session()
    mf = pyfreeling.maco(op)

    # Select which of the created morphological submodules are used in the
    # next calls (by default all created submodules are used).
    mf.set_active_options(
        False, True, True, True,
        True, True, False, True,
        True, True, True, True,
    )

    # Create tagger and sense annotator.
    tg = pyfreeling.hmm_tagger(lang_dir + "/tagger.dat", True, 2)
    sen = pyfreeling.senses(lang_dir + "/senses.dat")

    return (tk, sp, sid, mf, tg, sen)