def __init__(self, cfg):
    """Initialize from a ConfigParser-style *cfg*.

    Reads the target language and output path, ensures the output
    directory exists, and builds the helper components: dependency
    processor, dependency map, lemmatizer, and the binary lexicon.
    """
    self.cfg = cfg
    self.lang = self.cfg.get("deps", "lang")
    # Serialized definitions are written here; create the parent
    # directory up front so later writes cannot fail on a missing path.
    self.out_fn = self.cfg.get("machine", "definitions_binary_out")
    ensure_dir(os.path.dirname(self.out_fn))
    self.dependency_processor = DependencyProcessor(self.cfg)
    dep_map_path = cfg.get("deps", "dep_map")
    self.read_dep_map(dep_map_path)
    # Words seen without a definition accumulate here.
    self.undefined = set()
    self.lemmatizer = Lemmatizer(cfg)
    self.lexicon_fn = self.cfg.get("machine", "definitions_binary")
    self.lexicon = Lexicon.load_from_binary(self.lexicon_fn)
    # Cache mapping surface forms to lemmas, filled lazily elsewhere
    # (presumably — TODO confirm against callers).
    self.word2lemma = {}
def __init__(self, cfg):
    """Build the object from configuration *cfg*.

    Loads language settings, prepares the output location, and wires
    up the dependency processor, dependency map, lemmatizer and the
    binary lexicon.
    """
    self.cfg = cfg
    self.lang = self.cfg.get("deps", "lang")
    out_path = self.cfg.get("machine", "definitions_binary_out")
    self.out_fn = out_path
    # Make sure the directory for the output binary exists.
    ensure_dir(os.path.dirname(out_path))
    self.dependency_processor = DependencyProcessor(self.cfg)
    self.read_dep_map(cfg.get("deps", "dep_map"))
    self.undefined = set()  # words encountered with no definition
    self.lemmatizer = Lemmatizer(cfg)
    self.lexicon_fn = self.cfg.get("machine", "definitions_binary")
    self.lexicon = Lexicon.load_from_binary(self.lexicon_fn)
    self.word2lemma = {}  # word -> lemma cache
def __init__(self, cfg, direct_parse=False):
    """Initialize from *cfg*.

    When *direct_parse* is True the output-file setup is skipped
    (no ``definitions_binary_out`` path is read or created); all
    other components are built either way.
    """
    self.cfg = cfg
    self.lang = self.cfg.get("deps", "lang")
    if not direct_parse:
        # Only batch (non-direct) runs write a definitions binary.
        self.out_fn = self.cfg.get("machine", "definitions_binary_out")
        ensure_dir(os.path.dirname(self.out_fn))
    self.dependency_processor = DependencyProcessor(self.cfg)
    dep_map_path = cfg.get("deps", "dep_map")
    self.undefined = set()  # words seen without a definition
    self.lemmatizer = Lemmatizer(cfg)
    self.lexicon_fn = self.cfg.get("machine", "definitions_binary")
    self.lexicon = Lexicon.load_from_binary(self.lexicon_fn)
    # NOTE: dep map is loaded after the lexicon here, unlike sibling
    # constructors — order preserved as found.
    self.read_dep_map(dep_map_path)
    self.word2lemma = {}
    # Whether to keep only the first definition per word.
    self.first_only = cfg.getboolean('filter', 'first_only')
def __init__(self, cfg, direct_parse=False):
    """Set up the object from *cfg*.

    With *direct_parse* set, the output-binary path is neither read
    nor created; everything else (dependency processor, lemmatizer,
    lexicon, graph output directory) is initialized regardless.
    """
    self.cfg = cfg
    self.lang = self.cfg.get("deps", "lang")
    if not direct_parse:
        # The definitions binary is only produced in batch mode.
        self.out_fn = self.cfg.get("machine", "definitions_binary_out")
        ensure_dir(os.path.dirname(self.out_fn))
    self.dependency_processor = DependencyProcessor(self.cfg)
    dep_map_path = cfg.get("deps", "dep_map")
    self.undefined = set()  # words without a definition
    self.lemmatizer = Lemmatizer(cfg)
    self.lexicon_fn = self.cfg.get("machine", "definitions_binary")
    self.lexicon = Lexicon.load_from_binary(self.lexicon_fn)
    self.read_dep_map(dep_map_path)
    self.word2lemma = {}
    # Limit on how many items to process (presumably definitions per
    # word — TODO confirm against callers).
    self.first_n = cfg.getint('filter', 'first_n')
    # Directory where graph output is written; created if missing.
    self.graph_dir = self.cfg.get('machine', 'graph_dir')
    ensure_dir(self.graph_dir)
def __init__(self, cfg, cfg_section="word_sim"):
    """Initialize the similarity component from *cfg*.

    Reads options from *cfg_section*; if that section is absent the
    ``batch`` flag defaults to False.
    """
    # Missing config section means non-batch operation.
    # NOTE(review): a missing 'batch' option in an existing section
    # would raise NoOptionError, which is not caught here — confirm
    # that is intended.
    try:
        self.batch = cfg.getboolean(cfg_section, "batch")
    except NoSectionError:
        self.batch = False
    self.cfg = cfg
    self.graph_dir = cfg.get(cfg_section, "graph_dir")
    ensure_dir(self.graph_dir)
    self.lemmatizer = Lemmatizer(cfg)
    self.lexicon_fn = self.cfg.get(cfg_section, "definitions_binary")
    self.lexicon = Lexicon.load_from_binary(self.lexicon_fn)
    self.defined_words = self.lexicon.get_words()
    # Memoization caches for repeated similarity queries.
    self.word_sim_cache = {}
    self.lemma_sim_cache = {}
    self.links_nodes_cache = {}
    self.stopwords = set(nltk_stopwords.words("english"))
    self.expand = cfg.getboolean(cfg_section, "expand")
    logging.info("expand is {0}".format(self.expand))
def __init__(self, cfg, cfg_section='word_sim'):
    """Build the similarity component from section *cfg_section* of *cfg*."""
    self.batch = cfg.getboolean(cfg_section, 'batch')
    # Surface the configured 4lang path prominently in the log.
    logging.warning("fourlangpath is {0}".format(
        cfg.get(cfg_section, 'fourlangpath')))
    self.cfg = cfg
    self.graph_dir = cfg.get(cfg_section, "graph_dir")
    ensure_dir(self.graph_dir)
    self.lemmatizer = Lemmatizer(cfg)
    self.lexicon_fn = self.cfg.get(cfg_section, "definitions_binary")
    self.lexicon = Lexicon.load_from_binary(self.lexicon_fn)
    self.defined_words = self.lexicon.get_words()
    # Caches keyed by word / lemma pairs to avoid recomputation.
    self.word_sim_cache = {}
    self.lemma_sim_cache = {}
    self.links_nodes_cache = {}
    self.stopwords = set(nltk_stopwords.words('english'))
    self.sim_feats = SimFeatures(cfg, cfg_section)
    self.expand = cfg.getboolean(cfg_section, "expand")
    logging.info("expand is {0}".format(self.expand))
def __init__(self, cfg, cfg_section='word_sim'):
    """Initialize the similarity component from section *cfg_section* of *cfg*.

    Loads the lexicon and lemmatizer, sets up per-query caches, and —
    when the 'similarity/compositional' flag is set — also constructs a
    direct-parse TextTo4lang instance.
    """
    self.batch = cfg.getboolean(cfg_section, 'batch')
    logging.warning("fourlangpath is {0}".format(
        cfg.get(cfg_section, 'fourlangpath')))
    self.cfg = cfg
    self.graph_dir = cfg.get(cfg_section, "graph_dir")
    ensure_dir(self.graph_dir)
    self.lemmatizer = Lemmatizer(cfg)
    self.lexicon_fn = self.cfg.get(cfg_section, "definitions_binary")
    self.lexicon = Lexicon.load_from_binary(self.lexicon_fn)
    self.defined_words = self.lexicon.get_words()
    # Memoization caches for repeated similarity queries.
    self.word_sim_cache = {}
    self.lemma_sim_cache = {}
    self.links_nodes_cache = {}
    self.stopwords = set(nltk_stopwords.words('english'))
    self.sim_feats = SimFeatures(cfg, cfg_section, self.lexicon)
    self.expand = cfg.getboolean(cfg_section, "expand")
    compositional = cfg.getboolean('similarity', 'compositional')
    # getboolean returns a bool, so test it directly rather than with
    # an identity comparison against True (PEP 8).
    # NOTE(review): self.text_to_4lang only exists when the flag is
    # set — any unconditional use elsewhere would raise AttributeError.
    if compositional:
        self.text_to_4lang = TextTo4lang(cfg, direct_parse=True)
    logging.info("expand is {0}".format(self.expand))
    self.allow_4lang = cfg.getboolean('machine', 'allow_4lang')