def process_entries(self, words):
    """Preprocess, parse and store the raw dictionary entries of `words`.

    Every word is looked up in ``self.raw_dict`` and its raw entry is run
    through an ``EntryPreprocessor``. The preprocessed entries are then
    handed to the parser wrapper that matches ``self.lang``:

    * ``'eng'``: ``StanfordWrapper.parse_sentences(..., definitions=True)``
    * ``'hun'``: ``Magyarlanc.parse_entries(...)``

    For any other language a message is printed and the preprocessed
    entries are used without parsing.

    Entries whose ``'to_filter'`` field is true are skipped. Each remaining
    entry is stored in ``self.dictionary`` under its headword (``'hw'``);
    if the headword is already present, a warning is logged and the new
    entry is merged into the stored one with ``self.unify``.
    """
    preprocessor = EntryPreprocessor(self.cfg)
    entries = [
        preprocessor.preprocess_entry(self.raw_dict[word]) for word in words]

    if self.lang == 'eng':
        entries = StanfordWrapper(self.cfg).parse_sentences(
            entries, definitions=True)
    elif self.lang == 'hun':
        entries = Magyarlanc(self.cfg).parse_entries(entries)
    else:
        print('incorrect lang')

    for entry in entries:
        if entry['to_filter']:
            continue

        headword = entry['hw']

        # The senses are only inspected here: senses without a definition
        # are passed over and nothing is done with the others.
        for sense in entry['senses']:
            if sense['definition'] is None:
                continue

        if headword not in self.dictionary:
            self.dictionary[headword] = entry
            continue

        logging.warning(
            "entries with identical headwords:\n{0}\n{1}".format(
                entry, self.dictionary[headword]))
        self.unify(self.dictionary[headword], entry)
def __init__(self, cfg):
    """Store the configuration and set up the parser and converter.

    Reads the language (``deps``/``lang``) and the two working directories
    (``text``/``deps_dir`` and ``text``/``graph_dir``) from `cfg`, passes
    both directories to ``ensure_dir`` and creates the parser wrapper for
    the configured language together with a ``DepTo4lang`` instance.

    ``self.parser_wrapper`` is only set when the language is ``'en'``
    (``CoreNLPWrapper``) or ``'hu'`` (``Magyarlanc``).

    :param cfg: configuration object providing ``get(section, option)``
    """
    self.cfg = cfg
    self.lang = self.cfg.get("deps", "lang")
    self.deps_dir = self.cfg.get('text', 'deps_dir')
    # NOTE: the 'machines_dir' option ('text' section) is currently not used
    self.graphs_dir = self.cfg.get('text', 'graph_dir')

    # Explicit loop instead of map(): map() is meant for building results,
    # and being lazy on Python 3 it would never call ensure_dir at all.
    for directory in (self.deps_dir, self.graphs_dir):
        ensure_dir(directory)

    if self.lang == 'en':
        self.parser_wrapper = CoreNLPWrapper(self.cfg)
    elif self.lang == 'hu':
        self.parser_wrapper = Magyarlanc(self.cfg)

    self.dep_to_4lang = DepTo4lang(self.cfg)
def __init__(self, cfg, direct_parse=False):
    """Store the configuration and set up the parser and converter.

    The language is read from the ``deps``/``lang`` option of `cfg`.
    Unless `direct_parse` is true, the ``text``/``deps_dir`` and
    ``text``/``graph_dir`` options are read as well and each directory is
    passed to ``ensure_dir``.

    ``self.parser_wrapper`` is only set when the language is ``'en'``
    (``CoreNLPWrapper``) or ``'hu'`` (``Magyarlanc``). A ``DepTo4lang``
    instance is always created and receives `direct_parse`.

    :param cfg: configuration object providing ``get(section, option)``
    :param direct_parse: when true, skip the directory set-up
    """
    self.cfg = cfg
    self.lang = cfg.get("deps", "lang")

    if not direct_parse:
        self.deps_dir = cfg.get('text', 'deps_dir')
        ensure_dir(self.deps_dir)
        # NOTE: the 'machines_dir' option ('text' section) is not used
        self.graphs_dir = cfg.get('text', 'graph_dir')
        ensure_dir(self.graphs_dir)

    # Language code -> parser wrapper class; other languages get no parser.
    wrapper_classes = {'en': CoreNLPWrapper, 'hu': Magyarlanc}
    wrapper_class = wrapper_classes.get(self.lang)
    if wrapper_class is not None:
        self.parser_wrapper = wrapper_class(cfg)

    self.dep_to_4lang = DepTo4lang(cfg, direct_parse)