Esempio n. 1
0
    def process_entries(self, words):
        """Preprocess, parse and store the dictionary entries of `words`.

        Every word is looked up in ``self.raw_dict`` and passed through
        ``EntryPreprocessor.preprocess_entry``. The preprocessed entries are
        then handed to the parser wrapper selected by ``self.lang``
        (``'eng'`` -> ``StanfordWrapper``, ``'hun'`` -> ``Magyarlanc``).

        Entries whose ``'to_filter'`` value is truthy are skipped. The
        remaining ones are stored in ``self.dictionary`` under their
        headword (``entry['hw']``); if the headword is already present, a
        warning is logged and the two entries are merged via ``self.unify``.

        Args:
            words: iterable of keys of ``self.raw_dict`` to process.

        Returns:
            None. Results are written to ``self.dictionary``.

        Raises:
            KeyError: if a word is missing from ``self.raw_dict``.
        """
        entry_preprocessor = EntryPreprocessor(self.cfg)
        # Build a real list rather than calling map(): on Python 3 map() is
        # lazy, whereas the original Python 2 code handed a list to the
        # parser wrappers below.
        entries = [entry_preprocessor.preprocess_entry(self.raw_dict[word])
                   for word in words]

        if self.lang == 'eng':
            stanford_wrapper = StanfordWrapper(self.cfg)
            entries = stanford_wrapper.parse_sentences(entries,
                                                       definitions=True)
        elif self.lang == 'hun':
            magyarlanc_wrapper = Magyarlanc(self.cfg)
            entries = magyarlanc_wrapper.parse_entries(entries)
        else:
            # Best effort: report the problem and carry on with the
            # unparsed entries. The parenthesised form prints the same text
            # on Python 2 and Python 3.
            print('incorrect lang')

        for entry in entries:
            if entry['to_filter']:
                continue
            word = entry['hw']

            if word in self.dictionary:
                logging.warning(
                    "entries with identical headwords:\n{0}\n{1}".format(
                        entry, self.dictionary[word]))

                self.unify(self.dictionary[word], entry)
            else:
                self.dictionary[word] = entry
Esempio n. 2
0
 def __init__(self, cfg):
     """Read the configuration and set up the parser and converter.

     The language is read from the ``lang`` option of the ``deps``
     section; ``deps_dir`` and ``graph_dir`` are read from the ``text``
     section and passed to ``ensure_dir``.

     ``self.parser_wrapper`` is a ``CoreNLPWrapper`` for ``'en'`` and a
     ``Magyarlanc`` instance for ``'hu'``. For any other language the
     attribute is not set.

     Args:
         cfg: configuration object offering ``get(section, option)``.
     """
     self.cfg = cfg
     self.lang = self.cfg.get("deps", "lang")
     self.deps_dir = self.cfg.get('text', 'deps_dir')
     # self.machines_dir = self.cfg.get('text', 'machines_dir')
     self.graphs_dir = cfg.get('text', 'graph_dir')
     # Explicit loop instead of map(): map() is lazy on Python 3, so using
     # it purely for side effects would never call ensure_dir at all.
     for directory in (self.deps_dir, self.graphs_dir):  # self.machines_dir
         ensure_dir(directory)
     if self.lang == 'en':
         self.parser_wrapper = CoreNLPWrapper(self.cfg)
     elif self.lang == 'hu':
         self.parser_wrapper = Magyarlanc(self.cfg)
     self.dep_to_4lang = DepTo4lang(self.cfg)
Esempio n. 3
0
 def __init__(self, cfg, direct_parse=False):
     """Read the configuration and set up the parser and converter.

     The language comes from the ``lang`` option of the ``deps`` section
     and ``graph_dir`` from the ``text`` section. ``deps_dir`` is only
     read (and passed to ``ensure_dir``) when `direct_parse` is false;
     otherwise ``self.deps_dir`` is not set.

     ``self.parser_wrapper`` is a ``CoreNLPWrapper`` for ``'en'`` and a
     ``Magyarlanc`` instance for ``'hu'``; it is not set for any other
     language.

     Args:
         cfg: configuration object offering ``get(section, option)``.
         direct_parse: skips the ``deps_dir`` setup when true and is
             forwarded to ``DepTo4lang``.
     """
     self.cfg = cfg
     language = cfg.get("deps", "lang")
     self.lang = language

     if not direct_parse:
         deps_dir = cfg.get('text', 'deps_dir')
         self.deps_dir = deps_dir
         ensure_dir(deps_dir)

     # The machines_dir option of the 'text' section is currently not read.
     graphs_dir = cfg.get('text', 'graph_dir')
     self.graphs_dir = graphs_dir
     ensure_dir(graphs_dir)

     # The two branches are mutually exclusive, so their order is free.
     if language == 'hu':
         self.parser_wrapper = Magyarlanc(cfg)
     elif language == 'en':
         self.parser_wrapper = CoreNLPWrapper(cfg)

     self.dep_to_4lang = DepTo4lang(cfg, direct_parse)