Beispiel #1
0
 def __init__(self, cfg):
     """Initialise the instance from the configuration object *cfg*.

     Reads options from the ``deps`` and ``machine`` sections, hands the
     directory of the output file to ``ensure_dir``, reads the dependency
     map via ``read_dep_map`` and loads the lexicon with
     ``Lexicon.load_from_binary``.
     """
     self.cfg = cfg
     self.lang = cfg.get('deps', 'lang')

     # Output file; its parent directory is passed to ensure_dir.
     self.out_fn = cfg.get('machine', 'definitions_binary_out')
     out_dir = os.path.dirname(self.out_fn)
     ensure_dir(out_dir)

     self.dependency_processor = DependencyProcessor(cfg)

     # Here the dependency map is read before the lexicon is loaded.
     dep_map_path = cfg.get('deps', 'dep_map')
     self.read_dep_map(dep_map_path)

     self.undefined = set()
     self.lemmatizer = Lemmatizer(cfg)

     lexicon_path = cfg.get('machine', 'definitions_binary')
     self.lexicon_fn = lexicon_path
     self.lexicon = Lexicon.load_from_binary(lexicon_path)

     self.word2lemma = dict()
Beispiel #2
0
 def __init__(self, cfg):
     """Initialise the instance from the configuration object *cfg*.

     Reads options from the ``deps`` and ``machine`` sections, hands the
     directory of the output file to ``ensure_dir``, reads the dependency
     map via ``read_dep_map`` and loads the lexicon with
     ``Lexicon.load_from_binary``.
     """
     self.cfg = cfg
     self.lang = cfg.get('deps', 'lang')

     # Output file; its parent directory is passed to ensure_dir.
     self.out_fn = cfg.get('machine', 'definitions_binary_out')
     out_dir = os.path.dirname(self.out_fn)
     ensure_dir(out_dir)

     self.dependency_processor = DependencyProcessor(cfg)

     # Here the dependency map is read before the lexicon is loaded.
     dep_map_path = cfg.get('deps', 'dep_map')
     self.read_dep_map(dep_map_path)

     self.undefined = set()
     self.lemmatizer = Lemmatizer(cfg)

     lexicon_path = cfg.get('machine', 'definitions_binary')
     self.lexicon_fn = lexicon_path
     self.lexicon = Lexicon.load_from_binary(lexicon_path)

     self.word2lemma = dict()
Beispiel #3
0
 def __init__(self, cfg, direct_parse=False):
     """Initialise the instance from the configuration object *cfg*.

     When *direct_parse* is true the ``definitions_binary_out`` option is
     not read, ``ensure_dir`` is not called for it and ``self.out_fn`` is
     left unset. The dependency map is read only after the lexicon has
     been loaded.
     """
     self.cfg = cfg
     self.lang = cfg.get('deps', 'lang')

     if not direct_parse:
         out_path = cfg.get('machine', 'definitions_binary_out')
         self.out_fn = out_path
         ensure_dir(os.path.dirname(out_path))

     self.dependency_processor = DependencyProcessor(cfg)

     # The option is looked up here but only consumed after the lexicon
     # load further down.
     dep_map_path = cfg.get('deps', 'dep_map')

     self.undefined = set()
     self.lemmatizer = Lemmatizer(cfg)

     lexicon_path = cfg.get('machine', 'definitions_binary')
     self.lexicon_fn = lexicon_path
     self.lexicon = Lexicon.load_from_binary(lexicon_path)

     self.read_dep_map(dep_map_path)

     self.word2lemma = dict()
     self.first_only = cfg.getboolean('filter', 'first_only')
Beispiel #4
0
 def __init__(self, cfg, direct_parse=False):
     """Initialise the instance from the configuration object *cfg*.

     When *direct_parse* is true the ``definitions_binary_out`` option is
     not read, ``ensure_dir`` is not called for it and ``self.out_fn`` is
     left unset. The dependency map is read only after the lexicon has
     been loaded; the ``graph_dir`` option is passed to ``ensure_dir`` at
     the very end.
     """
     self.cfg = cfg
     self.lang = cfg.get('deps', 'lang')

     if not direct_parse:
         out_path = cfg.get('machine', 'definitions_binary_out')
         self.out_fn = out_path
         ensure_dir(os.path.dirname(out_path))

     self.dependency_processor = DependencyProcessor(cfg)

     # The option is looked up here but only consumed after the lexicon
     # load further down.
     dep_map_path = cfg.get('deps', 'dep_map')

     self.undefined = set()
     self.lemmatizer = Lemmatizer(cfg)

     lexicon_path = cfg.get('machine', 'definitions_binary')
     self.lexicon_fn = lexicon_path
     self.lexicon = Lexicon.load_from_binary(lexicon_path)

     self.read_dep_map(dep_map_path)

     self.word2lemma = dict()
     self.first_n = cfg.getint('filter', 'first_n')

     graph_dir = cfg.get('machine', 'graph_dir')
     self.graph_dir = graph_dir
     ensure_dir(graph_dir)
Beispiel #5
0
    def __init__(self, cfg, cfg_section="word_sim"):
        """Initialise the instance from section *cfg_section* of *cfg*.

        Reads the ``batch``, ``graph_dir``, ``definitions_binary`` and
        ``expand`` options, loads the lexicon with
        ``Lexicon.load_from_binary`` and creates empty caches.
        ``self.batch`` falls back to ``False`` when the ``batch`` lookup
        raises ``NoSectionError``.
        """
        # NOTE(review): only this lookup tolerates a missing section; the
        # cfg.get calls below on the same section are unguarded. Confirm
        # whether a missing *option* was meant to be handled here.
        try:
            batch_mode = cfg.getboolean(cfg_section, "batch")
        except NoSectionError:
            batch_mode = False
        self.batch = batch_mode

        self.cfg = cfg

        graph_dir = cfg.get(cfg_section, "graph_dir")
        self.graph_dir = graph_dir
        ensure_dir(graph_dir)

        self.lemmatizer = Lemmatizer(cfg)

        lexicon_path = cfg.get(cfg_section, "definitions_binary")
        self.lexicon_fn = lexicon_path
        lexicon = Lexicon.load_from_binary(lexicon_path)
        self.lexicon = lexicon
        self.defined_words = lexicon.get_words()

        # Three independent, initially empty caches.
        self.word_sim_cache = dict()
        self.lemma_sim_cache = dict()
        self.links_nodes_cache = dict()

        english_stopwords = nltk_stopwords.words("english")
        self.stopwords = set(english_stopwords)

        self.expand = cfg.getboolean(cfg_section, "expand")
        expand_msg = "expand is {0}".format(self.expand)
        logging.info(expand_msg)
Beispiel #6
0
    def __init__(self, cfg, cfg_section='word_sim'):
        """Initialise the instance from section *cfg_section* of *cfg*.

        Reads the ``batch``, ``fourlangpath``, ``graph_dir``,
        ``definitions_binary`` and ``expand`` options, loads the lexicon
        with ``Lexicon.load_from_binary``, creates empty caches and builds
        a ``SimFeatures`` object from the same configuration section.
        """
        self.batch = cfg.getboolean(cfg_section, 'batch')

        # The configured path is only logged here, at warning level.
        fourlang_path = cfg.get(cfg_section, 'fourlangpath')
        path_msg = "fourlangpath is {0}".format(fourlang_path)
        logging.warning(path_msg)

        self.cfg = cfg

        graph_dir = cfg.get(cfg_section, "graph_dir")
        self.graph_dir = graph_dir
        ensure_dir(graph_dir)

        self.lemmatizer = Lemmatizer(cfg)

        lexicon_path = cfg.get(cfg_section, "definitions_binary")
        self.lexicon_fn = lexicon_path
        lexicon = Lexicon.load_from_binary(lexicon_path)
        self.lexicon = lexicon
        self.defined_words = lexicon.get_words()

        # Three independent, initially empty caches.
        self.word_sim_cache = dict()
        self.lemma_sim_cache = dict()
        self.links_nodes_cache = dict()

        english_stopwords = nltk_stopwords.words('english')
        self.stopwords = set(english_stopwords)

        self.sim_feats = SimFeatures(cfg, cfg_section)

        self.expand = cfg.getboolean(cfg_section, "expand")
        expand_msg = "expand is {0}".format(self.expand)
        logging.info(expand_msg)
Beispiel #7
0
    def __init__(self, cfg, cfg_section='word_sim'):
        """Initialise the instance from section *cfg_section* of *cfg*.

        Reads the ``batch``, ``fourlangpath``, ``graph_dir``,
        ``definitions_binary`` and ``expand`` options of *cfg_section*,
        plus ``similarity/compositional`` and ``machine/allow_4lang``.
        Loads the lexicon with ``Lexicon.load_from_binary``, creates empty
        caches and builds a ``SimFeatures`` object that also receives the
        loaded lexicon.

        ``self.text_to_4lang`` is only set when the ``compositional``
        option is true; otherwise the attribute is left undefined.
        """
        self.batch = cfg.getboolean(cfg_section, 'batch')

        # The configured path is only logged here, at warning level.
        fourlang_path = cfg.get(cfg_section, 'fourlangpath')
        path_msg = "fourlangpath is {0}".format(fourlang_path)
        logging.warning(path_msg)

        self.cfg = cfg

        graph_dir = cfg.get(cfg_section, "graph_dir")
        self.graph_dir = graph_dir
        ensure_dir(graph_dir)

        self.lemmatizer = Lemmatizer(cfg)

        lexicon_path = cfg.get(cfg_section, "definitions_binary")
        self.lexicon_fn = lexicon_path
        lexicon = Lexicon.load_from_binary(lexicon_path)
        self.lexicon = lexicon
        self.defined_words = lexicon.get_words()

        # Three independent, initially empty caches.
        self.word_sim_cache = dict()
        self.lemma_sim_cache = dict()
        self.links_nodes_cache = dict()

        english_stopwords = nltk_stopwords.words('english')
        self.stopwords = set(english_stopwords)

        self.sim_feats = SimFeatures(cfg, cfg_section, lexicon)

        self.expand = cfg.getboolean(cfg_section, "expand")

        # getboolean yields a bool, so a plain truth test is sufficient.
        if cfg.getboolean('similarity', 'compositional'):
            self.text_to_4lang = TextTo4lang(cfg, direct_parse=True)

        expand_msg = "expand is {0}".format(self.expand)
        logging.info(expand_msg)

        self.allow_4lang = cfg.getboolean('machine', 'allow_4lang')