def main():
    """Build an EntityDB from the files named on the command line and dump it.

    Layout of ``sys.argv``:
        1       file handed to ``add_unambig_freebase``
        2, 3    English / German DBpedia files (read, but their loader is
                currently disabled below)
        4       geonames file (read, but its loader is currently disabled)
        5, 6    the two files handed to ``add_wikt``
        7       optional keep-list file; only consulted when ``sys.argv``
                holds more than ten entries
        -3      output path for the entities dump
        -2      output path for the dawg
        -1      output path for the prefix dawg
    """
    db = EntityDB()
    argv = sys.argv

    # Input files (positions counted from the front).
    notable_types_path = argv[1]
    dbpedia_en_f = argv[2]
    dbpedia_de_f = argv[3]
    geo_f = argv[4]
    wikt_def_path = argv[5]
    wikt_undef_path = argv[6]

    # Output files (positions counted from the back).
    prefix_dawg_path = argv[-1]
    dawg_path = argv[-2]
    entities_path = argv[-3]

    # The keep list is only present when an extra argument sits between the
    # six inputs and the three outputs.
    if len(argv) > 10:
        with open(argv[7]) as keep_file:
            keep_entries = []
            for raw_line in keep_file.readlines():
                # NOTE(review): calling .decode() on a line read in text mode
                # implies this script targets Python 2 -- confirm.
                keep_entries.append(raw_line.strip().decode("utf-8").lower())
            db.add_to_keep_list(keep_entries)

    add_unambig_freebase(notable_types_path, db)
    # Disabled sources, kept for reference:
    # add_freebase(freebase_dump_gz_f, entity_db)
    # add_dbpedia(dbpedia_en_f, dbpedia_de_f, entity_db)
    # add_geonames(geo_f, entity_db)
    add_wikt(wikt_def_path, wikt_undef_path, db)

    # Files are opened in the order dawg, entities, prefix dawg.
    with open(dawg_path, 'wb') as dawg_out, \
            open(entities_path, "w") as entities_out, \
            open(prefix_dawg_path, "wb") as prefix_dawg_out:
        db.dump(entities_out, dawg_out, prefix_dawg_out)
def main():
    """Convert the EntityDB stored at ``sys.argv[1]`` and dump it to ``sys.argv[2]``.

    The database loaded from the first argument and a freshly created,
    empty ``EntityDB`` are both handed to ``ModifyEBD``; after
    ``unidecode_entities()`` has run, the fresh database is dumped to the
    second argument.
    """
    source_dir, target_dir = sys.argv[1], sys.argv[2]

    converted_db = EntityDB()
    # presumably ModifyEBD writes its results into the second argument --
    # verify against the ModifyEBD implementation
    converter = ModifyEBD(EntityDB.load(source_dir), converted_db)
    converter.unidecode_entities()

    converted_db.dump(target_dir)
def modify_edb(self, modifier):
    """Rewrite the EntityDB in ``self.dir_old`` through ``modifier`` into ``self.dir_new``.

    Args:
        modifier: callable invoked once per (entity, type) pair with a single
            tuple ``(entity, data, src)``. It must return a 4-tuple
            ``(needed, entity, data, src)``. The returned record is added to
            the new database only when the returned entity is not the empty
            string and ``needed`` is truthy.
    """
    source_db = EntityDB.load_from_files(self.dir_old)
    target_db = EntityDB()

    for entity in source_db.dawg.keys():
        # Each type entry unpacks as a (src, data) pair.
        for src, data in source_db.get_type(entity):
            keep, new_entity, new_data, new_src = modifier((entity, data, src))
            # An empty entity key is always dropped, regardless of `keep`.
            if new_entity != '' and keep:
                target_db.add_entity(new_entity, new_data, new_src)

    target_db.dump_to_files(self.dir_new)