Beispiel #1
0
def main():
    """Build an EntityDB from the files named on the command line and dump it.

    Positional arguments (``sys.argv``):
        1: file handed to ``add_unambig_freebase``.
        2-4: DBpedia (en), DBpedia (de) and geo files; they are read from
            the command line but only referenced by the loader calls that
            are commented out below.
        5-6: the two files handed to ``add_wikt``.
        7: keep-list file, consulted only when ``sys.argv`` holds more
            than ten entries.
        last three: entities file, DAWG file and prefix-DAWG file, in that
            order; all three are opened for writing and passed to
            ``EntityDB.dump``.
    """
    db = EntityDB()
    argv = sys.argv

    freebase_types_path = argv[1]
    dbpedia_en_f = argv[2]
    dbpedia_de_f = argv[3]
    geo_f = argv[4]
    wikt_def_path = argv[5]
    wikt_undef_path = argv[6]
    # argv[6] exists at this point, so the tail slice always has 3 items.
    entities_path, dawg_path, prefix_dawg_path = argv[-3:]

    if len(argv) > 10:
        with open(argv[7]) as keep_file:
            # One keep-list entry per line, normalised to lowercase unicode.
            keep_entries = [
                line.strip().decode("utf-8").lower() for line in keep_file
            ]
            db.add_to_keep_list(keep_entries)

    add_unambig_freebase(freebase_types_path, db)
    # Disabled loaders, kept for reference:
    #add_freebase(freebase_dump_gz_f, entity_db)
    #add_dbpedia(dbpedia_en_f, dbpedia_de_f, entity_db)
    #add_geonames(geo_f, entity_db)
    add_wikt(wikt_def_path, wikt_undef_path, db)

    # Files are opened in the order DAWG, entities, prefix DAWG.
    with open(dawg_path, 'wb') as dawg_out, \
            open(entities_path, "w") as entities_out, \
            open(prefix_dawg_path, "wb") as prefix_out:
        db.dump(entities_out, dawg_out, prefix_out)
Beispiel #2
0
def main():
    """Run ``ModifyEBD.unidecode_entities`` from one EntityDB into another.

    ``sys.argv[1]`` is the location the existing EntityDB is loaded from
    and ``sys.argv[2]`` is where the freshly created EntityDB is dumped.
    """
    source_dir, target_dir = sys.argv[1], sys.argv[2]

    target_db = EntityDB()
    source_db = EntityDB.load(source_dir)

    # NOTE(review): presumably ModifyEBD fills target_db from source_db;
    # its implementation is not visible here.
    ModifyEBD(source_db, target_db).unidecode_entities()

    target_db.dump(target_dir)
Beispiel #3
0
def main():
    """Run ``ModifyEBD.unidecode_entities`` from one EntityDB into another.

    ``sys.argv[1]`` is the location the existing EntityDB is loaded from
    and ``sys.argv[2]`` is where the freshly created EntityDB is dumped.
    """
    source_dir, target_dir = sys.argv[1], sys.argv[2]

    target_db = EntityDB()
    source_db = EntityDB.load(source_dir)

    # NOTE(review): presumably ModifyEBD fills target_db from source_db;
    # its implementation is not visible here.
    ModifyEBD(source_db, target_db).unidecode_entities()

    target_db.dump(target_dir)
Beispiel #4
0
def main():
    """Build an EntityDB from the files named on the command line and dump it.

    Positional arguments (``sys.argv``):
        1: file handed to ``add_unambig_freebase``.
        2-4: DBpedia (en), DBpedia (de) and geo files; they are read from
            the command line but only referenced by the loader calls that
            are commented out below.
        5-6: the two files handed to ``add_wikt``.
        7: keep-list file, consulted only when ``sys.argv`` holds more
            than ten entries.
        last three: entities file, DAWG file and prefix-DAWG file, in that
            order; all three are opened for writing and passed to
            ``EntityDB.dump``.
    """
    db = EntityDB()
    argv = sys.argv

    freebase_types_path = argv[1]
    dbpedia_en_f = argv[2]
    dbpedia_de_f = argv[3]
    geo_f = argv[4]
    wikt_def_path = argv[5]
    wikt_undef_path = argv[6]
    # argv[6] exists at this point, so the tail slice always has 3 items.
    entities_path, dawg_path, prefix_dawg_path = argv[-3:]

    if len(argv) > 10:
        with open(argv[7]) as keep_file:
            # One keep-list entry per line, normalised to lowercase unicode.
            keep_entries = [
                line.strip().decode("utf-8").lower() for line in keep_file
            ]
            db.add_to_keep_list(keep_entries)

    add_unambig_freebase(freebase_types_path, db)
    # Disabled loaders, kept for reference:
    #add_freebase(freebase_dump_gz_f, entity_db)
    #add_dbpedia(dbpedia_en_f, dbpedia_de_f, entity_db)
    #add_geonames(geo_f, entity_db)
    add_wikt(wikt_def_path, wikt_undef_path, db)

    # Files are opened in the order DAWG, entities, prefix DAWG.
    with open(dawg_path, 'wb') as dawg_out, \
            open(entities_path, "w") as entities_out, \
            open(prefix_dawg_path, "wb") as prefix_out:
        db.dump(entities_out, dawg_out, prefix_out)
Beispiel #5
0
    def modify_edb(self, modifier):
        """Rewrite the EntityDB in ``self.dir_old`` into ``self.dir_new``.

        Every ``(src, data)`` pair returned by ``get_type`` for every key
        of the loaded database's ``dawg`` is passed to ``modifier`` as the
        tuple ``(entity, data, src)``. ``modifier`` must return a 4-tuple
        ``(needed, entity, data, src)``. The returned triple is added to a
        fresh EntityDB unless the returned entity is the empty string or
        ``needed`` is falsy. The fresh database is then dumped to
        ``self.dir_new``.
        """
        source_db = EntityDB.load_from_files(self.dir_old)
        result_db = EntityDB()

        for entity in source_db.dawg.keys():
            for src, data in source_db.get_type(entity):
                keep, new_entity, new_data, new_src = modifier(
                    (entity, data, src))
                # Drop entries whose entity became empty or that the
                # modifier marked as not needed.
                if new_entity == '' or not keep:
                    continue
                result_db.add_entity(new_entity, new_data, new_src)

        result_db.dump_to_files(self.dir_new)