def add_freebase(freebase_dump_gzip_f, entity_db):
    """Add every (entity, datum) pair from a Freebase dump to ``entity_db``.

    The dump is opened with ``gzip_open`` and handed to
    ``gen_freebase_pairs``, which is iterated as ``(entity, data)`` pairs.
    Each item of ``data`` is stored via
    ``entity_db.add_entity(entity, item, "freebase")``.

    Args:
        freebase_dump_gzip_f: Passed unchanged to ``gzip_open``
            (presumably the path of a gzip-compressed dump -- confirm).
        entity_db: Object exposing ``add_entity(entity, data, source)``.

    Returns:
        None. The work is done through ``entity_db.add_entity`` calls and
        periodic progress messages sent to ``logging.info``.
    """
    f = gzip_open(freebase_dump_gzip_f)
    try:
        c = 0
        for entity, data in gen_freebase_pairs(f):
            for dat in data:
                entity_db.add_entity(entity, dat, "freebase")
            # NOTE(review): the counter is assumed to advance once per
            # entity (outer loop), not once per datum -- confirm.
            c += 1
            if c % 1000000 == 0:
                logging.info("freebase: {0}".format(c))
    finally:
        # Release the dump handle even when parsing or insertion fails.
        f.close()
def filename_to_dict(args):
    """Count how often each ``(m, e)`` pair occurs in a triplet file.

    Args:
        args: A 3-item sequence ``(fn, minconf, lower)`` packed into a
            single argument.
            ``fn`` is the input path; it is opened with ``gzip_open`` when
            it ends in ``".gz"`` and with the builtin ``open`` otherwise.
            ``minconf`` is a threshold: triplets whose second field is
            below it are skipped.
            ``lower``, when truthy, lowercases the first triplet field
            before it is used as a key.

    Returns:
        A nested dict ``{m: {e: count}}`` where ``m`` and ``e`` are the
        first and third fields of the triplets produced by
        ``yield_triplets`` (presumably mention and entity, with the middle
        field a confidence score -- confirm against ``yield_triplets``).
    """
    fn, minconf, lower = args
    d = {}
    f = gzip_open(fn) if fn.endswith(".gz") else open(fn)
    try:
        for m, c, e in yield_triplets(f):
            if lower:
                m = m.lower()
            if c < minconf:
                continue
            # One inner counter dict per first-field key, created lazily.
            counts = d.setdefault(m, {})
            counts[e] = counts.get(e, 0) + 1
    finally:
        # Release the input handle even when parsing fails.
        f.close()
    return d