Example #1
0
def add_freebase(freebase_dump_gzip_f, entity_db):
    """Feed every (entity, data item) pair of a Freebase dump into entity_db.

    Args:
        freebase_dump_gzip_f: Value handed to ``gzip_open`` to open the dump
            (presumably a path to a gzipped file -- confirm against
            ``gzip_open``).
        entity_db: Object exposing ``add_entity(entity, data, source)``. It is
            called once per data item with the source tag ``"freebase"``.

    A progress line is logged after every 1,000,000 entities processed.
    """
    f = gzip_open(freebase_dump_gzip_f)
    try:
        # Count entities from 1 so the progress check fires on exact millions.
        for c, (entity, data) in enumerate(gen_freebase_pairs(f), 1):
            for dat in data:
                entity_db.add_entity(entity, dat, "freebase")
            if c % 1000000 == 0:
                # Lazy %-args: the message is only built if INFO is enabled.
                logging.info("freebase: %s", c)
    finally:
        # Release the dump handle even if parsing or insertion fails midway.
        f.close()
Example #2
0
def add_freebase(freebase_dump_gzip_f, entity_db):
    """Feed every (entity, data item) pair of a Freebase dump into entity_db.

    Args:
        freebase_dump_gzip_f: Value handed to ``gzip_open`` to open the dump
            (presumably a path to a gzipped file -- confirm against
            ``gzip_open``).
        entity_db: Object exposing ``add_entity(entity, data, source)``. It is
            called once per data item with the source tag ``"freebase"``.

    A progress line is logged after every 1,000,000 entities processed.
    """
    f = gzip_open(freebase_dump_gzip_f)
    try:
        # Count entities from 1 so the progress check fires on exact millions.
        for c, (entity, data) in enumerate(gen_freebase_pairs(f), 1):
            for dat in data:
                entity_db.add_entity(entity, dat, "freebase")
            if c % 1000000 == 0:
                # Lazy %-args: the message is only built if INFO is enabled.
                logging.info("freebase: %s", c)
    finally:
        # Release the dump handle even if parsing or insertion fails midway.
        f.close()
def filename_to_dict(args):
    """Build nested occurrence counts from the triplets stored in one file.

    Args:
        args: A 3-tuple ``(fn, minconf, lower)``:
            fn -- file name; opened with ``gzip_open`` when it ends in
                ``".gz"``, otherwise with the builtin ``open``.
            minconf -- triplets whose middle value is below this threshold
                are skipped.
            lower -- when truthy, the first triplet element is lower-cased
                before counting.

    Returns:
        A dict mapping the first triplet element to a dict that maps the
        third triplet element to the number of accepted triplets seen for
        that pair. (Presumably mention -> entity -> count; confirm against
        ``yield_triplets``.)
    """
    fn, minconf, lower = args
    d = {}
    f = gzip_open(fn) if fn.endswith(".gz") else open(fn)
    try:
        for m, c, e in yield_triplets(f):
            # Filter first so rejected rows do not pay for lower-casing.
            if c < minconf:
                continue
            if lower:
                m = m.lower()
            counts = d.setdefault(m, {})
            counts[e] = counts.get(e, 0) + 1
    finally:
        # Close the input even when a malformed triplet raises mid-file.
        f.close()
    return d
def filename_to_dict(args):
    """Build nested occurrence counts from the triplets stored in one file.

    Args:
        args: A 3-tuple ``(fn, minconf, lower)``:
            fn -- file name; opened with ``gzip_open`` when it ends in
                ``".gz"``, otherwise with the builtin ``open``.
            minconf -- triplets whose middle value is below this threshold
                are skipped.
            lower -- when truthy, the first triplet element is lower-cased
                before counting.

    Returns:
        A dict mapping the first triplet element to a dict that maps the
        third triplet element to the number of accepted triplets seen for
        that pair. (Presumably mention -> entity -> count; confirm against
        ``yield_triplets``.)
    """
    fn, minconf, lower = args
    d = {}
    f = gzip_open(fn) if fn.endswith(".gz") else open(fn)
    try:
        for m, c, e in yield_triplets(f):
            # Filter first so rejected rows do not pay for lower-casing.
            if c < minconf:
                continue
            if lower:
                m = m.lower()
            counts = d.setdefault(m, {})
            counts[e] = counts.get(e, 0) + 1
    finally:
        # Close the input even when a malformed triplet raises mid-file.
        f.close()
    return d