def filter_entity_descriptions(mids):
    """Copy the description lines of the given entities to a smaller file.

    The input path is read from the ``entity-descriptions-file`` option of
    the ``EntityLinker`` config section. Every line of that gzip file is
    parsed with ``KBEntity.parse_freebase_string_triple``; lines whose first
    triple element is contained in ``mids`` are stripped of surrounding
    whitespace and written, newline-terminated, to a gzip file at the input
    path plus the suffix ``"_small"``.

    :param mids: container of identifiers to keep; each parsed line's first
        triple element is tested against it with ``in``.
    """
    import gzip

    descriptions_file = globals.config.get("EntityLinker",
                                           "entity-descriptions-file")
    # Membership is tested once per input line; a hash-based container keeps
    # a list/tuple argument from making the whole scan quadratic.
    if isinstance(mids, (list, tuple)):
        mids = set(mids)
    with gzip.open(descriptions_file, "r") as input_file, \
            gzip.open(descriptions_file + "_small", "w") as out:
        for line in input_file:
            triple = KBEntity.parse_freebase_string_triple(line)
            if triple[0] in mids:
                # Explicit write instead of the Python-2-only ``print >>``
                # statement; both gzip handles are opened in binary mode,
                # hence the bytes newline (a plain str on Python 2).
                out.write(line.strip() + b"\n")
def filter_entity_names(names):
    """Copy the entity-name lines matching ``names`` to a smaller file.

    The input path is read from the ``entity-names-file`` option of the
    ``EntityLinker`` config section. Every line of that gzip file is parsed
    with ``KBEntity.parse_freebase_string_triple``; when the lower-cased
    third triple element is contained in ``names``, the stripped line is
    written, newline-terminated, to a gzip file at the input path plus the
    suffix ``"_small"`` and the first triple element is collected.

    :param names: container of names to keep. Parsed names are lower-cased
        before the ``in`` test, so entries presumably need to be lower-case
        to match -- verify against the caller.
    :return: set of the first triple elements of all matching lines.
    """
    import gzip

    mids = set()
    entities_file = globals.config.get("EntityLinker", "entity-names-file")
    # Membership is tested once per input line; a hash-based container keeps
    # a list/tuple argument from making the whole scan quadratic.
    if isinstance(names, (list, tuple)):
        names = set(names)
    with gzip.open(entities_file, "r") as input_file, \
            gzip.open(entities_file + "_small", "w") as out:
        for line in input_file:
            triple = KBEntity.parse_freebase_string_triple(line)
            name = triple[2].lower()
            if name in names:
                mids.add(triple[0])
                # Explicit write instead of the Python-2-only ``print >>``
                # statement; both gzip handles are opened in binary mode,
                # hence the bytes newline (a plain str on Python 2).
                out.write(line.strip() + b"\n")
    return mids