Ejemplo n.º 1
0
def collect_entities():
    """Return a mapping from entity name to a list of type indices.

    First tries ``Pickler.load(entities_path)`` and returns its result.
    If that raises ``IOError``, the mapping is built from scratch: for every
    type in ``entities_types`` (by position), each entity returned by
    ``select_entities_of_type`` gets that position appended to its list.
    The freshly built mapping is passed to ``Pickler.store`` before being
    returned.

    Entities containing ``"_("`` are additionally indexed under the text
    preceding the first ``"_("`` (presumably a disambiguation suffix such
    as ``Name_(film)`` -- confirm against the data).
    """
    try:
        cached = Pickler.load(entities_path)
    except IOError:
        # Nothing usable at entities_path; fall through and rebuild.
        pass
    else:
        return cached

    index = defaultdict(list)
    for type_idx, type_name in enumerate(entities_types):
        for name in select_entities_of_type(full_type_name(type_name)):
            index[name].append(type_idx)
            # Also register the entity under the part before "_(".
            base_name, marker, _ = name.partition("_(")
            if marker:
                index[base_name].append(type_idx)

    Pickler.store(index, entities_path)
    return index
Ejemplo n.º 2
0
def collect_entities():
    """Load the entity-to-type-indices mapping, building it when needed.

    The result of ``Pickler.load(entities_path)`` is returned directly when
    the call succeeds. When it raises ``IOError`` the mapping is rebuilt by
    walking ``entities_types`` in order and appending each type's position
    to the list of every entity that ``select_entities_of_type`` yields for
    it. The rebuilt mapping is handed to ``Pickler.store`` and returned.

    An entity whose name contains ``'_('`` is also recorded under the text
    before the first ``'_('`` (presumably stripping a parenthesised
    qualifier -- confirm against the data).
    """
    try:
        stored = Pickler.load(entities_path)
    except IOError:
        # Load failed; continue below and compute the mapping.
        pass
    else:
        return stored

    type_indices = defaultdict(list)
    for position, short_type in enumerate(entities_types):
        members = select_entities_of_type(full_type_name(short_type))
        for member in members:
            type_indices[member].append(position)
            # Second entry keyed by the name without its '_(' tail.
            stem, found, _ = member.partition('_(')
            if found:
                type_indices[stem].append(position)

    Pickler.store(type_indices, entities_path)
    return type_indices
 def get_predominant_types(predicate, subject=True):
     """Return the types that predominate among entities of *predicate*.

     Resolution order:

     1. If *predicate* has an entry in ``type_restrictions``, the full name
        of that single type is returned.
     2. For a fixed set of predicates the Settlement type is returned.
     3. Otherwise ``select_types(predicate, subject)`` is consulted; it is
        expected to return a sized collection with one iterable of types
        per entity. A type is kept when it occurs in at least 75% of those
        entries. Types listed in ``CandidatesSelector.wide_types`` are
        never counted.

     :param predicate: name of the relation to inspect.
     :param subject: forwarded unchanged to ``select_types``.
     :return: list of type names (possibly empty).
     """
     if predicate in type_restrictions:
         return [full_type_name(type_restrictions[predicate])]
     # Limiting these relations to settlements only increases precision a lot.
     if predicate in ('stolica', 'gmina', 'region', 'prowincja', 'hrabstwo'):
         return [u'http://dbpedia.org/ontology/Settlement']
     type_preciseness = .75
     types_list = select_types(predicate, subject)
     types_count = defaultdict(int)
     for entity_types in types_list:
         for type_name in entity_types:
             if type_name not in CandidatesSelector.wide_types:
                 types_count[type_name] += 1
     # Minimum number of occurrences a type needs to be kept.
     threshold = type_preciseness * len(types_list)
     # items() rather than iteritems(): the latter does not exist on
     # Python 3, while items() behaves the same here on both versions.
     return [
         type_name for type_name, count in types_count.items()
         if count >= threshold
     ]
Ejemplo n.º 4
0
 def get_predominant_types(predicate, subject=True):
     """Return the types shared by most entities related through *predicate*.

     The lookup proceeds in three steps:

     1. A predicate present in ``type_restrictions`` yields the full name of
        its configured type, as a one-element list.
     2. A small fixed set of predicates yields the Settlement type only.
     3. Any other predicate is resolved through
        ``select_types(predicate, subject)``, which is expected to return a
        sized collection holding one iterable of types per entity. Types
        that appear in at least 75% of the entries are returned; types in
        ``CandidatesSelector.wide_types`` are skipped while counting.

     :param predicate: name of the relation to inspect.
     :param subject: passed through unchanged to ``select_types``.
     :return: list of type names (possibly empty).
     """
     if predicate in type_restrictions:
         return [full_type_name(type_restrictions[predicate])]
     # Limiting these relations to settlements only increases precision a lot.
     settlement_predicates = (
         'stolica', 'gmina', 'region', 'prowincja', 'hrabstwo'
     )
     if predicate in settlement_predicates:
         return [u'http://dbpedia.org/ontology/Settlement']
     type_preciseness = .75
     types_list = select_types(predicate, subject)
     types_count = defaultdict(int)
     for entity_types in types_list:
         for type_name in entity_types:
             if type_name not in CandidatesSelector.wide_types:
                 types_count[type_name] += 1
     # Occurrence count a type must reach to be considered predominant.
     min_count = type_preciseness * len(types_list)
     # dict.iteritems() is Python 2 only; items() works on Python 2 and 3
     # and gives the same pairs for this read-only pass.
     return [
         type_name for type_name, count in types_count.items()
         if count >= min_count
     ]