def collect_entities():
    """Return a mapping from entity name to the indices of its types.

    The mapping is first looked up via ``Pickler.load(entities_path)``.
    If that raises ``IOError`` the mapping is rebuilt: for every type in
    ``entities_types`` (by position), each entity returned by
    ``select_entities_of_type`` gets that position appended to its list.
    The rebuilt mapping is stored back with ``Pickler.store`` and returned.

    Returns:
        The loaded object, or a ``defaultdict(list)`` keyed by entity name
        whose values are lists of indices into ``entities_types``.
    """
    try:
        cached = Pickler.load(entities_path)
    except IOError:
        # Loading failed; fall through and rebuild the mapping.
        pass
    else:
        return cached

    type_indices = defaultdict(list)
    for type_idx, type_name in enumerate(entities_types):
        for name in select_entities_of_type(full_type_name(type_name)):
            type_indices[name].append(type_idx)
            if "_(" not in name:
                continue
            # Also register the part of the name before the first "_("
            # (presumably a disambiguation suffix -- TODO confirm).
            base_name = name.split("_(", 1)[0]
            type_indices[base_name].append(type_idx)

    Pickler.store(type_indices, entities_path)
    return type_indices
def collect_entities():
    """Return a mapping from entity name to the indices of its types.

    The mapping is first looked up via ``Pickler.load(entities_path)``.
    If that raises ``IOError`` the mapping is rebuilt: for every type in
    ``entities_types`` (by position), each entity returned by
    ``select_entities_of_type`` gets that position appended to its list.
    The rebuilt mapping is stored back with ``Pickler.store`` and returned.

    Returns:
        The loaded object, or a ``defaultdict(list)`` keyed by entity name
        whose values are lists of indices into ``entities_types``.
    """
    try:
        cached = Pickler.load(entities_path)
    except IOError:
        # Loading failed; fall through and rebuild the mapping.
        pass
    else:
        return cached

    type_indices = defaultdict(list)
    for type_idx, type_name in enumerate(entities_types):
        for name in select_entities_of_type(full_type_name(type_name)):
            type_indices[name].append(type_idx)
            if '_(' not in name:
                continue
            # Also register the part of the name before the first '_('
            # (presumably a disambiguation suffix -- TODO confirm).
            base_name = name.split('_(', 1)[0]
            type_indices[base_name].append(type_idx)

    Pickler.store(type_indices, entities_path)
    return type_indices
def get_predominant_types(predicate, subject=True):
    """Return the types that dominate among entities of ``predicate``.

    Resolution order:

    1. If ``predicate`` has an entry in ``type_restrictions``, the full
       name of that single type is returned.
    2. A fixed set of predicates is pinned to the DBpedia ``Settlement``
       type.
    3. Otherwise the type lists from ``select_types(predicate, subject)``
       are tallied, skipping ``CandidatesSelector.wide_types``, and every
       type whose tally reaches 75% of the number of type lists is
       returned.

    Args:
        predicate: Relation name to look up.
        subject: Forwarded unchanged to ``select_types``; presumably
            chooses between subject-side and object-side types -- TODO
            confirm against ``select_types``.

    Returns:
        A list of type identifiers; empty when no type reaches the
        threshold.
    """
    if predicate in type_restrictions:
        return [full_type_name(type_restrictions[predicate])]

    # Limiting these relations to settlements only increases precision a lot.
    if predicate in ('stolica', 'gmina', 'region', 'prowincja', 'hrabstwo'):
        return [u'http://dbpedia.org/ontology/Settlement']

    type_preciseness = .75
    types_list = select_types(predicate, subject)

    types_count = defaultdict(int)
    for entity_types in types_list:
        for type_name in entity_types:
            if type_name not in CandidatesSelector.wide_types:
                types_count[type_name] += 1

    # Minimum tally a type needs, relative to the number of type lists.
    threshold = type_preciseness * len(types_list)

    # items() rather than iteritems(): the latter does not exist on
    # Python 3, while items() yields the same pairs on Python 2 and 3.
    return [
        type_name
        for type_name, count in types_count.items()
        if count >= threshold
    ]
def get_predominant_types(predicate, subject=True):
    """Return the types that dominate among entities of ``predicate``.

    Resolution order:

    1. If ``predicate`` has an entry in ``type_restrictions``, the full
       name of that single type is returned.
    2. A fixed set of predicates is pinned to the DBpedia ``Settlement``
       type.
    3. Otherwise the type lists from ``select_types(predicate, subject)``
       are tallied, skipping ``CandidatesSelector.wide_types``, and every
       type whose tally reaches 75% of the number of type lists is
       returned.

    Args:
        predicate: Relation name to look up.
        subject: Forwarded unchanged to ``select_types``; presumably
            chooses between subject-side and object-side types -- TODO
            confirm against ``select_types``.

    Returns:
        A list of type identifiers; empty when no type reaches the
        threshold.
    """
    if predicate in type_restrictions:
        return [full_type_name(type_restrictions[predicate])]

    # Limiting these relations to settlements only increases precision a lot.
    if predicate in ('stolica', 'gmina', 'region', 'prowincja', 'hrabstwo'):
        return [u'http://dbpedia.org/ontology/Settlement']

    type_preciseness = .75
    types_list = select_types(predicate, subject)

    types_count = defaultdict(int)
    for entity_types in types_list:
        for type_name in entity_types:
            if type_name not in CandidatesSelector.wide_types:
                types_count[type_name] += 1

    # Minimum tally a type needs, relative to the number of type lists.
    threshold = type_preciseness * len(types_list)

    # items() rather than iteritems(): the latter does not exist on
    # Python 3, while items() yields the same pairs on Python 2 and 3.
    return [
        type_name
        for type_name, count in types_count.items()
        if count >= threshold
    ]