# Example #1
0
    def resolve(self, kb, awake_db):
        """Copy ``kb`` and attach external identifiers to its entities.

        Every entity in the copy is looked up through the entity group it
        is a member of.  When ``AwakeDB.get_source_string`` returns a string
        containing "dbpedia.org" for the group's actor id, that string is
        cleaned up (surrounding whitespace and angle brackets removed, the
        first "dbpedia.org/resource" rewritten to "en.wikipedia.org/wiki")
        and stored as the entity's ``external_uri`` property.  Entities whose
        group has an actor id and that end up with neither ``external_uri``
        nor ``geonameid`` receive a ``geonameid`` property when
        ``AwakeDB.get_geonameid_from_actorid`` yields a non-blank value.

        Args:
            kb: knowledge base to copy from (handed to ``copy_all``).
            awake_db: value passed to ``AwakeDB.initialize_awake_db``; the
                literal string "NA" skips every lookup.

        Returns:
            The new ``KnowledgeBase`` holding the copied, annotated content.

        Raises:
            KeyError: if an entity is not a member of any entity group.
        """
        print("ExternalURIResolver RESOLVE")
        resolved_kb = KnowledgeBase()

        super(ExternalURIResolver, self).copy_all(resolved_kb, kb)
        if awake_db == "NA":
            return resolved_kb

        # Reverse index: entity -> the entity group that lists it as a member.
        kb_entity_to_entity_group = dict()
        for entgroupid, kb_entity_group in resolved_kb.get_entity_groups():
            for kb_entity in kb_entity_group.members:
                kb_entity_to_entity_group[kb_entity] = kb_entity_group

        AwakeDB.initialize_awake_db(awake_db)
        for kb_entity in resolved_kb.entid_to_kb_entity.values():
            kb_entity_group = kb_entity_to_entity_group[kb_entity]
            actor_id = kb_entity_group.actor_id

            source_string = AwakeDB.get_source_string(actor_id)
            if source_string is not None and "dbpedia.org" in source_string:
                # Strip first so the bracket checks below see the real first
                # and last characters even when the stored value is padded.
                external_uri = source_string.strip()
                if external_uri.startswith("<"):
                    external_uri = external_uri[1:]
                if external_uri.endswith(">"):
                    external_uri = external_uri[:-1]
                external_uri = external_uri.replace("dbpedia.org/resource",
                                                    "en.wikipedia.org/wiki",
                                                    1)
                kb_entity.properties["external_uri"] = external_uri

            # For countries, add geoname_id to properties
            if (actor_id is not None
                    and "external_uri" not in kb_entity.properties
                    and "geonameid" not in kb_entity.properties):
                geonameid = AwakeDB.get_geonameid_from_actorid(actor_id)
                # Blank / whitespace-only ids are treated as missing.
                if geonameid is not None and str(geonameid).strip():
                    kb_entity.properties["geonameid"] = str(geonameid)

        return resolved_kb
    def resolve(self, kb):
        """Copy ``kb`` and give each entity group one agreed-upon entity type.

        For every entity group in the copy, the best entity type of each
        member is tallied.  The type with the most votes wins; on equal
        counts ``self.get_better_entity_type`` picks between the current
        winner and the challenger.  When the members disagree (more than one
        distinct type was seen), the winning type is added to every member
        with confidence 0.9.

        Args:
            kb: knowledge base to copy from (handed to ``copy_all``).

        Returns:
            The new ``KnowledgeBase`` with the adjusted entity types.
        """
        print("EntityGroupEntityTypeResolver RESOLVE")

        resolved_kb = KnowledgeBase()
        super(EntityGroupEntityTypeResolver, self).copy_all(resolved_kb, kb)

        for _group_id, group in resolved_kb.get_entity_groups():
            # Tally: best entity type of a member => number of members.
            votes_by_type = {}
            for member in group.members:
                member_type = member.get_best_entity_type()
                votes_by_type[member_type] = votes_by_type.get(member_type, 0) + 1

            # Pick the most frequent type, delegating ties to the helper.
            winner = None
            winner_votes = None
            for candidate, votes in votes_by_type.items():
                if winner is None or votes > winner_votes:
                    winner, winner_votes = candidate, votes
                elif votes == winner_votes:
                    winner = self.get_better_entity_type(winner, candidate)

            # Groups whose members already agree need no adjustment.
            if len(votes_by_type) <= 1:
                continue
            for member in group.members:
                member.add_entity_type(winner, 0.9)

        return resolved_kb
# Example #3
0
    def resolve(self, kb):
        """Copy ``kb`` and annotate entity groups with affiliation info.

        Two UTF-8 text files are read from ``../data_files`` relative to this
        module's directory:

        * ``actor_affiliation_info.txt`` -- each line is
          ``<actor_id> <affiliated actor id or CAMEO code> [description]``.
          A later line for the same actor id overwrites an earlier one.
        * ``actor_component_info.txt`` -- each line is
          ``<actor_id> <containing actor id> [description]``; an actor id may
          appear on several lines.

        Fields are separated by single spaces.  Blank lines and lines
        starting with "#" are skipped, and the description is ignored.

        For every entity group in the copy that has an actor id:

        * if the actor has an affiliation, the value is stored under
          ``awake_affiliated_cameo_code`` when it matches
          ``AdditionalAffiliationResolver.cameo_code_re``, otherwise it is
          converted to ``int`` and stored under ``awake_affiliated_actor_id``;
        * if the actor is a component of other actors, their ids are appended
          to the ``component_of_actor_ids`` list property.

        Args:
            kb: knowledge base to copy from (handed to ``copy_all``).

        Returns:
            The new ``KnowledgeBase`` with the annotated entity groups.

        Raises:
            ValueError: if an id field in a data file is not an integer.
            IndexError: if a data line has fewer than two fields.
        """
        print("AdditionalAffiliationResolver RESOLVE")
        resolved_kb = KnowledgeBase()

        super(AdditionalAffiliationResolver, self).copy_all(resolved_kb, kb)

        script_dir = os.path.dirname(os.path.realpath(__file__))

        # actor_id => affiliated actor id or CAMEO code, kept as the raw
        # string, e.g. "Vladimir Putin" -> "Russia"
        actor_affiliation = dict()
        # actor_id => [actor_id, ...] e.g. "Estonia" -> ["Baltic States", "NATO"]
        actor_component_of = dict()

        # Load actor_id -> actor_id/CAMEO code has affiliation
        affiliation_file = os.path.join(script_dir, "..", "data_files",
                                        "actor_affiliation_info.txt")
        # "with" guarantees the file is closed even when a line fails to parse.
        with codecs.open(affiliation_file, 'r', encoding='utf8') as a:
            for line in a:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                pieces = line.split(" ", 2)
                # assumes one affiliation per actor id
                actor_affiliation[int(pieces[0])] = pieces[1]

        # Load actor_id -> actor_id component info
        component_file = os.path.join(script_dir, "..", "data_files",
                                      "actor_component_info.txt")
        with codecs.open(component_file, 'r', encoding='utf8') as c:
            for line in c:
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                pieces = line.split(" ", 2)
                actor_id = int(pieces[0])
                containing_actor_id = int(pieces[1])
                actor_component_of.setdefault(actor_id, []).append(
                    containing_actor_id)

        # Set properties on entity groups
        for (entgroupid, entity_group) in resolved_kb.get_entity_groups():
            actor_id = entity_group.actor_id

            if actor_id is None:
                continue
            if actor_id in actor_affiliation:
                affiliated_actor_id_or_cameo_code = actor_affiliation[actor_id]
                if AdditionalAffiliationResolver.cameo_code_re.match(
                        affiliated_actor_id_or_cameo_code):
                    entity_group.properties[
                        "awake_affiliated_cameo_code"] = affiliated_actor_id_or_cameo_code
                else:
                    entity_group.properties["awake_affiliated_actor_id"] = int(
                        affiliated_actor_id_or_cameo_code)
            if actor_id in actor_component_of:
                if "component_of_actor_ids" not in entity_group.properties:
                    entity_group.properties["component_of_actor_ids"] = []
                entity_group.properties["component_of_actor_ids"].extend(
                    actor_component_of[actor_id])

        return resolved_kb
    def resolve(self, kb):
        """Copy ``kb`` and add country-code properties to groups and entities.

        Four passes run over the copy:

        1. Entity groups with a ``country_iso_code`` property found in
           ``self.iso_country_codes`` get the mapped value stored as
           ``geonames_country_code``.
        2. Entities whose canonical name is a key of ``self.country_codes``
           get the mapped value stored as ``cameo_country_code``.
        3. "GEN-AFF.Citizen-Resident-Religion-Ethnicity" relations from a
           PER.Individual / PER.Group entity to a GPE.Nation entity with a
           known country code are counted per (left entity, country code).
           A pair is also marked reliable when any relation mention's left
           mention has a link confidence listed in
           ``CountryCodePropertyResolver.reliable_link_confidences``.
        4. Each counted entity takes its most frequent country code (the
           smaller code wins a tie) as ``citizenship_cameo_country_code``,
           plus ``ethnicity`` when ``self.ethnicities`` has that code.
           Entities with a canonical name are skipped unless the chosen
           pair was marked reliable.

        Args:
            kb: knowledge base to copy from (handed to ``copy_all``).

        Returns:
            The new ``KnowledgeBase`` with the added properties.
        """
        print("CountryCodeResolver RESOLVE")
        resolved_kb = KnowledgeBase()

        super(CountryCodePropertyResolver, self).copy_all(resolved_kb, kb)

        # Pass 1: groups carrying an ISO country code (e.g. a city/geoname).
        for _group_id, group in resolved_kb.get_entity_groups():
            if "country_iso_code" not in group.properties:
                continue
            iso_code = group.properties["country_iso_code"]
            if iso_code not in self.iso_country_codes:
                continue
            group.properties["geonames_country_code"] = self.iso_country_codes[
                iso_code]

        # Pass 2: entities that are themselves countries.
        for entity in resolved_kb.entid_to_kb_entity.values():
            own_code = self.country_codes.get(entity.canonical_name)
            if own_code is None:
                continue
            entity.properties["cameo_country_code"] = own_code

        # Pass 3: gather citizenship evidence from relations.
        trusted_pairs = set()  # (entity, country_code) backed by a reliable link
        votes_by_entity = {}  # entity => {country_code => relation count}
        for relation in resolved_kb.relid_to_kb_relation.values():
            if relation.relation_type != "GEN-AFF.Citizen-Resident-Religion-Ethnicity":
                continue

            left_id, right_id = (relation.left_argument_id,
                                 relation.right_argument_id)
            person = resolved_kb.entid_to_kb_entity[left_id]
            nation = resolved_kb.entid_to_kb_entity[right_id]

            if not ("PER.Individual" in person.entity_type_to_confidence
                    or "PER.Group" in person.entity_type_to_confidence):
                continue
            if "GPE.Nation" not in nation.entity_type_to_confidence:
                continue

            nation_code = self.country_codes.get(nation.canonical_name)
            if nation_code is None:
                continue

            tally = votes_by_entity.setdefault(person, {})
            tally[nation_code] = tally.get(nation_code, 0) + 1

            # A single reliably linked mention is enough to trust the pair.
            for rel_mention in relation.relation_mentions:
                confidence = rel_mention.left_mention.link_confidence
                if confidence in CountryCodePropertyResolver.reliable_link_confidences:
                    trusted_pairs.add((person, nation_code))

        # Pass 4: assign the winning country code per entity.
        for entity, tally in votes_by_entity.items():
            # Highest count first; the smaller code breaks a tie.
            top_code = min(tally.items(),
                           key=lambda pair: (-pair[1], pair[0]))[0]

            # Named entities need a reliable match to be annotated.
            is_named = entity.canonical_name is not None
            if is_named and (entity, top_code) not in trusted_pairs:
                continue

            entity.properties["citizenship_cameo_country_code"] = top_code
            if top_code in self.ethnicities:
                entity.properties["ethnicity"] = self.ethnicities[top_code]

        return resolved_kb