예제 #1
0
    def test_create_seedling_entity_with_alternate_names(self):
        """Entity carrying several names plus text/numeric values."""
        g = aifutils.make_graph()

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        entity = aifutils.make_entity(g, "http://www.test.edu/entities/1",
                                      system)

        # Type is asserted separately from the entity so that uncertainty
        # about the type can be expressed.
        type_assertion = aifutils.mark_type(
            g, "http://www.test.org/assertions/1", entity,
            SEEDLING_TYPES_NIST.Person, system, 1.0)

        # Exercise every "mark" flavor on one entity purely to check that
        # validation accepts them; real data would rarely combine all three.
        for alternate_name in ("Name One", "N. One", "N-Money"):
            aifutils.mark_name(g, entity, alternate_name)

        aifutils.mark_text_value(g, entity, "TextValue")

        aifutils.mark_numeric_value_as_double(g, entity, 100)
        aifutils.mark_numeric_value_as_long(g, entity, 100)
        aifutils.mark_numeric_value_as_string(g, entity, "100")

        self.new_file(
            g, "test_create_a_seedling_entity_with_alternate_names.ttl")
        self.dump_graph(g, "Example of seedling entity with alternate names")
예제 #2
0
    def test_create_an_entity_with_add_invalid_attribute(self):
        """Negative test: attributes that are illegal on an entity."""
        g = aifutils.make_graph()

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        # Entity URIs are arbitrary as long as they are unique.
        entity = aifutils.make_entity(g, "http://www.test.edu/entities/1",
                                      system)

        # Attach semantic attributes that are not allowed on an entity;
        # the validator is expected to reject all of them.
        invalid_attributes = (
            interchange_ontology.Irrealis,
            interchange_ontology.Negated,
            interchange_ontology.Hedged,
            interchange_ontology.VideoJustificationChannelPicture,
        )
        for attribute in invalid_attributes:
            aifutils.mark_attribute(g, entity, attribute)

        self.new_file(g,
                      "test_create_an_entity_with_add_invalid_attribute.ttl")
        self.dump_graph(
            g,
            "Invalid: Semantic Attribute for Entity can only be must be aida:Generic"
        )
예제 #3
0
    def test_create_a_relation_argument_add_attribute(self):
        """Negative test: relation arguments may not carry attributes."""
        g = aifutils.make_graph()

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        # Two entities: a person and the place he lives in.
        bob = aifutils.make_entity(g, "http://www.test.edu/entites/person/Bob",
                                   system)
        maryland = aifutils.make_entity(
            g, "http://www.test.edu/entites/place/Maryland", system)

        aifutils.mark_type(g, "http://www.test.edu/assertions/bobIsAPerson",
                           bob, ldc_ontology.PER, system, 1.0)
        aifutils.mark_type(
            g, "http://www.test.edu/assertions/marylandIsALocation", maryland,
            ldc_ontology.LOC_Position_Region, system, 1.0)

        # The relation itself gets its own resource.
        residency_relation = aifutils.make_relation(
            g, "http://www.test.edu/relationss/bobLivesInMaryland", system)

        resident_argument = aifutils.mark_as_argument(
            g, residency_relation, ldc_ontology.Physical_Resident_Resident,
            bob, system, 1)

        # Attributes are not permitted on relation arguments, so this graph
        # should fail validation.
        aifutils.mark_attribute(g, resident_argument,
                                interchange_ontology.Generic)

        self.new_file(g, "test_create_a_relation_argument_add_attribute.ttl")
        self.dump_graph(
            g, "Invalid: Relation Argument cannot have aida:Attribute")
예제 #4
0
    def test_create_an_entity_with_uncertainty_about_its_type(self):
        """Entity with two mutually exclusive type hypotheses."""
        g = aifutils.make_graph()
        g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        entity = aifutils.make_entity(g, "http://www.test.edu/entities/1",
                                      system)

        # Two competing type assertions for the same entity, each with its
        # own confidence.
        entity_is_a_person = aifutils.mark_type(
            g, "http://www.test.org/assertions/1", entity,
            SEEDLING_TYPES_NIST.Person, system, 0.5)
        entity_is_an_organization = aifutils.mark_type(
            g, "http://www.test.org/assertions/2", entity,
            SEEDLING_TYPES_NIST.Organization, system, 0.2)

        # Each hypothesis is justified by a different text span.
        # NOTE(review): document id "NYT_ENG_201181231" has nine digits while
        # other tests use "NYT_ENG_20181231" — confirm this is intentional.
        aifutils.mark_text_justification(g, [entity, entity_is_a_person],
                                         "NYT_ENG_201181231", 42, 143, system,
                                         0.6)
        aifutils.mark_text_justification(g,
                                         [entity, entity_is_an_organization],
                                         "NYT_ENG_201181231", 343, 367, system,
                                         0.3)

        # The two type assertions cannot both hold.
        aifutils.mark_as_mutually_exclusive(
            g, {
                (entity_is_a_person,): 0.5,
                (entity_is_an_organization,): 0.2
            }, system, None)

        self.new_file(
            g, "test_create_an_entity_with_uncertainty_about_its_type.ttl")
        self.dump_graph(g, "Example of entity with uncertainty about type")
예제 #5
0
    def test_create_a_simple_cluster_with_handle(self):
        """Cluster two probably-coreferent entities under a handle."""
        g = aifutils.make_graph()
        g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, 'http://www.test.edu/testSystem')

        # Two person entities that probably refer to the same individual.
        vladimir_putin = aifutils.make_entity(
            g, "http://www.test.edu/entities/1", system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/1",
                           vladimir_putin, SEEDLING_TYPES_NIST.Person, system,
                           1.0)
        aifutils.mark_name(g, vladimir_putin, "Vladimir Putin")

        putin = aifutils.make_entity(g, "http://www.test.edu/entities/2",
                                     system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/2", putin,
                           SEEDLING_TYPES_NIST.Person, system, 1.0)
        aifutils.mark_name(g, putin, "Путин")

        # Cluster the two, using the first entity as the prototype and
        # "Vladimir Putin" as the cluster handle.
        putin_cluster = aifutils.make_cluster_with_prototype(
            g, "http://www.test.edu/clusters/1", vladimir_putin, system,
            "Vladimir Putin")

        # The second entity is only probably a member of the cluster.
        aifutils.mark_as_possible_cluster_member(g, putin, putin_cluster, 0.71,
                                                 system)

        self.new_file(g, "test_create_a_simple_cluster_with_handle.ttl")
        self.dump_graph(g, "create a simple cluster with handle")
예제 #6
0
    def test_create_an_event_with_ldc_time(self):
        """Events annotated with LDC time components of various precision."""
        g = aifutils.make_graph()
        g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        # A StartPosition event whose start is unknown and whose end is only
        # bounded from above (before 2016).
        event_start_position = aifutils.make_event(
            g, "http://www.test.edu/event/1", system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/1",
                           event_start_position,
                           SEEDLING_TYPES_NIST['Personnel.StartPosition'],
                           system, 1.0)
        unknown_time = LDCTimeComponent(LDCTimeType.UNKNOWN, None, None, None)
        end_before = LDCTimeComponent(LDCTimeType.BEFORE, "2016", None, None)
        aifutils.mark_ldc_time(g, event_start_position, unknown_time,
                               end_before, system)

        # An Attack event with an open-ended start but a definite end date.
        event_attack_unknown = aifutils.make_event(
            g, "http://www.test.edu/event/2", system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/2",
                           event_attack_unknown,
                           SEEDLING_TYPES_NIST['Conflict.Attack'], system, 1.0)
        start_after = LDCTimeComponent(LDCTimeType.AFTER, "2014", "--02", None)
        end_on = LDCTimeComponent(LDCTimeType.ON, "2014", "--02", "---21")
        aifutils.mark_ldc_time(g, event_attack_unknown, start_after, end_on,
                               system)

        self.new_file(g, "test_create_an_event_with_ldc_time.ttl")
        self.dump_graph(g, "create an event with LDCTime")
예제 #7
0
    def test_event_missing_type(self):
        # Having multiple type assertions (to express uncertainty) is fine,
        # but every event needs at least one; this one has none, so the
        # graph is invalid.
        g = aifutils.make_graph()
        system = aifutils.make_system_with_uri(g,
                                               "http://www.test.edu/testSytem")

        aifutils.make_event(g, "http://www.test.edu/events/1", system)

        self.dump_graph(g, "Invalid: Event missing type")
예제 #8
0
    def test_make_entity(self):
        """Smoke test: entity + type assertion + text justification."""
        g = make_graph()
        system = make_system_with_uri(g, "http://www.test.edu/system")
        entity = make_entity(g, "http://www.test.edu/entities/1", system)

        # Type is asserted separately so uncertainty could be expressed.
        type_assertion = mark_type(g, "http://www.test.edu/assertions/1",
                                   entity, AIDA_PROGRAM_ONTOLOGY.Person,
                                   system, 1.0)

        # One text span justifies both the entity and its type.
        mark_text_justification(g, [entity, type_assertion],
                                "NYT_ENG_20181231", 42, 143, system, 0.973)

        self.dump_graph(g, "Example of creating an entity")

        # The freshly created assertion is the entity's only type assertion.
        self.assertEqual([type_assertion], get_type_assertions(g, entity))
예제 #9
0
    def test_create_an_event_argument_add_invalid_attribute(self):
        """Negative test: attributes that are illegal on event arguments."""
        g = aifutils.make_graph()

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        # The event itself.
        event = aifutils.make_event(g, "http://www.test.edu/events/1", system)

        # Type the event as Personnel.Elect; typing is a separate assertion
        # so uncertainty about the type could be expressed.
        aifutils.mark_type(g, "http://www.test.edu/assertions/5", event,
                           ldc_ontology.Personnel_Elect, system, 1.0)

        # The two entities participating in the event.
        electee = aifutils.make_entity(g, "http://www.test.edu/entities/1",
                                       system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/6", electee,
                           ldc_ontology.PER, system, 1.0)

        election_country = aifutils.make_entity(
            g, "http://www.test.edu/entities/2", system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/7",
                           election_country, ldc_ontology.GPE, system, 1.0)

        # Attach the entities to the event as arguments.
        candidate_arg = aifutils.mark_as_argument(
            g, event, ldc_ontology.Personnel_Elect_Candidate, electee, system,
            0.785)
        place_arg = aifutils.mark_as_argument(
            g, event, ldc_ontology.Personnel_Elect_Place, election_country,
            system, 0.589)

        # None of these attributes is legal on an event argument, so the
        # validator should reject the graph.
        aifutils.mark_attribute(g, candidate_arg,
                                interchange_ontology.Irrealis)
        aifutils.mark_attribute(g, candidate_arg,
                                interchange_ontology.Generic)
        aifutils.mark_attribute(
            g, place_arg,
            interchange_ontology.VideoJustificationChannelPicture)
        aifutils.mark_attribute(
            g, place_arg, interchange_ontology.VideoJustificationChannelSound)

        self.new_file(
            g, "test_create_an_event_argument_add_invalid_attribute.ttl")
        self.dump_graph(
            g,
            "Invalid: Semantic Attribute for Event Argument, must be aida:Negated, aida:Hedged"
        )
예제 #10
0
    def test_create_an_entity_with_information_justification(self):
        """Entity and cluster each carrying an informative justification."""
        g = aifutils.make_graph()
        g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, 'http://www.test.edu/testSystem')

        # Two person entities that probably refer to the same individual.
        vladimir_putin = aifutils.make_entity(
            g, "http://www.test.edu/entities/1", system)
        aifutils.mark_name(g, vladimir_putin, "Vladimir Putin")

        type_assertion = aifutils.mark_type(
            g, "http://www.test.org/assertions/1", vladimir_putin,
            SEEDLING_TYPES_NIST.Person, system, 1.0)

        # The first entity carries an informative (most representative)
        # justification of its own.
        text_justification_1 = aifutils.mark_text_justification(
            g, [vladimir_putin, type_assertion], "HC00002Z0", 0, 10, system,
            1.0)
        aifutils.mark_informative_justification(g, vladimir_putin,
                                                text_justification_1)

        putin = aifutils.make_entity(g, "http://www.test.edu/entities/2",
                                     system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/2", putin,
                           SEEDLING_TYPES_NIST.Person, system, 1.0)
        aifutils.mark_name(g, putin, "Путин")

        # Cluster the two, using the first entity as the prototype.
        putin_cluster = aifutils.make_cluster_with_prototype(
            g, "http://www.test.edu/clusters/1", vladimir_putin, system,
            "Vladimir Putin")

        # The cluster itself also gets an informative justification.
        # NOTE(review): this justification pairs the second entity with the
        # first entity's type_assertion — confirm that is intentional.
        text_justification_2 = aifutils.mark_text_justification(
            g, [putin, type_assertion], "HC00002Z0", 0, 10, system, 1.0)
        aifutils.mark_informative_justification(g, putin_cluster,
                                                text_justification_2)

        # The second entity is only probably a member of the cluster.
        aifutils.mark_as_possible_cluster_member(g, putin, putin_cluster, 0.71,
                                                 system)

        self.new_file(
            g,
            "test_create_an_entity_and_cluster_with_informative_mention.ttl")
        self.dump_graph(
            g, "create an entity and cluster with informative mention")
예제 #11
0
    def test_make_entity(self):
        """Smoke test: entity + type assertion + text justification."""
        g = aifutils.make_graph()
        system = aifutils.make_system_with_uri(g, "http://www.test.edu/system")
        entity = aifutils.make_entity(g, "http://www.test.edu/entities/1",
                                      system)

        # Type is asserted separately so uncertainty could be expressed.
        type_assertion = aifutils.mark_type(
            g, "http://www.test.edu/assertions/1", entity,
            SEEDLING_TYPES_NIST.Person, system, 1.0)

        # One text span justifies both the entity and its type.
        aifutils.mark_text_justification(g, [entity, type_assertion],
                                         "NYT_ENG_20181231", 42, 143, system,
                                         0.973)

        self.new_file(g, "test_make_an_entity.ttl")
        self.dump_graph(g, "Example of creating an entity")

        # The freshly created assertion is the entity's only type assertion.
        self.assertEqual([type_assertion],
                         aifutils.get_type_assertions(g, entity))
    def test_confidence_outside_of_zero_one(self):
        """Negative test: confidence values must lie in [0.0, 1.0]."""
        g = aifutils.make_graph()
        system = aifutils.make_system_with_uri(g, "http://test.edu/testSystem")

        entity = aifutils.make_entity(g, "http://www.test.edu/entities/1",
                                      system)

        # 100.0 is outside the legal confidence range [0.0, 1.0], so this
        # graph should fail validation.
        aifutils.mark_type(g, "http://www.test.org/assertions/1", entity,
                           AIDA_PROGRAM_ONTOLOGY.Person, system, 100.0)

        self.dump_graph(g, "Invalid: Confidence outside of zero to one")
예제 #13
0
    def test_non_type_used_as_type(self):
        """Negative test: a type assertion must use a real type URI."""
        g = aifutils.make_graph()
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        entity = aifutils.make_entity(g, "http://www.test.edu/entities/1",
                                      system)

        # A blank node is not a legal entity type, so this graph should
        # fail validation.
        bogus_type = BNode()
        aifutils.mark_type(g, "http://www.test.edu/typeAssertion/1", entity,
                           bogus_type, system, 1.0)

        self.dump_graph(g, "Invalid: Non type used as type")
예제 #14
0
    def test_create_an_entity_with_image_justification_and_vector(self):
        """Entity with image justification, KB link, and private vector."""
        g = aifutils.make_graph()
        g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        # Entity URIs are arbitrary as long as they are unique.
        entity = aifutils.make_entity(g, "http://www.test.edu/entities/1",
                                      system)

        # Type is asserted separately (with its own arbitrary URI) so
        # uncertainty about the type could be expressed.
        type_assertion = aifutils.mark_type(
            g, "http://www.test.org/assertions/1", entity,
            SEEDLING_TYPES_NIST.Person, system, 1.0)

        # An image justification supports both the entity's existence and
        # its type, so it is attached to both resources; in TA1 -> TA2
        # communications, confidences live at the justification level.
        bounding_box = Bounding_Box((123, 45), (167, 98))
        aifutils.mark_image_justification(g, [entity, type_assertion],
                                          "NYT_ENG_20181231_03", bounding_box,
                                          system, 0.123)

        # The entity can also be linked to an external KB entry.
        aifutils.link_to_external_kb(g, entity, "freebase.FOO", system, .398)

        # Arbitrary system-private data (here a feature vector) can be
        # attached to nearly any resource.
        vec = {
            "vector_type": "http://www.test.edu/systemX/personVector",
            "vector_data": [2.0, 7.5, 0.2, 8.1]
        }
        aifutils.mark_private_data_with_vector(g, entity, system, vec)

        self.new_file(
            g, "test_create_an_entity_with_image_justification_and_vector.ttl")
        self.dump_graph(
            g, "Example of entity with image justification and vector")
예제 #15
0
    def test_create_an_event_add_invalid_attribute(self):
        """Negative test: an attribute that is illegal on an event."""
        g = aifutils.make_graph()

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        # The event itself.
        event = aifutils.make_event(g, "http://www.test.edu/events/1", system)

        # This attribute is not legal on an event, so the validator should
        # reject the graph.
        aifutils.mark_attribute(
            g, event, interchange_ontology.VideoJustificationChannelPicture)

        self.new_file(g, "test_create_an_event_add_invalid_attribute.ttl")
        self.dump_graph(
            g,
            "Invalid: Semantic Attribute for Event, must be aida:Negated, aida:Hedged, aida:Irrealis, aida:Generic"
        )
    def test_justification_missing_confidence(self):
        """Negative test: justifications must carry a confidence."""
        g = aifutils.make_graph()
        system = aifutils.make_system_with_uri(g, "http://test.edu/testSystem")

        entity = aifutils.make_entity(g, "http://www.test.edu/events/1",
                                      system)

        # Hand-build a text justification that omits the required
        # confidence property, so the graph should fail validation.
        justification = BNode()
        for predicate, value in (
            (RDF.type, AIDA_ANNOTATION.TextJustification),
            (AIDA_ANNOTATION.source, Literal("FOO", datatype=XSD.string)),
            (AIDA_ANNOTATION.startOffset, Literal(14, datatype=XSD.integer)),
            (AIDA_ANNOTATION.endOffsetInclusive,
             Literal(56, datatype=XSD.integer)),
            (AIDA_ANNOTATION.system, system),
        ):
            g.add((justification, predicate, value))
        g.add((entity, AIDA_ANNOTATION.justifiedBy, justification))

        self.dump_graph(g, "Invalid: Justification missing confidence")
예제 #17
0
    def test_create_a_relation_add_invalid_attribute(self):
        """Negative test: an attribute that is illegal on a relation."""
        g = aifutils.make_graph()

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        # The relation itself.
        relation = aifutils.make_relation(
            g,
            "https://github.com/NextCenturyCorporation/AIDA-Interchange-Format/LdcAnnotations#R779959.00004",
            system)

        # This attribute is not legal on a relation, so the validator
        # should reject the graph.
        aifutils.mark_attribute(
            g, relation, interchange_ontology.VideoJustificationChannelPicture)

        self.new_file(g, "test_create_a_relation_add_invalid_attribute.ttl")
        self.dump_graph(
            g,
            "Invalid: Semantic Attribute for Relation, must be aida:Negated, aida:Hedged, aida:Irrealis, aida:Generic"
        )
예제 #18
0
    def test_create_seedling_event_with_event_argument_uri(self):
        """Build a Personnel.Elect event whose arguments get explicit URIs."""
        g = aifutils.make_graph()
        # every AIF needs an object for the system responsible for creating it
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        # we make a resource for the event itself
        event = aifutils.make_event(g, "http://www.test.edu/events/1", system)

        # mark the event as a Personnel.Elect event; type is encoded separately
        # so we can express uncertainty about type
        event_type_string = "Personnel.Elect"
        aifutils.mark_type(g, "http://www.test.edu/assertions/5", event,
                           SEEDLING_TYPES_NIST[event_type_string], system, 1.0)

        # create the two entities involved in the event
        electee = aifutils.make_entity(g, "http://www.test.edu/entities/1",
                                       system)
        # BUG FIX: this assertion previously reused ".../assertions/7", the
        # same URI as election_country's type assertion below; give the
        # electee its own URI (matching the sibling test_create_an_event,
        # which uses assertions/6 and /7).
        aifutils.mark_type(g, "http://www.test.edu/assertions/6", electee,
                           SEEDLING_TYPES_NIST.Person, system, 1.0)

        election_country = aifutils.make_entity(
            g, "http://www.test.edu/entities/2", system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/7",
                           election_country,
                           SEEDLING_TYPES_NIST.GeopoliticalEntity, system, 1.0)

        # link those entities to the event, supplying explicit argument URIs
        aifutils.mark_as_argument(
            g, event, SEEDLING_TYPES_NIST[event_type_string] + "_Elect",
            electee, system, .785, "http://www.test.edu/eventArgument/1")
        aifutils.mark_as_argument(
            g, event, SEEDLING_TYPES_NIST[event_type_string] + "_Place",
            election_country, system, .589,
            "http://www.test.edu/eventArgument/2")
        self.new_file(
            g, "test_create_seedling_event_with_event_argument_uri.ttl")
        self.dump_graph(g,
                        "Example of seedling event with event assertion URI")
예제 #19
0
    def test_create_a_cluster_with_link_and_confidence(self):
        """Cluster two entities and link the cluster to an external KB."""
        g = aifutils.make_graph()
        g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        # Two person entities that probably refer to the same individual.
        putin = aifutils.make_entity(g, "http://www.test.edu/entities/1",
                                     system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/1", putin,
                           SEEDLING_TYPES_NIST.Person, system, 1.0)
        aifutils.mark_name(g, putin, "Путин")

        vladimir_putin = aifutils.make_entity(
            g, "http://www.test.edu/entities/2", system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/2",
                           vladimir_putin, SEEDLING_TYPES_NIST.Person, system,
                           1.0)
        aifutils.mark_name(g, vladimir_putin, "Vladimir Putin")

        # Cluster the two, using the second entity as the prototype.
        putin_cluster = aifutils.make_cluster_with_prototype(
            g, "http://www.test.edu/clusters/1", vladimir_putin, system,
            "Vladimir Putin")

        # First membership is certain, the second only probable.
        aifutils.mark_as_possible_cluster_member(g, putin, putin_cluster, 1.0,
                                                 system)
        aifutils.mark_as_possible_cluster_member(g, vladimir_putin,
                                                 putin_cluster, 0.71, system)

        # The cluster can also be linked to an external KB entry.
        aifutils.link_to_external_kb(g, putin_cluster, "freebase.FOO", system,
                                     .398)

        self.new_file(g, "test_create_a_cluster_with_link_and_confidence.ttl")
        self.dump_graph(g, "create a cluster with link and confidence")
예제 #20
0
    def test_create_an_event(self):
        """Build a Personnel.Elect event with two typed arguments."""
        g = aifutils.make_graph()
        g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)

        # Every AIF graph must identify the system that produced it.
        system = aifutils.make_system_with_uri(
            g, "http://www.test.edu/testSystem")

        # The event itself.
        event = aifutils.make_event(g, "http://www.test.edu/events/1", system)

        # Type the event as Personnel.Elect; typing is a separate assertion
        # so uncertainty about the type could be expressed.
        aifutils.mark_type(g, "http://www.test.edu/assertions/5", event,
                           SEEDLING_TYPES_NIST['Personnel.Elect'], system, 1.0)

        # The two entities participating in the event.
        electee = aifutils.make_entity(g, "http://www.test.edu/entities/1",
                                       system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/6", electee,
                           SEEDLING_TYPES_NIST.Person, system, 1.0)

        election_country = aifutils.make_entity(
            g, "http://www.test.edu/entities/2", system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/7",
                           election_country,
                           SEEDLING_TYPES_NIST.GeopoliticalEntity, system, 1.0)

        # Attach the entities to the event as arguments.
        candidate_role = URIRef(SEEDLING_TYPES_NIST['Personnel.Elect'] +
                                "_Elect")
        aifutils.mark_as_argument(g, event, candidate_role, electee, system,
                                  0.785)
        place_role = URIRef(SEEDLING_TYPES_NIST['Personnel.Elect'] + "_Place")
        aifutils.mark_as_argument(g, event, place_role, election_country,
                                  system, 0.589)

        self.new_file(g, "test_create_an_event.ttl")
        self.dump_graph(g, "Example of creating an event")
예제 #21
0
class ScalingTest():
    """Scaling benchmark for AIF graph construction.

    Each iteration of run_scaling_test() builds a graph with the current
    entity/event counts, serializes it to `filename`, prints the elapsed
    time and output size, then doubles the counts for the next pass.
    """

    filename = "scalingdata.ttl"
    LDC_NS = "https://github.com/NextCenturyCorporation/AIDA-Interchange-Format/LdcAnnotations#"
    # NOTE(review): graph and system are class attributes, shared by all
    # instances of ScalingTest.
    g = aifutils.make_graph()
    system = aifutils.make_system_with_uri(g, 'http://www.test.edu/testSystem')

    # beginning sizes of data
    entity_count = 128000
    event_count = 38400
    relations_count = 200
    assertion_count = 1500

    # monotonically increasing counters used to mint unique URIs
    entity_index = 1
    event_index = 1
    relation_index = 1
    assertion_index = 1

    # utility values, so that we can easily create random things
    abc = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

    entity_resource_list = []

    def run_scaling_test(self):
        """Run the benchmark, doubling entity/event counts after each pass."""
        for _ in range(200):
            print("trying : Entity count: ", self.entity_count)
            start_time = int(round(time.time() * 1000))

            self.run_single_test()

            end_time = int(round(time.time() * 1000))
            # Bug fix: the original computed (start_time - end_time), which
            # always reported a negative duration.
            duration = (end_time - start_time) / 1000
            size = 0

            # Bug fix: the original opened self.filename here and never closed
            # the handle; os.path.getsize does not require an open file.
            if os.path.isfile(self.filename):
                size = os.path.getsize(self.filename)
            size /= 1000000.  # bytes -> megabytes
            print("Size of output: ", size, " duration: ", duration)

            # double size of entities and events after every iteration
            self.increase()

    def run_single_test(self):
        """Add entity_count entities and event_count events, then write the
        graph to self.filename."""
        for _ in range(self.entity_count):
            self.add_entity()

        for _ in range(self.event_count):
            self.add_event()

        self.write_to_file(self.filename)

    def add_entity(self):
        """Create one entity with a random type assertion and, sometimes, a
        random name, text value, or numeric value."""
        entity_resource = aifutils.make_entity(self.g, self.get_entity_uri(),
                                               self.system)
        self.entity_resource_list.append(entity_resource)

        # sometimes add hasName, textValue, or numericValue, NOTE: This does not check type!!!
        rand = random.random()
        if rand < 0.15:
            aifutils.mark_name(self.g, entity_resource,
                               self.get_random_string(5))
        elif rand < 0.3:
            aifutils.mark_text_value(self.g, entity_resource,
                                     self.get_random_string(7))
        elif rand < 0.4:
            aifutils.mark_numeric_value_as_double(self.g, entity_resource,
                                                  random.random())

        # set the type
        type_to_use = self.get_random_entity()
        type_assertion = aifutils.mark_type(self.g, self.get_assertion_uri(),
                                            entity_resource, type_to_use,
                                            self.system, 1.0)
        self.add_justification_and_private_data(type_assertion)

    def add_event(self):
        """Create one event with a random type, a justification, and two
        randomly-roled arguments."""
        event_resource = aifutils.make_event(self.g, self.get_event_uri(),
                                             self.system)

        # add the type
        # Bug fix: the original used randint(0, len(...)) - 1, whose possible
        # index -1 aliased the last element, making it twice as likely.
        type_resource = self.EVENT_TYPES[
            random.randint(0, len(self.EVENT_TYPES) - 1)]
        type_assertion = aifutils.mark_type(self.g, self.get_assertion_uri(),
                                            event_resource, type_resource,
                                            self.system, 1.0)

        self.add_justification_and_private_data(type_assertion)

        # make two arguments
        # NOTE(review): get_random_entity() returns an entity *type* URI from
        # ENTITY_TYPES, not an entity resource from entity_resource_list —
        # possibly acceptable for a pure scaling test, but worth confirming.
        for _ in range(2):
            arg = URIRef(type_resource + self.get_random_suffix())
            argument = aifutils.mark_as_argument(self.g, event_resource, arg,
                                                 self.get_random_entity(),
                                                 self.system, 0.785,
                                                 self.get_assertion_uri())
            self.add_justification_and_private_data(argument)

    def add_justification_and_private_data(self, resource):
        """Attach a text justification and provenance private data to the
        given assertion resource."""
        docId = self.get_random_doc_id()

        # justify the type assertion
        aifutils.mark_text_justification(self.g, resource, docId, 1029, 1033,
                                         self.system, 0.973)

        # add some private data
        aifutils.mark_private_data(self.g, resource,
                                   "{ 'provenance' : '" + docId + "' }",
                                   self.system)

    def increase(self):
        """Double the entity and event counts for the next iteration."""
        self.entity_count *= 2
        self.event_count *= 2

    def get_uri(self, uri):
        """Qualify `uri` with the LDC annotation namespace."""
        return self.LDC_NS + uri

    def get_entity_uri(self):
        """Mint the next unique entity URI."""
        self.entity_index += 1
        return self.get_uri("entity-" + str(self.entity_index))

    def get_event_uri(self):
        """Mint the next unique event URI."""
        self.event_index += 1
        return self.get_uri("event-" + str(self.event_index))

    def get_relation_uri(self):
        """Mint the next unique relation URI."""
        self.relation_index += 1
        return self.get_uri("relation-" + str(self.relation_index))

    def get_assertion_uri(self):
        """Mint the next unique assertion URI."""
        self.assertion_index += 1
        return self.get_uri("assertion-" + str(self.assertion_index))

    def get_test_system_uri(self):
        """Return the URI used for the test system."""
        return self.get_uri("testSystem")

    def get_random_doc_id(self):
        """Build a plausible random document id, e.g. 'IC00123ABC'."""
        s = "IC" if random.getrandbits(1) == 1 else "HC"
        s += "00"
        s += str(random.randint(0, 1000))
        for _ in range(3):
            s += self.abc[random.randint(0, len(self.abc) - 1)]
        return s

    def get_random_string(self, length):
        """Return a random uppercase string of the requested length."""
        return "".join(self.abc[random.randint(0, len(self.abc) - 1)]
                       for _ in range(length))

    def get_random_entity(self):
        """Return a random entity type from ENTITY_TYPES."""
        return self.ENTITY_TYPES[random.randint(0, len(self.ENTITY_TYPES) - 1)]

    def get_random_suffix(self):
        """Return a random argument-role suffix such as '_Place'."""
        return "_" + self.ROLES[random.randint(0, len(self.ROLES) - 1)]

    def write_to_file(self, testname):
        """Serialize the graph as Turtle into the named file."""
        print("\n\n", testname, "\n\n")
        # Use a context manager so the handle is closed even on error.
        with open(testname, "w") as out:
            out.write(str(self.g.serialize(format='turtle')))

    ENTITY_TYPES = [
        seedling_ontology.Person, seedling_ontology.Organization,
        seedling_ontology.Location, seedling_ontology.Facility,
        seedling_ontology.GeopoliticalEntity, seedling_ontology.FillerType,
        seedling_ontology.Business_DeclareBankruptcy,
        seedling_ontology.Business_End, seedling_ontology.Business_Merge,
        seedling_ontology.Business_Start, seedling_ontology.Conflict_Attack,
        seedling_ontology.Conflict_Demonstrate,
        seedling_ontology.Contact_Broadcast, seedling_ontology.Contact_Contact,
        seedling_ontology.Contact_Correspondence,
        seedling_ontology.Contact_Meet,
        seedling_ontology.Existence_DamageDestroy,
        seedling_ontology.Government_Agreements,
        seedling_ontology.Government_Legislate,
        seedling_ontology.Government_Spy, seedling_ontology.Government_Vote,
        seedling_ontology.Inspection_Artifact,
        seedling_ontology.Inspection_People, seedling_ontology.Justice_Acquit,
        seedling_ontology.Justice_Appeal, seedling_ontology.Justice_ArrestJail,
        seedling_ontology.Justice_ChargeIndict,
        seedling_ontology.Justice_Convict, seedling_ontology.Justice_Execute,
        seedling_ontology.Justice_Extradite, seedling_ontology.Justice_Fine,
        seedling_ontology.Justice_Investigate,
        seedling_ontology.Justice_Pardon,
        seedling_ontology.Justice_ReleaseParole,
        seedling_ontology.Justice_Sentence, seedling_ontology.Justice_Sue,
        seedling_ontology.Justice_TrialHearing, seedling_ontology.Life_BeBorn,
        seedling_ontology.Life_Die, seedling_ontology.Life_Divorce,
        seedling_ontology.Life_Injure, seedling_ontology.Life_Marry,
        seedling_ontology.Manufacture_Artifact,
        seedling_ontology.Movement_TransportArtifact,
        seedling_ontology.Movement_TransportPerson,
        seedling_ontology.Personnel_Elect,
        seedling_ontology.Personnel_EndPosition,
        seedling_ontology.Personnel_Nominate,
        seedling_ontology.Personnel_StartPosition,
        seedling_ontology.Transaction_Transaction,
        seedling_ontology.Transaction_TransferControl,
        seedling_ontology.Transaction_TransferMoney,
        seedling_ontology.Transaction_TransferOwnership,
        seedling_ontology.GeneralAffiliation_APORA,
        seedling_ontology.GeneralAffiliation_MORE,
        seedling_ontology.GeneralAffiliation_OPRA,
        seedling_ontology.GeneralAffiliation_OrganizationWebsite,
        seedling_ontology.GeneralAffiliation_PersonAge,
        seedling_ontology.GeneralAffiliation_Sponsorship,
        seedling_ontology.Measurement_Count,
        seedling_ontology.OrganizationAffiliation_EmploymentMembership,
        seedling_ontology.OrganizationAffiliation_Founder,
        seedling_ontology.OrganizationAffiliation_InvestorShareholder,
        seedling_ontology.OrganizationAffiliation_Leadership,
        seedling_ontology.OrganizationAffiliation_Ownership,
        seedling_ontology.OrganizationAffiliation_StudentAlum,
        seedling_ontology.PartWhole_Membership,
        seedling_ontology.PartWhole_Subsidiary,
        seedling_ontology.PersonalSocial_Business,
        seedling_ontology.PersonalSocial_Family,
        seedling_ontology.PersonalSocial_RoleTitle,
        seedling_ontology.PersonalSocial_Unspecified,
        seedling_ontology.Physical_LocatedNear,
        seedling_ontology.Physical_OrganizationHeadquarter,
        seedling_ontology.Physical_OrganizationLocationOrigin,
        seedling_ontology.Physical_Resident
    ]

    EVENT_TYPES = [
        seedling_ontology.Business_DeclareBankruptcy,
        seedling_ontology.Business_End, seedling_ontology.Business_Merge,
        seedling_ontology.Business_Start, seedling_ontology.Conflict_Attack,
        seedling_ontology.Conflict_Demonstrate,
        seedling_ontology.Contact_Broadcast, seedling_ontology.Contact_Contact,
        seedling_ontology.Contact_Correspondence,
        seedling_ontology.Contact_Meet,
        seedling_ontology.Existence_DamageDestroy,
        seedling_ontology.Government_Agreements,
        seedling_ontology.Government_Legislate,
        seedling_ontology.Government_Spy, seedling_ontology.Government_Vote,
        seedling_ontology.Inspection_Artifact,
        seedling_ontology.Inspection_People, seedling_ontology.Justice_Acquit,
        seedling_ontology.Justice_Appeal, seedling_ontology.Justice_ArrestJail,
        seedling_ontology.Justice_ChargeIndict,
        seedling_ontology.Justice_Convict, seedling_ontology.Justice_Execute,
        seedling_ontology.Justice_Extradite, seedling_ontology.Justice_Fine,
        seedling_ontology.Justice_Investigate,
        seedling_ontology.Justice_Pardon,
        seedling_ontology.Justice_ReleaseParole,
        seedling_ontology.Justice_Sentence, seedling_ontology.Justice_Sue,
        seedling_ontology.Justice_TrialHearing, seedling_ontology.Life_BeBorn,
        seedling_ontology.Life_Die, seedling_ontology.Life_Divorce,
        seedling_ontology.Life_Injure, seedling_ontology.Life_Marry,
        seedling_ontology.Manufacture_Artifact,
        seedling_ontology.Movement_TransportArtifact,
        seedling_ontology.Movement_TransportPerson,
        seedling_ontology.Personnel_Elect,
        seedling_ontology.Personnel_EndPosition,
        seedling_ontology.Personnel_Nominate,
        seedling_ontology.Personnel_StartPosition,
        seedling_ontology.Transaction_Transaction,
        seedling_ontology.Transaction_TransferControl,
        seedling_ontology.Transaction_TransferMoney,
        seedling_ontology.Transaction_TransferOwnership
    ]

    ROLES = [
        "Attacker", "Instrument", "Place", "Target", "Time", "Broadcaster",
        "Place", "Time", "Participant", "Place", "Participant", "Time",
        "Participant", "Affiliate", "Affiliation", "Affiliation", "Person",
        "Entity", "Sponsor", "Defendant", "Prosecutor", "Adjudicator",
        "Defendant", "Agent", "Instrument", "Victim", "Artifact",
        "Manufacturer", "Agent", "Artifact", "Destination", "Instrument",
        "Origin", "Time", "Agent", "Destination", "Instrument", "Origin",
        "Person", "Employee", "Organization", "Person", "Entity", "Place",
        "Beneficiary", "Giver", "Recipient", "Thing", "Time"
    ]
def add_filetype(g, one_unique_ke, filetype_str):
    """Attach the file type as private JSON data to a knowledge element."""
    system = aifutils.make_system_with_uri(g, "http://www.rpi.edu/fileType")
    payload = json.dumps({'fileType': filetype_str})
    aifutils.mark_private_data(g, one_unique_ke, payload, system)
def append_private_data(language_id,
                        input_folder,
                        lorelei_links,
                        freebase_links,
                        fine_grained_entity_dict,
                        translation_dict,
                        offset_vec,
                        offset_entity_corefer,
                        ltf_dir,
                        doc_id_to_root_dict=None,
                        eng_elmo=None,
                        ukr_elmo=None,
                        rus_elmo=None,
                        trigger_vec=None,
                        offset_event_vec=None):
    """Augment every .ttl AIF file in `input_folder` with RPI private data
    and serialize the result into the module-level `output_folder`.

    For each graph this function: marks `language_id` as the fileType on
    every entity, event, and statement/relation; averages event trigger
    vectors (read from `offset_event_vec` when `trigger_vec` is truthy,
    otherwise computed via `generate_trigger_emb` with the ELMo models) and
    attaches them as private data; and, per entity mention, attaches
    Freebase links, fine-grained types, per-link LORELEI confidences,
    base64-encoded coreference ids, translations, and averaged entity
    vectors from `offset_vec`.

    Mention keys throughout are "docid:start-end" strings built from each
    justification's source/startOffset/endOffsetInclusive.

    Args:
        language_id: language code for the documents; stored as fileType.
            Translations are skipped when it contains 'en'.
        input_folder: directory containing the input .ttl files.
        lorelei_links: {"doc:start-end": {link_target: confidence}}.
        freebase_links: {"doc:start-end": freebase link dict, keyed by link
            id with an 'average_score' per candidate}.
        fine_grained_entity_dict: {link id: fine-grained type info}.
        translation_dict: {"doc:start-end": translation list}.
        offset_vec: {docid: {vec_type: [(start, end, vec), ...]}}.
        offset_entity_corefer: {"doc:start-end": {coarse_type: corefer id}}.
        ltf_dir: directory of LTF files used for ELMo trigger embeddings.
        doc_id_to_root_dict: optional {doc id: root doc id}; root id prefixes
            the coreference id before base64 encoding.
        eng_elmo, ukr_elmo, rus_elmo: optional per-language ELMo models,
            passed through to generate_trigger_emb.
        trigger_vec: when truthy, event vectors come from offset_event_vec
            instead of ELMo.
        offset_event_vec: {docid: {vec_type: [(start, end, vec), ...]}}.

    NOTE(review): `output_folder` is read from module scope, not a
    parameter — confirm it is defined wherever this function is imported.
    """

    # count_flag = 0
    for one_file in os.listdir(input_folder):
        # print(one_file)
        if ".ttl" not in one_file:
            continue
        # ent_json_list = dict()
        one_file_id = one_file.replace(".ttl", "")
        if doc_id_to_root_dict is not None:
            root_docid = doc_id_to_root_dict[one_file_id]
        else:
            root_docid = ""
        one_file_path = os.path.join(input_folder, one_file)
        output_file = os.path.join(output_folder, one_file)
        # NOTE(review): the file handle from open() is never closed.
        turtle_content = open(one_file_path).read()
        g = Graph().parse(data=turtle_content, format='ttl')

        # # append file type
        # system = aifutils.make_system_with_uri(g, "http://www.rpi.edu/fileType")
        # unique_ke_list = list()
        # for p, s, o in g:
        #     if "http://www.rpi.edu" in o:
        #         if p not in unique_ke_list:
        #             unique_ke_list.append(p)
        # for one_unique_ke in unique_ke_list:
        #     file_type_json_object = {'fileType': language_id}
        #     file_type_json_content = json.dumps(file_type_json_object)
        #     aifutils.mark_private_data(g, one_unique_ke, file_type_json_content, system)

        # append EDL fine_grained_data
        # system = aifutils.make_system_with_uri(g, "http://www.rpi.edu/EDL_FineGrained")
        # for p, s, o in g:
        #     if 'linkTarget' not in s:
        #         continue
        #     linking_info = o.split(':')[-1]
        #     if linking_info in fine_grained_entity_dict:
        #         fine_grained_json_object = fine_grained_entity_dict[linking_info]
        #         fine_grained_json_content = json.dumps(fine_grained_json_object)
        #         aifutils.mark_private_data(g, p, fine_grained_json_content, system)

        # Mark the file type on every entity, event, and statement/relation
        # triple, and remember event/statement subjects for the later passes.
        # entities = []
        events = []
        args = []
        for s, p, o in g:
            # print(s, p, o)
            if 'type' in p and 'Entity' in o:
                add_filetype(g, s, language_id)
                # entities.append(s)
            elif 'type' in p and 'Event' in o:
                add_filetype(g, s, language_id)
                events.append(s)
            elif 'type' in p and ('Statement' in o or 'Relation' in o):
                add_filetype(g, s, language_id)
                args.append(s)
        # get entities without TITLE/TIME, etc: keep only entities whose
        # asserted type has a whitelisted coarse (parent) class.
        entity_type_ttl = defaultdict()
        for entity in g.subjects(predicate=RDF.type, object=AIDA.Entity):
            # Walk the reified type assertions (rdf:subject -> entity).
            for assertion in g.subjects(object=entity, predicate=RDF.subject):
                object_assrt = g.value(subject=assertion, predicate=RDF.object)
                predicate_assrt = g.value(subject=assertion,
                                          predicate=RDF.predicate)
                # only predicate ==`type`
                if predicate_assrt == RDF.type:
                    entity_type = object_assrt.split('#')[-1]
                    parent_type = entity_type.split('.')[0]
                    if parent_type in [
                            'PER', 'ORG', 'GPE', 'LOC', 'FAC', 'WEA', 'VEH',
                            'SID', 'CRM', 'BAL'
                    ]:
                        entity_type_ttl[entity] = entity_type

        # Map each kept entity and each event to all of its justification
        # nodes (an element may have several mentions).
        entity_offset_map = defaultdict(list)
        event_offset_map = defaultdict(list)
        for s, p, o in g:
            if 'justifiedBy' in p:
                if s in entity_type_ttl:  # entities with whitelisted types
                    entity_offset_map[s].append(o)
                if s in events:
                    event_offset_map[s].append(o)

        # Collect startOffset/endOffsetInclusive/source per justification node.
        offset_info = dict(
        )  # offset_info[offset]['startOffset']=start, offset_info[offset]['endOffsetInclusive']=end
        for s, p, o in g:
            p = p.toPython().split('#')[-1]
            if 'startOffset' == p or 'endOffsetInclusive' == p or 'source' == p:
                if s not in offset_info:
                    offset_info[s] = dict()
                offset_info[s][p] = o

        # Average trigger vectors over every complete justification of each
        # event and store the mean as private data on the event.
        # trigger_emb_lists = defaultdict()
        for event in event_offset_map:
            event_vecs = []
            for one_offset in event_offset_map[event]:
                # Skip justifications missing any of start/end/source.
                if len(offset_info[one_offset]) != 3:
                    continue
                for one_offset_type in offset_info[one_offset]:
                    if 'startOffset' in one_offset_type:
                        start_offset = int(
                            offset_info[one_offset][one_offset_type])
                    elif 'endOffsetInclusive' in one_offset_type:
                        end_offset = int(
                            offset_info[one_offset][one_offset_type])
                    elif 'source' in one_offset_type:
                        docid = offset_info[one_offset][
                            one_offset_type].toPython()
                    # search_key = "%s:%d-%d" % (docid, start_offset, end_offset)

                if trigger_vec:
                    # event embedding from files: take vectors whose span
                    # falls inside the justification span
                    for ent_vec_type in offset_event_vec[docid]:
                        for (vec_start, vec_end,
                             vec) in offset_event_vec[docid][ent_vec_type]:
                            # print(vec_start, vec_end, vec)
                            if vec_start >= start_offset and vec_end <= end_offset:
                                # print(search_key)
                                event_vecs.append(vec)
                else:
                    # event embedding from elmo
                    vec = generate_trigger_emb(docid, start_offset, end_offset,
                                               ltf_dir, language_id, eng_elmo,
                                               ukr_elmo, rus_elmo)
                    if vec is not None:
                        event_vecs.append(vec)

            if len(event_vecs) > 0:
                # print(event_vecs)
                trigger_emb_avg = np.mean(event_vecs, axis=0)
                evt_vec_json_object = {
                    'event_vec':
                    ','.join(['%0.8f' % dim for dim in trigger_emb_avg])
                }
                evt_vec_json_content = json.dumps(evt_vec_json_object)
                system = aifutils.make_system_with_uri(
                    g, "http://www.rpi.edu/event_representations")
                aifutils.mark_private_data(g, event, evt_vec_json_content,
                                           system)
                # trigger_emb_lists[event] = evt_vec_json_content
        # unique_events = []
        # for one_bnode in event_offset_map:
        #     if event_offset_map[one_bnode] in unique_events:
        #         continue
        #     if len(offset_info[one_bnode]) != 2:
        #         continue
        #     for one_offset_type in offset_info[one_bnode]:
        #         if 'startOffset' in one_offset_type:
        #             start_offset = int(offset_info[one_bnode][one_offset_type])
        #         elif 'endOffsetInclusive' in one_offset_type:
        #             end_offset = int(offset_info[one_bnode][one_offset_type])
        #     search_key = "%s:%d-%d" % (one_file_id, start_offset, end_offset)
        #
        #     # append event time
        #     try:
        #         time = time_map[search_key]
        #         time_norm = time_map_norm[search_key]
        #         system = aifutils.make_system_with_uri(g, "http://www.rpi.edu/event_time")
        #         time_json_dict = {'time': time, 'time_norm': time_norm}
        #         time_json_content = json.dumps(time_json_dict)
        #         aifutils.mark_private_data(g, event_offset_map[one_bnode], time_json_content, system)
        #         unique_events.append(event_offset_map[one_bnode])
        #     except KeyError:
        #         pass
        #         # continue

        # Entities whose entity-level annotations (links, corefer ids) have
        # already been attached — those are added once per entity, while
        # translations and vectors are collected per mention.
        unique_entities = set()
        # ###### old ########### change to one entity may have multiple offsets
        # for one_bnode in entity_offset_map:
        #     if len(offset_info[one_bnode]) != 2:
        #         continue
        #     for one_offset_type in offset_info[one_bnode]:
        #         if 'startOffset' in one_offset_type:
        #             start_offset = int(offset_info[one_bnode][one_offset_type])
        #         elif 'endOffsetInclusive' in one_offset_type:
        #             end_offset = int(offset_info[one_bnode][one_offset_type])
        #     search_key = "%s:%d-%d" % (one_file_id, start_offset, end_offset)
        for entity in entity_offset_map:
            entity_vecs = []
            entity_type = entity_type_ttl[entity]
            coarse_type = entity_type.split('.')[0]
            for one_offset in entity_offset_map[entity]:
                # Skip justifications missing any of start/end/source.
                if len(offset_info[one_offset]) != 3:
                    continue
                for one_offset_type in offset_info[one_offset]:
                    if 'startOffset' in one_offset_type:
                        start_offset = int(
                            offset_info[one_offset][one_offset_type])
                    elif 'endOffsetInclusive' in one_offset_type:
                        end_offset = int(
                            offset_info[one_offset][one_offset_type])
                    elif 'source' in one_offset_type:
                        docid = offset_info[one_offset][
                            one_offset_type].toPython()
                search_key = "%s:%d-%d" % (docid, start_offset, end_offset)

                # append links (entity-level, only once per entity)
                if entity not in unique_entities:
                    # append Freebase linking result
                    try:
                        if search_key in freebase_links:
                            freebase_link = freebase_links[search_key]
                            system = aifutils.make_system_with_uri(
                                g, "http://www.rpi.edu/EDL_Freebase")
                            # freebase_json_dict = {'freebase_link': freebase_link}
                            # freebase_json_content = json.dumps(freebase_json_dict)
                            # aifutils.mark_private_data(g, one_offset, freebase_json_content, system)
                            freebase_json_content = json.dumps(
                                {'freebase_link': freebase_link})
                            aifutils.mark_private_data(g, entity,
                                                       freebase_json_content,
                                                       system)

                            # append EDL fine_grained_data for the candidate
                            # with the highest average_score
                            linking_info = sorted(
                                freebase_link.items(),
                                key=lambda x: x[1]['average_score'],
                                reverse=True)[0][0]
                            # linking_info = freebase_link.split(':')[-1]
                            if linking_info in fine_grained_entity_dict:
                                fine_grained_json_object = fine_grained_entity_dict[
                                    linking_info]
                                fine_grained_json_content = json.dumps({
                                    'finegrained_type':
                                    fine_grained_json_object
                                })
                                system = aifutils.make_system_with_uri(
                                    g, "http://www.rpi.edu/EDL_FineGrained")
                                aifutils.mark_private_data(
                                    g, entity, fine_grained_json_content,
                                    system)

                        # append multiple confidence: one confidence value per
                        # existing aida:link / aida:linkTarget on the entity
                        if search_key in lorelei_links:
                            # lorelei_link_dict = lorelei_links[search_key]
                            # print(lorelei_link_dict)
                            system = aifutils.make_system_with_uri(
                                g, "http://www.rpi.edu/EDL_LORELEI_maxPool")
                            p_link = URIRef(
                                'https://tac.nist.gov/tracks/SM-KBP/2019/ontologies/InterchangeOntology#link'
                            )
                            p_link_target = URIRef(
                                'https://tac.nist.gov/tracks/SM-KBP/2019/ontologies/InterchangeOntology#linkTarget'
                            )
                            for lorelei_link_ttl in g.objects(
                                    subject=entity, predicate=p_link):
                                link_target = str(
                                    g.value(subject=lorelei_link_ttl,
                                            predicate=p_link_target)
                                )  #.split(':')[-1]
                                # print('link_target', link_target)
                                # Unknown targets get a small floor confidence.
                                if search_key not in lorelei_links or link_target not in lorelei_links[
                                        search_key]:  #???
                                    confidence = 0.001
                                else:
                                    confidence = lorelei_links[search_key][
                                        link_target]
                                # print('confidence', confidence)
                                aifutils.mark_confidence(
                                    g, lorelei_link_ttl, confidence, system)

                        # append corefer info
                        if search_key in offset_entity_corefer:
                            # print(one_file_id, search_key, entity_ttl, offset_entity_corefer[search_key])
                            if coarse_type in offset_entity_corefer[
                                    search_key]:
                                corefer_id = offset_entity_corefer[search_key][
                                    coarse_type]
                                # print(search_key, id)
                                system = aifutils.make_system_with_uri(
                                    g, "http://www.rpi.edu/coreference")
                                # cipher = AES.new(secret_key, AES.MODE_ECB)  # never use ECB in strong systems obviously
                                # Obfuscate root_docid + corefer_id via base64.
                                corefer_id_encoded = base64.b64encode(
                                    ('%s%s' % (root_docid, corefer_id)
                                     ).encode('utf-8')).decode("utf-8")
                                corefer_json_dict = {
                                    'coreference': corefer_id_encoded
                                }  #str(uuid.UUID(corefer_id).hex)}
                                corefer_json_content = json.dumps(
                                    corefer_json_dict)
                                aifutils.mark_private_data(
                                    g, entity, corefer_json_content, system)

                        # save entity
                        unique_entities.add(entity)
                    except KeyError as e:
                        # NOTE(review): prints the traceback but continues;
                        # a partial annotation may be left on the entity.
                        traceback.print_exc()
                        pass

                # append translation (mention-level); skipped for English
                if 'en' in language_id:
                    continue
                try:
                    translation_list = translation_dict[search_key]
                    system = aifutils.make_system_with_uri(
                        g, "http://www.rpi.edu/EDL_Translation")
                    translation_json_dict = {'translation': translation_list}
                    translation_json_content = json.dumps(
                        translation_json_dict)
                    aifutils.mark_private_data(g, one_offset,
                                               translation_json_content,
                                               system)
                except KeyError:
                    pass
                    # continue

                # collect entity vectors (mention-level) whose span is inside
                # the justification span
                for ent_vec_type in offset_vec[docid]:
                    for (vec_start, vec_end,
                         vec) in offset_vec[docid][ent_vec_type]:
                        # print(vec_start, vec_end, vec)
                        if vec_start >= start_offset and vec_end <= end_offset:
                            # print(search_key)
                            entity_vecs.append(vec)
            # append entity vectors (mention-level)
            if len(entity_vecs) > 0:
                entity_vec = np.average(entity_vecs, 0)
                # print(entity, entity_vec)
                system = aifutils.make_system_with_uri(
                    g, "http://www.rpi.edu/entity_representations")
                # NOTE(review): ent_vec_type here is the loop variable leaked
                # from the last mention's vector loop — confirm a single vector
                # space per document is intended.
                ent_vec_json_object = {
                    'entity_vec_space': ent_vec_type,
                    'entity_vec':
                    ','.join(['%0.8f' % dim for dim in entity_vec])
                }
                ent_vec_json_content = json.dumps(ent_vec_json_object)
                # print(ent_vec_json_content)
                aifutils.mark_private_data(g, entity, ent_vec_json_content,
                                           system)
                # ent_json_list[entity] = ent_vec_json_content
                # NOTE(review): this break exits the outer `for entity` loop,
                # so only the first entity that accumulates vectors is marked
                # — confirm this is intended and not a leftover debug break.
                break

        g.serialize(destination=output_file, format='turtle')

    print("Now we have append the private data for %s" % language_id)
예제 #24
0
def append_private_data(language_id, input_folder, lorelei_links,
                        freebase_links, fine_grained_entity_dict,
                        translation_dict):
    """Append RPI private data to every AIF ``.ttl`` graph in *input_folder*.

    For each entity mention this attaches (as AIF private data):
    mention-level entity vectors, Freebase linking results, fine-grained
    entity types, LORELEI link confidences and (for non-English documents)
    translations.  Each updated graph is serialized into the module-level
    ``output_folder``.

    NOTE(review): relies on module-level globals ``output_folder`` and
    ``offset_vec`` -- confirm they are initialized before calling.
    """
    import numpy as np  # local import: only needed for vector averaging

    for one_file in os.listdir(input_folder):
        if ".ttl" not in one_file:
            continue
        one_file_path = os.path.join(input_folder, one_file)
        output_file = os.path.join(output_folder, one_file)
        turtle_content = open(one_file_path).read()
        g = Graph().parse(data=turtle_content, format='ttl')

        # Tag every entity / event / statement node with the file type and
        # collect the entity and event nodes.
        entities = []
        events = []
        args = []
        for s, p, o in g:
            if 'type' in p and 'Entity' in o:
                add_filetype(g, s, language_id)
                entities.append(s)
            elif 'type' in p and 'Event' in o:
                add_filetype(g, s, language_id)
                events.append(s)
            elif 'type' in p and ('Statement' in o or 'Relation' in o):
                add_filetype(g, s, language_id)
                args.append(s)

        # One entity/event may carry several justification (offset) nodes.
        entity_offset_map = defaultdict(list)
        event_offset_map = defaultdict(list)
        for s, p, o in g:
            if 'justifiedBy' in p:
                if s in entities:
                    entity_offset_map[s].append(o)
                if s in events:
                    event_offset_map[s].append(o)

        # offset_info[justification] = {'startOffset': ...,
        #                               'endOffsetInclusive': ...,
        #                               'source': ...}
        offset_info = dict()
        for s, p, o in g:
            p = p.toPython().split('#')[-1]
            if 'startOffset' == p or 'endOffsetInclusive' == p or 'source' == p:
                if s not in offset_info:
                    offset_info[s] = dict()
                offset_info[s][p] = o

        unique_entities = []
        for entity in entity_offset_map:
            # BUG FIX: entity_vecs was never initialized (NameError on the
            # first append below).
            entity_vecs = []
            for one_offset in entity_offset_map[entity]:
                # need all three of startOffset / endOffsetInclusive / source
                if len(offset_info[one_offset]) != 3:
                    continue
                for one_offset_type in offset_info[one_offset]:
                    if 'startOffset' in one_offset_type:
                        start_offset = int(
                            offset_info[one_offset][one_offset_type])
                    elif 'endOffsetInclusive' in one_offset_type:
                        end_offset = int(
                            offset_info[one_offset][one_offset_type])
                    elif 'source' in one_offset_type:
                        docid = offset_info[one_offset][
                            one_offset_type].toPython()
                search_key = "%s:%d-%d" % (docid, start_offset, end_offset)

                # collect entity vectors (mention-level)
                # BUG FIX: ent_vec_type was referenced without this
                # enclosing loop over the vector spaces (see the sibling
                # version of this function).
                for ent_vec_type in offset_vec[docid]:
                    for (vec_start, vec_end,
                         vec) in offset_vec[docid][ent_vec_type]:
                        if vec_start >= start_offset and vec_end <= end_offset:
                            entity_vecs.append(vec)

                # append links, once per entity
                # BUG FIX: entity_offset_map[one_bnode] referenced the old
                # bnode->entity mapping that no longer exists; the entity is
                # simply the loop variable itself.
                if entity not in unique_entities:
                    unique_entities.append(entity)
                    try:
                        # Freebase linking result
                        if search_key in freebase_links:
                            freebase_link = freebase_links[search_key]
                            system = aifutils.make_system_with_uri(
                                g, "http://www.rpi.edu/EDL_Freebase")
                            freebase_json_content = json.dumps(
                                {'freebase_link': freebase_link})
                            aifutils.mark_private_data(
                                g, entity, freebase_json_content, system)

                            # EDL fine-grained type of the top-scoring link
                            linking_info = sorted(
                                freebase_link.items(),
                                key=lambda x: x[1]['average_score'],
                                reverse=True)[0][0]
                            if linking_info in fine_grained_entity_dict:
                                fine_grained_json_content = json.dumps({
                                    'finegrained_type':
                                    fine_grained_entity_dict[linking_info]
                                })
                                system = aifutils.make_system_with_uri(
                                    g, "http://www.rpi.edu/EDL_FineGrained")
                                aifutils.mark_private_data(
                                    g, entity, fine_grained_json_content,
                                    system)

                        # LORELEI link confidences (one per link target)
                        if search_key in lorelei_links:
                            lorelei_link_dict = lorelei_links[search_key]
                            system = aifutils.make_system_with_uri(
                                g, "http://www.rpi.edu/EDL_LORELEI_maxPool")
                            p_link = URIRef(
                                'https://tac.nist.gov/tracks/SM-KBP/2019/ontologies/InterchangeOntology#link'
                            )
                            p_link_target = URIRef(
                                'https://tac.nist.gov/tracks/SM-KBP/2019/ontologies/InterchangeOntology#linkTarget'
                            )
                            for lorelei_link_ttl in g.objects(
                                    subject=entity, predicate=p_link):
                                link_target = str(
                                    g.value(subject=lorelei_link_ttl,
                                            predicate=p_link_target))
                                if link_target not in lorelei_link_dict:
                                    # unlisted target gets a floor confidence
                                    confidence = 0.001
                                else:
                                    confidence = lorelei_link_dict[link_target]
                                aifutils.mark_confidence(
                                    g, lorelei_link_ttl, confidence, system)
                    except KeyError:
                        # best-effort: record the failure and keep going
                        traceback.print_exc()

                # append translation (English sources need none)
                if 'en' in language_id:
                    continue
                try:
                    translation_list = translation_dict[search_key]
                    system = aifutils.make_system_with_uri(
                        g, "http://www.rpi.edu/EDL_Translation")
                    translation_json_content = json.dumps(
                        {'translation': translation_list})
                    # BUG FIX: was marked on the undefined one_bnode; the
                    # sibling version marks the justification node itself.
                    aifutils.mark_private_data(g, one_offset,
                                               translation_json_content,
                                               system)
                except KeyError:
                    pass

            # attach the averaged mention-level entity vector; restores
            # behavior from the sibling version of this function -- vectors
            # were collected above but never attached.
            if len(entity_vecs) > 0:
                entity_vec = np.average(entity_vecs, 0)
                system = aifutils.make_system_with_uri(
                    g, "http://www.rpi.edu/entity_representations")
                ent_vec_json_content = json.dumps({
                    'entity_vec_space': ent_vec_type,
                    'entity_vec':
                    ','.join(['%0.8f' % dim for dim in entity_vec])
                })
                aifutils.mark_private_data(g, entity, ent_vec_json_content,
                                           system)

        g.serialize(destination=output_file, format='ttl')

    print("Now we have append the private data for %s" % language_id)
def get_initialized_graph_and_system():
    """Create a fresh AIF graph with the test prefix bound, plus its system.

    Returns:
        tuple: ``(graph, system)`` ready for use by the tests.
    """
    aif_graph = aifutils.make_graph()
    aif_graph.bind('test', prefix)
    test_system = aifutils.make_system_with_uri(
        aif_graph, "http://www.test.edu/testSystem")
    return aif_graph, test_system
def transferAIF(p_id):
    """Merge the RPI text graph with the CU vision/instance graphs for the
    parent document *p_id*, attach visual and textual feature vectors as
    private data, add grounding bounding-box entities with image/keyframe
    justifications, and build grounding and instance-matching clusters.

    The merged graph is serialized to ``<merged_graph_path>/<p_id>.ttl``.

    NOTE(review): depends on many module-level globals (ttl path/id lists,
    the ``*_pref`` IRI prefixes, ``grounding_dict``, ``get_features``,
    ``merged_graph_path``) -- confirm they are initialized before calling.
    """
    g = Graph()

    # load rpi (text) graph if it exists
    if p_id in txt_mention_ttl_list:
        turtle_path = os.path.join(txt_mention_ttl_path, p_id + '.ttl')
        turtle_content = open(turtle_path).read()
        g.parse(data=turtle_content, format='n3')

    # load and merge cu graph if it exists
    if p_id in cu_ttl_list:
        turtle_path = os.path.join(cu_ttl_path, p_id + '.ttl')
        turtle_content = open(turtle_path).read()
        g.parse(data=turtle_content, format='n3')

    # load and merge cu instance-matching graph if it exists
    if p_id in cu_ttl_ins_list:
        turtle_path = os.path.join(cu_ttl_ins_path, p_id + '.ttl')
        turtle_content = open(turtle_path).read()
        g.parse(data=turtle_content, format='n3')

    sys_instance_matching = aifutils.make_system_with_uri(
        g, cu_pref + 'Systems/Instance-Matching/ResNet152')
    sys_grounding = aifutils.make_system_with_uri(
        g, cu_pref + 'Systems/Grounding/ELMo-PNASNET')
    usc_sys_grounding = aifutils.make_system_with_uri(
        g, usc_pref + 'Systems/ZSGrounder')

    # find vision (image / keyframe) and text entities in the merged graph
    sbj_all = set(g.subjects())
    img_entities = {}
    keyframe_entities = {}
    ltf_entities = {}
    for sbj in sbj_all:
        sbj_name = sbj.toPython()
        if cu_objdet_pref in sbj_name:
            if sbj.__class__ == rdflib.term.URIRef:
                if 'JPG' in sbj_name:
                    img_id = '/'.join(sbj_name.split('/')[-2:])
                    img_entities[img_id] = sbj
                elif 'Keyframe' in sbj_name:
                    kfrm_id = '/'.join(sbj_name.split('/')[-2:])
                    keyframe_entities[kfrm_id] = sbj
        elif rpi_entity_pref in sbj_name:
            if sbj.__class__ == rdflib.term.URIRef and rpi_entity_pref in sbj_name:
                ltf_entities[sbj_name] = sbj

    # add private data (instance + semantic feature vectors) to image entities
    for key in img_entities:
        dtype = 'jpg'
        # instance features
        ftype = 'instance'
        data_instance = get_features(key, dtype, ftype)
        # semantic features
        ftype = 'semantic'
        data_semantic = get_features(key, dtype, ftype)
        # aggregation
        j_d_i = json.dumps({'columbia_vector_instance_v1.0': data_instance})
        j_d_s = json.dumps({'columbia_vector_grounding_v1.0': data_semantic})
        entity = img_entities[key]
        aifutils.mark_private_data(g, entity, j_d_i, sys_instance_matching)
        aifutils.mark_private_data(g, entity, j_d_s, sys_grounding)

    # same for keyframe entities
    for key in keyframe_entities:
        dtype = 'keyframe'
        # instance features
        ftype = 'instance'
        data_instance = get_features(key, dtype, ftype)
        # semantic features
        ftype = 'semantic'
        data_semantic = get_features(key, dtype, ftype)
        # aggregation
        j_d_i = json.dumps({'columbia_vector_instance_v1.0': data_instance})
        j_d_s = json.dumps({'columbia_vector_grounding_v1.0': data_semantic})
        entity = keyframe_entities[key]
        aifutils.mark_private_data(g, entity, j_d_i, sys_instance_matching)
        aifutils.mark_private_data(g, entity, j_d_s, sys_grounding)

    cnt_img = {}   # per-image count of grounding bboxes (for unique IRIs)
    cnt_boxO = {}  # per-image count of bbox-overlap cluster prototypes
    cnt_ltf = {}   # per-name count of text entities (avoids cluster overlap)

    # add text features, grounding, linking
    for key in ltf_entities:
        if key not in grounding_dict:
            continue
        entity_name = None
        USC_GROUNDING = 'usc_vision' in grounding_dict[key][
            'grounding'].values()
        if not USC_GROUNDING:
            # CU grounding: attach textual features to the text entity
            j_d_t = json.dumps({
                'columbia_vector_text_v1.0':
                grounding_dict[key]['textual_features'].tolist()
            })
            entity_ltf = ltf_entities[key]
            aifutils.mark_private_data(g, entity_ltf, j_d_t, sys_grounding)

            # type and name of entity to be linked
            type_rdf = grounding_dict[key]['type_rdf']
            entity_name = grounding_dict[key]['name']
            grndg_file_type = grounding_dict[key]['source_type']

        # NOTE(review): entity_name is only ever set on the CU branch, so
        # USC groundings are skipped right here and the USC branches below
        # are unreachable -- confirm whether that is intended.
        if entity_name is None:
            continue

        # keep track of entities with the same name to avoid cluster overlap
        if entity_name in cnt_ltf:
            cnt_ltf[entity_name] += 1
        else:
            cnt_ltf[entity_name] = 1

        clstr_prot_flag = False  # cluster obj for entity_ltf not created yet
        # add grounding bboxes as new entities
        for img_id in grounding_dict[key]['grounding']:
            if img_id == 'system':
                continue
            grnd = grounding_dict[key]['grounding'][img_id]
            for ii, bbox in enumerate(grnd['bbox']):

                # keep track of the count of bboxes for the same image
                if img_id in cnt_img:
                    cnt_img[img_id] += 1
                else:
                    cnt_img[img_id] = 1
                # add the grounding bbox as an entity with a typed assertion
                score = grnd['bbox_score'][ii]
                if not USC_GROUNDING:
                    type_eid = cu_grndg_type_pref + f"{grndg_file_type}/{img_id.split('.')[0]}/{cnt_img[img_id]}/ERE"
                    ent_eid = cu_grndg_ent_pref + f"{grndg_file_type}/{img_id.split('.')[0]}/{cnt_img[img_id]}"
                    entity_grnd = aifutils.make_entity(g, ent_eid,
                                                       sys_grounding)
                    type_assertion = aifutils.mark_type(
                        g, type_eid, entity_grnd, type_rdf, sys_grounding,
                        score)
                elif USC_GROUNDING:
                    type_eid = usc_grndg_type_pref + f"{grndg_file_type}/{img_id.split('.')[0]}/{cnt_img[img_id]}/ERE"
                    ent_eid = usc_grndg_ent_pref + f"{grndg_file_type}/{img_id.split('.')[0]}/{cnt_img[img_id]}"
                    entity_grnd = aifutils.make_entity(g, ent_eid,
                                                       usc_sys_grounding)
                    type_assertion = aifutils.mark_type(
                        g, type_eid, entity_grnd, type_rdf, usc_sys_grounding,
                        score)

                # justify the bbox entity against its image or keyframe
                bb = Bounding_Box((bbox[0], bbox[1]), (bbox[2], bbox[3]))
                if not USC_GROUNDING:
                    if 'JPG' in type_assertion:
                        imgid = img_id.split('.')[0]
                        justif = aifutils.mark_image_justification(
                            g, [entity_grnd, type_assertion], imgid, bb,
                            sys_grounding, score)
                    elif 'Keyframe' in type_assertion:
                        imgid = img_id.split('.')[0].split('_')[0]
                        # keyframe image id or keyframe number
                        kfid = img_id.split('.')[0].split('_')[1]
                        # BUG FIX: previously justified the stale `entity`
                        # left over from the feature loops above; the newly
                        # created bbox entity is entity_grnd (matching the
                        # JPG branch).
                        justif = aifutils.mark_keyframe_video_justification(
                            g, [entity_grnd, type_assertion], imgid, kfid,
                            bb, sys_grounding, score)
                    # NOTE(review): if type_assertion contains neither 'JPG'
                    # nor 'Keyframe', justif stays unbound below -- confirm
                    # that cannot happen with real data.
                elif USC_GROUNDING:
                    imgid = img_id.split('.')[0]
                    justif = aifutils.mark_image_justification(
                        g, [entity_grnd, type_assertion], imgid, bb,
                        usc_sys_grounding, score)
                else:
                    print(
                        '[Merge Error] in Main Body: the type_assertion is wrong'
                    )
                aifutils.add_source_document_to_justification(g, justif, p_id)
                aifutils.mark_informative_justification(g, entity_grnd, justif)

                if not USC_GROUNDING:
                    grounding_features = grnd['grounding_features'][ii].tolist(
                    )
                    instance_features = grnd['instance_features'][ii].tolist()
                    # add private data to this very bbox entity
                    j_d_g = json.dumps(
                        {'columbia_vector_grounding_v1.0': grounding_features})
                    j_d_i = json.dumps(
                        {'columbia_vector_instance_v1.0': instance_features})
                    aifutils.mark_private_data(g, entity_grnd, j_d_g,
                                               sys_grounding)
                    aifutils.mark_private_data(g, entity_grnd, j_d_i,
                                               sys_instance_matching)

                # #### add clusters
                # sanitize the entity name for use inside an IRI
                entity_name_tmp = '_'.join(entity_name.split(' '))
                entity_name_in_IRI = "".join(
                    x if x.isalpha() or x.isdigit() or x == '_' else '-'
                    for x in entity_name_tmp)
                # grounding cluster: bbox entity <-> rpi text entity
                if not USC_GROUNDING:
                    if not clstr_prot_flag:  # create cluster if not present
                        clst_eid = cu_grndg_clstr_txt_pref + f"{entity_name_in_IRI}/{cnt_ltf[entity_name]}"
                        clusterObj = aifutils.make_cluster_with_prototype(
                            g, clst_eid, entity_ltf, sys_grounding)
                        clstr_prot_flag = True
                    # cluster current bbox with current ltf_entity
                    score = grnd['men-img-score'][ii]
                    aifutils.mark_as_possible_cluster_member(
                        g, entity_grnd, clusterObj, score, sys_grounding)
                    # add the prototype as a member as well
                    aifutils.mark_as_possible_cluster_member(
                        g, entity_ltf, clusterObj, 1, sys_grounding)
                elif USC_GROUNDING:
                    if not clstr_prot_flag:  # create cluster if not present
                        clst_eid = usc_grndg_clstr_txt_pref + f"{entity_name_in_IRI}/{cnt_ltf[entity_name]}"
                        clusterObj = aifutils.make_cluster_with_prototype(
                            g, clst_eid, entity_ltf, usc_sys_grounding)
                        clstr_prot_flag = True
                    # cluster current bbox with current ltf_entity
                    score = grnd['men-img-score'][ii]
                    aifutils.mark_as_possible_cluster_member(
                        g, entity_grnd, clusterObj, score, usc_sys_grounding)
                    # add the prototype as a member as well
                    aifutils.mark_as_possible_cluster_member(
                        g, entity_ltf, clusterObj, 1, usc_sys_grounding)

                # BoundingBox overlap cluster (instance matching):
                # grounding bbox entity to objdet entity
                if not USC_GROUNDING:
                    clstr_prot_b2b_flag = False
                    for jj, img_id_link in enumerate(
                            grnd['link_ids'][ii]):  # for all objdet bboxes
                        if img_id_link in img_entities:
                            entity_link_img = img_entities[img_id_link]
                        elif img_id_link in keyframe_entities:
                            entity_link_img = keyframe_entities[img_id_link]
                        else:
                            continue
                        # track count of bbox overlaps for the same image
                        if img_id in cnt_boxO:
                            cnt_boxO[img_id] += 1
                        else:
                            cnt_boxO[img_id] = 1
                        if not clstr_prot_b2b_flag:
                            clst_b2b_eid = cu_grndg_clstr_img_pref + f"{img_id.split('.')[0]}/{cnt_boxO[img_id]}"
                            clusterObj_b2b = aifutils.make_cluster_with_prototype(
                                g, clst_b2b_eid, entity_grnd,
                                sys_grounding)  # sys_instance_matching
                            clstr_prot_b2b_flag = True

                        # IoU of the grounding bbox and the objdet bbox
                        score = grnd['link_scores'][ii][jj]
                        aifutils.mark_as_possible_cluster_member(
                            g, entity_link_img, clusterObj_b2b, score,
                            sys_grounding)  # sys_instance_matching
                        # add the prototype as a member as well
                        aifutils.mark_as_possible_cluster_member(
                            g, entity_grnd, clusterObj_b2b, 1,
                            sys_grounding)  # sys_instance_matching

    # serialize the merged graph to <merged_graph_path>/<p_id>.ttl
    with open(os.path.join(merged_graph_path, p_id + '.ttl'), 'w') as fout:
        serialization = BytesIO()
        g.serialize(destination=serialization, format='turtle')
        fout.write(serialization.getvalue().decode('utf-8'))
    sys.stdout.flush()
# ===== Example #27 =====
# Map each keyframe detection id ("<videoid>_<frame>") back to its parent
# video id and record that the video has detections.
for imgid in det_results_vid:
    vidid = imgid.split('_')[0]
    id_set_vid.add(vidid)

# In[18]:

# Per-document accumulators -- presumably filled further below; verify
# against the rest of the notebook/script.
kb_dict = {}
entity_dict = {}
event_dict = {}

# Build one AIF graph per root (parent) document.
for root_doc in root_to_leaf:

    g = aifutils.make_graph()

    # One AIF "system" node per object-detection model whose results are
    # asserted into this graph.
    system_pa = aifutils.make_system_with_uri(
        g,
        "http://www.columbia.edu/AIDA/USC/Systems/ObjectDetection/FasterRCNN/PascalVOC"
    )
    system_co = aifutils.make_system_with_uri(
        g,
        "http://www.columbia.edu/AIDA/DVMM/Systems/ObjectDetection/FasterRCNN-NASNet/COCO"
    )
    system_oi = aifutils.make_system_with_uri(
        g,
        "http://www.columbia.edu/AIDA/DVMM/Systems/ObjectDetection/FasterRCNN-InceptionResNet/OpenImages"
    )
    system_ws = aifutils.make_system_with_uri(
        g,
        "http://www.columbia.edu/AIDA/DVMM/Systems/ObjectDetection/MITWeaklySupervised-ResNet/OpenImages"
    )

    # JPG images that belong to this root document.
    for imgid in id_set_jpg & set(root_to_leaf[root_doc]):
# ===== Example #28 =====
    def test_create_hierarchical_cluster(self):
        """Assert that a cluster of Trump entities can be marked as a
        possible member of the cluster of president entities (hierarchical
        clustering)."""
        # we want to say that the cluster of Trump entities might be the same as the cluster of the president entities
        g = aifutils.make_graph()
        g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)

        # every AIF needs an object for the system responsible for creating it
        system = aifutils.make_system_with_uri(
            g, 'http://www.test.edu/testSystem')

        # create president entities
        president_usa = aifutils.make_entity(g,
                                             "http://www.test.edu/entities/1",
                                             system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/1",
                           president_usa,
                           SEEDLING_TYPES_NIST.GeopoliticalEntity, system, 1.0)
        aifutils.mark_name(g, president_usa, "the president")

        # BUG FIX: the type/name assertions for the entities below all
        # targeted president_usa (copy-paste); each must describe its own
        # entity, as in the other entity-creation examples.
        new_president = aifutils.make_entity(g,
                                             "http://www.test.edu/entities/2",
                                             system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/2",
                           new_president,
                           SEEDLING_TYPES_NIST.GeopoliticalEntity, system, 1.0)
        aifutils.mark_name(g, new_president, "the newly-inaugurated president")

        president_45 = aifutils.make_entity(g,
                                            "http://www.test.edu/entities/3",
                                            system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/3",
                           president_45,
                           SEEDLING_TYPES_NIST.GeopoliticalEntity, system, 1.0)
        aifutils.mark_name(g, president_45, "the 45th president")

        # cluster president entities
        president_cluster = aifutils.make_cluster_with_prototype(
            g, "http://www.test.edu/clusters/president", president_usa, system)

        aifutils.mark_as_possible_cluster_member(g, president_usa,
                                                 president_cluster, 1, system)
        aifutils.mark_as_possible_cluster_member(g, new_president,
                                                 president_cluster, .9, system)
        aifutils.mark_as_possible_cluster_member(g, president_45,
                                                 president_cluster, .9, system)

        # create Trump entities
        donald_trump = aifutils.make_entity(g,
                                            "http://www.test.edu/entities/4",
                                            system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/4",
                           donald_trump, SEEDLING_TYPES_NIST.Person, system,
                           1.0)
        aifutils.mark_name(g, donald_trump, "Donald Trump")

        trump = aifutils.make_entity(g, "http://www.test.edu/entities/5",
                                     system)
        aifutils.mark_type(g, "http://www.test.edu/assertions/5",
                           trump, SEEDLING_TYPES_NIST.Person, system,
                           1.0)
        aifutils.mark_name(g, trump, "Trump")

        # cluster trump entities
        trump_cluster = aifutils.make_cluster_with_prototype(
            g, "http://www.test.edu/clusters/trump", donald_trump, system)
        aifutils.mark_as_possible_cluster_member(g, donald_trump,
                                                 trump_cluster, 1, system)
        aifutils.mark_as_possible_cluster_member(g, trump, trump_cluster, .9,
                                                 system)

        # the president cluster might be the same as the trump cluster
        aifutils.mark_as_possible_cluster_member(g, president_cluster,
                                                 trump_cluster, .6, system)

        self.new_file(g, "test_create_hierarchical_cluster.ttl")
        self.dump_graph(g, "Seedling hierarchical cluster")
# ===== Example #29 =====
    def test_simple_hypothesis_with_importance_with_cluster(self):
        """Exercise aida:importance markings on a cluster, on argument
        statements, and on a hypothesis, including extreme values
        (negative, very large, and the maximum representable float)."""
        g = aifutils.make_graph()
        g.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)

        # Every AIF graph carries a node for the system that produced it.
        test_system = aifutils.make_system_with_uri(
            g, 'http://www.test.edu/testSystem')

        # Document-level BUK entity, typed as a weapon.
        doc_buk = aifutils.make_entity(g, "E779954.00005", test_system)
        doc_buk_typing = aifutils.mark_type(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-1",
            doc_buk, SEEDLING_TYPES_NIST.Weapon, test_system, 1.0)

        # Cross-document (KB-level) BUK entity, also typed as a weapon.
        kb_buk = aifutils.make_entity(
            g, "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#E0084",
            test_system)
        aifutils.mark_type(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-2",
            kb_buk, SEEDLING_TYPES_NIST.Weapon, test_system, 1.0)

        # Document-level Russia entity, typed as a geopolitical entity.
        doc_russia = aifutils.make_entity(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#E779954.00004",
            test_system)
        aifutils.mark_type(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-3",
            doc_russia, SEEDLING_TYPES_NIST.GeopoliticalEntity, test_system,
            1.0)

        # Cluster the document-level BUK under the KB-level prototype, then
        # mark a negative importance on the cluster to test that extreme.
        buk_cluster = aifutils.make_cluster_with_prototype(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#cluster-1",
            kb_buk, test_system)
        buk_membership = aifutils.mark_as_possible_cluster_member(
            g, doc_buk, buk_cluster, .9, test_system)
        aifutils.mark_importance(g, buk_cluster, -70.234)

        # Relation asserting the BUK's affiliation with Russia.
        owns_relation = aifutils.make_relation(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#R779959.00004",
            test_system)
        aifutils.mark_type(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-4",
            owns_relation, SEEDLING_TYPES_NIST['GeneralAffiliation.APORA'],
            test_system, 1.0)
        affiliate_arg = aifutils.mark_as_argument(
            g, owns_relation,
            SEEDLING_TYPES_NIST['GeneralAffiliation.APORA_Affiliate'], doc_buk,
            test_system, 1.0)
        affiliation_arg = aifutils.mark_as_argument(
            g, owns_relation,
            SEEDLING_TYPES_NIST['GeneralAffiliation.APORA_Affiliation'],
            doc_russia, test_system, 1.0)
        # Importance on the argument statements, including a large value.
        aifutils.mark_importance(g, affiliate_arg, 100.0)
        aifutils.mark_importance(g, affiliation_arg, 9.999999e6)

        # "Russia owns the BUK" hypothesis built from the supporting
        # statements; its importance is the largest representable float.
        hypothesis_support = [
            doc_buk, doc_buk_typing, buk_membership, owns_relation,
            affiliate_arg, affiliation_arg
        ]
        owns_hypothesis = aifutils.make_hypothesis(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#hypothesis-1",
            hypothesis_support, test_system)
        aifutils.mark_importance(g, owns_hypothesis, sys.float_info.max)

        self.new_file(g, "test_simple_hypothesis_with_importance_cluster.ttl")
        self.dump_graph(g, "Simple hypothesis with importance with cluster")
예제 #30
0
    def test_create_compound_justification(self):
        """Combine text, image, keyframe-video, shot-video, and audio
        justifications into single compound justifiedBy assertions with
        their own confidences."""
        g = aifutils.make_graph()
        producer = aifutils.make_system_with_uri(g,
                                                 "http://www.test.edu/system")

        # An election event with its type assertion.
        elect_event = aifutils.make_event(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#V779961.00010",
            producer)
        aifutils.mark_type(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-1",
            elect_event, SEEDLING_TYPES_NIST['Personnel.Elect'], producer, 1.0)

        # The two participating entities: Putin (person) and Russia (GPE).
        putin = aifutils.make_entity(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#E781167.00398",
            producer)
        putin_typing = aifutils.mark_type(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-2",
            putin, SEEDLING_TYPES_NIST.Person, producer, 1.0)

        russia = aifutils.make_entity(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#E779954.00004",
            producer)
        aifutils.mark_type(
            g,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-3",
            russia, SEEDLING_TYPES_NIST.GeopoliticalEntity, producer, 1.0)

        # Attach both entities to the event as its arguments.
        electee_arg = aifutils.mark_as_argument(
            g, elect_event, SEEDLING_TYPES_NIST['Personnel.Elect_Elect'],
            putin, producer, 0.785,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-4"
        )
        place_arg = aifutils.mark_as_argument(
            g, elect_event, SEEDLING_TYPES_NIST['Personnel.Elect_Place'],
            russia, producer, 0.589,
            "https://tac.nist.gov/tracks/SM-KBP/2019/LdcAnnotations#assertion-5"
        )

        # Text evidence: attach it to both the type assertion and the entity
        # itself (it supports the entity's existence as well as its type),
        # and record the parent source document.  In TA1 -> TA2
        # communications, confidences live at the justification level.
        text_just = aifutils.make_text_justification(g, "NYT_ENG_20181231",
                                                     42, 143, producer, 0.973)
        aifutils.mark_justification(g, putin_typing, text_just)
        aifutils.mark_justification(g, putin, text_just)
        aifutils.add_source_document_to_justification(
            g, text_just, "NYT_PARENT_ENG_20181231_03")

        # Additional evidence from an image region ...
        image_box = Bounding_Box((123, 45), (167, 98))
        image_just = aifutils.make_image_justification(
            g, "NYT_ENG_20181231_03", image_box, producer, 0.123)
        # ... from a video keyframe region ...
        keyframe_box = Bounding_Box((234, 56), (345, 101))
        keyframe_just = aifutils.make_keyframe_video_justification(
            g, "NYT_ENG_20181231_03", "keyframe ID", keyframe_box, producer,
            .0234)
        # ... from a video shot where no keyframe shows the entity ...
        shot_just = aifutils.make_shot_video_justification(
            g, "SOME_VIDEO", "some shot ID", producer, 0.487)
        # ... and from audio.
        audio_just = aifutils.make_audio_justification(
            g, "NYT_ENG_201181231", 4.566, 9.876, producer, 0.789)

        # Fold the justifications into single compound justifiedBy triples,
        # each carrying a fresh confidence.
        aifutils.mark_compound_justification(g, [electee_arg], [
            text_just, image_just, keyframe_just, shot_just, audio_just
        ], producer, .321)

        aifutils.mark_compound_justification(g, [place_arg],
                                             [text_just, image_just],
                                             producer, 0.543)

        self.new_file(g, "test_create_compound_justification.ttl")
        self.dump_graph(g, "Example of compound justification")