Esempio n. 1
0
    def test_create_an_entity_with_image_justification_and_vector(self):
        # Example: one Person entity backed by an image bounding box, linked
        # to an external KB entry and annotated with a private feature vector.
        graph = aifutils.make_graph()
        graph.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)

        # All assertions below are attributed to this system node.
        test_system = aifutils.make_system_with_uri(
            graph,
            "http://www.test.edu/testSystem",
        )

        # The entity URI is arbitrary; it only has to be unique.
        person = aifutils.make_entity(
            graph,
            "http://www.test.edu/entities/1",
            test_system,
        )

        # The type is not stored on the entity itself: a separate assertion
        # (again with an arbitrary URI) carries it, which leaves room for
        # uncertainty about the type.
        person_type = aifutils.mark_type(
            graph,
            "http://www.test.org/assertions/1",
            person,
            SEEDLING_TYPES_NIST.Person,
            test_system,
            1.0,
        )

        # Image evidence: the same justification is attached to both the
        # entity and its type assertion, and it is the justification that
        # carries the confidence value.
        image_region = Bounding_Box((123, 45), (167, 98))
        justified_nodes = [person, person_type]
        aifutils.mark_image_justification(
            graph,
            justified_nodes,
            "NYT_ENG_20181231_03",
            image_region,
            test_system,
            0.123,
        )

        # Link the entity to an entry of an external knowledge base.
        aifutils.link_to_external_kb(
            graph, person, "freebase.FOO", test_system, 0.398)

        # System-private payload: a typed vector attached to the entity.
        private_vector = dict(
            vector_type="http://www.test.edu/systemX/personVector",
            vector_data=[2.0, 7.5, 0.2, 8.1],
        )
        aifutils.mark_private_data_with_vector(
            graph, person, test_system, private_vector)

        # Write the graph to its .ttl file, then dump it with a caption.
        output_name = (
            "test_create_an_entity_with_image_justification_and_vector.ttl")
        self.new_file(graph, output_name)
        self.dump_graph(
            graph, "Example of entity with image justification and vector")
def transferAIF(p_id):
    """Merge the per-document graphs of ``p_id`` and add grounding results.

    Steps, in order:

    1. Parse into a single graph whichever of the three Turtle files for
       ``p_id`` are listed in ``txt_mention_ttl_list``, ``cu_ttl_list`` and
       ``cu_ttl_ins_list``.
    2. Attach instance and semantic feature vectors (from ``get_features``)
       as private data to every object-detection image / keyframe entity
       found among the graph subjects.
    3. For every text entity that has an entry in ``grounding_dict``: attach
       its textual features, create one typed and justified entity per
       grounded bounding box, cluster that box with the text entity, and
       cluster it with the object-detection entities listed in ``link_ids``.
    4. Serialize the merged graph to ``<merged_graph_path>/<p_id>.ttl``.

    All prefixes, paths, lookup tables and ``get_features`` are module-level
    names defined outside this function.

    Args:
        p_id: Parent id. Used as the stem of the input and output ``.ttl``
            files and as the source document of every justification added.

    Returns:
        None. The result is written to disk.
    """
    g = Graph()

    # Load and merge, in a fixed order: text-mention graph, CU graph and CU
    # instance-matching graph. Each one is optional. Turtle is UTF-8, so the
    # files are read explicitly as UTF-8 and closed deterministically.
    if p_id in txt_mention_ttl_list:
        with open(os.path.join(txt_mention_ttl_path, p_id + '.ttl'),
                  encoding='utf-8') as fin:
            g.parse(data=fin.read(), format='n3')

    if p_id in cu_ttl_list:
        with open(os.path.join(cu_ttl_path, p_id + '.ttl'),
                  encoding='utf-8') as fin:
            g.parse(data=fin.read(), format='n3')

    if p_id in cu_ttl_ins_list:
        with open(os.path.join(cu_ttl_ins_path, p_id + '.ttl'),
                  encoding='utf-8') as fin:
            g.parse(data=fin.read(), format='n3')

    sys_instance_matching = aifutils.make_system_with_uri(
        g, cu_pref + 'Systems/Instance-Matching/ResNet152')
    sys_grounding = aifutils.make_system_with_uri(
        g, cu_pref + 'Systems/Grounding/ELMo-PNASNET')
    usc_sys_grounding = aifutils.make_system_with_uri(
        g, usc_pref + 'Systems/ZSGrounder')

    # Sort the URI subjects into vision entities (keyed by the last two path
    # segments of their URI) and text entities (keyed by their full URI).
    img_entities = {}
    keyframe_entities = {}
    ltf_entities = {}
    for sbj in set(g.subjects()):
        if not isinstance(sbj, rdflib.term.URIRef):
            continue
        sbj_name = sbj.toPython()
        if cu_objdet_pref in sbj_name:
            short_id = '/'.join(sbj_name.split('/')[-2:])
            if 'JPG' in sbj_name:
                img_entities[short_id] = sbj
            elif 'Keyframe' in sbj_name:
                keyframe_entities[short_id] = sbj
        elif rpi_entity_pref in sbj_name:
            ltf_entities[sbj_name] = sbj

    # Private feature vectors for the object-detection entities: all images
    # first, then all keyframes.
    for dtype, vision_entities in (('jpg', img_entities),
                                   ('keyframe', keyframe_entities)):
        for feat_key, vision_entity in vision_entities.items():
            data_instance = get_features(feat_key, dtype, 'instance')
            data_semantic = get_features(feat_key, dtype, 'semantic')
            j_d_i = json.dumps(
                {'columbia_vector_instance_v1.0': data_instance})
            j_d_s = json.dumps(
                {'columbia_vector_grounding_v1.0': data_semantic})
            aifutils.mark_private_data(g, vision_entity, j_d_i,
                                       sys_instance_matching)
            aifutils.mark_private_data(g, vision_entity, j_d_s,
                                       sys_grounding)

    cnt_img = {}  # img_id -> grounded boxes emitted so far for that image
    cnt_boxO = {}  # img_id -> box-overlap links seen so far for that image
    cnt_ltf = {}  # entity name -> text entities seen so far with that name

    # Text features, grounding boxes and clusters.
    for key in ltf_entities:
        if key not in grounding_dict:
            continue
        entry = grounding_dict[key]
        entity_name = None
        USC_GROUNDING = 'usc_vision' in entry['grounding'].values()
        if not USC_GROUNDING:
            j_d_t = json.dumps({
                'columbia_vector_text_v1.0':
                entry['textual_features'].tolist()
            })
            entity_ltf = ltf_entities[key]
            aifutils.mark_private_data(g, entity_ltf, j_d_t, sys_grounding)

            # type and name of the entity to be linked
            type_rdf = entry['type_rdf']
            entity_name = entry['name']
            grndg_file_type = entry['source_type']

        if entity_name is None:
            continue

        # Per-name counter keeps cluster URIs of same-named entities apart.
        cnt_ltf[entity_name] = cnt_ltf.get(entity_name, 0) + 1

        if USC_GROUNDING:
            # NOTE(review): unreachable as written. entity_name is only
            # assigned for non-USC entries, so USC entries hit the `continue`
            # above (and type_rdf / grndg_file_type / entity_ltf would be
            # unset here). Kept to mirror the existing USC handling; confirm
            # the intended USC flow before enabling it.
            grnd_sys = usc_sys_grounding
            type_pref = usc_grndg_type_pref
            ent_pref = usc_grndg_ent_pref
            clstr_pref = usc_grndg_clstr_txt_pref
        else:
            grnd_sys = sys_grounding
            type_pref = cu_grndg_type_pref
            ent_pref = cu_grndg_ent_pref
            clstr_pref = cu_grndg_clstr_txt_pref

        # IRI-safe form of the name: spaces -> '_', every other character
        # that is not a letter, digit or '_' -> '-'.
        entity_name_in_IRI = "".join(
            x if x.isalpha() or x.isdigit() or x == '_' else '-'
            for x in entity_name.replace(' ', '_'))

        clstr_prot_flag = False  # text cluster for entity_ltf not created yet
        for img_id, grnd in entry['grounding'].items():
            if img_id == 'system':
                continue
            img_stem = img_id.split('.')[0]
            for ii, bbox in enumerate(grnd['bbox']):
                cnt_img[img_id] = cnt_img.get(img_id, 0) + 1

                # The grounding box becomes a new typed entity.
                score = grnd['bbox_score'][ii]
                box_path = f"{grndg_file_type}/{img_stem}/{cnt_img[img_id]}"
                type_eid = type_pref + f"{box_path}/ERE"
                ent_eid = ent_pref + box_path
                entity_grnd = aifutils.make_entity(g, ent_eid, grnd_sys)
                type_assertion = aifutils.mark_type(
                    g, type_eid, entity_grnd, type_rdf, grnd_sys, score)

                # Justify the new entity with its bounding box. The
                # image / keyframe decision is made on the type-assertion URI.
                bb = Bounding_Box((bbox[0], bbox[1]), (bbox[2], bbox[3]))
                justif = None
                if USC_GROUNDING:
                    justif = aifutils.mark_image_justification(
                        g, [entity_grnd, type_assertion], img_stem, bb,
                        usc_sys_grounding, score)
                elif 'JPG' in type_assertion:
                    justif = aifutils.mark_image_justification(
                        g, [entity_grnd, type_assertion], img_stem, bb,
                        sys_grounding, score)
                elif 'Keyframe' in type_assertion:
                    # the stem is split on '_': first part -> video id,
                    # second part -> keyframe id
                    stem_parts = img_stem.split('_')
                    imgid = stem_parts[0]
                    kfid = stem_parts[1]
                    # The justification belongs to the grounded box entity
                    # created above, not to a leftover entity from the
                    # feature loops.
                    justif = aifutils.mark_keyframe_video_justification(
                        g, [entity_grnd, type_assertion], imgid, kfid, bb,
                        sys_grounding, score)
                else:
                    print(
                        '[Merge Error] in Main Body: the type_assertion is wrong'
                    )
                if justif is not None:
                    # Skipped when no justification could be built, so a
                    # stale one from a previous box is never reused.
                    aifutils.add_source_document_to_justification(
                        g, justif, p_id)
                    aifutils.mark_informative_justification(
                        g, entity_grnd, justif)

                if not USC_GROUNDING:
                    # private feature vectors of this very box entity
                    grounding_features = grnd['grounding_features'][
                        ii].tolist()
                    instance_features = grnd['instance_features'][ii].tolist()
                    j_d_g = json.dumps(
                        {'columbia_vector_grounding_v1.0': grounding_features})
                    j_d_i = json.dumps(
                        {'columbia_vector_instance_v1.0': instance_features})
                    aifutils.mark_private_data(g, entity_grnd, j_d_g,
                                               sys_grounding)
                    aifutils.mark_private_data(g, entity_grnd, j_d_i,
                                               sys_instance_matching)

                # Grounding cluster: box entity <-> text entity. The cluster
                # is created once per text entity, with the text entity as
                # prototype.
                if not clstr_prot_flag:
                    clst_eid = clstr_pref + f"{entity_name_in_IRI}/{cnt_ltf[entity_name]}"
                    clusterObj = aifutils.make_cluster_with_prototype(
                        g, clst_eid, entity_ltf, grnd_sys)
                    clstr_prot_flag = True
                score = grnd['men-img-score'][ii]
                aifutils.mark_as_possible_cluster_member(
                    g, entity_grnd, clusterObj, score, grnd_sys)
                # the prototype itself is (re-)marked as a member per box
                aifutils.mark_as_possible_cluster_member(
                    g, entity_ltf, clusterObj, 1, grnd_sys)

                # Bounding-box overlap cluster (instance matching): box
                # entity <-> object-detection entities, one cluster per box.
                if not USC_GROUNDING:
                    clstr_prot_b2b_flag = False
                    for jj, img_id_link in enumerate(grnd['link_ids'][ii]):
                        if img_id_link in img_entities:
                            entity_link_img = img_entities[img_id_link]
                        elif img_id_link in keyframe_entities:
                            entity_link_img = keyframe_entities[img_id_link]
                        else:
                            continue
                        cnt_boxO[img_id] = cnt_boxO.get(img_id, 0) + 1
                        if not clstr_prot_b2b_flag:
                            clst_b2b_eid = cu_grndg_clstr_img_pref + f"{img_stem}/{cnt_boxO[img_id]}"
                            clusterObj_b2b = aifutils.make_cluster_with_prototype(
                                g, clst_b2b_eid, entity_grnd, sys_grounding)
                            clstr_prot_b2b_flag = True

                        # taken from 'link_scores' (per the original comment:
                        # IoU of the grounding box and the detection box)
                        score = grnd['link_scores'][ii][jj]
                        aifutils.mark_as_possible_cluster_member(
                            g, entity_link_img, clusterObj_b2b, score,
                            sys_grounding)
                        aifutils.mark_as_possible_cluster_member(
                            g, entity_grnd, clusterObj_b2b, 1, sys_grounding)

    # Serialize before opening the target so a serialization failure does not
    # leave a truncated file behind; write the text back out as UTF-8.
    serialization = BytesIO()
    g.serialize(destination=serialization, format='turtle')
    with open(os.path.join(merged_graph_path, p_id + '.ttl'), 'w',
              encoding='utf-8') as fout:
        fout.write(serialization.getvalue().decode('utf-8'))
    sys.stdout.flush()
Esempio n. 3
0
def add_detections_to_graph(g, detections, parent_id, imgid, is_keyframe):
    """Add object detections of one image or keyframe to graph ``g``.

    Every detection whose score reaches ``score_threshold`` is expanded into
    one node per ontology id in ``class2ont[label]``: an entity when the
    ontology name is in ``ldc_entity_types_new``, otherwise an event. Each
    node gets a type assertion, an image or keyframe justification, the
    source document ``parent_id`` and an informative-justification mark.
    Created nodes are also recorded in the module-level ``entity_dict`` /
    ``event_dict`` under their URI.

    Args:
        g: Graph the assertions are added to.
        detections: Iterable of mappings with the keys ``'label'``,
            ``'score'``, ``'bbox'`` (indexable, four coordinates) and
            ``'model'``.
        parent_id: Source document id attached to every justification.
        imgid: Image id. For keyframes the part before the first ``'_'`` is
            used as the video id.
        is_keyframe: True for a video keyframe, False for a still image.

    Raises:
        RuntimeError: If a detection that passes the score threshold names
            an unknown model.
    """
    str_append = 'Keyframe' if is_keyframe else 'JPG'
    if is_keyframe:
        vidid = imgid.split('_')[0]

    for ii, det in enumerate(detections):
        label = det['label']
        score = det['score']
        bbox = det['bbox']
        model = det['model']

        if score < score_threshold:
            continue

        # Each model name and its '/J' variant map to the same system node.
        if model in ('coco', 'coco/J'):
            system = system_co
        elif model in ('voc', 'voc/J'):
            system = system_pa
        elif model in ('oi', 'oi/J'):
            system = system_oi
        elif model in ('ws', 'ws/J'):
            system = system_ws
        else:
            # Same exception type a bare ``raise`` outside an ``except``
            # block produces, but with a message naming the offending model.
            raise RuntimeError(f"unknown detection model: {model!r}")

        for iii, ont_id in enumerate(class2ont[label]):
            ont_name = ont2name[ont_id]
            labelrdf = LDC_ONTOLOGY.term(ont_name)

            if ont_name in ldc_entity_types_new:
                eid = f"http://www.columbia.edu/AIDA/DVMM/Entities/ObjectDetection/RUN00010/{str_append}/{imgid}/{ii}"
                node = aifutils.make_entity(g, eid, system)
                entity_dict[eid] = node
                type_assertion = aifutils.mark_type(
                    g,
                    eid.replace('Entities', 'TypeAssertions') + f'/{iii}',
                    node, labelrdf, system, score)
                # entities are justified by the detection's own box
                bb = Bounding_Box((bbox[0], bbox[1]), (bbox[2], bbox[3]))
            else:
                eid = f"http://www.columbia.edu/AIDA/DVMM/Events/ObjectDetection/RUN00010/{str_append}/{imgid}/{ii}"
                node = aifutils.make_event(g, eid, system)
                event_dict[eid] = node
                type_assertion = aifutils.mark_type(
                    g,
                    eid.replace('Events', 'TypeAssertions') + f'/{iii}',
                    node, labelrdf, system, score)
                # events use a box from (1, 1) to image_shape[imgid]
                # (presumably the whole frame -- TODO confirm)
                bb = Bounding_Box((1, 1), image_shape[imgid])

            if is_keyframe:
                justif = aifutils.mark_keyframe_video_justification(
                    g, [node, type_assertion], vidid, imgid, bb, system,
                    score)
            else:
                justif = aifutils.mark_image_justification(
                    g, [node, type_assertion], imgid, bb, system, score)

            aifutils.add_source_document_to_justification(
                g, justif, parent_id)
            aifutils.mark_informative_justification(g, node, justif)
Esempio n. 4
0
    def test_create_an_entity_with_all_justification_types_and_confidence(
            self):
        # Example: one Person entity justified by text, image, keyframe,
        # shot and audio evidence, plus a KB link and private data.
        graph = aifutils.make_graph()
        graph.bind('ldcOnt', SEEDLING_TYPES_NIST.uri)

        # All assertions below are attributed to this system node.
        test_system = aifutils.make_system_with_uri(
            graph,
            "http://www.test.edu/testSystem",
        )

        # The entity URI is arbitrary; it only has to be unique.
        person = aifutils.make_entity(
            graph,
            "http://www.test.edu/entities/1",
            test_system,
        )

        # The type lives in its own assertion (arbitrary URI) and not on the
        # entity, which leaves room for uncertainty about the type.
        person_type = aifutils.mark_type(
            graph,
            "http://www.test.org/assertions/1",
            person,
            SEEDLING_TYPES_NIST.Person,
            test_system,
            1.0,
        )

        # Every justification below is attached to both the entity and its
        # type assertion; the confidence sits on the justification.
        justified_nodes = [person, person_type]

        # 1) text span
        aifutils.mark_text_justification(
            graph, justified_nodes, "NYT_ENG_20181231", 42, 143, test_system,
            0.973)

        # 2) region of an image
        image_region = Bounding_Box((123, 45), (167, 98))
        aifutils.mark_image_justification(
            graph, justified_nodes, "NYT_ENG_20181231_03", image_region,
            test_system, 0.123)

        # 3) region of a video keyframe
        keyframe_region = Bounding_Box((123, 45), (167, 98))
        aifutils.mark_keyframe_video_justification(
            graph, justified_nodes, "NYT_ENG_20181231_03", "keyframe ID",
            keyframe_region, test_system, 0.234)

        # 4) video shot, for when the entity is not in a keyframe
        aifutils.mark_shot_video_justification(
            graph, justified_nodes, "SOME_VIDEO", "some shot ID", test_system,
            0.487)

        # 5) audio interval
        aifutils.mark_audio_justification(
            graph, justified_nodes, "NYT_ENG_201181231", 4.566, 9.876,
            test_system, 0.789)

        # Link the entity to an entry of an external knowledge base.
        aifutils.link_to_external_kb(
            graph, person, "freebase.FOO", test_system, 0.398)

        # Arbitrary system-private data attached to the entity.
        aifutils.mark_private_data(
            graph, person, "{ 'hello' : 'world' }", test_system)

        # Write the graph to its .ttl file, then dump it with a caption.
        output_name = (
            "test_create_an_entity_with_all_justification_types_and_confidence.ttl"
        )
        self.new_file(graph, output_name)

        self.dump_graph(graph, "Example of entity with all justifications")
Esempio n. 5
0
def transferAIF(parent):
    chi = child[parent]

    if parent in kb_dict.keys():
        g = kb_dict[parent]

    else:
        g = aifutils.make_graph()

    entityDic_c = {}
    country_set_r = set()
    for img_id in chi:
        if img_id in flag_dict_s.keys():
            country_set_r.add(flag_dict_s[img_id])
    #=============== Flag ===========

    sys = aifutils.make_system_with_uri(
        g, "http://www.columbia.edu/AIDA/DVMM/Systems/Flag/Inception_v4")
    for key, value in index_category_f.items():
        if key not in country_set_r:
            continue
        key = key.replace(' ', '_')
        name = "http://www.columbia.edu/AIDA/DVMM/Entities/Country/" + str(
            value) + '/' + key
        entity = aifutils.make_entity(g, name, sys)
        entityDic_c[key] = entity
        #print "parent "+ parent
        type_assertion = aifutils.mark_type(g, \
            "http://www.columbia.edu/AIDA/DVMM/TypeAssertion/Country/"\
            +str(value)+'/'+key, entity, AIDA_PROGRAM_ONTOLOGY_G.term('GPE'), sys, 1)
    for img_id in chi:
        if img_id in flag_dict_s.keys():
            #print "imgID "+img_id
            flag_label = [
                '/m/07cmd', '/m/0dzct', '/m/03bt1vf', '/m/01g317', '/m/04yx4',
                '/m/01prls', '/m/07yv9', '/m/03120'
            ]
            key = img_id
            if key in OD_result.keys():
                for n in range(len(OD_result[key])):
                    eid = "http://www.columbia.edu/AIDA/DVMM/Entities/ObjectDetection/RUN00010/JPG/" + key + "/" + str(
                        n)
                    #print OD_result[key][n]['label']
                    if OD_result[key][n]['label'] in flag_label:
                        #print OD_result[key][n]['label']
                        eid = "http://www.columbia.edu/AIDA/DVMM/Entities/ObjectDetection/RUN00010/JPG/" + key + "/" + str(
                            n)
                        relation_entity = aifutils.make_relation(g, "http://www.columbia.edu/AIDA/DVMM/Relaion/Flag/"+\
                                                                key+"/"+str(n), sys)
                        #APORA = 'GeneralAffiliation.ArtifactPoliticalOrganizationReligiousAffiliation'
                        score = 1
                        type_assertion = aifutils.mark_type(g, "http://www.columbia.edu/AIDA/DVMM/TypeAssertion/Flag/"+key+"/"+str(n), \
                            relation_entity, AIDA_PROGRAM_ONTOLOGY_A['GeneralAffiliation.ArtifactPoliticalOrganizationReligiousAffiliation.NationalityCitizen'], \
                                                    sys, score)
                        boxA = OD_result[key][n]['bbox']
                        bb2 = Bounding_Box((boxA[0], boxA[1]),
                                           (boxA[2], boxA[3]))
                        score = 1
                        justif = aifutils.mark_image_justification(g, [relation_entity, \
                                                           type_assertion], key, bb2, sys, score)
                        aifutils.add_source_document_to_justification(
                            g, justif, parent)
                        aifutils.mark_informative_justification(
                            g, relation_entity, justif)
                        score = 1
                        if eid in entity_dict.keys():
                            art_argument = aifutils.mark_as_argument(g, relation_entity, \
                            AIDA_PROGRAM_ONTOLOGY_A['GeneralAffiliation.ArtifactPoliticalOrganizationReligiousAffiliation.NationalityCitizen_Artifact'], \
                                                                entity_dict[eid], sys, score)
                            score = 1
                            justif = aifutils.mark_image_justification(g, [relation_entity, \
                                                               art_argument], key, bb2, sys, score)
                            aifutils.add_source_document_to_justification(
                                g, justif, parent)
                            aifutils.mark_informative_justification(
                                g, relation_entity, justif)
                        score = 1
                        nation_argument = aifutils.mark_as_argument(g, relation_entity, \
                            AIDA_PROGRAM_ONTOLOGY_A['GeneralAffiliation.ArtifactPoliticalOrganizationReligiousAffiliation.NationalityCitizen_Nationality'], \
                                                                    entityDic_c[flag_dict_s[img_id]], \
                                                                    sys, score)
                        score = 1
                        justif = aifutils.mark_image_justification(g, [relation_entity, \
                                                               nation_argument], key, bb2, sys, score)
                        aifutils.add_source_document_to_justification(
                            g, justif, parent)
                        aifutils.mark_informative_justification(
                            g, relation_entity, justif)

                    if OD_result[key][n]['label'] == '/m/03120':
                        #print flag_dict_s[img_id]
                        boxA = OD_result[key][n]['bbox']
                        bb2 = Bounding_Box((boxA[0], boxA[1]),
                                           (boxA[2], boxA[3]))
                        type_assertion = aifutils.mark_type(g, \
                        "http://www.columbia.edu/AIDA/DVMM/TypeAssertion/Country/RUN00010/JPG/"+\
                            str(key)+'/'+str(n), entityDic_c[flag_dict_s[img_id]], AIDA_PROGRAM_ONTOLOGY_G.term('GPE'), sys, 1)
                        score = 1
                        justif = aifutils.mark_image_justification(g, [entityDic_c[flag_dict_s[img_id]], type_assertion], \
                                                                   key, bb2, sys, score)
                        aifutils.add_source_document_to_justification(
                            g, justif, parent)
                        aifutils.mark_informative_justification(
                            g, entityDic_c[flag_dict_s[img_id]], justif)

    #break
    #=============== landmark ==========
    sys = aifutils.make_system_with_uri(
        g, "http://www.columbia.edu/AIDA/DVMM/Systems/Landmark/Delf")
    #building
    has_land = 0
    landmark_name_set = set()
    for imageN in chi:
        if imageN in landmark_dict.keys():

            name_lm = landmark_dict[imageN]
            if name_lm == '':
                continue
            #print name_lm
            if name_lm not in landmark_name_set:
                landmark_name_set.add(name_lm)
                name = "http://www.columbia.edu/AIDA/DVMM/Entities/Landmark/"+ \
                str(landmark_id[name_lm])+"/"+name_lm
                entity = aifutils.make_entity(g, name, sys)
                entityDic_b[name_lm] = entity
                #print AIDA_PROGRAM_ONTOLOGY2.term('Person')
                score = 1
                type_assertion = aifutils.mark_type(g, \
                    "http://www.columbia.edu/AIDA/DVMM/TypeAssertion/Landmark/"+ \
                str(landmark_id[name_lm])+"/"+name_lm \
                    , entity, AIDA_PROGRAM_ONTOLOGY_b.term('FAC'), sys, score)
                if 'Maidan' in name_lm:
                    aifutils.link_to_external_kb(g, entity,
                                                 "LDC2019E43:80000020", sys, 1)
                elif 'United_States_Capitol' in name_lm:
                    aifutils.link_to_external_kb(g, entity,
                                                 "LDC2019E43:4140827", sys, 1)

                else:
                    aifutils.link_to_external_kb(g, entity,
                                                 VIS_ONTOLOGY_b.term(name_lm),
                                                 sys, 1)

    for imageN in chi:
        total_key.add(imageN)
        if imageN in landmark_dict.keys():
            #print parent
            key = imageN
            name_lm = landmark_dict[imageN]
            if name_lm == '':
                continue
            #aifutils.mark_as_possible_cluster_member(g, \
            #    entity,clusterDic_b[0], 1, sys)

            # Need to change in future
            im = Image.open(input_img_path + imageN + '.jpg')
            width, height = im.size
            bb2 = Bounding_Box((0, 0), (width, height))  #l u r d
            type_assertion = aifutils.mark_type(g, \
            "http://www.columbia.edu/AIDA/DVMM/TypeAssertion/Landmark/RUN00010/JPG/"+\
                str(key)+'/'+str(0), entityDic_b[name_lm], AIDA_PROGRAM_ONTOLOGY_b.term('FAC'), sys, 1)
            justif = aifutils.mark_image_justification(
                g, [entityDic_b[name_lm], type_assertion], key, bb2, sys, 1)
            aifutils.add_source_document_to_justification(g, justif, parent)
            aifutils.mark_informative_justification(g, entityDic_b[name_lm],
                                                    justif)

    #=============face recognition=============
    sys = aifutils.make_system_with_uri(
        g, "http://www.columbia.edu/AIDA/DVMM/Systems/Face/FaceNet")

    NameDetected = set()
    NameDetected_score = {}
    nameCount = Counter()
    count = 0
    c_num = 0
    person_c_n = 0
    #print comblineSet
    In = 0
    #print bb
    featureDic = {}
    first = 1
    entityList = []
    arrayList = []
    person_label = [
        '/m/01g317', '/m/04yx4', '/m/03bt1vf', '/m/01bl7v', '/m/05r655',
        '/m/04hgtk', '/m/01bgsw'
    ]
    for x, y in result.items():
        #print x

        data = x.split('/')[-1]
        if '._' in data:
            continue
        data2 = data.split('_')
        #print data2[0]
        #print data2[1][:-4]
        key = data2[0]
        i = data2[1][:-4]
        #print chi
        #print key

        if key not in chi:
            continue
        if y[0].replace(' ', '_') not in nameSet or nameDic[y[0].replace(
                ' ', '_')] not in p1Set or float(y[1]) < 0.04 or y[0].replace(
                    ' ', '_') == 'Ban_Ki-moon':

            continue
        else:
            NameDetected.add(nameDic[y[0].replace(' ', '_')])
            score = float(y[1]) + 0.5
            NameDetected_score[nameDic[y[0].replace(' ', '_')]] = min(1, score)

    for x, y in result2.items():
        #print x
        data = x.split('/')[-2]
        if '._' in data:
            continue

        data = x.split('/')[-1]
        data2 = data.split('_')
        key = x.split('/')[-2]

        if videoDic[key] not in chi:
            continue

        if y[0].replace(' ', '_') not in nameSet or nameDic[y[0].replace(
                ' ', '_')] not in p1Set or float(y[1]) < 0.04 or y[0].replace(
                    ' ', '_') == 'Ban_Ki-moon':

            continue
        else:
            NameDetected.add(nameDic[y[0].replace(' ', '_')])
            score = float(y[1]) + 0.5
            NameDetected_score[nameDic[y[0].replace(' ', '_')]] = min(1, score)

    for key, value in index_category.items():  #key name. value is number
        if key not in NameDetected:
            continue
        key = key.replace(' ', '_')

        #keu = entityDic
        name = "http://www.columbia.edu/AIDA/DVMM/Entities/FaceID/" + str(
            value) + '/' + key
        entity = aifutils.make_entity(g, name, sys)
        entityDic[key] = entity
        #print AIDA_PROGRAM_ONTOLOGY2.term('Person')
        #score = NameDetected_score[key]
        type_assertion = aifutils.mark_type(g, \
            "http://www.columbia.edu/AIDA/DVMM/TypeAssertion/FaceID/"\
            +str(value)+'/'+key, entity, AIDA_PROGRAM_ONTOLOGY2.term('PER'), sys, 1)

        new_key = key.lower().replace('_', ' ')
        if new_key in name2ID.keys():
            aifutils.link_to_external_kb(g, entity,
                                         "LDC2019E43:" + name2ID[new_key], sys,
                                         1)
            #print 'Lorelei'
            #print parent
        else:
            a = 0
            #print 'dbpedia'
            #print parent
            aifutils.link_to_external_kb(g, entity, VIS_ONTOLOGY2.term(key),
                                         sys, 1)

    for x, y in result.items():
        #print x

        data = x.split('/')[-1]
        if '._' in data:
            continue
        data2 = data.split('_')
        #print data2[0]
        #print data2[1][:-4]
        key = data2[0]
        i = data2[1][:-4]
        #print chi
        #print key

        if key not in chi:
            continue
        #print chi
        #print key
        In = 1

        name = "http://www.columbia.edu/AIDA/DVMM/Entities/FaceDetection/RUN00010/" + str(
            key) + '/' + str(i)
        #entityList.append(key)
        #entityList.append(entity)
        entity = aifutils.make_entity(g, name, sys)

        first_cluster = 1
        #======================== JPG ===============
        eid_list = []

        if key in OD_result.keys():
            for n in range(len(OD_result[key])):

                #print OD_result[key][n]['label']
                if OD_result[key][n]['label'] in person_label:
                    #print OD_result[key][n]['label']
                    boxA = OD_result[key][n]['bbox']
                    boxB = (int(bb[x][0]), int(bb[x][1]), int(bb[x][2]),
                            int(bb[x][3]))
                    IOA = bb_intersection_over_union(boxA, boxB)
                    if IOA > 0.9:
                        #left,top,right,bottom =  OD_result[key][n]['bbox']
                        #if int(bb[x][1]) > left and int(bb[x][0]) > top and int(bb[x][3]) < right and int(bb[x][2]) < bottom:

                        eid = "http://www.columbia.edu/AIDA/DVMM/Entities/ObjectDetection/RUN00010/JPG/" + key + "/" + str(
                            n)
                        #print entity_dic2[key]
                        #print n
                        if n in entity_dic2[key]:
                            #if eid in entity_dict.keys():
                            score = IOA

                            eid_list.append(eid)
                            if first_cluster == 1:

                                first_cluster = 0
                                clusterName = aifutils.make_cluster_with_prototype(g, \
                                "http://www.columbia.edu/AIDA/DVMM/Clusters/HumanBody/RUN00010/JPG/"\
                                +key+"/"+str(n)+"/"+str(person_c_n),entity, sys)

                            aifutils.mark_as_possible_cluster_member(g, \
                                    entity_dict[eid],clusterName, score, sys)

            if first_cluster == 0:

                person_c_n += 1

        l, t, r, d = bb[x]
        if (r - l) * (d - t) > 3600:
            entityList.append(entity)
            arrayList.append(y[2])

        feature = {}
        feature['columbia_vector_faceID_FaceNet'] = y[2].tolist()
        json_data = json.dumps(feature)
        aifutils.mark_private_data(g, entity, json_data, sys)
        #labelrdf = VIS_ONTOLOGY.term(i_id)
        #Dscore = value[i][7]
        #if Dscore>1:
        Dscore = 1
        #type_assertion = aifutils.mark_type(g, "Columbia/DVMM/TypeAssertion/FaceRecognition/RUN00003/"+str(i_id)+"/"+str(i)+"/1",
        type_assertion = aifutils.mark_type(g, \
        "http://www.columbia.edu/AIDA/DVMM/TypeAssertion/FaceDetection/RUN00010/JPG/"+\
            str(key)+'/'+str(i), entity, AIDA_PROGRAM_ONTOLOGY2.term('PER'), sys, Dscore)

        bb2 = Bounding_Box((bb[x][0], bb[x][1]), (bb[x][2], bb[x][3]))

        justif = aifutils.mark_image_justification(g, [entity, type_assertion],
                                                   key, bb2, sys, 1)
        aifutils.add_source_document_to_justification(g, justif, parent)
        aifutils.mark_informative_justification(g, entity, justif)
        chi_set.add(key)
        parent_set.add(parent)

        if y[0].replace(' ', '_') not in nameSet or nameDic[y[0].replace(
                ' ', '_')] not in p1Set or float(y[1]) < 0.04 or y[0].replace(
                    ' ', '_') == 'Ban_Ki-moon':

            continue
        else:
            #nameCount2+=1
            person_set.add(y[0])
            doc_N.add(parent)
            img_N.add(key)

            score = sigmoid(float(y[1]) * 10)

            NameDetected.add(nameDic[y[0].replace(' ', '_')])
            #print y[0]
            entity_key = nameDic[y[0].replace(' ', '_')]
            type_assertion = aifutils.mark_type(g, \
            "http://www.columbia.edu/AIDA/DVMM/TypeAssertion/FaceID/"\
            +str(index_category[entity_key])+'/'+entity_key, entityDic[entity_key], AIDA_PROGRAM_ONTOLOGY2.term('PER'), sys, 1)

            justif = aifutils.mark_image_justification(g, [entityDic[nameDic[y[0].replace(' ','_')]], \
                                                           type_assertion], key, bb2, sys, score)
            aifutils.add_source_document_to_justification(g, justif, parent)
            aifutils.mark_informative_justification(
                g, entityDic[nameDic[y[0].replace(' ', '_')]], justif)

            #                                    entity, labelrdf, sys, score)

            #print str(value[i][2]).replace("L",'')

    #========================== Video Frame ================
    for x, y in result2.items():
        #print x
        data = x.split('/')[-2]
        if '._' in data:
            continue

        data = x.split('/')[-1]
        data2 = data.split('_')
        key = x.split('/')[-2]

        if videoDic[key] not in chi:
            continue
        #print "video"
        In = 1
        i = data2[-1][:-4]
        frame = data[:-len(data2[-1]) - 1]
        frameNum = frame.split('_')[-1]

        name = "http://www.columbia.edu/AIDA/DVMM/Entities/FaceDetection/RUN00010/Keyframe/"+\
        str(videoDic[key])+'_'+str(frameNum)+'/'+str(i)
        entity = aifutils.make_entity(g, name, sys)

        if str(videoDic[key]) + '_' + str(frameNum) in ODF_result.keys():
            first_cluster = 1

            for n in range(
                    len(ODF_result[str(videoDic[key]) + '_' + str(frameNum)])):

                if ODF_result[str(videoDic[key]) + '_' +
                              str(frameNum)][n]['label'] in person_label:

                    boxA = ODF_result[str(videoDic[key]) + '_' +
                                      str(frameNum)][n]['bbox']
                    boxB = (int(bb_2[x][0]), int(bb_2[x][1]), int(bb_2[x][2]),
                            int(bb_2[x][3]))
                    IOA = bb_intersection_over_union(boxA, boxB)
                    if IOA > 0.9:

                        eid = "http://www.columbia.edu/AIDA/DVMM/Entities/ObjectDetection/RUN00010/Keyframe/" + str(
                            videoDic[key]) + '_' + str(frameNum) + "/" + str(n)
                        if n in entity_dic2[str(videoDic[key]) + '_' +
                                            str(frameNum)]:
                            #if eid in entity_dict.keys():
                            score = IOA
                            #print x
                            #print entity_dict[eid]
                            #print n

                            if first_cluster == 1:

                                first_cluster = 0
                                clusterName = aifutils.make_cluster_with_prototype(g, \
                                "http://www.columbia.edu/AIDA/DVMM/Clusters/HumanBody/RUN00010/Keyframe/"+\
                                str(videoDic[key])+'_'+str(frameNum)+'/'+str(i)+'/'+\
                                str(person_c_n),entity, sys)
                                #aifutils.mark_as_possible_cluster_member(g, \
                                #    entity,clusterName, score, sys)

                            aifutils.mark_as_possible_cluster_member(g, \
                                    entity_dict[eid],clusterName, score, sys)
            if first_cluster == 0:
                person_c_n += 1

        #txn.put("Columbia/DVMM/TypeAssertion/FaceID/RUN00003/"+str(key)+'/'+str(i), value[i][4]);
        #featureDic[entity] = y[2]
        featureDic[key] = y[2]

        #entityList.append(key)
        l, t, r, d = bb_2[x]
        if (r - l) * (d - t) > 3600:
            entityList.append(entity)
            arrayList.append(y[2])
        #if first == 1:
        #    new_array = [y[2]]
        #    first = 0
        #else:
        #    new_array = np.concatenate((new_array, [y[2]]), axis=0)
        feature = {}
        feature['columbia_vector_faceID_FaceNet'] = y[2].tolist()
        json_data = json.dumps(feature)
        aifutils.mark_private_data(g, entity, json_data, sys)
        #labelrdf = VIS_ONTOLOGY.term(i_id)
        #Dscore = value[i][7]
        #if Dscore>1:
        Dscore = 1
        #type_assertion = aifutils.mark_type(g, "Columbia/DVMM/TypeAssertion/FaceRecognition/RUN00003/"+str(i_id)+"/"+str(i)+"/1",
        type_assertion = aifutils.mark_type(g, "http://www.columbia.edu/AIDA/DVMM/TypeAssertion/FaceDetection/RUN00010/Keyframe/"+\
            str(videoDic[key])+'_'+str(frameNum)+'/'+str(i), entity, AIDA_PROGRAM_ONTOLOGY2.term('PER'), sys, Dscore)
        #str(videoDic[key])+'_'+str(frameNum)+'/'+str(i), entity, AIDA_PROGRAM_ONTOLOGY2.Entity, sys, Dscore)
        #print bb[x][1]
        bb2 = Bounding_Box((bb_2[x][0], bb_2[x][1]), (bb_2[x][2], bb_2[x][3]))
        #aifutils.mark_image_justification(g, [entity, type_assertion], key, bb2, sys, 1)
        justif = aifutils.mark_keyframe_video_justification(g, [entity, type_assertion], videoDic[key], \
                                                            str(videoDic[key])+'_'+str(frameNum), bb2, sys, 1)
        aifutils.add_source_document_to_justification(g, justif, parent)
        aifutils.mark_informative_justification(g, entity, justif)

        chi_set.add(key)
        parent_set.add(parent)
        if y[0].replace(' ', '_') not in nameSet or nameDic[y[0].replace(
                ' ', '_')] not in p1Set or float(y[1]) < 0.04 or y[0].replace(
                    ' ', '_') == 'Ban_Ki-moon':
            continue
        else:
            #nameCount2+=1
            person_set.add(y[0])
            doc_N.add(parent)
            img_N.add(key)
            #if float(y[1])*10>1:
            #    score = 1-random.random()/10
            #else:
            #    score = float(y[1])*10
            score = sigmoid(float(y[1]) * 10)
            #place_of_birth_in_louisville_cluster = aifutils.mark_as_possible_cluster_member(g, \
            #    entity,clusterDic[nameDic[y[0].replace(' ','_')]], score, sys)
            NameDetected.add(nameDic[y[0].replace(' ', '_')])
            entity_key = nameDic[y[0].replace(' ', '_')]
            type_assertion = aifutils.mark_type(g, \
            "http://www.columbia.edu/AIDA/DVMM/TypeAssertion/FaceID/"\
            +str(index_category[entity_key])+'/'+entity_key, entityDic[entity_key], AIDA_PROGRAM_ONTOLOGY2.term('PER'), sys, 1)
            justif = aifutils.mark_keyframe_video_justification(g, [entityDic[nameDic[y[0].replace(' ','_')]], type_assertion], videoDic[key], \
                                                            str(videoDic[key])+'_'+str(frameNum), bb2, sys, score)
            aifutils.add_source_document_to_justification(g, justif, parent)
            aifutils.mark_informative_justification(
                g, entityDic[nameDic[y[0].replace(' ', '_')]], justif)

    #dbscan_run(arrayList,entityList)
    new_array = np.array(arrayList)

    if len(arrayList) > 1:

        # Compute DBSCAN
        #if __name__ == '__main__':
        db = DBSCAN(eps=0.55, min_samples=2).fit(new_array)
        core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
        core_samples_mask[db.core_sample_indices_] = True
        labels = db.labels_
        #print labels
        # Number of clusters in labels, ignoring noise if present.
        n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
        #print entityList
        #print('Estimated number of clusters: %d' % n_clusters_)

        clusterNameDic = {}

        firstMem = [0 for i in range(n_clusters_)]
        firstArray = {}
        for i in range(len(labels)):
            if labels[i] == -1:
                continue
            #print len(labels)
            #print len(entityList)
            #score = 1
            if firstMem[labels[i]] == 0:
                firstMem[labels[i]] = 1
                firstArray[labels[i]] = new_array[i]
                clusterNameDic[labels[i]] = aifutils.make_cluster_with_prototype(g, \
                    "http://www.columbia.edu/AIDA/DVMM/Clusters/FaceCoreference/RUN00010/"+\
                    str(labels[i]),entityList[i], sys)
                #print entityList[a][j]
            else:
                dist = np.linalg.norm(firstArray[labels[i]] - new_array[i])
                if dist > 1:
                    score = 0.001
                else:
                    score = 1 - dist / 2
                #score = sigmoid(dist)
                #print score
                aifutils.mark_as_possible_cluster_member(g, \
                    entityList[i],clusterNameDic[labels[i]], score, sys)

    sys = aifutils.make_system_with_uri(
        g, "http://www.columbia.edu/AIDA/DVMM/Systems/Face/FaceNet")
    for key, value in index_category.items():
        if key not in NameDetected:
            continue
        key = key.replace(' ', '_')

        #print name2ID[]
        new_key = key.lower().replace('_', ' ')
        #print new_key
        try:
            #print RPI[parent].keys()
            #print name2ID[new_key]
            if name2ID[new_key] in RPI[parent].keys():
                print(new_key)
                #print parent
                cluster = aifutils.make_cluster_with_prototype(g, \
                    "http://www.columbia.edu/AIDA/DVMM/Clusters/NamedPersonCoreference/"+\
                    str(value)+'/'+key,entityDic[key], sys)
                score = 1
                #aifutils.mark_as_possible_cluster_member(g, ,cluster, score, sys)
                for i in range(len(RPI[parent][name2ID[new_key]])):
                    aifutils.mark_as_possible_cluster_member(g, \
                        RPI[parent][name2ID[new_key]][i],cluster, score, sys)
        except KeyError:
            a = 0

    directory = ttl_out + '/'
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(directory + parent + '.ttl', 'w') as fout:
        serialization = BytesIO()
        # fout is opened in text mode, so the turtle output is first collected as bytes in the BytesIO buffer and then decoded to str before writing
        g.serialize(destination=serialization, format='turtle')
        fout.write(serialization.getvalue().decode('utf-8'))