Exemple #1
0
def composition_lens_name(specs):
    """Name a composition lens and store that name in ``specs``.

    Sets ``specs[St.lens_operation]`` to ``Ns.lensOpt`` and builds the lens
    URI as ``<Ns.lens>comp_<subject local name>_<object local name>``.
    When that URI is longer than 255 characters, both local names are
    replaced by their ``Ut.hash_it`` digests. Finally the specification is
    handed to ``update_specification``.

    :param specs: specification mapping; must hold ``St.subjectsTarget`` and
        ``St.objectsTarget``. It is modified in place.
    :return: None
    """
    specs[St.lens_operation] = Ns.lensOpt

    # Local names of the subject graph first, then of the object graph.
    local_names = [get_uri_local_name(specs[key])
                   for key in (St.subjectsTarget, St.objectsTarget)]

    lens_uri = "{}comp_{}_{}".format(Ns.lens, *local_names)
    if len(lens_uri) > 255:
        # Too long: fall back to hashed local names.
        hashed_names = [Ut.hash_it(name) for name in local_names]
        lens_uri = "{}comp_{}_{}".format(Ns.lens, *hashed_names)

    specs[St.lens] = lens_uri
    update_specification(specs)
Exemple #2
0
def diff_lens_name(specs):
    """Name a difference lens and store that name in ``specs``.

    Sets ``specs[St.lens_operation]`` to ``Ns.lensOpd`` and builds the lens
    URI as ``<Ns.lens>diff_<hashed subject name>_<hashed object name>``,
    then hands the specification to ``update_specification``.

    The local names are always hashed here because the plain names turned
    out to be too long for a file name, e.g. something of the shape
    "diff_<long linkset name>_<long linkset name>-Metadata-20180107.t".

    :param specs: specification mapping; must hold ``St.subjectsTarget`` and
        ``St.objectsTarget``. It is modified in place.
    :return: None
    """
    specs[St.lens_operation] = Ns.lensOpd

    # Hash the subject graph's local name first, then the object graph's.
    hashed_names = [Ut.hash_it(get_uri_local_name(specs[key]))
                    for key in (St.subjectsTarget, St.objectsTarget)]

    specs[St.lens] = "{}diff_{}_{}".format(
        Ns.lens, hashed_names[0], hashed_names[1])
    update_specification(specs)
Exemple #3
0
def _reconstruct_triple(subject, predicate, obj):
    """Return one tab-indented SPARQL triple pattern made of three variables."""
    return "\t\t?{:50} ?{:20} ?{} .".format(subject, predicate, obj)


def _reconstruct_graph_clause(graph_uri, triple):
    """Return a ``GRAPH <graph_uri> { triple }`` clause spread over four lines."""
    return "\n".join(["\tGRAPH <{}>".format(graph_uri), "\t{", triple, "\t}"])


def reconstruct(linkset, gr_type, predicate):
    """Build the INSERT query that copies a linkset/lens into a temporary graph.

    The generated query has this shape (the second GRAPH clause of the WHERE
    part is only present when a singleton graph was found for ``linkset``):

        INSERT
        {
           GRAPH tmpgraph:<predicate>
           {
                ?<source>   ?<predicate>        ?<target> .
                ?subject    ?sing_predicate     ?object .
            }
        }
        WHERE
        {
            GRAPH <linkset>
            {
                ?<source>   ?<predicate>        ?<target> .
            }
            GRAPH <singleton graph>
            {
                ?subject    ?sing_predicate     ?object .
            }
        }

    :param linkset: URI of the graph to reconstruct.
    :param gr_type: graph type; compared case-insensitively against
        "LINKSET" and "LENS". Any other value yields a query with empty
        source/target variables and no correspondence clause.
    :param predicate: name used both for the predicate variable and for the
        temporary graph ``tmpgraph:<predicate>``.
    :return: ``[sub_obj, insert_q]`` where ``sub_obj`` is the pair of
        subject/object targets (or None when they could not be determined)
        and ``insert_q`` is the query text. ``None`` is returned for a lens
        that has no usable metadata rows or whose targets do not all share
        the same subjectsTarget/objectsTarget.
    """
    print("RECONSTRUCTING")
    graph_format = "\t{:40} {}"
    sub_obj = None
    source = ""
    target = ""
    correspondence = ""
    singleton = ""
    singleton_triple = _reconstruct_triple(
        "subject", "sing_predicate", "object")

    # Row 0 of a result matrix is skipped everywhere below; data is read
    # from row 1 onwards.
    singleton_matrix = Qry.sparql_xml_to_matrix(
        Qry.get_singleton_graph(linkset))
    singleton_rows = None
    if singleton_matrix is not None:
        singleton_rows = singleton_matrix[St.result]
    if singleton_rows is not None and len(singleton_rows) > 1:
        singleton_graph = singleton_rows[1][0]
        if singleton_graph is not None:
            singleton = "\n{}\n".format(
                _reconstruct_graph_clause(singleton_graph, singleton_triple))

    graph_kind = str(gr_type).upper()

    # ABOUT LINKSET UNION
    if graph_kind == "LINKSET":

        print("\nRECONSTRUCTING CASE: Linkset")

        datatype_matrix = Qry.get_linkset_datatypes(linkset)
        rows = None
        if datatype_matrix is not None:
            rows = datatype_matrix[St.result]

        if rows and len(rows) > 1:
            first_row = rows[1]
            sub_obj = first_row[4:6]
            source = get_uri_local_name(str(first_row[4]))
            target = get_uri_local_name(str(first_row[5]))
            correspondence = _reconstruct_graph_clause(
                linkset, _reconstruct_triple(source, predicate, target))

    # DETERMINING WHETHER A LENS IS STEMMED FROM THE SAME subjectsTarget & objectsTarget
    elif graph_kind == "LENS":
        print("\nRECONSTRUCTING CASE: Lens")
        # TODO USE PROPERTY PATH
        query = """
        PREFIX bdb: <http://vocabularies.bridgedb.org/ops#>
        PREFIX void: <http://rdfs.org/ns/void#>
        SELECT ?target ?subjectsTarget ?objectsTarget
        {{
          <{}> void:target ?target .
          ?target
            void:subjectsTarget     ?subjectsTarget ;
            void:objectsTarget      ?objectsTarget .
        }}
        """.format(linkset)

        datatype_matrix = Qry.sparql_xml_to_matrix(query)
        rows = None
        if datatype_matrix is not None:
            rows = datatype_matrix[St.result]

        # No matrix, no result, or no data row: nothing to reconstruct from.
        if rows is None or len(rows) < 2:
            print("THERE IS NO METADATA FOR THIS DATASET. ")
            print("\nNO POSSIBLE RECONSTRUCTION FOR {}: {}".format(
                gr_type, linkset))
            print("ARE YOU SURE THE GRAPH IS OF TYPE [{}]?".format(gr_type))
            return None

        # Every target of the lens must expose the same
        # (subjectsTarget, objectsTarget) pair. The comparison runs over the
        # result ROWS; the length of the wrapping mapping says nothing about
        # how many rows there are.
        element = rows[1][1:]
        if not all(row[1:] == element for row in rows[1:]):
            return None

        sub_obj = element
        source = get_uri_local_name(str(element[0]))
        target = get_uri_local_name(str(element[1]))
        correspondence = _reconstruct_graph_clause(
            linkset, _reconstruct_triple(source, predicate, target))

    # The singleton triple is always part of the INSERT template; the
    # singleton GRAPH clause of the WHERE part is empty when none was found.
    insert_q = "\n".join([
        "INSERT",
        "{",
        "   GRAPH tmpgraph:{}".format(predicate),
        "   {",
        _reconstruct_triple(source, predicate, target),
        singleton_triple,
        "    }",
        "}",
        "WHERE",
        "{",
        correspondence,
        singleton + "}",
    ])

    if sub_obj is not None:
        print(graph_format.format(sub_obj[0], sub_obj[1]))

    return [sub_obj, insert_q]
Exemple #4
0
def lens_transitive(specs, activated=False):
    """Create a transitive (composition) lens from the given specification.

    The lens name is generated, the same-as count is stored in
    ``specs[St.sameAsCount]`` and the insert query is stored in
    ``specs[St.insert_query]``, whatever the value of ``activated``.

    Only when ``activated`` is exactly ``True`` is the insert query run. If
    the resulting lens graph then has a size greater than zero, its metadata
    is inserted, the lens is registered, and the correspondences, singletons
    and metadata are written to file.

    :param specs: specification mapping; must hold ``St.subjectsTarget`` and
        ``St.objectsTarget``. It is modified in place.
    :param activated: the endpoint is only touched when this is ``True``;
        when it is ``False`` a warning is logged instead.
    :return: a dict with ``St.message``, ``St.error_code`` (0) and
        ``St.result`` (the lens URI) when the lens was created; otherwise
        None.
    """
    # CHECK BOTH DATASETS FOR SAME MECHANISM
    print("GENERATE THE LENS NAME")
    Lu.composition_lens_name(specs)

    print("GET THE SAME AS COUNT")
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.lens_operation])

    lens_local_name = get_uri_local_name(specs[St.lens])
    subject_graph = specs[St.subjectsTarget]
    object_graph = specs[St.objectsTarget]
    subject_singletons = "{}{}".format(
        Ns.singletons, get_uri_local_name(subject_graph))
    object_singletons = "{}{}".format(
        Ns.singletons, get_uri_local_name(object_graph))

    print("SOURCE: {}".format(subject_graph))
    print("TARGET: {}".format(object_graph))
    print("1. GENERATING THE INSERT QUERY")
    specs[St.insert_query] = transitive_insert_query(
        lens_local_name, subject_graph, object_graph,
        subject_singletons, object_singletons)

    if activated is False:
        logger.warning(
            "THE FUNCTION IS NOT ACTIVATED BUT THE METADATA THAT IS "
            "SUPPOSED TO BE ENTERED IS WRITEN TO THE CONSOLE.")
        return

    # Anything that is not literally True leaves the endpoint untouched.
    if activated is not True:
        return

    # RUN THE QUERY AT THE END POINT
    print("2. RUNNING THE INSERT QUERY")
    Qry.boolean_endpoint_response(specs[St.insert_query])

    # GET THE SIZE OF THE LENS JUST CREATED ABOVE
    print("3. ETTING THE SIZE OF THE LENS JUST INSERTED")
    size = Qry.get_namedgraph_size(specs[St.lens], isdistinct=False)

    # Nothing more to do when the lens has no size to report.
    if not size > 0:
        return

    # GENERATE THE METADATA ABOUT THE LENS JUST CREATED
    print("4. SOME {} TRANSITIVE TRIPLES WERE FOUND".format(size))
    metadata = transitive_metadata(specs, size)

    print("5. INSERTING THE METADATA")
    Qry.boolean_endpoint_response(metadata)

    print("6. REGISTER THE LENS")
    Urq.register_lens(specs, is_created=True)

    # RUN A CORRESPONDENCE CONSTRUCT QUERY FOR BACKING UP THE DATA TO DISC
    print("7. GENERATE THE CONSTRUCT FOR FILE DUMP")
    construct_correspondence = Qry.endpointconstruct(
        Qry.construct_namedgraph(specs[St.lens]))
    if construct_correspondence is not None:
        # Put the graph URI in front of the first opening brace.
        construct_correspondence = construct_correspondence.replace(
            '{', "<{}>\n{{".format(specs[St.lens]), 1)

    # RUN A SINGLETON METADATA CONSTRUCT QUERY FOR BACKING UP THE DATA TO DISC
    singleton_graph = "{}{}".format(Ns.singletons, specs[St.lens_name])
    construct_singletons = Qry.endpointconstruct(
        Qry.construct_namedgraph(singleton_graph))
    if construct_singletons is not None:
        construct_singletons = construct_singletons.replace(
            '{', "<{}>\n{{".format(singleton_graph), 1)

    # WRITE TO FILE
    print("WRITING TO FILE")
    write_to_file(graph_name=lens_local_name,
                  metadata=metadata,
                  directory=DIRECTORY,
                  correspondences=construct_correspondence,
                  singletons=construct_singletons)

    message = "THE LENS WAS CREATED!<br/>URI = {}".format(specs[St.lens])
    print(message)
    print("\t*** JOB DONE! ***")
    return {
        St.message: message,
        St.error_code: 0,
        St.result: specs[St.lens]
    }
Exemple #5
0
def lens_transitive_query(data):
    """
    Analyse two graphs for transitivity and build the multi-part SPARQL
    update that generates the transitive lens.

    The function reads ``data[St.lens]``, ``data[St.src_dataset]``,
    ``data[St.trg_dataset]``, ``data[St.src_graph_type]``,
    ``data[St.trg_graph_type]``, ``data['link_predicate']`` and
    ``data['singleton_graph']``.

    :param data: specification mapping holding the keys listed above.
    :return: None when
                - the lens graph already exists,
                - the source or the target graph does not exist,
                - ``reconstruct`` returns None for either graph,
                - ``intersect`` returns None, or
                - the two graphs share more than one element.
             Otherwise a tuple of:
                is transitive by : the element shared by the two graphs
                the insert query : the query used to generate the transitive
                                   lens. It is left as an empty string when
                                   the intersection is empty or when the
                                   source has no element of its own.
    """

    # print "TRANSITIVITY"
    insert_query = ""
    is_transitive_by = ""
    # pattern = re.compile('[^a-zA-Z]')

    # CHECK WHETHER THE GRAPH ALREADY EXIST
    ask_lens = Qry.graph_exists(data[St.lens])
    # print "lens: {}".format(data[ST.lens_uri])
    if ask_lens is True:
        logger.warning(
            "\n{} ALREADY EXISTS. \nTO PROCEED ANYWAY, PLEASE DELETE "
            "THE LINKSET FIRST OR CHANGE THE CONTEXT CODE\n".format(
                data[St.lens]))
        return

    # CHECK WHETHER THE GRAPHS EXIST
    ask_src = Qry.graph_exists(data[St.src_dataset])
    ask_trg = Qry.graph_exists(data[St.trg_dataset])
    if (ask_src is False) or (ask_trg is False):
        message = "SOURCE: {} [exist={}]\nTARGET: {} [exist={}]\n{}".format(
            data[St.src_dataset], ask_src, data[St.trg_dataset], ask_trg,
            "WE CAN NOT POSSIBLY RUN A TRANSITIVITY OPERATION OVER NON EXITING GRAPH"
        )
        print message
        # logger.warning("\nWE CAN NOT POSSIBLY RUN A TRANSITIVITY OPERATION OVER NON EXITING GRAPH")
        return

    # #####################################################################
    """ RECONSTRUCTION OF THE WHERE QUERIES                            """
    # ###############################2#####################################

    # These two names serve both as SPARQL variable names and as the names
    # of the temporary graphs (tmpgraph:s_predicate / tmpgraph:o_predicate).
    s_predicate = "s_predicate"
    o_predicate = "o_predicate"
    # Each result is used below as [subject/object pair, insert query text].
    result1 = reconstruct(data[St.src_dataset], data[St.src_graph_type],
                          s_predicate)
    result2 = reconstruct(data[St.trg_dataset], data[St.trg_graph_type],
                          o_predicate)

    # #####################################################################
    """ EXIT IF ONE OF THE DATASETS INPUT IS NOT COMPLIANT             """
    # ###############################2#####################################
    if (result1 is None) or (result2 is None):
        print "\nWE CANNOT PROCEED BECAUSE THERE IS NO TRANSITIVITY HERE :)"
        return None

    intersection_result = intersect(result1[0], result2[0])

    if intersection_result is None:
        print "\tSource:", result1[0]
        print "\tTarget:", result2[0]
        print "\nWE CANNOT PROCEED BECAUSE WE COULD NOT FIND ANY INTERSECTION BETWEEN THE DATASETS."
        return None

    # NOTE(review): this check is always true here, the None case has
    # already returned above.
    if intersection_result is not None:

        if len(intersection_result) > 0:

            # print "VALUE", result1[0]
            # NOTE(review): same call and arguments as the one that produced
            # intersection_result above; looks redundant - confirm that
            # intersect has no side effect before reusing that value.
            is_transitive = intersect(result1[0], result2[0])
            # print is_transitive
            # More than one shared element is reported as "no transitivity".
            if len(is_transitive) > 1:
                print is_transitive
                print "\nWE CANNOT PROCEED BECAUSE THERE IS NO TRANSITIVITY HERE :)"
                return None

            is_transitive_by = is_transitive[0]

            # #####################################################################
            """ ACCESS SINGLETON GRAPH                                         """
            # #####################################################################
            subject_sing_query = ""
            # The generic singleton triple that is searched for in (and later
            # replaced within) the two reconstructed queries.
            string = "\t\t?{:50} ?{:20} ?{} .".format("subject",
                                                      "sing_predicate",
                                                      "object")
            singleton_gph_count = 0
            alternative1 = ""
            alternative2 = ""

            # THE SUBJECT DATASET CONTAINS A SINGLETON NAMED GRAPH
            if result1[1].__contains__(string):
                singleton_gph_count += 1
                # Ends with ";" because alternative2 may follow it.
                alternative1 = "\n\t\t\t?{:20} ?{} ;".format(
                    "sing_predicate1", "object1")

            # Same test for the object dataset.
            if result2[1].__contains__(string):
                singleton_gph_count += 1
                alternative2 = "\n\t\t\t?{:20} ?{} .".format(
                    "sing_predicate2", "object2")

            # Without a second alternative, the first one closes the pattern
            # with "." instead of ";".
            if alternative2 == "":
                alternative1 = "\n\t\t\t?{:20} ?{} .".format(
                    "sing_predicate1", "object1")

            linktype = "singPre"
            # object_sing_query = ""
            if singleton_gph_count > 0:
                subject_sing_query = "\n{}\n{}\n{}{}{}\n{}"\
                    .format("\tGRAPH tmpgraph:sing",
                            "\t{",
                            "\t\t?{}".format(s_predicate),
                            "{}".format(alternative1),
                            "{}".format(alternative2),
                            "\t}")

            # elif singleton_graph_count == 0:
            #     linktype = "<{}>".format(linktype)

            # Elements of the source pair that are not in the target pair.
            _subject = list(set(result1[0]).difference(result2[0]))

            if len(_subject) > 0:
                # _subject = pattern.sub("", str(_subject[0]))
                _subject = get_uri_local_name(_subject[0])

                # Elements of the target pair that are not in the source pair.
                # NOTE(review): when this list is empty, _object stays a list
                # and is formatted as such into the query below - confirm
                # whether that case can occur.
                _object = list(set(result2[0]).difference(result1[0]))
                if len(_object) > 0:
                    # _object = pattern.sub("", str(_object[0]))
                    _object = get_uri_local_name(_object[0])

                    print "\nTRANSITIVITY ANALYSES"
                    print "\t{:15}: {}".format("SUBJECT", _subject)
                    print "\t{:15}: {}".format("OBJECT", _object)
                    print "\t{:15}: {}".format("TRANSITIVE by",
                                               is_transitive[0])

                # Dataset-specific replacements for the generic singleton
                # triple held in `string`.
                string1 = "\t\t?{:50} ?{:20} ?{} .".format(
                    s_predicate, "sing_predicate1", "object1")
                string2 = "\t\t?{:50} ?{:20} ?{} .".format(
                    o_predicate, "sing_predicate2", "object2")
                # The template below is purely positional: each "{}" takes
                # the next argument of .format(), one output line each.
                # Adding or removing an argument requires the same change in
                # the template.
                insert_query = "\n{}\n{}" \
                               "\n\n###### PART 1: {} \n{}" \
                               "\n\n###### PART 2: {}\n{}" \
                               "\n\n###### PART 3: {}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
                               "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
                               "\n\n###### PART 4: {}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
                               "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
                               "\n\n###### PART 5: {}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
                               "\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}\n{}" \
                               "\n\n###### PART 6: {}\n{}\n{}\n{}\n{}\n{}" \
                    .format("prefix tmpgraph:<{}>".format(Ns.tmpgraph),
                            "prefix tmpvocab:<{}>".format(Ns.tmpvocab),

                            # ### PART 1 ##########################################
                            # #####################################################
                            "LOAD SUBJECT CORRESPONDENCES AND SINGLETON METADATA (TEMPORARILY)",
                            "{} ;".format(result1[1].replace(string, string1)),

                            # ### PART 2 ##########################################
                            # #####################################################
                            "LOAD OBJECT CORRESPONDENCES AND SINGLETON METADATA (TEMPORARILY)",
                            "{} ;".format(result2[1].replace(string, string2)),

                            # ### PART 3 ##########################################
                            # #####################################################
                            "LOAD TEMPORARY CORRESPONDENCE  GRAPH",
                            # INSERT CORRESPONDENCE IN TEMPORARY GRAPH
                            "INSERT",
                            "{",
                            "\tGRAPH tmpgraph:corr01",
                            "\t{",
                            "\t\t?{} ?{} ?{} .".format(_subject, s_predicate, _object),
                            "\t}",
                            "{}".format(subject_sing_query),
                            "}",
                            "WHERE",
                            "{",
                            # WHERE SUBJECT CORRESPONDENCES  AND METADATA
                            "\tGRAPH tmpgraph:{}".format(s_predicate),
                            "\t{",
                            "\t\t?{:50} ?{:20} ?{} .".
                            # format(pattern.sub("", result1[0][0]), s_predicate, pattern.sub("", result1[0][1])),
                            format(get_uri_local_name(result1[0][0]), s_predicate, get_uri_local_name(result1[0][1])),

                            "\t\t?{:50} ?{:20} ?{} .".format(s_predicate, "sing_predicate1", "object1"),
                            "\t}",
                            # WHERE OBJECT CORRESPONDENCES AND METADATA
                            "\tGRAPH tmpgraph:{}".format(o_predicate),
                            "\t{",
                            "\t\t?{:50} ?{:20} ?{} .".
                            # format(pattern.sub("", result2[0][0]), o_predicate, pattern.sub("", result2[0][1])),
                            format(get_uri_local_name(result2[0][0]), o_predicate, get_uri_local_name(result2[0][1])),
                            "\t\t?{:50} ?{:20} ?{} .".format(o_predicate, "sing_predicate2", "object2"),
                            "\t}",
                            "} ;",

                            # ### PART 4 ##########################################
                            # #####################################################
                            "LOAD THE DEFINITIVE CORRESPONDENCE GRAPH",
                            "INSERT",
                            "{",
                            # INSERT DEFINITE CORRESPONDENCES
                            "\tGRAPH <{}>".format(data[St.lens]),
                            "\t{",
                            "\t\t?{:50} ?{:20} ?{} .".format(_subject, linktype, _object),
                            "\t}",
                            # TEMPORARILY INSERT THE MAPPING BETWEEN THE NEWLY GENERATED SINGLETONS AND THE OLD ONES
                            "\tGRAPH tmpgraph:sing_replaced",
                            "\t{",
                            "\t\t?{:50} {:20} ?{} .".format(linktype, "tmpvocab:replaced", s_predicate),
                            "\t}",

                            "}",
                            "WHERE",
                            "{",
                            # LOAD FROM TEMPORARY CORRESPONDENCE GRAPH
                            "\tGRAPH tmpgraph:corr01",
                            "\t{",
                            "\t\t?{} ?{} ?{} .".format(_subject, s_predicate, _object),
                            "\t\t### Create A SINGLETON URI",
                            # The next two arguments form a single BIND that is
                            # split over two output lines.
                            "\t\tBIND( replace(\"{}_#\",\"#\",".format(data['link_predicate']),
                            "\t\tSTRAFTER(str(UUID()),\"uuid:\")) as ?pre )",
                            "\t\tBIND(iri(?pre) as ?singPre)",
                            "\t}",
                            "} ;",

                            # ### PART 5 ##########################################
                            # #####################################################
                            "###### LOAD THE DEFINITIVE SINGLETON GRAPH",
                            "INSERT",
                            "{",
                            "\tGRAPH <{}>".format(data['singleton_graph']),
                            "\t{",
                            "\t\t?{:50} ?{:20} ?{} .".format(linktype, "predicate", "object"),
                            "\t}",
                            "}",

                            "WHERE",
                            "{",
                            "\tGRAPH tmpgraph:sing",
                            "\t{",
                            "\t\t?{:50} ?{:20} ?{} .".format(s_predicate, "predicate", "object"),

                            "\t}",

                            "\tGRAPH tmpgraph:sing_replaced",
                            "\t{",
                            "\t\t?{:50} {:20} ?{} .".format(linktype, "tmpvocab:replaced", s_predicate),
                            "\t}",
                            "} ;",

                            # ### PART 6 ##########################################
                            # #####################################################
                            "DROP ALL TEMPORARY GRAPHS",
                            "DROP SILENT GRAPH tmpgraph:{} ;".format(o_predicate),
                            "DROP SILENT GRAPH tmpgraph:{} ;".format(s_predicate),
                            "DROP SILENT GRAPH tmpgraph:corr01 ;",
                            "DROP SILENT GRAPH tmpgraph:sing ;",
                            "DROP SILENT GRAPH tmpgraph:sing_replaced")

        # print "\n### INSERT QUERY"
        # print insert_query
        return is_transitive_by, insert_query