Example 1
def specification_2_linkset_subset(specs, activated=False):

    if activated is True:
        print Ut.headings("EXECUTING LINKSET SUBSET SPECS...")
    else:
        print Ut.headings(
            "THE FUNCTION [specification_2_linkset_subset] IS NOT ACTIVATED")
        return {St.message: Ec.ERROR_CODE_0, St.error_code: 0, St.result: None}

    # ACCESS THE TASK SPECIFIC PREDICATE COUNT
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])

    # UPDATE THE QUERY THAT IS GOING TO BE EXECUTED
    if specs[St.sameAsCount]:

        source = specs[St.source]
        target = specs[St.target]

        # UPDATE THE SPECS OF SOURCE AND TARGETS
        update_specification(source)
        update_specification(target)

        # GENERATE THE NAME OF THE LINKSET
        Ls.set_subset_name(specs)

        # SETTING SOME GENERIC METADATA INFO
        specs[St.link_name] = "same"
        specs[St.link] = "http://risis.eu/linkset/predicate/{}".format(specs[St.link_name])
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(specs[St.link_name])
        specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])
        specs[St.assertion_method] = "{}{}".format(Ns.method,
                                                   specs[St.linkset_name])
        specs[St.justification] = "{}{}".format(Ns.justification,
                                                specs[St.linkset_name])

        # COMMENT ON THE LINK PREDICATE
        specs[St.link_comment] = "The predicate <{}> is used in replacement of the linktype <{}> used in the " \
                                 "original <{}> dataset.".format(
            specs[St.link], specs[St.source][St.link_old], specs[St.source][St.graph])

        # COMMENT ON THE JUSTIFICATION FOR THIS LINKSET
        specs[St.justification_comment] = "In OrgRef, a set of entities is linked to GRID. The linking method " \
                                          "used by OrgRef is unknown. Here we assume that it is curated work " \
                                          "and extract it as a linkset."

        # COMMENT ON THE LINKSET ITSELF
        specs[St.linkset_comment] = "The current linkset is a subset of the <{0}> dataset that links <{0}> to " \
                                    "<{1}>. The methodology used by <{0}> to generate this built-in linkset is " \
                                    "unknown.".format(specs[St.source][St.graph], specs[St.target][St.graph])

        source[St.entity_ns] = str(source[St.entity_datatype]).replace(
            source[St.entity_name], '')
        target[St.entity_ns] = str(target[St.entity_datatype]).replace(
            target[St.entity_name], '')

        # GENERATE THE LINKSET
        inserted_linkset = spa_linkset_subset(specs, activated)
        # print "LINKSET SUBSET RESULT:", inserted_linkset

        if "ALREADY EXISTS" in inserted_linkset[St.message]:
            return inserted_linkset

        if int(specs[St.triples]) > 0:

            # REGISTER THE ALIGNMENT
            if "ALREADY EXISTS" in inserted_linkset[St.message]:
                Urq.register_alignment_mapping(specs, created=False)
            else:
                Urq.register_alignment_mapping(specs, created=True)

        return inserted_linkset

    else:
        print Ec.ERROR_CODE_1
        return {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}
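A minimal usage sketch (the `specs` keys shown are assumptions inferred from how the function reads them and are not exhaustive; `St`, `Ec`, `Ns`, `Qry`, `Ls` and `Urq` are the project's own modules, and the URIs are placeholders):

# specs = {
#     St.mechanism: "exactName",                                      # hypothetical mechanism label
#     St.source: {St.graph: "http://risis.eu/dataset/source_example"},
#     St.target: {St.graph: "http://risis.eu/dataset/target_example"},
# }
# response = specification_2_linkset_subset(specs, activated=True)
# print response[St.message]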
Example 2
def refining(specs, insert_query, activated=False):

    refined = {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}
    diff = {St.message: Ec.ERROR_CODE_4, St.error_code: 1, St.result: None}

    # UPDATE THE SPECS VARIABLE
    # print "UPDATE THE SPECS VARIABLE"
    update_specification(specs)
    update_specification(specs[St.source])
    update_specification(specs[St.target])

    # ACCESS THE TASK SPECIFIC PREDICATE COUNT
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])
    # print "sameAsCount:", specs[St.sameAsCount]

    if specs[St.sameAsCount] is None:
        return {'refined': refined, 'difference': diff}

    # GENERATE THE NAME OF THE LINKSET
    Ls.set_refined_name(specs)
    # print "\nREFINED NAME:", specs[St.refined]
    # print "LINKSET TO REFINE BEFORE CHECK:", specs[St.linkset]

    # CHECK WHETHER OR NOT THE LINKSET WAS ALREADY CREATED
    check = Ls.run_checks(specs, check_type="refine")
    # print "\nREFINED NAME:", specs[St.refined]
    # print "LINKSET TO REFINE:", specs[St.linkset]

    if check[St.message] == "NOT GOOD TO GO":
        # refined = check[St.refined]
        # difference = check["difference"]
        return check

    # print "\nREFINED:", specs[St.refined]
    # print "LINKSET TO REFINE:", specs[St.linkset]
    # print "CHECK:", check

    # THE LINKSET DOES NOT EXIST, LET'S CREATE IT NOW
    print Ls.refined_info(specs, specs[St.sameAsCount])

    # POINT TO THE LINKSET THE CURRENT LINKSET WAS DERIVED FROM
    print "1. wasDerivedFrom {}".format(specs[St.linkset])
    specs[St.derivedfrom] = "\t\tprov:wasDerivedFrom\t\t\t<{}> ;".format(
        specs[St.linkset])

    # print "REFINED NAME:",  specs[St.refined_name]
    # print "REFINED:", specs[St.refined]
    # print "LINKSET TO BE REFINED:", specs[St.linkset]

    print "\n2. RETRIEVING THE METADATA ABOUT THE GRAPH TO REFINE"
    # metadata_q = Qry.q_linkset_metadata(specs[St.linkset])
    metadata_q = """
    prefix ll:    <{}>
    SELECT DISTINCT ?type ?singletonGraph
    {{
        # LINKSET METADATA
        <{}>
            a                       ?type ;
            ll:singletonGraph		?singletonGraph .
    }}
    """.format(Ns.alivocab, specs[St.linkset])
    print "QUERY:", metadata_q
    matrix = Qry.sparql_xml_to_matrix(metadata_q)
    # print "\nMETA DATA: ", matrix

    if matrix:

        if matrix[St.message] == "NO RESPONSE":
            print Ec.ERROR_CODE_1
            print matrix[St.message]
            return {'refined': refined, 'difference': diff}

        elif matrix[St.result] is None:
            print matrix[St.message]
            returned = {
                St.message: matrix[St.message],
                St.error_code: 666,
                St.result: None
            }
            return {'refined': returned, 'difference': diff}

    else:
        print Ec.ERROR_CODE_1
        return {'refined': refined, 'difference': diff}

    # GET THE SINGLETON GRAPH OF THE LINKSET TO BE REFINED
    print "\n3. GETTING THE SINGLETON GRAPH OF THE GRAPH TO REFINE"
    specs[St.singletonGraph] = matrix[St.result][1][1]
    # print matrix[St.result][1][0]

    specs[St.insert_query] = insert_query(specs)
    print specs[St.insert_query]

    if isinstance(specs[St.insert_query], str):
        is_run = Qry.boolean_endpoint_response(specs[St.insert_query])

    else:
        print "\n4. RUNNING THE EXTRACTION QUERY"
        print specs[St.insert_query][0]
        # is_run = Qry.boolean_endpoint_response(specs[St.insert_query][0])
        Qry.boolean_endpoint_response(specs[St.insert_query][0])

        print "\n5. RUNNING THE FINDING QUERY"
        print specs[St.insert_query][1]
        is_run = Qry.boolean_endpoint_response(specs[St.insert_query][1])

    print "\n>>> RUN SUCCESSFULLY:", is_run.upper()

    # NO INSERTION HAPPENED
    if is_run == "true" or is_run == Ec.ERROR_STARDOG_1:

        # GENERATE THE
        #   (1) LINKSET METADATA
        #   (2) LINKSET OF CORRESPONDENCES
        #   (3) SINGLETON METADATA
        # AND WRITE THEM ALL TO FILE

        print "GENERATING THE METADATA"
        pro_message = refine_metadata(specs)

        # SET THE RESULT ASSUMING IT WENT WRONG
        refined = {
            St.message: Ec.ERROR_CODE_4,
            St.error_code: 4,
            St.result: None
        }
        diff = {St.message: Ec.ERROR_CODE_4, St.error_code: 4, St.result: None}

        server_message = "Linksets created as: [{}]".format(specs[St.refined])
        message = "The linkset was created as [{}]. <br/>{}".format(
            specs[St.refined], pro_message)

        # MESSAGE ABOUT THE INSERTION STATISTICS
        print "\t", server_message

        if int(specs[St.triples]) > 0:

            # UPDATE THE REFINED VARIABLE AS THE INSERTION WAS SUCCESSFUL
            refined = {
                St.message: message,
                St.error_code: 0,
                St.result: specs[St.linkset]
            }

            print "REGISTERING THE ALIGNMENT"
            if "ALREADY EXISTS" in refined[St.message]:
                register_alignment_mapping(specs, created=False)
            else:
                register_alignment_mapping(specs, created=True)

            try:
                print "\nCOMPUTE THE DIFFERENCE AND DOCUMENT IT"
                diff_lens_specs = {
                    St.researchQ_URI: specs[St.researchQ_URI],
                    St.subjectsTarget: specs[St.linkset],
                    St.objectsTarget: specs[St.refined]
                }
                diff = Df.difference(diff_lens_specs, activated=activated)
                message_2 = "\t>>> {} CORRESPONDENCES INSERTED AS THE DIFFERENCE".format(
                    diff_lens_specs[St.triples])
                print message_2
            except Exception as err:
                print "THE DIFFERENCE FAILED: ", str(err.message)

            print "\tLinkset created as: ", specs[St.refined]
            print "\t*** JOB DONE! ***"

            return {'refined': refined, 'difference': diff}

        else:
            print ">>> NO TRIPLE WAS INSERTED BECAUSE NO MATCH COULD BE FOUND"
            return {'refined': refined, 'difference': diff}

    else:
        print "NO MATCH COULD BE FOUND."
        return {'refined': refined, 'difference': diff}
Example 3
def lens_transitive(specs, activated=False):

    # CHECK BOTH DATASETS FOR SAME MECHANISM
    print "GENERATE THE LENS NAME"
    Lu.composition_lens_name(specs)

    print "GET THE SAME AS COUNT"
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.lens_operation])
    # print same_as_count

    # GENERATE THE INSERT QUERY FOR TRANSITIVITY
    # transitive_analyses = lens_transitive_query(specs)
    # if transitive_analyses is None:
    #     return
    # specs[St.insert_query] = transitive_analyses[1]
    # print insert_query
    # exit(0)
    # specs['is_transitive_by'] = transitive_analyses[0]
    ln = get_uri_local_name(specs[St.lens])
    sg = specs[St.subjectsTarget]
    tg = specs[St.objectsTarget]
    ssg = "{}{}".format(Ns.singletons, get_uri_local_name(sg))
    tsg = "{}{}".format(Ns.singletons, get_uri_local_name(tg))

    print "SOURCE: {}".format(sg)
    print "TARGET: {}".format(tg)
    print "1. GENERATING THE INSERT QUERY"
    specs[St.insert_query] = transitive_insert_query(ln, sg, tg, ssg, tsg)

    if activated is True:

        # RUN THE QUERY AT THE END POINT
        print "2. RUNNING THE INSERT QUERY"
        Qry.boolean_endpoint_response(specs[St.insert_query])

        # GET THE SIZE OF THE LENS JUST CREATED ABOVE
        print "3. ETTING THE SIZE OF THE LENS JUST INSERTED"
        size = Qry.get_namedgraph_size(specs[St.lens], isdistinct=False)

        # IF ACTIVATED, INSERT THE METADATA
        if size > 0:

            # GENERATE THE METADATA ABOUT THE LENS JUST CREATED
            print "4. SOME {} TRANSITIVE TRIPLES WERE FOUND".format(size)
            metadata = transitive_metadata(specs, size)
            # print metadata

            print "5. INSERTING THE METADATA"
            Qry.boolean_endpoint_response(metadata)

            print "6. REGISTER THE LENS"
            Urq.register_lens(specs, is_created=True)

            # RUN A CORRESPONDENCE CONSTRUCT QUERY FOR BACKING UP THE DATA TO DISC
            print "7. GENERATE THE CONSTRUCT FOR FILE DUMP"
            construct_correspondence = Qry.endpointconstruct(
                Qry.construct_namedgraph(specs[St.lens]))

            if construct_correspondence is not None:
                construct_correspondence = construct_correspondence.replace(
                    '{', "<{}>\n{{".format(specs[St.lens]), 1)

            # RUN A SINGLETON METADATA CONSTRUCT QUERY FOR BACKING UP THE DATA TO DISC
            construct_singletons = Qry.endpointconstruct(
                Qry.construct_namedgraph("{}{}".format(Ns.singletons,
                                                       specs[St.lens_name])))

            if construct_singletons is not None:
                construct_singletons = construct_singletons. \
                    replace('{', "<{}{}>\n{{".format(Ns.singletons, specs[St.lens_name]), 1)

            # WRITE TO FILE
            print "WRITING TO FILE"
            write_to_file(graph_name=ln,
                          metadata=metadata,
                          directory=DIRECTORY,
                          correspondences=construct_correspondence,
                          singletons=construct_singletons)

            # return specs[St.lens]
            message = "THE LENS WAS CREATED!<br/>URI = {}".format(
                specs[St.lens])
            print message
            print "\t*** JOB DONE! ***"
            return {
                St.message: message,
                St.error_code: 0,
                St.result: specs[St.lens]
            }

    if activated is False:
        logger.warning(
            "THE FUNCTION IS NOT ACTIVATED BUT THE METADATA THAT IS "
            "SUPPOSED TO BE ENTERED IS WRITEN TO THE CONSOLE.")
Example 4
def refine_lens(specs, activated=False, check_file=False):

    try:

        message = Ec.ERROR_CODE_0.replace('\n', "<br/>")
        if activated is False:
            print Ut.headings("THE FUNCTION [refine_lens] IS NOT ACTIVATED")
            return {St.message: message, St.error_code: 4, St.result: None}

        # 1. UPDATING THE SPECS BY CHANGING LINKSET TO LENS
        specs[St.refined] = specs['linkset']
        specs.pop('linkset')
        Ut.update_specification(specs)

        # CHECKING WHETHER THE LENS IS REFINABLE
        # Refine.is_refinable(specs[St.refined])

        # PRINTING THE SPECIFICATIONS
        # lensUt.print_specs(specs)

        # ASSIGN THE SAME AS COUNT
        specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])

        message = Ec.ERROR_CODE_4.replace('\n', "<br/>")
        if specs[St.sameAsCount]:

            source = specs[St.source]
            target = specs[St.target]

            # 2. SET THE LENS NAME
            # *******************************
            print "\n2. SET THE LENS NAME"
            # *******************************
            lensUt.lens_refine_name(specs, 'refine')

            # *******************************
            # GOOD TO GO CHECK
            # *******************************
            query = """
        SELECT *
        {{
            <{}> ?predicate ?object .
        }}
            """.format(specs[St.lens])
            check = Lens_Union.run_checks(specs, query, operator="refine")

            # NOT GOOD TO GO, IT ALREADY EXISTS
            if "ALREADY EXISTS" in check[St.message]:
                return {
                    St.message: check[St.message],
                    St.error_code: 71,
                    St.result: specs[St.lens]
                }

            # *******************************
            # GOOD TO GO
            # *******************************
            else:

                lens_start = time.time()
                # UPDATE THE SPECIFICATION
                Ut.update_specification(specs[St.source])
                Ut.update_specification(specs[St.target])

                # PRINTING THE SPECIFICATIONS
                lensUt.print_specs(specs)

                ########################################################################
                print """\n4. EXECUTING THE GEO-MATCH                                """
                ########################################################################
                geo_match(specs)

                ########################################################################
                print """\n5. EXTRACT THE NUMBER OF TRIPLES                          """
                ########################################################################
                specs[St.triples] = Qry.get_namedgraph_size("{0}{1}".format(
                    Ns.lens, specs[St.lens_name]))

                ########################################################################
                print """\n6. ASSIGN THE SPARQL INSERT QUERY                         """
                ########################################################################
                specs[St.insert_query] = "{} ;\n{};\n{}".format(
                    geo_load_query(specs, True), geo_load_query(specs, False),
                    geo_match_query(specs))

                lens_end = time.time()
                diff = lens_end - lens_start
                print "\n\t>>> Executed so far in    : {:<14}".format(
                    str(datetime.timedelta(seconds=diff)))

                if int(specs[St.triples]) > 0:

                    ########################################################################
                    print """\n4. INSERTING THE GENERIC METADATA                         """
                    ########################################################################
                    metadata = Gn.lens_refine_geo_metadata(specs)
                    Qry.boolean_endpoint_response(metadata)

                    ########################################################################
                    print """\n5. WRITING TO FILE                                        """
                    ########################################################################
                    src = [source[St.graph_name], "", source[St.entity_ns]]
                    trg = [target[St.graph_name], "", target[St.entity_ns]]

                    # linkset_path = "D:\datasets\Linksets\ExactName"
                    linkset_path = DIRECTORY
                    writelinkset(src,
                                 trg,
                                 specs[St.lens_name],
                                 linkset_path,
                                 metadata,
                                 check_file=check_file)
                    server_message = "Linksets created as: {}".format(
                        specs[St.lens])
                    message = "The linkset was created as [{}] with {} triples found!".format(
                        specs[St.lens], specs[St.triples])

                    print "\n\t", server_message

                    Urq.register_lens(specs, is_created=True)

                    ls_end_2 = time.time()
                    diff = ls_end_2 - lens_end
                    print ">>> Executed in    : {:<14}".format(
                        str(datetime.timedelta(seconds=diff)))

                    print "\t*** JOB DONE! ***"

                    return {
                        St.message: message,
                        St.error_code: 0,
                        St.result: specs[St.lens]
                    }

                else:
                    print "\tThe linkset was not generated as no match could be found"
                    print "\t*** JOB DONE! ***"
                    return {
                        St.message: message,
                        St.error_code: 4,
                        St.result: None
                    }

    except Exception as err:
        traceback.print_exc()
        return {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}
        # print geo_load_query(specs, is_source=True)
        # print geo_load_query(specs, is_source=False)
        # geo_match_query(specs)

        # traceback.print_exception()


# import Alignments.Manage.AdminGraphs as adm
# adm.drop_a_lens("http://risis.eu/lens/refine_union_Grid_20170712_Eter_2014_N291690309", display=True, activated=True)
# refine_lens(specs_example, activated=True, check_file=False)
#
# adm.drop_a_lens("http://risis.eu/lens/refine_union_Orgreg_20170718_Eter_2014_P1061032980", display=True, activated=True)
# refine_lens(specs_example_2, activated=True, check_file=False)
#
# adm.drop_a_lens("http://risis.eu/lens/refine_union_Orgreg_20170718_Grid_20170712_N1966224323", display=True, activated=True)
# refine_lens(specs_example_3, activated=True, check_file=False)
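The `specs_example` variables referenced above are not part of this excerpt; a hypothetical shape, inferred only from the keys `refine_lens` reads, might look like:

# specs_example = {
#     'linkset': "http://risis.eu/lens/union_example",               # renamed to St.refined inside refine_lens
#     St.mechanism: "Geo_SSQ",                                       # assumed mechanism label
#     St.source: {St.graph: "http://risis.eu/dataset/source_example"},
#     St.target: {St.graph: "http://risis.eu/dataset/target_example"},
# }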