Esempio n. 1
0
def expand_approx(specs, theta, stop_words_string, stop_symbols_string, linkset2expand, reorder=True):

    data = None
    inserted_1 = 0
    inserted_2 = 0
    total = 0
    count= 0
    abort = False
    for is_source in [True, False]:

        count += 1
        print Ut.headings("********* PASS {} *********").format(count)

        # if is_source is False:
        #     specs[St.corr_reducer] = data[St.result]
            # print data[St.result]

        data = prefixed_inverted_index( specs, theta=theta, reorder=reorder, stop_words_string=stop_words_string,
                             stop_symbols_string=stop_symbols_string, expands=True, is_source=is_source,
                             linkset2expand=linkset2expand, check_file=False)

        if count == 1:
            inserted_1 += data['inserted']
            total += inserted_1

        else:
            inserted_2 += data['inserted']
            total += inserted_2

        if data[St.message].__contains__('ALREADY EXISTS'):
            abort = True
            print "\n>>> THE PROCESS IS BEING ABORTED AS THE FIRST " \
                  "PASS REVEALS THE EXISTENCE OF AN EXPANSION OF THE GRAPH."
            break

    if abort is False:
        # REMOVE DUPLICATES
        print "REMOVING REPETITION"
        if data is not None and data[St.result] is not None:
            print "\t", Qry.remove_repetition_same_direction(data[St.result])

        # PRINT THE FINAL TRIPLE COUNT
        final_inserted = Qry.get_triples_count(data[St.result])
        if final_inserted is None:
            final_inserted = 0
        else:
            final_inserted = int(final_inserted)
        print "\nOVERALL STATS:\n\tCORRESPONDENCES DISCOVERED AT PASS 1   : {}\n\tCORRESPONDENCES DISCOVERED AT PASS 2   : {}".format(
            inserted_1, inserted_2)
        print "\tOVERALL CORRESPONDENCES DISCOVERED     : {}".format(total)
        print "\tTOTAL REPEATED CORRESPONDENCES REMOVED : {}".format(total - final_inserted)
        print "\tTOTAL CORRESPONDENCES INSERTED         : {}".format(final_inserted)
        # print data

        return data
Esempio n. 2
0
def union(specs, activated=False):

    if activated is False:
        # logger.warning("THE FUNCTION IS NOT ACTIVATED")
        print("THE FUNCTION IS NOT ACTIVATED")
        return {
            St.message: "THE FUNCTION IS NOT ACTIVATED.",
            St.error_code: 1,
            St.result: None
        }

    print "\nEXECUTING UNION SPECS" \
          "\n======================================================" \
          "========================================================"
    """
    THE generate_lens_name FUNCTION RETURNS THE NAME OF THE UNION AND A
    QUERY THAT ALLOWS TO ASk WHETHER THE LENS TO BE CREATED EXIST BY CHECKING
    WHETHER THERE EXISTS A LENS WITH THE SAME COMPOSITION IN TERMS GRAPHS USED FOR THE UNION
    """

    # SET THE NAME OF THE UNION-LENS
    print "1. DATASETS:", len(specs[St.datasets])
    for ds in specs[St.datasets]:
        print "\t- {}".format(ds)
    info = Lu.generate_lens_name(specs[St.datasets])

    specs[St.lens] = "{}{}".format(Ns.lens, info["name"])
    print "\n2. LENS: ", info["name"]

    # CHECK WHETHER THE LENS EXISTS
    check = run_checks(specs, info["query"])
    if check[St.result] != "GOOD TO GO":
        if check[St.message].__contains__("ALREADY EXISTS"):
            Urq.register_lens(specs, is_created=False)
        return check
    # print "AFTER CHECK"

    # PREPARATION FOR THE CREATION OF THE LENS
    specs[St.lens_target_triples] = ""
    specs[St.expectedTriples] = 0
    specs[St.insert_query] = ""
    lens = specs[St.lens]
    source = "{}{}".format(Ns.tmpgraph, "load00")
    message_2 = Ec.ERROR_CODE_8.replace("#", specs[St.lens])
    count = -1
    insert_ans = False

    try:

        # GO THROUGH THE LINKSETS/LENSES IN THE LENS
        #   1-SUM UP THE EXPECTED NUMBER OF TRIPLES
        #   2-GENERATE THE TRIPLES REPRESENTATION OF GHE GRAPHS COMPOSING THIS LENS
        #   3-GENERATE THE INSERT QUERY FOR MOVING BOTH LINKSET AND SINGLETON GRAPHS TO THE UNION GRAPH
        total_size = 0

        # LOAD ALL GRAPHS IN LOAD00
        specs[St.insert_query] += "DROP SILENT GRAPH <{}{}> ;\n".format(
            Ns.tmpgraph, "load00")

        # ITERATE THROUGH THE PROVIDED GRAPHS
        for linkset in specs[St.datasets]:

            # print "TARGET: ", linkset
            count += 1

            # GET THE TOTAL NUMBER OF CORRESPONDENCE TRIPLES INSERTED
            curr_triples = Qry.get_triples(linkset)
            # PROBABLY THE LINKSET HAS NO SUCH PROPERTY " void:triples  ?triples ."

            if curr_triples is None:
                curr_triples = Qry.get_triples_count(linkset)

            total_size += int(curr_triples)
            print "{} Contains {} triples".format(linkset, curr_triples)

            if curr_triples is not None:
                specs[St.expectedTriples] += int(curr_triples)
            else:
                # THE IS A PROBLEM WITH THE GRAPH FOR SEVERAL POSSIBLE REASONS
                return {
                    St.message: message_2.replace("\n", "<br/>"),
                    St.error_code: 1,
                    St.result: None
                }

            # GENERATE TRIPLES OUT OF THE TARGETS
            specs[
                St.
                lens_target_triples] += "\n\t        void:target                         <{}> ;".format(
                    linkset)

            # GET THE INSERT QUERY
            # BOTH THE LINKSET AND THE SINGLETONS ARE MOVED TO A SINGLE GRAPH
            partial_query = Qry.q_copy_graph(source, source, linkset)
            if count == 0:
                specs[St.insert_query] += partial_query
            else:
                specs[St.insert_query] += " ;\n{}".format(partial_query)

        # INTERSECTION MANIPULATION OVER THE UNION (SOURCE)
        insert_query = union_insert_q(lens, source, specs[St.lens_name])
        # print "manipulation:", manipulation
        specs[St.insert_query] += " ;\n{}".format(insert_query)

        # GENERATE THE LENS UNION
        if activated is True:

            # print specs[St.insert_query]
            insert_ans = Qry.boolean_endpoint_response(specs[St.insert_query])

            specs[St.triples] = Qry.get_namedgraph_size(lens, isdistinct=False)
            if specs[St.triples] == "0":
                message = Ec.ERROR_CODE_9
                print message
                # return None
                return {
                    St.message: message.replace("\n", "<br/>"),
                    St.error_code: 1,
                    St.result: None
                }

            # CHECK WHETHER THE RESULT CONTAINS DUPLICATES
            contains_duplicated = Qry.contains_duplicates(lens)
            print "Contains Opposite Direction Duplicated:", contains_duplicated

            # IF IT DOES, REMOVE THE DUPLICATES
            if contains_duplicated is True:
                # logger.warning("THE LENS CONTAINS DUPLICATES.")
                print "THE LENS CONTAINS DUPLICATES."
                Qry.remove_duplicates(lens)
                # logger.warning("THE DUPLICATES ARE NOW REMOVED.")
                print "THE DUPLICATES ARE NOW REMOVED."

            print "Number of triples loaded              : {}".format(
                total_size)

            specs[St.triples] = Qry.get_namedgraph_size(lens, isdistinct=False)
            print "\t>>> INSERTED:  {}\n\t>>> INSERTED TRIPLES: {}".format(
                insert_ans, specs[St.triples])

            print "Inserted : {}".format(specs[St.triples])
            print "Removed  : {}".format(total_size - int(specs[St.triples]))

            # LOAD THE METADATA
            # NOT GOOD AS THE LENS ALSO HAS A SINGLETON GRAPH
            # inserted_correspondences = int(Qry.get_union_triples(lens))
            inserted_correspondences = int(specs[St.triples])
            # print "inserted_correspondences:", inserted_correspondences
            specs[St.removedDuplicates] = specs[
                St.expectedTriples] - inserted_correspondences
            metadata = Gn.union_meta(specs)
            # print "METADATA:", metadata
            meta_ans = Qry.boolean_endpoint_response(metadata)
            print "\t>>> IS THE METADATA GENERATED AND INSERTED?  {}".format(
                meta_ans)

        construct_response = Qry.get_constructed_graph(specs[St.lens])
        if construct_response is not None:
            print "\t>>> WRITING TO FILE"
            construct_response = construct_response.replace(
                '{', "<{}>\n{{".format(specs[St.lens]), 1)
            write_to_file(graph_name=specs[St.lens_name],
                          metadata=None,
                          correspondences=construct_response,
                          directory=DIRECTORY)
        print "\tLens created as : ", specs[St.lens]

        # REGISTER THE LINKSET
        Urq.register_lens(specs, is_created=True)

        # return specs[St.lens]
        message = "THE LENS WAS CREATED as {}. " \
                  "With initially {} triples loaded, {} duplicated triples were found and removed.".\
            format(specs[St.lens], total_size, total_size - int(specs[St.triples]))

        print "\t*** JOB DONE! ***"
        return {
            St.message: message,
            St.error_code: 0,
            St.result: specs[St.lens]
        }

    except Exception as err:
        # logger.warning(err)
        if insert_ans == "true":
            "DROP THE INSERTED UNION"
            drop_linkset(lens, activated=True)

        print "ERROR IN UNION LENS CREATION:", err
        return {St.message: ERROR_CODE_11, St.error_code: 11, St.result: None}