def expand_approx(specs, theta, stop_words_string, stop_symbols_string, linkset2expand, reorder=True):
    """Expand a linkset in two passes and print the overall statistics.

    ``prefixed_inverted_index`` is called with ``expands=True`` and
    ``check_file=False``, first with ``is_source=True`` and then with
    ``is_source=False``. The ``'inserted'`` value of each pass is recorded.
    If the message of a pass contains ``'ALREADY EXISTS'`` the process stops
    right away and the result of that pass is returned.

    When both passes ran, same-direction repetitions are removed from the
    resulting graph, its triple count is fetched and the statistics are
    printed.

    Output goes through single-argument ``print(...)`` calls, which print the
    same text as the Python 2 print statement used elsewhere in this file.

    :param specs: specifications forwarded untouched to
        ``prefixed_inverted_index``.
    :param theta: forwarded as ``theta``.
    :param stop_words_string: forwarded as ``stop_words_string``.
    :param stop_symbols_string: forwarded as ``stop_symbols_string``.
    :param linkset2expand: forwarded as ``linkset2expand``.
    :param reorder: forwarded as ``reorder``.
    :return: the dictionary returned by the last pass that was executed.
    """
    data = None
    inserted = [0, 0]

    for pass_number, is_source in enumerate((True, False), 1):
        print(Ut.headings("********* PASS {} *********").format(pass_number))

        data = prefixed_inverted_index(
            specs, theta=theta, reorder=reorder, stop_words_string=stop_words_string,
            stop_symbols_string=stop_symbols_string, expands=True, is_source=is_source,
            linkset2expand=linkset2expand, check_file=False)

        inserted[pass_number - 1] += data['inserted']

        if 'ALREADY EXISTS' in data[St.message]:
            # NOTHING IS DONE AFTER AN ABORT, SO THE CURRENT RESULT IS RETURNED DIRECTLY
            print("\n>>> THE PROCESS IS BEING ABORTED AS THE FIRST "
                  "PASS REVEALS THE EXISTENCE OF AN EXPANSION OF THE GRAPH.")
            return data

    # REMOVE DUPLICATES
    print("REMOVING REPETITION")
    expanded_graph = data[St.result]
    final_inserted = 0

    # WITHOUT A RESULTING GRAPH THERE IS NOTHING TO CLEAN OR TO COUNT,
    # SO THE STORE IS ONLY QUERIED WHEN A GRAPH IS AVAILABLE
    if expanded_graph is not None:
        print("\t%s" % (Qry.remove_repetition_same_direction(expanded_graph),))

        # FETCH THE FINAL TRIPLE COUNT (A MISSING COUNT IS REPORTED AS 0)
        triples_count = Qry.get_triples_count(expanded_graph)
        if triples_count is not None:
            final_inserted = int(triples_count)

    total = inserted[0] + inserted[1]

    print("\nOVERALL STATS:\n\tCORRESPONDENCES DISCOVERED AT PASS 1 : {}\n\tCORRESPONDENCES DISCOVERED AT PASS 2 : {}".format(
        inserted[0], inserted[1]))
    print("\tOVERALL CORRESPONDENCES DISCOVERED : {}".format(total))
    print("\tTOTAL REPEATED CORRESPONDENCES REMOVED : {}".format(total - final_inserted))
    print("\tTOTAL CORRESPONDENCES INSERTED : {}".format(final_inserted))

    return data
def union(specs, activated=False):
    """Create a union lens out of the graphs listed in ``specs[St.datasets]``.

    The function builds one update query that drops the temporary ``load00``
    graph, copies every listed graph into it and then appends the query
    produced by ``union_insert_q``. After running it, duplicates are removed
    if ``Qry.contains_duplicates`` reports any, the metadata generated by
    ``Gn.union_meta`` is inserted, the constructed graph is written to file
    when available, and the lens is registered.

    ``specs`` is updated in place (lens, lens_target_triples, expectedTriples,
    insert_query, triples and removedDuplicates entries).

    Output goes through single-argument ``print(...)`` calls, which print the
    same text as the Python 2 print statement used elsewhere in this file.

    :param specs: dictionary holding at least the ``St.datasets`` entry; the
        ``St.lens_name`` entry is read as well.
    :param activated: nothing is executed while this is ``False``.
    :return: a dictionary with the ``St.message``, ``St.error_code`` and
        ``St.result`` keys:
        error code 1 when the function is not activated, when the size of one
        of the graphs cannot be determined or when the created lens is empty;
        error code 0 and the lens as result on success;
        error code 11 when an exception occurred.
        When ``run_checks`` does not answer "GOOD TO GO", its own result is
        returned as is.
    """
    if activated is False:
        # logger.warning("THE FUNCTION IS NOT ACTIVATED")
        print("THE FUNCTION IS NOT ACTIVATED")
        return {St.message: "THE FUNCTION IS NOT ACTIVATED.", St.error_code: 1, St.result: None}

    print("\nEXECUTING UNION SPECS"
          "\n======================================================"
          "========================================================")

    # THE generate_lens_name FUNCTION RETURNS THE NAME OF THE UNION AND A
    # QUERY THAT ALLOWS TO ASK WHETHER THE LENS TO BE CREATED EXISTS BY CHECKING
    # WHETHER THERE EXISTS A LENS WITH THE SAME COMPOSITION IN TERMS OF GRAPHS USED FOR THE UNION

    # SET THE NAME OF THE UNION-LENS
    print("1. DATASETS: {}".format(len(specs[St.datasets])))
    for ds in specs[St.datasets]:
        print("\t- {}".format(ds))
    info = Lu.generate_lens_name(specs[St.datasets])
    specs[St.lens] = "{}{}".format(Ns.lens, info["name"])
    print("\n2. LENS:  %s" % (info["name"],))

    # CHECK WHETHER THE LENS EXISTS
    check = run_checks(specs, info["query"])
    if check[St.result] != "GOOD TO GO":
        if "ALREADY EXISTS" in check[St.message]:
            Urq.register_lens(specs, is_created=False)
        return check

    # PREPARATION FOR THE CREATION OF THE LENS
    specs[St.lens_target_triples] = ""
    specs[St.expectedTriples] = 0
    specs[St.insert_query] = ""
    lens = specs[St.lens]
    source = "{}{}".format(Ns.tmpgraph, "load00")
    message_2 = Ec.ERROR_CODE_8.replace("#", specs[St.lens])
    insert_ans = False

    try:
        # GO THROUGH THE LINKSETS/LENSES IN THE LENS
        #   1-SUM UP THE EXPECTED NUMBER OF TRIPLES
        #   2-GENERATE THE TRIPLES REPRESENTATION OF THE GRAPHS COMPOSING THIS LENS
        #   3-GENERATE THE INSERT QUERY FOR MOVING BOTH LINKSET AND SINGLETON GRAPHS TO THE UNION GRAPH
        total_size = 0

        # LOAD ALL GRAPHS IN LOAD00
        specs[St.insert_query] += "DROP SILENT GRAPH <{}> ;\n".format(source)

        # ITERATE THROUGH THE PROVIDED GRAPHS
        for position, linkset in enumerate(specs[St.datasets]):

            # GET THE TOTAL NUMBER OF CORRESPONDENCE TRIPLES INSERTED
            curr_triples = Qry.get_triples(linkset)
            # PROBABLY THE LINKSET HAS NO SUCH PROPERTY " void:triples ?triples ."
            if curr_triples is None:
                curr_triples = Qry.get_triples_count(linkset)

            # THE SIZE MUST BE KNOWN BEFORE IT IS CONVERTED: int(None) WOULD RAISE
            # AND THE GENERIC ERROR WOULD BE REPORTED INSTEAD OF THE ONE BELOW
            if curr_triples is None:
                # THERE IS A PROBLEM WITH THE GRAPH FOR SEVERAL POSSIBLE REASONS
                return {St.message: message_2.replace("\n", "<br/>"), St.error_code: 1, St.result: None}

            graph_size = int(curr_triples)
            total_size += graph_size
            print("{} Contains {} triples".format(linkset, curr_triples))
            specs[St.expectedTriples] += graph_size

            # GENERATE TRIPLES OUT OF THE TARGETS
            specs[St.lens_target_triples] += "\n\t void:target <{}> ;".format(linkset)

            # GET THE INSERT QUERY
            # BOTH THE LINKSET AND THE SINGLETONS ARE MOVED TO A SINGLE GRAPH
            partial_query = Qry.q_copy_graph(source, source, linkset)
            if position == 0:
                specs[St.insert_query] += partial_query
            else:
                specs[St.insert_query] += " ;\n{}".format(partial_query)

        # INTERSECTION MANIPULATION OVER THE UNION (SOURCE)
        insert_query = union_insert_q(lens, source, specs[St.lens_name])
        specs[St.insert_query] += " ;\n{}".format(insert_query)

        # GENERATE THE LENS UNION
        # NOTE(review): FOR A VALUE OF activated THAT IS NEITHER False NOR True THIS
        # BLOCK IS SKIPPED AND THE FUNCTION RETURNS None -- CONFIRM THIS IS INTENDED
        if activated is True:
            insert_ans = Qry.boolean_endpoint_response(specs[St.insert_query])

            specs[St.triples] = Qry.get_namedgraph_size(lens, isdistinct=False)
            if specs[St.triples] == "0":
                message = Ec.ERROR_CODE_9
                print(message)
                return {St.message: message.replace("\n", "<br/>"), St.error_code: 1, St.result: None}

            # CHECK WHETHER THE RESULT CONTAINS DUPLICATES
            contains_duplicated = Qry.contains_duplicates(lens)
            print("Contains Opposite Direction Duplicated: %s" % (contains_duplicated,))

            # IF IT DOES, REMOVE THE DUPLICATES
            if contains_duplicated is True:
                # logger.warning("THE LENS CONTAINS DUPLICATES.")
                print("THE LENS CONTAINS DUPLICATES.")
                Qry.remove_duplicates(lens)
                # logger.warning("THE DUPLICATES ARE NOW REMOVED.")
                print("THE DUPLICATES ARE NOW REMOVED.")

            print("Number of triples loaded : {}".format(total_size))

            # THE SIZE IS READ AGAIN AS DUPLICATES MAY HAVE BEEN REMOVED
            specs[St.triples] = Qry.get_namedgraph_size(lens, isdistinct=False)
            print("\t>>> INSERTED: {}\n\t>>> INSERTED TRIPLES: {}".format(insert_ans, specs[St.triples]))

            print("Inserted : {}".format(specs[St.triples]))
            inserted_correspondences = int(specs[St.triples])
            removed = total_size - inserted_correspondences
            print("Removed : {}".format(removed))

            # LOAD THE METADATA
            # NOT GOOD AS THE LENS ALSO HAS A SINGLETON GRAPH
            # inserted_correspondences = int(Qry.get_union_triples(lens))
            specs[St.removedDuplicates] = specs[St.expectedTriples] - inserted_correspondences
            metadata = Gn.union_meta(specs)
            meta_ans = Qry.boolean_endpoint_response(metadata)
            print("\t>>> IS THE METADATA GENERATED AND INSERTED? {}".format(meta_ans))

            construct_response = Qry.get_constructed_graph(specs[St.lens])
            if construct_response is not None:
                print("\t>>> WRITING TO FILE")
                # THE FIRST OPENING BRACE IS PREFIXED WITH THE NAME OF THE LENS GRAPH
                construct_response = construct_response.replace('{', "<{}>\n{{".format(specs[St.lens]), 1)
                write_to_file(graph_name=specs[St.lens_name], metadata=None,
                              correspondences=construct_response, directory=DIRECTORY)
            print("\tLens created as :  %s" % (specs[St.lens],))

            # REGISTER THE LENS
            Urq.register_lens(specs, is_created=True)

            message = "THE LENS WAS CREATED as {}. " \
                      "With initially {} triples loaded, {} duplicated triples were found and removed.".\
                format(specs[St.lens], total_size, removed)

            print("\t*** JOB DONE! ***")
            return {St.message: message, St.error_code: 0, St.result: specs[St.lens]}

    except Exception as err:
        # logger.warning(err)
        # NOTE(review): insert_ans IS COMPARED TO THE STRING "true" -- CONFIRM THAT
        # Qry.boolean_endpoint_response RETURNS A STRING AND NOT A BOOLEAN
        if insert_ans == "true":
            # DROP THE INSERTED UNION
            drop_linkset(lens, activated=True)
        print("ERROR IN UNION LENS CREATION: %s" % (err,))
        # NOTE(review): ERROR_CODE_11 IS USED WITHOUT THE Ec PREFIX OF THE OTHER
        # ERROR CODES -- CONFIRM THAT THIS NAME IS AVAILABLE IN THE MODULE
        return {St.message: ERROR_CODE_11, St.error_code: 11, St.result: None}