def specification_2_linkset_subset(specs, activated=False): if activated is True: print Ut.headings("EXECUTING LINKSET SUBSET SPECS...") else: print Ut.headings( "THE FUNCTION [specification_2_linkset_subset] IS NOT ACTIVATED") return {St.message: Ec.ERROR_CODE_0, St.error_code: 0, St.result: None} # ACCESS THE TASK SPECIFIC PREDICATE COUNT specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism]) # UPDATE THE QUERY THAT IS GOING TO BE EXECUTED if specs[St.sameAsCount]: source = specs[St.source] target = specs[St.target] # UPDATE THE SPECS OF SOURCE AND TARGETS update_specification(source) update_specification(target) # GENERATE THE NAME OF THE LINKSET Ls.set_subset_name(specs) # SETTING SOME GENERIC METADATA INFO specs[St.link_name] = "same" specs[St.linkset_name] = specs[St.linkset_name] specs[St.link] = "http://risis.eu/linkset/predicate/{}".format( specs[St.link_name]) specs[ St. link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format( specs[St.link_name]) specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name]) specs[St.assertion_method] = "{}{}".format(Ns.method, specs[St.linkset_name]) specs[St.justification] = "{}{}".format(Ns.justification, specs[St.linkset_name]) # COMMENT ON THE LINK PREDICATE specs[St.link_comment] = "The predicate <{}> is used in replacement of the linktype <{}> used in the " \ "original <{}> dataset.".format( specs[St.link], specs[St.source][St.link_old], specs[St.source][St.graph]) # COMMENT ON THE JUSTIFICATION FOR THIS LINKSET specs[St.justification_comment] = "In OrgRef's a set of entities are linked to GRID. The linking method " \ "used by OrgRef is unknown. Here we assume that it is a curated work " \ "and extracted it as a linkset.", # COMMENT ON THE LINKSET ITSELF specs[St.linkset_comment] = "The current linkset is a subset of the <{0}> dataset that links <{0}> to " \ "<{1}>. 
The methodology used by <{0}> to generate this builtin linkset in " \ "unknown.".format(specs[St.source][St.graph], specs[St.target][St.graph]) source[St.entity_ns] = str(source[St.entity_datatype]).replace( source[St.entity_name], '') target[St.entity_ns] = str(target[St.entity_datatype]).replace( target[St.entity_name], '') # GENERATE THE LINKSET inserted_linkset = spa_linkset_subset(specs, activated) # print "LINKSET SUBSET RESULT:", inserted_linkset if inserted_linkset[St.message].__contains__("ALREADY EXISTS"): return inserted_linkset if specs[St.triples] > "0": # REGISTER THE ALIGNMENT if inserted_linkset[St.message].__contains__("ALREADY EXISTS"): Urq.register_alignment_mapping(specs, created=False) else: Urq.register_alignment_mapping(specs, created=True) return inserted_linkset else: print Ec.ERROR_CODE_1 return {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}
def refining(specs, insert_query, activated=False):
    """
    Refine an existing linkset: build the refined linkset at the endpoint,
    generate its metadata, register the alignment and compute the difference
    between the original linkset and its refinement.

    :param specs: dictionary of specifications (keys from St); must contain
        source, target, mechanism and the linkset to refine.
    :param insert_query: callable taking specs and returning either a single
        SPARQL insert query (str) or a pair [extraction_query, finding_query].
    :param activated: forwarded to Df.difference for the difference step.
    :return: {'refined': {...}, 'difference': {...}} dictionaries, or the
        failing check dictionary; returns None implicitly when no match
        could be found (final else branch).
    """
    # DEFAULT (PESSIMISTIC) RESULTS, OVERWRITTEN ON SUCCESS
    refined = {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}
    diff = {St.message: Ec.ERROR_CODE_4, St.error_code: 1, St.result: None}

    # UPDATE THE SPECS VARIABLE
    update_specification(specs)
    update_specification(specs[St.source])
    update_specification(specs[St.target])

    # ACCESS THE TASK SPECIFIC PREDICATE COUNT
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])
    if specs[St.sameAsCount] is None:
        return {'refined': refined, 'difference': diff}

    # GENERATE THE NAME OF THE LINKSET
    Ls.set_refined_name(specs)

    # CHECK WHETHER OR NOT THE LINKSET WAS ALREADY CREATED
    check = Ls.run_checks(specs, check_type="refine")
    if check[St.message] == "NOT GOOD TO GO":
        return check

    # THE LINKSET DOES NOT EXIT, LETS CREATE IT NOW
    print Ls.refined_info(specs, specs[St.sameAsCount])

    # POINT TO THE LINKSET THE CURRENT LINKSET WAS DERIVED FROM
    print "1. wasDerivedFrom {}".format(specs[St.linkset])
    specs[St.derivedfrom] = "\t\tprov:wasDerivedFrom\t\t\t<{}> ;".format(specs[St.linkset])

    print "\n2. RETRIEVING THE METADATA ABOUT THE GRAPH TO REFINE"
    # FETCH THE TYPE AND SINGLETON GRAPH OF THE LINKSET TO REFINE
    metadata_q = """
    prefix ll: <{}>
    SELECT DISTINCT ?type ?singletonGraph
    {{
        # LINKSET METADATA
        <{}>    a                   ?type ;
                ll:singletonGraph   ?singletonGraph .
    }}
    """.format(Ns.alivocab, specs[St.linkset])
    print "QUERY:", metadata_q
    matrix = Qry.sparql_xml_to_matrix(metadata_q)

    # BAIL OUT IF THE ENDPOINT DID NOT ANSWER OR RETURNED NO RESULT
    if matrix:
        if matrix[St.message] == "NO RESPONSE":
            print Ec.ERROR_CODE_1
            print matrix[St.message]
            return {'refined': refined, 'difference': diff}

        elif matrix[St.result] is None:
            print matrix[St.message]
            returned = {St.message: matrix[St.message], St.error_code: 666, St.result: None}
            return {'refined': returned, 'difference': diff}
    else:
        print Ec.ERROR_CODE_1
        return {'refined': refined, 'difference': diff}

    # GET THE SINGLETON GRAPH OF THE LINKSET TO BE REFINED
    # (row 0 is the header row; row 1 column 1 is the singleton graph URI)
    print "\n3. GETTING THE SINGLETON GRAPH OF THE GRAPH TO REFINE"
    specs[St.singletonGraph] = matrix[St.result][1][1]

    # BUILD AND RUN THE INSERT QUERY / QUERIES
    specs[St.insert_query] = insert_query(specs)
    print specs[St.insert_query]
    if type(specs[St.insert_query]) == str:
        is_run = Qry.boolean_endpoint_response(specs[St.insert_query])
    else:
        # A PAIR OF QUERIES: EXTRACTION FIRST, THEN FINDING
        print "\n4. RUNNING THE EXTRACTION QUERY"
        print specs[St.insert_query][0]
        Qry.boolean_endpoint_response(specs[St.insert_query][0])

        print "\n5. RUNNING THE FINDING QUERY"
        print specs[St.insert_query][1]
        is_run = Qry.boolean_endpoint_response(specs[St.insert_query][1])

    print "\n>>> RUN SUCCESSFULLY:", is_run.upper()

    # NO INSERTION HAPPENED
    if is_run == "true" or is_run == Ec.ERROR_STARDOG_1:
        # GENERATE THE
        # (1) LINKSET METADATA
        # (2) LINKSET OF CORRESPONDENCES
        # (3) SINGLETON METADATA
        # AND WRITE THEM ALL TO FILE
        print "GENERATING THE METADATA"
        pro_message = refine_metadata(specs)

        # SET THE RESULT ASSUMING IT WENT WRONG
        refined = {St.message: Ec.ERROR_CODE_4, St.error_code: 4, St.result: None}
        diff = {St.message: Ec.ERROR_CODE_4, St.error_code: 4, St.result: None}

        server_message = "Linksets created as: [{}]".format(specs[St.refined])
        message = "The linkset was created as [{}]. <br/>{}".format(specs[St.refined], pro_message)

        # MESSAGE ABOUT THE INSERTION STATISTICS
        print "\t", server_message

        if int(specs[St.triples]) > 0:
            # UPDATE THE REFINED VARIABLE AS THE INSERTION WAS SUCCESSFUL
            # NOTE(review): St.result points at specs[St.linkset] (the graph
            # that was refined) and not specs[St.refined] — confirm intended.
            refined = {St.message: message, St.error_code: 0, St.result: specs[St.linkset]}

            print "REGISTERING THE ALIGNMENT"
            # NOTE(review): `message` is built above and never contains
            # "ALREADY EXISTS", so the created=False branch looks unreachable.
            if refined[St.message].__contains__("ALREADY EXISTS"):
                register_alignment_mapping(specs, created=False)
            else:
                register_alignment_mapping(specs, created=True)

            try:
                print "\nCOMPUTE THE DIFFERENCE AND DOCUMENT IT"
                diff_lens_specs = {
                    St.researchQ_URI: specs[St.researchQ_URI],
                    St.subjectsTarget: specs[St.linkset],
                    St.objectsTarget: specs[St.refined]
                }
                diff = Df.difference(diff_lens_specs, activated=activated)
                # presumably Df.difference sets St.triples on its specs — TODO confirm
                message_2 = "\t>>> {} CORRESPONDENCES INSERTED AS THE DIFFERENCE".format(
                    diff_lens_specs[St.triples])
                print message_2
            except Exception as err:
                # BEST EFFORT: the refinement itself succeeded, so only log
                print "THE DIFFERENCE FAILED: ", str(err.message)

            print "\tLinkset created as: ", specs[St.refined]
            print "\t*** JOB DONE! ***"
            return {'refined': refined, 'difference': diff}

        else:
            print ">>> NO TRIPLE WAS INSERTED BECAUSE NO MATCH COULD BE FOUND"
            return {'refined': refined, 'difference': diff}

    else:
        # NOTE(review): this branch returns None implicitly — callers that
        # index the result would fail here; confirm this is intended.
        print "NO MATCH COULD BE FOUND."
def lens_transitive(specs, activated=False): # CHECK BOTH DATASETS FOR SAME MECHANISM print "GENERATE THE LENS NAME" Lu.composition_lens_name(specs) print "GET THE SAME AS COUNT" specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.lens_operation]) # print same_as_count # GENERATE THE INSERT QUERY FOR TRANSITIVITY # transitive_analyses = lens_transitive_query(specs) # if transitive_analyses is None: # return # specs[St.insert_query] = transitive_analyses[1] # print insert_query # exit(0) # specs['is_transitive_by'] = transitive_analyses[0] ln = get_uri_local_name(specs[St.lens]) sg = specs[St.subjectsTarget] tg = specs[St.objectsTarget] ssg = "{}{}".format(Ns.singletons, get_uri_local_name(sg)) tsg = "{}{}".format(Ns.singletons, get_uri_local_name(tg)) print "SOURCE: {}".format(sg) print "TARGET: {}".format(tg) print "1. GENERATING THE INSERT QUERY" specs[St.insert_query] = transitive_insert_query(ln, sg, tg, ssg, tsg) if activated is True: # RUN THE QUERY AT THE END POINT print "2. RUNNING THE INSERT QUERY" Qry.boolean_endpoint_response(specs[St.insert_query]) # GET THE SIZE OF THE LENS JUST CREATED ABOVE print "3. ETTING THE SIZE OF THE LENS JUST INSERTED" size = Qry.get_namedgraph_size(specs[St.lens], isdistinct=False) # IF ACTIVATED, INSERT THE METADATA if size > 0: # GENERATE THE METADATA ABOUT THE LENS JUST CREATED print "4. SOME {} TRANSITIVE TRIPLES WERE FOUND".format(size) metadata = transitive_metadata(specs, size) # print metadata print "5. INSERTING THE METADATA" Qry.boolean_endpoint_response(metadata) print "6. REGISTER THE LENS" Urq.register_lens(specs, is_created=True) # RUN A CORRESPONDENCE CONSTRUCT QUERY FOR BACKING UP THE DATA TO DISC print "7. 
GENERATE THE CONSTRUCT FOR FILE DUMP" construct_correspondence = Qry.endpointconstruct( Qry.construct_namedgraph(specs[St.lens])) if construct_correspondence is not None: construct_correspondence = construct_correspondence.replace( '{', "<{}>\n{{".format(specs[St.lens]), 1) # RUN A SINGLETON METADATA CONSTRUCT QUERY FOR BACKING UP THE DATA TO DISC construct_singletons = Qry.endpointconstruct( Qry.construct_namedgraph("{}{}".format(Ns.singletons, specs[St.lens_name]))) if construct_singletons is not None: construct_singletons = construct_singletons. \ replace('{', "<{}{}>\n{{".format(Ns.singletons, specs[St.lens_name]), 1) # WRITE TO FILE print "WRITING TO FILE" write_to_file(graph_name=ln, metadata=metadata, directory=DIRECTORY, correspondences=construct_correspondence, singletons=construct_singletons) # return specs[St.lens] message = "THE LENS WAS CREATED!<br/>URI = {}".format( specs[St.lens]) print message print "\t*** JOB DONE! ***" return { St.message: message, St.error_code: 0, St.result: specs[St.lens] } if activated is False: logger.warning( "THE FUNCTION IS NOT ACTIVATED BUT THE METADATA THAT IS " "SUPPOSED TO BE ENTERED IS WRITEN TO THE CONSOLE.")
def refine_lens(specs, activated=False, check_file=False):
    """
    Refine a lens with a geographic match: run the geo-match, insert the
    generic metadata, write the linkset to file and register the lens.

    :param specs: dictionary of specifications; must contain the key
        'linkset' (renamed here to St.refined), source, target and mechanism.
    :param activated: when False nothing is executed and an ERROR_CODE_0
        response dictionary is returned.
    :param check_file: forwarded to writelinkset.
    :return: a {St.message, St.error_code, St.result} dictionary. Any
        exception is caught, printed, and reported as ERROR_CODE_1.
    """
    try:
        message = Ec.ERROR_CODE_0.replace('\n', "<br/>")
        if activated is False:
            print Ut.headings("THE FUNCTION [refine_lens] IS NOT ACTIVATED")
            return {St.message: message, St.error_code: 4, St.result: None}

        # 1. UPDATING THE SPECS BY CHANGING LINKSET TO TENS
        # (the lens to create refines the linkset passed under 'linkset')
        specs[St.refined] = specs['linkset']
        specs.pop('linkset')
        Ut.update_specification(specs)

        # CHECKING WHETHER THE LENS IS REFINENABLE
        # Refine.is_refinable(specs[St.refined])

        # PRINTING THE SPECIFICATIONS
        # lensUt.print_specs(specs)

        # ASSIGN THE SAME AS COUNT
        specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])
        # DEFAULT FAILURE MESSAGE, OVERWRITTEN ON SUCCESS
        message = Ec.ERROR_CODE_4.replace('\n', "<br/>")

        if specs[St.sameAsCount]:

            source = specs[St.source]
            target = specs[St.target]

            # 2. SET THE LENS NAME
            # *******************************
            print "\n2. SET THE LENS NAME"
            # *******************************
            lensUt.lens_refine_name(specs, 'refine')

            # *******************************
            # GOOD TO GO CHECK
            # *******************************
            query = """
            SELECT *
            {{
                <{}> ?predicate ?object .
            }}
            """.format(specs[St.lens])
            check = Lens_Union.run_checks(specs, query, operator="refine")

            # NOT GOOD TO GO, IT ALREADY EXISTS
            if check[St.message].__contains__("ALREADY EXISTS"):
                return {
                    St.message: check[St.message],
                    St.error_code: 71,
                    St.result: specs[St.lens]
                }

            # *******************************
            # GOOD TO GO
            # *******************************
            else:
                lens_start = time.time()

                # UPDATE THE SPECIFICATION
                Ut.update_specification(specs[St.source])
                Ut.update_specification(specs[St.target])

                # PRINTING THE SPECIFICATIONS
                lensUt.print_specs(specs)

                ########################################################################
                print """\n4. EXECUTING THE GEO-MATCH """
                ########################################################################
                geo_match(specs)

                ########################################################################
                print """\n5. EXTRACT THE NUMBER OF TRIPLES """
                ########################################################################
                specs[St.triples] = Qry.get_namedgraph_size(
                    "{0}{1}".format(Ns.lens, specs[St.lens_name]))

                ########################################################################
                print """\n6. ASSIGN THE SPARQL INSERT QUERY """
                ########################################################################
                # LOAD SOURCE ; LOAD TARGET ; MATCH — concatenated for the record
                specs[St.insert_query] = "{} ;\n{};\n{}".format(
                    geo_load_query(specs, True),
                    geo_load_query(specs, False),
                    geo_match_query(specs))

                lens_end = time.time()
                diff = lens_end - lens_start
                print "\n\t>>> Executed so far in : {:<14}".format(
                    str(datetime.timedelta(seconds=diff)))

                if int(specs[St.triples]) > 0:

                    ########################################################################
                    print """\n4. INSERTING THE GENERIC METADATA """
                    ########################################################################
                    metadata = Gn.lens_refine_geo_metadata(specs)
                    Qry.boolean_endpoint_response(metadata)

                    ########################################################################
                    print """\n5. WRITING TO FILE """
                    ########################################################################
                    src = [source[St.graph_name], "", source[St.entity_ns]]
                    trg = [target[St.graph_name], "", target[St.entity_ns]]
                    # linkset_path = "D:\datasets\Linksets\ExactName"
                    linkset_path = DIRECTORY
                    writelinkset(src, trg, specs[St.lens_name], linkset_path, metadata,
                                 check_file=check_file)

                    server_message = "Linksets created as: {}".format(specs[St.lens])
                    message = "The linkset was created as [{}] with {} triples found!".format(
                        specs[St.lens], specs[St.triples])
                    print "\n\t", server_message

                    Urq.register_lens(specs, is_created=True)

                    ls_end_2 = time.time()
                    diff = ls_end_2 - lens_end
                    print ">>> Executed in : {:<14}".format(
                        str(datetime.timedelta(seconds=diff)))
                    print "\t*** JOB DONE! ***"
                    return {
                        St.message: message,
                        St.error_code: 0,
                        St.result: specs[St.lens]
                    }

                else:
                    print "\tThe linkset was not generated as no match could be found"
                    print "\t*** JOB DONE! ***"
                    return {
                        St.message: message,
                        St.error_code: 4,
                        St.result: None
                    }

    except Exception as err:
        # TOP-LEVEL BOUNDARY: print the traceback and report a generic error
        traceback.print_exc()
        return {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}

        # print geo_load_query(specs, is_source=True)
        # print geo_load_query(specs, is_source=False)
        # geo_match_query(specs)
        # traceback.print_exception()


# import Alignments.Manage.AdminGraphs as adm
# adm.drop_a_lens("http://risis.eu/lens/refine_union_Grid_20170712_Eter_2014_N291690309", display=True, activated=True)
# refine_lens(specs_example, activated=True, check_file=False)
#
# adm.drop_a_lens("http://risis.eu/lens/refine_union_Orgreg_20170718_Eter_2014_P1061032980", display=True, activated=True)
# refine_lens(specs_example_2, activated=True, check_file=False)
#
# adm.drop_a_lens("http://risis.eu/lens/refine_union_Orgreg_20170718_Grid_20170712_N1966224323", display=True, activated=True)
# refine_lens(specs_example_3, activated=True, check_file=False)