def main_alignment(alignment): # **************************************************************************** # GIVEN AN ALIGNMENT, RETURN THE MAIN ONE # **************************************************************************** try: # LOCAL NAME OF THE GRAPH name = Ut.get_uri_local_name_plus(alignment) print "{:12} : {}".format("LOCAL NAME", name) query_search = std_queries["graphs_search"].format(name) response = Qry.sparql_xml_to_matrix(query_search) results = response["result"] if results is not None: for i in range(1, len(results)): if results[i][0].__contains__("singletons") is False: return results[i][0] if str(alignment).__contains__(Ns.singletons): return str(alignment).replace(Ns.singletons, Ns.linkset) else: return alignment except ValueError: traceback.print_exc() return alignment
def added(early_version, late_version, stat=False, display=True, activated=False): if activated is False: print "\nTHE FUNCTION [added] IS NOT ACTIVATED" return {St.subject: None, St.predicate: None, St.triples: None} if stat is False: subj_added = subject(late_version, early_version, count=stat) prop_added = predicate(late_version, early_version, count=stat) # RESPONSE FOR TRIPLES ADDED resp_subj_added = Qr.sparql_xml_to_matrix(subj_added) resp_prop_added = Qr.sparql_xml_to_matrix(prop_added) status = (resp_subj_added[St.result] is not None and len(resp_subj_added[St.result]) > 1) or \ (resp_prop_added[St.result] is not None and len(resp_prop_added[St.result]) > 1) if display is True: # DISPLAY THE RESULTS FOR SUBJECT ADDED print "\n>>> DISPLAY THE RESULTS FOR SUBJECT ADDED" Qr.display_matrix(resp_subj_added, limit=10, is_activated=True) # DISPLAY THE RESULTS FOR PREDICATE ADDED print "\n>>> DISPLAY THE RESULTS FOR PREDICATE ADDED" Qr.display_matrix(resp_prop_added, limit=10, is_activated=True) return {"status": status, St.subject: resp_subj_added[St.result], St.predicate: resp_prop_added[St.result]} else: subj_added = subject(late_version, early_version, count=stat) prop_added = predicate(late_version, early_version, count=stat) resp_subj_added = Qr.sparql_xml_to_matrix(subj_added) resp_prop_added = Qr.sparql_xml_to_matrix(prop_added) status = (resp_subj_added[St.result] is not None and int(resp_subj_added[St.result][1][0]) > 0)\ or (resp_prop_added[St.result] is not None and int(resp_prop_added[St.result][1][0]) > 0) return {"status": status, St.subject: resp_subj_added[St.result][1][0], St.predicate: resp_prop_added[St.result][1][0]}
def linkset_wasderivedfrom(refined_linkset_uri):
    """Return the prov:wasDerivedFrom parent of the given linkset, or None.

    Queries the store for the single provenance triple attached to the
    refined linkset URI and returns the first data cell of the result.
    """
    derivation_query = """
    select *
    {{
        <{}> <http://www.w3.org/ns/prov#wasDerivedFrom> ?wasDerivedFrom .
    }}
    """.format(refined_linkset_uri)
    matrix = Qry.sparql_xml_to_matrix(derivation_query)
    # Row 0 is the header; row 1 column 0 holds the parent linkset URI.
    if matrix and matrix[St.result]:
        return matrix[St.result][1][0]
    return None
def check_constraint():
    """Return True when the clustered resources satisfy the comma-separated
    textual constraint over the configured constraint targets.

    NOTE(review): relies on the enclosing-scope/global names
    `constraint_text`, `constraint_targets`, `resources` and `linkset`
    (this appears to be a lifted copy of the nested helper used by the
    cluster-investigation routine).
    """
    terms = constraint_text.lower().split(",")

    # CONSTRAINT BUILDER
    builder = Buffer.StringIO()

    if constraint_targets is not None:
        for spec in constraint_targets:
            target_graph = spec[St.graph]
            first_properties = spec[St.data][0][St.properties]
            constraint_prop = first_properties[0] if Ut.is_nt_format(first_properties[0]) \
                else "<{}>".format(first_properties[0])

            # WRITING THE CONSTRAINT ON THE GRAPH: restrict ?lookup to
            # resources carrying the constraint property in this graph.
            graph_clause = """
        {{ GRAPH <{0}>
            {{
                ?lookup {1} ?constraint .
            }}
        }}
        """.format(target_graph, constraint_prop)
            if len(builder.getvalue()) == 0:
                builder.write(graph_clause)
            else:
                builder.write("UNION {}".format(graph_clause))

    # WRITING THE FILTER: case-insensitive match on any of the terms.
    if len(builder.getvalue()) > 0:
        for position in range(len(terms)):
            term = terms[position].strip()
            if position == 0:
                builder.write("""
        FILTER (LCASE(STR(?constraint)) = \"{}\" """.format(term))
            else:
                builder.write("""
        || LCASE(STR(?constraint)) = \"{}\" """.format(term))
        builder.write(")")

    # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
    query = Qry.cluster_rsc_strengths_query(resources, linkset)
    query = query.replace("# CONSTRAINTS IF ANY", builder.getvalue())
    response = Qry.sparql_xml_to_matrix(query)
    return response[St.result] is not None
def main_alignment(alignment): # LOCAL NAME OF THE GRAPH name = Ut.get_uri_local_name_plus(alignment) print "{:12} : {}".format("LOCAL NAME", name) query = std_queries["graphs_search"].format(name) response = Qry.sparql_xml_to_matrix(query) results = response["result"] if results is not None: for i in range(1, len(results)): if results[i][0].__contains__("singletons") is False: return results[i][0] if str(alignment).__contains__(Ns.singletons): return str(alignment).replace(Ns.singletons, Ns.linkset) else: return alignment
def get_corr_reducer(graph):
    """Build a reducer dictionary from every link in the given graph.

    :param graph: URI of the correspondence graph to read.
    :return: dict keyed by (source_uri, target_uri) pairs, each mapped
             to 1; empty when the graph yields no result.
    """
    query = """
    SELECT ?uri1 ?uri2
    {{
        GRAPH <{}>
        {{
            ?uri1  ?p  ?uri2 .
        }}
    }}""".format(graph)
    alignment = Qry.sparql_xml_to_matrix(query)
    table_matrix = alignment[St.result]
    reducer_dict = {}
    # BUG FIX: the original called len() on a possibly-None matrix
    # (sparql_xml_to_matrix returns None on failure) -> TypeError.
    if table_matrix:
        # Skip the header row.
        for row in table_matrix[1:]:
            # BUG FIX: check the row length BEFORE indexing row[1];
            # the original stripped row[1] first and could IndexError.
            if len(row) == 2:
                pair = (row[0].strip(), row[1].strip())
                if pair not in reducer_dict:
                    reducer_dict[pair] = 1
    return reducer_dict
def get_table(dataset_specs, reducer=None): # ADD THE REDUCER IF SET. THE REDUCER OR (DATASET REDUCER) HELPS ELIMINATING # THE COMPUTATION OF SIMILARITY FOR INSTANCES THAT WHERE ALREADY MATCHED print "\nLOADING: {} {}".format(dataset_specs[St.graph], dataset_specs[St.entity_datatype]) if reducer is None: reducer_comment = "#" reducer = "" else: reducer_comment = "" reducer = reducer aligns = dataset_specs[St.aligns] if Ut.is_nt_format(dataset_specs[St.aligns]) \ else "<{}>".format(dataset_specs[St.aligns]) query = """ SELECT DISTINCT * {{ GRAPH <{0}> {{ ?subject a <{1}> ; {2} ?object . }} {4}FILTER NOT EXISTS {4}{{ {4} GRAPH <{3}> {4} {{ {4} {{ ?subject ?pred ?obj . }} {4} UNION {4} {{ ?obj ?pred ?subject. }} {4} }} {4}}} }} {5} """.format(dataset_specs[St.graph], dataset_specs[St.entity_datatype], aligns, reducer, reducer_comment, LIMIT) table_matrix = Qry.sparql_xml_to_matrix(query) # Qry.display_matrix(table_matrix, is_activated=True) # print table_matrix # print query if table_matrix[St.result]: print "\tINPUT SIZE: {}".format(str(len(table_matrix[St.result]) - 1)) return table_matrix[St.result]
def get_table(dataset_specs, reducer=None):
    """Return the (subject, object) matrix for a dataset specification,
    optionally excluding resources already present in the reducer graph.

    NOTE(review): duplicates (and shadows) an earlier `get_table` that
    additionally printed progress information — worth consolidating.
    """
    # ADD THE REDUCER IF SET: "#" comments the reducer clause out.
    if reducer is None:
        toggle, reducer_graph = "#", ""
    else:
        toggle, reducer_graph = "", reducer

    raw = dataset_specs[St.aligns]
    aligns = raw if Ut.is_nt_format(raw) else "<{}>".format(raw)

    query = """
    SELECT DISTINCT *
    {{
        GRAPH <{0}>
        {{
            ?subject
                a       <{1}> ;
                {2}     ?object .
        }}
        {4}FILTER NOT EXISTS
        {4}{{
        {4}    GRAPH <{3}>
        {4}    {{
        {4}        {{ ?subject ?pred ?obj . }}
        {4}        UNION
        {4}        {{ ?obj ?pred ?subject. }}
        {4}    }}
        {4}}}
    }} {5}
    """.format(dataset_specs[St.graph], dataset_specs[St.entity_datatype],
               aligns, reducer_graph, toggle, LIMIT)

    table_matrix = Qry.sparql_xml_to_matrix(query)
    return table_matrix[St.result]
def lens_targets_unique(unique_list, graph):
    """Recursively collect the unique target datasets behind a graph.

    For a void:Linkset the targets are appended directly to
    `unique_list` (mutated in place); for a bdb Lens with a union
    operator the function recurses into each of the lens' targets.
    Graphs of any other type are silently ignored.
    """

    def get_targets(graph_uri):
        # Query for the datasets a LINKSET points at, via any of the
        # target predicates used across the project.
        target_query = PREFIX + """
        ### GET LINKSET METADATA
        SELECT DISTINCT ?g
        WHERE
        {{
            {{ <{0}> void:subjectsTarget ?g . }}
            UNION
            {{ <{0}> void:objectsTarget ?g . }}
            UNION
            {{ <{0}> void:target ?g . }}
            UNION
            {{
                <{0}> alivocab:hasAlignmentTarget ?alignmentTarget .
                ?alignmentTarget alivocab:hasTarget ?g .
            }}
        }}
        """.format(graph_uri)
        return target_query

    def get_lens_union_targets(lens):
        # Query for the direct targets of a union LENS (which may
        # themselves be linksets or lenses — hence the recursion).
        u_query = PREFIX + """
        select *
        {{
            {{ <{0}> void:target ?target . }}
            UNION
            {{
                <{0}> alivocab:hasAlignmentTarget ?alignmentTarget .
                ?alignmentTarget alivocab:hasTarget ?target .
            }}
        }}
        """.format(lens)
        return u_query

    # THIS FUNCTION TAKES AS INPUT A LENS AND FILLS IN THE DICTIONARY
    # ARGUMENT WITH UNIQUE DATASETS INVOLVED IN THE LENS

    # GET THE TYPE OF THE GRAPH: e.g.: http://rdfs.org/ns/void#Linkset
    type_matrix = Qry.get_graph_type(graph)
    # print type_matrix
    if type_matrix[St.message] != "NO RESPONSE":
        if type_matrix[St.result]:
            # THIS IS THE BASE OF THE RECURSION
            if type_matrix[St.result][1][0] == "http://rdfs.org/ns/void#Linkset":
                # QUERY FOR THE GRAPHS/DATASETS
                query = get_targets(graph)
                result = Qry.sparql_xml_to_matrix(query)
                # print query
                # print "\n\nRESULT:", result

                # SAVE THE GRAPH AND MAKE SURE THEY ARE UNIQUE
                for i in range(1, len(result[St.result])):
                    if result[St.result][i][0] not in unique_list:
                        unique_list.append(result[St.result][i][0])
                        # print result[i]
                return

            if type_matrix[St.result][1][0] == "http://vocabularies.bridgedb.org/ops#Lens":
                # GET THE OPERATOR
                # alivocab:operator http://risis.eu/lens/operator/union
                operator = Qry.get_lens_operator(graph)
                print "\nOPERATOR:", operator

                if operator == "http://risis.eu/lens/operator/union":
                    # GET THE LIST OF TARGETS and recurse into each one.
                    target_matrix = Qry.sparql_xml_to_matrix(get_lens_union_targets(graph))
                    if target_matrix[St.result]:
                        for i in range(1, len(target_matrix[St.result])):
                            lens_targets_unique(unique_list, target_matrix[St.result][i][0])
def run_checks(specs, query):
    """Run the pre-creation diagnostics for a lens.

    CHECK 1 asks the metadata store whether a lens with the same graph
    composition already exists; CHECK 2 asks whether a graph already
    exists under the lens name.  Returns a dict with St.message,
    St.error_code (0 = good to go, 1 = error/exists) and St.result.
    """
    print "\n3. RUNNING GOOD TO GO CHECK"
    # print "QUERY FOR CHECK:", query

    # CHECK-1: CHECK WHETHER THE LENS EXIST BY ASKING ITS METADATA WHETHER IT IS COMPOSED OF THE SAME GRAPHS
    print "QUERY:", query
    ask = Qry.sparql_xml_to_matrix(query)
    print "\t3.1 ANSWER 1:", ask['message']

    # ASK IS NOT SUPPOSED TO BE NONE
    # CHECK-1-RESULT: PROBLEM CONNECTING WITH THE SERVER
    if ask is None:
        # print "IN 1"
        print Ec.ERROR_CODE_1
        return {St.message: Ec.ERROR_CODE_1, St.error_code: 1, St.result: None}

    # CHECK-1-RESULT: ASK HAS A RESULT, MEANING THE LENS EXIT UNDER THE SAME COMPOSITION OF GRAPHS
    elif ask[St.message] != "NO RESPONSE":
        print "\tFOUND"
        if ask[St.result]:
            for i in range(1, len(ask[St.result])):
                print "\t\t- {}".format(ask[St.result][i][0])

        # IF THERE IS RESULT WITH THE SAME NUMBER OF TARGETS THEN THE LENS ALREADY EXISTS
        if ask[St.result] and len(ask[St.result]) - 1 == len(specs[St.datasets]):
            message = Ec.ERROR_CODE_7.replace("#", specs[St.lens]).replace("@", ask[St.result][1][0])
            print message
            return {St.message: message.replace("\n", "<br/>"),
                    St.error_code: 1,
                    St.result: specs[St.lens]}
        print "\tCHECK 1: THERE IS NO METADATA FOR TIS LENS"
        # ELSE
        # WITH THE UNSTATED ELSE, WE GET OUT AND PROCEED TO THE CREATION OF A NEW LENS

    else:
        print "IN 3"
        print Ec.ERROR_CODE_1
        return {St.message: Ec.ERROR_CODE_1, St.error_code: 1, St.result: None}

    # print "GOT OUT!!!"
    update_specification(specs)

    # print "CHECK 2: CHECK WHETHER THE ACTUAL LENS EXISTS UNDER THIS NAME"
    check_02 = "\nASK {{ graph <{}> {{ ?S ?p ?o . }} }}".format(specs[St.lens])
    ask = Qry.boolean_endpoint_response(check_02)
    # print specs
    # print check_02
    # print ask

    if ask is None:
        # PROBLEM CONNECTING WITH THE SERVER
        print " CHECK 2: PROBLEM CONNECTING WITH THE SERVER"
        print Ec.ERROR_CODE_1
        return {St.message: Ec.ERROR_CODE_1,
                St.error_code: 1,
                St.result: specs[St.lens]}

    if ask == "true":
        print " CHECK 2: THE LINKSET ALREADY EXISTS"
        message = Ec.ERROR_CODE_6.replace("#", specs[St.lens])
        print message
        return {St.message: message.replace("\n", "<br/>"),
                St.error_code: 1,
                St.result: specs[St.lens]}

    print "\n\tDIAGNOSTICS: GOOD TO GO\n"
    return {St.message: "GOOD TO GO",
            St.error_code: 0,
            St.result: "GOOD TO GO"}
def reconstruct(linkset, gr_type, predicate):
    """Rebuild the INSERT query that materialises a linkset or lens
    (plus its singleton-metadata graph) into a temporary graph.

    :param linkset: URI of the linkset or lens to reconstruct.
    :param gr_type: "LINKSET" or "LENS" (case-insensitive).
    :param predicate: predicate/local-name used in the generated triples
                      and the temporary graph name.
    :return: [sub_obj, insert_q] on success, or None when the lens'
             linksets do not share the same subject/object targets.
    """
    print "RECONSTRUCTING"
    # pattern = re.compile('[^a-zA-Z]')
    graph_format = "\t{:40} {}"
    sub_obj = None
    source = ""
    target = ""
    correspondence = ""
    singleton = ""
    singleton_triple = "\n\t\t?{:50} ?{:20} ?{} .".format("subject", "sing_predicate", "object")
    singleton_matrix = Qry.sparql_xml_to_matrix(Qry.get_singleton_graph(linkset))
    # print "Singleton graph of {}".format(linkset), singleton_matrix
    # exit(0)

    # SINGLETON EXAMPLE
    # GRAPH <http://risis.eu/lens/singletonMetadata/transitive_C000_ExactName>
    # {
    #     ?subject sing_predicate ?object .
    # }
    if singleton_matrix is not None and singleton_matrix[St.result] is not None:
        singleton_graph = singleton_matrix[St.result][1][0]
        if singleton_graph is not None:
            singleton = "\n{}\n{}\n{}\n{}\n" \
                .format("\tGRAPH <{}>".format(singleton_graph),
                        "\t{",
                        "\t\t?{:50} ?{:20} ?{} .".format("subject", "sing_predicate", "object"),
                        "\t}")
            # print "\t", singleton

    # print str(graph_type).upper()
    # print str(graph_type).upper() == "LINKSET"

    # ABOUT LINKSET UNION
    if str(gr_type).upper() == "LINKSET":
        print "\nRECONSTRUCTING CASE: Linkset"
        datatype_matrix = Qry.get_linkset_datatypes(linkset)
        # print datatype_matrix
        if datatype_matrix is not None and datatype_matrix[St.result]:
            # Columns 4 and 5 hold the subject/object datatype URIs.
            sub_obj = datatype_matrix[St.result][1][4:6]
            # source = pattern.sub("", str(datatype_matrix[St.result][1][4]))
            source = get_uri_local_name(str(datatype_matrix[St.result][1][4]))
            # target = pattern.sub("", str(datatype_matrix[St.result][1][5]))
            target = get_uri_local_name(str(datatype_matrix[St.result][1][5]))

            # CORRESPONDENCE EXAMPLE
            # GRAPH <http://risis.eu/lens/transitive_C000_ExactName>
            # {
            #     ?leidenRanking ?singPre ?eter .
            # }
            # NOTE(review): placement inside this if-branch inferred —
            # the expression uses source/target set just above; confirm.
            correspondence = "{}\n{}\n{}\n{}". \
                format("\tGRAPH <{}>".format(linkset),
                       "\t{",
                       "\t\t?{:50} ?{:20} ?{} .".format(source, predicate, target),
                       "\t}")

    # DETERMINING WHETHER A LENS IS STEMMED FROM THE SAME subjectsTarget & objectsTarget
    elif str(gr_type).upper() == "LENS":
        print "\nRECONSTRUCTING CASE: Lens"
        # TODO USE PROPERTY PATH
        query = """
    PREFIX bdb: <http://vocabularies.bridgedb.org/ops#>
    PREFIX void: <http://rdfs.org/ns/void#>
    SELECT ?target ?subjectsTarget ?objectsTarget
    {{
        <{}> void:target ?target .
        ?target
            void:subjectsTarget ?subjectsTarget ;
            void:objectsTarget ?objectsTarget .
    }}
    """.format(linkset)
        # print query
        evaluation = False
        datatype_matrix = Qry.sparql_xml_to_matrix(query)
        # print "DATATYPE: ", datatype_matrix
        # print len(datatype_matrix)
        if datatype_matrix is None:
            print "THERE IS NO METADATA FOR THIS DATASET. "
            print "\nNO POSSIBLE RECONSTRUCTION FOR {}: {}".format(gr_type, linkset)
            print "ARE YOU SURE THE GRAPH IS OF TYPE [{}]?".format(gr_type)
            return None

        elif (datatype_matrix is not None) and (len(datatype_matrix) > 1):
            # Compare every linkset's (subjectsTarget, objectsTarget)
            # pair against the first one; all must agree.
            element = datatype_matrix[St.result][1][1:]
            # print element
            for i in range(1, len(datatype_matrix)):
                check = datatype_matrix[St.result][i][1:]
                evaluation = element == check
                # print check
                # print "result: ", evaluation
                if evaluation is not True:
                    evaluation = False
                    break
                else:
                    evaluation = True

            if evaluation is True:
                # singleton_matrix = sparql_xml_to_matrix(singleton_graph_query, database_name, host)
                sub_obj = element
                # source = pattern.sub("", str(element[0]))
                source = get_uri_local_name(str(element[0]))
                # target = pattern.sub("", str(element[1]))
                target = get_uri_local_name(str(element[1]))
                correspondence = "{}\n{}\n{}\n{}" \
                    .format("\tGRAPH <{}>".format(linkset),
                            "\t{",
                            "\t\t?{:50} ?{:20} ?{} .".format(source, predicate, target),
                            "\t}")
                print graph_format.format(sub_obj[0], sub_obj[1])
            else:
                return None

    # TEMPORARY GRAPH EXAMPLE
    # INSERT
    # {
    #     GRAPH temp:load001
    #     {
    #         ?leidenRanking ?singPre ?eter .
    #         ?subject ?sing_predicate ?object .
    #     }
    # }
    # WHERE
    # {
    #     GRAPH <http://risis.eu/lens/transitive_C000_ExactName>
    #     {
    #         ?leidenRanking ?singPre ?eter .
    #     }
    #     GRAPH <http://risis.eu/lens/singletonMetadata/transitive_C000_ExactName>
    #     {
    #         ?subject ?sing_predicate ?object .
    #     }
    # }
    insert_q = "{}\n{}\n{}\n{}\n{}{}\n{}\n{}" \
               "\n{}\n{}\n{}\n{}{}". \
        format("INSERT",
               "{",
               "    GRAPH tmpgraph:{}".format(predicate),
               "    {",
               "\t\t?{:50} ?{:20} ?{} .".format(source, predicate, target),
               "{}".format(singleton_triple),
               "    }",
               "}",
               "WHERE",
               "{",
               "{}".format(correspondence),
               "{}".format(singleton),
               "}")

    # NOTE(review): `singleton` is initialised to "" and never set to
    # None, so this guard is always true as written — confirm intent.
    if singleton is not None:
        correspondence += singleton

    if sub_obj is not None:
        print graph_format.format(sub_obj[0], sub_obj[1])

    return [sub_obj, insert_q]
def is_refinable(graph):
    """Decide whether a lens is refinable.

    A lens is refinable when ALL linksets involved share one and the
    same specification (subjects/objects target and datatype), i.e. the
    DISTINCT query below yields exactly one data row.

    :param graph: URI of the lens to examine.
    :return: dict with St.message (True/False), St.result (the result
             matrix when refinable, else the raw response) and a
             'description' StringIO holding the human-readable report.
    """
    # x = "http://risis.eu/lens/union_Grid_20170712_H2020_P1626350579"
    description = Buffer.StringIO()
    query = """
    PREFIX bdb: <{}>
    PREFIX void: <{}>
    PREFIX ll: <{}>
    SELECT DISTINCT ?subjectsTarget ?objectsTarget ?subjectsDatatype ?objectsDatatype
    {{
        <{}> void:target|void:subjectsTarget|void:objectsTarget ?linkset .
        ?linkset
            void:objectsTarget ?objectsTarget ;
            void:subjectsTarget ?subjectsTarget ;
            bdb:objectsDatatype ?objectsDatatype ;
            bdb:subjectsDatatype ?subjectsDatatype .
    }}""".format(Ns.bdb, Ns.void, Ns.alivocab, graph)

    response = Qry.sparql_xml_to_matrix(query)
    # print response

    if response:
        result = response[St.result]

        # Exactly one data row (header + 1): every linkset shares the
        # same specification, so the lens is refinable.
        if result is not None and len(result) == 2:
            description.write(
                "\n{}\nIS REFINABLE AS ALL LINKSETS INVOLVED IN "
                "THE LENS SHARE THE SAME SPECIFICATION DESCRIBED BELOW ...".format(graph))
            for i in range(1, len(result)):
                description.write("\n\n\t{:17}: {}".format(result[0][0], result[i][0]))
                description.write("\n\t{:17}: {}".format(result[0][1], result[i][1]))
                description.write("\n\t{:17}: {}".format(result[0][2], result[i][2]))
                description.write("\n\t{:17}: {}\n".format(result[0][3], result[i][3]))
            print description.getvalue()
            return {St.message: True, St.result: result, 'description': description}

        description.write("\n{}\nIS NOT REFINABLE...".format(graph))

        # More than one distinct specification: report each of them.
        if result is not None:
            result = response[St.result]
            description.write(
                " AS THE LINKSETS INVOLVED IN "
                "THE LENS DO NOT SHARE THE SAME SPECIFICATIONS AS DESCRIBED BELOW ...")
            for i in range(1, len(result)):
                description.write("\n\n{:17}: {}".format(result[0][0], result[i][0]))
                description.write("\n{:17}: {}".format(result[0][1], result[i][1]))
                description.write("\n{:17}: {}".format(result[0][2], result[i][2]))
                description.write("\n{:17}: {}\n".format(result[0][3], result[i][3]))
            print description.getvalue()
            description.write(" {}".format(response[St.message]))
            print description.getvalue()
            return {St.message: False, St.result: response, 'description': description}

        # No result matrix at all: append the server message instead.
        description.write(" {}".format(response[St.message]))
        description.write("\n{}\nIS NOT REFINABLE...".format(graph))
        print description.getvalue()

    # Also reached when the endpoint returned no response at all.
    return {St.message: False, St.result: response, 'description': description}
def university_connected_geo(file_path, merged_lens, country_constraint, activated=False):
    """Export, as a tab-separated sheet, the universities connected in a
    merged lens, for ETER and GRID, filtered by country code.

    :param file_path: destination file for the generated sheet.
    :param merged_lens: URI of the lens connecting the organisations.
    :param country_constraint: list of country codes to keep (case-
                               insensitive match, OR-ed together).
    :param activated: safety switch; nothing runs unless explicitly True.
    """
    if activated is False:
        print "THE FUNCTION [university_connected] IS NOT ACTIVATED."
        return None

    # VARIABLES
    grid = "<http://www.grid.ac/ontology/hasAddress>/<http://www.grid.ac/ontology/countryCode>"
    eter = "<http://risis.eu/eter_2014/ontology/predicate/Country_Code>"
    country_predicates = [eter, grid]
    graphs = [Data.eter_GRAPH, Data.grid_GRAPH]
    names = [">>> ETER", ">>> GRID"]
    results = [{"result": None}, {"result": None}]
    size = 0
    excel = Builder.StringIO()

    # 1. THE QUERY CONSTRAINT FILTER: case-insensitive OR over the codes.
    query_filter = ""
    for i in range(0, len(country_constraint)):
        query_filter += "ucase(?country) = ucase(\"{}\")".format(country_constraint[i]) if i == 0 \
            else " || ucase(?country) = ucase(\"{}\")".format(country_constraint[i])

    # 2. MAIN QUERY (template; {0}=lens {1}=dataset graph {2}=country
    #    predicate {3}=filter {4}=orgreg graph)
    query = """
    PREFIX lens:<http://risis.eu/lens/>
    PREFIX dataset:<http://risis.eu/dataset/>
    PREFIX property:<http://risis.eu/orgreg_20170718/ontology/predicate/>
    PREFIX rsc:<http://risis.eu/orgreg_20170718/ontology/class/>
    SELECT DISTINCT ?entity ?university ?name
    {{
        {{
            SELECT DISTINCT ?entity ?university ?name
            {{
                # UNIVERSITIES IN ORGREG
                GRAPH <{4}>
                {{
                    ?orgreg_entity a rsc:University .
                    ?orgreg_entity property:Entity_current_name_English ?name .
                }}
                # UNIVERSITIES CONNECTED
                GRAPH <{1}>
                {{
                    ?entity a ?type .
                }}
                # ALL UNIVERSITIES CONNECTED IN THE LENS
                BIND(<{0}> AS ?lens)
                {{ GRAPH ?lens {{ ?entity ?pred ?orgreg_entity . }}}}
                UNION
                {{ GRAPH ?lens {{ ?orgreg_entity ?pred ?entity . }}}}
            }}
        }}
        VALUES ?name_pred
        {{
            <http://www.w3.org/2000/01/rdf-schema#label>
            <http://risis.eu/eter_2014/ontology/predicate/Institution_Name>
            <http://risis.eu/orgreg_20170718/ontology/predicate/Entity_current_name_English>
            <http://risis.eu/orgref_20170703/ontology/predicate/Name>
            <http://risis.eu/leidenRanking_2015/ontology/predicate/actor>
            <http://xmlns.com/foaf/0.1/name>
        }}
        # FETCH ORGANIZATION URI AND NAME
        GRAPH <{1}>
        {{
            ?entity ?name_pred ?university .
            OPTIONAL {{ ?entity {2} ?_country . }}
            BIND (IF(bound(?_country), ?_country , "NONE") AS ?country)
            FILTER ({3})
        }}
    }} ORDER BY ?university
    """

    # QUERY LOOP: once per dataset (0 = ETER, 1 = GRID).
    for i in range(0, 2):
        if i > 2:
            continue
        start = time.time()
        cur_query = query.format(merged_lens, graphs[i], country_predicates[i], query_filter, Data.orgreg_GRAPH)
        # print cur_query
        results[i] = Qr.sparql_xml_to_matrix(cur_query)
        # Qr.display_result(query=cur_query, spacing=50, limit=5, is_activated=True)
        temp_size = dict(results[i])["result"].__len__() - 1
        elapsed = str(datetime.timedelta(seconds=time.time() - start))
        sofar = str(datetime.timedelta(seconds=time.time() - begining))
        print "\n {} in {} and so far in [{}]".format(names[i], elapsed, sofar)
        # Track the largest result so the output loop covers all rows.
        if temp_size > size:
            size = temp_size
        # print cur_query
        # exit(0)

    print "\n >>> MAX SIZE {}".format(size)

    # GOING THROUGH THE SIX DATASETS
    for row in range(1, size + 1):
        excel.write(str(row) + "\t")
        # GO THROUGH EATCH RESULT
        for i in range(0, 2):
            query_results = dict(results[i])["result"]
            if query_results is not None and row < len(query_results):
                elt = "{}\t{}\t{}".format(query_results[row][0],
                                          query_results[row][1].replace("\t", ""),
                                          query_results[row][2].replace("\t", ""))
                # Last dataset column ends the line; earlier ones tab-separate.
                excel.write(elt + "\t") if i < 1 else excel.write(elt + "\n")
            else:
                excel.write("\t\t\t") if i < 1 else excel.write("\t\t\n")
        # SAMPLE
        if row == 100:
            print "\n", excel.getvalue()
            # break

    # SAMPLE
    # print "\n", excel.getvalue()
    with open(name=file_path, mode="wb") as writer:
        writer.write(excel.getvalue())
def register_research_question(question):
    """Register a research question unless an identical one already exists.

    Checks the store for an existing question first; when found, returns
    its URI with MESSAGE_1.  Otherwise inserts the question and returns
    the newly minted URI with MESSAGE_2.  On connection problems the
    underlying None/partial response is propagated.
    """
    print "REGISTERING A RESEARCH QUESTION." \
          "\n======================================================" \
          "========================================================"

    # NOTE(review): the `if True:` wrapper is a leftover scaffold kept
    # for byte-compatibility; the body always runs.
    if True:
        # CHECK WHETHER THE RESEARCH QUESTION ALREADY EXISTS
        question = to_bytes(to_unicode(question, "utf-8"))
        existence_query = check_rq_existence(question)
        check = Qry.boolean_endpoint_response(existence_query)

        # LOOK FOR A RESEARCH QUESTION OF THE SAME NAMES GRAPH
        find_query = find_rq(question)

        # AN INTERNAL PROBLEM OCCURRED
        if check is None:
            return check

        # THE RESEARCH QUESTION WAS ALREADY REGISTERED
        elif check == "true":
            find = Qry.sparql_xml_to_matrix(find_query)
            # print find
            if find:
                if find[St.result]:
                    message = MESSAGE_1.replace("@", find[St.result][1][0])
                    print message
                    return {St.message: message.replace("@", "<br/>"),
                            St.result: find[St.result][1][0]}
                return find
            else:
                return find

        # REGISTERING YOUR RESEARCH QUESTION
        else:
            print "REGISTERING THE RESEARCH QUESTION"
            ins_rq = research_question(question)
            # print ins_rq
            inserted = Qry.boolean_endpoint_response(ins_rq)
            print "INSERTED RESULT:", inserted

            # THE REGISTRATION WAS NOT SUCCESSFUL
            if inserted is None:
                print "THE RESEARCH QUESTION WAS REGISTERED"
                print MESSAGE_3

            # THE REGISTRATION WAS SUCCESSFUL. RETRIEVE THE URI
            if inserted == "true" or inserted == STARDOG_BOOLEAN_BUG_MESSAGE:
                print "THE RESEARCH QUESTION IS REGISTERED"
                find = Qry.sparql_xml_to_matrix(find_query)
                if find:
                    if find[St.result]:
                        message = MESSAGE_2.replace("@", find[St.result][1][0])
                        print message
                        return {St.message: message.replace("@", "<br/>"),
                                St.result: find[St.result][1][0]}
                    return {St.message: MESSAGE_4.replace("@", "<br/>"),
                            St.result: None}
                else:
                    return find

            # NOTE(review): this prints (not returns) the failure dict —
            # callers receive None on this path; confirm intent.
            print {St.message: MESSAGE_3.replace("@", "<br/>"),
                   St.result: None}
def cluster_d_test_statss(linkset, network_size=3, targets=None, directory=None,
                          greater_equal=True, print_it=False, limit=None, activated=False):
    """Investigate link networks (clusters) of a given size in a linkset.

    Clusters the linkset, keeps clusters whose size is == network_size
    (or >= when greater_equal), and for each writes an analysis report,
    a PDF drawing of the network and an evaluation sheet under
    `directory`.  When `directory` is None only counts are computed.

    :return: "" when not activated; "<network_size>\t<count>" when
             directory is None; otherwise implicitly None (files written).
    """
    network = []
    print "LINK NETWORK INVESTIGATION"
    if activated is False:
        print "\tTHE FUNCTION I NOT ACTIVATED"
        return ""

    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    linkset_name = Ut.get_uri_local_name(linkset)
    count_1 = 0   # clusters seen
    count_2 = 0   # clusters matching the size criterion
    sheet_builder = Buffer.StringIO()
    analysis_builder = Buffer.StringIO()
    sheet_builder.write("Count ID STRUCTURE E-STRUCTURE-SIZE A. NETWORK QUALITY"
                        " M. NETWORK QUALITY REFERENCE\n")
    linkset = linkset.strip()
    check = False

    # RUN THE CLUSTER
    clusters_0 = Cls.links_clustering(linkset, limit)

    for i_cluster in clusters_0.items():
        # network = []
        resources = ""
        uri_size = 0
        count_1 += 1
        children = i_cluster[1][St.children]
        cluster_size = len(children)
        # if "<http://www.grid.ac/institutes/grid.10493.3f>" not in children:
        #     continue
        check = cluster_size >= network_size if greater_equal else cluster_size == network_size

        # NETWORK OF A PARTICULAR SIZE
        if check:
            count_2 += 1
            # file_name = i_cluster[0]

            # 2: FETCHING THE CORRESPONDENTS
            # The smallest hash of the member URIs names the cluster,
            # so the same cluster always maps to the same file.
            smallest_hash = float('inf')
            child_list = ""
            for child in children:
                hashed = hash(child)
                if hashed <= smallest_hash:
                    smallest_hash = hashed
                # GENERAL INFO 1: RESOURCES INVOLVED
                child_list += "\t{}\n".format(child)
                use = "<{}>".format(child) if Ut.is_nt_format(child) is not True else child
                resources += "\n\t\t\t\t{}".format(use)
                if len(child) > uri_size:
                    uri_size = len(child)

            if directory:
                # MAKE SURE THE FILE NAME OF THE CLUSTER IS ALWAYS THE SAME
                file_name = "{}".format(str(smallest_hash).replace("-", "N")) if str(
                    smallest_hash).startswith("-") \
                    else "P{}".format(smallest_hash)

                # QUERY FOR FETCHING ALL LINKED RESOURCES FROM THE LINKSET
                query = """
    PREFIX prov: <{3}>
    PREFIX ll: <{4}>
    SELECT DISTINCT ?lookup ?object ?Strength ?Evidence
    {{
        VALUES ?lookup{{ {0} }}
        {{ GRAPH <{1}> {{ ?lookup ?predicate ?object .}} }}
        UNION
        {{ GRAPH <{1}> {{?object ?predicate ?lookup . }} }}
        GRAPH <{2}>
        {{
            ?predicate prov:wasDerivedFrom ?DerivedFrom .
            OPTIONAL {{ ?DerivedFrom ll:hasStrength ?Strength . }}
            OPTIONAL {{ ?DerivedFrom ll:hasEvidence ?Evidence . }}
        }}
    }}
                """.format(resources, linkset, linkset.replace("lens", "singletons"),
                           Ns.prov, Ns.alivocab)
                # print query

                # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
                response = Qry.sparql_xml_to_matrix(query)

                # A DICTIONARY OF KEY: (SUBJECT-OBJECT) VALUE:STRENGTH
                response_dic = dict()
                result = response[St.result]
                if result:
                    for i in range(1, len(result)):
                        key = (result[i][0], result[i][1])
                        if key not in response_dic:
                            response_dic[key] = result[i][2]
                # print response_dic

                # GENERAL INFO 2:
                info = "SIZE {} \nCLUSTER {} \nNAME {}\n".format(cluster_size, count_1, file_name)
                info2 = "CLUSTER [{}] NAME [{}] SIZE [{}]".format(count_1, file_name, cluster_size)
                analysis_builder.write("{}\n".format(info))
                print "{:>5} {}".format(count_2, info2)

                analysis_builder.write("RESOURCES INVOLVED\n")
                analysis_builder.write(child_list)
                analysis_builder.write("\nCORRESPONDENT FOUND ")
                analysis_builder.write(
                    Qry.display_matrix(response, spacing=uri_size, output=True,
                                       line_feed='.', is_activated=True))

                # INFO TYPE 3: PROPERTY-VALUES OF THE RESOURCES INVOLVED
                analysis_builder.write("\n\nDISAMBIGUATION HELPER ")
                if targets is None:
                    analysis_builder.write(Cls.disambiguate_network(linkset, children))
                else:
                    analysis_builder.write(Cls.disambiguate_network_2(children, targets))

                position = i_cluster[1][St.row]
                if St.annotate in i_cluster[1]:
                    analysis_builder.write("\n\nANNOTATED CLUSTER PROCESS")
                    analysis_builder.write(i_cluster[1][St.annotate])

                # THE CLUSTER
                # print "POSITION: {}".format(position)
                # print "\nMATRIX DISPLAY\n"
                # for i in range(0, position):
                #     resource = (i_cluster[1][St.matrix])[i]
                #     print "\t{}".format(resource[:position])
                #     print "\t{}".format(resource)

                # GENERATING THE NETWORK AS A TUPLE WHERE A TUPLE REPRESENT TWO RESOURCES IN A RELATIONSHIP :-)
                network = []
                for i in range(1, position):
                    for j in range(1, position):
                        if (i, j) in (i_cluster[1][St.matrix_d]) and (i_cluster[1][St.matrix_d])[(i, j)] != 0:
                            r = (i_cluster[1][St.matrix_d])[(i, 0)]
                            c = (i_cluster[1][St.matrix_d])[(0, j)]
                            r_name = "{}:{}".format(i, Ut.get_uri_local_name(r))
                            c_name = "{}:{}".format(j, Ut.get_uri_local_name(c))
                            network += [(r_name, c_name)]
                            # network += [(r_smart, c_smart)]
                # print "\tNETWORK", network

                if print_it:
                    print ""
                    print analysis_builder.getvalue()

                # SETTING THE DIRECTORY
                if directory:
                    # linkset_name = Ut.get_uri_local_name(linkset)
                    # date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
                    temp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\{}_{}\\".format(
                        network_size, date, linkset_name, cluster_size, file_name))
                    if not os.path.exists(temp_directory):
                        os.makedirs(temp_directory)

                    """"""""""""" PLOTTING """""""""""""
                    # FIRE THE DRAWING: Supported formats: eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
                    analysis_builder.write(
                        draw_graph(graph=network,
                                   file_path="{}{}.{}".format(temp_directory,
                                                              "cluster_{}".format(file_name), "pdf"),
                                   show_image=False))

                    """"""""""""" WRITING TO DISC """""""""""""
                    # WRITE TO DISC
                    Ut.write_2_disc(file_directory=temp_directory,
                                    file_name="cluster_{}".format(file_name, ),
                                    data=analysis_builder.getvalue(), extension="txt")
                    analysis_builder = Buffer.StringIO()

            if directory:
                if network:
                    # Automated quality decision feeds the evaluation sheet.
                    automated_decision = metric(network)["AUTOMATED_DECISION"]
                    eval_sheet(targets, count_2, "{}_{}".format(cluster_size, file_name),
                               sheet_builder, linkset, children, automated_decision)
                else:
                    print network

        if directory:
            # Flush the sheet only once, after the last cluster.
            # if len(sheet_builder.getvalue()) > 150 and count_2 == 2:
            if len(sheet_builder.getvalue()) > 150 and len(clusters_0) == count_1:
                tmp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\\".format(
                    network_size, date, linkset_name))
                """"""""""""" WRITING CLUSTER SHEET TO DISC """""""""""""
                print "\nWRITING CLUSTER SHEET AT\n\t{}".format(tmp_directory)
                Ut.write_2_disc(file_directory=tmp_directory,
                                file_name="{}_ClusterSheet".format(cluster_size),
                                data=sheet_builder.getvalue(), extension="txt")

        # if count_2 == 2:
        #     break

    print ">>> FOUND: {}".format(count_2)
    if directory is None:
        return "{}\t{}".format(network_size, count_2)
def cluster_d_test(linkset, network_size=3, network_size_max=3, targets=None,
                   constraint_targets=None, constraint_text="", directory=None,
                   greater_equal=True, print_it=False, limit=None, only_good=False,
                   activated=False):
    """Investigate link networks (clusters) across a range of sizes.

    For every size in [network_size, network_size_max] (or all sizes >=
    network_size when greater_equal), cluster the linkset, optionally
    filter clusters with the textual constraint, and write per-cluster
    analysis reports, PDF drawings and an evaluation sheet below
    `directory`.  With only_good=True, clusters whose automated decision
    is not "GOOD*" are skipped.

    :return: "" when not activated or mis-parameterised;
             "<size>\t<count>" when directory is None.
    """
    # FOR CONSTRAINTS TO WORK, IT SHOULD NOT BE NONE
    network = []
    print "\nLINK NETWORK INVESTIGATION"
    if activated is False:
        print "\tTHE FUNCTION I NOT ACTIVATED"
        return ""
    elif network_size > network_size_max and greater_equal is False:
        print "\t[network_size] SHOULD BE SMALLER THAN [network_size_max]"
        return ""

    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    linkset_name = Ut.get_uri_local_name(linkset)
    linkset = linkset.strip()
    if network_size_max - network_size == 0:
        greater_equal = False
    check = False

    # RUN THE CLUSTER
    clusters_0 = Cls.links_clustering(linkset, limit)

    if greater_equal is True:
        # Widen the max to the biggest cluster actually found.
        temp_size = 0
        for cluster, cluster_val in clusters_0.items():
            new_size = len(list(cluster_val["nodes"]))
            if new_size > temp_size:
                temp_size = new_size
        network_size_max = temp_size
        print "THE BIGGEST NETWORK'S: {}".format(network_size_max)

    def check_constraint():
        # Returns True when the current cluster's resources satisfy the
        # comma-separated textual constraint over `constraint_targets`.
        # Uses the enclosing names: constraint_text, constraint_targets,
        # resources, linkset.
        text = constraint_text.lower()
        text = text.split(",")

        # CONSTRAINT BUILDER
        c_builder = Buffer.StringIO()
        if constraint_targets is not None:
            for dictionary in constraint_targets:
                graph = dictionary[St.graph]
                data_list = dictionary[St.data]
                properties = data_list[0][St.properties]
                prop = properties[0] if Ut.is_nt_format(properties[0]) else "<{}>".format(properties[0])

                # WRITING THE CONSTRAINT ON THE GRAPH
                graph_q = """
        {{ GRAPH <{0}>
            {{
                ?lookup {1} ?constraint .
            }}
        }}
        """.format(graph, prop)
                c_builder.write(graph_q) if len(c_builder.getvalue()) == 0 else \
                    c_builder.write("UNION {}".format(graph_q))

        # WRITING THE FILTER
        if len(c_builder.getvalue()) > 0:
            for i in range(0, len(text)):
                if i == 0:
                    c_builder.write("""
        FILTER (LCASE(STR(?constraint)) = \"{}\" """.format(text[i].strip()))
                else:
                    c_builder.write("""
        || LCASE(STR(?constraint)) = \"{}\" """.format(text[i].strip()))
            c_builder.write(")")

        # # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
        query = Qry.cluster_rsc_strengths_query(resources, linkset)
        query = query.replace("# CONSTRAINTS IF ANY", c_builder.getvalue())
        # print query
        response = Qry.sparql_xml_to_matrix(query)
        if response[St.result] is None:
            return False
        return True

    for index in range(network_size, network_size_max + 1):
        count_1 = 0   # clusters seen for this size
        count_2 = 0   # clusters kept for this size
        curr_network_size = index
        print "\nCLUSTERS OF SIZE {}".format(index)
        sheet_builder = Buffer.StringIO()
        analysis_builder = Buffer.StringIO()
        sheet_builder.write("Count ID STRUCTURE E-STRUCTURE-SIZE A. NETWORK QUALITY"
                            " M. NETWORK QUALITY REFERENCE\n")

        for cluster, cluster_val in clusters_0.items():
            # network = []
            resources = ""
            uri_size = 0
            count_1 += 1
            children = list(cluster_val["nodes"])
            strengths = cluster_val["strengths"]
            cluster_size = len(children)
            # if "<http://www.grid.ac/institutes/grid.10493.3f>" not in children:
            #     continue
            check = cluster_size >= curr_network_size if greater_equal else cluster_size == curr_network_size

            # NETWORK OF A PARTICULAR SIZE
            if check:
                # file_name = i_cluster[0]

                # 2: FETCHING THE CORRESPONDENTS
                smallest_hash = float('inf')
                child_list = ""
                for child in children:
                    # CREATE THE HASHED ID AS THE CLUSTER NAME
                    hashed = hash(child)
                    if hashed <= smallest_hash:
                        smallest_hash = hashed

                    # GENERAL INFO 1: RESOURCES INVOLVED
                    child_list += "\t{}\n".format(child)

                    # LIST OF RESOURCES IN THE CLUTER
                    use = "<{}>".format(child) if Ut.is_nt_format(child) is not True else child
                    resources += "\n\t\t\t\t{}".format(use)
                    if len(child) > uri_size:
                        uri_size = len(child)

                # MAKE SURE THE FILE NAME OF THE CLUSTER IS ALWAYS THE SAME
                file_name = "{}".format(str(smallest_hash).replace("-", "N")) if str(
                    smallest_hash).startswith("-") \
                    else "P{}".format(smallest_hash)

                # Skip clusters failing the textual constraint (if any).
                if constraint_targets is not None and check_constraint() is False:
                    continue
                count_2 += 1

                # # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
                query = Qry.cluster_rsc_strengths_query(resources, linkset)
                response = Qry.sparql_xml_to_matrix(query)

                # GENERAL INFO 2:
                info = "SIZE {} \nCLUSTER {} \nNAME {}\n".format(cluster_size, count_1, file_name)
                info2 = "CLUSTER [{}] NAME [{}] SIZE [{}]".format(count_1, file_name, cluster_size)
                analysis_builder.write("{}\n".format(info))

                analysis_builder.write("RESOURCES INVOLVED\n")
                analysis_builder.write(child_list)
                analysis_builder.write("\nCORRESPONDENT FOUND ")
                analysis_builder.write(
                    Qry.display_matrix(response, spacing=uri_size, output=True,
                                       line_feed='.', is_activated=True))

                # INFO TYPE 3: PROPERTY-VALUES OF THE RESOURCES INVOLVED
                analysis_builder.write("\n\nDISAMBIGUATION HELPER ")
                if targets is None:
                    analysis_builder.write(Cls.disambiguate_network(linkset, children))
                else:
                    report = Cls.disambiguate_network_2(children, targets)
                    if report is not None:
                        analysis_builder.write(report)

                # GENERATING THE NETWORK AS A TUPLE WHERE A TUPLE REPRESENT TWO RESOURCES IN A RELATIONSHIP :-)
                network = []
                link_count = 0
                for link in cluster_val["links"]:
                    link_count += 1
                    name_1 = "{}-{}".format(Ut.hash_it(link[0]), Ut.get_uri_local_name(link[0]))
                    name_2 = "{}-{}".format(Ut.hash_it(link[1]), Ut.get_uri_local_name(link[1]))
                    network += [(name_1, name_2)]

                # GET THE AUTOMATED FLAG
                if print_it:
                    print ""
                    print analysis_builder.getvalue()

                # SETTING THE DIRECTORY
                if directory:
                    if network:
                        automated_decision = metric(network)["AUTOMATED_DECISION"]
                        # only_good keeps clusters whose automated
                        # decision starts with "GOOD" and drops the rest.
                        if only_good is True and automated_decision.startswith("GOOD") is not True:
                            count_2 -= 1
                            continue
                        print "{:>5} {}".format(count_2, info2)
                        eval_sheet(targets, count_2, "{}_{}".format(cluster_size, file_name),
                                   sheet_builder, linkset, children, automated_decision)
                    else:
                        print network

                    # linkset_name = Ut.get_uri_local_name(linkset)
                    # date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
                    temp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\{}_{}\\".format(
                        curr_network_size, date, linkset_name, cluster_size, file_name))
                    if not os.path.exists(temp_directory):
                        os.makedirs(temp_directory)

                    """"""""""""" PLOTTING """""""""""""
                    # FIRE THE DRAWING: Supported formats: eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
                    analysis_builder.write(
                        draw_graph(graph=network,
                                   file_path="{}{}.{}".format(temp_directory,
                                                              "cluster_{}".format(file_name), "pdf"),
                                   show_image=False))

                    """"""""""""" WRITING TO DISC """""""""""""
                    # WRITE TO DISC
                    Ut.write_2_disc(file_directory=temp_directory,
                                    file_name="cluster_{}".format(file_name, ),
                                    data=analysis_builder.getvalue(), extension="txt")
                    analysis_builder = Buffer.StringIO()

            if directory:
                # Flush the sheet only once, after the last cluster.
                # if len(sheet_builder.getvalue()) > 150 and count_2 == 2:
                if len(sheet_builder.getvalue()) > 150 and len(clusters_0) == count_1:
                    tmp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\\".format(
                        curr_network_size, date, linkset_name))
                    """"""""""""" WRITING CLUSTER SHEET TO DISC """""""""""""
                    print "\n\tWRITING CLUSTER SHEET AT\n\t{}".format(tmp_directory)
                    Ut.write_2_disc(file_directory=tmp_directory,
                                    file_name="{}_ClusterSheet".format(cluster_size),
                                    data=sheet_builder.getvalue(), extension="txt")

            # if count_2 == 2:
            #     break

        if greater_equal is True:
            # no need to continue as we already did all network greater of equal to "network-size" input
            break

        print "\t>>> FOUND: {} CLUSTERS OF SIZE {}".format(count_2, curr_network_size)
    if directory is None:
        return "{}\t{}".format(curr_network_size, count_2)
def cluster_d_test_stats(linkset, network_size=3, targets=None, directory=None,
                         greater_equal=True, print_it=False, limit=None, activated=False):
    """
    Investigate the clusters (link networks) of a linkset and report statistics.

    For every cluster of the requested size, the function builds a textual
    analysis (resources involved, correspondences found, disambiguation help),
    optionally draws the network as a PDF and writes an evaluation sheet.

    :param linkset:       URI of the linkset whose clusters are investigated.
    :param network_size:  cluster size of interest; matched with ">=" when
                          greater_equal is True, with "==" otherwise.
    :param targets:       optional dataset specs forwarded to the
                          disambiguation helper (Cls.disambiguate_network_2).
    :param directory:     when given, per-cluster analysis files, plots and a
                          cluster sheet are written below this directory.
                          When None, returns "<network_size>\t<count>".
    :param greater_equal: see network_size.
    :param print_it:      when True, each cluster's analysis is printed.
    :param limit:         optional cap forwarded to Cls.links_clustering.
    :param activated:     safety switch; nothing runs unless it is True.
    :return: summary string when directory is None, otherwise None.
    """
    network = []
    print "LINK NETWORK INVESTIGATION"
    if activated is False:
        # NOTE(review): message typo "I NOT" is preserved — it is a runtime string.
        print "\tTHE FUNCTION I NOT ACTIVATED"
        return ""

    # DATE STAMP (YYYYMMDD) AND LINKSET LOCAL NAME USED IN THE OUTPUT PATHS
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    linkset_name = Ut.get_uri_local_name(linkset)
    count_1 = 0  # clusters seen
    count_2 = 0  # clusters matching the requested size
    sheet_builder = Buffer.StringIO()
    analysis_builder = Buffer.StringIO()
    sheet_builder.write("Count ID STRUCTURE E-STRUCTURE-SIZE A. NETWORK QUALITY"
                        " M. NETWORK QUALITY REFERENCE\n")
    linkset = linkset.strip()
    check = False

    # RUN THE CLUSTER
    clusters_0 = Cls.links_clustering(linkset, limit)

    for cluster, cluster_val in clusters_0.items():

        # network = []
        resources = ""
        uri_size = 0
        count_1 += 1
        children = list(cluster_val["nodes"])
        strengths = cluster_val["strengths"]
        cluster_size = len(children)
        # if "<http://www.grid.ac/institutes/grid.10493.3f>" not in children:
        #     continue
        check = cluster_size >= network_size if greater_equal else cluster_size == network_size

        # NETWORK OF A PARTICULAR SIZE
        if check:

            count_2 += 1
            # file_name = i_cluster[0]

            # 2: FETCHING THE CORRESPONDENTS
            smallest_hash = float('inf')
            child_list = ""
            for child in children:

                # THE SMALLEST HASH OF THE MEMBERS IS USED AS A STABLE CLUSTER ID
                hashed = hash(child)
                if hashed <= smallest_hash:
                    smallest_hash = hashed

                # GENERAL INFO 1: RESOURCES INVOLVED
                child_list += "\t{}\n".format(child)
                use = "<{}>".format(child) if Ut.is_nt_format(child) is not True else child
                resources += "\n\t\t\t\t{}".format(use)
                if len(child) > uri_size:
                    uri_size = len(child)

            if directory:

                # MAKE SURE THE FILE NAME OF THE CLUSTER IS ALWAYS THE SAME
                file_name = "{}".format(str(smallest_hash).replace("-", "N")) if str(
                    smallest_hash).startswith("-") \
                    else "P{}".format(smallest_hash)

                # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
                query = Qry.cluster_rsc_strengths_query(resources, linkset)
                response = Qry.sparql_xml_to_matrix(query)

                # GENERAL INFO 2:
                info = "SIZE {} \nCLUSTER {} \nNAME {}\n".format(cluster_size, count_1, file_name)
                info2 = "CLUSTER [{}] NAME [{}] SIZE [{}]".format(count_1, file_name, cluster_size)
                analysis_builder.write("{}\n".format(info))
                print "{:>5} {}".format(count_2, info2)

                analysis_builder.write("RESOURCES INVOLVED\n")
                analysis_builder.write(child_list)
                analysis_builder.write("\nCORRESPONDENT FOUND ")
                analysis_builder.write(
                    Qry.display_matrix(response, spacing=uri_size, output=True, line_feed='.', is_activated=True))

                # INFO TYPE 3: PROPERTY-VALUES OF THE RESOURCES INVOLVED
                analysis_builder.write("\n\nDISAMBIGUATION HELPER ")
                if targets is None:
                    analysis_builder.write(Cls.disambiguate_network(linkset, children))
                else:
                    analysis_builder.write(Cls.disambiguate_network_2(children, targets))

            # GENERATING THE NETWORK AS A TUPLE WHERE A TUPLE REPRESENT TWO RESOURCES IN A RELATIONSHIP :-)
            network = []
            link_count = 0
            for link in cluster_val["links"]:
                link_count += 1
                name_1 = "{}".format(Ut.get_uri_local_name(link[0]))
                name_2 = "{}".format(Ut.get_uri_local_name(link[1]))
                network += [(name_1, name_2)]

            if print_it:
                print ""
                print analysis_builder.getvalue()

            # SETTING THE DIRECTORY
            if directory:

                # linkset_name = Ut.get_uri_local_name(linkset)
                # date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
                temp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\{}_{}\\".format(
                    network_size, date, linkset_name, cluster_size, file_name))
                if not os.path.exists(temp_directory):
                    os.makedirs(temp_directory)

                """"""""""""" PLOTTING """""""""""""
                # FIRE THE DRAWING: Supported formats: eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
                analysis_builder.write(
                    draw_graph(graph=network,
                               file_path="{}{}.{}".format(temp_directory, "cluster_{}".format(file_name), "pdf"),
                               show_image=False)
                )

                """"""""""""" WRITING TO DISC """""""""""""
                # WRITE TO DISC
                Ut.write_2_disc(file_directory=temp_directory, file_name="cluster_{}".format(file_name, ),
                                data=analysis_builder.getvalue(), extension="txt")
                # RESET THE PER-CLUSTER ANALYSIS BUFFER
                analysis_builder = Buffer.StringIO()

                if network:
                    # AUTOMATED QUALITY DECISION FEEDS THE EVALUATION SHEET
                    automated_decision = metric(network)["AUTOMATED_DECISION"]
                    eval_sheet(targets, count_2, "{}_{}".format(cluster_size, file_name),
                               sheet_builder, linkset, children, automated_decision)
                else:
                    print network

        if directory:

            # FLUSH THE SHEET ONLY ONCE THE LAST CLUSTER HAS BEEN PROCESSED
            # if len(sheet_builder.getvalue()) > 150 and count_2 == 2:
            if len(sheet_builder.getvalue()) > 150 and len(clusters_0) == count_1:
                tmp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\\".format(
                    network_size, date, linkset_name))

                """"""""""""" WRITING CLUSTER SHEET TO DISC """""""""""""
                print "\nWRITING CLUSTER SHEET AT\n\t{}".format(tmp_directory)
                Ut.write_2_disc(file_directory=tmp_directory, file_name="{}_ClusterSheet".format(cluster_size),
                                data=sheet_builder.getvalue(), extension="txt")

        # if count_2 == 2:
        #     break

    print ">>> FOUND: {}".format(count_2)
    if directory is None:
        return "{}\t{}".format(network_size, count_2)
def register_alignment_mapping(alignment_mapping, created):
    """
    Register an alignment-mapping (and its linkset) in the research-question graph.

    Flow:
      1. ASK whether the alignment was already registered.
      2. If not, INSERT it, look up its URI, optionally register the evolution
         of a refined linkset, then attach the linkset to the alignment.
      3. If it was, check whether the linkset itself was attached; if not,
         attach it (and register the refined linkset's evolution if needed).

    :param alignment_mapping: dict of specs; must contain St.researchQ_URI and
                              St.linkset (or St.refined for a refined linkset).
    :param created: True when the linkset was created by this run; forwarded
                    to linkset_createdorused as is_created.
    :return: None. All effects happen on the SPARQL endpoint / stdout.
    """
    print "\nREGISTERING AN [ALIGNMENT-MAPPING]"
    question_uri = alignment_mapping[St.researchQ_URI]

    # MAKE SURE THE WRITE URI IS USED WHEN REGISTERING A REFINED LINKSET
    linkset_uri = alignment_mapping[
        St.refined] if St.refined in alignment_mapping else alignment_mapping[
        St.linkset]
    print "\tLINKSET TO REGISTER:", linkset_uri

    # LINKSET EXISTS
    if linkset_uri:

        # 1 CHECK WHETHER THE ALIGNMENT WAS REGISTERED
        ask_query = linkset_composition(alignment_mapping, request_ask_select_or_insert="ask")
        # print ask_query
        if ask_query is None:
            # NOTHING TO DO IF NO ASK QUERY COULD BE BUILT
            return
        ask = Qry.boolean_endpoint_response(ask_query)
        # print ask_query
        print "\t>>> ASK WHETHER THE [ALIGNMENT] WAS REGISTERED:", ask

        # 2 THE ALIGNMENT WAS NOT REGISTERED
        if ask == "false":

            # REGISTER THE ALIGNMENT-MAPPING
            insert_alignment_query = linkset_composition(
                alignment_mapping, request_ask_select_or_insert="insert")
            insert_alignment = Qry.boolean_endpoint_response(
                insert_alignment_query)
            # print insert_alignment_query
            print "\t>>> IS THE [ALIGNMENT] NOW INSERTED?:", insert_alignment

            # 2.1 RETRIEVE THE ALIGNMENT-MAPPING URI
            # (reuse the ASK query as a SELECT to fetch ?alignmentMapping)
            alignment_uri = None
            alignment_uri_query = ask_query.replace(
                "ASK", "SELECT ?alignmentMapping")
            alignment_uri_resp = Qry.sparql_xml_to_matrix(alignment_uri_query)
            if alignment_uri_resp:
                if alignment_uri_resp[St.result]:
                    # row 0 is the header row; row 1 holds the first binding
                    alignment_uri = alignment_uri_resp[St.result][1][0]
                    print "\t>>> ALIGNMENT REGISTERED AS:", alignment_uri

            if alignment_uri:

                # IF WE ARE DEALING WITH A REFINED LINKSET, REGISTER ITS EVOLUTION
                if St.refined in alignment_mapping:
                    print "REGISTERING THE EVOLUTION OF THIS REFINED LINKSET TO\n\t{}".format(
                        alignment_uri)
                    evolution_str = linkset_evolution(question_uri, linkset_uri)
                    register_evolution(question_uri, alignment_uri, evolution_str)

                # 2.2 ADD THE LINKSET TO THE ALIGNMENT
                assign_ls_query = linkset_createdorused(question_uri, alignment_uri, alignment_mapping,
                                                        is_created=created)
                is_linkset_registered = Qry.boolean_endpoint_response(
                    assign_ls_query)
                print ">>> IS THE [LINKSET] REGISTERED?:", is_linkset_registered

        # 3 THE ALIGNMENT WAS REGISTERED
        else:

            # CHECK IF THE LINKSET WAS REGISTERED
            # is_linkset_registered_query = ask_query.replace("> .", "> ;\n\t\t?pred\t<{}> .".format(linkset_uri))
            # is_linkset_registered_query = is_linkset_registered_query.replace(">\" .", ">\" ;\n\t\t?pred\t<{}> .".format(linkset_uri))
            is_linkset_registered_query = ask_query.replace(
                "###@SLOT", "\n\t\t\t?alignmentMapping ?pred\t<{}> .".format(linkset_uri))
            # print "CHECKING WHETHER THE LINKSET WAS TRULY REGISTERED QUERY:", is_linkset_registered_query
            is_linkset_registered = Qry.boolean_endpoint_response(
                is_linkset_registered_query)
            # print is_linkset_registered_query
            print "\t>>> ASK WHETHER [LINKSET] WAS REGISTERED?:", is_linkset_registered

            if is_linkset_registered == "false":

                # RETRIEVE THE ALIGNMENT-MAPPING URI
                alignment_uri = None
                alignment_uri_query = ask_query.replace(
                    "ASK", "SELECT ?alignmentMapping")
                # print "alignment_uri_query:", alignment_uri_query
                alignment_uri_resp = Qry.sparql_xml_to_matrix(
                    alignment_uri_query)
                if alignment_uri_resp:
                    if alignment_uri_resp[St.result]:
                        alignment_uri = alignment_uri_resp[St.result][1][0]

                if alignment_uri:

                    # IF WE ARE DEALING WITH A REFINED LINKSET,
                    # REGISTER ITS EVOLUTION IF NOT REGISTERED YET
                    if St.refined in alignment_mapping:
                        print "REGISTERING THE EVOLUTION OF THIS REFINED LINKSET"
                        evolution_str = linkset_evolution(
                            question_uri, linkset_uri)
                        register_evolution(question_uri, alignment_uri, evolution_str)

                    # 2.3 ADD THE LINKSET TO THE ALIGNMENT
                    assign_ls_query = linkset_createdorused(question_uri, alignment_uri, alignment_mapping,
                                                            is_created=created)
                    is_linkset_registered = Qry.boolean_endpoint_response(
                        assign_ls_query)
                    print "\t>>> IS LINKSET NOW REGISTERED?:", is_linkset_registered
def modified(early_version, late_version, stat=False, display=True, activated=False): if activated is False: print "\nTHE FUNCTION [removed] IS NOT ACTIVATED" return {St.subject: None, St.predicate: None, St.triples: None} if stat is False: # TRIPLES REMOVED mod_pred = modified_predicate(early_version, late_version, count=stat) # EXECUTING THE PREDICATE MODIFICATION QUERY start = time.time() resp_mod_pred = Qr.sparql_xml_to_matrix(mod_pred) matched_time_1 = str(datetime.timedelta(seconds=time.time() - start)) print " \t{:50} [{}]".format("... predicate matched in", matched_time_1) # DROPPING THE TEMP GRAPH USED FOR THE QUERY # print "DROPPING TEMPS..." drop = """ DROP SILENT GRAPH <{}_TEMP>; DROP SILENT GRAPH <{}_TEMP> """.format(early_version, late_version) start = time.time() Qr.endpoint(drop) dropped_time_1 = str(datetime.timedelta(seconds=time.time() - start)) print "\t{:50} [{}]".format("... predicate temp graph dropped in", dropped_time_1) print "\t{:50} [{}]".format("... elapse time", str(datetime.timedelta(seconds=time.time() - start))) mod_val = modified_value(early_version, late_version, count=stat) # EXECUTING THE VALUE MODIFICATION QUERY start_2 = time.time() resp_mod_val = Qr.sparql_xml_to_matrix(mod_val) matched_time_2 = str(datetime.timedelta(seconds=time.time() - start_2)) print "\t{:50} [{}]".format("... value matched in", matched_time_2) # DROPPING THE TEMP GRAPH USED FOR THE QUERY # print "DROPPING TEMPS..." drop = """ DROP SILENT GRAPH <{}_TEMP>; DROP SILENT GRAPH <{}_TEMP> """.format(early_version, late_version) start_2 = time.time() Qr.endpoint(drop) dropped_time_2 = str(datetime.timedelta(seconds=time.time() - start_2)) print "\t{:50} [{}]".format("... value temp graph dropped in", dropped_time_2) print "\t{:50} [{}]".format("... 
elapse time", str(datetime.timedelta(seconds=time.time() - start))) status = (resp_mod_val[St.result] is not None and len(resp_mod_val[St.result]) > 1) or \ (resp_mod_pred[St.result] is not None and len(resp_mod_pred[St.result]) > 1) if display is True: # DISPLAY THE RESULTS FOR VALUE REMOVED print "\n>>> DISPLAY THE RESULTS FOR VALUE MODIFIED" Qr.display_matrix(resp_mod_val, spacing=90, limit=10, is_activated=True) # DISPLAY THE RESULTS FOR VALUE REMOVED print "\n>>> DISPLAY THE RESULTS FOR PREDICATE MODIFIED" Qr.display_matrix(resp_mod_pred, spacing=90, limit=10, is_activated=True) return {"status": status, St.predicate: resp_mod_pred[St.result], St.triples: resp_mod_val[St.result]} else: mod_pred = modified_predicate(early_version, late_version, count=stat) mod_val = modified_value(early_version, late_version, count=stat) resp_mod_pred = Qr.sparql_xml_to_matrix(mod_pred) resp_mod_val = Qr.sparql_xml_to_matrix(mod_val) # resp_mod_val = {St.result: None} resp_mod_pred_bool = resp_mod_pred[St.result] is None resp_mod_val_bool = resp_mod_val[St.result] is None status = (resp_mod_pred_bool is not True and int(resp_mod_pred[St.result][1][0]) > 0) or \ (resp_mod_val_bool is not None and int(resp_mod_val[St.result][1][0]) > 0) return {"status": status, St.predicate: resp_mod_pred[St.result][1][0] if resp_mod_pred_bool is not True else None, St.triples: resp_mod_val[St.result][1][0] if resp_mod_val_bool is not True else None}
def refining(specs, insert_query, activated=False):
    """
    Refine an existing linkset and register the result.

    Pipeline:
      1. Update the specs and fetch the mechanism's sameAs predicate count.
      2. Generate the refined linkset name and run the "refine" checks.
      3. Fetch the metadata (type, singleton graph) of the linkset to refine.
      4. Build and run the insert query (or extraction + finding query pair).
      5. On success: write the metadata, register the alignment and compute
         the difference between the source linkset and its refinement.

    :param specs: dict of linkset specifications (St.* keys); mutated in place
                  (sameAsCount, derivedfrom, singletonGraph, insert_query, ...).
    :param insert_query: callable taking specs and returning either a query
                         string or a [extraction_query, finding_query] pair.
    :param activated: forwarded to Df.difference as its activation switch.
    :return: dict {'refined': ..., 'difference': ...} on most paths; the raw
             check dict when the pre-checks say "NOT GOOD TO GO"; None when
             the insert run fails (falls through the final else).
    """
    # DEFAULT (PESSIMISTIC) RESULTS, OVERWRITTEN ON SUCCESS
    refined = {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}
    diff = {St.message: Ec.ERROR_CODE_4, St.error_code: 1, St.result: None}

    # UPDATE THE SPECS VARIABLE
    # print "UPDATE THE SPECS VARIABLE"
    update_specification(specs)
    update_specification(specs[St.source])
    update_specification(specs[St.target])

    # ACCESS THE TASK SPECIFIC PREDICATE COUNT
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])
    # print "sameAsCount:", specs[St.sameAsCount]
    if specs[St.sameAsCount] is None:
        return {'refined': refined, 'difference': diff}

    # GENERATE THE NAME OF THE LINKSET
    Ls.set_refined_name(specs)
    # print "\nREFINED NAME:", specs[St.refined]
    # print "LINKSET TO REFINE BEFORE CHECK:", specs[St.linkset]

    # CHECK WHETHER OR NOT THE LINKSET WAS ALREADY CREATED
    check = Ls.run_checks(specs, check_type="refine")
    # print "\nREFINED NAME:", specs[St.refined]
    # print "LINKSET TO REFINE:", specs[St.linkset]
    if check[St.message] == "NOT GOOD TO GO":
        # refined = check[St.refined]
        # difference = check["difference"]
        return check
    # print "\nREFINED:", specs[St.refined]
    # print "LINKSET TO REFINE:", specs[St.linkset]
    # print "CHECK:", check

    # THE LINKSET DOES NOT EXIT, LETS CREATE IT NOW
    print Ls.refined_info(specs, specs[St.sameAsCount])

    # POINT TO THE LINKSET THE CURRENT LINKSET WAS DERIVED FROM
    print "1. wasDerivedFrom {}".format(specs[St.linkset])
    specs[St.derivedfrom] = "\t\tprov:wasDerivedFrom\t\t\t<{}> ;".format(
        specs[St.linkset])
    # print "REFINED NAME:", specs[St.refined_name]
    # print "REFINED:", specs[St.refined]
    # print "LINKSET TO BE REFINED:", specs[St.linkset]

    print "\n2. RETRIEVING THE METADATA ABOUT THE GRAPH TO REFINE"
    # metadata_q = Qry.q_linkset_metadata(specs[St.linkset])
    metadata_q = """
    prefix ll: <{}>
    SELECT DISTINCT ?type ?singletonGraph
    {{
        # LINKSET METADATA
        <{}>
            a                   ?type ;
            ll:singletonGraph   ?singletonGraph .
    }}
    """.format(Ns.alivocab, specs[St.linkset])
    print "QUERY:", metadata_q
    matrix = Qry.sparql_xml_to_matrix(metadata_q)
    # print "\nMETA DATA: ", matrix

    if matrix:
        if matrix[St.message] == "NO RESPONSE":
            # ENDPOINT DID NOT ANSWER
            print Ec.ERROR_CODE_1
            print matrix[St.message]
            return {'refined': refined, 'difference': diff}

        elif matrix[St.result] is None:
            # THE QUERY RAN BUT RETURNED NO METADATA
            print matrix[St.message]
            returned = {
                St.message: matrix[St.message],
                St.error_code: 666,
                St.result: None
            }
            return {'refined': returned, 'difference': diff}
    else:
        print Ec.ERROR_CODE_1
        return {'refined': refined, 'difference': diff}

    # GET THE SINGLETON GRAPH OF THE LINKSET TO BE REFINED
    print "\n3. GETTING THE SINGLETON GRAPH OF THE GRAPH TO REFINE"
    # row 0 is the header; column 1 is ?singletonGraph
    specs[St.singletonGraph] = matrix[St.result][1][1]
    # print matrix[St.result][1][0]

    # BUILD AND RUN THE INSERT QUERY (STRING) OR QUERY PAIR (LIST)
    specs[St.insert_query] = insert_query(specs)
    print specs[St.insert_query]
    if type(specs[St.insert_query]) == str:
        is_run = Qry.boolean_endpoint_response(specs[St.insert_query])
    else:
        print "\n4. RUNNING THE EXTRACTION QUERY"
        print specs[St.insert_query][0]
        # is_run = Qry.boolean_endpoint_response(specs[St.insert_query][0])
        Qry.boolean_endpoint_response(specs[St.insert_query][0])
        print "\n5. RUNNING THE FINDING QUERY"
        print specs[St.insert_query][1]
        is_run = Qry.boolean_endpoint_response(specs[St.insert_query][1])
    print "\n>>> RUN SUCCESSFULLY:", is_run.upper()

    # NO INSERTION HAPPENED
    if is_run == "true" or is_run == Ec.ERROR_STARDOG_1:

        # GENERATE THE
        # (1) LINKSET METADATA
        # (2) LINKSET OF CORRESPONDENCES
        # (3) SINGLETON METADATA
        # AND WRITE THEM ALL TO FILE
        print "GENERATING THE METADATA"
        pro_message = refine_metadata(specs)

        # SET THE RESULT ASSUMING IT WENT WRONG
        refined = {
            St.message: Ec.ERROR_CODE_4,
            St.error_code: 4,
            St.result: None
        }
        diff = {St.message: Ec.ERROR_CODE_4, St.error_code: 4, St.result: None}
        server_message = "Linksets created as: [{}]".format(specs[St.refined])
        message = "The linkset was created as [{}]. <br/>{}".format(
            specs[St.refined], pro_message)

        # MESSAGE ABOUT THE INSERTION STATISTICS
        print "\t", server_message

        if int(specs[St.triples]) > 0:

            # UPDATE THE REFINED VARIABLE AS THE INSERTION WAS SUCCESSFUL
            # NOTE(review): St.result is set to the source linkset here, not to
            # specs[St.refined] — confirm this is intentional.
            refined = {
                St.message: message,
                St.error_code: 0,
                St.result: specs[St.linkset]
            }

            print "REGISTERING THE ALIGNMENT"
            if refined[St.message].__contains__("ALREADY EXISTS"):
                register_alignment_mapping(specs, created=False)
            else:
                register_alignment_mapping(specs, created=True)

            try:
                print "\nCOMPUTE THE DIFFERENCE AND DOCUMENT IT"
                diff_lens_specs = {
                    St.researchQ_URI: specs[St.researchQ_URI],
                    St.subjectsTarget: specs[St.linkset],
                    St.objectsTarget: specs[St.refined]
                }
                diff = Df.difference(diff_lens_specs, activated=activated)
                message_2 = "\t>>> {} CORRESPONDENCES INSERTED AS THE DIFFERENCE".format(
                    diff_lens_specs[St.triples])
                print message_2
            except Exception as err:
                print "THE DIFFERENCE FAILED: ", str(err.message)

            print "\tLinkset created as: ", specs[St.refined]
            print "\t*** JOB DONE! ***"
            return {'refined': refined, 'difference': diff}

        else:
            print ">>> NO TRIPLE WAS INSERTED BECAUSE NO MATCH COULD BE FOUND"
            return {'refined': refined, 'difference': diff}

    else:
        # THE RUN FAILED; NOTE THAT THIS PATH RETURNS None IMPLICITLY
        print "NO MATCH COULD BE FOUND."
def universities_in(file_path, country, activated=False): if activated is False: print "THE FUNCTION [universities_in] IS NOT ACTIVATED." return None query_filter = "" for i in range(0, len(country)): query_filter += "ucase(?country) = ucase(\"{}\")".format(country[i]) if i == 0 \ else "|| ucase(?country) = ucase(\"{}\")".format(country[i]) netherlands = """ SELECT DISTINCT ?subj ?university {{ VALUES ?name_pred {{ <http://www.w3.org/2000/01/rdf-schema#label> <http://risis.eu/eter_2014/ontology/predicate/Institution_Name> <http://risis.eu/orgreg_20170718/ontology/predicate/Entity_current_name_English> <http://risis.eu/orgref_20170703/ontology/predicate/Name> <http://risis.eu/leidenRanking_2015/ontology/predicate/actor> <http://xmlns.com/foaf/0.1/name> }} GRAPH <{}> {{ ?subj ?name_pred ?university . ?subj {} ?country . FILTER ({}) }} }} ORDER BY ?university """ # VARIABLES grid = "<http://www.grid.ac/ontology/hasAddress>/<http://www.grid.ac/ontology/countryCode>" eter = "<http://risis.eu/eter_2014/ontology/predicate/Country_Code>" orgreg = "<http://risis.eu/orgreg_20170718/ontology/predicate/characteristicsOf>" \ "/<http://risis.eu/orgreg_20170718/ontology/predicate/Country_of_establishment>" orgref = "<http://risis.eu/orgref_20170703/ontology/predicate/Country>" leiden = "<http://risis.eu/leidenRanking_2015/ontology/predicate/country>" h2020 = "<http://risis.eu/cordisH2020/vocab/country>" countries = [leiden, eter, orgreg, orgref, h2020, grid] graphs = [Data.leiden_GRAPH, Data.eter_GRAPH, Data.orgreg_GRAPH, Data.orgref_GRAPH, Data.h2020_GRAPH, Data.grid_GRAPH] names = ["\n>>> LEIDEN", "\n>>> ETER", "\n>>> ORGREG", "\n>>> ORGREG", "\n>>> H2020", "\n>>> GRID"] results = ["", "", "", "", "", ""] size = 0 excel = Builder.StringIO() # QUERY LOOP for i in range(0, 6): start = time.time() query = netherlands.format(graphs[i], countries[i], query_filter) # print query # Qr.display_result(query=query, spacing=50, limit=5, is_activated=True) results[i] = 
Qr.sparql_xml_to_matrix(query) temp_size = dict(results[i])["result"].__len__() - 1 elapsed = str(datetime.timedelta(seconds=time.time() - start)) sofar = str(datetime.timedelta(seconds=time.time() - begining)) print "{} {} in {} and so far in [{}]".format(names[i], temp_size, elapsed, sofar) if temp_size > size: size = temp_size print "\n>>> MAX SIZE {}".format(size) for row in range(1, size + 1): excel.write(str(row) + "\t") # GO THROUGH EATCH RESULT for i in range(0, 6): query_results = dict(results[i])["result"] if row < len(query_results): elt = "{}\t{}".format(query_results[row][0], (query_results[row][1]).replace("\t", "")) excel.write(elt + "\t") if i < 5 else excel.write(elt + "\n") else: excel.write("\t\t") if i < 5 else excel.write("\t\t\n") # SAMPLE if row == 100: print "\n", excel.getvalue() # break with open(name=file_path, mode="wb") as writer: writer.write(excel.getvalue())