def get_graph_filter(research_uri, graph_uri, filter_uri=''):
    """Look up the comment and method of the filter(s) applied to a graph.

    The lookup runs inside the named graph ``research_uri`` and matches
    resources typed ``<Ns.riclass>Filter`` whose ``<Ns.alivocab>appliesTo``
    value is ``graph_uri``.

    :param research_uri: URI of the named graph that is queried.
    :param graph_uri: URI the filter must apply to.
    :param filter_uri: URI of one specific filter; when left empty, a
        variable is used instead so any filter applying to ``graph_uri``
        matches.
    :return: whatever ``sparql_xml_to_matrix`` returns for the query.
    """
    # No explicit filter: use a variable so that any filter resource matches.
    if filter_uri == '':
        filter_term = '?filter'
    else:
        filter_term = '<' + filter_uri + '>'

    # rdfs: is declared explicitly. A prefixed name without a PREFIX
    # declaration is only accepted by endpoints that happen to predefine it.
    query = """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?comment ?method
    {{
        GRAPH <{0}>
        {{
            {4}
                a               <{1}Filter> ;
                <{2}appliesTo>  <{3}> ;
                rdfs:comment    ?comment ;
                <{2}method>     ?method .
        }}
    }}
    """.format(research_uri, Ns.riclass, Ns.alivocab, graph_uri, filter_term)

    return sparql_xml_to_matrix(query)
def alignments_mappings_description(question_uri, alignment_uri):
    """Return every predicate/object pair describing a created alignment.

    The query looks inside the named graph ``question_uri`` for the triple
    ``<question_uri> alivocab:created <alignment_uri>`` and then lists all
    ``?pred ?obj`` pairs attached to ``alignment_uri``.

    :param question_uri: URI used both as the named graph and as the
        subject of ``alivocab:created``.
    :param alignment_uri: URI of the alignment to describe.
    :return: the result rows with the first row dropped (presumably the
        header row - confirm against ``sparql_xml_to_matrix``), or ``None``
        when the query yields nothing.
    """
    template = """
    PREFIX alivocab:    <http://risis.eu/alignment/predicate/>
    PREFIX void:        <http://rdfs.org/ns/void#>
    PREFIX prov:        <http://www.w3.org/ns/prov#>

    ### EXTRACT ALIGNMENT MAPPINGS
    select *
    {{
        GRAPH <{0}>
        {{
            <{0}>   alivocab:created    <{1}> .
            <{1}>   ?pred               ?obj .
        }}
    }}
    """
    response = sparql_xml_to_matrix(template.format(question_uri, alignment_uri))

    # Guard clauses: no response at all, or a response without a result.
    if not response:
        return None
    if not response[St.result]:
        return None

    return response[St.result][1:]
def filter_data(question_uri, filter_uri):
    """Fetch the target and the selected properties of one view filter.

    For ``filter_uri`` inside the named graph ``question_uri`` the query
    returns the ``void:target`` value together with every predicate/value
    pair of the filter, ordered by target and then by value.

    :param question_uri: URI of the named graph that is queried.
    :param filter_uri: URI of the filter resource.
    :return: the complete result stored under ``St.result`` (first row
        included), or ``None`` when there is no response or no result.
    """
    template = """
    PREFIX alivocab:    <http://risis.eu/alignment/predicate/>
    PREFIX void:        <http://rdfs.org/ns/void#>

    ### GETTING THE VIEW_FILTERS
    select ?target ?selected ?selected_selectedOptional
    {{
        GRAPH <{0}>
        {{
            <{1}>
                void:target                 ?target ;
                ?selected_selectedOptional  ?selected .
                #alivocab:selected|alivocab:selectedOptional    ?selected .
        }}
    }} ORDER BY ?target ?selected
    """
    response = sparql_xml_to_matrix(template.format(question_uri, filter_uri))

    if not response or not response[St.result]:
        return None
    return response[St.result]
def datasets_selected(question_uri):
    """List the datasets selected by a research question with entity counts.

    Inside the named graph ``question_uri`` the query collects every
    ``alivocab:selected`` resource typed ``http://risis.eu/class/Dataset``
    with its ``alivocab:hasDatatype`` value; a sub-select counts the
    distinct typed subjects found in each dataset graph.

    :param question_uri: URI used both as the named graph and as the
        subject of ``alivocab:selected``.
    :return: the result rows with the first row dropped (presumably the
        header row - confirm against ``sparql_xml_to_matrix``), or ``None``
        when nothing is returned.
    """
    template = """
    ### EXTRACTING DATASETS MAPPING
    PREFIX alivocab:    <http://risis.eu/alignment/predicate/>
    SELECT *
    {{
        GRAPH <{0}>
        {{
            <{0}>   alivocab:selected   ?datasets .
            ?datasets
                a                       <http://risis.eu/class/Dataset> ;
                alivocab:hasDatatype    ?datatype .
        }}

        {{
            SELECT ?datasets (count(distinct ?s) as ?count)
            {{
                GRAPH ?datasets {{ ?s a ?datatype .}}
            }} GROUP BY ?datasets
        }}
    }}"""
    response = sparql_xml_to_matrix(template.format(question_uri))

    if not response:
        return None
    if not response[St.result]:
        return None

    return response[St.result][1:]
def research_first_hops(question_uri):
    """Fetch the label, views, view lenses and filters of a research question.

    The query runs inside the named graph ``question_uri`` and follows
    ``alivocab:created`` to each view, then ``alivocab:hasViewLens`` and
    ``alivocab:hasFilter`` from the view.

    :param question_uri: URI used both as the named graph and as the
        research question resource.
    :return: the complete result stored under ``St.result`` (first row
        included), or ``None`` when there is no response or no result.
    """
    # rdfs: is declared explicitly. A prefixed name without a PREFIX
    # declaration is only accepted by endpoints that happen to predefine it.
    query = """
    PREFIX rdfs:        <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX alivocab:    <http://risis.eu/alignment/predicate/>

    ### GETTING THE VIEW_LENS ELEMENTS (LINKSET OR/AND LENS)
    select ?researchQuestion ?view ?viewLens ?filters
    {{
        GRAPH <{0}>
        {{
            <{0}>
                a                   <http://risis.eu/class/ResearchQuestion> ;
                rdfs:label          ?researchQuestion ;
                alivocab:created    ?view .

            ?view
                alivocab:hasViewLens    ?viewLens ;
                alivocab:hasFilter      ?filters .
        }}
    }}
    """.format(question_uri)

    first_hop_info_matrix = sparql_xml_to_matrix(query)

    if first_hop_info_matrix and first_hop_info_matrix[St.result]:
        # Kept from the original with is_activated=False, so the call stays
        # in place as a debugging hook.
        display_matrix(first_hop_info_matrix, is_activated=False)
        return first_hop_info_matrix[St.result]

    return None
def linksets_and_lenses(question_uri, view_uri):
    """List the linksets and/or lenses selected by the view lens of a view.

    The query runs inside the named graph ``question_uri`` and follows
    ``alivocab:hasViewLens/alivocab:selected`` from ``view_uri``, which must
    be typed ``http://risis.eu/class/View``. Results are ordered by URI.

    :param question_uri: URI of the named graph that is queried.
    :param view_uri: URI of the view resource.
    :return: a flat list holding the cells of every data row (the first
        row, presumably the header, is skipped), or ``None`` when the query
        yields no data row.
    """
    view_lens_query = """
    PREFIX alivocab:    <http://risis.eu/alignment/predicate/>

    ### GETTING THE VIEW_LENS ELEMENTS (LINKSET OR/AND LENS)
    select ?linkset_lens
    {{
        GRAPH <{}>
        {{
            <{}>
                a                                       <http://risis.eu/class/View> ;
                alivocab:hasViewLens/alivocab:selected  ?linkset_lens
        }}
    }} ORDER BY ?linkset_lens
    """.format(question_uri, view_uri)

    view_lens_matrix = sparql_xml_to_matrix(view_lens_query)
    if not view_lens_matrix or not view_lens_matrix[St.result]:
        return None

    rows = view_lens_matrix[St.result][1:]
    if not rows:
        # The original reduce() call had no initial value and raised a
        # TypeError here; "no data" is reported as None like the other cases.
        return None

    # Linear flatten, replacing the quadratic reduce(lambda x, y: x + y, ...).
    return [cell for row in rows for cell in row]
def filters(question_uri, view_uri):
    """Return the filters attached to a view.

    The query runs inside the named graph ``question_uri`` and lists every
    ``alivocab:hasFilter`` value of ``view_uri``.

    :param question_uri: URI of the named graph that is queried.
    :param view_uri: URI of the view resource.
    :return: the complete result stored under ``St.result`` (first row
        included), or ``None`` when there is no response or no result.
    """
    template = """
    PREFIX alivocab:    <http://risis.eu/alignment/predicate/>

    ### GETTING THE FILTERS OF THE VIEW
    ### [A FILTER IS COMPOSED OF A DATASET AND SELECTED PROPERTIES]
    select ?filters
    {{
        GRAPH <{}>
        {{
            <{}> alivocab:hasFilter ?filters .
        }}
    }}
    """
    response = sparql_xml_to_matrix(template.format(question_uri, view_uri))

    if not response or not response[St.result]:
        return None

    # The display call is kept as-is, with is_activated=False.
    display_matrix(response, is_activated=False)
    return response[St.result]
def created_used_lens(question_uri):
    """List the lenses linked to a research question with their sizes.

    Inside the named graph ``question_uri`` the query collects every
    ``bdb:Lens`` that the question points to, together with the connecting
    predicate. A sub-select counts, per lens graph, the distinct subjects
    whose predicate is itself described in some other graph.

    :param question_uri: URI used both as the named graph and as the
        subject of the lens triples.
    :return: the result rows with the first row dropped (presumably the
        header row - confirm against ``sparql_xml_to_matrix``), or ``None``
        when nothing is returned.
    """
    template = """
    PREFIX alivocab:    <http://risis.eu/alignment/predicate/>
    PREFIX bdb:         <http://vocabularies.bridgedb.org/ops#>

    ### GETTING THE VIEW_LENS ELEMENTS (LINKSET OR/AND LENS)
    select ?created ?lens ?count
    {{
        GRAPH <{0}>
        {{
            <{0}>   ?created    ?lens.
            ?lens   a           bdb:Lens .
        }}

        {{
            SELECT ?lens (count(distinct ?subj) as ?count)
            {{
                GRAPH ?lens {{ ?subj ?sing ?pre . }}
                GRAPH ?singGraph {{ ?sing ?sP ?sO . }}
            }} GROUP BY ?lens
        }}
    }}
    """
    response = sparql_xml_to_matrix(template.format(question_uri))

    if not response:
        return None
    if not response[St.result]:
        return None

    return response[St.result][1:]
def research_label(question_uri):
    """Return the ``rdfs:label`` of a research question.

    The query runs inside the named graph ``question_uri`` and requires the
    resource to be typed ``http://risis.eu/class/ResearchQuestion``.

    :param question_uri: URI used both as the named graph and as the
        research question resource.
    :return: the first cell of the first data row (row 0 is skipped,
        presumably because it is the header), or ``None`` when there is no
        response, no result, or no data row.
    """
    # rdfs: is declared explicitly. A prefixed name without a PREFIX
    # declaration is only accepted by endpoints that happen to predefine it.
    rq_query = """
    PREFIX rdfs:        <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX alivocab:    <http://risis.eu/alignment/predicate/>

    ### GETTING THE RESEARCH QUESTION LABEL
    select *
    {{
        GRAPH <{0}>
        {{
            <{0}>
                a           <http://risis.eu/class/ResearchQuestion> ;
                rdfs:label  ?researchQuestion .
        }}
    }}
    """.format(question_uri)

    question_matrix = sparql_xml_to_matrix(rq_query)
    if not question_matrix or not question_matrix[St.result]:
        return None

    rows = question_matrix[St.result]
    # A result holding only its first row would raise IndexError on rows[1].
    if len(rows) < 2:
        return None

    return rows[1][0]
def retrieve_view(question_uri, view_uri):
    """Retrieve the lens elements and the filter table of a view.

    Two queries are run inside the named graph ``question_uri``:

    1. the linksets/lenses reached from ``view_uri`` through
       ``alivocab:hasViewLens/alivocab:selected``;
    2. for every ``alivocab:hasFilter`` of the view, its ``void:target``,
       the optional ``void:hasDatatype`` and the comma-separated
       ``alivocab:selected`` / ``alivocab:selectedOptional`` values.

    :param question_uri: URI of the named graph that is queried.
    :param view_uri: URI of the view resource.
    :return: a dict with two keys. ``"view_lens"`` is a flat list of the
        cells of every data row of the first query (``None`` when there is
        none). ``"view_filter_matrix"`` is the complete result of the second
        query (first row included) or ``None``.
    """
    view_lens_query = """
    PREFIX alivocab:    <http://risis.eu/alignment/predicate/>

    ### GETTING THE VIEW_LENS ELEMENTS (LINKSET OR/AND LENS)
    select ?linkset_lens
    {{
        GRAPH <{}>
        {{
            #?idea  alivocab:created    ?view .
            <{}>
                a                                       <http://risis.eu/class/View> ;
                alivocab:hasViewLens/alivocab:selected  ?linkset_lens
        }}
    }}
    """.format(question_uri, view_uri)

    view_lens_matrix = sparql_xml_to_matrix(view_lens_query)

    view_lens = None
    if view_lens_matrix and view_lens_matrix[St.result]:
        rows = view_lens_matrix[St.result][1:]
        if rows:
            # Linear flatten. The original reduce() call had no initial
            # value and raised a TypeError when there was no data row.
            view_lens = [cell for row in rows for cell in row]

    view_filter_query = """
    PREFIX alivocab:    <http://risis.eu/alignment/predicate/>
    PREFIX void:        <http://rdfs.org/ns/void#>

    ### GETTING THE VIEW_FILTERS
    select ?target ?datatype ?selected ?selectedOptional
    {{
        GRAPH <{}>
        {{
            #?idea  alivocab:created    ?view .
            <{}>
                a                   <http://risis.eu/class/View> ;
                alivocab:hasFilter  ?filter .

            ?filter
                void:target         ?target ;

            OPTIONAL {{ ?filter    void:hasDatatype    ?datatype }}

            OPTIONAL {{
                SELECT ?filter (GROUP_CONCAT(DISTINCT ?sel; SEPARATOR=", ") as ?selected)
                {{ ?filter alivocab:selected ?sel }} group by ?filter
            }}.

            OPTIONAL {{
                SELECT ?filter (GROUP_CONCAT(?selOpt; SEPARATOR=", ") as ?selectedOptional)
                {{ ?filter alivocab:selectedOptional ?selOpt }} group by ?filter
            }}.
        }}
    }} ORDER BY ?target
    """.format(question_uri, view_uri)

    # NOTE(review): the original prints the query on every call; kept because
    # callers or logs may rely on it - confirm whether it is leftover debug.
    print(view_filter_query)

    view_filter_matrix = sparql_xml_to_matrix(view_filter_query)

    filter_rows = None
    if view_filter_matrix and view_filter_matrix[St.result]:
        filter_rows = view_filter_matrix[St.result]

    return {"view_lens": view_lens, "view_filter_matrix": filter_rows}
def stats(graph, display_table=False, display_text=False):
    """Report, per entity type of a graph, which properties are optional.

    Steps:
      1. list every type of the graph with its number of distinct resources;
      2. for each type, list the properties used by its resources;
      3. for each property, count the distinct resources of the type that
         carry it; the property is flagged optional when that count is lower
         than the number of resources of the type.

    One query is issued per (type, property) pair.

    :param graph: URI of the named graph to analyse.
    :param display_table: when true, the intermediate matrices are passed to
        ``display_matrix`` and the property count is printed.
    :param display_text: when true, the textual summary is printed.
    :return: a dict mapping each type's local name (as returned by
        ``Ut.get_uri_local_name``) to a dict ``{property: is_optional}``.
    """
    optional = dict()
    text = buffer()

    # 1. FIND ALL TYPES IN THE GRAPH
    qry_types = """
    ### RETRIEVE ALL TYPES FROM THE GRAPH
    SELECT DISTINCT ?Types (count(distinct ?resource) as ?EntityCount)
    {{
        GRAPH <{}>
        {{
            ?resource a ?Types .
        }}
    }}
    GROUP by ?Types ?EntityType ORDER BY ?Graph
    """.format(graph)
    types_matrix = sparql_xml_to_matrix(qry_types)

    if display_table:
        display_matrix(types_matrix, spacing=70, limit=100, is_activated=True)

    # 2. FOR EACH TYPE GET ALL PROPERTIES
    # Row 0 is skipped everywhere below; it is used as the column header
    # further down (properties[0][0]), so it is treated as the header row.
    types = types_matrix["result"] if types_matrix else None
    for type_row in (types[1:] if types is not None else []):
        curr_type = type_row[0]
        type_name = Ut.get_uri_local_name(curr_type)
        instances = int(type_row[1])
        optional[type_name] = dict()

        qry_properties = """
        ### RETRIEVE ALL PROPERTIES FOR THE TYPE [{0}]
        SELECT DISTINCT ?Properties_for_{0}
        {{
            GRAPH <{1}>
            {{
                ?resource   a                       <{2}> ;
                            ?Properties_for_{0}     ?object .
            }}
        }}
        """.format(type_name, graph, curr_type)
        properties_matrix = sparql_xml_to_matrix(qry_properties)

        properties = properties_matrix["result"] if properties_matrix else None
        if properties is None:
            continue

        if display_table:
            print("\nPROPERTY COUNT: %s" % (len(properties) - 1))
            display_matrix(properties_matrix, spacing=70, limit=100, is_activated=False)

        # PROPERTY OCCURRENCE COUNT
        # One row per property row (header included), four empty cells each.
        matrix = [["", "", "", ""] for _ in properties]
        matrix[0] = [properties[0][0], "Optional", "Instances", "Percentage"]

        cur_dic = optional[type_name]
        for row, property_row in enumerate(properties[1:], 1):
            curr_property = property_row[0]

            # Distinct resources are counted (not objects) so that the value
            # is comparable with the number of instances of the type: a
            # multi-valued property would otherwise inflate the count and
            # could be misclassified by the optional test below.
            qry_occurence = """
            ### RETRIEVE THE NUMBER OF OCCURRENCES FOR THIS PROPERTY
            ### TYPE        : {2}
            ### PROPERTY    : {3}
            ### GRAPH       : {1}
            SELECT (count(distinct ?resource) as ?Occurrences)
            {{
                GRAPH <{1}>
                {{
                    ?resource   a       <{2}> ;
                                <{3}>   ?object .
                }}
            }}
            """.format(type_name, graph, curr_type, curr_property)
            occurrences_matrix = sparql_xml_to_matrix(qry_occurence)

            if occurrences_matrix and occurrences_matrix["result"] is not None:
                raw_count = occurrences_matrix["result"][1][0]
                occurrences = int(raw_count)

                # Optional when some resource of the type lacks the property.
                is_optional = occurrences < instances

                matrix[row][0] = curr_property
                matrix[row][1] = is_optional
                matrix[row][2] = raw_count
                matrix[row][3] = occurrences / float(instances)
                cur_dic[curr_property] = is_optional

        if display_table:
            to_display = {"message": "OK", "result": matrix}
            display_matrix(to_display, spacing=50, limit=100, is_activated=True)

    # 3. TEXTUAL SUMMARY
    line = "-------------------------------------------------------------------------------------------------"
    text.write("\nGRAPH: {}".format(graph))
    for key, value in optional.items():
        text.write("\n\n\tENTITY TYPE: {}".format(key))
        text.write("\n\t\t{:100}{}".format(line, "------------"))
        text.write("\n\t\t{:<3}{:97}{}".format(len(value), "Properties", "Optional"))
        text.write("\n\t\t{:100}{}".format(line, "------------"))
        for pro, opt in value.items():
            # Optional properties are highlighted with a trailing "***".
            if opt:
                text.write("\n\t\t{:100}{}".format("{} ***".format(pro), opt))
            else:
                text.write("\n\t\t{:100}{}".format(pro, opt))

    if display_text:
        print(text.getvalue())

    return optional
def stats_optimised(graph, display_table=False, display_text=False, boolean=True):
    """Report, per entity type of a graph, how often each property is used.

    Unlike ``stats``, the occurrences of all the properties of a type are
    fetched with a single grouped query.

    :param graph: URI of the named graph to analyse.
    :param display_table: forwarded to ``display_matrix`` as ``is_activated``
        for every intermediate matrix.
    :param display_text: when true, the textual summary is printed.
    :param boolean: when exactly ``True``, each property maps to a flag that
        is true when its resource count is not a multiple of the number of
        instances of the type (i.e. the property is optional); otherwise it
        maps to the floored percentage of instances carrying the property.
    :return: a dict mapping each type URI to a dict ``{property: value}``.
    """
    optional = dict()
    text = buffer()

    # 1. FIND ALL TYPES IN THE GRAPH
    qry_types = """
    ### RETRIEVE ALL TYPES FROM THE GRAPH
    SELECT DISTINCT ?Types (count(distinct ?resource) as ?EntityCount)
    {{
        GRAPH <{}>
        {{
            ?resource a ?Types .
        }}
    }}
    GROUP by ?Types ?EntityType ORDER BY ?Graph
    """.format(graph)
    types_matrix = sparql_xml_to_matrix(qry_types)
    display_matrix(types_matrix, spacing=70, limit=100, is_activated=display_table)

    # 2. FOR EACH TYPE GET ALL PROPERTIES
    # Row 0 of each result is skipped (presumably the header row).
    types = types_matrix["result"] if types_matrix else None
    for type_row in (types[1:] if types is not None else []):
        curr_type = type_row[0]
        type_name = Ut.get_uri_local_name(curr_type)
        instances = int(type_row[1])
        optional[curr_type] = dict()

        qry_properties = """
        ### RETRIEVE ALL PROPERTIES FOR THE TYPE [{0}]
        SELECT DISTINCT ?Properties_for_{0}
        {{
            GRAPH <{1}>
            {{
                ?resource   a                       <{2}> ;
                            ?Properties_for_{0}     ?object .
            }}
        }}
        """.format(type_name, graph, curr_type)
        properties_matrix = sparql_xml_to_matrix(qry_properties)
        display_matrix(properties_matrix, spacing=70, limit=100, is_activated=display_table)

        # The occurrences are only computed when the type has properties.
        if not properties_matrix or properties_matrix["result"] is None:
            continue

        # PROPERTY OCCURRENCE COUNT: one grouped query for the whole type.
        qry_property_stats = (
            "\nSELECT ?predicate (COUNT(distinct ?resource) as ?Occurrences)"
            "\n{{\n\tGRAPH <{0}> "
            "\n\t{{\n\t\t?resource a <{1}> ."
            "\n\t\t?resource ?predicate ?object ."
            "\n\t}}\n}}\nGROUP BY ?predicate"
        ).format(graph, curr_type)

        occurrences_matrix = sparql_xml_to_matrix(qry_property_stats)
        display_matrix(occurrences_matrix, spacing=70, limit=100, is_activated=display_table)

        if not occurrences_matrix or occurrences_matrix["result"] is None:
            continue

        # THE PROPERTY IS THE KEY OF THE DICTIONARY
        cur_dic = optional[curr_type]
        for occurrence_row in occurrences_matrix["result"][1:]:
            curr_property = occurrence_row[0]
            occurrences = int(occurrence_row[1])
            if boolean is True:
                cur_dic[curr_property] = occurrences % float(instances) != 0
            else:
                cur_dic[curr_property] = math.floor(100 * occurrences / float(instances))

    # 3. TEXTUAL SUMMARY
    line = "-------------------------------------------------------------------------------------------------"
    text.write("\nGRAPH: {}".format(graph))
    for key, value in optional.items():
        text.write("\n\n\tENTITY TYPE: {}".format(key))
        text.write("\n\t\t{:100}{}".format(line, "------------"))
        text.write("\n\t\t{:<5}{:97}{}".format(len(value), "Properties", "Optional"))
        text.write("\n\t\t{:100}{}".format(line, "------------"))
        for pro, opt in value.items():
            # Truthy values are highlighted with a trailing "***".
            if opt:
                text.write("\n\t\t{:100}{}".format("{} ***".format(pro), opt))
            else:
                text.write("\n\t\t{:100}{}".format(pro, opt))

    if display_text:
        print(text.getvalue())

    return optional
def intersection_extended(specs, lens_name, display=False):
    """Build the SPARQL INSERT query that materialises an intersection lens.

    For every graph listed under ``specs[St.datasets]`` the subject/object
    targets and entity types reachable from the graph are looked up. A graph
    with a single target pair contributes a plain triple pattern plus its
    singleton-graph pattern, and is also added to the INSERT template. A
    graph with several target pairs contributes a UNION of patterns to the
    WHERE clause only.

    :param specs: mapping holding the graph URIs under ``St.datasets``.
    :param lens_name: name appended to ``Ns.lens`` and ``Ns.singletons`` to
        form the URIs of the two graphs written by the INSERT.
    :param display: when true, each lookup query is printed.
    :return: the INSERT query as a string.
    """
    about_union = """
    ### ABOUT {0}
    GRAPH <{0}>
    {{
        {1}
    }}
    """

    about_single = """
    ### {2}. ABOUT {0}
    GRAPH <{0}>
    {{
        ?{1}    ?pred_{2}   ?{3} .
    }}
    ### SINGLETONS
    GRAPH <{4}>
    {{
        ?pred_{2}   ?x_{2}      ?y_{2} .
    }}"""

    def variable_names(row):
        # Derive the SPARQL variable names of the source and target of one
        # result row: [subTarget, objTarget, subjectType, objectType].
        source = Ut.get_uri_local_name(row[0])
        target = Ut.get_uri_local_name(row[1])
        # A SPARQL variable derived from a local name starting with a digit
        # gets a "D" prefix.
        if source[0].isdigit():
            source = "D{}".format(source)
        if target[0].isdigit():
            target = "D{}".format(target)
        source_type = Ut.get_uri_local_name(row[2])
        target_type = Ut.get_uri_local_name(row[3])
        # Same dataset and same type on both sides: the target needs its own
        # variable, hence suffix 2 instead of 1.
        same_side = source == target and source_type == target_type
        return (
            "{}_{}_1".format(source, source_type[short:]),
            "{}_{}_{}".format(target, target_type[short:], 2 if same_side else 1),
        )

    where_patterns = ""
    links = Buffer.StringIO()
    singletons = Buffer.StringIO()
    graph_number = 1

    for graph in specs[St.datasets]:
        query = """
    PREFIX void: <{}>
    PREFIX bdb: <{}>
    SELECT distinct ?subTarget ?objTarget ?subjectEntityType ?objectEntityType
    {{
        <{}>
            #void:target*/(void:subjectsTarget|void:objectsTarget)* ?x ;
            void:target*/(void:subjectsTarget|void:objectsTarget)* ?x .

        ?x
            void:subjectsTarget     ?subTarget ;
            void:objectsTarget      ?objTarget ;
            bdb:subjectsDatatype    ?subjectEntityType ;
            bdb:objectsDatatype     ?objectEntityType .

        FILTER NOT EXISTS {{ ?subTarget a void:Linkset }}
        FILTER NOT EXISTS {{ ?objTarget a void:Linkset }}
    }}""".format(Ns.void, Ns.bdb, graph)

        response = sparql_xml_to_matrix(query)

        if display:
            print("INTERSECTION QUERY: %s" % query)

        if response:
            targets = response[St.result]
            size = len(targets) if targets is not None else 0

            if size > 2:
                # SEVERAL TARGET PAIRS: ONE UNION MEMBER PER DATA ROW
                last = size - 1
                members = []
                for position in range(1, size):
                    source_variable, target_variable = variable_names(targets[position])
                    members.append("\n\t\t{0}{{ ?{1} ?predicate_{2} ?{3} . }} {4}".format(
                        "", source_variable, position, target_variable,
                        "UNION" if position < last else ""))
                where_patterns += about_union.format(graph, "".join(members))

            elif size == 2:
                # ONLY ONE TARGET PAIR
                source_variable, target_variable = variable_names(targets[1])
                singleton_graph = "{}{}".format(
                    Ns.singletons, Ut.get_uri_local_name_plus(graph))

                where_patterns += about_single.format(
                    graph, source_variable, graph_number, target_variable, singleton_graph)

                links.write("\t\t?{} ?pred_{} ?{} .\n".format(
                    source_variable, graph_number, target_variable))
                singletons.write(
                    "\t\t?pred_{0} ?x_{0} ?y_{0}.\n".format(graph_number))

        graph_number += 1

    insert_query = """INSERT\n{{
    ### LINKS
    GRAPH <{5}{4}>
    {{\n{1}\t}}

    ### METADATA
    GRAPH <{6}{4}>
    {{\n{3}\t}}\n}}\nWHERE\n{{{2}\n}}
    """.format("", links.getvalue(), where_patterns, singletons.getvalue(),
               lens_name, Ns.lens, Ns.singletons)

    return insert_query
def intersection(specs, display=False):
    """Build the graph patterns of an intersection over several graphs.

    For every graph listed under ``specs[St.datasets]`` the subject/object
    targets and entity types reachable from the graph are looked up. A graph
    with a single target pair contributes one triple pattern; a graph with
    several target pairs contributes a UNION of patterns.

    :param specs: mapping holding the graph URIs under ``St.datasets``.
    :param display: when true, each lookup query is printed.
    :return: the concatenated ``GRAPH`` patterns as a string (empty when no
        graph yields a target).
    """

    def variable_names(row):
        # Derive the SPARQL variable names of the source and target of one
        # result row: [subTarget, objTarget, subjectType, objectType].
        source = Ut.get_uri_local_name(row[0])
        target = Ut.get_uri_local_name(row[1])
        # A SPARQL variable derived from a local name starting with a digit
        # gets a "D" prefix.
        if source[0].isdigit():
            source = "D{}".format(source)
        if target[0].isdigit():
            target = "D{}".format(target)
        source_type = Ut.get_uri_local_name(row[2])
        target_type = Ut.get_uri_local_name(row[3])
        # Same dataset and same type on both sides: the target needs its own
        # variable, hence suffix 2 instead of 1.
        same_side = source == target and source_type == target_type
        return (
            "{}_{}_1".format(source, source_type[short:]),
            "{}_{}_{}".format(target, target_type[short:], 2 if same_side else 1),
        )

    patterns = ""
    graph_number = 1

    for graph in specs[St.datasets]:
        query = """
    PREFIX void: <{}>
    PREFIX bdb: <{}>
    SELECT distinct ?subTarget ?objTarget ?subjectEntityType ?objectEntityType
    {{
        <{}>
            #void:target*/(void:subjectsTarget|void:objectsTarget)* ?x ;
            void:target*/(void:subjectsTarget|void:objectsTarget)* ?x .

        ?x
            void:subjectsTarget     ?subTarget ;
            void:objectsTarget      ?objTarget ;
            bdb:subjectsDatatype    ?subjectEntityType ;
            bdb:objectsDatatype     ?objectEntityType .

        FILTER NOT EXISTS {{ ?subTarget a void:Linkset }}
        FILTER NOT EXISTS {{ ?objTarget a void:Linkset }}
    }}""".format(Ns.void, Ns.bdb, graph)

        response = sparql_xml_to_matrix(query)

        if display:
            print("INTERSECTION QUERY: %s" % query)

        if response:
            targets = response[St.result]
            size = len(targets) if targets is not None else 0

            if size > 2:
                # SEVERAL TARGET PAIRS: ONE UNION MEMBER PER DATA ROW
                last = size - 1
                members = []
                for position in range(1, size):
                    source_variable, target_variable = variable_names(targets[position])
                    members.append("\n\t\t{0}{{ ?{1} ?predicate_{2} ?{3} . }} {4}".format(
                        "", source_variable, position, target_variable,
                        "UNION" if position < last else ""))

                patterns += """
    ### ABOUT {0}
    GRAPH <{0}>
    {{
        {1}
    }}
    """.format(graph, "".join(members))

            elif size == 2:
                # ONLY ONE TARGET PAIR
                source_variable, target_variable = variable_names(targets[1])

                patterns += """
    ### ABOUT {0}
    GRAPH <{0}>
    {{
        ?{1}    ?pred_{2}   ?{3} .
    }}
    """.format(graph, source_variable, graph_number, target_variable)

        graph_number += 1

    return patterns