def load_default_namespaces(directory):
    print headings("LOADING DEFAULT NAMESPACES TO STARDOG [{}]".format(stardog_db))

    if path.isdir(directory) is False:
        return "\n>>> [{}] IS NOT A DIRECTORY ".format(directory)

    f_path = path.join(directory, "namespace.bat" if Ut.is_windows() else "namespace.sh")

    # PLATFORM DEPENDENT CMD
    if Ut.is_windows():
        cmd = namespaces.format("call ", Svr.settings[St.stardog_uri])
    else:
        cmd = namespaces.format(stardog_bin, Svr.settings[St.stardog_uri])

    # EXECUTE THE CMD
    result = Ut.run_cdm(cmd, f_path, delete_after=True, output=False)

    # DISPLAY THE FINAL RETURN
    print "Finished with: {}".format(result)
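# A minimal usage sketch (illustrative only): it assumes the Stardog settings
# in Svr.settings are already loaded and that the directory will receive the
# generated namespace.bat / namespace.sh script. The path is hypothetical.
#
#   load_default_namespaces("/opt/stardog/scripts")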
def print_specs(specs):
    print Ut.headings("SPECIFICATIONS DATA", line=False)

    # PRINT SPECS
    for key, data in specs.items():
        new_line = "\n" if key == "target" or key == "source" else ""

        # SCALARS ARE CONVERTED; FOR ANY OTHER VALUE, ONLY ITS TYPE IS PRINTED
        if type(data) in (str, unicode, int, float):
            value = to_unicode(data)
        else:
            value = type(data)

        print "{}\t{:22}: {}".format(new_line, key, to_bytes(value))

        if type(data) == dict:
            for detail, val in data.items():
                print "\t\t{:18}: {}".format(detail, val)

    print ""
def composition_lens_name(specs):
    specs[St.lens_operation] = Ns.lensOpt
    src_name = get_uri_local_name(specs[St.subjectsTarget])
    trg_name = get_uri_local_name(specs[St.objectsTarget])
    specs[St.lens] = "{}comp_{}_{}".format(Ns.lens, src_name, trg_name)

    if len(specs[St.lens]) > 255:
        specs[St.lens] = "{}comp_{}_{}".format(Ns.lens, Ut.hash_it(src_name), Ut.hash_it(trg_name))

    update_specification(specs)
def expand_approx(specs, theta, stop_words_string, stop_symbols_string, linkset2expand, reorder=True):
    data = None
    inserted_1 = 0
    inserted_2 = 0
    total = 0
    count = 0
    abort = False

    for is_source in [True, False]:
        count += 1
        print Ut.headings("********* PASS {} *********".format(count))

        data = prefixed_inverted_index(
            specs, theta=theta, reorder=reorder, stop_words_string=stop_words_string,
            stop_symbols_string=stop_symbols_string, expands=True, is_source=is_source,
            linkset2expand=linkset2expand, check_file=False)

        if count == 1:
            inserted_1 += data['inserted']
            total += inserted_1
        else:
            inserted_2 += data['inserted']
            total += inserted_2

        if data[St.message].__contains__('ALREADY EXISTS'):
            abort = True
            print "\n>>> THE PROCESS IS BEING ABORTED AS THE FIRST " \
                  "PASS REVEALS THE EXISTENCE OF AN EXPANSION OF THE GRAPH."
            break

    if abort is False:

        # REMOVE DUPLICATES
        print "REMOVING REPETITION"
        if data is not None and data[St.result] is not None:
            print "\t", Qry.remove_repetition_same_direction(data[St.result])

        # PRINT THE FINAL TRIPLE COUNT
        final_inserted = Qry.get_triples_count(data[St.result])
        final_inserted = 0 if final_inserted is None else int(final_inserted)

        print "\nOVERALL STATS:" \
              "\n\tCORRESPONDENCES DISCOVERED AT PASS 1 : {}" \
              "\n\tCORRESPONDENCES DISCOVERED AT PASS 2 : {}".format(inserted_1, inserted_2)
        print "\tOVERALL CORRESPONDENCES DISCOVERED     : {}".format(total)
        print "\tTOTAL REPEATED CORRESPONDENCES REMOVED : {}".format(total - final_inserted)
        print "\tTOTAL CORRESPONDENCES INSERTED         : {}".format(final_inserted)

    return data
def diff_lens_name(specs):
    specs[St.lens_operation] = Ns.lensOpd

    # THE NAMES ARE HASHED AS THEY APPEAR TO BE TOO LONG FOR A FILE NAME.
    # THIS IS AN EXAMPLE:
    # print len("diff_eter_2014_orgreg_20170718_nearbyGeoSim1Kilometer_University_LatitudeLongitude_P871330770"
    #           "_refined_eter_2014_orgreg_20170718_nearbyGeoSim1Kilometer_University_LatitudeLongitude_P871330770"
    #           "_approxStrSim_English_Institution_Name_P255977302-Metadata-20180107.t")
    src_name = Ut.hash_it(get_uri_local_name(specs[St.subjectsTarget]))
    trg_name = Ut.hash_it(get_uri_local_name(specs[St.objectsTarget]))
    specs[St.lens] = "{}diff_{}_{}".format(Ns.lens, src_name, trg_name)

    update_specification(specs)
def load_data(data_path, save_in):
    count_record = 0
    clusters = {}
    base = "http://voc.nl/people/"

    # READING THE CLUSTER FILE: SEMICOLON-SEPARATED RECORDS WITH THE CLUSTER ID
    # IN THE SECOND COLUMN AND THE RESOURCE ID IN THE THIRD
    with open(data_path, "rb") as data:
        for record in data:
            count_record += 1
            print "\r\t\t>>>", count_record,
            features = record.split(";")
            if len(features) > 2:
                if features[1] not in clusters:
                    clusters[features[1]] = {"nodes": ["<{}{}>".format(base, features[2])]}
                else:
                    clusters[features[1]]["nodes"] += ["<{}{}>".format(base, features[2])]

    freq = {}
    for key, val in clusters.items():

        # GENERATE ALL PAIRWISE LINKS WITHIN THE CLUSTER
        clusters[key]["links"] = Ut.ordered_combinations(val["nodes"])

        # EVERY LINK GETS A DEFAULT STRENGTH OF 1
        strengths = {}
        for link in clusters[key]["links"]:
            link_key = Ut.get_key(link[0], link[1])
            strengths[link_key] = 1
        clusters[key]["strengths"] = strengths

        # CLUSTER SIZE FREQUENCY
        size = str(len(val["nodes"]))
        if size not in freq:
            freq[size] = 1
        else:
            freq[size] += 1

    print "SERIALIZING THE DICTIONARY"
    serialize_dict(save_in, clusters, cluster_limit=1000)
    print "JOB DONE!!!"
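# Usage sketch. The input layout below is an assumption inferred from the
# indices used above (column 2 holds the cluster id, column 3 the person id);
# the file names are hypothetical:
#
#   0;cluster_42;p_1890
#   1;cluster_42;p_2054
#
#   load_data("ilns.csv", save_in="ilns_serialized")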
def export_flat_alignment_service(alignment):
    alignment = str(alignment).strip()
    row_alignment = alignment
    alignment = alignment if Ut.is_nt_format(alignment) is True else "<{}>".format(alignment)

    # CONSTRUCT QUERY
    query = """
    PREFIX ll: <{0}>
    PREFIX linkset: <{1}>
    PREFIX lens: <{2}>
    PREFIX singletons: <{3}>

    CONSTRUCT {{ ?srcCorr ll:mySameAs ?trgCorr .
                 ?trgCorr ll:mySameAs ?srcCorr . }}
    WHERE
    {{
        BIND( {4} as ?alignment )

        # THE ALIGNMENT GRAPH WITH EXPLICIT SYMMETRY
        GRAPH ?alignment {{ ?srcCorr ?singleton ?trgCorr . }}
    }} ;

    CONSTRUCT {{ ?alignment ?pred ?obj .
                 ?obj ?predicate ?object . }}
    WHERE
    {{
        # THE METADATA
        BIND( {4} as ?alignment )
        ?alignment ?pred ?obj .
        OPTIONAL {{ ?obj ?predicate ?object . }}
    }}
    """.format(Ns.alivocab, Ns.linkset, Ns.lens, Ns.singletons, alignment)
    # print query

    # FIRE THE CONSTRUCT AGAINST THE TRIPLE STORE
    alignment_construct = Qry.endpointconstruct(query)

    # COUNT THE CORRESPONDENCES AND REMOVE EMPTY LINES
    triples = len(regex.findall('ll:mySameAs', alignment_construct))
    alignment_construct = "\n".join(
        [line for line in alignment_construct.splitlines() if line.strip()])

    result = "### TRIPLE COUNT: {}\n### LINKSET: {}\n".format(triples, alignment) + alignment_construct
    message = "You have just downloaded the graph [{}] which contains [{}] correspondences. ".format(
        row_alignment, triples)

    return {'result': result, 'message': message}
def eval_sheet(targets, count, smallest_hash, a_builder, alignment, children, automated_decision):
    first = False
    a_builder.write("\n{:<5}\t{:<20}{:12}{:20}{:23}{:23}".format(
        count, smallest_hash, "", "", automated_decision, ""))

    if targets is None:
        a_builder.write(Cls.disambiguate_network(alignment, children))
    else:
        response = Cls.disambiguate_network_2(children, targets, output=False)
        if response:
            temp = ""
            dataset = ""
            for i in range(1, len(response)):
                resource = Ut.get_uri_local_name(response[i][0])
                if i == 1:
                    temp = "{:25}: {}".format(resource, response[i][1])
                elif dataset == response[i][0]:
                    temp = "{:25} | {}".format(temp, response[i][1])
                else:
                    if first is False:
                        a_builder.write(" {}\n".format(temp))
                    else:
                        a_builder.write("{:108}{}\n".format("", temp))
                    first = True
                    temp = "{:25}: {}".format(resource, response[i][1])
                dataset = response[i][0]

            # FLUSH THE LAST BUFFERED LINE
            a_builder.write("{:108}{}\n".format("", temp))
def process_input(text):
    try:
        # DIACRITIC CHARACTERS MAPPING
        temp = Ut.character_mapping(text)
        temp = temp.lower()

        # REMOVE (....) FROM THE VALUE
        if remove_term_in_bracket is True:
            temp = remove_info_in_bracket(temp)

        # REMOVE STOP WORDS
        if len(stop_word) > 0:
            temp = remove_stop_words(temp)

        # REMOVE STOP SYMBOLS OR CHARACTERS
        if stop_symbols_string is not None and len(stop_symbols_string) > 0:
            stop_symbols = to_bytes(stop_symbols_string).replace("–", "\xe2\x80\x93")
            pattern = str("[{}]".format(stop_symbols.strip())).replace(" ", "")
            temp = re.sub(pattern, "", temp)

        return temp.strip()

    except Exception as error:
        print "!!!!!!!!!!!!! PROBLEM !!!!!!!!!!!!!!!!!!!"
        print str(error.message)
        return text
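# Illustrative behaviour (an assumption: it presumes remove_term_in_bracket is
# True; the exact output depends on the configured stop words and symbols):
#
#   process_input("Vrije Universiteit (VU) Amsterdam")
#   # -> "vrije universiteit amsterdam"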
def main_alignment(alignment):
    # ****************************************************************************
    # GIVEN AN ALIGNMENT, RETURN THE MAIN ONE
    # ****************************************************************************

    try:
        # LOCAL NAME OF THE GRAPH
        name = Ut.get_uri_local_name_plus(alignment)
        print "{:12} : {}".format("LOCAL NAME", name)

        query_search = std_queries["graphs_search"].format(name)
        response = Qry.sparql_xml_to_matrix(query_search)
        results = response["result"]

        if results is not None:
            for i in range(1, len(results)):
                if results[i][0].__contains__("singletons") is False:
                    return results[i][0]

        if str(alignment).__contains__(Ns.singletons):
            return str(alignment).replace(Ns.singletons, Ns.linkset)
        else:
            return alignment

    except ValueError:
        traceback.print_exc()
        return alignment
def target_datatype_properties(model, label, linkset_label):
    main_tabs = "\t\t\t"
    tabs = "{}\t\t\t\t\t\t\t\t\t\t\t\t".format(main_tabs)

    # ALIGNMENT COMBINATION: LIST OF DICTIONARIES
    alignment_targets = ""
    property_list_bind = ""
    count = 0

    for item in model:
        count += 1
        target = item[St.graph]
        data = item[St.data]

        # LIST OF DICTIONARIES
        for n in range(0, len(data)):
            code = "llTarget:{}_{}".format(label, Ut.hash_it(target + str(data[n])))
            datatype = data[n][St.entity_datatype]
            properties = data[n][St.properties]
            property_list = ""

            # LIST OF PROPERTIES
            for i in range(0, len(properties)):
                i_property = properties[i] if Ut.is_nt_format(properties[i]) \
                    else "<{}>".format(properties[i])
                property_list += "?property_{}_{}_{} ".format(count, n, i) if i == 0 \
                    else ",\n{}?property_{}_{}_{} ".format(tabs, count, n, i)

                if i == 0 and count == 1:
                    property_list_bind += """BIND( IRI("{}") AS ?property_{}_{}_{})""".format(
                        i_property, count, n, i)
                else:
                    property_list_bind += """\n{}BIND( IRI("{}") AS ?property_{}_{}_{})""".format(
                        main_tabs, i_property, count, n, i)

            triples = """
{5}linkset:{4} ll:hasAlignmentTarget {0} .
{5}{0} ll:hasTarget <{1}> .
{5}{0} ll:hasDatatype <{2}> .
{5}{0} ll:aligns {3}.
""".format(code, target, datatype, property_list, linkset_label, main_tabs)

            alignment_targets += triples

    return {"list": alignment_targets, "binds": property_list_bind}
def set_refined_name(specs):
    reducer = ""
    intermediate = ""
    threshold = ""
    extended_graph = ""
    delta = ""

    # THE REDUCER
    if St.reducer in specs[St.source]:
        reducer += specs[St.source][St.reducer]
    if St.reducer in specs[St.target]:
        reducer += specs[St.target][St.reducer]

    # THE EXTENDED GRAPH
    if St.extended_graph in specs[St.source]:
        extended_graph += str(specs[St.source][St.extended_graph])
    if St.extended_graph in specs[St.target]:
        extended_graph += str(specs[St.target][St.extended_graph])

    # THE INTERMEDIATE GRAPH
    if St.intermediate_graph in specs:
        intermediate = specs[St.intermediate_graph]

    if St.threshold in specs:
        threshold += str(specs[St.threshold])

    # THE NUMERIC DELTA
    if St.delta in specs:
        delta += str(specs[St.delta])

    hashed = hash(reducer + extended_graph + intermediate + threshold + delta +
                  specs[St.source][St.aligns_name] + specs[St.target][St.aligns_name] +
                  specs[St.linkset_name])
    append = str(hashed).replace("-", "N") if str(hashed).__contains__("-") else "P{}".format(hashed)

    specs[St.refined_name] = "refined_{}_{}_{}_{}".format(
        specs[St.linkset_name], specs[St.mechanism], specs[St.source][St.aligns_name], append)

    dir_name = DIRECTORY
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    singleton_metadata_file = "{}(SingletonMetadata)-{}.trig".format(specs[St.refined_name], date)
    singleton_metadata_output = "{}/{}".format(dir_name, singleton_metadata_file)
    future_path = os.path.join(DIRECTORY, singleton_metadata_output)
    future_path = future_path.replace("\\", "/").replace("//", "/")

    if len(future_path) > 255:
        full_hashed = Ut.hash_it(specs[St.refined_name])
        specs[St.refined_name] = "refined_{}_{}".format(specs[St.mechanism], full_hashed)

    specs[St.refined] = specs[St.linkset].replace(specs[St.linkset_name], specs[St.refined_name])
    specs[St.refined] = specs[St.refined].replace("/lens/", "/linkset/")
    print "\t- specs[St.refined]", specs[St.refined]
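# For illustration (hypothetical values): with linkset_name "grid_eter",
# mechanism "exactStrSim" and a source aligns_name "name", the refined name
# follows the pattern
#
#   refined_grid_eter_exactStrSim_name_P871330770
#
# where the trailing P.../N... token encodes the sign of the hash appended above.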
def properties(graph, datatype=None):
    # WHEN NO DATATYPE IS PROVIDED, THE TYPE RESTRICTION IS COMMENTED OUT OF THE QUERY
    comment = "# " if datatype is None else ""
    if datatype is not None:
        datatype = datatype if Ut.is_nt_format(datatype) is True else "<{}>".format(datatype)
    graph = graph if Ut.is_nt_format(graph) is True else "<{}>".format(graph)

    properties_query = """
    # <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
    SELECT DISTINCT ?predicate
    WHERE
    {{
        GRAPH {}
        {{
            {}?subj {} ?type .
            ?subj ?predicate ?obj .
        }}
    }}
    """.format(graph, comment, datatype)

    print properties_query
    Qr.display_result(query=properties_query, spacing=50, limit=0, is_activated=True)
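# Usage sketch (graph and datatype URIs are illustrative placeholders):
#
#   properties("http://risis.eu/dataset/eter_2014",
#              datatype="http://risis.eu/eter_2014/ontology/class/University")
#
# Without a datatype, the type restriction stays commented out of the query
# and the predicates of all subjects in the graph are listed.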
def export_flat_alignment(alignment):
    print Ut.headings("EXPORTING THE ALIGNMENT WITH NO METADATA")
    print "Export for: {}".format(alignment)

    alignment = str(alignment).strip()
    row_alignment = alignment
    alignment = alignment if Ut.is_nt_format(alignment) is True else "<{}>".format(alignment)

    # CONSTRUCT QUERY
    query = """
    PREFIX ll: <{}>
    CONSTRUCT {{ ?x ll:mySameAs ?z }}
    WHERE
    {{
        GRAPH {} {{ ?x ?y ?z }}
    }} order by ?x
    """.format(Ns.alivocab, alignment)
    # print query

    # FIRE THE CONSTRUCT AGAINST THE TRIPLE STORE
    alignment_construct = Qry.endpointconstruct(query)

    # A COMMA IS COUNTED WHENEVER THERE IS MORE THAN ONE OBJECT FOR A SUBJECT
    triples = len(regex.findall('ll:mySameAs', alignment_construct)) + len(
        regex.findall(',', alignment_construct))

    # REMOVE EMPTY LINES
    alignment_construct = "\n".join(
        [line for line in alignment_construct.splitlines() if line.strip()])
    alignment_construct = alignment_construct.replace("{", "{}\n{{".format(alignment))

    # RESULTS
    result = "### TRIPLE COUNT: {0}\n### LINKSET: {1}\n".format(triples, alignment) + alignment_construct
    message = "You have just downloaded the graph [{}] which contains [{}] correspondences. ".format(
        row_alignment, triples)

    return {'result': result, 'message': message, "triples": triples}
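# Usage sketch (the linkset URI is an illustrative placeholder):
#
#   flat = export_flat_alignment("http://risis.eu/linkset/grid_eter_exactStrSim")
#   print flat["message"]    # reports the correspondence count
#   print flat["result"]     # "### TRIPLE COUNT: ..." header followed by the dump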
def linkset_createdorused(question_uri, alignment_mapping_uri, specs, is_created=True):
    if alignment_mapping_uri.__contains__("<<"):
        alignment_mapping_uri = str(alignment_mapping_uri).replace("<<", "<").replace(">>", ">")
    if Ut.is_nt_format(alignment_mapping_uri) is False:
        alignment_mapping_uri = "<{}>".format(alignment_mapping_uri)

    linkset_uri = specs[St.refined] if St.refined in specs else specs[St.linkset]
    comment = "#"

    if is_created is True:
        created = "alivocab:created"
        opposed = "prov:used\t\t"
        print "REGISTERING [{}] AS CREATED".format(linkset_uri)
    else:
        created = "prov:used\t\t"
        opposed = "alivocab:created"
        comment = "#"
        print "REGISTERING [{}] AS IMPORTED".format(linkset_uri)

    query = PREFIX + """
    INSERT
    {{
        GRAPH <{0}>
        {{
            {1} {2} <{3}> .
            {4}{1} prov:wasDerivedFrom <{3}> .
        }}
    }}
    WHERE
    {{
        GRAPH <{0}>
        {{
            FILTER NOT EXISTS {{ {1} {5} <{3}> . }}
        }}
        ### BIND(iri(\"{1}\") AS ?aligns)
    }}
    """.format(question_uri, alignment_mapping_uri, created, linkset_uri, comment, opposed)
    # print query

    return query
def set_writers(specs):
    # SET THE PATH WHERE THE LINKSET WILL BE SAVED AND GET THE WRITERS
    print DIRECTORY
    writers = Ut.get_writers(specs[St.linkset_name], directory=DIRECTORY)

    for key, writer in writers.items():
        # THE DICTIONARY ALSO CONTAINS OUTPUT PATHS, WHICH ARE PLAIN STRINGS
        if type(writer) is not str:
            if key is not St.batch_writer and key is not St.meta_writer:
                writer.write(prefix)
            if key is St.crpdce_writer:
                writer.write("\nlinkset:{}\n{{\n".format(specs[St.linkset_name]))
            elif key is St.singletons_writer:
                writer.write("\nsingletons:{}\n{{".format(specs[St.linkset_name]))

    return writers
def check_constraint():
    text = constraint_text.lower()
    text = text.split(",")

    # CONSTRAINT BUILDER
    c_builder = Buffer.StringIO()

    if constraint_targets is not None:
        for dictionary in constraint_targets:
            graph = dictionary[St.graph]
            data_list = dictionary[St.data]
            properties = data_list[0][St.properties]
            prop = properties[0] if Ut.is_nt_format(properties[0]) else "<{}>".format(properties[0])

            # WRITING THE CONSTRAINT ON THE GRAPH
            graph_q = """
        {{
            GRAPH <{0}>
            {{
                ?lookup {1} ?constraint .
            }}
        }}
        """.format(graph, prop)
            c_builder.write(graph_q) if len(c_builder.getvalue()) == 0 \
                else c_builder.write("UNION {}".format(graph_q))

        # WRITING THE FILTER
        if len(c_builder.getvalue()) > 0:
            for i in range(0, len(text)):
                if i == 0:
                    c_builder.write("""FILTER (LCASE(STR(?constraint)) = "{}" """.format(text[i].strip()))
                else:
                    c_builder.write("""|| LCASE(STR(?constraint)) = "{}" """.format(text[i].strip()))
            c_builder.write(")")

    # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
    query = Qry.cluster_rsc_strengths_query(resources, linkset)
    query = query.replace("# CONSTRAINTS IF ANY", c_builder.getvalue())
    response = Qry.sparql_xml_to_matrix(query)

    return response[St.result] is not None
def export_flat_alignment_and_metadata(alignment):
    flat = export_flat_alignment(alignment)

    alignment = str(alignment).strip()
    row_alignment = alignment
    alignment = alignment if Ut.is_nt_format(alignment) is True else "<{}>".format(alignment)

    # CONSTRUCT QUERY
    query = """
    PREFIX ll: <{0}>
    PREFIX linkset: <{1}>
    PREFIX lens: <{2}>
    PREFIX singletons: <{3}>

    CONSTRUCT {{ ?alignment ?pred ?obj .
                 ?obj ?predicate ?object . }}
    WHERE
    {{
        BIND ({4} AS ?alignment)

        # THE METADATA
        ?alignment ?pred ?obj .
        OPTIONAL {{ ?obj ?predicate ?object . }}
    }} #LIMIT 10
    """.format(Ns.alivocab, Ns.linkset, Ns.lens, Ns.singletons, alignment)
    # print query

    # FIRE THE CONSTRUCT AGAINST THE TRIPLE STORE
    alignment_construct = Qry.endpointconstruct(query, clean=False)

    # REMOVE EMPTY LINES AND APPEND THE FLAT CORRESPONDENCES
    triples = flat["triples"]
    alignment_construct = "\n".join(
        [line for line in alignment_construct.splitlines() if line.strip()]) + "\n\n" + flat['result']

    result = "### GENERIC METADATA FOR \n### LINKSET: {}\n\n{}".format(alignment, alignment_construct)
    message = "You have just downloaded the graph [{}] which contains [{}] correspondences. ".format(
        row_alignment, triples)
    print result

    return {'result': result, 'message': message}
def set_subset_name(specs, inverse=False):
    if inverse is False:
        h_name = specs[St.mechanism] + \
                 specs[St.source][St.graph_name] + specs[St.source][St.link_old_name] + \
                 specs[St.target][St.graph_name] + specs[St.source][St.entity_datatype] + \
                 specs[St.target][St.entity_datatype]
        hashed = hash(h_name)
        append = str(hashed).replace("-", "N") if str(hashed).__contains__("-") else "P{}".format(hashed)

        specs[St.linkset_name] = "subset_{}_{}_{}_{}_{}_{}".format(
            specs[St.source][St.graph_name], specs[St.target][St.graph_name],
            specs[St.mechanism], specs[St.source][St.entity_name],
            specs[St.source][St.link_old_name], append)

    dir_name = DIRECTORY
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    singleton_metadata_file = "{}(SingletonMetadata)-{}.trig".format(specs[St.linkset_name], date)
    singleton_metadata_output = "{}/{}".format(dir_name, singleton_metadata_file)
    future_path = os.path.join(DIRECTORY, singleton_metadata_output)
    future_path = future_path.replace("\\", "/").replace("//", "/")

    if len(future_path) > 255:
        full_hashed = Ut.hash_it(specs[St.linkset_name])
        specs[St.linkset_name] = "{}_{}_{}".format(
            specs[St.source][St.graph_name], specs[St.mechanism], full_hashed)

    specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])
    return specs[St.linkset]
def get_table(dataset_specs, reducer=None):
    # ADD THE REDUCER IF SET. THE REDUCER (DATASET REDUCER) AVOIDS COMPUTING
    # SIMILARITY FOR INSTANCES THAT WERE ALREADY MATCHED
    print "\nLOADING: {} {}".format(dataset_specs[St.graph], dataset_specs[St.entity_datatype])

    if reducer is None:
        reducer_comment = "#"
        reducer = ""
    else:
        reducer_comment = ""

    aligns = dataset_specs[St.aligns] if Ut.is_nt_format(dataset_specs[St.aligns]) \
        else "<{}>".format(dataset_specs[St.aligns])

    query = """
    SELECT DISTINCT *
    {{
        GRAPH <{0}>
        {{
            ?subject
                a <{1}> ;
                {2} ?object .
        }}
        {4}FILTER NOT EXISTS
        {4}{{
        {4}    GRAPH <{3}>
        {4}    {{
        {4}        {{ ?subject ?pred ?obj . }}
        {4}        UNION
        {4}        {{ ?obj ?pred ?subject . }}
        {4}    }}
        {4}}}
    }} {5}
    """.format(dataset_specs[St.graph], dataset_specs[St.entity_datatype],
               aligns, reducer, reducer_comment, LIMIT)

    table_matrix = Qry.sparql_xml_to_matrix(query)
    # Qry.display_matrix(table_matrix, is_activated=True)
    # print query

    if table_matrix[St.result]:
        print "\tINPUT SIZE: {}".format(str(len(table_matrix[St.result]) - 1))

    return table_matrix[St.result]
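# Usage sketch (all URIs are illustrative placeholders). With a reducer graph
# provided, resources already linked in that graph are filtered out of the table:
#
#   matrix = get_table(
#       {St.graph: "http://risis.eu/dataset/eter_2014",
#        St.entity_datatype: "http://risis.eu/eter_2014/ontology/class/University",
#        St.aligns: "http://risis.eu/eter_2014/ontology/predicate/name"},
#       reducer="http://risis.eu/linkset/previous_alignment")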
def linkset_evolution(research_question_uri, refined_linkset_uri):
    # BUILD THE SPECIFICATION
    specs = {
        St.researchQ_URI: research_question_uri.strip(),
        St.linkset: refined_linkset_uri
    }

    # DOCUMENT THE ALIGNMENT
    document = ""
    metadata = linkset_evolution_composition(alignment_mapping=specs)

    if metadata:
        # 1: GETTING SUBJECT - OBJECT & MECHANISM
        elements1 = re.findall('\t.*:aligns(.*) "<{0,1}', metadata)
        elements2 = re.findall('"(<{0,1}.*?>{0,1})"', metadata, re.S)

        if len(elements1) == len(elements2) == 3:
            for i in range(3):
                append = " | " if i < 2 else ""
                two = elements2[i] if Ut.is_nt_format(elements2[i]) else "<{}>".format(elements2[i])
                document += "{}={}{}".format(elements1[i], two, append)
            document = "[{}]".format(document)

        # FOLLOW DOWN THE PATH WITH A RECURSIVE CALL
        new_link = linkset_wasderivedfrom(refined_linkset_uri)
        new_document = linkset_evolution(research_question_uri, new_link)
        return document + ";\n" + new_document if new_document else document

    return document
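# Usage sketch (URIs are illustrative placeholders). The function walks the
# derivation chain via linkset_wasderivedfrom and returns one
# "[aligns...=<...> | ...]" block per refinement level, separated by ";\n":
#
#   trail = linkset_evolution(
#       "http://risis.eu/activity/idea_67a8",
#       "http://risis.eu/linkset/refined_grid_eter_exactStrSim")
#   print trail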
def generate_lens_name(datasets, operator="union"):
    datasets.sort()
    ds_concat = ""
    for dataset in datasets:
        ds_concat += dataset

    # THE QUERY FOR CHECKING WHETHER THE LENS EXISTS
    query = ask_union(datasets)

    # THE LIST OF DATASET UNIQUE NAMES
    unique_list = list()
    for dataset in datasets:
        lens_targets_unique(unique_list, dataset)

    # CAPITALISE AND CONCATENATE THE LOCAL NAMES
    name = ""
    for i in range(0, len(unique_list)):
        local_name = Ut.get_uri_local_name(unique_list[i])
        link = "" if i == 0 else "_"
        name += link + (local_name[0]).upper() + local_name[1:]

    hash_value = hash(name + ds_concat)
    hash_value = str(hash_value).replace("-", "N") \
        if str(hash_value).__contains__("-") else "P{}".format(hash_value)
    name = "{}_{}_{}".format(operator, name, hash_value)

    return {"name": name, "query": query}
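# For illustration (hypothetical dataset URIs): a union lens over two datasets
# whose targets resolve to the local names "grid" and "eter" would be named
# along the lines of
#
#   union_Grid_Eter_P123456789
#
# with an N... suffix instead of P... when the hash value is negative.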
def set_linkset_name(specs, inverse=False):
    src_aligns = ""
    trg_aligns = ""
    reducer = ""
    intermediate = ""
    threshold = ""
    delta = ""
    geo = ""
    unit = ""
    unit_value = ""
    source = specs[St.source]
    target = specs[St.target]

    if St.reducer in source:
        reducer += source[St.reducer]

    # GEO DATA
    if St.longitude in source:
        geo += source[St.longitude]
    if St.latitude in source:
        geo += source[St.latitude]
    if St.longitude in target:
        geo += target[St.longitude]
    if St.latitude in target:
        geo += target[St.latitude]
    if St.unit in specs:
        geo += str(specs[St.unit])
        unit = Ut.get_uri_local_name(str(specs[St.unit]))
    if St.unit_value in specs:
        geo += str(specs[St.unit_value])
        unit_value = str(specs[St.unit_value])

    if St.reducer in specs[St.target]:
        reducer += target[St.reducer]
    if St.intermediate_graph in specs:
        intermediate = str(specs[St.intermediate_graph])
    if St.threshold in specs:
        threshold += str(specs[St.threshold])
    if St.delta in specs:
        delta += str(specs[St.delta])

    if St.aligns_name in source:
        src_aligns += source[St.aligns_name]
    elif St.latitude_name in source:
        src_aligns += "Latitude"
        if St.longitude_name in source:
            src_aligns += "Longitude"

    if St.aligns_name in target:
        trg_aligns += target[St.aligns_name]
    elif St.latitude_name in target:
        trg_aligns += "Latitude"
        if St.longitude_name in target:
            trg_aligns += "Longitude"

    dir_name = DIRECTORY
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')

    if inverse is False:
        h_name = specs[St.mechanism] + \
                 source[St.graph_name] + src_aligns + \
                 target[St.graph_name] + trg_aligns + \
                 source[St.entity_datatype] + target[St.entity_datatype] + "-" + \
                 reducer + intermediate + threshold + delta + geo
        hashed = hash(h_name)
        append = str(hashed).replace("-", "N") if str(hashed).__contains__("-") else "P{}".format(hashed)

        specs[St.linkset_name] = "{}_{}_{}{}{}_{}_{}_{}".format(
            source[St.graph_name], target[St.graph_name], specs[St.mechanism],
            unit_value, unit, source[St.entity_name], src_aligns, append)

        # MAKE SURE THE FUTURE SINGLETON METADATA FILE PATH DOES NOT EXCEED
        # THE 255 CHARACTER LIMIT OF THE FILE SYSTEM
        singleton_metadata_file = "{}(SingletonMetadata)-{}.trig".format(specs[St.linkset_name], date)
        singleton_metadata_output = "{}/{}".format(dir_name, singleton_metadata_file)
        future_path = os.path.join(DIRECTORY, singleton_metadata_output)
        future_path = future_path.replace("\\", "/").replace("//", "/")

        if len(future_path) > 255:
            full_hashed = Ut.hash_it(specs[St.linkset_name])
            specs[St.linkset_name] = "{}_{}_{}".format(
                source[St.graph_name], specs[St.mechanism], full_hashed)

        specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])
        return specs[St.linkset]

    else:
        h_name = specs[St.mechanism] + \
                 target[St.graph_name] + trg_aligns + \
                 source[St.graph_name] + src_aligns + \
                 target[St.entity_datatype] + source[St.entity_datatype] + "-" + \
                 reducer + intermediate + threshold + delta + geo
        hashed = hash(h_name)
        append = str(hashed).replace("-", "N") if str(hashed).__contains__("-") else "P{}".format(hashed)

        specs[St.linkset_name] = "{}_{}_{}{}{}_{}_{}_{}".format(
            target[St.graph_name], source[St.graph_name], specs[St.mechanism],
            unit_value, unit, target[St.entity_name], trg_aligns, append)

        singleton_metadata_file = "{}(SingletonMetadata)-{}.trig".format(specs[St.linkset_name], date)
        singleton_metadata_output = "{}/{}".format(dir_name, singleton_metadata_file)
        future_path = os.path.join(DIRECTORY, singleton_metadata_output)
        future_path = future_path.replace("\\", "/").replace("//", "/")

        if len(future_path) > 255:
            full_hashed = Ut.hash_it(specs[St.linkset_name])
            specs[St.linkset_name] = "{}_{}_{}".format(
                target[St.graph_name], specs[St.mechanism], full_hashed)

        specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])
        print "\t- specs[St.linkset]", specs[St.linkset]
        return specs[St.linkset]
def delete_linkset_rq(rq_uri, linkset_uri):
    # DELETE THE FILTERS AND DISCONNECT THE LINKSET
    query1 = PREFIX + """
    # DELETE THE FILTERS
    DELETE
    {{
        GRAPH <{0}>
        {{
            <{0}> alivocab:created ?filter .
            ?filter ?pred ?obj .
        }}
    }}
    WHERE
    {{
        GRAPH <{0}>
        {{
            BIND(<{1}> AS ?linkset)
            <{0}> alivocab:created ?filter .
            ?filter alivocab:appliesTo ?linkset .
            ?filter ?pred ?obj .
        }}
    }};

    # 1 DISCONNECT THE LINKSET
    DELETE
    {{
        GRAPH <{0}> {{ ?s ?p ?linkset . }}
    }}
    WHERE
    {{
        BIND(<{1}> AS ?linkset) .
        GRAPH <{0}>
        {{
            ?s alivocab:created|prov:used ?linkset .
            ?s ?p ?linkset .
        }}
        FILTER NOT EXISTS
        {{
            GRAPH <{0}> {{ ?view_lens alivocab:selected ?linkset . }}
        }}
        FILTER NOT EXISTS
        {{
            ?lens a ?type ;
                void:target|void:subjectsTarget|void:objectsTarget ?linkset .
            GRAPH <{0}> {{ ?s3 alivocab:created|prov:used ?lens . }}
        }}
    }}""".format(rq_uri, linkset_uri)

    # 2 DROP THE SINGLETON AND LINKSET GRAPHS, THEN DELETE THE METADATA
    query2 = PREFIX + """
    DROP SILENT GRAPH <{0}> ;
    DROP SILENT GRAPH <{1}> ;

    # 2-B DELETE THE METADATA COMPLETELY IF IT'S NOT USED IN ANY RQ
    DELETE
    {{
        ?linkset ?p ?o .
        ?object ?pred ?obj .
    }}
    WHERE
    {{
        BIND(<{1}> AS ?linkset) .
        ?linkset bdb:assertionMethod|bdb:linksetJustification ?object .
        ?object ?pred ?obj .
        ?linkset ?p ?o .
    }}
    """.format(Ut.from_alignment2singleton(linkset_uri), linkset_uri)

    return [query1, query2]
def specification_2_linkset_subset(specs, activated=False):
    if activated is True:
        print Ut.headings("EXECUTING LINKSET SUBSET SPECS...")
    else:
        print Ut.headings("THE FUNCTION [specification_2_linkset_subset] IS NOT ACTIVATED")
        return {St.message: Ec.ERROR_CODE_0, St.error_code: 0, St.result: None}

    # ACCESS THE TASK SPECIFIC PREDICATE COUNT
    specs[St.sameAsCount] = Qry.get_same_as_count(specs[St.mechanism])

    # UPDATE THE QUERY THAT IS GOING TO BE EXECUTED
    if specs[St.sameAsCount]:
        source = specs[St.source]
        target = specs[St.target]

        # UPDATE THE SPECS OF SOURCE AND TARGETS
        update_specification(source)
        update_specification(target)

        # GENERATE THE NAME OF THE LINKSET
        Ls.set_subset_name(specs)

        # SETTING SOME GENERIC METADATA INFO
        specs[St.link_name] = "same"
        specs[St.link] = "http://risis.eu/linkset/predicate/{}".format(specs[St.link_name])
        specs[St.link_subpropertyof] = "http://risis.eu/linkset/predicate/{}".format(specs[St.link_name])
        specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])
        specs[St.assertion_method] = "{}{}".format(Ns.method, specs[St.linkset_name])
        specs[St.justification] = "{}{}".format(Ns.justification, specs[St.linkset_name])

        # COMMENT ON THE LINK PREDICATE
        specs[St.link_comment] = "The predicate <{}> is used in replacement of the linktype <{}> used in the " \
                                 "original <{}> dataset.".format(
                                     specs[St.link], specs[St.source][St.link_old], specs[St.source][St.graph])

        # COMMENT ON THE JUSTIFICATION FOR THIS LINKSET
        specs[St.justification_comment] = "In OrgRef, a set of entities is linked to GRID. The linking method " \
                                          "used by OrgRef is unknown. Here we assume that it is a curated work " \
                                          "and extracted it as a linkset."

        # COMMENT ON THE LINKSET ITSELF
        specs[St.linkset_comment] = "The current linkset is a subset of the <{0}> dataset that links <{0}> to " \
                                    "<{1}>. The methodology used by <{0}> to generate this builtin linkset is " \
                                    "unknown.".format(specs[St.source][St.graph], specs[St.target][St.graph])

        source[St.entity_ns] = str(source[St.entity_datatype]).replace(source[St.entity_name], '')
        target[St.entity_ns] = str(target[St.entity_datatype]).replace(target[St.entity_name], '')

        # GENERATE THE LINKSET
        inserted_linkset = spa_linkset_subset(specs, activated)

        if inserted_linkset[St.message].__contains__("ALREADY EXISTS"):
            return inserted_linkset

        if specs[St.triples] > "0":
            # REGISTER THE ALIGNMENT
            Urq.register_alignment_mapping(specs, created=True)
            return inserted_linkset
        else:
            print Ec.ERROR_CODE_1
            return {St.message: Ec.ERROR_CODE_1, St.error_code: 5, St.result: None}
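# A minimal specs sketch (all values are hypothetical; only keys read by the
# function above are shown, and update_specification is expected to derive the
# remaining names from them):
#
#   specs = {
#       St.mechanism: "exactStrSim",
#       St.source: {St.graph: "http://risis.eu/dataset/orgref_20180301",
#                   St.link_old: "http://risis.eu/orgref_20180301/ontology/predicate/grid"},
#       St.target: {St.graph: "http://www.grid.ac"},
#   }
#   specification_2_linkset_subset(specs, activated=True)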
def lens_refine_name(specs, lens_type):
    extra = ""
    source = specs[St.source]
    target = specs[St.target]

    if St.reducer in source:
        extra += source[St.reducer]

    # GEO DATA
    if St.longitude in source:
        extra += source[St.longitude]
    if St.latitude in source:
        extra += source[St.latitude]
    if St.longitude in target:
        extra += target[St.longitude]
    if St.latitude in target:
        extra += target[St.latitude]
    if St.unit in specs:
        extra += str(specs[St.unit])
    if St.unit_value in specs:
        extra += str(specs[St.unit_value])

    if St.reducer in specs[St.target]:
        extra += target[St.reducer]
    if St.intermediate_graph in specs:
        extra += str(specs[St.intermediate_graph])
    if St.threshold in specs:
        extra += str(specs[St.threshold])
    if St.delta in specs:
        extra += str(specs[St.delta])

    if St.aligns_name in source:
        extra += source[St.aligns_name]
    elif St.latitude_name in source:
        extra += "Latitude"
        if St.longitude_name in source:
            extra += "Longitude"

    if St.aligns_name in target:
        extra += target[St.aligns_name]
    elif St.latitude_name in target:
        extra += "Latitude"
        if St.longitude_name in target:
            extra += "Longitude"

    # THE LENS URI: NAMESPACE FIRST, THEN THE REFINED NAME AND THE HASH
    unique = Ut.hash_it(extra)
    specs[St.lens] = u"{}refine_{}_{}".format(Ns.lens, specs[St.refined_name], unique)
    update_specification(specs)
def set_linkset_expands_name(specs):
    unique = ""
    source = specs[St.source]
    target = specs[St.target]

    if St.reducer in source:
        unique += source[St.reducer]

    # GEO DATA
    if St.longitude in source:
        unique += source[St.longitude]
    if St.latitude in source:
        unique += source[St.latitude]
    if St.longitude in target:
        unique += target[St.longitude]
    if St.latitude in target:
        unique += target[St.latitude]
    if St.unit in specs:
        unique += str(specs[St.unit])
        unit = Ut.get_uri_local_name(str(specs[St.unit]))
        unique += unit
    if St.unit_value in specs:
        unique += str(specs[St.unit_value])
        unit_value = str(specs[St.unit_value])
        unique += unit_value

    if St.reducer in specs[St.target]:
        unique += target[St.reducer]
    if St.intermediate_graph in specs:
        unique += str(specs[St.intermediate_graph])
    if St.threshold in specs:
        unique += str(specs[St.threshold])
    if St.delta in specs:
        unique += str(specs[St.delta])

    if St.aligns_name in source:
        unique += source[St.aligns_name]
    elif St.latitude_name in source:
        unique += "Latitude"
        if St.longitude_name in source:
            unique += "Longitude"

    if St.aligns_name in target:
        unique += target[St.aligns_name]
    elif St.latitude_name in target:
        unique += "Latitude"
        if St.longitude_name in target:
            unique += "Longitude"

    dir_name = DIRECTORY
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    unique = Ut.hash_it(specs[St.mechanism] + source[St.graph_name] + target[St.graph_name] +
                        source[St.entity_datatype] + target[St.entity_datatype] + unique)

    if St.expands in specs:
        specs[St.linkset_name] = "expands_{}_{}".format(specs[St.expands_name], unique)
        specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])

        # MAKE SURE THE FUTURE SINGLETON METADATA FILE PATH DOES NOT EXCEED
        # THE 255 CHARACTER LIMIT OF THE FILE SYSTEM
        singleton_metadata_file = "{}(SingletonMetadata)-{}.trig".format(specs[St.linkset_name], date)
        singleton_metadata_output = "{}/{}".format(dir_name, singleton_metadata_file)
        future_path = os.path.join(DIRECTORY, singleton_metadata_output)
        future_path = future_path.replace("\\", "/").replace("//", "/")

        if len(future_path) > 255:
            full_hashed = Ut.hash_it(specs[St.linkset_name])
            specs[St.linkset_name] = "expands_{}_{}_{}".format(
                source[St.graph_name], specs[St.mechanism], full_hashed)
            specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])

        print "\t- specs[St.linkset]", specs[St.linkset]
        return specs[St.linkset]
    else:
        return set_linkset_name(specs)
matching = ["good", "bad", "good", "bad", "bad", "bad", "bad"]

eccentricity = {
    '<http://risis.eu/eter_2014/resource/EE0006>': 2,
    '<http://risis.eu/leidenRanking_2015/resource/115>': 3,
    '<http://risis.eu/orgref_20180301/resource/787614>': 3,
    '<http://risis.eu/orgreg_20170718/resource/CHAREE0006-1>': 3,
    '<http://www.grid.ac/institutes/grid.6988.f>': 2,
    '<http://risis.eu/orgreg_20170718/resource/CHAREE0006-2>': 3,
    '<http://risis.eu/orgreg_20170718/resource/CHAREE0002-2>': 2,
    '<http://risis.eu/cordisH2020/resource/participant_999842536>': 2,
    '<http://risis.eu/eter_2014/resource/EE0002>': 2,
    '<http://risis.eu/orgreg_20170718/resource/CHAREE0002-1>': 3
}

print filter(lambda x: x.upper() == "GOOD", matching)

result = filter(lambda x: int(x[1]) == 3, eccentricity.items())
Ut.print_list(result)

# ********************************************************************************
# TESTING GRAPH CREATION
# ********************************************************************************
# process_cluster(info_2, rscs, "C:\Productivity\\1 - GA - VALIDATION", cluster_id="ad15fdc8")

PLOTES = False
if PLOTES:
    file_path = "C:\Productivity\\2 - MatchingTools\\image_{}.pdf"
    nodes_al = ["Al", "Koudouss", "Koudous", "oladele", "Idrissou"]
    nodes = ["Veruska", "Carretta", "Zamborlini", "Al", "Koudouss", "Koudous",
             "oladele", "Idrissou", "Kamila", "Mila"]
    links_ve = [("Veruska", "Carretta"), ("Carretta", "Zamborlini"),