def import_gadm(): total = 0 limit = 2000 f_path = "C:\Users\Al\PycharmProjects\AlignmentUI\src\UploadedFiles\gadm.ttl" b_path = "C:\Users\Al\PycharmProjects\AlignmentUI\src\UploadedFiles\gadm{}".format( Ut.batch_extension()) # CREATE THE WRITERS writer = codecs.open(f_path, "wb", "utf-8") batch_writer = codecs.open(b_path, "wb", "utf-8") # GENERATING THE BATCH FILE TEXT graph = "{}gadm".format(Ns.dataset) stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[ St.stardog_path] load_text = """echo "Loading data" {}stardog data add {} -g {} "{}" """.format(stardog_path, Svr.settings[St.database], graph, f_path) batch_writer.write(to_unicode(load_text)) batch_writer.close() print "1. GET THE TOTAL NUMBER OF TRIPLES TO LOAD" count_query = import_gadm_query(is_count=True) # print count_query count_res = Qry.virtuoso_request(count_query) result = count_res['result'] if result is None: print "NO RESULT FOR THIS ENRICHMENT." return count_res print "2. PROCESSING THE COUNT RESULT" g = rdflib.Graph() g.parse(data=result, format="turtle") attribute = rdflib.URIRef("http://www.w3.org/2005/sparql-results#value") for subject, predicate, obj in g.triples((None, attribute, None)): total = int(obj) iterations = total / limit if total % limit == 0 else total / limit + 1 print "\tTOTAL TRIPLES TO RETREIVE : {} \n\tTOTAL NUMBER OF ITERATIONS : {}\n".format( total, iterations) # RUN THE ITERATIONS try: for i in range(0, iterations): offset = i * limit + 1 print "ROUND: {} OFFSET: {}".format(i, offset) print "\tRUNNING THE QUERY" import_query = import_gadm_query(limit=limit, offset=offset, is_count=False) response = Qry.virtuoso_request(import_query) print "RESPONSE SIZE: ".format(response["result"]) print "\tWRITING THE RESULT TO FILE" writer.write(response["result"]) break except Exception as err: print str(err.message) # CLOSE THE IMPORT WRITER writer.close() print "4. RUNNING THE BATCH FILE" print "THE DATA IS BEING LOADED OVER HTTP POST." if Svr.settings[St.split_sys] is True \ else "THE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH." print "PATH:", b_path os.system(b_path) print "JOB DONE!!!"
def visualise(graphs, directory, credential): # production_directory = "/scratch/risis/data/rdf-data/links" # directory = production_directory writer = Buffer.StringIO() g = rdflib.Graph() source = {} target = {} attribute = {} src_count = 0 trg_count = 0 prd_count = 0 singletons = {} triples = 0 datasets = [None, None] code = 0 for graph in graphs: # print graph code += 1 links = export_alignment(graph) # THE MECHANISM USED mechanism = links['mechanism'] # print "mechanism", mechanism # THE SOURCE AND TARGET DATASETS if datasets == [None, None]: if str(links["type"]) == Ns.lens_type: datasets = links["lens_targets"] else: datasets = [links["source"], links['target']] # MAKE SURE THAT FOR ALL ALIGNMENT, THE SOURCE DATASET AND TARGET DATASETS ARE THE SAME elif datasets != [links["source"], links['target']]: print "No visualisation for different set of source-target" return None print "DATASETS: ", datasets # print links['result'] if links['result'] is not None: # LOAD THE CORRESPONDENCES TO THE MAIN GRAPH g.parse(data=links['result'], format="turtle") # INDEX THE CORRESPONDENCES USING THE SINGLETON PROPERTY sg = rdflib.Graph() sg.parse(data=links['result'], format="turtle") triples += len(sg) for subject, predicate, obj in sg.triples((None, None, None)): mech = "{}_{}".format(mechanism, code) if predicate not in singletons: singletons[predicate] = [mech] elif mech not in singletons[mech]: singletons[mech] += [mech] # WRITING THE FILE count = 0 writer.write("PREFIX ll: <{}>\n".format(Ns.alivocab)) writer.write("PREFIX rdf: <{}>\n".format(Ns.rdf)) writer.write("PREFIX link: <http://risis.eu/alignment/link/>\n") writer.write("PREFIX plot: <http://risis.eu/alignment/plot/>\n") writer.write("PREFIX mechanism: <{}>\n".format(Ns.mechanism)) print "size: ", len(datasets) if len(datasets) > 2: name = hash("".join(datasets)) name = "{}".format(str(name).replace( "-", "P")) if str(name).__contains__("-") else "P{}".format(name) else: name = "{}_{}".format(Ut.get_uri_local_name(datasets[0]), Ut.get_uri_local_name(datasets[1])) print "NAME: ", name # DROPPING GRAPH IF IT ALREADY EXISTS writer.write("\n#DROP SILENT GRAPH plot:{} ;\n".format(name)) # INSERT NEW DATA writer.write("#INSERT DATA\n#{") writer.write("\n\tplot:{}\n".format(name)) writer.write("\t{") # GOING THROUGH ALL CORRESPONDENCES OF HE MAIN GRAPH (MERGED) for subject, predicate, obj in g.triples((None, None, None)): count += 1 # INDEX THE SOURCE CORRESPONDENCE if subject not in source: src_count += 1 source[subject] = src_count # INDEX THE TARGET CORRESPONDENCE if obj not in target: trg_count += 1 target[obj] = trg_count # INDEX THE PAIR pre_code = "{}_{}".format(source[subject], target[obj]) if pre_code not in attribute: prd_count += 1 attribute[pre_code] = prd_count # WRITE THE PLOT COORDINATE AND ITS METADATA writer.write("\n\t\t### [ {} ]\n".format(count)) writer.write("\t\t{}\n".format(predicate).replace(Ns.alivocab, "ll:")) writer.write("\t\t\tlink:source {} ;\n".format(source[subject])) writer.write("\t\t\tlink:target {} ;\n".format(target[obj])) writer.write("\t\t\tlink:source_uri <{}> ;\n".format(subject)) writer.write("\t\t\tlink:target_uri <{}> ;\n".format(obj)) for value in singletons[predicate]: if str(value) != "None_1": writer.write( "\t\t\tlink:mechanism {} ;\n".format(value).replace( Ns.mechanism, "mechanism:")) writer.write("\t\t\trdf:type link:Link .\n") writer.write("") writer.write("\t}\n#}") # THE PATH OF THE OUTPUT FILES date = datetime.date.isoformat(datetime.date.today()).replace('-', '') f_path = "{0}{1}{1}{2}_plots_{3}.trig".format(directory, os.path.sep, name, date) b_path = "{0}{1}{1}{2}_plots_{3}{4}".format(directory, os.path.sep, name, date, Ut.batch_extension()) print "DIRECTORY:", directory # MAKE SURE THE FOLDER EXISTS try: if not os.path.exists(directory): os.makedirs(directory) except OSError as err: print "\n\t[utility_LOAD_TRIPLE_STORE:]", err return # CREATE THE FILES plot_writer = codecs.open(f_path, "wb", "utf-8") batch_writer = codecs.open(b_path, "wb", "utf-8") # print "3. GENERATING THE BATCH FILE TEXT" # enriched_graph = "{}{}_plots".format(Ns.plot, name) # stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[St.stardog_path] # load_text = """echo "Loading data" # {}stardog data add {} -g {} "{}" # """.format(stardog_path, Svr.DATABASE, enriched_graph, f_path) # GENERATE THE BATCH FILE FOR AUTOMATIC LOAD user = "******" password = "******" if credential is not None: if "user" in credential: user = credential["user"] if "password" in credential: password = credential["password"] load_text = "echo \"Loading data\"\n" \ "/usr/local/virtuoso-opensource/bin/isql 1112 {} {} exec=\"DB.DBA.TTLP_MT (file_to_string_output" \ "('/scratch/risis/data/rdf-data/links/Plots/{}_plots{}.trig'), '', 'http://risis.eu/converted', " \ "256);\"".format(user, password, name, date) batch_writer.write(to_unicode(load_text)) batch_writer.close() os.chmod(b_path, 0o777) # WRITE THE CORRESPONDENCES TO FILE plot_writer.write(writer.getvalue()) plot_writer.close() print "PLOT: {}".format(f_path) print "BATCH: {}".format(b_path) print "Job Done!!!" # Qry.virtuoso_request(writer.getvalue()) # print count, triples # file.close() return {'result': writer.getvalue(), 'message': "Constructed"}
def enrich(specs, directory, endpoint): # TODO RUN IT IF THERE IS NOT GRAPH ENRICHED WITH THE SAME NAME # specs[St.graph] = "http://grid.ac/20170712" print "ENRICHING DATA/GRAPH FROM EXPORT-ALIGNMENT" print "GRAPH:", specs[St.graph] print "ENTITY TYPE:", specs[St.entity_datatype] print "LAT PREDICATE:", specs[St.long_predicate] print "LONG PREDICATE:", specs[St.lat_predicate] print "FILE DIRECTORY:", directory name = Ut.get_uri_local_name(specs[St.graph]) print endpoint data_1 = Qry.virtuoso_request( "ask {{ GRAPH <{}> {{ ?x ?y ?z . }} }}".format(specs[St.graph]), endpoint) data_1 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_1["result"]) if len(data_1) > 0: data_1 = data_1[0].strip() == "true" if data_1 is False: print "GRAPH: {} {}".format( specs[St.graph], "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.") # CHECKING WHETHER BOTH DATASETS ARE AT THE VIRTUOSO TRIPLE STORE data_2 = Qry.virtuoso_request( "ask {GRAPH <http://geo.risis.eu/gadm>{ ?x ?y ?z . }}", endpoint) data_2 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_2["result"]) if len(data_2) > 0: data_2 = data_2[0].strip() == "true" if data_2 is False: print "GRAPH: {} {}".format( specs[St.graph], "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.") if data_1 is False or data_2 is False: message = "BECAUSE BOTH DATASETS NEED TO BE PRESENT AT OUR TRIPLES STORE, WE ARE UNABLE TO EXECUTE THE REQUEST." return { St.message: message, St.result: 'The dataset {} ' 'cannot be enriched with GADM boundary at the moment.'.format( specs[St.graph]) } total = 0 limit = 20000 date = datetime.date.isoformat(datetime.date.today()).replace('-', '') f_path = "{0}{1}{1}{2}_enriched_{3}.ttl".format(directory, os.path.sep, name, date) b_path = "{0}{1}{1}{2}_enriched_{3}{4}".format(directory, os.path.sep, name, date, Ut.batch_extension()) # MAKE SURE THE FOLDER EXISTS try: if not os.path.exists(directory): os.makedirs(directory) except OSError as err: print "\n\t[utility_LOAD_TRIPLE_STORE:]", err return print "\n1. GETTING THE TOTAL NUMBER OF TRIPLES." count_query = enrich_query(specs, limit=0, offset=0, is_count=True) print count_query count_res = Qry.virtuoso_request(count_query, endpoint) result = count_res['result'] # GET THE TOTAL NUMBER OF TRIPLES if result is None: print "NO RESULT FOR THIS ENRICHMENT." return count_res g = rdflib.Graph() g.parse(data=result, format="turtle") attribute = rdflib.URIRef("http://www.w3.org/2005/sparql-results#value") for subject, predicate, obj in g.triples((None, attribute, None)): total = int(obj) # NUMBER OF REQUEST NEEDED iterations = total / limit if total % limit == 0 else total / limit + 1 print "\n2. TOTAL TRIPLES TO RETREIVE : {} \n\tTOTAL NUMBER OF ITERATIONS : {}\n".format( total, iterations) writer = codecs.open(f_path, "wb", "utf-8") batch_writer = codecs.open(b_path, "wb", "utf-8") print "3. GENERATING THE BATCH FILE TEXT" enriched_graph = "{}_enriched".format(specs[St.graph]) stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[ St.stardog_path] load_text = """echo "Loading data" {}stardog data add {} -g {} "{}" """.format(stardog_path, Svr.settings[St.database], enriched_graph, f_path) batch_writer.write(to_unicode(load_text)) batch_writer.close() # RUN THE ITERATIONS for i in range(0, iterations): offset = i * 20000 + 1 print "\tROUND: {} OFFSET: {}".format(i + 1, offset) # print "\t\t1. GENERATING THE ENRICHMENT QUERY" virtuoso = enrich_query(specs, limit=limit, offset=offset, is_count=False) # print virtuoso # exit(0) # print Qry.virtuoso(virtuoso)["result"] # print "\t\t2. RUNNING THE QUERY + WRITE THE RESULT TO FILE" writer.write(Qry.virtuoso_request(virtuoso, endpoint)["result"]) writer.close() print "\n4. RUNNING THE BATCH FILE" print "\tTHE DATA IS BEING LOADED OVER HTTP POST." if Svr.settings[St.split_sys] is True \ else "\tTHE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH." # os.system(b_path) # RUN THE BATCH FILE print "\tFILE: {}".format(f_path) print "\tBATCH: {}\n".format(b_path) os.chmod(b_path, 0o777) Ut.batch_load(b_path) if os.path.exists(b_path) is True: os.remove(b_path) # TODO 1. REGISTER THE DATASET TO BE ENRICHED IF NOT YET REGISTER # TODO 2. ADD THE ENRICHED DATASET TO THE RESEARCH QUESTION (REGISTER). # TODO 3. MAYBE, CREATE THE LINKSET BETWEEN THE SOURCE AND THE RESULTING size = Qry.get_namedgraph_size(enriched_graph) print "JOB DONE...!!!!!!" return { St.message: "The select dataset was enriched with the GADM boundary as {}. " "{} triples were created.".format(enriched_graph, size), St.result: enriched_graph }
# https://docupub.com/pdfmerge/ _format = "%a %b %d %Y %H:%M:%S" date = datetime.datetime.today() lock_file = None RESET_SERVER_BATS = False _line = "--------------------------------------------------------------" \ "--------------------------------------------------------------" if __name__ == "__main__": try: if RESET_SERVER_BATS is True: START_path = "{}stardogStart{}".format(Svr.SRC_DIR, Ut.batch_extension()) STOP_path = "{}stardogStop{}".format(Svr.SRC_DIR, Ut.batch_extension()) if os.path.exists(START_path) is True: os.remove(START_path) if os.path.exists(STOP_path) is True: os.remove(STOP_path) lock_file = [ name for name in os.listdir(Svr.settings[St.stardog_data_path]) if name.endswith('.lock') ] # print lock_file except Exception as err: print str(err)