def import_gadm():
    """Export the GADM graph from Virtuoso to a turtle file and load it in Stardog.

    Steps performed:
      * write a batch file holding the ``stardog data add`` command for the
        ``<Ns.dataset>gadm`` graph;
      * run the count query to find how many triples must be fetched and derive
        the number of pages of ``limit`` triples;
      * fetch the pages and write them to the turtle file;
      * execute the batch file with ``os.system``.

    The file locations are hard-coded to a developer machine.

    Returns:
        The raw count response when the count query yields no result,
        otherwise None.
    """

    total = 0
    limit = 2000
    # Raw strings: same value as before, without relying on unknown escapes.
    f_path = r"C:\Users\Al\PycharmProjects\AlignmentUI\src\UploadedFiles\gadm.ttl"
    b_path = r"C:\Users\Al\PycharmProjects\AlignmentUI\src\UploadedFiles\gadm{}".format(
        Ut.batch_extension())

    # GENERATING THE BATCH FILE TEXT
    graph = "{}gadm".format(Ns.dataset)
    stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[St.stardog_path]
    # The echo and the load command must sit on separate lines of the batch file.
    load_text = 'echo "Loading data"\n{}stardog data add {} -g {} "{}"\n'.format(
        stardog_path, Svr.settings[St.database], graph, f_path)

    # CREATE THE BATCH FILE (closed even if the write fails)
    with codecs.open(b_path, "wb", "utf-8") as batch_writer:
        batch_writer.write(to_unicode(load_text))

    print("1. GET THE TOTAL NUMBER OF TRIPLES TO LOAD")
    count_query = import_gadm_query(is_count=True)
    count_res = Qry.virtuoso_request(count_query)
    result = count_res['result']

    if result is None:
        print("NO RESULT FOR THIS ENRICHMENT.")
        return count_res

    print("2. PROCESSING THE COUNT RESULT")
    g = rdflib.Graph()
    g.parse(data=result, format="turtle")
    attribute = rdflib.URIRef("http://www.w3.org/2005/sparql-results#value")
    for _, _, obj in g.triples((None, attribute, None)):
        total = int(obj)

    # Number of pages needed: ceiling of total / limit.
    iterations = (total + limit - 1) // limit
    print("\tTOTAL TRIPLES TO RETREIVE : {} \n\tTOTAL NUMBER OF ITERATIONS : {}\n".format(
        total, iterations))

    # RUN THE ITERATIONS
    # The data file is opened only now, so a failed count no longer truncates it,
    # and the context manager guarantees it is closed on every path.
    with codecs.open(f_path, "wb", "utf-8") as writer:
        try:
            for i in range(iterations):
                # NOTE(review): offset starts at 1; confirm import_gadm_query
                # expects a 1-based offset, otherwise the first row is skipped.
                offset = i * limit + 1
                print("ROUND: {} OFFSET: {}".format(i, offset))

                print("\tRUNNING THE QUERY")
                import_query = import_gadm_query(limit=limit, offset=offset, is_count=False)
                response = Qry.virtuoso_request(import_query)
                payload = response["result"]
                print("RESPONSE SIZE: {}".format(0 if payload is None else len(payload)))

                print("\tWRITING THE RESULT TO FILE")
                writer.write(payload)

                # NOTE(review): this unconditional break stops after the first
                # page, so at most `limit` triples are exported. It is kept
                # because removing it changes the amount of data loaded;
                # confirm whether it is a debugging leftover.
                break
        except Exception as err:
            # Best effort: report the failure and still run the batch below.
            print(str(err))

    print("4. RUNNING THE BATCH FILE")
    print("THE DATA IS BEING LOADED OVER HTTP POST." if Svr.settings[St.split_sys] is True
          else "THE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH.")
    print("PATH: {}".format(b_path))
    os.system(b_path)
    print("JOB DONE!!!")
def _graph_exists(graph_uri, endpoint):
    """Ask the endpoint whether the named graph contains at least one triple.

    Returns:
        True or False when an ``rs:boolean`` value is found in the response,
        None when the response has no result or no boolean can be extracted.
    """

    response = Qry.virtuoso_request(
        "ask {{ GRAPH <{}> {{ ?x ?y ?z . }} }}".format(graph_uri), endpoint)
    body = response["result"]
    if body is None:
        return None

    matches = regex.findall(r"rs:boolean[ ]*(.*)[ ]*\.", body)
    if not matches:
        return None
    return matches[0].strip() == "true"


def enrich(specs, directory, endpoint):
    """Enrich a graph with GADM boundaries and load the outcome in Stardog.

    The function checks that both the graph to enrich and the GADM graph are
    present at the Virtuoso endpoint, counts the triples the enrichment query
    produces, fetches them page by page into a turtle file located in
    ``directory``, and loads that file in the ``<graph>_enriched`` named graph
    through a generated batch file.

    Args:
        specs: mapping read through the keys St.graph, St.entity_datatype,
            St.long_predicate and St.lat_predicate; it is also handed to
            enrich_query.
        directory: folder in which the turtle and batch files are written;
            it is created when missing.
        endpoint: endpoint passed to every Qry.virtuoso_request call.

    Returns:
        * a dict with St.message and St.result describing the failure when one
          of the two graphs is reported as absent;
        * None when the output folder cannot be created;
        * the raw count response when the count query yields no result;
        * otherwise a dict with St.message and St.result, the latter being the
          URI of the enriched graph.
    """

    # TODO RUN IT IF THERE IS NOT GRAPH ENRICHED WITH THE SAME NAME

    gadm_graph = "http://geo.risis.eu/gadm"
    absent = "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE."

    print("ENRICHING DATA/GRAPH FROM EXPORT-ALIGNMENT")
    print("GRAPH: {}".format(specs[St.graph]))
    print("ENTITY TYPE: {}".format(specs[St.entity_datatype]))
    # Each label is now paired with its own predicate (they used to be swapped).
    print("LAT PREDICATE: {}".format(specs[St.lat_predicate]))
    print("LONG PREDICATE: {}".format(specs[St.long_predicate]))
    print("FILE DIRECTORY: {}".format(directory))
    name = Ut.get_uri_local_name(specs[St.graph])

    print(endpoint)

    # CHECKING WHETHER BOTH DATASETS ARE AT THE VIRTUOSO TRIPLE STORE
    # Only an explicit "false" blocks the enrichment; an answer that cannot be
    # read (None) lets the process continue, as it did before.
    source_exists = _graph_exists(specs[St.graph], endpoint)
    if source_exists is False:
        print("GRAPH: {} {}".format(specs[St.graph], absent))

    gadm_exists = _graph_exists(gadm_graph, endpoint)
    if gadm_exists is False:
        # Report the graph that is actually missing.
        print("GRAPH: {} {}".format(gadm_graph, absent))

    if source_exists is False or gadm_exists is False:
        message = "BECAUSE BOTH DATASETS NEED TO BE PRESENT AT OUR TRIPLES STORE, WE ARE UNABLE TO EXECUTE THE REQUEST."
        return {
            St.message: message,
            St.result: 'The dataset {} '
                       'cannot be enriched with GADM boundary at the moment.'.format(
                specs[St.graph])
        }

    total = 0
    limit = 20000
    date = datetime.date.today().isoformat().replace('-', '')
    f_path = "{0}{1}{1}{2}_enriched_{3}.ttl".format(directory, os.path.sep, name, date)
    b_path = "{0}{1}{1}{2}_enriched_{3}{4}".format(
        directory, os.path.sep, name, date, Ut.batch_extension())

    # MAKE SURE THE FOLDER EXISTS
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError as err:
        print("\n\t[utility_LOAD_TRIPLE_STORE:] {}".format(err))
        # NOTE(review): this path returns None while the others return a dict.
        return

    print("\n1. GETTING THE TOTAL NUMBER OF TRIPLES.")
    count_query = enrich_query(specs, limit=0, offset=0, is_count=True)
    print(count_query)
    count_res = Qry.virtuoso_request(count_query, endpoint)
    result = count_res['result']

    # GET THE TOTAL NUMBER OF TRIPLES
    if result is None:
        print("NO RESULT FOR THIS ENRICHMENT.")
        return count_res

    g = rdflib.Graph()
    g.parse(data=result, format="turtle")
    attribute = rdflib.URIRef("http://www.w3.org/2005/sparql-results#value")
    for _, _, obj in g.triples((None, attribute, None)):
        total = int(obj)

    # NUMBER OF REQUEST NEEDED: ceiling of total / limit
    iterations = (total + limit - 1) // limit
    print("\n2. TOTAL TRIPLES TO RETREIVE : {} \n\tTOTAL NUMBER OF ITERATIONS : {}\n".format(
        total, iterations))

    print("3. GENERATING THE BATCH FILE TEXT")
    enriched_graph = "{}_enriched".format(specs[St.graph])
    stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[St.stardog_path]
    # The echo and the load command must sit on separate lines of the batch file.
    load_text = 'echo "Loading data"\n{}stardog data add {} -g {} "{}"\n'.format(
        stardog_path, Svr.settings[St.database], enriched_graph, f_path)

    with codecs.open(b_path, "wb", "utf-8") as batch_writer:
        batch_writer.write(to_unicode(load_text))

    # RUN THE ITERATIONS
    # The context manager closes the data file even when a request or a write
    # raises; the exception itself still propagates to the caller.
    with codecs.open(f_path, "wb", "utf-8") as writer:
        for i in range(iterations):
            # Derived from `limit` so that the paging stays correct if the
            # page size is ever changed.
            # NOTE(review): offset starts at 1; confirm enrich_query expects a
            # 1-based offset, otherwise the first row is skipped.
            offset = i * limit + 1
            print("\tROUND: {} OFFSET: {}".format(i + 1, offset))

            virtuoso = enrich_query(specs, limit=limit, offset=offset, is_count=False)
            writer.write(Qry.virtuoso_request(virtuoso, endpoint)["result"])

    print("\n4. RUNNING THE BATCH FILE")
    print("\tTHE DATA IS BEING LOADED OVER HTTP POST." if Svr.settings[St.split_sys] is True
          else "\tTHE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH.")

    # RUN THE BATCH FILE
    print("\tFILE: {}".format(f_path))
    print("\tBATCH: {}\n".format(b_path))
    os.chmod(b_path, 0o777)
    Ut.batch_load(b_path)
    if os.path.exists(b_path):
        os.remove(b_path)

    # TODO 1. REGISTER THE DATASET TO BE ENRICHED IF NOT YET REGISTER
    # TODO 2. ADD THE ENRICHED DATASET TO THE RESEARCH QUESTION (REGISTER).
    # TODO 3. MAYBE, CREATE THE LINKSET BETWEEN THE SOURCE AND THE RESULTING

    size = Qry.get_namedgraph_size(enriched_graph)

    print("JOB DONE...!!!!!!")

    return {
        St.message: "The select dataset was enriched with the GADM boundary as {}. "
                    "{} triples were created.".format(enriched_graph, size),
        St.result: enriched_graph
    }