def enrich(specs, directory, endpoint):

    # TODO RUN IT IF THERE IS NOT GRAPH ENRICHED WITH THE SAME NAME

    # specs[St.graph] = "http://grid.ac/20170712"
    print "ENRICHING DATA/GRAPH FROM EXPORT-ALIGNMENT"
    print "GRAPH:", specs[St.graph]
    print "ENTITY TYPE:", specs[St.entity_datatype]
    print "LAT PREDICATE:", specs[St.long_predicate]
    print "LONG PREDICATE:", specs[St.lat_predicate]
    print "FILE DIRECTORY:", directory
    name = Ut.get_uri_local_name(specs[St.graph])

    print endpoint
    data_1 = Qry.virtuoso_request(
        "ask {{ GRAPH <{}> {{ ?x ?y ?z . }} }}".format(specs[St.graph]),
        endpoint)
    data_1 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_1["result"])
    if len(data_1) > 0:
        data_1 = data_1[0].strip() == "true"
        if data_1 is False:
            print "GRAPH: {} {}".format(
                specs[St.graph], "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.")

    # CHECKING WHETHER BOTH DATASETS ARE AT THE VIRTUOSO TRIPLE STORE
    data_2 = Qry.virtuoso_request(
        "ask {GRAPH <http://geo.risis.eu/gadm>{ ?x ?y ?z . }}", endpoint)
    data_2 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_2["result"])
    if len(data_2) > 0:
        data_2 = data_2[0].strip() == "true"
        if data_2 is False:
            print "GRAPH: {} {}".format(
                specs[St.graph], "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.")

    if data_1 is False or data_2 is False:
        message = "BECAUSE BOTH DATASETS NEED TO BE PRESENT AT OUR TRIPLES STORE, WE ARE UNABLE TO EXECUTE THE REQUEST."
        return {
            St.message:
            message,
            St.result:
            'The dataset {} '
            'cannot be enriched with GADM boundary  at the moment.'.format(
                specs[St.graph])
        }

    total = 0
    limit = 20000
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    f_path = "{0}{1}{1}{2}_enriched_{3}.ttl".format(directory, os.path.sep,
                                                    name, date)
    b_path = "{0}{1}{1}{2}_enriched_{3}{4}".format(directory, os.path.sep,
                                                   name, date,
                                                   Ut.batch_extension())

    # MAKE SURE THE FOLDER EXISTS
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError as err:
        print "\n\t[utility_LOAD_TRIPLE_STORE:]", err
        return

    print "\n1. GETTING THE TOTAL NUMBER OF TRIPLES."
    count_query = enrich_query(specs, limit=0, offset=0, is_count=True)
    print count_query
    count_res = Qry.virtuoso_request(count_query, endpoint)
    result = count_res['result']

    # GET THE TOTAL NUMBER OF TRIPLES
    if result is None:
        print "NO RESULT FOR THIS ENRICHMENT."
        return count_res

    g = rdflib.Graph()
    g.parse(data=result, format="turtle")
    attribute = rdflib.URIRef("http://www.w3.org/2005/sparql-results#value")
    for subject, predicate, obj in g.triples((None, attribute, None)):
        total = int(obj)

    # NUMBER OF REQUEST NEEDED
    iterations = total / limit if total % limit == 0 else total / limit + 1
    print "\n2. TOTAL TRIPLES TO RETREIVE  : {} \n\tTOTAL NUMBER OF ITERATIONS : {}\n".format(
        total, iterations)

    writer = codecs.open(f_path, "wb", "utf-8")
    batch_writer = codecs.open(b_path, "wb", "utf-8")

    print "3. GENERATING THE BATCH FILE TEXT"
    enriched_graph = "{}_enriched".format(specs[St.graph])
    stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[
        St.stardog_path]

    load_text = """echo "Loading data"
            {}stardog data add {} -g {} "{}"
            """.format(stardog_path, Svr.settings[St.database], enriched_graph,
                       f_path)

    batch_writer.write(to_unicode(load_text))
    batch_writer.close()

    # RUN THE ITERATIONS
    for i in range(0, iterations):

        offset = i * 20000 + 1
        print "\tROUND: {} OFFSET: {}".format(i + 1, offset)

        # print "\t\t1. GENERATING THE ENRICHMENT QUERY"
        virtuoso = enrich_query(specs,
                                limit=limit,
                                offset=offset,
                                is_count=False)
        # print virtuoso
        # exit(0)
        # print Qry.virtuoso(virtuoso)["result"]

        # print "\t\t2. RUNNING THE QUERY + WRITE THE RESULT TO FILE"
        writer.write(Qry.virtuoso_request(virtuoso, endpoint)["result"])

    writer.close()
    print "\n4. RUNNING THE BATCH FILE"
    print "\tTHE DATA IS BEING LOADED OVER HTTP POST." if Svr.settings[St.split_sys] is True \
        else "\tTHE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH."
    # os.system(b_path)

    # RUN THE BATCH FILE
    print "\tFILE: {}".format(f_path)
    print "\tBATCH: {}\n".format(b_path)
    os.chmod(b_path, 0o777)
    Ut.batch_load(b_path)
    if os.path.exists(b_path) is True:
        os.remove(b_path)

    # TODO 1. REGISTER THE DATASET TO BE ENRICHED IF NOT YET REGISTER
    # TODO 2. ADD THE ENRICHED DATASET TO THE RESEARCH QUESTION (REGISTER).
    # TODO 3. MAYBE, CREATE THE LINKSET BETWEEN THE SOURCE AND THE RESULTING

    size = Qry.get_namedgraph_size(enriched_graph)

    print "JOB DONE...!!!!!!"

    return {
        St.message:
        "The select dataset was enriched with the GADM boundary as {}. "
        "{} triples were created.".format(enriched_graph, size),
        St.result:
        enriched_graph
    }
# Example #2
0
if Ut.OPE_SYS != 'windows':
    print "MAC BATCH: {}".format(writers[St.batch_output_path])
    os.chmod(writers[St.batch_output_path], 0o777)

if count > 0:

    print "\n{} INSTANCES FOUND.".format(count)
    print "6. RUNNING THE BATCH FILE FOR LOADING THE CORRESPONDENCES INTO THE TRIPLE STORE\n\t\t{}", writers[
        St.batch_output_path]

    if Svr.settings[St.split_sys] is True:
        print "THE DATA IS BEING LOADED OVER HTTP POST."
    else:
        print "THE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH."
        # os.system(writers[St.batch_output_path])
        Ut.batch_load(writers[St.batch_output_path])
    # inserted = Qry.insert_size(specs[St.linkset], isdistinct=False)

    metadata = Gn.linkset_metadata(specs, display=False).replace("INSERT DATA", "")
    writers[St.meta_writer].write(to_unicode(metadata))

    if int(specs[St.triples]) > 0:
        Qry.boolean_endpoint_response(metadata)
        writers[St.meta_writer].close()

        # REGISTER THE ALIGNMENT
        # if check[St.result].__contains__("ALREADY EXISTS"):
        #     Urq.register_alignment_mapping(specs, created=False)
        # else:
        #     Urq.register_alignment_mapping(specs, created=True)
        Urq.register_alignment_mapping(specs, created=False)
# Example #3
0
import Alignments.Utility as Ut
import os
import Alignments.Settings as St
import Alignments.Server_Settings as Srv
# https://docupub.com/pdfmerge/

# directory = "C:\Program Files\stardog-4.1.3\data"
# Ut.listening(directory)

print "STOPPING THE STARDOG SERVER"

lock_file = [
    name for name in os.listdir(Srv.settings[St.stardog_data_path])
    if name.endswith('.lock')
]

if len(lock_file) > 0:
    off = Ut.batch_load("C:\stardogStop.bat")
    print "RESPONSE: {}".format(off["result"])
else:
    print "THE SERVER WAS NOT ON."