def import_gadm():

    total = 0
    limit = 2000
    f_path = r"C:\Users\Al\PycharmProjects\AlignmentUI\src\UploadedFiles\gadm.ttl"
    b_path = r"C:\Users\Al\PycharmProjects\AlignmentUI\src\UploadedFiles\gadm{}".format(
        Ut.batch_extension())

    # CREATE THE WRITERS
    writer = codecs.open(f_path, "wb", "utf-8")
    batch_writer = codecs.open(b_path, "wb", "utf-8")

    # GENERATING THE BATCH FILE TEXT
    graph = "{}gadm".format(Ns.dataset)
    stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[
        St.stardog_path]
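
    # THE BATCH SCRIPT BELOW WRAPS A SINGLE "stardog data add <db> -g <graph>
    # <file>" COMMAND, SO RUNNING IT LOADS THE GENERATED TURTLE FILE INTO THE
    # NAMED GRAPH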
    load_text = """echo "Loading data"
                {}stardog data add {} -g {} "{}"
                """.format(stardog_path, Svr.settings[St.database], graph,
                           f_path)
    batch_writer.write(to_unicode(load_text))
    batch_writer.close()

    print "1. GET THE TOTAL NUMBER OF TRIPLES TO LOAD"
    count_query = import_gadm_query(is_count=True)
    # print count_query
    count_res = Qry.virtuoso_request(count_query)
    result = count_res['result']
    if result is None:
        print "NO RESULT FOR THIS ENRICHMENT."
        return count_res

    print "2. PROCESSING THE COUNT RESULT"
    g = rdflib.Graph()
    g.parse(data=result, format="turtle")
    attribute = rdflib.URIRef("http://www.w3.org/2005/sparql-results#value")
    for subject, predicate, obj in g.triples((None, attribute, None)):
        total = int(obj)
    iterations = total / limit if total % limit == 0 else total / limit + 1
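    # NOTE: WITH PYTHON 2 INTEGER DIVISION THIS IS A CEILING DIVISION; AN
    # EQUIVALENT ONE-LINER WOULD BE: iterations = -(-total // limit)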
    print "\tTOTAL TRIPLES TO RETRIEVE  : {} \n\tTOTAL NUMBER OF ITERATIONS : {}\n".format(
        total, iterations)

    # 3. RUN THE ITERATIONS
    try:
        for i in range(0, iterations):

            offset = i * limit + 1
            print "ROUND: {} OFFSET: {}".format(i, offset)

            print "\tRUNNING THE QUERY"
            import_query = import_gadm_query(limit=limit,
                                             offset=offset,
                                             is_count=False)
            response = Qry.virtuoso_request(import_query)

            print "RESPONSE SIZE: {}".format(len(response["result"]))

            print "\tWRITING THE RESULT TO FILE"
            writer.write(response["result"])

    except Exception as err:
        print str(err)

    # CLOSE THE IMPORT WRITER
    writer.close()
    print "4. RUNNING THE BATCH FILE"
    print "THE DATA IS BEING LOADED OVER HTTP POST." if Svr.settings[St.split_sys] is True \
        else "THE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH."
    print "PATH:", b_path
    os.system(b_path)
    print "JOB DONE!!!"
def visualise(graphs, directory, credential):

    # production_directory = "/scratch/risis/data/rdf-data/links"
    # directory = production_directory

    writer = Buffer.StringIO()
    g = rdflib.Graph()
    source = {}
    target = {}
    attribute = {}
    src_count = 0
    trg_count = 0
    prd_count = 0
    singletons = {}
    triples = 0
    datasets = [None, None]
    code = 0
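
    # THE source AND target DICTS MAP EACH URI TO A SMALL INTEGER PLOT
    # COORDINATE; attribute MAPS EACH (source, target) PAIR TO A POINT CODE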

    for graph in graphs:
        # print graph

        code += 1
        links = export_alignment(graph)

        # THE MECHANISM USED
        mechanism = links['mechanism']
        # print "mechanism", mechanism

        # THE SOURCE AND TARGET DATASETS
        if datasets == [None, None]:

            if str(links["type"]) == Ns.lens_type:
                datasets = links["lens_targets"]
            else:
                datasets = [links["source"], links['target']]

        # MAKE SURE THAT, FOR ALL ALIGNMENTS, THE SOURCE AND TARGET DATASETS ARE THE SAME
        elif datasets != [links["source"], links['target']]:
            print "No visualisation for a different set of source-target datasets"
            return None

        print "DATASETS: ", datasets

        # print links['result']
        if links['result'] is not None:

            # LOAD THE CORRESPONDENCES TO THE MAIN GRAPH
            g.parse(data=links['result'], format="turtle")

            # INDEX THE CORRESPONDENCES USING THE SINGLETON PROPERTY
            sg = rdflib.Graph()
            sg.parse(data=links['result'], format="turtle")
            triples += len(sg)
            for subject, predicate, obj in sg.triples((None, None, None)):
                mech = "{}_{}".format(mechanism, code)
                if predicate not in singletons:
                    singletons[predicate] = [mech]
                elif mech not in singletons[predicate]:
                    singletons[predicate] += [mech]
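
    # AFTER THIS LOOP, "singletons" MAPS EACH SINGLETON LINK PREDICATE TO THE
    # LIST OF MECHANISM CODES THAT PRODUCED IT; IT IS USED BELOW TO WRITE THE
    # link:mechanism METADATA OF EACH PLOT COORDINATE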

    # WRITING THE FILE
    count = 0
    writer.write("PREFIX ll: <{}>\n".format(Ns.alivocab))
    writer.write("PREFIX rdf: <{}>\n".format(Ns.rdf))
    writer.write("PREFIX link: <http://risis.eu/alignment/link/>\n")
    writer.write("PREFIX plot: <http://risis.eu/alignment/plot/>\n")
    writer.write("PREFIX mechanism: <{}>\n".format(Ns.mechanism))

    print "size: ", len(datasets)
    if len(datasets) > 2:
        name = str(hash("".join(datasets)))
        name = name.replace("-", "P") if "-" in name else "P{}".format(name)
    else:
        name = "{}_{}".format(Ut.get_uri_local_name(datasets[0]),
                              Ut.get_uri_local_name(datasets[1]))
    print "NAME: ", name

    # DROPPING GRAPH IF IT ALREADY EXISTS
    writer.write("\n#DROP SILENT GRAPH plot:{} ;\n".format(name))

    # INSERT NEW DATA
    writer.write("#INSERT DATA\n#{")
    writer.write("\n\tplot:{}\n".format(name))
    writer.write("\t{")

    # GOING THROUGH ALL CORRESPONDENCES OF THE MAIN GRAPH (MERGED)
    for subject, predicate, obj in g.triples((None, None, None)):

        count += 1

        # INDEX THE SOURCE CORRESPONDENCE
        if subject not in source:
            src_count += 1
            source[subject] = src_count

        # INDEX THE TARGET CORRESPONDENCE
        if obj not in target:
            trg_count += 1
            target[obj] = trg_count

        # INDEX THE PAIR
        pre_code = "{}_{}".format(source[subject], target[obj])
        if pre_code not in attribute:
            prd_count += 1
            attribute[pre_code] = prd_count

        # WRITE THE PLOT COORDINATE AND ITS METADATA
        writer.write("\n\t\t### [ {} ]\n".format(count))
        writer.write("\t\t{}\n".format(predicate).replace(Ns.alivocab, "ll:"))
        writer.write("\t\t\tlink:source     {} ;\n".format(source[subject]))
        writer.write("\t\t\tlink:target     {} ;\n".format(target[obj]))
        writer.write("\t\t\tlink:source_uri <{}> ;\n".format(subject))
        writer.write("\t\t\tlink:target_uri <{}> ;\n".format(obj))

        for value in singletons[predicate]:
            if str(value) != "None_1":
                writer.write(
                    "\t\t\tlink:mechanism  {} ;\n".format(value).replace(
                        Ns.mechanism, "mechanism:"))
        writer.write("\t\t\trdf:type        link:Link .\n")
        writer.write("")
    writer.write("\t}\n#}")
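
    # NOTE: THE DROP/INSERT WRAPPERS ABOVE ARE EMITTED COMMENTED-OUT ('#'),
    # SO THE OUTPUT STAYS LOADABLE AS PLAIN TriG DATA; UNCOMMENTING THEM
    # WOULD TURN THE FILE INTO AN EXECUTABLE SPARQL UPDATE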

    # THE PATH OF THE OUTPUT FILES

    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    f_path = "{0}{1}{2}_plots_{3}.trig".format(directory, os.path.sep, name,
                                               date)
    b_path = "{0}{1}{2}_plots_{3}{4}".format(directory, os.path.sep, name,
                                             date, Ut.batch_extension())
    print "DIRECTORY:", directory

    # MAKE SURE THE FOLDER EXISTS
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError as err:
        print "\n\t[utility_LOAD_TRIPLE_STORE:]", err
        return

    # CREATE THE FILES
    plot_writer = codecs.open(f_path, "wb", "utf-8")
    batch_writer = codecs.open(b_path, "wb", "utf-8")

    # print "3. GENERATING THE BATCH FILE TEXT"
    # enriched_graph = "{}{}_plots".format(Ns.plot, name)
    # stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[St.stardog_path]

    # load_text = """echo "Loading data"
    # {}stardog data add {} -g {} "{}"
    # """.format(stardog_path, Svr.DATABASE, enriched_graph, f_path)

    # GENERATE THE BATCH FILE FOR AUTOMATIC LOAD
    user = "******"
    password = "******"
    if credential is not None:
        if "user" in credential:
            user = credential["user"]
        if "password" in credential:
            password = credential["password"]
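
    # THE GENERATED BATCH WRAPS A SINGLE VIRTUOSO isql CALL (PORT 1112) THAT
    # LOADS THE TriG FILE VIA DB.DBA.TTLP_MT INTO THE
    # http://risis.eu/converted GRAPH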

    load_text = "echo \"Loading data\"\n" \
                "/usr/local/virtuoso-opensource/bin/isql 1112 {} {} exec=\"DB.DBA.TTLP_MT (file_to_string_output" \
                "('/scratch/risis/data/rdf-data/links/Plots/{}_plots_{}.trig'), '', 'http://risis.eu/converted', " \
                "256);\"".format(user, password, name, date)
    batch_writer.write(to_unicode(load_text))
    batch_writer.close()
    os.chmod(b_path, 0o777)

    # WRITE THE CORRESPONDENCES TO FILE
    plot_writer.write(writer.getvalue())
    plot_writer.close()

    print "PLOT: {}".format(f_path)
    print "BATCH: {}".format(b_path)
    print "Job Done!!!"
    # Qry.virtuoso_request(writer.getvalue())
    # print count, triples
    # file.close()

    return {'result': writer.getvalue(), 'message': "Constructed"}
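

# A MINIMAL USAGE SKETCH FOR visualise() (HYPOTHETICAL GRAPH URIs AND OUTPUT
# DIRECTORY; credential IS AN OPTIONAL DICT WITH "user" AND "password" KEYS):
#
#     graphs = ["http://risis.eu/linkset/example_1",
#               "http://risis.eu/linkset/example_2"]
#     out = visualise(graphs, "/tmp/plots", {"user": "dba", "password": "***"})
#     if out is not None:
#         print out["message"]

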
def enrich(specs, directory, endpoint):

    # TODO RUN IT ONLY IF THERE IS NO ENRICHED GRAPH WITH THE SAME NAME

    # specs[St.graph] = "http://grid.ac/20170712"
    print "ENRICHING DATA/GRAPH FROM EXPORT-ALIGNMENT"
    print "GRAPH:", specs[St.graph]
    print "ENTITY TYPE:", specs[St.entity_datatype]
    print "LAT PREDICATE:", specs[St.lat_predicate]
    print "LONG PREDICATE:", specs[St.long_predicate]
    print "FILE DIRECTORY:", directory
    name = Ut.get_uri_local_name(specs[St.graph])

    print endpoint
    data_1 = Qry.virtuoso_request(
        "ask {{ GRAPH <{}> {{ ?x ?y ?z . }} }}".format(specs[St.graph]),
        endpoint)
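    # VIRTUOSO SERIALISES AN ASK RESULT AS TURTLE (e.g. "rs:boolean true .");
    # THE REGEX BELOW EXTRACTS THAT BOOLEAN LITERAL FROM THE SERIALISATION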
    data_1 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_1["result"])
    if len(data_1) > 0:
        data_1 = data_1[0].strip() == "true"
        if data_1 is False:
            print "GRAPH: {} {}".format(
                specs[St.graph], "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.")

    # CHECKING WHETHER BOTH DATASETS ARE AT THE VIRTUOSO TRIPLE STORE
    data_2 = Qry.virtuoso_request(
        "ask {GRAPH <http://geo.risis.eu/gadm>{ ?x ?y ?z . }}", endpoint)
    data_2 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_2["result"])
    if len(data_2) > 0:
        data_2 = data_2[0].strip() == "true"
        if data_2 is False:
            print "GRAPH: {} {}".format(
                "http://geo.risis.eu/gadm",
                "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.")

    if data_1 is False or data_2 is False:
        message = "BECAUSE BOTH DATASETS NEED TO BE PRESENT AT OUR TRIPLE STORE, WE ARE UNABLE TO EXECUTE THE REQUEST."
        return {
            St.message:
            message,
            St.result:
            'The dataset {} '
            'cannot be enriched with the GADM boundary at the moment.'.format(
                specs[St.graph])
        }

    total = 0
    limit = 20000
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    f_path = "{0}{1}{2}_enriched_{3}.ttl".format(directory, os.path.sep,
                                                 name, date)
    b_path = "{0}{1}{2}_enriched_{3}{4}".format(directory, os.path.sep,
                                                name, date,
                                                Ut.batch_extension())

    # MAKE SURE THE FOLDER EXISTS
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError as err:
        print "\n\t[utility_LOAD_TRIPLE_STORE:]", err
        return

    print "\n1. GETTING THE TOTAL NUMBER OF TRIPLES."
    count_query = enrich_query(specs, limit=0, offset=0, is_count=True)
    print count_query
    count_res = Qry.virtuoso_request(count_query, endpoint)
    result = count_res['result']

    # GET THE TOTAL NUMBER OF TRIPLES
    if result is None:
        print "NO RESULT FOR THIS ENRICHMENT."
        return count_res

    g = rdflib.Graph()
    g.parse(data=result, format="turtle")
    attribute = rdflib.URIRef("http://www.w3.org/2005/sparql-results#value")
    for subject, predicate, obj in g.triples((None, attribute, None)):
        total = int(obj)

    # NUMBER OF REQUEST NEEDED
    iterations = total / limit if total % limit == 0 else total / limit + 1
    print "\n2. TOTAL TRIPLES TO RETRIEVE  : {} \n\tTOTAL NUMBER OF ITERATIONS : {}\n".format(
        total, iterations)

    writer = codecs.open(f_path, "wb", "utf-8")
    batch_writer = codecs.open(b_path, "wb", "utf-8")

    print "3. GENERATING THE BATCH FILE TEXT"
    enriched_graph = "{}_enriched".format(specs[St.graph])
    stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[
        St.stardog_path]

    load_text = """echo "Loading data"
            {}stardog data add {} -g {} "{}"
            """.format(stardog_path, Svr.settings[St.database], enriched_graph,
                       f_path)

    batch_writer.write(to_unicode(load_text))
    batch_writer.close()

    # RUN THE ITERATIONS
    for i in range(0, iterations):

        offset = i * limit + 1
        print "\tROUND: {} OFFSET: {}".format(i + 1, offset)

        # print "\t\t1. GENERATING THE ENRICHMENT QUERY"
        virtuoso = enrich_query(specs,
                                limit=limit,
                                offset=offset,
                                is_count=False)
        # print virtuoso
        # exit(0)
        # print Qry.virtuoso(virtuoso)["result"]

        # print "\t\t2. RUNNING THE QUERY + WRITE THE RESULT TO FILE"
        writer.write(Qry.virtuoso_request(virtuoso, endpoint)["result"])

    writer.close()
    print "\n4. RUNNING THE BATCH FILE"
    print "\tTHE DATA IS BEING LOADED OVER HTTP POST." if Svr.settings[St.split_sys] is True \
        else "\tTHE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH."
    # os.system(b_path)

    # RUN THE BATCH FILE
    print "\tFILE: {}".format(f_path)
    print "\tBATCH: {}\n".format(b_path)
    os.chmod(b_path, 0o777)
    Ut.batch_load(b_path)
    if os.path.exists(b_path) is True:
        os.remove(b_path)

    # TODO 1. REGISTER THE DATASET TO BE ENRICHED IF NOT YET REGISTERED
    # TODO 2. ADD THE ENRICHED DATASET TO THE RESEARCH QUESTION (REGISTER).
    # TODO 3. MAYBE, CREATE THE LINKSET BETWEEN THE SOURCE AND THE RESULTING

    size = Qry.get_namedgraph_size(enriched_graph)

    print "JOB DONE...!!!!!!"

    return {
        St.message:
        "The selected dataset was enriched with the GADM boundary as {}. "
        "{} triples were created.".format(enriched_graph, size),
        St.result:
        enriched_graph
    }
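

# A MINIMAL USAGE SKETCH FOR enrich() (HYPOTHETICAL VALUES; THE GRAPH URI IS
# THE ONE FROM THE COMMENTED EXAMPLE ABOVE, THE PREDICATES ARE ASSUMED TO BE
# WGS84 ONES):
#
#     specs = {St.graph: "http://grid.ac/20170712",
#              St.entity_datatype: "http://xmlns.com/foaf/0.1/Organization",
#              St.lat_predicate: "http://www.w3.org/2003/01/geo/wgs84_pos#lat",
#              St.long_predicate: "http://www.w3.org/2003/01/geo/wgs84_pos#long"}
#     enrich(specs, "/tmp/enriched", "http://localhost:8890/sparql")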
# https://docupub.com/pdfmerge/

_format = "%a %b %d %Y %H:%M:%S"
date = datetime.datetime.today()
lock_file = None
RESET_SERVER_BATS = False
_line = "--------------------------------------------------------------" \
        "--------------------------------------------------------------"

if __name__ == "__main__":

    try:

        if RESET_SERVER_BATS is True:
            START_path = "{}stardogStart{}".format(Svr.SRC_DIR,
                                                   Ut.batch_extension())
            STOP_path = "{}stardogStop{}".format(Svr.SRC_DIR,
                                                 Ut.batch_extension())
            if os.path.exists(START_path) is True:
                os.remove(START_path)
            if os.path.exists(STOP_path) is True:
                os.remove(STOP_path)

        lock_file = [
            name for name in os.listdir(Svr.settings[St.stardog_data_path])
            if name.endswith('.lock')
        ]
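
        # A LEFTOVER .lock FILE IN THE STARDOG DATA DIRECTORY TYPICALLY MEANS
        # A STARDOG SERVER INSTANCE IS (OR WAS) STILL HOLDING THAT HOME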
        # print lock_file

    except Exception as err:
        print str(err)