Example no. 1
def export_flat_alignment_service(alignment):

    alignment = str(alignment).strip()
    row_alignment = alignment
    alignment = alignment if Ut.is_nt_format(
        alignment) is True else "<{}>".format(alignment)
    # CONSTRUCT QUERY (NOTE: two CONSTRUCT forms separated by ";" in one
    # request is endpoint-specific rather than standard SPARQL)
    query = """
    PREFIX ll: <{0}>
    PREFIX linkset: <{1}>
    PREFIX lens: <{2}>
    PREFIX singletons: <{3}>
    CONSTRUCT
    {{
        ?srcCorr  ll:mySameAs ?trgCorr .
        ?trgCorr  ll:mySameAs ?srcCorr .
    }}
    WHERE
    {{
        BIND( {4} as ?alignment )
        # THE ALIGNMENT GRAPH WITH EXPLICIT SYMMETRY
        GRAPH ?alignment
        {{
            ?srcCorr ?singleton ?trgCorr .
        }}
    }} ;

    CONSTRUCT
    {{
        ?alignment ?pred  ?obj .
        ?obj  ?predicate ?object .
    }}
    WHERE
    {{
        # THE METADATA
        BIND( {4} as ?alignment )
        ?alignment  ?pred  ?obj .
        OPTIONAL {{ ?obj  ?predicate ?object . }}
    }}

    """.format(
        Ns.alivocab,
        Ns.linkset,
        Ns.lens,
        Ns.singletons,
        alignment,
    )
    # print query
    # FIRE THE CONSTRUCT AGAINST THE TRIPLE STORE
    alignment_construct = Qry.endpointconstruct(query)
    # REMOVE EMPTY LINES
    triples = len(regex.findall('ll:mySameAs', alignment_construct))
    alignment_construct = "\n".join(
        [line for line in alignment_construct.splitlines() if line.strip()])
    result = "### TRIPLE COUNT: {}\n### LINKSET: {}\n".format(
        triples, alignment) + alignment_construct
    message = "You have just downloaded the graph [{}] which contains [{}] correspondences. ".format(
        row_alignment, triples)
    return {'result': result, 'message': message}
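
The `Ut.is_nt_format` guard above recurs throughout these examples. Its implementation is not shown here; a minimal sketch of what it presumably checks (a term already wrapped in N-Triples angle brackets) could look like this:

def is_nt_format(resource):
    # Sketch of the assumed helper: a term counts as N-Triples-formatted
    # when it is already wrapped in angle brackets, e.g. "<http://ex.org/a>".
    resource = str(resource).strip()
    return resource.startswith("<") and resource.endswith(">")

def to_nt(resource):
    # Wrap a bare URI in angle brackets unless it is already wrapped;
    # this is the ternary idiom repeated in every example below.
    return resource if is_nt_format(resource) else "<{}>".format(resource)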
Example no. 2
def properties(graph, datatype=None):

    comment = "# " if datatype is None else ""
    datatype = datatype if Ut.is_nt_format(datatype) is True else "<{}>".format(datatype)
    graph = graph if Ut.is_nt_format(graph) is True else "<{}>".format(graph)
    properties = """
    # <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>
    SELECT DISTINCT ?predicate
    WHERE
    {{
        GRAPH {}
        {{
            {}?subj {} ?type .
            ?subj ?predicate ?obj .
        }}
    }}
    """.format(graph, comment, datatype)
    print properties
    Qr.display_result(query=properties, spacing=50, limit=0, is_activated=True)
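
A hypothetical call (both URIs illustrative); when `datatype` is None the type triple is commented out of the query, so every subject in the graph is considered:

# Hypothetical usage: list the distinct predicates of resources of a given
# type inside one named graph (illustrative URIs).
properties("http://example.org/graph/dataset",
           datatype="http://example.org/ontology/Person")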
Example no. 3
def linkset_createdorused(question_uri,
                          alignment_mapping_uri,
                          specs,
                          is_created=True):

    if "<<" in alignment_mapping_uri:
        alignment_mapping_uri = str(alignment_mapping_uri).replace(
            "<<", "<").replace(">>", ">")

    if Ut.is_nt_format(alignment_mapping_uri) is False:
        alignment_mapping_uri = "<{}>".format(alignment_mapping_uri)

    linkset_uri = specs[St.refined] if St.refined in specs else specs[
        St.linkset]

    comment = "#"

    if is_created is True:
        created = "alivocab:created"
        opposed = "prov:used\t\t"
        print "REGISTERING [{}] AS CREATED".format(linkset_uri)

    else:
        created = "prov:used\t\t"
        opposed = "alivocab:created"
        comment = "#"
        print "REGISTERING [{}] AS IMPORTED".format(linkset_uri)

    query = PREFIX + """
    INSERT
    {{
        GRAPH <{0}>
        {{
            {1}   {2}        <{3}> .
            {4}{1}  prov:wasDerivedFrom     <{3}> .
        }}
    }}
    WHERE
    {{
        GRAPH <{0}>
        {{
            FILTER NOT EXISTS
            {{
                {1}    {5}        <{3}> .
            }}
        }}
        ### BIND(iri(\"{1}\") AS ?aligns)
    }}
    """.format(question_uri, alignment_mapping_uri, created, linkset_uri,
               comment, opposed)

    # print query

    return query
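
The `{4}` placeholder receives the `comment` string, the pattern this codebase uses to switch whole template lines on or off. A minimal standalone illustration (URNs illustrative):

# "#" renders the second triple commented out; "" leaves it active.
template = """
INSERT
{{
    <urn:ex:s>  <urn:ex:created>         <urn:ex:target> .
    {0}<urn:ex:s>  <urn:ex:wasDerivedFrom>  <urn:ex:target> .
}}
WHERE {{ }}
"""
print(template.format("#"))
print(template.format(""))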
Example no. 4
def target_datatype_properties(model, label, linkset_label):

    main_tabs = "\t\t\t"
    tabs = "{}\t\t\t\t\t\t\t\t\t\t\t\t".format(main_tabs)
    # ALIGNMENT COMBINATION: LIST OF DICTIONARIES
    alignment_targets = ""
    property_list_bind = ""
    count = 0
    for item in model:
        count += 1
        target = item[St.graph]
        data = item[St.data]

        # LIST OF DICTIONARIES
        for n in range(0, len(data)):
            code = "llTarget:{}_{}".format(label,
                                           Ut.hash_it(target + str(data[n])))
            datatype = data[n][St.entity_datatype]
            properties = data[n][St.properties]
            property_list = ""

            # LIST OF PROPERTIES
            for i in range(0, len(properties)):
                # BUG FIX: the fallback previously read data[i][St.properties][i],
                # indexing the data list by the property index; reuse properties[i]
                i_property = properties[i] if Ut.is_nt_format(
                    properties[i]) else "<{}>".format(properties[i])
                property_list += "?property_{}_{}_{} ".format(count, n, i) if i == 0 \
                    else ",\n{}?property_{}_{}_{} ".format(tabs, count, n, i)

                if i == 0 and count == 1:
                    property_list_bind += """BIND( IRI("{}") AS ?property_{}_{}_{})""".format(
                        i_property, count, n, i)
                else:
                    property_list_bind += """\n{}BIND( IRI("{}") AS ?property_{}_{}_{})""".format(
                        main_tabs, i_property, count, n, i)

            triples = """
    {5}linkset:{4}  ll:hasAlignmentTarget  {0} .
    {5}{0}  ll:hasTarget    <{1}> .
    {5}{0}  ll:hasDatatype  <{2}> .
    {5}{0}  ll:aligns       {3}.
    """.format(code, target, datatype, property_list, linkset_label, main_tabs)
            # print triples
            alignment_targets += triples

    return {"list": alignment_targets, "binds": property_list_bind}
Example no. 5
    def check_constraint():

        text = constraint_text.lower()
        text = text.split(",")

        # CONSTRAINT BUILDER
        c_builder = Buffer.StringIO()
        if constraint_targets is not None:
            for dictionary in constraint_targets:
                graph = dictionary[St.graph]
                data_list = dictionary[St.data]
                properties = data_list[0][St.properties]
                prop = properties[0] if Ut.is_nt_format(properties[0]) else "<{}>".format(properties[0])

                # WRITING THE CONSTRAINT ON THE GRAPH
                graph_q = """
       {{
           GRAPH <{0}>
           {{
               ?lookup {1} ?constraint .
           }}
       }}
       """.format(graph, prop)
                if len(c_builder.getvalue()) == 0:
                    c_builder.write(graph_q)
                else:
                    c_builder.write("UNION {}".format(graph_q))

            # WRITING THE FILTER
            if len(c_builder.getvalue()) > 0:
                for i in range(0, len(text)):
                    if i == 0:
                        c_builder.write("""
       FILTER (LCASE(STR(?constraint)) = "{}" """.format(text[i].strip()))
                    else:
                        c_builder.write("""
       || LCASE(STR(?constraint)) = "{}" """.format(text[i].strip()))
                c_builder.write(")")


        # # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
        query = Qry.cluster_rsc_strengths_query(resources, linkset)
        query = query.replace("# CONSTRAINTS IF ANY", c_builder.getvalue())
        # print query
        response = Qry.sparql_xml_to_matrix(query)
        if response[St.result] is None:
            return False
        return True
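
`check_constraint` is a closure: `constraint_text`, `constraint_targets`, `resources` and `linkset` all come from the enclosing scope (the same function reappears nested inside `cluster_d_test` in Example no. 13). The FILTER-building loop can be re-created standalone like this:

import StringIO as Buffer  # Python 2; on Python 3 use io.StringIO instead

def build_text_filter(constraint_text):
    # Standalone re-creation of the FILTER assembly above: one case-folded
    # equality test per comma-separated term, OR-ed together.
    builder = Buffer.StringIO()
    text = constraint_text.lower().split(",")
    for i in range(0, len(text)):
        if i == 0:
            builder.write('FILTER (LCASE(STR(?constraint)) = "{}" '.format(text[i].strip()))
        else:
            builder.write('|| LCASE(STR(?constraint)) = "{}" '.format(text[i].strip()))
    builder.write(")")
    return builder.getvalue()

print(build_text_filter("Amsterdam, Utrecht"))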
Example no. 6
def export_flat_alignment_and_metadata(alignment):

    flat = export_flat_alignment(alignment)

    alignment = str(alignment).strip()
    row_alignment = alignment
    alignment = alignment if Ut.is_nt_format(
        alignment) is True else "<{}>".format(alignment)
    # CONSTRUCT QUERY
    query = """
    PREFIX ll: <{0}>
    PREFIX linkset: <{1}>
    PREFIX lens: <{2}>
    PREFIX singletons: <{3}>
    CONSTRUCT
    {{
        ?alignment ?pred  ?obj .
        ?obj  ?predicate ?object .
    }}
    WHERE
    {{
        BIND ({4} AS ?alignment)
        # THE METADATA
        ?alignment  ?pred  ?obj .
        OPTIONAL {{ ?obj  ?predicate ?object . }}
    }} #LIMIT 10
    """.format(Ns.alivocab, Ns.linkset, Ns.lens, Ns.singletons, alignment)
    # print query

    # FIRE THE CONSTRUCT AGAINST THE TRIPLE STORE
    alignment_construct = Qry.endpointconstruct(query, clean=False)

    # REMOVE EMPTY LINES
    triples = flat["triples"]
    # triples = len(re.findall('ll:mySameAs', alignment_construct))
    alignment_construct = "\n".join([line for line in alignment_construct.splitlines() if line.strip()]) + "\n\n" + \
                          flat['result']

    result = "### GENERIC METADATA FOR \n### LINKSET: {}\n\n{}".format(
        alignment, alignment_construct)
    message = "You have just downloaded the graph [{}] which contains [{}] correspondences. ".format(
        row_alignment, triples)
    print result
    return {'result': result, 'message': message}
Example no. 7
def export_flat_alignment(alignment):

    print Ut.headings("EXPORTING THE ALIGNMENT WITH NO METADATA")

    print "Export for: {}".format(alignment)
    alignment = str(alignment).strip()
    row_alignment = alignment
    alignment = alignment if Ut.is_nt_format(
        alignment) is True else "<{}>".format(alignment)

    # CONSTRUCT QUERY
    query = """
    PREFIX ll: <{}>
    CONSTRUCT {{ ?x ll:mySameAs ?z }}
    WHERE
    {{
        GRAPH {}
        {{
            ?x ?y ?z
        }}
    }} order by ?x
    """.format(Ns.alivocab, alignment)
    # print query
    # FIRE THE CONSTRUCT AGAINST THE TRIPLE STORE
    alignment_construct = Qry.endpointconstruct(query)

    # REMOVE EMPTY LINES
    # COMMA IS COUNTED WHENEVER THERE ARE MORE OBJECTS FOR THE SUBJECT
    triples = len(regex.findall('ll:mySameAs', alignment_construct)) + len(
        regex.findall(',', alignment_construct))
    alignment_construct = "\n".join(
        [line for line in alignment_construct.splitlines() if line.strip()])
    alignment_construct = alignment_construct.replace(
        "{", "{}\n{{".format(alignment))

    # RESULTS
    result = "### TRIPLE COUNT: {0}\n### LINKSET: {1}\n".format(
        triples, alignment) + alignment_construct
    message = "You have just downloaded the graph [{}] which contains [{}] correspondences. ".format(
        row_alignment, triples)

    return {'result': result, 'message': message, "triples": triples}
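
The regex count works because the Turtle serialisation emits one `ll:mySameAs` per subject and a comma for every additional object of the same subject. A self-contained check of that counting rule:

import re

def count_correspondences(turtle_text):
    # One link per ll:mySameAs predicate, plus one per extra
    # comma-separated object, as counted in export_flat_alignment.
    return (len(re.findall('ll:mySameAs', turtle_text)) +
            len(re.findall(',', turtle_text)))

sample = "<urn:a> ll:mySameAs <urn:b> , <urn:c> ."
print(count_correspondences(sample))  # 2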
Example no. 8
def get_table(dataset_specs, reducer=None):

    # ADD THE REDUCER IF SET. THE REDUCER (OR DATASET REDUCER) AVOIDS
    # RE-COMPUTING SIMILARITY FOR INSTANCES THAT WERE ALREADY MATCHED
    print "\nLOADING: {} {}".format(dataset_specs[St.graph],
                                    dataset_specs[St.entity_datatype])
    if reducer is None:
        reducer_comment = "#"
        reducer = ""
    else:
        reducer_comment = ""
    aligns = dataset_specs[St.aligns] if Ut.is_nt_format(dataset_specs[St.aligns]) \
        else "<{}>".format(dataset_specs[St.aligns])
    query = """
    SELECT DISTINCT *
    {{
        GRAPH <{0}>
        {{
            ?subject
                a       <{1}> ;
                {2}    ?object .
        }}
        {4}FILTER NOT EXISTS
        {4}{{
        {4}    GRAPH <{3}>
        {4}    {{
        {4}        {{ ?subject   ?pred   ?obj . }}
        {4}        UNION
        {4}        {{ ?obj   ?pred   ?subject. }}
        {4}    }}
        {4}}}
    }} {5}
    """.format(dataset_specs[St.graph], dataset_specs[St.entity_datatype],
               aligns, reducer, reducer_comment, LIMIT)
    table_matrix = Qry.sparql_xml_to_matrix(query)
    # Qry.display_matrix(table_matrix, is_activated=True)
    # print table_matrix
    # print query
    if table_matrix[St.result]:
        print "\tINPUT SIZE: {}".format(str(len(table_matrix[St.result]) - 1))
    return table_matrix[St.result]
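
A hypothetical call (illustrative URIs), using the same `St.*` spec keys read above; passing a `reducer` graph un-comments the FILTER NOT EXISTS block so that subjects already linked in that graph are skipped:

dataset_specs = {
    St.graph: "http://example.org/dataset/orefo",
    St.entity_datatype: "http://example.org/ontology/Organisation",
    St.aligns: "http://www.w3.org/2000/01/rdf-schema#label",
}
# With reducer=None the whole FILTER NOT EXISTS block stays commented out.
matrix = get_table(dataset_specs, reducer="http://example.org/linkset/previous_run")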
Example no. 9
def linkset_evolution(research_question_uri, refined_linkset_uri):

    # BUILD THE SPECIFICATION
    specs = {
        St.researchQ_URI: research_question_uri.strip(),
        St.linkset: refined_linkset_uri
    }
    # print specs

    # DOCUMENT THE ALIGNMENT
    document = ""
    metadata = linkset_evolution_composition(alignment_mapping=specs)
    # print "METADATA:", metadata

    if metadata:
        # 1: GETTING SUBJECT - OBJECT & MECHANISM-
        elements1 = re.findall('\t.*:aligns(.*) "<{0,1}', metadata)
        # elements2 = re.findall('(<.*?>)', metadata, re.S)
        elements2 = re.findall('"(<{0,1}.*?>{0,1})"', metadata, re.S)
        # print "1: ", elements1
        # print "2:", elements2
        # print ""
        if len(elements1) == len(elements2) == 3:
            for i in range(3):
                append = " | " if i < 2 else ""
                two = elements2[i] if Ut.is_nt_format(
                    elements2[i]) else "<{}>".format(elements2[i])
                document += "{}={}{}".format(elements1[i], two, append)
            document = "[{}]".format(document)

        # FOLLOW DOWN THE PATH WITH A RECURSIVE CALL
        new_link = linkset_wasderivedfrom(refined_linkset_uri)
        new_document = linkset_evolution(research_question_uri, new_link)
        # print "DONE!!!!"
        # BUG FIX: reuse new_document instead of recursing a second time
        return document + ";\n" + new_document if new_document else document

    # print "NO EVOLUTION RESULT"
    return document
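
The two `re.findall` patterns above pull the aligns-role suffixes and their quoted values out of the serialised metadata. A self-contained run on an illustrative fragment shaped the way the regexes expect:

import re

metadata = ('\talivocab:alignsSubjects "<http://example.org/property/name>" ;\n'
            '\talivocab:alignsObjects "<http://example.org/property/label>" ;\n'
            '\talivocab:alignsMechanism "<http://example.org/mechanism/exactMatch>" .\n')

elements1 = re.findall('\t.*:aligns(.*) "<{0,1}', metadata)
elements2 = re.findall('"(<{0,1}.*?>{0,1})"', metadata, re.S)
print(elements1)  # ['Subjects', 'Objects', 'Mechanism']
print(elements2)  # the three angle-bracketed URIs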
Example no. 10
def get_table(dataset_specs, reducer=None):
    # ADD THE REDUCER IF SET
    if reducer is None:
        reducer_comment = "#"
        reducer = ""
    else:
        reducer_comment = ""
    aligns = dataset_specs[St.aligns] if Ut.is_nt_format(dataset_specs[St.aligns]) \
        else "<{}>".format(dataset_specs[St.aligns])
    query = """
    SELECT DISTINCT *
    {{
        GRAPH <{0}>
        {{
            ?subject
                a       <{1}> ;
                {2}    ?object .
        }}
        {4}FILTER NOT EXISTS
        {4}{{
        {4}    GRAPH <{3}>
        {4}    {{
        {4}        {{ ?subject   ?pred   ?obj . }}
        {4}        UNION
        {4}        {{ ?obj   ?pred   ?subject. }}
        {4}    }}
        {4}}}
    }} {5}
    """.format(
        dataset_specs[St.graph], dataset_specs[St.entity_datatype], aligns,
        reducer, reducer_comment, LIMIT)
    table_matrix = Qry.sparql_xml_to_matrix(query)
    # Qry.display_matrix(table_matrix, is_activated=True)
    # print table_matrix
    # print query
    return table_matrix[St.result]
Example no. 11
def export_alignment(alignment, limit=5000):

    # COMMENT THE LIMIT OUT IF IT IS EQUAL TO NONE

    # This function returns all the links + some metadata about the alignment.
    # METADATA: source dataset, target dataset and mechanism

    use = alignment
    alignment = str(alignment).strip()
    row_alignment = alignment
    alignment = alignment if Ut.is_nt_format(
        alignment) is True else "<{}>".format(alignment)
    src_dataset = None
    trg_dataset = None
    lens_targets = []
    mec_dataset = None
    rdf_type = None

    # GET THE METADATA OF THE ALIGNMENT: THE QUERY
    meta = """
    PREFIX ll: <{0}>
    CONSTRUCT {{ {1} ?y ?z. ?z ?p ?o . }}
    WHERE
    {{
        {1} ?y ?z .
        #?z ?p ?o .
    }} order by ?y
    """.format(Ns.alivocab, alignment)
    # print meta

    # GET THE METADATA OF THE ALIGNMENT: RUN THE QUERY
    meta_construct = Qry.endpointconstruct(meta, clean=False)
    meta_construct = meta_construct.replace("{", "").replace("}", "")
    # print meta_construct

    # LOAD THE METADATA USING RDFLIB
    sg = rdflib.Graph()
    sg.parse(data=meta_construct, format="turtle")

    # EXTRACT FROM THE RESPONSE: THE SOURCE AND TARGET DATASETS AND THE ALIGNMENT
    sbj = rdflib.URIRef(use)
    source = rdflib.URIRef("http://rdfs.org/ns/void#subjectsTarget")
    target = rdflib.URIRef("http://rdfs.org/ns/void#objectsTarget")
    lens_uri_targets = rdflib.URIRef("http://rdfs.org/ns/void#target")
    rdf_uri_type = rdflib.URIRef(
        "http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
    mechanism = rdflib.URIRef(
        "http://risis.eu/alignment/predicate/alignsMechanism")

    # EXTRACT THE ALIGNMENT TYPE
    for item in sg.objects(sbj, rdf_uri_type):
        rdf_type = item
        print "TYPE: ", rdf_type

    if str(rdf_type) == Ns.lens_type:

        # EXTRACT THE SOURCE DATASET
        for item in sg.objects(sbj, lens_uri_targets):
            lens_targets += [str(item)]
        print "{} TARGETS in {}".format(len(lens_targets), alignment)
        for trg_item in lens_targets:
            print "\t- {}".format(trg_item)

    else:

        # EXTRACT THE SOURCE DATASET
        for item in sg.objects(sbj, source):
            src_dataset = item

        # EXTRACT THE TARGET DATASET
        for item in sg.objects(sbj, target):
            trg_dataset = item

        # EXTRACT THE MECHANISM USED FOR THIS ALIGNMENT
        for item in sg.objects(sbj, mechanism):
            mec_dataset = item

    # CONSTRUCT QUERY FOR EXTRACTING THE CORRESPONDENCES
    comment = "" if limit else "#"
    query = """
    PREFIX ll: <{}>
    CONSTRUCT {{ ?x ?y ?z }}
    WHERE
    {{
        GRAPH {}
        {{
            ?x ?y ?z
        }}
    }} order by ?x {}LIMIT {}
    """.format(Ns.alivocab, alignment, comment, limit)
    # print query

    # FIRE THE CONSTRUCT FOR CORRESPONDENCES AGAINST THE TRIPLE STORE
    alignment_construct = Qry.endpointconstruct(query, clean=False)

    triples = 0  # NOTE: never recomputed here, so the header and message report 0
    links = None
    # RESULTS

    if alignment_construct is not None:
        links = "### TRIPLE COUNT: {}\n### LINKSET: {}\n".format(
            triples, alignment) + alignment_construct
        links = links.replace("{", "").replace("}", "")
    message = "You have just downloaded the graph [{}] which contains [{}] correspondences. ".format(
        row_alignment, triples)

    # result = result
    # print result
    print "Done with graph: {}".format(alignment)
    return {
        "type": rdf_type,
        'result': links,
        'message': message,
        'source': src_dataset,
        "target": trg_dataset,
        "lens_targets": lens_targets,
        'mechanism': mec_dataset
    }
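
A reduced, self-contained version of the rdflib lookup used above, parsing a tiny hand-written Turtle document (URIs illustrative):

import rdflib

data = """
@prefix void: <http://rdfs.org/ns/void#> .
<http://example.org/linkset/1>
    void:subjectsTarget <http://example.org/dataset/src> ;
    void:objectsTarget  <http://example.org/dataset/trg> .
"""
g = rdflib.Graph()
g.parse(data=data, format="turtle")

sbj = rdflib.URIRef("http://example.org/linkset/1")
source = rdflib.URIRef("http://rdfs.org/ns/void#subjectsTarget")
for item in g.objects(sbj, source):
    print(item)  # http://example.org/dataset/src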
Example no. 12
def export_alignment_all(alignment, directory=None, limit=5000):

    directory = os.path.join(directory, "")
    print directory
    if os.path.isdir(os.path.dirname(directory)) is False or os.path.exists(
            directory) is False:
        print "CREATING THE DIRECTORY"
        os.mkdir(os.path.dirname(directory))

    # COMMENT THE LIMIT OUT IF IT IS EQUAL TO NONE

    # This function returns all the links + some metadata about the alignment.
    # METADATA: source dataset, target dataset and mechanism

    use = alignment
    alignment = str(alignment).strip()
    row_alignment = alignment
    alignment = alignment if Ut.is_nt_format(
        alignment) is True else "<{}>".format(alignment)

    # ****************************************************
    # 1. GET THE METADATA OF THE ALIGNMENT: THE QUERY
    # ****************************************************
    meta = """
    PREFIX ll: <{0}>
    CONSTRUCT {{ {1} ?y ?z. ?z ?p ?o . }}
    WHERE
    {{
        {1} ?y ?z .
        OPTIONAL{{ ?z ?p ?o . }}
        # NOTE: the pattern below is unconstrained (it matches any triple)
        OPTIONAL{{ ?O ?Q ?R . }}
    }} order by ?y
    """.format(Ns.alivocab, alignment)
    # print meta

    # GET THE METADATA OF THE ALIGNMENT: RUN THE QUERY
    meta_construct = Qry.endpointconstruct(meta, clean=False)
    meta_construct = meta_construct.replace("{", "").replace("}", "")
    with open(os.path.join(directory, "metadata.ttl"), "wb") as metadata:
        metadata.write(meta_construct)
    # print meta_construct

    # ****************************************************
    # 2. GET THE CORRESPONDENCES OF THE LINKSET
    # ****************************************************
    # CONSTRUCT QUERY FOR EXTRACTING THE CORRESPONDENCES
    comment = "" if limit else "#"
    query = """
        PREFIX ll: <{}>
        CONSTRUCT {{ ?x ?y ?z }}
        WHERE
        {{
            GRAPH {}
            {{
                ?x ?y ?z
            }}
        }} order by ?x {}LIMIT {}
        """.format(Ns.alivocab, alignment, comment, limit)
    # print query

    # FIRE THE CONSTRUCT FOR CORRESPONDENCES AGAINST THE TRIPLE STORE
    alignment_construct = Qry.endpointconstruct(query, clean=False)
    if alignment_construct:
        alignment_construct = alignment_construct.replace(
            "{", "{}\n{{".format(alignment))
    # print alignment_construct
    with open(os.path.join(directory, "linkset.trig"), "wb") as links:
        links.write(alignment_construct)

    # ****************************************************
    # 3. GET THE METADATA CORRESPONDENCES' PREDICATES
    # ****************************************************
    singleton_graph_uri = Ut.from_alignment2singleton(alignment)
    singleton_query = """
    PREFIX ll: <{0}>
    PREFIX singletons: <{1}>
    CONSTRUCT {{ ?predicate ?x  ?y }}
    WHERE
    {{
        {{
            SELECT ?predicate
            {{
                GRAPH {2}
                {{
                    ?subject ?predicate ?object
                }}
            }} order by ?x {3}LIMIT {4}
        }}
        GRAPH {5}
        {{
            ?predicate ?x  ?y
        }}
    }}
    """.format(Ns.alivocab, Ns.singletons, alignment, comment, limit,
               singleton_graph_uri)
    # print singleton_query

    # FIRE THE CONSTRUCT FOR SINGLETON AGAINST THE TRIPLE STORE
    singleton_construct = Qry.endpointconstruct(singleton_query, clean=False)
    if singleton_construct:
        singleton_construct = singleton_construct.replace(
            "{", "{}\n{{".format(singleton_graph_uri))
    # print singleton_construct
    with open(os.path.join(directory, "singletons.trig"), "wb") as singletons:
        singletons.write(singleton_construct)

    # LOAD THE METADATA USING RDFLIB
    sg = rdflib.Graph()
    sg.parse(data=meta_construct, format="turtle")

    # EXTRACT FROM THE RESPONSE: THE SOURCE AND TARGET DATASETS AND THE ALIGNMENT
    sbj = rdflib.URIRef(use)
    triples_uri = rdflib.URIRef("http://rdfs.org/ns/void#triples")

    # EXTRACT THE ALIGNMENT TYPE
    triples = ""
    for item in sg.objects(sbj, triples_uri):
        triples = item
        print "TRIPLES: ", triples

    # NOTE: links is assembled below but never returned; only the zipped
    # file path and the message leave this function
    if alignment_construct is not None:
        links = "### TRIPLE COUNT: {}\n### LINKSET: {}\n".format(
            triples, alignment) + alignment_construct
        links = links.replace("{", "").replace("}", "")
    message = "You have just downloaded the graph [{}] which contains [{}] correspondences. ".format(
        row_alignment, triples)

    host = Svr.settings[St.stardog_host_name]
    endpoint = b"http://{}/annex/{}/sparql/query?".format(
        host, Svr.settings[St.database])

    local_name = Ut.get_uri_local_name_plus(alignment)
    file_at_parent_directory = os.path.join(
        os.path.abspath(os.path.join(directory, os.pardir)),
        "{}.zip".format(local_name))

    zipped_file = Ut.zip_folder(directory,
                                output_file_path=file_at_parent_directory)
    print "\t>>> THE ZIPPED FILE IS LOCATED AT:\n\t\t- {}".format(zipped_file)

    # result = result
    # print result
    print "Done with graph: {}".format(alignment)

    # return {'result': {
    #     "generic_metadata": meta_construct,
    #     'specific_metadata': singleton_construct,
    #     'data': alignment_construct}, 'message': message}

    return {'result': zipped_file, 'message': message}
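
`Ut.zip_folder` is not shown in these examples; a minimal standard-library sketch of the behaviour it appears to have (archive a folder and return the archive path):

import os
import zipfile

def zip_folder(directory, output_file_path):
    # Sketch under that assumption: walk the folder and store every file
    # with a path relative to the folder being archived.
    with zipfile.ZipFile(output_file_path, "w", zipfile.ZIP_DEFLATED) as zipped:
        for root, _, files in os.walk(directory):
            for name in files:
                full = os.path.join(root, name)
                zipped.write(full, arcname=os.path.relpath(full, directory))
    return output_file_path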
Example no. 13
def cluster_d_test(linkset, network_size=3, network_size_max=3, targets=None, constraint_targets=None,
                   constraint_text="", directory=None, greater_equal=True, print_it=False, limit=None,
                   only_good=False, activated=False):

    # FOR CONSTRAINTS TO WORK, IT SHOULD NOT BE NONE

    network = []
    print "\nLINK NETWORK INVESTIGATION"
    if activated is False:
        print "\tTHE FUNCTION I NOT ACTIVATED"
        return ""

    elif network_size > network_size_max and greater_equal is False:
        print "\t[network_size] SHOULD BE SMALLER THAN [network_size_max]"
        return ""

    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    linkset_name = Ut.get_uri_local_name(linkset)
    linkset = linkset.strip()

    if network_size_max - network_size == 0:
        greater_equal = False

    check = False

    # RUN THE CLUSTER
    clusters_0 = Cls.links_clustering(linkset, limit)

    if greater_equal is True:
        temp_size = 0
        for cluster, cluster_val in clusters_0.items():
            new_size = len(list(cluster_val["nodes"]))
            if new_size > temp_size:
                temp_size = new_size
        network_size_max = temp_size
        print "THE BIGGEST NETWORK'S: {}".format(network_size_max)

    def check_constraint():

        text = constraint_text.lower()
        text = text.split(",")

        # CONSTRAINT BUILDER
        c_builder = Buffer.StringIO()
        if constraint_targets is not None:
            for dictionary in constraint_targets:
                graph = dictionary[St.graph]
                data_list = dictionary[St.data]
                properties = data_list[0][St.properties]
                prop = properties[0] if Ut.is_nt_format(properties[0]) else "<{}>".format(properties[0])

                # WRITING THE CONSTRAINT ON THE GRAPH
                graph_q = """
       {{
           GRAPH <{0}>
           {{
               ?lookup {1} ?constraint .
           }}
       }}
       """.format(graph, prop)
                if len(c_builder.getvalue()) == 0:
                    c_builder.write(graph_q)
                else:
                    c_builder.write("UNION {}".format(graph_q))

            # WRITING THE FILTER
            if len(c_builder.getvalue()) > 0:
                for i in range(0, len(text)):
                    if i == 0:
                        c_builder.write("""
       FILTER (LCASE(STR(?constraint)) = "{}" """.format(text[i].strip()))
                    else:
                        c_builder.write("""
       || LCASE(STR(?constraint)) = "{}" """.format(text[i].strip()))
                c_builder.write(")")


        # # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
        query = Qry.cluster_rsc_strengths_query(resources, linkset)
        query = query.replace("# CONSTRAINTS IF ANY", c_builder.getvalue())
        # print query
        response = Qry.sparql_xml_to_matrix(query)
        if response[St.result] is None:
            return False
        return True

    for index in range(network_size, network_size_max + 1):

        count_1 = 0
        count_2 = 0
        curr_network_size = index
        print "\nCLUSTERS OF SIZE {}".format(index)
        sheet_builder = Buffer.StringIO()
        analysis_builder = Buffer.StringIO()
        sheet_builder.write("Count	ID					STRUCTURE	E-STRUCTURE-SIZE	A. NETWORK QUALITY"
                            "		M. NETWORK QUALITY		REFERENCE\n")

        for cluster, cluster_val in clusters_0.items():

            # network = []
            resources = ""
            uri_size = 0
            count_1 += 1
            children = list(cluster_val["nodes"])
            strengths = cluster_val["strengths"]
            cluster_size = len(children)
            # if "<http://www.grid.ac/institutes/grid.10493.3f>" not in children:
            #     continue

            check = cluster_size >= curr_network_size if greater_equal else cluster_size == curr_network_size

            # NETWORK OF A PARTICULAR SIZE
            if check:

                # file_name = i_cluster[0]

                # 2: FETCHING THE CORRESPONDENTS
                smallest_hash = float('inf')
                child_list = ""
                for child in children:

                    # CREATE THE HASHED ID AS THE CLUSTER NAME
                    hashed = hash(child)
                    if hashed <= smallest_hash:
                        smallest_hash = hashed

                    # GENERAL INFO 1: RESOURCES INVOLVED
                    child_list += "\t{}\n".format(child)

                    # LIST OF RESOURCES IN THE CLUSTER
                    use = "<{}>".format(child) if Ut.is_nt_format(child) is not True else child
                    resources += "\n\t\t\t\t{}".format(use)
                    if len(child) > uri_size:
                        uri_size = len(child)

                # MAKE SURE THE FILE NAME OF THE CLUSTER IS ALWAYS THE SAME
                file_name = "{}".format(str(smallest_hash).replace("-", "N")) if str(
                    smallest_hash).startswith("-") \
                    else "P{}".format(smallest_hash)

                if constraint_targets is not None and check_constraint() is False:
                    continue

                count_2 += 1

                # # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
                query = Qry.cluster_rsc_strengths_query(resources, linkset)
                response = Qry.sparql_xml_to_matrix(query)

                # GENERAL INFO 2:
                info = "SIZE    {}   \nCLUSTER {} \nNAME    {}\n".format(cluster_size, count_1, file_name)
                info2 = "CLUSTER [{}] NAME [{}] SIZE [{}]".format(count_1, file_name, cluster_size)
                analysis_builder.write("{}\n".format(info))


                analysis_builder.write("RESOURCES INVOLVED\n")
                analysis_builder.write(child_list)
                analysis_builder.write("\nCORRESPONDENT FOUND ")
                analysis_builder.write(
                    Qry.display_matrix(response, spacing=uri_size, output=True, line_feed='.', is_activated=True))

                # INFO TYPE 3: PROPERTY-VALUES OF THE RESOURCES INVOLVED
                analysis_builder.write("\n\nDISAMBIGUATION HELPER ")
                if targets is None:
                    analysis_builder.write(Cls.disambiguate_network(linkset, children))
                else:
                    report = Cls.disambiguate_network_2(children, targets)
                    if report is not None:
                        analysis_builder.write(report)

                # GENERATING THE NETWORK AS TUPLES WHERE EACH TUPLE REPRESENTS TWO RESOURCES IN A RELATIONSHIP :-)
                network = []
                link_count = 0
                for link in cluster_val["links"]:
                    link_count += 1
                    name_1 = "{}-{}".format(Ut.hash_it(link[0]), Ut.get_uri_local_name(link[0]))
                    name_2 = "{}-{}".format(Ut.hash_it(link[1]), Ut.get_uri_local_name(link[1]))
                    network += [(name_1, name_2)]

                #  GET THE AUTOMATED FLAG


                if print_it:
                    print ""
                    print analysis_builder.getvalue()

                # SETTING THE DIRECTORY
                if directory:

                    if network:
                        automated_decision = metric(network)["AUTOMATED_DECISION"]
                        if only_good is True and automated_decision.startswith("GOOD") is not True:
                            count_2 -= 1
                            continue

                        print "{:>5} {}".format(count_2, info2)

                        eval_sheet(targets, count_2, "{}_{}".format(cluster_size, file_name),
                                   sheet_builder, linkset, children, automated_decision)
                    else:
                        print network


                    # linkset_name = Ut.get_uri_local_name(linkset)
                    # date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
                    temp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\{}_{}\\".format(
                        curr_network_size, date, linkset_name, cluster_size, file_name))
                    if not os.path.exists(temp_directory):
                        os.makedirs(temp_directory)

                    """""""""""""  PLOTTING """""""""""""
                    # FIRE THE DRAWING: Supported formats: eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
                    analysis_builder.write(
                        draw_graph(graph=network,
                                   file_path="{}{}.{}".format(temp_directory, "cluster_{}".format(file_name), "pdf"),
                                   show_image=False)
                    )

                    """""""""""""  WRITING TO DISC """""""""""""
                    # WRITE TO DISC
                    Ut.write_2_disc(file_directory=temp_directory, file_name="cluster_{}".format(file_name, ),
                                    data=analysis_builder.getvalue(), extension="txt")
                    analysis_builder = Buffer.StringIO()

            if directory:
                # if len(sheet_builder.getvalue()) > 150 and count_2 == 2:
                if len(sheet_builder.getvalue()) > 150 and len(clusters_0) == count_1:
                    tmp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\\".format(
                        curr_network_size, date, linkset_name))

                    """""""""""""  WRITING CLUSTER SHEET TO DISC """""""""""""
                    print "\n\tWRITING CLUSTER SHEET AT\n\t{}".format(tmp_directory)
                    Ut.write_2_disc(file_directory=tmp_directory, file_name="{}_ClusterSheet".format(cluster_size),
                                    data=sheet_builder.getvalue(), extension="txt")

            # if count_2 == 2:
            #     break

        if greater_equal is True:
            # no need to continue: we already covered all networks greater than or equal to the "network_size" input
            break

        print "\t>>> FOUND: {} CLUSTERS OF SIZE {}".format(count_2, curr_network_size)

        if directory is None:
            return "{}\t{}".format(curr_network_size, count_2)
Example no. 14
def cluster_d_test_stats(linkset, network_size=3, targets=None,
                   directory=None, greater_equal=True, print_it=False, limit=None, activated=False):
    network = []
    print "LINK NETWORK INVESTIGATION"
    if activated is False:
        print "\tTHE FUNCTION I NOT ACTIVATED"
        return ""
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    linkset_name = Ut.get_uri_local_name(linkset)
    count_1 = 0
    count_2 = 0
    sheet_builder = Buffer.StringIO()
    analysis_builder = Buffer.StringIO()
    sheet_builder.write("Count	ID					STRUCTURE	E-STRUCTURE-SIZE	A. NETWORK QUALITY"
                        "		M. NETWORK QUALITY		REFERENCE\n")
    linkset = linkset.strip()
    check = False

    # RUN THE CLUSTER
    clusters_0 = Cls.links_clustering(linkset, limit)

    for cluster, cluster_val in clusters_0.items():

        # network = []
        resources = ""
        uri_size = 0
        count_1 += 1
        children = list(cluster_val["nodes"])
        strengths = cluster_val["strengths"]
        cluster_size = len(children)
        # if "<http://www.grid.ac/institutes/grid.10493.3f>" not in children:
        #     continue

        check = cluster_size >= network_size if greater_equal else cluster_size == network_size

        # NETWORK OF A PARTICULAR SIZE
        if check:
            count_2 += 1
            # file_name = i_cluster[0]

            # 2: FETCHING THE CORRESPONDENTS
            smallest_hash = float('inf')
            child_list = ""
            for child in children:
                hashed = hash(child)
                if hashed <= smallest_hash:
                    smallest_hash = hashed

                # GENERAL INFO 1: RESOURCES INVOLVED
                child_list += "\t{}\n".format(child)

                use = "<{}>".format(child) if Ut.is_nt_format(child) is not True else child
                resources += "\n\t\t\t\t{}".format(use)
                if len(child) > uri_size:
                    uri_size = len(child)

            if directory:
                # MAKE SURE THE FILE NAME OF THE CLUSTER IS ALWAYS THE SAME
                file_name = "{}".format(str(smallest_hash).replace("-", "N")) if str(
                    smallest_hash).startswith("-") \
                    else "P{}".format(smallest_hash)


                # # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
                query = Qry.cluster_rsc_strengths_query(resources, linkset)
                response = Qry.sparql_xml_to_matrix(query)

                # GENERAL INFO 2:
                info = "SIZE    {}   \nCLUSTER {} \nNAME    {}\n".format(cluster_size, count_1, file_name)
                info2 = "CLUSTER [{}] NAME [{}] SIZE [{}]".format(count_1, file_name, cluster_size)
                analysis_builder.write("{}\n".format(info))
                print "{:>5} {}".format(count_2, info2)

                analysis_builder.write("RESOURCES INVOLVED\n")
                analysis_builder.write(child_list)
                analysis_builder.write("\nCORRESPONDENT FOUND ")
                analysis_builder.write(
                    Qry.display_matrix(response, spacing=uri_size, output=True, line_feed='.', is_activated=True))

                # INFO TYPE 3: PROPERTY-VALUES OF THE RESOURCES INVOLVED
                analysis_builder.write("\n\nDISAMBIGUATION HELPER ")
                if targets is None:
                    analysis_builder.write(Cls.disambiguate_network(linkset, children))
                else:
                    analysis_builder.write(Cls.disambiguate_network_2(children, targets))


                # GENERATING THE NETWORK AS TUPLES WHERE EACH TUPLE REPRESENTS TWO RESOURCES IN A RELATIONSHIP :-)
                network = []
                link_count = 0
                for link in cluster_val["links"]:
                    link_count += 1
                    name_1 = "{}".format(Ut.get_uri_local_name(link[0]))
                    name_2 = "{}".format(Ut.get_uri_local_name(link[1]))
                    network += [(name_1, name_2)]


            if print_it:
                print ""
                print analysis_builder.getvalue()

            # SETTING THE DIRECTORY
            if directory:
                # linkset_name = Ut.get_uri_local_name(linkset)
                # date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
                temp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\{}_{}\\".format(
                    network_size, date, linkset_name, cluster_size, file_name))
                if not os.path.exists(temp_directory):
                    os.makedirs(temp_directory)

                """""""""""""  PLOTTING """""""""""""
                # FIRE THE DRAWING: Supported formats: eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
                analysis_builder.write(
                    draw_graph(graph=network,
                               file_path="{}{}.{}".format(temp_directory, "cluster_{}".format(file_name), "pdf"),
                               show_image=False)
                )

                """""""""""""  WRITING TO DISC """""""""""""
                # WRITE TO DISC
                Ut.write_2_disc(file_directory=temp_directory, file_name="cluster_{}".format(file_name, ),
                                data=analysis_builder.getvalue(), extension="txt")
                analysis_builder = Buffer.StringIO()

                if network:
                    automated_decision = metric(network)["AUTOMATED_DECISION"]
                    eval_sheet(targets, count_2, "{}_{}".format(cluster_size, file_name),
                               sheet_builder, linkset, children, automated_decision)
                else:
                    print network

        if directory:
            # if len(sheet_builder.getvalue()) > 150 and count_2 == 2:
            if len(sheet_builder.getvalue()) > 150 and len(clusters_0) == count_1:
                tmp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\\".format(
                    network_size, date, linkset_name))

                """""""""""""  WRITING CLUSTER SHEET TO DISC """""""""""""
                print "\nWRITING CLUSTER SHEET AT\n\t{}".format(tmp_directory)
                Ut.write_2_disc(file_directory=tmp_directory, file_name="{}_ClusterSheet".format(cluster_size),
                                data=sheet_builder.getvalue(), extension="txt")

                # if count_2 == 2:
                #     break

    print ">>> FOUND: {}".format(count_2)

    if directory is None:
        return "{}\t{}".format(network_size, count_2)
Example no. 15
def cluster_d_test_statss(linkset, network_size=3, targets=None,
                   directory=None, greater_equal=True, print_it=False, limit=None, activated=False):
    network = []
    print "LINK NETWORK INVESTIGATION"
    if activated is False:
        print "\tTHE FUNCTION I NOT ACTIVATED"
        return ""
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    linkset_name = Ut.get_uri_local_name(linkset)
    count_1 = 0
    count_2 = 0
    sheet_builder = Buffer.StringIO()
    analysis_builder = Buffer.StringIO()
    sheet_builder.write("Count	ID					STRUCTURE	E-STRUCTURE-SIZE	A. NETWORK QUALITY"
                        "		M. NETWORK QUALITY		REFERENCE\n")
    linkset = linkset.strip()
    check = False

    # RUN THE CLUSTER
    clusters_0 = Cls.links_clustering(linkset, limit)

    for i_cluster in clusters_0.items():

        # network = []
        resources = ""
        uri_size = 0
        count_1 += 1
        children = i_cluster[1][St.children]
        cluster_size = len(children)
        # if "<http://www.grid.ac/institutes/grid.10493.3f>" not in children:
        #     continue

        check = cluster_size >= network_size if greater_equal else cluster_size == network_size

        # NETWORK OF A PARTICULAR SIZE
        if check:
            count_2 += 1
            # file_name = i_cluster[0]

            # 2: FETCHING THE CORRESPONDENTS
            smallest_hash = float('inf')
            child_list = ""
            for child in children:
                hashed = hash(child)
                if hashed <= smallest_hash:
                    smallest_hash = hashed

                # GENERAL INFO 1: RESOURCES INVOLVED
                child_list += "\t{}\n".format(child)

                use = "<{}>".format(child) if Ut.is_nt_format(child) is not True else child
                resources += "\n\t\t\t\t{}".format(use)
                if len(child) > uri_size:
                    uri_size = len(child)

            if directory:
                # MAKE SURE THE FILE NAME OF THE CLUSTER IS ALWAYS THE SAME
                file_name = "{}".format(str(smallest_hash).replace("-", "N")) if str(
                    smallest_hash).startswith("-") \
                    else "P{}".format(smallest_hash)

                # QUERY FOR FETCHING ALL LINKED RESOURCES FROM THE LINKSET
                query = """
                PREFIX prov: <{3}>
                PREFIX ll: <{4}>
                SELECT DISTINCT ?lookup ?object ?Strength ?Evidence
                {{
                    VALUES ?lookup{{ {0} }}

                    {{
                        GRAPH <{1}>
                        {{ ?lookup ?predicate ?object .}}
                    }} UNION
                    {{
                        GRAPH <{1}>
                        {{?object ?predicate ?lookup . }}
                    }}

                    GRAPH <{2}>
                    {{
                        ?predicate  prov:wasDerivedFrom  ?DerivedFrom  .
                        OPTIONAL {{ ?DerivedFrom  ll:hasStrength  ?Strength . }}
                        OPTIONAL {{ ?DerivedFrom  ll:hasEvidence  ?Evidence . }}
                    }}
                }}
                            """.format(resources, linkset, linkset.replace("lens", "singletons"),
                                       Ns.prov, Ns.alivocab)
                # print query

                # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
                response = Qry.sparql_xml_to_matrix(query)

                # A DICTIONARY OF KEY: (SUBJECT-OBJECT) VALUE:STRENGTH
                response_dic = dict()
                result = response[St.result]
                if result:
                    for i in range(1, len(result)):
                        key = (result[i][0], result[i][1])
                        if key not in response_dic:
                            response_dic[key] = result[i][2]

                # print response_dic

                # GENERAL INFO 2:
                info = "SIZE    {}   \nCLUSTER {} \nNAME    {}\n".format(cluster_size, count_1, file_name)
                info2 = "CLUSTER [{}] NAME [{}] SIZE [{}]".format(count_1, file_name, cluster_size)
                analysis_builder.write("{}\n".format(info))
                print "{:>5} {}".format(count_2, info2)

                analysis_builder.write("RESOURCES INVOLVED\n")
                analysis_builder.write(child_list)
                analysis_builder.write("\nCORRESPONDENT FOUND ")
                analysis_builder.write(
                    Qry.display_matrix(response, spacing=uri_size, output=True, line_feed='.', is_activated=True))

                # INFO TYPE 3: PROPERTY-VALUES OF THE RESOURCES INVOLVED
                analysis_builder.write("\n\nDISAMBIGUATION HELPER ")
                if targets is None:
                    analysis_builder.write(Cls.disambiguate_network(linkset, children))
                else:
                    analysis_builder.write(Cls.disambiguate_network_2(children, targets))

                position = i_cluster[1][St.row]
                if St.annotate in i_cluster[1]:
                    analysis_builder.write("\n\nANNOTATED CLUSTER PROCESS")
                    analysis_builder.write(i_cluster[1][St.annotate])

                # THE CLUSTER
                # print "POSITION: {}".format(position)
                # print "\nMATRIX DISPLAY\n"
                # for i in range(0, position):
                #     resource = (i_cluster[1][St.matrix])[i]
                #     print "\t{}".format(resource[:position])
                    # print "\t{}".format(resource)

                # GENERATING THE NETWORK AS TUPLES WHERE EACH TUPLE REPRESENTS TWO RESOURCES IN A RELATIONSHIP :-)
                network = []
                for i in range(1, position):
                    for j in range(1, position):
                        if (i, j) in (i_cluster[1][St.matrix_d]) and (i_cluster[1][St.matrix_d])[(i, j)] != 0:
                            r = (i_cluster[1][St.matrix_d])[(i, 0)]
                            c = (i_cluster[1][St.matrix_d])[(0, j)]
                            r_name = "{}:{}".format(i, Ut.get_uri_local_name(r))
                            c_name = "{}:{}".format(j, Ut.get_uri_local_name(c))
                            network += [(r_name, c_name)]
                            # network += [(r_smart, c_smart)]
                # print "\tNETWORK", network

            if print_it:
                print ""
                print analysis_builder.getvalue()

            # SETTING THE DIRECTORY
            if directory:
                # linkset_name = Ut.get_uri_local_name(linkset)
                # date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
                temp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\{}_{}\\".format(
                    network_size, date, linkset_name, cluster_size, file_name))
                if not os.path.exists(temp_directory):
                    os.makedirs(temp_directory)

                """""""""""""  PLOTTING """""""""""""
                # FIRE THE DRAWING: Supported formats: eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
                analysis_builder.write(
                    draw_graph(graph=network,
                               file_path="{}{}.{}".format(temp_directory, "cluster_{}".format(file_name), "pdf"),
                               show_image=False)
                )

                """""""""""""  WRITING TO DISC """""""""""""
                # WRITE TO DISC
                Ut.write_2_disc(file_directory=temp_directory, file_name="cluster_{}".format(file_name, ),
                                data=analysis_builder.getvalue(), extension="txt")
                analysis_builder = Buffer.StringIO()

        if directory:

            # NOTE: unlike Example no. 14, this block sits outside the size
            # check, so it can re-run with a previous cluster's network and
            # file_name when the current cluster fails the check
            if network:
                automated_decision = metric(network)["AUTOMATED_DECISION"]
                eval_sheet(targets, count_2, "{}_{}".format(cluster_size, file_name),
                           sheet_builder, linkset, children, automated_decision)
            else:
                print network

        if directory:
            # if len(sheet_builder.getvalue()) > 150 and count_2 == 2:
            if len(sheet_builder.getvalue()) > 150 and len(clusters_0) == count_1:
                tmp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\\".format(
                    network_size, date, linkset_name))

                """""""""""""  WRITING CLUSTER SHEET TO DISC """""""""""""
                print "\nWRITING CLUSTER SHEET AT\n\t{}".format(tmp_directory)
                Ut.write_2_disc(file_directory=tmp_directory, file_name="{}_ClusterSheet".format(cluster_size),
                                data=sheet_builder.getvalue(), extension="txt")

        # if count_2 == 2:
        #     break

    print ">>> FOUND: {}".format(count_2)

    if directory is None:
        return "{}\t{}".format(network_size, count_2)