Example #1
def eval_sheet(targets, count, smallest_hash, a_builder, alignment, children, automated_decision):
    # WRITE ONE ROW OF THE EVALUATION (CLUSTER) SHEET, THEN APPEND THE
    # DISAMBIGUATION INFO GROUPED PER DATASET
    first = False
    a_builder.write("\n{:<5}\t{:<20}{:12}{:20}{:23}{:23}".format(count, smallest_hash, "", "", automated_decision, ""))
    if targets is None:
        a_builder.write(Cls.disambiguate_network(alignment, children))
    else:
        response = Cls.disambiguate_network_2(children, targets, output=False)
        if response:
            temp = ""
            dataset = ""
            # for line in response:
            #     print line

            for i in range(1, len(response)):
                resource = Ut.get_uri_local_name(response[i][0])
                if i == 1:
                    temp = "{:25}: {}".format(resource, response[i][1])

                elif dataset == response[i][0]:
                    temp = "{:25} | {}".format(temp, response[i][1])

                else:
                    if first is False:
                        a_builder.write("  {}\n".format(temp))
                    else:
                        a_builder.write("{:108}{}\n".format("", temp))
                    first = True

                    temp = "{:25}: {}".format(resource, response[i][1])

                dataset = response[i][0]
            a_builder.write("{:108}{}\n".format("", temp))
Example #2
def generate_lens_name(datasets, operator="union"):

    datasets.sort()
    ds_concat = ""
    for dataset in datasets:
        ds_concat += dataset

    # BUILD THE LIST OF UNIQUE DATASET NAMES
    unique_list = list()

    # THE QUERY FOR CHECKING WHETHER THE LENS EXISTS
    query = ask_union(datasets)

    for dataset in datasets:
        lens_targets_unique(unique_list, dataset)

    # print unique_list
    name = ""
    for i in range(0, len(unique_list)):
        local_name = Ut.get_uri_local_name(unique_list[i])
        link = "" if i == 0 else "_"
        # print (local_name[0]).upper()
        name += link + (local_name[0]).upper() + local_name[1:]

    hash_value = hash(name + ds_concat)

    hash_value = str(hash_value).replace(
        "-", "N") if "-" in str(hash_value) else "P{}".format(hash_value)

    name = "{}_{}_{}".format(operator, name, hash_value)

    # print name
    # print query
    # print hash(name)
    return {"name": name, "query": query}
Example #3
def view_data(view_specs, view_filter, display=False):

    # GENERATING THE METADATA FOR REGISTERING A VIEW.
    #
    # THE VIEW IS COMPOSED OF
    #   - EXACTLY ONE LENS
    #   - ONE OR MORE FILTERS
    #
    # A FILTER IS COMPOSED OF
    #   - EXACTLY ONE DATASET
    #   - ONE OR MORE PROPERTIES

    # view_specs = {
    #     St.researchQ_URI: question_uri,
    #     St.datasets: view_lens,
    #     St.lens_operation: Ns.lensOpi
    # }
    # TEXT BUFFER
    string_buffer = StringIO()
    main_buffer = StringIO()
    dataset_opt = []  # LIST OF DATASETS THAT HAVE ONLY OPTIONAL PROPERTIES

    # HOLDER VARIABLE (STRING) FOR THE RESEARCH QUESTION URI
    question_uri = str(view_specs[St.researchQ_URI]).strip()

    # HOLDER VARIABLE (LIST) FOR LINKSETS AND/OR LENSES THAT COMPOSE THE LENS
    view_lens = view_specs[St.datasets]

    # KEY FUNCTION FOR ACCESSING ELEMENT ON WHICH TO SORT ON
    def get_key(item):
        return item[St.graph]

    # SORT THE LIST BASED ON THE GRAPH NAME OF EACH DICTIONARY
    # SORTING THE LIST OF FILTERS BASED ON THE DATASET NAME
    sorted_datasets = sorted(view_filter, key=get_key)
    # print sorted_datasets

    # [DESCRIPTION] RESEARCH QUESTION X
    main_buffer.write("\t### THE VIEW\n".format(question_uri))
    main_buffer.write("\t\t\t<{}>\n".format(question_uri))

    # [DESCRIPTION] CREATED A VIEW
    main_buffer.write("\t\t\t\talivocab:created\t\t\t<@URI> .\n\n")

    # [DESCRIPTION] THE VIEW
    main_buffer.write(
        "\t\t\t### THE COMPONENT OF THE VIEW: THE TYPE, THE LENS AND THE FILTERS\n")
    main_buffer.write("\t\t\t<@URI>\n")

    # [DESCRIPTION] IS A TYPE OF RISIS:VIEW
    main_buffer.write("\t\t\t\ta\t\t\t\t\t\t\t<{}View> ;\n".format(Ns.riclass))

    # [DESCRIPTION] THAT HAS A LENS
    main_buffer.write(
        "\t\t\t\talivocab:hasViewLens\t\t<{}view_lens_@> ;".format(Ns.view))

    # SORT THE PROPERTIES IN EACH DICTIONARY
    count_ds = 0

    for filter in sorted_datasets:
        count_ds += 1
        append_ds = ";" if count_ds < len(sorted_datasets) else ".\n"

        if St.graph in filter:

            # [DESCRIPTION] THAT HAS A NUMBER OF FILTERS
            dataset_name = Ut.get_uri_local_name(filter[St.graph])

            # DATA IS AN ARRAY OF DICTIONARIES WHERE, FOR EACH DATATYPE, WE HAVE A LIST OF PROPERTIES SELECTED
            data = filter["data"]
            count_sub_filter = 0
            for dictionary in data:
                count_sub_filter += 1
                ent_type = dictionary["entity_datatype"]
                pro_list = dictionary["properties"]

                # APPEND THE GRAPH
                string_buffer.write("\n\t\t\t### FILTER {}_{}".format(
                    count_ds, count_sub_filter))

                if len(data) > 1:
                    append_ds = ";" if count_sub_filter < len(data) else ".\n"

                if St.entity_datatype in filter:
                    entity_type_name = Ut.get_uri_local_name(
                        filter[St.entity_datatype])
                    filter_c = "<{}filter_{}_{}_{}_@>".format(
                        Ns.view, dataset_name, count_sub_filter,
                        entity_type_name)
                else:
                    filter_c = "<{}filter_{}_{}_@>".format(
                        Ns.view, dataset_name, count_sub_filter)
                string_buffer.write("\n\t\t\t{}".format(filter_c))

                # [DESCRIPTION] A FILTER HAS A DATASET
                string_buffer.write(
                    "\n\t\t\t\tvoid:target\t\t\t\t\t<{}> ;".format(
                        filter[St.graph]))
                has_filter = "\n\t\t\t\talivocab:hasFilter\t\t\t{} {}".format(
                    filter_c, append_ds)

                # [DESCRIPTION] ADDING THE FILTERS BELONGING TO THE VIEW
                main_buffer.write(has_filter)

                # ADDING THE DATATYPE IF ANY
                if St.entity_datatype in dictionary:
                    string_buffer.write(
                        "\n\t\t\t\tvoid:hasDatatype\t\t\t<{}> ;".format(
                            dictionary[St.entity_datatype]))

                # APPEND THE PROPERTIES
                # print "\n>>>>>>> FILTER:", filter
                if St.properties in dictionary:
                    dictionary[St.properties].sort()
                    count = 0
                    pro = None

                    # [DESCRIPTION] WHERE EACH FILTER IS COMPOSED OF A NUMBER OF PROPERTIES
                    check_optional = False
                    total_properties = len(dictionary[St.properties])

                    for ds_property in dictionary[St.properties]:

                        append = ";" if count < total_properties - 1 else ".\n"

                        if type(ds_property) is tuple and len(
                                ds_property) == 2:
                            cur_property = str(ds_property[0]).strip()

                            if len(cur_property
                                   ) > 0 and ds_property[1] is True:
                                pro = "\n\t\t\t\talivocab:selectedOptional\t<{}> {}".format(
                                    ds_property[0], append)
                            else:
                                check_optional = True
                                pro = "\n\t\t\t\talivocab:selected\t\t\t<{}> {}".format(
                                    cur_property, append)
                        else:
                            cur_property = str(ds_property).strip()
                            if len(cur_property) > 0:
                                check_optional = True
                                pro = "\n\t\t\t\talivocab:selected\t\t\t<{}> {}".format(
                                    cur_property, append)
                        if pro is not None:
                            string_buffer.write(pro)
                            count += 1

                    # THESE DATASETS ARE COMPOSED OF ONLY OPTIONAL PROPERTIES
                    if check_optional is False:
                        dataset_opt += [filter[St.graph]]

    # THE VIEW_LENS IS COMPOSED OF A NUMBER OF LENSES AND LINKSETS SELECTED
    main_buffer.write("\n\t\t\t### THE COMPONENT OF THE LENS".format(Ns.view))
    main_buffer.write("\n\t\t\t<{}view_lens_@>".format(Ns.view))
    count_ls = 0
    for linkset_lens in view_lens:
        append_ls = ";" if count_ls < len(view_lens) - 1 else ".\n"
        main_buffer.write("\n\t\t\t\talivocab:selected\t\t\t<{}> {}".format(
            linkset_lens, append_ls))
        count_ls += 1

    main_triples = main_buffer.getvalue()
    triples = string_buffer.getvalue()

    # HASH THE STRING
    hash_value = hash(main_triples + triples)

    # CHANGE THE "-" NEGATIVE VALUE TO "N" AND POSITIVE TO "P"
    hash_value = str(hash_value).replace(
        '-', "N") if '-' in str(hash_value) else "P" + str(hash_value)

    # GENERATE THE URI FOR THE VIEW
    uri = "{}View_{}".format(Ns.view, hash_value)

    query = PREFIX + """
    INSERT DATA
    {{
        GRAPH <{}>
        {{
        {}{}\t\t}}\n\t}}
    """.format(question_uri, main_triples.replace("@URI", uri),
               triples).replace("@", hash_value)

    message = "\nThe metadata was generated as: {}".format(uri)
    print message
    print "\nVIEW INSERT QUERY:", query

    if display:
        print "\nVIEW INSERT QUERY:", query

    # return {St.message: message, St.insert_query: query, St.result: uri}
    return {
        St.message: message,
        St.insert_query: query,
        St.result: uri,
        "sparql_issue": dataset_opt
    }
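A self-contained sketch of the placeholder substitution used at the end of view_data: the triples are built with "@URI" and "@" markers, then rewritten once the hash-based URI is known. All values below are hypothetical.

main_triples = "<@URI> a <View> ;\n\talivocab:hasFilter <filter_1_@> ."
hash_value = "P42"
uri = "http://example.org/view/View_{}".format(hash_value)
# FIRST RESOLVE "@URI", THEN REWRITE THE REMAINING "@" MARKERS
query = "INSERT DATA {{\n{}\n}}".format(
    main_triples.replace("@URI", uri)).replace("@", hash_value)
print query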
Example #4
def activity_overview(question_uri, get_text=True):

    idea = ""
    ds_mapping = ""
    alignments_data = ""
    lenses = ""
    views_data = ""
    """
    1. RESEARCH QUESTION LABEL
    """
    idea_result = research_label(question_uri)
    idea += "\tQuestion URI: {}\n\tLabel: {}\n".format(question_uri,
                                                       idea_result)
    """
    2. RESEARCH QUESTION DATASETS
    """
    datasets = datasets_selected(question_uri)
    if datasets:
        for dataset in datasets:
            ds_mapping += "\t{} | {} | {} instances found\n".format(
                dataset[0], dataset[1], dataset[2])
    """
    3. RESEARCH QUESTION LINKSETS
    """
    alignments = alignments_mappings(question_uri)
    if alignments:
        for i in range(len(alignments)):
            # THE ALIGNMENT
            alignments_data += "\t{:2} - {}\n".format(i + 1, alignments[i])
            # THE DESCRIPTION OF THE ALIGNMENT
            ali_description = alignments_mappings_description(
                question_uri, alignments[i])
            for info in ali_description:
                pro = Ut.get_uri_local_name(info[0])
                ls = str(info[1]).replace("http://risis.eu/linkset/",
                                          "linkset:")

                # LINKSETS CREATED
                if pro == "created" or pro == "used":
                    size = get_namedgraph_size(info[1], isdistinct=False)
                    alignments_data += "\t\t>>> {:13}: \t{} | {} correspondences found\n".format(
                        pro, ls, size)

                # DESCRIPTION + EVOLUTION
                elif pro != "type":
                    # print info
                    alignments_data += "\t\t{:17}:\t{}\n".format(pro, ls)
            alignments_data += "\n"
    """
    4. RESEARCH QUESTION LENSES
    """
    used_lenses = created_used_lens(question_uri)
    if used_lenses:
        for lens in used_lenses:
            pro = Ut.get_uri_local_name(lens[0])
            les = str(lens[1]).replace("http://risis.eu/lens/", "lens:")
            lenses += "\t\t{:17}:\t{} | {} correspondences\n".format(
                pro, les, lens[2])
    """
    RESEARCH QUESTION VIEWS
    """
    views_uri = views(question_uri)
    views_requested = 0
    # EXTRACTING ALL THE VIEWS FOR THIS RESEARCH QUESTION
    if views_uri:
        views_requested = len(views_uri) - 1
        for i in range(1, len(views_uri)):
            view_uri = views_uri[i][0]
            views_data += "\n\tView_Lens {}: {}".format(i, view_uri)
            view_composition = linksets_and_lenses(question_uri, view_uri)
            view_filters = filters(question_uri, view_uri)

            # DESCRIBING THE COMPOSITION OF EACH VIEW LENSES
            for element in view_composition:
                views_data += "\n\t\tComposition: {}".format(element)
            views_data += "\n"

            # EXTRACTING THE FILTERS
            for n in range(1, len(view_filters)):
                filter_uri = view_filters[n][0]
                views_data += "\n\t\tFilter {}: {}".format(
                    n, view_filters[n][0])
                filter_dt = filter_data(question_uri, filter_uri)

                # FILTER'S DATASETS
                views_data += "\n\t\t\tDataset: {}".format(filter_dt[1][0])

                for m in range(1, len(filter_dt)):
                    views_data += "\n\t\t\tProperty: {}".format(
                        filter_dt[m][1])

                views_data += "\n"

    if get_text:
        activity_buffer = StringIO()
        activity_buffer.write("\n>>> IDEA\n{}".format(idea))
        activity_buffer.write("\n>>> DATASET MAPPINGS\n{}".format(ds_mapping))
        activity_buffer.write(
            "\n>>> ALIGNMENT & LINKSETS\n{}".format(alignments_data))
        activity_buffer.write("\n>>> LENSES\n{}".format(lenses))
        activity_buffer.write("\n>>> VIEW REQUESTED [{}].\n{}".
                              format(views_requested, views_data) if str(1) ==
                              1 else "\n>>> VIEWS REQUESTED [{}].\n{}".
                              format(views_requested, views_data))
        print activity_buffer.getvalue()
        return activity_buffer.getvalue()
    else:
        result = {
            "idea": idea,
            "dataset_mappings": ds_mapping,
            "alignment_mappings": alignments_data,
            "lenses": lenses,
            "view_dic": views_data
        }
        # print alignments_data
        return result
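A minimal sketch of the report-assembly pattern used in activity_overview: each section is accumulated as a string and then stitched together with StringIO. The section contents are hypothetical.

from StringIO import StringIO

sections = [("IDEA", "\tQuestion URI: ...\n"),
            ("DATASET MAPPINGS", "\t...\n")]
buf = StringIO()
for title, body in sections:
    buf.write("\n>>> {}\n{}".format(title, body))
print buf.getvalue()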
Example #5
def view(view_specs, view_filter, save=False, limit=10):
    """
    :param view_specs:
    :param view_filter:
    :param save:
    :param limit
    :param view_filter: AN ARRAY OF DICTIONARY. THE DICTIONARY
        CONTAINS GRAPH AND PROPERTIES KEYWORDS. THE VALUE OF THE
        PROPERTIES KEYWORDS IS AN ARRAY OF PROPERTIES AVAILABLE IN THE GRAPH
    :param limit: LIMIT FOR THE OUTPUT DISPLAY TABLE
    :return:
    """

    # LIMIT FOR THE VARIABLE IN THE SELECT
    str_limit = 70
    ns = dict()
    view_where = ""
    view_select = ""
    variables_list = dict()
    namespace = dict()
    namespace_str = ""
    count = 1
    is_problematic = False

    try:
        # 1. GENERATE THE INSERT METADATA
        # RETURNS MESSAGE, INSERT QUERY AND RESULT (THE VIEW URI)
        # RETURNS{St.message:message, St.insert_query: final, St.result: uri}
        view_metadata = view_data(view_specs, view_filter)
        # print view_metadata
        # print view_filter

        # 2. CHECK FOR POTENTIAL SPARQL TIMEOUT
        opt_list = view_metadata["sparql_issue"]
        if len(opt_list) != 0:
            is_problematic = True
            the_list = ""
            for ds in opt_list:
                the_list += "{} ".format(ds)
            message = "The insertion metadata was generated but not inserted. The properties listed in theses " \
                      " datasets [{}] are ALL OPTIONAL. The presence of at least one non OPTIONAL property is " \
                      "required.".format(
                the_list)
            view_metadata[St.message] = message
            print message

        # 3. REGISTER THE METADATA IF SAVE IS SET TO TRUE
        if save:
            if is_problematic is False:
                print "We are in save mode!"
                is_metadata_inserted = boolean_endpoint_response(
                    view_metadata[St.insert_query])
                print "IS THE METADATA INSERTED?: {}".format(
                    is_metadata_inserted)
                message = "The insertion metadata was successfully inserted as: {}".format(view_metadata[St.result]) \
                    if (is_metadata_inserted == "true" or is_metadata_inserted == Ec.ERROR_STARDOG_1) \
                    else "The metadata could not be inserted."
                print message
                view_metadata[St.message] = message
                # print view_metadata[St.insert_query]

        # GENERATE THE INTERSECTION
        # AND DISPLAY THE QUERIES NEEDED
        inter = intersection(view_specs, display=False)

        if inter is None:
            print "WE CANNOT PROCEED AS THERE IS A PROBLEM WITH THE PROVIDED DATASETS."
            return None

        # For each design view, we have the dataset of interest
        #  and the list of properties to display in a filter
        # THE FILTER IS A LIST OF GRAPH DICTIONARIES
        # [GRAPH1, GRAPH2, GRAPH3, ...]
        for graph in view_filter:

            optional = ""

            # THE GRAPH CONTAINS GRAPH AND DATA
            graph_uri = graph[St.graph]

            # About the dataset: [NAMESPACE, NAME]
            ds_ns_name = Ut.get_uri_ns_local_name(graph_uri)
            if ds_ns_name[1][0].isdigit():
                ds_ns_name[1] = "D{}".format(ds_ns_name[1])
            print ds_ns_name
            # shortening prefix length
            short_name = ds_ns_name[1]

            # HOLDING INFORMATION ABOUT THIS GRAPH (FOR EACH ENTITY DATATYPE, THE PROPERTIES SELECTED)
            graph_data = graph["data"]

            # Adding the dataset name to the namespace dictionary [local name: namespace]
            if ds_ns_name is not None:
                if ds_ns_name[1] not in ns:
                    ns[ds_ns_name[1]] = ds_ns_name[0]

            # Generate the dataset design view WHICH LOOKS LIKE
            # ### DATASET: grid
            # GRAPH <http://risis.eu/genderc/grid>
            # {
            view_where += "\n\t### DATASET: {}\n\tGRAPH <{}>\n\t{{".format(
                ds_ns_name[1], graph_uri)

            # graph_data IS A LIST OF DICTIONARIES FOR HOLDING THE TYPES AND THEIR LISTED PROPERTIES
            count_ns = 0
            for data_info in graph_data:

                e_type_uri = data_info[St.entity_datatype]
                type_triple = ""
                if e_type_uri == "no_type":
                    e_type = ""
                    # print "!!!!!!!!!!!!!!!!!!!!!!!!!! NO TYPE"
                else:
                    e_type = Ut.get_uri_local_name(e_type_uri)
                    # print "!!!!!!!!!!!!!!!!!!!!!!!!!!e_type", e_type
                    if e_type:
                        e_type = "_{}".format(e_type[short:])

                        type_triple = "\n\t\t\ta{:54} <{}> ;".format(
                            "", e_type_uri)

                #   ?GRID
                # TODO ADD THE TYPE TO THE ALIGNMENTS IN THE INTERSECT
                # SUBJECT: ADDING 1 AT THE END SO THAT SAME SOURCE AND TARGET ARE TAKEN CARE OFF
                view_where += "\n\t\t?{}{}_1{}".format(ds_ns_name[1], e_type,
                                                       type_triple)
                # view_where += "\n\t\t?{}{}{}".format(ds_ns_name[1], "", type_triple)

                # Adding the resource as a variable to the variable list
                view_select += " ?{}{}_1".format(
                    ds_ns_name[1],
                    e_type,
                )

                t_properties = data_info[St.properties]

                # FOR BACKWARD COMPATIBILITY, REMOVE "<" AND ">"
                for i in range(len(t_properties)):
                    # print "PROPERTY TUPLE:", t_properties[i]
                    if type(t_properties[i]) is tuple:
                        # print "PROPERTY:", t_properties[i][0]
                        t_properties[i] = (re.sub('[<>]', "",
                                                  t_properties[i][0]),
                                           t_properties[i][1])
                        # print "PROPERTY:", t_properties[i]
                    else:
                        t_properties[i] = re.sub('[<>]', "", t_properties[i])
                # 3 characters string to differentiate the properties of a dataset
                attache = ds_ns_name[1][short:]

                # VARIABLES
                if type(t_properties) is not list:
                    print "THIS <PROPERTIES> NEED TO BE of TYPE A LIST"
                    return None

                # Going though the properties of interest
                for i in range(len(t_properties)):

                    # >>> PROPERTY IS JUST A STRING
                    if type(t_properties[i]) is str:

                        # EXTRACTING THE NAMESPACE TO USE FOR THE PROPERTY
                        curr_ns = Ut.get_uri_ns_local_name(t_properties[i])

                        if type(curr_ns) is list:

                            # Setting up the prefix and predicate
                            predicate = "{}voc:{}".format(
                                short_name, curr_ns[1])
                            prefix = "{}voc".format(short_name)

                            # GENERATE THE LIST OF OPTIONAL PROPERTIES
                            # optional += "\n\t\tOPTIONAL{{ ?{}   {:55}   ?{}_{} .}}".format(
                            #     ds_ns_name[1], predicate, attache, curr_ns[1])

                            # ADDING NAMESPACE TO THE VIEW QUERY
                            if prefix not in namespace:
                                namespace[prefix] = curr_ns[0]
                                namespace_str += "\nPREFIX {}: <{}>".format(
                                    prefix, curr_ns[0])

                            # Adding predicates
                            if i == len(t_properties) - 1:

                                if namespace[prefix] != curr_ns[0]:
                                    view_where += "\n\t\t\t<{}> ?{}{}_{} .".format(
                                        t_properties[i], attache, e_type,
                                        curr_ns[1])
                                else:
                                    view_where += "\n\t\t\t{:55} ?{}{}_{} .".format(
                                        predicate, attache, e_type, curr_ns[1])
                            else:

                                if namespace[prefix] != curr_ns[0]:
                                    view_where += "\n\t\t\t<{}> ?{}{}_{} ;".format(
                                        t_properties[i], attache, e_type,
                                        curr_ns[1])
                                else:
                                    view_where += "\n\t\t\t{:55} ?{}{}_{} ;".format(
                                        predicate, attache, e_type, curr_ns[1])

                            # ADDING THE VARIABLE LIST and making it
                            # unique to a dataset with the variable attache
                            value = (" ?{}{}_{}".format(
                                attache, e_type, curr_ns[1]))
                            if len(view_select + value) > str_limit:
                                variables_list[count] = view_select
                                view_select = value
                                count += 1
                            else:
                                view_select += value

                        # IN THIS CASE, ONLY THE SUBJECT IS PROVIDED
                        else:
                            # TODO check this
                            # ""
                            view_where += ".\n\t\t?{}\n\t\t\t?p ?o .".format(
                                curr_ns)

                    # >>> HERE, WE ARE DEALING WITH A SUBJECT AND A PREDICATE
                    elif type(t_properties[i]) is list:

                        if len(t_properties[i]) == 2:
                            curr_ns = Ut.get_uri_ns_local_name(
                                t_properties[i][1])

                            if type(curr_ns) is list:
                                predicate = "{}voc:{}".format(
                                    short_name, curr_ns[1])
                                prefix = "{}voc".format(short_name)

                                # ADDING NAMESPACE
                                if prefix not in namespace:
                                    namespace[prefix] = curr_ns[0]
                                    namespace_str += "\nPREFIX {}: <{}>".format(
                                        prefix, curr_ns[0])

                                # REMOVE PREVIOUS PUNCTUATION
                                # print "REMOVING PREDICATE"
                                view_where = view_where[:len(view_where) - 2]
                                view_where += " .\n\t\t?{}\n\t\t\t{:55} ?{}{}_{} .".format(
                                    t_properties[i][0], predicate, attache,
                                    e_type, curr_ns[1])

                                # ADDING THE VARIABLE LIST
                                value = (" ?{}{}_{}".format(
                                    attache, e_type, curr_ns[1]))
                                if len(view_select + value) > str_limit:
                                    variables_list[count] = view_select
                                    view_select = value
                                    count += 1

                                else:
                                    view_select += value

                    # >>> PROPERTY IS A TUPLE OF THE PROPERTY AND A BOOLEAN
                    # VALUE INDICATING WHETHER OR NOT THE PROPERTY IS OPTIONAL
                    elif type(t_properties[i]) is tuple:

                        # Setting up the prefix and predicate
                        curr_ns = Ut.get_uri_ns_local_name(t_properties[i][0])
                        prefix = "{}voc_{}".format(short_name, str(count_ns))

                        # ADDING NAMESPACE
                        if curr_ns[0] not in namespace:
                            count_ns += 1
                            namespace[curr_ns[0]] = prefix
                            namespace_str += "\nPREFIX {}: <{}>".format(
                                prefix, curr_ns[0])

                        # ACCESSING THE RIGHT NAMESPACE
                        prefix = namespace[curr_ns[0]]

                        # SETTING THE PREDICATE WITH THE RIGHT NAMESPACE
                        predicate = "{}:{}".format(prefix, curr_ns[1])

                        # CHECKING IF TUPLE OF 2
                        if len(t_properties[i]) == 2:

                            # ADDING PREDICATE AND SPARQL PUNCTUATION
                            if i == len(t_properties) - 1:

                                if t_properties[i][1] is True:
                                    optional += "\n\t\tOPTIONAL{{ ?{}{:15} {:60} ?{}{}_{} . }}".format(
                                        ds_ns_name[1], "{}_1".format(e_type),
                                        predicate, attache, e_type, curr_ns[1])
                                else:
                                    view_where += "\n\t\t\t{:55} ?{}{}_{} .".format(
                                        predicate, attache, e_type, curr_ns[1])

                            else:
                                if t_properties[i][1] is True:
                                    optional += "\n\t\tOPTIONAL{{ ?{}{:15} {:60} ?{}{}_{} . }}".format(
                                        ds_ns_name[1], "{}_1".format(e_type),
                                        predicate, attache, e_type, curr_ns[1])
                                else:
                                    view_where += "\n\t\t\t{:55} ?{}{}_{} ;".format(
                                        predicate, attache, e_type, curr_ns[1])

                            # ADDING THE VARIABLE LIST
                            value = (" ?{}{}_{}".format(
                                attache, e_type, curr_ns[1]))
                            if len(view_select + value) > str_limit:
                                variables_list[count] = view_select
                                view_select = value
                                count += 1

                            else:
                                view_select += value

                # IN CASE THE SELECTED PROPERTIES ARE ALL OPTIONAL, REMOVE THE RESOURCE
                # print "########################WERE", view_where
                # view_where = view_where.replace("?{}".format(ds_ns_name[1]), "")

                if len(optional) > 0:

                    if view_where[len(view_where) - 1] == ".":
                        pass  # ALREADY PROPERLY TERMINATED: DO NOTHING

                    elif view_where[len(view_where) - 1] == ";":
                        view_where = "{}.".format(view_where[:len(view_where) -
                                                             1])

                    else:
                        # IN CASE THE SELECTED PROPERTIES ARE ALL OPTIONAL, REMOVE THE RESOURCE
                        # print "########################WERE", view_where
                        # print "############", view_where[len(view_where) - 1]
                        view_where = view_where.replace(
                            "?{}".format(ds_ns_name[1]), "")

                    view_where += "\n\t\t### OPTIONAL PROPERTIES{}\n\t".format(
                        optional)
                # refresh
                optional = ""

            # close
            view_where += "\n\t}\n"

        my_list = ""
        for key, variable in variables_list.items():
            my_list += "\n" + variable

        if limit == 0:
            lmt = ""
        else:
            lmt = "LIMIT {}".format(limit)

        query = "{}\n\nSELECT DISTINCT {}\n{{{}{}\n}} {}".format(
            namespace_str, my_list + view_select, inter, view_where, lmt)

        # print "\nVIEW QUERY FOR GENERATING TABLE:", query

        # table = sparql_xml_to_matrix(query)
        # display_matrix(table, spacing=80, limit=limit, is_activated=False)
        print "\nDONE GENERATING THE VIEW"
        # return {"metadata": view_metadata, "query": query, "table": table}
        return {
            "metadata": view_metadata,
            "query": query,
            "sparql_issue": is_problematic
        }

    except Exception as err:
        print ">>> ERROR:", err
        view_metadata = {St.message: "Fatal Error"}
        return {
            "metadata": view_metadata,
            "query": None,
            "sparql_issue": is_problematic
        }
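The SELECT-variable chunking used in view, isolated as a runnable sketch: variables are appended to the current chunk until the length limit would be exceeded, at which point the chunk is stored and a new one starts. The variable names and limit are hypothetical.

str_limit = 20
variables_list, view_select, count = {}, "", 1
for value in [" ?grid_name", " ?grid_city", " ?grid_country", " ?grid_type"]:
    if len(view_select + value) > str_limit:
        variables_list[count] = view_select  # STORE THE FULL CHUNK
        view_select = value                  # START A NEW ONE
        count += 1
    else:
        view_select += value
my_list = "".join(variables_list[k] for k in sorted(variables_list))
print "SELECT DISTINCT" + my_list + view_select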
Example #6
def enrich(specs, directory, endpoint):

    # TODO RUN IT ONLY IF THERE IS NO GRAPH ENRICHED WITH THE SAME NAME

    # specs[St.graph] = "http://grid.ac/20170712"
    print "ENRICHING DATA/GRAPH FROM EXPORT-ALIGNMENT"
    print "GRAPH:", specs[St.graph]
    print "ENTITY TYPE:", specs[St.entity_datatype]
    print "LAT PREDICATE:", specs[St.long_predicate]
    print "LONG PREDICATE:", specs[St.lat_predicate]
    print "FILE DIRECTORY:", directory
    name = Ut.get_uri_local_name(specs[St.graph])

    print endpoint
    data_1 = Qry.virtuoso_request(
        "ask {{ GRAPH <{}> {{ ?x ?y ?z . }} }}".format(specs[St.graph]),
        endpoint)
    data_1 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_1["result"])
    if len(data_1) > 0:
        data_1 = data_1[0].strip() == "true"
        if data_1 is False:
            print "GRAPH: {} {}".format(
                specs[St.graph], "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.")

    # CHECKING WHETHER BOTH DATASETS ARE AT THE VIRTUOSO TRIPLE STORE
    data_2 = Qry.virtuoso_request(
        "ask {GRAPH <http://geo.risis.eu/gadm>{ ?x ?y ?z . }}", endpoint)
    data_2 = regex.findall("rs:boolean[ ]*(.*)[ ]*\.", data_2["result"])
    if len(data_2) > 0:
        data_2 = data_2[0].strip() == "true"
        if data_2 is False:
            print "GRAPH: {} {}".format(
                "http://geo.risis.eu/gadm",
                "DOES NOT EXIST AT THE REMOTE VIRTUOSO SITE.")

    if data_1 is False or data_2 is False:
        message = "BECAUSE BOTH DATASETS NEED TO BE PRESENT AT OUR TRIPLES STORE, WE ARE UNABLE TO EXECUTE THE REQUEST."
        return {
            St.message:
            message,
            St.result:
            'The dataset {} '
            'cannot be enriched with GADM boundary at the moment.'.format(
                specs[St.graph])
        }

    total = 0
    limit = 20000
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    f_path = "{0}{1}{1}{2}_enriched_{3}.ttl".format(directory, os.path.sep,
                                                    name, date)
    b_path = "{0}{1}{1}{2}_enriched_{3}{4}".format(directory, os.path.sep,
                                                   name, date,
                                                   Ut.batch_extension())

    # MAKE SURE THE FOLDER EXISTS
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError as err:
        print "\n\t[utility_LOAD_TRIPLE_STORE:]", err
        return

    print "\n1. GETTING THE TOTAL NUMBER OF TRIPLES."
    count_query = enrich_query(specs, limit=0, offset=0, is_count=True)
    print count_query
    count_res = Qry.virtuoso_request(count_query, endpoint)
    result = count_res['result']

    # GET THE TOTAL NUMBER OF TRIPLES
    if result is None:
        print "NO RESULT FOR THIS ENRICHMENT."
        return count_res

    g = rdflib.Graph()
    g.parse(data=result, format="turtle")
    attribute = rdflib.URIRef("http://www.w3.org/2005/sparql-results#value")
    for subject, predicate, obj in g.triples((None, attribute, None)):
        total = int(obj)

    # NUMBER OF REQUEST NEEDED
    iterations = total / limit if total % limit == 0 else total / limit + 1
    print "\n2. TOTAL TRIPLES TO RETREIVE  : {} \n\tTOTAL NUMBER OF ITERATIONS : {}\n".format(
        total, iterations)

    writer = codecs.open(f_path, "wb", "utf-8")
    batch_writer = codecs.open(b_path, "wb", "utf-8")

    print "3. GENERATING THE BATCH FILE TEXT"
    enriched_graph = "{}_enriched".format(specs[St.graph])
    stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[
        St.stardog_path]

    load_text = """echo "Loading data"
            {}stardog data add {} -g {} "{}"
            """.format(stardog_path, Svr.settings[St.database], enriched_graph,
                       f_path)

    batch_writer.write(to_unicode(load_text))
    batch_writer.close()

    # RUN THE ITERATIONS
    for i in range(0, iterations):

        offset = i * limit + 1
        print "\tROUND: {} OFFSET: {}".format(i + 1, offset)

        # print "\t\t1. GENERATING THE ENRICHMENT QUERY"
        virtuoso = enrich_query(specs,
                                limit=limit,
                                offset=offset,
                                is_count=False)
        # print virtuoso
        # exit(0)
        # print Qry.virtuoso(virtuoso)["result"]

        # print "\t\t2. RUNNING THE QUERY + WRITE THE RESULT TO FILE"
        writer.write(Qry.virtuoso_request(virtuoso, endpoint)["result"])

    writer.close()
    print "\n4. RUNNING THE BATCH FILE"
    print "\tTHE DATA IS BEING LOADED OVER HTTP POST." if Svr.settings[St.split_sys] is True \
        else "\tTHE DATA IS BEING LOADED AT THE STARDOG LOCAL HOST FROM BATCH."
    # os.system(b_path)

    # RUN THE BATCH FILE
    print "\tFILE: {}".format(f_path)
    print "\tBATCH: {}\n".format(b_path)
    os.chmod(b_path, 0o777)
    Ut.batch_load(b_path)
    if os.path.exists(b_path) is True:
        os.remove(b_path)

    # TODO 1. REGISTER THE DATASET TO BE ENRICHED IF NOT YET REGISTER
    # TODO 2. ADD THE ENRICHED DATASET TO THE RESEARCH QUESTION (REGISTER).
    # TODO 3. MAYBE, CREATE THE LINKSET BETWEEN THE SOURCE AND THE RESULTING

    size = Qry.get_namedgraph_size(enriched_graph)

    print "JOB DONE...!!!!!!"

    return {
        St.message:
        "The select dataset was enriched with the GADM boundary as {}. "
        "{} triples were created.".format(enriched_graph, size),
        St.result:
        enriched_graph
    }
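The paging arithmetic used in enrich, isolated: the number of LIMIT/OFFSET rounds is a ceiling division of the total by the page size (Python 2 integer division). The totals below are hypothetical.

total, limit = 45000, 20000
iterations = total / limit if total % limit == 0 else total / limit + 1
for i in range(0, iterations):
    offset = i * limit + 1
    print "ROUND: {} OFFSET: {} LIMIT: {}".format(i + 1, offset, limit)
# -> 3 ROUNDS WITH OFFSETS 1, 20001 AND 40001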
Example #7
def visualise(graphs, directory, credential):

    # production_directory = "/scratch/risis/data/rdf-data/links"
    # directory = production_directory

    writer = Buffer.StringIO()
    g = rdflib.Graph()
    source = {}
    target = {}
    attribute = {}
    src_count = 0
    trg_count = 0
    prd_count = 0
    singletons = {}
    triples = 0
    datasets = [None, None]
    code = 0

    for graph in graphs:
        # print graph

        code += 1
        links = export_alignment(graph)

        # THE MECHANISM USED
        mechanism = links['mechanism']
        # print "mechanism", mechanism

        # THE SOURCE AND TARGET DATASETS
        if datasets == [None, None]:

            if str(links["type"]) == Ns.lens_type:
                datasets = links["lens_targets"]
            else:
                datasets = [links["source"], links['target']]

        # MAKE SURE THAT FOR ALL ALIGNMENTS, THE SOURCE AND TARGET DATASETS ARE THE SAME
        elif datasets != [links["source"], links['target']]:
            print "No visualisation for different set of source-target"
            return None

        print "DATASETS: ", datasets

        # print links['result']
        if links['result'] is not None:

            # LOAD THE CORRESPONDENCES TO THE MAIN GRAPH
            g.parse(data=links['result'], format="turtle")

            # INDEX THE CORRESPONDENCES USING THE SINGLETON PROPERTY
            sg = rdflib.Graph()
            sg.parse(data=links['result'], format="turtle")
            triples += len(sg)
            for subject, predicate, obj in sg.triples((None, None, None)):
                mech = "{}_{}".format(mechanism, code)
                if predicate not in singletons:
                    singletons[predicate] = [mech]
                elif mech not in singletons[predicate]:
                    singletons[predicate] += [mech]

    # WRITING THE FILE
    count = 0
    writer.write("PREFIX ll: <{}>\n".format(Ns.alivocab))
    writer.write("PREFIX rdf: <{}>\n".format(Ns.rdf))
    writer.write("PREFIX link: <http://risis.eu/alignment/link/>\n")
    writer.write("PREFIX plot: <http://risis.eu/alignment/plot/>\n")
    writer.write("PREFIX mechanism: <{}>\n".format(Ns.mechanism))

    print "size: ", len(datasets)
    if len(datasets) > 2:
        name = hash("".join(datasets))
        name = "{}".format(str(name).replace(
            "-", "P")) if str(name).__contains__("-") else "P{}".format(name)
    else:
        name = "{}_{}".format(Ut.get_uri_local_name(datasets[0]),
                              Ut.get_uri_local_name(datasets[1]))
    print "NAME: ", name

    # DROPPING GRAPH IF IT ALREADY EXISTS
    writer.write("\n#DROP SILENT GRAPH plot:{} ;\n".format(name))

    # INSERT NEW DATA
    writer.write("#INSERT DATA\n#{")
    writer.write("\n\tplot:{}\n".format(name))
    writer.write("\t{")

    # GOING THROUGH ALL CORRESPONDENCES OF THE MAIN GRAPH (MERGED)
    for subject, predicate, obj in g.triples((None, None, None)):

        count += 1

        # INDEX THE SOURCE CORRESPONDENCE
        if subject not in source:
            src_count += 1
            source[subject] = src_count

        # INDEX THE TARGET CORRESPONDENCE
        if obj not in target:
            trg_count += 1
            target[obj] = trg_count

        # INDEX THE PAIR
        pre_code = "{}_{}".format(source[subject], target[obj])
        if pre_code not in attribute:
            prd_count += 1
            attribute[pre_code] = prd_count

        # WRITE THE PLOT COORDINATE AND ITS METADATA
        writer.write("\n\t\t### [ {} ]\n".format(count))
        writer.write("\t\t{}\n".format(predicate).replace(Ns.alivocab, "ll:"))
        writer.write("\t\t\tlink:source     {} ;\n".format(source[subject]))
        writer.write("\t\t\tlink:target     {} ;\n".format(target[obj]))
        writer.write("\t\t\tlink:source_uri <{}> ;\n".format(subject))
        writer.write("\t\t\tlink:target_uri <{}> ;\n".format(obj))

        for value in singletons[predicate]:
            if str(value) != "None_1":
                writer.write(
                    "\t\t\tlink:mechanism  {} ;\n".format(value).replace(
                        Ns.mechanism, "mechanism:"))
        writer.write("\t\t\trdf:type        link:Link .\n")
        writer.write("")
    writer.write("\t}\n#}")

    # THE PATH OF THE OUTPUT FILES

    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    f_path = "{0}{1}{1}{2}_plots_{3}.trig".format(directory, os.path.sep, name,
                                                  date)
    b_path = "{0}{1}{1}{2}_plots_{3}{4}".format(directory, os.path.sep, name,
                                                date, Ut.batch_extension())
    print "DIRECTORY:", directory

    # MAKE SURE THE FOLDER EXISTS
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError as err:
        print "\n\t[utility_LOAD_TRIPLE_STORE:]", err
        return

    # CREATE THE FILES
    plot_writer = codecs.open(f_path, "wb", "utf-8")
    batch_writer = codecs.open(b_path, "wb", "utf-8")

    # print "3. GENERATING THE BATCH FILE TEXT"
    # enriched_graph = "{}{}_plots".format(Ns.plot, name)
    # stardog_path = '' if Ut.OPE_SYS == "windows" else Svr.settings[St.stardog_path]

    # load_text = """echo "Loading data"
    # {}stardog data add {} -g {} "{}"
    # """.format(stardog_path, Svr.DATABASE, enriched_graph, f_path)

    # GENERATE THE BATCH FILE FOR AUTOMATIC LOAD
    user = "******"
    password = "******"
    if credential is not None:
        if "user" in credential:
            user = credential["user"]
        if "password" in credential:
            password = credential["password"]

    load_text = "echo \"Loading data\"\n" \
                "/usr/local/virtuoso-opensource/bin/isql 1112 {} {} exec=\"DB.DBA.TTLP_MT (file_to_string_output" \
                "('/scratch/risis/data/rdf-data/links/Plots/{}_plots{}.trig'), '', 'http://risis.eu/converted', " \
                "256);\"".format(user, password, name, date)
    batch_writer.write(to_unicode(load_text))
    batch_writer.close()
    os.chmod(b_path, 0o777)

    # WRITE THE CORRESPONDENCES TO FILE
    plot_writer.write(writer.getvalue())
    plot_writer.close()

    print "PLOT: {}".format(f_path)
    print "BATCH: {}".format(b_path)
    print "Job Done!!!"
    # Qry.virtuoso_request(writer.getvalue())
    # print count, triples
    # file.close()

    return {'result': writer.getvalue(), 'message': "Constructed"}
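A self-contained sketch of the coordinate indexing in visualise: each distinct subject and object is assigned an incrementing integer, turning a set of links into (x, y) plot points. The links below are hypothetical.

links = [("s1", "t1"), ("s1", "t2"), ("s2", "t1")]
source, target = {}, {}
for subject, obj in links:
    if subject not in source:
        source[subject] = len(source) + 1  # NEXT FREE X COORDINATE
    if obj not in target:
        target[obj] = len(target) + 1      # NEXT FREE Y COORDINATE
    print "plot point: ({}, {})".format(source[subject], target[obj])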
Example #8
def cluster_d_test_statss(linkset, network_size=3, targets=None, directory=None,
                          greater_equal=True, print_it=False, limit=None, activated=False):
    network = []
    print "LINK NETWORK INVESTIGATION"
    if activated is False:
        print "\tTHE FUNCTION I NOT ACTIVATED"
        return ""
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    linkset_name = Ut.get_uri_local_name(linkset)
    count_1 = 0
    count_2 = 0
    sheet_builder = Buffer.StringIO()
    analysis_builder = Buffer.StringIO()
    sheet_builder.write("Count	ID					STRUCTURE	E-STRUCTURE-SIZE	A. NETWORK QUALITY"
                        "		M. NETWORK QUALITY		REFERENCE\n")
    linkset = linkset.strip()
    check = False

    # RUN THE CLUSTER
    clusters_0 = Cls.links_clustering(linkset, limit)

    for i_cluster in clusters_0.items():

        # network = []
        resources = ""
        uri_size = 0
        count_1 += 1
        children = i_cluster[1][St.children]
        cluster_size = len(children)
        # if "<http://www.grid.ac/institutes/grid.10493.3f>" not in children:
        #     continue

        check = cluster_size >= network_size if greater_equal else cluster_size == network_size

        # NETWORK OF A PARTICULAR SIZE
        if check:
            count_2 += 1
            # file_name = i_cluster[0]

            # 2: FETCHING THE CORRESPONDENTS
            smallest_hash = float('inf')
            child_list = ""
            for child in children:
                hashed = hash(child)
                if hashed <= smallest_hash:
                    smallest_hash = hashed

                # GENERAL INFO 1: RESOURCES INVOLVED
                child_list += "\t{}\n".format(child)

                use = "<{}>".format(child) if Ut.is_nt_format(child) is not True else child
                resources += "\n\t\t\t\t{}".format(use)
                if len(child) > uri_size:
                    uri_size = len(child)

            if directory:
                # MAKE SURE THE FILE NAME OF THE CLUSTER IS ALWAYS THE SAME
                file_name = "{}".format(str(smallest_hash).replace("-", "N")) if str(
                    smallest_hash).startswith("-") \
                    else "P{}".format(smallest_hash)

                # QUERY FOR FETCHING ALL LINKED RESOURCES FROM THE LINKSET
                query = """
                PREFIX prov: <{3}>
                PREFIX ll: <{4}>
                SELECT DISTINCT ?lookup ?object ?Strength ?Evidence
                {{
                    VALUES ?lookup{{ {0} }}

                    {{
                        GRAPH <{1}>
                        {{ ?lookup ?predicate ?object .}}
                    }} UNION
                    {{
                        GRAPH <{1}>
                        {{?object ?predicate ?lookup . }}
                    }}

                    GRAPH <{2}>
                    {{
                        ?predicate  prov:wasDerivedFrom  ?DerivedFrom  .
                        OPTIONAL {{ ?DerivedFrom  ll:hasStrength  ?Strength . }}
                        OPTIONAL {{ ?DerivedFrom  ll:hasEvidence  ?Evidence . }}
                    }}
                }}
                            """.format(resources, linkset, linkset.replace("lens", "singletons"),
                                       Ns.prov, Ns.alivocab)
                # print query

                # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
                response = Qry.sparql_xml_to_matrix(query)

                # A DICTIONARY OF KEY: (SUBJECT-OBJECT) VALUE:STRENGTH
                response_dic = dict()
                result = response[St.result]
                if result:
                    for i in range(1, len(result)):
                        key = (result[i][0], result[i][1])
                        if key not in response_dic:
                            response_dic[key] = result[i][2]

                # print response_dic

                # GENERAL INFO 2:
                info = "SIZE    {}   \nCLUSTER {} \nNAME    {}\n".format(cluster_size, count_1, file_name)
                info2 = "CLUSTER [{}] NAME [{}] SIZE [{}]".format(count_1, file_name, cluster_size)
                analysis_builder.write("{}\n".format(info))
                print "{:>5} {}".format(count_2, info2)

                analysis_builder.write("RESOURCES INVOLVED\n")
                analysis_builder.write(child_list)
                analysis_builder.write("\nCORRESPONDENT FOUND ")
                analysis_builder.write(
                    Qry.display_matrix(response, spacing=uri_size, output=True, line_feed='.', is_activated=True))

                # INFO TYPE 3: PROPERTY-VALUES OF THE RESOURCES INVOLVED
                analysis_builder.write("\n\nDISAMBIGUATION HELPER ")
                if targets is None:
                    analysis_builder.write(Cls.disambiguate_network(linkset, children))
                else:
                    analysis_builder.write(Cls.disambiguate_network_2(children, targets))

                position = i_cluster[1][St.row]
                if St.annotate in i_cluster[1]:
                    analysis_builder.write("\n\nANNOTATED CLUSTER PROCESS")
                    analysis_builder.write(i_cluster[1][St.annotate])

                # THE CLUSTER
                # print "POSITION: {}".format(position)
                # print "\nMATRIX DISPLAY\n"
                # for i in range(0, position):
                #     resource = (i_cluster[1][St.matrix])[i]
                #     print "\t{}".format(resource[:position])
                    # print "\t{}".format(resource)

                # GENERATING THE NETWORK AS A TUPLE WHERE A TUPLE REPRESENT TWO RESOURCES IN A RELATIONSHIP :-)
                network = []
                for i in range(1, position):
                    for j in range(1, position):
                        if (i, j) in (i_cluster[1][St.matrix_d]) and (i_cluster[1][St.matrix_d])[(i, j)] != 0:
                            r = (i_cluster[1][St.matrix_d])[(i, 0)]
                            c = (i_cluster[1][St.matrix_d])[(0, j)]
                            r_name = "{}:{}".format(i, Ut.get_uri_local_name(r))
                            c_name = "{}:{}".format(j, Ut.get_uri_local_name(c))
                            network += [(r_name, c_name)]
                            # network += [(r_smart, c_smart)]
                # print "\tNETWORK", network

            if print_it:
                print ""
                print analysis_builder.getvalue()

            # SETTING THE DIRECTORY
            if directory:
                # linkset_name = Ut.get_uri_local_name(linkset)
                # date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
                temp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\{}_{}\\".format(
                    network_size, date, linkset_name, cluster_size, file_name))
                if not os.path.exists(temp_directory):
                    os.makedirs(temp_directory)

                """""""""""""  PLOTTING """""""""""""
                # FIRE THE DRAWING: Supported formats: eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
                analysis_builder.write(
                    draw_graph(graph=network,
                               file_path="{}{}.{}".format(temp_directory, "cluster_{}".format(file_name), "pdf"),
                               show_image=False)
                )

                """""""""""""  WRITING TO DISC """""""""""""
                # WRITE TO DISC
                Ut.write_2_disc(file_directory=temp_directory, file_name="cluster_{}".format(file_name, ),
                                data=analysis_builder.getvalue(), extension="txt")
                analysis_builder = Buffer.StringIO()

        if directory:

            if network:
                automated_decision = metric(network)["AUTOMATED_DECISION"]
                eval_sheet(targets, count_2, "{}_{}".format(cluster_size, file_name),
                           sheet_builder, linkset, children, automated_decision)
            else:
                print network

        if directory:
            # if len(sheet_builder.getvalue()) > 150 and count_2 == 2:
            if len(sheet_builder.getvalue()) > 150 and len(clusters_0) == count_1:
                tmp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\\".format(
                    network_size, date, linkset_name))

                """""""""""""  WRITING CLUSTER SHEET TO DISC """""""""""""
                print "\nWRITING CLUSTER SHEET AT\n\t{}".format(tmp_directory)
                Ut.write_2_disc(file_directory=tmp_directory, file_name="{}_ClusterSheet".format(cluster_size),
                                data=sheet_builder.getvalue(), extension="txt")

        # if count_2 == 2:
        #     break

    print ">>> FOUND: {}".format(count_2)

    if directory is None:
        return "{}\t{}".format(network_size, count_2)
Example #9
def set_linkset_expands_name(specs):

    unique = ""
    source = specs[St.source]
    target = specs[St.target]

    if St.reducer in source:
        unique += source[St.reducer]

    # GEO DATA
    # unit_value = ""

    if St.longitude in source:
        unique += source[St.longitude]

    if St.latitude in source:
        unique += source[St.latitude]

    if St.longitude in target:
        unique += target[St.longitude]

    if St.latitude in target:
        unique += target[St.latitude]

    if St.unit in specs:
        unique += str(specs[St.unit])
        unit = Ut.get_uri_local_name(str(specs[St.unit]))
        unique += unit

    if St.unit_value in specs:
        unique += str(specs[St.unit_value])
        unit_value = str(specs[St.unit_value])
        unique += unit_value

    if St.reducer in specs[St.target]:
        unique += target[St.reducer]

    if St.intermediate_graph in specs:
        unique += str(specs[St.intermediate_graph])

    if St.threshold in specs:
        unique += str(specs[St.threshold])

    if St.delta in specs:
        unique += str(specs[St.delta])

    if St.aligns_name in source:
        unique += source[St.aligns_name]

    elif St.latitude_name in source:
        # src_aligns += source[St.latitude_name]
        unique += "Latitude"
        if St.longitude_name in source:
            # src_aligns += source[St.longitude_name]
            unique += "Longitude"

    if St.aligns_name in target:
        unique += target[St.aligns_name]

    elif St.latitude_name in target:
        # trg_aligns += target[St.latitude_name]
        unique += "Latitude"
        if St.longitude_name in target:
            # trg_aligns += target[St.longitude_name]
            unique += "Longitude"

    dir_name = DIRECTORY
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')

    unique = Ut.hash_it(specs[St.mechanism] + source[St.graph_name] +
                        target[St.graph_name] + source[St.entity_datatype] +
                        target[St.entity_datatype] + unique)

    if St.expands in specs:

        specs[St.linkset_name] = "expands_{}_{}".format(
            specs[St.expands_name], unique)
        specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])

        singleton_metadata_file = "{}(SingletonMetadata)-{}.trig".format(
            specs[St.linkset_name], date)
        singleton_metadata_output = "{}/{}".format(dir_name,
                                                   singleton_metadata_file)
        future_path = os.path.join(DIRECTORY, singleton_metadata_output)
        future_path = future_path.replace("\\", "/").replace("//", "/")

        if len(future_path) > 255:
            full_hashed = Ut.hash_it(specs[St.linkset_name])
            specs[St.linkset_name] = "expands_{}_{}_{}".format(
                source[St.graph_name], specs[St.mechanism], full_hashed)

        print "\t- specs[St.linkset]", specs[St.linkset]
        return specs[St.linkset]

    else:
        return set_linkset_name(specs)
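A minimal sketch of the naming scheme above: every spec field that can influence the linkset is folded into one string before hashing, so the name changes whenever any field does. Ut.hash_it is replaced by a plain hash() and the field values are hypothetical.

specs = {"mechanism": "approxGeo", "threshold": 0.8, "unit": "km"}
unique = ""
for key in ("threshold", "unit"):
    if key in specs:
        unique += str(specs[key])  # FOLD EACH RELEVANT FIELD INTO THE KEY
print "expands_lens_{}".format(hash(specs["mechanism"] + unique))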
Example #10
def intersection_extended(specs, lens_name, display=False):

    # print Ut.headings("EXTENDED INTERSECTION")
    inter = ""
    insert = Buffer.StringIO()
    insert_sing = Buffer.StringIO()
    model_1 = """
    ### ABOUT {0}
    GRAPH <{0}>
    {{
        {1}
    }}
    """
    model_2 = """
    ### {2}. ABOUT {0}
    GRAPH <{0}>
    {{
        ?{1}    ?pred_{2}   ?{3} .
    }}
    ### SINGLETONS
    GRAPH <{4}>
    {{
        ?pred_{2}   ?x_{2}   ?y_{2} .
    }}"""

    count_graph = 1
    for graph in specs[St.datasets]:

        query = """
    PREFIX void: <{}>
    PREFIX bdb: <{}>
    SELECT distinct ?subTarget ?objTarget ?subjectEntityType ?objectEntityType
    {{
        <{}>
            #void:target*/(void:subjectsTarget|void:objectsTarget)* ?x ;
            void:target*/(void:subjectsTarget|void:objectsTarget)* ?x .

        ?x
            void:subjectsTarget     ?subTarget ;
            void:objectsTarget      ?objTarget ;
            bdb:subjectsDatatype    ?subjectEntityType ;
            bdb:objectsDatatype     ?objectEntityType .

        FILTER NOT EXISTS {{ ?subTarget a void:Linkset }}
        FILTER NOT EXISTS {{ ?objTarget a void:Linkset }}
    }}""".format(Ns.void, Ns.bdb, graph)
        # print "INTERSECTION QUERY:", query
        response = sparql_xml_to_matrix(query)

        if display:
            print "INTERSECTION QUERY:", query
        # print "\nGRAPH:", graph
        # print "RESPONSE:", response
        # exit(0)

        if response:

            targets = response[St.result]

            # IF THE RESULT HAS MORE THAN ONE TARGET PAIR (HEADER + AT LEAST TWO ROWS)
            # print "LENGTH:", len(targets)
            if targets is not None and len(targets) > 2:
                union = ""

                for i in range(1, len(targets)):

                    append = "UNION" if i < len(targets) - 1 else ""
                    tab = "" if i == 1 else ""
                    src = Ut.get_uri_local_name(targets[i][0])
                    trg = Ut.get_uri_local_name(targets[i][1])

                    if src[0].isdigit():
                        src = "D{}".format(src)

                    if trg[0].isdigit():
                        trg = "D{}".format(trg)

                    src_TYPE = Ut.get_uri_local_name(targets[i][2])
                    trg_TYPE = Ut.get_uri_local_name(targets[i][3])

                    src_variable = "{}_{}_1".format(src, src_TYPE[short:])

                    if src == trg and src_TYPE == trg_TYPE:
                        trg_variable = "{}_{}_2".format(trg, trg_TYPE[short:])
                    else:
                        trg_variable = "{}_{}_1".format(trg, trg_TYPE[short:])

                    union += "\n\t\t{0}{{ ?{1}  ?predicate_{2}  ?{3} . }} {4}".format(
                        tab, src_variable, i, trg_variable, append)

                union = model_1.format(graph, union)
                # print "UNION:", union
                inter += union

            # ONLY TWO TARGETS
            elif targets and len(targets) == 2:

                src = Ut.get_uri_local_name(targets[1][0])
                trg = Ut.get_uri_local_name(targets[1][1])

                if src[0].isdigit():
                    src = "D{}".format(src)

                if trg[0].isdigit():
                    trg = "D{}".format(trg)

                src_TYPE = Ut.get_uri_local_name(targets[1][2])
                trg_TYPE = Ut.get_uri_local_name(targets[1][3])

                src_variable = "{}_{}_1".format(src, src_TYPE[short:])

                if src == trg and src_TYPE == trg_TYPE:
                    trg_variable = "{}_{}_2".format(trg, trg_TYPE[short:])
                else:
                    trg_variable = "{}_{}_1".format(trg, trg_TYPE[short:])

                inter += model_2.format(
                    graph, src_variable, count_graph, trg_variable,
                    "{}{}".format(Ns.singletons,
                                  Ut.get_uri_local_name_plus(graph)))

                insert.write("\t\t?{}    ?pred_{}   ?{} .\n".format(
                    src_variable, count_graph, trg_variable))
                insert_sing.write(
                    "\t\t?pred_{0}   ?x_{0}     ?y_{0}.\n".format(count_graph))

        count_graph += 1

    # print inter
    # exit(0)
    insert_query = """INSERT\n{{
    ### LINKS
    GRAPH <{5}{4}>
    {{\n{1}\t}}

    ### METADATA
    GRAPH <{6}{4}>
    {{\n{3}\t}}\n}}\nWHERE\n{{{2}\n}}
    """.format("", insert.getvalue(), inter, insert_sing.getvalue(), lens_name,
               Ns.lens, Ns.singletons)
    return insert_query
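
# A standalone sketch (illustrative names only) of the assembly pattern used
# above: every two-target graph contributes one numbered GRAPH pattern to the
# WHERE clause, and the same numbered triple is echoed into the INSERT block.
where_part, insert_part = "", ""
for count, demo_graph in enumerate(["http://example.org/linkset1",
                                    "http://example.org/linkset2"], 1):
    where_part += "\n\tGRAPH <{0}>\n\t{{\n\t\t?src_Person_1 ?pred_{1} ?trg_Person_1 .\n\t}}".format(
        demo_graph, count)
    insert_part += "\t\t?src_Person_1 ?pred_{0} ?trg_Person_1 .\n".format(count)

demo_query = "INSERT\n{{\n\tGRAPH <http://example.org/lens/demo>\n\t{{\n{0}\t}}\n}}\nWHERE\n{{{1}\n}}".format(
    insert_part, where_part)
print demo_query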
Example #11
0
def intersection2(specification):

    print "\nINTERSECTION TASK" \
          "\n======================================================" \
          "========================================================\n"
    query = ""
    p_count = 0
    up_count = 0
    check = dict()
    union_check = dict()
    # target_check = dict()
    view_lens = specification[St.datasets]
    # print datasets

    for graph in view_lens:

        if type(graph) is not str and type(graph) is not unicode:
            # print dataset
            print "THE DATASET MUST BE OF TYPE STRING. {} WAS GIVEN.".format(
                type(graph))
            return None

        # NAME OF THE GRAPH
        graph_name = Ut.get_uri_local_name(graph)
        # print "Dataset:", dataset

        # GET THE TYPE OF THE GRAPH
        graph_type = get_graph_type(graph)

        if graph_type[St.message] != "NO RESPONSE":

            if graph_type[St.result] is not None:
                # print "\tABLE TO RETRIEVE THE TYPE {}".format(graph_type)

                # EXPECTING ONE RESULT. BECAUSE THE MATRIX HAS A HEADER, ITS LENGTH NEEDS TO BE 2
                if len(graph_type[St.result]) == 2:
                    # EXPECTING A LENS DATATYPE

                    if graph_type[St.result][1][0] == "{}Lens".format(Ns.bdb):
                        operator = get_lens_operator(graph)
                        # print "\tLENS GENERATED BY {}".format(operator)

                        if (operator is not None) and (operator == "{}".format(
                                Ns.lensOpu)):

                            if graph not in check:
                                check[graph] = 1
                                # print "\tGETTING TARGET GRAPHS"
                                targets = get_graph_targets(graph)

                                if targets[St.result] is not None:
                                    # print "\tABLE TO RETRIEVE TARGETS {}".format(targets)
                                    union_query = ""
                                    graphs = list()
                                    for i in range(1, len(targets[St.result])):
                                        target = targets[St.result][i][0]
                                        # print "target: ", target
                                        # GET SOURCE AND TARGET DATASETS
                                        src_trg = get_graph_source_target(
                                            target)

                                        if src_trg[St.result] is not None:
                                            src = Ut.get_uri_local_name(
                                                src_trg[St.result][1][0])
                                            trg = Ut.get_uri_local_name(
                                                src_trg[St.result][1][1])
                                            # print "\tSOURCE: {} TARGET:{}".format(src, trg)

                                            if "{}_{}".format(
                                                    src, trg) in union_check:
                                                up_count += 1
                                                "\t{}_{} already exist".format(
                                                    src, trg)
                                                # print "\t{}_{} already exist".format(src, trg)
                                            else:
                                                union_check["{}_{}".format(
                                                    src, trg)] = up_count
                                                temp = "\n\t\tGRAPH <{}> \n\t\t{{ " \
                                                       "\n\t\t\t?{} ?pred_{} ?{} . " \
                                                       "\n\t\t}}".format(graph, src, up_count, trg)
                                                # print "\tTHE RESULTING GRAPH {}".format(temp)
                                                graphs.append(temp)

                                        else:
                                            "No source and target datasets"

                                    # query += "\n\tGRAPH <{}> \n\t{{ {} \n\t}}".format(dataset, triples)

                                    if len(union_check) > 1:
                                        for i in range(len(graphs)):
                                            if i == 0:
                                                union_query += "\n\t### LENS BY UNION: {}\n\t{{{}\n\t}}".format(
                                                    graph_name, graphs[i])
                                            elif i > 0:
                                                union_query += "\n\tUNION\n\t{{{}\n\t}}".format(
                                                    graphs[i])
                                    else:
                                        union_query += "\n\t### LENS BY UNION: {}\n\t{{{}\n\t}}".format(
                                            graph_name, graphs[0])

                                    query += union_query

                                else:
                                    pass  # NO TARGET FOR THIS GRAPH
                            else:
                                pass
                                # print "\tTHE DATASET ALREADY EXISTS"
                        else:
                            pass  # NOT A UNION OPERATOR

                    elif graph_type[St.result][1][0] == "{}Linkset".format(
                            Ns.void):
                        "It is a linkset"
                        # GET SOURCE AND TARGET DATASETS
                        src_trg = get_graph_source_target(graph)
                        if src_trg is not None:
                            src = Ut.get_uri_local_name(
                                src_trg[St.result][1][0])
                            trg = Ut.get_uri_local_name(
                                src_trg[St.result][1][1])
                            if graph in check:
                                print "already exist"
                            else:
                                p_count += 1
                                check[graph] = p_count
                                query += "\n\t### LINKSET: {}\n\tGRAPH <{}> \n\t{{\n\t\t?{} ?predicate_{} ?{} .\n\t}}".\
                                    format(graph_name, graph, src, p_count, trg)

            else:
                print "WE COULD NOT ACCESS THE TYPE OF THE GRAPH: <{}>.".format(
                    graph)

        else:
            print Ec.ERROR_CODE_1
            return None
    # print query
    return query
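
# The matrix convention these helpers rely on: row 0 is the header and data
# starts at row 1, so a single-answer response has length 2. A tiny
# illustration with made-up values:
demo_matrix = [["type"], ["http://example.org/vocab#Lens"]]
if len(demo_matrix) == 2:
    print "single result:", demo_matrix[1][0]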
Example #12
0
def intersection(specs, display=False):

    inter = ""

    count_graph = 1
    for graph in specs[St.datasets]:

        query = """
    PREFIX void: <{}>
    PREFIX bdb: <{}>
    SELECT distinct ?subTarget ?objTarget ?subjectEntityType ?objectEntityType
    {{
        <{}>
            #void:target*/(void:subjectsTarget|void:objectsTarget)* ?x ;
            void:target*/(void:subjectsTarget|void:objectsTarget)* ?x .

        ?x
            void:subjectsTarget     ?subTarget ;
            void:objectsTarget      ?objTarget ;
            bdb:subjectsDatatype    ?subjectEntityType ;
            bdb:objectsDatatype     ?objectEntityType .

        FILTER NOT EXISTS {{ ?subTarget a void:Linkset }}
        FILTER NOT EXISTS {{ ?objTarget a void:Linkset }}
    }}""".format(Ns.void, Ns.bdb, graph)
        # print "INTERSECTION QUERY:", query
        response = sparql_xml_to_matrix(query)

        if display:
            print "INTERSECTION QUERY:", query
        # print "\nGRAPH:", graph
        # print "RESPONSE:", response
        # exit(0)

        if response:
            targets = response[St.result]

            # IF THE RESULT HAS MORE THAN ONE MATCH (HEADER ROW PLUS AT LEAST TWO DATA ROWS)
            # print "LENGTH:", len(targets)
            if targets is not None and len(targets) > 2:
                union = ""

                for i in range(1, len(targets)):

                    append = "UNION" if i < len(targets) - 1 else ""
                    tab = "" if i == 1 else ""
                    src = Ut.get_uri_local_name(targets[i][0])
                    trg = Ut.get_uri_local_name(targets[i][1])

                    if src[0].isdigit():
                        src = "D{}".format(src)

                    if trg[0].isdigit():
                        trg = "D{}".format(trg)

                    src_TYPE = Ut.get_uri_local_name(targets[i][2])
                    trg_TYPE = Ut.get_uri_local_name(targets[i][3])

                    src_variable = "{}_{}_1".format(src, src_TYPE[short:])

                    if src == trg and src_TYPE == trg_TYPE:
                        trg_variable = "{}_{}_2".format(trg, trg_TYPE[short:])
                    else:
                        trg_variable = "{}_{}_1".format(trg, trg_TYPE[short:])

                    union += "\n\t\t{0}{{ ?{1}  ?predicate_{2}  ?{3} . }} {4}".format(
                        tab, src_variable, i, trg_variable, append)

                union = """
    ### ABOUT {0}
    GRAPH <{0}>
    {{
        {1}
    }}
    """.format(graph, union)
                # print "UNION:", union
                inter += union

            elif targets and len(targets) == 2:
                src = Ut.get_uri_local_name(targets[1][0])
                trg = Ut.get_uri_local_name(targets[1][1])

                if src[0].isdigit():
                    src = "D{}".format(src)

                if trg[0].isdigit():
                    trg = "D{}".format(trg)

                src_TYPE = Ut.get_uri_local_name(targets[1][2])
                trg_TYPE = Ut.get_uri_local_name(targets[1][3])

                src_variable = "{}_{}_1".format(src, src_TYPE[short:])

                if src == trg and src_TYPE == trg_TYPE:
                    trg_variable = "{}_{}_2".format(trg, trg_TYPE[short:])
                else:
                    trg_variable = "{}_{}_1".format(trg, trg_TYPE[short:])

                inter += """
    ### ABOUT {0}
    GRAPH <{0}>
    {{
        ?{1}    ?pred_{2}   ?{3} .
    }}
    """.format(graph, src_variable, count_graph, trg_variable)

        count_graph += 1

    # print inter
    # exit(0)
    return inter
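
# Standalone sketch of the variable sanitisation used above: a SPARQL
# variable name may not start with a digit, so numeric local names get a "D"
# prefix (the sample local names are illustrative).
for demo_name in ("grid", "07947"):
    if demo_name[0].isdigit():
        demo_name = "D{}".format(demo_name)
    print demo_name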
Example #13
0
def geo_match_query(specs):

    # Note that for WKT-formatted points, the location is given as <long, lat>.
    # Locations can also be encoded with the WGS 84 vocabulary (wgs:lat / wgs:long),
    # which is the encoding this query expects.

    # source = specs[St.source]
    # target = specs[St.target]
    # src_lat = source[St.latitude]
    # src_long = source[St.longitude]

    is_de_duplication = (specs[St.source][St.graph] == specs[St.target][St.graph]) and \
                        (specs[St.source][St.entity_datatype] == specs[St.target][St.entity_datatype])

    number_of_load = '{}_1'.format(specs[St.lens_name]) if is_de_duplication is True \
        else "{}_2".format(specs[St.lens_name])

    unit = "{}(s)".format(Ut.get_uri_local_name(specs[St.unit]).lower())

    match = """
    ######################################################################
    ### INSERTING THE MATCHES FOUND INTO A TEMPORARY GRAPH
    ######################################################################
    PREFIX ll:          <{0}>
    PREFIX tmpvocab:    <{0}>
    PREFIX tmpgraph:    <{1}>
    prefix lens:        <{5}>
    prefix singleton:   <{7}>
    prefix prov:        <{12}>
    PREFIX geof:        <http://www.opengis.net/def/function/geosparql/>
    PREFIX wgs:         <http://www.w3.org/2003/01/geo/wgs84_pos#>
    PREFIX rdf:         <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    INSERT
    {{

        GRAPH lens:{6}
        {{
            ?src_resource  ?singPre  ?trg_resource .
        }}

        GRAPH singleton:{6}
        {{
            ?singPre rdf:singletonPropertyOf     ll:nearbyGeoSim{10} .
            ?singPre ll:hasEvidence             "Near each other by at most {3} {9}" .
            ?singPre ll:hasStrength             1 .
            ?singPre ?pre_derived               ?obj_derived .
            ?singPre ?der_pre                    ?der_obj .
        }}
    }}
    WHERE
    {{
        ### THE ALIGNMENT TO REFINE
        GRAPH lens:{11}
        {{
            ?src_resource ?singleton ?trg_resource .
        }}

        GRAPH singleton:{11}
        {{
            ?singleton ?pre_derived ?obj_derived  .
            OPTIONAL{{
                ?obj_derived prov:wasDerivedFrom* ?der_from .
                ?der_from ?der_pre ?der_obj .

            }}
        }}

        ### SOURCE DATASET WITH GEO-COORDINATES
        GRAPH tmpgraph:load_{6}_1
        {{
            ?src_resource  wgs:long  ?src_longitude .
            ?src_resource  wgs:lat   ?src_latitude .
            ### Create A SINGLETON URI
            BIND( replace("{0}{8}_#", "#", STRAFTER(str(UUID()),"uuid:")) as ?pre )
            BIND( iri(?pre) as ?singPre )
        }}

        ### TARGET DATASET WITH GEO-COORDINATES
        GRAPH tmpgraph:load_{2}
        {{
            ?trg_resource  wgs:long  ?trg_longitude .
            ?trg_resource  wgs:lat   ?trg_latitude .
        }}

        ### MATCHING TARGETS NEAR BY SOURCE
        ?src_resource  geof:nearby (?trg_resource {3} <{4}>).
    }}
    """.format(
        # 0          1            2               3                     4
        Ns.alivocab,
        Ns.tmpgraph,
        number_of_load,
        specs[St.unit_value],
        specs[St.unit],
        # 5         6                       7              8              9     10
        Ns.lens,
        specs[St.lens_name],
        Ns.singletons,
        specs[St.mechanism],
        unit,
        specs[St.sameAsCount],
        # 11                    12
        specs[St.refined_name],
        Ns.prov)

    return match
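
# Standalone sketch of the de-duplication switch above: when source and
# target share the same graph and entity datatype, the target block reuses
# the first load graph (suffix _1); otherwise it queries the second load
# graph (suffix _2). All values here are made up.
demo_src = {"graph": "http://example.org/grid", "datatype": "Organisation"}
demo_trg = {"graph": "http://example.org/grid", "datatype": "Organisation"}
demo_is_dedup = (demo_src["graph"] == demo_trg["graph"]) and \
                (demo_src["datatype"] == demo_trg["datatype"])
print "demoLens_1" if demo_is_dedup else "demoLens_2"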
Example #14
0
def cluster_d_test(linkset, network_size=3, network_size_max=3, targets=None, constraint_targets=None,
                   constraint_text="", directory=None, greater_equal=True, print_it=False, limit=None,
                   only_good=False, activated=False):

    # FOR CONSTRAINTS TO WORK, [constraint_targets] SHOULD NOT BE NONE

    network = []
    print "\nLINK NETWORK INVESTIGATION"
    if activated is False:
        print "\tTHE FUNCTION I NOT ACTIVATED"
        return ""

    elif network_size > network_size_max and greater_equal is False:
        print "\t[network_size] SHOULD BE SMALLER THAN [network_size_max]"
        return ""

    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    linkset_name = Ut.get_uri_local_name(linkset)
    linkset = linkset.strip()

    if network_size_max - network_size == 0:
        greater_equal = False

    check = False

    # RUN THE CLUSTER
    clusters_0 = Cls.links_clustering(linkset, limit)

    if greater_equal is True:
        temp_size = 0
        for cluster, cluster_val in clusters_0.items():
            new_size = len(list(cluster_val["nodes"]))
            if new_size > temp_size:
                temp_size = new_size
        network_size_max = temp_size
        print "THE BIGGEST NETWORK'S: {}".format(network_size_max)

    def check_constraint():

        text = constraint_text.lower()
        text = text.split(",")

        # CONSTRAINT BUILDER
        c_builder = Buffer.StringIO()
        if constraint_targets is not None:
            for dictionary in constraint_targets:
                graph = dictionary[St.graph]
                data_list = dictionary[St.data]
                properties = data_list[0][St.properties]
                prop = properties[0] if Ut.is_nt_format(properties[0]) else "<{}>".format(properties[0])

                # WRITING THE CONSTRAINT ON THE GRAPH
                graph_q = """
       {{
           GRAPH <{0}>
           {{
               ?lookup {1} ?constraint .
           }}
       }}
       """.format(graph, prop)
                if len(c_builder.getvalue()) == 0:
                    c_builder.write(graph_q)
                else:
                    c_builder.write("UNION {}".format(graph_q))

            # WRITING THE FILTER
            if len(c_builder.getvalue()) > 0:
                for i in range(0, len(text)):
                    if i == 0:
                        c_builder.write("""
       FILTER (LCASE(STR(?constraint)) = "{}" """.format(text[i].strip()))
                    else:
                        c_builder.write("""
       || LCASE(STR(?constraint)) = "{}" """.format(text[i].strip()))
                c_builder.write(")")


        # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
        query = Qry.cluster_rsc_strengths_query(resources, linkset)
        query = query.replace("# CONSTRAINTS IF ANY", c_builder.getvalue())
        # print query
        response = Qry.sparql_xml_to_matrix(query)
        if response[St.result] is None:
            return False
        return True

    for index in range(network_size, network_size_max + 1):

        count_1 = 0
        count_2 = 0
        curr_network_size = index
        print "\nCLUSTERS OF SIZE {}".format(index)
        sheet_builder = Buffer.StringIO()
        analysis_builder = Buffer.StringIO()
        sheet_builder.write("Count	ID					STRUCTURE	E-STRUCTURE-SIZE	A. NETWORK QUALITY"
                            "		M. NETWORK QUALITY		REFERENCE\n")

        for cluster, cluster_val in clusters_0.items():

            # network = []
            resources = ""
            uri_size = 0
            count_1 += 1
            children = list(cluster_val["nodes"])
            strengths = cluster_val["strengths"]
            cluster_size = len(children)
            # if "<http://www.grid.ac/institutes/grid.10493.3f>" not in children:
            #     continue

            check = cluster_size >= curr_network_size if greater_equal else cluster_size == curr_network_size

            # NETWORK OF A PARTICULAR SIZE
            if check:

                # file_name = i_cluster[0]

                # 2: FETCHING THE CORRESPONDENTS
                smallest_hash = float('inf')
                child_list = ""
                for child in children:

                    # CREATE THE HASHED ID AS THE CLUSTER NAME
                    hashed = hash(child)
                    if hashed <= smallest_hash:
                        smallest_hash = hashed

                    # GENERAL INFO 1: RESOURCES INVOLVED
                    child_list += "\t{}\n".format(child)

                    # LIST OF RESOURCES IN THE CLUSTER
                    use = "<{}>".format(child) if Ut.is_nt_format(child) is not True else child
                    resources += "\n\t\t\t\t{}".format(use)
                    if len(child) > uri_size:
                        uri_size = len(child)

                # MAKE SURE THE FILE NAME OF THE CLUSTER IS ALWAYS THE SAME
                file_name = "{}".format(str(smallest_hash).replace("-", "N")) if str(
                    smallest_hash).startswith("-") \
                    else "P{}".format(smallest_hash)

                if constraint_targets is not None and check_constraint() is False:
                    continue

                count_2 += 1

                # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
                query = Qry.cluster_rsc_strengths_query(resources, linkset)
                response = Qry.sparql_xml_to_matrix(query)

                # GENERAL INFO 2:
                info = "SIZE    {}   \nCLUSTER {} \nNAME    {}\n".format(cluster_size, count_1, file_name)
                info2 = "CLUSTER [{}] NAME [{}] SIZE [{}]".format(count_1, file_name, cluster_size)
                analysis_builder.write("{}\n".format(info))


                analysis_builder.write("RESOURCES INVOLVED\n")
                analysis_builder.write(child_list)
                analysis_builder.write("\nCORRESPONDENT FOUND ")
                analysis_builder.write(
                    Qry.display_matrix(response, spacing=uri_size, output=True, line_feed='.', is_activated=True))

                # INFO TYPE 3: PROPERTY-VALUES OF THE RESOURCES INVOLVED
                analysis_builder.write("\n\nDISAMBIGUATION HELPER ")
                if targets is None:
                    analysis_builder.write(Cls.disambiguate_network(linkset, children))
                else:
                    report = Cls.disambiguate_network_2(children, targets)
                    if report is not None:
                        analysis_builder.write(report)

                # GENERATING THE NETWORK AS A TUPLE WHERE A TUPLE REPRESENT TWO RESOURCES IN A RELATIONSHIP :-)
                network = []
                link_count = 0
                for link in cluster_val["links"]:
                    link_count += 1
                    name_1 = "{}-{}".format(Ut.hash_it(link[0]), Ut.get_uri_local_name(link[0]))
                    name_2 = "{}-{}".format(Ut.hash_it(link[1]), Ut.get_uri_local_name(link[1]))
                    network += [(name_1, name_2)]

                # GET THE AUTOMATED FLAG (COMPUTED BELOW WHEN A DIRECTORY IS GIVEN)

                if print_it:
                    print ""
                    print analysis_builder.getvalue()

                # SETTING THE DIRECTORY
                if directory:

                    if network:
                        automated_decision = metric(network)["AUTOMATED_DECISION"]
                        if only_good is True and automated_decision.startswith("GOOD") is not True:
                            count_2 -= 1
                            continue

                        print "{:>5} {}".format(count_2, info2)

                        eval_sheet(targets, count_2, "{}_{}".format(cluster_size, file_name),
                                   sheet_builder, linkset, children, automated_decision)
                    else:
                        print network


                    # linkset_name = Ut.get_uri_local_name(linkset)
                    # date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
                    temp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\{}_{}\\".format(
                        curr_network_size, date, linkset_name, cluster_size, file_name))
                    if not os.path.exists(temp_directory):
                        os.makedirs(temp_directory)

                    """""""""""""  PLOTTING """""""""""""
                    # FIRE THE DRAWING: Supported formats: eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
                    analysis_builder.write(
                        draw_graph(graph=network,
                                   file_path="{}{}.{}".format(temp_directory, "cluster_{}".format(file_name), "pdf"),
                                   show_image=False)
                    )

                    """""""""""""  WRITING TO DISC """""""""""""
                    # WRITE TO DISC
                    Ut.write_2_disc(file_directory=temp_directory, file_name="cluster_{}".format(file_name, ),
                                    data=analysis_builder.getvalue(), extension="txt")
                    analysis_builder = Buffer.StringIO()

            if directory:
                # if len(sheet_builder.getvalue()) > 150 and count_2 == 2:
                if len(sheet_builder.getvalue()) > 150 and len(clusters_0) == count_1:
                    tmp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\\".format(
                        curr_network_size, date, linkset_name))

                    """""""""""""  WRITING CLUSTER SHEET TO DISC """""""""""""
                    print "\n\tWRITING CLUSTER SHEET AT\n\t{}".format(tmp_directory)
                    Ut.write_2_disc(file_directory=tmp_directory, file_name="{}_ClusterSheet".format(cluster_size),
                                    data=sheet_builder.getvalue(), extension="txt")

            # if count_2 == 2:
            #     break

        if greater_equal is True:
            # no need to continue as we already did all network greater of equal to "network-size" input
            break

        print "\t>>> FOUND: {} CLUSTERS OF SIZE {}".format(count_2, curr_network_size)

        if directory is None:
            return "{}\t{}".format(curr_network_size, count_2)
Example #15
0
def cluster_d_test_stats(linkset, network_size=3, targets=None, directory=None,
                         greater_equal=True, print_it=False, limit=None,
                         activated=False):
    network = []
    print "LINK NETWORK INVESTIGATION"
    if activated is False:
        print "\tTHE FUNCTION I NOT ACTIVATED"
        return ""
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
    linkset_name = Ut.get_uri_local_name(linkset)
    count_1 = 0
    count_2 = 0
    sheet_builder = Buffer.StringIO()
    analysis_builder = Buffer.StringIO()
    sheet_builder.write("Count	ID					STRUCTURE	E-STRUCTURE-SIZE	A. NETWORK QUALITY"
                        "		M. NETWORK QUALITY		REFERENCE\n")
    linkset = linkset.strip()
    check = False

    # RUN THE CLUSTER
    clusters_0 = Cls.links_clustering(linkset, limit)

    for cluster, cluster_val in clusters_0.items():

        # network = []
        resources = ""
        uri_size = 0
        count_1 += 1
        children = list(cluster_val["nodes"])
        strengths = cluster_val["strengths"]
        cluster_size = len(children)
        # if "<http://www.grid.ac/institutes/grid.10493.3f>" not in children:
        #     continue

        check = cluster_size >= network_size if greater_equal else cluster_size == network_size

        # NETWORK OF A PARTICULAR SIZE
        if check:
            count_2 += 1
            # file_name = i_cluster[0]

            # 2: FETCHING THE CORRESPONDENTS
            smallest_hash = float('inf')
            child_list = ""
            for child in children:
                hashed = hash(child)
                if hashed <= smallest_hash:
                    smallest_hash = hashed

                # GENERAL INFO 1: RESOURCES INVOLVED
                child_list += "\t{}\n".format(child)

                use = "<{}>".format(child) if Ut.is_nt_format(child) is not True else child
                resources += "\n\t\t\t\t{}".format(use)
                if len(child) > uri_size:
                    uri_size = len(child)

            if directory:
                # MAKE SURE THE FILE NAME OF THE CLUSTER IS ALWAYS THE SAME
                file_name = "{}".format(str(smallest_hash).replace("-", "N")) if str(
                    smallest_hash).startswith("-") \
                    else "P{}".format(smallest_hash)


                # THE RESULT OF THE QUERY ABOUT THE LINKED RESOURCES
                query = Qry.cluster_rsc_strengths_query(resources, linkset)
                response = Qry.sparql_xml_to_matrix(query)

                # GENERAL INFO 2:
                info = "SIZE    {}   \nCLUSTER {} \nNAME    {}\n".format(cluster_size, count_1, file_name)
                info2 = "CLUSTER [{}] NAME [{}] SIZE [{}]".format(count_1, file_name, cluster_size)
                analysis_builder.write("{}\n".format(info))
                print "{:>5} {}".format(count_2, info2)

                analysis_builder.write("RESOURCES INVOLVED\n")
                analysis_builder.write(child_list)
                analysis_builder.write("\nCORRESPONDENT FOUND ")
                analysis_builder.write(
                    Qry.display_matrix(response, spacing=uri_size, output=True, line_feed='.', is_activated=True))

                # INFO TYPE 3: PROPERTY-VALUES OF THE RESOURCES INVOLVED
                analysis_builder.write("\n\nDISAMBIGUATION HELPER ")
                if targets is None:
                    analysis_builder.write(Cls.disambiguate_network(linkset, children))
                else:
                    analysis_builder.write(Cls.disambiguate_network_2(children, targets))


                # GENERATING THE NETWORK AS A TUPLE WHERE A TUPLE REPRESENT TWO RESOURCES IN A RELATIONSHIP :-)
                network = []
                link_count = 0
                for link in cluster_val["links"]:
                    link_count += 1
                    name_1 = "{}".format(Ut.get_uri_local_name(link[0]))
                    name_2 = "{}".format(Ut.get_uri_local_name(link[1]))
                    network += [(name_1, name_2)]


            if print_it:
                print ""
                print analysis_builder.getvalue()

            # SETTING THE DIRECTORY
            if directory:
                # linkset_name = Ut.get_uri_local_name(linkset)
                # date = datetime.date.isoformat(datetime.date.today()).replace('-', '')
                temp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\{}_{}\\".format(
                    network_size, date, linkset_name, cluster_size, file_name))
                if not os.path.exists(temp_directory):
                    os.makedirs(temp_directory)

                """""""""""""  PLOTTING """""""""""""
                # FIRE THE DRAWING: Supported formats: eps, pdf, pgf, png, ps, raw, rgba, svg, svgz.
                analysis_builder.write(
                    draw_graph(graph=network,
                               file_path="{}{}.{}".format(temp_directory, "cluster_{}".format(file_name), "pdf"),
                               show_image=False)
                )

                """""""""""""  WRITING TO DISC """""""""""""
                # WRITE TO DISC
                Ut.write_2_disc(file_directory=temp_directory, file_name="cluster_{}".format(file_name, ),
                                data=analysis_builder.getvalue(), extension="txt")
                analysis_builder = Buffer.StringIO()

                if network:
                    automated_decision = metric(network)["AUTOMATED_DECISION"]
                    eval_sheet(targets, count_2, "{}_{}".format(cluster_size, file_name),
                               sheet_builder, linkset, children, automated_decision)
                else:
                    print network

        if directory:
            # if len(sheet_builder.getvalue()) > 150 and count_2 == 2:
            if len(sheet_builder.getvalue()) > 150 and len(clusters_0) == count_1:
                tmp_directory = "{}{}".format(directory, "\{}_Analysis_{}\{}\\".format(
                    network_size, date, linkset_name))

                """""""""""""  WRITING CLUSTER SHEET TO DISC """""""""""""
                print "\nWRITING CLUSTER SHEET AT\n\t{}".format(tmp_directory)
                Ut.write_2_disc(file_directory=tmp_directory, file_name="{}_ClusterSheet".format(cluster_size),
                                data=sheet_builder.getvalue(), extension="txt")

                # if count_2 == 2:
                #     break

    print ">>> FOUND: {}".format(count_2)

    if directory is None:
        return "{}\t{}".format(network_size, count_2)
Example #16
0
def lens_refine_name(specs, lens_type):

    extra = ""

    source = specs[St.source]
    target = specs[St.target]

    if St.reducer in source:
        extra += source[St.reducer]

    # GEO DATA
    unit_value = ""

    if St.longitude in source:
        extra += source[St.longitude]

    if St.latitude in source:
        extra += source[St.latitude]

    if St.longitude in target:
        extra += target[St.longitude]

    if St.latitude in target:
        extra += target[St.latitude]

    if St.unit in specs:
        extra += str(specs[St.unit])
        unit = Ut.get_uri_local_name(str(specs[St.unit]))

    if St.unit_value in specs:
        extra += str(specs[St.unit_value])
        unit_value = str(specs[St.unit_value])

    if St.reducer in specs[St.target]:
        extra += target[St.reducer]

    if St.intermediate_graph in specs:
        intermediate = str(specs[St.intermediate_graph])

    if St.threshold in specs:
        extra += str(specs[St.threshold])

    if St.delta in specs:
        extra += str(specs[St.delta])

    if St.aligns_name in source:
        extra += source[St.aligns_name]
    elif St.latitude_name in source:
        # src_aligns += source[St.latitude_name]
        extra += "Latitude"
        if St.longitude_name in source:
            # src_aligns += source[St.longitude_name]
            extra += "Longitude"

    if St.aligns_name in target:
        extra += target[St.aligns_name]
    elif St.latitude_name in target:
        # trg_aligns += target[St.latitude_name]
        extra += "Latitude"
        if St.longitude_name in target:
            # trg_aligns += target[St.longitude_name]
            extra += "Longitude"

    unique = Ut.hash_it(extra)
    specs[St.lens] = u"{}refine_{}_{}".format(Ns.lens, specs[St.refined_name],
                                              unique)
    update_specification(specs)
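
# A sketch of the refined-lens URI assembled above, with illustrative
# stand-ins: the lens namespace comes first, then the refined name, then the
# hash computed over the specification.
demo_ns_lens = "http://example.org/lens/"
demo_refined = "refined_grid_orgref_demo"
demo_unique = "P1234567890"
print u"{}refine_{}_{}".format(demo_ns_lens, demo_refined, demo_unique)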
Example #17
0
def stats_optimised(graph, display_table=False, display_text=False, boolean=True):

    optional = dict()
    stat = {}
    text = buffer()

    # 1. FIND ALL TYPES IN THE GRAPH
    qry_types = """
    ### RETRIEVE ALL TYPES FROM THE GRAPH
    SELECT DISTINCT ?Types (count(distinct ?resource) as ?EntityCount)
    {{
        GRAPH <{}>
        {{
            ?resource a ?Types .
        }}
    }} GROUP by ?Types ?EntityType ORDER BY ?Graph
    """.format(graph)
    # print qry_types
    types_matrix = sparql_xml_to_matrix(qry_types)
    # print types_matrix
    # if display_table:
    display_matrix(types_matrix, spacing=70, limit=100, is_activated=display_table)

    # 2. FOR EACH TYPE, GET ALL ITS PROPERTIES
    if types_matrix["result"] is not None:
        types = types_matrix["result"]
        for i in range(1, len(types)):
            curr_type = types[i][0]
            type_name = Ut.get_uri_local_name(curr_type)
            instances = int(types[i][1])
            optional[curr_type] = dict()
            qry_properties = """
            ### RETRIEVE ALL PROPERTIES FOR THE TYPE [{0}]
            SELECT DISTINCT ?Properties_for_{0}
            {{
                GRAPH <{1}>
                {{
                    ?resource   a                       <{2}> ;
                                ?Properties_for_{0}     ?object .
                }}
            }}
            """.format(type_name, graph, curr_type)
            properties_matrix = sparql_xml_to_matrix(qry_properties)
            # if display_table:
            # print "\nPROPERTY COUNT:", len(properties_matrix["result"]) - 1
            display_matrix(properties_matrix, spacing=70, limit=100, is_activated=display_table)

            # PROPERTY OCCURRENCE COUNT
            pro_text = buffer()
            if properties_matrix["result"] is not None:

                pro_text.write("\nSELECT ?predicate (COUNT(distinct ?resource) as ?Occurrences)")
                pro_text.write("\n{{\n\tGRAPH <{}> ".format(graph))
                pro_text.write("\n\t{{\n\t\t?resource a             <{}> .".format(curr_type))
                pro_text.write("\n\t\t?resource ?predicate    ?object .")
                pro_text.write("\n\t}}\n}}\nGROUP BY ?predicate".format(grp_text.getvalue()))
                properties = properties_matrix["result"]
                cur_dic = optional[curr_type]
                count = 0
                append = ""

                # RUN THE QUERY FOR PROPERTIES OCCURRENCES
                qry_property_stats = pro_text.getvalue()
                # print qry_property_stats
                Occurrences_matrix = sparql_xml_to_matrix(qry_property_stats)
                # if display_table:
                display_matrix(Occurrences_matrix, spacing=70, limit=100, is_activated=display_table)
                if Occurrences_matrix["result"] != None:
                    Occurrences = Occurrences_matrix["result"]
                    for j in range(1, len(Occurrences)):

                        # THE PROPERTY IS THE KEY OF THE DICTIONARY
                        if boolean is True:
                            cur_dic[Occurrences[j][0]] = int(Occurrences[j][1]) % float(instances) != 0
                        else:
                            cur_dic[Occurrences[j][0]] = math.floor(100 * int(Occurrences[j][1]) / float(instances))

    text.write("\nGRAPH: {}".format(graph))
    for key, value in optional.items():
        line = "-------------------------------------------------------------------------------------------------"
        text.write("\n\n\tENTITY TYPE: {}".format(key))
        text.write("\n\t\t{:100}{}".format(line, "------------"))
        text.write("\n\t\t{:<5}{:97}{}".format(len(optional[key]), "Properties", "Optional"))
        text.write("\n\t\t{:100}{}".format(line, "------------"))

        for pro, opt in value.items():
            if opt:
                text.write("\n\t\t{:100}{}".format("{} ***".format(pro), opt))
            else:
                text.write("\n\t\t{:100}{}".format(pro, opt))

    if display_text:
        print text.getvalue()
    return optional
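
# Standalone sketch of the optionality test above: with COUNT(distinct
# ?resource), a property is optional exactly when fewer resources carry it
# than there are instances of the type, i.e. the modulo is non-zero. The
# counts below are made up.
demo_instances = 100
for demo_occurrences in (100, 73):
    print demo_occurrences, "optional:", demo_occurrences % float(demo_instances) != 0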
Example #18
0
def set_linkset_name(specs, inverse=False):

    src_aligns = ""
    trg_aligns = ""
    reducer = ""
    intermediate = ""
    threshold = ""
    delta = ""
    geo = ""
    unit = ""

    source = specs[St.source]
    target = specs[St.target]

    if St.reducer in source:
        reducer += source[St.reducer]

    # GEO DATA
    unit_value = ""

    if St.longitude in source:
        geo += source[St.longitude]

    if St.latitude in source:
        geo += source[St.latitude]

    if St.longitude in target:
        geo += target[St.longitude]

    if St.latitude in target:
        geo += target[St.latitude]

    if St.unit in specs:
        geo += str(specs[St.unit])
        unit = Ut.get_uri_local_name(str(specs[St.unit]))

    if St.unit_value in specs:
        geo += str(specs[St.unit_value])
        unit_value = str(specs[St.unit_value])

    if St.reducer in specs[St.target]:
        reducer += target[St.reducer]

    if St.intermediate_graph in specs:
        intermediate = str(specs[St.intermediate_graph])

    if St.threshold in specs:
        threshold += str(specs[St.threshold])

    if St.delta in specs:
        delta += str(specs[St.delta])

    if St.aligns_name in source:
        src_aligns += source[St.aligns_name]
    elif St.latitude_name in source:
        # src_aligns += source[St.latitude_name]
        src_aligns += "Latitude"
        if St.longitude_name in source:
            # src_aligns += source[St.longitude_name]
            src_aligns += "Longitude"

    if St.aligns_name in target:
        trg_aligns += target[St.aligns_name]
    elif St.latitude_name in target:
        # trg_aligns += target[St.latitude_name]
        trg_aligns += "Latitude"
        if St.longitude_name in target:
            # trg_aligns += target[St.longitude_name]
            trg_aligns += "Longitude"

    dir_name = DIRECTORY
    date = datetime.date.isoformat(datetime.date.today()).replace('-', '')

    if inverse is False:

        h_name = specs[St.mechanism] + \
                 source[St.graph_name] + src_aligns + \
                 target[St.graph_name] + trg_aligns + \
                 source[St.entity_datatype] + target[St.entity_datatype] + "-" +\
                 reducer + intermediate + threshold + delta + geo

        hashed = hash(h_name)

        append = str(hashed).replace(
            "-",
            "N") if str(hashed).__contains__("-") else "P{}".format(hashed)

        specs[St.linkset_name] = "{}_{}_{}{}{}_{}_{}_{}".format(
            source[St.graph_name], target[St.graph_name], specs[St.mechanism],
            unit_value, unit, source[St.entity_name], src_aligns, append)

        singleton_metadata_file = "{}(SingletonMetadata)-{}.trig".format(
            specs[St.linkset_name], date)
        singleton_metadata_output = "{}/{}".format(dir_name,
                                                   singleton_metadata_file)
        future_path = os.path.join(DIRECTORY, singleton_metadata_output)
        future_path = future_path.replace("\\", "/").replace("//", "/")

        if len(future_path) > 255:
            full_hashed = Ut.hash_it(specs[St.linkset_name])
            specs[St.linkset_name] = "{}_{}_{}".format(source[St.graph_name],
                                                       specs[St.mechanism],
                                                       full_hashed)

        # if len(specs[St.linkset_name]) > 255:
        #     specs[St.linkset_name] = Ut.hash_it(specs[St.linkset_name])

        specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])

        return specs[St.linkset]

    else:

        h_name = specs[St.mechanism] + \
                 target[St.graph_name] + trg_aligns + \
                 source[St.graph_name] + src_aligns + \
                 target[St.entity_datatype] + source[St.entity_datatype] + "-" +\
                 reducer + intermediate + threshold + delta + geo

        hashed = hash(h_name)

        append = str(hashed).replace(
            "-",
            "N") if str(hashed).__contains__("-") else "P{}".format(hashed)

        specs[St.linkset_name] = "{}_{}_{}{}{}_{}_{}_{}".format(
            target[St.graph_name], source[St.graph_name], specs[St.mechanism],
            unit_value, unit, target[St.entity_name], trg_aligns, append)

        singleton_metadata_file = "{}(SingletonMetadata)-{}.trig".format(
            specs[St.linkset_name], date)
        singleton_metadata_output = "{}/{}".format(dir_name,
                                                   singleton_metadata_file)
        future_path = os.path.join(DIRECTORY, singleton_metadata_output)
        future_path = future_path.replace("\\", "/").replace("//", "/")

        if len(future_path) > 255:
            full_hashed = Ut.hash_it(specs[St.linkset_name])
            specs[St.linkset_name] = "{}_{}_{}".format(target[St.graph_name],
                                                       specs[St.mechanism],
                                                       full_hashed)

        # if len(specs[St.linkset_name]) > 255:
        #     specs[St.linkset_name] = Ut.hash_it(specs[St.linkset_name])

        specs[St.linkset] = "{}{}".format(Ns.linkset, specs[St.linkset_name])
        print "\t- specs[St.linkset]", specs[St.linkset]
        return specs[St.linkset]
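
# Standalone sketch of the long-path guard above, with md5 as a hypothetical
# stand-in for Ut.hash_it and illustrative names: when the metadata file path
# would exceed 255 characters, the linkset name collapses to a hashed form.
import hashlib
import os

demo_name = "grid_orgref_exactName" + "_x" * 150
demo_path = os.path.join("C:/linksets",
                         "{}(SingletonMetadata)-20180101.trig".format(demo_name))
if len(demo_path) > 255:
    demo_name = "grid_exactName_{}".format(
        hashlib.md5(demo_name.encode("utf-8")).hexdigest())
print demo_name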
Example #19
0
def stats(graph, display_table=False, display_text=False):
    optional = dict()
    stat = {}
    text = buffer()

    # 1. FIND ALL TYPES IN THE GRAPH
    qry_types = """
    ### RETRIEVE ALL TYPES FROM THE GRAPH
    SELECT DISTINCT ?Types (count(distinct ?resource) as ?EntityCount)
    {{
        GRAPH <{}>
        {{
            ?resource a ?Types .
        }}
    }} GROUP by ?Types ?EntityType ORDER BY ?Graph
    """.format(graph)
    types_matrix = sparql_xml_to_matrix(qry_types)
    # print types_matrix

    if display_table:
        display_matrix(types_matrix, spacing=70, limit=100, is_activated=True)

    # 2. FOR EACH TYPE, GET ALL ITS PROPERTIES
    if types_matrix["result"] is not None:

        types = types_matrix["result"]

        for i in range(1, len(types)):
            curr_type = types[i][0]
            type_name = Ut.get_uri_local_name(curr_type)
            instances = int(types[i][1])
            optional[type_name] = dict()
            qry_properties = """
            ### RETRIEVE ALL PROPERTIES FOR THE TYPE [{0}]
            SELECT DISTINCT ?Properties_for_{0}
            {{
                GRAPH <{1}>
                {{
                    ?resource   a                       <{2}> ;
                                ?Properties_for_{0}     ?object .
                }}
            }}
            """.format(type_name, graph, curr_type)
            properties_matrix = sparql_xml_to_matrix(qry_properties)

            if properties_matrix["result"] is not None:

                columns = 4
                rows = len(properties_matrix["result"])

                if display_table:
                    print "\nPROPERTY COUNT:", len(properties_matrix["result"]) - 1
                    display_matrix(properties_matrix, spacing=70, limit=100, is_activated=False)

                # PROPERTY OCCURRENCE COUNT
                # INITIALISE AN EMPTY [rows x columns] MATRIX OF STRINGS
                matrix = [["" for x in range(columns)] for y in range(rows)]

                properties = properties_matrix["result"]
                matrix[0][0] = properties[0][0]
                matrix[0][1] = "Optional"
                matrix[0][2] = "Instances"
                matrix[0][3] = "Percentage"
                # print type_name
                cur_dic = optional[type_name]
                for i in range(1, len(properties)):
                    qry_occurence = """
                    ### RETRIEVE THE NUMBER OF OCCURRENCES FOR THIS PROPERTY
                    ### TYPE        : {2}
                    ### PROPERTY    : {3}
                    ### GRAPH       : {1}
                    SELECT (count(?object) as ?Occurrences)
                    {{
                        GRAPH <{1}>
                        {{
                            ?resource   a   <{2}> ;
                                        <{3}>   ?object .
                        }}
                    }}
                    """.format(type_name, graph, curr_type, properties[i][0])
                    # print qry_occurence
                    Occurrences_matrix = sparql_xml_to_matrix(qry_occurence)
                    if Occurrences_matrix["result"] is not None:
                        # print Occurrences_matrix["result"][1][0]
                        # print i
                        matrix[i][0] = properties[i][0]
                        matrix[i][2] = Occurrences_matrix["result"][1][0]
                        matrix[i][3] = int(Occurrences_matrix["result"][1][0])/float(instances)
                        if int(Occurrences_matrix["result"][1][0])%float(instances) == 0:
                            matrix[i][1] = False
                            cur_dic[properties[i][0]] = False
                        else:
                            matrix[i][1] = True
                            cur_dic[properties[i][0]] = True

                        # matrix = properties_matrix["result"]  + matrix
                # print matrix
                to_display = {"message": "OK", "result": matrix}

                if display_table:
                    display_matrix(to_display, spacing=50, limit=100, is_activated=True)
                stat[type_name] = matrix

    text.write("\nGRAPH: {}".format(graph))
    for key, value in optional.items():
        line = "-------------------------------------------------------------------------------------------------"
        text.write("\n\n\tENTITY TYPE: {}".format(key))
        text.write("\n\t\t{:100}{}".format(line, "------------"))
        text.write("\n\t\t{:<3}{:97}{}".format(len(optional[key]), "Properties", "Optional"))
        text.write("\n\t\t{:100}{}".format(line, "------------"))

        for pro, opt in value.items():
            if opt:
                text.write("\n\t\t{:100}{}".format("{} ***".format(pro), opt))
            else:
                text.write("\n\t\t{:100}{}".format(pro, opt))

    if display_text:
        print text.getvalue()

    return optional