Esempio n. 1
0
def graph_join_tree(first_collection_constructor,
                    collection_constructor_morphism,
                    second_collection_constructor, left):
    """Join a graph collection with a tree collection into a new graph.

    Every object of the first collection is looked up in the collection
    relationship. When related subtrees are found, each of them is merged
    into the attribute dictionary of the vertex or edge, and the enriched
    object is added to the result graph. The result graph is pickled to a
    file derived from the first collection's file path.

    Args:
        first_collection_constructor: Constructor of the graph collection
            whose vertices and edges are iterated.
        collection_constructor_morphism: Morphism from which the relationship
            between the two collections is obtained.
        second_collection_constructor: Constructor of the second (tree)
            collection.
        left: When equal to ``True``, objects of the first collection that
            have no related subtree are copied to the result unchanged;
            otherwise such objects are left out.

    Returns:
        A pair ``(result, result_model)``: the ``CollectionConstructor``
        wrapping the stored result graph and the ``ModelCategoryJoin`` that
        describes the joined model.
    """
    (name, first_collection, first_model, collection_relationship,
     model_relationship, second_collection, second_model, first_file_path,
     result_file_name) = parse_info_for_join(
         first_collection_constructor, collection_constructor_morphism,
         second_collection_constructor)
    joined_graph = nx.DiGraph()
    objects = first_collection.get_iterable_collection_of_objects()
    print(objects)

    ## Each object is either (vertex_id, dict) or (source_id, target_id, dict).
    ## Related subtrees are assumed to be plain dictionaries.
    for graph_object in objects:
        subtrees = collection_relationship.get_relationship(graph_object)
        if len(subtrees) > 0:
            if len(graph_object) == 2:
                vertex_attributes = dict()
                for subtree in subtrees:
                    print(graph_object, subtree)
                    print(type(subtree))
                    enriched = merge_two_dicts(graph_object[1], subtree)
                    vertex_attributes = merge_two_dicts(
                        vertex_attributes, enriched)
                joined_graph.add_nodes_from(
                    [(graph_object[0], vertex_attributes)])
            elif len(graph_object) == 3:
                edge_attributes = dict()
                for subtree in subtrees:
                    enriched = merge_two_dicts(graph_object[2], subtree)
                    edge_attributes = merge_two_dicts(
                        edge_attributes, enriched)
                joined_graph.add_edges_from(
                    [(graph_object[0], graph_object[1], edge_attributes)])
        ## Left join: keep objects of the first collection even though they
        ## are not related to anything in the second collection.
        elif left == True:
            object_size = len(graph_object)
            if object_size == 2:
                joined_graph.add_nodes_from([graph_object])
            elif object_size == 3:
                joined_graph.add_edges_from([graph_object])

    result_file_path = parse_file_path(first_file_path, result_file_name)
    nx.write_gpickle(joined_graph,
                     result_file_path,
                     protocol=pickle.HIGHEST_PROTOCOL)

    result_collection = GraphCollection(name)
    result_collection.set_target_file_path(result_file_path)

    result_model = ModelCategoryJoin(first_model, model_relationship,
                                     second_model, left)
    joined_constructor = CollectionConstructor(name,
                                               result_model.get_result(),
                                               result_collection)
    return joined_constructor, result_model
Esempio n. 2
0
def tree_join_table(first_collection_constructor,
                    collection_constructor_morphism,
                    second_collection_constructor, left, attributes):
    """Join a tree collection with a table collection into a new tree.

    The original tree is first copied into a new shelf so the source data is
    not modified. Nodes of the copy that carry one of the given attributes
    are then looked up in the collection relationship, and every node with
    related elements is replaced by the node merged with those elements.

    Args:
        first_collection_constructor: Constructor of the tree collection.
        collection_constructor_morphism: Morphism from which the relationship
            between the two collections is obtained.
        second_collection_constructor: Constructor of the second (table)
            collection.
        left: When equal to ``False``, nodes without any related element are
            removed from the copied tree; otherwise they are kept as they are.
        attributes: Attribute names used to locate the nodes taking part in
            the join, so that the whole tree does not have to be traversed.

    Returns:
        A pair ``(constructor, result_model)``: the ``CollectionConstructor``
        wrapping the result tree collection and the ``ModelCategoryJoin``
        describing the joined model.

    Raises:
        Exception: If some attribute in ``attributes`` matches no node.
    """
    (name, first_collection, first_model, collection_relationship,
     model_relationship, second_collection, second_model, first_file_path,
     result_file_name) = parse_info_for_join(
         first_collection_constructor, collection_constructor_morphism,
         second_collection_constructor)
    result_path = parse_file_path(first_file_path, result_file_name)
    ## NOTE(review): the shelf is never closed explicitly in this function.
    result = shelve.open(result_path)

    ## Copy the original tree so that the join never touches the source data.
    source_tree = first_collection.get_iterable_collection_of_objects()
    for key in source_tree.keys():
        result[key] = source_tree[key]

    ## The result collection points at the copy that was just written.
    result_collection = TreeCollection(result_file_name,
                                       target_file_path=result_path)

    ## Collect (node, path) pairs for every requested attribute. The path
    ## makes it possible to address the node again when substituting the new
    ## value, because trees have no unique id system in this demo.
    node_path_pairs = []
    for attribute in attributes:
        found = result_collection.find_elements_with_attribute_and_path(
            attribute, "")
        if len(found) == 0:
            raise Exception("No nodes for the given attribute.", attributes)
        node_path_pairs = node_path_pairs + found

    for pair in node_path_pairs:
        node, path = pair[0], pair[1]
        related_rows = collection_relationship.get_relationship(node)
        if len(related_rows) > 0:
            merged_node = dict()
            for row in related_rows:
                print(row)
                ## Only the last component of the related element is merged.
                last_component = row[len(row) - 1]
                merged_node = merge_two_dicts(
                    merged_node, merge_two_dicts(node, last_component))
                print(path, merged_node)
            update(result, path, merged_node)
        elif left == False:
            remove(path, result)

    result_model = ModelCategoryJoin(first_model, model_relationship,
                                     second_model, left)
    joined_constructor = CollectionConstructor(name,
                                               result_model.get_result(),
                                               result_collection)
    return joined_constructor, result_model
Esempio n. 3
0
def graph_join_graph(first_collection_constructor,
                     collection_constructor_morphism,
                     second_collection_constructor,
                     left=False,
                     right=False):
    """Join two graph collections into a new graph.

    With both ``left`` and ``right`` equal to ``True`` the result is simply
    the union of the two graphs. Otherwise every object of the first graph is
    looked up in the collection relationship and its attribute dictionary is
    merged with the attribute dictionaries of the related objects.

    Args:
        first_collection_constructor: Constructor of the first graph
            collection, whose vertices and edges are iterated.
        collection_constructor_morphism: Morphism from which the relationship
            between the two collections is obtained.
        second_collection_constructor: Constructor of the second graph
            collection.
        left: When equal to ``True``, objects of the first graph without any
            related object are copied to the result unchanged.
        right: When equal to ``True`` (and ``left`` equal to ``False``), the
            second graph is united with the joined result.

    Returns:
        A pair ``(result, result_model)``: the ``CollectionConstructor``
        wrapping the stored result graph and the ``ModelCategoryJoin``
        describing the joined model.
    """
    (name, first_collection, first_model, collection_relationship,
     model_relationship, second_collection, second_model, first_file_path,
     result_file_name) = parse_info_for_join(
         first_collection_constructor, collection_constructor_morphism,
         second_collection_constructor)

    joined_graph = nx.DiGraph()
    objects = first_collection.get_iterable_collection_of_objects()

    if right == True and left == True:
        ## Full join: the relation does not matter, both graphs are included
        ## entirely.
        joined_graph = graph_union(first_collection.get_graph(),
                                   second_collection.get_graph())
    else:
        ## Objects on both sides are either (vertex_id, dict) or
        ## (source_id, target_id, dict); the dict is always the last item.
        for graph_object in objects:
            related = collection_relationship.get_relationship(graph_object)
            if len(related) > 0:
                if len(graph_object) == 2:
                    vertex_attributes = dict()
                    for other in related:
                        print(graph_object, other)
                        other_size = len(other)
                        if other_size == 2:
                            other_attributes = other[1]
                        elif other_size == 3:
                            other_attributes = other[2]
                        else:
                            ## Related objects of any other shape are skipped.
                            continue
                        vertex_attributes = merge_two_dicts(
                            vertex_attributes,
                            merge_two_dicts(graph_object[1],
                                            other_attributes))
                    joined_graph.add_nodes_from(
                        [(graph_object[0], vertex_attributes)])
                elif len(graph_object) == 3:
                    edge_attributes = dict()
                    for other in related:
                        other_size = len(other)
                        if other_size == 2:
                            other_attributes = other[1]
                        elif other_size == 3:
                            other_attributes = other[2]
                        else:
                            continue
                        edge_attributes = merge_two_dicts(
                            edge_attributes,
                            merge_two_dicts(graph_object[2],
                                            other_attributes))
                    joined_graph.add_edges_from(
                        [(graph_object[0], graph_object[1],
                          edge_attributes)])
            ## Left join: keep objects of the first graph that are not
            ## related to anything in the second graph.
            elif left == True:
                object_size = len(graph_object)
                if object_size == 2:
                    joined_graph.add_nodes_from([graph_object])
                elif object_size == 3:
                    joined_graph.add_edges_from([graph_object])
        ## Right join: also include the objects of the second graph that are
        ## not in the image of the relation.
        if right == True and left == False:
            joined_graph = graph_union(joined_graph,
                                       second_collection.get_graph())

    result_file_path = parse_file_path(first_file_path, result_file_name)
    nx.write_gpickle(joined_graph,
                     result_file_path,
                     protocol=pickle.HIGHEST_PROTOCOL)
    result_collection = GraphCollection(name)
    result_collection.set_target_file_path(result_file_path)

    result_model = ModelCategoryJoin(first_model, model_relationship,
                                     second_model, left, right)
    joined_constructor = CollectionConstructor(name,
                                               result_model.get_result(),
                                               result_collection)
    return joined_constructor, result_model
Esempio n. 4
0
def graph_join_table(first_collection_constructor,
                     collection_constructor_morphism,
                     second_collection_constructor,
                     left=False):
    """Join a graph collection with a table collection into a new graph.

    Every object of the first collection is looked up in the collection
    relationship. When related elements are found, each of them is merged
    into the attribute dictionary of the vertex or edge, and the enriched
    object is added to the result graph, which is then pickled to a file.

    Args:
        first_collection_constructor: Constructor of the graph collection
            whose vertices and edges are iterated.
        collection_constructor_morphism: Morphism from which the relationship
            between the two collections is obtained.
        second_collection_constructor: Constructor of the second (table)
            collection.
        left: When equal to ``True``, objects of the first collection without
            any related element are copied to the result unchanged.

    Returns:
        A pair ``(result, result_model)``: the ``CollectionConstructor``
        wrapping the stored result graph and the ``ModelCategoryJoin``
        describing the joined model.
    """
    (name, first_collection, first_model, collection_relationship,
     model_relationship, second_collection, second_model, first_file_path,
     result_file_name) = parse_info_for_join(
         first_collection_constructor, collection_constructor_morphism,
         second_collection_constructor)
    joined_graph = nx.DiGraph()
    objects = first_collection.get_iterable_collection_of_objects()

    ## Each object is either (vertex_id, dict) or (source_id, target_id, dict).
    ## Related elements are assumed to be plain dictionaries.
    for graph_object in objects:
        related_rows = collection_relationship.get_relationship(graph_object)
        if len(related_rows) > 0:
            if len(graph_object) == 2:
                vertex_attributes = dict()
                for row in related_rows:
                    enriched = merge_two_dicts(graph_object[1], row)
                    vertex_attributes = merge_two_dicts(
                        vertex_attributes, enriched)
                joined_graph.add_nodes_from(
                    [(graph_object[0], vertex_attributes)])
            elif len(graph_object) == 3:
                edge_attributes = dict()
                for row in related_rows:
                    enriched = merge_two_dicts(graph_object[2], row)
                    edge_attributes = merge_two_dicts(
                        edge_attributes, enriched)
                joined_graph.add_edges_from(
                    [(graph_object[0], graph_object[1], edge_attributes)])
        ## Left join: keep objects that are not related to any element.
        elif left == True:
            object_size = len(graph_object)
            if object_size == 2:
                joined_graph.add_nodes_from([graph_object])
            elif object_size == 3:
                joined_graph.add_edges_from([graph_object])

    result_file_path = parse_file_path(first_file_path, result_file_name)
    nx.write_gpickle(joined_graph,
                     result_file_path,
                     protocol=pickle.HIGHEST_PROTOCOL)
    result_collection = GraphCollection(name)
    result_collection.set_target_file_path(result_file_path)

    result_model = ModelCategoryJoin(first_model, model_relationship,
                                     second_model, left)
    joined_constructor = CollectionConstructor(name,
                                               result_model.get_result(),
                                               result_collection)
    return joined_constructor, result_model
Esempio n. 5
0
def table_join_graph(first_collection_constructor,
                     collection_constructor_morphism,
                     second_collection_constructor,
                     second_description,
                     left=False):
    """Join a table collection with a graph collection into a new table.

    For every row of the first collection and every related graph object, one
    result row is written. It holds the columns of the first collection
    followed by the columns named in ``second_description``, which are read
    from the last item of the related graph object.

    Args:
        first_collection_constructor: Constructor of the table collection
            whose rows are iterated.
        collection_constructor_morphism: Morphism from which the relationship
            between the two collections is obtained.
        second_collection_constructor: Constructor of the second (graph)
            collection.
        second_description: Dictionary describing the columns taken from the
            related graph objects. Related objects are implicitly assumed to
            follow this description.
        left: When equal to ``True``, rows of the first collection without
            any related object are written as well, with the columns of the
            second description left at their default values.

    Returns:
        A pair ``(result, result_model)``: the ``CollectionConstructor``
        wrapping the result table collection and the ``ModelCategoryJoin``
        describing the joined model.
    """
    (name, first_collection, first_model, collection_relationship,
     model_relationship, second_collection, second_model, first_file_path,
     result_file_name) = parse_info_for_join(
         first_collection_constructor, collection_constructor_morphism,
         second_collection_constructor)

    first_collection_description = (
        first_collection.get_attributes_datatypes_dict())

    shared_keys = set(first_collection_description.keys()).intersection(
        set(second_description.keys()))
    if len(shared_keys) > 0:
        print(
            "Warning: The descriptions are not disjoint. This might cause problems in the evaluation."
        )

    result_description = merge_two_dicts(first_collection_description,
                                         second_description)
    ## The first `length_of_first_collection_description` keys of the merged
    ## description are treated as columns of the first collection; the rest
    ## come from the second description.
    length_of_first_collection_description = len(first_collection_description)
    second_file_path = second_collection.get_target_file_path()
    result_collection, result_table_row, result_h5file = create_h5file(
        result_description, first_file_path, second_file_path,
        collection_constructor_morphism)

    ## Close the result file even when writing a row fails.
    try:
        objects = first_collection.get_iterable_collection_of_objects()
        for elem in objects:
            result_list = collection_relationship.get_relationship(elem)
            if len(result_list) > 0:
                for elem2 in result_list:
                    for j, key in enumerate(result_description):
                        if j >= length_of_first_collection_description:
                            ## The attribute dictionary is the last item of
                            ## the related graph object.
                            result_table_row[key] = elem2[len(elem2) - 1][key]
                        else:
                            result_table_row[key] = elem[key]
                    result_table_row.append()
            elif left == True:
                ## Left join: write the row with only the first collection's
                ## columns. If we do not set values for all the columns in a
                ## row, the predefined default value is used which serves as
                ## NULL.
                for j, key in enumerate(result_description):
                    if j < length_of_first_collection_description:
                        result_table_row[key] = elem[key]
                result_table_row.append()
    finally:
        result_h5file.close()

    result_model = ModelCategoryJoin(first_model, model_relationship,
                                     second_model, left)
    result = CollectionConstructor(name, result_model.get_result(),
                                   result_collection)
    return result, result_model
Esempio n. 6
0
def table_join_table(first_collection_constructor,
                     collection_constructor_morphism,
                     second_collection_constructor,
                     left=False):
    """Join two table collections into a new table.

    For every row of the first collection and every related row of the
    second collection, one result row is written. It holds the columns of
    the first collection followed by the columns of the second collection.

    Args:
        first_collection_constructor: Constructor of the first table
            collection, whose rows are iterated.
        collection_constructor_morphism: Morphism from which the relationship
            between the two collections is obtained.
        second_collection_constructor: Constructor of the second table
            collection.
        left: When equal to ``True``, rows of the first collection without
            any related row are written as well, with the columns of the
            second collection left at their default values.

    Returns:
        A pair ``(result, result_model)``: the ``CollectionConstructor``
        wrapping the result table collection and the ``ModelCategoryJoin``
        describing the joined model.
    """
    (name, first_collection, first_model, collection_relationship,
     model_relationship, second_collection, second_model, first_file_path,
     result_file_name) = parse_info_for_join(
         first_collection_constructor, collection_constructor_morphism,
         second_collection_constructor)

    first_collection_description = (
        first_collection.get_attributes_datatypes_dict())
    second_collection_description = (
        second_collection.get_attributes_datatypes_dict())
    ## The first `length_of_first_collection_description` keys of the merged
    ## description are treated as columns of the first collection; the rest
    ## come from the second collection.
    length_of_first_collection_description = len(first_collection_description)
    result_description = merge_two_dicts(first_collection_description,
                                         second_collection_description)
    second_file_path = second_collection.get_target_file_path()
    result_collection, result_table_row, result_h5file = create_h5file(
        result_description, first_file_path, second_file_path,
        collection_constructor_morphism)

    ## Close the result file even when writing a row fails.
    try:
        objects = first_collection.get_iterable_collection_of_objects()
        for elem in objects:
            result_list = collection_relationship.get_relationship(elem)
            if len(result_list) > 0:
                for elem2 in result_list:
                    for j, key in enumerate(result_description):
                        if j >= length_of_first_collection_description:
                            result_table_row[key] = elem2[key]
                        else:
                            result_table_row[key] = elem[key]
                    result_table_row.append()
            elif left == True:
                ## Left join: write the row with only the first collection's
                ## columns. If we do not set values for all the columns in a
                ## row, the predefined default value is used which serves as
                ## NULL.
                for j, key in enumerate(result_description):
                    if j < length_of_first_collection_description:
                        result_table_row[key] = elem[key]
                result_table_row.append()
    finally:
        result_h5file.close()

    ## `left` is forwarded so the result model is built with the same join
    ## flag as in the sibling join functions.
    result_model = ModelCategoryJoin(first_model, model_relationship,
                                     second_model, left)
    result = CollectionConstructor(name, result_model.get_result(),
                                   result_collection)
    return result, result_model
Esempio n. 7
0
def table_join_tree(first_collection_constructor,
                    collection_constructor_morphism,
                    second_collection_constructor,
                    second_description,
                    left=False):
    """Join a table collection with a tree collection into a new table.

    For every row of the first collection and every related subtree, one
    result row is written. It holds the columns of the first collection
    followed by the columns named in ``second_description``, whose values
    are picked from the subtree with ``find_values_from_tree``.

    Args:
        first_collection_constructor: Constructor of the table collection
            whose rows are iterated.
        collection_constructor_morphism: Morphism from which the relationship
            between the two collections is obtained.
        second_collection_constructor: Constructor of the second (tree)
            collection.
        second_description: Dictionary describing the columns whose values
            are picked from the related subtrees.
        left: When equal to ``True``, rows of the first collection without
            any related subtree are written as well, with the columns of the
            second description left at their default values.

    Returns:
        A pair ``(result, result_model)``: the ``CollectionConstructor``
        wrapping the result table collection and the ``ModelCategoryJoin``
        describing the joined model.
    """
    (name, first_collection, first_model, collection_relationship,
     model_relationship, second_collection, second_model, first_file_path,
     result_file_name) = parse_info_for_join(
         first_collection_constructor, collection_constructor_morphism,
         second_collection_constructor)

    first_collection_description = (
        first_collection.get_attributes_datatypes_dict())

    shared_keys = set(first_collection_description.keys()).intersection(
        set(second_description.keys()))
    if len(shared_keys) > 0:
        print(
            "Warning: The descriptions are not disjoint. This might cause problems in the evaluation."
        )

    result_description = merge_two_dicts(first_collection_description,
                                         second_description)
    ## The first `length_of_first_collection_description` keys of the merged
    ## description are treated as columns of the first collection; the rest
    ## come from the second description.
    length_of_first_collection_description = len(first_collection_description)
    second_file_path = second_collection.get_target_file_path()
    result_collection, result_table_row, result_h5file = create_h5file(
        result_description, first_file_path, second_file_path,
        collection_constructor_morphism)

    ## Close the result file even when writing a row fails.
    try:
        objects = first_collection.get_iterable_collection_of_objects()
        for elem in objects:
            result_list = collection_relationship.get_relationship(elem)
            if len(result_list) > 0:
                ## Every related element is assumed to have a tree structure
                ## from which the values named in the second description are
                ## picked.
                for elem2 in result_list:
                    for j, key in enumerate(result_description):
                        if j < length_of_first_collection_description:
                            result_table_row[key] = elem[key]
                            continue
                        picked_values_from_tree = find_values_from_tree(
                            elem2, key)
                        if len(picked_values_from_tree) == 0:
                            ## Nothing is assigned, so the column keeps its
                            ## default value in this row.
                            print("No value for " + str(key) +
                                  " in the subtree.")
                            print(
                                "The table will have the default value for " +
                                str(key) + " in this row.")
                            continue
                        if len(picked_values_from_tree) > 1:
                            print(
                                "Warning! With key " + str(key) +
                                " exist multiple values. The algorithm picks the first."
                            )
                        result_table_row[key] = picked_values_from_tree[0]
                    result_table_row.append()
            elif left == True:
                ## Left join: write the row with only the first collection's
                ## columns. If we do not set values for all the columns in a
                ## row, the predefined default value is used which serves as
                ## NULL.
                for j, key in enumerate(result_description):
                    if j < length_of_first_collection_description:
                        result_table_row[key] = elem[key]
                result_table_row.append()
    finally:
        result_h5file.close()

    ## `left` is forwarded so the result model is built with the same join
    ## flag as in the sibling join functions.
    result_model = ModelCategoryJoin(first_model, model_relationship,
                                     second_model, left)
    result = CollectionConstructor(name, result_model.get_result(),
                                   result_collection)
    return result, result_model