def check_edges(in_nodes, out_nodes, edge_type):
     # Assert that, for every (in_node, out_node) combination, an edge of
     # ``edge_type`` is a member of ``rel_graph_edges``.
     # NOTE(review): ``self`` and ``rel_graph_edges`` are free names here and
     # are not defined in this block -- presumably they come from an
     # enclosing test method; confirm against the surrounding code.
     not_found = "Could not find edge %s in set of edges:\n%s"
     for src in in_nodes:
         for dst in out_nodes:
             expected = RelationGraphEdge(src, dst, edge_type)
             present = expected in rel_graph_edges
             self.assertTrue(present,
                             not_found % (expected, rel_graph_edges))
 def test_max_series(self):
     # DataFrame.max() with default arguments reduces each column, so the
     # output is a two-element Series; its entries are addressed below as
     # the "O0" cells (0, 0) and (1, 0).
     input_df = pd.DataFrame([[1, 2], [2, 3], [2, 0]])
     # Input cell nodes keyed by (row, col), built in row-major order.
     cell = {
         (row, col): RelationGraphNode(
             "I0", (row, col), get_node_type(input_df.iat[row, col]))
         for row in range(3)
         for col in range(2)
     }
     output = input_df.max()
     result = [
         RelationGraphNode("O0", (row, 0), get_node_type(output.iat[row]))
         for row in range(2)
     ]

     options = RelationGraphOptions()
     options.NODE_TYPES = True
     rel_graph_edges = RelationGraph.build_relation_graph(
         [input_df], output, options).edges

     not_found = "Could not find edge %s in set of edges:\n%s"

     def expect_edges(endpoints, edge_type):
         # Build every expected edge first, then assert them in order.
         wanted = [RelationGraphEdge(src, dst, edge_type)
                   for src, dst in endpoints]
         for edge in wanted:
             self.assertTrue(edge in rel_graph_edges,
                             not_found % (edge, rel_graph_edges))

     #  positional edges
     expect_edges(
         [
             (cell[0, 0], cell[0, 1]),
             (cell[0, 0], cell[1, 0]),
             (cell[1, 0], cell[1, 1]),
             (cell[1, 0], cell[2, 0]),
             (cell[2, 0], cell[2, 1]),
             (cell[0, 1], cell[1, 1]),
             (cell[1, 1], cell[2, 1]),
             (result[0], result[1]),
         ],
         RelationGraphEdgeType.ADJACENCY)

     #  equality edges
     expect_edges(
         [
             (cell[1, 0], result[0]),
             (cell[2, 0], result[0]),
             (cell[0, 1], result[0]),  # redundant
             (cell[1, 1], result[1]),
         ],
         RelationGraphEdgeType.EQUALITY)
    def test_idx_multi(self):
        # Unstack a frame with a two-level row index and check that the
        # relation graph contains the listed adjacency, index and equality
        # edges.
        index = pd.MultiIndex.from_tuples([("bar", "one"), ("bar", "two")])
        input_df = pd.DataFrame([[0], [1]], index=index)
        #          0
        # bar one  0
        #     two  1
        output_df = input_df.unstack()
        #       0
        #     one two
        # bar   0   1

        options = RelationGraphOptions()
        for flag in ("COLUMN_NODES", "INDEX_NODES", "ADJACENCY_EDGES",
                     "EQUALITY_EDGES", "NODE_TYPES", "INDEX_EDGES"):
            setattr(options, flag, True)
        rel_graph_edges = RelationGraph.build_relation_graph(
            [input_df], output_df, options).edges

        index_kind = RelationGraphNodeType.INDEX
        column_kind = RelationGraphNodeType.COLUMN
        int_kind = RelationGraphNodeType.INT

        # "bar" labels: one per input row (column -2), one for the output.
        bar_in = [RelationGraphNode("I0", (row, -2), index_kind)
                  for row in range(2)]
        bar_out = RelationGraphNode("O0", (0, -1), index_kind)

        # "one"/"two" labels: row index on the input (column -1), column
        # labels on the output (row -1).
        sub_in = [RelationGraphNode("I0", (row, -1), index_kind)
                  for row in range(2)]
        sub_out = [RelationGraphNode("O0", (-1, col), column_kind)
                   for col in range(2)]

        # Data cells: a single column on the input, a single row on the
        # output.
        val_in = [RelationGraphNode("I0", (row, 0), int_kind)
                  for row in range(2)]
        val_out = [RelationGraphNode("O0", (0, col), int_kind)
                   for col in range(2)]

        not_found = "Could not find edge %s in set of edges:\n%s"

        def expect_edges(endpoints, edge_type):
            # Build every expected edge first, then assert them in order.
            wanted = [RelationGraphEdge(src, dst, edge_type)
                      for src, dst in endpoints]
            for edge in wanted:
                self.assertTrue(edge in rel_graph_edges,
                                not_found % (edge, rel_graph_edges))

        # adjacency edges
        expect_edges(
            [
                (bar_in[0], bar_in[1]),
                (bar_in[0], sub_in[0]),
                (bar_in[1], sub_in[1]),
                (sub_in[0], sub_in[1]),
            ],
            RelationGraphEdgeType.ADJACENCY)

        # indexing edges
        expect_edges(
            [
                (bar_in[0], val_in[0]),
                (sub_in[0], val_in[0]),
                (bar_in[1], val_in[1]),
                (sub_in[1], val_in[1]),
                (bar_out, val_out[0]),
                (bar_out, val_out[1]),
            ],
            RelationGraphEdgeType.INDEX)

        # equality edges
        expect_edges(
            [
                (bar_in[0], bar_out),
                (bar_in[1], bar_out),
                (sub_in[0], sub_out[0]),
                (sub_in[1], sub_out[1]),
            ],
            RelationGraphEdgeType.EQUALITY)
    def test_column_multi(self):
        # Stack a frame with two-level column labels, reset its index, and
        # check that the relation graph contains the listed adjacency, index
        # and equality edges.
        column_labels = [['bar', 'bar', 'baz', 'baz'],
                         ['one', 'two', 'one', 'two']]
        col_index = pd.MultiIndex.from_tuples(list(zip(*column_labels)))
        input_df = pd.DataFrame([[0, 1, 2, 3], [4, 5, 6, 7]],
                                columns=col_index)
        #   bar     baz
        #   one two one two
        # 0   0   1   2   3
        # 1   4   5   6   7
        output_df = input_df.stack().reset_index()
        #    level_0 level_1  bar  baz
        # 0        0     one    0    2
        # 1        0     two    1    3
        # 2        1     one    4    6
        # 3        1     two    5    7

        options = RelationGraphOptions()
        for flag in ("COLUMN_NODES", "ADJACENCY_EDGES", "EQUALITY_EDGES",
                     "NODE_TYPES", "INDEX_EDGES"):
            setattr(options, flag, True)
        rel_graph_edges = RelationGraph.build_relation_graph(
            [input_df], output_df, options).edges

        not_found = "Could not find edge %s in set of edges:\n%s"

        def expect(edge):
            self.assertTrue(edge in rel_graph_edges,
                            not_found % (edge, rel_graph_edges))

        def expect_cross(sources, targets, edge_type):
            # One edge per (source, target) combination, sources outermost.
            for src in sources:
                for dst in targets:
                    expect(RelationGraphEdge(src, dst, edge_type))

        # labels[level][col]: level 0 holds the row -2 label nodes, level 1
        # the row -1 label nodes.
        labels = [[RelationGraphNode("I0", (level, col),
                                     RelationGraphNodeType.COLUMN)
                   for col in range(4)]
                  for level in (-2, -1)]

        # Each entry is ((level, col), (level, col)) into ``labels``.
        adjacent_pairs = [
            ((0, 0), (1, 0)),
            ((0, 0), (0, 1)),
            ((1, 0), (1, 1)),
            ((1, 1), (1, 2)),
            ((0, 1), (1, 1)),
            ((0, 1), (0, 2)),
            ((0, 2), (1, 2)),
            ((0, 2), (0, 3)),
            ((1, 2), (1, 3)),
            ((0, 3), (1, 3)),
        ]
        adjacency_edges = [
            RelationGraphEdge(labels[a_lvl][a_col], labels[b_lvl][b_col],
                              RelationGraphEdgeType.ADJACENCY)
            for (a_lvl, a_col), (b_lvl, b_col) in adjacent_pairs
        ]
        for edge in adjacency_edges:
            expect(edge)

        # indexing edges: both labels of a column point at that column's
        # cells.
        cells_by_col = [[RelationGraphNode("I0", (row, col),
                                           RelationGraphNodeType.INT)
                         for row in range(2)]
                        for col in range(4)]
        for top, bottom, cells in zip(labels[0], labels[1], cells_by_col):
            expect_cross([top, bottom], cells, RelationGraphEdgeType.INDEX)

        # equality_edges
        bars, bazs = labels[0][:2], labels[0][2:]
        ones, twos = labels[1][0::2], labels[1][1::2]

        # Output cells of column 1 ("level_1"), one per output row.
        level_1 = [RelationGraphNode("O0", (row, 1),
                                     RelationGraphNodeType.STR)
                   for row in range(4)]

        out_col_2 = RelationGraphNode("O0", (-1, 2),
                                      RelationGraphNodeType.COLUMN)
        out_col_3 = RelationGraphNode("O0", (-1, 3),
                                      RelationGraphNodeType.COLUMN)

        expect_cross(bars, [out_col_2], RelationGraphEdgeType.EQUALITY)
        expect_cross(bazs, [out_col_3], RelationGraphEdgeType.EQUALITY)

        expect_cross(ones, level_1[0::2], RelationGraphEdgeType.EQUALITY)
        expect_cross(twos, level_1[1::2], RelationGraphEdgeType.EQUALITY)
 def test_dict(self):
     # The output is a plain dict of lists rather than a DataFrame; the
     # expected "O0" nodes treat key "A" as column 0 and key "B" as
     # column 1.
     input_df = pd.DataFrame([[1, 2], [3, 4]])
     # Input cell nodes keyed by (row, col), built in row-major order.
     src = {
         (row, col): RelationGraphNode(
             "I0", (row, col), get_node_type(input_df.iat[row, col]))
         for row in range(2)
         for col in range(2)
     }
     output = {"A": [1, 3], "B": [2, 4]}
     dst = {
         (row, col): RelationGraphNode(
             "O0", (row, col), get_node_type(output[key][row]))
         for row in range(2)
         for col, key in enumerate(("A", "B"))
     }

     options = RelationGraphOptions()
     options.NODE_TYPES = True
     rel_graph_edges = RelationGraph.build_relation_graph(
         [input_df], output, options).edges

     not_found = "Could not find edge %s in set of edges:\n%s"

     def expect_edges(endpoints, edge_type):
         # Build every expected edge first, then assert them in order.
         wanted = [RelationGraphEdge(first, second, edge_type)
                   for first, second in endpoints]
         for edge in wanted:
             self.assertTrue(edge in rel_graph_edges,
                             not_found % (edge, rel_graph_edges))

     # positional edges: the same four neighbour pairs on the input grid,
     # then on the output grid.
     neighbours = [((0, 0), (0, 1)),
                   ((0, 0), (1, 0)),
                   ((1, 0), (1, 1)),
                   ((0, 1), (1, 1))]
     expect_edges(
         [(grid[a], grid[b]) for grid in (src, dst) for a, b in neighbours],
         RelationGraphEdgeType.ADJACENCY)

     # equality edges: each input cell equals the output cell at the same
     # position.
     expect_edges(
         [(src[pos], dst[pos]) for pos in ((0, 0), (1, 0), (0, 1), (1, 1))],
         RelationGraphEdgeType.EQUALITY)