Esempio n. 1
0
 def __init__(self, obj: pd.DataFrame, type_str: str, source: str,
              options: GraphOptions, **kwargs):
     options = copy.copy(options)
     options.INDEX_NODES = False
     options.COLUMN_NODES = False
     options.INDEX_NAME_NODES = False
     options.INDEX_EDGES = False
     options.INDEX_NAME_EDGES = False
     super().__init__(obj, type_str, source, options, **kwargs)
Esempio n. 2
0
    def test_index_name_nodes(self):
        df = pd.DataFrame({
            'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
            'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
            'baz': [1, 2, 3, 4, 5, 6]
        })
        output = df.pivot(index='foo', columns='bar', values='baz')

        options = GraphOptions()
        options.COLUMN_NODES = True
        options.INDEX_NODES = True
        options.INDEX_NAME_NODES = True
        options.ADJACENCY_EDGES = True
        options.EQUALITY_EDGES = True
        options.NODE_TYPES = True
        options.INDEX_EDGES = False

        rel_graph: RelationGraph = RelationGraph(options)
        rel_graph.from_input_output([df], output)
        index_name_nodes = [
            node for node in rel_graph.nodes
            if node.ntype == GraphNodeType.INDEX_NAME
        ]
        column_name_nodes = [
            node for node in rel_graph.nodes
            if node.ntype == GraphNodeType.COL_INDEX_NAME
        ]

        self.assertEqual(len(index_name_nodes), 1)
        self.assertEqual(len(column_name_nodes), 1)
Esempio n. 3
0
    def test_no_spurious_for_idx_arg(self):
        df = pd.DataFrame([[5, 2], [2, 3], [2, 0]], columns=["A", "B"])

        options = GraphOptions()
        options.COLUMN_NODES = True
        options.INDEX_NODES = True
        options.ADJACENCY_EDGES = True
        options.EQUALITY_EDGES = True
        options.NODE_TYPES = True
        options.INDEX_EDGES = True
        options.INFLUENCE_EDGES = False

        rel_graph: RelationGraph = RelationGraph(options)
        rel_graph.from_input_output([df, df.columns], df)

        index_type_nodes = [
            node for node in rel_graph.nodes
            if node.ntype == GraphNodeType.INDEX
        ]
        column_type_nodes = [
            node for node in rel_graph.nodes
            if node.ntype == GraphNodeType.COLUMN
        ]

        self.assertEqual(len(index_type_nodes), 6)
        self.assertEqual(len(column_type_nodes), 4)
Esempio n. 4
0
    def test_index_name_equality_edges(self):
        df = pd.DataFrame({
            'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
            'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
            'baz': [1, 2, 3, 4, 5, 6]
        })
        output = df.pivot(index='foo', columns='bar', values='baz')

        options = GraphOptions()
        options.COLUMN_NODES = True
        options.INDEX_NODES = True
        options.INDEX_NAME_NODES = True
        options.ADJACENCY_EDGES = False
        options.EQUALITY_EDGES = True
        options.NODE_TYPES = True
        options.INDEX_EDGES = False
        rel_graph: RelationGraph = RelationGraph(options)
        rel_graph.from_input_output([df], output)
        inp_col_nodes = [
            node for node in rel_graph.nodes if
            node.ntype == GraphNodeType.COLUMN and node.source.startswith("I")
        ]
        out_idx_name_nodes = [
            node for node in rel_graph.nodes
            if node.ntype == GraphNodeType.INDEX_NAME
            and node.source.startswith("O")
        ]
        out_col_idx_name_nodes = [
            node for node in rel_graph.nodes
            if node.ntype == GraphNodeType.COL_INDEX_NAME
            and node.source.startswith("O")
        ]

        def check_edge_exists(in_node: GraphNode, out_node: GraphNode,
                              graph: RelationGraph):
            for e in graph.edges:
                if (e.node1 == in_node
                        and e.node2 == out_node) or (e.node1 == out_node
                                                     and e.node2 == in_node):
                    return True

            return False

        inp_foo_node = [i for i in inp_col_nodes
                        if i.identifier == '[-1,0]'][0]
        inp_bar_node = [i for i in inp_col_nodes
                        if i.identifier == '[-1,1]'][0]
        out_foo_node = [
            i for i in out_idx_name_nodes if i.identifier == '[-1,-1]'
        ][0]
        out_bar_node = [
            i for i in out_col_idx_name_nodes if i.identifier == '[-1,-1]'
        ][0]

        self.assertTrue(
            check_edge_exists(inp_foo_node, out_foo_node, rel_graph))
        self.assertTrue(
            check_edge_exists(inp_bar_node, out_bar_node, rel_graph))
Esempio n. 5
0
    def test_groupby_has_artifacts(self):
        df = pd.DataFrame([[5, 2], [2, 3], [2, 0]], columns=["A", "B"])
        output = df.groupby(by="A")

        options = GraphOptions()
        options.COLUMN_NODES = True
        options.INDEX_NODES = True
        options.ADJACENCY_EDGES = True
        options.EQUALITY_EDGES = True
        options.NODE_TYPES = True
        options.INDEX_EDGES = True

        rel_graph: RelationGraph = RelationGraph(options)
        rel_graph.from_input_output([df], output)

        index_type_nodes = [
            node for node in rel_graph.nodes
            if node.ntype == GraphNodeType.INDEX
        ]
        column_type_nodes = [
            node for node in rel_graph.nodes
            if node.ntype == GraphNodeType.COLUMN
        ]

        self.assertEqual(len(index_type_nodes), 6)
        self.assertEqual(len(column_type_nodes), 6)
Esempio n. 6
0
        def process(cls, raw_data: Dict):
            if raw_data is None:
                return None

            try:
                graph = RelationGraph(GraphOptions())
                inputs = raw_data['inputs']
                output = raw_data['output']
                graph.from_input_output(inputs, output)

                encoding = graph.get_encoding()
                encoding['label'] = raw_data['function_sequence']
                return encoding

            except SilentException:
                return None

            except Exception as e:
                try:
                    logger.warn("Failed for {}".format(raw_data))
                    logging.exception(e)
                    return None

                except:
                    pass

                return None
Esempio n. 7
0
    def iter_func_seqs(self) -> Generator[List[BaseGenerator], None, None]:
        generators: Dict[str, BaseGenerator] = load_generators()
        if self.model_store is None or 'function-model' not in self.model_store:
            model = ModelStore({'function-model': self.model_path})
        else:
            model = self.model_store

        if self.use_old_featurization:
            from autopandas_v2.ml.featurization_old.featurizer import RelationGraph
            from autopandas_v2.ml.featurization_old.options import GraphOptions
        else:
            from autopandas_v2.ml.featurization.featurizer import RelationGraph
            from autopandas_v2.ml.featurization.options import GraphOptions

        options = GraphOptions()
        graph: RelationGraph = RelationGraph(options)
        graph.from_input_output(self.iospec.inputs, self.iospec.output)
        encoding = graph.get_encoding(get_mapping=False)

        str_seqs, probs = list(
            zip(*model.predict_graphs('function-model', [encoding],
                                      top_k=self.top_k)[0]))
        str_seqs = [i.split(':') for i in str_seqs]
        model.close()

        for str_seq in str_seqs:
            result = [generators[i] for i in str_seq]
            if self.typecheck(result, self.iospec.output):
                logger.info(str_seq)
                yield result
            else:
                logger.warn("Skipping", str_seq)
Esempio n. 8
0
 def test_basic_max(self):
     input_df = pd.DataFrame([[1, 2], [2, 3], [2, 0]])
     input_00 = GraphNode("I0", '[0,0]', get_node_type(input_df.iat[0, 0]))
     input_01 = GraphNode("I0", '[0,1]', get_node_type(input_df.iat[0, 1]))
     input_10 = GraphNode("I0", '[1,0]', get_node_type(input_df.iat[1, 0]))
     input_11 = GraphNode("I0", '[1,1]', get_node_type(input_df.iat[1, 1]))
     input_20 = GraphNode("I0", '[2,0]', get_node_type(input_df.iat[2, 0]))
     input_21 = GraphNode("I0", '[2,1]', get_node_type(input_df.iat[2, 1]))
     output_df = pd.DataFrame([[2, 3]])
     output_00 = GraphNode("O0", '[0,0]', get_node_type(output_df.iat[0,
                                                                      0]))
     output_01 = GraphNode("O0", '[0,1]', get_node_type(output_df.iat[0,
                                                                      1]))
     options = GraphOptions()
     options.NODE_TYPES = True
     rel_graph: RelationGraph = RelationGraph(options)
     rel_graph.from_input_output([input_df], output_df)
     rel_graph_edges = rel_graph.edges
     #  positional edges
     positional_edges = [
         GraphEdge(input_00, input_01, GraphEdgeType.ADJACENCY),
         GraphEdge(input_00, input_10, GraphEdgeType.ADJACENCY),
         GraphEdge(input_10, input_11, GraphEdgeType.ADJACENCY),
         GraphEdge(input_10, input_20, GraphEdgeType.ADJACENCY),
         GraphEdge(input_20, input_21, GraphEdgeType.ADJACENCY),
         GraphEdge(input_01, input_11, GraphEdgeType.ADJACENCY),
         GraphEdge(input_11, input_21, GraphEdgeType.ADJACENCY),
         GraphEdge(output_00, output_01, GraphEdgeType.ADJACENCY)
     ]
     for edge in positional_edges:
         self.assertTrue(
             edge in rel_graph_edges,
             "Could not find edge %s in set of edges:\n%s" %
             (edge, rel_graph_edges))
     #  equality edges
     equality_edges = [
         GraphEdge(input_10, output_00, GraphEdgeType.EQUALITY),
         GraphEdge(input_20, output_00, GraphEdgeType.EQUALITY),
         GraphEdge(input_01, output_00,
                   GraphEdgeType.EQUALITY),  # redundant
         GraphEdge(input_11, output_01, GraphEdgeType.EQUALITY)
     ]
     for edge in equality_edges:
         self.assertTrue(
             edge in rel_graph_edges,
             "Could not find edge %s in set of edges:\n%s" %
             (edge, rel_graph_edges))
Esempio n. 9
0
    def test_groupby_input(self):
        df = pd.DataFrame({
            "Name": ["Alice", "Bob", "Mallory", "Mallory", "Bob", "Mallory"],
            "City": [
                "Seattle", "Seattle", "Portland", "Seattle", "Seattle",
                "Portland"
            ]
        })
        input_ = df.groupby("Name")
        output = input_.count().reset_index()
        options = GraphOptions()
        options.NODE_TYPES = True
        options.ADJACENCY_EDGES = False
        rel_graph: RelationGraph = RelationGraph(options)
        rel_graph.from_input_output([input_], output)
        rel_graph_edges = rel_graph.edges
        alice_nodes_in = [GraphNode("I0_0", '[0,0]', GraphNodeType.STR)]

        alice_nodes_out = [GraphNode("O0", '[0,0]', GraphNodeType.STR)]

        bob_nodes_in = [
            GraphNode("I0_1", '[0,0]', GraphNodeType.STR),
            GraphNode("I0_1", '[1,0]', GraphNodeType.STR)
        ]
        bob_nodes_out = [GraphNode("O0", '[1,0]', GraphNodeType.STR)]

        mallory_nodes_in = [
            GraphNode("I0_2", '[0,0]', GraphNodeType.STR),
            GraphNode("I0_2", '[1,0]', GraphNodeType.STR),
            GraphNode("I0_2", '[2,0]', GraphNodeType.STR)
        ]
        mallory_nodes_out = [GraphNode("O0", '[2,0]', GraphNodeType.STR)]

        def check_edges(in_nodes, out_nodes):
            for in_node in in_nodes:
                for out_node in out_nodes:
                    edge = GraphEdge(in_node, out_node, GraphEdgeType.EQUALITY)
                    self.assertTrue(
                        edge in rel_graph_edges,
                        "Could not find edge %s in set of edges:\n%s" %
                        (edge, rel_graph_edges))

        check_edges(alice_nodes_in, alice_nodes_out)
        check_edges(bob_nodes_in, bob_nodes_out)
        check_edges(mallory_nodes_in, mallory_nodes_out)
Esempio n. 10
0
    def predict(self, dpoints: List[Tuple[List[Any], Any]], with_confidence=True) -> List[List[Tuple[str, float]]]:
        relgraphs = []
        for dpoint in dpoints:
            inputs, output = dpoint
            graph = RelationGraph(GraphOptions())
            graph.from_input_output(inputs, output)
            relgraphs.append(graph.get_encoding(get_mapping=False))

        return self.predict_graphs(relgraphs, with_confidence=with_confidence)
Esempio n. 11
0
 def test_dict(self):
     input_df = pd.DataFrame([[1, 2], [3, 4]])
     input_00 = GraphNode("I0", '[0,0]', get_node_type(input_df.iat[0, 0]))
     input_01 = GraphNode("I0", '[0,1]', get_node_type(input_df.iat[0, 1]))
     input_10 = GraphNode("I0", '[1,0]', get_node_type(input_df.iat[1, 0]))
     input_11 = GraphNode("I0", '[1,1]', get_node_type(input_df.iat[1, 1]))
     output = {"A": [1, 3], "B": [2, 4]}
     output_00 = GraphNode("O0", '[0,0]', get_node_type(output['A'][0]))
     output_01 = GraphNode("O0", '[0,1]', get_node_type(output['B'][0]))
     output_10 = GraphNode("O0", '[1,0]', get_node_type(output['A'][1]))
     output_11 = GraphNode("O0", '[1,1]', get_node_type(output['B'][1]))
     options = GraphOptions()
     options.NODE_TYPES = True
     rel_graph: RelationGraph = RelationGraph(options)
     rel_graph.from_input_output([input_df], output)
     rel_graph_edges = rel_graph.edges
     positional_edges = [
         GraphEdge(input_00, input_01, GraphEdgeType.ADJACENCY),
         GraphEdge(input_00, input_10, GraphEdgeType.ADJACENCY),
         GraphEdge(input_10, input_11, GraphEdgeType.ADJACENCY),
         GraphEdge(input_01, input_11, GraphEdgeType.ADJACENCY),
         GraphEdge(output_00, output_01, GraphEdgeType.ADJACENCY),
         GraphEdge(output_00, output_10, GraphEdgeType.ADJACENCY),
         GraphEdge(output_10, output_11, GraphEdgeType.ADJACENCY),
         GraphEdge(output_01, output_11, GraphEdgeType.ADJACENCY)
     ]
     for edge in positional_edges:
         self.assertTrue(
             edge in rel_graph_edges,
             "Could not find edge %s in set of edges:\n%s" %
             (edge, rel_graph_edges))
     equality_edges = [
         GraphEdge(input_00, output_00, GraphEdgeType.EQUALITY),
         GraphEdge(input_10, output_10, GraphEdgeType.EQUALITY),
         GraphEdge(input_01, output_01, GraphEdgeType.EQUALITY),
         GraphEdge(input_11, output_11, GraphEdgeType.EQUALITY)
     ]
     for edge in equality_edges:
         self.assertTrue(
             edge in rel_graph_edges,
             "Could not find edge %s in set of edges:\n%s" %
             (edge, rel_graph_edges))
        def process(cls, raw_data: Dict):
            if raw_data is None:
                return None

            try:
                inputs = raw_data['inputs']
                output = raw_data['output']
                intermediates = raw_data['intermediates']
                program: Program = raw_data['program']

                function_seq = raw_data['function_sequence']
                unused_inputs = set(range(len(inputs)))
                unused_intermediates = set()
                encodings = []
                for depth, func in enumerate(function_seq, 1):
                    graph = RelationGraph(GraphOptions())
                    depth_inputs = [inputs[i] for i in unused_inputs]
                    depth_intermediates = [
                        intermediates[i] for i in unused_intermediates
                    ]
                    graph_inputs = depth_inputs + depth_intermediates
                    graph.from_input_output(graph_inputs, output)

                    encoding = graph.get_encoding()
                    encoding['label'] = func
                    encodings.append(encoding)

                    unused_inputs -= program.call_seq[depth -
                                                      1].get_used_inputs()
                    unused_intermediates -= program.call_seq[
                        depth - 1].get_used_intermediates()
                    unused_intermediates.add(depth - 1)

                return encodings

            except SilentException:
                return None

            except Exception as e:
                try:
                    logger.warn("Failed for {}".format(raw_data))
                    logging.exception(e)
                    return None

                except:
                    pass

                return None
Esempio n. 13
0
    def test_index_name_nodes_multiindex(self):
        df = pd.DataFrame(
            [(389.0, 'fly'), (24.0, 'fly'), (80.5, 'run'), (np.nan, 'jump')],
            index=pd.MultiIndex.from_tuples([('bird', 'falcon'),
                                             ('bird', 'parrot'),
                                             ('mammal', 'lion'),
                                             ('mammal', 'monkey')],
                                            names=['class', 'name']),
            columns=pd.MultiIndex.from_tuples([('speed', 'max'),
                                               ('species', 'type')]))
        df.columns.names = ['name1', 'name2']

        options = GraphOptions()
        options.COLUMN_NODES = True
        options.INDEX_NODES = True
        options.INDEX_NAME_NODES = True
        options.ADJACENCY_EDGES = True
        options.EQUALITY_EDGES = True
        options.NODE_TYPES = True
        options.INDEX_EDGES = False

        rel_graph: RelationGraph = RelationGraph(options)
        rel_graph.from_input_output([df], df)
        index_name_nodes = [
            node for node in rel_graph.nodes
            if node.ntype == GraphNodeType.INDEX_NAME
        ]
        column_name_nodes = [
            node for node in rel_graph.nodes
            if node.ntype == GraphNodeType.COL_INDEX_NAME
        ]

        self.assertEqual(len(index_name_nodes),
                         4)  # Both in the input and output, so x2
        self.assertEqual(len(column_name_nodes),
                         4)  # Both in the input and output, so x2
Esempio n. 14
0
    def test_idx_multi(self):
        tuples = [("bar", "one"), ("bar", "two")]
        index = pd.MultiIndex.from_tuples(tuples)
        data = [[0], [1]]
        input_df = pd.DataFrame(data, index=index)
        #          0
        # bar one  0
        #     two  1
        output_df = input_df.unstack()
        #       0
        #     one two
        # bar   0   1
        options = GraphOptions()
        options.COLUMN_NODES = True
        options.INDEX_NODES = True
        options.ADJACENCY_EDGES = True
        options.EQUALITY_EDGES = True
        options.NODE_TYPES = True
        options.INDEX_EDGES = True
        rel_graph: RelationGraph = RelationGraph(options)
        rel_graph.from_input_output([input_df], output_df)
        rel_graph_edges = rel_graph.edges

        bar_in_0 = GraphNode("I0", '[0,-2]', GraphNodeType.INDEX)
        bar_in_1 = GraphNode("I0", '[1,-2]', GraphNodeType.INDEX)
        bar_out = GraphNode("O0", '[0,-1]', GraphNodeType.INDEX)

        one_in = GraphNode("I0", '[0,-1]', GraphNodeType.INDEX)
        two_in = GraphNode("I0", '[1,-1]', GraphNodeType.INDEX)

        one_out = GraphNode("O0", '[-1,0]', GraphNodeType.COLUMN)
        two_out = GraphNode("O0", '[-1,1]', GraphNodeType.COLUMN)

        in_0 = GraphNode("I0", '[0,0]', GraphNodeType.INT)
        in_1 = GraphNode("I0", '[1,0]', GraphNodeType.INT)

        out_0 = GraphNode("O0", '[0,0]', GraphNodeType.INT)
        out_1 = GraphNode("O0", '[0,1]', GraphNodeType.INT)

        adjacency_edges = [
            GraphEdge(bar_in_0, bar_in_1, GraphEdgeType.ADJACENCY),
            GraphEdge(bar_in_0, one_in, GraphEdgeType.ADJACENCY),
            GraphEdge(bar_in_1, two_in, GraphEdgeType.ADJACENCY),
            GraphEdge(one_in, two_in, GraphEdgeType.ADJACENCY)
        ]

        for edge in adjacency_edges:
            self.assertTrue(
                edge in rel_graph_edges,
                "Could not find edge %s in set of edges:\n%s" %
                (edge, rel_graph_edges))
        indexing_edges = [
            GraphEdge(bar_in_0, in_0, GraphEdgeType.INDEX),
            GraphEdge(one_in, in_0, GraphEdgeType.INDEX),
            GraphEdge(bar_in_1, in_1, GraphEdgeType.INDEX),
            GraphEdge(two_in, in_1, GraphEdgeType.INDEX),
            GraphEdge(bar_out, out_0, GraphEdgeType.INDEX),
            GraphEdge(bar_out, out_1, GraphEdgeType.INDEX)
        ]

        for edge in indexing_edges:
            self.assertTrue(
                edge in rel_graph_edges,
                "Could not find edge %s in set of edges:\n%s" %
                (edge, rel_graph_edges))

        equality_edges = [
            GraphEdge(bar_in_0, bar_out, GraphEdgeType.EQUALITY),
            GraphEdge(bar_in_1, bar_out, GraphEdgeType.EQUALITY),
            GraphEdge(one_in, one_out, GraphEdgeType.EQUALITY),
            GraphEdge(two_in, two_out, GraphEdgeType.EQUALITY)
        ]

        for edge in equality_edges:
            self.assertTrue(
                edge in rel_graph_edges,
                "Could not find edge %s in set of edges:\n%s" %
                (edge, rel_graph_edges))
Esempio n. 15
0
    def test_substr_edges(self):
        df = pd.DataFrame({
            'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
            'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
            'baz': [1, 2, 3, 4, 5, 6]
        })
        out = pd.DataFrame({
            "mrr": ["wo", "no"],
            'asdasd': ["A_1", "B_4"],
            'nostr': [33, 12]
        })

        options = GraphOptions()
        options.SUBSTR_EDGES = True

        rel_graph: RelationGraph = RelationGraph(options)
        rel_graph.from_input_output([df], out)

        def check_edges(in_nodes, out_nodes):
            for in_node in in_nodes:
                for out_node in out_nodes:
                    edge = GraphEdge(in_node, out_node, GraphEdgeType.SUBSTR)
                    self.assertTrue(
                        edge in rel_graph.edges,
                        "Could not find edge %s in set of edges:\n%s" %
                        (edge, rel_graph.edges))

        # test substrings from out to in
        two_nodes = [
            GraphNode("I0", '[3,0]', GraphNodeType.STR),
            GraphNode("I0", '[4,0]', GraphNodeType.STR),
            GraphNode("I0", '[5,0]', GraphNodeType.STR)
        ]
        wo_node = GraphNode("O0", '[0,0]', GraphNodeType.STR)

        check_edges(two_nodes, [wo_node])

        # test substrings from in to out
        A_in = [
            GraphNode("I0", '[0,1]', GraphNodeType.STR),
            GraphNode("I0", '[3,1]', GraphNodeType.STR)
        ]
        A_out = [GraphNode("O0", '[0,1]', GraphNodeType.STR)]
        B_in = [
            GraphNode("I0", '[1,1]', GraphNodeType.STR),
            GraphNode("I0", '[4,1]', GraphNodeType.STR)
        ]
        B_out = [GraphNode("O0", '[1,1]', GraphNodeType.STR)]
        check_edges(A_in, A_out)
        check_edges(B_in, B_out)

        # test substrings involving non-strings
        one_in = [
            GraphNode("I0", '[0,2]', GraphNodeType.INT),
            GraphNode("I0", '[1,-1]', GraphNodeType.INDEX)
        ]
        one_out = [GraphNode("O0", '[0,1]', GraphNodeType.STR)]
        four_in = [
            GraphNode("I0", '[3,2]', GraphNodeType.INT),
            GraphNode("I0", '[4,-1]', GraphNodeType.INDEX)
        ]
        four_out = [GraphNode("O0", '[1,1]', GraphNodeType.STR)]
        check_edges(one_in, one_out)
        check_edges(four_in, four_out)

        # test nothing else
        self.assertEqual(
            11,
            len([
                e for e in rel_graph.edges if e.etype == GraphEdgeType.SUBSTR
            ]))
Esempio n. 16
0
    def debug(self, dpoint: Tuple[List[Any], Any]):
        inputs, output = dpoint
        graph = RelationGraph(GraphOptions())
        graph.from_input_output(inputs, output)

        self.debug_graph(graph.get_encoding(get_mapping=False))
Esempio n. 17
0
    def test_column_multi(self):
        column_labels = [['bar', 'bar', 'baz', 'baz'],
                         ['one', 'two', 'one', 'two']]
        tuples = list(zip(*column_labels))
        col_index = pd.MultiIndex.from_tuples(tuples)
        data = [[0, 1, 2, 3], [4, 5, 6, 7]]
        input_df = pd.DataFrame(data, columns=col_index)
        #   bar     baz
        #   one two one two
        # 0   0   1   2   3
        # 1   4   5   6   7
        output_df = input_df.stack().reset_index()
        #    level_0 level_1  bar  baz
        # 0        0     one    0    2
        # 1        0     two    1    3
        # 2        1     one    4    6
        # 3        1     two    5    7

        options = GraphOptions()
        options.COLUMN_NODES = True
        options.ADJACENCY_EDGES = True
        options.EQUALITY_EDGES = True
        options.NODE_TYPES = True
        options.INDEX_EDGES = True
        rel_graph: RelationGraph = RelationGraph(options)
        rel_graph.from_input_output([input_df], output_df)
        rel_graph_edges = rel_graph.edges

        col_nodes = [
            [
                GraphNode("I0", '[-2,0]', GraphNodeType.COLUMN),
                GraphNode("I0", '[-2,1]', GraphNodeType.COLUMN),
                GraphNode("I0", '[-2,2]', GraphNodeType.COLUMN),
                GraphNode("I0", '[-2,3]', GraphNodeType.COLUMN)
            ],
            [
                GraphNode("I0", '[-1,0]', GraphNodeType.COLUMN),
                GraphNode("I0", '[-1,1]', GraphNodeType.COLUMN),
                GraphNode("I0", '[-1,2]', GraphNodeType.COLUMN),
                GraphNode("I0", '[-1,3]', GraphNodeType.COLUMN)
            ],
        ]

        adjacency_edges = [
            GraphEdge(col_nodes[0][0], col_nodes[1][0],
                      GraphEdgeType.ADJACENCY),
            GraphEdge(col_nodes[0][0], col_nodes[0][1],
                      GraphEdgeType.ADJACENCY),
            GraphEdge(col_nodes[1][0], col_nodes[1][1],
                      GraphEdgeType.ADJACENCY),
            GraphEdge(col_nodes[1][1], col_nodes[1][2],
                      GraphEdgeType.ADJACENCY),
            GraphEdge(col_nodes[0][1], col_nodes[1][1],
                      GraphEdgeType.ADJACENCY),
            GraphEdge(col_nodes[0][1], col_nodes[0][2],
                      GraphEdgeType.ADJACENCY),
            GraphEdge(col_nodes[0][2], col_nodes[1][2],
                      GraphEdgeType.ADJACENCY),
            GraphEdge(col_nodes[0][2], col_nodes[0][3],
                      GraphEdgeType.ADJACENCY),
            GraphEdge(col_nodes[1][2], col_nodes[1][3],
                      GraphEdgeType.ADJACENCY),
            GraphEdge(col_nodes[0][3], col_nodes[1][3],
                      GraphEdgeType.ADJACENCY)
        ]

        for edge in adjacency_edges:
            self.assertTrue(
                edge in rel_graph_edges,
                "Could not find edge %s in set of edges:\n%s" %
                (edge, rel_graph_edges))

        # indexing edges
        input_coli_elems = [[
            GraphNode("I0", '[0,0]', GraphNodeType.INT),
            GraphNode("I0", '[1,0]', GraphNodeType.INT)
        ],
                            [
                                GraphNode("I0", '[0,1]', GraphNodeType.INT),
                                GraphNode("I0", '[1,1]', GraphNodeType.INT)
                            ],
                            [
                                GraphNode("I0", '[0,2]', GraphNodeType.INT),
                                GraphNode("I0", '[1,2]', GraphNodeType.INT)
                            ],
                            [
                                GraphNode("I0", '[0,3]', GraphNodeType.INT),
                                GraphNode("I0", '[1,3]', GraphNodeType.INT)
                            ]]

        def check_edges(in_nodes, out_nodes, edge_type):
            for in_node in in_nodes:
                for out_node in out_nodes:
                    edge = GraphEdge(in_node, out_node, edge_type)
                    self.assertTrue(
                        edge in rel_graph_edges,
                        "Could not find edge %s in set of edges:\n%s" %
                        (edge, rel_graph_edges))

        for i in range(4):
            in_nodes = [col_nodes[0][i], col_nodes[1][i]]
            out_nodes = input_coli_elems[i]
            check_edges(in_nodes, out_nodes, GraphEdgeType.INDEX)

        # equality_edges
        bars = [col_nodes[0][0], col_nodes[0][1]]
        bazs = [col_nodes[0][2], col_nodes[0][3]]
        ones = [col_nodes[1][0], col_nodes[1][2]]
        twos = [col_nodes[1][1], col_nodes[1][3]]

        out_01 = GraphNode("O0", '[0,1]', GraphNodeType.STR)
        out_11 = GraphNode("O0", '[1,1]', GraphNodeType.STR)
        out_21 = GraphNode("O0", '[2,1]', GraphNodeType.STR)
        out_31 = GraphNode("O0", '[3,1]', GraphNodeType.STR)

        out_col_2 = GraphNode("O0", '[-1,2]', GraphNodeType.COLUMN)
        out_col_3 = GraphNode("O0", '[-1,3]', GraphNodeType.COLUMN)

        check_edges(bars, [out_col_2], GraphEdgeType.EQUALITY)
        check_edges(bazs, [out_col_3], GraphEdgeType.EQUALITY)

        check_edges(ones, [out_01, out_21], GraphEdgeType.EQUALITY)
        check_edges(twos, [out_11, out_31], GraphEdgeType.EQUALITY)