def test_index_name_nodes(self):
        """Check the index-name node counts for a pivoted output frame.

        A flat three-column frame is pivoted with ``foo`` as the index and
        ``bar`` as the columns. The relation graph built from the
        (input, output) pair is expected to hold exactly one ``INDEX_NAME``
        node and exactly one ``COL_INDEX_NAME`` node.
        """
        frame = pd.DataFrame({
            'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
            'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
            'baz': [1, 2, 3, 4, 5, 6]
        })
        pivoted = frame.pivot(index='foo', columns='bar', values='baz')

        # Flags are applied in a fixed order; only INDEX_EDGES is disabled.
        opts = RelationGraphOptions()
        for flag, enabled in (
                ('COLUMN_NODES', True),
                ('INDEX_NODES', True),
                ('INDEX_NAME_NODES', True),
                ('ADJACENCY_EDGES', True),
                ('EQUALITY_EDGES', True),
                ('NODE_TYPES', True),
                ('INDEX_EDGES', False),
        ):
            setattr(opts, flag, enabled)

        graph: RelationGraph = RelationGraph.build_relation_graph(
            [frame], pivoted, opts)

        def count_nodes(node_type):
            # Number of graph nodes whose type tag equals ``node_type``.
            return sum(1 for node in graph.nodes if node._type == node_type)

        self.assertEqual(count_nodes(RelationGraphNodeType.INDEX_NAME), 1)
        self.assertEqual(count_nodes(RelationGraphNodeType.COL_INDEX_NAME), 1)
    def test_index_name_equality_edges(self):
        """Check that input column nodes are linked to output name nodes.

        The frame is pivoted with ``foo`` as the index and ``bar`` as the
        columns. With equality edges enabled (and adjacency edges disabled),
        the graph must contain an edge, in either direction, between:

        * the input ``COLUMN`` node at pos ``(-1, 0)`` and the output
          ``INDEX_NAME`` node at pos ``(-1, -1)``;
        * the input ``COLUMN`` node at pos ``(-1, 1)`` and the output
          ``COL_INDEX_NAME`` node at pos ``(-1, -1)``.
        """
        frame = pd.DataFrame({
            'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
            'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
            'baz': [1, 2, 3, 4, 5, 6]
        })
        pivoted = frame.pivot(index='foo', columns='bar', values='baz')

        # Adjacency edges are off here, unlike the node-count tests.
        opts = RelationGraphOptions()
        for flag, enabled in (
                ('COLUMN_NODES', True),
                ('INDEX_NODES', True),
                ('INDEX_NAME_NODES', True),
                ('ADJACENCY_EDGES', False),
                ('EQUALITY_EDGES', True),
                ('NODE_TYPES', True),
                ('INDEX_EDGES', False),
        ):
            setattr(opts, flag, enabled)

        graph: RelationGraph = RelationGraph.build_relation_graph(
            [frame], pivoted, opts)

        def select_nodes(node_type, dfindex_prefix):
            # Nodes of the given type whose ``dfindex`` starts with the
            # prefix ("I" / "O" -- presumably input / output frames; confirm
            # against RelationGraph).
            return [
                node for node in graph.nodes
                if node._type == node_type
                and node.dfindex.startswith(dfindex_prefix)
            ]

        def node_at(candidates, position):
            # First candidate at ``position``; IndexError if there is none.
            return [node for node in candidates if node.pos == position][0]

        def has_edge_between(first: RelationGraphNode,
                             second: RelationGraphNode,
                             within: RelationGraph):
            # True when some edge joins the two nodes, in either orientation.
            return any(
                (edge.node1 == first and edge.node2 == second)
                or (edge.node1 == second and edge.node2 == first)
                for edge in within.edges
            )

        input_columns = select_nodes(RelationGraphNodeType.COLUMN, "I")
        output_index_names = select_nodes(RelationGraphNodeType.INDEX_NAME,
                                          "O")
        output_column_index_names = select_nodes(
            RelationGraphNodeType.COL_INDEX_NAME, "O")

        foo_source = node_at(input_columns, (-1, 0))
        bar_source = node_at(input_columns, (-1, 1))
        foo_target = node_at(output_index_names, (-1, -1))
        bar_target = node_at(output_column_index_names, (-1, -1))

        self.assertTrue(has_edge_between(foo_source, foo_target, graph))
        self.assertTrue(has_edge_between(bar_source, bar_target, graph))
    def test_index_name_nodes_multiindex(self):
        """Check the index-name node counts when both axes are MultiIndexes.

        The frame has a two-level row index named ``class`` / ``name`` and a
        two-level column index named ``name1`` / ``name2``. The same frame is
        passed as both the input and the output, and the graph is expected to
        hold four ``INDEX_NAME`` nodes and four ``COL_INDEX_NAME`` nodes.
        """
        row_index = pd.MultiIndex.from_tuples(
            [('bird', 'falcon'), ('bird', 'parrot'),
             ('mammal', 'lion'), ('mammal', 'monkey')],
            names=['class', 'name'])
        column_index = pd.MultiIndex.from_tuples(
            [('speed', 'max'), ('species', 'type')])

        frame = pd.DataFrame(
            [(389.0, 'fly'), (24.0, 'fly'), (80.5, 'run'), (np.nan, 'jump')],
            index=row_index,
            columns=column_index)
        frame.columns.names = ['name1', 'name2']

        opts = RelationGraphOptions()
        for flag, enabled in (
                ('COLUMN_NODES', True),
                ('INDEX_NODES', True),
                ('INDEX_NAME_NODES', True),
                ('ADJACENCY_EDGES', True),
                ('EQUALITY_EDGES', True),
                ('NODE_TYPES', True),
                ('INDEX_EDGES', False),
        ):
            setattr(opts, flag, enabled)

        # The frame serves as the single input and as the output.
        graph: RelationGraph = RelationGraph.build_relation_graph(
            [frame], frame, opts)

        def count_nodes(node_type):
            # Number of graph nodes whose type tag equals ``node_type``.
            return sum(1 for node in graph.nodes if node._type == node_type)

        # Both in the input and output, so x2
        self.assertEqual(count_nodes(RelationGraphNodeType.INDEX_NAME), 4)
        # Both in the input and output, so x2
        self.assertEqual(count_nodes(RelationGraphNodeType.COL_INDEX_NAME), 4)