コード例 #1
0
    def test_index_name_nodes(self):
        """Check the name-node counts for a frame and its pivot.

        The graph built from the input frame and the pivoted output must
        contain exactly one INDEX_NAME node and one COL_INDEX_NAME node.
        """
        frame = pd.DataFrame({
            'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
            'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
            'baz': [1, 2, 3, 4, 5, 6],
        })
        pivoted = frame.pivot(index='foo', columns='bar', values='baz')

        # Flags are applied in the same order as a sequence of plain
        # attribute assignments would be.
        opts = GraphOptions()
        flag_settings = (
            ('COLUMN_NODES', True),
            ('INDEX_NODES', True),
            ('INDEX_NAME_NODES', True),
            ('ADJACENCY_EDGES', True),
            ('EQUALITY_EDGES', True),
            ('NODE_TYPES', True),
            ('INDEX_EDGES', False),
        )
        for flag, enabled in flag_settings:
            setattr(opts, flag, enabled)

        graph = RelationGraph(opts)
        graph.from_input_output([frame], pivoted)

        def count_nodes(wanted_type):
            # Number of graph nodes whose ntype equals wanted_type.
            return sum(1 for node in graph.nodes
                       if node.ntype == wanted_type)

        index_name_count = count_nodes(GraphNodeType.INDEX_NAME)
        column_name_count = count_nodes(GraphNodeType.COL_INDEX_NAME)

        self.assertEqual(index_name_count, 1)
        self.assertEqual(column_name_count, 1)
コード例 #2
0
    def test_index_name_equality_edges(self):
        """Check edges between input columns and output name nodes.

        After pivoting with index='foo' and columns='bar', the graph must
        hold an edge (in either direction) between the input column node
        '[-1,0]' and the output INDEX_NAME node, and between the input
        column node '[-1,1]' and the output COL_INDEX_NAME node.
        """
        frame = pd.DataFrame({
            'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
            'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
            'baz': [1, 2, 3, 4, 5, 6],
        })
        pivoted = frame.pivot(index='foo', columns='bar', values='baz')

        opts = GraphOptions()
        flag_settings = (
            ('COLUMN_NODES', True),
            ('INDEX_NODES', True),
            ('INDEX_NAME_NODES', True),
            ('ADJACENCY_EDGES', False),
            ('EQUALITY_EDGES', True),
            ('NODE_TYPES', True),
            ('INDEX_EDGES', False),
        )
        for flag, enabled in flag_settings:
            setattr(opts, flag, enabled)

        graph = RelationGraph(opts)
        graph.from_input_output([frame], pivoted)

        def pick(wanted_type, source_prefix, identifier):
            # First node matching type, source prefix and identifier.
            # Indexing with [0] raises IndexError when nothing matches,
            # which fails the test.
            matches = [
                node for node in graph.nodes
                if node.ntype == wanted_type
                and node.source.startswith(source_prefix)
                and node.identifier == identifier
            ]
            return matches[0]

        def linked(first, second):
            # True when some edge joins the two nodes, in either order.
            return any(
                (edge.node1 == first and edge.node2 == second)
                or (edge.node1 == second and edge.node2 == first)
                for edge in graph.edges
            )

        src_foo = pick(GraphNodeType.COLUMN, "I", '[-1,0]')
        src_bar = pick(GraphNodeType.COLUMN, "I", '[-1,1]')
        dst_foo = pick(GraphNodeType.INDEX_NAME, "O", '[-1,-1]')
        dst_bar = pick(GraphNodeType.COL_INDEX_NAME, "O", '[-1,-1]')

        self.assertTrue(linked(src_foo, dst_foo))
        self.assertTrue(linked(src_bar, dst_bar))
コード例 #3
0
 def __init__(self, obj: pd.DataFrame, type_str: str, source: str,
              options: GraphOptions, **kwargs):
     """Initialise via the parent class with index/column features off.

     A shallow copy of ``options`` is made and its INDEX_NODES,
     COLUMN_NODES, INDEX_NAME_NODES, INDEX_EDGES and INDEX_NAME_EDGES
     flags are set to False before it is passed to the parent
     constructor. The remaining arguments are forwarded unchanged.
     """
     # Set the flags on a copy so the caller's options object keeps
     # its own values.
     local_options = copy.copy(options)
     disabled_flags = (
         'INDEX_NODES',
         'COLUMN_NODES',
         'INDEX_NAME_NODES',
         'INDEX_EDGES',
         'INDEX_NAME_EDGES',
     )
     for flag in disabled_flags:
         setattr(local_options, flag, False)
     super().__init__(obj, type_str, source, local_options, **kwargs)
コード例 #4
0
    def test_index_name_nodes_multiindex(self):
        """Check the name-node counts for a MultiIndex frame.

        The frame has two named row-index levels and two named column
        levels, and is used as both the input and the output. The graph
        must contain four INDEX_NAME and four COL_INDEX_NAME nodes.
        """
        row_index = pd.MultiIndex.from_tuples(
            [('bird', 'falcon'),
             ('bird', 'parrot'),
             ('mammal', 'lion'),
             ('mammal', 'monkey')],
            names=['class', 'name'])
        col_index = pd.MultiIndex.from_tuples([('speed', 'max'),
                                               ('species', 'type')])
        frame = pd.DataFrame(
            [(389.0, 'fly'), (24.0, 'fly'), (80.5, 'run'), (np.nan, 'jump')],
            index=row_index,
            columns=col_index)
        frame.columns.names = ['name1', 'name2']

        opts = GraphOptions()
        flag_settings = (
            ('COLUMN_NODES', True),
            ('INDEX_NODES', True),
            ('INDEX_NAME_NODES', True),
            ('ADJACENCY_EDGES', True),
            ('EQUALITY_EDGES', True),
            ('NODE_TYPES', True),
            ('INDEX_EDGES', False),
        )
        for flag, enabled in flag_settings:
            setattr(opts, flag, enabled)

        graph = RelationGraph(opts)
        graph.from_input_output([frame], frame)

        def count_nodes(wanted_type):
            # Number of graph nodes whose ntype equals wanted_type.
            return sum(1 for node in graph.nodes
                       if node.ntype == wanted_type)

        index_name_count = count_nodes(GraphNodeType.INDEX_NAME)
        column_name_count = count_nodes(GraphNodeType.COL_INDEX_NAME)

        # Both in the input and output, so x2
        self.assertEqual(index_name_count, 4)
        # Both in the input and output, so x2
        self.assertEqual(column_name_count, 4)