Exemple #1
0
    def testSerializeLocalTrain(self):
        """Check that tiled XGBoost training graphs can be reloaded from JSON.

        Two ``XGBTrain`` graphs are built (one from ``self.X``/``self.y``, one
        from ``self.X_df``/``self.y_series`` with a dataframe output type) and
        their JSON form is passed to ``DAG.from_json``; only the absence of an
        exception is checked. Finally a DMatrix built from the outputs of a
        ``multi_output`` ``ToDMatrix`` call is tiled and must have one chunk.
        """
        session = new_session()

        with LocalContext(session._sess):
            # Inputs taken from the fixture attributes self.X / self.y.
            plain_dmatrix = ToDMatrix(data=self.X, label=self.y)()
            plain_model = XGBTrain(dtrain=plain_dmatrix)()

            plain_graph = plain_model.build_graph(tiled=True)
            DAG.from_json(plain_graph.to_json())

            # Same flow, but explicitly requesting a dataframe output type.
            frame_dmatrix = ToDMatrix(data=self.X_df,
                                      label=self.y_series,
                                      output_types=[OutputType.dataframe])()
            frame_model = XGBTrain(dtrain=frame_dmatrix)()

            frame_graph = frame_model.build_graph(tiled=True)
            DAG.from_json(frame_graph.to_json())

            # Random data created with chunk_size=(1000, 5); the DMatrix that
            # is finally tiled is expected to hold exactly one chunk.
            random_data = mt.random.rand(1000, 10, chunk_size=(1000, 5))
            split_data, split_label = ToDMatrix(data=random_data,
                                                label=self.y,
                                                multi_output=True)()
            untiled_dmatrix = ToDMatrix(data=split_data, label=split_label)()
            tiled_dmatrix = untiled_dmatrix.tiles()

            self.assertEqual(len(tiled_dmatrix.chunks), 1)
Exemple #2
0
    def testDataFrameGraphSerialize(self):
        """Round-trip DataFrame graphs through protobuf and JSON.

        An untiled graph built from a DataFrame with a datetime index and
        timedelta columns is serialized with each format, and the first node
        of the restored graph is compared with the first node of the original
        (operand, shape, input keys, index and columns metadata). The same is
        then done for a graph holding one tiled DataFrame, comparing the
        first restored chunk with the original's first chunk.
        """
        raw = pd.DataFrame(np.random.rand(10, 10),
                           columns=pd.timedelta_range(start='1 day', periods=10),
                           index=pd.date_range('2020-1-1', periods=10))
        df = from_pandas_df(raw)
        graph = df.build_graph(tiled=False)

        # Protobuf first, then JSON; both formats get identical checks.
        for dump, load in ((graph.to_pb, DAG.from_pb), (graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))
            source = next(iter(graph))
            mirror = next(iter(restored))
            # Intended to make sure outputs are all weak references.
            # NOTE(review): assertTrue's second argument is the failure
            # message, so this only checks that the first output is truthy;
            # no type check happens. Confirm the intent before tightening.
            self.assertTrue(mirror.op.outputs[0], ReferenceType)
            self.assertBaseEqual(source.op, mirror.op)
            self.assertEqual(source.shape, mirror.shape)
            self.assertEqual(sorted(node.key for node in source.inputs),
                             sorted(node.key for node in mirror.inputs))
            pd.testing.assert_index_equal(mirror.index_value.to_pandas(),
                                          source.index_value.to_pandas())
            pd.testing.assert_index_equal(mirror.columns_value.to_pandas(),
                                          source.columns_value.to_pandas())

        # Graph holding a single, already tiled DataFrame.
        tiled_df = from_pandas_df(pd.DataFrame(np.random.rand(10, 10)), chunk_size=(5, 4)).tiles()
        tiled_graph = DAG()
        tiled_graph.add_node(tiled_df)

        for dump, load in ((tiled_graph.to_pb, DAG.from_pb), (tiled_graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(tiled_graph), len(restored))
            restored_chunks = next(iter(restored)).chunks
            # 6 chunks expected (presumably 2 row blocks x 3 column blocks).
            self.assertEqual(len(restored_chunks), 6)
            self.assertIsInstance(restored_chunks[0], DataFrameChunk)
            self.assertEqual(restored_chunks[0].index, tiled_df.chunks[0].index)
            self.assertBaseEqual(restored_chunks[0].op, tiled_df.chunks[0].op)
            pd.testing.assert_index_equal(restored_chunks[0].index_value.to_pandas(),
                                          tiled_df.chunks[0].index_value.to_pandas())
            pd.testing.assert_index_equal(restored_chunks[0].columns_value.to_pandas(),
                                          tiled_df.chunks[0].columns_value.to_pandas())
Exemple #3
0
    def testTensorGraphSerialize(self):
        """Round-trip tensor graphs through protobuf and JSON.

        For the untiled graph of ``ones + tensor`` the node that has inputs
        is looked up in the restored graph by key and compared (operand,
        shape, input keys). For a graph holding one tiled tensor, the first
        restored chunk is compared with the original's first chunk.
        """
        def sorted_keys(nodes):
            # Order-independent view of the keys of a node collection.
            return sorted(node.key for node in nodes)

        total = ones((10, 3), chunk_size=(5, 2)) + tensor(np.random.random((10, 3)),
                                                          chunk_size=(5, 2))
        graph = total.build_graph(tiled=False)

        # Protobuf first, then JSON; both formats get identical checks.
        for dump, load in ((graph.to_pb, DAG.from_pb), (graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))
            source = next(node for node in graph if node.inputs)
            mirror = next(node for node in restored if node.key == source.key)
            # Intended to make sure outputs are all weak references.
            # NOTE(review): assertTrue's second argument is the failure
            # message, so this only checks that the first output is truthy;
            # no type check happens. Confirm the intent before tightening.
            self.assertTrue(mirror.op.outputs[0], ReferenceType)
            self.assertBaseEqual(source.op, mirror.op)
            self.assertEqual(source.shape, mirror.shape)
            self.assertEqual(sorted_keys(source.inputs), sorted_keys(mirror.inputs))

        # Graph holding a single, already tiled tensor.
        tiled_tensor = ones((10, 10), chunk_size=(5, 4)).tiles()
        tiled_graph = DAG()
        tiled_graph.add_node(tiled_tensor)

        for dump, load in ((tiled_graph.to_pb, DAG.from_pb), (tiled_graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(tiled_graph), len(restored))
            restored_chunks = next(iter(restored)).chunks
            # 6 chunks expected (presumably 2 row blocks x 3 column blocks).
            self.assertEqual(len(restored_chunks), 6)
            self.assertIsInstance(restored_chunks[0], TensorChunk)
            self.assertEqual(restored_chunks[0].index, tiled_tensor.chunks[0].index)
            self.assertBaseEqual(restored_chunks[0].op, tiled_tensor.chunks[0].op)
Exemple #4
0
    def testDataFrameGraphSerialize(self):
        """Round-trip DataFrame graphs through protobuf and JSON.

        The first node of an untiled DataFrame graph is compared with the
        first node of each restored graph (operand, shape, input keys). Then
        a graph holding one tiled DataFrame is serialized and the first
        restored chunk is compared with the original's first chunk.
        """
        def sorted_keys(nodes):
            # Order-independent view of the keys of a node collection.
            return sorted(node.key for node in nodes)

        frame = from_pandas(pd.DataFrame(np.random.rand(10, 10)))
        graph = frame.build_graph(tiled=False)

        # Protobuf first, then JSON; both formats get identical checks.
        for dump, load in ((graph.to_pb, DAG.from_pb), (graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))
            source = next(iter(graph))
            mirror = next(iter(restored))
            # Intended to make sure outputs are all weak references.
            # NOTE(review): assertTrue's second argument is the failure
            # message, so this only checks that the first output is truthy;
            # no type check happens. Confirm the intent before tightening.
            self.assertTrue(mirror.op.outputs[0], ReferenceType)
            self.assertBaseEqual(source.op, mirror.op)
            self.assertEqual(source.shape, mirror.shape)
            self.assertEqual(sorted_keys(source.inputs), sorted_keys(mirror.inputs))

        # Graph holding a single, already tiled DataFrame.
        tiled_frame = from_pandas(pd.DataFrame(np.random.rand(10, 10)),
                                  chunk_size=(5, 4)).tiles()
        tiled_graph = DAG()
        tiled_graph.add_node(tiled_frame)

        for dump, load in ((tiled_graph.to_pb, DAG.from_pb), (tiled_graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(tiled_graph), len(restored))
            restored_chunks = next(iter(restored)).chunks
            # 6 chunks expected (presumably 2 row blocks x 3 column blocks).
            self.assertEqual(len(restored_chunks), 6)
            self.assertIsInstance(restored_chunks[0], DataFrameChunk)
            self.assertEqual(restored_chunks[0].index, tiled_frame.chunks[0].index)
            self.assertBaseEqual(restored_chunks[0].op, tiled_frame.chunks[0].op)
Exemple #5
0
    def testTensorGraphSerialize(self):
        """Round-trip an untiled tensor graph through protobuf and JSON.

        The node of the ``ones + tensor`` graph that has inputs is looked up
        in each restored graph by key, then operand, shape and input keys
        are compared.
        """
        total = ones((10, 3), chunks=(5, 2)) + tensor(np.random.random((10, 3)),
                                                      chunks=(5, 2))
        graph = total.build_graph(tiled=False)

        # Each entry serializes the graph and rebuilds a DAG from the result;
        # protobuf runs first, then JSON.
        round_trips = (
            lambda g: DAG.from_pb(g.to_pb()),
            lambda g: DAG.from_json(g.to_json()),
        )
        for round_trip in round_trips:
            restored = round_trip(graph)
            self.assertEqual(len(graph), len(restored))
            source = next(node for node in graph if node.inputs)
            mirror = next(node for node in restored if node.key == source.key)
            # Intended to make sure outputs are all weak references.
            # NOTE(review): assertTrue's second argument is the failure
            # message, so this only checks that the first output is truthy;
            # no type check happens. Confirm the intent before tightening.
            self.assertTrue(mirror.op.outputs[0], ReferenceType)
            self.assertBaseEqual(source.op, mirror.op)
            self.assertEqual(source.shape, mirror.shape)
            self.assertEqual(sorted(inp.key for inp in source.inputs),
                             sorted(inp.key for inp in mirror.inputs))
Exemple #6
0
    def testTensorGraphTiledSerialize(self):
        """Round-trip tiled tensor graphs through protobuf and JSON.

        For the tiled ``ones + tensor`` graph, the chunk that has inputs is
        looked up in each restored graph by key and compared (operand,
        index, shape, input keys). For the tiled ``ones + 2`` graph, the
        first chunk is matched by key and its ``composed`` keys are compared.
        """
        def sorted_keys(nodes):
            # Order-independent view of the keys of a node collection.
            return sorted(node.key for node in nodes)

        total = ones((10, 3), chunk_size=(5, 2)) + tensor(np.random.random((10, 3)),
                                                          chunk_size=(5, 2))
        graph = total.build_graph(tiled=True)

        # Protobuf first, then JSON; both formats get identical checks.
        for dump, load in ((graph.to_pb, DAG.from_pb), (graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))
            source = next(node for node in graph if node.inputs)
            mirror = next(node for node in restored if node.key == source.key)
            self.assertBaseEqual(source.op, mirror.op)
            self.assertEqual(source.index, mirror.index)
            self.assertEqual(source.shape, mirror.shape)
            self.assertEqual(sorted_keys(source.inputs), sorted_keys(mirror.inputs))

        # Second graph: uneven row chunks plus a scalar add. Graph lengths
        # are not compared here, only the matched chunk.
        shifted = ones((10, 3), chunk_size=((3, 5, 2), 2)) + 2
        shifted_graph = shifted.build_graph(tiled=True)

        for dump, load in ((shifted_graph.to_pb, DAG.from_pb),
                           (shifted_graph.to_json, DAG.from_json)):
            restored = load(dump())
            source = next(node for node in shifted_graph)
            mirror = next(node for node in restored if node.key == source.key)
            self.assertBaseEqual(source.op, mirror.op)
            self.assertEqual(sorted_keys(source.composed), sorted_keys(mirror.composed))
Exemple #7
0
    def testDataFrameGraphSerialize(self):
        """Round-trip an untiled DataFrame graph through protobuf and JSON.

        The first node of the original graph is compared with the first node
        of each restored graph: operand, shape and input keys must match.
        """
        frame = from_pandas(pd.DataFrame(np.random.rand(10, 10)))
        graph = frame.build_graph(tiled=False)

        # Protobuf first, then JSON; both formats get identical checks.
        for dump, load in ((graph.to_pb, DAG.from_pb), (graph.to_json, DAG.from_json)):
            restored = load(dump())
            self.assertEqual(len(graph), len(restored))
            source = next(iter(graph))
            mirror = next(iter(restored))
            # Intended to make sure outputs are all weak references.
            # NOTE(review): assertTrue's second argument is the failure
            # message, so this only checks that the first output is truthy;
            # no type check happens. Confirm the intent before tightening.
            self.assertTrue(mirror.op.outputs[0], ReferenceType)
            self.assertBaseEqual(source.op, mirror.op)
            self.assertEqual(source.shape, mirror.shape)
            self.assertEqual(sorted(inp.key for inp in source.inputs),
                             sorted(inp.key for inp in mirror.inputs))