def testSerializeLocalTrain(self):
    # Builds XGBoost training graphs from DMatrix inputs inside a local
    # context and checks that each tiled graph can be dumped to JSON and
    # loaded back; finally checks that a DMatrix built from multi-output
    # data/label tiles into a single chunk.
    # NOTE(review): the original source was collapsed onto one line; the
    # whole body is assumed to live inside the ``with`` block -- confirm.
    session = new_session()

    def _train_and_roundtrip(train_input):
        # Build the train op, tile its graph and push it through JSON.
        trained = XGBTrain(dtrain=train_input)()
        tiled_graph = trained.build_graph(tiled=True)
        DAG.from_json(tiled_graph.to_json())

    with LocalContext(session._sess):
        # tensor-backed input
        _train_and_roundtrip(ToDMatrix(data=self.X, label=self.y)())

        # DataFrame/Series-backed input
        _train_and_roundtrip(
            ToDMatrix(
                data=self.X_df,
                label=self.y_series,
                output_types=[OutputType.dataframe],
            )()
        )

        # data is split into two column chunks before being converted
        raw_features = mt.random.rand(1000, 10, chunk_size=(1000, 5))
        features, labels = ToDMatrix(
            data=raw_features, label=self.y, multi_output=True
        )()
        tiled_dmatrix = ToDMatrix(data=features, label=labels)().tiles()

        self.assertEqual(len(tiled_dmatrix.chunks), 1)
def testDataFrameGraphSerialize(self):
    # Round-trips DataFrame graphs through protobuf and JSON and compares
    # the restored nodes (op, shape, input keys, index/columns values)
    # with the originals, first for an untiled DataFrame and then for a
    # graph holding a tiled one.
    raw = pd.DataFrame(
        np.random.rand(10, 10),
        columns=pd.timedelta_range(start='1 day', periods=10),
        index=pd.date_range('2020-1-1', periods=10),
    )
    df = from_pandas_df(raw)
    graph = df.build_graph(tiled=False)

    for dump, load in ((graph.to_pb, DAG.from_pb),
                       (graph.to_json, DAG.from_json)):
        restored = load(dump())
        self.assertEqual(len(graph), len(restored))

        expected = next(iter(graph))
        actual = next(iter(restored))
        # make sure outputs are all weak reference
        # NOTE(review): assertTrue takes its second argument as the failure
        # message, so this only checks truthiness of the first output, not
        # its type -- confirm whether an isinstance check was intended.
        self.assertTrue(actual.op.outputs[0], ReferenceType)
        self.assertBaseEqual(expected.op, actual.op)
        self.assertEqual(expected.shape, actual.shape)
        self.assertEqual(sorted(i.key for i in expected.inputs),
                         sorted(i.key for i in actual.inputs))
        pd.testing.assert_index_equal(actual.index_value.to_pandas(),
                                      expected.index_value.to_pandas())
        pd.testing.assert_index_equal(actual.columns_value.to_pandas(),
                                      expected.columns_value.to_pandas())

    # test graph with tiled DataFrame
    tiled_df = from_pandas_df(pd.DataFrame(np.random.rand(10, 10)),
                              chunk_size=(5, 4)).tiles()
    graph = DAG()
    graph.add_node(tiled_df)

    for dump, load in ((graph.to_pb, DAG.from_pb),
                       (graph.to_json, DAG.from_json)):
        restored = load(dump())
        self.assertEqual(len(graph), len(restored))

        chunks = next(iter(restored)).chunks
        self.assertEqual(len(chunks), 6)
        self.assertIsInstance(chunks[0], DataFrameChunk)
        self.assertEqual(chunks[0].index, tiled_df.chunks[0].index)
        self.assertBaseEqual(chunks[0].op, tiled_df.chunks[0].op)
        pd.testing.assert_index_equal(
            chunks[0].index_value.to_pandas(),
            tiled_df.chunks[0].index_value.to_pandas())
        pd.testing.assert_index_equal(
            chunks[0].columns_value.to_pandas(),
            tiled_df.chunks[0].columns_value.to_pandas())
def testTensorGraphSerialize(self):
    # Round-trips tensor graphs through protobuf and JSON and compares the
    # restored nodes with the originals, first for an untiled expression
    # graph and then for a graph holding a tiled tensor.
    lhs = ones((10, 3), chunk_size=(5, 2))
    rhs = tensor(np.random.random((10, 3)), chunk_size=(5, 2))
    total = lhs + rhs
    graph = total.build_graph(tiled=False)

    for dump, load in ((graph.to_pb, DAG.from_pb),
                       (graph.to_json, DAG.from_json)):
        restored = load(dump())
        self.assertEqual(len(graph), len(restored))

        # pick a node that has inputs, then find its counterpart by key
        expected = next(n for n in graph if n.inputs)
        actual = next(n for n in restored if n.key == expected.key)
        # make sure outputs are all weak reference
        # NOTE(review): assertTrue takes its second argument as the failure
        # message, so this only checks truthiness of the first output, not
        # its type -- confirm whether an isinstance check was intended.
        self.assertTrue(actual.op.outputs[0], ReferenceType)
        self.assertBaseEqual(expected.op, actual.op)
        self.assertEqual(expected.shape, actual.shape)
        self.assertEqual(sorted(i.key for i in expected.inputs),
                         sorted(i.key for i in actual.inputs))

    # test graph with tiled tensor
    tiled = ones((10, 10), chunk_size=(5, 4)).tiles()
    graph = DAG()
    graph.add_node(tiled)

    for dump, load in ((graph.to_pb, DAG.from_pb),
                       (graph.to_json, DAG.from_json)):
        restored = load(dump())
        self.assertEqual(len(graph), len(restored))

        chunks = next(iter(restored)).chunks
        self.assertEqual(len(chunks), 6)
        self.assertIsInstance(chunks[0], TensorChunk)
        self.assertEqual(chunks[0].index, tiled.chunks[0].index)
        self.assertBaseEqual(chunks[0].op, tiled.chunks[0].op)
def testDataFrameGraphSerialize(self):
    # Round-trips DataFrame graphs through protobuf and JSON and compares
    # the restored nodes (op, shape, input keys) with the originals, first
    # for an untiled DataFrame and then for a graph holding a tiled one.
    df = from_pandas(pd.DataFrame(np.random.rand(10, 10)))
    graph = df.build_graph(tiled=False)

    for dump, load in ((graph.to_pb, DAG.from_pb),
                       (graph.to_json, DAG.from_json)):
        restored = load(dump())
        self.assertEqual(len(graph), len(restored))

        expected = next(iter(graph))
        actual = next(iter(restored))
        # make sure outputs are all weak reference
        # NOTE(review): assertTrue takes its second argument as the failure
        # message, so this only checks truthiness of the first output, not
        # its type -- confirm whether an isinstance check was intended.
        self.assertTrue(actual.op.outputs[0], ReferenceType)
        self.assertBaseEqual(expected.op, actual.op)
        self.assertEqual(expected.shape, actual.shape)
        self.assertEqual(sorted(i.key for i in expected.inputs),
                         sorted(i.key for i in actual.inputs))

    # test graph with tiled DataFrame
    tiled_df = from_pandas(pd.DataFrame(np.random.rand(10, 10)),
                           chunk_size=(5, 4)).tiles()
    graph = DAG()
    graph.add_node(tiled_df)

    for dump, load in ((graph.to_pb, DAG.from_pb),
                       (graph.to_json, DAG.from_json)):
        restored = load(dump())
        self.assertEqual(len(graph), len(restored))

        chunks = next(iter(restored)).chunks
        self.assertEqual(len(chunks), 6)
        self.assertIsInstance(chunks[0], DataFrameChunk)
        self.assertEqual(chunks[0].index, tiled_df.chunks[0].index)
        self.assertBaseEqual(chunks[0].op, tiled_df.chunks[0].op)
def testTensorGraphSerialize(self):
    # Round-trips an untiled tensor expression graph through protobuf and
    # JSON and compares a restored node (op, shape, input keys) with its
    # original counterpart.
    lhs = ones((10, 3), chunks=(5, 2))
    rhs = tensor(np.random.random((10, 3)), chunks=(5, 2))
    total = lhs + rhs
    graph = total.build_graph(tiled=False)

    for dump, load in ((graph.to_pb, DAG.from_pb),
                       (graph.to_json, DAG.from_json)):
        restored = load(dump())
        self.assertEqual(len(graph), len(restored))

        # pick a node that has inputs, then find its counterpart by key
        expected = next(n for n in graph if n.inputs)
        actual = next(n for n in restored if n.key == expected.key)
        # make sure outputs are all weak reference
        # NOTE(review): assertTrue takes its second argument as the failure
        # message, so this only checks truthiness of the first output, not
        # its type -- confirm whether an isinstance check was intended.
        self.assertTrue(actual.op.outputs[0], ReferenceType)
        self.assertBaseEqual(expected.op, actual.op)
        self.assertEqual(expected.shape, actual.shape)
        self.assertEqual(sorted(i.key for i in expected.inputs),
                         sorted(i.key for i in actual.inputs))
def testTensorGraphTiledSerialize(self):
    # Round-trips tiled (chunk-level) tensor graphs through protobuf and
    # JSON. The first graph compares op, index, shape and input keys of a
    # chunk that has inputs; the second compares the op and the keys of
    # the ``composed`` members of the first chunk in the graph.
    lhs = ones((10, 3), chunk_size=(5, 2))
    rhs = tensor(np.random.random((10, 3)), chunk_size=(5, 2))
    total = lhs + rhs
    graph = total.build_graph(tiled=True)

    for dump, load in ((graph.to_pb, DAG.from_pb),
                       (graph.to_json, DAG.from_json)):
        restored = load(dump())
        self.assertEqual(len(graph), len(restored))

        # pick a chunk that has inputs, then find its counterpart by key
        expected = next(c for c in graph if c.inputs)
        actual = next(c for c in restored if c.key == expected.key)
        self.assertBaseEqual(expected.op, actual.op)
        self.assertEqual(expected.index, actual.index)
        self.assertEqual(expected.shape, actual.shape)
        self.assertEqual(sorted(i.key for i in expected.inputs),
                         sorted(i.key for i in actual.inputs))

    # uneven row chunks (3, 5, 2) plus a scalar add
    shifted = ones((10, 3), chunk_size=((3, 5, 2), 2)) + 2
    graph = shifted.build_graph(tiled=True)

    for dump, load in ((graph.to_pb, DAG.from_pb),
                       (graph.to_json, DAG.from_json)):
        restored = load(dump())

        expected = next(iter(graph))
        actual = next(c for c in restored if c.key == expected.key)
        self.assertBaseEqual(expected.op, actual.op)
        self.assertEqual(sorted(i.key for i in expected.composed),
                         sorted(i.key for i in actual.composed))
def testDataFrameGraphSerialize(self):
    # Round-trips an untiled DataFrame graph through protobuf and JSON and
    # compares the first restored node (op, shape, input keys) with the
    # first node of the original graph.
    df = from_pandas(pd.DataFrame(np.random.rand(10, 10)))
    graph = df.build_graph(tiled=False)

    for dump, load in ((graph.to_pb, DAG.from_pb),
                       (graph.to_json, DAG.from_json)):
        restored = load(dump())
        self.assertEqual(len(graph), len(restored))

        expected = next(iter(graph))
        actual = next(iter(restored))
        # make sure outputs are all weak reference
        # NOTE(review): assertTrue takes its second argument as the failure
        # message, so this only checks truthiness of the first output, not
        # its type -- confirm whether an isinstance check was intended.
        self.assertTrue(actual.op.outputs[0], ReferenceType)
        self.assertBaseEqual(expected.op, actual.op)
        self.assertEqual(expected.shape, actual.shape)
        self.assertEqual(sorted(i.key for i in expected.inputs),
                         sorted(i.key for i in actual.inputs))