Ejemplo n.º 1
0
    def testDistributedTile(self):
        """Tile a DMatrix and an XGBoost training op under a mocked distributed context.

        Every chunk of the tiled data, label and weight is assigned to one of
        two fake worker addresses.  A mock context then reports the
        distributed running mode and serves those assignments as chunk
        metas.  The test expects both the DMatrix and the training model to
        be tiled into two chunks, matching the two mock workers.
        """
        # Read all three fixtures first, then tile them in order.
        X, y, w = (t.tiles() for t in (self.X, self.y, self.weight))

        worker_addrs = ['addr1:1', 'addr2:1']
        # Alternate the chunks of each tileable between the two addresses;
        # a key seen again in a later tileable overrides the earlier entry.
        chunk_to_workers = {}
        for tileable in (X, y, w):
            for idx, chunk in enumerate(tileable.chunks):
                chunk_to_workers[chunk.key] = worker_addrs[idx % 2]

        class MockDistributedDictContext(ContextBase):
            """Context stub backed by the ``chunk_to_workers`` mapping above."""

            @property
            def running_mode(self):
                return RunningMode.distributed

            def get_chunk_metas(self, chunk_keys):
                # Known keys report their single assigned worker; unknown
                # keys get a meta with no workers at all.
                return [
                    ChunkMeta(
                        chunk_size=None,
                        chunk_shape=None,
                        workers=([chunk_to_workers[key]]
                                 if key in chunk_to_workers else None))
                    for key in chunk_keys
                ]

        dmatrix = ToDMatrix(data=X, label=y, weight=w)()
        model = XGBTrain(dtrain=dmatrix)()

        with MockDistributedDictContext():
            model = model.tiles()
            dmatrix = get_tiled(dmatrix)

            # 2 workers
            for tiled in (dmatrix, model):
                self.assertEqual(len(tiled.chunks), 2)
Ejemplo n.º 2
0
    def testSerializeLocalTrain(self):
        """Round-trip tiled XGBoost training graphs through JSON in a local context.

        Two training graphs are built, one from the tensor fixtures and one
        from the DataFrame/Series fixtures.  Each is serialized with
        ``to_json`` and rebuilt with ``DAG.from_json``.  Finally a DMatrix is
        built from the outputs of a ``multi_output=True`` ``ToDMatrix`` call,
        and the test asserts that it tiles into exactly one chunk.
        """
        session = new_session()

        def check_json_round_trip(dtrain):
            # Build the tiled training graph for ``dtrain`` and rebuild a DAG
            # from its JSON form; any failure surfaces as an exception.
            train_op = XGBTrain(dtrain=dtrain)()
            tiled_graph = train_op.build_graph(tiled=True)
            DAG.from_json(tiled_graph.to_json())

        with LocalContext(session._sess):
            # Tensor-backed inputs.
            check_json_round_trip(ToDMatrix(data=self.X, label=self.y)())

            # DataFrame / Series inputs.
            check_json_round_trip(
                ToDMatrix(data=self.X_df,
                          label=self.y_series,
                          output_types=[OutputType.dataframe])())

            # NOTE(review): presumably multi_output=True returns data and
            # label re-chunked to line up with each other -- confirm against
            # ToDMatrix.
            raw_X = mt.random.rand(1000, 10, chunk_size=(1000, 5))
            aligned_X, aligned_y = ToDMatrix(data=raw_X,
                                             label=self.y,
                                             multi_output=True)()
            tiled_dmatrix = ToDMatrix(data=aligned_X, label=aligned_y)().tiles()

            self.assertEqual(len(tiled_dmatrix.chunks), 1)