Python _dataframe_to_xshards Examples

Programming Language: Python

Namespace/Package Name: zoo.orca.learn.utils

Method/Function: _dataframe_to_xshards

Examples at hotexamples.com: 2

Python _dataframe_to_xshards - 2 examples found. These are the top-rated real-world Python examples of zoo.orca.learn.utils._dataframe_to_xshards, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

    def test_dataframe_to_xshards(self):
        """Check how many shards ``_dataframe_to_xshards`` produces.

        With the default shard size (None) the result is expected to hold one
        shard per DataFrame partition; with a shard size of 1 it is expected
        to hold one shard per DataFrame row.
        """
        from zoo.orca import OrcaContext

        rdd = self.sc.range(0, 100)
        # Each row: a 50-element float feature list and a one-element random
        # 0/1 label list.
        df = rdd.map(lambda x: ([float(x)] * 50,
                                [int(np.random.randint(0, 2, size=()))])).toDF(
                                    ["feature", "label"])
        num_partitions = df.rdd.getNumPartitions()
        # test shard_size = None
        shards = _dataframe_to_xshards(df,
                                       feature_cols=["feature"],
                                       label_cols=["label"])
        num_shards = shards.rdd.count()
        assert num_shards == num_partitions

        # _shard_size is set on the OrcaContext class itself, so the change
        # would otherwise stay in effect for every test that runs after this
        # one. Remember the current value and put it back when done.
        previous_shard_size = getattr(OrcaContext, "_shard_size", None)
        OrcaContext._shard_size = 1
        try:
            shards = _dataframe_to_xshards(df,
                                           feature_cols=["feature"],
                                           label_cols=["label"])
            num_shards = shards.rdd.count()
            assert num_shards == df.rdd.count()
        finally:
            OrcaContext._shard_size = previous_shard_size

Example #2

Show file

 def test_convert_predict_xshards_to_dataframe_multi_output(self):
     """Check conversion of a two-output prediction back into a DataFrame.

     The "prediction" for each shard is the shard's own ``"x"`` array split
     into two column blocks, so flattening the prediction column of the
     result must give back the ``feature`` column for every row.
     """
     def make_row(value):
         # 50 copies of the value as the feature, plus a random 0/1 label.
         features = [float(value)] * 50
         label = [int(np.random.randint(0, 2, size=()))]
         return features, label

     def split_outputs(shard):
         # Two outputs: the first 25 columns and the remaining columns.
         block = shard["x"]
         return {"prediction": [block[:, :25], block[:, 25:]]}

     source_rdd = self.sc.range(0, 100)
     source_df = source_rdd.map(make_row).toDF(["feature", "label"])

     feature_shards = _dataframe_to_xshards(source_df,
                                            feature_cols=["feature"])
     pred_shards = feature_shards.transform_shard(split_outputs)
     result_df = convert_predict_xshards_to_dataframe(source_df, pred_shards)

     # Count the rows whose flattened prediction differs from the feature.
     mismatch_expr = "sum(cast(feature <> flatten(prediction) as int)) as error"
     summary_row = result_df.selectExpr(mismatch_expr).first()
     assert summary_row["error"] == 0