def test_openvino(self):
    """Smoke-test OpenVINO Estimator.predict on a downloaded ResNet-50 model,
    first with a plain ndarray input and then with a SparkXShards input."""
    with tempfile.TemporaryDirectory() as local_path:
        model_url = data_url + "/analytics-zoo-data/openvino2020_resnet50.tar"
        model_path = maybe_download("openvino2020_resnet50.tar",
                                    local_path, model_url)
        cmd = "tar -xvf " + model_path + " -C " + local_path
        # BUG FIX: subprocess.Popen returned immediately, so the model files
        # could be opened below before tar finished extracting (race).
        # subprocess.run blocks until extraction completes, and check=True
        # raises CalledProcessError instead of silently ignoring a failure.
        subprocess.run(cmd.split(), check=True)
        model_path = os.path.join(
            local_path, "openvino2020_resnet50/resnet_v1_50.xml")
        est = Estimator.from_openvino(model_path=model_path)

        # ndarray input
        input_data = np.random.random([20, 4, 3, 224, 224])
        result = est.predict(input_data)
        print(result)

        # xshards input: two partitions with different batch counts
        input_data_list = [
            np.random.random([1, 4, 3, 224, 224]),
            np.random.random([2, 4, 3, 224, 224])
        ]
        sc = init_nncontext()
        rdd = sc.parallelize(input_data_list, numSlices=2)
        shards = SparkXShards(rdd)

        def pre_processing(images):
            # Wrap each partition's ndarray in the dict layout predict expects.
            return {"x": images}

        shards = shards.transform_shard(pre_processing)
        result = est.predict(shards)
        result_c = result.collect()
        print(result_c)
def test_openvino_predict_xshards(self):
    """Predict over SparkXShards built from repeated copies of self.input and
    check the per-partition prediction shapes and values.

    NOTE(review): another method later in this file defines the same name,
    so this definition is shadowed and never runs under pytest — confirm
    which version is intended and rename or delete one of them.
    """
    # Two partitions: a batch of 4 samples and a batch of 2 samples.
    input_data_list = [np.array([self.input] * 4),
                       np.array([self.input] * 2)]
    sc = init_nncontext()
    rdd = sc.parallelize(input_data_list, numSlices=2)
    shards = SparkXShards(rdd)

    def pre_processing(images):
        # Wrap each partition's ndarray in the dict layout predict expects.
        return {"x": images}

    shards = shards.transform_shard(pre_processing)
    result = self.est.predict(shards)
    result_c = result.collect()
    # Predictions come back as SparkXShards; one 1000-wide score row
    # per input sample in each partition.
    assert isinstance(result, SparkXShards)
    assert result_c[0]["prediction"].shape == (4, 1000)
    assert result_c[1]["prediction"].shape == (2, 1000)
    assert self.check_result(result_c[0]["prediction"], 4)
    assert self.check_result(result_c[1]["prediction"], 2)
def test_openvino_predict_xshards(self):
    """Run self.est.predict on randomly generated SparkXShards input and
    verify the prediction shape produced for each partition."""
    # Two partitions of random 5-D input, differing in leading batch size.
    batches = [
        np.random.random([1, 4, 3, 224, 224]),
        np.random.random([2, 4, 3, 224, 224]),
    ]
    spark_context = init_nncontext()
    batch_rdd = spark_context.parallelize(batches, numSlices=2)
    # Wrap each partition's ndarray in the {"x": ...} layout predict expects.
    feature_shards = SparkXShards(batch_rdd).transform_shard(
        lambda images: {"x": images})
    predictions = self.est.predict(feature_shards)
    collected = predictions.collect()
    assert isinstance(predictions, SparkXShards)
    assert collected[0]["prediction"].shape == (1, 4, 1000)
    assert collected[1]["prediction"].shape == (2, 4, 1000)
def test_nnEstimator(self):
    """End-to-end test of the BigDL-backed Estimator: predict/fit on a
    DataFrame, agreement with NNModel.transform, save/load round-trip,
    gradient clipping setters, and predict/fit on SparkXShards.

    NOTE: statement order matters throughout — each fit mutates the shared
    model, and later assertions compare predictions collected at specific
    points in that sequence.
    """
    from zoo.pipeline.nnframes import NNModel
    # Tiny 2-in/2-out linear model trained with MSE.
    linear_model = Sequential().add(Linear(2, 2))
    mse_criterion = MSECriterion()
    df = self.get_estimator_df()
    est = Estimator.from_bigdl(model=linear_model, loss=mse_criterion,
                               optimizer=Adam(),
                               feature_preprocessing=SeqToTensor([2]),
                               label_preprocessing=SeqToTensor([2]))
    # Predict before any training just to exercise the code path.
    res0 = est.predict(df)
    res0_c = res0.collect()
    est.fit(df, 1, batch_size=4)
    # After fitting, NNModel.transform and est.predict must agree row-wise.
    nn_model = NNModel(est.get_model(),
                       feature_preprocessing=SeqToTensor([2]))
    res1 = nn_model.transform(df)
    res2 = est.predict(df)
    res1_c = res1.collect()
    res2_c = res2.collect()
    assert type(res1).__name__ == 'DataFrame'
    assert type(res2).__name__ == 'DataFrame'
    assert len(res1_c) == len(res2_c)
    for idx in range(len(res1_c)):
        assert res1_c[idx]["prediction"] == res2_c[idx]["prediction"]
    # Save the trained estimator, reload it into a fresh Estimator, and
    # check the reloaded model predicts identically to the original.
    with tempfile.TemporaryDirectory() as tempdirname:
        temp_path = os.path.join(tempdirname, "model")
        est.save(temp_path)
        est2 = Estimator.from_bigdl(model=linear_model, loss=mse_criterion)
        est2.load(temp_path, optimizer=Adam(), loss=mse_criterion,
                  feature_preprocessing=SeqToTensor([2]),
                  label_preprocessing=SeqToTensor([2]))
        # Exercise the clipping setters (set then clear) on the reloaded est.
        est2.set_constant_gradient_clipping(0.1, 1.2)
        est2.clear_gradient_clipping()
        res3 = est2.predict(df)
        res3_c = res3.collect()
        assert type(res3).__name__ == 'DataFrame'
        assert len(res1_c) == len(res3_c)
        for idx in range(len(res1_c)):
            assert res1_c[idx]["prediction"] == res3_c[idx]["prediction"]
        # Continue training the reloaded estimator (est itself is untouched,
        # so the res3_c values collected above stay valid for comparison).
        est2.fit(df, 4, batch_size=4)
    # Build SparkXShards input: ((feature pair), (label pair)) tuples mapped
    # into the {"x": [...], "y": [...]} layout the estimator expects, with a
    # leading batch axis added to every scalar.
    data = self.sc.parallelize([((2.0, 1.0), (1.0, 2.0)),
                                ((1.0, 2.0), (2.0, 1.0)),
                                ((2.0, 1.0), (1.0, 2.0)),
                                ((1.0, 2.0), (2.0, 1.0))])
    data_shard = SparkXShards(data)
    data_shard = data_shard.transform_shard(
        lambda feature_label_tuple: {
            "x": [
                np.expand_dims(np.array(feature_label_tuple[0][0]), axis=0
                               ),
                np.expand_dims(np.array(feature_label_tuple[0][1]), axis=0)
            ],
            "y": [
                np.expand_dims(np.array(feature_label_tuple[1][0]), axis=0
                               ),
                np.expand_dims(np.array(feature_label_tuple[1][1]), axis=0)
            ]
        })
    # XShards predictions from est (still trained for 1 epoch) must match
    # the DataFrame predictions res3_c element-wise.
    res4 = est.predict(data_shard)
    res4_c = res4.collect()
    assert type(res4).__name__ == 'SparkXShards'
    for idx in range(len(res4_c)):
        assert abs(res4_c[idx]["prediction"][0][0]
                   - res3_c[idx]["prediction"][0]) == 0
        assert abs(res4_c[idx]["prediction"][0][1]
                   - res3_c[idx]["prediction"][1]) == 0
    # Fit on XShards, then confirm XShards predict and DataFrame predict
    # agree on the newly trained model.
    est.fit(data_shard, 1, batch_size=4)
    res5 = est.predict(data_shard)
    res5_c = res5.collect()
    res6 = est.predict(df)
    res6_c = res6.collect()
    for idx in range(len(res5_c)):
        assert abs(res5_c[idx]["prediction"][0][0]
                   - res6_c[idx]["prediction"][0]) == 0
        assert abs(res5_c[idx]["prediction"][0][1]
                   - res6_c[idx]["prediction"][1]) == 0