Example #1
    def test_NNEstimator_multi_input(self):
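        # Two 1-d inputs are concatenated and fed through a Dense layer,
        # so the NNEstimator below receives a multi-input graph model.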
        zx1 = ZLayer.Input(shape=(1, ))
        zx2 = ZLayer.Input(shape=(1, ))
        zz = ZLayer.merge([zx1, zx2], mode="concat")
        zy = ZLayer.Dense(2)(zz)
        zmodel = ZModel([zx1, zx2], zy)

        criterion = MSECriterion()
        df = self.get_estimator_df()
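        # [[1], [1]] describes the feature preprocessing: the feature
        # column is split into two size-1 tensors, one per model input.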
        estimator = NNEstimator(zmodel, criterion, [[1], [1]]).setMaxEpoch(5) \
            .setBatchSize(4)
        nnmodel = estimator.fit(df)
        nnmodel.transform(df).collect()
Example #2
    def test_NNEstimator_works_with_VectorAssembler_multi_input(self):
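        # VectorAssembler flattens four columns into a single "features"
        # vector, which the NNClassifier splits back into three tensors.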
        if self.sc.version.startswith("2"):
            from pyspark.ml.linalg import Vectors
            from pyspark.ml.feature import VectorAssembler
            from pyspark.sql import SparkSession

            spark = SparkSession \
                .builder \
                .getOrCreate()

            df = spark.createDataFrame(
                [(1, 35, 109.0, Vectors.dense([2.0, 5.0, 0.5, 0.5]), 1.0),
                 (2, 58, 2998.0, Vectors.dense([4.0, 10.0, 0.5, 0.5]), 2.0),
                 (3, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0)],
                ["user", "age", "income", "history", "label"])

            assembler = VectorAssembler(
                inputCols=["user", "age", "income", "history"],
                outputCol="features")

            df = assembler.transform(df)

            x1 = ZLayer.Input(shape=(1, ))
            x2 = ZLayer.Input(shape=(2, ))
            x3 = ZLayer.Input(shape=(2, 2))

            user_embedding = ZLayer.Embedding(5, 10)(x1)
            flatten = ZLayer.Flatten()(user_embedding)
            dense1 = ZLayer.Dense(2)(x2)
            lstm = ZLayer.LSTM(4, input_shape=(2, 2))(x3)

            merged = ZLayer.merge([flatten, dense1, lstm], mode="concat")
            zy = ZLayer.Dense(2)(merged)

            zmodel = ZModel([x1, x2, x3], zy)
            criterion = ClassNLLCriterion()
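            # [[1], [2], [2, 2]] splits the 7-element assembled feature
            # vector into tensors of shape [1], [2] and [2, 2], matching
            # the three model inputs.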
            classifier = NNClassifier(zmodel, criterion, [[1], [2], [2, 2]]) \
                .setOptimMethod(Adam()) \
                .setLearningRate(0.1) \
                .setBatchSize(2) \
                .setMaxEpoch(10)

            nnClassifierModel = classifier.fit(df)
            print(nnClassifierModel.getBatchSize())
            res = nnClassifierModel.transform(df).collect()
Example #3
    def test_nnEstimator_multiInput(self):
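        # Same two-input graph as Example #1, but trained through the
        # Orca Estimator API instead of constructing NNEstimator directly.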
        zx1 = ZLayer.Input(shape=(1, ))
        zx2 = ZLayer.Input(shape=(1, ))
        zz = ZLayer.merge([zx1, zx2], mode="concat")
        zy = ZLayer.Dense(2)(zz)
        zmodel = ZModel([zx1, zx2], zy)

        criterion = MSECriterion()
        df, _ = self.get_estimator_df()
        estimator = Estimator.from_bigdl(model=zmodel,
                                         loss=criterion,
                                         feature_preprocessing=[[1], [1]])
        estimator.fit(df, epochs=5, batch_size=4)
        pred = estimator.predict(df)
        pred_data = pred.collect()
        assert type(pred).__name__ == 'DataFrame'
Example #4
    def test_merge_method_sum(self):
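        # Build the same two-branch sum-merge graph in both the Zoo Keras
        # API and plain Keras, then compare their outputs layer by layer.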
        zx1 = ZLayer.Input(shape=(8, ))
        zx2 = ZLayer.Input(shape=(6, ))
        zy1 = ZLayer.Dense(10)(zx1)
        zy2 = ZLayer.Dense(10)(zx2)
        zz = ZLayer.merge([zy1, zy2], mode="sum")
        zmodel = ZModel([zx1, zx2], zz, name="graph1")

        kx1 = KLayer.Input(shape=(8, ))
        kx2 = KLayer.Input(shape=(6, ))
        ky1 = KLayer.Dense(10)(kx1)
        ky2 = KLayer.Dense(10)(kx2)
        kz = kmerge([ky1, ky2], mode="sum")
        kmodel = KModel([kx1, kx2], kz)

        input_data = [np.random.random([2, 8]), np.random.random([2, 6])]
        self.compare_layer(kmodel, zmodel, input_data, self.convert_two_dense)
Example #5
    def test_nnEstimator_multiInput_cols(self):
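        # Like Example #2, but the four feature columns are passed to the
        # estimator directly via feature_cols, with no VectorAssembler.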
        from pyspark.ml.linalg import Vectors
        from pyspark.sql import SparkSession

        spark = SparkSession \
            .builder \
            .getOrCreate()

        df = spark.createDataFrame(
            [(1, 35, 109.0, Vectors.dense([2.0, 5.0, 0.5, 0.5]), 1.0),
             (2, 58, 2998.0, Vectors.dense([4.0, 10.0, 0.5, 0.5]), 2.0),
             (3, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0),
             (4, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0)],
            ["user", "age", "income", "history", "label"])

        x1 = ZLayer.Input(shape=(1, ))
        x2 = ZLayer.Input(shape=(2, ))
        x3 = ZLayer.Input(shape=(2, 2))

        user_embedding = ZLayer.Embedding(5, 10)(x1)
        flatten = ZLayer.Flatten()(user_embedding)
        dense1 = ZLayer.Dense(2)(x2)
        lstm = ZLayer.LSTM(4, input_shape=(2, 2))(x3)

        merged = ZLayer.merge([flatten, dense1, lstm], mode="concat")
        zy = ZLayer.Dense(2)(merged)

        zmodel = ZModel([x1, x2, x3], zy)
        criterion = ClassNLLCriterion()
        est = Estimator.from_bigdl(model=zmodel,
                                   loss=criterion,
                                   optimizer=Adam(learningrate=0.1),
                                   feature_preprocessing=[[1], [2], [2, 2]])
        est.fit(df,
                epochs=1,
                batch_size=4,
                feature_cols=["user", "age", "income", "history"])

        res = est.predict(df,
                          feature_cols=["user", "age", "income", "history"])
        res_c = res.collect()
        assert type(res).__name__ == 'DataFrame'
Example #6
    def test_xshards_spark_estimator_multi_inputs(self):
        resource_path = os.path.join(
            os.path.split(__file__)[0], "../../resources")

        def transform(df):
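            # Convert each pandas shard into the dict format XShards
            # expects: "x" holds one array per model input, "y" the labels.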
            result = {
                "x": [
                    np.expand_dims(df['user'].to_numpy(), axis=1),
                    np.expand_dims(df['item'].to_numpy(), axis=1)
                ],
                "y":
                df['label'].to_numpy()
            }
            return result

        file_path = os.path.join(resource_path, "orca/learn/ncf2.csv")
        data_shard = read_csv(file_path)
        data_shard = data_shard.transform_shard(transform)
        zx1 = ZLayer.Input(shape=(1, ))
        zx2 = ZLayer.Input(shape=(1, ))
        zz = ZLayer.merge([zx1, zx2], mode="concat")
        zy = ZLayer.Dense(2)(zz)
        model = ZModel([zx1, zx2], zy)

        optim_method = SGD(learningrate=0.01)
        with tempfile.TemporaryDirectory() as temp_dir_name:
            estimator = Estimator.from_bigdl(model=model,
                                             optimizer=optim_method,
                                             loss=ClassNLLCriterion(),
                                             metrics=[Accuracy()],
                                             model_dir=temp_dir_name)
            estimator.set_constant_gradient_clipping(0.1, 1.2)
            r1 = estimator.predict(data=data_shard)
            r_c = r1.collect()
            estimator.set_tensorboard(log_dir=temp_dir_name, app_name="test")
            estimator.fit(data=data_shard,
                          epochs=5,
                          batch_size=8,
                          validation_data=data_shard,
                          checkpoint_trigger=EveryEpoch())
            summary = estimator.get_train_summary(tag="Loss")
            temp_path = os.path.join(temp_dir_name, "save_model")
            estimator.save(temp_path)
            eval_result = estimator.evaluate(data=data_shard, batch_size=8)
Example #7
    def test_merge_method_model_concat(self):
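        # A ZModel can itself be called as a graph node: branch 1 wraps a
        # Dense layer in a sub-model before merging with a plain Dense branch.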
        zx1 = ZLayer.Input(shape=(4, ))
        zx2 = ZLayer.Input(shape=(5, ))
        zy1 = ZLayer.Dense(6, activation="sigmoid")(zx1)
        zbranch1 = ZModel(zx1, zy1)(zx1)
        zbranch2 = ZLayer.Dense(8)(zx2)
        zz = ZLayer.merge([zbranch1, zbranch2], mode="concat")
        zmodel = ZModel([zx1, zx2], zz)

        kx1 = KLayer.Input(shape=(4, ))
        kx2 = KLayer.Input(shape=(5, ))
        ky1 = KLayer.Dense(6, activation="sigmoid")(kx1)
        kbranch1 = KModel(kx1, ky1)(kx1)
        kbranch2 = KLayer.Dense(8)(kx2)
        kz = KLayer.merge([kbranch1, kbranch2], mode="concat")
        kmodel = KModel([kx1, kx2], kz)

        input_data = [np.random.random([2, 4]), np.random.random([2, 5])]
        self.compare_layer(kmodel, zmodel, input_data, self.convert_two_dense)
Example #8
    def test_merge_method_seq_concat(self):
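        # Merge a functional sub-model branch with a Sequential branch,
        # mirroring the same topology in Keras for comparison.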
        zx1 = ZLayer.Input(shape=(10, ))
        zx2 = ZLayer.Input(shape=(10, ))
        zy1 = ZLayer.Dense(12, activation="sigmoid")(zx1)
        zbranch1_node = ZModel(zx1, zy1)(zx1)
        zbranch2 = ZSequential()
        zbranch2.add(ZLayer.Dense(12, input_dim=10))
        zbranch2_node = zbranch2(zx2)
        zz = ZLayer.merge([zbranch1_node, zbranch2_node], mode="concat")
        zmodel = ZModel([zx1, zx2], zz)

        kx1 = KLayer.Input(shape=(10, ))
        kx2 = KLayer.Input(shape=(10, ))
        ky1 = KLayer.Dense(12, activation="sigmoid")(kx1)
        kbranch1_node = KModel(kx1, ky1)(kx1)
        kbranch2 = KSequential()
        kbranch2.add(KLayer.Dense(12, input_dim=10))
        kbranch2_node = kbranch2(kx2)
        kz = KLayer.merge([kbranch1_node, kbranch2_node], mode="concat")
        kmodel = KModel([kx1, kx2], kz)

        input_data = [np.random.random([2, 10]), np.random.random([2, 10])]
        self.compare_layer(kmodel, zmodel, input_data, self.convert_two_dense)