Example #1
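Builds a three-input Keras-style model (embedding, dense, and LSTM branches) with the Analytics Zoo ZLayer API, merges the feature columns with VectorAssembler, and trains an NNClassifier on the resulting Spark DataFrame.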
    def test_NNEstimator_works_with_VectorAssembler_multi_input(self):
        if self.sc.version.startswith("2"):
            from pyspark.ml.linalg import Vectors
            from pyspark.ml.feature import VectorAssembler
            from pyspark.sql import SparkSession
            # Assumed imports for the Zoo/BigDL symbols used below; adjust
            # the module paths to your installed Analytics Zoo version.
            from zoo.pipeline.api.keras import layers as ZLayer
            from zoo.pipeline.api.keras.models import Model as ZModel
            from zoo.pipeline.nnframes import NNClassifier
            from bigdl.nn.criterion import ClassNLLCriterion
            from bigdl.optim.optimizer import Adam

            spark = SparkSession \
                .builder \
                .getOrCreate()

            df = spark.createDataFrame(
                [(1, 35, 109.0, Vectors.dense([2.0, 5.0, 0.5, 0.5]), 1.0),
                 (2, 58, 2998.0, Vectors.dense([4.0, 10.0, 0.5, 0.5]), 2.0),
                 (3, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0)],
                ["user", "age", "income", "history", "label"])

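            # Merge the four feature columns into a single "features" vector.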
            assembler = VectorAssembler(
                inputCols=["user", "age", "income", "history"],
                outputCol="features")

            df = assembler.transform(df)

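            # Three Keras-style inputs: a scalar user id, a 2-element vector
            # (age, income), and a 2x2 sequence for the recurrent branch.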
            x1 = ZLayer.Input(shape=(1, ))
            x2 = ZLayer.Input(shape=(2, ))
            x3 = ZLayer.Input(shape=(2, 2))

            user_embedding = ZLayer.Embedding(5, 10)(x1)
            flatten = ZLayer.Flatten()(user_embedding)
            dense1 = ZLayer.Dense(2)(x2)
            lstm = ZLayer.LSTM(4, input_shape=(2, 2))(x3)

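            # Concatenate the three branches and classify with a final Dense layer.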
            merged = ZLayer.merge([flatten, dense1, lstm], mode="concat")
            zy = ZLayer.Dense(2)(merged)

            zmodel = ZModel([x1, x2, x3], zy)
            criterion = ClassNLLCriterion()
            classifier = NNClassifier(zmodel, criterion, [[1], [2], [2, 2]]) \
                .setOptimMethod(Adam()) \
                .setLearningRate(0.1) \
                .setBatchSize(2) \
                .setMaxEpoch(10)

            nnClassifierModel = classifier.fit(df)
            print(nnClassifierModel.getBatchSize())
            res = nnClassifierModel.transform(df).collect()
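
Example #2

The same multi-input model, trained through the Orca Estimator.from_bigdl API instead of NNClassifier; each model input is fed from its own DataFrame column via feature_cols.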
    def test_nnEstimator_multiInput_cols(self):
        from pyspark.ml.linalg import Vectors
        from pyspark.sql import SparkSession
        # Assumed imports for the Zoo/BigDL symbols used below; adjust
        # the module paths to your installed Analytics Zoo version.
        from zoo.pipeline.api.keras import layers as ZLayer
        from zoo.pipeline.api.keras.models import Model as ZModel
        from zoo.orca.learn.bigdl.estimator import Estimator
        from bigdl.nn.criterion import ClassNLLCriterion
        from bigdl.optim.optimizer import Adam

        spark = SparkSession \
            .builder \
            .getOrCreate()

        df = spark.createDataFrame(
            [(1, 35, 109.0, Vectors.dense([2.0, 5.0, 0.5, 0.5]), 1.0),
             (2, 58, 2998.0, Vectors.dense([4.0, 10.0, 0.5, 0.5]), 2.0),
             (3, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0),
             (4, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0)],
            ["user", "age", "income", "history", "label"])

        x1 = ZLayer.Input(shape=(1, ))
        x2 = ZLayer.Input(shape=(2, ))
        x3 = ZLayer.Input(shape=(2, 2))

        user_embedding = ZLayer.Embedding(5, 10)(x1)
        flatten = ZLayer.Flatten()(user_embedding)
        dense1 = ZLayer.Dense(2)(x2)
        lstm = ZLayer.LSTM(4, input_shape=(2, 2))(x3)

        merged = ZLayer.merge([flatten, dense1, lstm], mode="concat")
        zy = ZLayer.Dense(2)(merged)

        zmodel = ZModel([x1, x2, x3], zy)
        criterion = ClassNLLCriterion()
        est = Estimator.from_bigdl(model=zmodel,
                                   loss=criterion,
                                   optimizer=Adam(learningrate=0.1),
                                   feature_preprocessing=[[1], [2], [2, 2]])
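        # Fit for one epoch; feature_cols are split across the three model
        # inputs according to feature_preprocessing.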
        est.fit(df,
                epochs=1,
                batch_size=4,
                feature_cols=["user", "age", "income", "history"])

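        # predict returns a Spark DataFrame with the predictions appended.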
        res = est.predict(df,
                          feature_cols=["user", "age", "income", "history"])
        res_c = res.collect()
        assert type(res).__name__ == 'DataFrame'
Example #3
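A small helper that flattens the first model input's tensor value with a Keras-style Flatten layer.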
    def _to_tensor(self):
        # Flatten the first model input into a 2-D tensor. The zlayers alias
        # is assumed to be the Keras-style layers module, e.g.
        # `from zoo.pipeline.api.keras import layers as zlayers`.
        flatten = zlayers.Flatten()
        return flatten(self.model_inputs[0].zvalue)