    def test_NNEstimator_works_with_VectorAssembler_multi_input(self):
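        # Trains a three-input Keras-style model on a Spark DataFrame whose columns
        # are first merged into a single vector by VectorAssembler. ZLayer, ZModel,
        # NNClassifier, ClassNLLCriterion and Adam are assumed to be imported at
        # module level from the Analytics Zoo and BigDL packages.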
        if self.sc.version.startswith("2"):
            from pyspark.ml.linalg import Vectors
            from pyspark.ml.feature import VectorAssembler
            from pyspark.sql import SparkSession

            spark = SparkSession \
                .builder \
                .getOrCreate()

            df = spark.createDataFrame(
                [(1, 35, 109.0, Vectors.dense([2.0, 5.0, 0.5, 0.5]), 1.0),
                 (2, 58, 2998.0, Vectors.dense([4.0, 10.0, 0.5, 0.5]), 2.0),
                 (3, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0)],
                ["user", "age", "income", "history", "label"])

            assembler = VectorAssembler(
                inputCols=["user", "age", "income", "history"],
                outputCol="features")

            df = assembler.transform(df)
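            # The assembled "features" column is a 7-dim vector:
            # user + age + income (1 value each) + the 4-dim history vector.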

            x1 = ZLayer.Input(shape=(1, ))
            x2 = ZLayer.Input(shape=(2, ))
            x3 = ZLayer.Input(shape=(2, 2))

            user_embedding = ZLayer.Embedding(5, 10)(x1)
            flatten = ZLayer.Flatten()(user_embedding)
            dense1 = ZLayer.Dense(2)(x2)
            lstm = ZLayer.LSTM(4, input_shape=(2, 2))(x3)

            merged = ZLayer.merge([flatten, dense1, lstm], mode="concat")
            zy = ZLayer.Dense(2)(merged)

            zmodel = ZModel([x1, x2, x3], zy)
            criterion = ClassNLLCriterion()
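            # feature_preprocessing [[1], [2], [2, 2]] splits the 7-dim "features"
            # vector into the three tensors expected by the model inputs x1, x2, x3.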
            classifier = NNClassifier(zmodel, criterion, [[1], [2], [2, 2]]) \
                .setOptimMethod(Adam()) \
                .setLearningRate(0.1) \
                .setBatchSize(2) \
                .setMaxEpoch(10)

            nnClassifierModel = classifier.fit(df)
            print(nnClassifierModel.getBatchSize())
            res = nnClassifierModel.transform(df).collect()

    def test_nnEstimator_multiInput_cols(self):
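        # Same three-branch model as above, trained through the Orca Estimator API,
        # which reads the raw feature columns directly via feature_cols. Estimator
        # is assumed to come from zoo.orca.learn.bigdl, with ZLayer/ZModel, BigDL's
        # ClassNLLCriterion and Adam imported at module level as before.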
        from pyspark.ml.linalg import Vectors
        from pyspark.sql import SparkSession

        spark = SparkSession \
            .builder \
            .getOrCreate()

        df = spark.createDataFrame(
            [(1, 35, 109.0, Vectors.dense([2.0, 5.0, 0.5, 0.5]), 1.0),
             (2, 58, 2998.0, Vectors.dense([4.0, 10.0, 0.5, 0.5]), 2.0),
             (3, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0),
             (4, 18, 123.0, Vectors.dense([3.0, 15.0, 0.5, 0.5]), 1.0)],
            ["user", "age", "income", "history", "label"])

        x1 = ZLayer.Input(shape=(1, ))
        x2 = ZLayer.Input(shape=(2, ))
        x3 = ZLayer.Input(shape=(2, 2))

        user_embedding = ZLayer.Embedding(5, 10)(x1)
        flatten = ZLayer.Flatten()(user_embedding)
        dense1 = ZLayer.Dense(2)(x2)
        lstm = ZLayer.LSTM(4, input_shape=(2, 2))(x3)

        merged = ZLayer.merge([flatten, dense1, lstm], mode="concat")
        zy = ZLayer.Dense(2)(merged)

        zmodel = ZModel([x1, x2, x3], zy)
        criterion = ClassNLLCriterion()
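        # from_bigdl wraps the BigDL model and criterion; feature_preprocessing
        # again maps the selected feature columns onto the three input shapes.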
        est = Estimator.from_bigdl(model=zmodel,
                                   loss=criterion,
                                   optimizer=Adam(learningrate=0.1),
                                   feature_preprocessing=[[1], [2], [2, 2]])
        est.fit(df,
                epochs=1,
                batch_size=4,
                feature_cols=["user", "age", "income", "history"])

        res = est.predict(df,
                          feature_cols=["user", "age", "income", "history"])
        res_c = res.collect()
        assert type(res).__name__ == 'DataFrame'
Example #3
    def _to_tensor(self):
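        # Appears to convert an ONNX Gather-style lookup: when the data input is a
        # trainable Parameter with an initializer, it becomes an Embedding lookup
        # over the indices; otherwise a single index is selected along the given
        # axis. zlayers and zautograd are assumed to be the Zoo Keras-layers and
        # autograd modules.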
        data = self.model_inputs[0].zvalue
        indices = self.model_inputs[1].zvalue

        if self._initializer and isinstance(data, zautograd.Parameter):
            embedding = zlayers.Embedding(input_dim=data.shape[0],
                                          output_dim=data.shape[1],
                                          weights=data.get_weight(),
                                          input_length=indices.shape[1])
            return embedding(indices)
        else:
            dim = int(self.onnx_attr['axis'])
            assert dim >= 1, "Currently only dim>=1 is supported."
            assert indices.shape == (
                1, ), "Currently only one index is supported."
            index = int(indices.get_weight().max())
            return zautograd.expand_dims(data.index_select(dim=dim,
                                                           index=index),
                                         axis=dim)
Example #4
    def test_embedding(self):
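        # Checks that the Zoo Embedding layer matches the Keras Embedding layer on
        # random integer input; np, ZLayer, KLayer and WeightsConverter are assumed
        # to be imported at module level.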
        input_data = np.random.randint(1000, size=(32, 10))
        zlayer = ZLayer.Embedding(1000, 64, input_shape=(10, ))
        klayer = KLayer.Embedding(1000, 64, input_length=10)
        self.compare_layer(klayer, zlayer, input_data,
                           WeightsConverter.convert_embedding)