Ejemplo n.º 1
0
    def predict(self,
                data,
                batch_size=4,
                feature_cols=None,
                hard_code_batch_size=False):
        """
        Run prediction on ``data`` using the graph inputs/outputs held by this object.

        :param data: data to be predicted; it is converted with ``to_dataset``.
               When it is a ``DataFrame``, ``feature_cols`` must be given.
        :param batch_size: forwarded to ``to_dataset`` as ``batch_per_thread``.
        :param feature_cols: feature column names; required for ``DataFrame`` input.
        :param hard_code_batch_size: forwarded unchanged to ``to_dataset``.
        :return: the result of ``convert_predict_to_dataframe`` for ``DataFrame`` input,
                 otherwise whatever ``TFNet.predict`` returns.
        """
        assert self.outputs is not None, \
            "output is None, it should not be None in prediction"
        is_dataframe = isinstance(data, DataFrame)
        assert not is_dataframe or feature_cols is not None, \
            "feature columns is None; it should not be None in prediction"

        # NOTE(review): sibling code spells this keyword ``label_cols``; confirm that
        # ``labels_cols`` matches the ``to_dataset`` signature in use here.
        dataset_options = dict(
            batch_size=-1,
            batch_per_thread=batch_size,
            validation_data=None,
            feature_cols=feature_cols,
            labels_cols=None,
            hard_code_batch_size=hard_code_batch_size,
            sequential_order=True,
            shuffle=False,
        )
        dataset = to_dataset(data, **dataset_options)

        # TFNet is given flat lists, so nested input/output structures are flattened first.
        graph_inputs = nest.flatten(self.inputs)
        graph_outputs = nest.flatten(self.outputs)
        net = TFNet.from_session(sess=self.sess,
                                 inputs=graph_inputs,
                                 outputs=graph_outputs)
        predictions = net.predict(dataset)
        if is_dataframe:
            return convert_predict_to_dataframe(data, predictions)
        return predictions
Ejemplo n.º 2
0
    def test_init_tfnet_from_session(self):
        """
        A TFNet built from a live session must reproduce both the forward output and
        the gradient w.r.t. the input that the session itself computes.
        """
        import tensorflow as tf
        with tf.Graph().as_default():
            features = tf.placeholder(dtype=tf.float32, shape=(None, 2))
            target = tf.placeholder(dtype=tf.float32, shape=(None, 1))
            prediction = tf.layers.dense(tf.layers.dense(features, 4), 1)
            mse = tf.reduce_mean(tf.square(prediction - target))
            input_grad = tf.gradients(mse, features)[0]
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                data = np.random.rand(2, 2)
                expected_output = sess.run(prediction, feed_dict={features: data})
                # Targets sit exactly 1.0 below the prediction, so d(mse)/d(prediction)
                # is 1 for each of the two rows -- the same upstream gradient that is
                # handed to ``net.backward`` below.
                reference_feed = {features: data, target: expected_output - 1.0}
                expected_grad = sess.run(input_grad, feed_dict=reference_feed)
                net = TFNet.from_session(sess, [features], [prediction],
                                         generate_backward=True)

        actual_output = net.forward(data)
        actual_grad = net.backward(data, np.ones(shape=(2, 1)))

        self.assert_allclose(actual_output, expected_output)
        self.assert_allclose(actual_grad, expected_grad)
Ejemplo n.º 3
0
 def test_tf_net_predict_dataset(self):
     """Predicting on a TFDataset of 16 samples must stack to a (16, 2) result."""
     net = TFNet.from_export_folder(os.path.join(TestTF.resource_path, "tfnet"))
     features = np.random.rand(16, 4)
     dataset = TFDataset.from_ndarrays((features, ))
     # ``predict`` returns something collectable; gather it and stack into one array.
     predictions = np.stack(net.predict(dataset).collect())
     assert predictions.shape == (16, 2)
Ejemplo n.º 4
0
    def predict(
        self,
        data,
        batch_size=4,
        feature_cols=None,
        hard_code_batch_size=False,
        auto_shard_files=False,
    ):
        """
        Predict input data

        :param data: data to be predicted. It can be XShards, Spark DataFrame.
         If data is XShards, each element needs to be {'x': a feature numpy array
         or a tuple of feature numpy arrays}. A tf.data.Dataset is rejected.
        :param batch_size: batch size per thread
        :param feature_cols: list of feature column names if input data is Spark DataFrame.
        :param hard_code_batch_size: whether to hard code batch size for prediction.
         The default value is False.
        :param auto_shard_files: whether to automatically detect if the dataset is file-based
         and apply sharding on files, otherwise sharding on records. Default is False.
        :return: predicted result.
         If input data is XShards, the predict result is a XShards,
         and the schema for each result is: {'prediction': predicted numpy array or
          list of predicted numpy arrays}.
         If input data is Spark DataFrame, the predict result is a DataFrame which includes original
         columns plus 'prediction' column. The 'prediction' column can be FloatType, VectorUDT
         or Array of VectorUDT depending on model outputs shape.
         For any other accepted input the raw result of ``TFNet.predict`` is returned.
        :raises AssertionError: if ``self.outputs`` is None, if ``data`` is a DataFrame and
         ``feature_cols`` is None, or if ``data`` is a tf.data.Dataset.
        """

        assert self.outputs is not None, \
            "output is None, it should not be None in prediction"
        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in prediction"

        # The two literals are joined by implicit concatenation, so the first one must
        # end with a space; otherwise the message reads "...used forestimator prediction".
        assert not is_tf_data_dataset(data), "tf.data.Dataset currently cannot be used for " \
                                             "estimator prediction"

        dataset = to_dataset(
            data,
            batch_size=-1,
            batch_per_thread=batch_size,
            validation_data=None,
            feature_cols=feature_cols,
            label_cols=None,
            hard_code_batch_size=hard_code_batch_size,
            sequential_order=True,
            shuffle=False,
            auto_shard_files=auto_shard_files,
        )

        # TFNet is given flat lists, so nested input/output structures are flattened first.
        flat_inputs = nest.flatten(self.inputs)
        flat_outputs = nest.flatten(self.outputs)
        tfnet = TFNet.from_session(sess=self.sess,
                                   inputs=flat_inputs,
                                   outputs=flat_outputs)
        predicted_rdd = tfnet.predict(dataset)
        # Convert the raw prediction back into the same kind of container the caller passed.
        if isinstance(data, DataFrame):
            return convert_predict_rdd_to_dataframe(data, predicted_rdd)
        elif isinstance(data, SparkXShards):
            return convert_predict_rdd_to_xshard(data, predicted_rdd)
        else:
            return predicted_rdd
Ejemplo n.º 5
0
 def test_init_tfnet_from_saved_model(self):
     """A TFNet loaded from a saved model can run ``predict`` on a 20-image batch of ones."""
     saved_model_dir = os.path.join(TestTF.resource_path, "saved-model-resource")
     tfnet = TFNet.from_saved_model(saved_model_dir,
                                    inputs=["flatten_input:0"],
                                    outputs=["dense_2/Softmax:0"])
     images = np.ones(shape=(20, 28, 28, 1), dtype=np.float32)
     # Only checks that collecting the prediction succeeds; values are not inspected.
     tfnet.predict(images).collect()
Ejemplo n.º 6
0
 def test_for_scalar(self):
     """A TFNet over a rank-0 placeholder must return a rank-0 result from ``forward``."""
     import tensorflow as tf
     with tf.Graph().as_default():
         input1 = tf.placeholder(dtype=tf.float32, shape=())
         output = input1 + 1
         # Use the session as a context manager (as the sibling tests do) so it is
         # closed even when TFNet.from_session raises.
         with tf.Session() as sess:
             net = TFNet.from_session(sess, [input1], [output])
     out_value = net.forward(np.array(1.0))
     assert len(out_value.shape) == 0
Ejemplo n.º 7
0
def predict(model_path, img_path, partition_num=4):
    """
    Run a TFNet detection model over the images under ``img_path`` and print the
    prediction of the first image.

    :param model_path: model location handed to ``TFNet``.
    :param img_path: image location handed to ``ImageSet.read``.
    :param partition_num: third argument of ``ImageSet.read``. Default is 4.
    """
    input_name = "image_tensor:0"
    output_names = [
        "num_detections:0",
        "detection_boxes:0",
        "detection_scores:0",
        "detection_classes:0",
    ]
    model = TFNet(model_path, input_name, output_names)

    # ``sc`` is not defined in this function; it is read from the enclosing module.
    images = ImageSet.read(img_path, sc, partition_num)
    preprocessing = ChainedPreprocessing([
        ImageResize(256, 256),
        ImageMatToTensor(format="NHWC"),
        ImageSetToSample(),
    ])
    image_frame = images.transform(preprocessing).to_image_frame()
    predicted_frame = model.predict_image(image_frame, batch_per_partition=1)

    # Print the detection result of the first image.
    first_prediction = ImageSet.from_image_frame(predicted_frame).get_predict().first()
    print(first_prediction)
Ejemplo n.º 8
0
 def test_tf_net_predict(self):
     """Non-distributed ``predict`` on 16 samples must return an array of shape (16, 2)."""
     import tensorflow as tf
     export_dir = os.path.join(TestTF.resource_path, "tfnet")
     # Session configured with a single inter-op and a single intra-op thread.
     single_thread_config = tf.ConfigProto(inter_op_parallelism_threads=1,
                                           intra_op_parallelism_threads=1)
     net = TFNet.from_export_folder(export_dir,
                                    tf_session_config=single_thread_config)
     samples = np.random.rand(16, 4)
     predictions = net.predict(samples, batch_per_thread=5, distributed=False)
     assert predictions.shape == (16, 2)
Ejemplo n.º 9
0
    def predict(self, data, batch_size=32):
        """
        Run prediction on ``data`` using the graph inputs/outputs held by this object.

        :param data: data to be predicted; it is converted with ``_to_dataset``.
        :param batch_size: forwarded to ``_to_dataset`` as ``batch_per_thread``.
        :return: whatever ``TFNet.predict`` returns for the converted dataset.
        """
        assert self.outputs is not None, \
            "output is None, it should not be None in prediction"

        dataset = _to_dataset(data, batch_size=-1, batch_per_thread=batch_size)

        # TFNet is given flat lists, so nested input/output structures are flattened first.
        graph_inputs = nest.flatten(self.inputs)
        graph_outputs = nest.flatten(self.outputs)
        net = TFNet.from_session(sess=self.sess,
                                 inputs=graph_inputs,
                                 outputs=graph_outputs)
        predictions = net.predict(dataset)
        return predictions
Ejemplo n.º 10
0
 def test_tfdataset_with_string_rdd(self):
     """Strings fed through a string-RDD TFDataset are parsed to numbers by the TFNet."""
     raw_strings = self.sc.parallelize(["123", "456"], 1)
     dataset = TFDataset.from_string_rdd(raw_strings, batch_per_thread=1)
     text_in = tf.placeholder(dtype=tf.string, shape=(None, ))
     number_out = tf.string_to_number(text_in)
     with tf.Session() as sess:
         tfnet = TFNet.from_session(sess,
                                    inputs=[text_in],
                                    outputs=[number_out])
     # Prediction happens after the session block has exited, as in the sibling tests.
     parsed = tfnet.predict(dataset).collect()
     assert parsed[0] == 123
     assert parsed[1] == 456
Ejemplo n.º 11
0
    def predict(self, data, batch_size=32):
        """
        Run prediction on ``data`` using the graph inputs/outputs held by this object.

        :param data: a ``SparkXShards`` or a ``Dataset``; anything else is rejected.
        :param batch_size: used as ``batch_per_thread`` when building the dataset.
        :return: whatever ``TFNet.predict`` returns for the converted dataset.
        :raises ValueError: if ``data`` is neither a ``SparkXShards`` nor a ``Dataset``.
        """
        assert self.outputs is not None, \
            "output is None, it should not be None in prediction"

        if isinstance(data, SparkXShards):
            dataset = _xshards_to_tf_dataset(data, batch_per_thread=batch_size)
        elif not isinstance(data, Dataset):
            raise ValueError("data must be a SparkXShards or an orca.data.tf.Dataset")
        else:
            dataset = TFDataDataset2(data,
                                     batch_size=-1,
                                     batch_per_thread=batch_size)

        # TFNet is given flat lists, so nested input/output structures are flattened first.
        graph_inputs = nest.flatten(self.inputs)
        graph_outputs = nest.flatten(self.outputs)
        net = TFNet.from_session(sess=self.sess,
                                 inputs=graph_inputs,
                                 outputs=graph_outputs)
        return net.predict(dataset)
Ejemplo n.º 12
0
    def predict(
        self,
        data,
        batch_size=4,
        feature_cols=None,
        auto_shard_files=False,
    ):
        """
        Predict input data

        :param data: data to be predicted. It can be XShards, Spark DataFrame.
               If data is XShards, each partition can be Pandas Dataframe or a dictionary of
               {'x': feature}, where feature is a numpy array or a tuple of numpy arrays.
               A tf.data.Dataset is rejected.
        :param batch_size: batch size per thread
        :param feature_cols: list of feature column names if input data is Spark DataFrame
               or XShards of Pandas DataFrame.
        :param auto_shard_files: whether to automatically detect if the dataset is file-based
               and apply sharding on files, otherwise sharding on records. Default is False.
        :return: predicted result.
                 If input data is XShards, the predict result is a XShards, each
                 partition of the XShards is a dictionary of {'prediction': result}, where the
                 result is a numpy array or a list of numpy arrays.
                 If input data is Spark DataFrame, the predict result is a DataFrame which includes
                 original columns plus 'prediction' column. The 'prediction' column can be
                 FloatType, VectorUDT or Array of VectorUDT depending on model outputs shape.
                 For any other accepted input the raw result of ``TFNet.predict`` is returned.
        :raises AssertionError: if ``self.outputs`` is None, if ``feature_cols`` is None for a
                 DataFrame or an XShards of Pandas DataFrame, or if ``data`` is a
                 tf.data.Dataset.
        """

        assert self.outputs is not None, \
            "output is None, it should not be None in prediction"
        if isinstance(data, DataFrame):
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in prediction"
        if isinstance(data, SparkXShards):
            if data._get_class_name() == 'pandas.core.frame.DataFrame':
                assert feature_cols is not None, \
                    "feature columns is None; it should not be None in prediction"
                # From here on ``data`` refers to the processed XShards, which is also
                # what the result conversion at the end receives.
                data = process_xshards_of_pandas_dataframe(data, feature_cols)

        # The two literals are joined by implicit concatenation, so the first one must
        # end with a space; otherwise the message reads "...used forestimator prediction".
        assert not is_tf_data_dataset(data), "tf.data.Dataset currently cannot be used for " \
                                             "estimator prediction"

        dataset = to_dataset(
            data,
            batch_size=-1,
            batch_per_thread=batch_size,
            validation_data=None,
            feature_cols=feature_cols,
            label_cols=None,
            hard_code_batch_size=False,
            sequential_order=True,
            shuffle=False,
            auto_shard_files=auto_shard_files,
        )

        # TFNet is given flat lists, so nested input/output structures are flattened first.
        flat_inputs = nest.flatten(self.inputs)
        flat_outputs = nest.flatten(self.outputs)
        tfnet = TFNet.from_session(sess=self.sess,
                                   inputs=flat_inputs,
                                   outputs=flat_outputs)
        predicted_rdd = tfnet.predict(dataset)
        # Convert the raw prediction back into the same kind of container the caller passed.
        if isinstance(data, DataFrame):
            return convert_predict_rdd_to_dataframe(data, predicted_rdd)
        elif isinstance(data, SparkXShards):
            return convert_predict_rdd_to_xshard(data, predicted_rdd)
        else:
            return predicted_rdd
Ejemplo n.º 13
0
# Script entry point: builds a small TFNet, wraps it for Spark ML and shows its
# predictions on a toy DataFrame before a classifier is configured.
if __name__ == '__main__':

    # Spark is configured with the master 'local[1]'.
    sparkConf = init_spark_conf().setAppName("testNNClassifer").setMaster('local[1]')
    sc = init_nncontext(sparkConf)
    spark = SparkSession \
        .builder \
        .getOrCreate()

    # Graph: 2-feature input -> dense(4) -> dense(1) -> sigmoid.
    with tf.Graph().as_default():
        input1 = tf.placeholder(dtype=tf.float32, shape=(None, 2))
        hidden = tf.layers.dense(input1, 4)
        output = tf.sigmoid(tf.layers.dense(hidden, 1))
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # NOTE(review): generate_backward=True is presumably required so the net
            # can be trained by the classifier below -- confirm against TFNet docs.
            net = TFNet.from_session(sess, [input1], [output], generate_backward=True)

    # Four rows, each a 2-element dense feature vector with a 0.0/1.0 label.
    df = spark.createDataFrame(
        [(Vectors.dense([2.0, 1.0]), 1.0),
         (Vectors.dense([1.0, 2.0]), 0.0),
         (Vectors.dense([2.0, 1.0]), 1.0),
         (Vectors.dense([1.0, 2.0]), 0.0)],
        ["features", "label"])

    # Show what the untrained network produces for the DataFrame.
    print("before training:")
    NNModel(net).transform(df).show()

    classifier = NNClassifier(net, MSECriterion()) \
        .setBatchSize(4) \
        .setOptimMethod(Adam()) \
        .setLearningRate(0.1) \
Ejemplo n.º 14
0
 def test_init_tf_net(self):
     """A TFNet loaded from the exported folder maps a (2, 4) input to a (2, 2) output."""
     export_dir = os.path.join(TestTF.resource_path, "tfnet")
     samples = np.random.rand(2, 4)
     predictions = TFNet.from_export_folder(export_dir).forward(samples)
     assert predictions.shape == (2, 2)