def predict(self, data, batch_size=4, feature_cols=None, hard_code_batch_size=False):
    """
    Predict on ``data`` using a TFNet built from this object's session.

    :param data: data to be predicted. When it is a DataFrame, ``feature_cols``
           must be provided.
    :param batch_size: passed to ``to_dataset`` as ``batch_per_thread``.
    :param feature_cols: feature column names, passed to ``to_dataset``;
           required when ``data`` is a DataFrame.
    :param hard_code_batch_size: passed unchanged to ``to_dataset``.
    :return: for DataFrame input, the result of ``convert_predict_to_dataframe``;
             otherwise the value returned by ``TFNet.predict``.
    """
    assert self.outputs is not None, \
        "output is None, it should not be None in prediction"

    # `data` is never rebound below, so the DataFrame check is done once.
    from_dataframe = isinstance(data, DataFrame)
    if from_dataframe:
        assert feature_cols is not None, \
            "feature columns is None; it should not be None in prediction"

    # NOTE(review): the keyword is spelled `labels_cols` here; confirm it
    # matches the signature of `to_dataset`.
    prediction_input = to_dataset(
        data,
        batch_size=-1,
        batch_per_thread=batch_size,
        validation_data=None,
        feature_cols=feature_cols,
        labels_cols=None,
        hard_code_batch_size=hard_code_batch_size,
        sequential_order=True,
        shuffle=False,
    )

    # Keyword order keeps the original evaluation order:
    # inputs, then outputs, then the session attribute.
    net = TFNet.from_session(
        inputs=nest.flatten(self.inputs),
        outputs=nest.flatten(self.outputs),
        sess=self.sess,
    )
    raw_predictions = net.predict(prediction_input)

    if not from_dataframe:
        return raw_predictions
    return convert_predict_to_dataframe(data, raw_predictions)
def test_init_tfnet_from_session(self):
    """
    Build a TFNet from a live session with ``generate_backward=True`` and
    compare its forward output and input gradient with values computed by
    TensorFlow itself.
    """
    import tensorflow as tf

    with tf.Graph().as_default():
        features = tf.placeholder(dtype=tf.float32, shape=(None, 2))
        labels = tf.placeholder(dtype=tf.float32, shape=(None, 1))
        hidden = tf.layers.dense(features, 4)
        prediction = tf.layers.dense(hidden, 1)
        loss = tf.reduce_mean(tf.square(prediction - labels))
        input_grads = tf.gradients(loss, features)

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            data = np.random.rand(2, 2)

            # Reference values straight from TensorFlow.
            expected_output = sess.run(prediction, feed_dict={features: data})
            label_value = expected_output - 1.0
            expected_grad = sess.run(
                input_grads[0],
                feed_dict={features: data, labels: label_value},
            )

            net = TFNet.from_session(sess, [features], [prediction],
                                     generate_backward=True)

    # NOTE(review): the original nesting of the calls below was not
    # recoverable from the flattened source; they are placed after the
    # session block here - confirm against the upstream test.
    actual_output = net.forward(data)
    actual_grad = net.backward(data, np.ones(shape=(2, 1)))

    self.assert_allclose(actual_output, expected_output)
    self.assert_allclose(actual_grad, expected_grad)
def test_tf_net_predict_dataset(self):
    """
    Load the exported "tfnet" resource, predict over a TFDataset built from a
    random (16, 4) array and check the stacked result has shape (16, 2).
    """
    export_dir = os.path.join(TestTF.resource_path, "tfnet")
    net = TFNet.from_export_folder(export_dir)

    features = np.random.rand(16, 4)
    dataset = TFDataset.from_ndarrays((features, ))

    collected = net.predict(dataset).collect()
    stacked = np.stack(collected)
    assert stacked.shape == (16, 2)
def predict(
    self,
    data,
    batch_size=4,
    feature_cols=None,
    hard_code_batch_size=False,
    auto_shard_files=False,
):
    """
    Predict input data

    :param data: data to be predicted. It can be XShards, Spark DataFrame.
           If data is XShards, each element needs to be {'x': a feature numpy array
           or a tuple of feature numpy arrays}. A tf.data.Dataset is rejected with an
           AssertionError.
    :param batch_size: batch size per thread
    :param feature_cols: list of feature column names if input data is Spark DataFrame.
    :param hard_code_batch_size: whether to hard code batch size for prediction.
           The default value is False.
    :param auto_shard_files: forwarded unchanged to ``to_dataset``. Default is False.
    :return: predicted result.
             If input data is XShards, the predict result is a XShards, and the schema
             for each result is: {'prediction': predicted numpy array or list of
             predicted numpy arrays}.
             If input data is Spark DataFrame, the predict result is a DataFrame which
             includes original columns plus 'prediction' column. The 'prediction'
             column can be FloatType, VectorUDT or Array of VectorUDT depending on
             model outputs shape.
             For any other input the value returned by ``TFNet.predict`` is returned
             as is.
    :raises AssertionError: if ``self.outputs`` is None, if ``data`` is a DataFrame and
            ``feature_cols`` is None, or if ``data`` is a tf.data.Dataset.
    """
    assert self.outputs is not None, \
        "output is None, it should not be None in prediction"
    if isinstance(data, DataFrame):
        assert feature_cols is not None, \
            "feature columns is None; it should not be None in prediction"

    # The two adjacent literals used to concatenate without a separating space
    # ("...be used forestimator prediction"); the message is now a single string.
    assert not is_tf_data_dataset(data), \
        "tf.data.Dataset currently cannot be used for estimator prediction"

    dataset = to_dataset(
        data,
        batch_size=-1,
        batch_per_thread=batch_size,
        validation_data=None,
        feature_cols=feature_cols,
        label_cols=None,
        hard_code_batch_size=hard_code_batch_size,
        sequential_order=True,
        shuffle=False,
        auto_shard_files=auto_shard_files,
    )

    flat_inputs = nest.flatten(self.inputs)
    flat_outputs = nest.flatten(self.outputs)
    tfnet = TFNet.from_session(sess=self.sess, inputs=flat_inputs, outputs=flat_outputs)
    predicted_rdd = tfnet.predict(dataset)

    # Convert the raw prediction result back into the container type of the input.
    if isinstance(data, DataFrame):
        return convert_predict_rdd_to_dataframe(data, predicted_rdd)
    elif isinstance(data, SparkXShards):
        return convert_predict_rdd_to_xshard(data, predicted_rdd)
    else:
        return predicted_rdd
def test_init_tfnet_from_saved_model(self):
    """
    Load a TFNet from the "saved-model-resource" folder with explicit input and
    output tensor names, then run prediction on a batch of ones and collect it.
    """
    saved_model_dir = os.path.join(TestTF.resource_path, "saved-model-resource")
    net = TFNet.from_saved_model(
        saved_model_dir,
        inputs=["flatten_input:0"],
        outputs=["dense_2/Softmax:0"],
    )

    batch = np.ones(dtype=np.float32, shape=(20, 28, 28, 1))
    predictions = net.predict(batch)
    # No value is asserted; the test only requires that collecting succeeds.
    predictions.collect()
def test_for_scalar(self):
    """
    A TFNet built from a graph with a scalar placeholder returns a
    zero-dimensional result from ``forward``.
    """
    import tensorflow as tf

    with tf.Graph().as_default():
        scalar_in = tf.placeholder(dtype=tf.float32, shape=())
        scalar_out = scalar_in + 1
        session = tf.Session()
        net = TFNet.from_session(session, [scalar_in], [scalar_out])
        # The session is closed before the net is used, as in the original.
        session.close()

    # NOTE(review): the original nesting of the lines below was not
    # recoverable from the flattened source - confirm against upstream.
    result = net.forward(np.array(1.0))
    assert len(result.shape) == 0
def predict(model_path, img_path, partition_num=4):
    """
    Run a TFNet detection model over the images under ``img_path`` and print
    the prediction of the first image.

    :param model_path: model location handed to ``TFNet``.
    :param img_path: image location handed to ``ImageSet.read``.
    :param partition_num: third argument of ``ImageSet.read``. Default is 4.
    """
    input_name = "image_tensor:0"
    output_names = [
        "num_detections:0",
        "detection_boxes:0",
        "detection_scores:0",
        "detection_classes:0",
    ]
    detector = TFNet(model_path, input_name, output_names)

    # `sc` is not defined in this function; it is read from the enclosing scope.
    images = ImageSet.read(img_path, sc, partition_num)
    preprocessing = ChainedPreprocessing([
        ImageResize(256, 256),
        ImageMatToTensor(format="NHWC"),
        ImageSetToSample(),
    ])
    prepared = images.transform(preprocessing)

    image_frame = prepared.to_image_frame()
    output = detector.predict_image(image_frame, batch_per_partition=1)

    # Print the detection result of the first image.
    first_result = ImageSet.from_image_frame(output).get_predict().first()
    print(first_result)
def test_tf_net_predict(self):
    """
    Load the exported "tfnet" resource with a single-threaded session config
    and check that non-distributed prediction on a (16, 4) array returns an
    array of shape (16, 2).
    """
    export_dir = os.path.join(TestTF.resource_path, "tfnet")

    import tensorflow as tf
    single_thread_config = tf.ConfigProto(
        inter_op_parallelism_threads=1,
        intra_op_parallelism_threads=1,
    )
    net = TFNet.from_export_folder(export_dir, tf_session_config=single_thread_config)

    samples = np.random.rand(16, 4)
    predictions = net.predict(samples, batch_per_thread=5, distributed=False)
    assert predictions.shape == (16, 2)
def predict(self, data, batch_size=32):
    """
    Predict on ``data`` using a TFNet built from this object's session.

    :param data: data to be predicted; converted with ``_to_dataset``.
    :param batch_size: passed to ``_to_dataset`` as ``batch_per_thread``.
    :return: the value returned by ``TFNet.predict``.
    """
    assert self.outputs is not None, \
        "output is None, it should not be None in prediction"

    prediction_input = _to_dataset(data, batch_size=-1, batch_per_thread=batch_size)

    # Keyword order keeps the original evaluation order:
    # inputs, then outputs, then the session attribute.
    net = TFNet.from_session(
        inputs=nest.flatten(self.inputs),
        outputs=nest.flatten(self.outputs),
        sess=self.sess,
    )
    return net.predict(prediction_input)
def test_tfdataset_with_string_rdd(self):
    """
    Feed a TFDataset built from an RDD of numeric strings through a TFNet
    wrapping ``tf.string_to_number`` and check the parsed values.
    """
    raw_strings = self.sc.parallelize(["123", "456"], 1)
    dataset = TFDataset.from_string_rdd(raw_strings, batch_per_thread=1)

    text_in = tf.placeholder(dtype=tf.string, shape=(None, ))
    number_out = tf.string_to_number(text_in)
    with tf.Session() as sess:
        net = TFNet.from_session(sess, inputs=[text_in], outputs=[number_out])

    # NOTE(review): the original nesting of the lines below was not
    # recoverable from the flattened source - confirm against upstream.
    parsed = net.predict(dataset).collect()
    assert parsed[0] == 123
    assert parsed[1] == 456
def predict(self, data, batch_size=32):
    """
    Predict on ``data`` using a TFNet built from this object's session.

    :param data: a SparkXShards or an orca.data.tf.Dataset.
    :param batch_size: used as ``batch_per_thread`` when building the dataset.
    :return: the value returned by ``TFNet.predict``.
    :raises ValueError: if ``data`` is neither a SparkXShards nor a Dataset.
    """
    assert self.outputs is not None, \
        "output is None, it should not be None in prediction"

    # Reject unsupported input first, then pick the matching conversion.
    if not isinstance(data, (SparkXShards, Dataset)):
        raise ValueError("data must be a SparkXShards or an orca.data.tf.Dataset")

    if isinstance(data, SparkXShards):
        prediction_input = _xshards_to_tf_dataset(data, batch_per_thread=batch_size)
    else:
        prediction_input = TFDataDataset2(data, batch_size=-1,
                                          batch_per_thread=batch_size)

    # Keyword order keeps the original evaluation order:
    # inputs, then outputs, then the session attribute.
    net = TFNet.from_session(
        inputs=nest.flatten(self.inputs),
        outputs=nest.flatten(self.outputs),
        sess=self.sess,
    )
    return net.predict(prediction_input)
def predict(
    self,
    data,
    batch_size=4,
    feature_cols=None,
    auto_shard_files=False,
):
    """
    Predict input data

    :param data: data to be predicted. It can be XShards, Spark DataFrame.
           If data is XShards, each partition can be Pandas Dataframe or a dictionary of
           {'x': feature}, where feature is a numpy array or a tuple of numpy arrays.
           A tf.data.Dataset is rejected with an AssertionError.
    :param batch_size: batch size per thread
    :param feature_cols: list of feature column names if input data is Spark DataFrame
           or XShards of Pandas DataFrame.
    :param auto_shard_files: whether to automatically detect if the dataset is file-based
           and apply sharding on files, otherwise sharding on records. Default is False.
    :return: predicted result.
             If input data is XShards, the predict result is a XShards, each partition
             of the XShards is a dictionary of {'prediction': result}, where the result
             is a numpy array or a list of numpy arrays.
             If input data is Spark DataFrame, the predict result is a DataFrame which
             includes original columns plus 'prediction' column. The 'prediction'
             column can be FloatType, VectorUDT or Array of VectorUDT depending on
             model outputs shape.
             For any other input the value returned by ``TFNet.predict`` is returned
             as is.
    :raises AssertionError: if ``self.outputs`` is None, if ``feature_cols`` is None for
            a DataFrame or an XShards of Pandas DataFrame, or if ``data`` is a
            tf.data.Dataset.
    """
    assert self.outputs is not None, \
        "output is None, it should not be None in prediction"
    if isinstance(data, DataFrame):
        assert feature_cols is not None, \
            "feature columns is None; it should not be None in prediction"
    if isinstance(data, SparkXShards):
        if data._get_class_name() == 'pandas.core.frame.DataFrame':
            assert feature_cols is not None, \
                "feature columns is None; it should not be None in prediction"
            # `data` is rebound here, so the type checks further down see the
            # processed XShards rather than the caller's original object.
            data = process_xshards_of_pandas_dataframe(data, feature_cols)

    # The two adjacent literals used to concatenate without a separating space
    # ("...be used forestimator prediction"); the message is now a single string.
    assert not is_tf_data_dataset(data), \
        "tf.data.Dataset currently cannot be used for estimator prediction"

    dataset = to_dataset(
        data,
        batch_size=-1,
        batch_per_thread=batch_size,
        validation_data=None,
        feature_cols=feature_cols,
        label_cols=None,
        hard_code_batch_size=False,
        sequential_order=True,
        shuffle=False,
        auto_shard_files=auto_shard_files,
    )

    flat_inputs = nest.flatten(self.inputs)
    flat_outputs = nest.flatten(self.outputs)
    tfnet = TFNet.from_session(sess=self.sess, inputs=flat_inputs, outputs=flat_outputs)
    predicted_rdd = tfnet.predict(dataset)

    # Convert the raw prediction result back into the container type of the input.
    if isinstance(data, DataFrame):
        return convert_predict_rdd_to_dataframe(data, predicted_rdd)
    elif isinstance(data, SparkXShards):
        return convert_predict_rdd_to_xshard(data, predicted_rdd)
    else:
        return predicted_rdd
if __name__ == '__main__': sparkConf = init_spark_conf().setAppName("testNNClassifer").setMaster('local[1]') sc = init_nncontext(sparkConf) spark = SparkSession \ .builder \ .getOrCreate() with tf.Graph().as_default(): input1 = tf.placeholder(dtype=tf.float32, shape=(None, 2)) hidden = tf.layers.dense(input1, 4) output = tf.sigmoid(tf.layers.dense(hidden, 1)) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) net = TFNet.from_session(sess, [input1], [output], generate_backward=True) df = spark.createDataFrame( [(Vectors.dense([2.0, 1.0]), 1.0), (Vectors.dense([1.0, 2.0]), 0.0), (Vectors.dense([2.0, 1.0]), 1.0), (Vectors.dense([1.0, 2.0]), 0.0)], ["features", "label"]) print("before training:") NNModel(net).transform(df).show() classifier = NNClassifier(net, MSECriterion()) \ .setBatchSize(4) \ .setOptimMethod(Adam()) \ .setLearningRate(0.1) \
def test_init_tf_net(self):
    """
    Load the exported "tfnet" resource and check that a forward pass on a
    random (2, 4) array returns an array of shape (2, 2).
    """
    export_dir = os.path.join(TestTF.resource_path, "tfnet")
    net = TFNet.from_export_folder(export_dir)

    batch = np.random.rand(2, 4)
    result = net.forward(batch)
    assert result.shape == (2, 2)