def input_fn(mode):
    """Build a small random TFDataset for the given Estimator mode.

    TRAIN returns batched (features, labels); EVAL returns per-thread
    batched (features, labels); any other mode (predict) returns
    features only.
    """
    features = np.random.rand(20, 10)
    labels = np.random.randint(0, 10, (20,))
    if mode == tf.estimator.ModeKeys.TRAIN:
        return TFDataset.from_ndarrays((features, labels), batch_size=8)
    if mode == tf.estimator.ModeKeys.EVAL:
        return TFDataset.from_ndarrays((features, labels), batch_per_thread=1)
    # prediction: no labels
    return TFDataset.from_ndarrays(features, batch_per_thread=1)
def input_fn(mode):
    """Return the TFDataset matching the Estimator mode.

    TRAIN uses the "train" split with a global batch size; EVAL uses the
    "test" split with per-thread batching; predict drops the labels.
    """
    if mode == tf.estimator.ModeKeys.TRAIN:
        return TFDataset.from_ndarrays(get_data("train"), batch_size=320)
    if mode == tf.estimator.ModeKeys.EVAL:
        return TFDataset.from_ndarrays(get_data("test"), batch_per_thread=80)
    # prediction path: keep only the images
    images, _ = get_data("test")
    return TFDataset.from_ndarrays(images, batch_per_thread=80)
def input_function():
    """Wrap `data_df` (presumably a pandas DataFrame — confirm at caller)
    in a tf.data pipeline and convert it to a TFDataset.

    `batch_size` and `batch_per_thread` are taken from the enclosing scope.
    """
    slices = tf.data.Dataset.from_tensor_slices((dict(data_df),))
    return TFDataset.from_tf_data_dataset(
        dataset=slices,
        batch_size=batch_size,
        batch_per_thread=batch_per_thread)
def input_fn(mode, params):
    """Build the training TFDataset from an image folder via a FeatureSet.

    Only TRAIN is implemented; any other mode raises NotImplementedError.
    Images are resized, randomly cropped/flipped, normalized with the
    standard ImageNet statistics, and paired with one-based-off labels.
    """
    if mode != tf.estimator.ModeKeys.TRAIN:
        raise NotImplementedError
    image_set = ImageSet.read(params["image_path"],
                              sc=sc,
                              with_label=True,
                              one_based_label=False)
    train_transformer = ChainedPreprocessing([
        ImageBytesToMat(),
        ImageResize(256, 256),
        ImageRandomCrop(224, 224),
        ImageRandomPreprocessing(ImageHFlip(), 0.5),
        # ImageNet mean/std per channel
        ImageChannelNormalize(0.485, 0.456, 0.406, 0.229, 0.224, 0.225),
        ImageMatToTensor(to_RGB=True, format="NHWC"),
        ImageSetToSample(input_keys=["imageTensor"], target_keys=["label"])
    ])
    feature_set = FeatureSet.image_frame(image_set.to_image_frame())
    feature_set = feature_set.transform(train_transformer)
    feature_set = feature_set.transform(ImageFeatureToSample())
    return TFDataset.from_feature_set(feature_set,
                                      features=(tf.float32, [224, 224, 3]),
                                      labels=(tf.int32, [1]),
                                      batch_size=batch_size)
def test_tf_net_predict_dataset(self):
    """A TFNet loaded from an export folder predicts over a TFDataset of
    16 random 4-dim samples and yields a (16, 2) stacked output."""
    export_dir = os.path.join(TestTF.resource_path, "tfnet")
    net = TFNet.from_export_folder(export_dir)
    dataset = TFDataset.from_ndarrays((np.random.rand(16, 4),))
    predictions = np.stack(net.predict(dataset).collect())
    assert predictions.shape == (16, 2)
def test_tfdataset_with_tf_data_dataset_which_requires_table(self):
    """A tf.data pipeline that needs a lookup-table initializer still
    works when wrapped in TFDataset and fitted via KerasModel."""
    keys = [1, 0, -1]
    dataset = tf.data.Dataset.from_tensor_slices([1, 2, -1, 5] * 40)
    # maps each key to its mirror value; anything else becomes 100
    table = tf.contrib.lookup.HashTable(
        initializer=tf.contrib.lookup.KeyValueTensorInitializer(
            keys=keys, values=list(reversed(keys))),
        default_value=100)
    dataset = dataset.map(table.lookup)
    dataset = dataset.map(lambda x: (tf.to_float(x), 1))
    dataset = TFDataset.from_tf_data_dataset(dataset, batch_size=16)
    seq = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=()),
        tf.keras.layers.Dense(10, activation="softmax")
    ])
    seq.compile(optimizer=tf.keras.optimizers.RMSprop(),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    KerasModel(seq).fit(dataset)
def test_control_inputs(self):
    """TFOptimizer honors `tensor_with_value`: the `is_training`
    placeholder is fed True during training and False during validation
    (so dropout behaves differently in each phase)."""
    feats = np.random.randn(20, 10)
    labs = np.random.randint(0, 10, size=[20])
    with tf.Graph().as_default():
        dataset = TFDataset.from_ndarrays((feats, labs),
                                          batch_size=4,
                                          val_tensors=(feats, labs))
        is_training = tf.placeholder(dtype=tf.bool, shape=())
        feature_tensor, label_tensor = dataset.tensors
        hidden = tf.layers.dense(feature_tensor, 8)
        hidden = tf.layers.dropout(hidden, training=is_training)
        output = tf.layers.dense(hidden, 10)
        loss = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(logits=output,
                                                   labels=label_tensor))
        optimizer = TFOptimizer.from_loss(
            loss, Adam(),
            val_outputs=[output],
            val_labels=[label_tensor],
            val_method=Accuracy(),
            tensor_with_value={is_training: (True, False)},
            metrics={"loss": loss})
        optimizer.optimize(end_trigger=MaxEpoch(1))
        optimizer.sess.close()
def input_fn(mode):
    """Build a TFDataset from a Spark RDD of zipped (feature, label) rows.

    TRAIN and EVAL share the labelled dataset; predict gets features only.
    The seed fixes the data across calls.
    """
    np.random.seed(20)
    feats = np.random.rand(20, 10)
    labs = np.random.randint(0, 10, (20))
    feat_rdd = self.sc.parallelize(feats)
    lab_rdd = self.sc.parallelize(labs)
    zipped = feat_rdd.zip(lab_rdd)
    if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
        return TFDataset.from_rdd(zipped,
                                  features=(tf.float32, [10]),
                                  labels=(tf.int32, []))
    return TFDataset.from_rdd(feat_rdd, features=(tf.float32, [10]))
def main(data_num):
    """Evaluate a pre-trained LeNet checkpoint on `data_num` MNIST test
    images using TFPredictor and print the resulting accuracy.

    Initializes the NNContext either for a yarn-client/yarn-cluster
    deployment (with proxy/memory settings) or locally.
    """
    data_path = '/tmp/mnist' if not args.data_path else args.data_path
    cluster_mode = args.cluster_mode
    if cluster_mode.startswith("yarn"):
        hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
        assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \
            "set the environment variable HADOOP_CONF_DIR"
        spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \
            .set("spark.executor.cores", 2) \
            .set("spark.executor.instances", 2) \
            .set("spark.executorEnv.HTTP_PROXY", "http://child-prc.intel.com:913") \
            .set("spark.executorEnv.HTTPS_PROXY", "http://child-prc.intel.com:913") \
            .set("spark.driver.memory", "2g")
        # the two yarn variants only differ in the mode string
        yarn_mode = "yarn-client" if cluster_mode == "yarn-client" else "yarn-cluster"
        sc = init_nncontext(spark_conf, cluster_mode=yarn_mode,
                            hadoop_conf=hadoop_conf)
    else:
        sc = init_nncontext()

    # get data, pre-process and create TFDataset
    (images_data, labels_data) = mnist.read_data_sets(data_path, "test")
    images_data = (images_data[:data_num] - mnist.TRAIN_MEAN) / mnist.TRAIN_STD
    labels_data = labels_data[:data_num].astype(np.int32)
    dataset = TFDataset.from_ndarrays((images_data, labels_data),
                                      batch_per_thread=20)

    # construct the model from TFDataset
    images, labels = dataset.tensors
    labels = tf.squeeze(labels)
    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10,
                                         is_training=False)
    predictions = tf.to_int32(tf.argmax(logits, axis=1))
    # 1 where the prediction matches the label, 0 otherwise
    correct = tf.expand_dims(tf.to_int32(tf.equal(predictions, labels)),
                             axis=1)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, "/tmp/lenet/model")
        predictor = TFPredictor(sess, [correct])
        accuracy = predictor.predict().mean()
        print("predict accuracy is %s" % accuracy)
def test_training_for_feature_set(self):
    """An image model fits on a TFDataset built from a FeatureSet."""
    model = self.create_image_model()
    feature_set = self.create_train_features_Set()
    dataset = TFDataset.from_feature_set(feature_set,
                                         features=(tf.float32, [224, 224, 3]),
                                         labels=(tf.int32, [1]),
                                         batch_size=8)
    model.fit(dataset)
def test_training_for_imageset(self):
    """An image model fits on a TFDataset built from a labelled ImageSet."""
    model = self.create_image_model()
    labelled_images = self.create_image_set(with_label=True)
    dataset = TFDataset.from_image_set(labelled_images,
                                       image=(tf.float32, [224, 224, 3]),
                                       label=(tf.int32, [1]),
                                       batch_size=4)
    model.fit(dataset)
def test_predict_for_imageset(self):
    """Predicting over an unlabelled ImageSet attaches a non-None
    prediction to every image."""
    model = self.create_image_model()
    unlabelled_images = self.create_image_set(with_label=False)
    dataset = TFDataset.from_image_set(unlabelled_images,
                                       image=(tf.float32, [224, 224, 3]),
                                       batch_per_thread=1)
    results = model.predict(dataset).get_predict().collect()
    assert all(r[1] is not None for r in results)
def test_evaluation_for_imageset(self):
    """An image model evaluates on a TFDataset built from a labelled
    ImageSet with per-thread batching."""
    model = self.create_image_model()
    labelled_images = self.create_image_set(with_label=True)
    dataset = TFDataset.from_image_set(labelled_images,
                                       image=(tf.float32, [224, 224, 3]),
                                       label=(tf.int32, [1]),
                                       batch_per_thread=1)
    model.evaluate(dataset)
def create_ds(mode):
    """Build a TFDataset from Spark DataFrames for the given phase.

    "train" batches `train_df` (with `val_df` for validation); "predict"
    reads features only from `val_df`; "evaluate" reads features and
    labels from `val_df`. Any other mode raises ValueError.
    """
    if mode == "train":
        return TFDataset.from_dataframe(train_df,
                                        feature_cols=["feature"],
                                        labels_cols=["label"],
                                        batch_size=32,
                                        validation_df=val_df)
    if mode == "predict":
        return TFDataset.from_dataframe(val_df,
                                        feature_cols=["feature"],
                                        batch_per_thread=32)
    if mode == "evaluate":
        return TFDataset.from_dataframe(val_df,
                                        feature_cols=["feature"],
                                        labels_cols=["label"],
                                        batch_per_thread=32)
    raise ValueError("unrecognized mode: {}".format(mode))
def test_dataset_without_batch(self):
    """KerasModel fit/evaluate/predict must reject TFDatasets created
    without batch_size / batch_per_thread, with a descriptive error."""
    feats = np.random.rand(20, 10)
    labs = np.random.randint(0, 2, (20))
    feat_rdd = self.sc.parallelize(feats)
    lab_rdd = self.sc.parallelize(labs)
    zipped = feat_rdd.zip(lab_rdd)
    dataset = TFDataset.from_rdd(zipped,
                                 features=(tf.float32, [10]),
                                 labels=(tf.int32, []),
                                 names=["features", "labels"],
                                 val_rdd=zipped)
    model = KerasModel(self.create_model())
    self.intercept(
        lambda: model.fit(dataset),
        "The batch_size of TFDataset must be"
        " specified when used in KerasModel fit.")
    dataset = TFDataset.from_rdd(zipped,
                                 features=(tf.float32, [10]),
                                 labels=(tf.int32, []),
                                 names=["features", "labels"])
    self.intercept(
        lambda: model.evaluate(dataset),
        "The batch_per_thread of TFDataset must be "
        "specified when used in KerasModel evaluate.")
    dataset = TFDataset.from_rdd(feat_rdd,
                                 features=(tf.float32, [10]),
                                 names=["features", "labels"])
    self.intercept(
        lambda: model.predict(dataset),
        "The batch_per_thread of TFDataset must be"
        " specified when used in KerasModel predict.")
def test_tfdataset_with_string_rdd(self):
    """TFDataset.from_string_rdd feeds string records through a TFNet
    graph that parses them into numbers."""
    string_rdd = self.sc.parallelize(["123", "456"], 1)
    ds = TFDataset.from_string_rdd(string_rdd, batch_per_thread=1)
    input_tensor = tf.placeholder(dtype=tf.string, shape=(None,))
    output_tensor = tf.string_to_number(input_tensor)
    with tf.Session() as sess:
        tfnet = TFNet.from_session(sess,
                                   inputs=[input_tensor],
                                   outputs=[output_tensor])
    result = tfnet.predict(ds).collect()
    assert result[0] == 123
    assert result[1] == 456
def create_predict_dataset(self):
    """Return a seeded, feature-only TFDataset of 20 random 10-dim rows,
    each RDD record wrapped as a single-element list."""
    np.random.seed(20)
    samples = np.random.rand(20, 10)
    rdd = self.sc.parallelize(samples).map(lambda row: [row])
    return TFDataset.from_rdd(rdd,
                              features=(tf.float32, [10]),
                              batch_per_thread=1)
def input_fn():
    """Build the GAN training TFDataset from TFDS MNIST.

    Each record becomes ((noise, one_hot_label), image_scaled_to_[-1, 1]):
    generator inputs paired with the discriminator's real input.
    """
    def to_gan_pair(record):
        image = record['image']
        label = record['label']
        one_hot_label = tf.one_hot(label, depth=10)
        noise = tf.random.normal(mean=0.0, stddev=1.0, shape=(NOISE_DIM,))
        generator_inputs = (noise, one_hot_label)
        # scale uint8 pixels from [0, 255] to [-1, 1]
        discriminator_inputs = ((tf.to_float(image) / 255.0) - 0.5) * 2
        return (generator_inputs, discriminator_inputs)

    mnist_ds = tfds.load("mnist", split="train").map(to_gan_pair)
    return TFDataset.from_tf_data_dataset(mnist_ds, batch_size=56)
def test_tfdataset_with_tf_data_dataset(self):
    """A tf.data pipeline wrapped in TFDataset supports both KerasModel
    fit (batch_size) and evaluate (batch_per_thread)."""
    train_ds = tf.data.Dataset.from_tensor_slices(
        (np.random.randn(102, 28, 28, 1),
         np.random.randint(0, 10, size=(102,))))
    train_ds = train_ds.map(lambda feature, label: (tf.to_float(feature),
                                                    label))
    train_ds = TFDataset.from_tf_data_dataset(train_ds, batch_size=16)
    seq = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
        tf.keras.layers.Dense(10, activation="softmax")
    ])
    seq.compile(optimizer=tf.keras.optimizers.RMSprop(),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    model = KerasModel(seq)
    model.fit(train_ds)

    eval_ds = tf.data.Dataset.from_tensor_slices(
        (np.random.randn(102, 28, 28, 1),
         np.random.randint(0, 10, size=(102,))))
    eval_ds = eval_ds.map(lambda feature, label: (tf.to_float(feature),
                                                  label))
    eval_ds = TFDataset.from_tf_data_dataset(eval_ds, batch_per_thread=16)
    model.evaluate(eval_ds)
def create_training_dataset(self):
    """Return a seeded training TFDataset of 20 (feature, binary-label)
    pairs, batched by 4, reusing the same RDD for validation."""
    np.random.seed(20)
    feats = np.random.rand(20, 10)
    labs = np.random.randint(0, 2, (20))
    zipped = self.sc.parallelize(feats).zip(self.sc.parallelize(labs))
    return TFDataset.from_rdd(zipped,
                              features=(tf.float32, [10]),
                              labels=(tf.int32, []),
                              batch_size=4,
                              val_rdd=zipped)
def create_evaluation_dataset(self):
    """Return a seeded evaluation TFDataset of 20 (feature, binary-label)
    pairs with per-thread batching."""
    np.random.seed(20)
    feats = np.random.rand(20, 10)
    labs = np.random.randint(0, 2, (20))
    zipped = self.sc.parallelize(feats).zip(self.sc.parallelize(labs))
    return TFDataset.from_rdd(zipped,
                              features=(tf.float32, [10]),
                              labels=(tf.int32, []),
                              batch_per_thread=1)
def test_tfdataset_with_tfrecord(self):
    """A TFDataset built from TFRecord files exposes raw bytes that can be
    parsed and trained on with TFOptimizer."""
    train_path = os.path.join(resource_path, "tfrecord/mnist_train.tfrecord")
    test_path = os.path.join(resource_path, "tfrecord/mnist_test.tfrecord")
    dataset = TFDataset.from_tfrecord_file(self.sc, train_path,
                                           batch_size=16,
                                           validation_file_path=test_path)
    # first tensor holds the serialized example bytes
    raw_bytes = dataset.tensors[0]
    images, labels = parse_fn(raw_bytes)
    logits = tf.layers.dense(tf.layers.flatten(images), 10)
    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))
    opt = TFOptimizer.from_loss(loss, Adam())
    opt.optimize()
def test_tfdataset_with_tf_data_dataset_which_contains_bool(self):
    """A tf.data dataset with a boolean tensor component flows through
    TFDataset and can be used as a mask during TFOptimizer training.

    Fix: ``np.bool`` is a long-deprecated alias for the builtin ``bool``
    and was removed in NumPy 1.24, which makes this test crash with
    AttributeError on modern NumPy. The builtin produces the identical
    dtype.
    """
    dataset = tf.data.Dataset.from_tensor_slices(
        (np.random.randn(102, 28, 28, 1),
         np.random.randint(0, 10, size=(102,)),
         np.ones(shape=(102, 28, 28, 1), dtype=bool)))
    dataset = TFDataset.from_tf_data_dataset(dataset, batch_size=16)
    feature, labels, mask = dataset.tensors
    # zero out masked-off pixels before classification
    float_mask = tf.to_float(mask)
    masked_feature = tf.to_float(feature) * float_mask
    flatten = tf.layers.flatten(masked_feature)
    logits = tf.layers.dense(flatten, 10)
    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))
    opt = TFOptimizer.from_loss(loss, Adam())
    opt.optimize()
def test_tf_dataset_with_list_feature(self):
    """When `features` is a list of specs, TFDataset names its feature
    tensors "list_input_<i>:0" in order."""
    np.random.seed(20)
    feats = np.random.rand(20, 10)
    labs = np.random.randint(0, 2, (20))
    zipped = self.sc.parallelize(feats).zip(self.sc.parallelize(labs))
    dataset = TFDataset.from_rdd(zipped,
                                 features=[(tf.float32, [10]),
                                           (tf.float32, [10])],
                                 labels=(tf.int32, []),
                                 batch_size=4,
                                 val_rdd=zipped)
    for position, tensor in enumerate(dataset.feature_tensors):
        assert tensor.name == "list_input_" + str(position) + ":0"
def test_tf_optimizer_with_sparse_gradient(self):
    """TFOptimizer trains a model whose gradient is sparse (an
    embedding_lookup) for one epoch without error."""
    ids = np.random.randint(0, 10, size=[40])
    labels = np.random.randint(0, 5, size=[40])
    training_rdd = (self.sc.parallelize(ids)
                    .zip(self.sc.parallelize(labels))
                    .map(lambda pair: [pair[0], pair[1]]))
    with tf.Graph().as_default():
        # uses the older names/shapes/types TFDataset API
        dataset = TFDataset.from_rdd(training_rdd,
                                     names=["ids", "labels"],
                                     shapes=[[], []],
                                     types=[tf.int32, tf.int32],
                                     batch_size=8)
        id_tensor, label_tensor = dataset.tensors
        embedding_table = tf.get_variable(name="word_embedding",
                                          shape=[10, 5])
        embedding = tf.nn.embedding_lookup(embedding_table, id_tensor)
        loss = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(logits=embedding,
                                                   labels=label_tensor))
        optimizer = TFOptimizer.from_loss(loss, Adam(1e-3))
        optimizer.optimize(end_trigger=MaxEpoch(1))
        optimizer.sess.close()
def test_tf_optimizer_metrics(self):
    """Per-variable-scope optimizers are honored: "dense/" trained with
    Adam(1e-3) must change its weights, while "dense_1/" trained with
    SGD(0.0) must stay frozen."""
    feats = np.random.randn(20, 10)
    labs = np.random.randint(0, 10, size=[20])
    with tf.Graph().as_default():
        dataset = TFDataset.from_ndarrays((feats, labs),
                                          batch_size=4,
                                          val_tensors=(feats, labs))
        feature_tensor, label_tensor = dataset.tensors
        hidden = tf.layers.dense(feature_tensor, 8)
        output = tf.layers.dense(hidden, 10)
        loss = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(logits=output,
                                                   labels=label_tensor))
        optimizer = TFOptimizer.from_loss(
            loss,
            {"dense/": Adam(1e-3), "dense_1/": SGD(0.0)},
            val_outputs=[output],
            val_labels=[label_tensor],
            val_method=Accuracy(),
            metrics={"loss": loss})
        initial_weights = optimizer.tf_model.training_helper_layer.get_weights()
        optimizer.optimize(end_trigger=MaxEpoch(1))
        updated_weights = optimizer.tf_model.training_helper_layer.get_weights()
        # weights and bias combined with "dense/" should be updated
        for idx in (0, 1):
            assert not np.allclose(initial_weights[idx], updated_weights[idx])
        # weights and bias combined with "dense_1" should be unchanged
        for idx in (2, 3):
            assert np.allclose(initial_weights[idx], updated_weights[idx])
        optimizer.sess.close()
def test_tf_optimizer_with_sparse_gradient_using_keras(self):
    """A Keras model with an Embedding layer (sparse gradients) trains
    through TFOptimizer.from_keras without error."""
    import tensorflow as tf
    ids = np.random.randint(0, 10, size=[40])
    labels = np.random.randint(0, 5, size=[40])
    training_rdd = (self.sc.parallelize(ids)
                    .zip(self.sc.parallelize(labels))
                    .map(lambda pair: [pair[0], pair[1]]))
    dataset = TFDataset.from_rdd(training_rdd,
                                 features=(tf.int32, []),
                                 labels=(tf.int32, []),
                                 batch_size=8)
    words_input = tf.keras.layers.Input(shape=(), name='words_input')
    embedding_layer = tf.keras.layers.Embedding(input_dim=10,
                                                output_dim=5,
                                                name='word_embedding')
    word_embeddings = embedding_layer(words_input)
    flattened = tf.keras.layers.Flatten()(word_embeddings)
    output = tf.keras.layers.Dense(5, activation="softmax")(flattened)
    model = tf.keras.models.Model(inputs=[words_input], outputs=[output])
    model.compile(optimizer="sgd", loss="sparse_categorical_crossentropy")
    TFOptimizer.from_keras(model, dataset).optimize()
def main(max_epoch):
    """Train a small dense MNIST classifier with KerasModel for
    `max_epoch` epochs, evaluate it, and save the Keras weights.

    Initializes the NNContext for yarn-client/yarn-cluster (with
    proxy/memory settings) or locally.
    """
    args = parser.parse_args()
    cluster_mode = args.cluster_mode
    if cluster_mode.startswith("yarn"):
        hadoop_conf = os.environ.get("HADOOP_CONF_DIR")
        assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \
            "set the environment variable HADOOP_CONF_DIR"
        spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \
            .set("spark.executor.cores", 2) \
            .set("spark.executor.instances", 2) \
            .set("spark.executorEnv.HTTP_PROXY", "http://child-prc.intel.com:913") \
            .set("spark.executorEnv.HTTPS_PROXY", "http://child-prc.intel.com:913") \
            .set("spark.driver.memory", "2g")
        # the two yarn variants only differ in the mode string
        yarn_mode = "yarn-client" if cluster_mode == "yarn-client" else "yarn-cluster"
        sc = init_nncontext(spark_conf, cluster_mode=yarn_mode,
                            hadoop_conf=hadoop_conf)
    else:
        sc = init_nncontext()

    training_rdd = get_data_rdd("train", sc)
    testing_rdd = get_data_rdd("test", sc)
    dataset = TFDataset.from_rdd(training_rdd,
                                 features=(tf.float32, [28, 28, 1]),
                                 labels=(tf.int32, []),
                                 batch_size=320,
                                 val_rdd=testing_rdd)
    model = tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax'),
    ])
    model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    keras_model = KerasModel(model)
    keras_model.fit(dataset, epochs=max_epoch, distributed=True)

    eval_dataset = TFDataset.from_rdd(testing_rdd,
                                      features=(tf.float32, [28, 28, 1]),
                                      labels=(tf.int32, []),
                                      batch_per_thread=80)
    result = keras_model.evaluate(eval_dataset)
    print(result)
    # >> [0.08865142822265625, 0.9722]
    # the following assert is used for internal testing
    assert result['acc Top1Accuracy'] > 0.95
    model.save_weights("/tmp/mnist_keras.h5")
def test_checkpoint(self):
    """Checkpointing round-trip for TFOptimizer with a model_dir.

    Trains one epoch, locates the saved ``optimMethod-TFParkTraining.<n>``
    snapshot under model_dir, restores it into a fresh optimizer sharing
    the same session, and verifies that: (a) restoring reproduces the
    trained weights, (b) optimizing to the same MaxEpoch is a no-op, and
    (c) a larger MaxEpoch actually trains further.

    Fixes: assert message typo ("Cannot fine" -> "Cannot find"); regex is
    now a raw string so ``\\.`` is not a deprecated escape in a normal
    string literal; the dense-layer tensor no longer shadows the numpy
    ``features`` array.
    """
    features = np.random.randn(20, 10)
    labels = np.random.randint(0, 10, size=[20])
    with tf.Graph().as_default():
        dataset = TFDataset.from_ndarrays((features, labels),
                                          batch_size=4,
                                          val_tensors=(features, labels))
        feature_tensor, label_tensor = dataset.tensors
        hidden = tf.layers.dense(feature_tensor, 8)
        output = tf.layers.dense(hidden, 10)
        loss = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(logits=output,
                                                   labels=label_tensor))
        model_dir = tempfile.mkdtemp()
        try:
            optimizer = TFOptimizer.from_loss(
                loss, Adam(),
                val_outputs=[output],
                val_labels=[label_tensor],
                val_method=Accuracy(),
                metrics={"loss": loss},
                model_dir=model_dir)
            optimizer.optimize(end_trigger=MaxEpoch(1))
            first_weights = optimizer.sess.run(tf.trainable_variables()[0])

            # find the directory containing the optimMethod snapshots and
            # collect their iteration numbers
            import re
            ckpt_path = None
            versions = []
            for (root, dirs, files) in os.walk(model_dir, topdown=True):
                temp_versions = []
                for file_name in files:
                    if re.match(r"^optimMethod-TFParkTraining\.[0-9]+$",
                                file_name) is not None:
                        version = int(file_name.split(".")[1])
                        temp_versions.append(version)
                if temp_versions:
                    ckpt_path = root
                    versions = temp_versions
                    break
            assert ckpt_path is not None, "Cannot find checkpoint file"

            # reset variables so the restore has something to prove
            optimizer.sess.run(tf.global_variables_initializer())
            optimizer_load = TFOptimizer.from_loss(
                loss, Adam(),
                session=optimizer.sess,
                val_outputs=[output],
                val_labels=[label_tensor],
                val_method=Accuracy(),
                metrics={"loss": loss},
                model_dir=model_dir)
            optimizer_load.load_checkpoint(ckpt_path, max(versions))
            loaded_first_weights_before_train = optimizer.sess.run(
                tf.trainable_variables()[0])
            assert np.allclose(first_weights,
                               loaded_first_weights_before_train)

            # max epoch still 1, should not train
            optimizer_load.optimize(end_trigger=MaxEpoch(1))
            loaded_first_weights = optimizer.sess.run(
                tf.trainable_variables()[0])
            assert np.allclose(first_weights, loaded_first_weights)

            # max epoch increase 1, should train 1 epoch
            optimizer_load.optimize(end_trigger=MaxEpoch(2))
            loaded_first_weights_2 = optimizer.sess.run(
                tf.trainable_variables()[0])
            assert not np.allclose(first_weights, loaded_first_weights_2)
            optimizer_load.sess.close()
        finally:
            import shutil
            shutil.rmtree(model_dir)
def input_fn(mode):
    """Build a cat/dog TFDataset from the test resources folder.

    TRAIN and EVAL read labelled images from "cat_dog" (batched globally
    vs. per-thread); any other mode reads unlabelled images from
    "cat_dog/*/*". All modes resize to 256, random-crop to 224, and
    convert to an NHWC tensor.
    """
    import os
    resource_path = os.path.join(os.path.split(__file__)[0], "../resources")

    def build_transformer(with_label):
        # shared image pipeline; labelled sets also carry the target key
        if with_label:
            to_sample = ImageSetToSample(input_keys=["imageTensor"],
                                         target_keys=["label"])
        else:
            to_sample = ImageSetToSample(input_keys=["imageTensor"])
        return ChainedPreprocessing([
            ImageResize(256, 256),
            ImageRandomCrop(224, 224, True),
            ImageMatToTensor(format="NHWC"),
            to_sample
        ])

    if mode == tf.estimator.ModeKeys.TRAIN:
        image_set = ImageSet.read(os.path.join(resource_path, "cat_dog"),
                                  with_label=True,
                                  sc=self.sc,
                                  one_based_label=False)
        image_set = image_set.transform(build_transformer(True))
        return TFDataset.from_image_set(image_set,
                                        image=(tf.float32, [224, 224, 3]),
                                        label=(tf.int32, [1]),
                                        batch_size=8)
    if mode == tf.estimator.ModeKeys.EVAL:
        image_set = ImageSet.read(os.path.join(resource_path, "cat_dog"),
                                  with_label=True,
                                  sc=self.sc,
                                  one_based_label=False)
        image_set = image_set.transform(build_transformer(True))
        return TFDataset.from_image_set(image_set,
                                        image=(tf.float32, [224, 224, 3]),
                                        label=(tf.int32, [1]),
                                        batch_per_thread=8)
    # prediction: unlabelled glob over the class sub-folders
    image_set = ImageSet.read(os.path.join(resource_path, "cat_dog/*/*"),
                              with_label=False,
                              sc=self.sc,
                              one_based_label=False)
    image_set = image_set.transform(build_transformer(False))
    return TFDataset.from_image_set(image_set,
                                    image=(tf.float32, [224, 224, 3]),
                                    batch_per_thread=8)