# Imports assumed by the snippets in this section (Analytics Zoo TFPark on
# TensorFlow 1.x). The test methods additionally assume a test class that
# provides self.sc (a SparkContext) and the helper factories sketched below;
# the exact import paths are plausible guesses, not verified against one
# specific release.
import os

import numpy as np
import pytest
import tensorflow as tf

from bigdl.dataset import mnist
from bigdl.util.common import create_spark_conf
from zoo import init_nncontext
from zoo.tfpark import TFDataset, KerasModel


def test_tfdataset_with_tf_data_dataset_which_requires_table(self):
    keys = [1, 0, -1]
    dataset = tf.data.Dataset.from_tensor_slices([1, 2, -1, 5] * 40)
    table = tf.contrib.lookup.HashTable(
        initializer=tf.contrib.lookup.KeyValueTensorInitializer(
            keys=keys, values=list(reversed(keys))),
        default_value=100)
    # Replace each element with its lookup value before training
    dataset = dataset.map(table.lookup)

    def transform(x):
        float_x = tf.to_float(x)
        return float_x, 1
    dataset = dataset.map(transform)
    dataset = TFDataset.from_tf_data_dataset(dataset, batch_size=16)
    seq = tf.keras.Sequential(
        [tf.keras.layers.Flatten(input_shape=()),
         tf.keras.layers.Dense(10, activation="softmax")])

    seq.compile(optimizer=tf.keras.optimizers.RMSprop(),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    model = KerasModel(seq)
    model.fit(dataset)
def test_training_with_ndarray_distributed(self):
    keras_model = self.create_model()
    model = KerasModel(keras_model)
    x, y = self.create_training_data()
    model.fit(x, y, batch_size=4, distributed=True)
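# The tests above and below call helper factories defined elsewhere in the
# test class. What follows is a minimal sketch of plausible implementations;
# the layer sizes and data shapes are assumptions chosen to be mutually
# consistent (20 samples of 10 features, binary labels), not the originals.
def create_model(self):
    # Hypothetical: a tiny two-class classifier matching create_training_data
    data = tf.keras.layers.Input(shape=[10])
    x = tf.keras.layers.Dense(10, activation='relu')(data)
    predictions = tf.keras.layers.Dense(2, activation='softmax')(x)
    model = tf.keras.models.Model(inputs=data, outputs=predictions)
    model.compile(optimizer='rmsprop',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model

def create_training_data(self):
    # Hypothetical: random features with integer labels in {0, 1}
    x = np.random.rand(20, 10)
    y = np.random.randint(0, 2, (20,))
    return x, y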
def test_training_and_validation_with_dataset(self):
    keras_model = self.create_model()
    model = KerasModel(keras_model)
    dataset = self.create_training_dataset()
    model.fit(dataset)
def test_training_with_ndarray(self):
    keras_model = self.create_model()
    model = KerasModel(keras_model)
    x, y = self.create_training_data()
    model.fit(x, y, batch_size=2)
def test_training_with_validation_data_distributed(self):
    keras_model = self.create_model()
    model = KerasModel(keras_model)
    x, y = self.create_training_data()
    val_x, val_y = self.create_training_data()
    model.fit(x, y, validation_data=(val_x, val_y), batch_size=4, distributed=True)
def test_training_with_validation_data_distributed_multi_heads(self):
    keras_model = self.create_multi_input_output_model()
    model = KerasModel(keras_model)
    x, y = self.create_training_data()
    val_x, val_y = self.create_training_data()
    model.fit([x, x], [y, y],
              validation_data=([val_x, val_x], [val_y, val_y]),
              batch_size=4,
              distributed=True)
def test_evaluate_with_ndarray_distributed(self):
    keras_model = self.create_model()
    model = KerasModel(keras_model)
    x, y = self.create_training_data()
    results_pre = model.evaluate(x, y, batch_per_thread=1)
    model.fit(x, y, batch_size=4, epochs=10)
    results_after = model.evaluate(x, y, distributed=True, batch_per_thread=1)
    # Training should reduce the loss
    assert results_pre["loss"] > results_after["loss"]
def test_evaluate_with_ndarray(self):
    keras_model = self.create_model()
    model = KerasModel(keras_model)
    x, y = self.create_training_data()
    results_pre = model.evaluate(x, y)
    model.fit(x, y, batch_size=4, epochs=10)
    results_after = model.evaluate(x, y)
    # Training should reduce the loss
    assert results_pre["loss"] > results_after["loss"]
def check_dataset(self, create_ds):
    seq = tf.keras.Sequential(
        [tf.keras.layers.Flatten(input_shape=(20,)),
         tf.keras.layers.Dense(10, activation="softmax")])

    seq.compile(optimizer=tf.keras.optimizers.RMSprop(),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    model = KerasModel(seq)

    model.fit(create_ds("train"))
    model.predict(create_ds("predict")).collect()
    model.evaluate(create_ds("evaluate"))
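# check_dataset expects a factory that builds a TFDataset per mode. Below is a
# hedged sketch of one such factory; make_create_ds and all of its internals
# are hypothetical, assuming self.sc is a SparkContext and 20-dimensional
# features to match the Flatten(input_shape=(20,)) above.
def make_create_ds(self):
    rdd = self.sc.range(0, 100).map(
        lambda i: (np.random.rand(20).astype(np.float32),
                   np.random.randint(0, 10)))

    def create_ds(mode):
        if mode == "train":
            # Training requires batch_size
            return TFDataset.from_rdd(rdd,
                                      features=(tf.float32, [20]),
                                      labels=(tf.int32, []),
                                      batch_size=16)
        elif mode == "predict":
            # Prediction takes features only, with batch_per_thread
            return TFDataset.from_rdd(rdd.map(lambda t: t[0]),
                                      features=(tf.float32, [20]),
                                      batch_per_thread=16)
        else:  # "evaluate"
            return TFDataset.from_rdd(rdd,
                                      features=(tf.float32, [20]),
                                      labels=(tf.int32, []),
                                      batch_per_thread=16)
    return create_ds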
def test_tensorflow_optimizer(self):
    data = tf.keras.layers.Input(shape=[10])
    x = tf.keras.layers.Flatten()(data)
    x = tf.keras.layers.Dense(10, activation='relu')(x)
    predictions = tf.keras.layers.Dense(2, activation='softmax')(x)

    model = tf.keras.models.Model(inputs=data, outputs=predictions)
    # A native tf.train optimizer instead of a tf.keras one
    model.compile(optimizer=tf.train.AdamOptimizer(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    keras_model = KerasModel(model)

    x, y = self.create_training_data()
    keras_model.fit(x, y, batch_size=4, distributed=True)
def test_invalid_data_handling(self):
    keras_model = self.create_multi_input_output_model()
    model = KerasModel(keras_model)
    x, y = self.create_training_data()
    val_x, val_y = self.create_training_data()

    # The number of targets does not match the number of model outputs
    with pytest.raises(AssertionError) as excinfo:
        model.fit([x, x], [y, y, y], batch_size=4, distributed=True)
    assert "model_target number does not match data number" in str(excinfo.value)

    # A dict input must cover every model input name
    with pytest.raises(AssertionError) as excinfo:
        model.fit({"input_1": x}, [y, y], batch_size=4, distributed=True)
    assert "all model_input names should exist in data" in str(excinfo.value)
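# A hedged sketch of the two-input/two-output model factory assumed by the
# multi-head tests; the layer sizes are illustrative, not the originals.
def create_multi_input_output_model(self):
    data1 = tf.keras.layers.Input(shape=[10])
    data2 = tf.keras.layers.Input(shape=[10])
    out1 = tf.keras.layers.Dense(2, activation='softmax')(data1)
    out2 = tf.keras.layers.Dense(2, activation='softmax')(data2)
    model = tf.keras.models.Model(inputs=[data1, data2], outputs=[out1, out2])
    model.compile(optimizer='rmsprop',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model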
def test_tfdataset_with_tf_data_dataset(self):
    dataset = tf.data.Dataset.from_tensor_slices(
        (np.random.randn(102, 28, 28, 1),
         np.random.randint(0, 10, size=(102,))))
    dataset = dataset.map(lambda feature, label: (tf.to_float(feature), label))
    dataset = TFDataset.from_tf_data_dataset(dataset, batch_size=16)
    seq = tf.keras.Sequential(
        [tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
         tf.keras.layers.Dense(10, activation="softmax")])

    seq.compile(optimizer=tf.keras.optimizers.RMSprop(),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
    model = KerasModel(seq)
    model.fit(dataset)

    dataset = tf.data.Dataset.from_tensor_slices(
        (np.random.randn(102, 28, 28, 1),
         np.random.randint(0, 10, size=(102,))))
    dataset = dataset.map(lambda feature, label: (tf.to_float(feature), label))
    # Evaluation uses batch_per_thread instead of batch_size
    dataset = TFDataset.from_tf_data_dataset(dataset, batch_per_thread=16)
    model.evaluate(dataset)
def test_gradient_clipping(self):
    data = tf.keras.layers.Input(shape=[10])
    x = tf.keras.layers.Flatten()(data)
    x = tf.keras.layers.Dense(10, activation='relu')(x)
    predictions = tf.keras.layers.Dense(2, activation='softmax')(x)
    model = tf.keras.models.Model(inputs=data, outputs=predictions)
    model.compile(optimizer=tf.keras.optimizers.SGD(lr=1, clipvalue=1e-8),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model = KerasModel(model)
    pre_weights = model.get_weights()
    dataset = self.create_training_dataset()
    # 5 iterations
    model.fit(dataset)
    current_weight = model.get_weights()
    # With clipvalue=1e-8 the weights should barely move; the original line
    # computed this condition without asserting it, so assert it here
    assert np.all(np.abs(current_weight[0] - pre_weights[0]) < 1e-7)
def test_dataset_without_batch(self):
    x = np.random.rand(20, 10)
    y = np.random.randint(0, 2, (20))

    rdd_x = self.sc.parallelize(x)
    rdd_y = self.sc.parallelize(y)

    rdd = rdd_x.zip(rdd_y)

    dataset = TFDataset.from_rdd(rdd,
                                 features=(tf.float32, [10]),
                                 labels=(tf.int32, []),
                                 names=["features", "labels"],
                                 val_rdd=rdd)

    keras_model = self.create_model()
    model = KerasModel(keras_model)
    self.intercept(lambda: model.fit(dataset),
                   "The batch_size of TFDataset must be" +
                   " specified when used in KerasModel fit.")

    dataset = TFDataset.from_rdd(rdd,
                                 features=(tf.float32, [10]),
                                 labels=(tf.int32, []),
                                 names=["features", "labels"])
    self.intercept(lambda: model.evaluate(dataset),
                   "The batch_per_thread of TFDataset must be " +
                   "specified when used in KerasModel evaluate.")

    dataset = TFDataset.from_rdd(rdd_x,
                                 features=(tf.float32, [10]),
                                 names=["features", "labels"])
    self.intercept(lambda: model.predict(dataset),
                   "The batch_per_thread of TFDataset must be" +
                   " specified when used in KerasModel predict.")
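# intercept is a test-base helper not shown in this section. A minimal sketch,
# assuming it only checks that the callable raises and that the expected
# message appears in the exception text:
def intercept(self, func, error_message):
    try:
        func()
    except Exception as e:
        assert error_message in str(e), \
            "expected message not found in exception: " + str(e)
        return
    raise AssertionError("expected an exception containing: " + error_message)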
def main(max_epoch): args = parser.parse_args() cluster_mode = args.cluster_mode if cluster_mode.startswith("yarn"): hadoop_conf = os.environ.get("HADOOP_CONF_DIR") assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \ "set the environment variable HADOOP_CONF_DIR" spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \ .set("spark.executor.cores", 2) \ .set("spark.executor.instances", 2) \ .set("spark.driver.memory", "2g") if cluster_mode == "yarn-client": _ = init_nncontext(spark_conf, cluster_mode="yarn-client", hadoop_conf=hadoop_conf) else: _ = init_nncontext(spark_conf, cluster_mode="yarn-cluster", hadoop_conf=hadoop_conf) else: _ = init_nncontext() (training_images_data, training_labels_data) = mnist.read_data_sets("/tmp/mnist", "train") (testing_images_data, testing_labels_data) = mnist.read_data_sets("/tmp/mnist", "test") training_images_data = (training_images_data - mnist.TRAIN_MEAN) / mnist.TRAIN_STD testing_images_data = (testing_images_data - mnist.TRAIN_MEAN) / mnist.TRAIN_STD model = tf.keras.Sequential([ tf.keras.layers.Flatten(input_shape=(28, 28, 1)), tf.keras.layers.Dense(64, activation='relu'), tf.keras.layers.Dense(64, activation='relu'), tf.keras.layers.Dense(10, activation='softmax'), ]) model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy']) keras_model = KerasModel(model) keras_model.fit(training_images_data, training_labels_data, validation_data=(testing_images_data, testing_labels_data), epochs=max_epoch, batch_size=320, distributed=True) result = keras_model.evaluate(testing_images_data, testing_labels_data, distributed=True, batch_per_thread=80) print(result) # >> [0.08865142822265625, 0.9722] # the following assert is used for internal testing assert result['acc Top1Accuracy'] > 0.95 keras_model.save_weights("/tmp/mnist_keras.h5")
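# Both example scripts above and below read a module-level argparse parser that
# is not shown in this excerpt. A plausible sketch follows; the --max_epoch
# flag name and the defaults are guesses. Each script would then call
# main(args.max_epoch) under an `if __name__ == '__main__':` guard.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--cluster_mode', type=str, default="local",
                    help="The cluster mode, such as local, yarn-client or yarn-cluster.")
parser.add_argument('--max_epoch', type=int, default=5,
                    help="Number of epochs to train for.")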
def main(max_epoch): args = parser.parse_args() cluster_mode = args.cluster_mode if cluster_mode.startswith("yarn"): hadoop_conf = os.environ.get("HADOOP_CONF_DIR") assert hadoop_conf, "Directory path to hadoop conf not found for yarn-client mode. Please " \ "set the environment variable HADOOP_CONF_DIR" spark_conf = create_spark_conf().set("spark.executor.memory", "5g") \ .set("spark.executor.cores", 2) \ .set("spark.executor.instances", 2) \ .set("spark.executorEnv.HTTP_PROXY", "http://child-prc.intel.com:913") \ .set("spark.executorEnv.HTTPS_PROXY", "http://child-prc.intel.com:913") \ .set("spark.driver.memory", "2g") if cluster_mode == "yarn-client": sc = init_nncontext(spark_conf, cluster_mode="yarn-client", hadoop_conf=hadoop_conf) else: sc = init_nncontext(spark_conf, cluster_mode="yarn-cluster", hadoop_conf=hadoop_conf) else: sc = init_nncontext() training_rdd = get_data_rdd("train", sc) testing_rdd = get_data_rdd("test", sc) dataset = TFDataset.from_rdd(training_rdd, features=(tf.float32, [28, 28, 1]), labels=(tf.int32, []), batch_size=320, val_rdd=testing_rdd) model = tf.keras.Sequential([ tf.keras.layers.Flatten(input_shape=(28, 28, 1)), tf.keras.layers.Dense(64, activation='relu'), tf.keras.layers.Dense(64, activation='relu'), tf.keras.layers.Dense(10, activation='softmax'), ]) model.compile(optimizer=tf.keras.optimizers.RMSprop(), loss='sparse_categorical_crossentropy', metrics=['accuracy']) keras_model = KerasModel(model) keras_model.fit(dataset, epochs=max_epoch, distributed=True) eval_dataset = TFDataset.from_rdd(testing_rdd, features=(tf.float32, [28, 28, 1]), labels=(tf.int32, []), batch_per_thread=80) result = keras_model.evaluate(eval_dataset) print(result) # >> [0.08865142822265625, 0.9722] # the following assert is used for internal testing assert result['acc Top1Accuracy'] > 0.95 model.save_weights("/tmp/mnist_keras.h5")
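# get_data_rdd is called above but not defined in this excerpt. A hedged
# sketch, assuming it turns the BigDL mnist arrays into an RDD of
# (normalized image, label) pairs; the normalization mirrors the ndarray
# example above, but the exact implementation is an assumption.
def get_data_rdd(dataset, sc):
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", dataset)
    image_rdd = sc.parallelize(images_data)
    labels_rdd = sc.parallelize(labels_data)
    rdd = image_rdd.zip(labels_rdd) \
        .map(lambda rec_tuple: ((rec_tuple[0] - mnist.TRAIN_MEAN) / mnist.TRAIN_STD,
                                np.array(rec_tuple[1])))
    return rdd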