def test_tf_optimizer_with_sparse_gradient_using_keras(self):
    import tensorflow as tf

    ids = np.random.randint(0, 10, size=[40])
    labels = np.random.randint(0, 5, size=[40])
    id_rdd = self.sc.parallelize(ids)
    label_rdd = self.sc.parallelize(labels)
    training_rdd = id_rdd.zip(label_rdd).map(lambda x: [x[0], x[1]])

    dataset = TFDataset.from_rdd(training_rdd,
                                 names=["ids", "labels"],
                                 shapes=[[], []],
                                 types=[tf.int32, tf.int32],
                                 batch_size=8)

    words_input = tf.keras.layers.Input(shape=(), name='words_input')
    embedding_layer = tf.keras.layers.Embedding(input_dim=10,
                                                output_dim=5,
                                                name='word_embedding')
    word_embeddings = embedding_layer(words_input)
    embedding = tf.keras.layers.Flatten()(word_embeddings)
    output = tf.keras.layers.Dense(5, activation="softmax")(embedding)
    model = tf.keras.models.Model(inputs=[words_input], outputs=[output])
    model.compile(optimizer="sgd", loss="sparse_categorical_crossentropy")

    optimizer = TFOptimizer.from_keras(model, dataset)
    optimizer.optimize()
def test_tf_optimizer_with_sparse_gradient_using_keras(self):
    import tensorflow as tf

    ids = np.random.randint(0, 10, size=[40])
    labels = np.random.randint(0, 5, size=[40])
    id_rdd = self.sc.parallelize(ids)
    label_rdd = self.sc.parallelize(labels)
    training_rdd = id_rdd.zip(label_rdd).map(lambda x: [x[0], x[1]])

    dataset = TFDataset.from_rdd(training_rdd,
                                 names=["ids", "labels"],
                                 shapes=[[], []],
                                 types=[tf.int32, tf.int32],
                                 batch_size=8)

    from tensorflow.python.ops import variable_scope

    # force non-resource variables so that the embedding layer produces
    # sparse (IndexedSlices) gradients
    def variable_creator(**kwargs):
        kwargs["use_resource"] = False
        return variable_scope.default_variable_creator(None, **kwargs)

    getter = lambda next_creator, **kwargs: variable_creator(**kwargs)

    with variable_scope.variable_creator_scope(getter):
        words_input = tf.keras.layers.Input(shape=(), name='words_input')
        embedding_layer = tf.keras.layers.Embedding(input_dim=10,
                                                    output_dim=5,
                                                    name='word_embedding')
        word_embeddings = embedding_layer(words_input)
        embedding = tf.keras.layers.Flatten()(word_embeddings)
        output = tf.keras.layers.Dense(5, activation="softmax")(embedding)
        model = tf.keras.models.Model(inputs=[words_input], outputs=[output])
        model.compile(optimizer="sgd", loss="sparse_categorical_crossentropy")

    optimizer = TFOptimizer.from_keras(model, dataset)
    optimizer.optimize()
def input_fn(mode):
    np.random.seed(20)
    x = np.random.rand(20, 10)
    y = np.random.randint(0, 10, (20))
    rdd_x = self.sc.parallelize(x)
    rdd_y = self.sc.parallelize(y)
    rdd = rdd_x.zip(rdd_y)
    if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
        dataset = TFDataset.from_rdd(rdd,
                                     features=(tf.float32, [10]),
                                     labels=(tf.int32, []))
    else:
        dataset = TFDataset.from_rdd(rdd_x,
                                     features=(tf.float32, [10]))
    return dataset
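# Usage note (illustrative, not part of the original snippet): an estimator-style
# wrapper calls this input_fn once per phase, so the TRAIN and EVAL branches return
# datasets carrying both features and labels, while the PREDICT branch returns a
# features-only dataset built from rdd_x alone, e.g.
#
#     train_set = input_fn(tf.estimator.ModeKeys.TRAIN)      # features + labels
#     eval_set = input_fn(tf.estimator.ModeKeys.EVAL)        # features + labels
#     predict_set = input_fn(tf.estimator.ModeKeys.PREDICT)  # features only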
def main(max_epoch):
    sc = init_nncontext()

    training_rdd = get_data_rdd("train", sc)
    testing_rdd = get_data_rdd("test", sc)
    dataset = TFDataset.from_rdd(training_rdd,
                                 features=(tf.float32, [28, 28, 1]),
                                 labels=(tf.int32, []),
                                 batch_size=320,
                                 val_rdd=testing_rdd)

    model = tf.keras.Sequential(
        [tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
         tf.keras.layers.Dense(64, activation='relu'),
         tf.keras.layers.Dense(64, activation='relu'),
         tf.keras.layers.Dense(10, activation='softmax'),
         ]
    )

    model.compile(optimizer=tf.keras.optimizers.RMSprop(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    keras_model = KerasModel(model)

    keras_model.fit(dataset,
                    epochs=max_epoch,
                    distributed=True)

    eval_dataset = TFDataset.from_rdd(testing_rdd,
                                      features=(tf.float32, [28, 28, 1]),
                                      labels=(tf.int32, []),
                                      batch_per_thread=80)

    result = keras_model.evaluate(eval_dataset)
    print(result)
    # >> [0.08865142822265625, 0.9722]

    # the following assert is used for internal testing
    assert result['acc Top1Accuracy'] > 0.95

    model.save_weights("/tmp/mnist_keras.h5")
def input_fn(mode):
    if mode == tf.estimator.ModeKeys.TRAIN:
        training_rdd = get_data_rdd("train", sc)
        dataset = TFDataset.from_rdd(training_rdd,
                                     features=(tf.float32, [28, 28, 1]),
                                     labels=(tf.int32, []),
                                     batch_size=320)
    elif mode == tf.estimator.ModeKeys.EVAL:
        testing_rdd = get_data_rdd("test", sc)
        dataset = TFDataset.from_rdd(testing_rdd,
                                     features=(tf.float32, [28, 28, 1]),
                                     labels=(tf.int32, []),
                                     batch_size=320)
    else:
        testing_rdd = get_data_rdd("test", sc).map(lambda x: x[0])
        dataset = TFDataset.from_rdd(testing_rdd,
                                     features=(tf.float32, [28, 28, 1]),
                                     batch_per_thread=80)
    return dataset
def test_dataset_without_batch(self):
    x = np.random.rand(20, 10)
    y = np.random.randint(0, 2, (20))

    rdd_x = self.sc.parallelize(x)
    rdd_y = self.sc.parallelize(y)

    rdd = rdd_x.zip(rdd_y)

    dataset = TFDataset.from_rdd(rdd,
                                 features=(tf.float32, [10]),
                                 labels=(tf.int32, []),
                                 names=["features", "labels"],
                                 val_rdd=rdd)
    keras_model = self.create_model()
    model = KerasModel(keras_model)
    self.intercept(
        lambda: model.fit(dataset),
        "The batch_size of TFDataset must be" +
        " specified when used in KerasModel fit.")

    dataset = TFDataset.from_rdd(rdd,
                                 features=(tf.float32, [10]),
                                 labels=(tf.int32, []),
                                 names=["features", "labels"],
                                 )
    self.intercept(
        lambda: model.evaluate(dataset),
        "The batch_per_thread of TFDataset must be " +
        "specified when used in KerasModel evaluate.")

    dataset = TFDataset.from_rdd(rdd_x,
                                 features=(tf.float32, [10]),
                                 names=["features", "labels"],
                                 )
    self.intercept(
        lambda: model.predict(dataset),
        "The batch_per_thread of TFDataset must be" +
        " specified when used in KerasModel predict.")
def create_predict_dataset(self):
    np.random.seed(20)
    x = np.random.rand(20, 10)
    rdd = self.sc.parallelize(x)

    rdd = rdd.map(lambda x: [x])

    dataset = TFDataset.from_rdd(rdd,
                                 features=(tf.float32, [10]),
                                 batch_per_thread=1)
    return dataset
def create_predict_dataset(self):
    np.random.seed(20)
    x = np.random.rand(20, 10)
    rdd = self.sc.parallelize(x)

    rdd = rdd.map(lambda x: [x])

    dataset = TFDataset.from_rdd(rdd,
                                 names=["features"],
                                 shapes=[[10]],
                                 types=[tf.float32],
                                 batch_per_thread=1)
    return dataset
def create_evaluation_dataset(self):
    np.random.seed(20)
    x = np.random.rand(20, 10)
    y = np.random.randint(0, 2, (20))

    rdd_x = self.sc.parallelize(x)
    rdd_y = self.sc.parallelize(y)

    rdd = rdd_x.zip(rdd_y)

    dataset = TFDataset.from_rdd(rdd,
                                 features=(tf.float32, [10]),
                                 labels=(tf.int32, []),
                                 batch_per_thread=1)
    return dataset
def create_training_dataset(self):
    np.random.seed(20)
    x = np.random.rand(20, 10)
    y = np.random.randint(0, 2, (20))

    rdd_x = self.sc.parallelize(x)
    rdd_y = self.sc.parallelize(y)

    rdd = rdd_x.zip(rdd_y)

    dataset = TFDataset.from_rdd(rdd,
                                 features=(tf.float32, [10]),
                                 labels=(tf.int32, []),
                                 batch_size=4,
                                 val_rdd=rdd)
    return dataset
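# A minimal sketch (not from the original tests) of how the fixtures above are
# typically combined. self.create_model() is assumed to build a compiled tf.keras
# model matching the 10-dim features, as referenced in test_dataset_without_batch;
# the method name and epoch count below are illustrative.
def sketch_fit_evaluate_predict(self):
    model = KerasModel(self.create_model())
    # training dataset carries batch_size, so fit is allowed
    model.fit(self.create_training_dataset(), epochs=1, distributed=True)
    # evaluation and prediction datasets carry batch_per_thread instead
    eval_result = model.evaluate(self.create_evaluation_dataset())
    predictions = model.predict(self.create_predict_dataset())
    print(eval_result, predictions)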
def main(max_epoch, data_num):
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    def get_data_rdd(dataset):
        (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", dataset)
        image_rdd = sc.parallelize(images_data[:data_num])
        labels_rdd = sc.parallelize(labels_data[:data_num])
        rdd = image_rdd.zip(labels_rdd) \
            .map(lambda rec_tuple: [normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                                    np.array(rec_tuple[1])])
        return rdd

    training_rdd = get_data_rdd("train")
    testing_rdd = get_data_rdd("test")
    dataset = TFDataset.from_rdd(training_rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], []],
                                 types=[tf.float32, tf.int32],
                                 batch_size=280,
                                 val_rdd=testing_rdd)

    # construct the model from TFDataset
    images, labels = dataset.tensors
    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10, is_training=True)
    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    # create an optimizer
    optimizer = TFOptimizer(loss, Adam(1e-3),
                            val_outputs=[logits],
                            val_labels=[labels],
                            val_method=Top1Accuracy())
    optimizer.set_train_summary(TrainSummary("/tmp/az_lenet", "lenet"))
    optimizer.set_val_summary(ValidationSummary("/tmp/az_lenet", "lenet"))
    # kick off training
    optimizer.optimize(end_trigger=MaxEpoch(max_epoch))

    saver = tf.train.Saver()
    saver.save(optimizer.sess, "/tmp/lenet/model")
def main(data_num):
    data = Input(shape=[28, 28, 1])
    x = Flatten()(data)
    x = Dense(64, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    predictions = Dense(10, activation='softmax')(x)
    model = Model(inputs=data, outputs=predictions)
    model.load_weights("/tmp/mnist_keras.h5")

    if DISTRIBUTED:
        # using RDD api to do distributed evaluation
        sc = init_nncontext()
        # get data, pre-process and create TFDataset
        (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "test")
        image_rdd = sc.parallelize(images_data[:data_num])
        labels_rdd = sc.parallelize(labels_data[:data_num])
        rdd = image_rdd.zip(labels_rdd) \
            .map(lambda rec_tuple: [normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD)])

        dataset = TFDataset.from_rdd(rdd,
                                     names=["features"],
                                     shapes=[[28, 28, 1]],
                                     types=[tf.float32],
                                     batch_per_thread=20)
        predictor = TFPredictor.from_keras(model, dataset)

        accuracy = predictor.predict().zip(labels_rdd).map(
            lambda x: np.argmax(x[0]) == x[1]).mean()

        print("predict accuracy is %s" % accuracy)
    else:
        # using keras api for local evaluation
        model.compile(optimizer='rmsprop',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

        (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "test")
        images_data = normalizer(images_data, mnist.TRAIN_MEAN, mnist.TRAIN_STD)
        result = model.evaluate(images_data, labels_data)
        print(model.metrics_names)
        print(result)
def main(max_epoch, data_num):
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    def get_data_rdd(dataset):
        (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", dataset)
        image_rdd = sc.parallelize(images_data[:data_num])
        labels_rdd = sc.parallelize(labels_data[:data_num])
        rdd = image_rdd.zip(labels_rdd) \
            .map(lambda rec_tuple: [normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                                    np.array(rec_tuple[1])])
        return rdd

    training_rdd = get_data_rdd("train")
    testing_rdd = get_data_rdd("test")
    dataset = TFDataset.from_rdd(training_rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], []],
                                 types=[tf.float32, tf.int32],
                                 batch_size=280,
                                 val_rdd=testing_rdd)

    data = Input(shape=[28, 28, 1])
    x = Flatten()(data)
    x = Dense(64, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    predictions = Dense(10, activation='softmax')(x)
    model = Model(inputs=data, outputs=predictions)

    model.compile(optimizer='rmsprop',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    optimizer = TFOptimizer.from_keras(model, dataset, model_dir="/tmp/mnist_keras")

    # kick off training
    optimizer.optimize(end_trigger=MaxEpoch(max_epoch))

    model.save_weights("/tmp/mnist_keras/mnist_keras.h5")
def test_tf_dataset_with_list_feature(self):
    np.random.seed(20)
    x = np.random.rand(20, 10)
    y = np.random.randint(0, 2, (20))

    rdd_x = self.sc.parallelize(x)
    rdd_y = self.sc.parallelize(y)

    rdd = rdd_x.zip(rdd_y)

    dataset = TFDataset.from_rdd(rdd,
                                 features=[(tf.float32, [10]), (tf.float32, [10])],
                                 labels=(tf.int32, []),
                                 batch_size=4,
                                 val_rdd=rdd)

    for idx, tensor in enumerate(dataset.feature_tensors):
        assert tensor.name == "list_input_" + str(idx) + ":0"
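# Illustrative follow-up (not part of the original test): a dataset declared with a
# list of feature specs lines up with a multi-input Keras model, which can then be
# trained through TFOptimizer.from_keras as in the earlier snippets. Note the fixture
# rdd above carries only one feature array per record; a real run would need an rdd
# whose elements contain two feature arrays plus the label. Layer sizes are illustrative.
def sketch_list_feature_keras_model(dataset):
    input1 = tf.keras.layers.Input(shape=(10,))
    input2 = tf.keras.layers.Input(shape=(10,))
    merged = tf.keras.layers.concatenate([input1, input2])
    output = tf.keras.layers.Dense(2, activation="softmax")(merged)
    model = tf.keras.models.Model(inputs=[input1, input2], outputs=output)
    model.compile(optimizer="sgd", loss="sparse_categorical_crossentropy")

    optimizer = TFOptimizer.from_keras(model, dataset)
    optimizer.optimize(end_trigger=MaxEpoch(1))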
def main(data_num):
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "test")
    image_rdd = sc.parallelize(images_data[:data_num])
    labels_rdd = sc.parallelize(labels_data[:data_num])
    rdd = image_rdd.zip(labels_rdd) \
        .map(lambda rec_tuple: [normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                                np.array(rec_tuple[1])])

    dataset = TFDataset.from_rdd(rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], [1]],
                                 types=[tf.float32, tf.int32],
                                 batch_per_thread=20)

    # construct the model from TFDataset
    images, labels = dataset.tensors
    labels = tf.squeeze(labels)
    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10, is_training=False)

    predictions = tf.to_int32(tf.argmax(logits, axis=1))
    correct = tf.expand_dims(tf.to_int32(tf.equal(predictions, labels)), axis=1)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, "/tmp/lenet/model")

        predictor = TFPredictor(sess, [correct])

        accuracy = predictor.predict().mean()
        print("predict accuracy is %s" % accuracy)
def main(max_epoch, data_num):
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    def get_data_rdd(dataset):
        (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", dataset)
        image_rdd = sc.parallelize(images_data[:data_num])
        labels_rdd = sc.parallelize(labels_data[:data_num])
        rdd = image_rdd.zip(labels_rdd) \
            .map(lambda rec_tuple: [normalizer(rec_tuple[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                                    np.array(rec_tuple[1])])
        return rdd

    training_rdd = get_data_rdd("train")
    testing_rdd = get_data_rdd("test")
    dataset = TFDataset.from_rdd(training_rdd,
                                 features=(tf.float32, [28, 28, 1]),
                                 labels=(tf.int32, []),
                                 batch_size=280,
                                 val_rdd=testing_rdd)

    # construct the model from TFDataset
    images, labels = dataset.tensors
    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10, is_training=True)
    loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))
    acc = accuracy(logits, labels)

    # create an optimizer
    optimizer = TFOptimizer.from_loss(loss, Adam(1e-3),
                                      metrics={"acc": acc},
                                      model_dir="/tmp/lenet/")
    # kick off training
    optimizer.optimize(end_trigger=MaxEpoch(max_epoch))

    saver = tf.train.Saver()
    saver.save(optimizer.sess, "/tmp/lenet/model")
def test_tf_optimizer_with_sparse_gradient(self):
    ids = np.random.randint(0, 10, size=[40])
    labels = np.random.randint(0, 5, size=[40])
    id_rdd = self.sc.parallelize(ids)
    label_rdd = self.sc.parallelize(labels)
    training_rdd = id_rdd.zip(label_rdd).map(lambda x: [x[0], x[1]])

    with tf.Graph().as_default():
        dataset = TFDataset.from_rdd(training_rdd,
                                     names=["ids", "labels"],
                                     shapes=[[], []],
                                     types=[tf.int32, tf.int32],
                                     batch_size=8)
        id_tensor, label_tensor = dataset.tensors
        embedding_table = tf.get_variable(name="word_embedding", shape=[10, 5])

        embedding = tf.nn.embedding_lookup(embedding_table, id_tensor)
        loss = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(logits=embedding, labels=label_tensor))

        optimizer = TFOptimizer(loss, Adam(1e-3))
        optimizer.optimize(end_trigger=MaxEpoch(1))
        optimizer.sess.close()
from bigdl.dataset import mnist
from tensorflow_gan.examples.mnist.networks import *
from tensorflow_gan.python.losses.losses_impl import *


def get_data_rdd(dataset):
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", dataset)
    # scale pixel values to [-1, 1]
    image_rdd = sc.parallelize(images_data).map(lambda img: [((img / 255) - 0.5) * 2])
    return image_rdd


if __name__ == "__main__":
    sc = init_nncontext()

    training_rdd = get_data_rdd("train")
    dataset = TFDataset.from_rdd(training_rdd,
                                 features=(tf.float32, (28, 28, 1)),
                                 batch_size=32)

    def noise_fn(batch_size):
        return tf.random.normal(mean=0.0, stddev=1.0, shape=(batch_size, 10))

    # pair a noise tensor with each image batch: (noise, real_images)
    dataset = dataset.map(lambda tensors: (noise_fn(tf.shape(tensors[0])[0]), tensors[0]))

    opt = GANEstimator(
        generator_fn=unconditional_generator,
        discriminator_fn=unconditional_discriminator,
        generator_loss_fn=wasserstein_generator_loss,
        discriminator_loss_fn=wasserstein_discriminator_loss,
        generator_optimizer=Adam(1e-3, beta1=0.5),
        discriminator_optimizer=Adam(1e-4, beta1=0.5),
        model_dir="/tmp/gan_model/model"
    )