def train(self, input_fn, steps=None):
    """Trains a model given training data `input_fn`.

    :param input_fn: A function that constructs the input data for training.
        The function should construct and return one of the following:

        * A `TFDataset` object, each elements of which is a tuple
          `(features, labels)`.
        * A `tf.data.Dataset` object: Outputs of `Dataset` object must be a
          tuple `(features, labels)` with same constraints as below.
        * A tuple `(features, labels)`: Where `features` is a `tf.Tensor` or a
          dictionary of string feature name to `Tensor` and `labels` is a
          `Tensor` or a dictionary of string label name to `Tensor`. Both
          `features` and `labels` are consumed by `model_fn`. They should
          satisfy the expectation of `model_fn` from inputs.
    :param steps: Number of steps for which to train the model.

    :return: `self`, for chaining.
    """
    with tf.Graph().as_default() as g:
        # Build the global step plus an op that lets us advance it by an
        # arbitrary amount once distributed training has finished.
        global_step_tensor = self.estimator._create_and_assert_global_step(g)
        add_step_input = tf.placeholder(dtype=tf.int64, shape=())
        assign_step = tf.assign_add(global_step_tensor, add_step_input)
        result = self.estimator._call_input_fn(input_fn,
                                               tf.estimator.ModeKeys.TRAIN)
        if isinstance(result, TFDataset):
            # Only the TFDataset path is trained on BigDL; any other input
            # type falls through to the plain Estimator at the bottom.
            if not result.has_batch:
                raise ValueError("The batch_size of TFDataset must be " +
                                 "specified when used for training.")
            spec = self._call_model_fn(result.feature_tensors,
                                       result.label_tensors,
                                       tf.estimator.ModeKeys.TRAIN,
                                       self.config)
            optim_method = TFOptimizer.to_bigdl_optim_method(
                koptim_method=self.optimizer)
            latest_checkpoint = self.estimator.latest_checkpoint()
            with tf.Session() as sess:
                saver = tf.train.Saver()
                # Resume from the newest checkpoint when one exists,
                # otherwise start from freshly initialized variables.
                if latest_checkpoint:
                    saver.restore(sess, latest_checkpoint)
                else:
                    sess.run(tf.global_variables_initializer())
                opt = TFOptimizer.from_loss(
                    spec.loss, optim_method, session=sess,
                    clip_norm=self.gradient_clipping_norm,
                    clip_value=self.gradient_clipping_constant)
                opt.optimize(MaxIteration(steps))
                # BigDL does not touch the TF global step, so bump it
                # manually before checkpointing the trained weights.
                sess.run(assign_step, feed_dict={add_step_input: steps})
                final_step = sess.run(global_step_tensor)
                saver.save(sess, self.estimator.model_dir + "/model",
                           global_step=final_step)
                return self

    # Non-TFDataset inputs: delegate to the wrapped tf.estimator.Estimator.
    return self.estimator.train(input_fn, steps=steps)
def fit(self, data, steps, batch_size=32, validation_data=None,
        feed_dict=None, session_config=None):
    """Train the model on `data` for `steps` iterations.

    :param data: training data; a `SparkXShards` or a `zoo.orca.data`
        `Dataset` (anything else raises `ValueError`).
    :param steps: number of optimization iterations to run.
    :param batch_size: total training batch size across the cluster.
    :param validation_data: optional validation counterpart of `data`.
    :param feed_dict: optional dict of tensor -> value fed during training.
    :param session_config: optional `tf.ConfigProto` for the session.
    :return: `self`, for chaining.
    """
    assert self.labels is not None, \
        "labels is None; it should not be None in training"
    assert self.loss is not None, \
        "loss is None; it should not be None in training"
    assert self.optimizer is not None, \
        "optimizer is None; it should not be None in training"

    if isinstance(data, SparkXShards):
        dataset = _xshards_to_tf_dataset(data,
                                         batch_size=batch_size,
                                         validation_data_shard=validation_data)
    elif isinstance(data, Dataset):
        dataset = TFDataDataset2(data, batch_size=batch_size,
                                 batch_per_thread=-1,
                                 validation_dataset=validation_data)
    else:
        # Fix: the "{}" placeholder was never filled in the original,
        # so the error message printed a literal "{}".
        raise ValueError("data type {} is not supported; "
                         "it must be created by zoo.orca.data.package"
                         .format(type(data)))

    if feed_dict is not None:
        # TFOptimizer expects a (training_value, validation_value) pair
        # per tensor; feed the same value in both phases.
        tensor_with_value = {
            key: (value, value) for key, value in feed_dict.items()
        }
    else:
        tensor_with_value = None

    optimizer = TFOptimizer.from_train_op(
        train_op=self.train_op,
        loss=self.loss,
        inputs=self.inputs,
        labels=self.labels,
        dataset=dataset,
        metrics=self.metrics,
        updates=self.updates,
        sess=self.sess,
        tensor_with_value=tensor_with_value,
        session_config=session_config,
        model_dir=self.model_dir)

    optimizer.optimize(end_trigger=MaxIteration(steps))
    return self
def main():
    """Train LeNet on MNIST with a TFDataset, then evaluate on the test set."""
    sc = init_nncontext()

    # Read the MNIST training split, normalize each image, and zip images
    # with labels into one RDD of [image, label] records for TFDataset.
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "train")
    record_rdd = sc.parallelize(images_data) \
        .zip(sc.parallelize(labels_data)) \
        .map(lambda rec: [normalizer(rec[0], mnist.TRAIN_MEAN, mnist.TRAIN_STD),
                          np.array(rec[1])])
    dataset = TFDataset.from_rdd(record_rdd,
                                 names=["features", "labels"],
                                 shapes=[(None, 28, 28, 1), (None, 1)],
                                 types=[tf.float32, tf.int32])

    # Wire the LeNet graph directly onto the dataset's input tensors.
    images, labels = dataset.inputs
    labels = tf.squeeze(labels)
    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10,
                                         is_training=True)
    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    # Kick off training with Adam; change MaxIteration to MaxEpoch(5)
    # to let the model converge.
    optimizer = TFOptimizer(loss, Adam(1e-3))
    optimizer.optimize(end_trigger=MaxIteration(20), batch_size=280)

    # Evaluate accuracy and loss on the (normalized) test split, reusing
    # the session that the optimizer trained in.
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "test")
    images_data = normalizer(images_data, mnist.TRAIN_MEAN, mnist.TRAIN_STD)
    predictions = tf.argmax(logits, axis=1)
    predictions_data, loss_value = optimizer.sess.run(
        [predictions, loss],
        feed_dict={images: images_data, labels: labels_data})
    print(np.mean(np.equal(predictions_data, labels_data)))
    print(loss_value)
def fit(self, data, steps, batch_size=32, validation_data=None,
        feed_dict=None, session_config=None):
    """Train the model on `data` for `steps` iterations.

    :param data: training data accepted by `_to_dataset`.
    :param steps: number of optimization iterations to run.
    :param batch_size: total training batch size across the cluster.
    :param validation_data: optional validation data (currently only
        consumed via `_to_dataset`'s batching here — TODO confirm).
    :param feed_dict: optional dict of tensor -> value fed during training.
    :param session_config: optional `tf.ConfigProto` for the session.
    :return: `self`, for chaining.
    """
    assert self.labels is not None, \
        "labels is None; it should not be None in training"
    assert self.loss is not None, \
        "loss is None; it should not be None in training"
    assert self.optimizer is not None, \
        "optimizer is None; it should not be None in training"

    # batch_per_thread=-1 selects training-style (distributed) batching.
    dataset = _to_dataset(data, batch_size=batch_size, batch_per_thread=-1)

    if feed_dict is not None:
        # TFOptimizer expects a (training_value, validation_value) pair
        # per tensor; feed the same value in both phases.
        tensor_with_value = {
            key: (value, value) for key, value in feed_dict.items()
        }
    else:
        tensor_with_value = None

    optimizer = TFOptimizer.from_train_op(
        train_op=self.train_op,
        loss=self.loss,
        inputs=self.inputs,
        labels=self.labels,
        dataset=dataset,
        metrics=self.metrics,
        updates=self.updates,
        sess=self.sess,
        tensor_with_value=tensor_with_value,
        session_config=session_config,
        model_dir=self.model_dir)

    optimizer.optimize(end_trigger=MaxIteration(steps))
    return self
def train(self, input_fn, steps=None):
    """Trains the model for `steps` iterations using data from `input_fn`.

    When `input_fn` yields a `TFDataset`, training runs on BigDL via
    `TFOptimizer`; otherwise it is delegated to the wrapped
    `tf.estimator.Estimator`.

    :param input_fn: a function constructing the training input
        (a `TFDataset`, `tf.data.Dataset`, or `(features, labels)` tuple —
        same contract as `tf.estimator.Estimator.train`'s `input_fn`).
    :param steps: number of steps for which to train the model.
    :return: `self`, for chaining.
    """
    with tf.Graph().as_default() as g:
        # Global step plus an op to advance it by an arbitrary amount
        # once BigDL training (which does not touch it) has finished.
        global_step_tensor = self.estimator._create_and_assert_global_step(
            g)
        add_step_input = tf.placeholder(dtype=tf.int64, shape=())
        assign_step = tf.assign_add(global_step_tensor, add_step_input)
        result = self.estimator._call_input_fn(input_fn,
                                               tf.estimator.ModeKeys.TRAIN)
        if isinstance(result, TFDataset):
            if not result.has_batch:
                raise ValueError("The batch_size of TFDataset must be " +
                                 "specified when used for training.")
            spec = self._call_model_fn(result.feature_tensors,
                                      result.label_tensors,
                                      tf.estimator.ModeKeys.TRAIN,
                                      self.config)
            optim_method = TFOptimizer.to_bigdl_optim_method(
                koptim_method=self.optimizer)
            latest_checkpoint = self.estimator.latest_checkpoint()
            with tf.Session() as sess:
                saver = tf.train.Saver()
                # Resume from the newest checkpoint when one exists,
                # otherwise initialize all variables from scratch.
                if latest_checkpoint:
                    saver.restore(sess, latest_checkpoint)
                else:
                    sess.run(tf.global_variables_initializer())
                opt = TFOptimizer.from_loss(spec.loss, optim_method,
                                            session=sess)
                opt.optimize(MaxIteration(steps))
                # Bump the global step manually, then checkpoint the
                # trained weights under <model_dir>/model-<step>.
                sess.run(assign_step, feed_dict={add_step_input: steps})
                final_step = sess.run(global_step_tensor)
                saver.save(sess, self.estimator.model_dir + "/model",
                           global_step=final_step)
                return self
    # Non-TFDataset inputs: fall back to the vanilla Estimator.
    return self.estimator.train(input_fn, steps=steps)
def input_fn():
    """Build the conditional-GAN training TFDataset from TFDS MNIST."""
    def map_func(data):
        # Condition both networks on the digit class: the generator gets
        # (noise, one-hot label); the discriminator gets the image
        # rescaled from [0, 255] to [-1, 1].
        label_one_hot = tf.one_hot(data['label'], depth=10)
        latent = tf.random.normal(mean=0.0, stddev=1.0, shape=(NOISE_DIM,))
        scaled_image = ((tf.to_float(data['image']) / 255.0) - 0.5) * 2
        return ((latent, label_one_hot), scaled_image)

    mnist_ds = tfds.load("mnist", split="train").map(map_func)
    return TFDataset.from_tf_data_dataset(mnist_ds, batch_size=36)


opt = GANEstimator(
    generator_fn=conditional_generator,
    discriminator_fn=conditional_discriminator,
    generator_loss_fn=wasserstein_generator_loss,
    discriminator_loss_fn=wasserstein_discriminator_loss,
    generator_optimizer=ZooOptimizer(tf.train.AdamOptimizer(1e-5, 0.5)),
    discriminator_optimizer=ZooOptimizer(tf.train.AdamOptimizer(1e-4, 0.5)),
    model_dir=MODEL_DIR,
    session_config=tf.ConfigProto()
)

# Alternate 1000 training iterations with an evaluation pass, 20 rounds.
# NOTE(review): `eval` here presumably refers to a project-defined
# evaluation function that shadows the builtin — verify, and consider
# renaming it at its definition site.
for _ in range(20):
    opt.train(input_fn, MaxIteration(1000))
    eval()