def fit(self, x=None, y=None, batch_size=None, epochs=1,
        validation_split=0., validation_data=None, distributed=False,
        **kwargs):
    if isinstance(x, TFDataset):
        # todo check arguments
        if not x.has_batch:
            raise ValueError("The batch_size of TFDataset must be " +
                             "specified when used in KerasModel fit.")
        if isinstance(x, TFNdarrayDataset):
            x = _standarize_feature_label_dataset(x, self.model)
        self._fit_distributed(x, validation_split, epochs, **kwargs)
    elif distributed:
        dataset = TFDataset.from_ndarrays((x, y),
                                          val_tensors=validation_data)
        self._fit_distributed(dataset, validation_split, epochs, **kwargs)
    else:
        self.model.fit(x=x,
                       y=y,
                       batch_size=batch_size,
                       epochs=epochs,
                       validation_split=validation_split,
                       validation_data=validation_data,
                       **kwargs)
def evaluate(self, x=None, y=None, batch_per_thread=None, distributed=False):
    if isinstance(x, TFDataset):
        x = _standarize_feature_label_dataset(x, self.model)
        # todo check arguments
        return self._evaluate_distributed(x)
    else:
        if distributed:
            sc = getOrCreateSparkContext()
            rdd, types, shapes = _create_rdd_x_y(
                x, y,
                self.model._feed_input_names,
                self.model._feed_output_names, sc)
            names = self.model._feed_input_names + self.model._feed_output_names
            dataset = TFDataset.from_rdd(
                rdd,
                names=names,
                types=types,
                shapes=shapes,
                batch_per_thread=-1 if batch_per_thread is None else batch_per_thread)
            return self._evaluate_distributed(dataset)
        else:
            return self.model.evaluate(x=x, y=y, batch_size=batch_per_thread)
def test_tf_optimizer_with_sparse_gradient(self):
    import tensorflow as tf

    ids = np.random.randint(0, 10, size=[40])
    labels = np.random.randint(0, 5, size=[40])
    id_rdd = self.sc.parallelize(ids)
    label_rdd = self.sc.parallelize(labels)
    training_rdd = id_rdd.zip(label_rdd).map(lambda x: [x[0], x[1]])
    with tf.Graph().as_default():
        dataset = TFDataset.from_rdd(training_rdd,
                                     names=["ids", "labels"],
                                     shapes=[[], []],
                                     types=[tf.int32, tf.int32],
                                     batch_size=8)
        id_tensor, label_tensor = dataset.tensors
        embedding_table = tf.get_variable(name="word_embedding",
                                          shape=[10, 5])
        embedding = tf.nn.embedding_lookup(embedding_table, id_tensor)
        loss = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(logits=embedding,
                                                   labels=label_tensor))
        optimizer = TFOptimizer(loss, Adam(1e-3))
        optimizer.optimize(end_trigger=MaxEpoch(1))
        optimizer.sess.close()
def evaluate(self, x=None, y=None, batch_per_thread=None, distributed=False):
    if isinstance(x, TFDataset):
        if not x.has_batch:
            raise ValueError("The batch_per_thread of TFDataset must be " +
                             "specified when used in KerasModel evaluate.")
        x = _standarize_feature_label_dataset(x, self.model)
        # todo check arguments
        return self._evaluate_distributed(x)
    else:
        if distributed:
            sc = getOrCreateSparkContext()
            rdd, types, shapes = _create_rdd_x_y(
                x, y,
                self.model._feed_input_names,
                self.model._feed_output_names, sc)
            names = self.model._feed_input_names + self.model._feed_output_names
            dataset = TFDataset.from_rdd(
                rdd,
                names=names,
                types=types,
                shapes=shapes,
                batch_per_thread=-1 if batch_per_thread is None else batch_per_thread)
            return self._evaluate_distributed(dataset)
        else:
            return self.model.evaluate(x=x, y=y, batch_size=batch_per_thread)
def predict(self, x, batch_per_thread=None, distributed=False):
    if isinstance(x, TFDataset):
        # todo check arguments
        x = _standarize_feature_dataset(x, self.model)
        return self._predict_distributed(x)
    else:
        if distributed:
            sc = getOrCreateSparkContext()
            rdd, types, shapes = _create_rdd_x(
                x, self.model._feed_input_names, sc)
            dataset = TFDataset.from_rdd(
                rdd,
                names=self.model._feed_input_names,
                types=types,
                shapes=shapes,
                batch_per_thread=-1 if batch_per_thread is None else batch_per_thread)
            results = self._predict_distributed(dataset).collect()
            output_num = len(self.model.outputs)
            if output_num == 1:
                return np.stack(results)
            else:
                predictions = []
                for i in range(0, output_num):
                    predictions.append(
                        np.stack([res[i] for res in results]))
                return predictions
        else:
            return self.model.predict(x=x, batch_size=batch_per_thread)
def _standarize_feature_label_dataset(dataset, model):
    input_names = model.input_names
    output_names = model.output_names

    rdd = dataset.rdd.map(lambda x: (x[0], _process_labels(x[1])))\
        .map(lambda sample: _training_reorder(sample, input_names, output_names))
    if dataset.val_rdd is not None:
        val_rdd = dataset.val_rdd.map(lambda x: (x[0], _process_labels(x[1])))\
            .map(lambda sample: _training_reorder(sample, input_names, output_names))
    else:
        val_rdd = None
    tensor_structure = _training_reorder(dataset.tensor_structure,
                                         input_names, output_names)
    new_dataset = TFDataset(rdd, tensor_structure, dataset.batch_size,
                            -1, dataset.hard_code_batch_size, val_rdd)
    new_dataset.batch_per_thread = dataset.batch_per_thread
    return new_dataset
def _standarize_feature_dataset(dataset, model):
    input_names = model.input_names

    rdd = dataset.rdd.map(lambda sample: _reorder(sample, input_names))
    feature_schema = _reorder(dataset.tensor_structure[0], input_names)
    dataset = TFDataset(rdd, feature_schema, dataset.batch_size,
                        -1, dataset.hard_code_batch_size)
    return dataset
def test_tf_net_predict_dataset(self):
    resource_path = os.path.join(os.path.split(__file__)[0], "../../resources")
    tfnet_path = os.path.join(resource_path, "tfnet")
    net = TFNet.from_export_folder(tfnet_path)
    dataset = TFDataset.from_ndarrays((np.random.rand(16, 4),))
    output = net.predict(dataset)
    output = np.stack(output.collect())
    assert output.shape == (16, 2)
def _standarize_feature_label_dataset(dataset, model):
    input_names = model.input_names
    output_names = model.output_names

    def _process_labels(ys):
        if isinstance(ys, dict):
            return {k: np.expand_dims(y, axis=1) if y.ndim == 0 else y
                    for k, y in ys.items()}
        elif isinstance(ys, list):
            return [np.expand_dims(y, axis=1) if y.ndim == 0 else y
                    for y in ys]
        else:
            return np.expand_dims(ys, axis=1) if ys.ndim == 0 else ys

    def _training_reorder(x, input_names, output_names):
        assert isinstance(x, tuple)
        return _reorder(x[0], input_names) + _reorder(x[1], output_names)

    def _reorder(x, names):
        if isinstance(x, dict):
            return [x[name] for name in names]
        elif isinstance(x, list):
            return x
        else:
            return [x]

    rdd = dataset.rdd.map(lambda x: (x[0], _process_labels(x[1])))\
        .map(lambda sample: _training_reorder(sample, input_names, output_names))
    if dataset.val_rdd is not None:
        val_rdd = dataset.val_rdd.map(lambda x: (x[0], _process_labels(x[1])))\
            .map(lambda sample: _training_reorder(sample, input_names, output_names))
    else:
        val_rdd = None
    tensor_structure = _training_reorder(dataset.tensor_structure,
                                         input_names, output_names)
    new_dataset = TFDataset(rdd, tensor_structure, dataset.batch_size,
                            -1, dataset.hard_code_batch_size, val_rdd)
    new_dataset.batch_per_thread = dataset.batch_per_thread
    return new_dataset
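# A minimal, self-contained sketch of the `_reorder` behavior above, on plain
# NumPy data (the input names and values here are hypothetical, chosen only to
# illustrate how dict inputs get reordered to match the model's input_names
# while lists pass through unchanged):
import numpy as np

def _reorder_demo(x, names):
    # same logic as the nested _reorder helper above
    if isinstance(x, dict):
        return [x[name] for name in names]
    elif isinstance(x, list):
        return x
    else:
        return [x]

features = {"input_2": np.ones([4]), "input_1": np.zeros([3])}
ordered = _reorder_demo(features, ["input_1", "input_2"])
assert [a.shape for a in ordered] == [(3,), (4,)]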
def predict(self, x, batch_per_thread=None, distributed=False):
    """
    Use a model to do prediction.

    :param x: Input data. It could be:
        - a TFDataset object
        - a Numpy array (or array-like), or a list of arrays
          (in case the model has multiple inputs).
        - a dict mapping input names to the corresponding arrays/tensors,
          if the model has named inputs.
    :param batch_per_thread: The default value is 1.
        When distributed is True, the total batch size is
        batch_per_thread * rdd.getNumPartitions.
        When distributed is False, the total batch size is
        batch_per_thread * numOfCores.
    :param distributed: Boolean. Whether to do prediction in distributed
        mode or local mode. Default is False. In local mode, x must be
        a Numpy array.
    """
    if isinstance(x, TFDataset):
        # todo check arguments
        if not x.has_batch:
            raise ValueError(
                "The batch_per_thread of TFDataset" +
                " must be specified when used in KerasModel predict.")
        if isinstance(x, TFNdarrayDataset):
            x = _standarize_feature_dataset(x, self.model)
        return self._predict_distributed(x)
    else:
        if distributed:
            sc = getOrCreateSparkContext()
            rdd, types, shapes = _create_rdd_x(
                x, self.model._feed_input_names, sc)
            dataset = TFDataset.from_rdd(
                rdd,
                names=self.model._feed_input_names,
                types=types,
                shapes=shapes,
                batch_per_thread=-1 if batch_per_thread is None else batch_per_thread)
            results = self._predict_distributed(dataset).collect()
            output_num = len(self.model.outputs)
            if output_num == 1:
                return np.stack(results)
            else:
                predictions = []
                for i in range(0, output_num):
                    predictions.append(
                        np.stack([res[i] for res in results]))
                return predictions
        else:
            return self.model.predict(x=x, batch_size=batch_per_thread)
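# A hedged usage sketch for the predict API above. The model, shapes, and
# data here are hypothetical; it assumes a compiled tf.keras model wrapped in
# zoo.tfpark's KerasModel (the same import used in the notebook further below).
import numpy as np
import tensorflow as tf
from zoo.tfpark.model import KerasModel

net = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(4,))])
net.compile(optimizer="sgd", loss="mse")
model = KerasModel(net)

x = np.random.rand(16, 4).astype(np.float32)
local_out = model.predict(x, distributed=False)  # plain Keras path
dist_out = model.predict(x, batch_per_thread=4, distributed=True)  # RDD path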
def fit(self, x=None, y=None, batch_size=None, epochs=1,
        validation_split=0., validation_data=None, distributed=False,
        **kwargs):
    if isinstance(x, TFDataset):
        # todo check arguments
        if not x.has_batch:
            raise ValueError("The batch_size of TFDataset must be " +
                             "specified when used in KerasModel fit.")
        x = _standarize_feature_label_dataset(x, self.model)
        self._fit_distributed(x, validation_split, epochs, **kwargs)
    elif distributed:
        sc = getOrCreateSparkContext()
        train_rdd, types, shapes = _create_rdd_x_y(
            x, y,
            self.model._feed_input_names,
            self.model._feed_output_names, sc)
        val_rdd = None
        if validation_data is not None:
            val_rdd, _, _ = _create_rdd_x_y(validation_data[0],
                                            validation_data[1],
                                            self.model._feed_input_names,
                                            self.model._feed_output_names,
                                            sc)
        names = self.model._feed_input_names + self.model._feed_output_names
        dataset = TFDataset.from_rdd(
            train_rdd,
            names=names,
            shapes=shapes,
            types=types,
            batch_size=batch_size if batch_size is not None else 32,
            val_rdd=val_rdd)
        self._fit_distributed(dataset, validation_split, epochs, **kwargs)
    else:
        self.model.fit(x=x,
                       y=y,
                       batch_size=batch_size,
                       epochs=epochs,
                       validation_split=validation_split,
                       validation_data=validation_data,
                       **kwargs)
def main(max_epoch, data_num):
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    def get_data_rdd(dataset):
        (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", dataset)
        image_rdd = sc.parallelize(images_data[:data_num])
        labels_rdd = sc.parallelize(labels_data[:data_num])
        rdd = image_rdd.zip(labels_rdd) \
            .map(lambda rec_tuple: [normalizer(rec_tuple[0],
                                               mnist.TRAIN_MEAN,
                                               mnist.TRAIN_STD),
                                    np.array(rec_tuple[1])])
        return rdd

    training_rdd = get_data_rdd("train")
    testing_rdd = get_data_rdd("test")
    dataset = TFDataset.from_rdd(training_rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], []],
                                 types=[tf.float32, tf.int32],
                                 batch_size=280,
                                 val_rdd=testing_rdd)

    # construct the model from TFDataset
    images, labels = dataset.tensors
    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10,
                                         is_training=True)
    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    # create an optimizer
    optimizer = TFOptimizer(loss, Adam(1e-3),
                            val_outputs=[logits],
                            val_labels=[labels],
                            val_method=Top1Accuracy())
    optimizer.set_train_summary(TrainSummary("/tmp/az_lenet", "lenet"))
    optimizer.set_val_summary(ValidationSummary("/tmp/az_lenet", "lenet"))

    # kick off training
    optimizer.optimize(end_trigger=MaxEpoch(max_epoch))

    saver = tf.train.Saver()
    saver.save(optimizer.sess, "/tmp/lenet/")
def main(data_num):
    data = Input(shape=[28, 28, 1])
    x = Flatten()(data)
    x = Dense(64, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    predictions = Dense(10, activation='softmax')(x)
    model = Model(inputs=data, outputs=predictions)
    model.load_weights("/tmp/mnist_keras.h5")

    if DISTRIBUTED:
        # using RDD api to do distributed evaluation
        sc = init_nncontext()
        # get data, pre-process and create TFDataset
        (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "test")
        image_rdd = sc.parallelize(images_data[:data_num])
        labels_rdd = sc.parallelize(labels_data[:data_num])
        rdd = image_rdd.zip(labels_rdd) \
            .map(lambda rec_tuple: [normalizer(rec_tuple[0],
                                               mnist.TRAIN_MEAN,
                                               mnist.TRAIN_STD)])

        dataset = TFDataset.from_rdd(rdd,
                                     names=["features"],
                                     shapes=[[28, 28, 1]],
                                     types=[tf.float32],
                                     batch_per_thread=20)
        predictor = TFPredictor.from_keras(model, dataset)

        accuracy = predictor.predict().zip(labels_rdd).map(
            lambda x: np.argmax(x[0]) == x[1]).mean()
        print("predict accuracy is %s" % accuracy)
    else:
        # using keras api for local evaluation
        model.compile(optimizer='rmsprop',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "test")
        images_data = normalizer(images_data, mnist.TRAIN_MEAN, mnist.TRAIN_STD)
        result = model.evaluate(images_data, labels_data)
        print(model.metrics_names)
        print(result)
def _standarize_feature_dataset(dataset, model):
    input_names = model.input_names

    def _reorder(x, names):
        if isinstance(x, dict):
            return [x[name] for name in names]
        elif isinstance(x, list):
            return x
        else:
            return [x]

    rdd = dataset.rdd.map(lambda sample: _reorder(sample, input_names))
    feature_schema = _reorder(dataset.tensor_structure[0], input_names)
    dataset = TFDataset(rdd, feature_schema, dataset.batch_size,
                        -1, dataset.hard_code_batch_size)
    return dataset
def main(max_epoch, data_num):
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    def get_data_rdd(dataset):
        (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", dataset)
        image_rdd = sc.parallelize(images_data[:data_num])
        labels_rdd = sc.parallelize(labels_data[:data_num])
        rdd = image_rdd.zip(labels_rdd) \
            .map(lambda rec_tuple: [normalizer(rec_tuple[0],
                                               mnist.TRAIN_MEAN,
                                               mnist.TRAIN_STD),
                                    np.array(rec_tuple[1])])
        return rdd

    training_rdd = get_data_rdd("train")
    testing_rdd = get_data_rdd("test")
    dataset = TFDataset.from_rdd(training_rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], []],
                                 types=[tf.float32, tf.int32],
                                 batch_size=280,
                                 val_rdd=testing_rdd)

    data = Input(shape=[28, 28, 1])
    x = Flatten()(data)
    x = Dense(64, activation='relu')(x)
    x = Dense(64, activation='relu')(x)
    predictions = Dense(10, activation='softmax')(x)
    model = Model(inputs=data, outputs=predictions)

    model.compile(optimizer='rmsprop',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    optimizer = TFOptimizer.from_keras(model, dataset)
    optimizer.set_train_summary(TrainSummary("/tmp/az_lenet", "lenet"))
    optimizer.set_val_summary(ValidationSummary("/tmp/az_lenet", "lenet"))

    # kick off training
    optimizer.optimize(end_trigger=MaxEpoch(max_epoch))

    saver = tf.train.Saver()
    saver.save(optimizer.sess, "/tmp/lenet/")
def evaluate(self, x=None, y=None, batch_per_thread=None, distributed=False):
    """
    Evaluate a model on a given dataset.

    :param x: Input data. It could be:
        - a TFDataset object
        - a Numpy array (or array-like), or a list of arrays
          (in case the model has multiple inputs).
        - a dict mapping input names to the corresponding arrays/tensors,
          if the model has named inputs.
    :param y: Target data. Like the input data `x`, it should be consistent
        with `x` (you cannot have Numpy inputs and tensor targets, or
        inversely). If `x` is a TFDataset, `y` should not be specified
        (since targets will be obtained from `x`).
    :param batch_per_thread: The default value is 1.
        When distributed is True, the total batch size is
        batch_per_thread * rdd.getNumPartitions.
        When distributed is False, the total batch size is
        batch_per_thread * numOfCores.
    :param distributed: Boolean. Whether to do evaluation in distributed
        mode or local mode. Default is False. In local mode, x must be
        a Numpy array.
    """
    if isinstance(x, TFDataset):
        if not x.has_batch:
            raise ValueError("The batch_per_thread of TFDataset must be " +
                             "specified when used in KerasModel evaluate.")
        if isinstance(x, TFNdarrayDataset):
            x = _standarize_feature_label_dataset(x, self.model)
        # todo check arguments
        return self._evaluate_distributed(x)
    else:
        if distributed:
            dataset = TFDataset.from_ndarrays(
                (x, y),
                batch_per_thread=-1 if batch_per_thread is None else batch_per_thread)
            return self._evaluate_distributed(dataset)
        else:
            return self.model.evaluate(x=x, y=y, batch_size=batch_per_thread)
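# A minimal usage sketch for the evaluate API above (hypothetical model and
# data; assumes a compiled tf.keras model wrapped in zoo.tfpark's KerasModel).
# With NumPy inputs and distributed=True, (x, y) is wrapped into a TFDataset
# via TFDataset.from_ndarrays before distributed evaluation, as the code shows.
import numpy as np
import tensorflow as tf
from zoo.tfpark.model import KerasModel

net = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(4,))])
net.compile(optimizer="sgd", loss="mse", metrics=["mae"])
model = KerasModel(net)

x_val = np.random.rand(100, 4).astype(np.float32)
y_val = np.random.rand(100, 2).astype(np.float32)
metrics = model.evaluate(x_val, y_val, batch_per_thread=10, distributed=True)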
def main():
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "test")
    image_rdd = sc.parallelize(images_data)
    labels_rdd = sc.parallelize(labels_data)
    rdd = image_rdd.zip(labels_rdd) \
        .map(lambda rec_tuple: [normalizer(rec_tuple[0],
                                           mnist.TRAIN_MEAN,
                                           mnist.TRAIN_STD),
                                np.array(rec_tuple[1])])

    dataset = TFDataset.from_rdd(rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], [1]],
                                 types=[tf.float32, tf.int32],
                                 batch_per_thread=20)

    # construct the model from TFDataset
    images, labels = dataset.tensors
    labels = tf.squeeze(labels)
    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10,
                                         is_training=False)

    predictions = tf.to_int32(tf.argmax(logits, axis=1))
    correct = tf.expand_dims(tf.to_int32(tf.equal(predictions, labels)), axis=1)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, "/tmp/lenet/")
        predictor = TFPredictor(sess, [correct])
        accuracy = predictor.predict().mean()
        print("predict accuracy is %s" % accuracy)
def main():
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "train")
    image_rdd = sc.parallelize(images_data)
    labels_rdd = sc.parallelize(labels_data)
    rdd = image_rdd.zip(labels_rdd) \
        .map(lambda rec_tuple: [normalizer(rec_tuple[0],
                                           mnist.TRAIN_MEAN,
                                           mnist.TRAIN_STD),
                                np.array(rec_tuple[1])])

    dataset = TFDataset.from_rdd(rdd,
                                 names=["features", "labels"],
                                 shapes=[(None, 28, 28, 1), (None, 1)],
                                 types=[tf.float32, tf.int32])

    # construct the model from TFDataset
    images, labels = dataset.inputs
    labels = tf.squeeze(labels)
    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10,
                                         is_training=True)
    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    # create an optimizer
    optimizer = TFOptimizer(loss, Adam(1e-3))

    # kick off training
    # you may change MaxIteration(20) to MaxEpoch(5) to make it converge
    optimizer.optimize(end_trigger=MaxIteration(20), batch_size=280)

    # evaluate
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "test")
    images_data = normalizer(images_data, mnist.TRAIN_MEAN, mnist.TRAIN_STD)
    predictions = tf.argmax(logits, axis=1)
    predictions_data, loss_value = optimizer.sess.run(
        [predictions, loss],
        feed_dict={images: images_data, labels: labels_data})
    print(np.mean(np.equal(predictions_data, labels_data)))
    print(loss_value)
def main():
    sc = init_nncontext()

    # get data, pre-process and create TFDataset
    (images_data, labels_data) = mnist.read_data_sets("/tmp/mnist", "train")
    image_rdd = sc.parallelize(images_data)
    labels_rdd = sc.parallelize(labels_data)
    rdd = image_rdd.zip(labels_rdd) \
        .map(lambda rec_tuple: [normalizer(rec_tuple[0],
                                           mnist.TRAIN_MEAN,
                                           mnist.TRAIN_STD),
                                np.array(rec_tuple[1])])

    dataset = TFDataset.from_rdd(rdd,
                                 names=["features", "labels"],
                                 shapes=[[28, 28, 1], [1]],
                                 types=[tf.float32, tf.int32],
                                 batch_size=280)

    # construct the model from TFDataset
    images, labels = dataset.tensors
    labels = tf.squeeze(labels)
    with slim.arg_scope(lenet.lenet_arg_scope()):
        logits, end_points = lenet.lenet(images, num_classes=10,
                                         is_training=True)
    loss = tf.reduce_mean(
        tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels))

    # create an optimizer
    optimizer = TFOptimizer(loss, Adam(1e-3))
    optimizer.set_train_summary(TrainSummary("/tmp/az_lenet", "lenet"))

    # kick off training
    for i in range(5):
        optimizer.optimize(end_trigger=MaxEpoch(i + 1))

    saver = tf.train.Saver()
    saver.save(optimizer.sess, "/tmp/lenet/")
def test_tf_optimizer_with_sparse_gradient_using_keras(self):
    import tensorflow as tf

    ids = np.random.randint(0, 10, size=[40])
    labels = np.random.randint(0, 5, size=[40])
    id_rdd = self.sc.parallelize(ids)
    label_rdd = self.sc.parallelize(labels)
    training_rdd = id_rdd.zip(label_rdd).map(lambda x: [x[0], x[1]])

    with tf.Graph().as_default():
        dataset = TFDataset.from_rdd(training_rdd,
                                     names=["ids", "labels"],
                                     shapes=[[], []],
                                     types=[tf.int32, tf.int32],
                                     batch_size=8)

        from tensorflow.python.ops import variable_scope

        def variable_creator(**kwargs):
            kwargs["use_resource"] = False
            return variable_scope.default_variable_creator(None, **kwargs)

        getter = lambda next_creator, **kwargs: variable_creator(**kwargs)
        with variable_scope.variable_creator_scope(getter):
            words_input = tf.keras.layers.Input(shape=(), name='words_input')
            embedding_layer = tf.keras.layers.Embedding(input_dim=10,
                                                        output_dim=5,
                                                        name='word_embedding')
            word_embeddings = embedding_layer(words_input)
            embedding = tf.keras.layers.Flatten()(word_embeddings)
            output = tf.keras.layers.Dense(5, activation="softmax")(embedding)
            model = tf.keras.models.Model(inputs=[words_input],
                                          outputs=[output])
            model.compile(optimizer="sgd",
                          loss="sparse_categorical_crossentropy")

        optimizer = TFOptimizer.from_keras(model, dataset)
        optimizer.optimize(end_trigger=MaxEpoch(1))
        optimizer.sess.close()
def evaluate(self, x=None, y=None, batch_per_thread=None, distributed=False):
    if isinstance(x, TFDataset):
        if not x.has_batch:
            raise ValueError("The batch_per_thread of TFDataset must be " +
                             "specified when used in KerasModel evaluate.")
        if isinstance(x, TFNdarrayDataset):
            x = _standarize_feature_label_dataset(x, self.model)
        # todo check arguments
        return self._evaluate_distributed(x)
    else:
        if distributed:
            dataset = TFDataset.from_ndarrays(
                (x, y),
                batch_per_thread=-1 if batch_per_thread is None else batch_per_thread)
            return self._evaluate_distributed(dataset)
        else:
            return self.model.evaluate(x=x, y=y, batch_size=batch_per_thread)
# In[10]:

# Let's have a look at one element of trainRDD
trainRDD.take(1)

# We can see that `features` is now composed of the list of 801 particles with
# 19 features each (`shape=[801, 19]`) plus the HLF (`shape=[14]`) and the
# encoded label (`shape=[3]`).

# In[11]:

from zoo.pipeline.api.net import TFDataset
from zoo.tfpark.model import KerasModel

# create TFDataset for TF training
dataset = TFDataset.from_rdd(trainRDD,
                             features=[(tf.float32, [801, 19]),
                                       (tf.float32, [14])],
                             labels=(tf.float32, [3]),
                             batch_size=256,
                             val_rdd=testRDD)

# ## Optimizer setup and training

# In[12]:

# Set of hyperparameters
numEpochs = 8

# The batch size used by BDL must be a multiple of numExecutors * executorCores,
# because data will be equally distributed inside each executor.
# A sketch of that arithmetic follows below.
workerBatch = 64

# numExecutors = int(sc._conf.get('spark.executor.instances'))
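# A sketch of the batch-size arithmetic described above, continuing the
# commented-out line (assumes 'spark.executor.instances' and
# 'spark.executor.cores' are set in this cluster's Spark conf; the
# executorCores and batchSize names are illustrative):
numExecutors = int(sc._conf.get('spark.executor.instances'))
executorCores = int(sc._conf.get('spark.executor.cores'))
# global batch size = per-worker batch * total number of cores
batchSize = workerBatch * numExecutors * executorCores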
                        output_dim=FC_LINEAR_DIMENSION,  # dimension: 32 -> 64.
                        activation="sigmoid"))

# BigDL does not support parameter sharing, so this is a workaround.
both_feature = TimeDistributed(layer=convolve_net,
                               input_shape=input_shape)(both_input)
encode_left = both_feature.index_select(1, 0)
encode_right = both_feature.index_select(1, 1)

distance = autograd.abs(encode_left - encode_right)
predict = Dense(output_dim=NUM_CLASS_LABEL,
                activation="sigmoid")(distance)

siamese_net = Model(input=both_input, output=predict)
siamese_net.compile(optimizer="adam",
                    loss='sparse_categorical_crossentropy',
                    metrics=["accuracy"])

# build the distributed dataset object
data_set = TFDataset.from_rdd(train_rdd,
                              shapes=[input_shape, [1]],
                              batch_size=args.batch_size,
                              val_rdd=test_rdd)

optimizer = TFOptimizer.from_keras(siamese_net, data_set)
app_name = "Siamese Network"
optimizer.set_train_summary(TrainSummary("tmp", app_name))
optimizer.set_val_summary(ValidationSummary("tmp", app_name))
optimizer.optimize(end_trigger=MaxEpoch(args.num_epoch))
def fit(self, x=None, y=None, batch_size=None, epochs=1,
        validation_split=0., validation_data=None, distributed=False,
        **kwargs):
    """
    Train the model for a fixed number of epochs.

    Arguments:
    :param x: Input data. It could be:
        - a TFDataset object
        - a Numpy array (or array-like), or a list of arrays
          (in case the model has multiple inputs).
        - a dict mapping input names to the corresponding arrays/tensors,
          if the model has named inputs.
    :param y: Target data. Like the input data `x`, it should be consistent
        with `x` (you cannot have Numpy inputs and tensor targets, or
        inversely). If `x` is a TFDataset, `y` should not be specified
        (since targets will be obtained from `x`).
    :param batch_size: Integer or `None`. Number of samples per gradient
        update. If `x` is a TFDataset, you do not need to specify batch_size.
    :param epochs: Integer. Number of epochs to train the model.
        An epoch is an iteration over the entire `x` and `y` data provided.
    :param validation_split: Float between 0 and 1. Fraction of the training
        data to be used as validation data. The model will set apart this
        fraction of the training data, will not train on it, and will
        evaluate the loss and any model metrics on this data at the end of
        each epoch.
    :param validation_data: Data on which to evaluate the loss and any model
        metrics at the end of each epoch. The model will not be trained on
        this data. `validation_data` will override `validation_split`.
        `validation_data` could be:
        - a tuple `(x_val, y_val)` of Numpy arrays or tensors
        - a `TFDataset`
    :param distributed: Boolean. Whether to do training in distributed mode
        or local mode. Default is False. In local mode, x must be a Numpy
        array.
    """
    if isinstance(x, TFDataset):
        # todo check arguments
        if not x.has_batch:
            raise ValueError("The batch_size of TFDataset must be " +
                             "specified when used in KerasModel fit.")
        if isinstance(x, TFNdarrayDataset):
            x = _standarize_feature_label_dataset(x, self.model)
        self._fit_distributed(x, validation_split, epochs, **kwargs)
    elif distributed:
        dataset = TFDataset.from_ndarrays((x, y),
                                          val_tensors=validation_data)
        self._fit_distributed(dataset, validation_split, epochs, **kwargs)
    else:
        self.model.fit(x=x,
                       y=y,
                       batch_size=batch_size,
                       epochs=epochs,
                       validation_split=validation_split,
                       validation_data=validation_data,
                       **kwargs)
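# A hedged usage sketch for the fit API above (hypothetical model and data;
# assumes a compiled tf.keras model wrapped in zoo.tfpark's KerasModel). With
# NumPy inputs and distributed=True, (x, y) is wrapped into a TFDataset via
# TFDataset.from_ndarrays before distributed training, as the code shows.
import numpy as np
import tensorflow as tf
from zoo.tfpark.model import KerasModel

net = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
net.compile(optimizer="sgd", loss="mse")
model = KerasModel(net)

x_train = np.random.rand(64, 4).astype(np.float32)
y_train = np.random.rand(64, 1).astype(np.float32)
model.fit(x_train, y_train, epochs=2, distributed=True)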