def train(epochs: int,
          data_handler: DataHandler,
          model: tf.keras.Model,
          models_dir: str,
          model_name: str = "unnamed",
          batch_size: int = 64,
          loss_weights: dict = None):
    for epoch in range(epochs):
        print(f"Starting Epoch {epoch}")
        train_data = data_handler.train_iterator(batch_size=batch_size)
        model.fit(train_data, class_weight=loss_weights)
        print(f"Finished training on Epoch {epoch}")

        test_data = data_handler.test_iterator(batch_size=batch_size)
        model.evaluate(test_data)
        print(f"Finished evaluation on Epoch {epoch}")

        model.save(os.path.join(models_dir, model_name, "checkpoints", str(epoch)))

    model.save(os.path.join(models_dir, model_name, "final_model"))
def train_and_evaluate_model(model: tf.keras.Model) -> None:
    """Train and test the transfer learning model

    Parameters
    ----------
    model : tf.keras.Model
        The transfer learning model
    """
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
    loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    metric = tf.keras.metrics.BinaryAccuracy()
    model.compile(loss=loss, optimizer=optimizer, metrics=[metric])

    train_dataset, validation_dataset, test_dataset = get_dataset()
    model.fit(train_dataset, epochs=20, validation_data=validation_dataset)
    model.evaluate(x=test_dataset)
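# A minimal usage sketch for the function above. Assumptions (not part of the
# original code): `get_dataset` returns three batched `tf.data.Dataset`s of
# 160x160 RGB images with binary labels, and the backbone / input shape below
# are illustrative placeholders.
def build_transfer_model(input_shape=(160, 160, 3)) -> tf.keras.Model:
    base = tf.keras.applications.MobileNetV2(
        input_shape=input_shape, include_top=False, weights="imagenet")
    base.trainable = False  # freeze the pretrained backbone
    inputs = tf.keras.Input(shape=input_shape)
    x = base(inputs, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    outputs = tf.keras.layers.Dense(1)(x)  # single logit for the binary task
    return tf.keras.Model(inputs, outputs)

# train_and_evaluate_model(build_transfer_model())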
def plot_worst(
    self,
    model: tf.keras.Model,
    cadastre_indeces: Collection[int] = range(0, 10),
    metric: Tuple[Mapping, str] = (min, "iou"),
    number: int = 5,
):
    """Plot the worst predictions according to a given metric."""
    cadastre_metrics = {}
    optimizer, metric = metric

    for cadastre_index in track(cadastre_indeces):
        try:
            x, y = self[cadastre_index:cadastre_index + 1]
        except ValueError:
            continue
        if x is None or x.shape[0] > 1:
            continue

        evaluation = model.evaluate(x=x, y=y, verbose=0)
        metrics = {
            name: value
            for name, value in zip(model.metrics_names, evaluation)
        }
        cadastre_metrics[cadastre_index] = metrics

    number = min(number, len(cadastre_metrics))
    for _ in range(number):
        worst_cadastre = optimizer(
            cadastre_metrics.keys(),
            key=lambda key: cadastre_metrics[key][metric],
        )
        self.plot_prediction(model=model, cadastre_index=worst_cadastre)
        del cadastre_metrics[worst_cadastre]
def compare_loss(model: tf.keras.Model, model_no_quant_tflite: bytes,
                 model_quant_tflite: bytes, ds: Dataset,
                 config: ConfigData) -> None:
    """
    **2. Loss (MSE/Mean Squared Error)**

    Throws: AssertionError
    """
    # Calculate loss
    loss_tf, _ = model.evaluate(ds.x_test, ds.y_test, verbose=0)
    loss_no_quant_tflite = evaluate_tflite(model,
                                           tflite_model=model_no_quant_tflite,
                                           x_test=ds.x_test,
                                           y_true=ds.y_test)
    loss_quant_tflite = evaluate_tflite(model,
                                        tflite_model=model_quant_tflite,
                                        x_test=ds.x_test,
                                        y_true=ds.y_test)

    # Compare loss
    df = pd.DataFrame.from_records(
        [['TensorFlow', loss_tf],
         ['TensorFlow Lite', loss_no_quant_tflite],
         ['TensorFlow Lite Quantized', loss_quant_tflite]],
        columns=['Model', 'Loss/MSE'],
        index='Model').round(4)
    print(df)

    assert loss_quant_tflite <= config.TEST_QUANT_LOSS, \
        'Test loss (quantized) too large'
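# `evaluate_tflite` is not defined in this snippet. A minimal sketch of one
# possible implementation, assuming a single-input/single-output regression
# model evaluated sample-by-sample with MSE as the loss; quantization
# parameters (scale/zero point) are ignored here, so a fully int8-quantized
# model would need extra handling. Everything below is an assumption.
def evaluate_tflite(model: tf.keras.Model, tflite_model: bytes,
                    x_test: np.ndarray, y_true: np.ndarray) -> float:
    # `model` is kept only for signature parity with the calls above.
    interpreter = tf.lite.Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()[0]
    output_details = interpreter.get_output_details()[0]

    y_pred = np.empty(len(x_test), dtype=np.float32)
    for i, x in enumerate(x_test):
        # One sample at a time, cast and reshaped to what the interpreter expects.
        x = np.asarray(x, dtype=input_details['dtype']).reshape(input_details['shape'])
        interpreter.set_tensor(input_details['index'], x)
        interpreter.invoke()
        y_pred[i] = interpreter.get_tensor(output_details['index']).squeeze()

    # Mean squared error against the ground truth.
    return float(np.mean((np.asarray(y_true).squeeze() - y_pred) ** 2))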
def test_model(model: tf.keras.Model, checkpoint_dir: str,
               eval_dataset: tf.data.Dataset) -> Tuple[float, float]:
    model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
    eval_loss, eval_acc = model.evaluate(eval_dataset)
    return eval_loss, eval_acc
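# `tf.train.latest_checkpoint` only finds TensorFlow-format checkpoints, so the
# training side presumably wrote weight-only checkpoints with a matching prefix.
# A minimal sketch of that training side; the directory name and callback
# settings are assumptions, not from the source:
checkpoint_prefix = os.path.join("training_checkpoints", "ckpt_{epoch}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)  # weight-only, TF-format checkpoints
# model.fit(train_dataset, epochs=10, callbacks=[checkpoint_callback])
# loss, acc = test_model(model, "training_checkpoints", eval_dataset)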
def evaluator(
    X_test: np.ndarray,
    y_test: np.ndarray,
    model: tf.keras.Model,
) -> float:
    """Calculate the accuracy on the test set"""
    test_acc = model.evaluate(X_test, y_test, verbose=2)
    return test_acc
def tf_evaluator(
    X_test: np.ndarray,
    y_test: np.ndarray,
    model: tf.keras.Model,
) -> float:
    """Return the test-set accuracy (the loss is discarded)"""
    _, test_acc = model.evaluate(X_test, y_test, verbose=2)
    return test_acc
def evaluator(
    X_test: np.ndarray,
    y_test: np.ndarray,
    model: tf.keras.Model,
) -> np.ndarray:
    """Return the test-set loss and accuracy as a NumPy array"""
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
    return np.array([test_loss, test_acc])
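# The three evaluators above unpack `model.evaluate` differently because its
# return value depends on how the model was compiled: with no extra metrics it
# returns a scalar loss, with metrics it returns a list [loss, metric1, ...].
# A minimal illustration; the toy model and data below are assumptions:
toy_model = tf.keras.Sequential([tf.keras.Input(shape=(4,)),
                                 tf.keras.layers.Dense(1)])
toy_x = np.zeros((8, 4), dtype="float32")
toy_y = np.zeros((8, 1), dtype="float32")

toy_model.compile(loss="mse")
print(toy_model.evaluate(toy_x, toy_y, verbose=0))  # scalar loss

toy_model.compile(loss="mse", metrics=["mae"])
print(toy_model.evaluate(toy_x, toy_y, verbose=0))  # [loss, mae]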
def evaluator(
    test_df: pd.DataFrame,
    model: tf.keras.Model,
) -> float:
    """Calculate the accuracy on the test set"""
    test_acc = model.evaluate(test_df[FEATURE_COLS].values,
                              test_df[TARGET_COL_NAME].values,
                              verbose=2)
    return test_acc
def estimate_accuracy(model: tf.keras.Model, dataset, iteration: int,
                      should_encode: Union[int, bool]):
    acc = 0.0
    for _ in range(iteration):
        x, y = dataset.next()
        if should_encode:
            # `should_encode` doubles as the one-hot depth when truthy.
            y = tf.one_hot(y, should_encode)
        ev = model.evaluate(x, y, verbose=0)
        acc += ev[1]
    return acc / iteration
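# Usage sketch (assumptions: `dataset` is an iterator yielding (x, y) batches
# of integer labels, and the model predicts 10 classes, so `should_encode=10`
# also serves as the one-hot depth):
# accuracy = estimate_accuracy(model, train_iter, iteration=50, should_encode=10)
# Pass `should_encode=False` when the labels are already one-hot encoded.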
def train_model(epochs: int,
                data_handler: DataHandler,
                model: tf.keras.Model,
                save_dir: str,
                batch_size: int = 64,
                loss_weights: dict = None,
                dry_days=True,
                wet_days=False) -> tf.keras.Model:
    """
    Trains a model and saves it to cwd/models_dir/model_name/model_type/trained_model.
    To be loaded by
    """
    for epoch in range(epochs):
        print(f"Starting Epoch {epoch}")
        train_data = data_handler.train_iterator(batch_size=batch_size,
                                                 dry_days=dry_days,
                                                 wet_days=wet_days)
        model.fit(train_data, class_weight=loss_weights)
        print(f"Finished training on Epoch {epoch}")

        test_data = data_handler.test_iterator(batch_size=batch_size,
                                               dry_days=dry_days,
                                               wet_days=wet_days)
        model.evaluate(test_data)
        # x_data, y_true = data_handler[500]
        # y_pred = model.predict(tf.expand_dims(x_data, axis=0))[0]
        # print(f"Model: \n"
        #       f"Differences: {y_true - y_pred}\n"
        #       f"Predictions: {y_pred} \n"
        #       f"True labels: {y_true}")
        print(f"Finished evaluation on Epoch {epoch}")

        model.save(os.path.join(save_dir, "checkpoints", str(epoch)))

    model.save(os.path.join(save_dir, "trained_model"))
    return model
def fit(
    hp: kt.HyperParameters,
    model: tf.keras.Model,
    data_fn: tp.Callable[[kt.HyperParameters], core.DataSplit],
    log_dir: tp.Optional[str] = None,
    **kwargs,
):
    split = data_fn(hp)
    callbacks = list(kwargs.pop("callbacks", []))
    if log_dir is not None:
        run = 0
        values = dict(hp.values)
        lap_str = "lap" if values.pop("use_laplacian") else "adj"
        subdir = ("{lap_str}{spectral_size}-{hidden_layers}x{hidden_units}-"
                  "{embedding_size}_d{dropout_rate:.1f}").format(
                      lap_str=lap_str, **values)
        log_dir = os.path.join(log_dir, subdir)

        def full_log_dir(log_dir, run):
            return os.path.join(log_dir, f"run-{run:03d}")

        while os.path.exists(full_log_dir(log_dir, run)):
            run += 1
        log_dir = full_log_dir(log_dir, run)
        hparams = hp_to_hparams(hp)
        callbacks.extend([
            tf.keras.callbacks.TensorBoard(log_dir),
            hp_lib.KerasCallback(log_dir, hparams),
        ])

    history = train_lib.fit(model,
                            split.train_data,
                            split.validation_data,
                            callbacks=callbacks,
                            **kwargs)
    # Don't trust `history`: `EarlyStopping` with `restore_best_weights` may
    # have restored weights from an earlier epoch.
    del history

    metrics = {}
    for prefix, d in (("val", split.validation_data), ("test", split.test_data)):
        if d is not None:
            if not isinstance(d, tf.data.Dataset) and len(d) == 1:
                d = tf.data.Dataset.from_tensors(d[0])
            m = model.evaluate(d, return_dict=True, verbose=False)
            metrics.update({f"{prefix}_{k}": v for k, v in m.items()})
    return metrics
def evaluate(
        self,
        model: tf.keras.Model,
        evaluation_set: Union[tf.data.Dataset, ClassificationDataset],
        evaluation_steps: Union[int, None] = None,
        batch_size: Union[int, None] = None,
        augmentation: bool = False) -> Union[float, List[float], None]:
    """
    Evaluate the model on the provided set.

    :return: the loss value if the model has no other metrics, otherwise an
        array with the loss and metric values.
    """
    self.__logs['training'].info('Evaluating the model...')

    if augmentation:
        x_eval, y_eval = evaluation_set.get_xy_evaluation()
        data_generator = ImageDataGenerator()
        evaluation_set = data_generator.flow_from_dataframe(
            dataframe=pd.DataFrame({
                'image': x_eval,
                'class': y_eval
            }),
            directory='',
            x_col='image',
            y_col='class',
            class_mode='other',
            target_size=(self.__input_width, self.__input_height),
            batch_size=batch_size)
    else:
        if evaluation_steps is not None and evaluation_steps == 0:
            self.__logs['training'].warn(
                'Skipping evaluation since provided set is empty')
            return None

    return model.evaluate(evaluation_set, verbose=1, steps=evaluation_steps)
def train_model(model: tf.keras.Model, train_ds: tf.data.Dataset,
                val_ds: tf.data.Dataset) -> tf.keras.Model:
    """Trains the model, evaluates its performance and displays training metrics.

    :param model: Untrained model
    :param train_ds: Training Dataset containing input data and labels
    :param val_ds: Validation Dataset containing input data and labels
    :return: Trained model
    """
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                  patience=5,
                                                  restore_best_weights=True)
    history = model.fit(train_ds,
                        validation_data=val_ds,
                        epochs=100,
                        verbose=2,
                        callbacks=[early_stop],
                        use_multiprocessing=True,
                        workers=-1)
    loss, acc = model.evaluate(val_ds)
    print(f'Validation loss: {loss}\nValidation accuracy: {acc}')
    plot_history(history)
    return model
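# `plot_history` is not defined in this snippet. A minimal sketch, assuming it
# plots the loss curves stored in the Keras History object and that
# `matplotlib.pyplot` is imported as `plt`:
def plot_history(history: tf.keras.callbacks.History) -> None:
    plt.plot(history.history['loss'], label='training loss')
    plt.plot(history.history['val_loss'], label='validation loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()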
def evaluate_model(model: tf.keras.Model, dataset, **kwargs):
    return model.evaluate(dataset['data'], dataset['labels'], **kwargs)
def train_and_eval(self,
                   model: tf.keras.Model,
                   epochs: Optional[int] = None,
                   sparsity: Optional[float] = None):
    """
    Trains a Keras model and returns its validation set error (1.0 - accuracy).

    :param model: A Keras model.
    :param epochs: Overrides the duration of training.
    :param sparsity: Desired sparsity level (for unstructured sparsity).
    :returns: Smallest error on the validation set seen during training,
        the error on the test set, and the pruned weights (if pruning was used).
    """
    dataset = self.config.dataset
    batch_size = self.config.batch_size
    sparsity = sparsity or 0.0

    train = dataset.train_dataset() \
        .shuffle(batch_size * 8) \
        .batch(batch_size) \
        .prefetch(tf.data.experimental.AUTOTUNE)

    val = dataset.validation_dataset() \
        .batch(batch_size) \
        .prefetch(tf.data.experimental.AUTOTUNE)

    # TODO: check if this works, make sure we're excluding the last layer from the student
    if self.pruning and self.distillation:
        raise NotImplementedError()

    if self.distillation:
        teacher = tf.keras.models.load_model(self.distillation.distill_from)
        teacher._name = "teacher_"
        teacher.trainable = False

        t, a = self.distillation.temperature, self.distillation.alpha

        # Assemble a parallel model with the teacher and student
        i = tf.keras.Input(shape=dataset.input_shape)
        cxent = tf.keras.losses.CategoricalCrossentropy()

        stud_logits = model(i)
        tchr_logits = teacher(i)

        o_stud = tf.keras.layers.Softmax()(stud_logits / t)
        o_tchr = tf.keras.layers.Softmax()(tchr_logits / t)
        teaching_loss = (a * t * t) * cxent(o_tchr, o_stud)

        model = tf.keras.Model(inputs=i, outputs=stud_logits)
        model.add_loss(teaching_loss, inputs=True)

    if self.dataset.num_classes == 2:
        loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        accuracy = tf.keras.metrics.BinaryAccuracy(name="accuracy")
    else:
        loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
        accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")

    model.compile(optimizer=self.config.optimizer(),
                  loss=loss,
                  metrics=[accuracy])

    # TODO: adjust metrics by class weight?
    class_weight = {k: v for k, v in enumerate(self.dataset.class_weight())} \
        if self.config.use_class_weight else None

    epochs = epochs or self.config.epochs
    callbacks = self.config.callbacks()
    check_logs_from_epoch = 0

    pruning_cb = None
    if self.pruning and sparsity > 0.0:
        assert 0.0 < sparsity <= 1.0
        self.log.info(f"Target sparsity: {sparsity:.4f}")
        pruning_cb = DPFPruning(
            target_sparsity=sparsity,
            structured=self.pruning.structured,
            start_pruning_at_epoch=self.pruning.start_pruning_at_epoch,
            finish_pruning_by_epoch=self.pruning.finish_pruning_by_epoch)
        check_logs_from_epoch = self.pruning.finish_pruning_by_epoch
        callbacks.append(pruning_cb)

    log = model.fit(train,
                    epochs=epochs,
                    validation_data=val,
                    verbose=1 if debug_mode() else 2,
                    callbacks=callbacks,
                    class_weight=class_weight)

    test = dataset.test_dataset() \
        .batch(batch_size) \
        .prefetch(tf.data.experimental.AUTOTUNE)
    _, test_acc = model.evaluate(test, verbose=0)

    return {
        "val_error": 1.0 - max(log.history["val_accuracy"][check_logs_from_epoch:]),
        "test_error": 1.0 - test_acc,
        "pruned_weights": pruning_cb.weights if pruning_cb else None
    }
def keras_evaluate(model: tf.keras.Model, dataset: tf.data.Dataset,
                   batch_size: int) -> Tuple[float, float]:
    """Evaluate the model using model.evaluate(...)."""
    ds_test = dataset.batch(batch_size=batch_size, drop_remainder=False)
    test_loss, acc = model.evaluate(x=ds_test)
    return float(test_loss), float(acc)
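# Usage sketch with a toy dataset. The model and data below are assumptions;
# the point is that `keras_evaluate` expects an *unbatched* dataset, since it
# batches internally:
toy_ds = tf.data.Dataset.from_tensor_slices(
    (tf.zeros((16, 4)), tf.zeros((16, 1))))
toy_clf = tf.keras.Sequential([tf.keras.Input(shape=(4,)),
                               tf.keras.layers.Dense(1, activation="sigmoid")])
toy_clf.compile(loss="binary_crossentropy", metrics=["accuracy"])
# loss, accuracy = keras_evaluate(toy_clf, toy_ds, batch_size=8)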
def run(keras_model: tf.keras.Model,
        train_dataset: tf.data.Dataset,
        experiment_name: str,
        root_output_dir: str,
        num_epochs: int,
        hparams_dict: Optional[Dict[str, Any]] = None,
        decay_epochs: Optional[int] = None,
        lr_decay: Optional[float] = None,
        validation_dataset: Optional[tf.data.Dataset] = None,
        test_dataset: Optional[tf.data.Dataset] = None
        ) -> tf.keras.callbacks.History:
    """Run centralized training for a given compiled `tf.keras.Model`.

    Args:
      keras_model: A compiled `tf.keras.Model`.
      train_dataset: The `tf.data.Dataset` to be used for training.
      experiment_name: Name of the experiment, used as part of the name of the
        output directory.
      root_output_dir: The top-level output directory. The directory
        `root_output_dir/experiment_name` will contain TensorBoard logs,
        metrics CSVs and other outputs.
      num_epochs: How many training epochs to perform.
      hparams_dict: An optional dict specifying hyperparameters. If provided,
        the hyperparameters will be written to CSV.
      decay_epochs: Number of training epochs before decaying the learning rate.
      lr_decay: How much to decay the learning rate by every `decay_epochs`.
      validation_dataset: An optional `tf.data.Dataset` used for validation
        during training.
      test_dataset: An optional `tf.data.Dataset` used for testing after all
        training has completed.

    Returns:
      A `tf.keras.callbacks.History` object.
    """
    tensorboard_dir = os.path.join(root_output_dir, 'logdir', experiment_name)
    results_dir = os.path.join(root_output_dir, 'results', experiment_name)

    for path in [root_output_dir, tensorboard_dir, results_dir]:
        tf.io.gfile.makedirs(path)

    if hparams_dict:
        hparams_file = os.path.join(results_dir, 'hparams.csv')
        logging.info('Saving hyper parameters to: [%s]', hparams_file)
        hparams_df = pd.DataFrame(hparams_dict, index=[0])
        utils_impl.atomic_write_to_csv(hparams_df, hparams_file)

    csv_logger_callback = keras_callbacks.AtomicCSVLogger(results_dir)
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard_dir)
    training_callbacks = [tensorboard_callback, csv_logger_callback]

    if decay_epochs is not None and decay_epochs > 0:
        # Reduce the learning rate after a fixed number of epochs.
        def decay_lr(epoch, learning_rate):
            if (epoch + 1) % decay_epochs == 0:
                return learning_rate * lr_decay
            else:
                return learning_rate

        lr_callback = tf.keras.callbacks.LearningRateScheduler(decay_lr, verbose=1)
        training_callbacks.append(lr_callback)

    logging.info('Training model:')
    logging.info(keras_model.summary())

    history = keras_model.fit(train_dataset,
                              validation_data=validation_dataset,
                              epochs=num_epochs,
                              callbacks=training_callbacks)

    logging.info('Final training metrics:')
    for metric in keras_model.metrics:
        name = metric.name
        metric = history.history[name][-1]
        logging.info('\t%s: %.4f', name, metric)

    if validation_dataset:
        logging.info('Final validation metrics:')
        for metric in keras_model.metrics:
            name = metric.name
            metric = history.history['val_{}'.format(name)][-1]
            logging.info('\t%s: %.4f', name, metric)

    if test_dataset:
        test_metrics = keras_model.evaluate(test_dataset, return_dict=True)
        logging.info('Test metrics:')
        for metric in keras_model.metrics:
            name = metric.name
            metric = test_metrics[name]
            logging.info('\t%s: %.4f', name, metric)

    return history
def meta_adversarial_training(model: tf.keras.Model,
                              config: PatchTrainingConfig):
    """Trains a model with meta-adversarial training.

    Args:
        model: A model to be trained.
        config: A training configuration.
    """
    patch_lr = config.patch_lr
    initial_lr = config.initial_lr
    n_epochs = config.n_epochs
    batch_size = config.batch_size
    label_smoothing = config.label_smoothing
    n_patches = config.n_patches
    n_patch_trials = config.n_patch_trials
    patch_shape = config.patch_shape

    # Data
    dataset_train, dataset_valid = load_tiny_imagenet_dataset(
        config.data_dir,
        batch_size,
        image_size=64,
        add_train_augmentation=True,
        one_hot=True,
        label_smooth=label_smoothing,
    )

    # Log file
    csv_file = open(config.result_dir + os.sep + "log.csv", "w")
    csv_writer = csv.writer(csv_file, delimiter=";")
    csv_writer.writerow(["Epoch", "Train Loss", "Train Accuracy", "Valid Accuracy"])

    i_fgsm = IFGSM(
        model,
        patch_application,
        model.loss_functions[0],
        config.optimizer.n_iterations,
        maximize_loss=not config.optimizer.targeted_attack,
    )

    # Objects for sampling new patches and selecting patches to be applied to a batch
    patch_sampler = PatchSampler(
        dataset_train,
        patch_shape,
        patch_initialization=config.patch_initialization,
        targeted=config.optimizer.targeted_attack,
    )
    patch_selector = PatchSelector(model, patch_application, model.loss_functions[0])

    # Generate initial meta patches
    meta_patches, patch_targets = patch_sampler(n_patches)

    # Sample patch-specific step sizes from a log-uniform distribution
    step_sizes = tf.convert_to_tensor(10**np.random.uniform(
        np.log10(config.optimizer.min_step_size),
        np.log10(config.optimizer.max_step_size),
        (n_patches, ),
    ).astype("float32"))[:, None, None, None]

    # Adversarial training
    for epoch in range(n_epochs):
        # Set up progress bar and adapt learning rate
        progbar = tf.keras.utils.Progbar(100000 // batch_size)
        lr = cosine_anneal_schedule(epoch, n_epochs, initial_lr)
        tf.keras.backend.set_value(model.optimizer.lr, lr)

        # Train for one epoch (outer minimization)
        for (images, labels) in dataset_train.shuffle(100):
            # Select patches and randomness in adversarial fashion
            # (SELECT in Algorithm 1)
            patch_ind, randomness = patch_selector(images, labels, meta_patches,
                                                   batch_size, n_patch_trials)
            patches = tf.gather(meta_patches, patch_ind)

            # Set patch-specific step sizes
            step_size = tf.gather(step_sizes, patch_ind)

            # Adapt patch to current batch
            target = (tf.gather(patch_targets, patch_ind)
                      if config.optimizer.targeted_attack else labels)

            # Run inner maximization
            patches = i_fgsm(patches, images, target, step_size, randomness)

            # Update meta patch using REPTILE
            meta_patches = update_meta_patches(patches, patch_ind, meta_patches,
                                               patch_lr)

            # Add batch-adapted patch to images
            images_with_patch = patch_application(images, patches, randomness)

            # Train model on inputs with patches
            metric_values = model.train_on_batch(images_with_patch, labels)
            progbar.add(1, zip(model.metrics_names, metric_values))

        # Evaluate clean performance of model
        acc_valid = model.evaluate(dataset_valid)[1]

        # Store weights and update log file
        model.save_weights(
            os.path.join(config.result_dir,
                         f"tiny_imagenet_weights_{epoch:03d}.h5"))
        csv_writer.writerow(
            map(
                lambda x: "%.3f" % x,
                [
                    epoch,
                    progbar._values["loss"][0] / progbar._values["loss"][1],
                    progbar._values["acc"][0] / progbar._values["acc"][1],
                    acc_valid,
                ],
            ))
        csv_file.flush()
def Train(model: tf.keras.Model, dataset, method: str, T: int):
    print("starting training with {} algorithm on {} iterations ...".format(method, T))

    if method in ["Bayes_UCB", "TS_Beta"]:
        W = model.layers[-1].get_weights()
        success = np.zeros((64, 5))
        fail = np.zeros_like(success)
        n = np.zeros_like(success)  # number of visits
        threshold = 0.005
        norm_const = 0.03

        for t in range(1, T):
            # select random train data for comparison
            random_data = dn.Select_Random_Data(dataset)

            # selecting index: exploration/exploitation
            if np.where(n == 0)[0].size == 0 and np.where(n == 0)[1].size == 0:
                if method == "Bayes_UCB":
                    index = Bayes_UCB(t, success, fail)
                elif method == "TS_Beta":
                    index = TS_Beta(success, fail)
                ind_max = np.array(np.unravel_index(index, success.shape))
                row = ind_max[0]
                col = ind_max[1]
            else:
                # every weight should be visited once to get an initial distribution
                row = np.where(n == 0)[0][0]
                col = np.where(n == 0)[1][0]
            print("iteration:", t, " index:", row, col)

            # evaluating main model
            loss_base = model.evaluate(random_data, verbose=0)[0]

            # setting selected node to zero and evaluating again
            W_ = np.copy(W)
            W_[0][row, col] = 0
            model.layers[-1].set_weights(W_)
            loss = model.evaluate(random_data, verbose=0)[0]

            # calculating delta and reward
            delta = loss_base - loss
            reward = max(0, threshold + delta) / norm_const

            # updating number of successes and fails;
            # the threshold for quantization is set to 0.5
            if reward >= 0.5:
                success[row, col] += 1
                print("successful")
            else:
                fail[row, col] += 1
                print("failed")
            n[row, col] += 1

            # initializing the layer to the original trained weights for next round
            model.layers[-1].set_weights(W)

        # saving the results
        file_name = "result_weights_" + method + ".npy"
        results = {"n": n, "s": success, "f": fail}
        np.save(file_name, results)
        return results

    elif method in ["UCB1", "KL_UCB", "TS_Normal"]:
        W = model.layers[-1].get_weights()
        n = np.zeros((64, 5))  # number of visits
        mu = np.zeros_like(n)  # average reward
        threshold = 0.005
        norm_const = 0.03

        for t in range(1, T):
            # select random train data for comparison
            random_data = dn.Select_Random_Data(dataset)

            # selecting index: exploration/exploitation
            if np.where(n == 0)[0].size == 0 and np.where(n == 0)[1].size == 0:
                if method == "UCB1":
                    index = UCB1(mu, n, t)
                elif method == "KL_UCB":
                    index = KL_UCB(mu, n, t)
                else:
                    index = TS_Normal(mu, n)
                ind_max = np.array(np.unravel_index(index, n.shape))
                row = ind_max[0]
                col = ind_max[1]
            else:
                # every weight should be visited once to get an initial distribution
                row = np.where(n == 0)[0][0]
                col = np.where(n == 0)[1][0]
            print("iteration:", t, " index:", row, col)

            # evaluating main model
            loss_base = model.evaluate(random_data, verbose=0)[0]

            # setting selected node to zero and evaluating again
            W_ = np.copy(W)
            W_[0][row, col] = 0
            model.layers[-1].set_weights(W_)
            loss = model.evaluate(random_data, verbose=0)[0]

            # calculating delta and reward
            delta = loss_base - loss
            reward = max(0, threshold + delta) / norm_const

            # clipping reward
            if reward >= 1:
                reward = 0.99
            print("reward:", reward)

            # updating number of visits of the node and the average reward
            n[row, col] = n[row, col] + 1
            mu[row, col] = ((n[row, col] - 1) / n[row, col]) * mu[row, col] \
                + (1 / n[row, col]) * reward

            # initializing the layer to the original trained weights for next round
            model.layers[-1].set_weights(W)

        # saving the results
        file_name = "result_weights_" + method + ".npy"
        results = {"n": n, "mu": mu}
        np.save(file_name, results)
        return results
def fit(
    model: tf.keras.Model,
    train_data: tf.data.Dataset,
    epochs: int = 1,
    steps_per_epoch: Optional[int] = None,
    validation_data: tf.data.Dataset = None,
    validation_steps: Optional[int] = None,
    callbacks: Tuple[tf.keras.callbacks.Callback, ...] = (),
    initial_epoch: int = 0,
    validation_freq: int = 1,
    track_iterator: bool = False,
    verbose: bool = True,
) -> tf.keras.callbacks.History:
    """
    Custom fit implementation.

    Interface is intended to mimic best-practice usage of `tf.keras.Model.fit`.
    Unlike `tf.keras.Model.fit`, `_train_iter` is added as an attribute to the
    model. If using `tf.train.Checkpoint`s to manage training state, this may
    result in larger files on disk.

    Args:
        model: keras model to train.
        train_data: dataset with (inputs, labels) or
            (inputs, labels, sample_weights).
        epochs: total number of epochs to train until.
        steps_per_epoch: number of steps per epoch. Must be provided if
            `train_data` has infinite cardinality.
        validation_data: optional dataset to perform validation on.
        validation_steps: number of steps of validation to perform per epoch.
        callbacks: `tf.keras.callbacks.Callback` instances.
        initial_epoch: starting epoch.
        validation_freq: number of epochs between validation.
        track_iterator: if True, `train_data`'s iterator is added as an
            attribute to `model`, meaning it will be saved in checkpoints
            saving `model`.
        verbose: controls verbosity of printed output.

    Returns:
        `tf.keras.callbacks.History` object.

    Raises:
        AttributeError: if `model` has an existing `_train_iter` attribute and
            `track_iterator` is True.
    """
    train_func = model.make_train_function()
    train_iter, steps_per_epoch = as_infinite_iterator(train_data, steps_per_epoch)

    if hasattr(model, "_train_iter"):
        raise AttributeError(
            "Cannot fit model with existing `_train_iter` attribute.")
    if track_iterator:
        model._train_iter = train_iter  # pylint: disable=protected-access

    cb = tf.keras.callbacks.CallbackList(callbacks=callbacks,
                                         add_history=True,
                                         add_progbar=verbose,
                                         model=model)
    cb.set_params(dict(epochs=epochs, verbose=int(verbose), steps=steps_per_epoch))

    cb.on_train_begin()
    initial_epoch = (
        model._maybe_load_initial_epoch_from_ckpt(  # pylint: disable=protected-access
            initial_epoch))
    training_logs = None
    model.stop_training = False
    for epoch in range(initial_epoch, epochs):
        model.reset_metrics()
        cb.on_epoch_begin(epoch)
        logs = None
        for step in range(steps_per_epoch):
            cb.on_train_batch_begin(step)
            logs = train_func(train_iter)
            cb.on_train_batch_end(step, logs)
            if model.stop_training:
                break
        assert logs is not None
        epoch_logs = logs
        if (validation_data is not None and model._should_eval(  # pylint: disable=protected-access
                epoch, validation_freq)):
            logs = model.evaluate(
                validation_data,
                steps=validation_steps,
                callbacks=cb,
                return_dict=True,
            )
            epoch_logs.update({"val_" + name: val for name, val in logs.items()})
        cb.on_epoch_end(epoch, epoch_logs)
        training_logs = epoch_logs
        if model.stop_training:
            break

    cb.on_train_end(logs=training_logs)
    if track_iterator:
        del model._train_iter
    return model.history
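# Usage sketch for the custom `fit` above. The toy model, dataset and callback
# are assumptions; the point is that the interface mirrors `tf.keras.Model.fit`:
# toy_ds = tf.data.Dataset.from_tensor_slices(
#     (tf.zeros((32, 4)), tf.zeros((32, 1)))).batch(8).repeat()
# toy_model.compile(optimizer="adam", loss="mse")
# history = fit(
#     toy_model,
#     train_data=toy_ds,
#     epochs=3,
#     steps_per_epoch=4,  # required because `repeat()` makes the dataset infinite
#     callbacks=(tf.keras.callbacks.TerminateOnNaN(),),
# )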
def train_model(config: ConfigData, model: tf.keras.Model, ds: Dataset) -> None:
    """
    ### 2. Train the Model ###
    We'll now train and save the new model.

    Throws: ImportError, AssertionError
    """
    # Train the model
    history = model.fit(ds.x_train,
                        ds.y_train,
                        epochs=500,
                        batch_size=64,
                        validation_data=(ds.x_validate, ds.y_validate),
                        verbose=2)

    # Save the model to disk
    model.save(config.MODEL_TF)

    # ### 3. Plot Metrics
    # Each training epoch, the model prints out its loss and mean absolute error
    # for training and validation. You can read this in the output above
    # (note that your exact numbers may differ):
    #   Epoch 500/500
    #   10/10 - 0s - loss: 0.0121 - mae: 0.0884 -
    #   val_loss: 0.0111 - val_mae: 0.0856 - 21ms/epoch - 2ms/step
    # You can see that we've already got a huge improvement - validation loss
    # has dropped from 0.15 to 0.01, and validation MAE has dropped
    # from 0.33 to 0.08.
    # The following cell will print the same graphs we used to evaluate our
    # original model, but showing our new training history:

    # Draw a graph of the loss, which is the distance between
    # the predicted and actual values during training and validation.
    train_loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(train_loss) + 1)

    # Exclude the first few epochs so the graph is easier to read
    skip = 100

    plt.figure(figsize=(10, 4))
    plt.subplot(1, 2, 1)
    plt.plot(epochs[skip:], train_loss[skip:], 'g.', label='Training loss')
    plt.plot(epochs[skip:], val_loss[skip:], 'b.', label='Validation loss')
    plt.title('Training and validation loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    # Draw a graph of mean absolute error, which is another way of
    # measuring the amount of error in the prediction.
    train_mae = history.history['mae']
    val_mae = history.history['val_mae']
    plt.plot(epochs[skip:], train_mae[skip:], 'g.', label='Training MAE')
    plt.plot(epochs[skip:], val_mae[skip:], 'b.', label='Validation MAE')
    plt.title('Training and validation mean absolute error')
    plt.xlabel('Epochs')
    plt.ylabel('MAE')
    plt.legend()

    save_plot(config, title=None, filename=config.PLOT_TRAIN_VALIDATION)

    assert train_loss[-1] <= config.TRAIN_LOSS, 'Training loss too large'
    assert val_loss[-1] <= config.VAL_LOSS, 'Validation loss too large'
    assert train_mae[-1] <= config.TRAIN_MAE, 'Training MAE too large'
    assert val_mae[-1] <= config.VAL_MAE, 'Validation MAE too large'

    # Great results! From these graphs, we can see several exciting things:
    # * The overall loss and MAE are much better than our previous network
    # * Metrics are better for validation than training, which means the
    #   network is not overfitting
    # The reason the metrics for validation are better than those for training
    # is that validation metrics are calculated at the end of each epoch,
    # while training metrics are calculated throughout the epoch, so validation
    # happens on a model that has been trained slightly longer.
    # This all means our network seems to be performing well! To confirm, let's
    # check its predictions against the test dataset we set aside earlier:

    # Calculate and print the loss on our test dataset
    print('Evaluation loss:')
    test_loss, test_mae = model.evaluate(ds.x_test, ds.y_test)

    # Make predictions based on our test dataset
    y_test_pred = model.predict(ds.x_test)

    # Graph the predictions against the actual values
    plt.clf()
    plt.title('Comparison of predictions and actual values')
    plt.plot(ds.x_test, ds.y_test, 'b.', label='Actual values')
    plt.plot(ds.x_test, y_test_pred, 'r.', label='TF predicted')
    plt.legend()

    save_plot(config, title=None, filename=config.PLOT_TRAIN_PREDICTION)

    assert test_loss <= config.TEST_LOSS, 'Test loss too large'
    assert test_mae <= config.TEST_MAE, 'Test MAE too large'