def save_model(model,
               filepath,
               overwrite=True,
               include_optimizer=True,
               save_format=None):
    """Saves a model as a TensorFlow SavedModel or HDF5 file.

    The saved model contains:
        - the model's configuration (topology)
        - the model's weights
        - the model's optimizer's state (if any)

    Thus the saved model can be reinstantiated in the exact same state,
    without any of the code used for model definition or training.

    Note: after the up-front validation, `save_format` is unconditionally
    set to 'h5', so every call that is not rejected is written through
    `hdf5_format.save_model_to_hdf5`.

    Arguments:
        model: Keras model instance to be saved.
        filepath: One of the following:
            - String, path where to save the model
            - `h5py.File` object where to save the model
        overwrite: Whether we should overwrite any existing model at the
            target location, or instead ask the user with a manual prompt.
        include_optimizer: If True, save optimizer's state together.
        save_format: Either 'tf' or 'h5', indicating whether to save the
            model to Tensorflow SavedModel or HDF5. The 'tf' option is
            currently disabled, and will be enabled when Keras SavedModel
            export is no longer experimental. (The experimental function is
            tf.keras.experimental.export_saved_model).

    Raises:
        ImportError: If save format is hdf5, and h5py is not available.
        NotImplementedError: If `save_format` is 'tf' while neither TF2
            behavior nor eager execution outside functions is active, if
            `save_format` is 'tf' while Keras SavedModel export is still
            flagged experimental, or if the model is neither a graph
            network nor a `Sequential` model.
    """
    from tensorflow.python.keras.engine import sequential  # pylint: disable=g-import-not-at-top

    if (not tf2.enabled() and
            not ops.executing_eagerly_outside_functions() and
            save_format == 'tf'):
        # The first literal now ends with a space: the two adjacent literals
        # used to concatenate into "TensorFlow 1.Xgraph mode".
        raise NotImplementedError(
            'Saving the model as SavedModel is not supported in TensorFlow 1.X '
            'graph mode. Please enable eager execution or use the "h5" save '
            'format.')

    if _KERAS_SAVED_MODEL_STILL_EXPERIMENTAL and save_format == 'tf':
        raise NotImplementedError(
            'Saving the model as SavedModel is still in experimental stages. '
            'Please use tf.keras.experimental.export_saved_model, or use '
            'save_format="h5" to save to HDF5.')

    # TODO(kathywu): Remove this when Keras SavedModel is not experimental.
    save_format = 'h5'

    # The checks are ordered so that `os.path.splitext` is never reached with
    # an `h5py.File` object (short-circuit evaluation).
    if (save_format == 'h5' or
            (h5py is not None and isinstance(filepath, h5py.File)) or
            os.path.splitext(filepath)[1] in _HDF5_EXTENSIONS):
        # TODO(b/130258301): add utility method for detecting model type.
        if (not model._is_graph_network and  # pylint:disable=protected-access
                not isinstance(model, sequential.Sequential)):
            raise NotImplementedError(
                'Saving the model to HDF5 format requires the model to be a '
                'Functional model or a Sequential model. It does not work for '
                'subclassed models, because such models are defined via the body of '
                'a Python method, which isn\'t safely serializable. Consider saving '
                'to the Tensorflow SavedModel format (by setting save_format="tf") '
                'or using `save_weights`.')
        hdf5_format.save_model_to_hdf5(
            model, filepath, overwrite, include_optimizer)
        return
def launch(self) -> int:
    """Execute the :class:`AutoencoderNeuralNetwork <neural_networks.autoencoder_neural_network.AutoencoderNeuralNetwork>` neural_networks.autoencoder_neural_network.AutoencoderNeuralNetwork object.

    Reads the decode dataset (and, when ``input_predict_path`` is set, the
    predict dataset), builds the model through ``self.build_model``, trains
    it with an ``mse`` loss, logs the training metrics and the
    reconstruction / prediction tables, optionally writes those tables as
    CSV, and finally stores the model in an HDF5 file together with a JSON
    ``variables`` attribute.

    Returns:
        int: ``0`` after a completed run, and also ``0`` when
        ``self.check_restart()`` is truthy (nothing else is executed then).
    """

    # check input/output paths and parameters
    self.check_data_params(self.out_log, self.err_log)

    # Setup Biobb
    if self.check_restart():
        return 0
    self.stage_files()

    # load decode dataset
    fu.log(
        'Getting decode dataset from %s' %
        self.io_dict["in"]["input_decode_path"], self.out_log,
        self.global_log)
    data_dec = pd.read_csv(self.io_dict["in"]["input_decode_path"])
    seq_in = np.array(data_dec)

    # reshape input into [samples, timesteps, features]
    n_in = len(seq_in)
    seq_in = seq_in.reshape((1, n_in, 1))

    # load predict dataset
    n_out = None
    if (self.io_dict["in"]["input_predict_path"]):
        fu.log(
            'Getting predict dataset from %s' %
            self.io_dict["in"]["input_predict_path"], self.out_log,
            self.global_log)
        data_pred = pd.read_csv(self.io_dict["in"]["input_predict_path"])
        seq_out = np.array(data_pred)

        # reshape output into [samples, timesteps, features]
        n_out = len(seq_out)
        seq_out = seq_out.reshape((1, n_out, 1))

    # build model
    fu.log('Building model', self.out_log, self.global_log)
    model = self.build_model(n_in, n_out)

    # model summary: collect the lines instead of printing them so they can
    # be sent to the logs
    stringlist = []
    model.summary(print_fn=stringlist.append)
    model_summary = "\n".join(stringlist)
    fu.log('Model summary:\n\n%s\n' % model_summary, self.out_log,
           self.global_log)

    # get optimizer: the class is looked up by name in
    # tensorflow.keras.optimizers
    mod = __import__('tensorflow.keras.optimizers',
                     fromlist=[self.optimizer])
    opt_class = getattr(mod, self.optimizer)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    opt = opt_class(learning_rate=self.learning_rate)
    # compile model
    model.compile(optimizer=opt, loss='mse', metrics=['mse', 'mae'])

    # fitting
    fu.log('Training model', self.out_log, self.global_log)
    # targets: the input itself (reconstruction) plus, when available, the
    # predict sequence
    y_list = [seq_in]
    if n_out:
        y_list.append(seq_out)
    # fit the model
    mf = model.fit(seq_in,
                   y_list,
                   batch_size=self.batch_size,
                   epochs=self.max_epochs,
                   verbose=1)

    # one row per history entry: prettified key and value of the last epoch
    train_metrics = pd.DataFrame()
    train_metrics['metric'] = [
        ' '.join(x.capitalize() or '_' for x in key.split('_'))
        for key in mf.history
    ]
    train_metrics['coefficient'] = [
        lst[-1] for lst in mf.history.values()
    ]

    fu.log('Calculating metrics\n\nMETRICS TABLE\n\n%s\n' % train_metrics,
           self.out_log, self.global_log)

    # predicting
    fu.log('Predicting model', self.out_log, self.global_log)
    yhat = model.predict(seq_in, verbose=1)

    decoding_table = pd.DataFrame()
    if (self.io_dict["in"]["input_predict_path"]):
        # with a predict dataset, yhat[0] holds the reconstruction output
        # and yhat[1] the prediction output (see the prediction table below)
        decoding_table['reconstructed'] = np.squeeze(
            np.asarray(yhat[0][0]))
        decoding_table['original'] = data_dec
    else:
        decoding_table['reconstructed'] = np.squeeze(np.asarray(yhat[0]))
        decoding_table['original'] = np.squeeze(np.asarray(data_dec))
    decoding_table['residual'] = (decoding_table['original'] -
                                  decoding_table['reconstructed'])
    decoding_table['difference %'] = np.absolute(
        decoding_table['residual'] / decoding_table['original'] * 100)
    # NOTE: this changes the process-wide pandas display option
    pd.set_option('display.float_format', lambda x: '%.5f' % x)
    # sort by difference in %
    decoding_table = decoding_table.sort_values(by=['difference %'])
    decoding_table = decoding_table.reset_index(drop=True)
    fu.log('RECONSTRUCTION TABLE\n\n%s\n' % decoding_table, self.out_log,
           self.global_log)

    # save reconstruction data
    if (self.io_dict["out"]["output_test_decode_path"]):
        fu.log(
            'Saving reconstruction data to %s' %
            self.io_dict["out"]["output_test_decode_path"], self.out_log,
            self.global_log)
        decoding_table.to_csv(
            self.io_dict["out"]["output_test_decode_path"],
            index=False,
            header=True)

    if (self.io_dict["in"]["input_predict_path"]):
        prediction_table = pd.DataFrame()
        prediction_table['predicted'] = np.squeeze(np.asarray(yhat[1][0]))
        prediction_table['original'] = data_pred
        prediction_table['residual'] = (prediction_table['original'] -
                                        prediction_table['predicted'])
        prediction_table['difference %'] = np.absolute(
            prediction_table['residual'] / prediction_table['original'] *
            100)
        pd.set_option('display.float_format', lambda x: '%.5f' % x)
        # sort by difference in %
        prediction_table = prediction_table.sort_values(
            by=['difference %'])
        prediction_table = prediction_table.reset_index(drop=True)
        fu.log('PREDICTION TABLE\n\n%s\n' % prediction_table, self.out_log,
               self.global_log)

        # save decoding data
        if (self.io_dict["out"]["output_test_predict_path"]):
            fu.log(
                'Saving prediction data to %s' %
                self.io_dict["out"]["output_test_predict_path"],
                self.out_log, self.global_log)
            prediction_table.to_csv(
                self.io_dict["out"]["output_test_predict_path"],
                index=False,
                header=True)

    # save model and parameters
    vars_obj = {'type': 'autoencoder'}
    variables = json.dumps(vars_obj)
    fu.log('Saving model to %s' % self.io_dict["out"]["output_model_path"],
           self.out_log, self.global_log)
    # the JSON metadata is stored as an attribute of the same HDF5 file
    with h5py.File(self.io_dict["out"]["output_model_path"],
                   mode='w') as f:
        hdf5_format.save_model_to_hdf5(model, f)
        f.attrs['variables'] = variables

    return 0
def save_model(model,
               filepath,
               overwrite=True,
               include_optimizer=True,
               save_format=None,
               signatures=None,
               options=None):
    """Saves a model as a TensorFlow SavedModel or HDF5 file.

    What gets saved:
        - the model's configuration (topology)
        - the model's weights
        - the model's optimizer's state (if any)

    A saved model can therefore be reinstantiated in the exact same state
    without the code that defined or trained it.

    Model weights may carry different scoped names after loading. Scoped
    names include the model/layer names, such as `"dense_1/kernel:0"`.
    Prefer the layer properties to reach specific variables, e.g.
    `model.get_layer("dense_1").kernel`.

    _SavedModel serialization_

    The SavedModel path uses `tf.saved_model.save` to save the model and all
    trackable objects attached to it (e.g. layers and variables).
    `@tf.function`-decorated methods are saved as well. Extra trackable
    objects and functions are added to the SavedModel so the model can be
    loaded back as a Keras Model object.

    Arguments:
        model: Keras model instance to be saved.
        filepath: One of the following:
            - String or `pathlib.Path` object, path where to save the model
            - `h5py.File` object where to save the model
        overwrite: Whether to overwrite any existing model at the target
            location, or instead ask the user with a manual prompt.
        include_optimizer: If True, save optimizer's state together.
        save_format: Either 'tf' or 'h5', selecting Tensorflow SavedModel
            or HDF5. Defaults to 'tf' in TF 2.X, and 'h5' in TF 1.X.
        signatures: Signatures to save with the SavedModel. Applicable to
            the 'tf' format only. See the `signatures` argument in
            `tf.saved_model.save` for details.
        options: Optional `tf.saved_model.SaveOptions` object that
            specifies options for saving to SavedModel.

    Raises:
        ImportError: If save format is hdf5, and h5py is not available.
        NotImplementedError: If HDF5 is selected and the model is neither a
            graph network nor a `Sequential` model.
    """
    from tensorflow.python.keras.engine import sequential  # pylint: disable=g-import-not-at-top

    # Resolve the format: an explicit (truthy) argument wins over the
    # version-dependent fallback.
    fallback_format = 'h5'
    if tf2.enabled():
        fallback_format = 'tf'
    save_format = save_format or fallback_format

    # pathlib objects are converted to plain strings before any path handling.
    pathlib_available = sys.version_info >= (3, 4)
    if pathlib_available and isinstance(filepath, pathlib.Path):
        filepath = str(filepath)

    # Decide whether HDF5 is the target. The checks run in the same order as
    # a short-circuiting `or`, so the extension is only inspected when
    # `filepath` was not recognised as an `h5py.File`.
    to_hdf5 = save_format == 'h5'
    if not to_hdf5 and h5py is not None:
        to_hdf5 = isinstance(filepath, h5py.File)
    if not to_hdf5:
        extension = os.path.splitext(filepath)[1]
        to_hdf5 = extension in _HDF5_EXTENSIONS

    if not to_hdf5:
        saved_model_save.save(model, filepath, overwrite, include_optimizer,
                              signatures, options)
        return

    # TODO(b/130258301): add utility method for detecting model type.
    hdf5_compatible = (
        model._is_graph_network or  # pylint:disable=protected-access
        isinstance(model, sequential.Sequential))
    if not hdf5_compatible:
        raise NotImplementedError(
            'Saving the model to HDF5 format requires the model to be a '
            'Functional model or a Sequential model. It does not work for '
            'subclassed models, because such models are defined via the body of '
            'a Python method, which isn\'t safely serializable. Consider saving '
            'to the Tensorflow SavedModel format (by setting save_format="tf") '
            'or using `save_weights`.')
    hdf5_format.save_model_to_hdf5(model, filepath, overwrite,
                                   include_optimizer)
def save_model(model,
               filepath,
               overwrite=True,
               include_optimizer=True,
               save_format=None,
               signatures=None,
               options=None,
               save_traces=True):
    # pylint: disable=line-too-long
    """Saves a model as a TensorFlow SavedModel or HDF5 file.

    See the [Serialization and Saving guide](https://keras.io/guides/serialization_and_saving/)
    for details.

    Usage:

    >>> model = tf.keras.Sequential([
    ...     tf.keras.layers.Dense(5, input_shape=(3,)),
    ...     tf.keras.layers.Softmax()])
    >>> model.save('/tmp/model')
    >>> loaded_model = tf.keras.models.load_model('/tmp/model')
    >>> x = tf.random.uniform((10, 3))
    >>> assert np.allclose(model.predict(x), loaded_model.predict(x))

    Both the SavedModel and the HDF5 file contain:

    - the model's configuration (topology)
    - the model's weights
    - the model's optimizer's state (if any)

    Models can therefore be reinstantiated in the exact same state, without
    any of the code used for model definition or training.

    Model weights may have different scoped names after being loaded. Scoped
    names include the model/layer names, such as `"dense_1/kernel:0"`. Prefer
    the layer properties to access specific variables, e.g.
    `model.get_layer("dense_1").kernel`.

    __SavedModel serialization format__

    Keras SavedModel uses `tf.saved_model.save` to save the model and all
    trackable objects attached to the model (e.g. layers and variables). The
    model config, weights, and optimizer are saved in the SavedModel. In
    addition, for every Keras layer attached to the model, the SavedModel
    stores:

      * the config and metadata -- e.g. name, dtype, trainable status
      * traced call and loss functions, which are stored as TensorFlow
        subgraphs.

    The traced functions let the SavedModel format save and load custom
    layers without the original class definition.

    Saving the traced functions can be turned off through `save_traces`,
    which shortens saving time and shrinks the SavedModel on disk. When
    traces are not saved, all custom class definitions _must_ be provided at
    load time; see the `custom_objects` argument in
    `tf.keras.models.load_model`.

    Args:
        model: Keras model instance to be saved.
        filepath: One of the following:
            - String or `pathlib.Path` object, path where to save the model
            - `h5py.File` object where to save the model
        overwrite: Whether we should overwrite any existing model at the
            target location, or instead ask the user with a manual prompt.
        include_optimizer: If True, save optimizer's state together.
        save_format: Either 'tf' or 'h5', indicating whether to save the
            model to Tensorflow SavedModel or HDF5. Defaults to 'tf' in
            TF 2.X, and 'h5' in TF 1.X.
        signatures: Signatures to save with the SavedModel. Applicable to
            the 'tf' format only. Please see the `signatures` argument in
            `tf.saved_model.save` for details.
        options: (only applies to SavedModel format)
            `tf.saved_model.SaveOptions` object that specifies options for
            saving to SavedModel.
        save_traces: (only applies to SavedModel format) When enabled, the
            SavedModel will store the function traces for each layer. This
            can be disabled, so that only the configs of each layer are
            stored. Defaults to `True`. Disabling this will decrease
            serialization time and reduce file size, but it requires that
            all custom layers/models implement a `get_config()` method.

    Raises:
        ImportError: If save format is hdf5, and h5py is not available.
        NotImplementedError: If HDF5 is selected and the model is neither a
            graph network nor a `Sequential` model.
    """
    # pylint: enable=line-too-long
    from tensorflow.python.keras.engine import sequential  # pylint: disable=g-import-not-at-top

    # An explicit (truthy) `save_format` wins over the version-dependent
    # fallback.
    if tf2.enabled():
        version_default = 'tf'
    else:
        version_default = 'h5'
    save_format = save_format or version_default
    filepath = path_to_string(filepath)

    def _targets_hdf5():
        # Same evaluation order as a short-circuiting `or` chain: the path
        # helper is only consulted when the earlier checks did not match.
        if save_format == 'h5':
            return True
        if h5py is not None and isinstance(filepath, h5py.File):
            return True
        return saving_utils.is_hdf5_filepath(filepath)

    if _targets_hdf5():
        # TODO(b/130258301): add utility method for detecting model type.
        functional_or_sequential = (
            model._is_graph_network or  # pylint:disable=protected-access
            isinstance(model, sequential.Sequential))
        if not functional_or_sequential:
            raise NotImplementedError(
                'Saving the model to HDF5 format requires the model to be a '
                'Functional model or a Sequential model. It does not work for '
                'subclassed models, because such models are defined via the body of '
                'a Python method, which isn\'t safely serializable. Consider saving '
                'to the Tensorflow SavedModel format (by setting save_format="tf") '
                'or using `save_weights`.')
        hdf5_format.save_model_to_hdf5(model, filepath, overwrite,
                                       include_optimizer)
        return

    saved_model_save.save(model, filepath, overwrite, include_optimizer,
                          signatures, options, save_traces)
def launch(self) -> int:
    """Execute the :class:`ClassificationNeuralNetwork <neural_networks.classification_neural_network.ClassificationNeuralNetwork>` neural_networks.classification_neural_network.ClassificationNeuralNetwork object.

    Loads the dataset, shuffles it and splits it into train and test sets
    (optionally scaled), builds the model through ``self.build_model``,
    trains it with early stopping, logs the train/test metrics and
    confusion matrices, optionally writes the test table and an evaluation
    plot, and finally stores the model in an HDF5 file together with a JSON
    ``variables`` attribute.

    Returns:
        int: ``0`` after a completed run, and also ``0`` when
        ``self.check_restart()`` is truthy (nothing else is executed then).
    """

    # check input/output paths and parameters
    self.check_data_params(self.out_log, self.err_log)

    # Setup Biobb
    if self.check_restart():
        return 0
    self.stage_files()

    # load dataset
    fu.log(
        'Getting dataset from %s' %
        self.io_dict["in"]["input_dataset_path"], self.out_log,
        self.global_log)
    if 'columns' in self.features:
        # the header row supplies the column names and is then skipped
        labels = getHeader(self.io_dict["in"]["input_dataset_path"])
        skiprows = 1
    else:
        labels = None
        skiprows = None
    # raw string: "\s" is not a valid escape in a normal string literal; the
    # regex engine still reads r"\t" as a tab
    data = pd.read_csv(self.io_dict["in"]["input_dataset_path"],
                       header=None,
                       sep=r"\s+|;|:|,|\t",
                       engine="python",
                       skiprows=skiprows,
                       names=labels)

    targets_list = data[getTargetValue(self.target)].to_numpy()

    X = getFeatures(self.features, data, self.out_log,
                    self.__class__.__name__)
    fu.log('Features: [%s]' % (getIndependentVarsList(self.features)),
           self.out_log, self.global_log)
    # target
    # NOTE(review): the returned value is not used below (targets_list is);
    # the call is kept in case getTarget validates or logs — confirm.
    y = getTarget(self.target, data, self.out_log, self.__class__.__name__)
    fu.log('Target: %s' % (str(getTargetValue(self.target))), self.out_log,
           self.global_log)
    # weights
    if self.weight:
        w = getWeight(self.weight, data, self.out_log,
                      self.__class__.__name__)

    # shuffle dataset
    fu.log('Shuffling dataset', self.out_log, self.global_log)
    shuffled_indices = np.arange(X.shape[0])
    np.random.shuffle(shuffled_indices)
    np_X = X.to_numpy()
    shuffled_X = np_X[shuffled_indices]
    shuffled_y = targets_list[shuffled_indices]
    if self.weight:
        shuffled_w = w[shuffled_indices]

    # train / test split
    fu.log('Creating train and test sets', self.out_log, self.global_log)
    arrays_sets = (shuffled_X, shuffled_y)
    # if user provide weights
    if self.weight:
        arrays_sets = arrays_sets + (shuffled_w, )
        X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(
            *arrays_sets,
            test_size=self.test_size,
            random_state=self.random_state)
    else:
        X_train, X_test, y_train, y_test = train_test_split(
            *arrays_sets,
            test_size=self.test_size,
            random_state=self.random_state)

    # scale dataset
    if self.scale:
        fu.log('Scaling dataset', self.out_log, self.global_log)
        X_train = scale(X_train)

    # build model: input shape from the feature count, output size from the
    # number of distinct training labels
    fu.log('Building model', self.out_log, self.global_log)
    model = self.build_model((X_train.shape[1], ), np.unique(y_train).size)

    # model summary: collect the lines instead of printing them so they can
    # be sent to the logs
    stringlist = []
    model.summary(print_fn=stringlist.append)
    model_summary = "\n".join(stringlist)
    fu.log('Model summary:\n\n%s\n' % model_summary, self.out_log,
           self.global_log)

    # get optimizer: the class is looked up by name in
    # tensorflow.keras.optimizers
    mod = __import__('tensorflow.keras.optimizers',
                     fromlist=[self.optimizer])
    opt_class = getattr(mod, self.optimizer)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    opt = opt_class(learning_rate=self.learning_rate)
    # compile model
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy', 'mse'])

    # fitting
    fu.log('Training model', self.out_log, self.global_log)
    # set an early stopping mechanism
    # set patience=2, to be a bit tolerant against random validation loss increases
    early_stopping = EarlyStopping(patience=2)

    # TODO: class_weight not working since TF 2.4.1 update; an empty list is
    # passed in both cases instead of weights estimated from the target data
    class_weight = []
    if self.weight:
        sample_weight = w_train
    else:
        fu.log('No weight provided', self.out_log, self.global_log)
        sample_weight = None

    # fit the model
    mf = model.fit(X_train,
                   y_train,
                   class_weight=class_weight,
                   sample_weight=sample_weight,
                   batch_size=self.batch_size,
                   epochs=self.max_epochs,
                   callbacks=[early_stopping],
                   validation_split=self.validation_size,
                   verbose=1)

    fu.log('Total epochs performed: %s' % len(mf.history['loss']),
           self.out_log, self.global_log)

    # values of the last performed epoch
    train_metrics = pd.DataFrame()
    train_metrics['metric'] = [
        'Train loss', ' Train accuracy', 'Train MSE', 'Validation loss',
        'Validation accuracy', 'Validation MSE'
    ]
    train_metrics['coefficient'] = [
        mf.history['loss'][-1], mf.history['accuracy'][-1],
        mf.history['mse'][-1], mf.history['val_loss'][-1],
        mf.history['val_accuracy'][-1], mf.history['val_mse'][-1]
    ]

    fu.log(
        'Training metrics\n\nTRAINING METRICS TABLE\n\n%s\n' %
        train_metrics, self.out_log, self.global_log)

    # confusion matrix
    train_predictions = model.predict(X_train)
    train_predictions = np.around(train_predictions, decimals=2)
    # predicted class = index of the highest (rounded) score in each row
    norm_pred = np.argmax(train_predictions, axis=1)
    cnf_matrix_train = math.confusion_matrix(y_train, norm_pred).numpy()
    np.set_printoptions(precision=2)
    if self.normalize_cm:
        # divide every row by its total
        cnf_matrix_train = cnf_matrix_train.astype(
            'float') / cnf_matrix_train.sum(axis=1)[:, np.newaxis]
        cm_type = 'NORMALIZED CONFUSION MATRIX'
    else:
        cm_type = 'CONFUSION MATRIX, WITHOUT NORMALIZATION'

    fu.log(
        'Calculating confusion matrix for training dataset\n\n%s\n\n%s\n' %
        (cm_type, cnf_matrix_train), self.out_log, self.global_log)

    # testing
    if self.scale:
        X_test = scale(X_test)
    fu.log('Testing model', self.out_log, self.global_log)
    test_loss, test_accuracy, test_mse = model.evaluate(X_test, y_test)

    test_metrics = pd.DataFrame()
    test_metrics['metric'] = ['Test loss', ' Test accuracy', 'Test MSE']
    test_metrics['coefficient'] = [test_loss, test_accuracy, test_mse]

    fu.log(
        'Testing metrics\n\nTESTING METRICS TABLE\n\n%s\n' % test_metrics,
        self.out_log, self.global_log)

    # predict data from X_test
    test_predictions = model.predict(X_test)
    test_predictions = np.around(test_predictions, decimals=2)
    # one tuple of per-class scores per test row
    tpr = tuple(map(tuple, test_predictions))

    test_table = pd.DataFrame()
    test_table['P' + np.array2string(np.unique(y_train))] = tpr
    test_table['target'] = y_test

    fu.log('TEST DATA\n\n%s\n' % test_table, self.out_log, self.global_log)

    # confusion matrix
    norm_pred = np.argmax(test_predictions, axis=1)
    cnf_matrix_test = math.confusion_matrix(y_test, norm_pred).numpy()
    np.set_printoptions(precision=2)
    if self.normalize_cm:
        cnf_matrix_test = cnf_matrix_test.astype(
            'float') / cnf_matrix_test.sum(axis=1)[:, np.newaxis]
        cm_type = 'NORMALIZED CONFUSION MATRIX'
    else:
        cm_type = 'CONFUSION MATRIX, WITHOUT NORMALIZATION'

    fu.log(
        'Calculating confusion matrix for testing dataset\n\n%s\n\n%s\n' %
        (cm_type, cnf_matrix_test), self.out_log, self.global_log)

    # save test data
    if (self.io_dict["out"]["output_test_table_path"]):
        fu.log(
            'Saving testing data to %s' %
            self.io_dict["out"]["output_test_table_path"], self.out_log,
            self.global_log)
        test_table.to_csv(self.io_dict["out"]["output_test_table_path"],
                          index=False,
                          header=True)

    # distinct target values; np.unique already returns them sorted, so no
    # extra sort is needed. Shared by the plot and the saved parameters.
    vs = np.unique(targets_list)

    # create test plot
    if (self.io_dict["out"]["output_plot_path"]):
        if len(vs) > 2:
            plot = plotResultsClassMultCM(mf.history, cnf_matrix_train,
                                          cnf_matrix_test,
                                          self.normalize_cm, vs)
            fu.log(
                'Saving confusion matrix plot to %s' %
                self.io_dict["out"]["output_plot_path"], self.out_log,
                self.global_log)
        else:
            plot = plotResultsClassBinCM(mf.history, train_predictions,
                                         test_predictions, y_train, y_test,
                                         cnf_matrix_train, cnf_matrix_test,
                                         self.normalize_cm, vs)
            fu.log(
                'Saving binary classifier evaluator plot to %s' %
                self.io_dict["out"]["output_plot_path"], self.out_log,
                self.global_log)
        plot.savefig(self.io_dict["out"]["output_plot_path"], dpi=150)

    # save model and parameters
    vars_obj = {
        'features': self.features,
        'target': self.target,
        'scale': self.scale,
        'vs': vs.tolist(),
        'type': 'classification'
    }
    variables = json.dumps(vars_obj)
    fu.log('Saving model to %s' % self.io_dict["out"]["output_model_path"],
           self.out_log, self.global_log)
    # the JSON metadata is stored as an attribute of the same HDF5 file
    with h5py.File(self.io_dict["out"]["output_model_path"],
                   mode='w') as f:
        hdf5_format.save_model_to_hdf5(model, f)
        f.attrs['variables'] = variables

    return 0
def launch(self) -> int:
    """Execute the :class:`RecurrentNeuralNetwork <neural_networks.recurrent_neural_network.RecurrentNeuralNetwork>` neural_networks.recurrent_neural_network.RecurrentNeuralNetwork object.

    Loads the dataset, turns the target column into windows of
    ``self.window_size`` through ``split_sequence``, keeps the last
    ``self.test_size`` samples for testing, builds the model through
    ``self.build_model``, trains it with early stopping, logs the
    train/test metrics and the test table, optionally writes the test table
    and a plot, and finally stores the model in an HDF5 file together with
    a JSON ``variables`` attribute.

    Returns:
        int: ``0`` after a completed run, and also ``0`` when
        ``self.check_restart()`` is truthy (nothing else is executed then).
    """

    # check input/output paths and parameters
    self.check_data_params(self.out_log, self.err_log)

    # Setup Biobb
    if self.check_restart():
        return 0
    self.stage_files()

    # load dataset
    fu.log(
        'Getting dataset from %s' %
        self.io_dict["in"]["input_dataset_path"], self.out_log,
        self.global_log)
    if 'column' in self.target:
        # the header row supplies the column names and is then skipped
        labels = getHeader(self.io_dict["in"]["input_dataset_path"])
        skiprows = 1
    else:
        labels = None
        skiprows = None
    # raw string: "\s" is not a valid escape in a normal string literal; the
    # regex engine still reads r"\t" as a tab
    data = pd.read_csv(self.io_dict["in"]["input_dataset_path"],
                       header=None,
                       sep=r"\s+|;|:|,|\t",
                       engine="python",
                       skiprows=skiprows,
                       names=labels)

    # get target column
    target = data[getTargetValue(self.target)].to_numpy()

    # split into samples
    X, y = split_sequence(target, self.window_size)
    # reshape into [samples, timesteps, features]
    X = X.reshape((X.shape[0], X.shape[1], 1))

    # train / test split: the split is positional (no shuffling), the last
    # `test_size` samples form the test set
    fu.log('Creating train and test sets', self.out_log, self.global_log)
    X_train, X_test = X[:-self.test_size], X[-self.test_size:]
    y_train, y_test = y[:-self.test_size], y[-self.test_size:]

    # build model
    fu.log('Building model', self.out_log, self.global_log)
    model = self.build_model((X_train.shape[1], 1))

    # model summary: collect the lines instead of printing them so they can
    # be sent to the logs
    stringlist = []
    model.summary(print_fn=stringlist.append)
    model_summary = "\n".join(stringlist)
    fu.log('Model summary:\n\n%s\n' % model_summary, self.out_log,
           self.global_log)

    # get optimizer: the class is looked up by name in
    # tensorflow.keras.optimizers
    mod = __import__('tensorflow.keras.optimizers',
                     fromlist=[self.optimizer])
    opt_class = getattr(mod, self.optimizer)
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias
    opt = opt_class(learning_rate=self.learning_rate)
    # compile model
    model.compile(optimizer=opt, loss='mse', metrics=['mse', 'mae'])

    # fitting
    fu.log('Training model', self.out_log, self.global_log)
    # set an early stopping mechanism
    # set patience=2, to be a bit tolerant against random validation loss increases
    early_stopping = EarlyStopping(patience=2)
    # fit the model
    mf = model.fit(X_train,
                   y_train,
                   batch_size=self.batch_size,
                   epochs=self.max_epochs,
                   callbacks=[early_stopping],
                   validation_split=self.validation_size,
                   verbose=1)

    # values of the last performed epoch
    train_metrics = pd.DataFrame()
    train_metrics['metric'] = [
        'Train loss', ' Train MAE', 'Train MSE', 'Validation loss',
        'Validation MAE', 'Validation MSE'
    ]
    train_metrics['coefficient'] = [
        mf.history['loss'][-1], mf.history['mae'][-1],
        mf.history['mse'][-1], mf.history['val_loss'][-1],
        mf.history['val_mae'][-1], mf.history['val_mse'][-1]
    ]

    fu.log(
        'Training metrics\n\nTRAINING METRICS TABLE\n\n%s\n' %
        train_metrics, self.out_log, self.global_log)

    # testing
    fu.log('Testing model', self.out_log, self.global_log)
    # unpacked as loss followed by the metrics in the order passed to
    # compile() above: mse, then mae
    test_loss, test_mse, test_mae = model.evaluate(X_test, y_test)

    # predict data from X_test
    test_predictions = model.predict(X_test)
    test_predictions = np.around(test_predictions, decimals=2)
    tpr = np.squeeze(np.asarray(test_predictions))

    test_metrics = pd.DataFrame()
    test_metrics['metric'] = ['Test loss', 'Test MAE', 'Test MSE']
    test_metrics['coefficient'] = [test_loss, test_mae, test_mse]

    fu.log(
        'Testing metrics\n\nTESTING METRICS TABLE\n\n%s\n' % test_metrics,
        self.out_log, self.global_log)

    test_table = pd.DataFrame()
    test_table['prediction'] = tpr
    test_table['target'] = y_test
    test_table['residual'] = test_table['target'] - test_table['prediction']
    test_table['difference %'] = np.absolute(
        test_table['residual'] / test_table['target'] * 100)
    # NOTE: this changes the process-wide pandas display option
    pd.set_option('display.float_format', lambda x: '%.2f' % x)
    # sort by difference in %
    test_table = test_table.sort_values(by=['difference %'])
    test_table = test_table.reset_index(drop=True)
    fu.log('TEST DATA\n\n%s\n' % test_table, self.out_log, self.global_log)

    # save test data
    if (self.io_dict["out"]["output_test_table_path"]):
        fu.log(
            'Saving testing data to %s' %
            self.io_dict["out"]["output_test_table_path"], self.out_log,
            self.global_log)
        test_table.to_csv(self.io_dict["out"]["output_test_table_path"],
                          index=False,
                          header=True)

    # create test plot
    if (self.io_dict["out"]["output_plot_path"]):
        fu.log(
            'Saving plot to %s' % self.io_dict["out"]["output_plot_path"],
            self.out_log, self.global_log)
        test_predictions = test_predictions.flatten()
        train_predictions = model.predict(X_train).flatten()
        plot = plotResultsReg(mf.history, y_test, test_predictions, y_train,
                              train_predictions)
        plot.savefig(self.io_dict["out"]["output_plot_path"], dpi=150)

    # save model and parameters
    vars_obj = {
        'target': self.target,
        'window_size': self.window_size,
        'type': 'recurrent'
    }
    variables = json.dumps(vars_obj)
    fu.log('Saving model to %s' % self.io_dict["out"]["output_model_path"],
           self.out_log, self.global_log)
    # the JSON metadata is stored as an attribute of the same HDF5 file
    with h5py.File(self.io_dict["out"]["output_model_path"],
                   mode='w') as f:
        hdf5_format.save_model_to_hdf5(model, f)
        f.attrs['variables'] = variables

    return 0
def save_model(model,
               filepath,
               overwrite=True,
               include_optimizer=True,
               save_format=None):
    """Saves a model as a TensorFlow SavedModel or HDF5 file.

    What gets saved:
        - the model's configuration (topology)
        - the model's weights
        - the model's optimizer's state (if any)

    A saved model can therefore be reinstantiated in the exact same state
    without the code that defined or trained it.

    _SavedModel serialization_ (not yet added)

    The SavedModel serialization path uses `tf.saved_model.save` to save the
    model and all trackable objects attached to the model (e.g. layers and
    variables). `@tf.function`-decorated methods are also saved. Additional
    trackable objects and functions are added to the SavedModel to allow the
    model to be loaded back as a Keras Model object.

    Arguments:
        model: Keras model instance to be saved.
        filepath: One of the following:
            - String, path where to save the model
            - `h5py.File` object where to save the model
        overwrite: Whether to overwrite any existing model at the target
            location, or instead ask the user with a manual prompt.
        include_optimizer: If True, save optimizer's state together.
        save_format: Either 'tf' or 'h5', selecting Tensorflow SavedModel
            or HDF5. Defaults to 'tf' in TF 2.X, and 'h5' in TF 1.X.

    Raises:
        ImportError: If save format is hdf5, and h5py is not available.
        NotImplementedError: If HDF5 is selected and the model is neither a
            graph network nor a `Sequential` model.
    """
    from tensorflow.python.keras.engine import sequential  # pylint: disable=g-import-not-at-top

    # An explicit (truthy) `save_format` wins over the version-dependent
    # fallback.
    tf2_active = tf2.enabled()
    save_format = save_format or ('tf' if tf2_active else 'h5')

    # HDF5 is chosen by explicit format, by an `h5py.File` target, or by
    # file extension. The checks keep the order of a short-circuiting `or`,
    # so the extension is only inspected when the earlier ones did not match.
    wants_hdf5 = save_format == 'h5'
    if not wants_hdf5:
        wants_hdf5 = h5py is not None and isinstance(filepath, h5py.File)
    if not wants_hdf5:
        wants_hdf5 = os.path.splitext(filepath)[1] in _HDF5_EXTENSIONS

    if not wants_hdf5:
        saved_model.save(model, filepath, overwrite, include_optimizer)
        return

    # TODO(b/130258301): add utility method for detecting model type.
    if not (model._is_graph_network or  # pylint:disable=protected-access
            isinstance(model, sequential.Sequential)):
        raise NotImplementedError(
            'Saving the model to HDF5 format requires the model to be a '
            'Functional model or a Sequential model. It does not work for '
            'subclassed models, because such models are defined via the body of '
            'a Python method, which isn\'t safely serializable. Consider saving '
            'to the Tensorflow SavedModel format (by setting save_format="tf") '
            'or using `save_weights`.')
    hdf5_format.save_model_to_hdf5(model, filepath, overwrite,
                                   include_optimizer)
def launch(self) -> int:
    """Execute the :class:`RegressionNeuralNetwork <neural_networks.regression_neural_network.RegressionNeuralNetwork>` neural_networks.regression_neural_network.RegressionNeuralNetwork object.

    Steps performed, in order:

    1. Validate parameters, honour a restart request and stage the files.
    2. Read the input dataset and extract features, target and (optionally)
       sample weights.
    3. Shuffle the rows and split them into train and test sets.
    4. Optionally scale the features, then build, compile and fit the model
       with early stopping.
    5. Log training and testing metrics (loss, MAE, MSE, R2).
    6. Optionally write the test table (CSV) and the results plot.
    7. Save the model, plus a JSON description of the variables, to the
       ``output_model_path`` HDF5 file.

    Returns:
        int: ``0`` both when the step is skipped because of a restart and when
        it runs to completion.
    """

    # check input/output paths and parameters
    self.check_data_params(self.out_log, self.err_log)

    # Setup Biobb
    if self.check_restart():
        return 0
    self.stage_files()

    input_dataset_path = self.io_dict["in"]["input_dataset_path"]
    class_name = self.__class__.__name__

    # load dataset
    fu.log('Getting dataset from %s' % input_dataset_path,
           self.out_log, self.global_log)
    if 'columns' in self.features:
        # features are addressed by name: take the names from the header row
        # and skip that row when reading the data
        labels = getHeader(input_dataset_path)
        skiprows = 1
    else:
        labels = None
        skiprows = None
    # Raw string: the pattern is a regular expression, so the escapes must
    # reach the regex engine untouched ("\s" is an invalid escape in a normal
    # string literal and triggers a warning on recent Python versions).
    data = pd.read_csv(input_dataset_path,
                       header=None,
                       sep=r"\s+|;|:|,|\t",
                       engine="python",
                       skiprows=skiprows,
                       names=labels)

    X = getFeatures(self.features, data, self.out_log, class_name)
    fu.log('Features: [%s]' % (getIndependentVarsList(self.features)),
           self.out_log, self.global_log)
    # target
    y = getTarget(self.target, data, self.out_log, class_name)
    fu.log('Target: %s' % (str(getTargetValue(self.target))),
           self.out_log, self.global_log)
    # weights
    if self.weight:
        w = getWeight(self.weight, data, self.out_log, class_name)

    # shuffle dataset: one random permutation is applied to every array so
    # rows stay aligned across features, target and weights
    fu.log('Shuffling dataset', self.out_log, self.global_log)
    shuffled_indices = np.arange(X.shape[0])
    np.random.shuffle(shuffled_indices)
    np_X = X.to_numpy()
    shuffled_X = np_X[shuffled_indices]
    shuffled_y = y[shuffled_indices]
    if self.weight:
        shuffled_w = w[shuffled_indices]

    # train / test split
    fu.log('Creating train and test sets', self.out_log, self.global_log)
    arrays_sets = (shuffled_X, shuffled_y)
    # if user provide weights
    if self.weight:
        arrays_sets = arrays_sets + (shuffled_w, )
        X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(
            *arrays_sets,
            test_size=self.test_size,
            random_state=self.random_state)
    else:
        X_train, X_test, y_train, y_test = train_test_split(
            *arrays_sets,
            test_size=self.test_size,
            random_state=self.random_state)

    # scale dataset
    # NOTE(review): train and test sets are scaled independently of each
    # other (see the second `scale` call in the testing section).
    if self.scale:
        fu.log('Scaling dataset', self.out_log, self.global_log)
        X_train = scale(X_train)

    # build model
    fu.log('Building model', self.out_log, self.global_log)
    model = self.build_model((X_train.shape[1], ))

    # model summary: collect the printed lines so they go to the step log
    stringlist = []
    model.summary(print_fn=lambda x: stringlist.append(x))
    model_summary = "\n".join(stringlist)
    fu.log('Model summary:\n\n%s\n' % model_summary,
           self.out_log, self.global_log)

    # get optimizer: the class is looked up by name in
    # tensorflow.keras.optimizers
    mod = __import__('tensorflow.keras.optimizers',
                     fromlist=[self.optimizer])
    opt_class = getattr(mod, self.optimizer)
    # `learning_rate` is the supported keyword; the legacy `lr` alias is
    # deprecated and is ignored or rejected by newer Keras optimizers.
    opt = opt_class(learning_rate=self.learning_rate)
    # compile model
    model.compile(optimizer=opt,
                  loss='mse',
                  metrics=['mae', 'mse'],
                  sample_weight_mode='samplewise')

    # fitting
    fu.log('Training model', self.out_log, self.global_log)
    # set an early stopping mechanism
    # set patience=2, to be a bit tolerant against random validation loss increases
    early_stopping = EarlyStopping(patience=2)

    # Per-sample weights are only used when the user supplied them.
    sample_weight = w_train if self.weight else None
    # TODO: class_weight not working since TF 2.4.1 update; it used to be
    # estimated from the target data with compute_class_weight('balanced', ...)
    # when no weight was provided. An empty list is passed in every case.
    class_weight = []

    # fit the model
    mf = model.fit(X_train,
                   y_train,
                   class_weight=class_weight,
                   sample_weight=sample_weight,
                   batch_size=self.batch_size,
                   epochs=self.max_epochs,
                   callbacks=[early_stopping],
                   validation_split=self.validation_size,
                   verbose=1)

    fu.log('Total epochs performed: %s' % len(mf.history['loss']),
           self.out_log, self.global_log)

    # predict data from X_train
    train_predictions = model.predict(X_train)
    train_predictions = np.around(train_predictions, decimals=2)

    # `sample_weight` must be passed by keyword: it is a keyword-only
    # argument of r2_score in current scikit-learn releases, so the former
    # positional call failed whenever weights were provided.
    train_score = r2_score(y_train,
                           train_predictions,
                           sample_weight=w_train if self.weight else None)

    # metrics reported are the values of the last epoch performed
    train_metrics = pd.DataFrame()
    train_metrics['metric'] = [
        'Train loss', 'Train MAE', 'Train MSE', 'Train R2',
        'Validation loss', 'Validation MAE', 'Validation MSE'
    ]
    train_metrics['coefficient'] = [
        mf.history['loss'][-1], mf.history['mae'][-1],
        mf.history['mse'][-1], train_score, mf.history['val_loss'][-1],
        mf.history['val_mae'][-1], mf.history['val_mse'][-1]
    ]

    fu.log('Training metrics\n\nTRAINING METRICS TABLE\n\n%s\n' % train_metrics,
           self.out_log, self.global_log)

    # testing
    if self.scale:
        X_test = scale(X_test)
    fu.log('Testing model', self.out_log, self.global_log)
    test_loss, test_mae, test_mse = model.evaluate(X_test, y_test)

    # predict data from X_test
    test_predictions = model.predict(X_test)
    test_predictions = np.around(test_predictions, decimals=2)
    tpr = np.squeeze(np.asarray(test_predictions))

    # keyword `sample_weight` for the same reason as in the training score
    score = r2_score(y_test,
                     test_predictions,
                     sample_weight=w_test if self.weight else None)

    test_metrics = pd.DataFrame()
    test_metrics['metric'] = [
        'Test loss', 'Test MAE', 'Test MSE', 'Test R2'
    ]
    test_metrics['coefficient'] = [test_loss, test_mae, test_mse, score]

    fu.log('Testing metrics\n\nTESTING METRICS TABLE\n\n%s\n' % test_metrics,
           self.out_log, self.global_log)

    # per-sample comparison between predictions and targets
    test_table = pd.DataFrame()
    test_table['prediction'] = tpr
    test_table['target'] = y_test
    test_table['residual'] = test_table['target'] - test_table['prediction']
    test_table['difference %'] = np.absolute(
        test_table['residual'] / test_table['target'] * 100)
    # NOTE: this changes the pandas float display format globally
    pd.set_option('display.float_format', lambda x: '%.2f' % x)
    # sort by difference in %
    test_table = test_table.sort_values(by=['difference %'])
    test_table = test_table.reset_index(drop=True)
    fu.log('TEST DATA\n\n%s\n' % test_table, self.out_log, self.global_log)

    # save test data (optional output)
    output_test_table_path = self.io_dict["out"]["output_test_table_path"]
    if output_test_table_path:
        fu.log('Saving testing data to %s' % output_test_table_path,
               self.out_log, self.global_log)
        test_table.to_csv(output_test_table_path, index=False, header=True)

    # create test plot (optional output)
    output_plot_path = self.io_dict["out"]["output_plot_path"]
    if output_plot_path:
        fu.log('Saving plot to %s' % output_plot_path,
               self.out_log, self.global_log)
        test_predictions = test_predictions.flatten()
        # train predictions are recomputed here, so the plot uses the
        # unrounded values
        train_predictions = model.predict(X_train).flatten()
        plot = plotResultsReg(mf.history, y_test, test_predictions,
                              y_train, train_predictions)
        plot.savefig(output_plot_path, dpi=150)

    # save model and parameters
    vars_obj = {
        'features': self.features,
        'target': self.target,
        'scale': self.scale,
        'type': 'regression'
    }
    variables = json.dumps(vars_obj)
    output_model_path = self.io_dict["out"]["output_model_path"]
    fu.log('Saving model to %s' % output_model_path,
           self.out_log, self.global_log)
    # the variables description is stored as an attribute of the same HDF5
    # file that holds the model
    with h5py.File(output_model_path, mode='w') as f:
        hdf5_format.save_model_to_hdf5(model, f)
        f.attrs['variables'] = variables

    return 0