Esempio n. 1
0
def save_model(model,
               filepath,
               overwrite=True,
               include_optimizer=True,
               save_format=None):
  """Saves a model as a TensorFlow SavedModel or HDF5 file.

  The saved model contains:
      - the model's configuration (topology)
      - the model's weights
      - the model's optimizer's state (if any)

  Thus the saved model can be reinstantiated in
  the exact same state, without any of the code
  used for model definition or training.

  Arguments:
      model: Keras model instance to be saved.
      filepath: One of the following:
        - String, path where to save the model
        - `h5py.File` object where to save the model
      overwrite: Whether we should overwrite any existing model at the target
        location, or instead ask the user with a manual prompt.
      include_optimizer: If True, save optimizer's state together.
      save_format: Either 'tf' or 'h5', indicating whether to save the model
        to Tensorflow SavedModel or HDF5. The 'tf' option is currently disabled,
        and will be enabled when Keras SavedModel export is no longer
        experimental. (The experimental function is
        tf.keras.experimental.export_saved_model).

  Raises:
      ImportError: If save format is hdf5, and h5py is not available.
      NotImplementedError: If `save_format` is 'tf' (rejected both in TF 1.X
        graph mode and while Keras SavedModel export is still experimental),
        or if `model` is neither a Functional (graph network) model nor a
        `Sequential` model.
  """
  from tensorflow.python.keras.engine import sequential  # pylint: disable=g-import-not-at-top

  if (not tf2.enabled() and
      not ops.executing_eagerly_outside_functions()
      and save_format == 'tf'):
    # The trailing space after "1.X" matters: the two literals are joined by
    # implicit concatenation and would otherwise read "1.Xgraph mode".
    raise NotImplementedError(
        'Saving the model as SavedModel is not supported in TensorFlow 1.X '
        'graph mode. Please enable eager execution or use the "h5" save format.'
        )

  if _KERAS_SAVED_MODEL_STILL_EXPERIMENTAL and save_format == 'tf':
    raise NotImplementedError(
        'Saving the model as SavedModel is still in experimental stages. '
        'Please use tf.keras.experimental.export_saved_model, or use '
        'save_format="h5" to save to HDF5.')

  # TODO(kathywu): Remove this when Keras SavedModel is not experimental.
  # Until then every call is forced down the HDF5 path, whatever the caller
  # passed as `save_format`.
  save_format = 'h5'

  if (save_format == 'h5' or
      (h5py is not None and isinstance(filepath, h5py.File)) or
      os.path.splitext(filepath)[1] in _HDF5_EXTENSIONS):
    # TODO(b/130258301): add utility method for detecting model type.
    if (not model._is_graph_network and  # pylint:disable=protected-access
        not isinstance(model, sequential.Sequential)):
      raise NotImplementedError(
          'Saving the model to HDF5 format requires the model to be a '
          'Functional model or a Sequential model. It does not work for '
          'subclassed models, because such models are defined via the body of '
          'a Python method, which isn\'t safely serializable. Consider saving '
          'to the Tensorflow SavedModel format (by setting save_format="tf") '
          'or using `save_weights`.')
    hdf5_format.save_model_to_hdf5(
        model, filepath, overwrite, include_optimizer)
    return
    def launch(self) -> int:
        """Execute the :class:`AutoencoderNeuralNetwork <neural_networks.autoencoder_neural_network.AutoencoderNeuralNetwork>` neural_networks.autoencoder_neural_network.AutoencoderNeuralNetwork object.

        Loads the decode dataset (and, when ``input_predict_path`` is set, the
        predict dataset), builds and trains the model returned by
        ``build_model``, logs the training metrics and the
        reconstruction/prediction tables, optionally writes those tables to
        CSV, and finally stores the model in an HDF5 file together with a JSON
        ``variables`` attribute.

        Returns:
            int: Always 0. Returned early, before any work is done, when
            ``check_restart()`` is truthy.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # load decode dataset
        fu.log(
            'Getting decode dataset from %s' %
            self.io_dict["in"]["input_decode_path"], self.out_log,
            self.global_log)
        data_dec = pd.read_csv(self.io_dict["in"]["input_decode_path"])
        seq_in = np.array(data_dec)

        # reshape input into [samples, timesteps, features]
        n_in = len(seq_in)
        seq_in = seq_in.reshape((1, n_in, 1))

        # load predict dataset
        n_out = None
        if (self.io_dict["in"]["input_predict_path"]):
            fu.log(
                'Getting predict dataset from %s' %
                self.io_dict["in"]["input_predict_path"], self.out_log,
                self.global_log)
            data_pred = pd.read_csv(self.io_dict["in"]["input_predict_path"])
            seq_out = np.array(data_pred)

            # reshape output into [samples, timesteps, features]
            n_out = len(seq_out)
            seq_out = seq_out.reshape((1, n_out, 1))

        # build model
        fu.log('Building model', self.out_log, self.global_log)
        model = self.build_model(n_in, n_out)

        # model summary (collected line by line so it can be sent to the log)
        stringlist = []
        model.summary(print_fn=lambda x: stringlist.append(x))
        model_summary = "\n".join(stringlist)
        fu.log('Model summary:\n\n%s\n' % model_summary, self.out_log,
               self.global_log)

        # get optimizer: the class is looked up by name in
        # tensorflow.keras.optimizers
        mod = __import__('tensorflow.keras.optimizers',
                         fromlist=[self.optimizer])
        opt_class = getattr(mod, self.optimizer)
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and no longer honoured by recent Keras optimizers.
        opt = opt_class(learning_rate=self.learning_rate)
        # compile model
        model.compile(optimizer=opt, loss='mse', metrics=['mse', 'mae'])

        # fitting: the first target is the input itself (reconstruction); the
        # predict sequence is added as a second target when it was provided
        fu.log('Training model', self.out_log, self.global_log)
        y_list = [seq_in]
        if n_out:
            y_list.append(seq_out)
        # fit the model
        mf = model.fit(seq_in,
                       y_list,
                       batch_size=self.batch_size,
                       epochs=self.max_epochs,
                       verbose=1)

        # one row per history key, holding the value of the last epoch
        train_metrics = pd.DataFrame()
        metric = []
        coefficient = []
        for key, lst in mf.history.items():
            metric.append(' '.join(x.capitalize() or '_'
                                   for x in key.split('_')))
            coefficient.append(lst[-1])

        train_metrics['metric'] = metric
        train_metrics['coefficient'] = coefficient

        fu.log('Calculating metrics\n\nMETRICS TABLE\n\n%s\n' % train_metrics,
               self.out_log, self.global_log)

        # predicting
        fu.log('Predicting model', self.out_log, self.global_log)
        yhat = model.predict(seq_in, verbose=1)

        decoding_table = pd.DataFrame()
        if (self.io_dict["in"]["input_predict_path"]):
            # with a predict dataset, yhat holds one entry per output:
            # yhat[0] is the reconstruction and yhat[1] the prediction
            decoding_table['reconstructed'] = np.squeeze(np.asarray(
                yhat[0][0]))
            decoding_table['original'] = data_dec
        else:
            decoding_table['reconstructed'] = np.squeeze(np.asarray(yhat[0]))
            decoding_table['original'] = np.squeeze(np.asarray(data_dec))
        decoding_table['residual'] = decoding_table[
            'original'] - decoding_table['reconstructed']
        decoding_table['difference %'] = np.absolute(
            decoding_table['residual'] / decoding_table['original'] * 100)
        pd.set_option('display.float_format', lambda x: '%.5f' % x)
        # sort by difference in %
        decoding_table = decoding_table.sort_values(by=['difference %'])
        decoding_table = decoding_table.reset_index(drop=True)
        fu.log('RECONSTRUCTION TABLE\n\n%s\n' % decoding_table, self.out_log,
               self.global_log)

        # save reconstruction data
        if (self.io_dict["out"]["output_test_decode_path"]):
            fu.log(
                'Saving reconstruction data to %s' %
                self.io_dict["out"]["output_test_decode_path"], self.out_log,
                self.global_log)
            decoding_table.to_csv(
                self.io_dict["out"]["output_test_decode_path"],
                index=False,
                header=True)

        if (self.io_dict["in"]["input_predict_path"]):
            prediction_table = pd.DataFrame()
            prediction_table['predicted'] = np.squeeze(np.asarray(yhat[1][0]))
            prediction_table['original'] = data_pred
            prediction_table['residual'] = prediction_table[
                'original'] - prediction_table['predicted']
            prediction_table['difference %'] = np.absolute(
                prediction_table['residual'] / prediction_table['original'] *
                100)
            pd.set_option('display.float_format', lambda x: '%.5f' % x)
            # sort by difference in %
            prediction_table = prediction_table.sort_values(
                by=['difference %'])
            prediction_table = prediction_table.reset_index(drop=True)
            fu.log('PREDICTION TABLE\n\n%s\n' % prediction_table, self.out_log,
                   self.global_log)

            # save prediction data
            if (self.io_dict["out"]["output_test_predict_path"]):
                fu.log(
                    'Saving prediction data to %s' %
                    self.io_dict["out"]["output_test_predict_path"],
                    self.out_log, self.global_log)
                prediction_table.to_csv(
                    self.io_dict["out"]["output_test_predict_path"],
                    index=False,
                    header=True)

        # save model and parameters: the JSON blob is stored as an attribute
        # of the same HDF5 file that holds the model
        vars_obj = {'type': 'autoencoder'}
        variables = json.dumps(vars_obj)
        fu.log('Saving model to %s' % self.io_dict["out"]["output_model_path"],
               self.out_log, self.global_log)
        with h5py.File(self.io_dict["out"]["output_model_path"],
                       mode='w') as f:
            hdf5_format.save_model_to_hdf5(model, f)
            f.attrs['variables'] = variables

        return 0
Esempio n. 3
0
def save_model(model,
               filepath,
               overwrite=True,
               include_optimizer=True,
               save_format=None,
               signatures=None,
               options=None):
    """Saves a model as a TensorFlow SavedModel or HDF5 file.

  The saved model contains:

      - the model's configuration (topology)
      - the model's weights
      - the model's optimizer's state (if any)

  Thus the saved model can be reinstantiated in
  the exact same state, without any of the code
  used for model definition or training.

  Note that the model weights may have different scoped names after being
  loaded. Scoped names include the model/layer names, such as
  `"dense_1/kernel:0"`. It is recommended that you use the layer properties to
  access specific variables, e.g. `model.get_layer("dense_1").kernel`.

  _SavedModel serialization_

  The SavedModel serialization path uses `tf.saved_model.save` to save the model
  and all trackable objects attached to the model (e.g. layers and variables).
  `@tf.function`-decorated methods are also saved. Additional trackable objects
  and functions are added to the SavedModel to allow the model to be
  loaded back as a Keras Model object.

  Arguments:
      model: Keras model instance to be saved.
      filepath: One of the following:
        - String or `pathlib.Path` object, path where to save the model
        - `h5py.File` object where to save the model
      overwrite: Whether we should overwrite any existing model at the target
        location, or instead ask the user with a manual prompt.
      include_optimizer: If True, save optimizer's state together.
      save_format: Either 'tf' or 'h5', indicating whether to save the model
        to Tensorflow SavedModel or HDF5. Defaults to 'tf' in TF 2.X, and 'h5'
        in TF 1.X.
      signatures: Signatures to save with the SavedModel. Applicable to the 'tf'
        format only. Please see the `signatures` argument in
        `tf.saved_model.save` for details.
      options: Optional `tf.saved_model.SaveOptions` object that specifies
        options for saving to SavedModel.

  Raises:
      ImportError: If save format is hdf5, and h5py is not available.
      NotImplementedError: If the HDF5 path is selected and `model` is neither
        a Functional (graph network) model nor a `Sequential` model.
  """
    from tensorflow.python.keras.engine import sequential  # pylint: disable=g-import-not-at-top

    # Pick the version-dependent fallback, then apply it only when the caller
    # did not choose a format.
    fallback_format = 'h5'
    if tf2.enabled():
        fallback_format = 'tf'
    if not save_format:
        save_format = fallback_format

    # Path objects are turned into plain strings before any path inspection.
    if sys.version_info >= (3, 4):
        if isinstance(filepath, pathlib.Path):
            filepath = str(filepath)

    def _targets_hdf5():
        # HDF5 is chosen by explicit format, by an open h5py file handle, or
        # by the file extension -- checked in that order.
        if save_format == 'h5':
            return True
        if h5py is not None and isinstance(filepath, h5py.File):
            return True
        return os.path.splitext(filepath)[1] in _HDF5_EXTENSIONS

    if not _targets_hdf5():
        saved_model_save.save(model, filepath, overwrite, include_optimizer,
                              signatures, options)
        return

    # TODO(b/130258301): add utility method for detecting model type.
    hdf5_compatible = (
        model._is_graph_network or  # pylint:disable=protected-access
        isinstance(model, sequential.Sequential))
    if not hdf5_compatible:
        raise NotImplementedError(
            'Saving the model to HDF5 format requires the model to be a '
            'Functional model or a Sequential model. It does not work for '
            'subclassed models, because such models are defined via the body of '
            'a Python method, which isn\'t safely serializable. Consider saving '
            'to the Tensorflow SavedModel format (by setting save_format="tf") '
            'or using `save_weights`.')
    hdf5_format.save_model_to_hdf5(model, filepath, overwrite,
                                   include_optimizer)
Esempio n. 4
0
def save_model(model,
               filepath,
               overwrite=True,
               include_optimizer=True,
               save_format=None,
               signatures=None,
               options=None,
               save_traces=True):
    # pylint: disable=line-too-long
    """Saves a model as a TensorFlow SavedModel or HDF5 file.

  See the [Serialization and Saving guide](https://keras.io/guides/serialization_and_saving/)
  for details.

  Usage:

  >>> model = tf.keras.Sequential([
  ...     tf.keras.layers.Dense(5, input_shape=(3,)),
  ...     tf.keras.layers.Softmax()])
  >>> model.save('/tmp/model')
  >>> loaded_model = tf.keras.models.load_model('/tmp/model')
  >>> x = tf.random.uniform((10, 3))
  >>> assert np.allclose(model.predict(x), loaded_model.predict(x))

  The SavedModel and HDF5 file contains:

  - the model's configuration (topology)
  - the model's weights
  - the model's optimizer's state (if any)

  Thus models can be reinstantiated in the exact same state, without any of the
  code used for model definition or training.

  Note that the model weights may have different scoped names after being
  loaded. Scoped names include the model/layer names, such as
  `"dense_1/kernel:0"`. It is recommended that you use the layer properties to
  access specific variables, e.g. `model.get_layer("dense_1").kernel`.

  __SavedModel serialization format__

  Keras SavedModel uses `tf.saved_model.save` to save the model and all
  trackable objects attached to the model (e.g. layers and variables). The model
  config, weights, and optimizer are saved in the SavedModel. Additionally, for
  every Keras layer attached to the model, the SavedModel stores:

    * the config and metadata -- e.g. name, dtype, trainable status
    * traced call and loss functions, which are stored as TensorFlow subgraphs.

  The traced functions allow the SavedModel format to save and load custom
  layers without the original class definition.

  You can choose to not save the traced functions by disabling the `save_traces`
  option. This will decrease the time it takes to save the model and the
  amount of disk space occupied by the output SavedModel. If you disable
  `save_traces`, then you _must_ provide all custom class definitions when
  loading the model. See the `custom_objects` argument in
  `tf.keras.models.load_model`.

  Args:
      model: Keras model instance to be saved.
      filepath: One of the following:
        - String or `pathlib.Path` object, path where to save the model
        - `h5py.File` object where to save the model
      overwrite: Whether we should overwrite any existing model at the target
        location, or instead ask the user with a manual prompt.
      include_optimizer: If True, save optimizer's state together.
      save_format: Either 'tf' or 'h5', indicating whether to save the model
        to Tensorflow SavedModel or HDF5. Defaults to 'tf' in TF 2.X, and 'h5'
        in TF 1.X.
      signatures: Signatures to save with the SavedModel. Applicable to the 'tf'
        format only. Please see the `signatures` argument in
        `tf.saved_model.save` for details.
      options: (only applies to SavedModel format) `tf.saved_model.SaveOptions`
        object that specifies options for saving to SavedModel.
      save_traces: (only applies to SavedModel format) When enabled, the
        SavedModel will store the function traces for each layer. This
        can be disabled, so that only the configs of each layer are stored.
        Defaults to `True`. Disabling this will decrease serialization time and
        reduce file size, but it requires that all custom layers/models
        implement a `get_config()` method.

  Raises:
      ImportError: If save format is hdf5, and h5py is not available.
      NotImplementedError: If the HDF5 path is selected and `model` is neither
        a Functional (graph network) model nor a `Sequential` model.
  """
    # pylint: enable=line-too-long
    from tensorflow.python.keras.engine import sequential  # pylint: disable=g-import-not-at-top

    # Pick the version-dependent fallback, then apply it only when the caller
    # did not choose a format.
    fallback_format = 'h5'
    if tf2.enabled():
        fallback_format = 'tf'
    if not save_format:
        save_format = fallback_format

    filepath = path_to_string(filepath)

    def _targets_hdf5():
        # HDF5 is chosen by explicit format, by an open h5py file handle, or
        # by what `is_hdf5_filepath` reports -- checked in that order.
        if save_format == 'h5':
            return True
        if h5py is not None and isinstance(filepath, h5py.File):
            return True
        return saving_utils.is_hdf5_filepath(filepath)

    if not _targets_hdf5():
        saved_model_save.save(model, filepath, overwrite, include_optimizer,
                              signatures, options, save_traces)
        return

    # TODO(b/130258301): add utility method for detecting model type.
    hdf5_compatible = (
        model._is_graph_network or  # pylint:disable=protected-access
        isinstance(model, sequential.Sequential))
    if not hdf5_compatible:
        raise NotImplementedError(
            'Saving the model to HDF5 format requires the model to be a '
            'Functional model or a Sequential model. It does not work for '
            'subclassed models, because such models are defined via the body of '
            'a Python method, which isn\'t safely serializable. Consider saving '
            'to the Tensorflow SavedModel format (by setting save_format="tf") '
            'or using `save_weights`.')
    hdf5_format.save_model_to_hdf5(model, filepath, overwrite,
                                   include_optimizer)
    def launch(self) -> int:
        """Execute the :class:`ClassificationNeuralNetwork <neural_networks.classification_neural_network.ClassificationNeuralNetwork>` neural_networks.classification_neural_network.ClassificationNeuralNetwork object.

        Loads the dataset, shuffles it and splits it into train and test sets,
        optionally scales the features, builds and trains the model returned
        by ``build_model``, logs training/testing metrics and confusion
        matrices, optionally writes the test table and a plot, and finally
        stores the model in an HDF5 file together with a JSON ``variables``
        attribute.

        Returns:
            int: Always 0. Returned early, before any work is done, when
            ``check_restart()`` is truthy.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # load dataset
        fu.log(
            'Getting dataset from %s' %
            self.io_dict["in"]["input_dataset_path"], self.out_log,
            self.global_log)
        if 'columns' in self.features:
            # features are given by column name: take the names from the
            # header and skip that row when reading the data
            labels = getHeader(self.io_dict["in"]["input_dataset_path"])
            skiprows = 1
        else:
            labels = None
            skiprows = None
        # raw string so that `\s` reaches the regex engine instead of being an
        # invalid escape sequence in the Python literal
        data = pd.read_csv(self.io_dict["in"]["input_dataset_path"],
                           header=None,
                           sep=r"\s+|;|:|,|\t",
                           engine="python",
                           skiprows=skiprows,
                           names=labels)

        targets_list = data[getTargetValue(self.target)].to_numpy()

        X = getFeatures(self.features, data, self.out_log,
                        self.__class__.__name__)
        fu.log('Features: [%s]' % (getIndependentVarsList(self.features)),
               self.out_log, self.global_log)
        # target
        y = getTarget(self.target, data, self.out_log, self.__class__.__name__)
        fu.log('Target: %s' % (str(getTargetValue(self.target))), self.out_log,
               self.global_log)
        # weights
        if self.weight:
            w = getWeight(self.weight, data, self.out_log,
                          self.__class__.__name__)

        # shuffle dataset: one index permutation is applied to features,
        # targets and (when present) weights so the rows stay aligned
        fu.log('Shuffling dataset', self.out_log, self.global_log)
        shuffled_indices = np.arange(X.shape[0])
        np.random.shuffle(shuffled_indices)
        np_X = X.to_numpy()
        shuffled_X = np_X[shuffled_indices]
        shuffled_y = targets_list[shuffled_indices]
        if self.weight:
            shuffled_w = w[shuffled_indices]

        # train / test split
        fu.log('Creating train and test sets', self.out_log, self.global_log)
        arrays_sets = (shuffled_X, shuffled_y)
        # if user provide weights
        if self.weight:
            arrays_sets = arrays_sets + (shuffled_w, )
            X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(
                *arrays_sets,
                test_size=self.test_size,
                random_state=self.random_state)
        else:
            X_train, X_test, y_train, y_test = train_test_split(
                *arrays_sets,
                test_size=self.test_size,
                random_state=self.random_state)

        # scale dataset
        # NOTE(review): train and test sets are scaled independently (here and
        # before testing below) -- confirm this is intended.
        if self.scale:
            fu.log('Scaling dataset', self.out_log, self.global_log)
            X_train = scale(X_train)

        # build model
        fu.log('Building model', self.out_log, self.global_log)
        model = self.build_model((X_train.shape[1], ), np.unique(y_train).size)

        # model summary (collected line by line so it can be sent to the log)
        stringlist = []
        model.summary(print_fn=lambda x: stringlist.append(x))
        model_summary = "\n".join(stringlist)
        fu.log('Model summary:\n\n%s\n' % model_summary, self.out_log,
               self.global_log)

        # get optimizer: the class is looked up by name in
        # tensorflow.keras.optimizers
        mod = __import__('tensorflow.keras.optimizers',
                         fromlist=[self.optimizer])
        opt_class = getattr(mod, self.optimizer)
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and no longer honoured by recent Keras optimizers.
        opt = opt_class(learning_rate=self.learning_rate)
        # compile model
        model.compile(optimizer=opt,
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy', 'mse'])

        # fitting
        fu.log('Training model', self.out_log, self.global_log)
        # set an early stopping mechanism
        # set patience=2, to be a bit tolerant against random validation loss increases
        early_stopping = EarlyStopping(patience=2)

        # TODO: class_weight not working since TF 2.4.1 update, so it is never
        # estimated from the target data; None (the `fit` default) disables it.
        class_weight = None
        if self.weight:
            sample_weight = w_train
        else:
            fu.log('No weight provided', self.out_log, self.global_log)
            sample_weight = None

        # fit the model
        mf = model.fit(X_train,
                       y_train,
                       class_weight=class_weight,
                       sample_weight=sample_weight,
                       batch_size=self.batch_size,
                       epochs=self.max_epochs,
                       callbacks=[early_stopping],
                       validation_split=self.validation_size,
                       verbose=1)

        fu.log('Total epochs performed: %s' % len(mf.history['loss']),
               self.out_log, self.global_log)

        # metrics of the last epoch performed
        train_metrics = pd.DataFrame()
        train_metrics['metric'] = [
            'Train loss', ' Train accuracy', 'Train MSE', 'Validation loss',
            'Validation accuracy', 'Validation MSE'
        ]
        train_metrics['coefficient'] = [
            mf.history['loss'][-1], mf.history['accuracy'][-1],
            mf.history['mse'][-1], mf.history['val_loss'][-1],
            mf.history['val_accuracy'][-1], mf.history['val_mse'][-1]
        ]

        fu.log(
            'Training metrics\n\nTRAINING METRICS TABLE\n\n%s\n' %
            train_metrics, self.out_log, self.global_log)

        # confusion matrix: the predicted class is the index of the highest
        # score in each prediction row
        # NOTE(review): `math` is presumably tensorflow.math (the result has
        # `.numpy()`), not the stdlib module -- confirm against the imports.
        train_predictions = model.predict(X_train)
        train_predictions = np.around(train_predictions, decimals=2)
        norm_pred = [np.argmax(pred, axis=0) for pred in train_predictions]
        cnf_matrix_train = math.confusion_matrix(y_train, norm_pred).numpy()
        np.set_printoptions(precision=2)
        if self.normalize_cm:
            # normalise each row by its total
            cnf_matrix_train = cnf_matrix_train.astype(
                'float') / cnf_matrix_train.sum(axis=1)[:, np.newaxis]
            cm_type = 'NORMALIZED CONFUSION MATRIX'
        else:
            cm_type = 'CONFUSION MATRIX, WITHOUT NORMALIZATION'

        fu.log(
            'Calculating confusion matrix for training dataset\n\n%s\n\n%s\n' %
            (cm_type, cnf_matrix_train), self.out_log, self.global_log)

        # testing
        if self.scale:
            X_test = scale(X_test)
        fu.log('Testing model', self.out_log, self.global_log)
        test_loss, test_accuracy, test_mse = model.evaluate(X_test, y_test)

        test_metrics = pd.DataFrame()
        test_metrics['metric'] = ['Test loss', ' Test accuracy', 'Test MSE']
        test_metrics['coefficient'] = [test_loss, test_accuracy, test_mse]

        fu.log(
            'Testing metrics\n\nTESTING METRICS TABLE\n\n%s\n' % test_metrics,
            self.out_log, self.global_log)

        # predict data from X_test
        test_predictions = model.predict(X_test)
        test_predictions = np.around(test_predictions, decimals=2)
        tpr = tuple(map(tuple, test_predictions))

        # one column holding the per-class scores, labelled with the classes
        # seen in training, next to the true target
        test_table = pd.DataFrame()
        test_table['P' + np.array2string(np.unique(y_train))] = tpr
        test_table['target'] = y_test

        fu.log('TEST DATA\n\n%s\n' % test_table, self.out_log, self.global_log)

        # confusion matrix
        norm_pred = [np.argmax(pred, axis=0) for pred in test_predictions]
        cnf_matrix_test = math.confusion_matrix(y_test, norm_pred).numpy()
        np.set_printoptions(precision=2)
        if self.normalize_cm:
            cnf_matrix_test = cnf_matrix_test.astype(
                'float') / cnf_matrix_test.sum(axis=1)[:, np.newaxis]
            cm_type = 'NORMALIZED CONFUSION MATRIX'
        else:
            cm_type = 'CONFUSION MATRIX, WITHOUT NORMALIZATION'

        fu.log(
            'Calculating confusion matrix for testing dataset\n\n%s\n\n%s\n' %
            (cm_type, cnf_matrix_test), self.out_log, self.global_log)

        # save test data
        if (self.io_dict["out"]["output_test_table_path"]):
            fu.log(
                'Saving testing data to %s' %
                self.io_dict["out"]["output_test_table_path"], self.out_log,
                self.global_log)
            test_table.to_csv(self.io_dict["out"]["output_test_table_path"],
                              index=False,
                              header=True)

        # sorted distinct target values; used by the plot and stored with the
        # model
        vs = np.unique(targets_list)
        vs.sort()

        # create test plot
        if (self.io_dict["out"]["output_plot_path"]):
            if len(vs) > 2:
                plot = plotResultsClassMultCM(mf.history, cnf_matrix_train,
                                              cnf_matrix_test,
                                              self.normalize_cm, vs)
                fu.log(
                    'Saving confusion matrix plot to %s' %
                    self.io_dict["out"]["output_plot_path"], self.out_log,
                    self.global_log)
            else:
                plot = plotResultsClassBinCM(mf.history, train_predictions,
                                             test_predictions, y_train, y_test,
                                             cnf_matrix_train, cnf_matrix_test,
                                             self.normalize_cm, vs)
                fu.log(
                    'Saving binary classifier evaluator plot to %s' %
                    self.io_dict["out"]["output_plot_path"], self.out_log,
                    self.global_log)
            plot.savefig(self.io_dict["out"]["output_plot_path"], dpi=150)

        # save model and parameters: the JSON blob is stored as an attribute
        # of the same HDF5 file that holds the model
        vars_obj = {
            'features': self.features,
            'target': self.target,
            'scale': self.scale,
            'vs': vs.tolist(),
            'type': 'classification'
        }
        variables = json.dumps(vars_obj)
        fu.log('Saving model to %s' % self.io_dict["out"]["output_model_path"],
               self.out_log, self.global_log)
        with h5py.File(self.io_dict["out"]["output_model_path"],
                       mode='w') as f:
            hdf5_format.save_model_to_hdf5(model, f)
            f.attrs['variables'] = variables

        return 0
Esempio n. 6
0
    def launch(self) -> int:
        """Execute the :class:`RecurrentNeuralNetwork <neural_networks.recurrent_neural_network.RecurrentNeuralNetwork>` neural_networks.recurrent_neural_network.RecurrentNeuralNetwork object.

        Loads the target column of the dataset, turns it into windowed samples
        with ``split_sequence``, holds out the last ``test_size`` samples for
        testing, builds and trains the model returned by ``build_model``, logs
        training/testing metrics and the test table, optionally writes the
        test table and a plot, and finally stores the model in an HDF5 file
        together with a JSON ``variables`` attribute.

        Returns:
            int: Always 0. Returned early, before any work is done, when
            ``check_restart()`` is truthy.
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # load dataset
        fu.log('Getting dataset from %s' % self.io_dict["in"]["input_dataset_path"], self.out_log, self.global_log)
        if 'column' in self.target:
            # target is given by column name: take the names from the header
            # and skip that row when reading the data
            labels = getHeader(self.io_dict["in"]["input_dataset_path"])
            skiprows = 1
        else:
            labels = None
            skiprows = None
        # raw string so that `\s` reaches the regex engine instead of being an
        # invalid escape sequence in the Python literal
        data = pd.read_csv(self.io_dict["in"]["input_dataset_path"],
                           header=None,
                           sep=r"\s+|;|:|,|\t",
                           engine="python",
                           skiprows=skiprows,
                           names=labels)

        # get target column
        target = data[getTargetValue(self.target)].to_numpy()

        # split into samples
        X, y = split_sequence(target, self.window_size)
        # reshape into [samples, timesteps, features]
        X = X.reshape((X.shape[0], X.shape[1], 1))

        # train / test split: the last `test_size` samples are held out
        # NOTE(review): with test_size == 0 the negative-zero slices leave the
        # train set empty and put every sample in the test set.
        fu.log('Creating train and test sets', self.out_log, self.global_log)
        X_train, X_test = X[:-self.test_size], X[-self.test_size:]
        y_train, y_test = y[:-self.test_size], y[-self.test_size:]

        # build model
        fu.log('Building model', self.out_log, self.global_log)
        model = self.build_model((X_train.shape[1], 1))

        # model summary (collected line by line so it can be sent to the log)
        stringlist = []
        model.summary(print_fn=lambda x: stringlist.append(x))
        model_summary = "\n".join(stringlist)
        fu.log('Model summary:\n\n%s\n' % model_summary, self.out_log, self.global_log)

        # get optimizer: the class is looked up by name in
        # tensorflow.keras.optimizers
        mod = __import__('tensorflow.keras.optimizers', fromlist=[self.optimizer])
        opt_class = getattr(mod, self.optimizer)
        # `learning_rate` is the supported keyword; the old `lr` alias is
        # deprecated and no longer honoured by recent Keras optimizers.
        opt = opt_class(learning_rate=self.learning_rate)
        # compile model
        model.compile(optimizer=opt, loss='mse', metrics=['mse', 'mae'])

        # fitting
        fu.log('Training model', self.out_log, self.global_log)
        # set an early stopping mechanism
        # set patience=2, to be a bit tolerant against random validation loss increases
        early_stopping = EarlyStopping(patience=2)
        # fit the model
        mf = model.fit(X_train,
                       y_train,
                       batch_size=self.batch_size,
                       epochs=self.max_epochs,
                       callbacks=[early_stopping],
                       validation_split=self.validation_size,
                       verbose=1)

        # metrics of the last epoch performed
        train_metrics = pd.DataFrame()
        train_metrics['metric'] = ['Train loss', ' Train MAE', 'Train MSE', 'Validation loss', 'Validation MAE', 'Validation MSE']
        train_metrics['coefficient'] = [
            mf.history['loss'][-1], mf.history['mae'][-1],
            mf.history['mse'][-1], mf.history['val_loss'][-1],
            mf.history['val_mae'][-1], mf.history['val_mse'][-1]
        ]

        fu.log('Training metrics\n\nTRAINING METRICS TABLE\n\n%s\n' % train_metrics, self.out_log, self.global_log)

        # testing: evaluate returns the loss followed by the metrics in the
        # order they were passed to compile (mse, then mae)
        fu.log('Testing model', self.out_log, self.global_log)
        test_loss, test_mse, test_mae = model.evaluate(X_test, y_test)

        # predict data from X_test
        test_predictions = model.predict(X_test)
        test_predictions = np.around(test_predictions, decimals=2)
        tpr = np.squeeze(np.asarray(test_predictions))

        test_metrics = pd.DataFrame()
        test_metrics['metric'] = ['Test loss', 'Test MAE', 'Test MSE']
        test_metrics['coefficient'] = [test_loss, test_mae, test_mse]

        fu.log('Testing metrics\n\nTESTING METRICS TABLE\n\n%s\n' % test_metrics, self.out_log, self.global_log)

        test_table = pd.DataFrame()
        test_table['prediction'] = tpr
        test_table['target'] = y_test
        test_table['residual'] = test_table['target'] - test_table['prediction']
        test_table['difference %'] = np.absolute(test_table['residual'] / test_table['target'] * 100)
        pd.set_option('display.float_format', lambda x: '%.2f' % x)
        # sort by difference in %
        test_table = test_table.sort_values(by=['difference %'])
        test_table = test_table.reset_index(drop=True)
        fu.log('TEST DATA\n\n%s\n' % test_table, self.out_log, self.global_log)

        # save test data
        if (self.io_dict["out"]["output_test_table_path"]):
            fu.log('Saving testing data to %s' % self.io_dict["out"]["output_test_table_path"], self.out_log, self.global_log)
            test_table.to_csv(self.io_dict["out"]["output_test_table_path"], index=False, header=True)

        # create test plot
        if (self.io_dict["out"]["output_plot_path"]):
            fu.log('Saving plot to %s' % self.io_dict["out"]["output_plot_path"], self.out_log, self.global_log)
            test_predictions = test_predictions.flatten()
            train_predictions = model.predict(X_train).flatten()
            plot = plotResultsReg(mf.history, y_test, test_predictions, y_train, train_predictions)
            plot.savefig(self.io_dict["out"]["output_plot_path"], dpi=150)

        # save model and parameters: the JSON blob is stored as an attribute
        # of the same HDF5 file that holds the model
        vars_obj = {
            'target': self.target,
            'window_size': self.window_size,
            'type': 'recurrent'
        }
        variables = json.dumps(vars_obj)
        fu.log('Saving model to %s' % self.io_dict["out"]["output_model_path"], self.out_log, self.global_log)
        with h5py.File(self.io_dict["out"]["output_model_path"], mode='w') as f:
            hdf5_format.save_model_to_hdf5(model, f)
            f.attrs['variables'] = variables

        return 0
Esempio n. 7
0
def save_model(model,
               filepath,
               overwrite=True,
               include_optimizer=True,
               save_format=None):
    """Persist a Keras model as a TensorFlow SavedModel or as an HDF5 file.

    What gets stored:
        - the model's configuration (topology)
        - the model's weights
        - the model's optimizer's state (if any)

    As a result the model can later be re-created in exactly the same state
    without access to the code that originally defined or trained it.

    HDF5 is selected when any of the following holds (checked in this order):
    the resolved `save_format` is 'h5', `filepath` is an open `h5py.File`, or
    the extension of `filepath` is listed in `_HDF5_EXTENSIONS`. In every
    other case the model is handed to `saved_model.save`.

    Arguments:
        model: Keras model instance to be saved.
        filepath: One of the following:
          - String, path where to save the model
          - `h5py.File` object where to save the model
        overwrite: Whether to overwrite any existing model at the target
          location, or instead ask the user with a manual prompt. Forwarded
          unchanged to the underlying writer.
        include_optimizer: If True, save optimizer's state together.
          Forwarded unchanged to the underlying writer.
        save_format: Either 'tf' or 'h5', indicating whether to save the
          model to Tensorflow SavedModel or HDF5. When falsy, 'tf' is used if
          `tf2.enabled()` is true and 'h5' otherwise.

    Raises:
        NotImplementedError: If the HDF5 path is selected and `model` is
          neither a graph network (Functional model) nor a `Sequential`.
        ImportError: If save format is hdf5, and h5py is not available
          (presumably raised by the HDF5 writer; not raised directly here).
    """
    from tensorflow.python.keras.engine import sequential  # pylint: disable=g-import-not-at-top

    # An explicit format wins; otherwise fall back to the default of the
    # running TensorFlow behavior version.
    fallback_format = 'tf' if tf2.enabled() else 'h5'
    if not save_format:
        save_format = fallback_format

    # Order matters: the extension check runs only after the format check and
    # the h5py.File check have both failed.
    if save_format == 'h5':
        write_hdf5 = True
    elif h5py is not None and isinstance(filepath, h5py.File):
        write_hdf5 = True
    else:
        write_hdf5 = os.path.splitext(filepath)[1] in _HDF5_EXTENSIONS

    if not write_hdf5:
        saved_model.save(model, filepath, overwrite, include_optimizer)
        return

    # TODO(b/130258301): add utility method for detecting model type.
    hdf5_compatible = (
        model._is_graph_network or  # pylint:disable=protected-access
        isinstance(model, sequential.Sequential))
    if not hdf5_compatible:
        raise NotImplementedError(
            'Saving the model to HDF5 format requires the model to be a '
            'Functional model or a Sequential model. It does not work for '
            'subclassed models, because such models are defined via the body of '
            'a Python method, which isn\'t safely serializable. Consider saving '
            'to the Tensorflow SavedModel format (by setting save_format="tf") '
            'or using `save_weights`.')

    hdf5_format.save_model_to_hdf5(model, filepath, overwrite,
                                   include_optimizer)
Esempio n. 8
0
    def launch(self) -> int:
        """Execute the :class:`RegressionNeuralNetwork <neural_networks.regression_neural_network.RegressionNeuralNetwork>` neural_networks.regression_neural_network.RegressionNeuralNetwork object.

        Steps, in order: load the input dataset, extract the features, the
        target and (optionally) the sample weights, shuffle, split into train
        and test sets, optionally scale, build / compile / fit the model with
        early stopping, log the training and testing metrics, optionally write
        the test table and the plot, and finally save the model together with
        its JSON-encoded parameters to an HDF5 file.

        Returns:
            int: 0, both after a full run and when ``self.check_restart()``
            returns a truthy value (in which case nothing else is executed).
        """

        # check input/output paths and parameters
        self.check_data_params(self.out_log, self.err_log)

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # load dataset
        dataset_path = self.io_dict["in"]["input_dataset_path"]
        fu.log('Getting dataset from %s' % dataset_path, self.out_log,
               self.global_log)
        if 'columns' in self.features:
            # features are addressed by column name: labels come from
            # getHeader() and the first row of the file is skipped
            labels = getHeader(dataset_path)
            skiprows = 1
        else:
            labels = None
            skiprows = None
        # The separator is a regex (python engine). It must be a raw string:
        # "\s" inside a regular literal is an invalid escape sequence.
        data = pd.read_csv(dataset_path,
                           header=None,
                           sep=r"\s+|;|:|,|\t",
                           engine="python",
                           skiprows=skiprows,
                           names=labels)

        X = getFeatures(self.features, data, self.out_log,
                        self.__class__.__name__)
        fu.log('Features: [%s]' % (getIndependentVarsList(self.features)),
               self.out_log, self.global_log)
        # target
        y = getTarget(self.target, data, self.out_log, self.__class__.__name__)
        fu.log('Target: %s' % (str(getTargetValue(self.target))), self.out_log,
               self.global_log)
        # weights (optional)
        if self.weight:
            w = getWeight(self.weight, data, self.out_log,
                          self.__class__.__name__)

        # shuffle dataset: one permutation of row positions applied to all arrays
        fu.log('Shuffling dataset', self.out_log, self.global_log)
        shuffled_indices = np.arange(X.shape[0])
        np.random.shuffle(shuffled_indices)
        np_X = X.to_numpy()
        shuffled_X = np_X[shuffled_indices]
        shuffled_y = y[shuffled_indices]
        if self.weight:
            shuffled_w = w[shuffled_indices]

        # train / test split
        fu.log('Creating train and test sets', self.out_log, self.global_log)
        arrays_sets = (shuffled_X, shuffled_y)
        # if user provide weights, split them alongside X and y
        if self.weight:
            arrays_sets = arrays_sets + (shuffled_w, )
            X_train, X_test, y_train, y_test, w_train, w_test = train_test_split(
                *arrays_sets,
                test_size=self.test_size,
                random_state=self.random_state)
        else:
            X_train, X_test, y_train, y_test = train_test_split(
                *arrays_sets,
                test_size=self.test_size,
                random_state=self.random_state)

        # scale dataset
        # NOTE(review): the train set here and the test set further below are
        # passed to scale() separately, so each is scaled from its own data --
        # confirm this is intended.
        if self.scale:
            fu.log('Scaling dataset', self.out_log, self.global_log)
            X_train = scale(X_train)

        # build model
        fu.log('Building model', self.out_log, self.global_log)
        model = self.build_model((X_train.shape[1], ))

        # model summary: collect the printed lines instead of writing to stdout
        summary_lines = []
        model.summary(print_fn=summary_lines.append)
        model_summary = "\n".join(summary_lines)
        fu.log('Model summary:\n\n%s\n' % model_summary, self.out_log,
               self.global_log)

        # get optimizer class by name from tensorflow.keras.optimizers
        mod = __import__('tensorflow.keras.optimizers',
                         fromlist=[self.optimizer])
        opt_class = getattr(mod, self.optimizer)
        # `learning_rate` is the documented argument name; `lr` is only a
        # deprecated alias in tf.keras optimizers.
        opt = opt_class(learning_rate=self.learning_rate)
        # compile model
        model.compile(optimizer=opt,
                      loss='mse',
                      metrics=['mae', 'mse'],
                      sample_weight_mode='samplewise')

        # fitting
        fu.log('Training model', self.out_log, self.global_log)
        # set an early stopping mechanism
        # set patience=2, to be a bit tolerant against random validation loss increases
        early_stopping = EarlyStopping(patience=2)

        # TODO: class_weight not working since TF 2.4.1 update
        # (it used to be estimated from the target data when no weight was
        # provided); an empty list is passed in every case for now.
        sample_weight = w_train if self.weight else None
        class_weight = []

        # fit the model
        mf = model.fit(X_train,
                       y_train,
                       class_weight=class_weight,
                       sample_weight=sample_weight,
                       batch_size=self.batch_size,
                       epochs=self.max_epochs,
                       callbacks=[early_stopping],
                       validation_split=self.validation_size,
                       verbose=1)

        fu.log('Total epochs performed: %s' % len(mf.history['loss']),
               self.out_log, self.global_log)

        # predict data from X_train
        train_predictions = model.predict(X_train)
        train_predictions = np.around(train_predictions, decimals=2)

        # R2 on the train set, weighted when sample weights were provided
        score_train_inputs = (y_train, train_predictions)
        if self.weight:
            score_train_inputs = score_train_inputs + (w_train, )
        train_score = r2_score(*score_train_inputs)

        # last-epoch values of every tracked metric
        train_metrics = pd.DataFrame()
        train_metrics['metric'] = [
            'Train loss', 'Train MAE', 'Train MSE', 'Train R2',
            'Validation loss', 'Validation MAE', 'Validation MSE'
        ]
        train_metrics['coefficient'] = [
            mf.history['loss'][-1], mf.history['mae'][-1],
            mf.history['mse'][-1], train_score, mf.history['val_loss'][-1],
            mf.history['val_mae'][-1], mf.history['val_mse'][-1]
        ]

        fu.log(
            'Training metrics\n\nTRAINING METRICS TABLE\n\n%s\n' %
            train_metrics, self.out_log, self.global_log)

        # testing
        if self.scale:
            X_test = scale(X_test)
        fu.log('Testing model', self.out_log, self.global_log)
        test_loss, test_mae, test_mse = model.evaluate(X_test, y_test)

        # predict data from X_test
        test_predictions = model.predict(X_test)
        test_predictions = np.around(test_predictions, decimals=2)
        # drop singleton dimensions so the predictions fit a table column
        tpr = np.squeeze(np.asarray(test_predictions))

        # R2 on the test set, weighted when sample weights were provided
        score_test_inputs = (y_test, test_predictions)
        if self.weight:
            score_test_inputs = score_test_inputs + (w_test, )
        score = r2_score(*score_test_inputs)

        test_metrics = pd.DataFrame()
        test_metrics['metric'] = [
            'Test loss', 'Test MAE', 'Test MSE', 'Test R2'
        ]
        test_metrics['coefficient'] = [test_loss, test_mae, test_mse, score]

        fu.log(
            'Testing metrics\n\nTESTING METRICS TABLE\n\n%s\n' % test_metrics,
            self.out_log, self.global_log)

        # per-sample comparison between prediction and target
        test_table = pd.DataFrame()
        test_table['prediction'] = tpr
        # NOTE(review): if y_test is a pandas Series rather than an ndarray,
        # this assignment aligns on index instead of position -- confirm the
        # type returned by getTarget().
        test_table['target'] = y_test
        test_table['residual'] = (test_table['target'] -
                                  test_table['prediction'])
        test_table['difference %'] = np.absolute(test_table['residual'] /
                                                 test_table['target'] * 100)
        # changes the pandas float display format globally (not restored)
        pd.set_option('display.float_format', lambda x: '%.2f' % x)
        # sort by difference in %
        test_table = test_table.sort_values(by=['difference %'])
        test_table = test_table.reset_index(drop=True)
        fu.log('TEST DATA\n\n%s\n' % test_table, self.out_log, self.global_log)

        # save test data (only when an output path was given)
        if self.io_dict["out"]["output_test_table_path"]:
            fu.log(
                'Saving testing data to %s' %
                self.io_dict["out"]["output_test_table_path"], self.out_log,
                self.global_log)
            test_table.to_csv(self.io_dict["out"]["output_test_table_path"],
                              index=False,
                              header=True)

        # create test plot (only when an output path was given)
        if self.io_dict["out"]["output_plot_path"]:
            fu.log(
                'Saving plot to %s' % self.io_dict["out"]["output_plot_path"],
                self.out_log, self.global_log)
            test_predictions = test_predictions.flatten()
            # the plot uses fresh, unrounded train predictions
            train_predictions = model.predict(X_train).flatten()
            plot = plotResultsReg(mf.history, y_test, test_predictions,
                                  y_train, train_predictions)
            plot.savefig(self.io_dict["out"]["output_plot_path"], dpi=150)

        # save model and parameters: the parameters travel with the model as
        # a JSON string stored in the HDF5 file's 'variables' attribute
        vars_obj = {
            'features': self.features,
            'target': self.target,
            'scale': self.scale,
            'type': 'regression'
        }
        variables = json.dumps(vars_obj)
        fu.log('Saving model to %s' % self.io_dict["out"]["output_model_path"],
               self.out_log, self.global_log)
        with h5py.File(self.io_dict["out"]["output_model_path"],
                       mode='w') as f:
            hdf5_format.save_model_to_hdf5(model, f)
            f.attrs['variables'] = variables

        return 0