Ejemplo n.º 1
0
  def test_validate_callbacks_predefined_callbacks(self):
    """Exercises `validate_callbacks` with Keras' predefined callbacks.

    The first group is validated together with `adam.Adam()` and must not
    raise. Every callback of the second group, validated on its own together
    with `v1_adam.AdamOptimizer()`, must raise a `ValueError` whose message
    matches 'You must specify a Keras Optimizer V2'.
    """
    # Validated in a single call; any exception here fails the test.
    accepted_callbacks = [
        callbacks.TensorBoard(),
        callbacks.CSVLogger(filename='./log.csv'),
        callbacks.EarlyStopping(),
        callbacks.ModelCheckpoint(filepath='./checkpoint'),
        callbacks.TerminateOnNaN(),
        callbacks.ProgbarLogger(),
        callbacks.History(),
        callbacks.RemoteMonitor(),
    ]
    distributed_training_utils.validate_callbacks(
        accepted_callbacks, adam.Adam())

    # Validated one at a time so that each callback is checked individually.
    rejected_callbacks = [
        callbacks.ReduceLROnPlateau(),
        callbacks.LearningRateScheduler(schedule=lambda epoch: 0.001),
    ]
    expected_message = 'You must specify a Keras Optimizer V2'

    for rejected in rejected_callbacks:
      with self.assertRaisesRegex(ValueError, expected_message):
        distributed_training_utils.validate_callbacks(
            [rejected], v1_adam.AdamOptimizer())
Ejemplo n.º 2
0
def _create_csv_logger(artifact_dir: str) -> callbacks.CSVLogger:
    """Build the CSVLogger callback for a given artifact directory.

    Args:
        artifact_dir: str, path to artifact directory.

    Returns:
        CSVLogger, callback writing to ``CSV_LOGGER_FILENAME`` inside
        ``artifact_dir``, configured with a comma separator and
        ``append=True``.
    """
    return callbacks.CSVLogger(
        filename=os.path.join(artifact_dir, CSV_LOGGER_FILENAME),
        separator=",",
        append=True,
    )
Ejemplo n.º 3
0
def define_callbacks(output, batch_size):
    """Return the list of callbacks used for a training run.

    The list contains, in this order:
      1. a CSVLogger writing to 'training.log' joined onto `output`,
      2. an EarlyStopping on 'val_loss' with a patience of 2,
      3. a TensorBoard callback receiving `batch_size`,
      4. a ModelCheckpoint on 'val_loss' with `save_best_only=True`, whose
         file name template is joined onto `output`.

    Args:
        output: Base path that the log and checkpoint file names are joined
            onto.
        batch_size: Value passed to the TensorBoard callback as `batch_size`.

    Returns:
        list of the four callback instances described above.
    """
    # Template placeholders are left unformatted here; they are part of the
    # string handed to ModelCheckpoint.
    checkpoint_template = join(
        output,
        'weights.{epoch:02d}-{loss:.2f}-{acc:.2f}-{val_loss:.2f}-{val_acc:.2f}.hdf5'
    )
    return [
        callbacks.CSVLogger(join(output, 'training.log')),
        callbacks.EarlyStopping(monitor='val_loss', patience=2),
        callbacks.TensorBoard(batch_size=batch_size),
        callbacks.ModelCheckpoint(
            filepath=checkpoint_template,
            monitor='val_loss',
            save_best_only=True,
        ),
    ]
Ejemplo n.º 4
0
  def setUp(self):
    """Creates the inputs and callbacks shared by the tests of this class.

    Inputs are provided in several forms (numpy arrays, tensors, a batched
    dataset, a generator object and a `TestSequence`), all built around a
    batch size of 5 and 10 features. Callbacks come as ModelCheckpoint and
    TensorBoard instances configured with an integer frequency of 10 (the
    `fallback_*` attributes) or with a string frequency (the `normal_*`
    attributes), plus one CSVLogger.
    """
    super(CallbackFallbackTest, self).setUp()
    self.batch_size = 5

    # Numpy inputs: 50 samples of 10 features, with 50 targets.
    self.numpy_input = np.zeros((50, 10))
    self.numpy_target = np.ones(50)

    # Tensor inputs with the same shapes as the numpy ones.
    self.tensor_input = constant_op.constant(2.0, shape=(50, 10))
    self.tensor_target = array_ops.ones((50,))

    # Dataset built from the numpy arrays, shuffled and then batched.
    sliced_dataset = dataset_ops.DatasetV2.from_tensor_slices(
        (self.numpy_input, self.numpy_target))
    self.dataset_input = sliced_dataset.shuffle(50).batch(self.batch_size)

    # Generator object yielding exactly one (inputs, targets) batch.
    def generator():
      yield (np.zeros((self.batch_size, 10)), np.ones(self.batch_size))

    self.generator_input = generator()
    self.sequence_input = TestSequence(
        batch_size=self.batch_size, feature_shape=10)

    # Checkpoint callbacks: integer `save_freq` versus `save_freq='epoch'`.
    self.fallback_ckeckpoint_cb = cbks.ModelCheckpoint(
        self.get_temp_dir(), save_freq=10)
    self.normal_checkpoint_cb = cbks.ModelCheckpoint(
        self.get_temp_dir(), save_freq='epoch')

    # TensorBoard callbacks: integer `update_freq` versus 'batch'.
    self.fallback_tensorboard_cb = cbks.TensorBoard(update_freq=10)
    self.normal_tensorboard_cb = cbks.TensorBoard(update_freq='batch')

    self.unaffected_cb = cbks.CSVLogger(self.get_temp_dir())
Ejemplo n.º 5
0
    def build_callbacks(self, conf, callbacks_list):
        '''
        Set up the logging and history callbacks. Based on Keras Callbacks:
        https://github.com/fchollet/keras/blob/fbc9a18f0abc5784607cd4a2a3886558efa3f794/keras/callbacks.py

        Always included: BaseLogger, self.history and a CSVLogger writing to
        "callbacks-<timestamp>.log" inside conf['paths']['csvlog_save_path']
        (the directory is created when it does not exist yet).
        EarlyStopping is added when "earlystop" is in
        conf['callbacks']['list'].
        Other possible callbacks to add in future: RemoteMonitor,
        LearningRateScheduler ("lr_scheduler" may be listed in the
        configuration but currently adds no callback).

        Argument list:
        - conf: configuration with a "callbacks" section (see conf.yaml) and
          a "paths" section. Values read here:

          - conf['callbacks']['list']: names of the additional callbacks to
            enable.
          - conf['callbacks']['mode']: passed to EarlyStopping as mode; one
            of {auto, min, max}. For val_acc this should be max, for
            val_loss this should be min, etc. In auto mode, the direction
            is automatically inferred from the name of the monitored
            quantity.
          - conf['callbacks']['monitor']: quantity used for early stopping;
            has to be from the list of metrics.
          - conf['callbacks']['patience']: number of epochs used to decide
            on whether to apply early stopping or continue training.
          - conf['paths']['csvlog_save_path']: directory for the CSV log.

        - callbacks_list: kept for interface compatibility. The value passed
          in is ignored; conf['callbacks']['list'] is used instead.

        Returns: cbks.CallbackList wrapping the assembled callbacks.
        '''

        callbacks_conf = conf['callbacks']
        mode = callbacks_conf['mode']
        monitor = callbacks_conf['monitor']
        patience = callbacks_conf['patience']
        csvlog_save_path = conf['paths']['csvlog_save_path']

        # CSV callback is on by default. exist_ok avoids failing when the
        # directory appears between a separate existence check and creation.
        os.makedirs(csvlog_save_path, exist_ok=True)

        # The configuration is the source of truth for the enabled callbacks.
        callbacks_list = callbacks_conf['list']

        # Join the path components so the log lands inside the directory
        # whether or not the configured path ends with a separator.
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        csvlog_file = os.path.join(csvlog_save_path,
                                   "callbacks-{}.log".format(timestamp))

        enabled = [
            cbks.BaseLogger(),
            self.history,
            cbks.CSVLogger(csvlog_file),
        ]

        if "earlystop" in callbacks_list:
            enabled.append(
                cbks.EarlyStopping(patience=patience,
                                   monitor=monitor,
                                   mode=mode))
        # "lr_scheduler" is accepted in the list but not implemented yet.

        return cbks.CallbackList(enabled)
Ejemplo n.º 6
0
def run(epochs,
        num_batches,
        batch_size=1,
        learning_rate=0.001,
        beta1=0.9,
        beta2=0.999,
        epsilon=1e-08,
        save_every=10,
        patience=5,
        baseline=2e-5,
        resume=False):
    """Build, compile and train a Regnet model with checkpoint/log callbacks.

    The Keras session is cleared first, then train/validation iterators are
    created through `build_features`, the callbacks are assembled, the
    network is built and compiled, and `fit_generator` is called.

    Args:
        epochs: Passed to `fit_generator` as `epochs`.
        num_batches: Passed to `fit_generator` as `steps_per_epoch`.
        batch_size: Batch size for the train/validation batch generators and
            for the TensorBoard callback. Also passed to `fit_generator` as
            `validation_steps`.
        learning_rate: Forwarded to `regnet.Regnet` (1st positional argument).
        beta1: Forwarded to `regnet.Regnet` (2nd positional argument).
        beta2: Forwarded to `regnet.Regnet` (3rd positional argument).
        epsilon: Forwarded to `regnet.Regnet` (4th positional argument).
        save_every: Passed to `ModelCheckpoint` as `period`.
        patience: Only referenced by the commented-out EarlyStopping
            callback; currently has no effect.
        baseline: Only referenced by the commented-out EarlyStopping
            callback; currently has no effect.
        resume: When false, an existing checkpoint file at the save path is
            deleted before training. When true, the file is kept and the
            model is reloaded from it with `keras.models.load_model`.

    Returns:
        None.
    """
    # Destroy old graph
    K.clear_session()

    # Initialize batch generators
    batch_train = build_features.get_train_batches(batch_size=batch_size)
    batch_valid = build_features.get_valid_batches(batch_size=batch_size)

    # Create TensorFlow Iterator object
    itr_train = build_features.make_iterator(batch_train)
    itr_valid = build_features.make_iterator(batch_valid)

    # Init callbacks
    cbs = list()

    # EarlyStopping callback: stops whenever loss doesn't improve
    # (disabled; this is the only place `patience` and `baseline` are used)
    # cbs.append(early_stopping.EarlyStopping(monitor='val_loss', mode='min', patience=patience,
    #                                         verbose=1, baseline=baseline))

    # ModelCheckpoint callback: saves model every SAVE_EVERY
    # ROT and DISP are not defined in this function; they are expected to be
    # module-level names.
    save_path = paths.checkpoints.regnet(
        rot=ROT, disp=DISP)  # ./checkpoints/regnet/train
    save_path.parent.mkdir(exist_ok=True, parents=True)
    if save_path.exists() and not resume:
        save_path.unlink()  # deletes file before training
    cbs.append(
        callbacks.ModelCheckpoint(str(save_path),
                                  save_best_only=True,
                                  period=save_every))

    # TensorBoard callback: saves logs for tensorboard
    log_path = str(paths.logs.regnet())  # ./logs/regnet/train
    cbs.append(
        callbacks.TensorBoard(log_dir=log_path,
                              batch_size=batch_size,
                              write_graph=True))

    # History callback: saves all losses
    # The CSV file sits next to the checkpoint, with a '.csv' suffix.
    cbs.append(
        callbacks.CSVLogger(save_path.with_suffix('.csv'),
                            append=True,
                            separator=','))

    # Create the network
    net = regnet.Regnet(learning_rate, beta1, beta2, epsilon)

    # Configures the model for training
    net.model.compile(optimizer=net.train_opt,
                      loss=net.model_loss,
                      metrics=net.metrics)

    # Load the pretrained imagenet weights
    load_weights.imagenet_weights(net.model)

    # NOTE(review): when resuming, `net.model` is replaced by the loaded
    # model, so the compile and imagenet-weight steps above were applied to
    # the model object that is discarded here. There is also no check that
    # the checkpoint file exists before loading - confirm both are intended.
    if resume:
        net.model = keras.models.load_model(save_path,
                                            custom_objects=CUSTOM_LAYERS,
                                            compile=True)

    # Train network
    # NOTE(review): `validation_steps` is set to `batch_size` - confirm this
    # is intended rather than a number of validation batches.
    net.model.fit_generator(generator=itr_train,
                            validation_data=itr_valid,
                            validation_steps=batch_size,
                            epochs=epochs,
                            steps_per_epoch=num_batches,
                            callbacks=cbs,
                            verbose=1,
                            workers=0)
Ejemplo n.º 7
0
def run_training(configpath):
    """Train the estimator network described by a configuration file.

    Steps, in order: load `config.Parameters(configpath)`, copy the config
    file into the run folder, build the network and the loss, compile the
    model, restore weights when a previous checkpoint exists, build the
    TFRecord input pipelines, fit the model and save the loss history.

    Files written under `params.folder_path_run` (paths are built by string
    concatenation, so the folder path is expected to end with a separator):
    "config_backup_train.ini", the "model.ckpt" checkpoint,
    "training_history_log.csv" and the "loss" archive saved with `np.savez`.

    Args:
        configpath: Path of the configuration file.

    Raises:
        ValueError: If `params.estimator_mode`, `params.loss_mode` or
            `params.optimizer` is not the value handled here
            ("vrad_kszgal", "pixelMSE_unfiltered" and 'Adam' respectively).
    """
    params = config.Parameters(configpath)

    # Save the config for this training run next to its outputs.
    copyfile(configpath, params.folder_path_run + "config_backup_train.ini")

    datasetid = params.datasetid

    if params.estimator_mode == "vrad_kszgal":
        channels_in = 2
        channels_out = 1
    else:
        # Fail with a clear message instead of an unbound-name error later.
        raise ValueError("Unsupported estimator_mode: {!r}".format(
            params.estimator_mode))

    npad = params.npad
    print("padding:", npad)
    padded_size = params.imgsizepix + 2 * npad
    img_shape = (padded_size, padded_size, channels_in)
    batch_size = params.batch_size
    epochs = params.epochs

    # The example counts define how long an epoch is; they should/could match
    # the TFRecord files.
    num_train_examples = params.nsims_train
    num_valid_examples = params.nsims_valid

    save_model_path = params.folder_path_run + 'model.ckpt'
    print("save as", save_model_path)

    # Restore a previous training and continue from there when the
    # checkpoint's ".index" file is present.
    restore_model = os.path.exists(save_model_path + ".index")

    ################### NETWORK

    estimatornet = networks.EstimatorNet(params)
    # `params.network` names the EstimatorNet method that builds the graph.
    build_network = getattr(estimatornet, params.network)
    inputs, outputs = build_network(img_shape, channels_out)

    lossfunctions = losses.Lossfunctions(params)
    if params.loss_mode == "pixelMSE_unfiltered":
        lossfunc = lossfunctions.loss_pixelMSE_unfiltered
    else:
        raise ValueError("Unsupported loss_mode: {!r}".format(
            params.loss_mode))

    model = models.Model(inputs=[inputs], outputs=[outputs])
    if params.optimizer == 'Adam':
        # https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam
        optim = optimizers.Adam(lr=params.learning_rate)
    else:
        raise ValueError("Unsupported optimizer: {!r}".format(
            params.optimizer))
    model.compile(optimizer=optim, loss=lossfunc)
    model.summary()

    cp = tf.keras.callbacks.ModelCheckpoint(filepath=save_model_path,
                                            monitor='val_loss',
                                            save_weights_only=True,
                                            save_best_only=True,
                                            verbose=1)
    # https://stackoverflow.com/questions/50127527/how-to-save-training-history-on-every-epoch-in-keras
    callback_csv = callbacks.CSVLogger(params.folder_path_run +
                                       'training_history_log.csv',
                                       append=True)

    # Load the previous model when one was found.
    if restore_model:
        print("WE TRAIN FROM PREVIOUS CHECKPOINT.")
        print("loading weights from", save_model_path)
        model.load_weights(save_model_path)
    else:
        print("WE TRAIN FROM START.")

    ################### DATA SET

    dataset_train_raw = tf.data.TFRecordDataset(params.datapath +
                                                "datasets/dataset_train_" +
                                                str(datasetid) + ".tfrecords")
    dataset_valid_raw = tf.data.TFRecordDataset(params.datapath +
                                                "datasets/dataset_valid_" +
                                                str(datasetid) + ".tfrecords")

    def parse_record(record):
        # Same parsing for the training and the validation records.
        return trainingdata.tfrecord_parse_function(record, npad, params)

    dataset_train_parsed = dataset_train_raw.map(parse_record,
                                                 num_parallel_calls=8)
    dataset_valid_parsed = dataset_valid_raw.map(parse_record,
                                                 num_parallel_calls=8)

    # https://stackoverflow.com/questions/53514495/what-does-batch-repeat-and-shuffle-do-with-tensorflow-dataset
    # Only the training data is shuffled; both datasets repeat indefinitely,
    # so the step counts passed to fit() bound each epoch.
    dataset_train_parsed = dataset_train_parsed.shuffle(
        buffer_size=100,
        reshuffle_each_iteration=True).repeat().batch(batch_size)
    dataset_valid_parsed = dataset_valid_parsed.repeat().batch(batch_size)

    # Create an iterator for the dataset and the above modifications.
    iterator_train = dataset_train_parsed.make_one_shot_iterator()
    iterator_valid = dataset_valid_parsed.make_one_shot_iterator()

    #################### TRAINING
    # Round up so that a final partial batch still counts as a step.
    train_steps = int(np.ceil(num_train_examples / float(batch_size)))
    valid_steps = int(np.ceil(num_valid_examples / float(batch_size)))
    history = model.fit(iterator_train,
                        steps_per_epoch=train_steps,
                        epochs=epochs,
                        validation_data=iterator_valid,
                        validation_steps=valid_steps,
                        verbose=2,
                        callbacks=[cp, callback_csv])

    loss = history.history['loss']
    val_loss = history.history['val_loss']
    np.savez(params.folder_path_run + "loss", loss=loss, val_loss=val_loss)
Ejemplo n.º 8
0
    monitor='val_predictions_categorical_accuracy',
    verbose=1,
    save_best_only=True,
    mode='auto',
    save_weights_only=True,
    period=1)
# Learning-rate reduction driven by 'val_predictions_loss'.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_predictions_loss',
    factor=0.25,
    patience=10,
    verbose=1,
    mode='auto',
    min_delta=1e-6,
    cooldown=0,
    min_lr=0,
)

# CSV log written into save_dir; append=False is passed explicitly.
csv_logger = callbacks.CSVLogger(
    os.path.join(save_dir, 'Log_V1.log'),
    separator=',',
    append=False,
)

train_data_generator = Train_data_generator(batch_size)
valid_data_generator = Valid_data_generator(batch_size)

# The constants are presumably the training / validation sample counts -
# TODO confirm. Any remainder of the division is truncated.
train_steps_per_epoch = int(210030 / batch_size)
valid_steps_per_epoch = int(7530 / batch_size)

model.fit_generator(
    generator=train_data_generator,
    steps_per_epoch=train_steps_per_epoch,
    epochs=epochs,
    verbose=1,
    callbacks=[checkpoint, reduce_lr, csv_logger],
    validation_data=valid_data_generator,
    validation_steps=valid_steps_per_epoch,
    workers=1,
    class_weight=class_weight,
    use_multiprocessing=False,
    shuffle=True,
)