def setup_callback_list(self, model_name):
    """Return the Keras ``CallbackList`` used to train ``model_name``.

    An entry already present in ``self.callback_lists`` is returned as is.
    Otherwise a list is built from a ``BaseLogger``, the user callbacks
    registered in ``self.callbacks`` for this model (if any) and a
    ``History``, then configured with the training parameters and bound to
    the model.

    Args:
        model_name: key into ``self.models`` (and, optionally, into
            ``self.callbacks`` and ``self.do_validation``).

    Returns:
        The configured ``cbks.CallbackList``.

    Raises:
        KeyError: if ``model_name`` is not in ``self.models``.
    """
    if model_name in self.callback_lists:
        return self.callback_lists[model_name]
    # NOTE(review): the freshly built list is not stored back into
    # ``self.callback_lists`` here -- presumably the caller does that;
    # confirm, otherwise every call creates a new ``History``.

    model = self.models[model_name]
    user_callbacks = (list(self.callbacks[model_name])
                      if model_name in self.callbacks else [])

    # Validation is decided per model, so the ``val_*`` metric names and the
    # ``do_validation`` flag handed to the callbacks always agree.
    do_validation = model_name in self.do_validation

    out_labels = list(model.metrics_names)
    callback_metrics = list(out_labels)
    if do_validation:
        callback_metrics += ["val_" + label for label in out_labels]

    # Wrap the callbacks exactly once: nesting one CallbackList inside
    # another only works on Keras versions where CallbackList is iterable.
    callback_list = cbks.CallbackList(
        [cbks.BaseLogger()] + user_callbacks + [cbks.History()])
    callback_list.set_params({
        'batch_size': self.batch_size,
        'epochs': self.epochs,
        'verbose': 2,
        'do_validation': do_validation,
        'metrics': callback_metrics,
    })
    callback_list.set_model(model)

    return callback_list
def build_callbacks(conf):
    '''
    Set up the logging callbacks. Based on Keras Callbacks:
    https://github.com/fchollet/keras/blob/fbc9a18f0abc5784607cd4a2a3886558efa3f794/keras/callbacks.py

    Callbacks currently created: BaseLogger and CSVLogger.
    Other possible callbacks to add in future:
    EarlyStopping, RemoteMonitor, LearningRateScheduler

    Argument list:
        - conf: configuration mapping. Only
          conf['paths']['csvlog_save_path'] is read: the directory that
          receives the CSV log. It is created if it does not exist.

    NOTE(review): the "callbacks" section of conf (list, metrics, mode,
    monitor, patience) is not read by this function at the moment; those
    settings only matter once additional callbacks such as EarlyStopping
    are wired in.

    Returns:
        cbks.CallbackList holding the BaseLogger and a CSVLogger that writes
        to "callbacks-<YYYY-mm-dd-HH-MM-SS>.log" inside csvlog_save_path.
    '''
    csvlog_save_path = conf['paths']['csvlog_save_path']

    # CSV callback is on by default, so its directory must exist. Creating
    # it unconditionally and tolerating "already there" avoids the race
    # between an existence check and the creation.
    try:
        os.makedirs(csvlog_save_path)
    except OSError:
        if not os.path.isdir(csvlog_save_path):
            raise

    log_name = "callbacks-{}.log".format(
        datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S"))

    # os.path.join keeps the log inside the directory whether or not the
    # configured path ends with a separator.
    callbacks = [
        cbks.BaseLogger(),
        cbks.CSVLogger(os.path.join(csvlog_save_path, log_name)),
    ]
    return cbks.CallbackList(callbacks)
Esempio n. 3
0
    def _fit(self,
             f,
             nb_train_sample,
             nb_batches,
             batch_size=128,
             nb_epoch=100,
             verbose=1,
             callbacks=None,
             shuffle=True,
             metrics=None):
        """
            Abstract fit loop driving the callable ``f`` once per batch.

            ``f`` is called as ``f(self, batch_index, batch_logs)``; its
            return value is ignored, so anything the callbacks should see
            has to be written into ``batch_logs`` by ``f`` itself.

            # Arguments
                f: callable run for every batch.
                nb_train_sample: number of training samples, reported to the
                    callbacks as ``nb_sample``.
                nb_batches: number of batches per epoch.
                batch_size: value reported as the ``size`` of every batch.
                nb_epoch: maximum number of epochs.
                verbose: truthy to add a ``ProgbarLogger``.
                callbacks: optional list of extra callbacks.
                shuffle: not used by this method.
                metrics: optional list of metric names for the callbacks.

            # Returns
                The ``History`` callback.
        """
        history = cbks.History()
        callbacks = [cbks.BaseLogger()] + list(callbacks or []) + [history]
        if verbose:
            callbacks = callbacks + [cbks.ProgbarLogger()]

        callbacks = cbks.CallbackList(callbacks)
        callbacks._set_model(self)
        callbacks._set_params({
            # NOTE(review): the callbacks are told the derived batch size
            # while every batch log reports the ``batch_size`` argument;
            # confirm the two are meant to differ.
            'batch_size': nb_train_sample // nb_batches,
            'nb_epoch': nb_epoch,
            'nb_sample': nb_train_sample,
            'verbose': verbose,
            'do_validation': False,
            'metrics': metrics if metrics is not None else [],
        })
        callbacks.on_train_begin()

        self.stop_training = False
        for epoch in range(nb_epoch):
            callbacks.on_epoch_begin(epoch)
            # Bound before the batch loop so it exists even when that loop
            # does not run; it is intentionally empty at epoch end.
            epoch_logs = {}
            for batch_index in range(nb_batches):
                batch_logs = {'batch': batch_index, 'size': batch_size}
                callbacks.on_batch_begin(batch_index, batch_logs)

                f(self, batch_index, batch_logs)
                callbacks.on_batch_end(batch_index, batch_logs)

            callbacks.on_epoch_end(epoch, epoch_logs)
            if self.stop_training:
                break

        callbacks.on_train_end()
        return history
Esempio n. 4
0
    def fit_generator(self,
                      generator,
                      nb_epoch,
                      nb_batches_per_epoch,
                      callbacks=None,
                      batch_size=None,
                      verbose=False):
        """Train on ``(seq_input, real)`` batches drawn from ``generator``.

        Each batch is passed to ``self.train_on_batch`` and the outputs are
        logged under the labels ``'g'``, ``'d'`` and ``'m'``. The
        ``History`` callback is stored on ``self.history``.

        # Arguments
            generator: iterator yielding ``(seq_input, real)`` pairs.
            nb_epoch: number of epochs to run.
            nb_batches_per_epoch: batches consumed from ``generator`` per
                epoch.
            callbacks: optional list of extra callbacks.
            batch_size: used only to report ``nb_sample`` to the callbacks;
                when ``None`` it is inferred as twice the length of the
                first ``seq_input``.
            verbose: truthy to add a ``ProgbarLogger``.
        """
        if batch_size is None:
            # Peek at the first batch to infer the size, then put it back in
            # front of the stream so that no training data is dropped.
            import itertools
            first_batch = next(generator)
            batch_size = 2 * len(first_batch[0])
            generator = itertools.chain([first_batch], generator)

        out_labels = ['g', 'd', 'm']

        self.history = cbks.History()
        callbacks = ([cbks.BaseLogger()] + list(callbacks or []) +
                     [self.history])
        if verbose:
            callbacks += [cbks.ProgbarLogger()]
        callbacks = cbks.CallbackList(callbacks)
        callbacks.set_model(self)
        callbacks.set_params({
            'nb_epoch': nb_epoch,
            'nb_sample': nb_batches_per_epoch * batch_size,
            'verbose': verbose,
            'metrics': out_labels,
        })
        callbacks.on_train_begin()

        for e in range(nb_epoch):
            callbacks.on_epoch_begin(e)
            # One explicit dict per epoch, shared by every callback, instead
            # of relying on the callback library's default argument.
            epoch_logs = {}
            for batch_index, (seq_input, real) in enumerate(generator):
                batch_logs = {
                    'batch': batch_index,
                    'size': len(real) + len(seq_input),
                }
                callbacks.on_batch_begin(batch_index, batch_logs)
                outs = self.train_on_batch(seq_input, real)

                for label, value in zip(out_labels, outs):
                    batch_logs[label] = value

                callbacks.on_batch_end(batch_index, batch_logs)
                # The generator is shared across epochs, so stop after the
                # requested number of batches rather than exhausting it.
                if batch_index + 1 == nb_batches_per_epoch:
                    break

            callbacks.on_epoch_end(e, epoch_logs)
        callbacks.on_train_end()
Esempio n. 5
0
def callbacks(model, callbacks, params):
    """Assemble and configure the callback list for ``model``.

    A new ``History`` is attached to ``model.history``. The resulting list
    holds a ``BaseLogger``, a step-counting ``ProgbarLogger``, the user
    supplied ``callbacks`` (may be ``None``) and finally the history. The
    list is bound to ``model.callback_model`` when that attribute is set and
    truthy, otherwise to ``model``, and receives ``params`` extended with a
    ``'metrics'`` entry (metric names plus their ``val_`` counterparts).

    Returns the configured ``cbks.CallbackList``.
    """
    model.history = cbks.History()
    callback_list = cbks.CallbackList(
        [
            cbks.BaseLogger(stateful_metrics=model.stateful_metric_names),
            cbks.ProgbarLogger(count_mode='steps',
                               stateful_metrics=model.stateful_metric_names),
        ] + (callbacks or []) + [model.history])

    # A different model than the one being trained may receive callbacks.
    callback_list.set_model(getattr(model, 'callback_model', None) or model)

    metric_names = model.metrics_names
    merged_params = dict(params)
    merged_params['metrics'] = (
        metric_names + ['val_' + name for name in metric_names])
    callback_list.set_params(merged_params)
    return callback_list
Esempio n. 6
0
    def fit_with_pseudo_label(self,
                              steps_per_epoch,
                              validation_steps=None,
                              use_checkpoints=True,
                              class_labels=None,
                              verbose=1,
                              use_multiprocessing=False,
                              shuffle=False,
                              workers=1,
                              max_queue_size=10):
        """Train ``self.model`` batch by batch, mixing in pseudo-labelled data.

        Every step takes one batch from ``self.train_generator``. When
        ``self.no_label_generator.samples`` is positive, the model first
        predicts on ``self.no_label_generator``, the arg-max of those
        predictions is stored as that generator's ``classes``, and the next
        batch drawn from it is concatenated to the labelled batch before a
        single ``train_on_batch`` update. At the end of every epoch the model
        is evaluated on ``self.validation_generator`` (when there is one) and
        the results are handed to the callbacks under ``val_``-prefixed keys.

        # Arguments
            steps_per_epoch: number of training steps per epoch; replaced by
                ``len(self.train_generator)`` when that generator is a
                ``Sequence``.
            validation_steps: number of validation steps; defaults to
                ``self.validation_generator.samples // self.batch_size``.
            use_checkpoints: whether to attach the ``ModelCheckpoint``
                callback (monitors ``val_acc``, writes weights under
                ``../models_checkpoints/``).
            class_labels: not used by this method.
            verbose: verbosity given to the callbacks; a truthy value also
                adds a ``ProgbarLogger``.
            use_multiprocessing: forwarded to the training enqueuer and to
                ``evaluate_generator``.
            shuffle: forwarded to ``OrderedEnqueuer``; only used when the
                training generator is a ``Sequence``.
            workers: forwarded to the enqueuer and to ``evaluate_generator``.
            max_queue_size: forwarded to the enqueuer and to
                ``evaluate_generator``.

        # Returns
            The ``History`` callback stored on ``self.model.history``.

        # Raises
            ValueError: if validation data is a plain generator without
                ``validation_steps``, or if validation data or a generator
                output is not a 2- or 3-tuple.
        """
        # Default value if validation steps is none. Skipped when there is
        # no validation generator to take the sample count from.
        if validation_steps is None and self.validation_generator is not None:
            validation_steps = self.validation_generator.samples // self.batch_size

        wait_time = 0.01  # in seconds

        # Create a checkpoint callback
        checkpoint = ModelCheckpoint("../models_checkpoints/" +
                                     str(self.h5_filename) + ".h5",
                                     monitor='val_acc',
                                     verbose=1,
                                     save_best_only=True,
                                     save_weights_only=True,
                                     mode='auto',
                                     period=1)

        # Generate callbacks
        callback_list = []
        if use_checkpoints:
            callback_list.append(checkpoint)

        # Init train counters
        epoch = 0

        validation_data = self.validation_generator
        do_validation = bool(validation_data)
        self.model._make_train_function()
        if do_validation:
            self.model._make_test_function()

        # python 2 iterators expose 'next', python 3 ones '__next__'
        val_gen = (hasattr(validation_data, 'next')
                   or hasattr(validation_data, '__next__')
                   or isinstance(validation_data, Sequence))
        if (val_gen and not isinstance(validation_data, Sequence)
                and not validation_steps):
            raise ValueError('`validation_steps=None` is only valid for a'
                             ' generator based on the `keras.utils.Sequence`'
                             ' class. Please specify `validation_steps` or use'
                             ' the `keras.utils.Sequence` class.')

        # Prepare display labels.
        out_labels = self.model.metrics_names
        callback_metrics = out_labels + ['val_' + n for n in out_labels]

        # Prepare train callbacks
        self.model.history = cbks.History()
        callbacks = [cbks.BaseLogger()] + (callback_list or []) + \
            [self.model.history]
        if verbose:
            callbacks += [cbks.ProgbarLogger(count_mode='steps')]
        callbacks = cbks.CallbackList(callbacks)

        # it's possible to callback a different model than self:
        if hasattr(self.model, 'callback_model') and self.model.callback_model:
            callback_model = self.model.callback_model

        else:
            callback_model = self.model

        callbacks.set_model(callback_model)

        is_sequence = isinstance(self.train_generator, Sequence)
        if not is_sequence and use_multiprocessing and workers > 1:
            warnings.warn(
                UserWarning('Using a generator with `use_multiprocessing=True`'
                            ' and multiple workers may duplicate your data.'
                            ' Please consider using the`keras.utils.Sequence'
                            ' class.'))

        if is_sequence:
            steps_per_epoch = len(self.train_generator)

        enqueuer = None
        # Never assigned below; kept so the cleanup in ``finally`` stays
        # symmetric with the training enqueuer.
        val_enqueuer = None

        callbacks.set_params({
            'epochs': self.epochs,
            'steps': steps_per_epoch,
            'verbose': verbose,
            'do_validation': do_validation,
            'metrics': callback_metrics,
        })
        callbacks.on_train_begin()

        try:
            if do_validation and not val_gen:
                # Prepare data for validation
                if len(validation_data) == 2:
                    val_x, val_y = validation_data
                    val_sample_weight = None
                elif len(validation_data) == 3:
                    val_x, val_y, val_sample_weight = validation_data
                else:
                    raise ValueError('`validation_data` should be a tuple '
                                     '`(val_x, val_y, val_sample_weight)` '
                                     'or `(val_x, val_y)`. Found: ' +
                                     str(validation_data))
                val_x, val_y, val_sample_weights = self.model._standardize_user_data(
                    val_x, val_y, val_sample_weight)
                val_data = val_x + val_y + val_sample_weights
                if self.model.uses_learning_phase and not isinstance(
                        K.learning_phase(), int):
                    val_data += [0.]
                for cbk in callbacks:
                    cbk.validation_data = val_data

            if is_sequence:
                enqueuer = OrderedEnqueuer(
                    self.train_generator,
                    use_multiprocessing=use_multiprocessing,
                    shuffle=shuffle)
            else:
                enqueuer = GeneratorEnqueuer(
                    self.train_generator,
                    use_multiprocessing=use_multiprocessing,
                    wait_time=wait_time)
            enqueuer.start(workers=workers, max_queue_size=max_queue_size)
            output_generator = enqueuer.get()

            # Train the model

            # Construct epoch logs (one dict reused across all epochs).
            epoch_logs = {}
            # Epochs
            while epoch < self.epochs:
                callbacks.on_epoch_begin(epoch)
                steps_done = 0
                batch_index = 0

                # Steps per epoch
                while steps_done < steps_per_epoch:

                    generator_output = next(output_generator)

                    if len(generator_output) == 2:
                        x, y = generator_output
                        sample_weight = None
                    elif len(generator_output) == 3:
                        x, y, sample_weight = generator_output
                    else:
                        raise ValueError('Output of generator should be '
                                         'a tuple `(x, y, sample_weight)` '
                                         'or `(x, y)`. Found: ' +
                                         str(generator_output))

                    #==========================
                    # Mini-batch
                    #==========================
                    # A separate local keeps the ``verbose`` argument intact.
                    # ``print(...)`` with one argument behaves the same on
                    # Python 2 and Python 3.
                    if self.print_pseudo_generate:
                        print('')
                        print('Generating pseudo-labels...')
                        predict_verbose = 1
                    else:
                        predict_verbose = 0

                    if self.no_label_generator.samples > 0:
                        no_label_output = self.model.predict_generator(
                            self.no_label_generator,
                            self.no_label_generator.samples,
                            verbose=predict_verbose)

                        # Turn the predictions into class indices and use
                        # them as the pseudo-labels of the unlabelled data.
                        self.no_label_generator.classes = np.argmax(
                            no_label_output, axis=1)

                        # Concat Pseudo labels with true labels
                        x_pseudo, y_pseudo = next(self.no_label_generator)
                        x, y = np.concatenate((x, x_pseudo),
                                              axis=0), np.concatenate(
                                                  (y, y_pseudo), axis=0)

                    # build batch logs
                    batch_logs = {}
                    if isinstance(x, list):
                        batch_size = x[0].shape[0]
                    elif isinstance(x, dict):
                        batch_size = list(x.values())[0].shape[0]
                    else:
                        batch_size = x.shape[0]
                    batch_logs['batch'] = batch_index
                    batch_logs['size'] = batch_size
                    callbacks.on_batch_begin(batch_index, batch_logs)

                    # Runs a single gradient update on a single batch of data
                    # NOTE(review): ``sample_weight`` from the generator is
                    # not forwarded here -- confirm that is intended.
                    scalar_training_loss = self.model.train_on_batch(x=x, y=y)

                    if not isinstance(scalar_training_loss, list):
                        scalar_training_loss = [scalar_training_loss]
                    for l, o in zip(out_labels, scalar_training_loss):
                        batch_logs[l] = o

                    callbacks.on_batch_end(batch_index, batch_logs)

                    #==========================
                    # end Mini-batch
                    #==========================

                    batch_index += 1
                    steps_done += 1

                if steps_done >= steps_per_epoch and do_validation:
                    if val_gen:
                        val_outs = self.model.evaluate_generator(
                            validation_data,
                            validation_steps,
                            workers=workers,
                            use_multiprocessing=use_multiprocessing,
                            max_queue_size=max_queue_size)
                    else:
                        # No need for try/except because
                        # data has already been validated.
                        val_outs = self.model.evaluate(
                            val_x,
                            val_y,
                            batch_size=batch_size,
                            sample_weight=val_sample_weights,
                            verbose=0)
                    if not isinstance(val_outs, list):
                        val_outs = [val_outs]
                    # Same labels assumed.
                    for l, o in zip(out_labels, val_outs):
                        epoch_logs['val_' + l] = o

                # Epoch finished.
                # NOTE(review): ``callback_model.stop_training`` is never
                # checked, so a callback cannot stop training early.
                callbacks.on_epoch_end(epoch, epoch_logs)
                epoch += 1

        finally:
            try:
                if enqueuer is not None:
                    enqueuer.stop()
            finally:
                if val_enqueuer is not None:
                    val_enqueuer.stop()

        callbacks.on_train_end()
        return self.model.history
Esempio n. 7
0
def _fit_loop(self, f, ins, out_labels=None, batch_size=32,
              nb_epoch=100, verbose=1, callbacks=None,
              val_f=None, val_ins=None, shuffle=True,
              callback_metrics=None, initial_epoch=0):
    """Generic training loop around the Keras function `f`.

    `f` is called on successive slices of `ins`; its outputs are logged
    under the names given by `out_labels`. This variant additionally puts
    the sample indices of each batch into the batch logs (key `'ids'`) and
    stores `val_ins` on `self.validation_data`.

    # Arguments
        f: Keras function returning a list of tensors
        ins: list of tensors to be fed to `f`
        out_labels: list of strings, display names of the outputs of `f`
        batch_size: integer batch size
        nb_epoch: epoch index at which training stops
        verbose: verbosity mode, 0, 1 or 2
        callbacks: list of callbacks to be called during training
        val_f: Keras function to call for validation
        val_ins: list of tensors to be fed to `val_f`
        shuffle: `'batch'` to shuffle in batch-sized chunks, any other
            truthy value to shuffle individual samples every epoch
        callback_metrics: list of strings, the display names of the
            metrics passed to the callbacks (training names followed by
            validation names)
        initial_epoch: epoch at which to start training
            (useful for resuming a previous training run)

    # Returns
        `History` object.
    """
    do_validation = bool(val_f and val_ins)
    if do_validation and verbose:
        print('Train on %d samples, validate on %d samples' %
              (ins[0].shape[0], val_ins[0].shape[0]))

    nb_train_sample = ins[0].shape[0]
    index_array = np.arange(nb_train_sample)

    self.history = cbks.History()
    callback_chain = ([cbks.BaseLogger()] + (callbacks or []) +
                      [self.history])
    if verbose:
        callback_chain.append(cbks.ProgbarLogger())
    callback_list = cbks.CallbackList(callback_chain)
    out_labels = out_labels or []

    # Sequential models ask for callbacks to be bound to another model.
    callback_model = getattr(self, 'callback_model', None) or self

    callback_list.set_model(callback_model)
    callback_list.set_params({
        'batch_size': batch_size,
        'nb_epoch': nb_epoch,
        'nb_sample': nb_train_sample,
        'verbose': verbose,
        'do_validation': do_validation,
        'metrics': callback_metrics or [],
    })
    callback_list.on_train_begin()
    callback_model.stop_training = False
    self.validation_data = val_ins

    for epoch in range(initial_epoch, nb_epoch):
        callback_list.on_epoch_begin(epoch)
        if shuffle == 'batch':
            index_array = batch_shuffle(index_array, batch_size)
        elif shuffle:
            np.random.shuffle(index_array)

        batches = make_batches(nb_train_sample, batch_size)
        epoch_logs = {}
        for batch_index, (start, stop) in enumerate(batches):
            batch_ids = index_array[start:stop]
            try:
                # A trailing float is the learning-phase flag: never slice it.
                if isinstance(ins[-1], float):
                    ins_batch = slice_X(ins[:-1], batch_ids) + [ins[-1]]
                else:
                    ins_batch = slice_X(ins, batch_ids)
            except TypeError:
                raise TypeError('TypeError while preparing batch. '
                                'If using HDF5 input data, '
                                'pass shuffle="batch".')

            batch_logs = {
                'batch': batch_index,
                'size': len(batch_ids),
                'ids': batch_ids,
            }
            callback_list.on_batch_begin(batch_index, batch_logs)

            outs = f(ins_batch)
            if not isinstance(outs, list):
                outs = [outs]
            batch_logs.update(zip(out_labels, outs))

            callback_list.on_batch_end(batch_index, batch_logs)

            # Validation runs once per epoch, right after the last batch.
            is_last_batch = batch_index == len(batches) - 1
            if is_last_batch and do_validation:
                val_outs = self._test_loop(val_f, val_ins,
                                           batch_size=batch_size,
                                           verbose=0)
                if not isinstance(val_outs, list):
                    val_outs = [val_outs]
                # Validation outputs share the training labels.
                for label, value in zip(out_labels, val_outs):
                    epoch_logs['val_' + label] = value

        callback_list.on_epoch_end(epoch, epoch_logs)
        if callback_model.stop_training:
            break

    callback_list.on_train_end()
    return self.history
Esempio n. 8
0
def train_model(name,
                g_train,
                d_train,
                sampler,
                generator,
                samples_per_epoch,
                nb_epoch,
                z_dim=100,
                verbose=1,
                callbacks=None,
                validation_data=None,
                nb_val_samples=None,
                saver=None):
    """
    Main training loop.
    modified from Keras fit_generator

    Each step draws one ``(z, x)`` pair from ``generator`` for ``d_train``
    and a second pair for ``g_train``. Every 100th batch of an epoch the
    training samples and the output of ``sampler`` are written as image
    grids under ``./outputs/samples_<name>/``.

    # Arguments
        name: run name, used in the sample image paths.
        g_train: callable ``(x, z, counter) -> (g_loss, samples, xs)``.
        d_train: callable ``(x, z, counter)`` returning a tuple of losses.
        sampler: callable ``(z, x) -> (samples, xs)``.
        generator: iterator yielding ``(z, x)`` pairs.
        samples_per_epoch: number of samples after which an epoch ends.
        nb_epoch: number of epochs to run.
        z_dim: not used by this function.
        verbose: truthy to add a ``ProgbarLogger``.
        callbacks: optional list of extra callbacks.
        validation_data: not used by this function.
        nb_val_samples: not used by this function.
        saver: optional callable invoked as ``saver(epoch)`` after every
            epoch.

    # Returns
        The ``History`` callback.
    """
    counter = 0
    out_labels = ['g_loss', 'd_loss', 'd_loss_fake', 'd_loss_legit',
                  'time']  # self.metrics_names
    callback_metrics = out_labels + ['val_' + n for n in out_labels]

    # prepare callbacks
    history = cbks.History()
    callbacks = [cbks.BaseLogger()] + list(callbacks or []) + [history]
    if verbose:
        callbacks += [cbks.ProgbarLogger()]
    callbacks = cbks.CallbackList(callbacks)

    callbacks._set_params({
        'nb_epoch': nb_epoch,
        'nb_sample': samples_per_epoch,
        'verbose': verbose,
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()

    for epoch in range(nb_epoch):
        callbacks.on_epoch_begin(epoch)
        # Bound per epoch, before the batch loop, so it exists even when
        # the loop body never runs.
        epoch_logs = {}
        samples_seen = 0
        batch_index = 0
        while samples_seen < samples_per_epoch:
            z, x = next(generator)
            # build batch logs
            if isinstance(x, list):
                batch_size = len(x[0])
            elif isinstance(x, dict):
                batch_size = len(list(x.values())[0])
            else:
                batch_size = len(x)
            batch_logs = {'batch': batch_index, 'size': batch_size}
            callbacks.on_batch_begin(batch_index, batch_logs)

            t1 = time.time()
            d_losses = d_train(x, z, counter)
            # The generator step uses a fresh batch.
            z, x = next(generator)
            g_loss, samples, xs = g_train(x, z, counter)
            outs = (g_loss, ) + d_losses + (time.time() - t1, )
            counter += 1

            # save samples
            if batch_index % 100 == 0:
                save_images(
                    _interleave_rows(samples[:64], xs[:64]), [8 * 2, 8],
                    './outputs/samples_%s/train_%s_%s.png' %
                    (name, epoch, batch_index))

                samples, xs = sampler(z, x)
                save_images(
                    _interleave_rows(samples[:64], xs[:64]), [8 * 2, 8],
                    './outputs/samples_%s/test_%s_%s.png' %
                    (name, epoch, batch_index))

            for label, value in zip(out_labels, outs):
                batch_logs[label] = value

            callbacks.on_batch_end(batch_index, batch_logs)

            batch_index += 1
            samples_seen += batch_size

        if saver is not None:
            saver(epoch)

        callbacks.on_epoch_end(epoch, epoch_logs)

    # _stop.set()
    callbacks.on_train_end()
    return history


def _interleave_rows(first, second):
    """Return an array whose rows alternate between `first` and `second`."""
    joined = np.zeros_like(np.concatenate([first, second], axis=0))
    for j, (row_a, row_b) in enumerate(zip(first, second)):
        joined[j * 2] = row_a
        joined[j * 2 + 1] = row_b
    return joined
0

# Report the dataset size. The number of batches is the sample count divided
# by the batch size, rounded up -- the division has to happen inside the
# ceiling (and in floating point) or a final partial batch is not counted.
log(
    json.dumps({
        'num_classes':
        len(classes),
        'num_batches':
        int(math.ceil(float(fpnum.sum()) / props['data']['batchsize']))
    }))

# NOTE(review): this evaluates to 0 when there are fewer than 50 batches;
# verify that the code consuming `logfreq` copes with that.
logfreq = int((fpnum.sum() / props['data']['batchsize']) / 50)

# setup training

cbs = []
cbs.append(keras_callbacks.BaseLogger())

cbs = keras_callbacks.CallbackList(cbs)
cbs._set_model(model)
cbs._set_params({
    'batch_size': props['data']['batchsize'],
    'nb_epoch': args.epochs,
    'nb_sample': fpnum.sum(),
    'verbose': True,
    'do_validation': bool(props['data']['val']),
    'metrics': ['loss', 'acc', 'val_loss', 'val_acc']
})
cbs.on_train_begin()

# Per-step loss and accuracy values are collected in these lists.
losses = []
accs = []
Esempio n. 10
0
def _fit_loop(self,
              f,
              ins,
              out_labels=None,
              batch_size=32,
              epochs=100,
              verbose=1,
              callbacks=None,
              val_f=None,
              val_ins=None,
              shuffle=True,
              callback_metrics=None,
              initial_epoch=0,
              steps_per_epoch=None,
              validation_steps=None):
    """Generic training loop around the Keras function `f`.

    Runs either step-wise (`steps_per_epoch` set: `f(ins)` is called that
    many times per epoch) or sample-wise (`ins` is sliced into batches).
    The outputs of `f` are logged under the names in `out_labels`. In
    sample-wise mode this variant also puts the sample indices of each
    batch into the batch logs under the key `'ids'`.

    # Arguments
        f: Keras function returning a list of tensors
        ins: List of tensors to be fed to `f`
        out_labels: List of strings, display names of the outputs of `f`
        batch_size: Integer batch size or None if unknown.
        epochs: Epoch index at which training stops
        verbose: Verbosity mode, 0, 1 or 2
        callbacks: List of callbacks to be called during training
        val_f: Keras function to call for validation
        val_ins: List of tensors to be fed to `val_f`
        shuffle: `'batch'` to shuffle in batch-sized chunks, any other
            truthy value to shuffle individual samples every epoch
            (sample-wise mode only)
        callback_metrics: List of strings, the display names of the
            metrics passed to the callbacks (training names followed by
            validation names)
        initial_epoch: Epoch at which to start training
            (useful for resuming a previous training run)
        steps_per_epoch: Number of steps (batches) that make up one epoch;
            `None` selects sample-wise mode.
        validation_steps: Number of validation steps; a truthy value
            enables validation and requires `steps_per_epoch`.

    # Returns
        `History` object.

    # Raises
        ValueError: if `validation_steps` is given without
            `steps_per_epoch`.
    """
    def _as_list(values):
        # A Keras function may hand back a bare value instead of a list.
        return values if isinstance(values, list) else [values]

    do_validation = bool(val_f and val_ins)
    if (do_validation and verbose and ins and hasattr(ins[0], 'shape')
            and hasattr(val_ins[0], 'shape')):
        print('Train on %d samples, validate on %d samples' %
              (ins[0].shape[0], val_ins[0].shape[0]))
    if validation_steps:
        do_validation = True
        if steps_per_epoch is None:
            raise ValueError('Can only use `validation_steps` '
                             'when doing step-wise '
                             'training, i.e. `steps_per_epoch` '
                             'must be set.')

    num_train_samples = self._check_num_samples(ins, batch_size,
                                                steps_per_epoch,
                                                'steps_per_epoch')
    if num_train_samples is not None:
        index_array = np.arange(num_train_samples)

    self.history = cbks.History()
    callback_chain = ([cbks.BaseLogger()] + (callbacks or []) +
                      [self.history])
    if verbose:
        count_mode = 'steps' if steps_per_epoch is not None else 'samples'
        callback_chain.append(cbks.ProgbarLogger(count_mode))
    callback_list = cbks.CallbackList(callback_chain)
    out_labels = out_labels or []

    # Sequential models ask for callbacks to be bound to another model.
    callback_model = getattr(self, 'callback_model', None) or self

    callback_list.set_model(callback_model)
    callback_list.set_params({
        'batch_size': batch_size,
        'epochs': epochs,
        'steps': steps_per_epoch,
        'samples': num_train_samples,
        'verbose': verbose,
        'do_validation': do_validation,
        'metrics': callback_metrics or [],
    })
    callback_list.on_train_begin()
    callback_model.stop_training = False

    for epoch in range(initial_epoch, epochs):
        callback_list.on_epoch_begin(epoch)
        epoch_logs = {}
        if steps_per_epoch is not None:
            # Step-wise mode: the inputs are fed as they are on every step.
            for step_index in range(steps_per_epoch):
                batch_logs = {'batch': step_index, 'size': 1}
                callback_list.on_batch_begin(step_index, batch_logs)
                batch_logs.update(zip(out_labels, _as_list(f(ins))))

                callback_list.on_batch_end(step_index, batch_logs)
                if callback_model.stop_training:
                    break

            if do_validation:
                val_outs = _as_list(
                    self._test_loop(val_f,
                                    val_ins,
                                    batch_size=batch_size,
                                    steps=validation_steps,
                                    verbose=0))
                # Validation outputs share the training labels.
                for label, value in zip(out_labels, val_outs):
                    epoch_logs['val_' + label] = value
        else:
            # Sample-wise mode: slice the inputs into index batches.
            if shuffle == 'batch':
                index_array = _batch_shuffle(index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)

            batches = _make_batches(num_train_samples, batch_size)
            for batch_index, (start, stop) in enumerate(batches):
                batch_ids = index_array[start:stop]
                try:
                    # A trailing float is the learning-phase flag: it must
                    # not be sliced.
                    if isinstance(ins[-1], float):
                        ins_batch = _slice_arrays(ins[:-1],
                                                  batch_ids) + [ins[-1]]
                    else:
                        ins_batch = _slice_arrays(ins, batch_ids)
                except TypeError:
                    raise TypeError('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".')

                batch_logs = {
                    'batch': batch_index,
                    'size': len(batch_ids),
                    'ids': batch_ids,
                }
                callback_list.on_batch_begin(batch_index, batch_logs)
                batch_logs.update(zip(out_labels, _as_list(f(ins_batch))))

                callback_list.on_batch_end(batch_index, batch_logs)
                if callback_model.stop_training:
                    break

                # Validation runs once per epoch, after the last batch.
                is_last_batch = batch_index == len(batches) - 1
                if is_last_batch and do_validation:
                    val_outs = _as_list(
                        self._test_loop(val_f,
                                        val_ins,
                                        batch_size=batch_size,
                                        verbose=0))
                    # Validation outputs share the training labels.
                    for label, value in zip(out_labels, val_outs):
                        epoch_logs['val_' + label] = value

        callback_list.on_epoch_end(epoch, epoch_logs)
        if callback_model.stop_training:
            break

    callback_list.on_train_end()
    return self.history
Esempio n. 11
0
def fit_generator_autosized(
        model,
        generator,
        epochs=1,
        # No `steps_per_epoch`: the epoch length is measured in the first epoch.
        verbose=1,
        callbacks=None,
        validation_data=None,
        validation_steps=None,
        validation_callbacks=None,
        class_weight=None,
        max_queue_size=10,
        workers=1,
        use_multiprocessing=False,
        shuffle=True,
        initial_epoch=0):
    """Train `model` from a generator without a preset epoch length.

    Variant of `Model.fit_generator` (see its docstring for the shared
    arguments) that takes no `steps_per_epoch`. An epoch ends when the
    batch iterator yields a falsy value (e.g. `None`) or is exhausted.
    The number of batches counted during the first epoch is afterwards
    reported to the callbacks as the `'steps'` parameter.

    # Arguments
        model: model to train. Its train (and, with validation, test)
            function is built here and `model.history` is replaced.
        generator: yields `(x, y)` or `(x, y, sample_weight)` tuples.
        epochs: training runs while the epoch index is below this value.
        verbose: if truthy, a progress bar is shown. The first epoch uses
            a `Progbar` with unknown target, later epochs use a
            `ProgbarLogger` in `'steps'` mode.
        callbacks: optional list of extra callbacks.
        validation_data: generator / `Sequence`, or a tuple
            `(val_x, val_y)` / `(val_x, val_y, val_sample_weight)`.
        validation_steps: forwarded to `evaluate_generator_autosized`
            and replaced by the step count that call returns.
        validation_callbacks: forwarded to `evaluate_generator_autosized`.
        class_weight: forwarded to `model.train_on_batch`.
        max_queue_size: queue size of the batch enqueuer.
        workers: number of enqueuer workers; `0` iterates `generator`
            directly in the calling thread.
        use_multiprocessing: forwarded to the enqueuer.
        shuffle: forwarded to `OrderedEnqueuer` (only used when
            `generator` is a `Sequence` and `workers > 0`).
        initial_epoch: epoch index at which to start.

    # Returns
        The `History` object stored in `model.history`.

    # Raises
        ValueError: if `validation_data` or a generator output does not
            have 2 or 3 elements.
    """
    wait_time = 0.01  # in seconds
    epoch = initial_epoch

    do_validation = bool(validation_data)
    model._make_train_function()
    if do_validation:
        model._make_test_function()

    is_sequence = isinstance(generator, Sequence)
    if not is_sequence and use_multiprocessing and workers > 1:
        warnings.warn(
            UserWarning('Using a generator with `use_multiprocessing=True`'
                        ' and multiple workers may duplicate your data.'
                        ' Please consider using the`keras.utils.Sequence'
                        ' class.'))

    # python 2 has 'next', 3 has '__next__'
    # avoid any explicit version checks
    val_gen = (hasattr(validation_data, 'next')
               or hasattr(validation_data, '__next__')
               or isinstance(validation_data, Sequence))

    # Prepare display labels.
    out_labels = model.metrics_names
    callback_metrics = out_labels + ['val_' + n for n in out_labels]

    # Prepare callbacks.
    model.history = cbks.History()
    _callbacks = [
        cbks.BaseLogger(stateful_metrics=model.stateful_metric_names)
    ]
    # A plain Progbar replaces ProgbarLogger during the first epoch only,
    # because the number of steps is not known yet.
    if verbose:
        print('Epoch 1/%d' % epochs)
        progbar = Progbar(target=None,
                          verbose=1,
                          stateful_metrics=model.stateful_metric_names)
    _callbacks += (callbacks or []) + [model.history]
    callbacks = cbks.CallbackList(_callbacks)

    # It's possible to callback a different model than `model`.
    if hasattr(model, 'callback_model') and model.callback_model:
        callback_model = model.callback_model
    else:
        callback_model = model
    callbacks.set_model(callback_model)
    callbacks.set_params({
        'epochs': epochs,
        'steps': None,  # will be refined during first epoch
        'verbose': verbose,
        'do_validation': do_validation,
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()

    enqueuer = None

    try:
        if do_validation and not val_gen:
            # Prepare array data for validation.
            if len(validation_data) == 2:
                val_x, val_y = validation_data
                val_sample_weight = None
            elif len(validation_data) == 3:
                val_x, val_y, val_sample_weight = validation_data
            else:
                raise ValueError('`validation_data` should be a tuple '
                                 '`(val_x, val_y, val_sample_weight)` '
                                 'or `(val_x, val_y)`. Found: ' +
                                 str(validation_data))
            val_x, val_y, val_sample_weights = model._standardize_user_data(
                val_x, val_y, val_sample_weight)
            val_data = val_x + val_y + val_sample_weights
            if model.uses_learning_phase and not isinstance(
                    K.learning_phase(), int):
                val_data += [0.]
            for cbk in callbacks:
                cbk.validation_data = val_data

        if workers > 0:
            if is_sequence:
                enqueuer = OrderedEnqueuer(
                    generator,
                    use_multiprocessing=use_multiprocessing,
                    shuffle=shuffle)
            else:
                enqueuer = GeneratorEnqueuer(
                    generator,
                    use_multiprocessing=use_multiprocessing,
                    wait_time=wait_time)
            enqueuer.start(workers=workers, max_queue_size=max_queue_size)
            output_generator = enqueuer.get()
        else:
            if is_sequence:
                output_generator = iter(generator)
            else:
                output_generator = generator

        callback_model.stop_training = False
        # Construct epoch logs.
        epoch_logs = {}
        while epoch < epochs:
            for m in model.stateful_metric_functions:
                m.reset_states()
            callbacks.on_epoch_begin(epoch)
            steps_done = 0
            batch_index = 0
            # Bound up front so the progress update after the loop also
            # works when the iterator produces no batch at all.
            batch_logs = {}
            for generator_output in output_generator:
                if not generator_output:  # falsy output marks end of epoch
                    break
                if (not hasattr(generator_output, '__len__')
                        or len(generator_output) not in (2, 3)):
                    raise ValueError('Output of generator should be '
                                     'a tuple `(x, y, sample_weight)` '
                                     'or `(x, y)`. Found: ' +
                                     str(generator_output))

                if len(generator_output) == 2:
                    x, y = generator_output
                    sample_weight = None
                else:
                    x, y, sample_weight = generator_output

                # Build batch logs.
                batch_logs = {}
                # `not x` would raise for a multi-element NumPy array
                # (ambiguous truth value), so test emptiness explicitly.
                if x is None or len(x) == 0:
                    # Handle data tensors support when no input given
                    # step-size = 1 for data tensors
                    batch_size = 1
                elif isinstance(x, list):
                    batch_size = x[0].shape[0]
                elif isinstance(x, dict):
                    batch_size = list(x.values())[0].shape[0]
                else:
                    batch_size = x.shape[0]
                batch_logs['batch'] = batch_index
                batch_logs['size'] = batch_size
                callbacks.on_batch_begin(batch_index, batch_logs)

                outs = model.train_on_batch(x,
                                            y,
                                            sample_weight=sample_weight,
                                            class_weight=class_weight)

                if not isinstance(outs, list):
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)
                if epoch == initial_epoch and verbose:
                    log_values = [(k, batch_logs[k])
                                  for k in callback_metrics
                                  if k in batch_logs]
                    progbar.update(steps_done, log_values)

                batch_index += 1
                steps_done += 1

                if callback_model.stop_training:
                    break

            if epoch == initial_epoch and verbose:
                # Final update with the total number of steps seen.
                log_values = [(k, batch_logs[k])
                              for k in callback_metrics
                              if k in batch_logs]
                progbar.update(steps_done, log_values)

            # Epoch finished.
            if do_validation:
                if val_gen:
                    val_outs, validation_steps = evaluate_generator_autosized(
                        model,
                        validation_data,
                        steps=validation_steps,
                        callbacks=validation_callbacks,
                        workers=workers,
                        use_multiprocessing=use_multiprocessing,
                        max_queue_size=max_queue_size,
                        verbose=1)
                else:
                    # No need for try/except because
                    # data has already been validated.
                    val_outs = model.evaluate(val_x,
                                              val_y,
                                              batch_size=batch_size,
                                              sample_weight=val_sample_weights,
                                              verbose=0)
                if not isinstance(val_outs, list):
                    val_outs = [val_outs]
                # Same labels assumed.
                for l, o in zip(out_labels, val_outs):
                    epoch_logs['val_' + l] = o
                # Do not leave the epoch loop here even if a stop was
                # requested: `on_epoch_end` must still run so the last
                # epoch is reported. The check after it ends training.

            callbacks.on_epoch_end(epoch, epoch_logs)
            if epoch == initial_epoch:
                if verbose:
                    print()
                    # The step count is known now: switch to ProgbarLogger.
                    progbar = cbks.ProgbarLogger(
                        count_mode='steps',
                        stateful_metrics=model.stateful_metric_names)
                    progbar.set_model(callback_model)
                    callbacks.append(progbar)
                callbacks.set_params({
                    'epochs': epochs,
                    'steps': steps_done,  # refine
                    'verbose': verbose,
                    'do_validation': do_validation,
                    'metrics': callback_metrics,
                })
                if verbose:
                    progbar.on_train_begin()

            epoch += 1
            if callback_model.stop_training:
                break

    finally:
        if enqueuer is not None:
            enqueuer.stop()

    callbacks.on_train_end()
    return model.history
Esempio n. 12
0
    def _fit_loop(self,
                  f,
                  ins,
                  out_labels=None,
                  batch_size=32,
                  epochs=100,
                  verbose=1,
                  callbacks=None,
                  val_f=None,
                  val_ins=None,
                  shuffle=True,
                  callback_metrics=None,
                  initial_epoch=0,
                  steps_per_epoch=None):
        """Generic epoch/batch training loop around the function `f`.

        `f` is called once per batch and its outputs are logged under the
        names given in `out_labels`.

        # Arguments
            f: Keras function returning a list of tensors
            ins: list of tensors to be fed to `f`
            out_labels: list of strings, display names of
                the outputs of `f`
            batch_size: integer batch size
            epochs: number of times to iterate over the data
            verbose: verbosity mode, 0, 1 or 2
            callbacks: list of callbacks to be called during training
            val_f: Keras function to call for validation
            val_ins: list of tensors to be fed to `val_f`
            shuffle: `'batch'` to shuffle in batch-sized chunks, any other
                truthy value to shuffle the sample indices each epoch
            callback_metrics: list of strings, the display names of the
                metrics passed to the callbacks (outputs of `f` followed
                by the outputs of `val_f`)
            initial_epoch: epoch at which to start training
                (useful for resuming a previous training run)
            steps_per_epoch: if given, used as the number of training
                samples; otherwise that number is taken from the first
                input array, or falls back to `batch_size` when the
                inputs carry no shape.

        # Returns
            `History` object.

        # Raises
            TypeError: if slicing the inputs for a batch fails with a
                `TypeError` (re-raised with a hint about HDF5 data).
        """
        do_validation = bool(val_f and val_ins)
        if do_validation and verbose and ins and hasattr(ins[0], 'shape'):
            print('Train on %d samples, validate on %d samples' %
                  (ins[0].shape[0], val_ins[0].shape[0]))

        if steps_per_epoch is not None:
            num_train_samples = steps_per_epoch
        elif ins and hasattr(ins[0], 'shape'):
            num_train_samples = ins[0].shape[0]
        else:
            # No Numpy input data (all model inputs are data tensors):
            # run over a single batch and force verbosity mode 2.
            num_train_samples = batch_size
            verbose = 2
        index_array = np.arange(num_train_samples)

        self.history = cbks.History()
        callback_objs = [cbks.BaseLogger()] + (callbacks or []) + [self.history]
        if verbose:
            # Used in place of the stock cbks.ProgbarLogger.
            callback_objs.append(ProgbarLogger_TFRecord())
        callbacks = cbks.CallbackList(callback_objs)
        out_labels = out_labels or []

        # A different model than self may receive the callbacks
        # (used by Sequential models).
        callback_model = getattr(self, 'callback_model', None) or self

        callback_params = {
            'batch_size': batch_size,
            'epochs': epochs,
            'samples': num_train_samples,
            'verbose': verbose,
            'do_validation': do_validation,
            'metrics': callback_metrics or [],
        }
        callbacks.set_model(callback_model)
        callbacks.set_params(callback_params)
        callbacks.on_train_begin()
        callback_model.stop_training = False
        for cbk in callbacks:
            cbk.validation_data = val_ins

        for epoch in range(initial_epoch, epochs):
            callbacks.on_epoch_begin(epoch)
            if shuffle == 'batch':
                index_array = _batch_shuffle(index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)

            batches = _make_batches(num_train_samples, batch_size)
            last_batch_index = len(batches) - 1
            epoch_logs = {}
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                try:
                    if isinstance(ins[-1], float):
                        # The trailing float is the training phase flag:
                        # slice the arrays only and re-attach the flag.
                        ins_batch = _slice_arrays(ins[:-1], batch_ids)
                        ins_batch = ins_batch + [ins[-1]]
                    else:
                        ins_batch = _slice_arrays(ins, batch_ids)
                except TypeError:
                    raise TypeError('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".')
                batch_logs = {'batch': batch_index, 'size': len(batch_ids)}
                callbacks.on_batch_begin(batch_index, batch_logs)

                outs = f(ins_batch)
                if not isinstance(outs, list):
                    outs = [outs]
                batch_logs.update(zip(out_labels, outs))

                callbacks.on_batch_end(batch_index, batch_logs)
                if callback_model.stop_training:
                    break

                # Validate once per epoch, right after the last batch.
                if do_validation and batch_index == last_batch_index:
                    val_outs = self._test_loop(val_f,
                                               val_ins,
                                               batch_size=batch_size,
                                               verbose=0)
                    if not isinstance(val_outs, list):
                        val_outs = [val_outs]
                    # Same labels assumed.
                    epoch_logs.update(
                        ('val_' + label, value)
                        for label, value in zip(out_labels, val_outs))
            callbacks.on_epoch_end(epoch, epoch_logs)
            if callback_model.stop_training:
                break
        callbacks.on_train_end()
        return self.history
def fit_and_predict_generator_with_sceneinst_metrics(
        model,
        generator,
        params,
        multithreading_metrics=False,
        steps_per_epoch=None,
        epochs=1,
        verbose=1,
        callbacks=None,
        validation_data=None,
        validation_steps=None,
        max_queue_size=10,
        workers=1,
        use_multiprocessing=False,
        shuffle=True,
        initial_epoch=0):
    """Train from a generator while collecting per-scene-instance metrics.

    Follows `Model.fit_generator` (see its docstring for the shared
    arguments). Each batch is trained with
    `heiner_train_and_predict_on_batch`, the returned logits are turned
    into hard class decisions, and the per-scene-instance metrics are
    accumulated in `model.scene_instance_id_metrics_dict_train`, which is
    reset at the start of every epoch. Only `'loss'` is written to the
    batch logs. `model.gradient_norm` is overwritten after every batch.

    # Arguments
        model: model to train; `model.history` is replaced.
        generator: yields `(x, y)` or `(x, y, sample_weight)`. `y` is
            indexed as `y[:, :, :, 0]` (labels fed to training) and
            `y[:, :, 0, 1]` (passed to the sample-weight helper). Unless
            `params['nosceneinstweights']` is set, `generator` must also
            provide `length_dict` and `scene_instance_ids_dict`.
        params: dict; the keys read here are `'mask_value'`,
            `'nosceneinstweights'`, `'nocalcgradientnorm'` and
            `'outputthreshold'`.
        multithreading_metrics: if truthy, `(y_pred, y)` pairs are pushed
            to a queue consumed by one worker thread per epoch instead of
            computing the metrics inline.
        steps_per_epoch: batches per epoch; may be `None` only for a
            `Sequence`, in which case `len(generator)` is used.
        epochs: training runs while the epoch index is below this value.
        verbose: if truthy, a `ProgbarLogger` is added.
        callbacks: optional list of extra callbacks.
        validation_data: generator / `Sequence`, or a tuple
            `(val_x, val_y)` / `(val_x, val_y, val_sample_weight)`.
        validation_steps: validation batches per epoch; defaults to
            `len(validation_data)` for a `Sequence`.
        max_queue_size: queue size of the enqueuers.
        workers: number of enqueuer workers; `0` iterates the generators
            directly in the calling thread.
        use_multiprocessing: forwarded to the enqueuers.
        shuffle: forwarded to `OrderedEnqueuer` for a `Sequence`.
        initial_epoch: epoch index at which to start.

    # Returns
        The `History` object stored in `model.history`.

    # Raises
        ValueError: if `steps_per_epoch` / `validation_steps` is missing
            for a plain generator, or if `validation_data` or a generator
            output does not have 2 or 3 elements.
    """
    wait_time = 0.01  # in seconds
    epoch = initial_epoch

    do_validation = bool(validation_data)
    model._make_train_function()
    if do_validation:
        model._make_test_function()

    is_sequence = isinstance(generator, Sequence)
    if not is_sequence and use_multiprocessing and workers > 1:
        warnings.warn(
            UserWarning('Using a generator with `use_multiprocessing=True`'
                        ' and multiple workers may duplicate your data.'
                        ' Please consider using the`keras.utils.Sequence'
                        ' class.'))
    if steps_per_epoch is None:
        if is_sequence:
            steps_per_epoch = len(generator)
        else:
            raise ValueError('`steps_per_epoch=None` is only valid for a'
                             ' generator based on the '
                             '`keras.utils.Sequence`'
                             ' class. Please specify `steps_per_epoch` '
                             'or use the `keras.utils.Sequence` class.')

    # python 2 has 'next', 3 has '__next__'
    # avoid any explicit version checks
    val_gen = (hasattr(validation_data, 'next')
               or hasattr(validation_data, '__next__')
               or isinstance(validation_data, Sequence))
    if (val_gen and not isinstance(validation_data, Sequence)
            and not validation_steps):
        raise ValueError('`validation_steps=None` is only valid for a'
                         ' generator based on the `keras.utils.Sequence`'
                         ' class. Please specify `validation_steps` or use'
                         ' the `keras.utils.Sequence` class.')

    # Prepare display labels.
    out_labels = model.metrics_names
    callback_metrics = out_labels + ['val_' + n for n in out_labels]

    # Prepare callbacks.
    model.history = cbks.History()
    _callbacks = [
        cbks.BaseLogger(stateful_metrics=model.stateful_metric_names)
    ]
    if verbose:
        _callbacks.append(
            cbks.ProgbarLogger(count_mode='steps',
                               stateful_metrics=model.stateful_metric_names))
    _callbacks += (callbacks or []) + [model.history]
    callbacks = cbks.CallbackList(_callbacks)

    # It's possible to callback a different model than `model`.
    if hasattr(model, 'callback_model') and model.callback_model:
        callback_model = model.callback_model
    else:
        callback_model = model
    callbacks.set_model(callback_model)
    callbacks.set_params({
        'epochs': epochs,
        'steps': steps_per_epoch,
        'verbose': verbose,
        'do_validation': do_validation,
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()

    enqueuer = None
    val_enqueuer = None
    # Bound before the `try` so the cleanup in `finally` never touches an
    # unbound name when setup fails or when no epoch is run at all.
    trainmetrics_thread = None

    try:
        if do_validation:
            if val_gen and workers > 0:
                # Create an Enqueuer that can be reused
                val_data = validation_data
                if isinstance(val_data, Sequence):
                    val_enqueuer = OrderedEnqueuer(
                        val_data, use_multiprocessing=use_multiprocessing)
                    # Honour an explicit `validation_steps`; only fall
                    # back to the full Sequence length when it is unset.
                    validation_steps = validation_steps or len(val_data)
                else:
                    val_enqueuer = GeneratorEnqueuer(
                        val_data, use_multiprocessing=use_multiprocessing)
                val_enqueuer.start(workers=workers,
                                   max_queue_size=max_queue_size)
                val_enqueuer_gen = val_enqueuer.get()
            elif val_gen:
                val_data = validation_data
                if isinstance(val_data, Sequence):
                    val_enqueuer_gen = iter(val_data)
                    # Same default as in the enqueuer branch above.
                    validation_steps = validation_steps or len(val_data)
                else:
                    val_enqueuer_gen = val_data
            else:
                # Prepare array data for validation.
                if len(validation_data) == 2:
                    val_x, val_y = validation_data
                    val_sample_weight = None
                elif len(validation_data) == 3:
                    val_x, val_y, val_sample_weight = validation_data
                else:
                    raise ValueError('`validation_data` should be a tuple '
                                     '`(val_x, val_y, val_sample_weight)` '
                                     'or `(val_x, val_y)`. Found: ' +
                                     str(validation_data))
                val_x, val_y, val_sample_weights = model._standardize_user_data(
                    val_x, val_y, val_sample_weight)
                val_data = val_x + val_y + val_sample_weights
                if model.uses_learning_phase and not isinstance(
                        K.learning_phase(), int):
                    val_data += [0.]
                for cbk in callbacks:
                    cbk.validation_data = val_data

        if workers > 0:
            if is_sequence:
                enqueuer = OrderedEnqueuer(
                    generator,
                    use_multiprocessing=use_multiprocessing,
                    shuffle=shuffle)
            else:
                enqueuer = GeneratorEnqueuer(
                    generator,
                    use_multiprocessing=use_multiprocessing,
                    wait_time=wait_time)
            enqueuer.start(workers=workers, max_queue_size=max_queue_size)
            output_generator = enqueuer.get()
        else:
            if is_sequence:
                output_generator = iter(generator)
            else:
                output_generator = generator

        callback_model.stop_training = False
        # Construct epoch logs.
        epoch_logs = {}
        while epoch < epochs:

            # Fresh scene instance dictionary for this epoch.
            model.scene_instance_id_metrics_dict_train = {}

            # One thread per epoch computes the batch metrics
            # asynchronously; it is joined before the epoch ends.
            if multithreading_metrics:
                # Threadsafe queue into which (y_pred, y) tuples are pushed.
                label_queue = queue.Queue()
                trainmetrics_thread = threading.Thread(
                    target=metrics_per_batch_thread_handler,
                    args=(label_queue,
                          model.scene_instance_id_metrics_dict_train,
                          params['mask_value'], steps_per_epoch))
                trainmetrics_thread.start()

            for m in model.stateful_metric_functions:
                m.reset_states()
            callbacks.on_epoch_begin(epoch)
            steps_done = 0
            batch_index = 0

            runtime_generator_cumulated = 0.
            runtime_train_and_predict_on_batch_cumulated = 0.
            runtime_class_accuracies_cumulated = 0.
            # Skip the first few batches in the runtime averages to reduce
            # the bias caused by initial extra time.
            skip_runtime_avg = 5

            while steps_done < steps_per_epoch:
                t_start = time()
                generator_output = next(output_generator)
                runtime_generator_next = time() - t_start

                if batch_index >= skip_runtime_avg:
                    runtime_generator_cumulated += runtime_generator_next

                if (not hasattr(generator_output, '__len__')
                        or len(generator_output) not in (2, 3)):
                    raise ValueError('Output of generator should be '
                                     'a tuple `(x, y, sample_weight)` '
                                     'or `(x, y)`. Found: ' +
                                     str(generator_output))

                # A sample weight yielded by the generator is ignored:
                # it is recomputed (or disabled) below.
                if len(generator_output) == 2:
                    x, y = generator_output
                else:
                    x, y, _ = generator_output

                # Build batch logs.
                batch_logs = {}
                if x is None or len(x) == 0:
                    # Handle data tensors support when no input given
                    # step-size = 1 for data tensors
                    batch_size = 1
                elif isinstance(x, list):
                    batch_size = x[0].shape[0]
                elif isinstance(x, dict):
                    batch_size = list(x.values())[0].shape[0]
                else:
                    batch_size = x.shape[0]
                batch_logs['batch'] = batch_index
                batch_logs['size'] = batch_size
                callbacks.on_batch_begin(batch_index, batch_logs)

                # Remark on label shape: the last (fourth) dimension holds
                # the true labels at index 0 and the corresponding
                # sceneinstid (millioncode) at index 1.
                t_start = time()

                # Set sample weights.
                if params['nosceneinstweights']:
                    sample_weight = None
                else:
                    sample_weight = heiner_calculate_sample_weights_batch(
                        y[:, :, 0, 1], generator.length_dict,
                        generator.scene_instance_ids_dict, 'train')

                # Forward pass, backward pass and gradient descent step.
                batch_loss, y_pred_logits, gradient_norm = heiner_train_and_predict_on_batch(
                    model,
                    x,
                    y[:, :, :, 0],
                    sample_weight=sample_weight,
                    calc_global_gradient_norm=not params['nocalcgradientnorm'])
                runtime_train_and_predict_on_batch = time() - t_start
                if batch_index >= skip_runtime_avg:
                    runtime_train_and_predict_on_batch_cumulated += runtime_train_and_predict_on_batch

                batch_logs['loss'] = batch_loss

                model.gradient_norm = gradient_norm

                t_start = time()
                # From logits to predicted class probabilities (in place).
                y_pred_probs = sigmoid(y_pred_logits, out=y_pred_logits)
                # From probabilities to hard class decisions (in place).
                y_pred = np.greater_equal(
                    y_pred_probs, params['outputthreshold'],
                    out=y_pred_probs)

                # Increment metrics for scene instances in batch.
                if multithreading_metrics:
                    # Both arrays must stay unchanged to be thread-safe.
                    # assumption 1: batchloader yields array copies
                    #               (true for moritz loader)
                    # assumption 2: *_and_predict_on_batch return newly
                    #               allocated arrays
                    label_queue.put((y_pred, y))
                else:
                    heiner_calculate_class_accuracies_metrics_per_scene_instance_in_batch(
                        model.scene_instance_id_metrics_dict_train, y_pred, y,
                        params['mask_value'])
                runtime_class_accuracies = time() - t_start
                if batch_index >= skip_runtime_avg:
                    runtime_class_accuracies_cumulated += runtime_class_accuracies

                callbacks.on_batch_end(batch_index, batch_logs)

                batch_index += 1
                steps_done += 1

                if steps_done > skip_runtime_avg and steps_done == steps_per_epoch - 1:
                    print(
                        ' --> batch {} we have average runtimes: generator {:.2f}, train_predict {:.2f}, metrics {:.2f}'
                        .format(
                            batch_index, runtime_generator_cumulated /
                            (steps_done - skip_runtime_avg),
                            runtime_train_and_predict_on_batch_cumulated /
                            (steps_done - skip_runtime_avg),
                            runtime_class_accuracies_cumulated /
                            (steps_done - skip_runtime_avg)))

                # Epoch finished.
                if steps_done >= steps_per_epoch and do_validation:
                    if val_gen:
                        val_outs = evaluate_and_predict_generator_with_sceneinst_metrics(
                            model,
                            val_enqueuer_gen,
                            params,
                            multithreading_metrics,
                            validation_steps,
                            workers=0,
                            verbose=1)
                    else:
                        # No need for try/except because
                        # data has already been validated.
                        val_outs = model.evaluate(
                            val_x,
                            val_y,
                            batch_size=batch_size,
                            sample_weight=val_sample_weights,
                            verbose=0)
                    val_outs = to_list(val_outs)
                    # Same labels assumed.
                    for l, o in zip(out_labels, val_outs):
                        epoch_logs['val_' + l] = o

                if callback_model.stop_training:
                    break

            if multithreading_metrics:
                # NOTE(review): if the batch loop stopped early, fewer than
                # `steps_per_epoch` items were queued; whether the worker
                # then terminates depends on
                # `metrics_per_batch_thread_handler` -- confirm.
                trainmetrics_thread.join()
                print(
                    ' --> both threads for calculating the batch metrics -- training and validation -- finished all their work'
                )

            callbacks.on_epoch_end(epoch, epoch_logs)
            epoch += 1
            if callback_model.stop_training:
                break

    finally:
        try:
            if enqueuer is not None:
                enqueuer.stop()
        finally:
            if val_enqueuer is not None:
                val_enqueuer.stop()

        # Only join a thread that was actually created; joining an
        # already finished thread again is harmless.
        if trainmetrics_thread is not None:
            trainmetrics_thread.join()

    callbacks.on_train_end()
    return model.history
Esempio n. 14
0
def train_model(workspaceDir, modelName, devFileSuffix, testFileSuffix,
                saveModel, batchSize, epochs, max_len, num_buckets, vocab_size,
                training_mode, early_stop, predictor_model, predictor_data,
                **kwargs):
    """Train the TQE predictor/estimator models, then evaluate the estimator.

    The function prepares the data, builds the three models returned by
    ``getEnsembledModel`` (multitask, predictor, estimator), optionally
    pre-trains or loads the predictor, trains according to ``training_mode``,
    optionally stores everything in a shelf, and finally evaluates the
    estimator on the development and test sets.

    Args:
        workspaceDir: Directory holding the predictor weights file and the
            saved-model shelf; also forwarded to ``_prepareInput``.
        modelName: Forwarded to ``_prepareInput``.
        devFileSuffix: Forwarded to ``_prepareInput``.
        testFileSuffix: Forwarded to ``_prepareInput``.
        saveModel: If truthy, the shelf ``model.<saveModel>`` is written
            under ``workspaceDir``.
        batchSize: Batch size passed to every ``getBatchGenerator`` call.
        epochs: Number of epochs for every training stage.
        max_len: Forwarded to ``_prepareInput``.
        num_buckets: Forwarded to ``_prepareInput``.
        vocab_size: Vocabulary size of both ``WordIndexTransformer`` objects.
        training_mode: ``"multitask"``, ``"two-step"`` or ``"stack-prop"``.
        early_stop: Patience used for ``EarlyStopping``; a negative value is
            replaced by ``epochs``.
        predictor_model: If truthy, the predictor weights file is
            ``tqe.<predictor_model>.predictor.model`` under ``workspaceDir``.
        predictor_data: Forwarded to ``_prepareInput`` as
            ``predictorDataModel``.
        **kwargs: Forwarded to ``getEnsembledModel``.

    Raises:
        ValueError: If ``training_mode`` is not one of the supported modes.
            Note that this is only detected after data preparation and any
            predictor pre-training have already run.
    """
    logger.info("initializing TQE training")

    predictorModelFile = None
    if predictor_model:
        predictorModelFile = os.path.join(
            workspaceDir, ".".join(["tqe", predictor_model,
                                    "predictor.model"]))

    srcVocabTransformer = WordIndexTransformer(vocab_size=vocab_size)
    refVocabTransformer = WordIndexTransformer(vocab_size=vocab_size)

    X_train, y_train, X_dev, y_dev, X_test, y_test, pred_train = _prepareInput(
        workspaceDir,
        modelName,
        srcVocabTransformer,
        refVocabTransformer,
        max_len=max_len,
        num_buckets=num_buckets,
        devFileSuffix=devFileSuffix,
        testFileSuffix=testFileSuffix,
        predictorDataModel=predictor_data)

    model_multitask, model_predictor, model_estimator = \
        getEnsembledModel(srcVocabTransformer=srcVocabTransformer,
                          refVocabTransformer=refVocabTransformer,
                          keep_trainable=(training_mode == "stack-prop"),
                          **kwargs)

    # Pre-trained predictor weights are only loaded when there is no
    # predictor data to train on in this run.
    if predictorModelFile and not pred_train:
        logger.info("Loading weights for predictor")
        model_predictor.load_weights(predictorModelFile)

    logger.info("Training")

    if early_stop < 0:
        early_stop = epochs

    def length_key(x):
        # Key handed to getBatchGenerator: the lengths of the items in `x`
        # joined by underscores.
        return "_".join(map(str, map(len, x)))

    def reshapeRef(ref):
        # Give every reference a trailing axis of size 1. A list is built
        # explicitly because np.array(map(...)) would wrap the lazy map
        # object itself on Python 3 instead of stacking its items.
        return np.array([r.reshape((-1, 1)) for r in ref])

    def make_batches(inputs, targets=None):
        # Single place that applies the shared key / batch size.
        if targets is None:
            return getBatchGenerator(inputs,
                                     key=length_key,
                                     batch_size=batchSize)
        return getBatchGenerator(inputs,
                                 targets,
                                 key=length_key,
                                 batch_size=batchSize)

    def stop_on(metric):
        # Early stopping that maximizes the given validation metric.
        return [
            EarlyStopping(monitor=metric, patience=early_stop, mode="max"),
        ]

    if pred_train:
        logger.info("Training predictor on predictor data")

        callbacks = None
        if predictorModelFile:
            # One weights checkpoint per epoch, next to the final file.
            callbacks = [
                ModelCheckpoint(filepath=(predictorModelFile + ".{epoch:02d}"),
                                save_weights_only=True)
            ]

        model_predictor.fit_generator(
            make_batches([pred_train['src'], pred_train['ref']],
                         [reshapeRef(pred_train['ref'])]),
            epochs=epochs,
            verbose=2,
            callbacks=callbacks)
        if predictorModelFile:
            logger.info("Saving weights for predictor")
            model_predictor.save_weights(predictorModelFile)

    if training_mode == "multitask":
        logger.info("Training multitask model")
        model_multitask.fit_generator(
            make_batches([X_train['src'], X_train['mt']],
                         [reshapeRef(X_train["ref"]), y_train]),
            epochs=epochs,
            validation_data=make_batches(
                [X_dev['src'], X_dev['mt']],
                [reshapeRef(X_dev["ref"]), y_dev]),
            callbacks=stop_on("val_quality_pearsonr"),
            verbose=2)
    elif training_mode == "two-step":
        logger.info("Training predictor")
        # NOTE(review): training feeds X_train['ref'] but validation feeds
        # X_dev['mt'] -- kept as in the original; confirm this is intended.
        model_predictor.fit_generator(
            make_batches([X_train['src'], X_train['ref']],
                         [reshapeRef(X_train["ref"])]),
            epochs=epochs,
            validation_data=make_batches([X_dev['src'], X_dev['mt']],
                                         [reshapeRef(X_dev["ref"])]),
            callbacks=stop_on("val_sparse_categorical_accuracy"),
            verbose=2)
        logger.info("Training estimator")
        model_estimator.fit_generator(
            make_batches([X_train['src'], X_train['mt']], [y_train]),
            epochs=epochs,
            validation_data=make_batches([X_dev['src'], X_dev['mt']],
                                         [y_dev]),
            callbacks=stop_on("val_pearsonr"),
            verbose=2)
    elif training_mode == "stack-prop":
        logger.info("Training with stack propogation")
        # Set parameters. The i-th entry of train_data / validation_data
        # belongs to the i-th entry of models.
        models = [model_predictor, model_estimator]
        train_data = [
            make_batches([X_train['src'], X_train['ref']],
                         [reshapeRef(X_train["ref"])]),
            make_batches([X_train['src'], X_train['mt']], [y_train]),
        ]
        # NOTE(review): the predictor is validated on X_dev['mt'] inputs while
        # it trains on X_train['ref'] -- kept as in the original; confirm.
        validation_data = [
            make_batches([X_dev['src'], X_dev['mt']],
                         [reshapeRef(X_dev["ref"])]),
            make_batches([X_dev['src'], X_dev['mt']], [y_dev]),
        ]
        callbacks = stop_on("val_pearsonr")
        verbose = 2
        # Done with setting parameters

        # Assume num_batches in all generator are equal
        steps_per_epoch = len(train_data[0])
        do_validation = bool(validation_data)

        # Prepare display labels: the metric names of all models, in order.
        out_labels = [name for m in models for name in m.metrics_names]
        callback_metrics = out_labels + ['val_' + n for n in out_labels]

        # prepare callbacks
        history = cbks.History()
        _callbacks = [cbks.BaseLogger()]
        if verbose:
            _callbacks.append(cbks.ProgbarLogger(count_mode='steps', ))
        _callbacks += (callbacks or []) + [history]
        callbacks = cbks.CallbackList(_callbacks)

        # Callbacks (including early stopping) are attached to the estimator;
        # its stop_training flag controls the manual loop below.
        callback_model = model_estimator
        callbacks.set_model(callback_model)
        callbacks.set_params({
            'epochs': epochs,
            'steps': steps_per_epoch,
            'verbose': verbose,
            'do_validation': do_validation,
            'metrics': callback_metrics,
        })
        callbacks.on_train_begin()

        # Prepare for training
        callback_model.stop_training = False
        epoch_logs = {}

        # Start training
        for epoch in range(epochs):
            callbacks.on_epoch_begin(epoch)
            for batch_index in range(steps_per_epoch):
                # build batch logs
                # Get size of the batch from the first model's batch
                x, y = train_data[0][batch_index]
                if isinstance(x, list):
                    batch_size = x[0].shape[0]
                elif isinstance(x, dict):
                    batch_size = list(x.values())[0].shape[0]
                else:
                    batch_size = x.shape[0]

                batch_logs = {}
                batch_logs['batch'] = batch_index
                batch_logs['size'] = batch_size
                callbacks.on_batch_begin(batch_index, batch_logs)

                # Train each model on its own batch with the same index and
                # concatenate their outputs in model order.
                outs = []
                for i, model in enumerate(models):
                    x, y = train_data[i][batch_index]
                    model_outs = model.train_on_batch(x, y)

                    if not isinstance(model_outs, list):
                        model_outs = [model_outs]

                    outs.extend(model_outs)

                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)

                if callback_model.stop_training:
                    break

            if do_validation:
                val_outs = []
                for i, model in enumerate(models):
                    outs = model.evaluate_generator(validation_data[i])

                    if not isinstance(outs, list):
                        outs = [outs]

                    val_outs.extend(outs)

                for l, o in zip(out_labels, val_outs):
                    epoch_logs['val_' + l] = o

            callbacks.on_epoch_end(epoch, epoch_logs)

            if callback_model.stop_training:
                break

        callbacks.on_train_end()
    else:
        raise ValueError("Training mode not recognized")

    if saveModel:
        logger.info("Saving model")
        shelf = shelve.open(os.path.join(workspaceDir, "model." + saveModel))
        try:
            models = [model_multitask, model_predictor, model_estimator]

            shelf['config'] = [model.get_config() for model in models]
            shelf['weights'] = [model.get_weights() for model in models]
            shelf['params'] = {
                'srcVocabTransformer': srcVocabTransformer,
                'refVocabTransformer': refVocabTransformer,
            }
        finally:
            # Always release the shelf, even if serialization fails.
            shelf.close()

    logger.info("Evaluating on development data of size %d" % len(y_dev))
    dev_batches = make_batches([X_dev['src'], X_dev['mt']])
    # Reorder the gold scores with the generator's align() so they can be
    # compared against the predictions produced from the same generator.
    y_dev = dev_batches.align(y_dev)
    evaluate(
        model_estimator.predict_generator(dev_batches).reshape((-1, )), y_dev)

    logger.info("Evaluating on test data of size %d" % len(y_test))
    test_batches = make_batches([X_test['src'], X_test['mt']])
    y_test = test_batches.align(y_test)
    evaluate(
        model_estimator.predict_generator(test_batches).reshape((-1, )),
        y_test)
Esempio n. 15
0
def train_model(name,
                ftrain,
                generator,
                samples_per_epoch,
                nb_epoch,
                verbose=1,
                callbacks=None,
                ftest=None,
                validation_data=None,
                nb_val_samples=None,
                saver=None):
    """Run the main training loop (modified from Keras ``fit_generator``).

    Args:
        name: Unused by this function.
        ftrain: Callable ``ftrain(x, y, counter)`` returning
            ``(samples, losses)`` for one batch; ``counter`` is the number of
            batches trained so far.
        generator: Iterator yielding ``(x, y)`` batches.
        samples_per_epoch: Number of samples after which an epoch ends.
        nb_epoch: Number of epochs to run.
        verbose: If truthy, a ``ProgbarLogger`` callback is added.
        callbacks: Optional iterable of extra callbacks.
        ftest: Callable ``ftest(x, y)``; element ``[1]`` of its result is
            accumulated as the validation cost. Required when
            ``validation_data`` is given.
        validation_data: Optional iterator yielding ``(x, y)`` batches.
        nb_val_samples: Number of items drawn from ``validation_data`` per
            epoch; also the divisor of the reported mean cost.
        saver: Optional callable invoked as ``saver(epoch)`` after each epoch.
    """
    # Live preview of generated samples (left) next to targets (right).
    gif = True
    if gif:
        plt.subplot(121)
        IM = plt.imshow(np.random.randn(ims, ims, 3), interpolation="none")
        plt.subplot(122)
        IM2 = plt.imshow(np.random.randn(ims, ims, 3), interpolation="none")
        plt.draw()
        plt.pause(.001)

    epoch = 0
    counter = 0
    out_labels = ['loss', 'time']  # self.metrics_names
    callback_metrics = out_labels + ['val_' + n for n in out_labels]

    # prepare callbacks
    history = cbks.History()
    callbacks = [cbks.BaseLogger()] + list(callbacks or []) + [history]
    if verbose:
        callbacks += [cbks.ProgbarLogger()]
    callbacks = cbks.CallbackList(callbacks)

    callbacks._set_params({
        'nb_epoch': nb_epoch,
        'nb_sample': samples_per_epoch,
        'verbose': verbose,
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()

    while epoch < nb_epoch:
        callbacks.on_epoch_begin(epoch)
        samples_seen = 0
        batch_index = 0
        # Bound before the batch loop so on_epoch_end always receives a dict,
        # even when the loop body never runs.
        epoch_logs = {}
        while samples_seen < samples_per_epoch:
            x, y = next(generator)
            # build batch logs
            batch_logs = {}
            if isinstance(x, list):
                batch_size = len(x[0])
            elif isinstance(x, dict):
                batch_size = len(list(x.values())[0])
            else:
                batch_size = len(x)
            batch_logs['batch'] = batch_index
            batch_logs['size'] = batch_size
            callbacks.on_batch_begin(batch_index, batch_logs)

            # Time the training step; the duration is logged as 'time'.
            t1 = time.time()
            samples, losses = ftrain(x, y, counter)
            outs = (losses, ) + (time.time() - t1, )
            counter += 1

            # Every 100 batches, replay the first output/target pair lists.
            if (counter % 100 == 0) and gif:
                for v, u in zip(samples[0], y[0]):
                    IM.set_data(v.reshape(ims, ims, 3))
                    IM2.set_data(u.reshape(ims, ims, 3))
                    plt.draw()
                    plt.pause(.01)

            for l, o in zip(out_labels, outs):
                batch_logs[l] = o

            callbacks.on_batch_end(batch_index, batch_logs)

            batch_index += 1
            samples_seen += batch_size

        if validation_data is not None:
            valid_cost = 0
            valid_samples_seen = 0
            while valid_samples_seen < nb_val_samples:
                x, y = next(validation_data)
                valid_cost += ftest(x, y)[1]
                valid_samples_seen += 1
            valid_cost /= float(nb_val_samples)
            # Single-argument call prints the same text on Python 2 and 3.
            print("\nValidation:  %s" % (valid_cost, ))

        if saver is not None:
            saver(epoch)

        callbacks.on_epoch_end(epoch, epoch_logs)
        epoch += 1

    callbacks.on_train_end()
Esempio n. 16
0
def get_callbacks(generator, callbacks_config, output_dir):
    """Build the list of callbacks for a training run.

    Creates ``<output_dir>/logs`` and ``<output_dir>/checkpoints`` if needed.
    The returned list contains, in order: the requested custom callbacks, a
    ``ModelCheckpoint``, a ``ReduceLROnPlateau``, a
    ``CustomTensorBoardCallback``, a ``TQDMCallback`` and a ``BaseLogger``.

    Args:
        generator: Keras test_generator, handed to every custom callback.
        callbacks_config: Dictionary with the optional keys
            custom_callbacks: dict mapping a class name from
                utils.callbacks to a dict of its keyword arguments.
            monitor: String, ``monitor`` argument of the standard Keras
                callbacks (default ``'val_loss'``).
            monitor_mode: String, ``mode`` argument of the standard Keras
                callbacks (default ``'auto'``).
            lr_factor: ``factor`` of ``ReduceLROnPlateau`` (default 0.5).
            patience: Integer, ``patience`` of ``ReduceLROnPlateau``
                (default 2).
            tb_update_freq: ``update_freq`` of the TensorBoard callback
                (default 500).
        output_dir: Directory under which logs and checkpoints are written.

    Returns:
        callbacks_list: List of callbacks objects to use.
    """
    log_dir = os.path.join(output_dir, 'logs')
    ckpt_dir = os.path.join(output_dir, 'checkpoints')
    for directory in (log_dir, ckpt_dir):
        os.makedirs(directory, exist_ok=True)

    # Custom callback classes that may be requested by name in the config.
    available = {
        cls_name: getattr(callbacks, cls_name)
        for cls_name in (
            'ImageLogger',
            'SegmentationDiceEpochCallback',
            'MultiClassifierEpochCallback',
            'RegressionEpochCallback',
        )
    }

    callbacks_list = []

    requested = callbacks_config.get('custom_callbacks', {})
    for cls_name in requested:
        params = requested[cls_name]
        factory = available[cls_name]
        callbacks_list.append(
            factory(generator=generator,
                    log_dir=log_dir,
                    prefix='val',
                    **params))

    cfg = callbacks_config.get
    monitor = cfg('monitor', 'val_loss')
    monitor_mode = cfg('monitor_mode', 'auto')
    lr_factor = cfg('lr_factor', 0.5)
    patience = cfg('patience', 2)
    tb_update_freq = cfg('tb_update_freq', 500)
    chpt_name = os.path.join(ckpt_dir, 'chpt.{epoch:02d}.hdf5')

    checkpoint = keras_callbacks.ModelCheckpoint(filepath=chpt_name,
                                                 monitor=monitor,
                                                 mode=monitor_mode,
                                                 verbose=1,
                                                 save_best_only=False)
    callbacks_list.append(checkpoint)

    reduce_lr = keras_callbacks.ReduceLROnPlateau(factor=lr_factor,
                                                  monitor=monitor,
                                                  mode=monitor_mode,
                                                  patience=patience,
                                                  min_lr=0.5e-6,
                                                  verbose=1)
    callbacks_list.append(reduce_lr)

    # The 'update_freq' of the other custom callbacks should be a multiple
    # of this update_freq, otherwise their output is not seen on tensorboard.
    tensorboard = callbacks.CustomTensorBoardCallback(
        log_dir=log_dir,
        histogram_freq=0,
        write_images=True,
        write_graph=True,
        update_freq=tb_update_freq)
    callbacks_list.append(tensorboard)

    callbacks_list.append(keras_tqdm.TQDMCallback())
    callbacks_list.append(keras_callbacks.BaseLogger())

    return callbacks_list
Esempio n. 17
0
    def fit_generator_feed(self,
                           generator,
                           steps_per_epoch=None,
                           epochs=1,
                           verbose=1,
                           callbacks=None,
                           validation_data=None,
                           validation_steps=None,
                           class_weight=None,
                           max_queue_size=10,
                           workers=1,
                           use_multiprocessing=False,
                           shuffle=True,
                           initial_epoch=0,
                           check_array_lengths=True):
        """Train the model on data generated batch-by-batch by a Python generator
        or an instance of `Sequence`.

        See `Model.fit_generator()` for the full documentation.

        The only difference here is that the generator must also generate data for
        any native placeholders of the model.

        Only use this if you know what you are doing (especially with the `shuffle`
        and `check_array_lengths` parameters). If not, prefer `self.fit_fullbatches()`
        or `self.fit_minibatches()`.

        # Arguments
            generator: a generator or `Sequence` whose items are tuples
                `(x, y, feeds)` or `(x, y, feeds, sample_weight)`.
            steps_per_epoch: number of batches per epoch. May only be `None`
                for a `Sequence`, in which case `len(generator)` is used.
            epochs: epoch index at which training stops.
            verbose: if truthy, a `ProgbarLogger` callback is added.
            callbacks: optional list of extra callbacks.
            validation_data: must be `None`; validation is not supported.
            validation_steps: currently unused (validation is disabled).
            class_weight: forwarded to `self.train_on_fed_batch()`.
            max_queue_size: forwarded to the enqueuer's `start()`.
            workers: forwarded to the enqueuer's `start()`; with `0` no
                enqueuer is created and the generator is consumed directly.
            use_multiprocessing: forwarded to the enqueuer.
            shuffle: forwarded to `OrderedEnqueuer` (only used for a
                `Sequence` with `workers > 0`).
            initial_epoch: epoch index at which training starts.
            check_array_lengths: forwarded to `self.train_on_fed_batch()`.

        # Returns
            The `History` callback stored in `self.history`.

        # Raises
            ValueError: if `validation_data` is given, if `steps_per_epoch`
                is `None` for a non-`Sequence` generator, or if a generator
                item is not a tuple of length 3 or 4.

        """

        # Disable validation, as we haven't converted the code for this yet.
        # All related code is commented with a `disable:` prefix.
        if validation_data is not None:
            raise ValueError(
                'Validation with a feeding generator is not yet supported')
        # The original (feed-modified) method starts here.

        wait_time = 0.01  # in seconds; passed to GeneratorEnqueuer
        epoch = initial_epoch

        # disable: do_validation = bool(validation_data)
        self._make_train_function()
        # disable: if do_validation:
        # disable:     self._make_test_function()

        is_sequence = isinstance(generator, Sequence)
        if not is_sequence and use_multiprocessing and workers > 1:
            warnings.warn(
                UserWarning('Using a generator with `use_multiprocessing=True`'
                            ' and multiple workers may duplicate your data.'
                            ' Please consider using the`keras.utils.Sequence'
                            ' class.'))
        if steps_per_epoch is None:
            if is_sequence:
                steps_per_epoch = len(generator)
            else:
                raise ValueError(
                    '`steps_per_epoch=None` is only valid for a'
                    ' generator based on the `keras.utils.Sequence`'
                    ' class. Please specify `steps_per_epoch` or use'
                    ' the `keras.utils.Sequence` class.')

        # disable: # python 2 has 'next', 3 has '__next__'
        # disable: # avoid any explicit version checks
        # disable: val_gen = (hasattr(validation_data, 'next') or
        # disable:            hasattr(validation_data, '__next__') or
        # disable:            isinstance(validation_data, Sequence))
        # disable: if (val_gen and not isinstance(validation_data, Sequence) and
        # disable:         not validation_steps):
        # disable:     raise ValueError('`validation_steps=None` is only valid for a'
        # disable:                      ' generator based on the `keras.utils.Sequence`'
        # disable:                      ' class. Please specify `validation_steps` or use'
        # disable:                      ' the `keras.utils.Sequence` class.')

        # Prepare display labels.
        # The `val_` names are still advertised to the callbacks although no
        # validation values are produced while validation is disabled.
        out_labels = self.metrics_names
        callback_metrics = out_labels + ['val_' + n for n in out_labels]

        # prepare callbacks: BaseLogger first, optional progress bar, then the
        # user callbacks, and the History callback last.
        self.history = cbks.History()
        _callbacks = [
            cbks.BaseLogger(stateful_metrics=self.stateful_metric_names)
        ]
        if verbose:
            _callbacks.append(
                cbks.ProgbarLogger(
                    count_mode='steps',
                    stateful_metrics=self.stateful_metric_names))
        _callbacks += (callbacks or []) + [self.history]
        callbacks = cbks.CallbackList(_callbacks)

        # it's possible to callback a different model than self:
        if hasattr(self, 'callback_model') and self.callback_model:
            callback_model = self.callback_model
        else:
            callback_model = self
        callbacks.set_model(callback_model)
        callbacks.set_params({
            'epochs': epochs,
            'steps': steps_per_epoch,
            'verbose': verbose,
            # disable: 'do_validation': do_validation,
            'metrics': callback_metrics,
        })
        callbacks.on_train_begin()

        enqueuer = None
        # disable: val_enqueuer = None

        try:
            # disable: if do_validation and not val_gen:
            # disable:     # Prepare data for validation
            # disable:     if len(validation_data) == 2:
            # disable:         val_x, val_y = validation_data
            # disable:         val_sample_weight = None
            # disable:     elif len(validation_data) == 3:
            # disable:         val_x, val_y, val_sample_weight = validation_data
            # disable:     else:
            # disable:         raise ValueError('`validation_data` should be a tuple '
            # disable:                          '`(val_x, val_y, val_sample_weight)` '
            # disable:                          'or `(val_x, val_y)`. Found: ' +
            # disable:                          str(validation_data))
            # disable:     val_x, val_y, val_sample_weights = self._standardize_user_data(
            # disable:         val_x, val_y, val_sample_weight)
            # disable:     val_data = val_x + val_y + val_sample_weights
            # disable:     if self.uses_learning_phase and not isinstance(K.learning_phase(), int):
            # disable:         val_data += [0.]
            # disable:     for cbk in callbacks:
            # disable:         cbk.validation_data = val_data

            # With workers > 0 batches are pulled through an enqueuer;
            # otherwise the generator is iterated directly.
            if workers > 0:
                if is_sequence:
                    enqueuer = OrderedEnqueuer(
                        generator,
                        use_multiprocessing=use_multiprocessing,
                        shuffle=shuffle)
                else:
                    enqueuer = GeneratorEnqueuer(
                        generator,
                        use_multiprocessing=use_multiprocessing,
                        wait_time=wait_time)
                enqueuer.start(workers=workers, max_queue_size=max_queue_size)
                output_generator = enqueuer.get()
            else:
                if is_sequence:
                    output_generator = iter(generator)
                else:
                    output_generator = generator

            callback_model.stop_training = False
            # Construct epoch logs.
            # With validation disabled nothing in this method writes to it.
            epoch_logs = {}
            while epoch < epochs:
                # Reset stateful metric layers at the start of every epoch.
                for m in self.metrics:
                    if isinstance(m, Layer) and m.stateful:
                        m.reset_states()
                callbacks.on_epoch_begin(epoch)
                steps_done = 0
                batch_index = 0
                while steps_done < steps_per_epoch:
                    generator_output = next(output_generator)

                    if not hasattr(generator_output, '__len__'):
                        raise ValueError(
                            'Output of generator should be '
                            'a tuple `(x, y, feeds, sample_weight)` '
                            'or `(x, y, feeds)`. Found: ' +
                            str(generator_output))

                    # Unpack the batch; sample weights are optional.
                    if len(generator_output) == 3:
                        x, y, feeds = generator_output
                        sample_weight = None
                    elif len(generator_output) == 4:
                        x, y, feeds, sample_weight = generator_output
                    else:
                        raise ValueError(
                            'Output of generator should be '
                            'a tuple `(x, y, feeds, sample_weight)` '
                            'or `(x, y, feeds)`. Found: ' +
                            str(generator_output))
                    # build batch logs
                    batch_logs = {}
                    if x is None or len(x) == 0:
                        # Handle data tensors support when no input given
                        # step-size = 1 for data tensors
                        batch_size = 1
                    elif isinstance(x, list):
                        batch_size = x[0].shape[0]
                    elif isinstance(x, dict):
                        batch_size = list(x.values())[0].shape[0]
                    else:
                        batch_size = x.shape[0]
                    batch_logs['batch'] = batch_index
                    batch_logs['size'] = batch_size
                    callbacks.on_batch_begin(batch_index, batch_logs)

                    outs = self.train_on_fed_batch(
                        x,
                        y,
                        feeds=feeds,
                        sample_weight=sample_weight,
                        class_weight=class_weight,
                        check_array_lengths=check_array_lengths)

                    # Pair each output with its metric name for the logs.
                    if not isinstance(outs, list):
                        outs = [outs]
                    for l, o in zip(out_labels, outs):
                        batch_logs[l] = o

                    callbacks.on_batch_end(batch_index, batch_logs)

                    batch_index += 1
                    steps_done += 1

                    # Epoch finished.
                    # disable: if steps_done >= steps_per_epoch and do_validation:
                    # disable:     if val_gen:
                    # disable:         val_outs = self.evaluate_generator(
                    # disable:             validation_data,
                    # disable:             validation_steps,
                    # disable:             workers=workers,
                    # disable:             use_multiprocessing=use_multiprocessing,
                    # disable:             max_queue_size=max_queue_size)
                    # disable:     else:
                    # disable:         # No need for try/except because
                    # disable:         # data has already been validated.
                    # disable:         val_outs = self.evaluate(
                    # disable:             val_x, val_y,
                    # disable:             batch_size=batch_size,
                    # disable:             sample_weight=val_sample_weights,
                    # disable:             verbose=0)
                    # disable:     if not isinstance(val_outs, list):
                    # disable:         val_outs = [val_outs]
                    # disable:     # Same labels assumed.
                    # disable:     for l, o in zip(out_labels, val_outs):
                    # disable:         epoch_logs['val_' + l] = o

                    # A callback may request a stop in the middle of an epoch.
                    if callback_model.stop_training:
                        break

                callbacks.on_epoch_end(epoch, epoch_logs)
                epoch += 1
                if callback_model.stop_training:
                    break

        finally:
            # Stop the enqueuer even when training raised. The inner
            # try/finally is kept as the slot for the disabled validation
            # enqueuer cleanup.
            try:
                if enqueuer is not None:
                    enqueuer.stop()
            finally:
                pass
                # disable: if val_enqueuer is not None:
                # disable:     val_enqueuer.stop()

        callbacks.on_train_end()
        return self.history
Esempio n. 18
0
    def fit_generator(self,
                      generator,
                      samples_per_epoch,
                      nb_epoch,
                      verbose=1,
                      callbacks=None,
                      validation_data=None,
                      nb_val_samples=None,
                      class_weight=None,
                      max_q_size=10,
                      nb_worker=1,
                      pickle_safe=False,
                      initial_epoch=0,
                      validate_batch=True):
        '''Fits the model on data generated batch-by-batch by
        a Python generator.
        The generator is run in parallel to the model, for efficiency.
        For instance, this allows you to do real-time data augmentation
        on images on CPU in parallel to training your model on GPU.

        # Arguments
            generator: a generator.
                The output of the generator must be either
                - a tuple (inputs, targets)
                - a tuple (inputs, targets, sample_weights).
                All arrays should contain the same number of samples.
                The generator is expected to loop over its data
                indefinitely. An epoch finishes when `samples_per_epoch`
                samples have been seen by the model.
            samples_per_epoch: integer, number of samples to process before
                going to the next epoch.
            nb_epoch: integer, total number of iterations on the data.
            verbose: verbosity mode, 0, 1, or 2.
            callbacks: list of callbacks to be called during training.
            validation_data: this can be either
                - a generator for the validation data
                - a tuple (inputs, targets)
                - a tuple (inputs, targets, sample_weights).
            nb_val_samples: only relevant if `validation_data` is a generator.
                number of samples to use from validation generator
                at the end of every epoch.
            class_weight: dictionary mapping class indices to a weight
                for the class.
            max_q_size: maximum size for the generator queue
            nb_worker: maximum number of processes to spin up
                when using process based threading
            pickle_safe: if True, use process based threading.
                Note that because
                this implementation relies on multiprocessing,
                you should not pass
                non picklable arguments to the generator
                as they can't be passed
                easily to children processes.
            initial_epoch: epoch at which to start training
                (useful for resuming a previous training run)

        # Returns
            A `History` object.

        # Example

        ```python
            def generate_arrays_from_file(path):
                while 1:
                    f = open(path)
                    for line in f:
                        # create numpy arrays of input data
                        # and labels, from each line in the file
                        x1, x2, y = process_line(line)
                        yield ({'input_1': x1, 'input_2': x2}, {'output': y})
                    f.close()

            model.fit_generator(generate_arrays_from_file('/my_file.txt'),
                                samples_per_epoch=10000, nb_epoch=10)
        ```
        '''
        wait_time = 0.01  # in seconds
        epoch = initial_epoch

        do_validation = bool(validation_data)
        self._make_train_function()
        if do_validation:
            self._make_test_function()

        # python 2 has 'next', 3 has '__next__'
        # avoid any explicit version checks
        val_gen = (hasattr(validation_data, 'next')
                   or hasattr(validation_data, '__next__'))
        if val_gen and not nb_val_samples:
            raise ValueError('When using a generator for validation data, '
                             'you must specify a value for "nb_val_samples".')

        out_labels = self.metrics_names
        callback_metrics = out_labels + ['val_' + n for n in out_labels]

        # prepare callbacks
        self.history = cbks.History()
        callbacks = [cbks.BaseLogger()] + (callbacks or []) + [self.history]
        if verbose:
            callbacks += [cbks.ProgbarLogger()]
        callbacks = cbks.CallbackList(callbacks)

        # it's possible to callback a different model than self:
        if hasattr(self, 'callback_model') and self.callback_model:
            callback_model = self.callback_model
        else:
            callback_model = self
        callbacks._set_model(callback_model)
        callbacks._set_params({
            'nb_epoch': nb_epoch,
            'nb_sample': samples_per_epoch,
            'verbose': verbose,
            'do_validation': do_validation,
            'metrics': callback_metrics,
        })
        callbacks.on_train_begin()

        if do_validation and not val_gen:
            if len(validation_data) == 2:
                val_x, val_y = validation_data
                val_sample_weight = None
            elif len(validation_data) == 3:
                val_x, val_y, val_sample_weight = validation_data
            else:
                raise ValueError('validation_data should be a tuple '
                                 '(val_x, val_y, val_sample_weight) '
                                 'or (val_x, val_y). Found: ' +
                                 str(validation_data))
            val_x, val_y, val_sample_weights = self._standardize_user_data(
                val_x, val_y, val_sample_weight)
            self.validation_data = val_x + [val_y, val_sample_weights]
        else:
            self.validation_data = None

        # start generator thread storing batches into a queue
        data_gen_queue, _stop, generator_threads = generator_queue(
            generator,
            max_q_size=max_q_size,
            nb_worker=nb_worker,
            pickle_safe=pickle_safe)

        callback_model.stop_training = False
        while epoch < nb_epoch:
            callbacks.on_epoch_begin(epoch)
            samples_seen = 0
            batch_index = 0
            while samples_seen < samples_per_epoch:
                generator_output = None
                while not _stop.is_set():
                    if not data_gen_queue.empty():
                        generator_output = data_gen_queue.get()
                        break
                    else:
                        time.sleep(wait_time)

                if not hasattr(generator_output, '__len__'):
                    _stop.set()
                    raise ValueError('output of generator should be a tuple '
                                     '(x, y, sample_weight) '
                                     'or (x, y). Found: ' +
                                     str(generator_output))
                if len(generator_output) == 2:
                    x, y = generator_output
                    sample_weight = None
                elif len(generator_output) == 3:
                    x, y, sample_weight = generator_output
                else:
                    _stop.set()
                    raise ValueError('output of generator should be a tuple '
                                     '(x, y, sample_weight) '
                                     'or (x, y). Found: ' +
                                     str(generator_output))
                # build batch logs
                batch_logs = {}
                if isinstance(x, list):
                    batch_size = x[0].shape[0]
                elif isinstance(x, dict):
                    batch_size = list(x.values())[0].shape[0]
                else:
                    batch_size = x.shape[0]
                batch_logs['batch'] = batch_index
                batch_logs['size'] = batch_size
                callbacks.on_batch_begin(batch_index, batch_logs)

                try:
                    outs = self.train_on_batch(x,
                                               y,
                                               sample_weight=sample_weight,
                                               class_weight=class_weight)
                except:
                    _stop.set()
                    raise

                if not isinstance(outs, list):
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                if validate_batch:
                    try:
                        val_outs = self.evaluate_generator(
                            validation_data,
                            batch_size,
                            max_q_size=max_q_size,
                            nb_worker=nb_worker,
                            pickle_safe=pickle_safe)
                    except:
                        _stop.set()
                        raise
                    if not isinstance(val_outs, list):
                        outs = [outs]
                    for l, o in zip(out_labels, val_outs):
                        if l.startswith('val_'):
                            batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)

                # construct epoch logs
                epoch_logs = {}
                batch_index += 1
                samples_seen += batch_size

                # epoch finished
                if samples_seen > samples_per_epoch:
                    warnings.warn('Epoch comprised more than '
                                  '`samples_per_epoch` samples, '
                                  'which might affect learning results. '
                                  'Set `samples_per_epoch` correctly '
                                  'to avoid this warning.')
                if samples_seen >= samples_per_epoch and do_validation:
                    if val_gen:
                        val_outs = self.evaluate_generator(
                            validation_data,
                            nb_val_samples,
                            max_q_size=max_q_size,
                            nb_worker=nb_worker,
                            pickle_safe=pickle_safe)
                    else:
                        # no need for try/except because
                        # data has already been validated
                        val_outs = self.evaluate(
                            val_x,
                            val_y,
                            batch_size=batch_size,
                            sample_weight=val_sample_weights,
                            verbose=0)
                    if not isinstance(val_outs, list):
                        val_outs = [val_outs]
                    # same labels assumed
                    for l, o in zip(out_labels, val_outs):
                        epoch_logs['val_' + l] = o

            callbacks.on_epoch_end(epoch, epoch_logs)
            epoch += 1
            if callback_model.stop_training:
                break

        _stop.set()
        if pickle_safe:
            # Terminate all daemon processes
            for p in generator_threads:
                if p.is_alive():
                    p.terminate()
            data_gen_queue.close()
        callbacks.on_train_end()
        return self.history
    def fit_dataflow(self,
                     dflow,
                     steps_per_epoch,
                     epochs=1,
                     verbose=1,
                     callbacks=None,
                     validation_data=None,
                     validation_steps=None,
                     class_weight=None,
                     max_q_size=10,
                     workers=1,
                     pickle_safe=False,
                     initial_epoch=0):
        """Fits the model on batches pulled one by one from a dataflow.

        Batches are fetched synchronously with `next()` inside the training
        loop; no background queue or worker is started for the training
        data.

        # Arguments
            dflow: a dataflow object exposing `reset_state()` and
                `get_data()` (Tensorpack style). `reset_state()` is
                called once, then `get_data()` is called once and the
                returned iterator is consumed for the whole run.
                Each item it yields must be either
                - a tuple (inputs, targets)
                - a tuple (inputs, targets, sample_weights).
                All arrays should contain the same number of samples.
                The iterator is never restarted, so it has to keep
                yielding batches for every epoch of the run.
            steps_per_epoch: Total number of steps (batches of samples)
                to draw from `dflow` before declaring one epoch
                finished and starting the next epoch. It should typically
                be equal to the number of unique samples of your dataset
                divided by the batch size.
            epochs: integer, index of the epoch at which training stops.
            verbose: verbosity mode, 0, 1, or 2.
            callbacks: list of callbacks to be called during training.
            validation_data: this can be either
                - a generator for the validation data
                - a tuple (inputs, targets)
                - a tuple (inputs, targets, sample_weights).
            validation_steps: Only relevant if `validation_data`
                is a generator. Total number of steps (batches of samples)
                to yield from that generator before stopping.
            class_weight: dictionary mapping class indices to a weight
                for the class.
            max_q_size: forwarded to `evaluate_generator` when
                `validation_data` is a generator; unused otherwise.
            workers: forwarded to `evaluate_generator` when
                `validation_data` is a generator; unused otherwise.
            pickle_safe: forwarded to `evaluate_generator` when
                `validation_data` is a generator; unused otherwise.
            initial_epoch: epoch at which to start training
                (useful for resuming a previous training run)

        # Returns
            A `History` object.

        # Example

        ```python
            class ArraysFromFile(object):
                def __init__(self, path):
                    self.path = path

                def reset_state(self):
                    pass

                def get_data(self):
                    while 1:
                        with open(self.path) as f:
                            for line in f:
                                # create numpy arrays of input data
                                # and labels, from each line in the file
                                x1, x2, y = process_line(line)
                                yield ({'input_1': x1, 'input_2': x2},
                                       {'output': y})

            model.fit_dataflow(ArraysFromFile('/my_file.txt'),
                               steps_per_epoch=10000, epochs=10)
        ```

        # Raises
            ValueError: In case the dataflow yields data in an invalid
                format, `validation_data` is a tuple of the wrong length,
                or `validation_data` is a generator and
                `validation_steps` is not given.
            StopIteration: If the dataflow iterator runs out of batches
                before training is finished.
        """
        epoch = initial_epoch

        do_validation = bool(validation_data)
        self._make_train_function()
        if do_validation:
            self._make_test_function()

        # python 2 has 'next', 3 has '__next__'
        # avoid any explicit version checks
        val_gen = (hasattr(validation_data, 'next')
                   or hasattr(validation_data, '__next__'))
        if val_gen and not validation_steps:
            raise ValueError('When using a generator for validation data, '
                             'you must specify a value for '
                             '`validation_steps`.')

        out_labels = self.metrics_names
        callback_metrics = out_labels + ['val_' + n for n in out_labels]

        # prepare callbacks
        self.history = cbks.History()
        callbacks = [cbks.BaseLogger()] + (callbacks or []) + [self.history]
        if verbose:
            callbacks += [cbks.ProgbarLogger(count_mode='steps')]
        callbacks = cbks.CallbackList(callbacks)

        # it's possible to callback a different model than self:
        if hasattr(self, 'callback_model') and self.callback_model:
            callback_model = self.callback_model
        else:
            callback_model = self
        callbacks.set_model(callback_model)
        callbacks.set_params({
            'epochs': epochs,
            'steps': steps_per_epoch,
            'verbose': verbose,
            'do_validation': do_validation,
            'metrics': callback_metrics,
        })
        callbacks.on_train_begin()

        if do_validation and not val_gen:
            if len(validation_data) == 2:
                val_x, val_y = validation_data
                val_sample_weight = None
            elif len(validation_data) == 3:
                val_x, val_y, val_sample_weight = validation_data
            else:
                raise ValueError('validation_data should be a tuple '
                                 '`(val_x, val_y, val_sample_weight)` '
                                 'or `(val_x, val_y)`. Found: ' +
                                 str(validation_data))
            val_x, val_y, val_sample_weights = self._standardize_user_data(
                val_x, val_y, val_sample_weight)
            # Expose the standardized validation arrays to every callback.
            for cbk in callbacks:
                cbk.validation_data = val_x + [val_y, val_sample_weights]

        # TODO: Tensorpack does some kind of acceleration using
        #     QueueInputTrainer, QueueInput, and EnqueueThread. The
        #     implementation below corresponds to SimpleTrainer which
        #     Tensorpack notes as being slow. I still cannot decipher what
        #     exactly is going on in Tensorpack. For the same per-GPU batchsize
        #     the runtime per epoch seems on par. Perhaps with Tensorpack
        #     implementation using Queue+Thread for dataflow the feed_dict
        #     would be faster. The keras fit_generator does use an enqueuer,
        #     but I did not notice performance difference between using
        #     fit_generator or this mixed-in fit_dataflow method.

        dflow.reset_state()
        batch_iter = dflow.get_data()

        callback_model.stop_training = False
        while epoch < epochs:
            callbacks.on_epoch_begin(epoch)
            # Created before the step loop so it exists even when no step
            # runs; it is only filled in after the last step of the epoch.
            epoch_logs = {}
            steps_done = 0
            batch_index = 0
            while steps_done < steps_per_epoch:
                generator_output = next(batch_iter)

                if not hasattr(generator_output, '__len__'):
                    raise ValueError('output of generator should be '
                                     'a tuple `(x, y, sample_weight)` '
                                     'or `(x, y)`. Found: ' +
                                     str(generator_output))
                if len(generator_output) == 2:
                    x, y = generator_output
                    sample_weight = None
                elif len(generator_output) == 3:
                    x, y, sample_weight = generator_output
                else:
                    raise ValueError('output of generator should be '
                                     'a tuple `(x, y, sample_weight)` '
                                     'or `(x, y)`. Found: ' +
                                     str(generator_output))
                # build batch logs
                batch_logs = {}
                if isinstance(x, list):
                    batch_size = x[0].shape[0]
                elif isinstance(x, dict):
                    batch_size = list(x.values())[0].shape[0]
                else:
                    batch_size = x.shape[0]
                batch_logs['batch'] = batch_index
                batch_logs['size'] = batch_size
                callbacks.on_batch_begin(batch_index, batch_logs)

                outs = self.train_on_batch(x,
                                           y,
                                           sample_weight=sample_weight,
                                           class_weight=class_weight)

                if not isinstance(outs, list):
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)

                batch_index += 1
                steps_done += 1

                # Epoch finished.
                if steps_done >= steps_per_epoch and do_validation:
                    if val_gen:
                        val_outs = self.evaluate_generator(
                            validation_data,
                            validation_steps,
                            max_q_size=max_q_size,
                            workers=workers,
                            pickle_safe=pickle_safe)
                    else:
                        # No need for try/except because
                        # data has already been validated.
                        # The size of the last training batch is reused
                        # as the evaluation batch size.
                        val_outs = self.evaluate(
                            val_x,
                            val_y,
                            batch_size=batch_size,
                            sample_weight=val_sample_weights,
                            verbose=0)
                    if not isinstance(val_outs, list):
                        val_outs = [val_outs]
                    # Same labels assumed.
                    for l, o in zip(out_labels, val_outs):
                        epoch_logs['val_' + l] = o

                # Honour a stop request raised by a batch-level callback
                # right away instead of finishing the epoch first.
                if callback_model.stop_training:
                    break

            callbacks.on_epoch_end(epoch, epoch_logs)
            epoch += 1
            if callback_model.stop_training:
                break

        callbacks.on_train_end()
        return self.history
Esempio n. 20
0
    def fit_with_pseudo_label(self,
                              steps_per_epoch,
                              use_checkpoints=False,
                              class_labels=None,
                              verbose=1,
                              use_multiprocessing=False,
                              shuffle=False,
                              workers=1,
                              max_queue_size=10):
        """Train `self.model` on labelled batches extended with pseudo-labels.

        For every training step the model first predicts on
        `self.no_label_generator`, the arg-max of those predictions is
        stored as that generator's `classes`, one batch is drawn from it and
        concatenated to the labelled batch from `self.train_generator`, and
        a single `train_on_batch` update is run on the combined batch.
        Training runs for `self.epochs` epochs.

        # Arguments
            steps_per_epoch: number of training steps per epoch. Replaced
                by `len(self.train_generator)` when the training generator
                is a `Sequence`.
            use_checkpoints: if True, a `ModelCheckpoint` callback writing
                to "../models_checkpoints/<h5_filename>.h5" is added.
            class_labels: accepted but not used by this method.
            verbose: if truthy, a step-based `ProgbarLogger` is added.
            use_multiprocessing: forwarded to the training data enqueuer.
            shuffle: forwarded to `OrderedEnqueuer` (Sequence input only).
            workers: number of enqueuer workers.
            max_queue_size: maximum size of the enqueuer queue.

        # Returns
            The `History` object stored on `self.model.history`.

        # Raises
            ValueError: if the training generator yields something that is
                not a tuple `(x, y)` or `(x, y, sample_weight)`.
        """
        wait_time = 0.01  # in seconds

        self.model._make_train_function()

        # Generate callbacks
        callback_list = []
        if use_checkpoints:
            # NOTE(review): the monitored quantity is 'val_acc', but this
            # loop never computes validation metrics, so that key will be
            # absent from the epoch logs - confirm the intended monitor.
            checkpoint = ModelCheckpoint("../models_checkpoints/" +
                                         str(self.h5_filename) + ".h5",
                                         monitor='val_acc',
                                         verbose=1,
                                         save_best_only=True,
                                         save_weights_only=True,
                                         mode='auto',
                                         period=1)
            # `append`, not `extend`: a callback object is not iterable.
            callback_list.append(checkpoint)

        # Init train counters
        epoch = 0

        # Prepare display labels.
        out_labels = self.model._get_deduped_metrics_names()
        callback_metrics = out_labels + ['val_' + n for n in out_labels]

        # Prepare train callbacks
        self.model.history = cbks.History()
        callbacks = [cbks.BaseLogger()] + callback_list + \
            [self.model.history]
        if verbose:
            callbacks += [cbks.ProgbarLogger(count_mode='steps')]
        callbacks = cbks.CallbackList(callbacks)

        # it's possible to callback a different model than self:
        if hasattr(self.model, 'callback_model') and self.model.callback_model:
            callback_model = self.model.callback_model
        else:
            callback_model = self.model

        callbacks.set_model(callback_model)

        is_sequence = isinstance(self.train_generator, Sequence)
        if not is_sequence and use_multiprocessing and workers > 1:
            warnings.warn(
                UserWarning('Using a generator with `use_multiprocessing=True`'
                            ' and multiple workers may duplicate your data.'
                            ' Please consider using the`keras.utils.Sequence'
                            ' class.'))

        if is_sequence:
            steps_per_epoch = len(self.train_generator)
        enqueuer = None

        callbacks.set_params({
            'epochs': self.epochs,
            'steps': steps_per_epoch,
            'verbose': verbose,
            'do_validation': True,
            'metrics': callback_metrics,
        })
        callbacks.on_train_begin()

        try:
            if is_sequence:
                enqueuer = OrderedEnqueuer(
                    self.train_generator,
                    use_multiprocessing=use_multiprocessing,
                    shuffle=shuffle)
            else:
                enqueuer = GeneratorEnqueuer(
                    self.train_generator,
                    use_multiprocessing=use_multiprocessing,
                    wait_time=wait_time)
            enqueuer.start(workers=workers, max_queue_size=max_queue_size)
            output_generator = enqueuer.get()

            # Train the model
            # Epochs
            while epoch < self.epochs:
                callbacks.on_epoch_begin(epoch)
                epoch_logs = {}
                steps_done = 0
                batch_index = 0

                # Steps per epoch
                while steps_done < steps_per_epoch:

                    generator_output = next(output_generator)

                    if not hasattr(generator_output, '__len__'):
                        raise ValueError('Output of generator should be '
                                         'a tuple `(x, y, sample_weight)` '
                                         'or `(x, y)`. Found: ' +
                                         str(generator_output))
                    if len(generator_output) == 2:
                        x, y = generator_output
                    elif len(generator_output) == 3:
                        # The sample weights are accepted but not forwarded
                        # to `train_on_batch` below.
                        x, y, _ = generator_output
                    else:
                        raise ValueError('Output of generator should be '
                                         'a tuple `(x, y, sample_weight)` '
                                         'or `(x, y)`. Found: ' +
                                         str(generator_output))

                    #==========================
                    # Mini-batch
                    #==========================
                    print('')
                    print('Generating pseudo-labels...')
                    no_label_output = self.model.predict_generator(
                        self.no_label_generator,
                        # steps=None: presumably the unlabelled generator is
                        # a `Sequence` whose length gives the step count -
                        # TODO confirm
                        None,
                        verbose=1)

                    # Predicted class index for every unlabelled sample.
                    self.no_label_generator.classes = np.argmax(
                        no_label_output, axis=1)

                    # Concat pseudo-labelled samples with the labelled
                    # batch. The combined arrays are what gets trained on,
                    # so they must not be overwritten afterwards.
                    x_pseudo, y_pseudo = next(self.no_label_generator)
                    x = np.concatenate((x, x_pseudo), axis=0)
                    y = np.concatenate((y, y_pseudo), axis=0)

                    # build batch logs
                    batch_logs = {}
                    if isinstance(x, list):
                        batch_size = x[0].shape[0]
                    elif isinstance(x, dict):
                        batch_size = list(x.values())[0].shape[0]
                    else:
                        batch_size = x.shape[0]
                    batch_logs['batch'] = batch_index
                    batch_logs['size'] = batch_size
                    callbacks.on_batch_begin(batch_index, batch_logs)

                    # Runs a single gradient update on a single batch of data
                    scalar_training_loss = self.model.train_on_batch(x=x, y=y)

                    if not isinstance(scalar_training_loss, list):
                        scalar_training_loss = [scalar_training_loss]
                    for l, o in zip(out_labels, scalar_training_loss):
                        batch_logs[l] = o

                    callbacks.on_batch_end(batch_index, batch_logs)

                    #==========================
                    # end Mini-batch
                    #==========================

                    batch_index += 1
                    steps_done += 1

                # Epoch finished: let the callbacks (history, logger,
                # checkpoint) process the epoch before moving on.
                callbacks.on_epoch_end(epoch, epoch_logs)
                epoch += 1

        finally:
            if enqueuer is not None:
                enqueuer.stop()

        callbacks.on_train_end()
        return self.model.history
Esempio n. 21
0
def fit_models(callback_model,
               models,
               generators,
               metrics_names,
               batch_size,
               steps_per_epoch=None,
               epochs=1,
               verbose=1,
               callbacks=None,
               initial_epoch=0):
    """Train several models in lock-step, one batch per model per step.

    On every step each model draws one item from its own generator and runs
    `train_on_batch` on it; the selected outputs of all models are merged
    into a single batch-log dict that is passed to the shared callbacks.

    # Arguments
        callback_model: model handed to the callbacks via `set_model`; its
            `stop_training` flag is reset to False and then polled to end
            training early.
        models: sequence of models to train. Each one receives a fresh
            `History` object as `model.history`.
        generators: one iterator per model, consumed with `next()`. Each
            item must be a tuple `(x, y)` or `(x, y, sample_weight)`.
        metrics_names: one dict per model, mapping a log name to the index
            of the corresponding value in that model's `train_on_batch`
            output list.
        batch_size: value reported as `size` in the batch logs.
        steps_per_epoch: number of steps per epoch. Required, despite the
            `None` default.
        epochs: index of the epoch at which training stops.
        verbose: if truthy, a step-based `ProgbarLogger` is added.
        callbacks: optional list of extra callbacks.
        initial_epoch: epoch index to start from.

    # Returns
        A list with the `History` object of every model, in `models` order.

    # Raises
        ValueError: if `steps_per_epoch` is None, or a generator yields
            something that is not a 2- or 3-element tuple.
    """
    if steps_per_epoch is None:
        # Fail before any callback is started instead of hitting an
        # int/None comparison in the middle of the first epoch.
        raise ValueError('`steps_per_epoch` must be specified. '
                         'Found: None')

    def _stateful(model, attr):
        # Fall back to `model.model` when `model` itself lacks the attribute.
        try:
            return getattr(model, attr)
        except AttributeError:
            return getattr(model.model, attr)

    epoch = initial_epoch

    # Prepare display labels.
    callback_metrics = [n for m in metrics_names for n in m.keys()]

    # prepare callbacks
    stateful_metric_names = []
    for model in models:
        model.history = cbks.History()
        stateful_metric_names.extend(
            _stateful(model, 'stateful_metric_names'))
    _callbacks = [cbks.BaseLogger(stateful_metrics=stateful_metric_names)]
    if verbose:
        _callbacks.append(
            cbks.ProgbarLogger(count_mode='steps',
                               stateful_metrics=stateful_metric_names))
    _callbacks += (callbacks or []) + [model.history for model in models]
    callbacks = cbks.CallbackList(_callbacks)

    # the callbacks observe `callback_model`, not the individual models:
    callbacks.set_model(callback_model)
    callbacks.set_params({
        'epochs': epochs,
        'steps': steps_per_epoch,
        'verbose': verbose,
        'do_validation': False,
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()

    callback_model.stop_training = False
    while epoch < epochs:
        for model in models:
            for metric in _stateful(model, 'stateful_metric_functions'):
                metric.reset_states()
        callbacks.on_epoch_begin(epoch)
        # Fresh dict per epoch so callbacks never see (or keep mutating)
        # the logs object of a previous epoch.
        epoch_logs = {}
        steps_done = 0
        batch_index = 0
        while steps_done < steps_per_epoch:

            # build batch logs
            batch_logs = {}
            batch_logs['batch'] = batch_index
            batch_logs['size'] = batch_size
            callbacks.on_batch_begin(batch_index, batch_logs)

            for model, output_generator, metrics in zip(
                    models, generators, metrics_names):

                generator_output = next(output_generator)

                if not hasattr(generator_output, '__len__'):
                    raise ValueError('Output of generator should be '
                                     'a tuple `(x, y, sample_weight)` '
                                     'or `(x, y)`. Found: ' +
                                     str(generator_output))

                if len(generator_output) == 2:
                    x, y = generator_output
                    sample_weight = None
                elif len(generator_output) == 3:
                    x, y, sample_weight = generator_output
                else:
                    raise ValueError('Output of generator should be '
                                     'a tuple `(x, y, sample_weight)` '
                                     'or `(x, y)`. Found: ' +
                                     str(generator_output))

                outs = model.train_on_batch(x,
                                            y,
                                            sample_weight=sample_weight)

                if not isinstance(outs, list):
                    outs = [outs]

                # Copy the requested outputs into the shared batch logs.
                for name, i in metrics.items():
                    batch_logs[name] = outs[i]

            callbacks.on_batch_end(batch_index, batch_logs)

            batch_index += 1
            steps_done += 1

            # A batch-level callback asked to stop: leave the step loop.
            if callback_model.stop_training:
                break

        callbacks.on_epoch_end(epoch, epoch_logs)
        epoch += 1
        if callback_model.stop_training:
            break

    callbacks.on_train_end()

    return [model.history for model in models]
Esempio n. 22
0
def main():
    """Train a VAE-GAN on MNIST images resized to 64x64 and save its weights.

    The discriminator and the combined ``vaegan`` model are trained
    alternately with ``train_on_batch``; because the loop does not go through
    ``Model.fit``, the Keras callbacks (base logger, TensorBoard, checkpoint,
    history, progress bar) are driven by hand, including the final
    ``on_train_end`` notification.

    Side effects: a ``ModelCheckpoint`` callback targets
    ``./model.{epoch:02d}.h5``, the final weights are saved to
    ``trained.h5``, and the encoder output for one random training image is
    printed.
    """
    import os.path

    import keras.callbacks as cbks

    encoder, decoder, discriminator, vae, vae_loss = create_models()

    batch_size = 32
    half_batch = batch_size // 2
    epochs = 100
    verbose = True
    do_validation = False

    # Only the training images are used; labels and the test split are not.
    (x_train, _), _ = mnist.load_data()

    # Resize every image to 64x64.
    resized = np.zeros((x_train.shape[0], 64, 64), dtype='int32')
    for i, img in enumerate(x_train):
        resized[i] = cv2.resize(img, (64, 64),
                                interpolation=cv2.INTER_CUBIC)
    x_train = resized
    del resized

    # Add a trailing channel axis and normalize pixel values to [-1, 1].
    x_train = np.expand_dims(x_train, -1)
    x_train = (x_train.astype('float32') - 127.5) / 127.5
    x_train = np.clip(x_train, -1., 1.)

    # The discriminator is compiled first and only then marked as not
    # trainable, before the combined model is built and compiled.
    # NOTE(review): presumably this keeps the discriminator weights fixed
    # while the combined model trains -- confirm against the Keras version
    # in use.
    discriminator.compile('rmsprop', 'binary_crossentropy', ['accuracy'])
    discriminator.trainable = False

    model = Model(vae.inputs, discriminator(vae.outputs), name='vaegan')
    model.add_loss(vae_loss)
    model.compile('rmsprop', 'binary_crossentropy', ['accuracy'])

    checkpoint = cbks.ModelCheckpoint(os.path.join('.',
                                                   'model.{epoch:02d}.h5'),
                                      save_weights_only=True)
    steps_per_epoch = x_train.shape[0] // batch_size

    # Names under which the per-batch results are logged below.
    callback_metrics = [
        'disc_loss', 'disc_accuracy', 'vaegan_loss', 'vaegan_accuracy'
    ]

    model.history = cbks.History()
    callbacks = ([cbks.BaseLogger()] +
                 [TensorBoard(batch_size=batch_size), checkpoint] +
                 [model.history])
    if verbose:
        callbacks += [cbks.ProgbarLogger(count_mode='steps')]
    callbacks = cbks.CallbackList(callbacks)

    # It is possible to callback a different model than `model` itself.
    callback_model = getattr(model, 'callback_model', None) or model
    callbacks.set_model(callback_model)
    callbacks.set_params({
        'epochs': epochs,
        'steps': steps_per_epoch,
        'verbose': verbose,
        'do_validation': do_validation,
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()

    # Targets are identical for every batch, so build them once.
    # Discriminator batch: first half real (1), second half fake (0).
    disc_targets = np.ones([batch_size, 1], dtype='float32')
    disc_targets[half_batch:, :] = 0
    # Combined model: every sample is labelled 1.
    gan_targets = np.ones([batch_size, 1], dtype='float32')

    epoch_logs = {}

    for epoch in range(epochs):
        callbacks.on_epoch_begin(epoch)

        for batch_index in range(steps_per_epoch):
            batch_logs = {'batch': batch_index, 'size': batch_size}
            callbacks.on_batch_begin(batch_index, batch_logs)

            rand_indexes = np.random.randint(0,
                                             x_train.shape[0],
                                             size=batch_size)
            real_images = x_train[rand_indexes]
            fake_images = vae.predict(real_images)

            # Train the discriminator on half real / half generated images.
            disc_inputs = np.concatenate(
                [real_images[:half_batch], fake_images[:half_batch]])
            disc_metrics = discriminator.train_on_batch(disc_inputs,
                                                        disc_targets)

            # NOTE(review): the combined model is fed the VAE outputs rather
            # than `real_images` -- confirm this is intended.
            vg_metrics = model.train_on_batch(fake_images, gan_targets)

            batch_logs['disc_loss'] = disc_metrics[0]
            batch_logs['disc_accuracy'] = disc_metrics[1]
            batch_logs['vaegan_loss'] = vg_metrics[0]
            batch_logs['vaegan_accuracy'] = vg_metrics[1]

            callbacks.on_batch_end(batch_index, batch_logs)

        callbacks.on_epoch_end(epoch, epoch_logs)

    # Complete the callback lifecycle, as the other training loops in this
    # file do; without it callbacks never receive their end-of-training hook.
    callbacks.on_train_end()

    rand_indexes = np.random.randint(0, x_train.shape[0], size=1)
    real_images = x_train[rand_indexes]

    model.save_weights('trained.h5')

    # Show the encoder output for one random training image.
    print(encoder.predict(real_images))
Esempio n. 23
0
async def fit_generator(model,
                        generator,
                        steps_per_epoch=None,
                        epochs=1,
                        verbose=1,
                        callbacks=None,
                        validation_data=None,
                        validation_steps=None,
                        class_weight=None,
                        shuffle=True,
                        initial_epoch=0):
    """Coroutine variant of `Model.fit_generator`.

    Every training batch is obtained by awaiting `generator.async_next()`
    and is fed to `model.train_on_batch`.

    # Arguments
        model: Model to train; its train function (and test function when
            validating) is built up front.
        generator: Object exposing `async_next()` and `on_epoch_end()`.
            Each awaited item must be `(x, y)` or `(x, y, sample_weight)`.
            `len(generator)` is used when `steps_per_epoch` is None.
        steps_per_epoch: Number of batches that make up one epoch.
        epochs: Index of the final epoch (training runs while
            `epoch < epochs`).
        verbose: When truthy, a `ProgbarLogger` is added to the callbacks.
        callbacks: Optional list of extra callbacks.
        validation_data: When truthy, passed to the awaited
            `evaluate_generator` once the last step of an epoch is done.
        validation_steps: Forwarded to `evaluate_generator`.
        class_weight: Forwarded to `train_on_batch`.
        shuffle: Accepted but not used by this function.
        initial_epoch: Epoch index at which to start.

    # Returns
        The `History` callback stored on `model.history`.

    # Raises
        ValueError: If the generator yields something that is not a
            2- or 3-element sized object.
    """
    bad_output_msg = ('Output of generator should be '
                      'a tuple `(x, y, sample_weight)` '
                      'or `(x, y)`. Found: ')

    def infer_batch_size(inputs):
        # Data tensors supply no input arrays; count such a step as size 1.
        if inputs is None or len(inputs) == 0:
            return 1
        if isinstance(inputs, list):
            return inputs[0].shape[0]
        if isinstance(inputs, dict):
            return list(inputs.values())[0].shape[0]
        return inputs.shape[0]

    run_validation = bool(validation_data)
    model._make_train_function()
    if run_validation:
        model._make_test_function()

    if steps_per_epoch is None:
        steps_per_epoch = len(generator)

    # Display labels: training metrics plus their `val_` counterparts.
    metric_names = model.metrics_names
    monitored = metric_names + ['val_' + name for name in metric_names]

    # Callback chain: base logger, optional progress bar, user callbacks,
    # and finally the history recorder.
    model.history = cbks.History()
    callback_chain = [
        cbks.BaseLogger(stateful_metrics=model.stateful_metric_names)
    ]
    if verbose:
        callback_chain.append(
            cbks.ProgbarLogger(count_mode='steps',
                               stateful_metrics=model.stateful_metric_names))
    callback_chain.extend((callbacks or []) + [model.history])
    callbacks = cbks.CallbackList(callback_chain)

    # Callbacks may be attached to a model other than `model` itself.
    callback_model = getattr(model, 'callback_model', None) or model
    callbacks.set_model(callback_model)
    callbacks.set_params({
        'epochs': epochs,
        'steps': steps_per_epoch,
        'verbose': verbose,
        'do_validation': run_validation,
        'metrics': monitored,
    })
    callbacks.on_train_begin()

    fetch_batch = generator.async_next

    callback_model.stop_training = False
    # The same dict is handed to `on_epoch_end` for every epoch.
    epoch_logs = {}
    epoch = initial_epoch
    while epoch < epochs:
        for metric_fn in model.stateful_metric_functions:
            metric_fn.reset_states()
        callbacks.on_epoch_begin(epoch)

        step = 0
        while step < steps_per_epoch:
            batch = await fetch_batch()

            if not hasattr(batch, '__len__'):
                raise ValueError(bad_output_msg + str(batch))
            n_fields = len(batch)
            if n_fields == 2:
                x, y = batch
                sample_weight = None
            elif n_fields == 3:
                x, y, sample_weight = batch
            else:
                raise ValueError(bad_output_msg + str(batch))

            batch_logs = {'batch': step, 'size': infer_batch_size(x)}
            callbacks.on_batch_begin(step, batch_logs)

            train_outs = model.train_on_batch(x,
                                              y,
                                              sample_weight=sample_weight,
                                              class_weight=class_weight)
            batch_logs.update(zip(metric_names, to_list(train_outs)))

            callbacks.on_batch_end(step, batch_logs)
            step += 1

            # Validate once the final step of the epoch has been trained.
            if run_validation and step >= steps_per_epoch:
                val_outs = to_list(await evaluate_generator(
                    model, validation_data, validation_steps))
                # Same labels assumed for training and validation outputs.
                epoch_logs.update(
                    ('val_' + name, value)
                    for name, value in zip(metric_names, val_outs))

            if callback_model.stop_training:
                break

        generator.on_epoch_end()
        callbacks.on_epoch_end(epoch, epoch_logs)
        epoch += 1
        if callback_model.stop_training:
            break

    callbacks.on_train_end()
    return model.history
Esempio n. 24
0
def custom_fit_generator(model,
                         generator,
                         steps_per_epoch=None,
                         epochs=1,
                         verbose=1,
                         callbacks=None,
                         validation_data=None,
                         validation_steps=None,
                         class_weight=None,
                         max_queue_size=10,
                         workers=1,
                         use_multiprocessing=False,
                         shuffle=True,
                         initial_epoch=0,
                         display_metrics=None):
    """Variant of Keras `fit_generator` reporting only a subset of metrics.

    The training loop mirrors Keras `fit_generator`, except that the
    `'metrics'` entry handed to the callbacks is a fixed subset of names
    instead of every model output.

    # Arguments
        model: Model to train; its train function (and test function when
            validating) is built up front.
        generator: A generator or a `Sequence` yielding `(x, y)` or
            `(x, y, sample_weight)` batches.
        steps_per_epoch: Number of batches per epoch. May be None only for
            a `Sequence`, in which case `len(generator)` is used.
        epochs: Index of the final epoch (training runs while
            `epoch < epochs`).
        verbose: When truthy, a `ProgbarLogger` is added to the callbacks.
        callbacks: Optional list of extra callbacks.
        validation_data: Either a generator / `Sequence`, or a tuple
            `(val_x, val_y)` / `(val_x, val_y, val_sample_weight)`.
        validation_steps: Number of validation batches; required when
            `validation_data` is a generator that is not a `Sequence`.
        class_weight: Forwarded to `train_on_batch`.
        max_queue_size: Forwarded to the enqueuer and to
            `evaluate_generator`.
        workers: When greater than 0 the generator is wrapped in an
            enqueuer; otherwise it is iterated directly.
        use_multiprocessing: Forwarded to the enqueuer and to
            `evaluate_generator`.
        shuffle: Forwarded to `OrderedEnqueuer` when `generator` is a
            `Sequence` and `workers > 0`.
        initial_epoch: Epoch index at which to start.
        display_metrics: Optional iterable of metric names to pass as the
            `'metrics'` callback parameter. Defaults to
            `loss`, `acc`, `case_loss`, `case_acc` and their `val_`
            counterparts.

    # Returns
        The `History` callback stored on `model.history`.

    # Raises
        ValueError: If `steps_per_epoch` / `validation_steps` is missing
            for a non-`Sequence` generator, or if the generator or
            `validation_data` has an invalid format.
    """
    wait_time = 0.01  # in seconds
    epoch = initial_epoch

    do_validation = bool(validation_data)
    model._make_train_function()
    if do_validation:
        model._make_test_function()

    is_sequence = isinstance(generator, Sequence)
    if not is_sequence and use_multiprocessing and workers > 1:
        warnings.warn(
            UserWarning('Using a generator with `use_multiprocessing=True`'
                        ' and multiple workers may duplicate your data.'
                        ' Please consider using the`keras.utils.Sequence'
                        ' class.'))
    if steps_per_epoch is None:
        if not is_sequence:
            raise ValueError('`steps_per_epoch=None` is only valid for a'
                             ' generator based on the `keras.utils.Sequence`'
                             ' class. Please specify `steps_per_epoch` or use'
                             ' the `keras.utils.Sequence` class.')
        steps_per_epoch = len(generator)

    # python 2 has 'next', 3 has '__next__'
    # avoid any explicit version checks
    val_is_sequence = isinstance(validation_data, Sequence)
    val_gen = (hasattr(validation_data, 'next')
               or hasattr(validation_data, '__next__')
               or val_is_sequence)
    if val_gen and not val_is_sequence and not validation_steps:
        raise ValueError('`validation_steps=None` is only valid for a'
                         ' generator based on the `keras.utils.Sequence`'
                         ' class. Please specify `validation_steps` or use'
                         ' the `keras.utils.Sequence` class.')

    # Labels used to name the values returned by train/evaluate calls.
    out_labels = model.metrics_names

    # Names handed to the callbacks; deliberately a subset rather than
    # every entry of `out_labels`.
    if display_metrics is None:
        callback_metrics = [
            'loss', 'acc', 'case_loss', 'case_acc', 'val_loss', 'val_acc',
            'val_case_loss', 'val_case_acc'
        ]
    else:
        callback_metrics = list(display_metrics)

    # prepare callbacks
    model.history = cbks.History()
    _callbacks = [
        cbks.BaseLogger(stateful_metrics=model.stateful_metric_names)
    ]
    if verbose:
        _callbacks.append(
            cbks.ProgbarLogger(count_mode='steps',
                               stateful_metrics=model.stateful_metric_names))
    _callbacks += (callbacks or []) + [model.history]
    callbacks = cbks.CallbackList(_callbacks)

    # it's possible to callback a different model than model:
    callback_model = getattr(model, 'callback_model', None) or model
    callbacks.set_model(callback_model)
    callbacks.set_params({
        'epochs': epochs,
        'steps': steps_per_epoch,
        'verbose': verbose,
        'do_validation': do_validation,
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()

    enqueuer = None

    try:
        if do_validation and not val_gen:
            # Prepare in-memory validation data and expose it to callbacks.
            if len(validation_data) == 2:
                val_x, val_y = validation_data
                val_sample_weight = None
            elif len(validation_data) == 3:
                val_x, val_y, val_sample_weight = validation_data
            else:
                raise ValueError('`validation_data` should be a tuple '
                                 '`(val_x, val_y, val_sample_weight)` '
                                 'or `(val_x, val_y)`. Found: ' +
                                 str(validation_data))
            val_x, val_y, val_sample_weights = model._standardize_user_data(
                val_x, val_y, val_sample_weight)
            val_data = val_x + val_y + val_sample_weights
            if model.uses_learning_phase and not isinstance(
                    K.learning_phase(), int):
                val_data += [0.]
            for cbk in callbacks:
                cbk.validation_data = val_data

        if workers > 0:
            if is_sequence:
                enqueuer = OrderedEnqueuer(
                    generator,
                    use_multiprocessing=use_multiprocessing,
                    shuffle=shuffle)
            else:
                enqueuer = GeneratorEnqueuer(
                    generator,
                    use_multiprocessing=use_multiprocessing,
                    wait_time=wait_time)
            enqueuer.start(workers=workers, max_queue_size=max_queue_size)
            output_generator = enqueuer.get()
        elif is_sequence:
            output_generator = iter(generator)
        else:
            output_generator = generator

        callback_model.stop_training = False
        # Construct epoch logs.
        epoch_logs = {}
        while epoch < epochs:
            callbacks.on_epoch_begin(epoch)
            steps_done = 0
            batch_index = 0
            while steps_done < steps_per_epoch:
                generator_output = next(output_generator)

                if not hasattr(generator_output, '__len__'):
                    raise ValueError('Output of generator should be '
                                     'a tuple `(x, y, sample_weight)` '
                                     'or `(x, y)`. Found: ' +
                                     str(generator_output))

                if len(generator_output) == 2:
                    x, y = generator_output
                    sample_weight = None
                elif len(generator_output) == 3:
                    x, y, sample_weight = generator_output
                else:
                    raise ValueError('Output of generator should be '
                                     'a tuple `(x, y, sample_weight)` '
                                     'or `(x, y)`. Found: ' +
                                     str(generator_output))

                # build batch logs
                if isinstance(x, list):
                    batch_size = x[0].shape[0]
                elif isinstance(x, dict):
                    batch_size = list(x.values())[0].shape[0]
                else:
                    batch_size = x.shape[0]
                batch_logs = {'batch': batch_index, 'size': batch_size}
                callbacks.on_batch_begin(batch_index, batch_logs)

                outs = model.train_on_batch(x,
                                            y,
                                            sample_weight=sample_weight,
                                            class_weight=class_weight)

                if not isinstance(outs, list):
                    outs = [outs]
                for label, value in zip(out_labels, outs):
                    batch_logs[label] = value

                callbacks.on_batch_end(batch_index, batch_logs)

                batch_index += 1
                steps_done += 1

                # Epoch finished: run validation once.
                if steps_done >= steps_per_epoch and do_validation:
                    if val_gen:
                        val_outs = model.evaluate_generator(
                            validation_data,
                            validation_steps,
                            workers=workers,
                            use_multiprocessing=use_multiprocessing,
                            max_queue_size=max_queue_size)
                    else:
                        # No need for try/except because
                        # data has already been validated.
                        val_outs = model.evaluate(
                            val_x,
                            val_y,
                            batch_size=batch_size,
                            sample_weight=val_sample_weights,
                            verbose=0)
                    if not isinstance(val_outs, list):
                        val_outs = [val_outs]
                    # Same labels assumed.
                    for label, value in zip(out_labels, val_outs):
                        epoch_logs['val_' + label] = value

                if callback_model.stop_training:
                    break

            callbacks.on_epoch_end(epoch, epoch_logs)
            epoch += 1
            if callback_model.stop_training:
                break

    finally:
        # Always stop the background enqueuer, even when training fails.
        if enqueuer is not None:
            enqueuer.stop()

    callbacks.on_train_end()
    return model.history
Esempio n. 25
0
def fit_generator_Ndiff(model,
                        generator,
                        steps_per_epoch=None,
                        batch_size=1,
                        N_diff=5,
                        margin=0.5,
                        epochs=1,
                        verbose=1,
                        callbacks=None,
                        validation_data=None,
                        validation_steps=None,
                        class_weight=None,
                        max_queue_size=10,
                        workers=1,
                        use_multiprocessing=False,
                        shuffle=True,
                        initial_epoch=0):
    """Trains the model on data yielded batch-by-batch by a Python generator.
    The generator is run in parallel to the model, for efficiency.
    For instance, this allows you to do real-time data augmentation
    on images on CPU in parallel to training your model on GPU.
    The use of `keras.utils.Sequence` guarantees the ordering
    and guarantees the single use of every input per epoch when
    using `use_multiprocessing=True`.
    # Arguments
        generator: A generator or an instance of `Sequence`
            (`keras.utils.Sequence`) object in order to avoid
            duplicate data when using multiprocessing.
            The output of the generator must be either
            - a tuple `(inputs, targets)`
            - a tuple `(inputs, targets, sample_weights)`.
            This tuple (a single output of the generator) makes a single
            batch. Therefore, all arrays in this tuple must have the same
            length (equal to the size of this batch). Different batches
            may have different sizes. For example, the last batch of the
            epoch is commonly smaller than the others, if the size of the
            dataset is not divisible by the batch size.
            The generator is expected to loop over its data
            indefinitely. An epoch finishes when `steps_per_epoch`
            batches have been seen by the model.
        steps_per_epoch: Integer.
            Total number of steps (batches of samples)
            to yield from `generator` before declaring one epoch
            finished and starting the next epoch. It should typically
            be equal to the number of samples of your dataset
            divided by the batch size.
            Optional for `Sequence`: if unspecified, will use
            the `len(generator)` as a number of steps.
        epochs: Integer. Number of epochs to train the model.
            An epoch is an iteration over the entire data provided,
            as defined by `steps_per_epoch`.
            Note that in conjunction with `initial_epoch`,
            `epochs` is to be understood as "final epoch".
            The model is not trained for a number of iterations
            given by `epochs`, but merely until the epoch
            of index `epochs` is reached.
        verbose: Integer. 0, 1, or 2. Verbosity mode.
            0 = silent, 1 = progress bar, 2 = one line per epoch.
        callbacks: List of `keras.callbacks.Callback` instances.
            List of callbacks to apply during training.
            See [callbacks](/callbacks).
        validation_data: This can be either
            - a generator for the validation data
            - tuple `(x_val, y_val)`
            - tuple `(x_val, y_val, val_sample_weights)`
            on which to evaluate
            the loss and any model metrics at the end of each epoch.
            The model will not be trained on this data.
        validation_steps: Only relevant if `validation_data`
            is a generator. Total number of steps (batches of samples)
            to yield from `validation_data` generator before stopping.
            Optional for `Sequence`: if unspecified, will use
            the `len(validation_data)` as a number of steps.
        class_weight: Optional dictionary mapping class indices (integers)
            to a weight (float) value, used for weighting the loss function
            (during training only).
            This can be useful to tell the model to
            "pay more attention" to samples from
            an under-represented class.
        max_queue_size: Integer. Maximum size for the generator queue.
            If unspecified, `max_queue_size` will default to 10.
        workers: Integer. Maximum number of processes to spin up
            when using process based threading.
            If unspecified, `workers` will default to 1. If 0, will
            execute the generator on the main thread.
        use_multiprocessing: Boolean. If True, use process based threading.
            If unspecified, `use_multiprocessing` will default to False.
            Note that because
            this implementation relies on multiprocessing,
            you should not pass
            non picklable arguments to the generator
            as they can't be passed
            easily to children processes.
        shuffle: Boolean. Whether to shuffle the training data
            in batch-sized chunks before each epoch.
            Only used with instances of `Sequence` (`keras.utils.Sequence`).
        initial_epoch: Integer.
            Epoch at which to start training
            (useful for resuming a previous training run).
    # Returns
        A `History` object. Its `History.history` attribute is
        a record of training loss values and metrics values
        at successive epochs, as well as validation loss values
        and validation metrics values (if applicable).
    # Example
    ```python
        def generate_arrays_from_file(path):
            while 1:
                with open(path) as f:
                    for line in f:
                        # create numpy arrays of input data
                        # and labels, from each line in the file
                        x1, x2, y = process_line(line)
                        yield ({'input_1': x1, 'input_2': x2}, {'output': y})
        model.fit_generator(generate_arrays_from_file('/my_file.txt'),
                            steps_per_epoch=10000, epochs=10)
    ```
    # Raises
        ValueError: In case the generator yields
            data in an invalid format.
    """
    wait_time = 0.01  # in seconds
    epoch = initial_epoch

    do_validation = bool(validation_data)
    # self._make_train_function()
    # if do_validation:
    #     self._make_test_function()

    is_sequence = isinstance(generator, Sequence)
    # do_validation = True if is_sequence else False

    if not is_sequence and use_multiprocessing and workers > 1:
        warnings.warn(
            UserWarning('Using a generator with `use_multiprocessing=True`'
                        ' and multiple workers may duplicate your data.'
                        ' Please consider using the`keras.utils.Sequence'
                        ' class.'))
    if steps_per_epoch is None:
        if is_sequence:
            steps_per_epoch = len(generator)
        else:
            raise ValueError('`steps_per_epoch=None` is only valid for a'
                             ' generator based on the `keras.utils.Sequence`'
                             ' class. Please specify `steps_per_epoch` or use'
                             ' the `keras.utils.Sequence` class.')

    # python 2 has 'next', 3 has '__next__'
    # avoid any explicit version checks
    val_gen = (hasattr(validation_data, 'next')
               or hasattr(validation_data, '__next__')
               or isinstance(validation_data, Sequence))
    if (val_gen and not isinstance(validation_data, Sequence)
            and not validation_steps):
        raise ValueError('`validation_steps=None` is only valid for a'
                         ' generator based on the `keras.utils.Sequence`'
                         ' class. Please specify `validation_steps` or use'
                         ' the `keras.utils.Sequence` class.')

    # Prepare display labels.
    out_labels = model._get_deduped_metrics_names()
    callback_metrics = out_labels + ['val_' + n for n in out_labels]

    # prepare callbacks
    history = cbks.History()
    callbacks = [cbks.BaseLogger()] + (callbacks or []) + [history]
    if verbose:
        callbacks += [cbks.ProgbarLogger(count_mode='steps')]
    callbacks = cbks.CallbackList(callbacks)

    # # it's possible to callback a different model than self:
    if hasattr(model, 'callback_model') and model.callback_model:
        callback_model = model.callback_model
    else:
        callback_model = model
    callbacks.set_model(callback_model)
    callbacks.set_params({
        'epochs': epochs,
        'steps': steps_per_epoch,
        'verbose': verbose,
        'do_validation': do_validation,
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()

    enqueuer = None
    val_enqueuer = None

    try:
        if do_validation:
            if val_gen:
                if workers > 0:
                    if isinstance(validation_data, Sequence):
                        val_enqueuer = OrderedEnqueuer(
                            validation_data,
                            use_multiprocessing=use_multiprocessing)
                        if validation_steps is None:
                            validation_steps = len(validation_data)
                    else:
                        val_enqueuer = GeneratorEnqueuer(
                            validation_data,
                            use_multiprocessing=use_multiprocessing,
                            wait_time=wait_time)
                    val_enqueuer.start(workers=workers,
                                       max_queue_size=max_queue_size)
                    validation_generator = val_enqueuer.get()
                else:
                    validation_generator = validation_data
            else:
                pass
                # if len(validation_data) == 2:
                #     val_x, val_y = validation_data
                #     val_sample_weights = None
                # elif len(validation_data) == 3:
                #     val_x, val_y, val_sample_weights = validation_data
                # else:
                #     raise ValueError('`validation_data` should be a tuple '
                #                      '`(val_x, val_y, val_sample_weight)` '
                #                      'or `(val_x, val_y)`. Found: ' +
                #                      str(validation_data))
                # val_x, val_y, val_sample_weights = _standardize_user_data(
                #     val_x, val_y, val_sample_weight)
                # val_data = val_x + val_y + val_sample_weights
                # if self.uses_learning_phase and not isinstance(K.learning_phase(), int):
                #     val_data += [0.]
                # for cbk in callbacks:
                #     cbk.validation_data = val_data

        if workers > 0:
            if is_sequence:
                enqueuer = OrderedEnqueuer(
                    generator,
                    use_multiprocessing=use_multiprocessing,
                    shuffle=shuffle)
            else:
                enqueuer = GeneratorEnqueuer(
                    generator,
                    use_multiprocessing=use_multiprocessing,
                    wait_time=wait_time)
            enqueuer.start(workers=workers, max_queue_size=max_queue_size)
            output_generator = enqueuer.get()
        else:
            output_generator = generator

        callback_model.stop_training = False
        # Construct epoch logs.
        epoch_logs = {}
        while epoch < epochs:
            callbacks.on_epoch_begin(epoch)
            steps_done = 0
            batch_index = 0
            while steps_done < steps_per_epoch:
                generator_output = next(output_generator)

                if not hasattr(generator_output, '__len__'):
                    raise ValueError('Output of generator should be '
                                     'batch_size lists ' +
                                     str(generator_output))

                if len(generator_output) == batch_size:
                    # ii_ndiff: the index of the negative sample
                    gen_out = generator_output
                    sample_weight = None
                else:
                    raise ValueError('Output of generator should be '
                                     'batch_size lists ' +
                                     str(generator_output))

                # build batch logs
                batch_logs = {}
                # if isinstance(x, list):
                #     batch_size = x[0].shape[0]
                # elif isinstance(x, dict):
                #     batch_size = list(x.values())[0].shape[0]
                # else:
                #     batch_size = x.shape[0]
                batch_logs['batch'] = batch_index
                batch_logs['size'] = batch_size
                callbacks.on_batch_begin(batch_index, batch_logs)

                # aggregate the losses by inner index n_diff
                loss_mat = np.zeros((batch_size, N_diff))
                for ii_ndiff in range(N_diff):

                    # get the maximum sequence length
                    len_anchor_max, len_same_max, len_diff_max = \
                        get_maximum_length(batch_size=batch_size,
                                           generator_output=gen_out,
                                           index=[ii_ndiff]*batch_size)

                    print(len_anchor_max, len_same_max, len_diff_max)
                    # organize the input for the prediction
                    input_anchor, input_same, input_diff = \
                        make_same_length_batch(batch_size=batch_size,
                                               len_anchor_max=len_anchor_max,
                                               len_same_max=len_same_max,
                                               len_diff_max=len_diff_max,
                                               generator_output=gen_out,
                                               index=[ii_ndiff]*batch_size)

                    output_batch_pred = model.predict_on_batch(
                        [input_anchor, input_same, input_diff])

                    loss = K.eval(
                        triplet_loss_no_mean(output_batch_pred, margin))
                    loss_mat[:, ii_ndiff] = loss

                # this the index of the input which has the maximum loss for each N_diff pairs
                index_max_loss = np.argmax(loss_mat, axis=-1)

                len_anchor_max, len_same_max, len_diff_max = get_maximum_length(
                    batch_size=batch_size,
                    generator_output=gen_out,
                    index=index_max_loss)

                input_anchor, input_same, input_diff = \
                    make_same_length_batch(batch_size=batch_size,
                                           len_anchor_max=len_anchor_max,
                                           len_same_max=len_same_max,
                                           len_diff_max=len_diff_max,
                                           generator_output=gen_out,
                                           index=index_max_loss)

                outs = model.train_on_batch(
                    [input_anchor, input_same, input_diff],
                    None,
                    sample_weight=sample_weight,
                    class_weight=class_weight)

                if not isinstance(outs, list):
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)

                batch_index += 1
                steps_done += 1

                # Epoch finished.
                if steps_done >= steps_per_epoch and do_validation:
                    if val_gen:
                        val_outs = evaluate_generator(
                            model=model,
                            generator=validation_generator,
                            steps=validation_steps,
                            batch_size=batch_size,
                            margin=margin,
                            N_diff=N_diff,
                            workers=0)
                    else:
                        pass
                        # # No need for try/except because
                        # # data has already been validated.
                        # val_outs = model.evaluate(
                        #     val_x, val_y,
                        #     batch_size=batch_size,
                        #     sample_weight=val_sample_weights,
                        #     verbose=0)
                    if not isinstance(val_outs, list):
                        val_outs = [val_outs]
                    # Same labels assumed.
                    for l, o in zip(out_labels, val_outs):
                        epoch_logs['val_' + l] = o

                if callback_model.stop_training:
                    break

            callbacks.on_epoch_end(epoch, epoch_logs)
            epoch += 1
            if callback_model.stop_training:
                break

    finally:
        try:
            if enqueuer is not None:
                enqueuer.stop()
        finally:
            if val_enqueuer is not None:
                val_enqueuer.stop()

    callbacks.on_train_end()
    return history
Esempio n. 26
0
    def gan_fit_generator(
            self,
            generator,
            datacollection,
            steps_per_epoch=None,
            epochs=1,
            verbose=1,
            callbacks_discriminator=None,
            callbacks_gan=None,
            validation_data=None,
            validation_steps=None,
            class_weight=None,
            gan_skipping_factor=1,
            discr_skipping_factor=1,
            validation_freq=1,  ###TBI FIXME
            max_queue_size=10,
            initial_epoch=0,
            recover_discriminator=True):
        """Train the discriminator and the stacked GAN from a batch generator.

        Modelled on Keras' `Model.fit_generator`. For every batch,
        `self.generator` predicts samples from `x`; the inputs and the
        predictions are concatenated, shuffled and split into two halves of
        `batch_size` rows which are fed to the discriminator one after the
        other. Afterwards `self.gan` is trained on `x` against an all-ones
        target while `self.discriminator.trainable` is set to False.

        # Arguments
            generator: iterator yielding `(x, y)` or `(x, y, sample_weight)`;
                consumed with `next()`. `x` is indexed like a list of arrays.
            datacollection: object providing
                `replaceTruthForGAN(generated_array=..., original_truth=...)`
                whose result is indexed like a list of target arrays.
            steps_per_epoch: number of batches per epoch (required).
            epochs: training stops once the epoch counter reaches this value.
            verbose: if truthy, a `ProgbarLogger` is attached.
            callbacks_discriminator: callbacks bound to `self.discriminator`.
            callbacks_gan: callbacks bound to `self.gan`.
            validation_data: generator handed to `evaluate_generator` of both
                models at the end of every epoch; falsy disables validation.
            validation_steps: number of validation batches per evaluation.
            class_weight: forwarded to every `train_on_batch` call.
            gan_skipping_factor: the GAN is trained only on batches whose
                index is a multiple of this value.
            discr_skipping_factor: same, for the discriminator.
            validation_freq: currently unused.
            max_queue_size: currently unused.
            initial_epoch: value the epoch counter starts from.
            recover_discriminator: if true, GAN training is suspended while
                the second value returned by the discriminator's
                `train_on_batch` is below 0.5.

        # Returns
            Tuple `(self.gan.history, self.discriminator.history)`.

        # Raises
            ValueError: if `steps_per_epoch` is None, or if the generator
                yields something other than a 2- or 3-element sequence.
        """
        # Fail fast: batches are pulled with `next()`, so the length of an
        # epoch cannot be inferred from the generator itself.
        if steps_per_epoch is None:
            raise ValueError('`steps_per_epoch` must be specified '
                             'for `gan_fit_generator`.')

        import keras.callbacks as cbks
        from keras.utils.generic_utils import to_list
        import numpy as np
        from sklearn.utils import shuffle

        epoch = initial_epoch
        do_validation = bool(validation_data)

        d_out_labels = ['dis_' + n for n in self.discriminator.metrics_names]
        g_out_labels = ['gan_' + n for n in self.gan.metrics_names]

        d_callback_metrics = d_out_labels + ['val_' + n for n in d_out_labels]
        g_callback_metrics = g_out_labels + ['val_' + n for n in g_out_labels]

        # prepare callbacks
        self.discriminator.history = cbks.History()
        self.gan.history = cbks.History()
        _callbacks = [
            cbks.BaseLogger(
                stateful_metrics=self.discriminator.stateful_metric_names),
            cbks.BaseLogger(stateful_metrics=self.gan.stateful_metric_names),
        ]
        if verbose:
            # one progress bar is enough; it is configured from the GAN model
            _callbacks.append(
                cbks.ProgbarLogger(
                    count_mode='steps',
                    stateful_metrics=self.gan.stateful_metric_names))

        callbacks_gan = list(callbacks_gan or [])
        callbacks_discriminator = list(callbacks_discriminator or [])
        for c in callbacks_gan:
            c.set_model(self.gan)
        for c in callbacks_discriminator:
            c.set_model(self.discriminator)

        _callbacks += callbacks_gan + callbacks_discriminator + [
            self.discriminator.history, self.gan.history
        ]
        callbacks = cbks.CallbackList(_callbacks)

        callbacks.set_params({
            'epochs': epochs,
            'steps': steps_per_epoch,
            'verbose': verbose,
            'do_validation': do_validation,
            'metrics': d_callback_metrics + g_callback_metrics,
        })
        #newer keras callbacks._call_begin_hook('train')
        callbacks.on_train_begin()

        # The training generator is needed whether or not validation runs;
        # binding it only in the validation branch left it undefined otherwise.
        output_generator = generator
        val_enqueuer_gen = validation_data

        ## callbacks.model.stop_training = False ##FIXME TBI
        # Construct epoch logs.
        epoch_logs = {}
        skip_gan_training = False
        while epoch < epochs:
            for m in self.discriminator.stateful_metric_functions:
                m.reset_states()
            for m in self.gan.stateful_metric_functions:
                m.reset_states()
            callbacks.on_epoch_begin(epoch)
            steps_done = 0
            batch_index = 0
            while steps_done < steps_per_epoch:
                generator_output = next(output_generator)

                if (not hasattr(generator_output, '__len__')
                        or len(generator_output) not in (2, 3)):
                    raise ValueError('Output of generator should be '
                                     'a tuple `(x, y, sample_weight)` '
                                     'or `(x, y)`. Found: ' +
                                     str(generator_output))
                if len(generator_output) == 2:
                    x, y = generator_output
                    sample_weight = None
                else:
                    x, y, sample_weight = generator_output

                if x is None or len(x) == 0:
                    # Handle data tensors support when no input given
                    # step-size = 1 for data tensors
                    batch_size = 1
                elif isinstance(x, list):
                    batch_size = x[0].shape[0]
                elif isinstance(x, dict):
                    batch_size = list(x.values())[0].shape[0]
                else:
                    batch_size = x.shape[0]
                # build batch logs
                batch_logs = {'batch': batch_index, 'size': batch_size}
                callbacks.on_batch_begin(batch_index, batch_logs)

                # `predict` returns a bare array for a single-output model;
                # wrap it so that `x_gen[i]` always selects an output.
                x_gen = to_list(self.generator.predict(x))

                # this needs to be more generic and actually done for every
                # list item; replaceTruthForGAN gives a list
                adapted_truth_data = datacollection.replaceTruthForGAN(
                    generated_array=np.ones(batch_size, dtype='float32'),
                    original_truth=y)
                adapted_truth_generated = datacollection.replaceTruthForGAN(
                    generated_array=np.zeros(batch_size, dtype='float32'),
                    original_truth=y)

                y_dis = [
                    np.concatenate(
                        [adapted_truth_data[i], adapted_truth_generated[i]],
                        axis=0)
                    for i in range(len(adapted_truth_data))
                ]
                x_dis = [
                    np.concatenate([x[i], x_gen[i]], axis=0)
                    for i in range(len(x))
                ]

                # The same seed is used for inputs and targets so that both
                # receive the identical permutation.
                y_dis_new = [
                    shuffle(n, random_state=steps_done) for n in y_dis
                ]
                x_dis_new = [
                    shuffle(n, random_state=steps_done) for n in x_dis
                ]

                # Split into two halves so every discriminator update sees
                # `batch_size` rows.
                y_dis_b1 = [a[:batch_size, ...] for a in y_dis_new]
                y_dis_b2 = [a[batch_size:, ...] for a in y_dis_new]
                x_dis_b1 = [a[:batch_size, ...] for a in x_dis_new]
                x_dis_b2 = [a[batch_size:, ...] for a in x_dis_new]

                ## FIXME: also here would be the place to implement weighting
                ## of discr versus gen
                # NOTE(review): `sample_weight` is passed unshuffled to both
                # halves — confirm this is intended when weights are used.
                if not batch_index % discr_skipping_factor:
                    self.discriminator.trainable = True
                    self.discriminator.train_on_batch(
                        x_dis_b1,
                        y_dis_b1,
                        sample_weight=sample_weight,
                        class_weight=class_weight)

                    # Only the second half's results are logged.
                    outs = to_list(
                        self.discriminator.train_on_batch(
                            x_dis_b2,
                            y_dis_b2,
                            sample_weight=sample_weight,
                            class_weight=class_weight))

                    if recover_discriminator:
                        # presumably outs[1] is an accuracy-like metric —
                        # TODO confirm against the discriminator's compile()
                        skip_gan_training = bool(outs[1] < 0.5)
                    for l, o in zip(d_out_labels, outs):
                        batch_logs[l] = o

                if (not skip_gan_training) and (
                        not batch_index % gan_skipping_factor):
                    self.discriminator.trainable = False
                    y_gen = np.ones(batch_size, dtype='float32')
                    outs = to_list(
                        self.gan.train_on_batch(
                            x,
                            y_gen,
                            sample_weight=sample_weight,
                            class_weight=class_weight))
                    for l, o in zip(g_out_labels, outs):
                        batch_logs[l] = o

                #callbacks._call_batch_hook('train', 'end', batch_index, batch_logs)
                callbacks.on_batch_end(batch_index, batch_logs)

                batch_index += 1
                steps_done += 1

                # Epoch finished.
                if steps_done >= steps_per_epoch and do_validation:
                    ## this evaluate will get problems with the truth definition
                    ## needs to be fixed in the generator? Or just make traindata do it?
                    val_outs = to_list(
                        self.discriminator.evaluate_generator(
                            val_enqueuer_gen,
                            validation_steps,
                            workers=0))
                    # Same labels assumed.
                    for l, o in zip(d_out_labels, val_outs):
                        epoch_logs['val_' + l] = o

                    val_outs = to_list(
                        self.gan.evaluate_generator(
                            val_enqueuer_gen,
                            validation_steps,
                            workers=0))
                    # Same labels assumed.
                    for l, o in zip(g_out_labels, val_outs):
                        epoch_logs['val_' + l] = o

                #if callbacks.model.stop_training:  ##FIXME TBI
                #    break

            callbacks.on_epoch_end(epoch, epoch_logs)
            epoch += 1
            #if callbacks.model.stop_training:  ##FIXME TBI
            #    break

        #callbacks._call_end_hook('train')
        callbacks.on_train_end()
        return self.gan.history, self.discriminator.history
Esempio n. 27
0
    def _fit(self,
             f,
             ins,
             out_labels=None,
             batch_size=128,
             nb_epoch=100,
             verbose=1,
             callbacks=None,
             val_f=None,
             val_ins=None,
             shuffle=True,
             metrics=None):
        '''
            Abstract fit function for f(*ins). Assume that f returns a list, labelled by out_labels.

            Arguments:
                f: callable invoked as f(*ins_batch) for every batch.
                ins: list of input arrays; len(ins[0]) is the sample count.
                out_labels: names paired with the values returned by f
                    (and by the validation loop, prefixed with 'val_').
                batch_size: number of samples per batch.
                nb_epoch: number of epochs to run.
                verbose: if truthy, prints the sample counts and attaches
                    a BaseLogger callback.
                callbacks: extra callbacks appended after the built-in ones.
                val_f, val_ins: validation function and inputs; validation
                    runs after the last batch of an epoch when both are set.
                shuffle: True shuffles sample indices every epoch, 'batch'
                    shuffles via batch_shuffle, falsy keeps the order.
                metrics: forwarded to the callbacks as params['metrics'].

            Returns:
                The History callback that recorded the run.
        '''
        # None instead of mutable defaults so that no list object is shared
        # between calls (metrics is handed to the callbacks by reference).
        out_labels = [] if out_labels is None else out_labels
        callbacks = [] if callbacks is None else callbacks
        metrics = [] if metrics is None else metrics

        do_validation = False
        if val_f and val_ins:
            do_validation = True
            if verbose:
                print("Train on %d samples, validate on %d samples" %
                      (len(ins[0]), len(val_ins[0])))

        nb_train_sample = len(ins[0])
        index_array = np.arange(nb_train_sample)

        history = cbks.History()
        if verbose:
            callbacks = [history, cbks.BaseLogger()] + callbacks
        else:
            callbacks = [history] + callbacks
        callbacks = cbks.CallbackList(callbacks)

        callbacks._set_model(self)
        callbacks._set_params({
            'batch_size': batch_size,
            'nb_epoch': nb_epoch,
            'nb_sample': nb_train_sample,
            'verbose': verbose,
            'do_validation': do_validation,
            'metrics': metrics,
        })
        callbacks.on_train_begin()

        self.stop_training = False
        for epoch in range(nb_epoch):
            callbacks.on_epoch_begin(epoch)
            if shuffle == 'batch':
                index_array = batch_shuffle(index_array, batch_size)
            elif shuffle:
                np.random.shuffle(index_array)

            # Bound once per epoch so on_epoch_end always receives a dict,
            # even when there are no batches at all.
            epoch_logs = {}
            batches = make_batches(nb_train_sample, batch_size)
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                try:
                    ins_batch = slice_X(ins, batch_ids)
                except TypeError:
                    # Adjacent literals keep the source indentation out of
                    # the message text.
                    raise Exception('TypeError while preparing batch. '
                                    'If using HDF5 input data, '
                                    'pass shuffle="batch".\n')

                batch_logs = {}
                batch_logs['batch'] = batch_index
                batch_logs['size'] = len(batch_ids)
                callbacks.on_batch_begin(batch_index, batch_logs)
                outs = f(*ins_batch)
                if not isinstance(outs, list):
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)

                # validation runs once, after the last batch of the epoch
                if batch_index == len(batches) - 1 and do_validation:
                    # replace with self._evaluate
                    val_outs = self._test_loop(val_f,
                                               val_ins,
                                               batch_size=batch_size,
                                               verbose=0)
                    if not isinstance(val_outs, list):
                        val_outs = [val_outs]
                    # same labels assumed
                    for l, o in zip(out_labels, val_outs):
                        epoch_logs['val_' + l] = o

            callbacks.on_epoch_end(epoch, epoch_logs)
            if self.stop_training:
                break

        callbacks.on_train_end()
        return history
Esempio n. 28
0
def fit_tfrecord(train_model,
                 nb_train_sample,
                 batch_size,
                 nb_epoch=10,
                 verbose=1,
                 callbacks=None,
                 initial_epoch=0):
    """Train `train_model` on data delivered by TensorFlow queue runners.

    No arrays are fed from Python: the compiled train function takes the
    Keras learning phase as its only input, and the queue runners started
    here are expected to supply the data.

    Arguments:
        train_model: compiled model exposing `train_function`, `optimizer`,
            `total_loss`, `metrics_names` and `metrics_tensors`.
        nb_train_sample: samples per epoch; each epoch runs
            `nb_train_sample // batch_size` steps.
        batch_size: reported to the callbacks as the size of every batch.
        nb_epoch: epoch index at which training stops (exclusive).
        verbose: if truthy, a `ProgbarLogger` is attached.
        callbacks: optional list of extra callbacks.
        initial_epoch: epoch index to start from.

    Returns:
        The `History` callback stored on `train_model.history`.

    Raises:
        RuntimeError: if the model has no `train_function` attribute.
    """
    def _make_train_function(model):
        # Build the train function lazily, with the learning phase as the
        # sole placeholder.
        if not hasattr(model, 'train_function'):
            raise RuntimeError('You must compile your model before using it.')
        if model.train_function is None:
            inputs = [K.learning_phase()]

            training_updates = model.optimizer.get_updates(
                model._collected_trainable_weights, model.constraints,
                model.total_loss)
            updates = model.updates + training_updates

            # returns loss and metrics. Updates weights at each call.
            model.train_function = K.function(inputs, [model.total_loss] +
                                              model.metrics_tensors,
                                              updates=updates)

    # learning phase = 1.
    # NOTE(review): assumes a pre-existing train_function also takes only
    # the learning phase as input — confirm.
    ins = [1.]

    _make_train_function(train_model)
    f = train_model.train_function

    # prepare display labels
    out_labels = train_model.metrics_names

    # rename duplicated metrics name
    # (can happen with an output layer shared among multiple dataflows)
    deduped_out_labels = []
    for i, label in enumerate(out_labels):
        new_label = label
        if out_labels.count(label) > 1:
            dup_idx = out_labels[:i].count(label)
            new_label += '_' + str(dup_idx + 1)
        deduped_out_labels.append(new_label)
    out_labels = deduped_out_labels

    callback_metrics = copy.copy(out_labels)

    train_model.history = cbks.History()
    # `callbacks or []` also accepts None.
    callbacks = [cbks.BaseLogger()] + (callbacks or []) + [train_model.history]
    if verbose:
        callbacks += [cbks.ProgbarLogger()]
    callbacks = cbks.CallbackList(callbacks)

    callback_model = train_model

    callbacks.set_model(callback_model)
    callbacks.set_params({
        'batch_size': batch_size,
        'epochs': nb_epoch,
        'samples': nb_train_sample,
        'verbose': verbose,
        'do_validation': False,
        'metrics': callback_metrics or [],
    })
    callbacks.on_train_begin()
    callback_model.stop_training = False

    sess = K.get_session()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for epoch in range(initial_epoch, nb_epoch):
            callbacks.on_epoch_begin(epoch)

            epoch_logs = {}
            for batch_index in range(0, nb_train_sample // batch_size):
                batch_logs = {}
                batch_logs['batch'] = batch_index
                batch_logs['size'] = batch_size
                callbacks.on_batch_begin(batch_index, batch_logs)
                outs = f(ins)
                if not isinstance(outs, list):
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)

            callbacks.on_epoch_end(epoch, epoch_logs)
            if callback_model.stop_training:
                break
        callbacks.on_train_end()
    finally:
        # Stop the queue-runner threads even when training raised, so they
        # do not outlive the call.
        coord.request_stop()
        coord.join(threads)
        # sess.close()

    return train_model.history
    def fit_tfrecord(self,
                     steps_per_epoch,
                     epochs=1,
                     verbose=1,
                     callbacks=None,
                     validation_steps=None,
                     initial_epoch=0):
        """Train the model with `self.train_function`, no data fed from Python.

        The train function is called with at most the learning-phase flag;
        queue runners are started here before the loop.

        Arguments:
            steps_per_epoch: number of train-function calls per epoch.
            epochs: epoch index at which training stops (exclusive).
            verbose: if truthy, a step-counting `ProgbarLogger` is attached.
            callbacks: optional list of extra callbacks.
            validation_steps: steps passed to `_validate_tfrecord`; required
                when `self.val_inputs` is non-empty.
            initial_epoch: epoch index to start from.

        Returns:
            The `History` callback stored on `self.history`.

        Raises:
            ValueError: if validation inputs exist but `validation_steps`
                is not given.
        """
        epoch = initial_epoch

        self._make_tfrecord_train_function()

        do_validation = bool(len(self.val_inputs) > 0)
        if do_validation and not validation_steps:
            raise ValueError('When using a validation batch, '
                             'you must specify a value for '
                             '`validation_steps`.')

        # Prepare display labels.
        out_labels = self._get_deduped_metrics_names()

        if do_validation:
            callback_metrics = copy.copy(out_labels) + [
                'val_' + n for n in out_labels
            ]
        else:
            callback_metrics = copy.copy(out_labels)

        # prepare callbacks
        self.history = cbks.History()
        callbacks = [cbks.BaseLogger()] + (callbacks or []) + [self.history]
        if verbose:
            callbacks += [cbks.ProgbarLogger(count_mode='steps')]
        callbacks = cbks.CallbackList(callbacks)

        # it's possible to callback a different model than self:
        if hasattr(self, 'callback_model') and self.callback_model:
            callback_model = self.callback_model
        else:
            callback_model = self
        callbacks.set_model(callback_model)
        callbacks.set_params({
            'epochs': epochs,
            'steps': steps_per_epoch,
            'verbose': verbose,
            'do_validation': do_validation,
            'metrics': callback_metrics,
        })
        callbacks.on_train_begin()

        if do_validation:
            val_sample_weight = None
            for cbk in callbacks:
                cbk.validation_data = [
                    self.val_inputs, self.y_val, val_sample_weight
                ]

        try:
            sess = K.get_session()
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            callback_model.stop_training = False
            while epoch < epochs:
                callbacks.on_epoch_begin(epoch)
                # Construct epoch logs once per epoch so on_epoch_end never
                # sees an unbound name or the previous epoch's values when
                # the step loop does not run.
                epoch_logs = {}
                steps_done = 0
                batch_index = 0
                while steps_done < steps_per_epoch:
                    # build batch logs
                    batch_logs = {
                        'batch': batch_index,
                        'size': self.inputs[0].shape[0].value
                    }
                    callbacks.on_batch_begin(batch_index, batch_logs)

                    # feed the learning phase (1.) only when it is a
                    # placeholder rather than a fixed int
                    if self.uses_learning_phase and not isinstance(
                            K.learning_phase(), int):
                        ins = [1.]
                    else:
                        ins = []
                    outs = self.train_function(ins)

                    if not isinstance(outs, list):
                        outs = [outs]
                    for l, o in zip(out_labels, outs):
                        batch_logs[l] = o

                    callbacks.on_batch_end(batch_index, batch_logs)

                    batch_index += 1
                    steps_done += 1

                    # Epoch finished.
                    if steps_done >= steps_per_epoch and do_validation:
                        val_outs = self._validate_tfrecord(
                            steps=validation_steps)
                        if not isinstance(val_outs, list):
                            val_outs = [val_outs]
                        # Same labels assumed.
                        for l, o in zip(out_labels, val_outs):
                            epoch_logs['val_' + l] = o

                callbacks.on_epoch_end(epoch, epoch_logs)
                epoch += 1
                if callback_model.stop_training:
                    break

        finally:
            # TODO: If you close the queue, you can't open it again..
            # coord.request_stop()
            # coord.join(threads)
            pass

        callbacks.on_train_end()
        return self.history
Esempio n. 30
0
def train_model(name,
                ftrain,
                generator,
                samples_per_epoch,
                nb_epoch,
                verbose=1,
                callbacks=None,
                ftest=None,
                test_data=None,
                validation_data=None,
                nb_val_samples=None,
                saver=None,
                gif=None):
    """
    Main training loop.
    modified from Keras fit_generator

    Arguments:
        name: suffix of the output file "./outputs/out_costs_<name>".
        ftrain: callable `ftrain(x, y, counter)` returning
            `(samples, loss, nll_loss)`.
        generator: iterator yielding `(x, y)`; the yielded `y` is discarded
            and rebuilt as `x` shifted by one step along axis 1.
        samples_per_epoch: an epoch ends once this many samples were seen.
        nb_epoch: number of epochs.
        verbose: if truthy, a `ProgbarLogger` is attached.
        callbacks: optional list of extra callbacks.
        ftest: optional callable evaluated after training; it receives the
            unpacked items of `validation_data` / `test_data` and returns
            `(_, cost, nll_loss)`.
        test_data, validation_data: iterators consumed by `ftest`.
        nb_val_samples: number of evaluation *batches* drawn from each
            iterator (the counter advances by one per batch).
        saver: optional callable invoked as `saver(epoch)` after each epoch.
        gif: if truthy, shows a live matplotlib preview every 100 updates
            (relies on the module-level `ims` image shape).
    """
    if gif:
        plt.subplot(121)
        IM = plt.imshow(np.random.uniform(0, 256, ims).astype('uint8'),
                        interpolation="none")
        plt.subplot(122)
        IM2 = plt.imshow(np.random.uniform(0, 256, ims).astype('uint8'),
                         interpolation="none")
        plt.draw()
        plt.pause(.001)

    self = {}  # NOTE(review): not used in the visible code
    epoch = 0
    counter = 0
    out_labels = ['loss', 'nll', 'time']  # self.metrics_names
    callback_metrics = out_labels + ['val_' + n for n in out_labels]
    # columns 0/1 hold the per-epoch loss / nll; columns 2/3 stay zero
    train_costs = np.zeros((nb_epoch, 4))

    # prepare callbacks
    history = cbks.History()
    callbacks = [cbks.BaseLogger()] + (callbacks or []) + [history]
    if verbose:
        callbacks += [cbks.ProgbarLogger()]
    callbacks = cbks.CallbackList(callbacks)

    callbacks.set_params({
        'epochs': nb_epoch,
        'samples': samples_per_epoch,
        'verbose': verbose,
        'metrics': callback_metrics,
    })
    callbacks.on_train_begin()

    while epoch < nb_epoch:
        callbacks.on_epoch_begin(epoch)
        # construct epoch logs up front so on_epoch_end always gets a dict,
        # even if the batch loop below does not run
        epoch_logs = {}
        samples_seen = 0
        batch_index = 0
        while samples_seen < samples_per_epoch:
            x, y = next(generator)
            # next-step prediction: target is the input shifted by one
            y = x[:, 1:]
            x = x[:, :-1]
            # build batch logs
            batch_logs = {}
            if type(x) is list:
                batch_size = len(x[0])
            elif type(x) is dict:
                batch_size = len(list(x.values())[0])
            else:
                batch_size = len(x)
            batch_logs['batch'] = batch_index
            batch_logs['size'] = batch_size
            callbacks.on_batch_begin(batch_index, batch_logs)

            t1 = time.time()
            samples, losses, nll_loss = ftrain(x, y, counter)
            train_costs[epoch, 0] += losses
            train_costs[epoch, 1] += nll_loss
            outs = (losses, nll_loss) + (time.time() - t1, )
            counter += 1

            if (counter % 100 == 0) and gif:
                for v, u in zip(samples[0], y[0]):
                    IM.set_data(((v.reshape(ims) + 1) * 127.5).astype('uint8'))
                    IM2.set_data(
                        ((u.reshape(ims) + 1) * 127.5).astype('uint8'))
                    plt.draw()
                    plt.pause(.01)

            for l, o in zip(out_labels, outs):
                batch_logs[l] = o

            callbacks.on_batch_end(batch_index, batch_logs)

            batch_index += 1
            samples_seen += batch_size

        train_costs[epoch, :] = train_costs[epoch, :] / samples_per_epoch

        if saver is not None:
            saver(epoch)

        callbacks.on_epoch_end(epoch, epoch_logs)
        epoch += 1

    # save_gif(samples,
    out_costs = 0.
    if ftest is not None:
        val_seen = 0
        val_cost = 0.
        test_cost = 0.
        val_nll = 0.
        test_nll = 0.
        while val_seen < nb_val_samples:
            _, valc, v_nll_loss = ftest(*next(validation_data))
            _, talc, t_nll_loss = ftest(*next(test_data))
            val_cost += valc
            test_cost += talc
            # accumulate the NLL as well; previously it stayed at zero and
            # was divided by the last batch's loss
            val_nll += v_nll_loss
            test_nll += t_nll_loss
            val_seen += 1
        if val_seen:
            # average over the number of evaluated batches
            val_cost /= val_seen
            test_cost /= val_seen
            val_nll /= val_seen
            test_nll /= val_seen
        out_costs = val_cost, test_cost, val_nll, test_nll
        # single-argument form prints the same text on Python 2 and 3
        print("Val:  %s Test:  %s" % (val_cost, test_cost))

    # _stop.set()
    callbacks.on_train_end()
    # `train_costs` and `out_costs` differ in shape; build the two-element
    # object array explicitly because recent NumPy refuses to infer a ragged
    # array from a plain list.
    payload = np.empty(2, dtype=object)
    payload[0] = train_costs
    payload[1] = out_costs
    np.save("./outputs/out_costs_" + name, payload)