Example #1
    def fit_generator(self,
                      generator,
                      n_steps_per_epoch,
                      n_epochs=1,
                      validation_data=None,
                      n_validation_steps=None,
                      callbacks=None):
        """Train the network on batches of data generated from `generator`

        :param generator: a generator yielding batches indefinitely, where each
         batch is a tuple of (inputs, targets)
        :type generator: generator
        :param n_steps_per_epoch: number of batches to train on in one epoch
        :type n_steps_per_epoch: int
        :param n_epochs: number of epochs to train the model
        :type n_epochs: int
        :param validation_data: generator yielding batches to evaluate the loss
         on at the end of each epoch, where each batch is a tuple of (inputs,
         targets)
        :type validation_data: generator
        :param n_validation_steps: number of batches to evaluate on from
         `validation_data`
        :type n_validation_steps: int
        :param callbacks: callbacks to be used during training
        :type callbacks: list[object]
        :raises RuntimeError: if only one of `validation_data` and
         `n_validation_steps` is passed in
        :raises ValueError: if `generator` yields output that is not a tuple
         of (inputs, targets)
        """

        default_callbacks = self._load_default_callbacks()
        default_callbacks.append(ProgbarLogger(count_mode='steps'))
        if callbacks:
            default_callbacks.extend(callbacks)
        callbacks = CallbackList(default_callbacks)

        self._assert_compiled()

        invalid_inputs = ((validation_data is not None
                           and not n_validation_steps)
                          or (n_validation_steps and validation_data is None))
        if invalid_inputs:
            msg = ('`validation_data` and `n_validation_steps` must both be '
                   'passed, or neither.')
            raise RuntimeError(msg)

        if self.device:
            self.network.to(self.device)

        metrics = ['loss']
        if self.n_outputs > 1:
            for idx_output in range(1, self.n_outputs + 1):
                metrics.append('loss{}'.format(idx_output))
        if validation_data is not None:
            metrics.append('val_loss')
            if self.n_outputs > 1:
                for idx_output in range(1, self.n_outputs + 1):
                    metrics.append('val_loss{}'.format(idx_output))
        for metric_name in self.metric_names:
            metrics.append(metric_name)
            if validation_data is not None:
                metrics.append('val_{}'.format(metric_name))

        callbacks.set_params({
            'epochs': n_epochs,
            'metrics': metrics,
            'steps': n_steps_per_epoch,
            'verbose': True
        })
        callbacks.set_model(self)

        callbacks.on_train_begin()
        for idx_epoch in range(n_epochs):
            if self.stop_training:
                break

            epoch_logs = {}
            callbacks.on_epoch_begin(idx_epoch)

            for idx_batch in range(n_steps_per_epoch):
                batch_logs = {'batch': idx_batch, 'size': 1}
                callbacks.on_batch_begin(idx_batch, batch_logs)

                generator_output = next(generator)
                if len(generator_output) != 2:
                    msg = ('Output of generator should be a tuple of '
                           '(inputs, targets), but instead got a {}: '
                           '{}.').format(type(generator_output),
                                         str(generator_output))
                    raise ValueError(msg)
                inputs, targets = generator_output
                train_outputs = self.train_on_batch(inputs, targets)

                batch_logs['loss'] = train_outputs[0]
                if self.n_outputs > 1:
                    for idx_output in range(1, self.n_outputs + 1):
                        batch_logs['loss{}'.format(idx_output)] = (
                            train_outputs[idx_output])

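                # train_outputs layout: total loss first, then (for multiple
                # outputs) one per-output loss, then one value per metric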
                idx_metric_values = (1 if self.n_outputs == 1 else
                                     self.n_outputs + 1)
                it = zip(self.metric_names, train_outputs[idx_metric_values:])
                for metric_name, train_output in it:
                    batch_logs[metric_name] = train_output
                callbacks.on_batch_end(idx_batch, batch_logs)

                if self.stop_training:
                    break

            if validation_data:
                val_outputs = self.evaluate_generator(validation_data,
                                                      n_validation_steps)

                epoch_logs['val_loss'] = val_outputs[0]
                if self.n_outputs > 1:
                    for idx_output in range(1, self.n_outputs + 1):
                        epoch_logs['val_loss{}'.format(idx_output)] = (
                            val_outputs[idx_output])

                idx_metric_values = (1 if self.n_outputs == 1 else
                                     self.n_outputs + 1)
                it = zip(self.metric_names, val_outputs[idx_metric_values:])
                for metric_name, val_output in it:
                    metric_name = 'val_{}'.format(metric_name)
                    epoch_logs[metric_name] = val_output
            callbacks.on_epoch_end(idx_epoch, epoch_logs)
        callbacks.on_train_end()
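
A minimal usage sketch for `fit_generator` follows. It is a hypothetical call site: `model` stands for an already-compiled instance of the class that defines the method, and `batch_generator` is an assumed helper; only the generator contract (an endless stream of (inputs, targets) tuples) comes from the docstring above.

# Hypothetical usage sketch: `model` is assumed to be an already-compiled
# instance of the class defining `fit_generator`.
import torch

def batch_generator(x, y, batch_size):
    # yield (inputs, targets) batches indefinitely, as `fit_generator` expects
    n_samples = x.shape[0]
    while True:
        idx = torch.randperm(n_samples)[:batch_size]
        yield x[idx], y[idx]

x, y = torch.randn(1024, 10), torch.randn(1024, 1)
model.fit_generator(batch_generator(x, y, batch_size=32),
                    n_steps_per_epoch=1024 // 32,
                    n_epochs=5,
                    validation_data=batch_generator(x, y, batch_size=32),
                    n_validation_steps=8)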
Example #2
def GanTrain(discriminator,
             generator,
             opt,
             global_batch_size,
             warmup_epochs,
             datapath,
             EventsperFile,
             nEvents,
             WeightsDir,
             mod=0,
             nb_epochs=30,
             batch_size=128,
             latent_size=128,
             gen_weight=6,
             aux_weight=0.2,
             ecal_weight=0.1,
             lr=0.001,
             rho=0.9,
             decay=0.0,
             g_weights='params_generator_epoch_',
             d_weights='params_discriminator_epoch_',
             xscale=1,
             verbose=True):
    start_init = time.time()
    # verbose = False
    if hvd.rank() == 0:
        print('[INFO] Building discriminator')
    #discriminator.summary()
    discriminator.compile(optimizer=opt,
                          loss=[
                              'binary_crossentropy',
                              'mean_absolute_percentage_error',
                              'mean_absolute_percentage_error'
                          ],
                          loss_weights=[gen_weight, aux_weight, ecal_weight])

    # build the generator
    if hvd.rank() == 0:
        print('[INFO] Building generator')
    #generator.summary()
    generator.compile(optimizer=opt, loss='binary_crossentropy')

    # build combined Model
    latent = Input(shape=(latent_size, ), name='combined_z')
    fake_image = generator(latent)
    discriminator.trainable = False
    fake, aux, ecal = discriminator(fake_image)
    combined = Model(inputs=[latent],
                     outputs=[fake, aux, ecal],
                     name='combined_model')

    # Getting Data
    Trainfiles, Testfiles = DivideFiles(datapath,
                                        nEvents=nEvents,
                                        EventsperFile=EventsperFile,
                                        datasetnames=["ECAL"],
                                        Particles=["Ele"])

    if hvd.rank() == 0:
        print("Train files: {0} \nTest files: {1}".format(
            Trainfiles, Testfiles))

    #Read test data into a single array
    for index, dtest in enumerate(Testfiles):
        if index == 0:
            X_test, Y_test, ecal_test = GetData(dtest)
        else:
            X_temp, Y_temp, ecal_temp = GetData(dtest)
            X_test = np.concatenate((X_test, X_temp))
            Y_test = np.concatenate((Y_test, Y_temp))
            ecal_test = np.concatenate((ecal_test, ecal_temp))

    for index, dtrain in enumerate(Trainfiles):
        if index == 0:
            X_train, Y_train, ecal_train = GetData(dtrain)
        else:
            X_temp, Y_temp, ecal_temp = GetData(dtrain)
            X_train = np.concatenate((X_train, X_temp))
            Y_train = np.concatenate((Y_train, Y_temp))
            ecal_train = np.concatenate((ecal_train, ecal_temp))

    nb_test = X_test.shape[0]
    assert X_train.shape[0] == EventsperFile * len(Trainfiles), \
        "Loaded training events do not match EventsperFile * len(Trainfiles)"
    nb_train = X_train.shape[0]  # Total events in training files
    total_batches = nb_train // global_batch_size  # integer batches per epoch
    if hvd.rank() == 0:
        print('Total Training batches = {} with {} events'.format(
            total_batches, nb_train))

    combined.compile(
        #optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
        optimizer=opt,
        loss=[
            'binary_crossentropy', 'mean_absolute_percentage_error',
            'mean_absolute_percentage_error'
        ],
        loss_weights=[gen_weight, aux_weight, ecal_weight])

    def hvd_callbacks():
        # build fresh callback instances for each model, since callbacks
        # hold per-model state once set_model() is called
        return [
            hvd.callbacks.BroadcastGlobalVariablesCallback(0),
            hvd.callbacks.MetricAverageCallback(),
            hvd.callbacks.LearningRateWarmupCallback(
                warmup_epochs=warmup_epochs, verbose=1,
                steps_per_epoch=total_batches),
            hvd.callbacks.LearningRateScheduleCallback(
                start_epoch=warmup_epochs, end_epoch=nb_epochs, multiplier=1.),
            keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1),
        ]

    gcb = CallbackList(callbacks=hvd_callbacks())
    dcb = CallbackList(callbacks=hvd_callbacks())
    ccb = CallbackList(callbacks=hvd_callbacks())

    gcb.set_model(generator)
    dcb.set_model(discriminator)
    ccb.set_model(combined)

    gcb.on_train_begin()
    dcb.on_train_begin()
    ccb.on_train_begin()

    print("On hostname {0} - After init using {1} memory".format(
        socket.gethostname(),
        psutil.Process(os.getpid()).memory_info()[0]))

    train_history = defaultdict(list)
    test_history = defaultdict(list)

    if hvd.rank() == 0:
        print('Initialization time was {} seconds'.format(time.time() -
                                                          start_init))

    for epoch in range(nb_epochs):
        epoch_start = time.time()
        if hvd.rank() == 0:
            print('Epoch {} of {}'.format(epoch + 1, nb_epochs))

        randomize(X_train, Y_train, ecal_train)

        epoch_gen_loss = []
        epoch_disc_loss = []

        image_batches = genbatches(X_train, batch_size)
        energy_batches = genbatches(Y_train, batch_size)
        ecal_batches = genbatches(ecal_train, batch_size)

        for index in range(total_batches):
            start = time.time()
            image_batch = next(image_batches)
            energy_batch = next(energy_batches)
            ecal_batch = next(ecal_batches)

            noise = np.random.normal(0, 1, (batch_size, latent_size))
            sampled_energies = np.random.uniform(0.1, 5, (batch_size, 1))
            generator_ip = np.multiply(sampled_energies, noise)
            # ecal sum from fit
            ecal_ip = GetEcalFit(sampled_energies, mod, xscale)
            generated_images = generator.predict(generator_ip, verbose=0)
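            # train the discriminator on real images (labels ~1) and generated
            # images (labels ~0); BitFlip presumably injects label noise by
            # flipping a fraction of the labels (inferred from the name)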
            real_batch_loss = discriminator.train_on_batch(
                image_batch,
                [BitFlip(np.ones(batch_size)), energy_batch, ecal_batch])
            fake_batch_loss = discriminator.train_on_batch(
                generated_images,
                [BitFlip(np.zeros(batch_size)), sampled_energies, ecal_ip])
            epoch_disc_loss.append([
                (a + b) / 2 for a, b in zip(real_batch_loss, fake_batch_loss)
            ])

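            # standard generator objective: train the combined model (with the
            # discriminator frozen) to label generated images as real (ones)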
            trick = np.ones(batch_size)
            gen_losses = []
            for _ in range(2):
                noise = np.random.normal(0, 1, (batch_size, latent_size))
                sampled_energies = np.random.uniform(0.1, 5, (batch_size, 1))
                generator_ip = np.multiply(sampled_energies, noise)
                ecal_ip = GetEcalFit(sampled_energies, mod, xscale)
                gen_losses.append(
                    combined.train_on_batch(
                        [generator_ip],
                        [trick,
                         sampled_energies.reshape((-1, 1)), ecal_ip]))
            epoch_gen_loss.append([(a + b) / 2 for a, b in zip(*gen_losses)])

            if hvd.rank() == 0:
                # progress_bar.update(index)
                print('processed {}/{} batches in {}'.format(
                    index + 1, total_batches,
                    time.time() - start))

        # save weights every epoch
        if hvd.rank() == 0:

            safe_mkdir(WeightsDir)

            print("saving weights of gen")
            generator.save_weights(
                WeightsDir +
                '/generator_{0}{1:03d}.hdf5'.format(g_weights, epoch),
                overwrite=True)

            print("saving weights of disc")
            discriminator.save_weights(
                WeightsDir +
                '/discriminator_{0}{1:03d}.hdf5'.format(d_weights, epoch),
                overwrite=True)

            epoch_time = time.time() - epoch_start
            print("The {} epoch took {} seconds".format(epoch, epoch_time))
Example #3
    def fit(self,
            x,
            y,
            batch_size,
            n_epochs=1,
            callbacks=None,
            validation_data=None):
        """Trains the network on the given data for a fixed number of epochs

        :param x: input data to train on
        :type x: torch.Tensor
        :param y: target data to train on
        :type y: torch.Tensor
        :param batch_size: number of samples to use per forward and backward
         pass
        :type batch_size: int
        :param n_epochs: number of epochs (iterations of the dataset) to train
         the model
        :type n_epochs: int
        :param callbacks: callbacks to be used during training
        :type callbacks: list[object]
        :param validation_data: data on which to evaluate the loss and metrics
         at the end of each epoch
        :type validation_data: tuple(torch.Tensor)
        """

        default_callbacks = self._load_default_callbacks()
        default_callbacks.append(ProgbarLogger(count_mode='samples'))
        if callbacks:
            default_callbacks.extend(callbacks)
        callbacks = CallbackList(default_callbacks)

        self._assert_compiled()

        if self.device:
            self.network.to(self.device)

        metrics = ['loss']
        if self.n_outputs > 1:
            for idx_output in range(1, self.n_outputs + 1):
                metrics.append('loss{}'.format(idx_output))
        if validation_data is not None:
            metrics.append('val_loss')
            if self.n_outputs > 1:
                for idx_output in range(1, self.n_outputs + 1):
                    metrics.append('val_loss{}'.format(idx_output))
        for metric_name in self.metric_names:
            metrics.append(metric_name)
            if validation_data is not None:
                metrics.append('val_{}'.format(metric_name))

        index_array = np.arange(x.shape[0])

        callbacks.set_params({
            'batch_size': batch_size,
            'epochs': n_epochs,
            'metrics': metrics,
            'steps': None,
            'samples': x.shape[0],
            'verbose': True
        })
        callbacks.set_model(self)

        callbacks.on_train_begin()
        for idx_epoch in range(n_epochs):
            if self.stop_training:
                break

            epoch_logs = {}
            callbacks.on_epoch_begin(idx_epoch)

            np.random.shuffle(index_array)
            batches = make_batches(len(index_array), batch_size)
            for idx_batch, (idx_start, idx_end) in enumerate(batches):
                batch_logs = {'batch': idx_batch, 'size': idx_end - idx_start}
                callbacks.on_batch_begin(idx_batch, batch_logs)

                inputs = x[index_array[idx_start:idx_end]]
                if self.n_outputs > 1:
                    targets = []
                    for idx_output in range(self.n_outputs):
                        targets.append(
                            y[idx_output][index_array[idx_start:idx_end]])
                else:
                    targets = y[index_array[idx_start:idx_end]]
                train_outputs = self.train_on_batch(inputs, targets)

                batch_logs['loss'] = train_outputs[0]
                if self.n_outputs > 1:
                    for idx_output in range(1, self.n_outputs + 1):
                        batch_logs['loss{}'.format(idx_output)] = (
                            train_outputs[idx_output])

                idx_metric_values = (1 if self.n_outputs == 1 else
                                     self.n_outputs + 1)
                it = zip(self.metric_names, train_outputs[idx_metric_values:])
                for metric_name, train_output in it:
                    batch_logs[metric_name] = train_output
                callbacks.on_batch_end(idx_batch, batch_logs)

                if self.stop_training:
                    break

            if validation_data:
                val_outputs = self.evaluate(validation_data[0],
                                            validation_data[1], batch_size)

                epoch_logs['val_loss'] = val_outputs[0]
                if self.n_outputs > 1:
                    for idx_output in range(1, self.n_outputs + 1):
                        epoch_logs['val_loss{}'.format(idx_output)] = (
                            val_outputs[idx_output])

                idx_metric_values = (1 if self.n_outputs == 1 else
                                     self.n_outputs + 1)
                it = zip(self.metric_names, val_outputs[idx_metric_values:])
                for metric_name, val_output in it:
                    metric_name = 'val_{}'.format(metric_name)
                    epoch_logs[metric_name] = val_output
            callbacks.on_epoch_end(idx_epoch, epoch_logs)
        callbacks.on_train_end()
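
A minimal usage sketch for `fit`, under the same assumption that `model` is an already-compiled instance of the surrounding class:

# Hypothetical usage sketch: `model` is assumed to be an already-compiled
# instance of the class defining `fit`.
import torch

x_train, y_train = torch.randn(1024, 10), torch.randn(1024, 1)
x_val, y_val = torch.randn(256, 10), torch.randn(256, 1)

model.fit(x_train, y_train,
          batch_size=32,
          n_epochs=10,
          validation_data=(x_val, y_val))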