def queue_train_generator(train_gen,
                          workers=1,
                          use_multiprocessing=False,
                          max_queue_size=10,
                          use_sequence_api=True,
                          shuffle=False):
    """Return an iterator over training batches, optionally prefetched.

    The queueing logic follows Keras' ``training_generator.py``:
    https://github.com/keras-team/keras/blob/master/keras/engine/training_generator.py

    Args:
        train_gen: The batch source. With ``use_sequence_api`` it is handed
            to ``OrderedEnqueuer`` / ``iter_sequence_infinite``; otherwise
            it is handed to ``GeneratorEnqueuer`` or returned as-is.
        workers: Number of workers passed to ``enqueuer.start``. A value of
            0 (or less) disables the background queue entirely.
        use_multiprocessing: Forwarded to the enqueuer constructor.
        max_queue_size: Forwarded to ``enqueuer.start``.
        use_sequence_api: Selects the ``Sequence``-based code path.
        shuffle: Forwarded to ``OrderedEnqueuer``. Only used when
            ``workers > 0`` and ``use_sequence_api`` is true. Defaults to
            ``False``, the value that used to be hard-coded.

    Returns:
        ``enqueuer.get()`` when a queue is used, otherwise
        ``iter_sequence_infinite(train_gen)`` or ``train_gen`` itself.
    """
    if workers <= 0:
        # No background workers requested: iterate in the calling thread.
        if use_sequence_api:
            return iter_sequence_infinite(train_gen)
        return train_gen

    if use_sequence_api:
        enqueuer = OrderedEnqueuer(
            train_gen,
            use_multiprocessing=use_multiprocessing,
            shuffle=shuffle,
        )
    else:
        enqueuer = GeneratorEnqueuer(
            train_gen,
            use_multiprocessing=use_multiprocessing,
        )

    # NOTE(review): the enqueuer itself is not returned, so the caller has
    # no handle to stop it later; kept this way to preserve the interface.
    enqueuer.start(workers=workers, max_queue_size=max_queue_size)
    return enqueuer.get()
Beispiel #2
0
 def __init__(self, data_generator, batch_size, num_samples, output_dir,
              input_shape, n_classes):
     """Store the configuration and start a background batch queue.

     A TensorFlow summary writer is opened under
     ``output_dir + "/diagnose/"`` and ``data_generator`` is wrapped in an
     enqueuer (``OrderedEnqueuer`` for ``Sequence`` instances,
     ``GeneratorEnqueuer`` otherwise) that is started right away with
     4 workers and a queue size of 4.
     """
     # Plain configuration values.
     self.batch_size = batch_size
     self.num_samples = num_samples
     self.data_generator = data_generator
     self.input_shape = input_shape
     self.n_classes = n_classes

     log_dir = output_dir + "/diagnose/"
     self.tensorboard_writer = tf.summary.create_file_writer(
         log_dir, flush_millis=10000)

     # Colour table (presumably one RGB row per class -- TODO confirm);
     # only the first ``n_classes`` rows are kept.
     palette = np.array([[255, 255, 0], [255, 0, 0], [0, 255, 0],
                         [0, 0, 255], [0, 0, 0]])
     self.colors = palette[:n_classes]
     self.color_dict = {0: (0, 0, 0), 1: (0, 255, 0)}

     # Pick the enqueuer type that matches the kind of data source.
     if isinstance(data_generator, Sequence):
         self.enqueuer = OrderedEnqueuer(data_generator,
                                         use_multiprocessing=True,
                                         shuffle=False)
     else:
         self.enqueuer = GeneratorEnqueuer(data_generator,
                                           use_multiprocessing=True,
                                           wait_time=0.01)
     self.enqueuer.start(workers=4, max_queue_size=4)
Beispiel #3
0
 def __init__(self,
              data_generator,
              batch_size,
              num_samples,
              output_dir,
              normalization_mean,
              start_index=0):
     """Store the configuration and start a single-worker batch queue.

     ``data_generator`` is wrapped in an ``OrderedEnqueuer`` when it is a
     ``Sequence`` and in a ``GeneratorEnqueuer`` otherwise; the enqueuer
     is started immediately with one worker and a queue size of 4.
     A ``TensorboardWriter`` is created for ``output_dir``.
     """
     super().__init__()

     # Plain configuration values.
     self.data_generator = data_generator
     self.batch_size = batch_size
     self.num_samples = num_samples
     self.normalization_mean = normalization_mean
     self.start_index = start_index
     self.tensorboard_writer = TensorboardWriter(output_dir)

     # Pick the enqueuer type that matches the kind of data source.
     if isinstance(data_generator, Sequence):
         self.enqueuer = OrderedEnqueuer(data_generator,
                                         use_multiprocessing=False,
                                         shuffle=False)
     else:
         self.enqueuer = GeneratorEnqueuer(data_generator,
                                           use_multiprocessing=False)
     self.enqueuer.start(workers=1, max_queue_size=4)
def train(n_epochs, _batch_size, start_epoch=0):
    """
    Train with a fixed batch size for the given number of epochs.

    After every epoch a grid of generated examples and the loss history
    are plotted, the history is written to ``hist.csv`` and the generator
    and critic models are saved.

    Args:
        n_epochs: Number of epochs to run.
        _batch_size: Batch size; it is also stored in the module-level
            global ``batch_size``.
        start_epoch: Offset added to the epoch number used in titles and
            file names.

    Raises:
        ValueError: If the generator or critic loss becomes NaN.
    """
    global batch_size
    batch_size = _batch_size

    # Imported locally so this function does not rely on the file header.
    from contextlib import ExitStack

    # The enqueuers spawn worker processes; the ExitStack makes sure they
    # are stopped again when training finishes or fails.
    with ExitStack() as cleanup:
        # create a dataqueue with the keras facilities. this allows
        # to prepare the data in parallel to the training
        sample_dataqueue = GeneratorEnqueuer(generate_real_samples(batch_size),
                                             use_multiprocessing=True)
        sample_dataqueue.start(workers=2, max_queue_size=10)
        cleanup.callback(sample_dataqueue.stop)
        sample_gen = sample_dataqueue.get()

        # second queue: latent points / conditions for the generator update
        gan_sample_dataqueue = GeneratorEnqueuer(
            generate_latent_points_as_generator(batch_size),
            use_multiprocessing=True)
        gan_sample_dataqueue.start(workers=2, max_queue_size=10)
        cleanup.callback(gan_sample_dataqueue.stop)
        gan_sample_gen = gan_sample_dataqueue.get()

        # targets for loss function
        valid = -np.ones((batch_size, 1))
        fake = np.ones((batch_size, 1))
        dummy = np.zeros((batch_size, 1))  # Dummy gt for gradient penalty

        bat_per_epo = int(n_samples / batch_size)

        # we need to call the discriminator once in order
        # to initialize the input shapes
        [X_real, cond_real] = next(sample_gen)
        latent = np.random.normal(size=(batch_size, latent_dim))
        critic_model.predict([X_real, cond_real, latent])

        for i in trange(n_epochs):
            epoch = 1 + i + start_epoch
            # enumerate batches over the training set
            for j in trange(bat_per_epo):

                for _ in range(n_disc):
                    # fetch a batch from the queue
                    [X_real, cond_real] = next(sample_gen)
                    latent = np.random.normal(size=(batch_size, latent_dim))
                    d_loss = critic_model.train_on_batch(
                        [X_real, cond_real, latent], [valid, fake, dummy])
                    # we get four losses back here: average, valid, fake
                    # and gradient_penalty.
                    # we want the average of valid and fake
                    d_loss = np.mean([d_loss[1], d_loss[2]])

                # train generator
                # prepare points in latent space as input for the generator
                [latent, cond] = next(gan_sample_gen)
                # update the generator via the discriminator's error
                g_loss = generator_model.train_on_batch([latent, cond], valid)
                # summarize loss on this batch
                print(f'{epoch}, {j + 1}/{bat_per_epo}, d_loss {d_loss}'
                      f' g:{g_loss} ')

                if np.isnan(g_loss) or np.isnan(d_loss):
                    raise ValueError('encountered nan in g_loss and/or d_loss')

                hist['d_loss'].append(d_loss)
                hist['g_loss'].append(g_loss)

            # plot generated examples: one row per sample, the condition
            # in the first column followed by the generated frames
            plt.figure(figsize=(25, 25))
            n_plot = 30
            X_fake, cond_fake = generate_fake_samples(n_plot)
            for iplot in range(n_plot):
                plt.subplot(n_plot, 25, iplot * 25 + 1)
                plt.imshow(cond_fake[iplot, :, :].squeeze(),
                           cmap=plt.cm.gist_earth_r,
                           norm=LogNorm(vmin=0.01, vmax=1))
                plt.axis('off')
                for jplot in range(1, 24):
                    plt.subplot(n_plot, 25, iplot * 25 + jplot + 1)
                    plt.imshow(X_fake[iplot, jplot, :, :].squeeze(),
                               vmin=0,
                               vmax=1,
                               cmap=plt.cm.hot_r)
                    plt.axis('off')
            plt.colorbar()
            plt.suptitle(f'epoch {epoch:04d}')
            # `j` is the index of the last batch of the epoch
            plt.savefig(
                f'{plotdir}/fake_samples_{params}_{epoch:04d}_{j:06d}.{plot_format}'
            )

            # plot loss
            plt.figure()
            plt.plot(hist['d_loss'], label='d_loss')
            plt.plot(hist['g_loss'], label='g_loss')
            # one history entry per batch, so the batch index runs along x
            plt.xlabel('batch')
            plt.legend()
            plt.savefig(f'{plotdir}/training_loss_{params}.{plot_format}')
            pd.DataFrame(hist).to_csv('hist.csv')
            plt.close('all')

            generator.save(f'{outdir}/gen_{params}_{epoch:04d}.h5')
            critic.save(f'{outdir}/disc_{params}_{epoch:04d}.h5')
Beispiel #5
0
    def predict_generator(self,
                          generator,
                          steps,
                          max_queue_size=10,
                          workers=1,
                          use_multiprocessing=False,
                          verbose=0):
        """Run ``predict_on_batch`` over batches drawn from a data generator.

        Each batch taken from the generator is passed unchanged to
        ``self.predict_on_batch``.

        # Arguments
            generator: Batch source. A ``Sequence`` instance is wrapped in
                an ``OrderedEnqueuer``, anything else in a
                ``GeneratorEnqueuer``.
            steps: Number of batches to draw from `generator` before
                stopping.
            max_queue_size: Maximum size of the enqueuer queue.
            workers: Number of workers passed to the enqueuer.
            use_multiprocessing: Forwarded to the enqueuer constructor.
            verbose: Verbosity mode; a ``Progbar`` is shown when it is 1.

        # Returns
            A list with one entry per output item returned by
            ``predict_on_batch``; every entry is a list holding ``int(c)``
            for each element ``c`` of that item.
        """

        self.model_pred._make_predict_function()

        # Sequences keep their order through an OrderedEnqueuer; plain
        # generators go through a GeneratorEnqueuer.
        enqueuer_cls = (OrderedEnqueuer
                        if isinstance(generator, Sequence) else
                        GeneratorEnqueuer)
        show_progress = verbose == 1

        collected = []
        completed = 0
        enqueuer = None

        try:
            enqueuer = enqueuer_cls(generator,
                                    use_multiprocessing=use_multiprocessing)
            enqueuer.start(workers=workers, max_queue_size=max_queue_size)
            batches = enqueuer.get()

            if show_progress:
                progbar = Progbar(target=steps)

            while completed < steps:
                outs = self.predict_on_batch(next(batches))

                # Normalise a single output to a one-element list.
                if not isinstance(outs, list):
                    outs = [outs]

                for out in outs:
                    collected.append([int(c) for c in out])

                completed += 1
                if show_progress:
                    progbar.update(completed)

        finally:
            # Always shut the workers down, even if prediction failed.
            if enqueuer is not None:
                enqueuer.stop()

        return collected
    def start(this):
        """Build the batch generator and start a multiprocessing enqueuer.

        The generator comes from the private ``__generate`` method, called
        with ``this.generator_functors`` and ``this.generator_cfgs``. The
        enqueuer is stored on ``this.enqueuer`` and started with
        ``this.num_workers`` workers and a queue size of 24.
        """
        batch_source = this.__generate(this.generator_functors,
                                       this.generator_cfgs)

        enqueuer = GeneratorEnqueuer(batch_source, use_multiprocessing=True)
        this.enqueuer = enqueuer
        enqueuer.start(workers=this.num_workers, max_queue_size=24)
 def __init__(this, generator_functor, generator_cfg, num_workers):
     """Remember the worker count and wrap the generator in an enqueuer.

     The enqueuer is only created here; this method does not start it.
     """
     this.num_workers = num_workers
     batch_source = this.__generate(generator_functor, generator_cfg)
     this.enqueuer = GeneratorEnqueuer(batch_source,
                                       use_multiprocessing=True)
    def predict(self,
                x,
                batch_size=None,
                verbose=0,
                steps=1,
                callbacks=None,
                max_queue_size=10,
                workers=1,
                use_multiprocessing=False):
        """
        Model predicting on data yielded (generator).
        A predict() abstraction function of TensorFlow 2 using the encoder and decoder models

        :param x: Generator of batches; only element ``[0]`` of every
            yielded item is used as the encoder input.
        :param batch_size: Accepted for signature compatibility; the batch
            size is taken from the length of each yielded batch.
        :param verbose: Show a progress bar when equal to 1.
        :param steps: Number of batches to draw from the generator.
        :param callbacks: Accepted for signature compatibility; unused.
        :param max_queue_size: Maximum size of the enqueuer queue.
        :param workers: Number of workers passed to the enqueuer.
        :param use_multiprocessing: Forwarded to ``GeneratorEnqueuer``.
        :return: A list with one decoded entry (after
            ``tokenizer.remove_tokens``) per input sample.

        References:
            Tal Weiss
            Deep Spelling
            Medium: https://machinelearnings.co/deep-spelling-9ffef96a24f6
            Github: https://github.com/MajorTal/DeepSpell

            Vu Tran
            Sequence-to-Sequence Learning for Spelling Correction
            Github: https://github.com/vuptran/deep-spell-checkr
        """

        # Create and start the enqueuer *before* entering the try block, so
        # that the cleanup below never runs against an enqueuer that was
        # not created or not started (which would hide the real error).
        enqueuer = GeneratorEnqueuer(
            x, use_multiprocessing=use_multiprocessing)
        enqueuer.start(workers=workers, max_queue_size=max_queue_size)

        predicts = []
        vocab_size = self.tokenizer.vocab_size

        try:
            output_generator = enqueuer.get()

            steps_done = 0
            if verbose == 1:
                print("Model Predict")
                progbar = Progbar(target=steps)

            while steps_done < steps:
                inputs = next(output_generator)[0]
                n_items = len(inputs)

                # Encode the input as state vectors
                encoder_out, state_h, state_c = self.encoder.predict(inputs)
                dec_state = tf.concat([state_h, state_c], axis=-1)

                # Create batch of empty target sequences of length 1 character and populate
                # the first element of target sequence with the start-of-sequence character
                target = np.zeros((n_items, 1, vocab_size))
                target[:, 0, self.tokenizer.SOS] = 1.0

                # Sampling loop for a batch of sequences
                decoded_tokens = [''] * n_items

                for _ in range(self.tokenizer.maxlen):
                    # `char_probs` has shape (batch_size, 1, nb_target_chars)
                    char_probs, dec_state = self.decoder.predict(
                        [encoder_out, dec_state, target])

                    # Reset the target sequences.
                    target = np.zeros((n_items, 1, vocab_size))

                    # Sample the next character of every sequence (argmax)
                    sampled_chars = []

                    for i in range(n_items):
                        next_index = char_probs[i].argmax(axis=-1)
                        next_char = self.tokenizer.decode([next_index])

                        decoded_tokens[i] += next_char
                        sampled_chars.append(next_char)

                        # Update target sequence with index of next character
                        target[i, 0, next_index] = 1.0

                    # Stop early once every sequence emitted the EOS token
                    # in this step.
                    stop_char = set(sampled_chars)

                    if len(stop_char) == 1 and stop_char.pop(
                    ) == self.tokenizer.EOS_TK:
                        break

                # Sampling finished
                predicts.extend(
                    self.tokenizer.remove_tokens(tokens)
                    for tokens in decoded_tokens)

                steps_done += 1
                if verbose == 1:
                    progbar.update(steps_done)

        finally:
            enqueuer.stop()

        return predicts
Beispiel #9
0
class ModelDiagonoser(Callback):
    """Callback that logs inputs, ground truths and predictions as images.

    At the end of every epoch one batch is taken from the data source, run
    through the model and written to TensorBoard together with the colour
    rendering of the ground truth and of the first and last model outputs.
    """

    def __init__(self, data_generator, batch_size, num_samples, output_dir,
                 input_shape, n_classes):
        """Store the configuration and start a background batch queue.

        A summary writer is opened under ``output_dir + "/diagnose/"``.
        ``data_generator`` is wrapped in an ``OrderedEnqueuer`` when it is
        a ``Sequence`` and in a ``GeneratorEnqueuer`` otherwise; the
        enqueuer is started immediately with 4 workers.
        """
        # Let the Callback base class initialise its own attributes.
        super().__init__()
        self.batch_size = batch_size
        self.num_samples = num_samples
        self.tensorboard_writer = tf.summary.create_file_writer(
            output_dir + "/diagnose/", flush_millis=10000)
        self.data_generator = data_generator
        self.input_shape = input_shape
        self.color_dict = {0: (0, 0, 0), 1: (0, 255, 0)}
        self.n_classes = n_classes
        # Colour table (presumably one RGB row per class -- TODO confirm);
        # only the first ``n_classes`` rows are kept.
        palette = np.array([[255, 255, 0], [255, 0, 0], [0, 255, 0],
                            [0, 0, 255], [0, 0, 0]])
        self.colors = palette[:self.n_classes]
        if isinstance(self.data_generator, Sequence):
            self.enqueuer = OrderedEnqueuer(self.data_generator,
                                            use_multiprocessing=True,
                                            shuffle=False)
        else:
            self.enqueuer = GeneratorEnqueuer(self.data_generator,
                                              use_multiprocessing=True,
                                              wait_time=0.01)
        self.enqueuer.start(workers=4, max_queue_size=4)

    def _to_color_image(self, one_hot):
        """Map a per-class array to colours and resize to ``input_shape``."""
        flat = np.reshape(one_hot, (-1, self.n_classes))
        return np.resize(np.dot(flat, self.colors), self.input_shape)

    def on_epoch_end(self, epoch, logs=None):
        """Write one batch of images and predictions to TensorBoard."""
        output_generator = self.enqueuer.get()
        generator_output = next(output_generator)
        x, y = generator_output[:2]
        y_pred = self.model.predict(x)
        # Only the first and the last entry of the prediction are logged
        # (assumes a multi-output model -- TODO confirm).
        y_pred1 = np.argmax(y_pred[0], axis=-1)
        y_pred2 = np.argmax(y_pred[-1], axis=-1)

        y_pred1 = gray_to_onehot_all(y_pred1, self.color_dict)
        y_pred2 = gray_to_onehot_all(y_pred2, self.color_dict)

        indices = range(len(y))
        y_imgs = np.array([self._to_color_image(y[i]) for i in indices])
        x_imgs = np.array([x[i].astype('uint8') for i in indices])
        y_imgs_pred = np.array(
            [self._to_color_image(y_pred1[i]) for i in indices])
        y_imgs_pred1 = np.array(
            [self._to_color_image(y_pred2[i]) for i in indices])

        with self.tensorboard_writer.as_default():
            tf.summary.image("img", x_imgs, step=epoch)
            tf.summary.image("train/gts", y_imgs, step=epoch)
            tf.summary.image("train/predictions1", y_imgs_pred, step=epoch)
            tf.summary.image("train/predictions2", y_imgs_pred1, step=epoch)
        self.tensorboard_writer.flush()

    def on_train_end(self, logs=None):
        """Stop the background queue and close the summary writer."""
        self.enqueuer.stop()
        self.tensorboard_writer.close()
    def predict(self,
                x,
                batch_size=None,
                verbose=0,
                steps=1,
                callbacks=None,
                max_queue_size=10,
                workers=1,
                use_multiprocessing=False):
        """
        Model predicting on data yielded (generator).
        A predict() abstraction function of TensorFlow 2 using the encoder and decoder models

        :param x: Generator of batches; only element ``[0]`` of every
            yielded item is used, and its entries are decoded one by one.
        :param batch_size: Accepted for signature compatibility; unused.
        :param verbose: Show a progress bar when equal to 1.
        :param steps: Number of batches to draw from the generator.
        :param callbacks: Accepted for signature compatibility; unused.
        :param max_queue_size: Maximum size of the enqueuer queue.
        :param workers: Number of workers passed to the enqueuer.
        :param use_multiprocessing: Forwarded to ``GeneratorEnqueuer``.
        :return: A list with one decoded entry (after
            ``tokenizer.remove_tokens``) per input sentence.
        """

        # Create and start the enqueuer *before* entering the try block, so
        # that the cleanup below never runs against an enqueuer that was
        # not created or not started (which would hide the real error).
        enqueuer = GeneratorEnqueuer(x, use_multiprocessing=use_multiprocessing)
        enqueuer.start(workers=workers, max_queue_size=max_queue_size)

        predicts = []

        try:
            output_generator = enqueuer.get()

            steps_done = 0
            if verbose == 1:
                print("Model Predict")
                progbar = Progbar(target=steps)

            while steps_done < steps:
                batch = next(output_generator)[0]

                for sentence in batch:
                    enc_input = tf.expand_dims(sentence, axis=0)
                    dec_input = tf.expand_dims([self.tokenizer.SOS], axis=0)

                    for _ in range(self.tokenizer.maxlen):
                        enc_padding_mask, look_ahead_mask, dec_padding_mask = create_masks(enc_input, dec_input)

                        enc_output = self.encoder(enc_input, enc_padding_mask)  # (batch_size, inp_seq_len, d_model)
                        dec_output, _ = self.decoder(dec_input, enc_output, look_ahead_mask, dec_padding_mask)

                        # select the last word from the seq_len dimension
                        predictions = dec_output[:, -1:, :]  # (batch_size, 1, vocab_size)
                        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), dtype=tf.int32)

                        # stop decoding this sentence once the end token is predicted
                        if tf.equal(predicted_id, self.tokenizer.EOS):
                            break

                        # concatenate the predicted_id to the output which is given to the decoder as its input.
                        dec_input = tf.concat([dec_input, predicted_id], axis=-1)

                    decoded = self.tokenizer.decode(tf.squeeze(dec_input, axis=0))
                    predicts.append(self.tokenizer.remove_tokens(decoded))

                steps_done += 1
                if verbose == 1:
                    progbar.update(steps_done)

        finally:
            enqueuer.stop()

        return predicts
        x = layer(x)
        if isinstance(layer, Conv2D):
            mean_activations.append(layer_mean_activations(x))

    get_batch_means = K.function([vgg_net.input], mean_activations)

    idgen = ImageDataGenerator(preprocessing_function=vgg16.preprocess_input)
    gen = idgen.flow_from_directory(directory=data_folder,
                                    target_size=img_size,
                                    class_mode=None,
                                    shuffle=False,
                                    batch_size=batch_size)
    num_images = gen.samples

    if use_generator_enqueuer:
        enq = GeneratorEnqueuer(gen, use_multiprocessing=False)
        enq.start(workers=1)
        gen = enq.get()

    print("Gathering mean activations...")
    iters = num_images // batch_size
    accumulated_means = None
    for i in range(iters):
        batch_means = get_batch_means([next(gen)])
        if accumulated_means is None:
            accumulated_means = batch_means
        else:
            for accumulated, m in zip(accumulated_means, batch_means):
                accumulated += m
        if (i + 1) % 50 == 0: print("Batches done:", i + 1)
Beispiel #12
0
class ModelDiagonoser(Callback):
    """Callback that saves inputs, labels and predictions as images.

    At the end of every epoch batches are taken from the background queue,
    run through the model and handed to a ``TensorboardWriter`` until
    ``num_samples`` samples have been written.
    """

    def __init__(self,
                 data_generator,
                 batch_size,
                 num_samples,
                 output_dir,
                 normalization_mean,
                 start_index=0):
        """Store the configuration and start a single-worker batch queue.

        ``data_generator`` is wrapped in an ``OrderedEnqueuer`` when it is
        a ``Sequence`` and in a ``GeneratorEnqueuer`` otherwise; the
        enqueuer is started immediately with one worker.
        """
        super().__init__()
        self.data_generator = data_generator
        self.batch_size = batch_size
        self.num_samples = num_samples
        self.tensorboard_writer = TensorboardWriter(output_dir)
        self.normalization_mean = normalization_mean
        self.start_index = start_index
        if isinstance(self.data_generator, Sequence):
            self.enqueuer = OrderedEnqueuer(self.data_generator,
                                            use_multiprocessing=False,
                                            shuffle=False)
        else:
            self.enqueuer = GeneratorEnqueuer(self.data_generator,
                                              use_multiprocessing=False)
        self.enqueuer.start(workers=1, max_queue_size=4)

    def on_epoch_end(self, epoch, logs=None):
        """Save input, ground truth and prediction images for this epoch."""
        # Read batches through the enqueuer started in __init__ rather
        # than from the raw data source: the enqueuer's worker is already
        # consuming that source, and a Sequence cannot be advanced with
        # next() at all.
        output_generator = self.enqueuer.get()

        steps_done = 0
        total_steps = int(np.ceil(np.divide(self.num_samples,
                                            self.batch_size)))
        sample_index = 0
        while steps_done < total_steps:
            x, y = next(output_generator)
            sample_index += 1
            # Skip batches until the running index has passed start_index.
            if sample_index <= self.start_index:
                continue
            y_pred = self.model.predict(x)
            y_pred = np.argmax(y_pred, axis=-1)
            y_true = np.argmax(y, axis=-1)

            for i in range(len(y_pred)):
                n = steps_done * self.batch_size + i
                if n >= self.num_samples:
                    return
                img = np.squeeze(x[i, :, :, :])
                img = 255. * (
                    img + self.normalization_mean
                )  # mean is the training images normalization mean
                img = img[:, :, [2, 1, 0]]  # reordering of channels

                pred = y_pred[i].reshape(img.shape[0:2])
                ground_truth = y_true[i].reshape(img.shape[0:2])

                tag_prefix = "Epoch-{}/{}".format(epoch, sample_index - 1)
                self.tensorboard_writer.save_image(tag_prefix + "/x", img,
                                                   epoch)
                self.tensorboard_writer.save_image(tag_prefix + "/y",
                                                   ground_truth, epoch)
                self.tensorboard_writer.save_image(tag_prefix + "/y_pred",
                                                   pred, epoch)
                sample_index += 1

            steps_done += 1

    def on_train_end(self, logs=None):
        """Stop the background queue and close the writer."""
        self.enqueuer.stop()
        self.tensorboard_writer.close()