def queue_train_generator(train_gen, workers=1, use_multiprocessing=False,
                          max_queue_size=10, use_sequence_api=True):
    # all the queue stuff is from
    # https://github.com/keras-team/keras/blob/master/keras/engine/training_generator.py
    if workers > 0:
        if use_sequence_api:
            enqueuer = OrderedEnqueuer(
                train_gen,
                use_multiprocessing=use_multiprocessing,
                # TODO: add a parameter to control this
                shuffle=False,
            )
        else:
            enqueuer = GeneratorEnqueuer(
                train_gen,
                use_multiprocessing=use_multiprocessing,
            )
        enqueuer.start(workers=workers, max_queue_size=max_queue_size)
        train_generator = enqueuer.get()
    else:
        if use_sequence_api:
            train_generator = iter_sequence_infinite(train_gen)
        else:
            train_generator = train_gen
    return train_generator
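A minimal usage sketch for the helper above, assuming the enqueuer classes can be imported from tensorflow.keras.utils (as in TF 2.x) so the helper's references resolve; DummySequence is a hypothetical stand-in for a real training Sequence.

# Sketch only: DummySequence is a hypothetical stand-in for a real data source.
import numpy as np
from tensorflow.keras.utils import Sequence, OrderedEnqueuer, GeneratorEnqueuer

class DummySequence(Sequence):
    def __len__(self):
        return 8  # number of batches per epoch

    def __getitem__(self, idx):
        # one batch of (inputs, targets)
        return np.zeros((4, 32)), np.zeros((4, 1))

train_gen = queue_train_generator(DummySequence(), workers=2,
                                  use_multiprocessing=False, max_queue_size=10)
x_batch, y_batch = next(train_gen)  # batches are now prepared by background workers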
def __init__(self, data_generator, batch_size, num_samples, output_dir,
             input_shape, n_classes):
    self.batch_size = batch_size
    self.num_samples = num_samples
    self.tensorboard_writer = tf.summary.create_file_writer(
        output_dir + "/diagnose/", flush_millis=10000)
    self.data_generator = data_generator
    self.input_shape = input_shape
    self.colors = np.array([[255, 255, 0],
                            [255, 0, 0],
                            [0, 255, 0],
                            [0, 0, 255],
                            [0, 0, 0]])
    self.color_dict = {0: (0, 0, 0), 1: (0, 255, 0)}
    self.n_classes = n_classes
    self.colors = self.colors[:self.n_classes]

    is_sequence = isinstance(self.data_generator, Sequence)
    if is_sequence:
        self.enqueuer = OrderedEnqueuer(self.data_generator,
                                        use_multiprocessing=True,
                                        shuffle=False)
    else:
        self.enqueuer = GeneratorEnqueuer(self.data_generator,
                                          use_multiprocessing=True,
                                          wait_time=0.01)
    self.enqueuer.start(workers=4, max_queue_size=4)
def __init__(self, data_generator, batch_size, num_samples, output_dir,
             normalization_mean, start_index=0):
    super().__init__()
    self.data_generator = data_generator
    self.batch_size = batch_size
    self.num_samples = num_samples
    self.tensorboard_writer = TensorboardWriter(output_dir)
    self.normalization_mean = normalization_mean
    self.start_index = start_index

    is_sequence = isinstance(self.data_generator, Sequence)
    if is_sequence:
        self.enqueuer = OrderedEnqueuer(self.data_generator,
                                        use_multiprocessing=False,
                                        shuffle=False)
    else:
        self.enqueuer = GeneratorEnqueuer(self.data_generator,
                                          use_multiprocessing=False)
    self.enqueuer.start(workers=1, max_queue_size=4)
def train(n_epochs, _batch_size, start_epoch=0):
    """ train with fixed batch_size for given epochs
        make some example plots and save model after each epoch """
    global batch_size
    batch_size = _batch_size
    # create a dataqueue with the keras facilities. this allows
    # to prepare the data in parallel to the training
    sample_dataqueue = GeneratorEnqueuer(generate_real_samples(batch_size),
                                         use_multiprocessing=True)
    sample_dataqueue.start(workers=2, max_queue_size=10)
    sample_gen = sample_dataqueue.get()

    gan_sample_dataqueue = GeneratorEnqueuer(
        generate_latent_points_as_generator(batch_size),
        use_multiprocessing=True)
    gan_sample_dataqueue.start(workers=2, max_queue_size=10)
    gan_sample_gen = gan_sample_dataqueue.get()

    # targets for loss function
    valid = -np.ones((batch_size, 1))
    fake = np.ones((batch_size, 1))
    dummy = np.zeros((batch_size, 1))  # dummy gt for gradient penalty

    bat_per_epo = int(n_samples / batch_size)

    # we need to call the discriminator once in order
    # to initialize the input shapes
    [X_real, cond_real] = next(sample_gen)
    latent = np.random.normal(size=(batch_size, latent_dim))
    critic_model.predict([X_real, cond_real, latent])

    for i in trange(n_epochs):
        epoch = 1 + i + start_epoch
        # enumerate batches over the training set
        for j in trange(bat_per_epo):

            for _ in range(n_disc):
                # fetch a batch from the queue
                [X_real, cond_real] = next(sample_gen)
                latent = np.random.normal(size=(batch_size, latent_dim))
                d_loss = critic_model.train_on_batch(
                    [X_real, cond_real, latent], [valid, fake, dummy])
                # we get four losses back here: average, valid, fake, and gradient_penalty.
                # we want the average of valid and fake
                d_loss = np.mean([d_loss[1], d_loss[2]])

            # train generator
            # prepare points in latent space as input for the generator
            [latent, cond] = next(gan_sample_gen)
            # update the generator via the discriminator's error
            g_loss = generator_model.train_on_batch([latent, cond], valid)
            # summarize loss on this batch
            print(f'{epoch}, {j + 1}/{bat_per_epo}, d_loss {d_loss}' +
                  f' g:{g_loss} ')  # , d_fake:{d_loss_fake} d_real:{d_loss_real}

            if np.isnan(g_loss) or np.isnan(d_loss):
                raise ValueError('encountered nan in g_loss and/or d_loss')

            hist['d_loss'].append(d_loss)
            hist['g_loss'].append(g_loss)

        # plot generated examples
        plt.figure(figsize=(25, 25))
        n_plot = 30
        X_fake, cond_fake = generate_fake_samples(n_plot)
        for iplot in range(n_plot):
            plt.subplot(n_plot, 25, iplot * 25 + 1)
            plt.imshow(cond_fake[iplot, :, :].squeeze(),
                       cmap=plt.cm.gist_earth_r,
                       norm=LogNorm(vmin=0.01, vmax=1))
            plt.axis('off')
            for jplot in range(1, 24):
                plt.subplot(n_plot, 25, iplot * 25 + jplot + 1)
                plt.imshow(X_fake[iplot, jplot, :, :].squeeze(),
                           vmin=0, vmax=1, cmap=plt.cm.hot_r)
                plt.axis('off')
        plt.colorbar()
        plt.suptitle(f'epoch {epoch:04d}')
        plt.savefig(
            f'{plotdir}/fake_samples_{params}_{epoch:04d}_{j:06d}.{plot_format}')

        # plot loss
        plt.figure()
        plt.plot(hist['d_loss'], label='d_loss')
        plt.plot(hist['g_loss'], label='g_loss')
        plt.xlabel('batch')
        plt.legend()
        plt.savefig(f'{plotdir}/training_loss_{params}.{plot_format}')
        pd.DataFrame(hist).to_csv('hist.csv')
        plt.close('all')

        generator.save(f'{outdir}/gen_{params}_{epoch:04d}.h5')
        critic.save(f'{outdir}/disc_{params}_{epoch:04d}.h5')
def predict_generator(self, generator, steps,
                      max_queue_size=10, workers=1,
                      use_multiprocessing=False, verbose=0):
    """Generates predictions for the input samples from a data generator.

    The generator should return the same kind of data as accepted by
    `predict_on_batch`.

    generator = DataGenerator class that returns:
        x = Input data as a 3D Tensor (batch_size, max_input_len, dim_features)
        x_len = 1D array with the length of each data in batch_size

    # Arguments
        generator: Generator yielding batches of input samples
            or an instance of Sequence (tensorflow.keras.utils.Sequence)
            object in order to avoid duplicate data
            when using multiprocessing.
        steps: Total number of steps (batches of samples)
            to yield from `generator` before stopping.
        max_queue_size: Maximum size for the generator queue.
        workers: Maximum number of processes to spin up
            when using process based threading
        use_multiprocessing: If `True`, use process based threading.
            Note that because this implementation relies on multiprocessing,
            you should not pass non picklable arguments to the generator
            as they can't be passed easily to children processes.
        verbose: verbosity mode, 0 or 1.

    # Returns
        A numpy array(s) of predictions.

    # Raises
        ValueError: In case the generator yields data in an invalid format.
    """
    self.model_pred._make_predict_function()
    is_sequence = isinstance(generator, Sequence)

    allab_outs = []
    steps_done = 0
    enqueuer = None

    try:
        if is_sequence:
            enqueuer = OrderedEnqueuer(
                generator, use_multiprocessing=use_multiprocessing)
        else:
            enqueuer = GeneratorEnqueuer(
                generator, use_multiprocessing=use_multiprocessing)

        enqueuer.start(workers=workers, max_queue_size=max_queue_size)
        output_generator = enqueuer.get()

        if verbose == 1:
            progbar = Progbar(target=steps)

        while steps_done < steps:
            x = next(output_generator)
            outs = self.predict_on_batch(x)

            if not isinstance(outs, list):
                outs = [outs]

            for i, out in enumerate(outs):
                allab_outs.append([int(c) for c in out])

            steps_done += 1
            if verbose == 1:
                progbar.update(steps_done)

    finally:
        if enqueuer is not None:
            enqueuer.stop()

    return allab_outs
def start(this):
    sequence = this.__generate(this.generator_functors, this.generator_cfgs)
    this.enqueuer = GeneratorEnqueuer(sequence, use_multiprocessing=True)
    this.enqueuer.start(max_queue_size=24, workers=this.num_workers)
def __init__(this, generator_functor, generator_cfg, num_workers):
    this.num_workers = num_workers
    sequence = this.__generate(generator_functor, generator_cfg)
    this.enqueuer = GeneratorEnqueuer(sequence, use_multiprocessing=True)
def predict(self, x, batch_size=None, verbose=0, steps=1, callbacks=None,
            max_queue_size=10, workers=1, use_multiprocessing=False):
    """
    Model predicting on data yielded (generator).
    A predict() abstraction function of TensorFlow 2
    using the encoder and decoder models

    :param: See tensorflow.keras.Model.predict()
    :return: A numpy array(s) of predictions.

    References:
        Tal Weiss
        Deep Spelling
        Medium: https://machinelearnings.co/deep-spelling-9ffef96a24f6
        Github: https://github.com/MajorTal/DeepSpell

        Vu Tran
        Sequence-to-Sequence Learning for Spelling Correction
        Github: https://github.com/vuptran/deep-spell-checkr
    """
    try:
        enqueuer = GeneratorEnqueuer(
            x, use_multiprocessing=use_multiprocessing)
        enqueuer.start(workers=workers, max_queue_size=max_queue_size)
        output_generator = enqueuer.get()

        steps_done = 0
        if verbose == 1:
            print("Model Predict")
            progbar = Progbar(target=steps)

        predicts = []

        while steps_done < steps:
            x = next(output_generator)[0]
            batch_size = len(x)

            # Encode the input as state vectors
            encoder_out, state_h, state_c = self.encoder.predict(x)
            dec_state = tf.concat([state_h, state_c], axis=-1)

            # Create batch of empty target sequences of length 1 character and
            # populate the first element of target sequence with the
            # start-of-sequence character
            target = np.zeros((batch_size, 1, self.tokenizer.vocab_size))
            target[:, 0, self.tokenizer.SOS] = 1.0

            # Sampling loop for a batch of sequences
            decoded_tokens = [''] * batch_size

            for _ in range(self.tokenizer.maxlen):
                # `char_probs` has shape (batch_size, 1, nb_target_chars)
                char_probs, dec_state = self.decoder.predict(
                    [encoder_out, dec_state, target])

                # Reset the target sequences.
                target = np.zeros(
                    (batch_size, 1, self.tokenizer.vocab_size))

                # Sample next character using argmax or multinomial mode
                sampled_chars = []

                for i in range(batch_size):
                    next_index = char_probs[i].argmax(axis=-1)
                    next_char = self.tokenizer.decode([next_index])
                    decoded_tokens[i] += next_char
                    sampled_chars.append(next_char)

                    # Update target sequence with index of next character
                    target[i, 0, next_index] = 1.0

                stop_char = set(sampled_chars)
                if len(stop_char) == 1 and stop_char.pop() == self.tokenizer.EOS_TK:
                    break

            # Sampling finished
            predicts.extend(
                [self.tokenizer.remove_tokens(x) for x in decoded_tokens])

            steps_done += 1
            if verbose == 1:
                progbar.update(steps_done)

    finally:
        enqueuer.stop()

    return predicts
class ModelDiagonoser(Callback):

    def __init__(self, data_generator, batch_size, num_samples, output_dir,
                 input_shape, n_classes):
        self.batch_size = batch_size
        self.num_samples = num_samples
        self.tensorboard_writer = tf.summary.create_file_writer(
            output_dir + "/diagnose/", flush_millis=10000)
        self.data_generator = data_generator
        self.input_shape = input_shape
        self.colors = np.array([[255, 255, 0],
                                [255, 0, 0],
                                [0, 255, 0],
                                [0, 0, 255],
                                [0, 0, 0]])
        self.color_dict = {0: (0, 0, 0), 1: (0, 255, 0)}
        self.n_classes = n_classes
        self.colors = self.colors[:self.n_classes]

        is_sequence = isinstance(self.data_generator, Sequence)
        if is_sequence:
            self.enqueuer = OrderedEnqueuer(self.data_generator,
                                            use_multiprocessing=True,
                                            shuffle=False)
        else:
            self.enqueuer = GeneratorEnqueuer(self.data_generator,
                                              use_multiprocessing=True,
                                              wait_time=0.01)
        self.enqueuer.start(workers=4, max_queue_size=4)

    def on_epoch_end(self, epoch, logs=None):
        output_generator = self.enqueuer.get()
        generator_output = next(output_generator)
        x, y = generator_output[:2]
        y_pred = self.model.predict(x)
        # y_pred1 = self.model.predict(x)
        y_pred1, y_pred2 = y_pred[0], y_pred[-1]
        y_pred1 = np.argmax(y_pred1, axis=-1)
        y_pred2 = np.argmax(y_pred2, axis=-1)
        # y = y1['softmax_out']

        y_imgs = []
        y_imgs_pred = []
        y_imgs_pred1 = []
        x_imgs = []
        # n_classes = np.shape(y)[-1]
        # print(np.unique(np.argmax(y, -1)), np.unique(y_pred1), "\n\n\n\n\n")
        y_pred1 = gray_to_onehot_all(y_pred1, self.color_dict)
        y_pred2 = gray_to_onehot_all(y_pred2, self.color_dict)
        # print(np.shape(y_pred1), np.shape(y_pred2), np.shape(y))
        for i in range(len(y)):
            y_img = np.resize(
                np.dot(np.reshape(y[i], (-1, self.n_classes)), self.colors),
                self.input_shape)
            y_img_pred = np.resize(
                np.dot(np.reshape(y_pred1[i], (-1, self.n_classes)), self.colors),
                self.input_shape)
            y_img_pred1 = np.resize(
                np.dot(np.reshape(y_pred2[i], (-1, self.n_classes)), self.colors),
                self.input_shape)

            y_imgs.append(y_img)
            y_imgs_pred.append(y_img_pred)
            y_imgs_pred1.append(y_img_pred1)
            x_imgs.append(x[i].astype('uint8'))

        y_imgs = np.array(y_imgs)
        x_imgs = np.array(x_imgs)
        y_imgs_pred = np.array(y_imgs_pred)
        y_imgs_pred1 = np.array(y_imgs_pred1)

        with self.tensorboard_writer.as_default():
            is_written = tf.summary.image("img", x_imgs, step=epoch)
            is_written = tf.summary.image("train/gts", y_imgs, step=epoch)
            is_written = tf.summary.image("train/predictions1", y_imgs_pred,
                                          step=epoch)
            is_written = tf.summary.image("train/predictions2", y_imgs_pred1,
                                          step=epoch)
            # if is_written:
            #     print(' image has written to the tensorboard')
            self.tensorboard_writer.flush()

    def on_train_end(self, logs=None):
        self.enqueuer.stop()
        self.tensorboard_writer.close()
def predict(self, x, batch_size=None, verbose=0, steps=1, callbacks=None,
            max_queue_size=10, workers=1, use_multiprocessing=False):
    """
    Model predicting on data yielded (generator).
    A predict() abstraction function of TensorFlow 2
    using the encoder and decoder models

    :param: See tensorflow.keras.Model.predict()
    :return: A numpy array(s) of predictions.
    """
    try:
        enqueuer = GeneratorEnqueuer(x, use_multiprocessing=use_multiprocessing)
        enqueuer.start(workers=workers, max_queue_size=max_queue_size)
        output_generator = enqueuer.get()

        steps_done = 0
        if verbose == 1:
            print("Model Predict")
            progbar = Progbar(target=steps)

        predicts = []

        while steps_done < steps:
            x = next(output_generator)[0]

            for sentence in x:
                enc_input = tf.expand_dims(sentence, axis=0)
                dec_input = tf.expand_dims([self.tokenizer.SOS], axis=0)

                for _ in range(self.tokenizer.maxlen):
                    enc_padding_mask, look_ahead_mask, dec_padding_mask = create_masks(
                        enc_input, dec_input)

                    # (batch_size, inp_seq_len, d_model)
                    enc_output = self.encoder(enc_input, enc_padding_mask)
                    dec_output, _ = self.decoder(dec_input, enc_output,
                                                 look_ahead_mask, dec_padding_mask)

                    # select the last word from the seq_len dimension
                    predictions = dec_output[:, -1:, :]  # (batch_size, 1, vocab_size)
                    predicted_id = tf.cast(tf.argmax(predictions, axis=-1),
                                           dtype=tf.int32)

                    # return the result if the predicted_id is equal to the end token
                    if tf.equal(predicted_id, self.tokenizer.EOS):
                        break

                    # concatenate the predicted_id to the output, which is given
                    # to the decoder as its input.
                    dec_input = tf.concat([dec_input, predicted_id], axis=-1)

                dec_input = tf.squeeze(dec_input, axis=0)
                dec_input = self.tokenizer.decode(dec_input)
                predicts.append(self.tokenizer.remove_tokens(dec_input))

            steps_done += 1
            if verbose == 1:
                progbar.update(steps_done)

    finally:
        enqueuer.stop()

    return predicts
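Both predict() variants above step through a fixed number of batches, so the caller has to translate dataset size into steps. A minimal sketch of that call, assuming dt_gen, num_samples, and batch_size are placeholders for the project's own batch generator and its dimensions, and model is an instance of the class that defines this predict():

# Sketch only: dt_gen, num_samples, and batch_size are placeholders for the
# project's own batch generator and dataset size; `model` is an instance of
# the class defining the predict() shown above.
import math

steps = math.ceil(num_samples / batch_size)  # one step per yielded batch
predictions = model.predict(x=dt_gen, steps=steps, verbose=1)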
x = layer(x)
if isinstance(layer, Conv2D):
    mean_activations.append(layer_mean_activations(x))

get_batch_means = K.function([vgg_net.input], mean_activations)

idgen = ImageDataGenerator(preprocessing_function=vgg16.preprocess_input)
gen = idgen.flow_from_directory(directory=data_folder,
                                target_size=img_size,
                                class_mode=None,
                                shuffle=False,
                                batch_size=batch_size)
num_images = gen.samples

if use_generator_enqueuer:
    enq = GeneratorEnqueuer(gen, use_multiprocessing=False)
    enq.start(workers=1)
    gen = enq.get()

print("Gathering mean activations...")
iters = num_images // batch_size
accumulated_means = None
for i in range(iters):
    batch_means = get_batch_means([next(gen)])
    if accumulated_means is None:
        accumulated_means = batch_means
    else:
        for accumulated, m in zip(accumulated_means, batch_means):
            accumulated += m
    if (i + 1) % 50 == 0:
        print("Batches done:", i + 1)
class ModelDiagonoser(Callback):

    def __init__(self, data_generator, batch_size, num_samples, output_dir,
                 normalization_mean, start_index=0):
        super().__init__()
        self.data_generator = data_generator
        self.batch_size = batch_size
        self.num_samples = num_samples
        self.tensorboard_writer = TensorboardWriter(output_dir)
        self.normalization_mean = normalization_mean
        self.start_index = start_index

        is_sequence = isinstance(self.data_generator, Sequence)
        if is_sequence:
            self.enqueuer = OrderedEnqueuer(self.data_generator,
                                            use_multiprocessing=False,
                                            shuffle=False)
        else:
            self.enqueuer = GeneratorEnqueuer(self.data_generator,
                                              use_multiprocessing=False)
        self.enqueuer.start(workers=1, max_queue_size=4)

    def on_epoch_end(self, epoch, logs=None):
        steps_done = 0
        total_steps = int(np.ceil(np.divide(self.num_samples, self.batch_size)))
        sample_index = 0

        while steps_done < total_steps:
            x, y = next(self.data_generator)
            sample_index += 1
            if sample_index <= self.start_index:
                continue

            y_pred = self.model.predict(x)
            y_pred = np.argmax(y_pred, axis=-1)
            y_true = np.argmax(y, axis=-1)

            for i in range(0, len(y_pred)):
                n = steps_done * self.batch_size + i
                if n >= self.num_samples:
                    return

                img = np.squeeze(x[i, :, :, :])
                img = 255. * (img + self.normalization_mean
                              )  # mean is the training images normalization mean
                img = img[:, :, [2, 1, 0]]  # reordering of channels

                pred = y_pred[i]
                pred = pred.reshape(img.shape[0:2])

                ground_truth = y_true[i]
                ground_truth = ground_truth.reshape(img.shape[0:2])

                self.tensorboard_writer.save_image(
                    "Epoch-{}/{}/x".format(epoch, sample_index - 1), img, epoch)
                self.tensorboard_writer.save_image(
                    "Epoch-{}/{}/y".format(epoch, sample_index - 1),
                    ground_truth, epoch)
                self.tensorboard_writer.save_image(
                    "Epoch-{}/{}/y_pred".format(epoch, sample_index - 1),
                    pred, epoch)
                sample_index += 1

            steps_done += 1

    def on_train_end(self, logs=None):
        self.enqueuer.stop()
        self.tensorboard_writer.close()
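A hedged sketch of how a diagnostic callback like the one above might be wired into training. Here train_seq and val_seq stand in for the project's own tf.keras.utils.Sequence datasets, model for its compiled segmentation model, and the normalization_mean values are only illustrative; they must match whatever mean was subtracted during preprocessing.

# Sketch only: train_seq, val_seq, and model are placeholders for the
# project's own datasets and compiled model; the mean below is illustrative.
import numpy as np

diagnoser = ModelDiagonoser(data_generator=val_seq,
                            batch_size=8,
                            num_samples=64,
                            output_dir="logs/diagnose",
                            normalization_mean=np.array([0.485, 0.456, 0.406]))

model.fit(train_seq,
          validation_data=val_seq,
          epochs=10,
          callbacks=[diagnoser])  # x / y / y_pred images are written each epoch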