def train(run_name, digit, nepochs, batch_size, latent_dim, noise_type, k,
          optimizer, model_dir, gen_intermediate_dims=None,
          disc_intermediate_dims=None):
    """Train a GAN on one digit class and evaluate on held-out data each epoch.

    Args:
        run_name: label used to derive output filenames.
        digit: which digit's data to load via load_data().
        nepochs: number of training epochs.
        batch_size: minibatch size; train/test sets are truncated to whole batches.
        latent_dim: generator latent dimensionality.
        noise_type: noise scheme forwarded to train_on_batch/evaluate.
        k: extra training knob forwarded to train_on_batch (presumably the
           number of discriminator steps per generator step -- TODO confirm).
        optimizer: optimizer passed to the three model builders.
        model_dir: directory used by get_filenames() for all outputs.
        gen_intermediate_dims: optional hidden-layer sizes for the generator.
        disc_intermediate_dims: optional hidden-layer sizes for the discriminator.
    """
    # load data and params
    Xtr, Xte, _, _ = load_data(digit)
    # Truncate the test set to a whole number of batches.  NOTE: floor
    # division is required here -- plain `/` yields a float under Python 3,
    # which is not a valid slice index.
    Xte = Xte[:(Xte.shape[0] // batch_size) * batch_size]
    original_dim = Xtr.shape[-1]
    nbatches = Xtr.shape[0] // batch_size
    fnm_gen, fnm_disc, fnm_hist, fnm_samp = get_filenames(model_dir, run_name)

    # build models; the discriminator and combined stack are sized for
    # 2 * batch_size inputs (a real half plus a generated half per step)
    generator = build_generator(batch_size, latent_dim, gen_intermediate_dims,
                                original_dim, optimizer)
    discriminator = build_discriminator(2 * batch_size, original_dim,
                                        disc_intermediate_dims, optimizer)
    combined = build_combined(2 * batch_size, latent_dim, generator,
                              discriminator, optimizer)

    # train and test
    history = init_history()
    for i in range(nepochs):  # range (was xrange): works on Python 2 and 3
        print('Epoch {} of {}'.format(i + 1, nepochs))
        progress_bar = Progbar(target=nbatches)

        # train on mini-batches of train data
        epoch_losses = []
        for j in range(nbatches):
            progress_bar.update(j)
            batch_loss = train_on_batch(Xtr, j, batch_size, latent_dim,
                                        noise_type, k, generator,
                                        discriminator, combined)
            epoch_losses.append(batch_loss)

        # evaluate on test data
        print('\nTesting epoch {}:'.format(i + 1))
        test_loss = evaluate(Xte, batch_size, latent_dim, noise_type,
                             generator, discriminator, combined,
                             fnm_samp.format(i + 1))
        train_loss = np.mean(np.array(epoch_losses), axis=0)
        history = update_history(history, train_loss, test_loss, do_print=True)

        # save weights every epoch (overwrite=True); filenames keep the
        # 0-based epoch index (sample files use i + 1) for backward
        # compatibility with existing runs
        generator.save_weights(fnm_gen.format(i), True)
        discriminator.save_weights(fnm_disc.format(i), True)

        # save history each epoch so a crash loses nothing
        save_history(fnm_hist, history)
def __init__(self, args):
    """Configure the GAN: dataset paths, hyper-parameters, and the
    discriminator / generator / combined models.

    Args:
        args: parsed CLI namespace; reads datasetpath, imgsize, zdims,
            batchsize, epochs and saveinterval.
    """
    # Dataset location and image geometry (RGB: 3 channels).
    self.master_path = args.datasetpath
    self.img_rows = args.imgsize
    self.img_cols = args.imgsize
    self.channels = 3
    self.img_shape = (self.img_rows, self.img_cols, self.channels)

    # Latent-vector dimensionality and training schedule.
    self.z_dim = args.zdims
    self.batch_size = args.batchsize
    self.epochs = args.epochs
    self.saveinterval = args.saveinterval

    # Two optimizers: a slow one for D, a faster one for the combined stack.
    d_opt = Adam(lr=1e-5, beta_1=0.1)
    g_opt = Adam(lr=2e-4, beta_1=0.5)

    # Make sure every output directory exists before anything is written.
    for folder in ('./images/', './saved_model/', './result_image/'):
        if not os.path.exists(folder):
            os.makedirs(folder)

    # Discriminator: compiled standalone so it can be trained directly.
    self.discriminator = model.build_discriminator(self.img_shape)
    plot_model(self.discriminator,
               to_file='./images/discriminator.png',
               show_shapes=True)
    self.discriminator.compile(loss='binary_crossentropy',
                               optimizer=d_opt,
                               metrics=['accuracy'])

    # Generator: never trained on its own, so no compile needed.
    self.generator = model.build_generator(self.z_dim)
    plot_model(self.generator,
               to_file='./images/generator.png',
               show_shapes=True)

    # Combined model (generator followed by discriminator) drives the
    # generator's updates.
    self.combined = model.build_combined(self.z_dim, self.generator,
                                         self.discriminator)
    self.combined.compile(loss='binary_crossentropy', optimizer=g_opt)

    self.X_train = []
def __init__(self, model_name=None):
    """Assemble the discriminator, generator, and stacked GAN for MNIST.

    Args:
        model_name: not used here; kept for interface compatibility.
    """
    # MNIST data: images plus their digit labels, train and test splits.
    (self.imgs, self.digits,
     self.test_imgs, self.test_digits) = load_mnist()
    self.img_rows, self.img_cols, self.channels = self.imgs.shape[1:]
    self.img_shape = (self.img_rows, self.img_cols, self.channels)

    bce = 'binary_crossentropy'
    d_opt = Adam(lr=0.0002)
    g_opt = Adam(lr=0.0002)

    # Discriminator, trainable on its own.
    self.D = build_discriminator()
    self.D.compile(loss=bce, optimizer=d_opt,
                   metrics=[metrics.binary_accuracy])
    self.D.summary()

    # Generator (inception variant); also exposes its mask sub-model.
    self.G, self.G_mask = build_generator_incep()
    # self.G, self.G_mask = build_generator()

    # Stacked model: the generator's output is scored by a frozen
    # discriminator, both conditioned on the one-hot digit.
    img_input = Input(shape=self.img_shape)
    digit_input = Input(shape=(10,))
    img_added = self.G([img_input, digit_input])
    self.D.trainable = False
    D_output = self.D([img_added, digit_input])
    self.combined = Model([img_input, digit_input], D_output)
    self.combined.compile(loss=bce, optimizer=g_opt,
                          metrics=[metrics.binary_accuracy])
    self.combined.summary()

    # TensorBoard logging wired to the combined model.
    self.tb = keras.callbacks.TensorBoard(
        log_dir='./logs',
        histogram_freq=0,
        batch_size=64,
        write_graph=True,
        write_grads=True
    )
    self.tb.set_model(self.combined)
# Rebuild the TF1.x default graph from scratch so repeated runs in the same
# process do not accumulate ops.
tf.reset_default_graph()
dataset = open_new_dataset(f'{HOME}/dataset/dataset.tr',
                           batch_size=batch_size)
images_iter = dataset.make_initializable_iterator()
images = images_iter.get_next()

# Placeholders for corrupted inputs and their clean originals.
# 218x178x3 -- presumably the CelebA image size; TODO confirm dataset.
wrecked_images = tf.placeholder(tf.float32, shape=(None, 218, 178, 3))
real_images = tf.placeholder(tf.float32, shape=(None, 218, 178, 3))
# Weight of the pixel-reconstruction term mixed into the generator loss.
l1_ratio = 0.3

# Build the GAN graph; scope.reuse_variables() lets the same discriminator
# weights score both the fake and the real batch.
with tf.variable_scope('') as scope:
    fake_images = build_generator(wrecked_images)
    fake_logits = build_discriminator(fake_images)
    scope.reuse_variables()
    real_logits = build_discriminator(real_images)

G_loss, D_loss = loss(fake_logits, real_logits, fake_images, real_images,
                      batch_size, build_discriminator)
# Blend the adversarial loss with a reconstruction penalty.  NOTE: despite
# the name `l1_ratio`, the penalty is mean *squared* error.
G_loss = (1 - l1_ratio) * G_loss + l1_ratio * tf.losses.mean_squared_error(
    fake_images, real_images)

# Separate Adam optimizers, each restricted to its own variable collection
# ('discriminator' / 'generator' scope prefixes).
D_solver = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1)
G_solver = tf.train.AdamOptimizer(learning_rate=learning_rate, beta1=beta1)
D_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'discriminator')
G_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'generator')
D_train_step = D_solver.minimize(D_loss, var_list=D_vars)
G_train_step = G_solver.minimize(G_loss, var_list=G_vars)
if not os.path.exists(output_dir): os.makedirs(output_dir) # ----- ETL ----- # # ETL = Extraction, Transformation, Load train_data_gen = get_data_gen() """ x = next(train_data_gen) print(len(x)) plot_images(x[:5]) """ # ----- MODEL ----- # # discriminator discriminator = build_discriminator() discriminator_optimizer = tf.keras.optimizers.Adam( learning_rate=LEARNING_RATE, beta_1=BETA_1 ) discriminator.summary() # generator generator = build_generator() generator_optimizer = tf.keras.optimizers.Adam( learning_rate=LEARNING_RATE, beta_1=BETA_1 ) generator.summary() # ----- TRAINING ----- #
os.makedirs(output_folder, exist_ok=True) # %% # -- Create a data iterator that feeds batches train_data = mx.io.NDArrayIter(data=nd.concatenate(img_list), batch_size=BATCH_SIZE, shuffle=False) # %% # -- Define the model n_channels = INPUT_SHAPE[2] n_filters = 64 netG = model.build_generator(n_filters, n_channels, mx_ctx) netD = model.build_discriminator(n_filters, n_channels, mx_ctx) if RESUME_SESSION is not None: netG.load_parameters(f"output/{RESUME_SESSION}/_net_g.params", ctx=mx_ctx) netD.load_parameters(f"output/{RESUME_SESSION}/_net_d.params", ctx=mx_ctx) # %% # -- Define loss function and optimizer loss_fn = gluon.loss.SigmoidBinaryCrossEntropyLoss() trainerG = gluon.Trainer(netG.collect_params(), "adam", {"learning_rate": LEARNING_RATE, "beta1": BETA1}) trainerD = gluon.Trainer(netD.collect_params(), "adam", {"learning_rate": LEARNING_RATE, "beta1": BETA1}) # %% # -- Custom metric function
def train():
    """Run the adversarial training loop (eager TF2 / GradientTape).

    Alternates one discriminator step and one generator step per iteration,
    logs both losses, dumps sample figures every 100 iterations, and saves
    weight checkpoints every 200 iterations.  Relies on module-level
    BATCH_SIZE, SEED, ITERATIONS and the build_generator /
    build_discriminator / data_generator / plot_figs helpers.
    """
    if not os.path.exists('./output'):
        os.makedirs('./output')
    if not os.path.exists('./weights'):
        os.makedirs('./weights')

    # ---------------------
    #  Model and Optimizer
    # ---------------------
    print('Building models...')
    G = build_generator()
    D = build_discriminator()
    G_opti = Adam(2e-4, 0.0, 0.9)
    D_opti = Adam(2e-4, 0.0, 0.9)

    # ---------------------
    #  Train
    # ---------------------
    print('Start Training...')
    start_time = datetime.datetime.now()
    train_generator = data_generator(batch_size=BATCH_SIZE, seed=SEED)
    for iteration in range(1, ITERATIONS + 1):
        # ---------------------
        #  Train Discriminator
        # ---------------------
        l_train, _, p_train, s_train = next(train_generator)

        # Vanilla GAN discriminator loss; the 1e-9 guards log(0).
        with tf.GradientTape() as tape:
            fs_1, fs_2, fs_3 = G([p_train, l_train])
            r_valid = D([p_train, l_train, s_train])
            f_valid = D([p_train, l_train, fs_1])
            D_loss = 0.5 * (tf.reduce_mean(-tf.math.log(r_valid + 1e-9) -
                                           tf.math.log(1 - f_valid + 1e-9)))
        D_grad = tape.gradient(D_loss, D.trainable_variables)
        D_opti.apply_gradients(zip(D_grad, D.trainable_variables))

        # ---------------------
        #  Train Generator
        # ---------------------
        l_train, _, p_train, s_train = next(train_generator)

        with tf.GradientTape() as tape:
            fs_1, fs_2, fs_3 = G([p_train, l_train])
            f_valid = D([p_train, l_train, fs_1])

            # MSE Loss + TV Reg for main output
            main_loss = 5e-1 * tf.reduce_mean(
                tf.square(fs_1 - s_train)) + 1e-6 * tf.reduce_sum(
                    tf.image.total_variation(fs_1))
            # MSE Loss for sub outputs
            sub_loss = 2e-1 * (tf.reduce_mean(tf.square(fs_2 - s_train)) +
                               tf.reduce_mean(tf.square(fs_3 - s_train)))
            # Adv Loss for main output (Vanilla GAN)
            adv_loss = 4e-1 * tf.reduce_mean(-tf.math.log(f_valid + 1e-9))
            G_loss = main_loss + sub_loss + adv_loss
        G_grad = tape.gradient(G_loss, G.trainable_variables)
        G_opti.apply_gradients(zip(G_grad, G.trainable_variables))

        print('[Time: %s] [Iteration: %d] [D loss: %f] [G loss: %f]' % (
            datetime.datetime.now() - start_time,
            iteration,
            D_loss,
            G_loss,
        ))

        # Save training samples
        if iteration % 100 == 0:
            # At least 3 images, bs < 3 causes error
            train_x_batch, cond_batch, train_pos_batch, train_y_batch = next(
                train_generator)
            gen_imgs, s1, s2 = G.predict([train_pos_batch, train_x_batch])
            plot_figs(train_x_batch, train_y_batch, cond_batch, gen_imgs,
                      True, '%d' % iteration)
            plot_figs(train_x_batch, train_y_batch, cond_batch, s1, False,
                      '%d_s1' % iteration)
            plot_figs(train_x_batch, train_y_batch, cond_batch, s2, False,
                      '%d_s2' % iteration)

        if iteration % 200 == 0:
            # BUGFIX: filenames were swapped -- the discriminator was being
            # written to G_*.h5 and the generator to D_*.h5.
            D.save_weights('./weights/D_%05d.h5' % iteration)
            G.save_weights('./weights/G_%05d.h5' % iteration)
def train_model(learning_rate_dis=0.0004, learning_rate_model=0.0004,
                n_epochs=40, batch_size=20):
    '''
    Train a context-encoder + discriminator pair (Theano/Lasagne).

    The context encoder maps 64x64 contour images to 32x32 centers; the
    discriminator scores real vs generated centers.  Minibatches are staged
    into GPU-resident shared variables and fed to the compiled functions via
    `givens`, so the theano functions take no arguments.

    :param learning_rate_dis: Adam learning rate for the discriminator
    :param learning_rate_model: Adam learning rate for the context encoder
    :param n_epochs: number of passes over the 8 training files
    :param batch_size: minibatch size (must divide 10000)
    '''
    #######################
    # Loading the dataset #
    #######################
    print ('... Loading data')

    # Load the dataset on the CPU
    data_path = get_path()
    train_input_path = 'train_input_'
    train_target_path = 'train_target_'
    valid_input_path = 'valid_input_'
    valid_target_path = 'valid_target_'
    nb_train_batch = 8  # number of 10000-image training files

    # Creating symbolic variables
    input_channel = 3
    max_height = 64
    max_width = 64
    min_height = 32
    min_width = 32
    # GPU-resident buffers; minibatches are copied in via set_value below.
    # Shape = (100, 3, 64, 64)
    input = shared_GPU_data(shape=(batch_size, input_channel, max_height,
                                   max_width))
    # Shape = (100, 3, 32, 32)
    target = shared_GPU_data(shape=(batch_size, input_channel, min_height,
                                    min_width))

    ######################
    # Building the model #
    ######################
    # Symbolic variables
    # Shape = (_, 3, 64, 64)
    x = T.tensor4('x', dtype=theano.config.floatX)
    # Shape = (_, 3, 32, 32)
    y = T.tensor4('y', dtype=theano.config.floatX)
    # Shape = (_, 3, 32, 32)
    # NOTE(review): debug name 'x' reused here for variable z -- harmless
    # (the name is only used in Theano error messages) but confusing.
    z = T.tensor4('x', dtype=theano.config.floatX)

    # Creation of the model
    model = build_context_encoder(input_var=None)
    discriminator = build_discriminator(input_var=None)
    fake_image = layers.get_output(model, inputs=x)
    # Deterministic pass (e.g. no dropout) used only for preview images.
    fake_image_det = layers.get_output(model, inputs=x, deterministic=True)
    prob_real = layers.get_output(discriminator, inputs=y)
    prob_fake = layers.get_output(discriminator, inputs=fake_image)
    params_model = layers.get_all_params(model, trainable=True)
    params_dis = layers.get_all_params(discriminator, trainable=True)

    # Standard GAN cross-entropy terms, heavily down-weighted: the model
    # loss is dominated by the reconstruction error (0.999 vs 0.001).
    loss_real = -T.mean(T.log(prob_real))
    loss_fake = -T.mean(T.log(1 - prob_fake))
    loss_dis = 0.001 * (loss_real + loss_fake)
    loss_gen = -T.mean(T.log(prob_fake))
    recons_error = T.mean(objectives.squared_error(fake_image, z))
    loss_model = 0.001 * loss_gen + 0.999 * recons_error
    updates_dis = lasagne.updates.adam(loss_dis, params_dis,
                                       learning_rate=learning_rate_dis,
                                       beta1=0.5)
    updates_model = lasagne.updates.adam(loss_model, params_model,
                                         learning_rate=learning_rate_model,
                                         beta1=0.5)

    # Creation of theano functions.  NOTE(review): `train_model` here
    # shadows this function's own name inside its body -- intentional reuse,
    # but the outer function cannot be called recursively after this point.
    train_dis = theano.function([], loss_dis, updates=updates_dis,
                                allow_input_downcast=True,
                                givens={x: input, y: target})
    train_model = theano.function([], loss_model, updates=updates_model,
                                  allow_input_downcast=True,
                                  givens={x: input, z: target})
    predict_image = theano.function([], fake_image_det,
                                    allow_input_downcast=True,
                                    givens={x: input})

    ###################
    # Train the model #
    ###################
    print('... Training')
    epoch = 0
    nb_train_dis = 25  # discriminator updates per block
    nb_train_gen = 10  # context-encoder updates per block
    nb_batch = 10000 // batch_size
    nb_block = nb_batch // nb_train_dis
    # NOTE: rebinding loss_dis/loss_model from symbolic expressions to plain
    # Python lists -- the graphs are already baked into the functions above.
    loss_dis = []
    loss_model = []
    idx = 50         # which validation slice to visualize
    pred_batch = 5   # number of preview images per saved figure

    #start_time = timeit.default_timer()
    while (epoch < n_epochs):
        epoch = epoch + 1
        for i in range(nb_train_batch):
            #print (i)
            # Shape = (10000, 3, 64, 64) & Shape = (10000, 3, 32, 32)
            contour, center = get_image(data_path, train_input_path,
                                        train_target_path, str(i))
            for j in range(nb_block):
                #print (j)
                # Alternate 25 discriminator steps with 10 model steps.
                for index in range(nb_train_dis * j, nb_train_dis * (j + 1)):
                    #print (index)
                    input.set_value(contour[index * batch_size:
                                            (index + 1) * batch_size])
                    target.set_value(center[index * batch_size:
                                            (index + 1) * batch_size])
                    loss = train_dis()
                    loss_dis.append(loss)
                for index in range(nb_train_gen * j, nb_train_gen * (j + 1)):
                    #print (index)
                    input.set_value(contour[index * batch_size:
                                            (index + 1) * batch_size])
                    target.set_value(center[index * batch_size:
                                            (index + 1) * batch_size])
                    loss = train_model()
                    loss_model.append(loss)
        if epoch % 4 == 0:
            # save the model and a bunch of generated pictures
            print ('... saving model and generated images')
            np.savez('discriminator_epoch' + str(epoch) + '.npz',
                     *layers.get_all_param_values(discriminator))
            np.savez('context_encoder_epoch' + str(epoch) + '.npz',
                     *layers.get_all_param_values(model))
            np.save('loss_dis', loss_dis)
            np.save('loss_gen', loss_model)
            contour, center = get_image(data_path, valid_input_path,
                                        valid_target_path, str(0))
            input.set_value(contour[idx * pred_batch:
                                    (idx + 1) * pred_batch])
            generated_centers = predict_image()
            generated_images = assemble(contour[idx * pred_batch:
                                                (idx + 1) * pred_batch],
                                        generated_centers)
            for k in range(pred_batch):
                plt.subplot(1, pred_batch, (k + 1))
                plt.axis('off')
                # CHW -> HWC for matplotlib
                plt.imshow(generated_images[k, :, :, :].transpose(1, 2, 0))
            plt.savefig('generated_images_epoch' + str(epoch) + '.png',
                        bbox_inches='tight')
    #end_time = timeit.default_timer()

    # Plot the learning curve (two x-axes: D and the model accumulate a
    # different number of steps).
    ax1 = host_subplot(111, axes_class=AA.Axes)
    plt.subplots_adjust(right=0.75)
    ax2 = ax1.twiny()
    x1 = range(1, len(loss_dis) + 1)
    ax1.set_xlim([x1[0], x1[-1]])
    x2 = range(1, len(loss_model) + 1)
    ax2.set_xlim([x2[0], x2[-1]])
    ax1.set_xlabel('training iteration (Discriminator)', color='g')
    ax2.set_xlabel('training iteration (Context encoder)', color='b')
    ax1.set_ylabel('Loss')
    ax1.plot(x1, rolling_average(loss_dis), 'g', label='Discriminator loss')
    ax2.plot(x2, rolling_average(loss_model), 'b',
             label='Context encoder Loss')
    ax1.grid(True)
    ax1.legend()
    plt.savefig('Learning_curve')
    print('Optimization complete.')
# batch and shuffle the data train_dataset = tf.data.Dataset.from_tensor_slices(training_voxels).shuffle( BUFFER_SIZE).batch(BATCH_SIZE) # create folder for epoch images epoch_timestamp = math.floor(datetime.timestamp(datetime.now())) epoch_images_path = "epoch_images/{}".format(epoch_timestamp) os.mkdir(epoch_images_path) # create generator and discriminator models generator = model.build_generator() noise = tf.random.normal([1, 100]) generated_image = generator(noise, training=False) plt.imshow(generated_image[0, :, :, 0], cmap='gray') discriminator = model.build_discriminator() decision = discriminator(generated_image) print(decision) # loss functions? # this method returns a helper function to compute cross entropy loss cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True) def discriminator_loss(real_output, fake_output): real_loss = cross_entropy(tf.ones_like(real_output), real_output) fake_loss = cross_entropy(tf.zeros_like(fake_output), fake_output) total_loss = real_loss + fake_loss return total_loss
def train(param):
    """Adversarial domain-adaptation training loop.

    Trains a shared embedding with a label classifier and a domain
    discriminator.  `param` is a dict supplying data and hyper-parameters
    (keys read here: inp_dims, number_of_gpus, batch_size, num_iterations,
    test_interval, snapshot_interval, class_loss_weight, dis_loss_weight,
    source_/target_ data and labels, output_path).
    """
    models = {}
    inp = Input(shape=(param["inp_dims"]))
    embedding = model.build_embedding(param, inp)
    classifier = model.build_classifier(param, embedding)
    discriminator = model.build_discriminator(param, embedding)

    # Wrap the three combined models for multi-GPU when requested.
    if param["number_of_gpus"] > 1:
        models["combined_classifier"] = multi_gpu_model(
            model.build_combined_classifier(inp, classifier),
            gpus=param["number_of_gpus"])
        models["combined_discriminator"] = multi_gpu_model(
            model.build_combined_discriminator(inp, discriminator),
            gpus=param["number_of_gpus"])
        models["combined_model"] = multi_gpu_model(
            model.build_combined_model(inp, [classifier, discriminator]),
            gpus=param["number_of_gpus"])
    else:
        models["combined_classifier"] = model.build_combined_classifier(
            inp, classifier)
        models["combined_discriminator"] = model.build_combined_discriminator(
            inp, discriminator)
        models["combined_model"] = model.build_combined_model(
            inp, [classifier, discriminator])

    models["combined_classifier"].compile(
        optimizer=optimizer.opt_classifier(param),
        loss='categorical_crossentropy',
        metrics=['accuracy'])
    models["combined_discriminator"].compile(
        optimizer=optimizer.opt_discriminator(param),
        loss='binary_crossentropy',
        metrics=['accuracy'])
    models["combined_model"].compile(optimizer = optimizer.opt_combined(param),
                                     loss = {'class_act_last': 'categorical_crossentropy',
                                             'dis_act_last': \
                                             'binary_crossentropy'},
                                     loss_weights = {'class_act_last': param["class_loss_weight"],
                                                     'dis_act_last': param["dis_loss_weight"]},
                                     metrics = ['accuracy'])

    Xs, ys = param["source_data"], param["source_label"]
    Xt, yt = param["target_data"], param["target_label"]

    # Source domain is represented by label 0 and Target by 1
    ys_adv = np.array(([0.] * ys.shape[0]))
    yt_adv = np.array(([1.] * yt.shape[0]))

    # y_advb_1 uses FLIPPED domain labels: training the combined model on it
    # pushes the embedding to fool the discriminator; y_advb_2 carries the
    # true labels for the discriminator's own update.
    y_advb_1 = np.array(([1] * param["batch_size"] +
                         [0] * param["batch_size"]))  # For gradient reversal
    y_advb_2 = np.array(
        ([0] * param["batch_size"] + [1] * param["batch_size"]))
    # Classification loss only counts on the (labelled) source half.
    weight_class = np.array(
        ([1] * param["batch_size"] + [0] * param["batch_size"]))
    weight_adv = np.ones((param["batch_size"] * 2, ))

    S_batches = batch_generator([Xs, ys], param["batch_size"])
    T_batches = batch_generator([Xt, np.zeros(shape=(len(Xt), ))],
                                param["batch_size"])

    param["target_accuracy"] = 0

    # Best-so-far snapshot state.
    optim = {}
    optim["iter"] = 0
    optim["acc"] = ""
    # NOTE(review): np.array(Xt.shape[0], ) builds a 0-d array holding the
    # length, not a length-sized array -- presumably a placeholder that is
    # overwritten before first use; verify.
    optim["labels"] = np.array(Xt.shape[0], )
    gap_last_snap = 0

    for i in range(param["num_iterations"]):
        Xsb, ysb = next(S_batches)
        Xtb, ytb = next(T_batches)
        X_adv = np.concatenate([Xsb, Xtb])
        y_class = np.concatenate([ysb, np.zeros_like(ysb)])

        # Save the discriminator ("dis_") weights, train the full model
        # (which also moves them), then restore -- net effect: this step
        # updates only the embedding/classifier.
        adv_weights = []
        for layer in models["combined_model"].layers:
            if (layer.name.startswith("dis_")):
                adv_weights.append(layer.get_weights())

        stats1 = models["combined_model"].train_on_batch(X_adv, [y_class, y_advb_1],\
                                                         sample_weight=[weight_class, weight_adv])

        k = 0
        for layer in models["combined_model"].layers:
            if (layer.name.startswith("dis_")):
                layer.set_weights(adv_weights[k])
                k += 1

        # Symmetric trick: snapshot the non-discriminator weights, train the
        # discriminator, then restore everything else.
        class_weights = []
        for layer in models["combined_model"].layers:
            if (not layer.name.startswith("dis_")):
                class_weights.append(layer.get_weights())

        stats2 = models["combined_discriminator"].train_on_batch(
            X_adv, [y_advb_2])

        k = 0
        for layer in models["combined_model"].layers:
            if (not layer.name.startswith("dis_")):
                layer.set_weights(class_weights[k])
                k += 1

        # Periodic evaluation on the full source and target sets.
        if ((i + 1) % param["test_interval"] == 0):
            ys_pred = models["combined_classifier"].predict(Xs)
            yt_pred = models["combined_classifier"].predict(Xt)
            ys_adv_pred = models["combined_discriminator"].predict(Xs)
            yt_adv_pred = models["combined_discriminator"].predict(Xt)
            source_accuracy = accuracy_score(ys.argmax(1), ys_pred.argmax(1))
            target_accuracy = accuracy_score(yt.argmax(1), yt_pred.argmax(1))
            source_domain_accuracy = accuracy_score(ys_adv,
                                                    np.round(ys_adv_pred))
            target_domain_accuracy = accuracy_score(yt_adv,
                                                    np.round(yt_adv_pred))
            log_str = "iter: {:05d}: \nLABEL CLASSIFICATION: source_accuracy: {:.5f}, target_accuracy: {:.5f}\
\nDOMAIN DISCRIMINATION: source_domain_accuracy: {:.5f}, target_domain_accuracy: {:.5f} \n"\
                .format(i, source_accuracy*100, target_accuracy*100,
                        source_domain_accuracy*100, target_domain_accuracy*100)
            print(log_str)

            # Track the run with the best target accuracy seen so far.
            if param["target_accuracy"] < target_accuracy:
                optim["iter"] = i
                optim["acc"] = log_str
                optim["labels"] = ys_pred.argmax(1)

        # Periodic snapshot: best predictions, the accuracy log, and the
        # current classifier weights.
        if (gap_last_snap >= param["snapshot_interval"]):
            gap_last_snap = 0
            np.save(
                os.path.join(param["output_path"],
                             "yPred_{}".format(optim["iter"])),
                optim["labels"])
            open(
                os.path.join(param["output_path"],
                             "acc_{}.txt".format(optim["iter"])),
                "w").write(optim["acc"])
            models["combined_classifier"].save(
                os.path.join(param["output_path"],
                             "iter_{:05d}_model.h5".format(i)))
        gap_last_snap = gap_last_snap + 1
def train_model(learning_rate_dis=0.0004, learning_rate_model=0.0004,
                n_epochs=36, batch_size=20, nb_caption='max'):
    '''
    Train a caption-conditioned context encoder with a discriminator
    (Theano/Lasagne).  Minibatches are selected through the caption tables:
    each caption row's last column indexes the matching image.

    :param learning_rate_dis: Adam learning rate for the discriminator
    :param learning_rate_model: Adam learning rate for the context encoder
    :param n_epochs: number of passes over the 8 training files
    :param batch_size: minibatch size upper bound (smaller when a caption
        group has fewer rows)
    :param nb_caption: caption-subset selector forwarded to get_caption()
    '''
    #######################
    # Loading the dataset #
    #######################
    print ('... Loading data')

    # Load the dataset on the CPU
    data_path = get_path()
    train_input_path = 'train_input_'
    train_target_path = 'train_target_'
    train_caption_path = 'train_caption_'
    valid_input_path = 'valid_input_'
    valid_target_path = 'valid_target_'
    valid_caption_path = 'valid_caption_'
    nb_train_batch = 8  # number of 10000-image training files

    ######################
    # Building the model #
    ######################
    # Symbolic variables
    # Shape = (_, 3, 64, 64)
    x = T.tensor4('x', dtype=theano.config.floatX)
    # Shape = (_, 3, 32, 32)
    y = T.tensor4('y', dtype=theano.config.floatX)
    # Shape = (_, 3, 32, 32)
    # NOTE(review): debug name 'x' reused for variable z -- harmless but
    # confusing in Theano error messages.
    z = T.tensor4('x', dtype=theano.config.floatX)
    # Shape = (_, seq_length)
    w = T.imatrix('captions')

    # Creation of the model (context encoder consumes image + caption).
    model = build_context_encoder(input_var1=x, input_var2=w)
    discriminator = build_discriminator(input_var=None)
    fake_image = layers.get_output(model)
    # Deterministic pass used only for preview images.
    fake_image_det = layers.get_output(model, deterministic=True)
    prob_real = layers.get_output(discriminator, inputs=y)
    prob_fake = layers.get_output(discriminator, inputs=fake_image)
    params_model = layers.get_all_params(model, trainable=True)
    params_dis = layers.get_all_params(discriminator, trainable=True)

    # GAN terms are down-weighted; reconstruction dominates (0.995 vs 0.005).
    loss_real = -T.mean(T.log(prob_real))
    loss_fake = -T.mean(T.log(1 - prob_fake))
    loss_dis = 0.005 * (loss_real + loss_fake)
    loss_gen = -T.mean(T.log(prob_fake))
    recons_error = T.mean(objectives.squared_error(fake_image, z))
    loss_model = 0.005 * loss_gen + 0.995 * recons_error
    updates_dis = lasagne.updates.adam(loss_dis, params_dis,
                                       learning_rate=learning_rate_dis,
                                       beta1=0.5)
    updates_model = lasagne.updates.adam(loss_model, params_model,
                                         learning_rate=learning_rate_model,
                                         beta1=0.5)

    # Creation of theano functions.  NOTE(review): `train_model` below
    # shadows this function's own name inside its body.
    train_dis = theano.function([x, y, w], loss_dis, updates=updates_dis,
                                allow_input_downcast=True)
    train_model = theano.function([x, z, w], loss_model,
                                  updates=updates_model,
                                  allow_input_downcast=True)
    predict_image = theano.function([x, w], fake_image_det,
                                    allow_input_downcast=True)

    ###################
    # Train the model #
    ###################
    print('... Training')
    epoch = 0
    nb_train_dis = 25  # discriminator updates per block
    nb_train_gen = 10  # context-encoder updates per block
    nb_batch = 10000 // batch_size
    nb_block = nb_batch // nb_train_dis
    # Rebinding loss_dis/loss_model from symbolic expressions to lists --
    # the graphs are already compiled into the functions above.
    loss_dis = []
    loss_model = []
    idx = [0, 1, 2, 4, 5]  # validation caption rows used for previews

    #start_time = timeit.default_timer()
    while (epoch < n_epochs):
        epoch = epoch + 1
        for i in range(nb_train_batch):
            #print (i)
            # Shape = (10000, 3, 64, 64) & Shape = (10000, 3, 32, 32)
            contour, center = get_image(data_path, train_input_path,
                                        train_target_path, str(i))
            # List of captions of different sequence length
            caption = get_caption(data_path, train_caption_path, str(i),
                                  str(nb_caption))
            # List of size nb_train_dis
            # NOTE(review): shadows the builtin `list` for the rest of
            # this loop body.
            list = [k % len(caption) for k in range(nb_train_dis)]
            for j in range(nb_block):
                #print (j)
                for index in range(nb_train_dis * j, nb_train_dis * (j + 1)):
                    #print (index)
                    # Cycle through the caption-length groups; sample up to
                    # batch_size rows from the chosen group.
                    train_caption = caption[list[index % nb_train_dis]]
                    if train_caption.shape[0] >= batch_size:
                        random_idx = random.sample(
                            range(0, train_caption.shape[0]), batch_size)
                    else:
                        random_idx = random.sample(
                            range(0, train_caption.shape[0]),
                            train_caption.shape[0])
                    # Last caption column holds the global image index;
                    # subtract the file offset (i * 10000) to index locally.
                    input = contour[train_caption[random_idx, -1] - i * 10000]
                    target = center[train_caption[random_idx, -1] - i * 10000]
                    train_caption = train_caption[random_idx, :-1]
                    loss = train_dis(input, target, train_caption)
                    loss_dis.append(loss)
                for index in range(nb_train_gen * j, nb_train_gen * (j + 1)):
                    #print (index)
                    # Generator steps pick a random caption-length group.
                    rand_nb = random.randint(0, len(list) - 1)
                    train_caption = caption[rand_nb]
                    if train_caption.shape[0] >= batch_size:
                        random_idx = random.sample(
                            range(0, train_caption.shape[0]), batch_size)
                    else:
                        random_idx = random.sample(
                            range(0, train_caption.shape[0]),
                            train_caption.shape[0])
                    input = contour[train_caption[random_idx, -1] - i * 10000]
                    target = center[train_caption[random_idx, -1] - i * 10000]
                    train_caption = train_caption[random_idx, :-1]
                    loss = train_model(input, target, train_caption)
                    loss_model.append(loss)
        if epoch % 4 == 0:
            # save the model and a bunch of generated pictures
            print ('... saving model and generated images')
            np.savez('discriminator_epoch' + str(epoch) + '.npz',
                     *layers.get_all_param_values(discriminator))
            np.savez('context_encoder_epoch' + str(epoch) + '.npz',
                     *layers.get_all_param_values(model))
            np.save('loss_dis', loss_dis)
            np.save('loss_gen', loss_model)
            contour, center = get_image(data_path, valid_input_path,
                                        valid_target_path, str(0))
            caption = get_caption(data_path, valid_caption_path, str(0),
                                  str(nb_caption))
            valid_caption = caption[4][idx]
            input = contour[valid_caption[:, -1]]
            generated_centers = predict_image(input, valid_caption[:, :-1])
            generated_images = assemble(input, generated_centers)
            for k in range(len(idx)):
                plt.subplot(1, len(idx), (k + 1))
                plt.axis('off')
                # CHW -> HWC for matplotlib
                plt.imshow(generated_images[k, :, :, :].transpose(1, 2, 0))
            plt.savefig('generated_images_epoch' + str(epoch) + '.png',
                        bbox_inches='tight')
    #end_time = timeit.default_timer()

    # Plot the learning curve (two x-axes: D and the model accumulate a
    # different number of steps).
    ax1 = host_subplot(111, axes_class=AA.Axes)
    plt.subplots_adjust(right=0.75)
    ax2 = ax1.twiny()
    x1 = range(1, len(loss_dis) + 1)
    ax1.set_xlim([x1[0], x1[-1]])
    x2 = range(1, len(loss_model) + 1)
    ax2.set_xlim([x2[0], x2[-1]])
    ax1.set_xlabel('training iteration (Discriminator)', color='g')
    ax2.set_xlabel('training iteration (Context encoder)', color='b')
    ax1.set_ylabel('Loss')
    ax1.plot(x1, rolling_average(loss_dis), 'g', label='Discriminator loss')
    ax2.plot(x2, rolling_average(loss_model), 'b',
             label='Context encoder Loss')
    ax1.grid(True)
    ax1.legend()
    plt.savefig('Learning_curve')
    print('Optimization complete.')
def train():
    """Build compiled Keras training graphs for the GAN and run the loop.

    D_train updates the discriminator with the generator frozen; G_train
    updates the generator (MSE + total-variation + adversarial terms) with
    the discriminator frozen.  Relies on module-level BATCH_SIZE, SEED,
    ITERATIONS and the build_* / data_generator / plot_figs helpers.
    """
    if not os.path.exists('./output'):
        os.makedirs('./output')
    if not os.path.exists('./weights'):
        os.makedirs('./weights')

    # ---------------------
    #  Model and Optimizer
    # ---------------------
    print('Building models...')
    baseG = build_generator()
    baseD = build_discriminator()

    # Build D train (generator frozen)
    rp_in = Input(shape=(3, ))
    rl_in = Input(shape=(None, None, 1))
    rs_in = Input(shape=(None, None, 1))
    fp_in = Input(shape=(3, ))
    fl_in = Input(shape=(None, None, 1))
    G = Model(inputs=baseG.inputs, outputs=baseG.outputs)
    D = Model(inputs=baseD.inputs, outputs=baseD.outputs)
    G.trainable = False
    D.trainable = True
    fs_1, fs_2, fs_3 = G([fp_in, fl_in])
    r_valid = D([rp_in, rl_in, rs_in])
    f_valid = D([fp_in, fl_in, fs_1])
    D_train = Model([rp_in, fp_in, rl_in, fl_in, rs_in], [r_valid, f_valid])
    # Adv Loss (Vanilla GAN); the 1e-9 guards log(0)
    D_train.add_loss(
        0.5 * (K.mean(-K.log(r_valid + 1e-9) - K.log(1 - f_valid + 1e-9))))
    D_train.compile(optimizer=Adam(2e-4, 0.0, 0.9))

    # Build G train (discriminator frozen)
    fp_in = Input(shape=(3, ))
    fl_in = Input(shape=(None, None, 1))
    fs_in = Input(shape=(None, None, 1))
    G = Model(inputs=baseG.inputs, outputs=baseG.outputs)
    D = Model(inputs=baseD.inputs, outputs=baseD.outputs)
    G.trainable = True
    D.trainable = False
    fs_1, fs_2, fs_3 = G([fp_in, fl_in])
    f_valid = D([fp_in, fl_in, fs_1])
    G_train = Model([fp_in, fl_in, fs_in], [fs_1, fs_2, fs_3, f_valid])
    # MSE Loss + TV Reg for main output
    G_train.add_loss(5e-1 * K.mean(K.square(fs_1 - fs_in)) +
                     1e-6 * tf.reduce_sum(tf.image.total_variation(fs_1)))
    # MSE Loss for sub outputs
    G_train.add_loss(
        2e-1 *
        (K.mean(K.square(fs_2 - fs_in)) + K.mean(K.square(fs_3 - fs_in))))
    # Adv Loss for main output (Vanilla GAN)
    G_train.add_loss(4e-1 * K.mean(-K.log(f_valid + 1e-9)))
    G_train.compile(optimizer=Adam(2e-4, 0.0, 0.9))

    # ---------------------
    #  Train
    # ---------------------
    print('Start Training...')
    start_time = datetime.datetime.now()
    train_generator = data_generator(batch_size=BATCH_SIZE, seed=SEED)
    for iteration in range(1, ITERATIONS + 1):
        # ---------------------
        #  Train Discriminator
        # ---------------------
        l_train, _, p_train, s_train = next(train_generator)
        D_loss = D_train.train_on_batch(
            [p_train, p_train, l_train, l_train, s_train], None)

        # ---------------------
        #  Train Generator
        # ---------------------
        l_train, _, p_train, s_train = next(train_generator)
        G_loss = G_train.train_on_batch([p_train, l_train, s_train], None)

        print('[Time: %s] [Iteration: %d] [D loss: %f] [G loss: %f]' % (
            datetime.datetime.now() - start_time,
            iteration,
            D_loss,
            G_loss,
        ))

        # Save training samples
        if iteration % 100 == 0:
            # At least 3 images, bs < 3 causes error
            train_x_batch, cond_batch, train_pos_batch, train_y_batch = next(
                train_generator)
            gen_imgs, s1, s2 = G.predict([train_pos_batch, train_x_batch])
            plot_figs(train_x_batch, train_y_batch, cond_batch, gen_imgs,
                      True, '%d' % iteration)
            plot_figs(train_x_batch, train_y_batch, cond_batch, s1, False,
                      '%d_s1' % iteration)
            plot_figs(train_x_batch, train_y_batch, cond_batch, s2, False,
                      '%d_s2' % iteration)

        if iteration % 200 == 0:
            # BUGFIX: the weight files were swapped (D saved as G_*.h5 and
            # vice versa); save each network under its own prefix.
            D.save_weights('./weights/D_%05d.h5' % iteration)
            G.save_weights('./weights/G_%05d.h5' % iteration)
def train_model(learning_rate_dis=0.0004, learning_rate_gen=0.0004,
                n_epochs=40, batch_size=100):
    '''
    Train a DCGAN-style generator/discriminator pair (Theano/Lasagne).
    Minibatches are staged into GPU-resident shared variables and fed to the
    compiled functions via `givens`, so the theano functions take no args.

    :param learning_rate_dis: Adam learning rate for the discriminator
    :param learning_rate_gen: Adam learning rate for the generator
    :param n_epochs: number of passes over the 8 training files
    :param batch_size: minibatch size (must divide 10000)
    '''
    #######################
    # Loading the dataset #
    #######################
    print('... Loading data')

    # Load the dataset on the CPU
    data_path = get_path()
    train_input_path = 'train_input_'
    train_target_path = 'train_target_'
    nb_train_batch = 8  # number of 10000-image training files

    # Creating symbolic variables
    input_channel = 3
    max_height = 64
    max_width = 64
    pred_batch = 5  # preview images generated per saved figure

    # GPU-resident buffers; minibatches are copied in via set_value below.
    # Shape = (100, 3, 64, 64)
    image = shared_GPU_data(shape=(batch_size, input_channel, max_height,
                                   max_width))
    # Shape = (100, 100)
    random_matrix = shared_GPU_data(shape=(batch_size, 100))
    # Shape = (5, 100)
    small_random_matrix = shared_GPU_data(shape=(pred_batch, 100))

    ######################
    # Building the model #
    ######################
    # Symbolic variables
    noise = T.matrix('noise', dtype=theano.config.floatX)
    x = T.tensor4('x', dtype=theano.config.floatX)

    # Creation of the model
    generator = build_generator(input_var=None)
    discriminator = build_discriminator(input_var=None)
    fake_image = layers.get_output(generator, inputs=noise)
    # Deterministic pass used only for preview images.
    fake_image_det = layers.get_output(generator, inputs=noise,
                                       deterministic=True)
    prob_real = layers.get_output(discriminator, inputs=x)
    prob_fake = layers.get_output(discriminator, inputs=fake_image)
    params_gen = layers.get_all_params(generator, trainable=True)
    params_dis = layers.get_all_params(discriminator, trainable=True)

    # Standard GAN losses; the generator uses the non-saturating form.
    loss_real = -T.mean(T.log(prob_real))
    loss_fake = -T.mean(T.log(1 - prob_fake))
    loss_dis = loss_real + loss_fake
    loss_gen = -T.mean(T.log(prob_fake))
    updates_dis = lasagne.updates.adam(loss_dis, params_dis,
                                       learning_rate=learning_rate_dis,
                                       beta1=0.5)
    updates_gen = lasagne.updates.adam(loss_gen, params_gen,
                                       learning_rate=learning_rate_gen,
                                       beta1=0.5)

    # Creation of theano functions
    train_dis = theano.function([], loss_dis, updates=updates_dis,
                                allow_input_downcast=True,
                                givens={
                                    x: image,
                                    noise: random_matrix
                                })
    train_gen = theano.function([], loss_gen, updates=updates_gen,
                                allow_input_downcast=True,
                                givens={noise: random_matrix})
    predict_image = theano.function([], fake_image_det,
                                    allow_input_downcast=True,
                                    givens={noise: small_random_matrix})

    ###################
    # Train the model #
    ###################
    print('... Training')
    epoch = 0
    nb_train_dis = 25  # discriminator updates per block
    nb_train_gen = 10  # generator updates per block
    nb_batch = 10000 // batch_size
    nb_block = nb_batch // nb_train_dis
    # Rebinding loss_dis/loss_gen from symbolic expressions to plain lists
    # -- the graphs are already compiled into the functions above.
    loss_dis = []
    loss_gen = []

    #start_time = timeit.default_timer()
    while (epoch < n_epochs):
        epoch = epoch + 1
        for i in range(nb_train_batch):
            #print (i)
            # Shape = (10000, 3, 64, 64) & Shape = (10000, 3, 32, 32)
            input, target = get_image(data_path, train_input_path,
                                      train_target_path, str(i))
            # Shape = (10000, 3, 64, 64): contour with its center pasted in
            assemblage = assemble(input, target)
            # Shape = (10000, 100)
            sample = random_sample(size=(10000, 100))
            for j in range(nb_block):
                #print (j)
                # Alternate 25 discriminator steps with 10 generator steps.
                for index in range(nb_train_dis * j, nb_train_dis * (j + 1)):
                    #print (index)
                    image.set_value(assemblage[index * batch_size:(index + 1)
                                               * batch_size])
                    random_matrix.set_value(
                        sample[index * batch_size:(index + 1) * batch_size])
                    loss = train_dis()
                    loss_dis.append(loss)
                for index in range(nb_train_gen * j, nb_train_gen * (j + 1)):
                    #print (index)
                    random_matrix.set_value(
                        sample[index * batch_size:(index + 1) * batch_size])
                    loss = train_gen()
                    loss_gen.append(loss)
        if epoch % 4 == 0:
            # save the model and a bunch of generated pictures
            print('... saving model and generated images')
            np.savez('discriminator_epoch' + str(epoch) + '.npz',
                     *layers.get_all_param_values(discriminator))
            np.savez('generator_epoch' + str(epoch) + '.npz',
                     *layers.get_all_param_values(generator))
            np.save('loss_dis', loss_dis)
            np.save('loss_gen', loss_gen)
            sample = random_sample(size=(pred_batch, 100))
            small_random_matrix.set_value(sample)
            generated_images = predict_image()
            for k in range(pred_batch):
                plt.subplot(1, pred_batch, (k + 1))
                plt.axis('off')
                # CHW -> HWC for matplotlib
                plt.imshow(generated_images[k, :, :, :].transpose(1, 2, 0))
            plt.savefig('generated_images_epoch' + str(epoch) + '.png',
                        bbox_inches='tight')
    #end_time = timeit.default_timer()

    # Plot the learning curve (two x-axes: D and G accumulate a different
    # number of steps).
    ax1 = host_subplot(111, axes_class=AA.Axes)
    plt.subplots_adjust(right=0.75)
    ax2 = ax1.twiny()
    x1 = range(1, len(loss_dis) + 1)
    ax1.set_xlim([x1[0], x1[-1]])
    x2 = range(1, len(loss_gen) + 1)
    ax2.set_xlim([x2[0], x2[-1]])
    ax1.set_xlabel('training iteration (Discriminator)', color='g')
    ax2.set_xlabel('training iteration (Generator)', color='b')
    ax1.set_ylabel('Loss')
    ax1.plot(x1, rolling_average(loss_dis), 'g', label='Discriminator loss')
    ax2.plot(x2, rolling_average(loss_gen), 'b', label='Generator Loss')
    ax1.grid(True)
    ax1.legend()
    plt.savefig('Learning_curve')
    print('Optimization complete.')
def __init__(self, alpha_g, alpha_d, trial, version):
    """Set up a RenyiGAN experiment: data, models, optimizers, output dirs.

    Args:
        alpha_g: Renyi alpha parameter for the generator loss (used in paths).
        alpha_d: Renyi alpha parameter for the discriminator loss (used in paths).
        trial: trial index used to namespace the output directories.
        version: generator-loss variant selector (1-4). Any other value
            exits the process via quit() (SystemExit), as before.
    """
    # Training hyper-parameters.
    self.BUFFER_SIZE = 60000
    self.BATCH_SIZE = 100
    self.EPOCHS = 250
    self.test_size = 10000
    self.alpha_g = alpha_g
    self.alpha_d = alpha_d
    self.trial = trial
    self.version = version
    self.noise_dim = 28 * 28  # one latent entry per MNIST pixel
    self.num_examples_to_generate = 16
    # Fixed noise batch so the preview grid tracks the same latent points
    # across epochs.
    self.seed = tf.random.normal(
        [self.num_examples_to_generate, self.noise_dim])
    (self.dataset, self.real_mu, self.real_sigma) = data.load_mnist(
        self.BUFFER_SIZE, self.BATCH_SIZE)
    self.generator = build_generator()
    self.discriminator = build_discriminator()
    self.generator_optimizer = tf.keras.optimizers.Adam(
        learning_rate=0.0001, beta_1=0.5, beta_2=0.999, epsilon=1e-7)
    self.discriminator_optimizer = tf.keras.optimizers.Adam(
        learning_rate=0.0001, beta_1=0.5, beta_2=0.999, epsilon=1e-7)

    # Build the run-directory hierarchy once instead of repeating the same
    # string concatenation for every derived path (was duplicated 4 times).
    root_dir = 'data'
    version_dir = root_dir + '/renyiganV_' + str(self.version)
    alpha_dir = (version_dir + '/AlphaG=' + str(self.alpha_g) +
                 '_AlphaD=' + str(self.alpha_d))
    trial_dir = alpha_dir + '/trial' + str(self.trial)
    # NOTE(review): 'trial<N>./training_checkpoints' (embedded '.') reproduces
    # the original string exactly — it looks like a typo for
    # '/training_checkpoints', but is kept so existing checkpoints still load.
    self.checkpoint_dir = trial_dir + './training_checkpoints'
    self.checkpoint_prefix = os.path.join(self.checkpoint_dir, "ckpt")
    self.checkpoint = tf.train.Checkpoint(
        generator_optimizer=self.generator_optimizer,
        discriminator_optimizer=self.discriminator_optimizer,
        generator=self.generator,
        discriminator=self.discriminator)
    self.image_dir = trial_dir + '/images'
    self.plot_dir = trial_dir + '/plots'
    # Create parents before children, same order as before. make_directory is
    # defined elsewhere in this class (presumably a tolerant mkdir — confirm).
    for directory in (root_dir, version_dir, alpha_dir, trial_dir,
                      self.image_dir, self.plot_dir):
        self.make_directory(directory)

    # Every version uses the same relativistic discriminator loss; only the
    # generator loss varies, so dispatch through a table instead of an
    # if/elif chain that repeated the discriminator assignment four times.
    generator_losses = {
        1: loss.generator_loss_renyi,
        2: loss.generator_loss_renyiL1,
        3: loss.generator_loss_original,
        4: loss.generator_loss_rgan,
    }
    if version not in generator_losses:
        quit()  # preserve original behavior (SystemExit) for unknown versions
    self.generator_loss = generator_losses[version]
    self.discriminator_loss = loss.discriminator_loss_rgan
# Script-level experiment setup (module-scope twin of the class __init__).
# NOTE(review): BUFFER_SIZE and BATCH_SIZE are referenced below but defined
# above this chunk — confirm against the full file.
test_size = 10000
alpha_g = 0.1  # Renyi alpha for the generator loss (also namespaces paths)
alpha_d = 0.1  # Renyi alpha for the discriminator loss
version = 1
trial = 1
noise_dim = 28 * 28  # one latent entry per MNIST pixel
num_examples_to_generate = 16
# Fixed noise batch so sample grids are comparable across epochs.
seed = tf.random.normal([num_examples_to_generate, noise_dim])
(dataset, real_mu, real_sigma) = data.load_mnist(BUFFER_SIZE, BATCH_SIZE)
generator = build_generator()
discriminator = build_discriminator()
generator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001,
                                               beta_1=0.5,
                                               beta_2=0.999,
                                               epsilon=1e-7)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001,
                                                   beta_1=0.5,
                                                   beta_2=0.999,
                                                   epsilon=1e-7)
# NOTE(review): './training_checkpoints' embeds a stray '.' after the trial
# number ('trial1./training_checkpoints') — presumably a typo, kept as-is
# for compatibility with existing checkpoint directories.
checkpoint_dir = 'data/renyiganV_' + str(version) + '/AlphaG=' + str(
    alpha_g) + '_AlphaD=' + str(alpha_d) + '/trial' + str(
        trial) + './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
# NOTE(review): this statement continues past the end of this chunk.
checkpoint = tf.train.Checkpoint(
    generator_optimizer=generator_optimizer,