def initialize_models(args, device):
    """Create the two encoder/decoder/discriminator triples and wrap them.

    Every network is built from the hyper-parameters on ``args``, moved to
    ``device``, summarised with ``utils.print_network`` and finally wrapped in
    ``nn.DataParallel``.

    Args:
        args: namespace providing ``in_ngc``, ``out_ngc``, ``ngf``,
            ``in_ndc``, ``out_ndc``, ``ndf`` and ``img_size``.
        device: target device handed to ``.to(...)``.

    Returns:
        list: ``[En_A, En_B, De_A, De_B, Disc_A, Disc_B]`` as
        ``nn.DataParallel`` modules.
    """
    def _new_encoder():
        return models.encoder(in_nc=args.in_ngc, nf=args.ngf,
                              img_size=args.img_size).to(device)

    def _new_decoder():
        return models.decoder(out_nc=args.out_ngc, nf=args.ngf).to(device)

    def _new_discriminator():
        return models.discriminator(in_nc=args.in_ndc, out_nc=args.out_ndc,
                                    nf=args.ndf,
                                    img_size=args.img_size).to(device)

    # The construction order (En_A, En_B, De_A, De_B, Disc_A, Disc_B) is kept
    # so that the networks are instantiated in the same sequence as before.
    networks = [
        _new_encoder(), _new_encoder(),
        _new_decoder(), _new_decoder(),
        _new_discriminator(), _new_discriminator(),
    ]

    print('---------- models initialized -------------')
    for network in networks:
        utils.print_network(network)
    print('-----------------------------------------------')

    # Parallelize code
    return [nn.DataParallel(network) for network in networks]
def translate(input_sentence):
    """Greedily decode a translation for ``input_sentence``.

    The sentence is preprocessed, mapped to source-vocabulary ids, padded to
    50 positions and encoded. The decoder is then run for at most 50 steps,
    feeding each predicted id back as the next input, and stops early once
    ``'<end>'`` is produced.

    Args:
        input_sentence: raw source sentence.

    Returns:
        tuple: ``(result, sentence)`` where ``result`` is the predicted words,
        each followed by a single space, and ``sentence`` is the preprocessed
        input.
    """
    sentence = preprocess_sentence(input_sentence)
    token_ids = [src_LI.word2idx[token] for token in sentence.split(' ')]
    padded = tf.keras.preprocessing.sequence.pad_sequences(
        [token_ids], maxlen=50, padding='post')
    inputs = tf.convert_to_tensor(padded)
    print(inputs.shape)

    # Two groups of two zero tensors of shape (1, 16) as the initial state.
    state = [[tf.zeros((1, 16)) for _unused in range(2)]
             for _group in range(2)]
    encoder_out, encoder_state = encoder(inputs, state)
    print(encoder_out.shape)

    decoder_state = encoder_state[0]
    decoder_input = tf.expand_dims([tgt_LI.word2idx['<start>']], 0)

    pieces = []
    for _step in range(50):
        predictions, decoder_state, _ = decoder(decoder_input, decoder_state,
                                                encoder_out)
        predicted_id = tf.argmax(predictions[0]).numpy()
        word = tgt_LI.idx2word[predicted_id]
        pieces.append(word + ' ')
        if word == '<end>':
            break
        # the predicted ID is fed back into the model
        decoder_input = tf.expand_dims([predicted_id], 0)

    result = ''.join(pieces)
    return result, sentence
def __init__(self, encoded_size, x_dim, y_dim):
    """Assemble the sub-modules of the model.

    Args:
        encoded_size: size passed to the encoder, to ``r_to_z`` and to the
            decoder.
        x_dim: forwarded to the encoder and decoder.
        y_dim: forwarded to the encoder and decoder.
    """
    super(NP, self).__init__()
    self.encoded_size = encoded_size

    # Encoder and decoder are built from the same three arguments.
    dims = (encoded_size, x_dim, y_dim)
    self._encoder = encoder(*dims)
    self._rz = r_to_z(encoded_size)
    self._decoder = decoder(*dims)
    self.tanh = nn.Tanh()
def build_network(input_sequences, initial_state=None, initialize_to_zero=True):
    """Build the encoder followed by the additional output layers.

    Args:
        input_sequences: input tensor; a trailing axis of size 1 is appended
            before it is passed to the encoder.
        initial_state: forwarded to ``encoder``.
        initialize_to_zero: forwarded to ``encoder``.

    Returns:
        tuple: ``(predictions_flat, predictions, final_encoder_state,
        encoder_saver)``; ``predictions`` is ``predictions_flat`` reshaped to
        the dynamic shape of ``input_sequences``.
    """
    encoder_channels = [32, 16]
    # The output layers consume the channel count of the last encoder stage.
    encoding_channels = encoder_channels[-1]

    input_sequences_rs = tf.expand_dims(input_sequences, axis=-1)

    # encoder network
    with tf.variable_scope("encoder"):
        encoder_result = encoder(
            inputs=input_sequences_rs,
            channels=encoder_channels,
            initial_state=initial_state,
            initialize_to_zero=initialize_to_zero)
    all_encoder_states, final_encoder_state = encoder_result

    encoder_saver = tf.compat.v1.train.Saver()
    print('v1 train saver worked')

    # additional output network
    flat_shape = (-1, image_height, image_width, encoding_channels)
    predictions_flat = output_layers(
        tf.reshape(all_encoder_states, shape=flat_shape))
    predictions = tf.reshape(predictions_flat, tf.shape(input_sequences))
    print('build network worked')

    return predictions_flat, predictions, final_encoder_state, encoder_saver
def regularization_loss(self):
    """Return the sum of a latent-space and a data-space L2 distance.

    The first term is the batch mean of ``||encoder(pseudo_G_z) - random_z||``
    (norm over axis 1). The second term is the batch mean of the L2 norm of
    ``decoder(random_z) - pseudo_G_z`` after both are flattened to
    ``prod(opts['datashape'])`` columns.

    Side effects:
        Sets ``self.random_z_recon`` and ``self.G_new_z``.
    """
    opts = self.opts

    # Latent term: re-encode the pseudo samples and compare with the codes.
    self.random_z_recon, _ = encoder(opts,
                                     inputs=self.pseudo_G_z,
                                     reuse=True,
                                     is_training=self.is_training)
    latent_sq = tf.square(self.random_z_recon - self.random_z)
    latent_norm = tf.sqrt(tf.reduce_sum(latent_sq, axis=[1]))
    loss_1 = tf.reduce_mean(latent_norm)

    # Data term: decode the codes again and compare with the pseudo samples.
    self.G_new_z, _ = decoder(opts,
                              noise=self.random_z,
                              reuse=True,
                              is_training=self.is_training)
    flat_shape = [-1, np.prod(opts['datashape'])]
    diff = tf.reshape(self.G_new_z, flat_shape) - \
        tf.reshape(self.pseudo_G_z, flat_shape)
    data_norm = tf.sqrt(tf.reduce_sum(tf.square(diff), axis=[1]))
    loss_2 = tf.reduce_mean(data_norm)

    return loss_1 + loss_2
def make_model():
    """Assemble, compile and summarise the two-input model.

    Branch one chains ``encoder -> hourglass -> hourglass -> decoder``; branch
    two applies ``net2`` to the second input. The branch outputs are added.
    The model summary is written to ``hourglass_sr_t1_t2.txt``.

    Returns:
        The compiled model (loss ``root_mean_sq_GxGy``, optimizer ``RMSprop``).
    """
    # Sub-models; each one is used like a single layer below.
    side = 336
    quarter = side // 4
    image_shape = (side, side, 3)

    enc = encoder(image_shape)
    hg1 = hourglass((quarter, quarter, 512))
    hg2 = hourglass((quarter, quarter, 256))
    dec = decoder((quarter, quarter, 256))
    proSR = net2(image_shape)

    # Wire the sub-models into one graph.
    input_tensor_1 = Input(image_shape)
    input_tensor_2 = Input(image_shape)

    main_branch = dec(hg2(hg1(enc(input_tensor_1))))
    side_branch = proSR(input_tensor_2)
    output = Add()([main_branch, side_branch])

    model = Model([input_tensor_1, input_tensor_2], output)
    model.compile(loss=root_mean_sq_GxGy, optimizer=RMSprop())

    # Capture what model.summary() prints into a text file.
    with open('hourglass_sr_t1_t2.txt', 'w') as f, redirect_stdout(f):
        model.summary()

    return model
def train_step(img_tensor, target, tokenizer):
    """Run one teacher-forced optimisation step on a batch.

    Args:
        img_tensor: batch passed to ``encoder``.
        target: target token ids; column 0 is skipped by the loss loop.
        tokenizer: provides ``word_index['<start>']`` for the first input.

    Returns:
        tuple: ``(loss, total_loss)`` where ``loss`` is the sum over time
        steps and ``total_loss`` is ``loss`` divided by ``target.shape[1]``.
    """
    batch_size = target.shape[0]
    seq_len = target.shape[1]

    loss = 0

    # initializing the hidden state for each batch
    # because the captions are not related from image to image
    hidden = decoder.reset_state(batch_size=batch_size)

    start_token = tokenizer.word_index['<start>']
    dec_input = tf.expand_dims([start_token] * batch_size, 1)

    with tf.GradientTape() as tape:
        features = encoder(img_tensor)

        for step in range(1, seq_len):
            # passing the features through the decoder
            predictions, hidden, _ = decoder(dec_input, features, hidden)
            step_target = target[:, step]
            loss += loss_function(step_target, predictions)

            # using teacher forcing
            dec_input = tf.expand_dims(target[:, step], 1)

    total_loss = (loss / int(seq_len))

    params = encoder.trainable_variables + decoder.trainable_variables
    grads = tape.gradient(loss, params)
    optimizer.apply_gradients(zip(grads, params))

    return loss, total_loss
def add_aefixedpoint_cost(opts, wae_model):
    """Add the auto-encoder fixed-point cost to ``wae_model.wae_objective``.

    The decoded samples ``wae_model.decoded`` are encoded and decoded again,
    once with gradients flowing through the samples and once with the samples
    wrapped in ``tf.stop_gradient``. The extra cost is
    ``stop_gradient(a) + a - c`` where ``a`` and ``c`` are the reconstruction
    losses of those two passes.

    Side effects:
        Creates the ``w_aefixedpoint`` placeholder, stores it on
        ``wae_model.w_aefixedpoint`` and adds the weighted extra cost to
        ``wae_model.wae_objective``.
    """
    weight_ph = tf.placeholder(tf.float32, name='w_aefixedpoint')
    wae_model.w_aefixedpoint = weight_ph

    gen_images = wae_model.decoded
    gen_images.set_shape([opts['batch_size']] + wae_model.data_shape)

    # Encode the generated images with and without gradient flow.
    enc_out = encoder(opts, reuse=True, inputs=gen_images,
                      is_training=wae_model.is_training)
    enc_out_sg = encoder(opts, reuse=True,
                         inputs=tf.stop_gradient(gen_images),
                         is_training=wae_model.is_training)
    codes = enc_out[0]
    codes_sg = enc_out_sg[0]
    if opts['e_noise'] == 'gaussian':
        # Encoder outputs means and variances of Gaussian
        # Encoding into means
        codes = codes[0]
        codes_sg = codes_sg[0]

    # Decode both sets of codes again.
    recon, _ = decoder(opts, reuse=True, noise=codes,
                       is_training=wae_model.is_training)
    recon_sg, _ = decoder(opts, reuse=True, noise=codes_sg,
                          is_training=wae_model.is_training)

    # NOTE(review): `a` goes through `wae.WAE.reconstruction_loss(opts, ...)`
    # while `c` goes through `wae_model.reconstruction_loss(...)` without
    # `opts` -- confirm both calls match the method's actual signature.
    a = wae.WAE.reconstruction_loss(opts, gen_images, recon)
    b = tf.stop_gradient(a)
    c = wae_model.reconstruction_loss(tf.stop_gradient(gen_images), recon_sg)
    extra_cost = b + a - c

    wae_model.wae_objective += wae_model.w_aefixedpoint * extra_cost
def _setup_encoder(self, X, X_seq_len, dropout):
    """Create the encoder inside the ``"encoder"`` variable scope and run it.

    Args:
        X: input sequence
        X_seq_len: inputs' sequence lengths
        dropout: dropout (1-keep_prob) to apply to encoder cell

    Returns (see the Encoder class for more details):
        outputs: encoder outputs
        output_state: final state of the encoder
    """
    scope = tf.variable_scope("encoder")
    with scope:
        enc = Encoder(self.config)
        enc_outputs, enc_state = enc(X, X_seq_len, dropout)
        return enc_outputs, enc_state
def __init__(self, args):
    """Copy hyper-parameters from ``args`` and build encoder, decoder, loss.

    Args:
        args: namespace providing ``device``, ``latent_size``,
            ``hidden_size``, ``batch_size``, ``input_dim``, ``warmup`` and
            ``recons``.
    """
    super(VAE, self).__init__()

    # Attributes copied verbatim from the argument namespace.
    copied = ('device', 'latent_size', 'hidden_size', 'batch_size',
              'input_dim', 'warmup')
    for attr_name in copied:
        setattr(self, attr_name, getattr(args, attr_name))

    self.encoder = models.encoder(self.input_dim, self.hidden_size,
                                  self.latent_size, self.device)
    self.decoder = models.decoder(self.latent_size, self.input_dim,
                                  self.device)

    criterion = nn.MSELoss(reduction='mean')
    self.reconstruction_criterion = criterion.to(self.device)

    self.reparameterize_with_noise = False
    self.reconstruction = args.recons
def _setup_rnn(self, X, X_seq_len, dropout, vocab_size):
    """Run the encoder and project its outputs to ``vocab_size + 1`` units.

    Args:
        X: input passed to the encoder.
        X_seq_len: sequence lengths passed to the encoder.
        dropout: dropout value passed to the encoder.
        vocab_size: the dense output layer has ``vocab_size + 1`` units.

    Returns:
        The output of the dense projection.
    """
    cfg = self.config
    enc_params = encoder_params_helper(cfg.rnn_num_layers,
                                       cfg.rnn_unit_type,
                                       cfg.rnn_type,
                                       cfg.rnn_num_units,
                                       cfg.rnn_num_residual_layers,
                                       cfg.verbose)

    # NOTE(review): the source formatting does not show where the "rnn" scope
    # ends; the projection is assumed to live inside it -- confirm.
    with tf.variable_scope("rnn"):
        rnn = Encoder(enc_params)
        rnn_outputs, _final_state = rnn(X, X_seq_len, dropout)

        projection = tf.layers.Dense(vocab_size+1)
        projected = projection(rnn_outputs)

        if cfg.verbose:
            variable_summaries(projection.trainable_weights[0],
                               'output_layer_weights')
            variable_summaries(projection.trainable_weights[1],
                               'output_layer_biases')
            variable_summaries(projected, 'linear_projections')

        return projected
def deblur_defmo(I, B, bbox_tight, nsplits, radius, obj_dim):
    """Estimate sub-frame appearances and a trajectory for one detection.

    The image ``I`` and background ``B`` are cropped around an enlarged
    bounding box, encoded, rendered at ``nsplits * multi_f + 1`` time stamps
    in ``[0, 1]`` and averaged in groups of ``multi_f``. The results are
    mapped back from the crop to the frame of ``I``.

    Args:
        I: input image.
        B: background image.
        bbox_tight: tight bounding box (copied before being extended).
        nsplits: number of sub-frames after averaging.
        radius: the box is extended by ``4 * max(radius)``.
        obj_dim: not used by this function.

    Returns:
        tuple: ``(est_hs, est_traj)``.
    """
    crop_size = (g_resolution_x, g_resolution_y)
    aspect = g_resolution_y / g_resolution_x

    bbox = extend_bbox(bbox_tight.copy(), 4 * np.max(radius), aspect, I.shape)
    im_crop = crop_resize(I, bbox, crop_size)
    bgr_crop = crop_resize(B, bbox, crop_size)

    # Image and background are concatenated along axis 0 and given a leading
    # axis of size 1 before being fed to the encoder.
    preprocess = get_transform()
    stacked = torch.cat((preprocess(im_crop), preprocess(bgr_crop)), 0)
    input_batch = stacked.to(device).unsqueeze(0).float()

    with torch.no_grad():
        latent = encoder(input_batch)
        times = torch.linspace(0, 1, nsplits * multi_f + 1).to(device)
        renders = rendering(latent, times[None])

    # add small motion blur
    grouped = renders[:, :-1].reshape(1, nsplits, multi_f, 4,
                                      g_resolution_y, g_resolution_x)
    renders = grouped.mean(2)

    renders_rgba = renders[0].data.cpu().detach().numpy().transpose(2, 3, 1, 0)
    est_hs_crop = rgba2hs(renders_rgba, bgr_crop)
    est_hs = rev_crop_resize(est_hs_crop, bbox, I)

    traj_in_crop = renders2traj(renders, device)[0].T.cpu()
    est_traj = rev_crop_resize_traj(traj_in_crop, bbox, crop_size)
    return est_hs, est_traj
out = tf.transpose(model.output, perm=[0, 2, 1, 3]) out = tf.keras.layers.Reshape([-1, out.shape[-1] * out.shape[-2]])(out) if config.n_layers > 0: if config.mode == 'GRU': out = tf.keras.layers.Dense(config.n_dim)(out) for i in range(config.n_layers): # out = transformer_layer(config.n_dim, config.n_heads)(out) out = tf.keras.layers.Bidirectional( tf.keras.layers.GRU(config.n_dim, return_sequences=True), backward_layer=tf.keras.layers.GRU(config.n_dim, return_sequences=True, go_backwards=True))(out) elif config.mode == 'transformer': out = tf.keras.layers.Dense(config.n_dim)(out) out = encoder(config.n_layers, config.n_dim, config.n_heads)(out) out = tf.keras.layers.Flatten()(out) out = tf.keras.layers.ReLU()(out) else: for i in range(config.n_layers): # out = tf.keras.layers.Dropout(0.1)(out) out = tf.keras.layers.Dense(config.n_dim)(out) out = tf.keras.layers.Activation('sigmoid')(out) * out out = tf.keras.layers.Dense(config.n_classes, activation='relu')(out) model = tf.keras.models.Model(inputs=model.input, outputs=out) specs = None for name in config.name.split(','): NAME = name if name.endswith('.h5') else name + '.h5'
def run(args):
    """Train the BiLSTM encoder / decoder pair with teacher forcing.

    Builds the source and target language indices, the data loader, the
    encoder, the decoder and the optimizer, then trains for ``args.epochs``
    epochs. The per-batch loss is written as a summary, progress is printed
    every 100 batches, and checkpoints are saved every 2000 batches and after
    every epoch.

    Args:
        args: namespace providing ``src_path``, ``tgt_path``, ``batch_size``,
            ``optimizer`` and ``epochs``.

    Raises:
        ValueError: if ``args.optimizer`` is not ``'adam'``, the only
            optimizer this function knows how to build.

    NOTE(review): ``writer``, ``global_step`` and ``checkpoint_prefix`` are
    not defined in this function; they are presumably module-level names --
    confirm.
    """
    # Fail fast: previously an unsupported optimizer name left `optimizer`
    # unbound and only surfaced as a NameError after the data had been loaded
    # and the models had been built.
    if args.optimizer != 'adam':
        raise ValueError(
            "Unsupported optimizer {!r}; only 'adam' is supported.".format(
                args.optimizer))

    print("Creating Langauge Indices for source and target...")
    src_LI = LanguageIndex(language="source")
    tgt_LI = LanguageIndex(language="target")
    src_LI.add(read_file(args.src_path))
    tgt_LI.add(read_file(args.tgt_path))
    print("Created Langauge Indices.")

    print("Instantiating DataLoader object...")
    Data = DataLoader(args.src_path, args.tgt_path, src_LI, tgt_LI,
                      args.batch_size)
    print("Loaded data.")

    print("Creating an encoder object...")
    encoder = BiLSTMEncoder(
        vocab_size=len(src_LI.word2idx),
        embedding_dim=128,
        encoder_size=16,
        batch_size=args.batch_size
    )
    print("Created an encoder object.")

    print("Creating a decoder object...")
    decoder = Decoder(
        vocab_size=len(tgt_LI.word2idx),
        embedding_dim=128,
        decoder_size=16,
        batch_size=args.batch_size
    )
    print("Created a decoder object.")

    # create optimizer (the name was validated at the top of the function)
    optimizer = tf.train.AdamOptimizer()

    # function to calculate loss
    def loss_function(real, pred):
        # Positions whose target id is 0 are masked out of the loss.
        mask = 1 - np.equal(real, 0)
        loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=real, logits=pred) * mask
        return tf.reduce_mean(loss_)

    # create a checkpoint object for saving the model
    checkpoint = tf.train.Checkpoint(optimizer=optimizer,
                                     encoder=encoder,
                                     decoder=decoder)

    for epoch in range(args.epochs):
        start = time.time()

        # intialize the hidden states of encoder
        encoder_state = encoder.initialize_hidden()
        total_loss = 0
        num_batch = 0

        for (batch, (input_seq, target_seq)) in enumerate(Data.data):
            num_batch += 1
            loss = 0

            with tf.GradientTape() as tape:
                encoder_output, encoder_state = encoder(input_seq,
                                                        encoder_state)

                # initialize decoder hidden state
                decoder_state = encoder_state[0]
                decoder_input = tf.expand_dims(
                    [tgt_LI.word2idx['<start>']] * args.batch_size, 1)

                # Teacher forcing - feeding the target as the next input
                for t in range(1, target_seq.shape[1]):
                    # passing encoder_outputs to the decoder
                    predictions, decoder_state, _ = decoder(decoder_input,
                                                            decoder_state,
                                                            encoder_output)
                    loss += loss_function(target_seq[:, t], predictions)

                    # using teacher forcing
                    decoder_input = tf.expand_dims(target_seq[:, t], 1)

            batch_loss = (loss / int(target_seq.shape[1]))

            # write batch_loss to the tensorboard logs
            with writer.as_default(), \
                    tf.contrib.summary.always_record_summaries():
                tf.contrib.summary.scalar('TrainingLoss', batch_loss.numpy())

            total_loss += batch_loss

            variables = encoder.variables + decoder.variables
            gradients = tape.gradient(loss, variables)
            optimizer.apply_gradients(zip(gradients, variables),
                                      global_step=global_step)

            if batch % 100 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(
                    epoch + 1, batch, batch_loss.numpy()))

            if batch % 2000 == 0:
                checkpoint.save(file_prefix=checkpoint_prefix)

        # saving (checkpoint) the model every 1 epoch
        if (epoch + 1) % 1 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)

        # Guard the average so an empty data loader reports 0 instead of
        # raising ZeroDivisionError.
        print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                            total_loss / max(num_batch, 1)))
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
import argparse
import os
import time

import numpy as np
import tensorflow as tf

tf.enable_eager_execution()

# import modules for encoder and decoder
from models.encoder import *
from models.decoder import *

# Smoke test: build a BiLSTM encoder and push one three-token sequence
# through it, printing the input shape, the output type and the output shape.
print("Creating an encoder object...")
encoder = BiLSTMEncoder(
    vocab_size=1024,
    embedding_dim=128,
    encoder_size=16,
    batch_size=4,
)
print("Created an encoder object.")

# Initial state: two groups of two zero tensors of shape (1, 16).
encoder_hidden = [
    [tf.zeros((1, 16)), tf.zeros((1, 16))],
    [tf.zeros((1, 16)), tf.zeros((1, 16))],
]

inputs = tf.convert_to_tensor([2, 3, 4])
print(inputs.shape)

# Reshape the flat id vector to a single row before calling the encoder.
inputs = tf.reshape(inputs, (1, -1))
outputs, encoder_hidden = encoder(inputs, encoder_hidden)
print(type(outputs))
print(outputs.shape)
def __init__(self, opts, tag):
    """Build the TensorFlow graph of the model.

    Resets the default graph, opens a session with GPU memory growth enabled,
    creates the placeholders, the label-conditioned encoder and the decoder,
    defines the training objectives and finally builds an evaluation head
    that scores every class label with ``opts['sampling_size']`` latent
    samples per input.

    Args:
        opts: option dictionary. Keys read here: ``dataset``, ``zdim``,
            ``lambda_cls``, ``lambda_mixture``, ``lambda``, ``work_dir``,
            ``n_classes``, ``sampling_size`` and ``e_pretrain``.
        tag: run tag stored on ``self.tag`` and passed to ``ResultLogger``.

    Raises:
        AssertionError: if ``opts['dataset']`` is not a key of ``datashapes``.
    """
    tf.reset_default_graph()
    # NOTE(review): an informational message is logged at ERROR level.
    logging.error('Building the Tensorflow Graph')
    gpu_options = tf.GPUOptions(allow_growth=True)
    config = tf.ConfigProto(gpu_options=gpu_options)
    self.sess = tf.Session(config=config)
    self.opts = opts

    assert opts['dataset'] in datashapes, 'Unknown dataset.'
    self.data_shape = datashapes[opts['dataset']]

    self.add_inputs_placeholders()
    self.add_training_placeholders()
    sample_size = tf.shape(self.sample_points)[0]

    # Encoder: enc_sigmas is used as a log-variance below
    # (std = sqrt(exp(enc_sigmas))); it is clipped to [-50, 50] first.
    enc_mean, enc_sigmas = encoder(opts,
                                   inputs=self.sample_points,
                                   is_training=self.is_training,
                                   y=self.labels)
    enc_sigmas = tf.clip_by_value(enc_sigmas, -50, 50)
    self.enc_mean, self.enc_sigmas = enc_mean, enc_sigmas

    # Reparameterised sample of the latent code.
    eps = tf.random_normal((sample_size, opts['zdim']),
                           0., 1., dtype=tf.float32)
    self.encoded = self.enc_mean + tf.multiply(
        eps, tf.sqrt(1e-8 + tf.exp(self.enc_sigmas)))

    # Decode the points encoded above; the decoder also returns probs1,
    # which is compared with the labels below.
    (self.reconstructed, self.reconstructed_logits), self.probs1 = \
        decoder(opts, noise=self.encoded, is_training=self.is_training)
    self.correct_sum = tf.reduce_sum(
        tf.cast(tf.equal(tf.argmax(self.probs1, axis=1), self.labels),
                tf.float32))

    # Decode the content of sample_noise
    (self.decoded, self.decoded_logits), _ = decoder(
        opts, reuse=True, noise=self.sample_noise,
        is_training=self.is_training)

    # -- Objectives, losses, penalties
    self.loss_cls = self.cls_loss(self.labels, self.probs1)
    self.loss_mmd = self.mmd_penalty(self.sample_noise, self.encoded)
    self.loss_recon = self.reconstruction_loss(self.opts,
                                               self.sample_points,
                                               self.reconstructed)
    self.mixup_loss = self.MIXUP_loss(opts, self.encoded, self.labels)
    self.gmmpara_init()
    self.loss_mixture = self.mixture_loss(self.encoded)
    self.objective = self.loss_recon + opts[
        'lambda_cls'] * self.loss_cls + opts['lambda_mixture'] * tf.cast(
            self.loss_mixture, dtype=tf.float32)
    self.objective_pre = self.loss_recon + opts[
        'lambda'] * self.loss_mmd + self.loss_cls

    self.result_logger = ResultLogger(tag, opts['work_dir'], verbose=True)
    self.tag = tag

    # -- Evaluation head: one bound per candidate label, using S latent
    # samples per input, turned into probabilities with a softmax.
    logpxy = []
    dimY = opts['n_classes']
    N = sample_size
    S = opts['sampling_size']
    x_rep = tf.tile(self.sample_points, [S, 1, 1, 1])
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        for i in range(dimY):
            # Condition the encoder on label i for every input.
            y = tf.fill((N, ), i)
            mu, log_sig = encoder(opts,
                                  inputs=self.sample_points,
                                  reuse=True,
                                  is_training=False,
                                  y=y)
            mu = tf.tile(mu, [S, 1])
            log_sig = tf.tile(log_sig, [S, 1])
            y = tf.tile(y, [S])
            eps2 = tf.random_normal((N * S, opts['zdim']),
                                    0., 1., dtype=tf.float32)
            z = mu + tf.multiply(eps2, tf.sqrt(1e-8 + tf.exp(log_sig)))

            (mu_x, _), logit_y = decoder(opts, reuse=True, noise=z,
                                         is_training=False)
            logp = -tf.reduce_sum((x_rep - mu_x)**2, axis=[1, 2, 3])
            log_pyz = -tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=y, logits=logit_y)

            # Mixture term: log(theta) - 0.5*log(2*pi*lambda)
            #               - (z - u)^2 / (2*lambda)
            posterior = tf.log(
                self.theta_p) - 0.5 * tf.log(2 * math.pi * self.lambda_p)
            self.u_p_1 = tf.expand_dims(self.u_p, 2)
            z_m = tf.expand_dims(tf.transpose(z), 1)
            aa = tf.square(z_m - self.u_p_1)
            self.lambda_p_1 = tf.expand_dims(self.lambda_p, 2)
            # Fix: the squared distance is divided by 2*lambda. The former
            # `aa / 2 * lambda` multiplied by lambda, contradicting the
            # normaliser above, which treats lambda_p as a variance.
            # NOTE(review): confirm self.mixture_loss uses the same
            # convention for lambda_p.
            bb = aa / (2 * self.lambda_p_1)
            posterior = tf.expand_dims(posterior, 2) - bb
            posterior_sum = tf.reduce_sum(tf.reduce_sum(posterior, axis=0),
                                          axis=0)

            bound = 0.5 * logp + opts['lambda_cls'] * log_pyz + opts[
                'lambda_mixture'] * posterior_sum
            # Average over the S samples in log space.
            bound = tf.reshape(bound, [S, N])
            bound = self.logsumexp(bound) - tf.log(float(S))
            logpxy.append(tf.expand_dims(bound, 1))

        logpxy = tf.concat(logpxy, 1)
        y_pred = tf.nn.softmax(logpxy)
        self.eval_probs = y_pred

    # Tensors of the last label iteration, exposed under test_* names.
    self.test_a = 0.5 * logp
    self.test_b = log_pyz
    self.test_c = posterior_sum

    if opts['e_pretrain']:
        self.loss_pretrain = self.pretrain_loss()
    else:
        self.loss_pretrain = None

    self.add_optimizers()
    self.add_savers()
batch_size=args.batch_size, num_workers=12) testloader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, num_workers=12) num_layers = args.x device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Assuming that we are on a CUDA machine, this should print a CUDA device: print(device) if args.encoder: encoder = models.encoder(x=num_layers, pretrained_path=args.encoder).to(device) else: encoder = models.encoder(x=num_layers).to(device) if args.decoder: decoder = models.decoder(x=num_layers, pretrained_path=args.decoder).to(device) else: decoder = models.decoder(x=num_layers).to(device) encoder.train(True) decoder.train(True) criterion = nn.MSELoss().to(device) optimizer = optim.Adam(list(decoder.parameters()) + list(encoder.parameters()), lr=args.learn_rate) # .to(device)
def improved_sampling(opts):
    """Optimise latent codes so that decoded samples are auto-encoder fixed points.

    Creates ``NUM_ROWS * NUM_COLS`` trainable latent codes, restores the
    encoder and generator weights from ``opts['checkpoint']`` and runs Adam on
    the codes only, minimising the reconstruction loss between the generated
    images and their encode/decode round trip. Generated images and codes are
    saved with ``np.save`` under ``opts['work_dir']`` for each of the first
    100 steps and every 100th step afterwards.

    Args:
        opts: option dictionary. Keys read here: ``checkpoint``, ``zdim``,
            ``dataset``, ``e_noise`` and ``work_dir``.
    """
    NUM_ROWS = 10
    NUM_COLS = 10
    NUM_GD_STEPS = 100000
    num_z = NUM_ROWS * NUM_COLS
    checkpoint = opts['checkpoint']

    with tf.Session() as sess:
        with sess.graph.as_default():
            z = tf.get_variable(
                "latent_codes", [num_z, opts['zdim']],
                tf.float32, tf.random_normal_initializer(stddev=1.))
            is_training_ph = tf.placeholder(tf.bool, name='is_training_ph')

            gen, _ = decoder(opts, z, is_training=is_training_ph)
            data_shape = datashapes[opts['dataset']]
            gen.set_shape([num_z] + data_shape)

            e_gen, _ = encoder(opts, gen, is_training=is_training_ph)
            if opts['e_noise'] == 'gaussian':
                # Keep only the first element of the Gaussian encoder output.
                e_gen = e_gen[0]

            # Fix: the decoder result is unpacked, as in the first decoder
            # call above. Previously the whole return value was passed to
            # reconstruction_loss instead of just the decoded images.
            ae_gen, _ = decoder(opts, e_gen, reuse=True,
                                is_training=is_training_ph)
            loss = wae.WAE.reconstruction_loss(opts, gen, ae_gen)

            # Only the latent codes are optimised.
            optim = tf.train.AdamOptimizer(0.01, 0.9)
            optim = optim.minimize(loss, var_list=[z])

            # Now restoring weights from the checkpoint
            # We need to restore all variables except for newly created ones
            all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            enc_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='encoder')
            dec_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='generator')
            new_vars = [v for v in all_vars
                        if v not in enc_vars and v not in dec_vars]
            vars_to_restore = enc_vars + dec_vars
            saver = tf.train.Saver(vars_to_restore)
            saver.restore(sess, checkpoint)
            logging.error('Restored.')

            # Everything that did not come from the checkpoint (the codes
            # and whatever the optimizer created) is initialised here.
            init = tf.variables_initializer(new_vars)

            pic_id = 0
            loss_prev = 1e10
            init.run()

            for step in range(NUM_GD_STEPS):
                # Save the first 100 steps, then every 100th step.
                if step < 100 or step % 100 == 0:
                    pics = gen.eval(feed_dict={is_training_ph: False})
                    codes = z.eval()
                    pic_path = os.path.join(opts['work_dir'],
                                            'pic%03d' % pic_id)
                    code_path = os.path.join(opts['work_dir'],
                                             'code%03d' % pic_id)
                    np.save(pic_path, pics)
                    np.save(code_path, codes)
                    pic_id += 1

                # Make a gradient step
                sess.run(optim, feed_dict={is_training_ph: False})

                if step % 10 == 0:
                    loss_cur = loss.eval(feed_dict={is_training_ph: False})
                    rel_imp = abs(loss_cur - loss_prev) / abs(loss_prev)
                    logging.error('step %d, loss=%f, rel_imp=%f' %
                                  (step, loss_cur, rel_imp))
                    loss_prev = loss_cur
def main(_):
    """Load the training images, train the VAE and save three figures.

    Steps, all inside one session:
      1. Drain ``features`` into memory until ``OutOfRangeError``.
      2. Build the encoder, the reparameterised sample, the decoder and the
         loss (squared-error reconstruction + KL term + regularisation).
      3. Train for ``epochs`` epochs, reshuffling the data each epoch.
      4. Save the training curve, a before/after comparison of the first 64
         images and a grid decoded from the fetched ``random_prob`` values.
    """
    def _mosaic(images):
        # View 64 images as an 8x8 grid and stack twice column-wise; for
        # 96x96x3 images this yields one 768x768x3 picture.
        grid = images.reshape([8, 8, 96, 96, 3])
        return np.column_stack(np.column_stack(grid))

    with tf.Session(config=config) as sess:
        # ---- read all training images -------------------------------------
        train_x = []
        print("Start reading", train_num, "of training files ...")
        read_start = datetime.now().replace(microsecond=0)
        while True:
            try:
                imgs = sess.run(features)
                train_x.append(imgs)
            except tf.errors.OutOfRangeError:
                break
        read_end = datetime.now().replace(microsecond=0)
        print("Complete reading training files.")
        print("Time cost:", read_end - read_start)
        train_x = np.array(train_x)

        # ---- model ---------------------------------------------------------
        regularizer = tf.contrib.layers.l2_regularizer(scale=weight_decay)
        mean, var = encoder(X, latent_size, regularizer, is_training)
        std = tf.sqrt(var, name='z_std')
        epsilon = tf.random_normal(tf.shape(var), name='random_prob')
        sample_z = mean + epsilon * std
        sample_z = tf.identity(sample_z, name='input_z')
        decoded_x = decoder(sample_z, regularizer, is_training)

        # Add training ops into graph.
        with tf.variable_scope('train'):
            img_loss = tf.reduce_sum(tf.squared_difference(decoded_x, X),
                                     axis=[1, 2, 3])
            latent_loss = 0.5 * tf.reduce_sum(
                var + tf.square(mean) - 1 - tf.log(var), 1)
            loss = tf.reduce_mean(img_loss + latent_loss, name='loss_op')
            loss += tf.losses.get_regularization_loss()
            global_step = tf.Variable(
                0,
                name='global_step',
                trainable=False,
                collections=[tf.GraphKeys.GLOBAL_VARIABLES,
                             tf.GraphKeys.GLOBAL_STEP])
            optimizer = tf.train.AdamOptimizer(
                learning_rate=init_learning_rate)
            train_op = optimizer.minimize(loss,
                                          global_step=global_step,
                                          name='train_op')

        sess.run(tf.global_variables_initializer())

        # Handles are looked up again by name from the session graph.
        graph = sess.graph
        global_step_tensor = graph.get_tensor_by_name('train/global_step:0')
        train_op = graph.get_operation_by_name('train/train_op')
        loss_tensor = graph.get_tensor_by_name('train/loss_op:0')
        z_tensor = graph.get_tensor_by_name('input_z:0')
        prob_tensor = graph.get_tensor_by_name('random_prob:0')

        # ---- training ------------------------------------------------------
        print('Start training ...')
        train_start = datetime.now().replace(microsecond=0)
        loss_history = []
        for epoch_idx in range(epochs):
            total_loss = 0
            np.random.shuffle(train_x)
            for step_idx in range(steps_per_epoch):
                pos = step_idx * batch_size
                # The last batch may be shorter than batch_size.
                nums = min(train_num, pos + batch_size) - pos
                batch = train_x[pos:pos + nums]
                _, loss_value = sess.run([train_op, loss_tensor],
                                         feed_dict={X: batch})
                total_loss += loss_value * nums
            total_loss /= train_num
            print("Iter: {}, Global step: {}, loss: {:.4f}".format(
                epoch_idx + 1, global_step_tensor.eval(), total_loss))
            loss_history.append(total_loss)
        train_end = datetime.now().replace(microsecond=0)
        print("Time cost:", train_end - train_start)

        # ---- figures -------------------------------------------------------
        file_stem = "batch_" + str(batch_size) + "_latent_" + str(latent_size)

        plt.plot(loss_history, label='training loss')
        plt.xlabel("epochs")
        plt.ylabel("Totla loss")
        plt.title("Training curve")
        plt.savefig(file_stem + "_training_curve.png", dpi=100)
        plt.gcf().clear()

        # plot some images
        decoded_img, rand_prob = sess.run(
            [decoded_x, prob_tensor],
            feed_dict={X: train_x[:64], is_training: False})
        test_img = _mosaic(train_x[:64])
        decoded_img = _mosaic(decoded_img)

        fig, ax = plt.subplots(1, 2)
        ax[0].imshow(test_img)
        ax[0].set_title("Before encode")
        ax[1].imshow(decoded_img)
        ax[1].set_title("After decode")
        fig.suptitle("batch size: " + str(batch_size) + ", latent size: " +
                     str(latent_size))
        plt.savefig(file_stem + "_images_comparison.png", dpi=150)
        plt.gcf().clear()

        # Decode again, this time feeding the fetched noise as the latent.
        generate_img = sess.run(
            decoded_x,
            feed_dict={z_tensor: rand_prob, is_training: False})
        generate_img = _mosaic(generate_img)
        plt.imshow(generate_img)
        plt.title("Random images")
        plt.savefig(file_stem + "_generation.png", dpi=100)
def __init__(self, opts):
    """Build the TensorFlow graph of the model.

    Opens a session, creates the placeholders, encodes ``sample_points``
    (deterministically or with Gaussian noise, depending on
    ``opts['e_is_random']``), decodes both the encoded points and
    ``sample_noise``, defines the objective and sets up optimizers, savers
    and the variable initializer.

    Args:
        opts: option dictionary. Keys read here: ``dataset``,
            ``e_is_random``, ``verbose``, ``zdim``, ``lambda`` and
            ``e_pretrain``.

    Raises:
        AssertionError: if ``opts['dataset']`` is not a key of ``datashapes``.
    """
    # NOTE(review): an informational message is logged at ERROR level.
    logging.error('Building the Tensorflow Graph')

    self.sess = tf.Session()
    self.opts = opts

    # -- Some of the parameters for future use

    assert opts['dataset'] in datashapes, 'Unknown dataset.'
    self.data_shape = datashapes[opts['dataset']]

    # -- Placeholders

    self.add_model_placeholders()
    self.add_training_placeholders()
    # Dynamic size of the first axis of sample_points.
    sample_size = tf.shape(self.sample_points)[0]

    # -- Transformation ops

    # Encode the content of sample_points placeholder
    if not opts['e_is_random']:
        # Deterministic encoder: its output is used directly as the code.
        self.encoded = encoder(opts, inputs=self.sample_points,
                               is_training=self.is_training)
    else:
        # Random encoder: returns a mean and a second tensor that is used as
        # a log-variance below (std = sqrt(exp(enc_sigmas))).
        enc_mean, enc_sigmas = encoder(opts, inputs=self.sample_points,
                                       is_training=self.is_training)
        if opts['verbose']:
            # Debug the largest and smallest log variances
            enc_sigmas = tf.Print(
                enc_sigmas,
                [tf.nn.top_k(tf.reshape(enc_sigmas, [-1]), 1).values[0]],
                'Maximal log sigmas:')
            enc_sigmas = tf.Print(
                enc_sigmas,
                [-tf.nn.top_k(tf.reshape(-enc_sigmas, [-1]), 1).values[0]],
                'Minimal log sigmas:')
        # Reparameterised sample: mean + eps * std.
        eps = tf.random_normal((sample_size, opts['zdim']),
                               0., 1., dtype=tf.float32)
        self.encoded = enc_mean + tf.multiply(
            eps, tf.sqrt(1e-8 + tf.exp(enc_sigmas)))

    # Decode the points encoded above (i.e. reconstruct)
    self.reconstructed, self.reconstructed_logits = \
        decoder(opts, noise=self.encoded, is_training=self.is_training)

    # Decode the content of sample_noise (decoder variables are reused)
    self.decoded, self.decoded_logits = \
        decoder(opts, reuse=True, noise=self.sample_noise, is_training=self.is_training)

    # -- Objectives, losses, penalties

    self.penalty, self.loss_gan = self.matching_penalty()
    self.loss_reconstruct = self.reconstruction_loss()
    # Objective = reconstruction loss + opts['lambda'] * penalty.
    self.wae_objective = self.loss_reconstruct + \
        opts['lambda'] * self.penalty

    # Optional pretraining loss, only built when requested.
    if opts['e_pretrain']:
        self.loss_pretrain = self.pretrain_loss()
    else:
        self.loss_pretrain = None

    self.add_least_gaussian2d_ops()

    # -- Optimizers, savers, etc

    self.add_optimizers()
    self.add_savers()
    self.init = tf.global_variables_initializer()
import torch
from torch import nn, optim
from torch.autograd import Variable
import itertools
from models import encoder, decoder, discriminator, loss_functions
import helpers
import matplotlib.pyplot as plt
import numpy as np
from torch.nn import functional as F

#%% setting up parameters
batch_size, dim = 750, 2

# The three networks are built with the same keyword arguments, in the order
# encoder, decoder, discriminator.
Enc = encoder(dim=dim, k=2, batch_size=batch_size)
Dec = decoder(dim=dim, k=2, batch_size=batch_size)
Disc = discriminator(dim=dim, k=2, batch_size=batch_size)
losses_ = loss_functions()

dataHandler = helpers.data_and_plotting(
    batch_size,
    encoder=Enc,
    decoder=Dec,
    discriminator=Disc,
    mixture=False,
    semi_circle=True,
)

#%% setting up the optimizers
# generator optimizer: encoder and decoder parameters are updated together
generator_params = itertools.chain(Enc.parameters(), Dec.parameters())
optimizerE = optim.Adam(generator_params, lr=5e-4)

# discriminator optimizer
discriminator_params = itertools.chain(Disc.parameters())
optimizerD = optim.Adam(discriminator_params, lr=5e-4)
def __init__(self, opts, train_size=0):
    """Build the TensorFlow graph of the model.

    Opens a session, creates the placeholders, runs the encoder once and
    interprets its result according to ``opts['e_noise']``, decodes both the
    encoded points and ``sample_noise``, defines the objective (optionally
    with the auto-encoder fixed-point cost) and sets up optimizers, savers
    and the variable initializer.

    Args:
        opts: option dictionary. Keys read here: ``dataset``, ``e_noise``,
            ``verbose``, ``zdim``, ``w_aef`` (optional) and ``e_pretrain``.
        train_size: stored on ``self.train_size``; not used otherwise in this
            method.

    Raises:
        AssertionError: if ``opts['dataset']`` is not a key of ``datashapes``.
    """
    # NOTE(review): an informational message is logged at ERROR level.
    logging.error('Building the Tensorflow Graph')

    self.sess = tf.Session()
    self.opts = opts
    self.train_size = train_size

    # -- Some of the parameters for future use
    assert opts['dataset'] in datashapes, 'Unknown dataset.'
    self.data_shape = datashapes[opts['dataset']]

    # -- Placeholders
    self.add_inputs_placeholders()
    self.add_training_placeholders()
    sample_size = tf.shape(self.sample_points)[0]  # batch_size

    # -- Transformation ops

    # Encode the content of sample_points placeholder
    res = encoder(opts, inputs=self.sample_points, is_training=self.is_training)

    # How `res` is unpacked depends on opts['e_noise'].
    if opts['e_noise'] in ('deterministic', 'implicit', 'add_noise'):
        # No explicit mean / sigma tensors in these modes.
        self.enc_mean, self.enc_sigmas = None, None
        if opts['e_noise'] == 'implicit':
            # The second element is kept as self.encoder_A.
            self.encoded, self.encoder_A = res
        else:
            self.encoded, _ = res
    elif opts['e_noise'] == 'gaussian':
        # Encoder outputs means and variances of Gaussian
        # (enc_sigmas is used as a log-variance: std = sqrt(exp(enc_sigmas)))
        enc_mean, enc_sigmas = res[0]
        enc_sigmas = tf.clip_by_value(enc_sigmas, -50, 50)
        self.enc_mean, self.enc_sigmas = enc_mean, enc_sigmas
        if opts['verbose']:
            self.add_sigmas_debug()
        # Reparameterised sample: mean + eps * std.
        eps = tf.random_normal((sample_size, opts['zdim']),
                               0., 1., dtype=tf.float32)
        self.encoded = self.enc_mean + tf.multiply(
            eps, tf.sqrt(1e-8 + tf.exp(self.enc_sigmas)))
    # NOTE(review): any other e_noise value leaves self.encoded unset, so the
    # decoder call below would fail -- confirm values are validated upstream.

    # Decode the points encoded above (i.e. reconstruct)
    self.reconstructed, self.reconstructed_logits = decoder(
        opts, noise=self.encoded, is_training=self.is_training)

    # Decode the content of sample_noise (decoder variables are reused)
    self.decoded, self.decoded_logits = decoder(
        opts, reuse=True, noise=self.sample_noise, is_training=self.is_training)

    # -- Objectives, losses, penalties
    self.penalty, self.loss_gan = self.matching_penalty()
    self.loss_reconstruct = self.reconstruction_loss(
        self.opts, self.sample_points, self.reconstructed)
    # Objective = reconstruction loss + self.wae_lambda * penalty.
    self.wae_objective = self.loss_reconstruct + self.wae_lambda * self.penalty

    # Extra costs if any
    if 'w_aef' in opts and opts['w_aef'] > 0:
        improved_wae.add_aefixedpoint_cost(opts, self)

    self.blurriness = self.compute_blurriness()

    # Optional pretraining loss, only built when requested.
    if opts['e_pretrain']:
        self.loss_pretrain = self.pretrain_loss()
    else:
        self.loss_pretrain = None

    self.add_least_gaussian2d_ops()

    # -- Optimizers, savers, etc
    self.add_optimizers()
    self.add_savers()
    self.init = tf.global_variables_initializer()
sentence_str='' for index for output: word=index2word[index] if word=='EOS': break elif word!='PAD': sentence_str+=word return sentence_str if __name__=='__main__': dataset=datasets.Cornell encoder=models.encoder(dataset.num_word,512,2,0.1) decoder=models.decoder(dataset.num_word,512,2,'dot',0.1) utils.load_model(encoder,os.path.join('./Model',str(config.MODEL)),'encoder.pth') utils.load_model(decoder,os.path.join('./Model',str(config.MODEL)),'decoder.pth') bot=GreedySearchBot(encoder,decoder) index2word=dataset.index2word word2index=dataset.word2index max_len=10 while(True): input_sentence=input('>>> ') if input_sentence=='q': break else:
def __init__(self, opts):
    """Assemble the TensorFlow graph for the model and open a session.

    In order: create the placeholders, encode ``self.sample_points``,
    decode both the encoded points and ``self.sample_noise``, define the
    losses, then add optimizers, savers and the variable initializer.

    Args:
        opts: Option mapping. Keys read here are ``'dataset'``,
            ``'e_noise'`` and ``'e_pretrain'``; the ``'gaussian'`` encoder
            path additionally reads ``'verbose'`` and ``'zdim'``.

    Raises:
        AssertionError: If ``opts['dataset']`` is not a key of
            ``datashapes``.
    """
    logging.error('Building the Tensorflow Graph')

    self.sess = tf.Session()
    self.opts = opts

    # -- Dataset-dependent parameters kept for later use
    assert opts['dataset'] in datashapes, 'Unknown dataset.'
    self.data_shape = datashapes[opts['dataset']]

    # -- Placeholders
    self.add_model_placeholders()
    self.add_training_placeholders()
    batch_len = tf.shape(self.sample_points)[0]

    # -- Transformation ops
    # Encode the content of the sample_points placeholder.
    noise_kind = opts['e_noise']
    if noise_kind in ('deterministic', 'implicit', 'add_noise'):
        self.enc_mean = None
        self.enc_sigmas = None
        enc_out = encoder(opts, inputs=self.sample_points,
                          is_training=self.is_training)
        if noise_kind != 'implicit':
            self.encoded = enc_out
        else:
            # The implicit encoder yields a second value next to the code.
            self.encoded, self.encoder_A = enc_out
    elif noise_kind == 'gaussian':
        # The encoder returns a mean and a second tensor that is
        # exponentiated below, i.e. it is treated as a log-variance.
        mean, raw_sigmas = encoder(opts, inputs=self.sample_points,
                                   is_training=self.is_training)
        self.enc_mean = mean
        self.enc_sigmas = tf.clip_by_value(raw_sigmas, -50, 50)
        if opts['verbose']:
            self.add_sigmas_debug()
        eps = tf.random_normal((batch_len, opts['zdim']),
                               0., 1., dtype=tf.float32)
        # sqrt(exp(s)) equals exp(s / 2); the 1e-8 keeps the sqrt argument
        # strictly positive.
        noise_scale = tf.sqrt(1e-8 + tf.exp(self.enc_sigmas))
        self.encoded = self.enc_mean + tf.multiply(eps, noise_scale)

    # Decode the points encoded above (i.e. reconstruct).
    reconstruction = decoder(opts, noise=self.encoded,
                             is_training=self.is_training)
    self.reconstructed, self.reconstructed_logits = reconstruction

    # Decode the content of sample_noise, sharing the decoder variables.
    generation = decoder(opts, reuse=True, noise=self.sample_noise,
                         is_training=self.is_training)
    self.decoded, self.decoded_logits = generation

    # -- Objectives, losses, penalties
    self.penalty, self.loss_gan = self.matching_penalty()
    self.loss_reconstruct = self.reconstruction_loss()
    self.wae_objective = (
        self.loss_reconstruct + self.wae_lambda * self.penalty)

    self.loss_pretrain = self.pretrain_loss() if opts['e_pretrain'] else None

    self.add_least_gaussian2d_ops()

    # -- Optimizers, savers, etc
    self.add_optimizers()
    self.add_savers()
    self.init = tf.global_variables_initializer()
def main():
    """Train the encoder and rendering networks and checkpoint the results.

    Configuration comes from module-level ``g_*`` settings. Every epoch runs
    one pass over the training loader and one over the validation loader,
    writes scalars and image grids to TensorBoard, and stores the weights
    with the lowest validation loss seen so far as ``encoder_best.pt`` and
    ``rendering_best.pt`` in ``g_temp_folder``. After the last epoch the
    final weights are saved as ``encoder.pt`` and ``rendering.pt``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    torch.backends.cudnn.benchmark = True
    encoder = EncoderCNN()
    rendering = RenderingCNN()
    loss_function = FMOLoss()

    if g_finetune:
        g_load_temp_folder = '/home.stud/rozumden/tmp/PyTorch/20200918_2239_consfm2'
        # Load onto the CPU first so that a checkpoint written on a GPU can
        # also be restored on a CPU-only host; the models are moved to
        # `device` right below.
        encoder.load_state_dict(torch.load(
            os.path.join(g_load_temp_folder, 'encoder.pt'), map_location='cpu'))
        rendering.load_state_dict(torch.load(
            os.path.join(g_load_temp_folder, 'rendering.pt'), map_location='cpu'))

    encoder = nn.DataParallel(encoder).to(device)
    rendering = nn.DataParallel(rendering).to(device)
    loss_function = nn.DataParallel(loss_function).to(device)

    # exist_ok avoids the check-then-create race; log_path lives inside
    # g_temp_folder, but both calls are kept for clarity.
    os.makedirs(g_temp_folder, exist_ok=True)
    log_path = os.path.join(g_temp_folder, 'training')
    os.makedirs(log_path, exist_ok=True)

    encoder_params = sum(p.numel() for p in encoder.parameters())
    rendering_params = sum(p.numel() for p in rendering.parameters())
    print('Encoder params {:.2f}M, rendering params {:.2f}M'.format(
        encoder_params / 1e6, rendering_params / 1e6))

    training_set = ShapeBlurDataset(
        dataset_folder=g_dataset_folder, render_objs=g_render_objs,
        number_per_category=g_number_per_category, do_augment=True,
        use_latent_learning=g_use_latent_learning)
    training_generator = torch.utils.data.DataLoader(
        training_set, batch_size=g_batch_size, shuffle=True,
        num_workers=g_num_workers, drop_last=True)
    val_set = ShapeBlurDataset(
        dataset_folder=g_validation_folder, render_objs=g_render_objs_val,
        number_per_category=g_number_per_category_val, do_augment=True,
        use_latent_learning=False)
    val_generator = torch.utils.data.DataLoader(
        val_set, batch_size=g_batch_size, shuffle=True,
        num_workers=g_num_workers, drop_last=True)

    # Fixed samples that are re-rendered every epoch for visual comparison.
    vis_train_batch, _ = get_training_sample(
        ["can"], min_obj=5, max_obj=5, dataset_folder=g_dataset_folder)
    vis_train_batch = vis_train_batch.unsqueeze(0).to(device)
    vis_val_batch, _ = get_training_sample(
        ["can"], min_obj=4, max_obj=4, dataset_folder=g_validation_folder)
    vis_val_batch = vis_val_batch.unsqueeze(0).to(device)

    all_parameters = list(encoder.parameters()) + list(rendering.parameters())
    optimizer = torch.optim.Adam(all_parameters, lr=g_lr)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.5)
    writer = SummaryWriter(log_path)

    train_losses = []
    val_losses = []
    # Start from +inf so the first validated epoch always produces a "best"
    # checkpoint, whatever the scale of the loss.
    best_val_loss = float('inf')
    for epoch in range(g_epochs):
        encoder.train()
        rendering.train()

        t0 = time.time()
        supervised_loss = []
        model_losses = []
        sharp_losses = []
        timecons_losses = []
        latent_losses = []
        joint_losses = []
        for it, (input_batch, times, hs_frames, times_left) in enumerate(training_generator):
            input_batch = input_batch.to(device)
            times = times.to(device)
            hs_frames = hs_frames.to(device)
            times_left = times_left.to(device)

            if g_use_latent_learning:
                # The input is split along dim 1 at index 6 and each part is
                # encoded separately; both latents go to the loss.
                latent = encoder(input_batch[:, :6])
                latent2 = encoder(input_batch[:, 6:])
            else:
                latent = encoder(input_batch)
                latent2 = []

            renders = rendering(latent, torch.cat((times, times_left), 1))

            sloss, mloss, shloss, tloss, lloss, jloss = loss_function(
                renders, hs_frames, input_batch[:, :6], (latent, latent2))

            supervised_loss.append(sloss.mean().item())
            model_losses.append(mloss.mean().item())
            sharp_losses.append(shloss.mean().item())
            timecons_losses.append(tloss.mean().item())
            latent_losses.append(lloss.mean().item())

            # Only the joint loss is back-propagated; the rest is logged.
            jloss = jloss.mean()
            joint_losses.append(jloss.item())

            if it % 50 == 0:
                print("Epoch {:4d}, it {:4d}".format(epoch+1, it), end =" ")
                if g_use_supervised:
                    print(", loss {:.3f}".format(np.mean(supervised_loss)), end =" ")
                if g_use_selfsupervised_model:
                    print(", model {:.3f}".format(np.mean(model_losses)), end =" ")
                if g_use_selfsupervised_sharp_mask:
                    print(", sharp {:.3f}".format(np.mean(sharp_losses)), end =" ")
                if g_use_selfsupervised_timeconsistency:
                    print(", time {:.3f}".format(np.mean(timecons_losses)), end =" ")
                if g_use_latent_learning:
                    print(", latent {:.3f}".format(np.mean(latent_losses)), end =" ")
                print(", joint {:.3f}".format(np.mean(joint_losses)))

            optimizer.zero_grad()
            jloss.backward()
            optimizer.step()

        train_losses.append(np.mean(supervised_loss))

        with torch.no_grad():
            encoder.eval()
            rendering.eval()
            running_losses_min = []
            running_losses_max = []
            for it, (input_batch, times, hs_frames, _) in enumerate(val_generator):
                input_batch = input_batch.to(device)
                times = times.to(device)
                hs_frames = hs_frames.to(device)
                latent = encoder(input_batch)
                renders = rendering(latent, times)[:, :, :4]

                # Score against the frames in both temporal orders and keep
                # the better (min) and worse (max) of the two per sample.
                val_loss1 = fmo_loss(renders, hs_frames)
                val_loss2 = fmo_loss(renders, torch.flip(hs_frames, [1]))
                losses = torch.cat((val_loss1.unsqueeze(0), val_loss2.unsqueeze(0)), 0)
                min_loss, _ = losses.min(0)
                max_loss, _ = losses.max(0)
                running_losses_min.append(min_loss.mean().item())
                running_losses_max.append(max_loss.mean().item())
            # `it` is the index of the last validation batch here.
            print("Epoch {:4d}, val it {:4d}, loss {}".format(epoch+1, it, np.mean(running_losses_min)))
            val_losses.append(np.mean(running_losses_min))
            if val_losses[-1] < best_val_loss:
                # Save the wrapped module so the weights load without
                # DataParallel.
                torch.save(encoder.module.state_dict(), os.path.join(g_temp_folder, 'encoder_best.pt'))
                torch.save(rendering.module.state_dict(), os.path.join(g_temp_folder, 'rendering_best.pt'))
                best_val_loss = val_losses[-1]
                print('    Saving best validation loss model!  ')

        writer.add_scalar('Loss/train_supervised', train_losses[-1], epoch+1)
        writer.add_scalar('Loss/train_joint', np.mean(joint_losses), epoch+1)
        if g_use_selfsupervised_model:
            writer.add_scalar('Loss/train_selfsupervised_model', np.mean(model_losses), epoch+1)
        if g_use_selfsupervised_sharp_mask:
            writer.add_scalar('Loss/train_selfsupervised_sharpness', np.mean(sharp_losses), epoch+1)
        if g_use_selfsupervised_timeconsistency:
            writer.add_scalar('Loss/train_selfsupervised_timeconsistency', np.mean(timecons_losses), epoch+1)
        if g_use_latent_learning:
            writer.add_scalar('Loss/train_selfsupervised_latent', np.mean(latent_losses), epoch+1)
        writer.add_scalar('Loss/val_min', val_losses[-1], epoch+1)
        writer.add_scalar('Loss/val_max', np.mean(running_losses_max), epoch+1)
        writer.add_scalar('LR/value', optimizer.param_groups[0]['lr'], epoch+1)

        # Visualisation only: no autograd graph is needed for these renders.
        with torch.no_grad():
            writer.add_images('Vis Train Batch', get_images(encoder, rendering, device, vis_train_batch)[0], global_step=epoch+1)
            writer.add_images('Vis Val Batch', get_images(encoder, rendering, device, vis_val_batch)[0], global_step=epoch+1)

            # Uses `renders` / `hs_frames` of the last validation batch:
            # first and last rendered frame next to first and last target
            # frame, joined along dim 2.
            concat = torch.cat((renders[:, 0], renders[:, -1], hs_frames[:, 0], hs_frames[:, -1]), 2)
            writer.add_images('Val Batch', concat[:, 3:]*(concat[:, :3]-1)+1, global_step=epoch+1)

        time_elapsed = (time.time() - t0)/60
        print('Epoch {:4d} took {:.2f} minutes, lr = {}, av train loss {:.5f}, val loss min {:.5f} max {:.5f}'.format(
            epoch+1, time_elapsed, optimizer.param_groups[0]['lr'],
            train_losses[-1], val_losses[-1], np.mean(running_losses_max)))
        scheduler.step()

    torch.cuda.empty_cache()
    torch.save(encoder.module.state_dict(), os.path.join(g_temp_folder, 'encoder.pt'))
    torch.save(rendering.module.state_dict(), os.path.join(g_temp_folder, 'rendering.pt'))
    writer.close()
shuffle=shuffle_train, num_workers=num_workers) ################################################################################ loss_list = [] loss_values = [] avg_loss_values = [] total_step = len(data_loader) ################################################################################ print("Pushing the model to GPU ...\n") init = initializer().to(device) encoder = encoder().to(device) mean_encoder = mean_encoder().to(device) decoder = decoder().to(device) clstm = ConvLSTMCell(input_size=(8, 14), input_dim=512, hidden_dim=512, kernel_size=(3, 3), bias=True).to(device) reduction = nn.Conv2d(1024, 512, kernel_size=3, stride=1, padding=1).to(device) criterion = nn.BCELoss() params = list(init.parameters()) + list(encoder.parameters()) + list( decoder.parameters()) + list(clstm.parameters()) + list( reduction.parameters()) optimizer = torch.optim.Adam(params, lr=lr)
def improved_sampling(opts):
    """Generate samples by optimising latent codes and save them to disk.

    A batch of latent codes is created as a trainable variable, decoded,
    re-encoded and decoded again. The codes alone are then updated with Adam
    to minimise the reconstruction loss between the contrast-normalised
    decoded picture and its contrast-normalised auto-encoding. Encoder and
    decoder weights are restored from ``opts['checkpoint']`` and stay fixed.

    This is repeated for ``NUM_POINTS // BATCH_SIZE`` batches; every batch
    re-initialises the codes and optimizer state, runs at most
    ``MAX_GD_STEPS`` steps and stops early once the relative change of the
    loss between two checks drops below 0.1. The resulting pictures and
    codes are written with ``np.save`` under
    ``<work_dir>/checkpoints/`` (``np.save`` appends ``.npy``).

    Args:
        opts: Option mapping. Keys read here are ``'checkpoint'``,
            ``'work_dir'``, ``'pz'``, ``'pz_scale'``, ``'zdim'``,
            ``'dataset'``, ``'e_noise'``, ``'lr'`` and, in debug mode,
            ``'input_normalize_sym'``.

    Raises:
        ValueError: If ``opts['pz']`` is not ``'normal'``, ``'sphere'`` or
            ``'uniform'``.
    """
    MAX_GD_STEPS = 200
    LOSS_EVERY_STEPS = 50
    DEBUG = False
    NUM_POINTS = 10000
    BATCH_SIZE = 100
    # Floor division: on Python 3 `/` yields a float, which range() rejects.
    num_batches = NUM_POINTS // BATCH_SIZE

    checkpoint = opts['checkpoint']

    # Creating a dummy file for later FID evaluations
    dummy_path = os.path.join(opts['work_dir'], 'checkpoints', 'dummy.meta')
    with open(dummy_path, 'w') as f:
        f.write('dummy string')

    with tf.Session() as sess:
        with sess.graph.as_default():

            # Creating the graph
            if opts['pz'] in ('normal', 'sphere'):
                codes = tf.get_variable(
                    "latent_codes", [BATCH_SIZE, opts['zdim']],
                    tf.float32, tf.random_normal_initializer(stddev=1.))
                if opts['pz'] == 'sphere':
                    # NOTE(review): axis=0 reduces over the batch axis of the
                    # [BATCH_SIZE, zdim] variable, so every latent dimension
                    # is normalised across the batch instead of every code
                    # being scaled to unit length. Confirm this is intended.
                    z = codes / (tf.norm(codes, axis=0) + 1e-8)
                else:
                    z = codes
            elif opts['pz'] == 'uniform':
                codes = tf.get_variable(
                    "latent_codes", [BATCH_SIZE, opts['zdim']],
                    tf.float32,
                    tf.random_uniform_initializer(minval=-1., maxval=1.))
                # Without this assignment `z` was unbound for the uniform
                # prior and the scaling below raised a NameError.
                z = codes
            else:
                raise ValueError('Unknown latent prior: %s' % opts['pz'])
            z = opts['pz_scale'] * z

            is_training_ph = tf.placeholder(tf.bool, name='is_training_ph')
            gen, _ = decoder(opts, z, is_training=is_training_ph)
            data_shape = datashapes[opts['dataset']]
            gen.set_shape([BATCH_SIZE] + data_shape)
            e_gen, _ = encoder(opts, gen, is_training=is_training_ph)
            if opts['e_noise'] == 'gaussian':
                e_gen = e_gen[0]
            ae_gen, _ = decoder(opts, e_gen, reuse=True,
                                is_training=is_training_ph)

            # Cool hack: normalizing by the picture contrast,
            # otherwise SGD manages to decrease the loss by reducing
            # the contrast
            loss = wae.WAE.reconstruction_loss(
                opts, contrast_norm(gen), contrast_norm(ae_gen))
            optim = tf.train.AdamOptimizer(opts['lr'], 0.9)
            # Only the latent codes are trainable here.
            optim = optim.minimize(loss, var_list=[codes])

            # Now restoring encoder and decoder from the checkpoint
            all_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            enc_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='encoder')
            dec_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='generator')
            # Everything outside the two restored scopes (the codes and the
            # optimizer's own variables) is re-initialised for every batch.
            new_vars = [v for v in all_vars
                        if v not in enc_vars and v not in dec_vars]
            vars_to_restore = enc_vars + dec_vars
            saver = tf.train.Saver(vars_to_restore)
            saver.restore(sess, checkpoint)
            logging.error('Restored.')

            init = tf.variables_initializer(new_vars)

            # Finally, start generating the samples
            res_samples = []
            res_codes = []

            for ibatch in range(num_batches):

                logging.error('Batch %d of %d' % (ibatch + 1, num_batches))
                loss_prev = 1e10
                init.run()
                for step in range(MAX_GD_STEPS):

                    # Make a gradient step
                    sess.run(optim, feed_dict={is_training_ph: False})

                    if step == 0 or step % LOSS_EVERY_STEPS == LOSS_EVERY_STEPS - 1:
                        # Fetched values get their own names so that the
                        # `codes` variable is not rebound to a numpy array.
                        loss_cur, pics, code_vals = sess.run(
                            [loss, gen, z],
                            feed_dict={is_training_ph: False})
                        if DEBUG:
                            if opts['input_normalize_sym']:
                                pics = (pics + 1.) / 2.
                            pic_path = os.path.join(
                                opts['work_dir'], 'checkpoints',
                                'dummy.samples100_%05d' % step)
                            code_path = os.path.join(
                                opts['work_dir'], 'checkpoints',
                                'code%05d' % step)
                            np.save(pic_path, pics)
                            np.save(code_path, code_vals)
                        rel_imp = abs(loss_cur - loss_prev) / abs(loss_prev)
                        logging.error('-- step %d, loss=%f, rel_imp=%f' %
                                      (step, loss_cur, rel_imp))
                        if step > 0 and rel_imp < 0.1:
                            break
                        loss_prev = loss_cur

                # The check at step 0 always runs, so both values are bound.
                res_samples.append(pics)
                res_codes.append(code_vals)

            # Stack the per-batch arrays along the first axis.
            samples = np.vstack(res_samples)
            all_codes = np.vstack(res_codes)
            pic_path = os.path.join(opts['work_dir'], 'checkpoints',
                                    'dummy.samples%d' % (NUM_POINTS))
            code_path = os.path.join(opts['work_dir'], 'checkpoints',
                                     'codes%d' % (NUM_POINTS))
            np.save(pic_path, samples)
            np.save(code_path, all_codes)
def __init__(self, opts, tag):
    """Reset the default graph, then build the model graph and a session.

    The graph has two parts. The training part encodes
    ``self.sample_points`` together with ``self.labels``, draws a code from
    the resulting mean / log-variance pair, decodes it, and combines the
    reconstruction loss, ``opts['lambda']`` times the MMD penalty and the
    classification loss into ``self.objective``. The evaluation part
    computes one score per candidate class and stores the softmax over
    these scores in ``self.eval_probs``.

    Args:
        opts: Option mapping. Keys read here are ``'dataset'``, ``'zdim'``,
            ``'lambda'``, ``'n_classes'``, ``'sampling_size'`` and
            ``'e_pretrain'``.
        tag: Stored unchanged as ``self.tag``.

    Raises:
        AssertionError: If ``opts['dataset']`` is not a key of
            ``datashapes``.
    """
    tf.reset_default_graph()
    logging.error('Building the Tensorflow Graph')

    # Let the session claim GPU memory on demand.
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True))
    self.sess = tf.Session(config=session_config)
    self.opts = opts

    assert opts['dataset'] in datashapes, 'Unknown dataset.'
    self.data_shape = datashapes[opts['dataset']]

    self.add_inputs_placeholders()
    self.add_training_placeholders()
    n_points = tf.shape(self.sample_points)[0]

    # -- Training path: encode, sample a code, decode.
    mean, raw_sigmas = encoder(opts, inputs=self.sample_points,
                               is_training=self.is_training, y=self.labels)
    self.enc_mean = mean
    self.enc_sigmas = tf.clip_by_value(raw_sigmas, -50, 50)

    eps = tf.random_normal((n_points, opts['zdim']),
                           0., 1., dtype=tf.float32)
    # sqrt(exp(s)) equals exp(s / 2); the 1e-8 keeps the sqrt argument
    # strictly positive.
    noise_scale = tf.sqrt(1e-8 + tf.exp(self.enc_sigmas))
    self.encoded = self.enc_mean + tf.multiply(eps, noise_scale)

    recon_pair, self.probs1 = decoder(opts, noise=self.encoded,
                                      is_training=self.is_training)
    self.reconstructed, self.reconstructed_logits = recon_pair

    # Number of samples whose arg-max over probs1 equals the given label.
    hits = tf.equal(tf.argmax(self.probs1, axis=1), self.labels)
    self.correct_sum = tf.reduce_sum(tf.cast(hits, tf.float32))

    decoded_pair, _ = decoder(opts, reuse=True, noise=self.sample_noise,
                              is_training=self.is_training)
    self.decoded, self.decoded_logits = decoded_pair

    # -- Losses
    self.loss_cls = self.cls_loss(self.labels, self.probs1)
    self.loss_mmd = self.mmd_penalty(self.sample_noise, self.encoded)
    self.loss_recon = self.reconstruction_loss(
        self.opts, self.sample_points, self.reconstructed)
    self.objective = (
        self.loss_recon + opts['lambda'] * self.loss_mmd + self.loss_cls)
    self.tag = tag

    # -- Evaluation path: one score per candidate class.
    n_classes = opts['n_classes']
    n_draws = opts['sampling_size']
    # The input batch is repeated n_draws times along the first axis.
    x_rep = tf.tile(self.sample_points, [n_draws, 1, 1, 1])

    def class_score(label):
        """Return the score column for one candidate class label."""
        y = tf.fill((n_points * n_draws, ), label)
        mu, log_sig = encoder(opts, inputs=x_rep, reuse=True,
                              is_training=False, y=y)
        eps2 = tf.random_normal((n_points * n_draws, opts['zdim']),
                                0., 1., dtype=tf.float32)
        z = mu + tf.multiply(eps2, tf.sqrt(1e-8 + tf.exp(log_sig)))
        z_sample = tf.random_normal((tf.shape(z)[0], opts['zdim']),
                                    0., 1., dtype=tf.float32)
        (mu_x, _), logit_y = decoder(opts, reuse=True, noise=z,
                                     is_training=False)
        # Negative squared error, summed over axes 1-3 of the input.
        logp = -tf.reduce_sum((x_rep - mu_x)**2, axis=[1, 2, 3])
        log_pyz = -tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=y, logits=logit_y)
        mmd_loss = self.mmd_penalty(z_sample, z)
        score = 0.5 * logp + log_pyz
        score = score + opts['lambda'] * mmd_loss
        # One row per draw, one column per input point.
        score = tf.reshape(score, [n_draws, n_points])
        # presumably self.logsumexp reduces over the draws - TODO confirm
        score = self.logsumexp(score) - tf.log(float(n_draws))
        return tf.expand_dims(score, 1)

    per_class = [class_score(label) for label in range(n_classes)]
    self.eval_probs = tf.nn.softmax(tf.concat(per_class, 1))

    self.loss_pretrain = self.pretrain_loss() if opts['e_pretrain'] else None

    self.add_optimizers()
    self.add_savers()
opt.seed = 0 print("Random Seed: ", opt.seed) random.seed(opt.seed) torch.manual_seed(opt.seed) if torch.cuda.is_available() and not opt.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) colorMapFile = './colormap.mat' colormap = loadmat(colorMapFile)['cmap'] colormap = torch.from_numpy(colormap).cuda() #################################### # Initialize Network encoder = models.encoder(isAddCostVolume=opt.isAddCostVolume) for param in encoder.parameters(): param.requires_grad = False encoder.load_state_dict( torch.load('{0}/encoder_{1}.pth'.format(opt.experiment, opt.nepoch - 1), map_location={'cuda:0': 'cuda:{0}'.format(opt.gpuId)})) decoder = models.decoder(isAddVisualHull=opt.isAddVisualHull) for param in decoder.parameters(): param.requires_grad = False decoder.load_state_dict( torch.load('{0}/decoder_{1}.pth'.format(opt.experiment, opt.nepoch - 1), map_location={'cuda:0': 'cuda:{0}'.format(opt.gpuId)})) normalFeature = models.normalFeature() for param in normalFeature.parameters():
def build_network(input_sequences, output_sequences, initial_state=None,
                  initialize_to_zero=True):
    """Build the encoder/decoder graph that predicts the frames to follow.

    The encoder consumes ``input_sequences`` (with a trailing channel axis
    added). The decoder is a stack of convolutional LSTM cells driven by a
    ``tf.while_loop``: it starts from the reversed final encoder state and
    the last input frame, and feeds every prediction back in as the next
    input. ``sequence_length``, ``image_height``, ``image_width``,
    ``encoder`` and ``output_layers`` are expected to be defined elsewhere
    in the module.

    Args:
        input_sequences: Observed sequence; axis 0 is used as time and
            axis 1 as batch.
        output_sequences: Only its length along axis 0 is used, to size the
            array that collects the predictions.
        initial_state: Forwarded to ``encoder``.
        initialize_to_zero: Forwarded to ``encoder``.

    Returns:
        A tuple ``(predictions, predictions_flat, final_encoder_state,
        encoder_saver)`` where ``predictions`` is ``predictions_flat``
        reshaped to ``(-1, batch, image_height, image_width)``.
    """
    n_batch = tf.shape(input_sequences)[1]
    inputs_with_channel = tf.expand_dims(input_sequences, axis=-1)
    steps_to_predict = sequence_length - tf.shape(input_sequences)[0]

    # -- Encoder network
    enc_channels = [32, 16]
    code_channels = enc_channels[-1]
    with tf.variable_scope("encoder"):
        all_encoder_states, final_encoder_state = encoder(
            inputs=inputs_with_channel,
            channels=enc_channels,
            initial_state=initial_state,
            initialize_to_zero=initialize_to_zero)

    # Created before any decoder variable exists.
    encoder_saver = tf.train.Saver()

    # -- Decoder network
    # A tf.while_loop stores the predictions in a tf.TensorArray. The
    # decoder state is initialised to the final values of the encoder
    # state, and the last encoder input frame is the first decoder input.
    with tf.variable_scope("decoder") as scope:
        # The decoder mirrors the encoder's channel widths.
        decoder_lstm = tf.contrib.rnn.MultiRNNCell([
            tf.contrib.rnn.Conv2DLSTMCell(
                input_shape=[image_height, image_width, code_channels],
                kernel_shape=[3, 3],
                output_channels=width)
            for width in enc_channels[::-1]
        ])

        # Array that collects one prediction per step.
        collected = tf.TensorArray(
            tf.float32, size=tf.shape(output_sequences)[0])

        # Use the last encoder state to initialise the first decoder state.
        first_state = final_encoder_state[::-1]

        def keep_going(step, _state, _frame, _collected):
            return tf.less(step, steps_to_predict)

        def advance(step, state, frame, outputs):
            cell_output, next_state = decoder_lstm(frame, state)
            next_frame = output_layers(cell_output)
            # Ensure variables in the output layers are reused.
            scope.reuse_variables()
            return (tf.add(step, 1), next_state, next_frame,
                    outputs.write(step, next_frame))

        loop_vars = (tf.constant(0, name="i"),
                     first_state,
                     inputs_with_channel[-1, :, :, :, :],
                     collected)
        i, final_decoder_state, _, predictions_ta = tf.while_loop(
            keep_going, advance, loop_vars)

        predictions_flat = predictions_ta.concat()
        # The sequence predicted to follow the encoder input, regrouped so
        # that the leading axis is inferred from the flat tensor.
        predictions = tf.reshape(
            predictions_flat, (-1, n_batch, image_height, image_width))

    return predictions, predictions_flat, final_encoder_state, encoder_saver