def sample_from_gen(args, device, num_classes, gen):
    """Sample fake images and labels from generator.

    Args:
        args (argparse object)
        device (torch.device)
        num_classes (int): for pseudo_y
        gen (nn.Module)

    Returns:
        fake, pseudo_y, z
    """
    z = utils.sample_z(
        args.batch_size, args.gen_dim_z, device, args.gen_distribution
    )
    if args.cGAN:
        pseudo_y = utils.sample_pseudo_labels(
            num_classes, args.batch_size, device
        )
    else:
        pseudo_y = None

    fake = gen(z, pseudo_y)
    return fake, pseudo_y, z
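# The snippet above (and the related PyTorch snippets later in this file) call
# into a `utils` module that is not shown. Below is a minimal sketch consistent
# with the call sites; the exact priors and signatures are assumptions, not the
# original implementation.
import torch


def sample_z(batch_size, dim_z, device, distribution='normal'):
    """Draw a (batch_size, dim_z) latent batch from the named prior."""
    if distribution == 'normal':
        return torch.randn(batch_size, dim_z, device=device)
    if distribution == 'uniform':
        # U(-1, 1), a common alternative GAN prior
        return 2.0 * torch.rand(batch_size, dim_z, device=device) - 1.0
    raise ValueError('unknown distribution: {}'.format(distribution))


def sample_pseudo_labels(num_classes, batch_size, device):
    """Uniformly sample class indices for conditional generation."""
    return torch.randint(0, num_classes, (batch_size,), device=device)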
def build_model(self):
    with tf.device('/gpu:%d' % self.gpu_id):
        ### Placeholder ###
        self.X = tf.placeholder(tf.float32, [None, self.input_dim])
        self.k = tf.placeholder(tf.int32)
        self.keep_prob = tf.placeholder(tf.float32)

        ### Encoding ###
        self.z_mu, self.z_logvar = self.encoder(self.X, self.enc_h_dim_list,
                                                self.z_dim, self.keep_prob)
        self.z = sample_z(self.z_mu, self.z_logvar)

        ### Decoding ###
        self.recon_X_logit = self.decoder(self.z, self.dec_h_dim_list,
                                          self.input_dim, self.keep_prob, False)
        self.recon_X = tf.nn.tanh(self.recon_X_logit)
        self.output = tf.nn.tanh(self.recon_X_logit)
        #self.logger.info([x.name for x in tf.global_variables()])
        #cost = tf.reduce_mean(tf.square(X-output))

        ### Loss ###
        self.recon_loss = self.recon_loss()
        self.kl_loss = kl_divergence_normal_distribution(self.z_mu,
                                                         self.z_logvar)
        self.total_loss = self.recon_loss + self.kl_loss
        #cost_summary = tf.summary.scalar('cost', cost)

        ### Solver ###
        self.solver = tf.train.RMSPropOptimizer(
            self.learning_rate).minimize(self.total_loss)

    ### Recommendation metric ###
    #self.recon_X = tf.nn.sigmoid(self.recon_X_logit)
    with tf.device('/cpu:0'):
        self.top_k_op = tf.nn.top_k(self.recon_X, self.k)
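# The VAE graph above relies on two free functions that are not shown. A
# minimal sketch under the usual VAE conventions (an assumption, not the
# original implementation): `sample_z` applies the reparameterization trick,
# and `kl_divergence_normal_distribution` is the closed-form KL to a unit
# Gaussian.
import tensorflow as tf  # TF 1.x, matching the graph-mode code above


def sample_z(mu, logvar):
    # z = mu + sigma * eps with eps ~ N(0, I); keeps sampling differentiable
    eps = tf.random_normal(tf.shape(mu))
    return mu + tf.exp(0.5 * logvar) * eps


def kl_divergence_normal_distribution(mu, logvar):
    # KL(N(mu, sigma^2) || N(0, I)), summed over latent dims, mean over batch
    return tf.reduce_mean(
        -0.5 * tf.reduce_sum(1.0 + logvar - tf.square(mu) - tf.exp(logvar),
                             axis=1))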
def generator_rnn(representations, query_poses, sequence_size=12,
                  scope='GQN_RNN'):
    dim_r = representations.get_shape().as_list()
    batch = tf.shape(representations)[0]
    height, width = dim_r[1], dim_r[2]

    cell = GeneratorLSTMCell(
        input_shape=[height, width, C.GENERATOR_INPUT_CHANNELS],
        output_channels=C.LSTM_OUTPUT_CHANNELS,
        canvas_channels=C.LSTM_CANVAS_CHANNELS,
        kernel_size=C.LSTM_KERNEL_SIZE,
        name='GeneratorCell')

    outputs = []
    endpoints = {}
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE) as var_scope:
        if not tf.executing_eagerly():
            if var_scope.caching_device is None:
                var_scope.set_caching_device(lambda op: op.device)

        query_poses = broadcast_pose(query_poses, height, width)

        state = cell.zero_state(batch, tf.float32)

        for step in range(sequence_size):
            z = sample_z(state.lstm.h, scope='sample_eta_pi')
            inputs = _GeneratorCellInput(representations, query_poses, z)
            with tf.name_scope('Generator'):
                (output, state) = cell(inputs, state, 'LSTM_gen')
            ep_canvas = 'canvas_{}'.format(step)
            endpoints[ep_canvas] = output.canvas
            outputs.append(output)

        target_canvas = outputs[-1].canvas

    mu_target = eta_g(target_canvas, channels=C.IMG_CHANNELS, scope='eta_g')
    endpoints['mu_target'] = mu_target

    return mu_target, endpoints
def updateQ(self, sess, states, actions, rewards, states_, dones, states2,
            actions2, batch_size, latent_size):
    feed_dict = {
        self.rewards: rewards,
        self.dones: dones,
        self.target_qnet.states: states_,
        self.qnet.states: states,
        self.qnet.actions: actions,
        self.cgan_reward.states2: states2,
        self.cgan_reward.actions2: actions2,
        self.cgan_reward.Z2: sample_z(batch_size, latent_size),
        self.cgan_state.states2: states2,
        self.cgan_state.actions2: actions2,
        self.cgan_state.Z2: sample_z(batch_size, latent_size),
        self.qnet.states2: states2,
        self.qnet.actions2: actions2
    }
    _ = sess.run(self.opt_Q, feed_dict=feed_dict)
def __call__(self, *args, **kwargs):
    with torch.no_grad():
        self.generator.eval()
        z = utils.sample_z(self.batch, self.latent, self.device,
                           self.gen_distribution)
        # bare `num_classes` was undefined in this scope; the instance
        # attribute is the likely intent
        pseudo_y = utils.sample_pseudo_labels(self.num_classes, self.batch,
                                              self.device)
        fake_img = self.generator(z, pseudo_y)
    return fake_img
def sample_from_gen(args, device, num_classes, gen):
    z = utils.sample_z(args.batch_size, args.gen_dim_z, device,
                       args.gen_distribution)
    if args.cGAN:
        pseudo_y = utils.sample_pseudo_labels(num_classes, args.batch_size,
                                              device)
    else:
        pseudo_y = None

    fake = gen(z, pseudo_y)
    return fake, pseudo_y, z
def build_model(self):
    with tf.device('/gpu:%d' % self.gpu_id):
        ### Placeholder ###
        self.X = tf.placeholder(tf.float32, [None, self.input_dim])
        self.k = tf.placeholder(tf.int32)
        self.z = tf.placeholder(tf.float32, [None, self.z_dim])
        self.keep_prob = tf.placeholder(tf.float32)

        ### Encoding ###
        self.z_mu, self.z_logvar = self.encoder_to_distribution(
            self.X, self.enc_h_dim_list, self.z_dim, self.keep_prob)
        self.z_sampled = sample_z(self.z_mu, self.z_logvar)
        self.kl_loss = kl_divergence_normal_distribution(self.z_mu,
                                                         self.z_logvar)

        ### Decoding ###
        self.recon_X_logit = self.decoder(self.z_sampled, self.dec_h_dim_list,
                                          self.input_dim, self.keep_prob, False)
        self.recon_X = tf.nn.sigmoid(self.recon_X_logit)
        self.output = tf.nn.tanh(self.recon_X_logit)
        self.recon_loss = self.recon_loss()

        ### Generating ###
        self.gen_X_logit = self.decoder(self.z, self.dec_h_dim_list,
                                        self.input_dim, self.keep_prob, True)
        self.gen_X = tf.nn.sigmoid(self.gen_X_logit)

        ### Discriminating ###
        dis_logit_real = self.discriminator(self.X, self.dis_h_dim_list, 1,
                                            self.keep_prob, False)
        dis_logit_fake = self.discriminator(self.gen_X, self.dis_h_dim_list, 1,
                                            self.keep_prob, True)
        self.dec_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=dis_logit_fake, labels=tf.ones_like(dis_logit_fake)))
        self.dis_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=dis_logit_real, labels=tf.ones_like(dis_logit_real)))
        self.dis_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=dis_logit_fake, labels=tf.zeros_like(dis_logit_fake)))

        # Improved part
        dis_logit_recon = self.discriminator(self.recon_X, self.dis_h_dim_list,
                                             1, self.keep_prob, True)
        self.dec_loss_recon = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=dis_logit_recon, labels=tf.ones_like(dis_logit_recon)))
        self.dis_loss_recon = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=dis_logit_recon, labels=tf.zeros_like(dis_logit_recon)))

        ### Loss ###
        self.enc_loss = self.kl_loss + self.recon_loss
        self.dec_loss = self.recon_loss + self.dec_loss_fake + self.dec_loss_recon
        self.dis_loss = self.dis_loss_real + self.dis_loss_fake + self.dis_loss_recon

        ### Theta ###
        enc_theta = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope='enc')
        dec_theta = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope='dec')
        dis_theta = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                      scope='dis')

        ### Solver ###
        self.enc_solver = tf.train.RMSPropOptimizer(
            self.learning_rate).minimize(self.enc_loss, var_list=enc_theta)
        self.dec_solver = tf.train.RMSPropOptimizer(
            self.learning_rate).minimize(self.dec_loss, var_list=dec_theta)
        self.dis_solver = tf.train.RMSPropOptimizer(
            self.learning_rate).minimize(self.dis_loss, var_list=dis_theta)
def updateR(self, sess, states, actions, rewards, states2, actions2,
            batch_size, latent_size):
    feed_dict = {
        self.cgan_reward.states: states,
        self.cgan_reward.actions: actions,
        self.cgan_reward.Z: sample_z(batch_size, latent_size),
        self.cgan_reward.X: rewards[..., np.newaxis]
    }
    _ = sess.run(self.opt_reward_model_D, feed_dict=feed_dict)

    feed_dict = {
        self.cgan_reward.states: states,
        self.cgan_reward.actions: actions,
        self.cgan_reward.Z: sample_z(batch_size, latent_size),
        self.cgan_reward.states2: states2,
        self.cgan_reward.actions2: actions2,
        self.cgan_reward.Z2: sample_z(batch_size, latent_size),
        self.cgan_state.states2: states2,
        self.cgan_state.actions2: actions2,
        self.cgan_state.Z2: sample_z(batch_size, latent_size),
        self.qnet.states2: states2,
        self.qnet.actions2: actions2
    }
    _ = sess.run(self.opt_reward_model_G, feed_dict=feed_dict)
def updateS(self, sess, states, actions, states_, states2, actions2,
            batch_size, latent_size):
    feed_dict = {
        self.cgan_state.states: states,
        self.cgan_state.actions: actions,
        self.cgan_state.Z: sample_z(batch_size, latent_size),
        self.cgan_state.X: states_
    }
    _ = sess.run(self.opt_state_model_D, feed_dict=feed_dict)

    feed_dict = {
        self.cgan_state.states: states,
        self.cgan_state.actions: actions,
        self.cgan_state.Z: sample_z(batch_size, latent_size),
        self.cgan_reward.states2: states2,
        self.cgan_reward.actions2: actions2,
        self.cgan_reward.Z2: sample_z(batch_size, latent_size),
        self.cgan_state.states2: states2,
        self.cgan_state.actions2: actions2,
        self.cgan_state.Z2: sample_z(batch_size, latent_size),
        self.qnet.states2: states2,
        self.qnet.actions2: actions2
    }
    _ = sess.run(self.opt_state_model_G, feed_dict=feed_dict)
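# updateQ/updateR/updateS above all draw fresh noise for every CGAN
# placeholder via sample_z(batch_size, latent_size). A minimal NumPy sketch;
# the uniform prior is an assumption (a standard normal would work equally
# well):
import numpy as np


def sample_z(batch_size, latent_size):
    # One noise row per transition in the minibatch
    return np.random.uniform(-1.0, 1.0, size=(batch_size, latent_size))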
def main():
    args = parse_arguments()

    with open(args.config, 'r') as f:
        config = yaml.safe_load(f)
    # build the OmegaConf object from the parsed YAML, not the file handle
    config = OmegaConf.create(config)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    labels = [int(l) for l in args.labels.split(',')]

    generator = instantiate(config.generator)
    generator.load_state_dict(
        torch.load(config.resume_checkpoint)['g_model_dict'])

    # iterate over the requested class ids (range() over a list raises)
    for y in labels:
        y = torch.tensor([y], device=device)  # batch of one label, matching z
        z = sample_z(generator.z_dim, 1, device)
        x_fake = generator(z, y)
        show_tensor_images(x_fake)
def compute_loss(batch, grid, mask, z_params_full, z_params_masked, h, w,
                 decoder):
    ## compute loss
    z_full = sample_z(z_params_full)  # size bsize * hidden
    z_full = z_full.unsqueeze(1).expand(-1, h * w, -1)

    # resize context to have one context per input coordinate
    grid_input = grid.view(1, h * w, -1).expand(batch.size(0), -1, -1)
    target_input = torch.cat([z_full, grid_input], dim=-1)

    reconstructed_image_mean, reconstructed_image_variance = decoder(
        target_input)  # bsize, h*w, 1
    reconstruction_loss = -(
        log_normal(x=batch.view(batch.size(0), 3, h * w).transpose(1, 2),
                   m=reconstructed_image_mean,
                   v=reconstructed_image_variance) *
        (1 - mask.view(-1, h * w))).sum(dim=1).mean()

    kl_loss = kl_normal(z_params_full, z_params_masked).mean()
    return (reconstruction_loss, kl_loss, reconstructed_image_mean,
            reconstructed_image_variance)
def compute_loss(batch, grid, mask, z_params_full, z_params_masked, h, w,
                 decoder):
    ## compute loss
    z_full = sample_z(z_params_full)  # size bsize * hidden
    z_full = z_full.unsqueeze(1).expand(-1, h * w, -1)

    # resize context to have one context per input coordinate
    grid_input = grid.view(1, h * w, -1).expand(batch.size(0), -1, -1)
    target_input = torch.cat([z_full, grid_input], dim=-1)

    reconstructed_image = decoder(target_input)  # bsize, h*w, 1
    reconstruction_loss = (
        F.binary_cross_entropy(reconstructed_image,
                               batch.view(batch.size(0), h * w, 1),
                               reduction='none') *
        (1 - mask.view(-1, h * w, 1))).sum(dim=1).mean()

    kl_loss = kl_normal(z_params_full, z_params_masked).mean()
    return reconstruction_loss, kl_loss, reconstructed_image
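# Both compute_loss variants above sample one latent per image from the
# variational posterior. A minimal sketch assuming z_params is a
# (mean, variance) pair, matching the kl_normal(z_params_full,
# z_params_masked) convention; the exact container type is an assumption:
import torch


def sample_z(z_params):
    mu, var = z_params
    # Reparameterized draw: z = mu + sqrt(var) * eps, eps ~ N(0, I)
    eps = torch.randn_like(mu)
    return mu + torch.sqrt(var) * eps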
for i in range(num_epoch):
    seed = seed + 1
    mini = 0
    minibatches = random_mini_batches(toydata.T, mini_batch_size=mb_size,
                                      seed=seed)
    for minibatch in minibatches:
        mini = mini + 1
        X_mb = minibatch.T
        count = count + 1

        # Train auto-encoders
        z_mb = sample_z(X_mb.shape[0], z_dim)
        sess.run([R_solver], feed_dict={X: X_mb, z: z_mb})

        # Train discriminator
        for k in range(n_critics):
            z_mb_critics = sample_z(X_mb.shape[0], z_dim)
            X_mb_critics = random_batches(toydata, X_mb.shape[0])
            sess.run([D_solver], feed_dict={
                X: X_mb_critics,
                z: z_mb_critics
            })

        # Train generator
        z_mb = sample_z(X_mb.shape[0], z_dim)
        sess.run([G_solver], feed_dict={X: X_mb, z: z_mb})
def train(self, sess, states_B, actions_B, rewards_B, states1_B, dones_B,
          states_M, actions_M, batch_size, latent_size):
    dones_B = dones_B.astype(np.float64)

    # Joint dictionary
    feed_dict_joint = {
        self.cgan_state.states2: states_M,
        self.cgan_state.actions2: actions_M,
        self.cgan_state.Z2: sample_z(batch_size, latent_size),
        self.cgan_reward.states2: states_M,
        self.cgan_reward.actions2: actions_M,
        self.cgan_reward.Z2: sample_z(batch_size, latent_size)
    }

    # Update critic
    # Dict for critic
    feed_dict_critic = {
        self.states: states_B,
        self.actions: actions_B,
        self.rewards: rewards_B,
        self.states1: states1_B,
        self.dones: dones_B
    }
    _, l_Q = sess.run([self.opt_Q, self.loss_Q],
                      feed_dict=merge_two_dicts(feed_dict_joint,
                                                feed_dict_critic))

    # Get another Z sample
    feed_dict_joint[self.cgan_state.Z2] = sample_z(batch_size, latent_size)
    feed_dict_joint[self.cgan_reward.Z2] = sample_z(batch_size, latent_size)

    # Update actor
    _ = sess.run(self.opt_actor, feed_dict={self.states: states_B})

    # Update state model (discriminator)
    feed_dict_state_model = {
        self.cgan_state.states: states_B,
        self.cgan_state.actions: actions_B,
        self.cgan_state.Z: sample_z(batch_size, latent_size),
        self.cgan_state.X: states1_B
    }
    _ = sess.run(self.opt_state_model_D, feed_dict=feed_dict_state_model)

    # Update state model (generator)
    feed_dict_state_model.pop(self.cgan_state.X)
    feed_dict_state_model[self.cgan_state.Z] = sample_z(batch_size,
                                                        latent_size)
    _ = sess.run(self.opt_state_model_G,
                 feed_dict=merge_two_dicts(feed_dict_joint,
                                           feed_dict_state_model))

    # Get another Z sample
    feed_dict_joint[self.cgan_state.Z2] = sample_z(batch_size, latent_size)
    feed_dict_joint[self.cgan_reward.Z2] = sample_z(batch_size, latent_size)

    # Update reward model (discriminator)
    feed_dict_reward_model = {
        self.cgan_reward.states: states_B,
        self.cgan_reward.actions: actions_B,
        self.cgan_reward.Z: sample_z(batch_size, latent_size),
        self.cgan_reward.X: rewards_B[..., np.newaxis]
    }
    _ = sess.run(self.opt_reward_model_D, feed_dict=feed_dict_reward_model)

    # Update reward model (generator)
    feed_dict_reward_model.pop(self.cgan_reward.X)
    feed_dict_reward_model[self.cgan_reward.Z] = sample_z(batch_size,
                                                          latent_size)
    _ = sess.run(self.opt_reward_model_G,
                 feed_dict=merge_two_dicts(feed_dict_joint,
                                           feed_dict_reward_model))
loss_Z = 0
for X, _ in train_dataloader:
    X = select_white_line_images(X, proba_white_line)

    # put a mask on images
    input_masked = X.to(device) * mask

    # Freeze H network
    freeze(net_H)
    H, skip_connect_layers = net_H(input_masked)

    # freeze G network
    freeze(net_G)

    # generate init z
    z_t = sample_z(X.shape[0], z_size=10)
    z_t.requires_grad = True

    for _ in range(STEPS):
        ###########################
        # #####  updating z ##### #
        ###########################
        z = z_t.clone().detach()
        X_hat = net_G(H, z_t.permute(0, 2, 1).unsqueeze(3),
                      skip_connect_layers)
        loss = criterion(X_hat * ~mask, X.to(device) * ~mask)
        loss.backward()
        with torch.no_grad():
def train(dataloaders, models, optimizers, train_config, device,
          start_epoch=0):
    ''' Train function for BigGAN '''
    # unpack modules
    train_dataloader, val_dataloader = dataloaders
    generator, discriminator = models
    g_optimizer, d_optimizer = optimizers

    log_dir = os.path.join(train_config.log_dir,
                           datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    os.makedirs(log_dir, mode=0o775, exist_ok=False)

    loss = BigGANLoss(device=device)

    for epoch in range(start_epoch, train_config.epochs):
        # training epoch
        epoch_steps = 0
        mean_g_loss = 0.0
        mean_d_loss = 0.0
        generator.train()
        discriminator.train()
        pbar = tqdm(train_dataloader, position=0,
                    desc='train [G loss: -.-----][D loss: -.-----]')
        for (x, y) in pbar:
            x = x.to(device)
            y = y.to(device)
            z = sample_z(generator.z_dim, x.shape[0], device)

            with torch.cuda.amp.autocast(enabled=(device == 'cuda')):
                g_loss, d_loss, x_fake = loss(generator, discriminator,
                                              x, y, z)

            g_optimizer.zero_grad()
            g_loss.backward()
            g_optimizer.step()

            d_optimizer.zero_grad()
            d_loss.backward()
            d_optimizer.step()

            mean_g_loss += g_loss.item()
            mean_d_loss += d_loss.item()
            epoch_steps += 1
            pbar.set_description(
                desc=f'train [G loss: {mean_g_loss/epoch_steps:.5f}]'
                     f'[D loss: {mean_d_loss/epoch_steps:.5f}]')

        # parenthesize: `epoch + 1 % n` binds `%` before `+` and never fires
        if (epoch + 1) % train_config.save_every == 0:
            print(f'Epoch {epoch}: saving checkpoint')
            torch.save(
                {
                    'g_model_dict': generator.state_dict(),
                    'd_model_dict': discriminator.state_dict(),
                    'g_optim_dict': g_optimizer.state_dict(),
                    'd_optim_dict': d_optimizer.state_dict(),
                    'epoch': epoch,
                }, os.path.join(log_dir, f'epoch={epoch}.pt'))

        # validation epoch
        epoch_steps = 0
        mean_g_loss = 0.0
        mean_d_loss = 0.0
        generator.eval()
        discriminator.eval()
        pbar = tqdm(val_dataloader, position=0,
                    desc='val [G loss: -.-----][D loss: -.-----]')
        for (x, y) in pbar:
            x = x.to(device)
            y = y.to(device)
            z = sample_z(generator.z_dim, x.shape[0], device)

            with torch.no_grad():
                with torch.cuda.amp.autocast(enabled=(device == 'cuda')):
                    g_loss, d_loss, x_fake = loss(generator, discriminator,
                                                  x, y, z)

            mean_g_loss += g_loss.item()
            mean_d_loss += d_loss.item()
            epoch_steps += 1
            pbar.set_description(
                desc=f'val [G loss: {mean_g_loss/epoch_steps:.5f}]'
                     f'[D loss: {mean_d_loss/epoch_steps:.5f}]')
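# Note that sample_z here takes the latent dimension first,
# sample_z(generator.z_dim, x.shape[0], device), unlike utils.sample_z in the
# earlier snippets. A minimal standard-normal sketch with that argument order
# (an assumption based on this call site and the checkpoint-loading main()
# above):
import torch


def sample_z(z_dim, batch_size, device):
    return torch.randn(batch_size, z_dim, device=device)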
for i in range(steps):
    ob = obs[i:i+1]             # (1, 64, 64, 1)
    action = oh_actions[i:i+1]  # (1, n)

    z = vae.encode(ob)          # (1, 32)  VAE done!

    rnn_z = np.expand_dims(z, axis=0)        # (1, 1, 32)
    action = np.expand_dims(action, axis=0)  # (1, 1, n)
    input_x = np.concatenate([rnn_z, action], axis=2)  # (1, 1, 32+n)
    feed = {rnn.input_x: input_x, rnn.initial_state: state}

    # predict the next state and next z.
    if pz is not None:
        # decode from the z
        frame = vae.decode(pz[None])
        frame2 = vae.decode(z)
        #neglogp = neg_likelihood(logmix, mean, logstd, z.reshape(32,1))
        #imsave(output_dir + '/%s_origin_%.2f.png' % (pad_num(i), np.exp(-neglogp)), 255.*ob.reshape(64, 64))
        #imsave(output_dir + '/%s_reconstruct.png' % pad_num(i), 255. * frame[0].reshape(64, 64))
        img = concat_img(255.*ob, 255*frame2, 255.*frame)
        imsave(output_dir + '/%s.png' % pad_num(i), img)

    (logmix, mean, logstd, state) = rnn.sess.run(
        [rnn.out_logmix, rnn.out_mean, rnn.out_logstd, rnn.final_state], feed)

    # Sample the next frame's state.
    pz = sample_z(logmix, mean, logstd, OUTWIDTH, T)
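# pz above is drawn from the MDN-RNN's output mixture. A rough sketch of a
# temperature-controlled mixture sample; the shapes and the exact temperature
# scaling are assumptions in the spirit of world-models-style code, not the
# original implementation:
import numpy as np


def sample_z(logmix, mean, logstd, outwidth, temperature):
    # logmix/mean/logstd: (outwidth, n_mix) mixture parameters per latent dim
    logits = logmix / temperature
    logits -= logits.max(axis=1, keepdims=True)  # stabilize softmax
    weights = np.exp(logits)
    weights /= weights.sum(axis=1, keepdims=True)

    z = np.zeros(outwidth)
    for i in range(outwidth):
        # pick a mixture component, then draw from its Gaussian
        k = np.random.choice(weights.shape[1], p=weights[i])
        z[i] = (mean[i, k] +
                np.exp(logstd[i, k]) * np.random.randn() *
                np.sqrt(temperature))
    return z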
if __name__ == "__main__":
    # Create the models
    net_H = Net_H().to(device)
    net_Z = Net_Z().to(device)
    net_G = Net_G().to(device)

    # Initialize models weights
    net_H.apply(weights_init)
    net_Z.apply(weights_init)
    net_G.apply(weights_init)

    # Print the models
    print(net_H)
    print(net_Z)
    print(net_G)

    """Test outputs size"""
    z = torch.zeros(10, 1, 10).to(device)
    X = torch.zeros(10, 1, 32, 32).to(device)
    with torch.no_grad():
        h, skip_connect_layers = net_H(X)
        z = net_Z(h.squeeze(3).permute(0, 2, 1), z)
        output = net_G(h, z[:, :, :10].permute(0, 2, 1).unsqueeze(3),
                       skip_connect_layers)
    print(h.shape)       # expected: [10, 4000, 1, 1]
    print(z.shape)       # expected: [10, 1, 10]
    print(output.shape)  # expected: [10, 1, 32, 32]

    """Test vector z shape"""
    print(sample_z(10, z_size=10).size())  # expected size [10, 1, 10]
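# The shape test above expects sample_z(10, z_size=10) to return a
# [batch, 1, z_size] tensor on the module-level `device` (the same global the
# snippet itself uses). A minimal sketch consistent with that expectation; the
# normal prior is an assumption:
import torch


def sample_z(batch_size, z_size=10):
    # Leaf tensor, so callers can set requires_grad and optimize z directly
    return torch.randn(batch_size, 1, z_size, device=device)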
def train_G(self, batch_size, loop=1):
    for i in range(loop):
        _, loss = self.sess.run(
            [self.G_solver, self.G_loss],
            feed_dict={self.zs: sample_z(batch_size, self.z_dim)})
    return loss
def main():
    # set GPU card
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

    # load anime face data
    data_dir = '../anime_face/data_64/images/'
    data_extra_dir = '../anime_face/extra_data/images/'
    ds = dataset()
    ds.load_data(data_dir, verbose=0)
    ds.load_data(data_extra_dir, verbose=0)
    ds.shuffle()

    # reset graph
    tf.reset_default_graph()

    # set session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # build model
    model = GAN(sess, gf_dim=128)

    # training
    z_plot = sample_z(36, 100)

    # initial fake image
    z = sample_z(bs, 100)
    i = 1
    while True:
        if (i == 1) or (i <= 100 and i % 20 == 0) or \
           (i <= 200 and i % 50 == 0) or (i <= 1000 and i % 100 == 0) or \
           (i % 200 == 0):
            g_samples = model.generate(z_plot)
            plot_samples(g_samples, save=True, filename=str(i),
                         folder_path='out2/', h=6, w=6)

        # train discriminator more
        for _ in range(5):
            real_img = ds.next_batch(bs)
            z = sample_z(bs, 100)
            fake_img = model.generate(z)
            # train D
            D_loss = model.train_D(real_img, fake_img)

        G_loss = model.train_G(bs)

        if (i % 100) == 0:
            model.save(model_name='WGAN_v2')
            z_loss = sample_z(64, 100)
            g_loss = model.generate(sample_z(32, 100))
            g, d = model.sess.run([model.G_loss, model.D_loss],
                                  feed_dict={
                                      model.xs: ds.random_sample(32),
                                      model.gs: g_loss,
                                      model.zs: z_loss
                                  })
            print(str(i) + ' iteration:')
            print('D_loss:', d)
            print('G_loss:', g, '\n')

        i = i + 1
def train(args):
    random.seed(8722)
    torch.manual_seed(4565)
    measure_history = deque([0] * 3000, 3000)
    convergence_history = []
    prev_measure = 1

    iter = 0
    thresh = 0.5
    graph = bn.create_bayes_net()
    bce_loss = torch.nn.BCEWithLogitsLoss()
    lr = args.lr
    iters = args.load_step
    prepare_paths(args)

    u_dist = utils.create_uniform(-1, 1)
    fixed_z = utils.sample_z(u_dist, (args.batch_size, args.z))

    (netG, optimG), (netD, optimD), (netL, optimL) = init_models(args)
    pretrain_labeler(args, netL, optimL)

    data_loader = datagen.load_celeba_50k_attrs(args)
    for epoch in range(args.epochs):
        for i, (data, _, attrs) in enumerate(data_loader):
            data = data.cuda()
            attrs = torch.squeeze(attrs > 0).float().cuda()

            """ Labeler """
            netL.zero_grad()
            real_labels = netL(data)
            real_label_loss = bce_loss(real_labels, attrs).mean()
            real_label_loss.backward()
            optimL.step()

            """ Discriminator """
            for p in netD.parameters():
                p.requires_grad = True
            z = utils.sample_z(u_dist, (args.batch_size, args.z))
            marginals = rand_marginals(args, graph)
            netD.zero_grad()
            with torch.no_grad():
                g_fake = netG(z, marginals)
            _, d_fake = netD(g_fake, marginals)
            _, d_real = netD(data, attrs)
            real_loss_d = (d_real - data).abs().mean()
            fake_loss_d = (d_fake - g_fake).abs().mean()
            lossD = real_loss_d - args.k * fake_loss_d
            lossD.backward()
            optimD.step()

            """ Generator """
            for p in netD.parameters():
                p.requires_grad = False
            marginals = rand_marginals(args, graph)
            netG.zero_grad()
            netL.zero_grad()
            z = utils.sample_z(u_dist, (args.batch_size, args.z))
            g_fake = netG(z, marginals)
            _, d_fake = netD(g_fake, marginals)
            lossG = (d_fake - g_fake).abs().mean()

            ## Ok lets penalize distance from marginals (labeler)
            with torch.no_grad():
                marginals = rand_marginals(args, graph)
                z = utils.sample_z(u_dist, (args.batch_size, args.z))
                g_fake = netG(z, marginals)
            fake_labels = netL(g_fake)
            fake_label_loss = bce_loss(fake_labels, marginals).mean()
            loss_gac = fake_label_loss + lossG
            loss_gac.backward()
            optimG.step()
            optimL.step()

            lagrangian = (args.gamma * real_loss_d - fake_loss_d).detach()
            args.k += args.lambda_k * lagrangian
            args.k = max(min(1, args.k), 0)
            convg_measure = real_loss_d.item() + lagrangian.abs()
            measure_history.append(convg_measure)

            if iter % args.print_step == 0:
                print("Iter: {}, D loss: {}, G Loss: {}, AC loss: {}".format(
                    iter, lossD.item(), lossG.item(), fake_label_loss.item()))
                save_images(args, g_fake.detach(), d_real.detach(), iter)

            """ update training parameters """
            lr = args.lr * 0.95 ** (iter // 3000)
            for p in optimG.param_groups + optimD.param_groups:
                p['lr'] = lr

            if iter % 1000 == 0:
                pathG = 'experiments/{}/models/netG_{}.pt'.format(args.name,
                                                                  iter)
                pathD = 'experiments/{}/models/netD_{}.pt'.format(args.name,
                                                                  iter)
                utils.save_model(pathG, netG, optimG, args.k)
                utils.save_model(pathD, netD, optimD, args.k)
            iter += 1
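# train() above samples latents through a distribution object built by
# utils.create_uniform(-1, 1). A minimal sketch using torch.distributions;
# both helpers are assumptions about the missing utils module, and the
# .cuda() placement mirrors the .cuda() usage in train():
import torch


def create_uniform(low, high):
    return torch.distributions.Uniform(float(low), float(high))


def sample_z(dist, shape):
    # dist.sample(shape) draws a tensor of the given shape from the prior
    return dist.sample(shape).cuda()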
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--env-interface", type=str, default='gym!atari')
    parser.add_argument("--environment", type=str, default='CartPole-v0')
    parser.add_argument("--action-size", type=int, default=2)
    parser.add_argument("--input-shape", type=list, default=[None, 4])
    parser.add_argument("--target-update-freq", type=int, default=200)
    parser.add_argument("--epsilon-max", type=float, default=1.)
    parser.add_argument("--epsilon-min", type=float, default=.01)
    parser.add_argument("--epsilon-decay", type=float, default=.001)
    parser.add_argument("--learning-rate", type=float, default=.99)
    parser.add_argument("--batch-size", type=int, default=64)
    parser.add_argument("--epochs", type=int, default=30000)
    parser.add_argument("--replay-mem-size", type=int, default=1000000)
    parser.add_argument("--K", type=int, default=1,
                        help='The number of steps to train the environment')
    parser.add_argument(
        "--L", type=int, default=1,
        help='The number of Q-learning steps for hypothetical rollouts')
    parser.add_argument("--latent-size", type=int, default=4,
                        help='Size of vector for Z')
    args = parser.parse_args()

    env = env_interface(args.env_interface, args.environment,
                        pixel_feature=False, render=True)
    #args.action_size = env.action_space.n
    args.action_size = env.action_size
    args.input_shape = [None] + list(env.obs_space_shape)
    print args

    # Other parameters
    epsilon = args.epsilon_max

    # Replay memory
    memory = Memory(args.replay_mem_size)

    # Time step
    time_step = 0.

    # Initialize the GANs
    cgan_state = CGAN(input_shape=args.input_shape,
                      action_size=args.action_size,
                      latent_size=args.latent_size,
                      gen_input_shape=args.input_shape)
    cgan_reward = CGAN(input_shape=args.input_shape,
                       action_size=args.action_size,
                       latent_size=args.latent_size,
                       gen_input_shape=[None, 1])

    qnet = qnetwork(input_shape=args.input_shape,
                    action_size=args.action_size, scope='qnet')
    target_qnet = qnetwork(input_shape=args.input_shape,
                           action_size=args.action_size, scope='target_qnet')
    update_ops = update_target_graph('qnet', 'target_qnet')

    rand_no = np.random.rand()
    #env = gym.wrappers.Monitor(env, '/tmp/cartpole-experiment-' + str(rand_no), force=True, video_callable=False)

    init = tf.initialize_all_variables()
    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(args.epochs):
            total_reward = 0
            observation = env.reset()
            for t in range(1000000):
                #env.render()
                action = qnet.get_action(sess, observation)
                if np.random.rand() < epsilon:
                    #action = env.action_space.sample()
                    action = np.random.randint(args.action_size)
                observation1, reward, done, info = env.step(action)
                total_reward += reward

                # Add to memory
                memory.add([observation, action, reward, observation1, done])

                # Reduce epsilon
                time_step += 1.
                epsilon = args.epsilon_min + (
                    args.epsilon_max - args.epsilon_min) * np.exp(
                        -args.epsilon_decay * time_step)

                # Training step
                batch = np.array(memory.sample(args.batch_size))
                qnet.train(sess, batch, args.learning_rate, target_qnet)

                # Training step: environment model
                for k in range(args.K):
                    batch = np.array(memory.sample(args.batch_size))
                    states = np.vstack(batch[:, 0])
                    actions = np.array(batch[:, 1])
                    rewards = batch[:, 2]
                    states1 = np.vstack(batch[:, 3])

                    _, D_loss_state = sess.run(
                        [cgan_state.D_solver, cgan_state.D_loss],
                        feed_dict={
                            cgan_state.states: states,
                            cgan_state.actions: actions,
                            cgan_state.Z: sample_z(len(batch),
                                                   args.latent_size),
                            cgan_state.X: states1
                        })
                    _, G_loss_state = sess.run(
                        [cgan_state.G_solver, cgan_state.G_loss],
                        feed_dict={
                            cgan_state.states: states,
                            cgan_state.actions: actions,
                            cgan_state.Z: sample_z(len(batch),
                                                   args.latent_size)
                        })
                    _, D_loss_reward = sess.run(
                        [cgan_reward.D_solver, cgan_reward.D_loss],
                        feed_dict={
                            cgan_reward.states: states,
                            cgan_reward.actions: actions,
                            cgan_reward.Z: sample_z(len(batch),
                                                    args.latent_size),
                            cgan_reward.X: rewards[..., np.newaxis]
                        })
                    _, G_loss_reward = sess.run(
                        [cgan_reward.G_solver, cgan_reward.G_loss],
                        feed_dict={
                            cgan_reward.states: states,
                            cgan_reward.actions: actions,
                            cgan_reward.Z: sample_z(len(batch),
                                                    args.latent_size)
                        })
                    #print D_loss_state, G_loss_state, D_loss_reward, G_loss_reward

                # Training step: imagination rollouts
                if time_step == 0.:
                    print "time_step 0 here"
                if time_step >= 0.:
                    for l in range(args.L):
                        batch = np.array(memory.sample(args.batch_size))
                        assert len(batch) > 0
                        states1 = np.vstack(batch[:, 3])
                        actions = np.random.randint(args.action_size,
                                                    size=len(batch))
                        dones = np.array([False] * len(batch))
                        G_sample_state = sess.run(
                            cgan_state.G_sample,
                            feed_dict={
                                cgan_state.states: states1,
                                cgan_state.actions: actions,
                                cgan_state.Z: sample_z(len(batch),
                                                       args.latent_size)
                            })
                        G_sample_reward = sess.run(
                            cgan_reward.G_sample,
                            feed_dict={
                                cgan_reward.states: states1,
                                cgan_reward.actions: actions,
                                cgan_reward.Z: sample_z(len(batch),
                                                        args.latent_size)
                            })
                        qnet.train(sess, None, args.learning_rate,
                                   target_qnet, states1, actions,
                                   G_sample_reward, G_sample_state, dones)

                # Set observation
                observation = observation1

                # Update?
                if int(time_step) % args.target_update_freq == 0:
                    #print "Updating target..."
                    sess.run(update_ops)

                if done:
                    print "Episode finished after {} timesteps".format(
                        t + 1), 'epoch', epoch, 'total_rewards', total_reward
                    break
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--environment", type=str, default='Pendulum-v0')
    parser.add_argument("--action-dim", type=int, default=1)
    parser.add_argument("--state-dim", type=int, default=1)
    parser.add_argument("--input-shape", type=list, default=[None, 1])
    parser.add_argument("--epochs", type=int, default=30000)
    parser.add_argument('--tau', help='soft target update parameter',
                        default=0.001)
    parser.add_argument("--action-bound", type=float, default=1.)
    parser.add_argument("--replay-mem-size", type=int, default=1000000)
    parser.add_argument("--batch-size", type=int, default=64)
    parser.add_argument("--gamma", type=float, default=.99)
    parser.add_argument("--K", type=int, default=1,
                        help='The number of steps to train the environment')
    parser.add_argument(
        "--L", type=int, default=1,
        help='The number of Q-learning steps for hypothetical rollouts')
    parser.add_argument("--latent-size", type=int, default=4,
                        help='Size of vector for Z')
    args = parser.parse_args()

    # Initialize environment
    env = gym.make(args.environment)
    args.state_dim = env.observation_space.shape[0]
    args.input_shape = [None, args.state_dim]
    args.action_dim = env.action_space.shape[0]
    #assert args.action_dim == 1
    args.action_bound = env.action_space.high
    print(args)

    # Networks
    actor_source = actor(state_shape=[None, args.state_dim],
                         action_shape=[None, args.action_dim],
                         output_bound=args.action_bound[0],
                         scope='actor_source')
    critic_source = critic(state_shape=[None, args.state_dim],
                           action_shape=[None, args.action_dim],
                           scope='critic_source')
    actor_target = actor(state_shape=[None, args.state_dim],
                         action_shape=[None, args.action_dim],
                         output_bound=args.action_bound[0],
                         scope='actor_target')
    critic_target = critic(state_shape=[None, args.state_dim],
                           action_shape=[None, args.action_dim],
                           scope='critic_target')

    # Initialize the GANs
    cgan_state = CGAN(input_shape=args.input_shape,
                      action_size=args.action_dim,
                      latent_size=args.latent_size,
                      gen_input_shape=args.input_shape,
                      continuous_action=True)
    cgan_reward = CGAN(input_shape=args.input_shape,
                       action_size=args.action_dim,
                       latent_size=args.latent_size,
                       gen_input_shape=[None, 1],
                       continuous_action=True)

    # Update and copy operators
    update_target_actor = update_target_graph2('actor_source', 'actor_target',
                                               args.tau)
    update_target_critic = update_target_graph2('critic_source',
                                                'critic_target', args.tau)
    copy_target_actor = update_target_graph2('actor_source', 'actor_target',
                                             1.)
    copy_target_critic = update_target_graph2('critic_source',
                                              'critic_target', 1.)

    # Replay memory
    memory = Memory(args.replay_mem_size)

    # Actor noise
    actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(args.action_dim))

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(copy_target_critic)
        sess.run(copy_target_actor)
        for epoch in range(args.epochs):
            state = env.reset()
            total_rewards = 0.0
            while True:
                #env.render()
                # Choose an action
                action = sess.run(
                    actor_source.action,
                    feed_dict={
                        actor_source.states: state[np.newaxis, ...]
                    })[0] + actor_noise()

                # Execute action
                state1, reward, done, _ = env.step(action)
                total_rewards += float(reward)

                # Store tuple in replay memory
                memory.add([state[np.newaxis, ...],
                            action[np.newaxis, ...],
                            reward,
                            state1[np.newaxis, ...],
                            done])

                # Training step: update actor critic using real experience
                batch = np.array(memory.sample(args.batch_size))
                assert len(batch) > 0
                states = np.concatenate(batch[:, 0], axis=0)
                actions = np.concatenate(batch[:, 1], axis=0)
                rewards = batch[:, 2]
                states1 = np.concatenate(batch[:, 3], axis=0)
                dones = batch[:, 4]

                # Update the critic
                actions1 = sess.run(
                    actor_target.action,
                    feed_dict={actor_target.states: states1})
                targetQ = np.squeeze(
                    sess.run(critic_target.Q,
                             feed_dict={critic_target.states: states1,
                                        critic_target.actions: actions1}),
                    axis=-1)
                targetQ = rewards + (
                    1. - dones.astype(np.float32)) * args.gamma * targetQ
                targetQ = targetQ[..., np.newaxis]
                _, critic_loss = sess.run(
                    [critic_source.critic_solver, critic_source.loss],
                    feed_dict={critic_source.states: states,
                               critic_source.actions: actions,
                               critic_source.targetQ: targetQ})

                # Update the actor
                critic_grads = sess.run(
                    critic_source.grads,
                    feed_dict={critic_source.states: states,
                               critic_source.actions: actions}
                )[0]  # Grab gradients from critic
                _ = sess.run(actor_source.opt,
                             feed_dict={actor_source.states: states,
                                        actor_source.dQ_by_da: critic_grads})

                # Update target networks
                sess.run(update_target_critic)
                sess.run(update_target_actor)

                # Training step: update the environment model using real
                # experience (i.e., update the conditional GANs)
                for k in range(args.K):
                    batch = np.array(memory.sample(args.batch_size))
                    states = np.concatenate(batch[:, 0], axis=0)
                    actions = np.concatenate(batch[:, 1], axis=0)
                    rewards = batch[:, 2]
                    states1 = np.concatenate(batch[:, 3], axis=0)

                    _, D_loss_state = sess.run(
                        [cgan_state.D_solver, cgan_state.D_loss],
                        feed_dict={cgan_state.states: states,
                                   cgan_state.actions: actions,
                                   cgan_state.Z: sample_z(len(batch),
                                                          args.latent_size),
                                   cgan_state.X: states1})
                    _, G_loss_state = sess.run(
                        [cgan_state.G_solver, cgan_state.G_loss],
                        feed_dict={cgan_state.states: states,
                                   cgan_state.actions: actions,
                                   cgan_state.Z: sample_z(len(batch),
                                                          args.latent_size)})
                    _, D_loss_reward = sess.run(
                        [cgan_reward.D_solver, cgan_reward.D_loss],
                        feed_dict={cgan_reward.states: states,
                                   cgan_reward.actions: actions,
                                   cgan_reward.Z: sample_z(len(batch),
                                                           args.latent_size),
                                   cgan_reward.X: rewards[..., np.newaxis]})
                    _, G_loss_reward = sess.run(
                        [cgan_reward.G_solver, cgan_reward.G_loss],
                        feed_dict={cgan_reward.states: states,
                                   cgan_reward.actions: actions,
                                   cgan_reward.Z: sample_z(len(batch),
                                                           args.latent_size)})
                    #print D_loss_state, G_loss_state, D_loss_reward, G_loss_reward

                # Training step: update actor critic using imagination rollouts
                for l in range(args.L):
                    batch = np.array(memory.sample(args.batch_size))
                    states_ = np.concatenate(batch[:, 3], axis=0)
                    actions = np.random.uniform(
                        env.action_space.low[0],
                        env.action_space.high[0],
                        size=(len(batch), env.action_space.shape[0]))
                    dones = np.array([False] * len(batch))
                    G_sample_state = sess.run(
                        cgan_state.G_sample,
                        feed_dict={cgan_state.states: states_,
                                   cgan_state.actions: actions,
                                   cgan_state.Z: sample_z(len(batch),
                                                          args.latent_size)})
                    G_sample_reward = sess.run(
                        cgan_reward.G_sample,
                        feed_dict={cgan_reward.states: states_,
                                   cgan_reward.actions: actions,
                                   cgan_reward.Z: sample_z(len(batch),
                                                           args.latent_size)})
                    G_sample_reward = np.squeeze(G_sample_reward, axis=-1)

                    # Update the critic
                    actions1 = sess.run(
                        actor_target.action,
                        feed_dict={actor_target.states: G_sample_state})
                    targetQ = np.squeeze(
                        sess.run(critic_target.Q,
                                 feed_dict={
                                     critic_target.states: G_sample_state,
                                     critic_target.actions: actions1}),
                        axis=-1)
                    targetQ = G_sample_reward + (
                        1. - dones.astype(np.float32)) * args.gamma * targetQ
                    targetQ = targetQ[..., np.newaxis]
                    _, critic_loss = sess.run(
                        [critic_source.critic_solver, critic_source.loss],
                        feed_dict={critic_source.states: states_,
                                   critic_source.actions: actions,
                                   critic_source.targetQ: targetQ})

                    # Update the actor
                    critic_grads = sess.run(
                        critic_source.grads,
                        feed_dict={critic_source.states: states_,
                                   critic_source.actions: actions}
                    )[0]  # Grab gradients from critic
                    _ = sess.run(actor_source.opt,
                                 feed_dict={
                                     actor_source.states: states_,
                                     actor_source.dQ_by_da: critic_grads})

                    # Update target networks
                    sess.run(update_target_critic)
                    sess.run(update_target_actor)

                state = np.copy(state1)

                if done == True:
                    print 'epoch', epoch, 'total rewards', total_rewards
                    break
def build_model(self):
    with tf.device('/gpu:%d' % self.gpu_id):
        self.X = tf.placeholder(tf.float32, [None, self.input_dim])
        self.k = tf.placeholder(tf.int32)
        self.keep_prob = tf.placeholder(tf.float32)

        ### Encoding ###
        self.z_mu, self.z_logvar = self.encoder(self.X, self.enc_h_dim_list,
                                                self.z_dim, self.keep_prob)
        self.z = sample_z(self.z_mu, self.z_logvar)

        ### Decoding/Generating ###
        self.recon_X_logit = self.decoder(self.z, self.dec_h_dim_list,
                                          self.input_dim, self.keep_prob,
                                          False)
        gen_X_logit = self.decoder(tf.random_normal(tf.shape(self.z)),
                                   self.dec_h_dim_list, self.input_dim,
                                   self.keep_prob, True)
        self.recon_X = tf.nn.sigmoid(self.recon_X_logit)
        gen_X = tf.nn.sigmoid(gen_X_logit)

        ### Discriminating ###
        dis_logit_real, dis_prob_real = self.discriminator(
            self.X, self.dis_h_dim_list, 1, self.keep_prob, False)
        #dis_logit_real, dis_prob_real = self.discriminator(self.recon_X, self.dis_h_dim_list, 1, self.keep_prob, False)
        dis_logit_fake, dis_prob_fake = self.discriminator(
            gen_X, self.dis_h_dim_list, 1, self.keep_prob, True)

        self.logger.info([x.name for x in tf.global_variables()])
        print([x.name for x in tf.global_variables() if 'enc' in x.name])
        print([x.name for x in tf.global_variables() if 'dec' in x.name])
        print([x.name for x in tf.global_variables() if 'dis' in x.name])
        enc_theta = [x for x in tf.global_variables() if 'enc' in x.name]
        dec_theta = [x for x in tf.global_variables() if 'dec' in x.name]
        dis_theta = [x for x in tf.global_variables() if 'dis' in x.name]
        #cost = tf.reduce_mean(tf.square(X-output))

        ### Loss ###
        #self.recon_loss = tf.losses.mean_squared_error(self.X, self.recon_X)
        self.recon_loss = self.recon_loss()
        self.kl_loss = kl_divergence_normal_distribution(self.z_mu,
                                                         self.z_logvar)
        self.dec_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=dis_logit_fake, labels=tf.ones_like(dis_logit_fake)))
        self.dis_loss_real = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=dis_logit_real, labels=tf.ones_like(dis_logit_real)))
        self.dis_loss_fake = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=dis_logit_fake, labels=tf.zeros_like(dis_logit_fake)))

        self.enc_loss = self.recon_loss + self.kl_loss
        self.dec_loss = self.dec_loss_fake + self.recon_loss
        self.dis_loss = self.dis_loss_real + self.dis_loss_fake
        #cost_summary = tf.summary.scalar('cost', cost)
        #self.total_loss = self.enc_loss + self.dec_loss + self.dis_loss

        self.enc_solver = tf.train.AdamOptimizer(
            self.learning_rate).minimize(self.enc_loss, var_list=enc_theta)
        self.dec_solver = tf.train.AdamOptimizer(
            self.learning_rate).minimize(self.dec_loss, var_list=dec_theta)
        self.dis_solver = tf.train.AdamOptimizer(
            self.learning_rate).minimize(self.dis_loss, var_list=dis_theta)
        """
        self.enc_solver = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.enc_loss, var_list=enc_theta)
        self.dec_solver = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.dec_loss, var_list=dec_theta)
        self.dis_solver = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.dis_loss, var_list=dis_theta)
        """

    ### Recommendation metric ###
    with tf.device('/cpu:0'):
        self.top_k_op = tf.nn.top_k(self.recon_X, self.k)
def train(context_encoder, context_to_dist, decoder, aggregator, train_loader,
          test_loader, optimizer, n_epochs, device, save_path, summary_writer,
          save_every=10, h=28, w=28, log=1):
    context_encoder.train()
    decoder.train()
    grid = make_mesh_grid(h, w).to(device)  # size h, w, 2

    for epoch in range(n_epochs):
        running_loss = 0.0
        last_log_time = time.time()

        # Training
        train_loss = 0.0
        for batch_idx, (batch, _) in enumerate(train_loader):
            batch = batch.to(device)
            if ((batch_idx % 100) == 0) and batch_idx > 1:
                print("epoch {} | batch {} | mean running loss {:.2f} | "
                      "{:.2f} batch/s".format(
                          epoch, batch_idx, running_loss / 100,
                          100 / (time.time() - last_log_time)))
                last_log_time = time.time()
                running_loss = 0.0
            mask = random_mask_uniform(bsize=batch.size(0), h=h, w=w,
                                       device=batch.device)
            z_params_full, z_params_masked = all_forward(
                batch, grid, mask, context_encoder, aggregator,
                context_to_dist)
            reconstruction_loss, kl_loss, reconstructed_image = compute_loss(
                batch, grid, mask, z_params_full, z_params_masked, h, w,
                decoder)
            loss = reconstruction_loss + kl_loss
            if batch_idx % 100 == 0:
                print("reconstruction {:.2f} | kl {:.2f}".format(
                    reconstruction_loss, kl_loss))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # add loss
            running_loss += loss.item()
            train_loss += loss.item()

        print("Epoch train loss : {}".format(train_loss / len(train_loader)))
        if summary_writer is not None:
            summary_writer.add_scalar("train/loss",
                                      train_loss / len(train_loader),
                                      global_step=epoch)

        if (epoch % save_every == 0) and log and epoch > 0:
            save_model(save_path, "NP_model_epoch_{}.pt".format(epoch),
                       context_encoder, context_to_dist, decoder, aggregator,
                       device)

        ## TEST
        test_loss = 0.0
        for batch_idx, (batch, _) in enumerate(test_loader):
            batch = batch.to(device)
            mask = random_mask_uniform(bsize=batch.size(0), h=h, w=w,
                                       device=batch.device)
            with torch.no_grad():
                z_params_full, z_params_masked = all_forward(
                    batch, grid, mask, context_encoder, aggregator,
                    context_to_dist)
                reconstruction_loss, kl_loss, reconstructed_image = \
                    compute_loss(batch, grid, mask, z_params_full,
                                 z_params_masked, h, w, decoder)
                loss = reconstruction_loss + kl_loss
            test_loss += loss.item()
        if summary_writer is not None:
            summary_writer.add_scalar("test/loss",
                                      test_loss / len(test_loader),
                                      global_step=epoch)
        print("TEST loss | epoch {} | {:.2f}".format(
            epoch, test_loss / len(test_loader)))

        # do examples
        example_batch, _ = next(iter(test_loader))
        example_batch = example_batch[:10].to(device)
        for n_pixels in [50, 150, 450]:
            mask = random_mask(example_batch.size(0), h, w, n_pixels,
                               device=example_batch.device)
            z_params_full, z_params_masked = all_forward(
                example_batch, grid, mask, context_encoder, aggregator,
                context_to_dist)
            z_context = torch.cat([
                sample_z(z_params_masked).unsqueeze(1).expand(-1, h * w, -1)
                for i in range(3)
            ], dim=0)
            z_context = torch.cat([
                z_context,
                grid.view(1, h * w, 2).expand(z_context.size(0), -1, -1)
            ], dim=2)
            decoded_images = decoder(z_context).view(-1, 1, h, w)
            stacked_images = display_images(original_image=example_batch,
                                            mask=mask,
                                            reconstructed_image=decoded_images)
            image = torch.tensor(stacked_images)
            if summary_writer is not None:
                summary_writer.add_image(
                    "test_image/{}_pixels".format(n_pixels), image,
                    global_step=epoch)
    return