def main(to_reload=None):
    mnist = load_mnist()

    if to_reload:  # restore
        v = vae.VAE(ARCHITECTURE, HYPERPARAMS, meta_graph=to_reload)
        print("Loaded!")
    else:  # train
        v = vae.VAE(ARCHITECTURE, HYPERPARAMS, log_dir=LOG_DIR)
        v.train(mnist, max_iter=MAX_ITER, max_epochs=MAX_EPOCHS,
                cross_validate=False, verbose=True, save=True,
                outdir=METAGRAPH_DIR, plots_outdir=PLOTS_DIR,
                plot_latent_over_time=False)
        print("Trained!")

    all_plots(v, mnist)
def main():
    device = torch.device("cpu")
    model = vae.VAE(latent_dim=args.latent_dim)
    model.to(device)
    model.load_state_dict(
        torch.load(args.ckpt_path, map_location=device)['state_dict'])
    model.eval()
    print(model)

    # bdmc uses simulated data from the model
    loader = simulate.simulate_data(
        model,
        batch_size=args.batch_size,
        n_batch=args.n_batch,
        device=device,
    )

    # run bdmc
    forward_schedule = np.linspace(0., 1., args.chain_length)
    bdmc(
        model,
        loader,
        forward_schedule=forward_schedule,
        n_sample=args.iwae_samples,
        device=device,
    )
def EDA1(input_size, evaluate, maxitr, pop_size, train_size, elite_size,
         intermediate_dim, latent_dim, epochs):
    best_in_samples = [0] * (maxitr + 1)
    average_in_samples = [0] * (maxitr + 1)
    diversity_in_samples = [0] * (maxitr + 1)
    sample_size = pop_size - elite_size

    population = generate_populaton(pop_size, input_size, evaluate)
    model = vae.VAE(input_size, intermediate_dim, latent_dim, epochs)

    best_in_samples[0] = best_in_pop(population)
    average_in_samples[0] = average_in_pop(population)
    diversity_in_samples[0] = calc_divesity(population)

    for gen in range(maxitr):
        print("gen : " + str(gen + 1))
        train_data = make_train_data(population, train_size)
        model.train(train_data)
        elite = elite_select(population, elite_size)
        samples = sample_from_model(model, input_size, sample_size, evaluate)
        print("best in samples = " + str(best_in_samples[gen]))
        print("average in samples = " + str(average_in_samples[gen]))
        population = elite + samples
        best_in_samples[gen + 1] = best_in_pop(population)
        average_in_samples[gen + 1] = average_in_pop(population)
        diversity_in_samples[gen + 1] = calc_divesity(population)
        print("diversity = " + str(calc_divesity(population)))
        print("")

    return best_in_samples, average_in_samples, diversity_in_samples
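# A hedged, self-contained sketch of the sampling step the EDA loop above
# relies on: draw latents from the VAE prior, decode, binarize, and wrap each
# sample with its fitness. The Individual tuple, model.latent_dim, and
# model.decode are illustrative assumptions; the real generate_populaton /
# sample_from_model helpers are defined elsewhere alongside EDA1.
from collections import namedtuple
import numpy as np

Individual = namedtuple('Individual', ['genome', 'fitness'])

def sample_from_model_sketch(model, input_size, sample_size, evaluate):
    z = np.random.randn(sample_size, model.latent_dim)  # samples from N(0, I)
    x = (model.decode(z) > 0.5).astype(int)             # decoded bitstrings
    return [Individual(g, evaluate(g)) for g in x]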
def __init__(self, do_training=False):
    self.state_dict_filepath = 'vae_state_dict.pt'
    self.vae = vae.VAE()
    self.bottleneck_dim = 10
    self.EPOCHS = 10
    if do_training:
        self.train_vae()
    else:
        self.load_vae_state_dict()
def main(data="mnist", to_reload=None): if data == "mnist": input_data = load_mnist() control_plots = True elif data == "sentences": #input_data = load_textual_data("data/sentenceVectors-Emails-January.out", 0.9, 0.1) input_data = load_textual_data("data/docVectors-NASA.out", 0.9, 0.01) control_plots = False; if to_reload: # restore v = vae.VAE(ARCHITECTURE, HYPERPARAMS, meta_graph=to_reload) print("Loaded!") else: # train v = vae.VAE(ARCHITECTURE, HYPERPARAMS, log_dir=LOG_DIR) v.train(input_data, max_iter=MAX_ITER, max_epochs=MAX_EPOCHS, cross_validate=False, verbose=True, save=True, outdir=METAGRAPH_DIR, plots_outdir=PLOTS_DIR, plot_latent_over_time=False, control_plots=control_plots) if control_plots: all_plots(v, input_data)
def train(config):
    ''' SETTING HYPERPARAMETERS (DEFAULT) '''
    training_epoch = config.training_epoch
    z_dim = config.z_dim
    batch_size = config.batch_size

    n_data = mnist.train.num_examples
    total_batch = int(n_data / batch_size)
    total_iteration = training_epoch * total_batch

    # Build network
    VAE = vae.VAE(config)
    VAE.build()

    # Optimize network
    VAE.optimize(config)

    sess = tf.Session()
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    sess.run(init_op)
    saver = tf.train.Saver()

    print("Total number of data: " + str(n_data))
    print("Steps per epoch: {}".format(total_batch))
    print("Number of iterations: {}".format(total_iteration))

    for epoch in range(training_epoch):
        avg_cost = 0
        avg_recons = 0
        avg_regular = 0
        for i in range(total_batch):
            batch_xs, _ = mnist.train.next_batch(batch_size)
            _cost, _, _recons, _regular = sess.run(
                [VAE.cost, VAE.optimizer, VAE.recons, VAE.regular],
                feed_dict={VAE.X: batch_xs})
            avg_cost += _cost / total_batch
            avg_recons += _recons / total_batch
            avg_regular += _regular / total_batch
        if epoch % 10 == 0:
            print('Epoch:', '%04d' % (epoch + 1),
                  'cost =', '{:.9f}'.format(avg_cost),
                  'Recons_Loss =', '{:.9f}'.format(avg_recons),
                  'Regular_Loss =', '{:.9f}'.format(avg_regular))

    print("Training complete!")

    save_dir = './mode_z_dim_{}/'.format(z_dim)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = '{}VAE.ckpt'.format(save_dir)
    saver.save(sess, save_path)
    print("Saved model")

    return VAE, sess
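# The two tracked terms above correspond to the usual negative-ELBO split:
# cost = reconstruction term E_q[-log p(x|z)] plus the KL regularizer
# KL(q(z|x) || p(z)). For a diagonal-Gaussian posterior and a standard-normal
# prior, the KL term has a closed form (a standard identity, not code from
# this file; mu and log_var would be the encoder outputs):
# kl = -0.5 * tf.reduce_sum(1.0 + log_var - tf.square(mu) - tf.exp(log_var), axis=1)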
def __init__(self, action_space):
    self.vae = vae.VAE(dataset)
    # Create a new LSTM with the same structure as the one used for training
    # (so the pretrained model can be loaded), but ignore the target data.
    self.lstm = lstm_validation.LSTM(action_space, seq_len=2, batch_size=1)
    self.vae.load_json()
    self.lstm.load_json()
    # Initialize current_state to the initial state.
    self.current_state = self.lstm.sess.run(self.lstm.zero_state)
    self.restart = 0
    self.frame_count = None
    self.current_frame = self.z_to_img(self.current_state)
def main():
    obs = srk.Observation(np.loadtxt("data.txt"))
    data_category = np.loadtxt("category.txt")

    vae1 = vae.VAE(18, itr=200, batch_size=500)
    gmm1 = gmm.GMM(10, category=data_category)

    vae1.connect(obs)
    gmm1.connect(vae1)

    for i in range(5):
        print(i)
        vae1.update()
        gmm1.update()
def main():
    model = vae.VAE(latent_dim=args.latent_dim)
    model.cuda()
    model.load_state_dict(torch.load(args.ckpt_path)['state_dict'])
    model.eval()

    # bdmc uses simulated data from the model
    loader = simulate.simulate_data(model,
                                    batch_size=args.batch_size,
                                    n_batch=args.n_batch)

    # run bdmc
    forward_schedule = np.linspace(0., 1., args.chain_length)
    bdmc(model,
         loader,
         forward_schedule=forward_schedule,
         n_sample=args.iwae_samples)
def main():
    obs1 = srk.Observation(np.loadtxt("data1.txt"))
    obs2 = srk.Observation(np.loadtxt("data2.txt"))
    data_category = np.loadtxt("category.txt")

    vae1 = vae.VAE(18, itr=200, batch_size=500)
    gmm1 = gmm.GMM(10, category=data_category)
    mlda1 = mlda.MLDA(10, [200, 200], category=data_category)

    vae1.connect(obs1)
    gmm1.connect(vae1)
    mlda1.connect(obs2, gmm1)

    for i in range(5):
        print(i)
        vae1.update()
        gmm1.update()
        mlda1.update()
def EDAQ1(input_size, evaluate, maxitr, pop_size, train_size, elite_size,
          intermediate_dim, latent_dim, epochs):
    queue_size = 5
    best_in_samples = [0] * (maxitr - queue_size + 1 + 1)
    average_in_samples = [0] * (maxitr - queue_size + 1 + 1)
    diversity_in_samples = [0] * (maxitr - queue_size + 1 + 1)
    sample_size = pop_size - elite_size

    population = []
    pop_queue = generate_populaton(pop_size * queue_size, input_size, evaluate)
    model = vae.VAE(input_size, intermediate_dim, latent_dim, epochs)

    best_in_samples[0] = best_in_pop(pop_queue)
    average_in_samples[0] = average_in_pop(pop_queue)
    diversity_in_samples[0] = calc_divesity(pop_queue)

    for gen in range(maxitr - queue_size + 1):
        print("gen : " + str(gen + 1))
        train_queue = sorted(pop_queue, key=lambda p: p.fitness)
        train_data = make_train_data(train_queue, train_size)
        model.train(train_data)
        if gen == 0:
            elite = elite_select(pop_queue, elite_size)
        else:
            elite = elite_select(population, elite_size)
        samples = sample_from_model(model, input_size, sample_size, evaluate)
        print("best in samples = " + str(best_in_samples[gen]))
        print("average in samples = " + str(average_in_samples[gen]))
        population = elite + samples
        pop_queue = pop_queue[pop_size:] + population
        best_in_samples[gen + 1] = best_in_pop(population)
        average_in_samples[gen + 1] = average_in_pop(population)
        diversity_in_samples[gen + 1] = calc_divesity(population)
        print("diversity = " + str(calc_divesity(population)))
        print("")

    return best_in_samples, average_in_samples, diversity_in_samples
def reconstruct(self):
    """Reconstruct erased digits."""
    # Build the VAE using the previously trained parameters
    vae_net = vae.VAE(n_steps=self.n_steps,
                      erase_pix0=self.erase_pix0, erase_pix1=self.erase_pix1,
                      mode='recon', param_fn=self.param_fn,
                      recon_mode=self.recon_mode, nsteps_init=self.nsteps_init,
                      lr_adam=self.lr_adam, lr_sgd=self.lr_sgd,
                      nstep_save=self.nsteps_save)
    vae_net.build_graph()

    # Save the network
    self.vae_net = vae_net

    # Run the reconstruction optimization
    if not self.avg_only:
        vae_net.reconstruct(self.xtrue, restore=self.restore)

    if self.recon_mode == 'mmse':
        # For MMSE reconstruction, compute the values from the averages of
        # the samples.
        #self.xhat, self.zhat0, self.xhat_var, self.zhat0_var = vae_net.last_avg
        self.xhat, self.zhat0, self.xhat_var, self.zhat0_var = \
            vae_net.recon_mean_var(self.nsteps_burn)
    else:
        # For MAP reconstruction, get the values from the TensorFlow graph.
        with tf.Session() as sess:
            vae_net.restore(sess)
            [self.zhat0, self.xhat_logit, self.xhat] = sess.run(
                [vae_net.z_samp, vae_net.xhat_logit, vae_net.xhat])
            [self.loss_vals, self.loss_slice, self.loss_prior,
             self.pred_err, self.loss] = sess.run(
                [vae_net.loss_vals, vae_net.loss_slice, vae_net.loss_prior,
                 vae_net.pred_err, vae_net.loss],
                feed_dict={vae_net.x: self.xtrue})
        self.xhat_var = []
        self.zhat0_var = []
def get_model(model_type, layers=[32, 64], latent_dim=256, input_shape=32,
              use_bn=False, std=0.05):
    if model_type == 'AE':
        model = ae.AE(layers, latent_dim, input_shape, use_bn)
    elif model_type == 'CVAE':
        model = cvae.CVAE(layers, latent_dim, input_shape, std, use_bn)
    elif model_type == 'VAE':
        model = vae.VAE(layers, latent_dim)
    elif model_type == 'SBVAE':
        model = sbvae.SBVAE(layers, latent_dim)
    elif model_type == 'SBAE':
        model = sbae.SBAE(layers, latent_dim, input_shape, use_bn)
    elif model_type == 'SBAE_cl':
        model = sbae.SBAE(layers, latent_dim, input_shape, use_bn,
                          classification=True)
    elif model_type == 'KVAE':
        model = KVAE.KVAE(layers, latent_dim, input_shape)
    elif model_type == 'RKN':
        model = RKN.RKN(layers, latent_dim, input_shape)
    elif model_type == 'KAST':
        model = KAST()
    else:
        # Raising here avoids returning an unbound `model` below.
        raise ValueError("Unknown model type: {}".format(model_type))
    return model
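# Minimal usage sketch for the factory above; the hyperparameter values here
# are illustrative, not ones prescribed by the surrounding code.
model = get_model('VAE', layers=[32, 64], latent_dim=128)
sb_model = get_model('SBVAE', layers=[64, 128], latent_dim=50)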
def generate_pictures():
    gp_scaling = 10  # Lambda in the paper (unused here)
    parser = argparse.ArgumentParser(
        description='Output images for the VAE model')
    parser.add_argument('--generator_pt', type=str,
                        default="vae_mymodel_40.pth",
                        help='Generator parameters file')
    parser.add_argument('--n_pictures', type=int, default=1000,
                        help='Number of pictures to generate')
    parser.add_argument('--out_dir', type=str, default="samples",
                        help='Output directory for the generated pictures')
    args = parser.parse_args()

    the_vae = vae.VAE()
    the_vae.load_state_dict(torch.load(args.generator_pt))

    #cuda = torch.cuda.is_available()
    cuda = False

    for i in range(0, int(args.n_pictures / 64)):
        # Used to compare pictures from epoch to epoch
        fixed_z = torch.FloatTensor(args.n_pictures, 100, 1, 1).normal_(0, 1)
        if cuda:
            the_vae = the_vae.cuda()
            fixed_z = fixed_z.cuda()
        fake_pictures = the_vae.decoder(fixed_z)
        for j in range(64):
            torchvision.utils.save_image(
                fake_pictures[j].detach().cpu(),
                args.out_dir + '/vae/vae' + str(i * 64 + j) + '.png',
                normalize=True)
def main():
    model = vae.VAE(latent_dim=args.latent_dim).to(device).eval()
    model.load_state_dict(torch.load(args.ckpt_path)['state_dict'])

    # bdmc uses simulated data from the model
    loader = simulate.simulate_data(
        model,
        batch_size=args.batch_size,
        n_batch=args.n_batch,
        device=device
    )

    # run bdmc
    # Note: a linear schedule is used here for demo; a sigmoidal schedule might
    # be advantageous in certain settings, see Section 6 in the original paper
    # for more: https://arxiv.org/pdf/1511.02543.pdf
    forward_schedule = torch.linspace(0, 1, args.chain_length, device=device)
    bdmc(
        model,
        loader,
        forward_schedule=forward_schedule,
        n_sample=args.iwae_samples,
    )
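# The note above mentions a sigmoidal schedule (Section 6 of
# https://arxiv.org/pdf/1511.02543.pdf) as an alternative to the linear one.
# A minimal sketch of one way to build it, assuming the same chain_length
# argument; the rate constant 4.0 is an illustrative choice, not taken from
# the original code.
import torch

def sigmoidal_schedule(chain_length, rate=4.0):
    # Evenly spaced points pushed through a sigmoid, rescaled to span [0, 1].
    t = torch.linspace(-rate, rate, chain_length)
    s = torch.sigmoid(t)
    return (s - s[0]) / (s[-1] - s[0])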
def test_function(config, config_suffix=None):
    config_main = config['main']
    config_probe = config['probe']
    config_VAE = config['VAE']
    config_DDQN = config['DDQN']
    config_PER = config['PER']
    config_ablation = config['ablation']
    use_pi_e = config_ablation['use_pi_e']
    phase = config_main['phase']
    assert (phase == 'validation' or phase == 'test')
    domain = config_main['domain']

    # Domain-specific parameters (e.g. state and action space dimensions)
    if domain == '2D':
        domain_name = "config_2D.json"
    elif domain == 'acrobot':
        domain_name = "config_acrobot.json"
    elif domain == 'hiv':
        if config_suffix is not None:
            domain_name = "config_hiv{}.json".format(config_suffix)
        else:
            domain_name = "config_hiv.json"
    elif domain == 'mujoco':
        domain_name = "config_mujoco.json"
    elif domain == 'cancer':
        domain_name = "config_cancer.json"
    else:
        raise ValueError("test_ablation.py : domain not recognized")
    with open(domain_name) as f:
        config_domain = json.load(f)

    n_state = config_domain['n_state']
    n_action = config_domain['n_action']

    seed = config_main['seed']
    np.random.seed(seed)
    random.seed(seed)
    tf.set_random_seed(seed)

    N_instances = config_domain['N_test_instances']
    N_episodes = config_domain['N_test_episodes']
    test_steps = config_domain['test_steps']
    dir_name = config_main['dir_name']
    model_name = config_main['model_name']

    # Instantiate HPMDP
    hpmdp = HiPMDP.HiPMDP(domain, config_domain, phase)

    # Instantiate probe policy
    n_probe_steps = config_domain['traj_length']
    assert (n_probe_steps < test_steps)
    if use_pi_e:
        pi_e = probe.Probe(config_probe, n_state, n_action)
    else:
        # initial z
        z_avg = pickle.load(open('../results/%s/z_avg.p' % dir_name, 'rb'))

    # Instantiate VAE
    buffer_size_vae = config_VAE['buffer_size']
    batch_size_vae = config_VAE['batch_size']
    del config_VAE['buffer_size']
    vae = vae_import.VAE(n_state, n_action, n_probe_steps, seed=seed,
                         **config_VAE)

    # Instantiate control policy
    if config_DDQN['activate']:
        pi_c = ddqn.DDQN(config_DDQN, n_state, n_action,
                         config_PER['activate'], config_VAE['n_latent'])

    # TF session
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)

    saver = tf.train.Saver()
    print("Restoring variables from %s" % dir_name)
    saver.restore(sess, '../results/%s/%s' % (dir_name, model_name))

    reward_total = 0
    cumulative_reward = np.zeros((test_steps, N_instances))

    # Iterate through random instances from the HPMDP
    for idx_instance in range(1, N_instances + 1):
        hpmdp.switch_instance()
        print("idx_instance", idx_instance, " | Switching instance to",
              hpmdp.instance_param_set)

        # N_episodes should be 1, but we let it be flexible in case needed
        for idx_episode in range(1, N_episodes + 1):
            reward_episode = 0
            collected_probe_traj = False
            while not collected_probe_traj:
                # list of (state, action) pairs
                traj_probe = []
                state = hpmdp.reset()
                episode_step = 0
                done = False
                probe_finished_early = False
                # Generate probe trajectory
                for step in range(1, n_probe_steps + 1):
                    if use_pi_e:
                        action = pi_e.run_actor(state, sess)
                    else:
                        action = pi_c.run_actor(state, z_avg, sess, epsilon=0)
                    # print("Probe step %d action %d" % (step, action))
                    action_1hot = np.zeros(n_action)
                    action_1hot[action] = 1
                    traj_probe.append((state, action_1hot))
                    state_next, reward, done = hpmdp.step(action)
                    reward_episode += reward
                    cumulative_reward[episode_step, idx_instance - 1] = reward_episode
                    state = state_next
                    episode_step += 1
                    if done and step < n_probe_steps:
                        probe_finished_early = True
                        print("test_ablation.py : done is True while generating probe trajectory")
                        break
                if not probe_finished_early:
                    collected_probe_traj = True

            # Use VAE to estimate hidden parameter
            z = vae.encode(sess, traj_probe)
            print(z)

            if config_DDQN['activate']:
                # Start control policy
                while not done and episode_step < test_steps:
                    # Use DDQN with prioritized replay for this
                    action = pi_c.run_actor(state, z, sess, epsilon=0)
                    state_next, reward, done = hpmdp.step(action)
                    reward_episode += reward
                    cumulative_reward[episode_step, idx_instance - 1] = reward_episode
                    state = state_next
                    episode_step += 1
                print(reward_episode)

            # If episode ended earlier than test_steps, fill in the
            # rest of the cumulative rewards with the last value
            if episode_step < test_steps:
                remaining = np.ones(test_steps - episode_step) * reward_episode
                cumulative_reward[episode_step:, idx_instance - 1] = remaining

            reward_total += reward_episode

    header = 'Step'
    for idx in range(1, N_instances + 1):
        header += ',R_%d' % idx
    indices = np.arange(1, test_steps + 1).reshape(test_steps, 1)
    concated = np.concatenate([indices, cumulative_reward], axis=1)
    save_loc = '_'.join(dir_name.split('_')[:-1])
    os.makedirs('../results/%s' % save_loc, exist_ok=True)
    run_number = dir_name.split('_')[-1]
    np.savetxt('../results/%s/test_%s.csv' % (save_loc, run_number),
               concated, delimiter=',', fmt='%.3e', header=header)

    print("Avg episode reward",
          reward_total / float(N_instances * N_episodes))
def main(args):
    model_dir = Path('./log') / "{}-{}-{}-{}".format(
        args.n_latent, args.hidden_units, args.importance_weighting,
        args.not_weight_normalization
    ) / "epochs={}-batch_size={}-n_samples={}-lr={}".format(
        args.epochs, args.batch_size, args.n_samples, args.lr)
    if not model_dir.exists():
        run_num = 1
    else:
        exst_run_nums = [
            int(str(folder.name).split('run')[1])
            for folder in model_dir.iterdir()
            if str(folder.name).startswith('run')
        ]
        if len(exst_run_nums) == 0:
            run_num = 1
        else:
            run_num = max(exst_run_nums) + 1
    curr_run = 'run%i' % run_num
    log_dir = model_dir / curr_run
    os.makedirs(log_dir)
    print("making directory", str(log_dir))

    data = scipy.sparse.load_npz(
        "/newNAS/Workspaces/DRLGroup/xiangyuliu/data_no_black_5.1.npz").A
    data_blacklist = scipy.sparse.load_npz(
        "/newNAS/Workspaces/DRLGroup/xiangyuliu/data_blacklist_5.1.npz").A
    data = np.concatenate([data, data_blacklist], axis=0)
    print(data.shape)

    validation = np.random.choice(data.shape[0], size=1000)
    train = [i for i in range(data.shape[0]) if i not in validation]
    train_data = data[train]
    validation_data = data[validation]
    train_data = vae.Dataset(train_data, batch_size=args.batch_size)
    validation_data = vae.Dataset(validation_data, batch_size=args.batch_size)

    model_path = "/newNAS/Workspaces/DRLGroup/xiangyuliu/Computer-Network/log/50-1000-True-True/epochs=1000 batch_size=1000 n_samples=10 lr=0.001/run1"
    with open(os.path.join(model_path, "model.pkl"), 'rb') as f:
        model = dill.load(f)
    # NOTE: the model loaded above is immediately replaced by a freshly
    # constructed VAE below.
    model = vae.VAE(n_inputs=data.shape[1],
                    n_latent=args.n_latent,
                    n_encoder=[args.hidden_units, args.hidden_units],
                    n_decoder=[args.hidden_units, args.hidden_units],
                    visible_type='binary',
                    nonlinearity=tf.nn.relu,
                    weight_normalization=args.not_weight_normalization,
                    importance_weighting=args.importance_weighting,
                    optimizer=args.optimizer,
                    learning_rate=args.lr,
                    model_dir=str(log_dir))
    with open(log_dir / "model.pkl", 'wb') as f:
        dill.dump(model, f)

    print("begin to fit")
    model.fit(train_data,
              validation_data=validation_data,
              epochs=args.epochs,
              shuffle=args.not_shuffle,
              summary_steps=args.summary_steps,
              init_feed_dict={'batch_size': args.batch_size},
              batch_size=args.batch_size,
              n_samples=args.n_samples)
def test_function(config, config_suffix=None):
    config_main = config['main']
    config_VAE = config['VAE']
    config_DDQN = config['DDQN']
    config_PER = config['PER']
    phase = config_main['phase']
    assert (phase == 'validation' or phase == 'test')
    domain = config_main['domain']

    # Domain-specific parameters (e.g. state and action space dimensions)
    if domain == '2D':
        domain_name = "config_2D.json"
    elif domain == 'acrobot':
        domain_name = "config_acrobot.json"
    elif domain == 'hiv':
        if config_suffix is not None:
            domain_name = "config_hiv{}.json".format(config_suffix)
        else:
            domain_name = "config_hiv.json"
    elif domain == 'mujoco':
        domain_name = "config_mujoco.json"
    elif domain == 'cancer':
        domain_name = "config_cancer.json"
    else:
        raise ValueError("train.py : domain not recognized")
    with open(domain_name) as f:
        config_domain = json.load(f)

    n_state = config_domain['n_state']
    n_action = config_domain['n_action']

    seed = config_main['seed']
    np.random.seed(seed)
    random.seed(seed)
    tf.set_random_seed(seed)

    N_instances = config_domain['N_test_instances']
    N_episodes = config_domain['N_test_episodes']
    test_steps = config_domain['test_steps']
    dir_name = config_main['dir_name']
    model_name = config_main['model_name']

    # Instantiate HPMDP
    hpmdp = HiPMDP.HiPMDP(domain, config_domain, phase)

    # Length of trajectory for input to VAE
    n_vae_steps = config_domain['traj_length']
    n_latent = config_VAE['n_latent']
    z = np.zeros(config_VAE['n_latent'], dtype=np.float32)
    with open('../results/%s/std_max.pkl' % dir_name, 'rb') as f:
        std_max = pickle.load(f)

    # Instantiate VAE
    buffer_size_vae = config_VAE['buffer_size']
    batch_size_vae = config_VAE['batch_size']
    del config_VAE['buffer_size']
    vae = vae_import.VAE(n_state, n_action, n_vae_steps, seed=seed,
                         **config_VAE)

    # Instantiate control policy
    if config_DDQN['activate']:
        pi_c = ddqn.DDQN(config_DDQN, n_state, n_action,
                         config_PER['activate'], config_VAE['n_latent'])

    # TF session
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)

    saver = tf.train.Saver()
    print("Restoring variables from %s" % dir_name)
    saver.restore(sess, '../results/%s/%s' % (dir_name, model_name))

    reward_total = 0
    cumulative_reward = np.zeros((test_steps, N_instances))
    list_times = []

    # Iterate through random instances from the HPMDP
    for idx_instance in range(1, N_instances + 1):
        hpmdp.switch_instance()
        print("idx_instance", idx_instance, " | Switching instance to",
              hpmdp.instance_param_set)
        t_start = time.time()
        for idx_episode in range(1, N_episodes + 1):
            # rolling window of (state, action) pairs
            traj_for_vae = []
            eta = 1.0  # range [0,1]; 1 means the policy should act to maximize probe reward
            z = np.zeros(config_VAE['n_latent'], dtype=np.float32)
            reward_episode = 0
            state = hpmdp.reset()
            episode_step = 0
            done = False
            while not done and episode_step < test_steps:
                action = pi_c.run_actor(state, z, sess, epsilon=0, eta=eta)
                action_1hot = np.zeros(n_action)
                action_1hot[action] = 1
                traj_for_vae.append((state, action_1hot))
                if len(traj_for_vae) == n_vae_steps + 1:
                    traj_for_vae = traj_for_vae[1:]
                state_next, reward, done = hpmdp.step(action)
                reward_episode += reward
                cumulative_reward[episode_step, idx_instance - 1] = reward_episode

                # Get z_next and eta_next, because they are considered part
                # of the augmented MDP state
                if len(traj_for_vae) == n_vae_steps:
                    std = vae.get_std(sess, traj_for_vae)
                    std = std / std_max  # element-wise normalization, now each element is between [0,1]
                    eta_next = np.sum(std) / n_latent  # scalar between [0,1]
                    # in case std_max during training isn't large enough
                    eta_next = min(1.0, eta_next)
                    # Use VAE to update hidden parameter
                    z_next = vae.encode(sess, traj_for_vae)
                else:
                    z_next = z
                    eta_next = eta

                state = state_next
                eta = eta_next
                z = z_next
                episode_step += 1

            # If episode ended earlier than test_steps, fill in the
            # rest of the cumulative rewards with the last value
            if episode_step < test_steps:
                remaining = np.ones(test_steps - episode_step) * reward_episode
                cumulative_reward[episode_step:, idx_instance - 1] = remaining

            reward_total += reward_episode
        list_times.append(time.time() - t_start)

    header = 'Step'
    for idx in range(1, N_instances + 1):
        header += ',R_%d' % idx
    indices = np.arange(1, test_steps + 1).reshape(test_steps, 1)
    concated = np.concatenate([indices, cumulative_reward], axis=1)
    save_loc = '_'.join(dir_name.split('_')[:-1])
    os.makedirs('../results/%s' % save_loc, exist_ok=True)
    run_number = dir_name.split('_')[-1]
    np.savetxt('../results/%s/test_%s.csv' % (save_loc, run_number),
               concated, delimiter=',', fmt='%.3e', header=header)
    with open('../results/%s/test_time_%s.pkl' % (save_loc, run_number), 'wb') as f:
        pickle.dump(list_times, f)

    print("Avg episode reward",
          reward_total / float(N_instances * N_episodes))
def __init__(self, input_dim, n_actions):
    super().__init__()
    n_action_dims = 1
    self.vae = vae.VAE(input_dim, z_dim).to(device)
    self.state_trans = statetransition.stateTransModel(n_action_dims)
    self.reward_conv = statetransition.rewardConv()
def train_function(config, config_suffix=None):
    config_main = config['main']
    config_probe = config['probe']
    autoencoder = config_main['autoencoder']
    if autoencoder == 'VAE':
        config_VAE = config['VAE']
    else:
        raise ValueError("Other autoencoders not supported")
    config_DDQN = config['DDQN']
    config_PER = config['PER']
    phase = config_main['phase']
    assert (phase == 'train')
    domain = config_main['domain']

    # Domain-specific parameters (e.g. state and action space dimensions)
    if domain == '2D':
        domain_name = "config_2D.json"
    elif domain == 'acrobot':
        domain_name = "config_acrobot.json"
    elif domain == 'hiv':
        if config_suffix is not None:
            domain_name = "config_hiv{}.json".format(config_suffix)
        else:
            domain_name = "config_hiv.json"
    elif domain == 'lander':
        domain_name = "config_lander.json"
    elif domain == 'cancer':
        domain_name = "config_cancer.json"
    else:
        raise ValueError("train.py : domain not recognized")
    with open(domain_name) as f:
        config_domain = json.load(f)

    n_state = config_domain['n_state']
    n_action = config_domain['n_action']
    min_samples_before_train = config_domain['min_samples_before_train']

    seed = config_main['seed']
    np.random.seed(seed)
    random.seed(seed)
    tf.set_random_seed(seed)

    N_instances = config_main['N_instances']
    N_episodes = config_main['N_episodes']
    period = config_main['period']
    dir_name = config_main['dir_name']
    model_name = config_main['model_name']
    os.makedirs('../results/%s' % dir_name, exist_ok=True)

    # Instantiate HPMDP
    hpmdp = HiPMDP.HiPMDP(domain, config_domain)

    # Instantiate probe policy
    n_probe_steps = config_domain['traj_length']
    pi_e = probe.Probe(config_probe, n_state, n_action)

    # Instantiate VAE
    buffer_size_vae = config_VAE['buffer_size']
    batch_size_vae = config_VAE['batch_size']
    del config_VAE['buffer_size']
    if autoencoder == 'VAE':
        vae = vae_import.VAE(n_state, n_action, n_probe_steps, seed=seed,
                             **config_VAE)
    else:
        raise ValueError('Other autoencoders not supported')

    # Instantiate control policy
    if config_DDQN['activate']:
        pi_c = ddqn.DDQN(config_DDQN, n_state, n_action,
                         config_PER['activate'], config_VAE['n_latent'])
        epsilon_start = config_DDQN['epsilon_start']
        epsilon_end = config_DDQN['epsilon_end']
        epsilon_decay = np.exp(
            np.log(epsilon_end / epsilon_start) / (N_instances * N_episodes))
        steps_per_train = config_DDQN['steps_per_train']

    # TF session
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)
    sess.run(tf.global_variables_initializer())
    if config_DDQN['activate']:
        sess.run(pi_c.list_initialize_target_ops)
        epsilon = epsilon_start
    if config_VAE['dual']:
        sess.run(vae.list_equate_dual_ops)

    writer = tf.summary.FileWriter('../results/%s' % dir_name, sess.graph)
    saver = tf.train.Saver()

    # use the DQN version of the replay, so instance_count and bnn-specific
    # params do not matter
    exp_replay_param = {
        'episode_count': N_instances * N_episodes,
        'instance_count': 0,
        'max_task_examples': hpmdp.max_steps_per_episode,
        'ddqn_batch_size': config_DDQN['batch_size'],
        'num_strata_samples': config_PER['num_strata_samples'],
        'PER_alpha': config_PER['alpha'],
        'PER_beta_zero': config_PER['beta_zero'],
        'bnn_batch_size': 0,
        'bnn_start': 0,
        'dqn_start': min_samples_before_train
    }
    buf = ExperienceReplay.ExperienceReplay(
        exp_replay_param, buffer_size=config_PER['buffer_size'])

    # Logging
    header = "Episode,R_avg,R_p\n"
    with open("../results/%s/log.csv" % dir_name, 'w') as f:
        f.write(header)

    reward_period = 0
    reward_p_period = 0
    list_trajs = []  # circular buffer to store probe trajectories for VAE
    idx_traj = 0  # counter for list_trajs
    control_step = 0
    train_count_probe = 1
    train_count_vae = 1
    train_count_control = 1
    total_episodes = 0
    t_start = time.time()

    # Iterate through random instances from the HPMDP
    for idx_instance in range(1, N_instances + 1):
        hpmdp.switch_instance()
        print("idx_instance", idx_instance, " | Switching instance to",
              hpmdp.instance_param_set)

        # Iterate through many episodes
        for idx_episode in range(1, N_episodes + 1):
            total_episodes += 1
            # list of (state, action) pairs
            traj_probe = []
            state = hpmdp.reset()
            done = False
            reward_episode = 0

            # Generate probe trajectory
            probe_finished_early = False
            for step in range(1, n_probe_steps + 1):
                action = pi_e.run_actor(state, sess)
                action_1hot = np.zeros(n_action)
                action_1hot[action] = 1
                traj_probe.append((state, action_1hot))
                state_next, reward, done = hpmdp.step(action)
                state = state_next
                reward_episode += reward
                if done and step < n_probe_steps:
                    probe_finished_early = True
                    print("train.py : done is True while generating probe trajectory")
                    break

            # Skip over pi_e and VAE training if probe finished early
            if probe_finished_early:
                continue

            if idx_traj >= len(list_trajs):
                list_trajs.append(traj_probe)
            else:
                list_trajs[idx_traj] = traj_probe
            idx_traj = (idx_traj + 1) % buffer_size_vae

            # Compute probe reward using VAE
            if config_probe['reward'] == 'vae':
                reward_e = vae.compute_lower_bound(traj_probe, sess)
            elif config_probe['reward'] == 'total_variation':
                reward_e = pi_e.compute_reward(traj_probe)
            elif config_probe['reward'] == 'negvae':
                # this reward encourages maximizing entropy
                reward_e = -vae.compute_lower_bound(traj_probe, sess)

            # Write Tensorboard at the final episode of every instance
            if total_episodes % period == 0:
                summarize = True
            else:
                summarize = False

            # Train probe policy
            pi_e.train_step(sess, traj_probe, reward_e, train_count_probe,
                            summarize, writer)
            train_count_probe += 1

            # Train VAE
            if len(list_trajs) >= batch_size_vae:
                vae.train_step(sess, list_trajs, train_count_vae, summarize,
                               writer)
                train_count_vae += 1

            # Use VAE to estimate hidden parameter
            z = vae.encode(sess, traj_probe)

            if config_DDQN['activate']:
                # Start control policy
                summarized = False
                while not done:
                    # Use DDQN with prioritized replay for this
                    action = pi_c.run_actor(state, z, sess, epsilon)
                    state_next, reward, done = hpmdp.step(action)
                    control_step += 1
                    reward_episode += reward
                    buf.add(np.reshape(
                        np.array([state, action, reward, state_next, done, z]),
                        (1, 6)))
                    state = state_next
                    if control_step >= min_samples_before_train and control_step % steps_per_train == 0:
                        batch, IS_weights, indices = buf.sample(control_step)
                        if not summarized:
                            # Write TF summary at first train step of the
                            # last episode of every instance
                            td_loss = pi_c.train_step(sess, batch, IS_weights,
                                                      indices,
                                                      train_count_control,
                                                      summarize, writer)
                            summarized = True
                        else:
                            td_loss = pi_c.train_step(sess, batch, IS_weights,
                                                      indices,
                                                      train_count_control,
                                                      False, writer)
                        train_count_control += 1
                        if config_PER['activate']:
                            buf.update_priorities(np.hstack(
                                (np.reshape(td_loss, (len(td_loss), -1)),
                                 np.reshape(indices, (len(indices), -1)))))

            reward_period += reward_episode
            reward_p_period += reward_e

            if epsilon > epsilon_end:
                epsilon *= epsilon_decay

            # Logging
            if total_episodes % period == 0:
                s = "%d,%.2f,%.2f\n" % (total_episodes,
                                        reward_period / float(period),
                                        reward_p_period / float(period))
                print(s)
                with open("../results/%s/log.csv" % dir_name, 'a') as f:
                    f.write(s)
                if config_domain['save_threshold'] and reward_period / float(period) > config_domain['save_threshold']:
                    saver.save(sess, '../results/%s/%s.%d' %
                               (dir_name, model_name, total_episodes))
                reward_period = 0
                reward_p_period = 0

    with open("../results/%s/time.txt" % dir_name, 'a') as f:
        f.write("%.5e" % (time.time() - t_start))

    saver.save(sess, '../results/%s/%s' % (dir_name, model_name))
import vizdoomgym
'''
Before running, install vizdoomgym:
git clone https://github.com/simontudo/vizdoomgym.git
cd vizdoomgym
pip install -e .
'''

FRAME_SHAPE = [64, 64]

# Init
env = gym.make('VizdoomTakeCover-v0')
dataset = dataset.Dataset(env, FRAME_SHAPE)
#dataset.create_new_dataset(temporary=False)
dataset.load_dataset()

vae = vae.VAE(FRAME_SHAPE, dataset)
vae.load_json()
vae.train_vae(checkpoint=True)
vae.save_json()

chosen_img = dataset.dataset[850]
imgplot = plt.imshow(chosen_img)
plt.show()
imgplot = plt.imshow(vae.synthesize_image(chosen_img))
plt.show()
def train_function(config, config_suffix=None):
    config_main = config['main']
    config_VAE = config['VAE']
    config_DDQN = config['DDQN']
    config_PER = config['PER']
    config_ablation = config['ablation']
    eq_rew = config_ablation['equalize_reward']
    domain = config_main['domain']

    # Domain-specific parameters (e.g. state and action space dimensions)
    if domain == '2D':
        domain_name = "config_2D.json"
    elif domain == 'acrobot':
        domain_name = "config_acrobot.json"
    elif domain == 'hiv':
        if config_suffix is not None:
            domain_name = "config_hiv{}.json".format(config_suffix)
        else:
            domain_name = "config_hiv.json"
    elif domain == 'mujoco':
        domain_name = "config_mujoco.json"
    elif domain == 'cancer':
        domain_name = "config_cancer.json"
    else:
        raise ValueError("train.py : domain not recognized")
    with open(domain_name) as f:
        config_domain = json.load(f)

    n_state = config_domain['n_state']
    n_action = config_domain['n_action']
    min_samples_before_train = config_domain['min_samples_before_train']

    seed = config_main['seed']
    np.random.seed(seed)
    random.seed(seed)
    tf.set_random_seed(seed)

    N_instances = config_main['N_instances']
    N_episodes = config_main['N_episodes']
    period = config_main['period']
    dir_name = config_main['dir_name']
    model_name = config_main['model_name']
    os.makedirs('../results/%s' % dir_name, exist_ok=True)

    # Instantiate HPMDP
    hpmdp = HiPMDP.HiPMDP(domain, config_domain)

    # Length of trajectory for input to VAE
    n_vae_steps = config_domain['traj_length']
    n_latent = config_VAE['n_latent']
    z = np.zeros(config_VAE['n_latent'], dtype=np.float32)
    eta = 1.0  # range [0,1]; 1 means the policy should act to maximize probe reward
    std_max = -np.inf * np.ones(config_VAE['n_latent'], dtype=np.float32)

    # Instantiate VAE
    buffer_size_vae = config_VAE['buffer_size']
    batch_size_vae = config_VAE['batch_size']
    del config_VAE['buffer_size']
    vae = vae_import.VAE(n_state, n_action, n_vae_steps, seed=seed,
                         **config_VAE)

    # Instantiate control policy
    if config_DDQN['activate']:
        pi_c = ddqn.DDQN(config_DDQN, n_state, n_action,
                         config_PER['activate'], config_VAE['n_latent'])
        epsilon_start = config_DDQN['epsilon_start']
        epsilon_end = config_DDQN['epsilon_end']
        epsilon_decay = np.exp(
            np.log(epsilon_end / epsilon_start) / (N_episodes * N_instances))
        steps_per_train = config_DDQN['steps_per_train']

    # TF session
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)
    sess.run(tf.global_variables_initializer())
    if config_DDQN['activate']:
        sess.run(pi_c.list_initialize_target_ops)
        epsilon = epsilon_start
    if config_VAE['dual']:
        sess.run(vae.list_equate_dual_ops)
    writer = tf.summary.FileWriter('../results/%s' % dir_name, sess.graph)
    saver = tf.train.Saver()

    # use the DQN version of the replay, so instance_count and bnn-specific
    # params do not matter
    exp_replay_param = {
        'episode_count': N_instances * N_episodes,
        'instance_count': 0,
        'max_task_examples': hpmdp.max_steps_per_episode,
        'ddqn_batch_size': config_DDQN['batch_size'],
        'num_strata_samples': config_PER['num_strata_samples'],
        'PER_alpha': config_PER['alpha'],
        'PER_beta_zero': config_PER['beta_zero'],
        'bnn_batch_size': 0,
        'bnn_start': 0,
        'dqn_start': min_samples_before_train
    }
    buf = ExperienceReplay.ExperienceReplay(
        exp_replay_param, buffer_size=config_PER['buffer_size'])

    # running mean and variance of MDP reward and VAE lower bound
    if eq_rew:
        stat_counter = 0
        r_mdp_mean = 0
        r_mdp_var = 0
        r_probe_mean = 0
        r_probe_var = 0

    # Logging
    header = "Episode,R_avg,R_e_avg\n"
    with open("../results/%s/log.csv" % dir_name, 'w') as f:
        f.write(header)

    reward_period = 0
    reward_e_period = 0
    list_trajs = []  # circular buffer to store probe trajectories for VAE
    idx_traj = 0  # counter for list_trajs
    control_step = 0
    train_count_vae = 1
    train_count_control = 1
    total_episodes = 0
    t_start = time.time()

    # Iterate through random instances from the HPMDP
    for idx_instance in range(1, N_instances + 1):
        hpmdp.switch_instance()
        print("idx_instance", idx_instance, " | Switching instance to",
              hpmdp.instance_param_set)

        # Iterate through many episodes
        for idx_episode in range(1, N_episodes + 1):
            total_episodes += 1
            eta = 1.0
            z = np.zeros(config_VAE['n_latent'], dtype=np.float32)
            if total_episodes % period == 0:
                list_eta = [eta]
            # rolling window of (state, action) pairs
            traj_for_vae = []
            state = hpmdp.reset()
            done = False
            reward_episode = 0
            reward_e_episode = 0
            step_episode = 0
            if total_episodes % period == 0:
                summarize = True
            else:
                summarize = False
            summarized = False

            while not done:
                action = pi_c.run_actor(state, z, sess, epsilon, eta)
                control_step += 1
                action_1hot = np.zeros(n_action)
                action_1hot[action] = 1
                traj_for_vae.append((state, action_1hot))
                if len(traj_for_vae) == n_vae_steps + 1:
                    traj_for_vae = traj_for_vae[1:]
                state_next, reward, done = hpmdp.step(action)
                step_episode += 1

                if eq_rew:
                    stat_counter += 1
                    # update MDP reward mean and var
                    r_mdp_mean_prev = r_mdp_mean
                    r_mdp_mean = 1 / float(stat_counter) * reward + \
                        (stat_counter - 1) / float(stat_counter) * r_mdp_mean
                    r_mdp_var = r_mdp_var + \
                        (reward - r_mdp_mean_prev) * (reward - r_mdp_mean)

                if len(traj_for_vae) == n_vae_steps:
                    # Compute probe reward using VAE
                    reward_e = vae.compute_lower_bound(traj_for_vae, sess)[0]
                    if eq_rew:
                        # Update probe reward mean and var
                        r_probe_mean_prev = r_probe_mean
                        r_probe_mean = 1 / float(stat_counter) * reward_e + \
                            (stat_counter - 1) / float(stat_counter) * r_probe_mean
                        r_probe_var = r_probe_var + \
                            (reward_e - r_probe_mean_prev) * (reward_e - r_probe_mean)
                        # Scale probe reward into MDP reward
                        reward_e = ((reward_e - r_probe_mean) /
                                    np.sqrt(r_probe_var / stat_counter) +
                                    r_mdp_mean) * np.sqrt(r_mdp_var / stat_counter)
                    reward_total = eta * reward_e + (1 - eta) * reward
                else:
                    reward_e = 0.0
                    reward_total = reward

                # Get z_next and eta_next, because they are considered part
                # of the augmented MDP state
                if len(traj_for_vae) == n_vae_steps:
                    std = vae.get_std(sess, traj_for_vae)
                    # Update max
                    for idx in range(n_latent):
                        if std[idx] >= std_max[idx]:
                            std_max[idx] = std[idx]
                    std = std / std_max  # element-wise normalization, now each element is between [0,1]
                    eta_next = np.sum(std) / n_latent  # scalar between [0,1]
                    # Use VAE to update hidden parameter
                    z_next = vae.encode(sess, traj_for_vae)
                else:
                    z_next = z
                    eta_next = eta

                if total_episodes % period == 0:
                    list_eta.append(eta_next)

                # Use total reward to train policy
                buf.add(np.reshape(
                    np.array([state, z, eta, action, reward_total,
                              state_next, z_next, eta_next, done]),
                    (1, 9)))
                state = state_next
                eta = eta_next
                z = z_next

                # Note that for evaluation purposes we record the MDP reward
                # separately
                reward_episode += reward
                reward_e_episode += reward_e

                # Store non-overlapping trajectories for training VAE
                # if len(traj_for_vae) == n_vae_steps:
                if step_episode % n_vae_steps == 0:
                    if idx_traj >= len(list_trajs):
                        list_trajs.append(list(traj_for_vae))  # must make a new list
                    else:
                        list_trajs[idx_traj] = list(traj_for_vae)
                    idx_traj = (idx_traj + 1) % buffer_size_vae

                if control_step >= min_samples_before_train and control_step % steps_per_train == 0:
                    batch, IS_weights, indices = buf.sample(control_step)
                    if not summarized:
                        # Write TF summary at first train step of the last
                        # episode of every instance
                        td_loss = pi_c.train_step(sess, batch, IS_weights,
                                                  indices,
                                                  train_count_control,
                                                  summarize, writer)
                        summarized = True
                    else:
                        td_loss = pi_c.train_step(sess, batch, IS_weights,
                                                  indices,
                                                  train_count_control,
                                                  False, writer)
                    train_count_control += 1
                    if config_PER['activate']:
                        buf.update_priorities(np.hstack(
                            (np.reshape(td_loss, (len(td_loss), -1)),
                             np.reshape(indices, (len(indices), -1)))))

            reward_period += reward_episode
            reward_e_period += reward_e_episode

            if epsilon > epsilon_end:
                epsilon *= epsilon_decay

            # Train VAE at the end of each episode
            if len(list_trajs) >= batch_size_vae:
                vae.train_step(sess, list_trajs, train_count_vae, summarize,
                               writer)
                train_count_vae += 1

            # Logging
            if total_episodes % period == 0:
                s = "%d,%.2f,%.2f\n" % (total_episodes,
                                        reward_period / float(period),
                                        reward_e_period / float(period))
                print(s)
                with open("../results/%s/log.csv" % dir_name, 'a') as f:
                    f.write(s)
                with open("../results/%s/eta.csv" % dir_name, 'a') as f:
                    eta_string = ','.join(['%.2f' % x for x in list_eta])
                    eta_string += '\n'
                    f.write(eta_string)
                if config_domain['save_threshold'] and reward_period / float(period) > config_domain['save_threshold']:
                    saver.save(sess, '../results/%s/%s.%d' %
                               (dir_name, model_name, total_episodes))
                reward_period = 0
                reward_e_period = 0

    with open("../results/%s/time.txt" % dir_name, 'a') as f:
        f.write("%.5e" % (time.time() - t_start))
    with open('../results/%s/std_max.pkl' % dir_name, 'wb') as f:
        pickle.dump(std_max, f)
    if eq_rew:
        reward_scaling = np.array([
            r_mdp_mean, np.sqrt(r_mdp_var / stat_counter),
            r_probe_mean, np.sqrt(r_probe_var / stat_counter)
        ])
        with open('../results/%s/reward_scaling.pkl' % dir_name, 'wb') as f:
            pickle.dump(reward_scaling, f)

    saver.save(sess, '../results/%s/%s' % (dir_name, model_name))
def main(docopts):
    docopts["--batch_size"] = int(docopts["--batch_size"])
    docopts["--gpu"] = int(docopts["--gpu"])
    docopts["--lambda_l2_reg"] = float(docopts["--lambda_l2_reg"])
    docopts["--learning_rate"] = float(docopts["--learning_rate"])
    docopts["--max_epochs"] = int(docopts["--max_epochs"])

    # Logging
    logging.basicConfig(level=logging.INFO)

    #
    # Following http://nbviewer.jupyter.org/github/dmlc/mxnet/blob/master/example/notebooks/simple_bind.ipynb
    #
    X, Y = data.get_mnist()
    data_iter = mx.io.NDArrayIter(data=X, label=Y,
                                  batch_size=docopts["--batch_size"],
                                  shuffle=True)

    if docopts["train"] or docopts["continue"]:
        m = vae.VAE(ARCHITECTURE)
        sym = m.training_model()
        dbatch = data_iter.next()
        exe = sym.simple_bind(ctx=mx.gpu(docopts["--gpu"]),
                              data=dbatch.data[0].shape)
        args = exe.arg_dict
        grads = exe.grad_dict
        outputs = dict(zip(sym.list_outputs(), exe.outputs))

        if docopts["continue"]:
            loaded_args = mx.nd.load(os.path.join(docopts["--log"], "parameters"))
            for name in args:
                if name != "data":
                    args[name][:] = loaded_args[name]

        # Initialize parameters
        xavier = mx.init.Xavier()
        for name, nd_array in args.items():
            if name != "data":
                xavier(name, nd_array)

        optimizer = mx.optimizer.create(name="adam",
                                        learning_rate=docopts["--learning_rate"],
                                        wd=docopts["--lambda_l2_reg"])
        updater = mx.optimizer.get_updater(optimizer)

        # Train
        keys = sym.list_arguments()

        if docopts["--visualize"]:
            # Random image
            last_image_time = time.time()
            plt.ion()
            figure = plt.figure()
            imshow = plt.imshow(np.random.uniform(size=(28, 28)), cmap="gray")

        for epoch in range(docopts["--max_epochs"]):
            data_iter.reset()
            epoch_start_time = time.time()
            batch = 0
            for dbatch in data_iter:
                args["data"][:] = dbatch.data[0]
                exe.forward(is_train=True)
                exe.backward()
                if docopts["--visualize"]:
                    # Throttle the refresh rate
                    if time.time() - last_image_time > 0.1:
                        last_image_time = time.time()
                        imshow.set_data(exe.outputs[2][
                            random.randint(0, docopts["--batch_size"] - 1)
                        ].reshape((28, 28)).asnumpy())
                        figure.canvas.draw()
                        figure.canvas.flush_events()
                for index, key in enumerate(keys):
                    updater(index=index, grad=grads[key], weight=args[key])
                kl_divergence = exe.outputs[3].asnumpy()
                cross_entropy = exe.outputs[4].asnumpy()
                logging.info("Batch %d: %f mean kl_divergence", batch,
                             kl_divergence.mean())
                logging.info("Batch %d: %f mean cross_entropy", batch,
                             cross_entropy.mean())
                batch += 1
            logging.info("Finish training epoch %d in %f seconds",
                         epoch, time.time() - epoch_start_time)

        # Save model parameters (excluding data, to simplify loading / binding)
        mx.nd.save(os.path.join(docopts["--log"], "parameters"),
                   {name: arr for name, arr in args.items() if name != "data"})

    elif docopts["test"]:
        from matplotlib.widgets import Button
        m = vae.VAE(ARCHITECTURE)
        sym = m.testing_model()
        exe = sym.simple_bind(ctx=mx.gpu(docopts["--gpu"]),
                              data=(docopts["--batch_size"], ARCHITECTURE[-1]))
        args = exe.arg_dict
        grads = exe.grad_dict
        outputs = dict(zip(sym.list_outputs(), exe.outputs))

        loaded_args = mx.nd.load(os.path.join(docopts["--log"], "parameters"))
        for name in args:
            if name != "data":
                args[name][:] = loaded_args[name]

        args["data"][:] = np.random.randn(docopts["--batch_size"],
                                          ARCHITECTURE[-1])
        exe.forward(is_train=True)

        # testing_model has only 1 output
        batch = exe.outputs[0].asnumpy().reshape(-1, 28, 28)
        np.save(os.path.join(docopts["--log"], "output"), batch)

        imshow = plt.imshow(batch[0], cmap="gray")
        callback = Index(imshow, batch)
        axnext = plt.axes([0.8, 0.7, 0.1, 0.075])
        axprev = plt.axes([0.8, 0.6, 0.1, 0.075])
        next_button = Button(axnext, 'Next')
        next_button.on_clicked(callback.next)
        prev_button = Button(axprev, 'Previous')
        prev_button.on_clicked(callback.prev)
        plt.show()
        plt.waitforbuttonpress()
def run_experiment_vae(data, file_prefix, p):
    ''' Synthesis via training the VAE and the VAR separately '''
    y = torch.zeros(data.size(0)).float()
    dataset = TensorDataset(data[1:, [0], :, :], y[1:])
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=1, shuffle=True)
    net = vae.VAE(latent_dim=p['latent_dim'], n_clayers=p['n_clayers'],
                  kernel1_size=p['kernel1_size'])
    optimizer = torch.optim.Adam(net.parameters(), lr=p['learning_rate'])
    test_noise = torch.randn(p['N_synth'], p['latent_dim'])
    mse_criterion = MSELoss(reduction='sum')

    for epoch in range(p['n_epochs']):
        cst = 0
        kld = 0
        for i, (images, _) in enumerate(data_loader):
            images = to_var(images)
            out, mu, log_var = net(images, N=p['n_mc'])
            reconst_loss = mse_criterion(
                out, images.repeat(p['n_mc'], 1, 1, 1)) / p['n_mc']
            try:
                kl_divergence = torch.sum(
                    0.5 * (mu**2 + torch.exp(log_var) - log_var - 1))
            except RuntimeError:
                return False
            total_loss = reconst_loss / p['sigma_squared'] + kl_divergence

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

            cst = total_loss + cst
            kld = kl_divergence + kld

        if epoch % p['save_interval'] == 0:
            net.eval()
            print("Epoch:", epoch + 1,
                  "- Averaged Cost:", cst.item() / (i + 1),
                  'kld:', kld.item() / (i + 1))

            # Encode consecutive frames to get latent pairs (h_prev, h_next)
            h_prev = np.zeros((data.size(0) - 1, p['latent_dim']))
            h_next = np.zeros((data.size(0) - 1, p['latent_dim']))
            for i in range(data.size(0) - 1):
                mu_synth_prev, _ = torch.chunk(
                    net.encoder(to_var(data[[i]][:, [0], :, :])), 2, dim=1)
                h_prev[i] = mu_synth_prev.data.cpu().numpy()
                mu_synth_next, _ = torch.chunk(
                    net.encoder(to_var(data[[i]][:, [1], :, :])), 2, dim=1)
                h_next[i] = mu_synth_next.data.cpu().numpy()

            # Least-squares fit of the transition matrix A, with singular
            # values clipped to <= 1
            A = np.dot(np.dot(h_next.T, h_prev),
                       np.linalg.inv(np.dot(h_prev.T, h_prev)))
            Ua, Sa, VaT = np.linalg.svd(A)
            Sa = np.where(Sa > 1.0, 1.0, Sa)
            A = np.dot(Ua * Sa, VaT)
            Ub, Sb, _ = np.linalg.svd(np.eye(p['latent_dim']) - np.dot(A, A.T))
            B = np.dot(Ub, np.diag(np.sqrt(Sb)))

            Y = (net.synthesize(torch.from_numpy(A).float(),
                                torch.from_numpy(B).float(),
                                additive_noise=test_noise,
                                img_init=data[[0]][:, [0], :, :]) + 1) / 2
            torchvision.utils.save_image(
                Y[:p['n_row'] * p['n_col']].data.cpu(),
                file_prefix + '_%03d' % epoch + '.png', nrow=p['n_row'])
            if p['create_video']:
                writevid(Y.data.cpu().numpy().squeeze(),
                         file_prefix + '_%03d' % epoch + '.avi', p['fps'])
            net.train()

    return True
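# Why B is built the way it is above (a reading of the code, not part of the
# original file): with latents evolving as h_{t+1} = A h_t + B eps_t and
# eps_t ~ N(0, I), the stationary covariance C satisfies C = A C A^T + B B^T.
# Choosing B B^T = I - A A^T (possible once the singular values of A are
# clipped to <= 1, which makes I - A A^T positive semidefinite) keeps C = I,
# matching the VAE's standard-normal prior. A quick numerical check:
import numpy as np

rng = np.random.default_rng(0)
A = rng.standard_normal((4, 4))
Ua, Sa, VaT = np.linalg.svd(A)
A = (Ua * np.minimum(Sa, 1.0)) @ VaT            # clip singular values to <= 1
Ub, Sb, _ = np.linalg.svd(np.eye(4) - A @ A.T)  # symmetric PSD residual
B = Ub @ np.diag(np.sqrt(Sb))
assert np.allclose(A @ A.T + B @ B.T, np.eye(4))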
def init_models(self):
    self.c_model = controller.Controller()
    self.vae_model = vae.VAE()
    self.rnn_model = rnn.RNN()
parser.add_argument('-param_fn', dest='param_fn',
                    help='file name for the parameter file')
parser.add_argument('-restore', dest='restore', action='store_true',
                    help="Continue from previous run")
parser.set_defaults(restore=False)
args = parser.parse_args()
nsteps = args.nsteps
restore = args.restore
param_fn = args.param_fn

# Dimensions of the layers
enc_dim = [784, 400, 20]
dec_dim = [20, 400, 784]

# Load MNIST
if 'mnist' not in locals():
    mnist = input_data.read_data_sets('MNIST')

# Build the VAE
#vae_net = vae.VAE(enc_dim, dec_dim, n_steps=int(20000))
vae_net = vae.VAE(enc_dim, dec_dim, n_steps=int(nsteps))
vae_net.build_graph()

# Train the model
vae_net.train(mnist, restore=restore)

# Dump the matrices
with tf.Session() as sess:
    vae_net.dump_matrices(sess, param_fn)
print("Data stored in file " + param_fn)
vae_losses = []
g_losses = []
h_losses = []

for d in Ds:
    vae_run = []
    g_run = []
    h_run = []
    for run in range(max_runs):
        D = d
        dataset = var.MyDataSet(d, D, num_points)
        dataloader = torch.utils.data.DataLoader(dataset,
                                                 batch_size=batch_size,
                                                 shuffle=True, num_workers=2)
        s = dataset.sigma
        # print(s)
        if s_trainable:
            vae = var.VAE(d, D)
        else:
            vae = var.VAE(d, D, s)
        optimizer = optim.Adam(vae.parameters(), lr=lr)

        l = None
        epochs = []
        prev_loss = 0
        for epoch in range(max_epochs):
            losses = []
            for i, data in enumerate(dataloader, 0):
                inputs = Variable(data)
                optimizer.zero_grad()
                loss = vae.total_loss_direct(inputs)
                loss.backward()
                optimizer.step()
                l = loss
def main(nets_archi, train_data, test_data, mode_, name="test"):
    # Preprocessing data
    data_size = train_data.shape[0]
    # Create weights DST dir
    DST = create_DST(name)

    ###### Reset tf graph ######
    tf.reset_default_graph()
    start_time = time.time()
    print("\nPreparing variables and building model ...")

    ###### Create tf placeholder for obs variables ######
    y = tf.placeholder(dtype=data_type(),
                       shape=(None, IMAGE_SIZE, IMAGE_SIZE, 1))

    ###### Create variable for batch index ######
    batch = tf.Variable(0, dtype=data_type())

    ###### Learning rate decay ######
    learning_rate = tf.train.exponential_decay(
        learning_rate_init,  # Base learning rate.
        batch * BATCH_SIZE,  # Current index into the dataset.
        15 * data_size,      # Decay step.
        0.98,                # Decay rate.
        staircase=True)

    ###### Create VAE instance ######
    recognition_net = nets_archi["recog"]
    generator_net = nets_archi["gener"]
    vae_ = vae.VAE(
        recog_archi=recognition_net,  # architecture of the recognition network
        gener_archi=generator_net,    # architecture of the generative network
        N=N,                          # dim of the Gaussian latents x
        P=IMAGE_SIZE * IMAGE_SIZE)    # dim of the obs variables y

    ###### Build loss and optimizer ######
    vae_._create_loss_optimizer(y, learning_rate, batch)

    ###### Build generator ######
    vae_._generate(nexamples)

    ###### Initializer ######
    init = tf.global_variables_initializer()

    ###### Saver ######
    saver = tf.train.Saver()

    ###### Create a local session to run the training ######
    with tf.Session() as sess:
        # Training
        if mode_ == "training":
            # Opening csv file
            csv_path = "./Perf"
            if not tf.gfile.Exists(csv_path):
                os.makedirs(csv_path)
            csvfileTrain = open(os.path.join(csv_path, name) + ".csv", 'w')
            Trainwriter = csv.writer(csvfileTrain, delimiter=';')
            Trainwriter.writerow(['Num Epoch', 'train loss', 'test_loss'])

            # Initialize variables
            sess.run(tf.global_variables_initializer())

            # Initialize performance indicators
            best_l = -10000000000.0

            # Training loop
            print("\nStart training ...")
            for epoch in range(num_epochs):
                start_time = time.time()
                print("")
                print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

                # Training loop; y_batch renamed from `batch` to avoid
                # shadowing the global-step variable above
                train_l = 0.0
                batches = data_processing.get_batches(train_data, BATCH_SIZE)
                for i, y_batch in enumerate(batches):
                    _, l, lr = sess.run(
                        [vae_.optimizer, vae_.VAE_obj, learning_rate],
                        feed_dict={y: y_batch})
                    # Update average loss
                    train_l += l / len(batches)

                # Testing loop
                test_l = 0.0
                batches = data_processing.get_batches(test_data, BATCH_SIZE)
                for i, y_batch in enumerate(batches):
                    l = sess.run(vae_.VAE_obj, feed_dict={y: y_batch})
                    # Update average loss
                    test_l += l / len(batches)

                # Update best perf and save model
                if test_l > best_l:
                    best_l = test_l
                    if epoch > 20:
                        saver.save(sess, DST)
                        print("model saved.")

                # Print info for previous epoch
                print("Epoch {} done, took {:.2f}s, learning rate: {:10.2e}".format(
                    epoch, time.time() - start_time, lr))
                print("Train loss: {:.3f}, Test loss: {:.3f}, Best test loss: {:.3f}".format(
                    train_l, test_l, best_l))

                # Write csv file with results
                Trainwriter.writerow([epoch + 1, train_l, test_l])

        if mode_ == "reconstruct":
            # Plot reconstruction mean
            if not tf.gfile.Exists(DST + ".meta"):
                raise Exception("no weights given")
            saver.restore(sess, DST)
            img = test_data[np.random.randint(0, high=test_data.shape[0],
                                              size=BATCH_SIZE)]
            bernouilli_mean = sess.run(vae_.y_reconstr_mean,
                                       feed_dict={y: img})
            save_reconstruct(img[:nexamples], bernouilli_mean[:nexamples],
                             "./reconstruct")

        if mode_ == "generate":
            # Plot generated images
            if not tf.gfile.Exists(DST + ".meta"):
                raise Exception("no weights given")
            saver.restore(sess, DST)
            bernouilli_mean = sess.run(vae_.y_generate_mean, feed_dict={})
            bernouilli_mean = np.transpose(
                np.reshape(bernouilli_mean,
                           (nexamples, 1, IMAGE_SIZE, IMAGE_SIZE)),
                (1, 0, 2, 3))
            save_gene(bernouilli_mean, "./generate")
np.random.shuffle(data)
VAL_SIZE = data.shape[0] // 100
TRAIN_SIZE = data.shape[0] - VAL_SIZE
data_val = data[:VAL_SIZE]
data_train = data[VAL_SIZE:]

# create VAE
DEVICE = torch.device(
    'cuda' if args.gpu and torch.cuda.is_available() else 'cpu')
print('DEVICE = %s' % str(DEVICE))
vae_module = vae.VAE(enc=args.enc,
                     enc_kwargs=enc_kwargs,
                     dec=args.dec,
                     dec_kwargs=dec_kwargs,
                     data_shape=(1, HEIGHT, WIDTH),
                     code_size=args.code_size,
                     loss_fn=args.loss_fn,
                     kl_tolerance=args.kl_tolerance)
vae_module.to(DEVICE)
opt = optim.Adam(vae_module.parameters(), lr=1e-3)
vae_module.set_optimizer(opt)
log(vae_module)

# train VAE
keys = ['Epoch', 'Coding Loss', 'Reconstruction Loss', 'Total Loss']
formats = ['%04d', '%9.4f', '%9.4f', '%9.4f']
clear_logs()
log_tabular(vals=keys)
total_loss, coding_loss, reconstruction_loss = vae_module.eval_dataset(
    dataset=data_val, batch_size=args.batch_size)
def main():
    # Set variables.
    img_dim = [64, 64]
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 20
    save_freq = 5
    batch_size = 128
    shuffle = True
    num_loader_workers = 3
    beta = 1.
    cuda = True
    learning_rate = 0.001
    adaptive = False  # True
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # Fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    vae_net = vae.VAE(latent_dim, hidden_dim)
    if cuda:
        vae_net.cuda()

    # Set loss fn.
    loss_fn = vae.loss_fn

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle,
                                         num_loader_workers)

    # Load optimizer.
    if adaptive:
        optimizer = optim.Adam(vae_net.parameters(), lr=learning_rate)
    else:
        optimizer = optim.SGD(vae_net.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=1e-1,
                                              epochs=num_epochs,
                                              steps_per_epoch=10)

    # Epoch loop
    for epoch in range(1, num_epochs + 1):
        print('Epoch {} of {}'.format(epoch, num_epochs))
        start = time.time()
        train_loss = 0

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            # print('Batch {}'.format(i_batch+1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            x_reconst, mu, logvar = vae_net.forward(x)
            loss = loss_fn(x, x_reconst, mu, logvar, beta)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()
            if not adaptive:
                scheduler.step()

        if epoch % save_freq == 0:
            o = 'adaptive' if adaptive else 'cyclic'
            util.save_weights(
                vae_net,
                os.path.join(save_dir, 'vae_{}_{}.pth'.format(o, epoch)))

        end = time.time()
        print('loss: ', train_loss / len(anime_data.img_folder))
        print('Took {}'.format(end - start))

    o = 'adaptive' if adaptive else 'cyclic'
    util.save_weights(
        vae_net,
        os.path.join(save_dir, 'vae_{}_{}.pth'.format(o, epoch)))
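# A caveat on the scheduler above: OneCycleLR advances once per optimizer
# step, and its total step budget is epochs * steps_per_epoch. With
# steps_per_epoch=10 the schedule assumes exactly 10 batches per epoch; if
# the data loader yields more batches, scheduler.step() runs past the budget
# and recent PyTorch versions raise a ValueError. Assuming the same names as
# above, tying the budget to the real loader length avoids this:
# scheduler = optim.lr_scheduler.OneCycleLR(
#     optimizer, max_lr=1e-1, epochs=num_epochs,
#     steps_per_epoch=len(anime_data.data_loader))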