Example 1
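# Train a new VAE on MNIST, or restore one from a saved meta-graph, then generate diagnostic plots.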
def main(to_reload=None):
    mnist = load_mnist()

    if to_reload:  # restore
        v = vae.VAE(ARCHITECTURE, HYPERPARAMS, meta_graph=to_reload)
        print("Loaded!")

    else:  # train
        v = vae.VAE(ARCHITECTURE, HYPERPARAMS, log_dir=LOG_DIR)
        v.train(mnist, max_iter=MAX_ITER, max_epochs=MAX_EPOCHS, cross_validate=False,
                verbose=True, save=True, outdir=METAGRAPH_DIR, plots_outdir=PLOTS_DIR,
                plot_latent_over_time=False)
        print("Trained!")

    all_plots(v, mnist)
Example 2
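# Restore a trained VAE checkpoint on CPU, simulate data from it, and evaluate the model with bidirectional Monte Carlo (BDMC).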
def main():
    device = torch.device("cpu")
    model = vae.VAE(latent_dim=args.latent_dim)
    model.to(device)
    model.load_state_dict(
        torch.load(args.ckpt_path, map_location=device)['state_dict'])
    model.eval()
    print(model)

    # bdmc uses simulated data from the model
    loader = simulate.simulate_data(
        model,
        batch_size=args.batch_size,
        n_batch=args.n_batch,
        device=device,
    )
    # run bdmc
    forward_schedule = np.linspace(0., 1., args.chain_length)
    bdmc(
        model,
        loader,
        forward_schedule=forward_schedule,
        n_sample=args.iwae_samples,
        device=device,
    )
Example 3
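# EDA1: an estimation-of-distribution-style loop that trains the VAE on the current population each
# generation, keeps an elite subset, and samples the remainder of the next population from the model.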
def EDA1(input_size, evaluate, maxitr, pop_size, train_size, elite_size,
         intermediate_dim, latent_dim, epochs):
    best_in_samples = [0] * (maxitr + 1)
    average_in_samples = [0] * (maxitr + 1)
    diversity_in_samples = [0] * (maxitr + 1)
    sample_size = pop_size - elite_size

    population = generate_populaton(pop_size, input_size, evaluate)
    model = vae.VAE(input_size, intermediate_dim, latent_dim, epochs)

    best_in_samples[0] = best_in_pop(population)
    average_in_samples[0] = average_in_pop(population)
    diversity_in_samples[0] = calc_divesity(population)

    for gen in range(maxitr):
        print("gen : " + str(gen + 1))

        train_data = make_train_data(population, train_size)
        model.train(train_data)

        elite = elite_select(population, elite_size)

        samples = sample_from_model(model, input_size, sample_size, evaluate)

        print("best in samples = " + str(best_in_samples[gen]))
        print("average in samples = " + str(average_in_samples[gen]))

        population = elite + samples
        best_in_samples[gen + 1] = best_in_pop(population)
        average_in_samples[gen + 1] = average_in_pop(population)
        diversity_in_samples[gen + 1] = calc_divesity(population)
        print("diversity = " + str(calc_divesity(population)))
        print("")

    return best_in_samples, average_in_samples, diversity_in_samples
Example 4
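 # Train a fresh VAE or load previously saved weights, depending on the do_training flag.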
 def __init__(self, do_training=False):
     self.state_dict_filepath = 'vae_state_dict.pt'
     self.vae = vae.VAE()
     self.bottleneck_dim = 10
     self.EPOCHS = 10
     if do_training:
         self.train_vae()
     else:
         self.load_vae_state_dict()
Example 5
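# Same train-or-restore flow as Example 1, but the input is either MNIST or precomputed
# sentence/document vectors; plots are only produced for MNIST.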
def main(data="mnist", to_reload=None):
    if data == "mnist":
        input_data = load_mnist()
        control_plots = True
    elif data == "sentences":
        #input_data = load_textual_data("data/sentenceVectors-Emails-January.out", 0.9, 0.1)
        input_data = load_textual_data("data/docVectors-NASA.out", 0.9, 0.01)
        control_plots = False

    if to_reload: # restore
        v = vae.VAE(ARCHITECTURE, HYPERPARAMS, meta_graph=to_reload)
        print("Loaded!")

    else: # train
        v = vae.VAE(ARCHITECTURE, HYPERPARAMS, log_dir=LOG_DIR)
        v.train(input_data, max_iter=MAX_ITER, max_epochs=MAX_EPOCHS, cross_validate=False,
                verbose=True, save=True, outdir=METAGRAPH_DIR, plots_outdir=PLOTS_DIR,
                plot_latent_over_time=False, control_plots=control_plots)

    if control_plots:
        all_plots(v, input_data)
Example 6
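# TensorFlow 1.x training loop: build and optimize the VAE graph, train on MNIST mini-batches,
# report average losses every 10 epochs, and save a checkpoint.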
def train(config):
    '''
     SETTING HYPERPARAMETER (DEFAULT)
     '''
    training_epoch = config.training_epoch
    z_dim = config.z_dim
    batch_size = config.batch_size
    n_data = mnist.train.num_examples
    total_batch = int(mnist.train.num_examples / batch_size)
    total_iteration = training_epoch * total_batch

    # Build Network
    VAE = vae.VAE(config)
    VAE.build()
    # Optimize Network
    VAE.optimize(config)

    sess = tf.Session()
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init_op)
    saver = tf.train.Saver()

    print("Total the number of Data : " + str(n_data))
    print("Total Step per 1 Epoch: {}".format(total_batch))
    print("The number of Iteration: {}".format(total_iteration))

    for epoch in range(training_epoch):
        avg_cost = 0
        avg_recons = 0
        avg_regular = 0
        for i in range(total_batch):
            batch_xs, _ = mnist.train.next_batch(batch_size)

            _cost, _, _recons, _regular = sess.run([VAE.cost, VAE.optimizer, VAE.recons, VAE.regular], feed_dict={VAE.X: batch_xs})
            avg_cost += _cost / total_batch
            avg_recons += _recons / total_batch
            avg_regular += _regular / total_batch

        if epoch % 10 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost),
                  'Recons_Loss =', '{:.9f}'.format(avg_recons),
                  'Regular_Loss =', '{:.9f}'.format(avg_regular))

    print("Training Complete!")

    save_dir = './mode_z_dim_{}/'.format(z_dim)
    if not os.path.exists(save_dir): os.makedirs(save_dir)
    save_path = '{}VAE.ckpt'.format(save_dir)
    saver.save(sess, save_path)
    print("Saved Model")

    return VAE, sess
Example 7
    def __init__(self, action_space):
        self.vae = vae.VAE(dataset)
        self.lstm = lstm_validation.LSTM(
            action_space, seq_len=2, batch_size=1
        )  #create new lstm, maintain the same struct of the lstm used for training (to load pretrained model) but ignore the targets data

        self.vae.load_json()
        self.lstm.load_json()
        self.current_state = self.lstm.sess.run(
            self.lstm.zero_state
        )  #initialize current_state to the initial state
        self.restart = 0
        self.frame_count = None
        self.current_frame = self.z_to_img(self.current_state)
Example 8
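# Module pipeline: observations feed a VAE, whose latent features feed a GMM with known categories;
# the two modules are updated alternately for a few iterations.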
def main():
    obs = srk.Observation(np.loadtxt("data.txt"))
    data_category = np.loadtxt("category.txt")

    vae1 = vae.VAE(18, itr=200, batch_size=500)
    gmm1 = gmm.GMM(10, category=data_category)

    vae1.connect(obs)
    gmm1.connect(vae1)

    for i in range(5):
        print(i)
        vae1.update()
        gmm1.update()
Example 9
def main():
    model = vae.VAE(latent_dim=args.latent_dim)
    model.cuda()
    model.load_state_dict(torch.load(args.ckpt_path)['state_dict'])
    model.eval()

    # bdmc uses simulated data from the model
    loader = simulate.simulate_data(model,
                                    batch_size=args.batch_size,
                                    n_batch=args.n_batch)
    # run bdmc
    forward_schedule = np.linspace(0., 1., args.chain_length)
    bdmc(model,
         loader,
         forward_schedule=forward_schedule,
         n_sample=args.iwae_samples)
Example 10
def main():
    obs1 = srk.Observation( np.loadtxt("data1.txt") )
    obs2 = srk.Observation( np.loadtxt("data2.txt") )
    data_category = np.loadtxt( "category.txt" )

    vae1 = vae.VAE( 18, itr=200, batch_size=500 )
    gmm1 = gmm.GMM( 10, category=data_category )
    mlda1 = mlda.MLDA( 10, [200,200], category=data_category )

    vae1.connect( obs1 )
    gmm1.connect( vae1 )
    mlda1.connect( obs2, gmm1 )
    
    for i in range(5):
        print( i )
        vae1.update()
        gmm1.update()
        mlda1.update()
Example 11
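# Variant of EDA1 that trains the VAE on a rolling queue of recent populations rather than a single generation.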
def EDAQ1(input_size, evaluate, maxitr, pop_size, train_size, elite_size,
          intermediate_dim, latent_dim, epochs):
    queue_size = 5
    best_in_samples = [0] * (maxitr - queue_size + 1 + 1)
    average_in_samples = [0] * (maxitr - queue_size + 1 + 1)
    diversity_in_samples = [0] * (maxitr - queue_size + 1 + 1)
    sample_size = pop_size - elite_size

    population = []
    pop_queue = generate_populaton(pop_size * queue_size, input_size, evaluate)
    model = vae.VAE(input_size, intermediate_dim, latent_dim, epochs)

    best_in_samples[0] = best_in_pop(pop_queue)
    average_in_samples[0] = average_in_pop(pop_queue)
    diversity_in_samples[0] = calc_divesity(pop_queue)

    for gen in range(maxitr - queue_size + 1):
        print("gen : " + str(gen + 1))

        train_queue = sorted(pop_queue, key=lambda p: p.fitness)
        train_data = make_train_data(train_queue, train_size)
        model.train(train_data)

        if gen == 0:
            elite = elite_select(pop_queue, elite_size)
        else:
            elite = elite_select(population, elite_size)

        samples = sample_from_model(model, input_size, sample_size, evaluate)

        print("best in samples = " + str(best_in_samples[gen]))
        print("average in samples = " + str(average_in_samples[gen]))

        population = elite + samples
        pop_queue = pop_queue[pop_size:] + population

        best_in_samples[gen + 1] = best_in_pop(population)
        average_in_samples[gen + 1] = average_in_pop(population)
        diversity_in_samples[gen + 1] = calc_divesity(population)
        print("diversity = " + str(calc_divesity(population)))
        print("")

    return best_in_samples, average_in_samples, diversity_in_samples
Example 12
 def reconstruct(self):
     """
     Reconstruct erase digits
     """
     # Build the VAE using the previously trained parameters
     vae_net = vae.VAE(n_steps=self.n_steps, \
         erase_pix0=self.erase_pix0, erase_pix1=self.erase_pix1,\
         mode='recon',param_fn=self.param_fn,\
         recon_mode=self.recon_mode,nsteps_init=self.nsteps_init,\
         lr_adam=self.lr_adam, lr_sgd=self.lr_sgd, nstep_save=self.nsteps_save)
     vae_net.build_graph()
     
     # Save the network
     self.vae_net = vae_net
     
     # Run the reconstruction optimization
     if not self.avg_only:
         vae_net.reconstruct(self.xtrue,restore=self.restore)
     
     if self.recon_mode == 'mmse':
         """
         For MMSE reconstruction, we compute the values from the averages
         of the samples 
         """
         #self.xhat, self.zhat0, self.xhat_var, self.zhat0_var = vae_net.last_avg           
         self.xhat, self.zhat0, self.xhat_var, self.zhat0_var \
             = vae_net.recon_mean_var(self.nsteps_burn)
         
     else: 
         """
         For MAP reconstruction, we get the values in the Tensorflow
         graph
         """            
         # Get the results
         with tf.Session() as sess:
             vae_net.restore(sess)
             [self.zhat0, self.xhat_logit, self.xhat] = sess.run(\
                 [vae_net.z_samp, vae_net.xhat_logit, vae_net.xhat])
             [self.loss_vals, self.loss_slice, self.loss_prior, self.pred_err, self.loss] =\
                 sess.run([vae_net.loss_vals, vae_net.loss_slice, vae_net.loss_prior, \
                     vae_net.pred_err, vae_net.loss], feed_dict={vae_net.x: self.xtrue})
         self.xhat_var = []
         self.zhat0_var = []
Example 13
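# Simple factory: map a model_type string to the corresponding autoencoder/VAE variant.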
def get_model(model_type,
              layers=[32, 64],
              latent_dim=256,
              input_shape=32,
              use_bn=False,
              std=0.05):

    if model_type == 'AE':
        model = ae.AE(layers, latent_dim, input_shape, use_bn)

    elif model_type == 'CVAE':
        model = cvae.CVAE(layers, latent_dim, input_shape, std, use_bn)

    elif model_type == 'VAE':
        model = vae.VAE(layers, latent_dim)

    elif model_type == 'SBVAE':
        model = sbvae.SBVAE(layers, latent_dim)

    elif model_type == 'SBAE':
        model = sbae.SBAE(layers, latent_dim, input_shape, use_bn)

    elif model_type == 'SBAE_cl':
        model = sbae.SBAE(layers,
                          latent_dim,
                          input_shape,
                          use_bn,
                          classification=True)

    elif model_type == 'KVAE':
        model = KVAE.KVAE(layers, latent_dim, input_shape)

    elif model_type == 'RKN':
        model = RKN.RKN(layers, latent_dim, input_shape)

    elif model_type == 'KAST':
        model = KAST()

    else:
        print("Model type is not good")

    return model
Example 14
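# Load VAE generator weights from a .pth file and decode random latent vectors into image files on disk.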
def generate_pictures():
    gp_scaling = 10  #Lambda in the paper
    parser = argparse.ArgumentParser(
        description='Output images for the VAE model')
    parser.add_argument('--generator_pt',
                        type=str,
                        default="vae_mymodel_40.pth",
                        help='Generator parameters file')
    parser.add_argument('--n_pictures',
                        type=int,
                        default=1000,
                        help='Number of pictures to generate')

    parser.add_argument('--out_dir',
                        type=str,
                        default="samples",
                        help='Output directory for the generated images')

    args = parser.parse_args()

    the_vae = vae.VAE()
    the_vae.load_state_dict(torch.load(args.generator_pt))

    #cuda = torch.cuda.is_available()
    cuda = False
    for i in range(0, int(args.n_pictures / 64)):
        fixed_z = torch.FloatTensor(args.n_pictures, 100, 1, 1).normal_(
            0, 1)  #Used to compare pictures from epoch to epoch
        if cuda:
            the_vae = the_vae.cuda()
            fixed_z = fixed_z.cuda()
        fake_pictures = the_vae.decoder(fixed_z)

        for j in range(64):
            torchvision.utils.save_image(fake_pictures[j].detach().cpu(),
                                         args.out_dir + '/vae/vae' +
                                         str(i * 64 + j) + '.png',
                                         normalize=True)
Example 15
def main():
    model = vae.VAE(latent_dim=args.latent_dim).to(device).eval()
    model.load_state_dict(torch.load(args.ckpt_path)['state_dict'])

    # bdmc uses simulated data from the model
    loader = simulate.simulate_data(
        model,
        batch_size=args.batch_size,
        n_batch=args.n_batch,
        device=device
    )

    # run bdmc
    # Note: a linear schedule is used here for demo; a sigmoidal schedule might
    # be advantageous in certain settings, see Section 6 in the original paper
    # for more https://arxiv.org/pdf/1511.02543.pdf
    forward_schedule = torch.linspace(0, 1, args.chain_length, device=device)
    bdmc(
        model,
        loader,
        forward_schedule=forward_schedule,
        n_sample=args.iwae_samples,
    )
Example 16
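# Validation/test loop for the HiP-MDP setup: a probe policy (or the control policy with an average z)
# collects a short trajectory, the VAE encodes it into a hidden-parameter estimate z, and the DDQN
# control policy conditioned on z finishes the episode.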
def test_function(config, config_suffix=None):

    config_main = config['main']
    config_probe = config['probe']
    config_VAE = config['VAE']
    config_DDQN = config['DDQN']
    config_PER = config['PER']
    config_ablation = config['ablation']
    use_pi_e = config_ablation['use_pi_e']
    phase = config_main['phase']
    assert (phase == 'validation' or phase == 'test')

    domain = config_main['domain']

    # Domain-specific parameters (e.g. state and action space dimensions)
    if domain == '2D':
        domain_name = "config_2D.json"
    elif domain == 'acrobot':
        domain_name = "config_acrobot.json"
    elif domain == 'hiv':
        if config_suffix is not None:
            domain_name = "config_hiv{}.json".format(config_suffix)
        else:
            domain_name = "config_hiv.json"
    elif domain == 'mujoco':
        domain_name = "config_mujoco.json"
    elif domain == 'cancer':
        domain_name = "config_cancer.json"
    else:
        raise ValueError("test_ablation.py : domain not recognized")
    with open(domain_name) as f:
        config_domain = json.load(f)

    n_state = config_domain['n_state']
    n_action = config_domain['n_action']

    seed = config_main['seed']
    np.random.seed(seed)
    random.seed(seed)
    tf.set_random_seed(seed)

    N_instances = config_domain['N_test_instances']
    N_episodes = config_domain['N_test_episodes']
    test_steps = config_domain['test_steps']
    dir_name = config_main['dir_name']
    model_name = config_main['model_name']

    # Instantiate HPMDP
    hpmdp = HiPMDP.HiPMDP(domain, config_domain, phase)

    # Instantiate probe policy
    n_probe_steps = config_domain['traj_length']
    assert (n_probe_steps < test_steps)
    if use_pi_e:
        pi_e = probe.Probe(config_probe, n_state, n_action)
    else:
        # initial z
        z_avg = pickle.load(open('../results/%s/z_avg.p' % dir_name, 'rb'))

    # Instantiate VAE
    buffer_size_vae = config_VAE['buffer_size']
    batch_size_vae = config_VAE['batch_size']
    del config_VAE['buffer_size']
    vae = vae_import.VAE(n_state,
                         n_action,
                         n_probe_steps,
                         seed=seed,
                         **config_VAE)

    # Instantiate control policy
    if config_DDQN['activate']:
        pi_c = ddqn.DDQN(config_DDQN, n_state, n_action,
                         config_PER['activate'], config_VAE['n_latent'])

    # TF session
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)

    saver = tf.train.Saver()
    print("Restoring variables from %s" % dir_name)
    saver.restore(sess, '../results/%s/%s' % (dir_name, model_name))

    reward_total = 0
    cumulative_reward = np.zeros((test_steps, N_instances))
    # Iterate through random instances from the HPMDP
    for idx_instance in range(1, N_instances + 1):

        hpmdp.switch_instance()
        print("idx_instance", idx_instance, " | Switching instance to",
              hpmdp.instance_param_set)

        # N_episodes should be 1, but we let it be flexible in case needed
        for idx_episode in range(1, N_episodes + 1):

            reward_episode = 0

            collected_probe_traj = False
            while not collected_probe_traj:

                # list of (state, action) pairs
                traj_probe = []
                state = hpmdp.reset()
                episode_step = 0
                done = False

                probe_finished_early = False
                # Generate probe trajectory
                for step in range(1, n_probe_steps + 1):

                    if use_pi_e:
                        action = pi_e.run_actor(state, sess)
                    else:
                        action = pi_c.run_actor(state, z_avg, sess, epsilon=0)
                    # print("Probe step %d action %d" % (step, action))
                    action_1hot = np.zeros(n_action)
                    action_1hot[action] = 1
                    traj_probe.append((state, action_1hot))
                    state_next, reward, done = hpmdp.step(action)
                    reward_episode += reward
                    cumulative_reward[episode_step,
                                      idx_instance - 1] = reward_episode
                    state = state_next
                    episode_step += 1
                    if done and step < n_probe_steps:
                        probe_finished_early = True
                        print(
                            "test_ablation.py : done is True while generating probe trajectory"
                        )
                        break

                if not probe_finished_early:
                    collected_probe_traj = True

            # Use VAE to estimate hidden parameter
            z = vae.encode(sess, traj_probe)

            print(z)

            if config_DDQN['activate']:
                # Start control policy
                while not done and episode_step < test_steps:
                    # Use DDQN with prioritized replay for this
                    action = pi_c.run_actor(state, z, sess, epsilon=0)
                    state_next, reward, done = hpmdp.step(action)
                    reward_episode += reward
                    cumulative_reward[episode_step,
                                      idx_instance - 1] = reward_episode
                    state = state_next
                    episode_step += 1
                print(reward_episode)
                # If episode ended earlier than test_steps, fill in the
                # rest of the cumulative rewards with the last value
                if episode_step < test_steps:
                    remaining = np.ones(test_steps -
                                        episode_step) * reward_episode
                    cumulative_reward[episode_step:,
                                      idx_instance - 1] = remaining

                reward_total += reward_episode

    header = 'Step'
    for idx in range(1, N_instances + 1):
        header += ',R_%d' % idx
    indices = np.arange(1, test_steps + 1).reshape(test_steps, 1)
    concated = np.concatenate([indices, cumulative_reward], axis=1)
    save_loc = '_'.join(dir_name.split('_')[:-1])
    os.makedirs('../results/%s' % save_loc, exist_ok=True)
    run_number = dir_name.split('_')[-1]
    np.savetxt('../results/%s/test_%s.csv' % (save_loc, run_number),
               concated,
               delimiter=',',
               fmt='%.3e',
               header=header)

    print("Avg episode reward", reward_total / float(N_instances * N_episodes))
Example 17
# Create a uniquely numbered log directory, load sparse data from .npz files, and fit a VAE with optional importance weighting.
def main(args):
    model_dir = Path('./log') / "{}-{}-{}-{}".format(
        args.n_latent, args.hidden_units, args.importance_weighting,
        args.not_weight_normalization
    ) / "epochs={}-batch_size={}-n_samples={}-lr={}".format(
        args.epochs, args.batch_size, args.n_samples, args.lr)
    if not model_dir.exists():
        run_num = 1
    else:
        exst_run_nums = [
            int(str(folder.name).split('run')[1])
            for folder in model_dir.iterdir()
            if str(folder.name).startswith('run')
        ]
        if len(exst_run_nums) == 0:
            run_num = 1
        else:
            run_num = max(exst_run_nums) + 1
    curr_run = 'run%i' % run_num
    log_dir = model_dir / curr_run
    os.makedirs(log_dir)
    print("making directory", str(log_dir))

    data = scipy.sparse.load_npz(
        "/newNAS/Workspaces/DRLGroup/xiangyuliu/data_no_black_5.1.npz").A
    data_blacklist = scipy.sparse.load_npz(
        "/newNAS/Workspaces/DRLGroup/xiangyuliu/data_blacklist_5.1.npz").A
    data = np.concatenate([data, data_blacklist], axis=0)
    print(data.shape)
    validation = np.random.choice(data.shape[0], size=1000)
    train = [i for i in range(data.shape[0]) if not (i in validation)]
    train_data = data[train]
    validation_data = data[validation]
    train_data = vae.Dataset(train_data, batch_size=args.batch_size)
    validation_data = vae.Dataset(validation_data, batch_size=args.batch_size)

    model_path = "/newNAS/Workspaces/DRLGroup/xiangyuliu/Computer-Network/log/50-1000-True-True/epochs=1000 batch_size=1000 n_samples=10 lr=0.001/run1"
    with open(os.path.join(model_path, "model.pkl"), 'rb') as f:
        model = dill.load(f)

    model = vae.VAE(n_inputs=data.shape[1],
                    n_latent=args.n_latent,
                    n_encoder=[args.hidden_units, args.hidden_units],
                    n_decoder=[args.hidden_units, args.hidden_units],
                    visible_type='binary',
                    nonlinearity=tf.nn.relu,
                    weight_normalization=args.not_weight_normalization,
                    importance_weighting=args.importance_weighting,
                    optimizer=args.optimizer,
                    learning_rate=args.lr,
                    model_dir=str(log_dir))

    with open(log_dir / "model.pkl", 'wb') as f:
        dill.dump(model, f)
    print("begin to fit")

    model.fit(train_data,
              validation_data=validation_data,
              epochs=args.epochs,
              shuffle=args.not_shuffle,
              summary_steps=args.summary_steps,
              init_feed_dict={'batch_size': args.batch_size},
              batch_size=args.batch_size,
              n_samples=args.n_samples)
Example 18
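# Test loop for the rolling-window variant: z and the mixing weight eta are updated online from the
# last n_vae_steps (state, action) pairs while the control policy runs.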
def test_function(config, config_suffix=None):

    config_main = config['main']
    config_VAE = config['VAE']
    config_DDQN = config['DDQN']
    config_PER = config['PER']
    phase = config_main['phase']
    assert (phase == 'validation' or phase == 'test')

    domain = config_main['domain']

    # Domain-specific parameters (e.g. state and action space dimensions)
    if domain == '2D':
        domain_name = "config_2D.json"
    elif domain == 'acrobot':
        domain_name = "config_acrobot.json"
    elif domain == 'hiv':
        if config_suffix is not None:
            domain_name = "config_hiv{}.json".format(config_suffix)
        else:
            domain_name = "config_hiv.json"
    elif domain == 'mujoco':
        domain_name = "config_mujoco.json"
    elif domain == 'cancer':
        domain_name = "config_cancer.json"
    else:
        raise ValueError("train.py : domain not recognized")
    with open(domain_name) as f:
        config_domain = json.load(f)

    n_state = config_domain['n_state']
    n_action = config_domain['n_action']

    seed = config_main['seed']
    np.random.seed(seed)
    random.seed(seed)
    tf.set_random_seed(seed)

    N_instances = config_domain['N_test_instances']
    N_episodes = config_domain['N_test_episodes']
    test_steps = config_domain['test_steps']
    dir_name = config_main['dir_name']
    model_name = config_main['model_name']

    # Instantiate HPMDP
    hpmdp = HiPMDP.HiPMDP(domain, config_domain, phase)

    # Length of trajectory for input to VAE
    n_vae_steps = config_domain['traj_length']
    n_latent = config_VAE['n_latent']
    z = np.zeros(config_VAE['n_latent'], dtype=np.float32)

    with open('../results/%s/std_max.pkl' % dir_name, 'rb') as f:
        std_max = pickle.load(f)

    # Instantiate VAE
    buffer_size_vae = config_VAE['buffer_size']
    batch_size_vae = config_VAE['batch_size']
    del config_VAE['buffer_size']
    vae = vae_import.VAE(n_state,
                         n_action,
                         n_vae_steps,
                         seed=seed,
                         **config_VAE)

    # Instantiate control policy
    if config_DDQN['activate']:
        pi_c = ddqn.DDQN(config_DDQN, n_state, n_action,
                         config_PER['activate'], config_VAE['n_latent'])

    # TF session
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)

    saver = tf.train.Saver()
    print("Restoring variables from %s" % dir_name)
    saver.restore(sess, '../results/%s/%s' % (dir_name, model_name))

    reward_total = 0
    cumulative_reward = np.zeros((test_steps, N_instances))
    list_times = []
    # Iterate through random instances from the HPMDP
    for idx_instance in range(1, N_instances + 1):

        hpmdp.switch_instance()
        print("idx_instance", idx_instance, " | Switching instance to",
              hpmdp.instance_param_set)

        t_start = time.time()
        for idx_episode in range(1, N_episodes + 1):

            # rolling window of (state, action) pairs
            traj_for_vae = []
            eta = 1.0  # range [0,1] 1 means the policy should act to maximize probe reward
            z = np.zeros(config_VAE['n_latent'], dtype=np.float32)
            reward_episode = 0
            state = hpmdp.reset()
            episode_step = 0
            done = False

            while not done and episode_step < test_steps:

                action = pi_c.run_actor(state, z, sess, epsilon=0, eta=eta)
                action_1hot = np.zeros(n_action)
                action_1hot[action] = 1
                traj_for_vae.append((state, action_1hot))
                if len(traj_for_vae) == n_vae_steps + 1:
                    traj_for_vae = traj_for_vae[1:]

                state_next, reward, done = hpmdp.step(action)

                reward_episode += reward
                cumulative_reward[episode_step,
                                  idx_instance - 1] = reward_episode

                # Get z_next and eta_next, because they are considered part of the augmented MDP state
                if len(traj_for_vae) == n_vae_steps:
                    std = vae.get_std(sess, traj_for_vae)
                    std = std / std_max  # element-wise normalization, now each element is between [0,1]
                    eta_next = np.sum(std) / n_latent  # scalar between [0,1]
                    eta_next = min(
                        1.0, eta_next
                    )  # in case std_max during training isn't large enough
                    # Use VAE to update hidden parameter
                    z_next = vae.encode(sess, traj_for_vae)
                else:
                    z_next = z
                    eta_next = eta

                state = state_next
                eta = eta_next
                z = z_next
                episode_step += 1

            # If episode ended earlier than test_steps, fill in the
            # rest of the cumulative rewards with the last value
            if episode_step < test_steps:
                remaining = np.ones(test_steps - episode_step) * reward_episode
                cumulative_reward[episode_step:, idx_instance - 1] = remaining

            reward_total += reward_episode

        list_times.append(time.time() - t_start)

    header = 'Step'
    for idx in range(1, N_instances + 1):
        header += ',R_%d' % idx
    indices = np.arange(1, test_steps + 1).reshape(test_steps, 1)
    concated = np.concatenate([indices, cumulative_reward], axis=1)
    save_loc = '_'.join(dir_name.split('_')[:-1])
    os.makedirs('../results/%s' % save_loc, exist_ok=True)
    run_number = dir_name.split('_')[-1]
    np.savetxt('../results/%s/test_%s.csv' % (save_loc, run_number),
               concated,
               delimiter=',',
               fmt='%.3e',
               header=header)

    with open('../results/%s/test_time_%s.pkl' % (save_loc, run_number),
              'wb') as f:
        pickle.dump(list_times, f)

    print("Avg episode reward", reward_total / float(N_instances * N_episodes))
Example 19
 def __init__(self, input_dim, n_actions):
     super().__init__()
     n_action_dims = 1
     self.vae = vae.VAE(input_dim, z_dim).to(device)
     self.state_trans = statetransition.stateTransModel(n_action_dims)
     self.reward_conv = statetransition.rewardConv()
Example 20
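# Training loop: a probe policy generates short trajectories that are used to train both itself and
# the VAE, and the DDQN control policy is trained on experience augmented with the VAE's latent estimate z.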
def train_function(config, config_suffix=None):

    config_main = config['main']
    config_probe = config['probe']
    autoencoder = config_main['autoencoder']
    if autoencoder == 'VAE':
        config_VAE = config['VAE']
    else:
        raise ValueError("Other autoencoders not supported")
    config_DDQN = config['DDQN']
    config_PER = config['PER']
    phase = config_main['phase']
    assert (phase == 'train')

    domain = config_main['domain']

    # Domain-specific parameters (e.g. state and action space dimensions)
    if domain == '2D':
        domain_name = "config_2D.json"
    elif domain == 'acrobot':
        domain_name = "config_acrobot.json"
    elif domain == 'hiv':
        if config_suffix is not None:
            domain_name = "config_hiv{}.json".format(config_suffix)
        else:
            domain_name = "config_hiv.json"
    elif domain == 'lander':
        domain_name = "config_lander.json"
    elif domain == 'cancer':
        domain_name = "config_cancer.json"
    else:
        raise ValueError("train.py : domain not recognized")
    with open(domain_name) as f:
        config_domain = json.load(f)

    n_state = config_domain['n_state']
    n_action = config_domain['n_action']
    min_samples_before_train = config_domain['min_samples_before_train']

    seed = config_main['seed']
    np.random.seed(seed)
    random.seed(seed)
    tf.set_random_seed(seed)

    N_instances = config_main['N_instances']
    N_episodes = config_main['N_episodes']
    period = config_main['period']
    dir_name = config_main['dir_name']
    model_name = config_main['model_name']

    os.makedirs('../results/%s' % dir_name, exist_ok=True)

    # Instantiate HPMDP
    hpmdp = HiPMDP.HiPMDP(domain, config_domain)

    # Instantiate probe policy
    n_probe_steps = config_domain['traj_length']
    pi_e = probe.Probe(config_probe, n_state, n_action)

    # Instantiate VAE
    buffer_size_vae = config_VAE['buffer_size']
    batch_size_vae = config_VAE['batch_size']
    del config_VAE['buffer_size']
    if autoencoder == 'VAE':
        vae = vae_import.VAE(n_state,
                             n_action,
                             n_probe_steps,
                             seed=seed,
                             **config_VAE)
    else:
        raise ValueError('Other autoencoders not supported')

    # Instantiate control policy
    if config_DDQN['activate']:
        pi_c = ddqn.DDQN(config_DDQN, n_state, n_action,
                         config_PER['activate'], config_VAE['n_latent'])
        epsilon_start = config_DDQN['epsilon_start']
        epsilon_end = config_DDQN['epsilon_end']
        epsilon_decay = np.exp(
            np.log(epsilon_end / epsilon_start) / (N_instances * N_episodes))
        steps_per_train = config_DDQN['steps_per_train']

    # TF session
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)
    sess.run(tf.global_variables_initializer())

    if config_DDQN['activate']:
        sess.run(pi_c.list_initialize_target_ops)
        epsilon = epsilon_start

    if config_VAE['dual']:
        sess.run(vae.list_equate_dual_ops)

    writer = tf.summary.FileWriter('../results/%s' % dir_name, sess.graph)

    saver = tf.train.Saver()

    # use the DQN version of the replay, so instance_count and bnn-specific params do not matter
    exp_replay_param = {
        'episode_count': N_instances * N_episodes,
        'instance_count': 0,
        'max_task_examples': hpmdp.max_steps_per_episode,
        'ddqn_batch_size': config_DDQN['batch_size'],
        'num_strata_samples': config_PER['num_strata_samples'],
        'PER_alpha': config_PER['alpha'],
        'PER_beta_zero': config_PER['beta_zero'],
        'bnn_batch_size': 0,
        'bnn_start': 0,
        'dqn_start': min_samples_before_train
    }

    buf = ExperienceReplay.ExperienceReplay(
        exp_replay_param, buffer_size=config_PER['buffer_size'])

    # Logging
    header = "Episode,R_avg,R_p\n"
    with open("../results/%s/log.csv" % dir_name, 'w') as f:
        f.write(header)
    reward_period = 0
    reward_p_period = 0

    list_trajs = []  # circular buffer to store probe trajectories for VAE
    idx_traj = 0  # counter for list_trajs
    control_step = 0
    train_count_probe = 1
    train_count_vae = 1
    train_count_control = 1
    total_episodes = 0
    t_start = time.time()
    # Iterate through random instances from the HPMDP
    for idx_instance in range(1, N_instances + 1):

        hpmdp.switch_instance()
        print("idx_instance", idx_instance, " | Switching instance to",
              hpmdp.instance_param_set)

        # Iterate through many episodes
        for idx_episode in range(1, N_episodes + 1):

            total_episodes += 1

            # list of (state, action) pairs
            traj_probe = []
            state = hpmdp.reset()
            done = False
            reward_episode = 0

            # Generate probe trajectory
            probe_finished_early = False
            for step in range(1, n_probe_steps + 1):

                action = pi_e.run_actor(state, sess)
                action_1hot = np.zeros(n_action)
                action_1hot[action] = 1
                traj_probe.append((state, action_1hot))
                state_next, reward, done = hpmdp.step(action)
                state = state_next
                reward_episode += reward

                if done and step < n_probe_steps:
                    probe_finished_early = True
                    print(
                        "train.py : done is True while generating probe trajectory"
                    )
                    break

            if probe_finished_early:
                # Skip over pi_e and VAE training if probe finished early
                continue

            if idx_traj >= len(list_trajs):
                list_trajs.append(traj_probe)
            else:
                list_trajs[idx_traj] = traj_probe
            idx_traj = (idx_traj + 1) % buffer_size_vae

            # Compute probe reward using VAE
            if config_probe['reward'] == 'vae':
                reward_e = vae.compute_lower_bound(traj_probe, sess)
            elif config_probe['reward'] == 'total_variation':
                reward_e = pi_e.compute_reward(traj_probe)
            elif config_probe['reward'] == 'negvae':
                # this reward encourages maximizing entropy
                reward_e = -vae.compute_lower_bound(traj_probe, sess)

            # Write Tensorboard at the final episode of every instance
            if total_episodes % period == 0:
                summarize = True
            else:
                summarize = False

            # Train probe policy
            pi_e.train_step(sess, traj_probe, reward_e, train_count_probe,
                            summarize, writer)
            train_count_probe += 1

            # Train VAE
            if len(list_trajs) >= batch_size_vae:
                vae.train_step(sess, list_trajs, train_count_vae, summarize,
                               writer)
                train_count_vae += 1

            # Use VAE to estimate hidden parameter
            z = vae.encode(sess, traj_probe)

            if config_DDQN['activate']:
                # Start control policy
                summarized = False
                while not done:
                    # Use DDQN with prioritized replay for this
                    action = pi_c.run_actor(state, z, sess, epsilon)
                    state_next, reward, done = hpmdp.step(action)
                    control_step += 1
                    reward_episode += reward

                    buf.add(
                        np.reshape(
                            np.array(
                                [state, action, reward, state_next, done, z]),
                            (1, 6)))
                    state = state_next

                    if control_step >= min_samples_before_train and control_step % steps_per_train == 0:
                        batch, IS_weights, indices = buf.sample(control_step)
                        if not summarized:
                            # Write TF summary at first train step of the last episode of every instance
                            td_loss = pi_c.train_step(sess, batch, IS_weights,
                                                      indices,
                                                      train_count_control,
                                                      summarize, writer)
                            summarized = True
                        else:
                            td_loss = pi_c.train_step(sess, batch, IS_weights,
                                                      indices,
                                                      train_count_control,
                                                      False, writer)
                        train_count_control += 1

                        if config_PER['activate']:
                            buf.update_priorities(
                                np.hstack(
                                    (np.reshape(td_loss, (len(td_loss), -1)),
                                     np.reshape(indices, (len(indices), -1)))))

                reward_period += reward_episode
                reward_p_period += reward_e

                if epsilon > epsilon_end:
                    epsilon *= epsilon_decay

                # Logging
                if total_episodes % period == 0:
                    s = "%d,%.2f,%.2f\n" % (total_episodes,
                                            reward_period / float(period),
                                            reward_p_period / float(period))
                    print(s)
                    with open("../results/%s/log.csv" % dir_name, 'a') as f:
                        f.write(s)
                    if config_domain[
                            'save_threshold'] and reward_period / float(
                                period) > config_domain['save_threshold']:
                        saver.save(
                            sess, '../results/%s/%s.%d' %
                            (dir_name, model_name, total_episodes))
                    reward_period = 0
                    reward_p_period = 0

    with open("../results/%s/time.txt" % dir_name, 'a') as f:
        f.write("%.5e" % (time.time() - t_start))

    saver.save(sess, '../results/%s/%s' % (dir_name, model_name))
Example 21
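# Train a VAE on frames from the VizdoomTakeCover environment, then display one dataset image
# alongside the image the VAE synthesizes from it.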
import vizdoomgym
'''
before running, install vizdoomgym:
    git clone https://github.com/simontudo/vizdoomgym.git
    cd vizdoomgym
    pip install -e .
'''

FRAME_SHAPE = [64, 64]

# Init
env = gym.make('VizdoomTakeCover-v0')
dataset = dataset.Dataset(env, FRAME_SHAPE)

#dataset.create_new_dataset(temporary=False)
dataset.load_dataset()

vae = vae.VAE(FRAME_SHAPE, dataset)

vae.load_json()
vae.train_vae(checkpoint=True)
vae.save_json()

chosen_img = dataset.dataset[850]

imgplot = plt.imshow(chosen_img)
plt.show()

imgplot = plt.imshow(vae.synthesize_image(chosen_img))
plt.show()
Example 22
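# Training loop for the single-policy variant: the control policy collects rolling-window trajectories
# itself, the training reward mixes the MDP reward with the VAE lower bound via eta, and the two reward
# streams can optionally be rescaled to comparable magnitudes (equalize_reward).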
def train_function(config, config_suffix=None):

    config_main = config['main']
    config_VAE = config['VAE']
    config_DDQN = config['DDQN']
    config_PER = config['PER']
    config_ablation = config['ablation']
    eq_rew = config_ablation['equalize_reward']

    domain = config_main['domain']

    # Domain-specific parameters (e.g. state and action space dimensions)
    if domain == '2D':
        domain_name = "config_2D.json"
    elif domain == 'acrobot':
        domain_name = "config_acrobot.json"
    elif domain == 'hiv':
        if config_suffix is not None:
            domain_name = "config_hiv{}.json".format(config_suffix)
        else:
            domain_name = "config_hiv.json"
    elif domain == 'mujoco':
        domain_name = "config_mujoco.json"
    elif domain == 'cancer':
        domain_name = "config_cancer.json"
    else:
        raise ValueError("train.py : domain not recognized")
    with open(domain_name) as f:
        config_domain = json.load(f)

    n_state = config_domain['n_state']
    n_action = config_domain['n_action']
    min_samples_before_train = config_domain['min_samples_before_train']

    seed = config_main['seed']
    np.random.seed(seed)
    random.seed(seed)
    tf.set_random_seed(seed)

    N_instances = config_main['N_instances']
    N_episodes = config_main['N_episodes']
    period = config_main['period']
    dir_name = config_main['dir_name']
    model_name = config_main['model_name']

    os.makedirs('../results/%s' % dir_name, exist_ok=True)

    # Instantiate HPMDP
    hpmdp = HiPMDP.HiPMDP(domain, config_domain)

    # Length of trajectory for input to VAE
    n_vae_steps = config_domain['traj_length']
    n_latent = config_VAE['n_latent']
    z = np.zeros(config_VAE['n_latent'], dtype=np.float32)
    eta = 1.0  # range [0,1] 1 means the policy should act to maximize probe reward
    std_max = -np.inf * np.ones(config_VAE['n_latent'], dtype=np.float32)

    # Instantiate VAE
    buffer_size_vae = config_VAE['buffer_size']
    batch_size_vae = config_VAE['batch_size']
    del config_VAE['buffer_size']
    vae = vae_import.VAE(n_state,
                         n_action,
                         n_vae_steps,
                         seed=seed,
                         **config_VAE)

    # Instantiate control policy
    if config_DDQN['activate']:
        pi_c = ddqn.DDQN(config_DDQN, n_state, n_action,
                         config_PER['activate'], config_VAE['n_latent'])
        epsilon_start = config_DDQN['epsilon_start']
        epsilon_end = config_DDQN['epsilon_end']
        epsilon_decay = np.exp(
            np.log(epsilon_end / epsilon_start) / (N_episodes * N_instances))
        steps_per_train = config_DDQN['steps_per_train']

    # TF session
    config_proto = tf.ConfigProto()
    config_proto.gpu_options.allow_growth = True
    sess = tf.Session(config=config_proto)
    sess.run(tf.global_variables_initializer())

    if config_DDQN['activate']:
        sess.run(pi_c.list_initialize_target_ops)
        epsilon = epsilon_start

    if config_VAE['dual']:
        sess.run(vae.list_equate_dual_ops)

    writer = tf.summary.FileWriter('../results/%s' % dir_name, sess.graph)

    saver = tf.train.Saver()

    # use the DQN version of the replay, so instance_count and bnn-specific params do not matter
    exp_replay_param = {
        'episode_count': N_instances * N_episodes,
        'instance_count': 0,
        'max_task_examples': hpmdp.max_steps_per_episode,
        'ddqn_batch_size': config_DDQN['batch_size'],
        'num_strata_samples': config_PER['num_strata_samples'],
        'PER_alpha': config_PER['alpha'],
        'PER_beta_zero': config_PER['beta_zero'],
        'bnn_batch_size': 0,
        'bnn_start': 0,
        'dqn_start': min_samples_before_train
    }

    buf = ExperienceReplay.ExperienceReplay(
        exp_replay_param, buffer_size=config_PER['buffer_size'])

    # running mean and variance of MDP reward and VAE lowerbound
    if eq_rew:
        stat_counter = 0
        r_mdp_mean = 0
        r_mdp_var = 0
        r_probe_mean = 0
        r_probe_var = 0

    # Logging
    header = "Episode,R_avg,R_e_avg\n"
    with open("../results/%s/log.csv" % dir_name, 'w') as f:
        f.write(header)
    reward_period = 0
    reward_e_period = 0

    list_trajs = []  # circular buffer to store probe trajectories for VAE
    idx_traj = 0  # counter for list_trajs
    control_step = 0
    train_count_vae = 1
    train_count_control = 1
    total_episodes = 0
    t_start = time.time()
    # Iterate through random instances from the HPMDP
    for idx_instance in range(1, N_instances + 1):

        hpmdp.switch_instance()
        print("idx_instance", idx_instance, " | Switching instance to",
              hpmdp.instance_param_set)

        # Iterate through many episodes
        for idx_episode in range(1, N_episodes + 1):

            total_episodes += 1

            eta = 1.0
            z = np.zeros(config_VAE['n_latent'], dtype=np.float32)
            if total_episodes % period == 0:
                list_eta = [eta]

            # rolling window of (state, action) pairs
            traj_for_vae = []
            state = hpmdp.reset()
            done = False
            reward_episode = 0
            reward_e_episode = 0
            step_episode = 0

            if total_episodes % period == 0:
                summarize = True
            else:
                summarize = False

            summarized = False
            while not done:

                action = pi_c.run_actor(state, z, sess, epsilon, eta)
                control_step += 1
                action_1hot = np.zeros(n_action)
                action_1hot[action] = 1
                traj_for_vae.append((state, action_1hot))
                if len(traj_for_vae) == n_vae_steps + 1:
                    traj_for_vae = traj_for_vae[1:]

                state_next, reward, done = hpmdp.step(action)
                step_episode += 1

                if eq_rew:
                    stat_counter += 1
                    # update MDP reward mean and var
                    r_mdp_mean_prev = r_mdp_mean
                    r_mdp_mean = 1 / float(stat_counter) * reward + (
                        stat_counter - 1) / float(stat_counter) * r_mdp_mean
                    r_mdp_var = r_mdp_var + (reward - r_mdp_mean_prev) * (
                        reward - r_mdp_mean)

                if len(traj_for_vae) == n_vae_steps:
                    # Compute probe reward using VAE
                    reward_e = vae.compute_lower_bound(traj_for_vae, sess)[0]

                    if eq_rew:
                        # Update probe reward mean and var
                        r_probe_mean_prev = r_probe_mean
                        r_probe_mean = 1 / float(stat_counter) * reward_e + (
                            stat_counter -
                            1) / float(stat_counter) * r_probe_mean
                        r_probe_var = r_probe_var + (
                            reward_e - r_probe_mean_prev) * (reward_e -
                                                             r_probe_mean)
                        # Scale probe reward into MDP reward
                        reward_e = (
                            (reward_e - r_probe_mean) /
                            np.sqrt(r_probe_var / stat_counter) +
                            r_mdp_mean) * np.sqrt(r_mdp_var / stat_counter)

                    reward_total = eta * reward_e + (1 - eta) * reward
                else:
                    reward_e = 0.0
                    reward_total = reward

                # Get z_next and eta_next, because they are considered part of the augmented MDP state
                if len(traj_for_vae) == n_vae_steps:
                    std = vae.get_std(sess, traj_for_vae)
                    # Update max
                    for idx in range(n_latent):
                        if std[idx] >= std_max[idx]:
                            std_max[idx] = std[idx]
                    std = std / std_max  # element-wise normalization, now each element is between [0,1]
                    eta_next = np.sum(std) / n_latent  # scalar between [0,1]
                    # Use VAE to update hidden parameter
                    z_next = vae.encode(sess, traj_for_vae)
                else:
                    z_next = z
                    eta_next = eta

                if total_episodes % period == 0:
                    list_eta.append(eta_next)

                # Use total reward to train policy
                buf.add(
                    np.reshape(
                        np.array([
                            state, z, eta, action, reward_total, state_next,
                            z_next, eta_next, done
                        ]), (1, 9)))
                state = state_next
                eta = eta_next
                z = z_next

                # Note that for evaluation purpose we record the MDP reward separately
                reward_episode += reward
                reward_e_episode += reward_e

                # Store non-overlapping trajectories for training VAE
                # if len(traj_for_vae) == n_vae_steps:
                if step_episode % n_vae_steps == 0:
                    if idx_traj >= len(list_trajs):
                        list_trajs.append(
                            list(traj_for_vae))  # must make a new list
                    else:
                        list_trajs[idx_traj] = list(traj_for_vae)
                    idx_traj = (idx_traj + 1) % buffer_size_vae

                if control_step >= min_samples_before_train and control_step % steps_per_train == 0:
                    batch, IS_weights, indices = buf.sample(control_step)
                    if not summarized:
                        # Write TF summary at first train step of the last episode of every instance
                        td_loss = pi_c.train_step(sess, batch, IS_weights,
                                                  indices, train_count_control,
                                                  summarize, writer)
                        summarized = True
                    else:
                        td_loss = pi_c.train_step(sess, batch, IS_weights,
                                                  indices, train_count_control,
                                                  False, writer)
                    train_count_control += 1

                    if config_PER['activate']:
                        buf.update_priorities(
                            np.hstack((np.reshape(td_loss, (len(td_loss), -1)),
                                       np.reshape(indices,
                                                  (len(indices), -1)))))

            reward_period += reward_episode
            reward_e_period += reward_e_episode

            if epsilon > epsilon_end:
                epsilon *= epsilon_decay

            # Train VAE at the end of each episode
            if len(list_trajs) >= batch_size_vae:
                vae.train_step(sess, list_trajs, train_count_vae, summarize,
                               writer)
                train_count_vae += 1

            # Logging
            if total_episodes % period == 0:
                s = "%d,%.2f,%.2f\n" % (total_episodes,
                                        reward_period / float(period),
                                        reward_e_period / float(period))
                print(s)
                with open("../results/%s/log.csv" % dir_name, 'a') as f:
                    f.write(s)
                with open("../results/%s/eta.csv" % dir_name, 'a') as f:
                    eta_string = ','.join(['%.2f' % x for x in list_eta])
                    eta_string += '\n'
                    f.write(eta_string)
                if config_domain['save_threshold'] and reward_period / float(
                        period) > config_domain['save_threshold']:
                    saver.save(
                        sess, '../results/%s/%s.%d' %
                        (dir_name, model_name, total_episodes))
                reward_period = 0
                reward_e_period = 0

    with open("../results/%s/time.txt" % dir_name, 'a') as f:
        f.write("%.5e" % (time.time() - t_start))

    with open('../results/%s/std_max.pkl' % dir_name, 'wb') as f:
        pickle.dump(std_max, f)

    if eq_rew:
        reward_scaling = np.array([
            r_mdp_mean,
            np.sqrt(r_mdp_var / stat_counter), r_probe_mean,
            np.sqrt(r_probe_var / stat_counter)
        ])
        with open('../results/%s/reward_scaling.pkl' % dir_name, 'wb') as f:
            pickle.dump(reward_scaling, f)

    saver.save(sess, '../results/%s/%s' % (dir_name, model_name))
Example 23
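# MXNet workflow: bind the VAE training symbol, train with Adam on MNIST (optionally visualizing
# reconstructions while training), save the parameters, and in test mode decode random latent vectors
# and browse the outputs with matplotlib buttons.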
def main(docopts):
    docopts["--batch_size"] = int(docopts["--batch_size"])
    docopts["--gpu"] = int(docopts["--gpu"])
    docopts["--lambda_l2_reg"] = float(docopts["--lambda_l2_reg"])
    docopts["--learning_rate"] = float(docopts["--learning_rate"])
    docopts["--max_epochs"] = int(docopts["--max_epochs"])

    # Logging
    logging.basicConfig(level=logging.INFO)

    #
    # Following http://nbviewer.jupyter.org/github/dmlc/mxnet/blob/master/example/notebooks/simple_bind.ipynb
    #

    X, Y = data.get_mnist()
    iter = mx.io.NDArrayIter(data=X, label=Y, batch_size=docopts["--batch_size"], shuffle=True)


    if docopts["train"] or docopts["continue"]:
        m = vae.VAE(ARCHITECTURE)
        sym = m.training_model()

        dbatch = iter.next()
        exe = sym.simple_bind(ctx=mx.gpu(docopts["--gpu"]), data = dbatch.data[0].shape)

        args = exe.arg_dict
        grads = exe.grad_dict
        outputs = dict(zip(sym.list_outputs(), exe.outputs))

        if docopts["continue"]:
            loaded_args = mx.nd.load(os.path.join(docopts["--log"], "parameters"))
            for name in args:
                if name != "data":
                    args[name][:] = loaded_args[name]

        # Initialize parameters from scratch only when not continuing,
        # so the parameters loaded above are not overwritten
        if not docopts["continue"]:
            xavier = mx.init.Xavier()
            for name, nd_array in args.items():
                if name != "data":
                    xavier(name, nd_array)

        optimizer = mx.optimizer.create(name="adam",
                                        learning_rate=docopts["--learning_rate"],
                                        wd=docopts["--lambda_l2_reg"])
        updater = mx.optimizer.get_updater(optimizer)

        # Train
        keys = sym.list_arguments()

        if docopts["--visualize"]:
            # Random image
            last_image_time = time.time()
            plt.ion()
            figure = plt.figure()
            imshow = plt.imshow(np.random.uniform(size=(28,28)), cmap="gray")

        for epoch in range(docopts["--max_epochs"]):
            iter.reset()
            epoch_start_time = time.time()
            batch = 0
            for dbatch in iter:
                args["data"][:] = dbatch.data[0]

                exe.forward(is_train=True)
                exe.backward()

                if docopts["--visualize"]:
                    # Throttle refresh ratio
                    if time.time() - last_image_time > 0.1:
                        last_image_time = time.time()
                        # random.randint includes its upper bound, so use randrange
                        imshow.set_data(exe.outputs[2][
                            random.randrange(docopts["--batch_size"])].reshape(
                                (28, 28)).asnumpy())
                        figure.canvas.draw()
                        figure.canvas.flush_events()

                for index, key in enumerate(keys):
                    updater(index=index, grad=grads[key], weight=args[key])

                kl_divergence = exe.outputs[3].asnumpy()
                cross_entropy = exe.outputs[4].asnumpy()

                logging.info("Batch %d: %f mean kl_divergence", batch, kl_divergence.mean())
                logging.info("Batch %d: %f mean cross_entropy", batch, cross_entropy.mean())

                batch += 1

            logging.info("Finish training epoch %d in %f seconds",
                         epoch,
                         time.time() - epoch_start_time)

        # Save model parameters (excluding the data array)
        mx.nd.save(os.path.join(docopts["--log"], "parameters"),
                   {x[0]: x[1] for x in args.items() if x[0] != "data"})

    elif docopts["test"]:
        from matplotlib.widgets import Button

        m = vae.VAE(ARCHITECTURE)
        sym = m.testing_model()

        exe = sym.simple_bind(ctx=mx.gpu(docopts["--gpu"]),
                              data=(docopts["--batch_size"], ARCHITECTURE[-1]))

        args = exe.arg_dict
        grads = exe.grad_dict
        outputs = dict(zip(sym.list_outputs(), exe.outputs))

        loaded_args = mx.nd.load(os.path.join(docopts["--log"], "parameters"))
        for name in args:
            if name != "data":
                args[name][:] = loaded_args[name]

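        # Decode latent codes drawn from the standard normal prior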
        args["data"][:] = np.random.randn(docopts["--batch_size"], ARCHITECTURE[-1])
        exe.forward(is_train=False)
        # testing_model has only 1 output
        batch = exe.outputs[0].asnumpy().reshape(-1, 28, 28)
        np.save(os.path.join(docopts["--log"], "output"), batch)

        imshow = plt.imshow(batch[0], cmap="gray")
        callback = Index(imshow, batch)
        axnext = plt.axes([0.8, 0.7, 0.1, 0.075])
        axprev = plt.axes([0.8, 0.6, 0.1, 0.075])
        next_button = Button(axnext, 'Next')
        next_button.on_clicked(callback.next)
        prev_button = Button(axprev, 'Previous')
        prev_button.on_clicked(callback.prev)

        plt.show()
        plt.waitforbuttonpress()
Example no. 24
0
def run_experiment_vae(data, file_prefix, p):
    '''
    Synthesis via training the VAE and the VAR separately
    '''
    
    y = torch.zeros(data.size(0)).float()
    dataset = TensorDataset(data[1:, [0], :, :], y[1:])
    data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                              batch_size=1,
                                              shuffle=True)
    net = vae.VAE(latent_dim=p['latent_dim'],
                  n_clayers=p['n_clayers'], kernel1_size=p['kernel1_size'])
    optimizer = torch.optim.Adam(net.parameters(), lr=p['learning_rate'])
    test_noise = torch.randn(p['N_synth'], p['latent_dim'])
    mse_criterion = MSELoss(reduction='sum')
    for epoch in range(p['n_epochs']):
        cst = 0
        kld = 0
        for i, (images, _) in enumerate(data_loader):
            images = to_var(images)
            out, mu, log_var = net(images, N=p['n_mc'])
            reconst_loss = mse_criterion(out, images.repeat(
                    p['n_mc'], 1, 1, 1))/p['n_mc']
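            # Closed-form KL divergence between N(mu, diag(exp(log_var))) and N(0, I)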
            try:
                kl_divergence = torch.sum(0.5 * (mu**2 + torch.exp(log_var)
                                          - log_var-1))
            except RuntimeError:
                return False

            total_loss = reconst_loss/(p['sigma_squared']) + kl_divergence
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()
            # Accumulate Python floats rather than graph-attached tensors
            cst = cst + total_loss.item()
            kld = kld + kl_divergence.item()
        if epoch % p['save_interval'] == 0:
            net.eval()
            print("Epoch:", epoch + 1, "- Averaged Cost:", cst / (i + 1),
                  'kld:', kld / (i + 1))
            h_prev = np.zeros((data.size(0)-1, p['latent_dim']))
            h_next = np.zeros((data.size(0)-1, p['latent_dim']))
            for i in range(data.size(0)-1):
                mu_synth_prev, _ = torch.chunk(net.encoder(to_var(
                        data[[i]][:, [0], :, :])), 2, dim=1)
                h_prev[i] = mu_synth_prev.data.cpu().numpy()
                mu_synth_next, _ = torch.chunk(net.encoder(to_var(
                        data[[i]][:, [1], :, :])), 2, dim=1)
                h_next[i] = mu_synth_next.data.cpu().numpy()
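            # Least-squares fit of linear latent dynamics h_next ≈ A h_prev; the
            # singular values of A are clipped to <= 1 for stability, and B is chosen
            # so that A A^T + B B^T = I (unit stationary covariance of the latent VAR)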
            A = np.dot(np.dot(h_next.T, h_prev), np.linalg.inv(np.dot(h_prev.T,
                       h_prev)))
            Ua, Sa, VaT = np.linalg.svd(A)
            Sa = np.where(Sa > 1.0, 1.0, Sa)
            A = np.dot(Ua*Sa, VaT)
            Ub, Sb, _ = np.linalg.svd(np.eye(p['latent_dim'])-np.dot(A, A.T))
            B = np.dot(Ub, np.diag(np.sqrt(Sb)))
            Y = (net.synthesize(torch.from_numpy(A).float(),
                 torch.from_numpy(B).float(),
                 additive_noise=test_noise,
                 img_init=data[[0]][:, [0], :, :])+1)/2
            torchvision.utils.save_image(Y[:p['n_row']*p['n_col']].data.cpu(),
                                         file_prefix + '_%03d' % epoch + '.png',
                                         nrow=p['n_row'])
            if p['create_video']:
                writevid(Y.data.cpu().numpy().squeeze(),
                         file_prefix+'_%03d' % epoch + '.avi', p['fps'])
            net.train()
    return True
Example no. 25
0
 def init_models(self):
     self.c_model = controller.Controller()
     self.vae_model = vae.VAE()
     self.rnn_model = rnn.RNN()
Example no. 26
0
    help='file name for the parameter file')
parser.add_argument('-restore', dest='restore', action='store_true',\
    help="Continue from previous run")
parser.set_defaults(restore=False)

args = parser.parse_args()
nsteps = args.nsteps
restore = args.restore
param_fn = args.param_fn

# Dimensions of the layers
enc_dim = [784, 400, 20]
dec_dim = [20, 400, 784]

# Load MNIST
if 'mnist' not in locals():
    mnist = input_data.read_data_sets('MNIST')

# Build the VAE
#vae_net = vae.VAE(enc_dim, dec_dim, n_steps=int(20000))
vae_net = vae.VAE(enc_dim, dec_dim, n_steps=int(nsteps))
vae_net.build_graph()

# Train the model
vae_net.train(mnist, restore=restore)

# Dump the matrices
with tf.Session() as sess:
    vae_net.dump_matrices(sess, param_fn)
print("Data stored in file " + param_fn)
 vae_losses = []
 g_losses = []
 h_losses = []
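 # For each dimensionality d, repeat the experiment max_runs times and collect the losses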
 for d in Ds:
     vae_run = []
     g_run = []
     h_run = []
     for run in range(max_runs):
         D = d
         dataset = var.MyDataSet(d, D, num_points)
         dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                                  shuffle=True, num_workers=2)
         s = dataset.sigma
         # print (s)
         if s_trainable:
             vae = var.VAE(d, D)
         else:
             vae = var.VAE(d, D, s)
         optimizer = optim.Adam(vae.parameters(), lr=lr)
         l = None
         epochs = []
         prev_loss = 0
         for epoch in range(max_epochs):
             losses = []
             for i, data in enumerate(dataloader, 0):
                 inputs = Variable(data)
                 optimizer.zero_grad()
                 loss = vae.total_loss_direct(inputs)
                 loss.backward()
                 optimizer.step()
                 l = loss
Example no. 28
0
def main(nets_archi, train_data, test_data, mode_, name="test"):
    # Preprocessing data
    data_size = train_data.shape[0]
    # Create weights DST dir
    DST = create_DST(name)

    ###### Reset tf graph ######
    tf.reset_default_graph()
    start_time = time.time()
    print("\nPreparing variables and building model ...")

    ###### Create tf placeholder for obs variables ######
    y = tf.placeholder(dtype=data_type(),
                       shape=(None, IMAGE_SIZE, IMAGE_SIZE, 1))

    ###### Create variable for batch ######
    batch = tf.Variable(0, dtype=data_type())
    ###### Learning rate decay ######
    learning_rate = tf.train.exponential_decay(
        learning_rate_init,  # Base learning rate.
        batch * BATCH_SIZE,  # Current index into the dataset.
        15 * data_size,  # Decay step.
        0.98,  # Decay rate.
        staircase=True)

    ###### Create VAE instance ######
    recognition_net = nets_archi["recog"]
    generator_net = nets_archi["gener"]
    vae_ = vae.VAE(
        recog_archi=recognition_net,  # architecture of the recognition network
        gener_archi=generator_net,  # architecture of the generative network
        N=N,  # dim of the gaussian latents x
        P=IMAGE_SIZE * IMAGE_SIZE)  # dim of the obs variables y

    ###### Build loss and optimizer ######
    vae_._create_loss_optimizer(y, learning_rate, batch)

    ###### Build generator ######
    vae_._generate(nexamples)

    ###### Initializer ######
    init = tf.global_variables_initializer()
    ###### Saver ######
    saver = tf.train.Saver()
    ###### Create a local session to run the training ######
    with tf.Session() as sess:
        # Training
        if mode_ == "training":
            # Opening csv file
            csv_path = "./Perf"
            if not tf.gfile.Exists(csv_path):
                os.makedirs(csv_path)
            csvfileTrain = open(os.path.join(csv_path, name) + ".csv", 'w')
            Trainwriter = csv.writer(
                csvfileTrain,
                delimiter=';',
            )
            Trainwriter.writerow(['Num Epoch', 'train loss', 'test_loss'])

            # Initialize variables
            sess.run(init)

            # initialize performance indicators
            best_l = -10000000000.0

            #training loop
            print("\nStart training ...")
            for epoch in range(num_epochs):
                start_time = time.time()
                print("")
                print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
                # Training loop
                train_l = 0.0
                batches = data_processing.get_batches(train_data, BATCH_SIZE)
                for i, y_batch in enumerate(batches):
                    _, l, lr = sess.run(
                        [vae_.optimizer, vae_.VAE_obj, learning_rate],
                        feed_dict={y: y_batch})
                    # Update average loss
                    train_l += l / len(batches)

                # Testing loop
                test_l = 0.0
                batches = data_processing.get_batches(test_data, BATCH_SIZE)
                for i, y_batch in enumerate(batches):
                    l = sess.run(vae_.VAE_obj, feed_dict={y: y_batch})
                    # Update average loss
                    test_l += l / len(batches)

                # Update best perf and save model
                if test_l > best_l:
                    best_l = test_l
                    if epoch > 20:
                        saver.save(sess, DST)
                        print("model saved.")
                # Print info for previous epoch
                print("Epoch {} done, took {:.2f}s, learning rate: {:10.2e}".
                      format(epoch,
                             time.time() - start_time, lr))
                print(
                    "Train loss: {:.3f}, Test loss: {:.3f}, Best test loss: {:.3f}"
                    .format(train_l, test_l, best_l))

                # Writing csv file with results and saving models
                Trainwriter.writerow([epoch + 1, train_l, test_l])

        if mode_ == "reconstruct":
            #Plot reconstruction mean
            if not tf.gfile.Exists(DST + ".meta"):
                raise Exception("no weights given")
            saver.restore(sess, DST)
            img = test_data[np.random.randint(0,
                                              high=test_data.shape[0],
                                              size=BATCH_SIZE)]
            bernouilli_mean = sess.run(vae_.y_reconstr_mean,
                                       feed_dict={y: img})
            save_reconstruct(img[:nexamples], bernouilli_mean[:nexamples],
                             "./reconstruct")

        if mode_ == "generate":
            #Test for ploting images
            if not tf.gfile.Exists(DST + ".meta"):
                raise Exception("no weights given")
            saver.restore(sess, DST)
            bernouilli_mean = sess.run(vae_.y_generate_mean, feed_dict={})
            bernouilli_mean = np.transpose(
                np.reshape(bernouilli_mean,
                           (nexamples, 1, IMAGE_SIZE, IMAGE_SIZE)),
                (1, 0, 2, 3))
            save_gene(bernouilli_mean, "./generate")
Example no. 29
0
np.random.shuffle(data)

VAL_SIZE = data.shape[0] // 100
TRAIN_SIZE = data.shape[0] - VAL_SIZE
data_val = data[:VAL_SIZE]
data_train = data[VAL_SIZE:]

# create VAE
DEVICE = torch.device(
    'cuda' if args.gpu and torch.cuda.is_available() else 'cpu')
print('DEVICE = %s' % str(DEVICE))

vae_module = vae.VAE(enc=args.enc,
                     enc_kwargs=enc_kwargs,
                     dec=args.dec,
                     dec_kwargs=dec_kwargs,
                     data_shape=(1, HEIGHT, WIDTH),
                     code_size=args.code_size,
                     loss_fn=args.loss_fn,
                     kl_tolerance=args.kl_tolerance)
vae_module.to(DEVICE)
opt = optim.Adam(vae_module.parameters(), lr=1e-3)
vae_module.set_optimizer(opt)
log(vae_module)

# train VAE
keys = ['Epoch', 'Coding Loss', 'Reconstruction Loss', 'Total Loss']
formats = ['%04d', '%9.4f', '%9.4f', '%9.4f']
clear_logs()
log_tabular(vals=keys)
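# Initial evaluation on the held-out validation split (baseline before training)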
total_loss, coding_loss, reconstruction_loss = vae_module.eval_dataset(
    dataset=data_val, batch_size=args.batch_size)
Example no. 30
0
def main():
    # Set variables.
    img_dim = [64, 64]
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 20
    save_freq = 5
    batch_size = 128
    shuffle = True
    num_loader_workers = 3
    beta = 1.
    cuda = True
    learning_rate = 0.001
    adaptive = False  # True
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    vae_net = vae.VAE(latent_dim, hidden_dim)
    if cuda:
        vae_net.cuda()

    # Set loss fn.
    loss_fn = vae.loss_fn

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle,
                                         num_loader_workers)

    # Load optimizer.
    if adaptive:
        optimizer = optim.Adam(vae_net.parameters(), lr=learning_rate)
    else:
        optimizer = optim.SGD(vae_net.parameters(), lr=learning_rate)
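        # OneCycleLR is stepped once per mini-batch below, so its total step
        # budget (epochs * steps_per_epoch) must cover every batch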
        scheduler = optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=1e-1,
            epochs=num_epochs,
            steps_per_epoch=len(anime_data.data_loader))

    # Epoch loop
    for epoch in range(1, num_epochs + 1):
        print('Epoch {} of {}'.format(epoch, num_epochs))
        start = time.time()
        train_loss = 0

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            # print('Batch {}'.format(i_batch+1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            x_reconst, mu, logvar = vae_net(x)
            loss = loss_fn(x, x_reconst, mu, logvar, beta)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()
            if not adaptive:
                scheduler.step()

        if epoch % save_freq == 0:
            if adaptive:
                o = 'adaptive'
            else:
                o = 'cyclic'
            util.save_weights(
                vae_net,
                os.path.join(save_dir, 'vae_{}_{}.pth'.format(o, epoch)))

        end = time.time()
        print('loss: ', train_loss / len(anime_data.img_folder))
        print('Took {}'.format(end - start))

        if adaptive:
            o = 'adaptive'
        else:
            o = 'cyclic'
        util.save_weights(
            vae_net, os.path.join(save_dir, 'vae_{}_{}.pth'.format(o, epoch)))