Example #1
0
def get_experiment_environment(**args):
    """Seed all process-level RNGs and return a scoped logger context.

    Expects ``args["seed"]`` to be present; configures logging into the
    'tmp' directory with stdout, log-file, and CSV output formats.
    """
    from misc_util import set_global_seeds

    set_global_seeds(args["seed"])
    return logger.scoped_configure(dir='tmp',
                                   format_strs=['stdout', 'log', 'csv'])
Example #2
0
def main():
    """Build the autoencoder model and train it on the local image set.

    Opens a single-threaded TF session (entered manually so it stays the
    default session for the rest of the program), seeds all RNGs, then
    hands the model and the ``training_images`` directory to ``train_net``.
    """
    sess = U.single_threaded_session()
    sess.__enter__()  # keep the session active as the default for training
    set_global_seeds(0)

    cur_dir = get_cur_dir()
    img_dir = osp.join(cur_dir, "training_images")

    header("Load model")
    mynet = mymodel(name="mynet", img_shape=[210, 160, 1], latent_dim=2048)
    # Fixed copy-paste: the second banner announced "Load model" again even
    # though this phase is training.
    header("Train model")
    train_net(model=mynet, img_dir=img_dir)
Example #3
0
def main(env_id, num_timesteps, seed, policy, nstack, nsteps, lrschedule,
         optimizer, num_cpu, model_file, use_static_wrapper,
         use_encoded_imagination, use_decoded_imagination):
    """Build a (possibly wrapped) vectorized Gym env and run A2C training.

    Args:
        env_id: Gym environment id to instantiate in each worker.
        num_timesteps: requested timestep budget (divided by 4 below,
            presumably to account for frame-skip — TODO confirm).
        seed: base RNG seed; worker ``rank`` is added per subprocess.
        policy: 'fc' or 'cnn'; selects the policy network.
        nstack, nsteps, lrschedule, optimizer: forwarded to ``learn``.
        num_cpu: number of parallel worker environments.
        model_file: checkpoint used by the imagination wrappers.
        use_static_wrapper / use_encoded_imagination /
        use_decoded_imagination: optional env-wrapper toggles; the two
            imagination modes are mutually exclusive.
    """
    num_timesteps //= 4
    # Encoded and decoded imagination cannot be combined.
    assert not (use_encoded_imagination and use_decoded_imagination)

    def make_env(rank):
        """Return a thunk that builds worker ``rank``'s wrapped env."""
        def _thunk():
            env = gym.make(env_id)
            env.seed(seed + rank)  # decorrelate workers
            if use_static_wrapper:
                env = StaticWrapper(env)
            if policy == 'cnn' or use_encoded_imagination:
                # Pixel observations: render at 400x600, then downsample 4x.
                env = RenderWrapper(env, 400, 600)
                env = DownsampleWrapper(env, 4)
            if use_encoded_imagination or use_decoded_imagination:
                env = FrameStack(env, 3)
            if use_encoded_imagination:
                env = EncodedImaginationWrapper(env, model_file, num_cpu)
            if use_decoded_imagination:
                env = DecodedImaginationWrapper(env, model_file, num_cpu)
            gym.logger.setLevel(logging.WARN)
            return env

        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])

    if policy == 'fc':
        policy_fn = FcPolicy
    elif policy == 'cnn':
        policy_fn = CnnPolicy
    else:
        # Previously an unknown policy fell through and raised a confusing
        # NameError on ``policy_fn``; fail fast with a clear message instead.
        raise ValueError(
            "unknown policy {!r}; expected 'fc' or 'cnn'".format(policy))
    learn(policy_fn,
          env,
          seed,
          nsteps=nsteps,
          nstack=nstack,
          total_timesteps=num_timesteps,
          lrschedule=lrschedule,
          optimizer=optimizer,
          max_episode_length=195)
    env.close()
Example #4
0
def main():
    """Train/test the multi-GPU beta-VAE-style disentanglement model.

    Base: https://openreview.net/pdf?id=Sy2fzU9gl

    Parses ``--dataset`` (chairs, celeba, dsprites), ``--mode`` (train,
    classifier_train, test), ``--disentangled_feat`` and ``--num_gpus``,
    sets per-dataset hyperparameters, builds the model (one tower per GPU
    for dsprites), and dispatches to the requested routine.

    NOTE(review): 'classifier_train' relies on ``cls_batch_per_gpu``/``cls_L``
    which are only defined for the dsprites dataset — other datasets will
    raise NameError in that mode; confirm intended usage.
    """

    # (1) parse arguments

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset')  # chairs, celeba, dsprites
    parser.add_argument('--mode')  # train, test
    parser.add_argument('--disentangled_feat', type=int)
    parser.add_argument('--num_gpus', type=int, default=1)
    args = parser.parse_args()

    dataset = args.dataset
    mode = args.mode
    disentangled_feat = args.disentangled_feat
    # Checkpoint / log / validation file names encode the configuration.
    chkfile_name = "chk_{}_{}".format(dataset, disentangled_feat)
    logfile_name = "log_{}_{}".format(dataset, disentangled_feat)
    validatefile_name = "val_{}_{}".format(dataset, disentangled_feat)

    # (2) Dataset

    if dataset == 'chairs':
        dir_name = "/dataset/chairs/training_img"
    elif dataset == 'celeba':
        dir_name = 'temporarily not available'
    elif dataset == 'dsprites':
        dir_name = '/dataset/dsprites'  # This is dummy, for dsprites dataset, we are using data_manager
    else:
        header("Unknown dataset name")
        # Fixed: previously fell through and crashed later on undefined
        # hyperparameters; abort early instead.
        return

    # NOTE(review): img_dir is hardcoded to the chairs path regardless of
    # --dataset (dir_name above is unused here) — confirm this is intended.
    cur_dir = get_cur_dir()
    cur_dir = osp.join(cur_dir, 'dataset')
    cur_dir = osp.join(cur_dir, 'chairs')
    img_dir = osp.join(cur_dir, 'training_img')  # This is for chairs

    # (3) Set experiment configuration, and disentangled_feat, according to beta-VAE( https://openreview.net/pdf?id=Sy2fzU9gl )

    if dataset == 'chairs':
        latent_dim = 32
        loss_weight = {'siam': 50000.0, 'kl': 30000.0}
        batch_size = 32
        max_epoch = 300
        lr = 0.0001
    elif dataset == 'celeba':
        latent_dim = 32
        loss_weight = {'siam': 1000.0, 'kl': 30000.0}
        batch_size = 512
        max_epoch = 300
        lr = 0.0001
    elif dataset == 'dsprites':
        latent_dim = 10
        loss_weight = {'siam': 1.0, 'kl': 1.0}
        batch_size = 1024
        max_epoch = 300
        lr = 0.001
        feat_size = 5  # shape, rotation, size, x, y => Don't know why there are only 4 features in paper p6. Need to check more about it.
        cls_batch_per_gpu = 15
        cls_L = 10

    entangled_feat = latent_dim - disentangled_feat

    # (4) Open Tensorflow session, Need to find optimal configuration because we don't need to use single thread session
    # Important!!! : If we don't use single threaded session, then we need to change this!!!

    sess = U.mgpu_session()
    sess.__enter__()  # keep the session active as the default for the run
    set_global_seeds(0)

    num_gpus = args.num_gpus

    # (5) Import model, merged into models.py
    # only celeba has RGB channel, other has black and white.

    if dataset == 'chairs':
        import models
        mynet = models.mymodel(name="mynet",
                               img_shape=[64, 64, 1],
                               latent_dim=latent_dim,
                               disentangled_feat=disentangled_feat,
                               mode=mode,
                               loss_weight=loss_weight)
        # Fixed: the training dispatch below expects ``mynets``; it was only
        # defined in the dsprites branch, causing a NameError for chairs.
        mynets = [mynet]
    elif dataset == 'celeba':
        import models
        mynet = models.mymodel(name="mynet",
                               img_shape=[64, 64, 3],
                               latent_dim=latent_dim,
                               disentangled_feat=disentangled_feat,
                               mode=mode,
                               loss_weight=loss_weight)
        mynets = [mynet]  # see chairs branch above
    elif dataset == 'dsprites':
        import models

        # Shared placeholders; each GPU tower consumes an equal split.
        img_shape = [None, 64, 64, 1]
        img1 = U.get_placeholder(name="img1",
                                 dtype=tf.float32,
                                 shape=img_shape)
        img2 = U.get_placeholder(name="img2",
                                 dtype=tf.float32,
                                 shape=img_shape)

        feat_cls = U.get_placeholder(name="feat_cls",
                                     dtype=tf.int32,
                                     shape=None)

        # Batches must pair up and divide evenly across GPUs.
        tf.assert_equal(tf.shape(img1)[0], tf.shape(img2)[0])
        tf.assert_equal(tf.floormod(tf.shape(img1)[0], num_gpus), 0)

        tf.assert_equal(tf.floormod(tf.shape(feat_cls)[0], num_gpus), 0)

        img1splits = tf.split(img1, num_gpus, 0)
        img2splits = tf.split(img2, num_gpus, 0)

        feat_cls_splits = tf.split(feat_cls, num_gpus, 0)

        # One model tower per GPU, sharing variables across towers.
        mynets = []
        with tf.variable_scope(tf.get_variable_scope()):
            for gid in range(num_gpus):
                with tf.name_scope('gpu%d' % gid) as scope:
                    with tf.device('/gpu:%d' % gid):
                        mynet = models.mymodel(
                            name="mynet",
                            img1=img1splits[gid],
                            img2=img2splits[gid],
                            img_shape=img_shape[1:],
                            latent_dim=latent_dim,
                            disentangled_feat=disentangled_feat,
                            mode=mode,
                            loss_weight=loss_weight,
                            feat_cls=feat_cls_splits[gid],
                            feat_size=feat_size,
                            cls_L=cls_L,
                            cls_batch_per_gpu=cls_batch_per_gpu)
                        mynets.append(mynet)
                # Reuse variables for the next tower.
                tf.get_variable_scope().reuse_variables()

    else:
        header("Unknown model name")
        return  # abort: no model was constructed

    # (6) Train or test the model
    # Testing by adding noise on latent feature is not merged yet. Will be finished soon.

    if mode == 'train':
        mgpu_train_net(models=mynets,
                       num_gpus=num_gpus,
                       mode=mode,
                       img_dir=img_dir,
                       dataset=dataset,
                       chkfile_name=chkfile_name,
                       logfile_name=logfile_name,
                       validatefile_name=validatefile_name,
                       entangled_feat=entangled_feat,
                       max_epoch=max_epoch,
                       batch_size=batch_size,
                       lr=lr)
    elif mode == 'classifier_train':
        warn("Classifier Train")
        # NOTE(review): dsprites-only (cls_* defined in that branch above).
        mgpu_classifier_train_net(models=mynets,
                                  num_gpus=num_gpus,
                                  cls_batch_per_gpu=cls_batch_per_gpu,
                                  cls_L=cls_L,
                                  mode=mode,
                                  img_dir=img_dir,
                                  dataset=dataset,
                                  chkfile_name=chkfile_name,
                                  logfile_name=logfile_name,
                                  validatefile_name=validatefile_name,
                                  entangled_feat=entangled_feat,
                                  max_epoch=max_epoch,
                                  batch_size=batch_size,
                                  lr=lr)

    elif mode == 'test':
        header("Need to be merged")
    else:
        header("Unknown mode name")
Example #5
0
def learn(policy,
          env,
          seed,
          nsteps=5,
          nstack=4,
          total_timesteps=int(80e6),
          vf_coef=0.5,
          ent_coef=0.01,
          max_grad_norm=0.5,
          lr=7e-4,
          lrschedule='linear',
          epsilon=1e-5,
          alpha=0.99,
          gamma=0.99,
          log_interval=100,
          max_episode_length=None,
          optimizer=None):
    """Run the A2C training loop on a vectorized environment.

    Args:
        policy: policy-network constructor passed through to ``Model``.
        env: vectorized env (needs ``num_envs``, spaces, ``remotes``).
        seed: global RNG seed.
        nsteps / nstack: rollout length and frame-stack depth.
        total_timesteps: budget used by the LR schedule inside ``Model``.
        vf_coef, ent_coef, max_grad_norm, lr, lrschedule, epsilon, alpha:
            optimizer / loss hyperparameters forwarded to ``Model``.
        gamma: discount factor for the rollout runner.
        log_interval: tabular-log period in updates.
        max_episode_length: if set, stop once the mean episode length
            (over the last 100 episodes) reaches this value.
        optimizer: optional optimizer override forwarded to ``Model``.

    NOTE(review): the loop has no timestep-based termination; it only stops
    via ``max_episode_length`` — confirm that is intended.
    """
    tf.reset_default_graph()
    set_global_seeds(seed)

    nenvs = env.num_envs
    ob_space = env.observation_space
    ac_space = env.action_space
    num_procs = len(env.remotes)  # HACK
    model = Model(policy=policy,
                  ob_space=ob_space,
                  ac_space=ac_space,
                  nenvs=nenvs,
                  nsteps=nsteps,
                  nstack=nstack,
                  num_procs=num_procs,
                  ent_coef=ent_coef,
                  vf_coef=vf_coef,
                  max_grad_norm=max_grad_norm,
                  lr=lr,
                  alpha=alpha,
                  epsilon=epsilon,
                  total_timesteps=total_timesteps,
                  lrschedule=lrschedule,
                  optimizer=optimizer)
    runner = Runner(env, model, nsteps=nsteps, nstack=nstack, gamma=gamma)

    stats = EpisodeStats(nsteps, nenvs, maxlen=100)
    nbatch = nenvs * nsteps
    tstart = time.time()
    # Fixed: start the update counter at 1 (baselines convention). With
    # count() starting at 0 the first fps reading was always 0 and the
    # ``update == 1`` first-update log check was dead.
    for update in itertools.count(1):
        obs, states, rewards, masks, actions, values = runner.run()
        total_loss, policy_loss, value_loss, policy_entropy = model.train(
            obs, states, rewards, masks, actions, values)
        nseconds = time.time() - tstart
        fps = int((update * nbatch) / nseconds)
        stats.feed(rewards, masks)

        if update % log_interval == 0 or update == 1:
            ev = explained_variance(values, rewards)
            logger.record_tabular("nupdates", update)
            logger.record_tabular("total_timesteps", update * nbatch)
            logger.record_tabular("fps", fps)
            logger.record_tabular("policy_entropy", float(policy_entropy))
            logger.record_tabular("value_loss", float(value_loss))
            logger.record_tabular("total_loss", float(total_loss))
            logger.record_tabular("explained_variance", float(ev))
            logger.record_tabular("mean_episode_length", stats.mean_length())
            logger.record_tabular("mean_episode_reward", stats.mean_reward())

            logger.dump_tabular()

            # Early stop when the policy solves the task (e.g. CartPole's
            # 195-step threshold); only checked at logging time.
            if max_episode_length and stats.mean_length(
            ) >= max_episode_length:
                break
    env.close()