def get_experiment_environment(**kwargs):
    """Seed this process and open a scoped logger context for the experiment.

    Expects a ``seed`` keyword argument; returns the context object produced
    by ``logger.scoped_configure`` so the caller can enter/exit it.
    """
    from misc_util import set_global_seeds

    set_global_seeds(kwargs["seed"])
    # Log under ./tmp in stdout, plain-log, and csv formats.
    return logger.scoped_configure(dir='tmp', format_strs=['stdout', 'log', 'csv'])
def main():
    """Build the image autoencoder and train it on images in ./training_images.

    Opens a single-threaded TF session, seeds all RNGs, then constructs the
    model and hands it to ``train_net``.
    """
    sess = U.single_threaded_session()
    sess.__enter__()
    set_global_seeds(0)

    # Training images live next to the current directory.
    dir_name = "training_images"
    cur_dir = get_cur_dir()
    img_dir = osp.join(cur_dir, dir_name)

    header("Load model")
    # Atari-sized grayscale frames (210x160x1) compressed to a 2048-dim latent.
    mynet = mymodel(name="mynet", img_shape=[210, 160, 1], latent_dim=2048)

    # BUG FIX: this banner previously repeated "Load model" (copy-paste);
    # it announces the training phase, not a second model load.
    header("Train model")
    train_net(model=mynet, img_dir=img_dir)
def main(env_id, num_timesteps, seed, policy, nstack, nsteps, lrschedule,
         optimizer, num_cpu, model_file, use_static_wrapper,
         use_encoded_imagination, use_decoded_imagination):
    """Build a vectorized, wrapped Gym environment and run A2C training.

    Args:
        env_id: Gym environment id to instantiate in each worker.
        num_timesteps: total environment-step budget (divided by 4 below).
        seed: base RNG seed; worker i is seeded with seed + i.
        policy: 'fc' or 'cnn' — selects the policy network.
        nstack, nsteps, lrschedule, optimizer: forwarded to ``learn``.
        num_cpu: number of subprocess environment workers.
        model_file: checkpoint consumed by the imagination wrappers.
        use_static_wrapper / use_encoded_imagination / use_decoded_imagination:
            optional wrapper toggles; the two imagination modes are exclusive.
    """
    # Presumably compensates for a downstream frame-skip of 4 — TODO confirm.
    num_timesteps //= 4
    # Encoded and decoded imagination are mutually exclusive wrapper stacks.
    assert not (use_encoded_imagination and use_decoded_imagination)

    def make_env(rank):
        # Returns a thunk so SubprocVecEnv can construct the env in its worker.
        def _thunk():
            env = gym.make(env_id)
            env.seed(seed + rank)  # distinct seed per worker
            if use_static_wrapper:
                env = StaticWrapper(env)
            if policy == 'cnn' or use_encoded_imagination:
                # Pixel observations: render at 400x600 then downsample by 4.
                env = RenderWrapper(env, 400, 600)
                env = DownsampleWrapper(env, 4)
            if use_encoded_imagination or use_decoded_imagination:
                env = FrameStack(env, 3)
            if use_encoded_imagination:
                env = EncodedImaginationWrapper(env, model_file, num_cpu)
            if use_decoded_imagination:
                env = DecodedImaginationWrapper(env, model_file, num_cpu)
            gym.logger.setLevel(logging.WARN)
            return env
        return _thunk

    set_global_seeds(seed)
    env = SubprocVecEnv([make_env(i) for i in range(num_cpu)])

    # BUG FIX: previously two independent `if`s left policy_fn unbound for an
    # unknown policy name, surfacing later as a confusing NameError inside
    # learn(); fail fast with a clear message instead.
    if policy == 'fc':
        policy_fn = FcPolicy
    elif policy == 'cnn':
        policy_fn = CnnPolicy
    else:
        raise ValueError(
            "Unknown policy: {!r} (expected 'fc' or 'cnn')".format(policy))

    learn(policy_fn, env, seed, nsteps=nsteps, nstack=nstack,
          total_timesteps=num_timesteps, lrschedule=lrschedule,
          optimizer=optimizer, max_episode_length=195)
    env.close()
def main():
    """Train or evaluate a disentangling VAE on chairs / celeba / dsprites.

    Base paper: https://openreview.net/pdf?id=Sy2fzU9gl (beta-VAE).
    Parses CLI flags, picks per-dataset hyperparameters, builds the model
    (replicated across GPUs for dsprites), then dispatches on --mode.
    """
    # (1) Parse arguments.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset')  # chairs, celeba, dsprites
    parser.add_argument('--mode')  # train, test (also 'classifier_train' below)
    parser.add_argument('--disentangled_feat', type=int)
    parser.add_argument('--num_gpus', type=int, default=1)
    args = parser.parse_args()
    dataset = args.dataset
    mode = args.mode
    disentangled_feat = args.disentangled_feat
    # Checkpoint / log / validation file names keyed by dataset and split size.
    chkfile_name = "chk_{}_{}".format(dataset, disentangled_feat)
    logfile_name = "log_{}_{}".format(dataset, disentangled_feat)
    validatefile_name = "val_{}_{}".format(dataset, disentangled_feat)

    # (2) Dataset directory selection.
    # NOTE(review): dir_name is assigned here but never used afterwards —
    # img_dir below is always built from the chairs layout; verify intent.
    if dataset == 'chairs':
        dir_name = "/dataset/chairs/training_img"
    elif dataset == 'celeba':
        dir_name = 'temporarily not available'
    elif dataset == 'dsprites':
        # Dummy path; the dsprites pipeline uses data_manager instead.
        dir_name = '/dataset/dsprites'
    else:
        header("Unknown dataset name")
    cur_dir = get_cur_dir()
    cur_dir = osp.join(cur_dir, 'dataset')
    cur_dir = osp.join(cur_dir, 'chairs')
    img_dir = osp.join(cur_dir, 'training_img')  # chairs-specific layout

    # (3) Per-dataset experiment configuration, following beta-VAE
    # ( https://openreview.net/pdf?id=Sy2fzU9gl ).
    if dataset == 'chairs':
        latent_dim = 32
        loss_weight = {'siam': 50000.0, 'kl': 30000.0}
        batch_size = 32
        max_epoch = 300
        lr = 0.0001
    elif dataset == 'celeba':
        latent_dim = 32
        loss_weight = {'siam': 1000.0, 'kl': 30000.0}
        batch_size = 512
        max_epoch = 300
        lr = 0.0001
    elif dataset == 'dsprites':
        latent_dim = 10
        loss_weight = {'siam': 1.0, 'kl': 1.0}
        batch_size = 1024
        max_epoch = 300
        lr = 0.001
        # shape, rotation, size, x, y => unclear why the paper (p6) lists only
        # 4 features; needs checking.
        # NOTE(review): feat_size/cls_batch_per_gpu/cls_L are only bound on
        # this branch but are referenced for every dataset in the
        # 'classifier_train' call below — confirm other datasets never reach it.
        feat_size = 5
        cls_batch_per_gpu = 15
        cls_L = 10
    entangled_feat = latent_dim - disentangled_feat

    # (4) Open the TensorFlow session. Important: U.mgpu_session() is used
    # instead of a single-threaded session; if that ever changes, revisit the
    # multi-GPU tower construction below.
    # sess = U.single_threaded_session()
    sess = U.mgpu_session()
    sess.__enter__()
    set_global_seeds(0)
    num_gpus = args.num_gpus

    # (5) Build the model (merged into models.py).
    # Only celeba is RGB; the other datasets are single-channel.
    # NOTE(review): `mynets` (used by both training calls in step 6) is only
    # assigned on the dsprites branch — the chairs/celeba paths would raise
    # NameError when training; confirm whether those paths are still supported.
    if dataset == 'chairs':
        import models
        mynet = models.mymodel(name="mynet", img_shape=[64, 64, 1],
                               latent_dim=latent_dim,
                               disentangled_feat=disentangled_feat,
                               mode=mode, loss_weight=loss_weight)
    elif dataset == 'celeba':
        import models
        mynet = models.mymodel(name="mynet", img_shape=[64, 64, 3],
                               latent_dim=latent_dim,
                               disentangled_feat=disentangled_feat,
                               mode=mode, loss_weight=loss_weight)
    elif dataset == 'dsprites':
        import models
        img_shape = [None, 64, 64, 1]  # batch dim left dynamic
        img1 = U.get_placeholder(name="img1", dtype=tf.float32, shape=img_shape)
        img2 = U.get_placeholder(name="img2", dtype=tf.float32, shape=img_shape)
        feat_cls = U.get_placeholder(name="feat_cls", dtype=tf.int32, shape=None)
        # Graph-time sanity checks: paired batches must match and be evenly
        # divisible across GPUs.
        tf.assert_equal(tf.shape(img1)[0], tf.shape(img2)[0])
        tf.assert_equal(tf.floormod(tf.shape(img1)[0], num_gpus), 0)
        tf.assert_equal(tf.floormod(tf.shape(feat_cls)[0], num_gpus), 0)
        # Shard the batch across GPUs along axis 0.
        img1splits = tf.split(img1, num_gpus, 0)
        img2splits = tf.split(img2, num_gpus, 0)
        feat_cls_splits = tf.split(feat_cls, num_gpus, 0)
        mynets = []
        with tf.variable_scope(tf.get_variable_scope()):
            for gid in range(num_gpus):
                with tf.name_scope('gpu%d' % gid) as scope:
                    with tf.device('/gpu:%d' % gid):
                        mynet = models.mymodel(
                            name="mynet", img1=img1splits[gid],
                            img2=img2splits[gid], img_shape=img_shape[1:],
                            latent_dim=latent_dim,
                            disentangled_feat=disentangled_feat, mode=mode,
                            loss_weight=loss_weight,
                            feat_cls=feat_cls_splits[gid], feat_size=feat_size,
                            cls_L=cls_L, cls_batch_per_gpu=cls_batch_per_gpu)
                        mynets.append(mynet)
                        # Reuse variables for the next tower so all GPUs share
                        # one set of weights.
                        tf.get_variable_scope().reuse_variables()
    else:
        header("Unknown model name")

    # (6) Train or test the model.
    # Testing by adding noise on the latent feature is not merged yet.
    if mode == 'train':
        mgpu_train_net(models=mynets, num_gpus=num_gpus, mode=mode,
                       img_dir=img_dir, dataset=dataset,
                       chkfile_name=chkfile_name, logfile_name=logfile_name,
                       validatefile_name=validatefile_name,
                       entangled_feat=entangled_feat, max_epoch=max_epoch,
                       batch_size=batch_size, lr=lr)
        # train_net(model=mynets[0], mode = mode, img_dir = img_dir, dataset = dataset, chkfile_name = chkfile_name, logfile_name = logfile_name, validatefile_name = validatefile_name, entangled_feat = entangled_feat, max_epoch = max_epoch, batch_size = batch_size, lr = lr)
    elif mode == 'classifier_train':
        warn("Classifier Train")
        mgpu_classifier_train_net(models=mynets, num_gpus=num_gpus,
                                  cls_batch_per_gpu=cls_batch_per_gpu,
                                  cls_L=cls_L, mode=mode, img_dir=img_dir,
                                  dataset=dataset, chkfile_name=chkfile_name,
                                  logfile_name=logfile_name,
                                  validatefile_name=validatefile_name,
                                  entangled_feat=entangled_feat,
                                  max_epoch=max_epoch, batch_size=batch_size,
                                  lr=lr)
    elif mode == 'test':
        header("Need to be merged")
    else:
        header("Unknown mode name")
def learn(policy, env, seed, nsteps=5, nstack=4, total_timesteps=int(80e6),
          vf_coef=0.5, ent_coef=0.01, max_grad_norm=0.5, lr=7e-4,
          lrschedule='linear', epsilon=1e-5, alpha=0.99, gamma=0.99,
          log_interval=100, max_episode_length=None, optimizer=None):
    """Run the A2C training loop on a vectorized environment.

    Builds the Model and Runner, then alternates rollout collection and
    gradient updates, logging tabular stats every ``log_interval`` updates.
    If ``max_episode_length`` is given, stops once the mean episode length
    over the last 100 episodes reaches it; otherwise loops indefinitely.
    """
    tf.reset_default_graph()
    set_global_seeds(seed)

    n_envs = env.num_envs
    n_procs = len(env.remotes)  # HACK: reaches into SubprocVecEnv internals
    model = Model(policy=policy, ob_space=env.observation_space,
                  ac_space=env.action_space, nenvs=n_envs, nsteps=nsteps,
                  nstack=nstack, num_procs=n_procs, ent_coef=ent_coef,
                  vf_coef=vf_coef, max_grad_norm=max_grad_norm, lr=lr,
                  alpha=alpha, epsilon=epsilon,
                  total_timesteps=total_timesteps, lrschedule=lrschedule,
                  optimizer=optimizer)
    runner = Runner(env, model, nsteps=nsteps, nstack=nstack, gamma=gamma)
    # Episode stats averaged over a sliding window of 100 episodes.
    stats = EpisodeStats(nsteps, n_envs, maxlen=100)

    batch_size = n_envs * nsteps
    start_time = time.time()
    update = 0
    while True:
        # One rollout of nsteps per env, then one gradient update on it.
        obs, states, rewards, masks, actions, values = runner.run()
        total_loss, policy_loss, value_loss, policy_entropy = model.train(
            obs, states, rewards, masks, actions, values)
        elapsed = time.time() - start_time
        fps = int((update * batch_size) / elapsed)
        stats.feed(rewards, masks)

        if update % log_interval == 0 or update == 1:
            ev = explained_variance(values, rewards)
            logger.record_tabular("nupdates", update)
            logger.record_tabular("total_timesteps", update * batch_size)
            logger.record_tabular("fps", fps)
            logger.record_tabular("policy_entropy", float(policy_entropy))
            logger.record_tabular("value_loss", float(value_loss))
            logger.record_tabular("total_loss", float(total_loss))
            logger.record_tabular("explained_variance", float(ev))
            logger.record_tabular("mean_episode_length", stats.mean_length())
            logger.record_tabular("mean_episode_reward", stats.mean_reward())
            logger.dump_tabular()

        # Early stop once the task is effectively solved.
        if max_episode_length and stats.mean_length() >= max_episode_length:
            break
        update += 1
    env.close()