def main(args=None):
    # Parse command line arguments.
    parsed_args = parse_command_line_arguments(args)

    # Initialize logger.
    Logger()

    # Initialize dataset object.
    init_tf()
    dataset_obj = load_dataset(tfrecord_dir=parsed_args.data_dir, repeat=True,
                               shuffle_mb=0, prefetch_mb=100,
                               max_label_size='full', verbose=True)

    if parsed_args.realism_score:
        # Compute realism score.
        realism_config.datareader = dataset_obj
        compute_stylegan_realism(**realism_config)

    if parsed_args.truncation_sweep:
        # Compute truncation sweep.
        truncation_config.datareader = dataset_obj
        compute_stylegan_truncation(**truncation_config)

    peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
    peak_gpu_mem_usage = peak_gpu_mem_op.eval()
    print('Peak GPU memory usage: %g GB' % (peak_gpu_mem_usage * 1e-9))
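# The flags referenced above (data_dir, realism_score, truncation_sweep) suggest
# a small argparse front end. Below is a minimal sketch of what
# parse_command_line_arguments might look like; the flag types, defaults, and
# help strings beyond those three attribute names are assumptions, not the
# original implementation.
import argparse

def parse_command_line_arguments_sketch(args=None):
    parser = argparse.ArgumentParser(
        description='StyleGAN realism-score and truncation-sweep evaluation.')
    parser.add_argument('--data_dir', type=str, required=True,
                        help='Directory containing the TFRecord dataset.')
    parser.add_argument('--realism_score', action='store_true',
                        help='Compute per-sample realism scores.')
    parser.add_argument('--truncation_sweep', action='store_true',
                        help='Run a truncation-parameter sweep.')
    return parser.parse_args(args)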
def maximum_liklihood():
    success = 0
    trials = 0
    buffer = ExpertBuffer('expert_trajs_point.npz', batch_size=256,
                          number_of_trajs_to_use=20000)
    s_mb, a_mb = buffer.sample()
    env = PointEnv()
    obs = env.reset()
    # policy = BCPol(s_mb, a_mb)

    def policy_fn(name, ob_space, ac_space, reuse=False):
        return mlp_policy.MlpPolicy(name=name, ob_space=ob_space,
                                    ac_space=ac_space, reuse=reuse,
                                    hid_size=32, num_hid_layers=2)

    init_tf()
    policy = max_liklihood_bc(env, policy_fn, buffer, max_iters=int(1e5),
                              ckpt_dir=None, log_dir=None, task_name=None,
                              verbose=True)

    # Evaluate the trained policy.
    for test_idx in range(100):
        obs = env.reset()
        for i in range(250):
            act = policy.act(False, obs)[0]
            obs, _, _, _ = env.step(act)
            # print(obs)
            # env.render()
            # print(env.get_reward())
        rew = env.get_reward()
        if np.abs(rew - 1.0) < 0.01:
            success += 1
        trials += 1
        if test_idx % 10 == 0:
            print(f"{100 * success / trials} percent success rate")

    # Visualize the policy with small exploration noise.
    while True:
        obs = env.reset()
        for i in range(250):
            act = policy.act(False, obs)[0] + 0.1 * np.random.randn(2)
            obs, _, _, _ = env.step(act)
            env.render()
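# For reference, the objective a maximum-likelihood behavioral-cloning trainer
# such as max_liklihood_bc presumably minimizes is the negative log-likelihood
# of expert actions under the stochastic policy. The sketch below assumes a
# baselines-style MlpPolicy whose action distribution `pd` exposes logp(); the
# names pi, pd, and expert_actions_ph are illustrative assumptions, not the
# helper's real signature.
import tensorflow as tf

def bc_nll_loss_sketch(pi, expert_actions_ph):
    # Maximizing log pi(a_expert | s_expert) == minimizing the mean NLL.
    return tf.reduce_mean(-pi.pd.logp(expert_actions_ph))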
def hindsight_supervised_learning():
    success = 0
    trials = 0
    buffer = HindsightBuffer(obs_shape=8, acs_shape=2, future_k=1)
    s_mb, a_mb = buffer.sample()
    policy = BCPol(s_mb, a_mb)
    init_tf()
    env = PointEnv(sparse_reward=True)
    obs = env.reset()

    train_iters = 10000
    for i in range(train_iters):
        # Collect a rollout with the current policy.
        rollout_obs = []
        rollout_acts = []
        obs = env.reset()
        for j in range(250):
            act = policy.act(obs)
            obs, rew, _, _ = env.step(act)
            rollout_obs.append(obs)
            rollout_acts.append(act)
        buffer.add_traj(rollout_obs, rollout_acts)

        rew = env.get_reward()
        trials += 1
        if np.abs(rew - 1.0) < 0.1:
            success += 1

        # Train policy.
        for optim_step in range(10):
            s_mb, a_mb = buffer.sample()
            loss = policy.train(s_mb, a_mb)

        # Evaluation.
        if i % 100 == 0:
            # print(f"Loss at step {i} is: {loss}")
            print(f"{100 * success / trials} percent success rate at step {i}")
            success = 0
            trials = 0

    # Visualize the trained policy.
    while True:
        for test_idx in range(100):
            obs = env.reset()
            for i in range(300):
                act = policy.act(obs)
                obs, _, _, _ = env.step(act)
                env.render()
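# A minimal NumPy sketch of the HER-style relabeling that
# HindsightBuffer.add_traj presumably performs with future_k=1: each visited
# state is paired with a goal "achieved" later in the same trajectory, so even
# unsuccessful rollouts yield useful supervision. The observation layout
# (achieved position in the first goal_dim entries, desired goal in the last
# goal_dim entries) is an assumption for illustration only.
import numpy as np

def relabel_with_hindsight_sketch(obs_traj, act_traj, future_k=1, goal_dim=2):
    relabeled_obs, relabeled_acts = [], []
    T = len(obs_traj)
    for t in range(T - 1):
        for _ in range(future_k):
            # Sample a future timestep and pretend its achieved position was the goal.
            future_t = np.random.randint(t + 1, T)
            achieved_goal = np.asarray(obs_traj[future_t])[:goal_dim]
            new_obs = np.array(obs_traj[t], copy=True)
            new_obs[-goal_dim:] = achieved_goal  # overwrite the desired-goal slot
            relabeled_obs.append(new_obs)
            relabeled_acts.append(act_traj[t])
    return np.array(relabeled_obs), np.array(relabeled_acts)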
def supervised_learning():
    success = 0
    trials = 0
    buffer = ExpertBuffer('expert_trajs_point.npz')
    s_mb, a_mb = buffer.sample()
    policy = BCPol(s_mb, a_mb)
    init_tf()
    env = PointEnv()
    obs = env.reset()

    train_iters = int(1e5)
    for i in range(train_iters):
        s_mb, a_mb = buffer.sample()
        loss = policy.train(s_mb, a_mb)

        if i % 10000 == 0:
            # print(f"Loss at step {i} is: {loss}")
            # Test the policy.
            success = 0
            trials = 0
            for test_idx in range(10):
                obs = env.reset()
                for step_idx in range(300):
                    act = policy.act(obs)
                    obs, _, _, _ = env.step(act)
                    # print(obs)
                    # env.render()
                    # print(env.get_reward())
                rew = env.get_reward()
                if np.abs(rew - 1.0) < 0.01:
                    success += 1
                trials += 1
                if test_idx == 9:
                    print(f"{100 * success / trials} percent success rate")

    # Visualize the trained policy.
    while True:
        for test_idx in range(100):
            obs = env.reset()
            for i in range(300):
                act = policy.act(obs)
                obs, _, _, _ = env.step(act)
                env.render()
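# A minimal TF1 sketch of what a BCPol-style policy could look like: an MLP
# regressed onto expert actions with a mean-squared-error loss, matching the
# train()/act() interface used above. The architecture, loss, and
# hyperparameters are assumptions; the real BCPol may differ.
import numpy as np
import tensorflow as tf

class BCPolSketch:
    def __init__(self, s_mb, a_mb, hid_size=32, lr=1e-3):
        obs_dim, act_dim = s_mb.shape[1], a_mb.shape[1]
        self.obs_ph = tf.placeholder(tf.float32, [None, obs_dim])
        self.act_ph = tf.placeholder(tf.float32, [None, act_dim])
        h = tf.layers.dense(self.obs_ph, hid_size, activation=tf.nn.relu)
        h = tf.layers.dense(h, hid_size, activation=tf.nn.relu)
        self.pred_act = tf.layers.dense(h, act_dim)
        self.loss = tf.reduce_mean(tf.square(self.pred_act - self.act_ph))
        self.train_op = tf.train.AdamOptimizer(lr).minimize(self.loss)

    def train(self, s_mb, a_mb):
        # One gradient step on a minibatch of expert (state, action) pairs.
        loss, _ = tf.get_default_session().run(
            [self.loss, self.train_op],
            {self.obs_ph: s_mb, self.act_ph: a_mb})
        return loss

    def act(self, obs):
        # Greedy action for a single observation.
        return tf.get_default_session().run(
            self.pred_act, {self.obs_ph: np.asarray(obs)[None]})[0]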
#!/usr/bin/env python3
from utils import init_tf, output_tf, save_infra_description

init_tf()
filename = output_tf()
save_infra_description(filename)