# NOTE: these variant dicts assume `import math`, `from os.path import join`,
# and the project helper `get_repo_dir` are in scope in the surrounding script.
algo_variant = dict(
    algorithm="DQN",
    version="natural curriculum - axe (RND)",
    layer_size=16,
    replay_buffer_size=int(5E5),
    algorithm_kwargs=dict(
        num_epochs=2000,
        num_eval_steps_per_epoch=6000,
        num_trains_per_train_loop=500,
        num_expl_steps_per_train_loop=500,
        min_num_steps_before_training=200,
        max_path_length=math.inf,
        batch_size=64,
        validation_envs_pkl=join(
            get_repo_dir(),
            'examples/continual/env_shaping/natural_curriculum/axe/validation_envs/dynamic_static_validation_envs_2019_11_04_06_27_16.pkl'),
        validation_rollout_length=500,
        viz_maps=True,
        viz_gap=100,
    ),
    trainer_kwargs=dict(
        discount=0.99,
        learning_rate=1E-4,
        grad_clip_val=5,
    ),
    inventory_network_kwargs=dict(
        # shelf: 8 x 8
        input_size=64,
        output_size=16,
        hidden_sizes=[16, 16],
    ),
algo_variant = dict( algorithm="DQN", version="distance increase - deer - hitting", layer_size=16, replay_buffer_size=int(5E5), eps_decay_rate=1e-5, algorithm_kwargs=dict( num_epochs=10, num_eval_steps_per_epoch=6000, num_trains_per_train_loop=500, num_expl_steps_per_train_loop=500, min_num_steps_before_training=200, max_path_length=math.inf, batch_size=64, validation_envs_pkl=join( get_repo_dir(), 'experiments/continual/measure/env_shaping/hitting/deer/validation_envs/dynamic_static_validation_envs_2020_05_21_17_30_37.pkl' ), validation_rollout_length=1, validation_period=5, # store visit count array for heat map viz_maps=True, viz_gap=100), trainer_kwargs=dict(discount=0.99, learning_rate=0, grad_clip_val=5), inventory_network_kwargs=dict( # shelf: 8 x 8 input_size=64, output_size=16, hidden_sizes=[16, 16], ), full_img_network_kwargs=dict(
algo_variant = dict( algorithm="DQN Lifetime", version="dynamic static - axe", lifetime=True, layer_size=16, replay_buffer_size=int(5E5), algorithm_kwargs=dict( num_epochs=2000, num_eval_steps_per_epoch=500, num_trains_per_train_loop=500, num_expl_steps_per_train_loop=500, min_num_steps_before_training=200, max_path_length=math.inf, batch_size=64, validation_envs_pkl=join(get_repo_dir(), 'experiments/continual/dynamic_static/axe/validation_envs/dynamic_static_validation_envs_2019_11_07_05_27_51.pkl'), # TODO originally 200 validation_rollout_length=100, validation_period=10, viz_maps=True, viz_gap=100 ), trainer_kwargs=dict( discount=0.99, learning_rate=1E-4, grad_clip_val=5 ), inventory_network_kwargs=dict( # shelf: 8 x 8 input_size=64, output_size=16,
algo_variant = dict( algorithm="DQN", version="monster - food", layer_size=16, replay_buffer_size=int(5E5), eps_decay_rate=1e-5, algorithm_kwargs=dict( num_epochs=2500, num_eval_steps_per_epoch=6000, num_trains_per_train_loop=500, num_expl_steps_per_train_loop=500, min_num_steps_before_training=200, max_path_length=math.inf, batch_size=64, validation_envs_pkl=join(get_repo_dir(), 'experiments/continual/dynamic_static/monster/validation_envs/dynamic_static_validation_envs_2020_02_05_02_06_18.pkl'), validation_rollout_length=200, validation_period=10, # store visit count array for heat map viz_maps=True, viz_gap=100 ), trainer_kwargs=dict( discount=0.99, learning_rate=1E-4, grad_clip_val=5 ), inventory_network_kwargs=dict( # shelf: 8 x 8 input_size=64, output_size=16,
algo_variant = dict( algorithm="DQN Lifetime", version="dynamic static - deer", layer_size=16, replay_buffer_size=int(5E5), eps_decay_rate=1e-5, algorithm_kwargs=dict( num_epochs=3000, num_eval_steps_per_epoch=500, num_trains_per_train_loop=500, num_expl_steps_per_train_loop=500, min_num_steps_before_training=200, max_path_length=math.inf, batch_size=64, validation_envs_pkl=join(get_repo_dir(), 'experiments/continual/dynamic_static/deer/validation_envs/dynamic_static_validation_envs_2019_11_07_05_48_30.pkl'), validation_rollout_length=200, validation_period=10, viz_maps=True, viz_gap=100 ), trainer_kwargs=dict( discount=0.99, learning_rate=1E-4, grad_clip_val=5 ), inventory_network_kwargs=dict( # shelf: 8 x 8 input_size=64, output_size=16, hidden_sizes=[16, 16],
algo_variant = dict( algorithm="DQN", version="distance increase - deer - entropy", layer_size=16, replay_buffer_size=int(5E5), eps_decay_rate=1e-5, algorithm_kwargs=dict( num_epochs=100, num_eval_steps_per_epoch=6000, num_trains_per_train_loop=1, num_expl_steps_per_train_loop=500, min_num_steps_before_training=200, max_path_length=math.inf, batch_size=64, validation_envs_pkl=join(get_repo_dir(), 'experiments/continual/measure/env_shaping/entropy/deer/validation_envs/dynamic_static_validation_envs_2020_05_27_00_42_58.pkl'), validation_rollout_length=1, validation_period=10, # store visit count array for heat map viz_maps=True, viz_gap=100 ), trainer_kwargs=dict( discount=0.99, learning_rate=0, grad_clip_val=5 ), inventory_network_kwargs=dict( # shelf: 8 x 8 input_size=64, output_size=16,
algo_variant = dict( algorithm="DQN Lifetime", version="distance increase - food", lifetime=True, layer_size=16, replay_buffer_size=int(5E5), algorithm_kwargs=dict( num_epochs=1000, num_eval_steps_per_epoch=6000, num_trains_per_train_loop=500, num_expl_steps_per_train_loop=500, min_num_steps_before_training=200, max_path_length=math.inf, batch_size=256, validation_envs_pkl=join(get_repo_dir(), 'experiments/continual/env_shaping/distance_increasing/food/validation_envs/dynamic_static_validation_envs_2019_09_18_04_36_59.pkl'), validation_rollout_length=100 ), trainer_kwargs=dict( discount=0.99, learning_rate=1E-4, grad_clip_val=5 ), inventory_network_kwargs=dict( # shelf: 8 x 8 input_size=64, output_size=16, hidden_sizes=[16, 16], ), full_img_network_kwargs=dict( # 5 x 5 x 8
import json
import os
import pickle
import random
from glob import glob
from os.path import basename, join

import matplotlib.pyplot as plt
from gym import spaces  # assumed; Discrete(7) matches the env's action space

# Project-level helpers assumed importable from the repo: get_repo_dir,
# get_val_envs, rollout, PolicyWrappedWithExplorationStrategy, EpsilonGreedy.


def get_gifs_heatmaps(exps_dir_name, seeds, save_dir, titles):
    data_dir = join(get_repo_dir(), 'data')
    exps_dir = join(data_dir, exps_dir_name)
    gifs_dir = join(data_dir, 'gifs')
    heat_dir = join(data_dir, 'heatmaps')

    # load variant and get pickled validation envs
    rand_exp_dir = glob(join(exps_dir, '*'))[0]
    with open(join(rand_exp_dir, 'variant.json'), 'r') as f:
        variant = json.load(f)
    task_obj = variant['env_kwargs']['task'].split()[1]
    val_envs_path = variant['algo_kwargs']['algorithm_kwargs'][
        'validation_envs_pkl']
    val_rollout_len = variant['algo_kwargs']['algorithm_kwargs'][
        'validation_rollout_length']
    val_envs = get_val_envs(val_envs_path)

    # load policy
    for seed_idx, seed in enumerate(seeds):
        val_env_idxs = random.sample(list(range(len(val_envs))), 10)
        exp_dir = glob(join(exps_dir, '*%d' % seed))[0]

        """ Get policy """
        # pick the checkpoint with the highest iteration number
        pol_file = max(glob(join(exp_dir, 'itr_*.pkl')),
                       key=lambda pol_path: int(basename(pol_path)[4:-4]))
        # to override policy itr number
        # pol_file = join(exp_dir, 'itr_%d.pkl' % 2990)
        print(pol_file)
        with open(pol_file, 'rb') as f:
            policy = pickle.load(f)['evaluation/policy']
        if hasattr(policy, 'policy'):
            # if it's reset-free, strip the underlying policy out of the exploration strategy
            policy = policy.policy
        policy = PolicyWrappedWithExplorationStrategy(
            EpsilonGreedy(spaces.Discrete(7), 0.1), policy)

        # re-fetch the val envs each time so that envs are fresh
        # val_envs = get_val_envs(val_envs_path)
        # """ Get gifs """
        # stats = [{} for _ in range(len(val_env_idxs))]
        # for meta_idx, env_idx in enumerate(val_env_idxs):
        #     env = val_envs[env_idx]
        #     path = rollout(env, policy, val_rollout_len, render=True, save=True,
        #                    save_dir=join(gifs_dir, exps_dir_name, save_dir,
        #                                  str(seed), str(env_idx)))
        #     env.render(close=True)
        #     for typ in env.object_to_idx.keys():
        #         if typ not in ['empty', 'wall', 'tree']:
        #             key = 'pickup_%s' % typ
        #             last_val = 0
        #             pickup_idxs = []
        #             for t, env_info in enumerate(path['env_infos']):
        #                 count = env_info[key] - last_val
        #                 pickup_idxs.extend([t for _ in range(count)])
        #                 last_val = env_info[key]
        #             stats[meta_idx][key] = pickup_idxs
        #     for typ in env.interactions.values():
        #         key = 'made_%s' % typ
        #         last_val = 0
        #         made_idxs = []
        #         for t, env_info in enumerate(path['env_infos']):
        #             count = env_info[key] - last_val
        #             made_idxs.extend([t for _ in range(count)])
        #             last_val = env_info[key]
        #         stats[meta_idx][key] = made_idxs
        # solved = [val_env_idxs[i] for i, stat in enumerate(stats)
        #           if stat['pickup_%s' % task_obj]]
        # print('seed %d solved %d percent:' % (seed, 100 * len(solved) // len(val_env_idxs)))
        # print(solved)

        # re-fetch the val envs each time so that envs are fresh
        val_envs = get_val_envs(val_envs_path)
        print('refetched envs')

        """ Get heatmaps """
        vcs = []
        for env_idx, env in enumerate(val_envs):
            path = rollout(env, policy, val_rollout_len)
            vcs.append(env.visit_count)
        # sum per-env visit counts into one aggregate heat map per seed
        visit_count_sum = sum(vcs)
        plt.imshow(visit_count_sum)
        plt.title('Validation Tasks State Visitation Count (%s)' % titles[seed_idx])
        plt.axis('off')
        vc_save_path = join(heat_dir, exps_dir_name, save_dir, str(seed))
        os.makedirs(vc_save_path, exist_ok=True)
        plt.savefig(join(vc_save_path, 'map.png'))
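# Hypothetical invocation; the experiment directory name, seeds, and titles below
# are placeholders for illustration, not taken from the repo:
get_gifs_heatmaps(
    exps_dir_name='dqn-dynamic-static-deer',  # a folder under data/
    seeds=[0, 1, 2],                          # matched against trailing digits of exp dirs
    save_dir='eval',
    titles=['seed 0', 'seed 1', 'seed 2'],    # indexed by seed position for plot titles
)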
algo_variant = dict( algorithm="DQN", version="diverse deer - env shaping", layer_size=16, replay_buffer_size=int(5E5), eps_decay_rate=1e-5, algorithm_kwargs=dict( num_epochs=2500, num_eval_steps_per_epoch=6000, num_trains_per_train_loop=500, num_expl_steps_per_train_loop=500, min_num_steps_before_training=200, max_path_length=math.inf, batch_size=64, validation_envs_pkl=join(get_repo_dir(), 'examples/continual/env_shaping/diverse_deer/validation_envs/env_shaping_validation_envs_2020_02_05_06_58_29.pkl'), validation_rollout_length=200, validation_period=10, # store visit count array for heat map viz_maps=True, viz_gap=100 ), trainer_kwargs=dict( discount=0.99, learning_rate=1E-4, grad_clip_val=5 ), inventory_network_kwargs=dict( # shelf: 8 x 8 input_size=64, output_size=16,