Example #1
    algo_variant = dict(
        algorithm="DQN",
        version="natural curriculum - axe (RND)",
        layer_size=16,
        replay_buffer_size=int(5E5),
        algorithm_kwargs=dict(
            num_epochs=2000,
            num_eval_steps_per_epoch=6000,
            num_trains_per_train_loop=500,
            num_expl_steps_per_train_loop=500,
            min_num_steps_before_training=200,
            max_path_length=math.inf,
            batch_size=64,
            validation_envs_pkl=join(get_repo_dir(), 'examples/continual/env_shaping/natural_curriculum/axe/validation_envs/dynamic_static_validation_envs_2019_11_04_06_27_16.pkl'),
            validation_rollout_length=500,
            viz_maps=True,
            viz_gap=100
        ),
        trainer_kwargs=dict(
            discount=0.99,
            learning_rate=1E-4,
            grad_clip_val=5
        ),
        inventory_network_kwargs=dict(
            # shelf: 8 x 8
            input_size=64,
            output_size=16,
            hidden_sizes=[16, 16],
        ),
Example #2
    algo_variant = dict(
        algorithm="DQN",
        version="distance increase - deer - hitting",
        layer_size=16,
        replay_buffer_size=int(5E5),
        eps_decay_rate=1e-5,
        algorithm_kwargs=dict(
            num_epochs=10,
            num_eval_steps_per_epoch=6000,
            num_trains_per_train_loop=500,
            num_expl_steps_per_train_loop=500,
            min_num_steps_before_training=200,
            max_path_length=math.inf,
            batch_size=64,
            validation_envs_pkl=join(
                get_repo_dir(),
                'experiments/continual/measure/env_shaping/hitting/deer/validation_envs/dynamic_static_validation_envs_2020_05_21_17_30_37.pkl'
            ),
            validation_rollout_length=1,
            validation_period=5,
            # store visit count array for heat map
            viz_maps=True,
            viz_gap=100
        ),
        trainer_kwargs=dict(
            discount=0.99,
            learning_rate=0,
            grad_clip_val=5
        ),
        inventory_network_kwargs=dict(
            # shelf: 8 x 8
            input_size=64,
            output_size=16,
            hidden_sizes=[16, 16],
        ),
        full_img_network_kwargs=dict(
Example #3
    algo_variant = dict(
        algorithm="DQN Lifetime",
        version="dynamic static - axe",
        lifetime=True,
        layer_size=16,
        replay_buffer_size=int(5E5),
        algorithm_kwargs=dict(
            num_epochs=2000,
            num_eval_steps_per_epoch=500,
            num_trains_per_train_loop=500,
            num_expl_steps_per_train_loop=500,
            min_num_steps_before_training=200,
            max_path_length=math.inf,
            batch_size=64,
            validation_envs_pkl=join(get_repo_dir(), 'experiments/continual/dynamic_static/axe/validation_envs/dynamic_static_validation_envs_2019_11_07_05_27_51.pkl'),
            # TODO originally 200
            validation_rollout_length=100,
            validation_period=10,
            viz_maps=True,
            viz_gap=100
        ),
        trainer_kwargs=dict(
            discount=0.99,
            learning_rate=1E-4,
            grad_clip_val=5
        ),
        inventory_network_kwargs=dict(
            # shelf: 8 x 8
            input_size=64,
            output_size=16,
Example #4
    algo_variant = dict(
        algorithm="DQN",
        version="monster - food",
        layer_size=16,
        replay_buffer_size=int(5E5),
        eps_decay_rate=1e-5,
        algorithm_kwargs=dict(
            num_epochs=2500,
            num_eval_steps_per_epoch=6000,
            num_trains_per_train_loop=500,
            num_expl_steps_per_train_loop=500,
            min_num_steps_before_training=200,
            max_path_length=math.inf,
            batch_size=64,
            validation_envs_pkl=join(get_repo_dir(), 'experiments/continual/dynamic_static/monster/validation_envs/dynamic_static_validation_envs_2020_02_05_02_06_18.pkl'),
            validation_rollout_length=200,
            validation_period=10,
            # store visit count array for heat map
            viz_maps=True,
            viz_gap=100
        ),
        trainer_kwargs=dict(
            discount=0.99,
            learning_rate=1E-4,
            grad_clip_val=5
        ),
        inventory_network_kwargs=dict(
            # shelf: 8 x 8
            input_size=64,
            output_size=16,
Example #5
    algo_variant = dict(
        algorithm="DQN Lifetime",
        version="dynamic static - deer",
        layer_size=16,
        replay_buffer_size=int(5E5),
        eps_decay_rate=1e-5,
        algorithm_kwargs=dict(
            num_epochs=3000,
            num_eval_steps_per_epoch=500,
            num_trains_per_train_loop=500,
            num_expl_steps_per_train_loop=500,
            min_num_steps_before_training=200,
            max_path_length=math.inf,
            batch_size=64,
            validation_envs_pkl=join(get_repo_dir(), 'experiments/continual/dynamic_static/deer/validation_envs/dynamic_static_validation_envs_2019_11_07_05_48_30.pkl'),
            validation_rollout_length=200,
            validation_period=10,
            viz_maps=True,
            viz_gap=100
        ),
        trainer_kwargs=dict(
            discount=0.99,
            learning_rate=1E-4,
            grad_clip_val=5
        ),
        inventory_network_kwargs=dict(
            # shelf: 8 x 8
            input_size=64,
            output_size=16,
            hidden_sizes=[16, 16],
Example #6
    algo_variant = dict(
        algorithm="DQN",
        version="distance increase - deer - entropy",
        layer_size=16,
        replay_buffer_size=int(5E5),
        eps_decay_rate=1e-5,
        algorithm_kwargs=dict(
            num_epochs=100,
            num_eval_steps_per_epoch=6000,
            num_trains_per_train_loop=1,
            num_expl_steps_per_train_loop=500,
            min_num_steps_before_training=200,
            max_path_length=math.inf,
            batch_size=64,
            validation_envs_pkl=join(get_repo_dir(), 'experiments/continual/measure/env_shaping/entropy/deer/validation_envs/dynamic_static_validation_envs_2020_05_27_00_42_58.pkl'),
            validation_rollout_length=1,
            validation_period=10,
            # store visit count array for heat map
            viz_maps=True,
            viz_gap=100
        ),
        trainer_kwargs=dict(
            discount=0.99,
            learning_rate=0,
            grad_clip_val=5
        ),
        inventory_network_kwargs=dict(
            # shelf: 8 x 8
            input_size=64,
            output_size=16,
Example #7
    algo_variant = dict(
        algorithm="DQN Lifetime",
        version="distance increase - food",
        lifetime=True,
        layer_size=16,
        replay_buffer_size=int(5E5),
        algorithm_kwargs=dict(
            num_epochs=1000,
            num_eval_steps_per_epoch=6000,
            num_trains_per_train_loop=500,
            num_expl_steps_per_train_loop=500,
            min_num_steps_before_training=200,
            max_path_length=math.inf,
            batch_size=256,
            validation_envs_pkl=join(get_repo_dir(), 'experiments/continual/env_shaping/distance_increasing/food/validation_envs/dynamic_static_validation_envs_2019_09_18_04_36_59.pkl'),
            validation_rollout_length=100
        ),
        trainer_kwargs=dict(
            discount=0.99,
            learning_rate=1E-4,
            grad_clip_val=5
        ),
        inventory_network_kwargs=dict(
            # shelf: 8 x 8
            input_size=64,
            output_size=16,
            hidden_sizes=[16, 16],
        ),
        full_img_network_kwargs=dict(
            # 5 x 5 x 8
Example #8
def get_gifs_heatmaps(exps_dir_name, seeds, save_dir, titles):
    data_dir = join(get_repo_dir(), 'data')
    exps_dir = join(data_dir, exps_dir_name)
    gifs_dir = join(data_dir, 'gifs')
    heat_dir = join(data_dir, 'heatmaps')

    # load variant and get pickled validation envs
    rand_exp_dir = glob(join(exps_dir, '*'))[0]
    with open(join(rand_exp_dir, 'variant.json'), 'r') as f:
        variant = json.load(f)
    task_obj = variant['env_kwargs']['task'].split()[1]
    val_envs_path = variant['algo_kwargs']['algorithm_kwargs'][
        'validation_envs_pkl']
    val_rollout_len = variant['algo_kwargs']['algorithm_kwargs'][
        'validation_rollout_length']
    val_envs = get_val_envs(val_envs_path)

    # load policy
    for seed_idx, seed in enumerate(seeds):
        val_env_idxs = random.sample(list(range(len(val_envs))), 10)
        exp_dir = glob(join(exps_dir, '*%d' % seed))[0]
        """ Get policy """
        pol_file = max(glob(join(exp_dir, 'itr_*.pkl')),
                       key=lambda pol_path: int(basename(pol_path)[4:-4]))
        # to override policy itr number
        # pol_file = join(exp_dir, 'itr_%d.pkl' % 2990)
        print(pol_file)
        with open(pol_file, 'rb') as f:
            policy = pickle.load(f)['evaluation/policy']
        if hasattr(policy, 'policy'):
            # if it's reset free, strip out the underlying policy from the exploration strategy
            policy = policy.policy
        policy = PolicyWrappedWithExplorationStrategy(
            EpsilonGreedy(spaces.Discrete(7), 0.1), policy)

        # re-fetch the val envs each time so that envs are fresh
        # val_envs = get_val_envs(val_envs_path)
        # """ Get gifs """
        # stats = [{} for _ in range(len(val_env_idxs))]
        # for meta_idx, env_idx in enumerate(val_env_idxs):
        #     env = val_envs[env_idx]
        #     path = rollout(env, policy, val_rollout_len, render=True, save=True,
        #                    save_dir=join(gifs_dir, exps_dir_name, save_dir, str(seed), str(env_idx)))
        #     env.render(close=True)
        #     for typ in env.object_to_idx.keys():
        #         if typ not in ['empty', 'wall', 'tree']:
        #             key = 'pickup_%s' % typ
        #             last_val = 0
        #             pickup_idxs = []
        #             for t, env_info in enumerate(path['env_infos']):
        #                 count = env_info[key] - last_val
        #                 pickup_idxs.extend([t for _ in range(count)])
        #                 last_val = env_info[key]
        #             stats[meta_idx][key] = pickup_idxs
        #     for typ in env.interactions.values():
        #         key = 'made_%s' % typ
        #         last_val = 0
        #         made_idxs = []
        #         for t, env_info in enumerate(path['env_infos']):
        #             count = env_info[key] - last_val
        #             made_idxs.extend([t for _ in range(count)])
        #             last_val = env_info[key]
        #         stats[meta_idx][key] = made_idxs
        # solved = [val_env_idxs[i] for i, stat in enumerate(stats) if stat['pickup_%s' % task_obj]]
        # print('seed %d solved %d percent:' % (seed, 100 * len(solved) // len(val_env_idxs)))
        # print(solved)

        # re-fetch the val envs each time so that envs are fresh
        val_envs = get_val_envs(val_envs_path)
        print('refetched envs')
        """ Get heatmaps """
        vcs = []
        for env_idx, env in enumerate(val_envs):
            path = rollout(env, policy, val_rollout_len)
            vcs.append(env.visit_count)
        visit_count_sum = sum(vcs)
        plt.imshow(visit_count_sum)
        plt.title('Validation Tasks State Visitation Count (%s)' %
                  titles[seed_idx])
        plt.axis('off')
        vc_save_path = join(heat_dir, exps_dir_name, save_dir, str(seed))
        os.makedirs(vc_save_path, exist_ok=True)
        plt.savefig(join(vc_save_path, 'map.png'))
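
A minimal usage sketch for the helper above. The experiment directory name, seeds, save directory, and plot titles are illustrative placeholders rather than values from the repository; the only assumption is the data/<exps_dir_name>/<run>*<seed> layout that the function itself globs over.

# Hypothetical call to get_gifs_heatmaps; every argument below is a placeholder.
if __name__ == '__main__':
    get_gifs_heatmaps(
        exps_dir_name='dqn_dynamic_static_axe',   # folder under data/ holding one run dir per seed
        seeds=[0, 1, 2],                          # run dirs are matched with glob('*%d' % seed)
        save_dir='latest',                        # subfolder used under data/heatmaps/<exps_dir_name>/
        titles=['seed 0', 'seed 1', 'seed 2'],    # one plot title per seed, indexed in order
    )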
Example #9
    algo_variant = dict(
        algorithm="DQN",
        version="diverse deer - env shaping",
        layer_size=16,
        replay_buffer_size=int(5E5),
        eps_decay_rate=1e-5,
        algorithm_kwargs=dict(
            num_epochs=2500,
            num_eval_steps_per_epoch=6000,
            num_trains_per_train_loop=500,
            num_expl_steps_per_train_loop=500,
            min_num_steps_before_training=200,
            max_path_length=math.inf,
            batch_size=64,
            validation_envs_pkl=join(get_repo_dir(), 'examples/continual/env_shaping/diverse_deer/validation_envs/env_shaping_validation_envs_2020_02_05_06_58_29.pkl'),
            validation_rollout_length=200,
            validation_period=10,
            # store visit count array for heat map
            viz_maps=True,
            viz_gap=100
        ),
        trainer_kwargs=dict(
            discount=0.99,
            learning_rate=1E-4,
            grad_clip_val=5
        ),
        inventory_network_kwargs=dict(
            # shelf: 8 x 8
            input_size=64,
            output_size=16,