Example #1
    def update_priorities(self, idxs, priorities):
        # Update priorities via the legacy method, used w/ the ranked approach
        idxs, priorities = super().update_priorities(idxs, priorities)
        # Register whether each transition is bad or good in the UNREAL-specific sum trees
        for idx, priority in zipsame(idxs, priorities):
            # Decide whether the transition to be added is good or bad,
            # by recovering the rank from the priority
            # Note: UnrealReplayBuffer inherits from PER w/ 'ranked' set to True
            if idx < self.num_demos:
                # When the transition is from the demos, always mark it as 'good'
                self.b_sum_st[idx] = 0
                self.g_sum_st[idx] = 1
            else:
                # Invert the ranked-priority formula, priority = 1 / (rank + 1)
                rank = (1. / priority) - 1
                # A transition is 'good' if it ranks in the top half of the buffer
                thres = floor(.5 * self.num_entries)  # `floor` is `math.floor`
                is_g = int(rank < thres)  # cast the bool into an int
                # Fill the good and bad sum segment trees w/ the obtained value
                self.b_sum_st[idx] = 1 - is_g
                self.g_sum_st[idx] = is_g
        if debug:
            # Verify updates by computing the cardinalities of the virtual sub-buffers
            b_num_entries = self.b_sum_st.sum(end=self.num_entries)
            g_num_entries = self.g_sum_st.sum(end=self.num_entries)
            print("[num entries]    b: {}    | g: {}".format(
                b_num_entries, g_num_entries))
            print("total num entries: {}".format(self.num_entries))
Example #2
    def update_priorities(self, idxs, priorities):
        """Update priorities according to the PER paper, i.e. by updating
        only the priority of sampled transitions. A priority priorities[i] is
        assigned to the transition at index indices[i].
        Note: not in use in the vanilla setting, but here if needed in extensions.
        """
        global debug
        if self.ranked:
            # Override the priorities to be 1 / (rank(priority) + 1)
            # Add new index, priority pairs to the list
            self.i_p.update({i: p for i, p in zipsame(idxs, priorities)})
            # Rank the indices by priorities
            i_sorted_by_p = sorted(self.i_p.items(),
                                   key=lambda t: t[1],
                                   reverse=True)
            # Create the index -> rank dict (enumerate gives the rank directly)
            i_r = {i: rank for rank, (i, _) in enumerate(i_sorted_by_p)}
            # Unpack indices and ranks
            _idxs, ranks = zipsame(*i_r.items())
            # Override the indices and priorities
            idxs = list(_idxs)
            priorities = [1. / (rank + 1)
                          for rank in ranks]  # start ranks at 1
            if debug:
                # Verify that the priorities have been properly overridden
                for idx, priority in zipsame(idxs, priorities):
                    print("index: {}    | priority: {}".format(idx, priority))

        assert len(idxs) == len(priorities), "the two arrays must be the same length"
        for idx, priority in zipsame(idxs, priorities):
            assert priority > 0, "priorities must be positive"
            assert 0 <= idx < self.num_entries, "no element in buffer associated w/ index"
            if idx < self.num_demos:
                # Add a priority bonus when replaying a demo
                priority += self.demos_eps
            self.sum_st[idx] = priority**self.alpha
            self.min_st[idx] = priority**self.alpha
            # Update max priority currently in the buffer
            self.max_priority = max(priority, self.max_priority)

        if self.ranked:
            # Return indices and associated overridden priorities
            # Note: returned values are only used in the UNREAL priority update function
            return idxs, priorities
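
`zipsame` is used throughout these examples; it behaves like the built-in `zip` but fails loudly when the sequences differ in length. A minimal sketch of such a helper (an assumption about the actual implementation, which is not shown here):

def zipsame(*seqs):
    """Like `zip`, but assert that all sequences have the same length."""
    lengths = {len(seq) for seq in seqs}
    assert len(lengths) == 1, "the sequences must all be the same length"
    return zip(*seqs)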
Example #3
def columnize(names, tuples, widths, indent=2):
    """Generate and return the content of table
    (w/o logging or printing anything)

    Args:
        width (int): Width of each cell in the table
        indent (int): Indentation spacing prepended to every row in the table
    """
    indent_space = indent * ' '
    # Add row containing the names
    table = indent_space + " | ".join(cell(name, width) for name, width in zipsame(names, widths))
    table_width = len(table)
    # Add header hline
    table += '\n' + indent_space + ('-' * table_width)
    for tuple_ in tuples:
        # Add a new row
        table += '\n' + indent_space
        table += " | ".join(cell(value, width) for value, width in zipsame(tuple_, widths))
    # Add closing hline
    table += '\n' + indent_space + ('-' * table_width)
    return table
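
`columnize` delegates the rendering of each cell to an external `cell` helper. A plausible sketch of `cell`, along with a usage example (both hypothetical, inferred only from how `columnize` calls them):

def cell(value, width):
    # Hypothetical helper: left-align `value` in a fixed-width cell
    return "{}".format(value).ljust(width)[:width]

names = ['epoch', 'loss']
tuples = [(1, 0.92), (2, 0.48)]
widths = [8, 8]
print(columnize(names, tuples, widths))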
Example #4
    def __init__(self, limit, ob_shape, ac_shape):
        self.limit = limit
        self.ob_shape = ob_shape
        self.ac_shape = ac_shape
        self.num_demos = 0
        self.atom_names = ['obs0', 'acs', 'rews', 'dones1', 'obs1']
        self.atom_shapes = [
            self.ob_shape, self.ac_shape, (1, ), (1, ), self.ob_shape
        ]
        # Create one `RingBuffer` object for every atom in a transition
        self.ring_buffers = {
            atom_name: RingBuffer(self.limit, atom_shape)
            for atom_name, atom_shape in zipsame(self.atom_names,
                                                 self.atom_shapes)
        }
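
The constructor above only requires that `RingBuffer` accept a `(limit, shape)` pair. A minimal numpy-backed sketch compatible with that signature (an assumption; the original class is not shown and likely exposes more methods):

import numpy as np

class RingBuffer(object):
    """Fixed-capacity FIFO storage backed by a preallocated numpy array."""

    def __init__(self, limit, shape):
        self.limit = limit
        self.data = np.zeros((limit, ) + shape, dtype=np.float32)
        self.start = 0  # index of the oldest stored element
        self.length = 0  # current number of stored elements

    def __len__(self):
        return self.length

    def __getitem__(self, idx):
        assert 0 <= idx < self.length, "index out of bounds"
        return self.data[(self.start + idx) % self.limit]

    def append(self, value):
        if self.length < self.limit:
            # Still filling up: grow the buffer
            self.length += 1
        else:
            # Full: advance the start index, overwriting the oldest element
            self.start = (self.start + 1) % self.limit
        self.data[(self.start + self.length - 1) % self.limit] = value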
Example #5
    def add_demo_transitions_to_mem(self, dset):
        """Add transitions from expert demonstration trajectories to memory"""
        # Ensure the replay buffer is empty, as demos need to be added first
        assert self.num_entries == 0 and self.num_demos == 0
        logger.info("adding demonstrations to memory")
        # Zip the transition atoms
        transitions = zipsame(dset.obs0, dset.acs, dset.env_rews, dset.obs1,
                              dset.dones1)
        # Note: careful w/ the order, it should correspond to the order in the `append` signature
        for transition in transitions:
            self.append(*transition, is_demo=True)
            self.num_demos += 1
        assert self.num_demos == self.num_entries
        logger.info("  num entries in memory after addition: {}".format(
            self.num_entries))
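
A hedged usage sketch, with `SimpleNamespace` standing in for the demonstration dataset and `memory` assumed to be a buffer from Example #4's family (only the attribute names come from the code above; everything else is illustrative):

from types import SimpleNamespace
import numpy as np

# Two hypothetical demo transitions w/ 4-dim observations and 1-dim actions
dset = SimpleNamespace(
    obs0=np.zeros((2, 4)), acs=np.zeros((2, 1)), env_rews=np.zeros((2, 1)),
    obs1=np.zeros((2, 4)), dones1=np.zeros((2, 1)))
memory.add_demo_transitions_to_mem(dset)
assert memory.num_demos == memory.num_entries == 2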
Example #6
def run(args):
    """Spawn jobs"""

    # Create directory for spawned jobs
    os.makedirs("spawn", exist_ok=True)
    if CLUSTER == 'local':
        os.makedirs("tmux", exist_ok=True)

    # Get the hyperparameter set(s)
    if args.sweep:
        hpmaps_ = [
            get_hps(sweep=True)
            for _ in range(CONFIG['parameters']['num_trials'])
        ]
        # Flatten into a 1-dim list
        hpmaps = [x for hpmap in hpmaps_ for x in hpmap]
    else:
        hpmaps = get_hps(sweep=False)

    # Create associated task strings
    commands = [
        "python main.py \\\n{}".format(unroll_options(hpmap))
        for hpmap in hpmaps
    ]
    if len(commands) != len(set(commands)):
        # Terminate in case of duplicate experiment (extremely unlikely though)
        raise ValueError("bad luck, there are dupes -> Try again (:")
    # Create the job maps
    names = [
        "{}{}_{}".format(TYPE,
                         str(i).zfill(3), hpmap['uuid'])
        for i, hpmap in enumerate(hpmaps)
    ]
    # Finally get all the required job strings
    jobs = [
        create_job_str(name, command)
        for name, command in zipsame(names, commands)
    ]

    # Spawn the jobs
    for i, (name, job) in enumerate(zipsame(names, jobs)):
        logger.info(">>>>>>>>>>>>>>>>>>>> Job #{} ready to submit. Config below.".format(i))
        logger.info(job + "\n")
        dir_ = name.split('.')[2]
        os.makedirs("spawn/{}".format(dir_), exist_ok=True)
        job_name = "spawn/{}/{}.sh".format(dir_, name)
        with open(job_name, 'w') as f:
            f.write(job)
        if args.call and CLUSTER != 'local':
            # Spawn the job!
            # Note: `job_name` already contains the 'spawn/<dir>/' prefix
            check_output(["sbatch", job_name])
            logger.info(">>>>>>>>>>>>>>>>>>>> Job #{} submitted.".format(i))
    # Summarize the number of jobs spawned
    logger.info(">>>>>>>>>>>>>>>>>>>> {} jobs were spawned.".format(len(jobs)))

    if CLUSTER == 'local':
        dir_ = hpmaps[0]['uuid'].split('.')[2]  # arbitrarily picked index 0
        session_name = "{}_{}seeds_{}".format(TYPE,
                                              str(NUM_SEEDS).zfill(2), dir_)
        yaml_content = {'session_name': session_name, 'windows': []}
        for i, name in enumerate(names):
            executable = "{}.sh".format(name)
            single_pane = {
                'shell_command': [
                    "source activate {}".format(CONDA),
                    "chmod u+x spawn/{}/{}".format(dir_, executable),
                    "spawn/{}/{}".format(dir_, executable)
                ]
            }
            yaml_content['windows'].append({
                'window_name': "job{}".format(str(i).zfill(2)),
                'panes': [single_pane],
            })
        # Dump the assembled tmux config into a yaml file
        job_config = "tmux/{}.yaml".format(session_name)
        with open(job_config, "w") as f:
            yaml.dump(yaml_content, f, default_flow_style=False)
        if args.call:
            # Spawn all the jobs in the tmux session!
            check_output(["tmuxp", "load", "{}".format(job_config)])