def update_priorities(self, idxs, priorities):
    """Update priorities via the legacy (parent) method, then register in the
    UNREAL-specific sum segment trees whether each updated transition is
    'bad' (b) or 'good' (g).

    Args:
        idxs: Indices of the transitions whose priorities are updated.
        priorities: New priorities associated with `idxs`.

    Note: UnrealReplayBuffer inherits from PER w/ 'ranked' set to True, so the
    parent call returns rank-based priorities of the form 1 / (rank + 1).
    """
    # Update priorities via the legacy method, used w/ ranked approach
    idxs, priorities = super().update_priorities(idxs, priorities)
    # Threshold separating good from bad ranks; invariant across the loop,
    # so compute it once instead of per-transition
    thres = floor(.5 * self.num_entries)
    # Register whether a transition is b/g in the UNREAL-specific sum trees
    for idx, priority in zipsame(idxs, priorities):
        if idx < self.num_demos:
            # When the transition is from the demos, always set it as 'good' regardless
            self.b_sum_st[idx] = 0
            self.g_sum_st[idx] = 1
        else:
            # Recover the rank from the rank-based priority (priority = 1 / (rank + 1))
            rank = (1. / priority) - 1
            # Cast the comparison to an int (replaces the old `is_g *= 1` hack)
            is_g = int(rank < thres)
            # Fill the good and bad sum segment trees w/ the obtained value
            self.b_sum_st[idx] = 1 - is_g
            self.g_sum_st[idx] = is_g
    if debug:
        # Verify updates
        # Compute the cardinalities of virtual sub-buffers
        b_num_entries = self.b_sum_st.sum(end=self.num_entries)
        g_num_entries = self.g_sum_st.sum(end=self.num_entries)
        print("[num entries] b: {} | g: {}".format(b_num_entries, g_num_entries))
        print("total num entries: {}".format(self.num_entries))
def update_priorities(self, idxs, priorities):
    """Update priorities according to the PER paper, i.e. by updating only the
    priority of sampled transitions. A priority priorities[i] is assigned to the
    transition at index indices[i].
    Note: not in use in the vanilla setting, but here if needed in extensions.

    Args:
        idxs: Indices of the sampled transitions.
        priorities: New (positive) priorities for those transitions.

    Returns:
        (idxs, priorities) when `self.ranked` is set (consumed by the UNREAL
        priority update function), otherwise None.
    """
    global debug
    if self.ranked:
        # Override the priorities to be 1 / (rank(priority) + 1)
        # Add new index, priority pairs to the list
        self.i_p.update({i: p for i, p in zipsame(idxs, priorities)})
        # Rank the indices by priorities
        i_sorted_by_p = sorted(self.i_p.items(), key=lambda t: t[1], reverse=True)
        # Create the index, rank dict in one O(n) pass
        # (replaces the previous `.index(...)`-in-comprehension form, which was O(n^2))
        i_r = {i: rank for rank, (i, _) in enumerate(i_sorted_by_p)}
        # Unpack indices and ranks
        _idxs, ranks = zipsame(*i_r.items())
        # Override the indices and priorities
        idxs = list(_idxs)
        priorities = [1. / (rank + 1) for rank in ranks]  # start ranks at 1
        if debug:
            # Verify that the priorities have been properly overridden
            for idx, priority in zipsame(idxs, priorities):
                print("index: {} | priority: {}".format(idx, priority))
    assert len(idxs) == len(priorities), "the two arrays must be the same length"
    for idx, priority in zipsame(idxs, priorities):
        assert priority > 0, "priorities must be positive"
        assert 0 <= idx < self.num_entries, "no element in buffer associated w/ index"
        if idx < self.num_demos:
            # Add a priority bonus when replaying a demo
            priority += self.demos_eps
        # Exponentiate once and write to both segment trees
        scaled_priority = priority**self.alpha
        self.sum_st[idx] = scaled_priority
        self.min_st[idx] = scaled_priority
        # Update max priority currently in the buffer
        self.max_priority = max(priority, self.max_priority)
    if self.ranked:
        # Return indices and associated overriden priorities
        # Note: returned values are only used in the UNREAL priority update function
        return idxs, priorities
def columnize(names, tuples, widths, indent=2):
    """Generate and return the content of a table (w/o logging or printing anything)

    Args:
        names: Column names, displayed in the header row
        tuples: Iterable of rows, each a tuple of cell values (one per column)
        widths: Width of each column's cells (one width per column)
        indent (int): Indentation spacing prepended to every row in the table

    Returns:
        str: The assembled table, rows separated by newlines
    """
    indent_space = indent * ' '
    # Add row containing the names
    table = indent_space + " | ".join(cell(name, width)
                                      for name, width in zipsame(names, widths))
    table_width = len(table)
    # Add header hline
    table += '\n' + indent_space + ('-' * table_width)
    for tuple_ in tuples:
        # Add a new row
        table += '\n' + indent_space
        table += " | ".join(cell(value, width)
                            for value, width in zipsame(tuple_, widths))
    # Add closing hline
    table += '\n' + indent_space + ('-' * table_width)
    return table
def __init__(self, limit, ob_shape, ac_shape):
    """Set up per-atom ring buffers for transitions of capacity `limit`."""
    self.limit = limit
    self.ob_shape = ob_shape
    self.ac_shape = ac_shape
    self.num_demos = 0
    # Each transition atom gets its own name and shape; rewards and done
    # flags are scalars stored with shape (1,)
    self.atom_names = ['obs0', 'acs', 'rews', 'dones1', 'obs1']
    self.atom_shapes = [self.ob_shape,
                        self.ac_shape,
                        (1,),
                        (1,),
                        self.ob_shape]
    # Create one `RingBuffer` object for every atom in a transition
    self.ring_buffers = {name: RingBuffer(self.limit, shape)
                         for name, shape in zipsame(self.atom_names,
                                                    self.atom_shapes)}
def add_demo_transitions_to_mem(self, dset):
    """Add transitions from expert demonstration trajectories to memory"""
    # Demos must come first: the buffer has to be empty at this point
    assert self.num_entries == 0 and self.num_demos == 0
    logger.info("adding demonstrations to memory")
    # Note: careful w/ the order, it should correspond to the order in `append` signature
    for atoms in zipsame(dset.obs0, dset.acs, dset.env_rews, dset.obs1, dset.dones1):
        self.append(*atoms, is_demo=True)
        self.num_demos += 1
    # Every appended transition must be accounted for as a demo
    assert self.num_demos == self.num_entries
    logger.info(" num entries in memory after addition: {}".format(self.num_entries))
def run(args):
    """Spawn jobs.

    Builds one job script per hyperparameter map, writes each script under
    `spawn/<dir>/<name>.sh`, and (when `args.call` is set) either submits the
    scripts with `sbatch` or, on the 'local' cluster, assembles and loads a
    tmux session config.
    """
    # Create directory for spawned jobs
    os.makedirs("spawn", exist_ok=True)
    if CLUSTER == 'local':
        os.makedirs("tmux", exist_ok=True)
    # Get the hyperparameter set(s)
    if args.sweep:
        hpmaps_ = [get_hps(sweep=True)
                   for _ in range(CONFIG['parameters']['num_trials'])]
        # Flatten into a 1-dim list
        hpmaps = [x for hpmap in hpmaps_ for x in hpmap]
    else:
        hpmaps = get_hps(sweep=False)
    # Create associated task strings
    commands = ["python main.py \\\n{}".format(unroll_options(hpmap)) for hpmap in hpmaps]
    if not len(commands) == len(set(commands)):
        # Terminate in case of duplicate experiment (extremely unlikely though)
        raise ValueError("bad luck, there are dupes -> Try again (:")
    # Create the job maps
    names = ["{}{}_{}".format(TYPE, str(i).zfill(3), hpmap['uuid'])
             for i, hpmap in enumerate(hpmaps)]
    # Finally get all the required job strings
    jobs = [create_job_str(name, command)
            for name, command in zipsame(names, commands)]
    # Spawn the jobs
    for i, (name, job) in enumerate(zipsame(names, jobs)):
        logger.info(">>>>>>>>>>>>>>>>>>>> Job #{} ready to submit. Config below.".format(i))
        logger.info(job + "\n")
        dir_ = name.split('.')[2]
        os.makedirs("spawn/{}".format(dir_), exist_ok=True)
        # `job_name` is the full relative path of the script, e.g. spawn/<dir>/<name>.sh
        job_name = "spawn/{}/{}.sh".format(dir_, name)
        with open(job_name, 'w') as f:
            f.write(job)
        if args.call and not CLUSTER == 'local':
            # Spawn the job!
            # BUGFIX: `job_name` already contains the 'spawn/<dir>/' prefix;
            # the previous code re-prefixed it, yielding spawn/<dir>/spawn/<dir>/<name>.sh
            check_output(["sbatch", job_name])
            logger.info(">>>>>>>>>>>>>>>>>>>> Job #{} submitted.".format(i))
    # Summarize the number of jobs spawned
    logger.info(">>>>>>>>>>>>>>>>>>>> {} jobs were spawned.".format(len(jobs)))
    if CLUSTER == 'local':
        dir_ = hpmaps[0]['uuid'].split('.')[2]  # arbitrarilly picked index 0
        session_name = "{}_{}seeds_{}".format(TYPE, str(NUM_SEEDS).zfill(2), dir_)
        yaml_content = {'session_name': session_name, 'windows': []}
        for i, name in enumerate(names):
            executable = "{}.sh".format(name)
            # One tmux pane per job: activate the conda env, make the script
            # executable, then run it
            single_pane = {'shell_command': [
                "source activate {}".format(CONDA),
                "chmod u+x spawn/{}/{}".format(dir_, executable),
                "spawn/{}/{}".format(dir_, executable)]}
            yaml_content['windows'].append({
                'window_name': "job{}".format(str(i).zfill(2)),
                'panes': [single_pane]})
        # Dump the assembled tmux config into a yaml file
        job_config = "tmux/{}.yaml".format(session_name)
        with open(job_config, "w") as f:
            yaml.dump(yaml_content, f, default_flow_style=False)
        if args.call:
            # Spawn all the jobs in the tmux session!
            check_output(["tmuxp", "load", "{}".format(job_config)])