def __init__(self, filename, attrs):
    if filename is None:
        util.warn('Warning: not writing log to any file!')
        self.f = None
    else:
        if os.path.exists(filename):
            raise RuntimeError('Log file %s already exists' % filename)
        # Guard against a bare filename with no directory component,
        # for which os.makedirs('') would raise.
        dirname = os.path.dirname(filename)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        self.f = tables.open_file(filename, mode='w')
        # attrs is an iterable of (key, value) pairs, stored as root attributes
        for k, v in attrs:
            self.f.root._v_attrs[k] = v
    self.log_table = None
    self.schema = None  # list of col name / types for display
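# Usage sketch (hypothetical): the enclosing class is not named in this
# excerpt, so `TrainingLog` below is a placeholder for it. `attrs` must be
# an iterable of (key, value) pairs, matching the loop above.
log = TrainingLog('training_logs/run0.h5',
                  attrs=[('env_name', 'Hopper-v1'), ('seed', 0)])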
def save_h5(self, h5file, key, extra_attrs=None):
    with h5py.File(h5file, 'a') as f:
        if key in f:
            util.warn('WARNING: key %s already exists in %s' % (key, h5file))
            dset = f[key]
        else:
            dset = f.create_group(key)
        for v in self.get_all_variables():
            dset[v.name] = v.get_value()
        dset.attrs['hash'] = self.savehash()
        if extra_attrs is not None:
            for k, v in extra_attrs:
                if k in dset.attrs:
                    util.warn('Warning: attribute %s already exists in %s' % (k, dset.name))
                dset.attrs[k] = v
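# Read-back sketch (not part of the original code): how the parameters
# written by save_h5 above can be loaded with h5py. The file name and group
# key passed in are placeholders.
def load_h5_sketch(h5file, key):
    with h5py.File(h5file, 'r') as f:
        grp = f[key]
        # Each dataset in the group holds one variable's value, keyed by the
        # variable's name, exactly as written by save_h5.
        params = {name: grp[name][...] for name in grp.keys()}
        attrs = dict(grp.attrs)  # includes the 'hash' attribute written above
    return params, attrs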
def btlinesearch(f, x0, fx0, g, dx, accept_ratio, shrink_factor, max_steps, verbose=False):
    '''
    Backtracking line search: find a step size t such that f(x0 + t*dx) is
    within a factor accept_ratio of the linearized function value improvement.

    Args:
        f: the function
        x0: starting point for search
        fx0: the value f(x0). Will be computed if set to None.
        g: search direction, typically the gradient of f at x0
        dx: the largest possible step to take
        accept_ratio: termination criterion
        shrink_factor: how much to decrease the step every iteration
        max_steps: maximum number of backtracking iterations
        verbose: if True, print the improvements at each iteration

    Returns:
        The point x0 + t*dx for the accepted step size t, and the number of
        backtracking steps taken.
    '''
    if fx0 is None:
        fx0 = f(x0)
    t = 1.
    m = g.dot(dx)
    if accept_ratio != 0 and m > 0:
        util.warn('WARNING: %.10f not <= 0' % m)
    num_steps = 0
    while num_steps < max_steps:
        true_imp = f(x0 + t * dx) - fx0
        lin_imp = t * m
        if verbose:
            print(true_imp, lin_imp, accept_ratio)
        if true_imp <= accept_ratio * lin_imp:
            break
        t *= shrink_factor
        num_steps += 1
    return x0 + t * dx, num_steps
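# Self-contained usage sketch on a quadratic; the hyperparameter values
# below are illustrative, not taken from the original code.
import numpy as np

f = lambda x: float(x.dot(x))  # f(x) = ||x||^2, minimized at the origin
x0 = np.array([3.0, -4.0])
g = 2.0 * x0                   # gradient of f at x0
dx = -g                        # largest trial step: one full gradient step

# Accept 10% of the linearized improvement, halve the step on each
# backtrack, and give up after 20 shrinks.
x1, num_steps = btlinesearch(f, x0, fx0=None, g=g, dx=dx,
                             accept_ratio=0.1, shrink_factor=0.5, max_steps=20)
print(x1, num_steps)  # reaches the origin after one backtracking step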
def sim_multi(self, policy_fn, obsfeat_fn, cfg, num_threads=None, no_reward=False):
    '''
    Run many simulations, with policy evaluations batched together.

    Samples complete trajectories (stopping when Simulation.done is true, or
    when cfg.max_traj_len is reached) until both
    (1) at least cfg.min_num_trajs trajectories have been sampled, and
    (2) at least cfg.min_total_sa transitions have been sampled.
    '''
    util.warn('sim_multi is deprecated!')
    assert isinstance(cfg, SimConfig)
    Do, Da = self.obs_space.storage_size, self.action_space.storage_size
    if num_threads is None:
        num_threads = multiprocessing.cpu_count()

    # Completed trajectories
    num_sa = 0
    completed_translists = []

    # Simulations and their current trajectories
    simbatch = self.new_batched_sim(cfg.batch_size)  # TODO: reuse this across runs
    # List of (o, obsfeat, adist, a, r) transitions for each simulation
    sim_trans_B = [[] for _ in range(cfg.batch_size)]

    # Keep running simulations until we fill up the quota of trajectories and transitions
    while True:
        # If a simulation is done, pull out and save its trajectory, and restart it.
        for i_sim in range(cfg.batch_size):
            if simbatch.is_done(i_sim) or len(sim_trans_B[i_sim]) >= cfg.max_traj_len:
                # Save the trajectory
                completed_translists.append(sim_trans_B[i_sim])
                num_sa += len(sim_trans_B[i_sim])
                # and restart the simulation
                sim_trans_B[i_sim] = []
                simbatch.reset_sim(i_sim)

        # Are both quotas filled? If so, we're done.
        if len(completed_translists) >= cfg.min_num_trajs and num_sa >= cfg.min_total_sa:
            break

        # Keep simulating otherwise. Pull together observations from all simulations
        obs_B_Do = simbatch.batch_obs.copy()
        assert obs_B_Do.shape == (cfg.batch_size, Do)
        # Evaluate policy
        obsfeat_B_Df = obsfeat_fn(obs_B_Do)
        a_B_Da, adist_B_Pa = policy_fn(obsfeat_B_Df)
        assert a_B_Da.shape == (cfg.batch_size, Da)
        assert adist_B_Pa.shape[0] == cfg.batch_size and adist_B_Pa.ndim == 2
        # Step simulations
        r_B = simbatch.batch_step(a_B_Da, num_threads=num_threads)
        if no_reward:
            r_B[:] = np.nan
        # Save the transitions
        for i_sim in range(cfg.batch_size):
            sim_trans_B[i_sim].append((obs_B_Do[i_sim, :], obsfeat_B_Df[i_sim, :],
                                       adist_B_Pa[i_sim, :], a_B_Da[i_sim, :], r_B[i_sim]))

    assert sum(len(tlist) for tlist in completed_translists) == num_sa

    # Pack together each trajectory individually
    def translist_to_traj(tlist):
        obs_T_Do = np.stack([trans[0] for trans in tlist])
        assert obs_T_Do.shape == (len(tlist), self.obs_space.storage_size)
        obsfeat_T_Df = np.stack([trans[1] for trans in tlist])
        assert obsfeat_T_Df.shape[0] == len(tlist)
        adist_T_Pa = np.stack([trans[2] for trans in tlist])
        assert adist_T_Pa.ndim == 2 and adist_T_Pa.shape[0] == len(tlist)
        a_T_Da = np.stack([trans[3] for trans in tlist])
        assert a_T_Da.shape == (len(tlist), self.action_space.storage_size)
        r_T = np.stack([trans[4] for trans in tlist])
        assert r_T.shape == (len(tlist),)
        return Trajectory(obs_T_Do, obsfeat_T_Df, adist_T_Pa, a_T_Da, r_T)

    completed_trajs = [translist_to_traj(tlist) for tlist in completed_translists]
    assert len(completed_trajs) >= cfg.min_num_trajs and \
        sum(len(traj) for traj in completed_trajs) >= cfg.min_total_sa
    return TrajBatch.FromTrajs(completed_trajs)
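# Call sketch with hypothetical values: the SimConfig fields are inferred
# from their uses above (batch_size, max_traj_len, min_num_trajs,
# min_total_sa); `mdp`, `policy`, and the feature map are placeholders.
cfg = SimConfig(min_num_trajs=10, min_total_sa=5000,
                batch_size=32, max_traj_len=500)
trajbatch = mdp.sim_multi(
    policy_fn=lambda obsfeat_B_Df: policy.sample_actions(obsfeat_B_Df),
    obsfeat_fn=lambda obs_B_Do: obs_B_Do,  # identity observation features
    cfg=cfg)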
_global_sim_info = None


def _rollout():
    try:
        # Reseed each worker process so parallel rollouts are not identical
        import os, random
        random.seed(os.urandom(4))
        np.random.seed(int.from_bytes(os.urandom(4), 'big'))
        global _global_sim_info
        mdp, policy_fn, obsfeat_fn, max_traj_len = _global_sim_info
        return mdp.sim_single(policy_fn, obsfeat_fn, max_traj_len)
    except KeyboardInterrupt:
        pass


# Stuff for temporarily disabling MKL threading during multiprocessing
# http://stackoverflow.com/a/28293128
import ctypes
mkl_rt = None
try:
    mkl_rt = ctypes.CDLL('libmkl_rt.so')
    mkl_set_num_threads = mkl_rt.MKL_Set_Num_Threads
    mkl_get_max_threads = mkl_rt.MKL_Get_Max_Threads
except OSError:  # library not found
    util.warn('MKL runtime not found. Will not attempt to disable multithreaded MKL for parallel rollouts.')

from contextlib import contextmanager


@contextmanager
def set_mkl_threads(n):
    if mkl_rt is not None:
        orig = mkl_get_max_threads()
        mkl_set_num_threads(n)
    try:
        yield
    finally:
        # Restore the original thread count even if the body raises
        if mkl_rt is not None:
            mkl_set_num_threads(orig)
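# Parallel-rollout sketch (hypothetical helper, not part of the original
# code): pin MKL to one thread per worker, publish the rollout arguments
# through the module-level global, and fan _rollout out over a process
# pool. Assumes the 'fork' start method, so workers inherit
# _global_sim_info set before the pool is created.
import multiprocessing


def sample_trajs_parallel(mdp, policy_fn, obsfeat_fn, max_traj_len,
                          num_trajs, num_procs=4):
    global _global_sim_info
    _global_sim_info = (mdp, policy_fn, obsfeat_fn, max_traj_len)
    # One MKL thread per worker avoids oversubscribing cores.
    with set_mkl_threads(1):
        pool = multiprocessing.Pool(processes=num_procs)
        try:
            results = [pool.apply_async(_rollout) for _ in range(num_trajs)]
            return [res.get() for res in results]
        finally:
            pool.close()
            pool.join()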
'''
Check how many snapshots were saved in a set of log files
'''
import argparse

import h5py

from policyopt import util

parser = argparse.ArgumentParser()
parser.add_argument('logfiles', nargs='+', type=str)
args = parser.parse_args()

for filename in args.logfiles:
    if filename.endswith('.h5'):
        try:
            with h5py.File(filename, 'r') as f:
                # Assumes snapshot group names are zero-padded ('iterNNNN'),
                # so lexicographic sorting matches iteration order.
                snapshot_name = sorted(f['snapshots'].keys())[-1]
                assert snapshot_name.startswith('iter')
                last_snapshot_iter = int(snapshot_name[len('iter'):])
                # if last_snapshot_iter != 300:
                print(filename, last_snapshot_iter)
        except Exception:
            util.warn('Error opening {}'.format(filename))
            continue
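# Invocation sketch (the script name is a placeholder):
#   python check_snapshots.py training_logs/*.h5
# Prints one line per readable .h5 file: the file name and the iteration
# number of its latest snapshot.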