def __init__(self, yaml_path, steps):
    """Load experiment parameters from yaml, set up logging, and build the env/policy/sampler.

    Args:
        yaml_path: path to the experiment's yaml parameter file
        steps: training-step identifiers used later for restoring/evaluating
            policies  # NOTE(review): inferred from usage elsewhere — confirm
    """
    self._steps = steps

    # safe_load avoids arbitrary Python object construction from the yaml file
    # (yaml.load without an explicit Loader is deprecated and unsafe)
    with open(yaml_path, 'r') as f:
        self._params = yaml.safe_load(f)

    logger.setup(display_name=self._params['exp_name'],
                 log_path=os.path.join(self._onpolicy_dir, 'log_onpolicy.txt'),
                 lvl=self._params['log_level'])
    logger.info('Yaml {0}'.format(yaml_path))

    logger.info('')
    logger.info('Creating environment')
    self._env = create_env(self._params['alg']['env'])

    logger.info('')
    logger.info('Creating model')
    self._policy = self._create_policy()
    logger.info('Restoring policy')
    self._restore_policy()

    logger.info('')
    logger.info('Create sampler')
    self._sampler = self._create_sampler()
def run_gcg(params):
    """Create the save directory, environments, policy, and GCG algorithm, then train.

    Args:
        params: experiment parameter dict (parsed from yaml). Must contain
            'exp_name', 'txt' (the raw yaml text, archived for posterity),
            'seed', 'log_level', and 'policy'/'alg' sub-dicts.
    """
    curr_dir = os.path.dirname(__file__)
    data_dir = os.path.join(curr_dir[:curr_dir.find('src/gcg')], 'data')
    assert (os.path.exists(data_dir))
    save_dir = os.path.join(data_dir, params['exp_name'])
    os.makedirs(save_dir, exist_ok=True)

    logger.setup(display_name=params['exp_name'],
                 log_path=os.path.join(save_dir, 'log.txt'),
                 lvl=params['log_level'])

    # TODO: set seed

    # copy yaml for posterity
    # (fixed: the filename had a no-op .format() call — 'params.yaml' has no placeholders)
    yaml_path = os.path.join(save_dir, 'params.yaml')
    with open(yaml_path, 'w') as f:
        f.write(params['txt'])

    os.environ["CUDA_VISIBLE_DEVICES"] = str(
        params['policy']['gpu_device'])  # TODO: hack so don't double GPU

    env_str = params['alg'].pop('env')
    env = create_env(env_str, seed=params['seed'])

    # evaluation env defaults to the training env string if not given
    env_eval_str = params['alg'].pop('env_eval', env_str)
    env_eval = create_env(env_eval_str, seed=params['seed'])

    env.reset()
    env_eval.reset()

    #####################
    ### Create policy ###
    #####################

    policy_class = params['policy']['class']
    # NOTE(review): eval() on a config-supplied class name executes arbitrary code;
    # only run trusted yaml files (a class registry / getattr lookup would be safer)
    PolicyClass = eval(policy_class)
    policy_params = params['policy'][policy_class]
    policy = PolicyClass(
        env_spec=env.spec,
        exploration_strategies=params['alg'].pop('exploration_strategies'),
        **policy_params,
        **params['policy'])

    ########################
    ### Create algorithm ###
    ########################

    # fall back to the environment's horizon when no explicit max_path_length is given
    if 'max_path_length' in params['alg']:
        max_path_length = params['alg'].pop('max_path_length')
    else:
        max_path_length = env.horizon
    algo = GCG(save_dir=save_dir,
               env=env,
               env_eval=env_eval,
               policy=policy,
               max_path_length=max_path_length,
               env_str=env_str,
               **params['alg'])
    algo.train()
def train(self):
    """Configure logging under the traversability graph's save folder and fit the model."""
    save_folder = self._trav_graph.save_folder
    logger.setup(display_name=save_folder,
                 log_path=os.path.join(save_folder, 'log.txt'),
                 lvl='debug')

    # self._create_training_data()
    self._train_model()
def create_async_gcg(params, is_continue, log_fname, AsyncClass=AsyncGCG):
    """Create (but do not run) an asynchronous GCG algorithm instance.

    Args:
        params: experiment parameter dict (parsed from yaml); must contain
            'exp_name', 'txt', 'seed', 'log_level', and 'policy'/'alg' sub-dicts
        is_continue: if False and the save directory already exists, exit
            rather than silently resuming/overwriting a previous run
        log_fname: filename for the log inside the save directory
        AsyncClass: algorithm class to instantiate (defaults to AsyncGCG)

    Returns:
        the constructed AsyncClass instance, ready for .train()/.run()
    """
    curr_dir = os.path.dirname(__file__)
    data_dir = os.path.join(curr_dir[:curr_dir.find('src/gcg')], 'data')
    assert (os.path.exists(data_dir))
    save_dir = os.path.join(data_dir, params['exp_name'])
    if os.path.exists(save_dir) and not is_continue:
        print(
            'Save directory {0} exists. You need to explicitly say to continue if you want to start training from where you left off'
            .format(save_dir))
        sys.exit(0)
    os.makedirs(save_dir, exist_ok=True)

    logger.setup(display_name=params['exp_name'],
                 log_path=os.path.join(save_dir, log_fname),
                 lvl=params['log_level'])

    # copy yaml for posterity
    # (fixed: the filename had a no-op .format() call — 'params.yaml' has no placeholders)
    yaml_path = os.path.join(save_dir, 'params.yaml')
    with open(yaml_path, 'w') as f:
        f.write(params['txt'])

    os.environ["CUDA_VISIBLE_DEVICES"] = str(
        params['policy']['gpu_device'])  # TODO: hack so don't double GPU

    env_dict = params['alg'].pop('env')
    env = create_env(env_dict, seed=params['seed'])
    env_eval = None  # async training has no separate evaluation env here

    #####################
    ### Create policy ###
    #####################

    policy_class = params['policy']['class']
    # NOTE(review): eval() on a config-supplied class name executes arbitrary code;
    # only run trusted yaml files (a class registry / getattr lookup would be safer)
    PolicyClass = eval(policy_class)
    policy_params = params['policy'][policy_class]
    policy = PolicyClass(
        env_spec=env.spec,
        exploration_strategies=params['alg'].pop('exploration_strategies'),
        **policy_params,
        **params['policy'])

    ########################
    ### Create algorithm ###
    ########################

    max_path_length = params['alg'].pop('max_path_length')
    algo = AsyncClass(save_dir=save_dir,
                      env=env,
                      env_eval=env_eval,
                      policy=policy,
                      max_path_length=max_path_length,
                      env_dict=env_dict,
                      **params['alg'])
    return algo
def __init__(self, exp_name, is_continue=False, log_level='info', log_fname='log.txt', log_folder=''):
    """Set up experiment logging under the experiment directory.

    Refuses to overwrite an existing run's log unless is_continue is True.
    """
    self._exp_name = exp_name

    ### create logger
    existing_log_path = os.path.join(self.exp_dir, log_fname)
    # bail out rather than clobber a previous run the caller didn't opt to continue
    if os.path.exists(existing_log_path) and not is_continue:
        print('Save directory {0} exists. You need to explicitly say to continue if you want to start training '
              'from where you left off'.format(self.exp_dir))
        sys.exit(0)

    target_folder = os.path.join(self.exp_dir, log_folder)
    os.makedirs(target_folder, exist_ok=True)
    logger.setup(display_name=self._exp_name,
                 log_path=os.path.join(target_folder, log_fname),
                 lvl=log_level)
def __init__(self, yaml_path):
    """Load offline-training parameters from yaml, archive them, and build the env/model/data pools.

    Args:
        yaml_path: path to the yaml parameter file for this experiment
    """
    # read the yaml ONCE: parse the params and keep the raw text to archive
    # (the original opened the file twice — once to parse, once to re-read the text)
    with open(yaml_path, 'r') as f:
        params_txt = f.read()
    # safe_load avoids arbitrary Python object construction
    # (yaml.load without an explicit Loader is deprecated and unsafe)
    self._params = yaml.safe_load(params_txt)

    # write yaml to save dir for posterity
    params_path = os.path.join(self._save_dir, 'params.yaml')
    with open(params_path, 'w') as f:
        f.write(params_txt)

    logger.setup(display_name=self._params['exp_name'],
                 log_path=os.path.join(self._save_dir, 'log.txt'),
                 lvl=self._params['log_level'])
    logger.info('Yaml {0}'.format(yaml_path))

    logger.info('')
    logger.info('Creating environment')
    self._env = create_env(self._params['alg']['env'])

    logger.info('')
    logger.info('Creating model')
    self._model = self._create_model()

    logger.info('')
    logger.info('Loading data')
    # self._replay_pool = self._load_data(self._data_file_name) # TODO
    # logger.info('Size of replay pool: {0:d}'.format(len(self._replay_pool)))
    self._replay_holdout_pool = self._load_data(
        self._data_holdout_file_name)
    logger.info('Size of holdout replay pool: {0:d}'.format(
        len(self._replay_holdout_pool)))

    if self._init_checkpoint_file_name is not None:
        logger.info('')
        logger.info('Loading checkpoint {0} for {1}'.format(
            self._init_checkpoint_file_name,
            self._params['offline']['init_restore']))
        self._model.restore(
            self._init_checkpoint_file_name,
            train_restore=self._params['offline']['init_restore'])

    self._restore_train_policy()

    # number of samples for BNN uncertainty estimates; an explicit bootstrap
    # count overrides the yaml value / default of 100
    self._num_bnn_samples = self._params['offline'].get(
        'num_bnn_samples', 100)
    if self.num_bootstraps is not None:
        self._num_bnn_samples = self.num_bootstraps
def __init__(self, save_folder, pkl_folders, obs_shape=(50, 100, 3)):
    """Gather rollout pickle filenames from the given folders and prepare a save directory.

    Args:
        save_folder: directory (relative to DATA_DIR) where outputs/logs go
        pkl_folders: folders (relative to DATA_DIR) to scan for rollout pickles
        obs_shape: observation (height, width, channels) tuple
    """
    self._save_folder = os.path.join(DATA_DIR, save_folder)
    os.makedirs(self._save_folder, exist_ok=True)
    logger.setup(display_name=save_folder,
                 log_path=os.path.join(self._save_folder, 'log.txt'),
                 lvl='debug')

    # keep any file whose name contains a train- or eval-rollout suffix
    suffixes = (FileManager.train_rollouts_fname_suffix,
                FileManager.eval_rollouts_fname_suffix)
    fnames = []
    for rel_folder in pkl_folders:
        abs_folder = os.path.join(DATA_DIR, rel_folder)
        for entry in os.listdir(abs_folder):
            if any(suffix in entry for suffix in suffixes):
                fnames.append(os.path.join(abs_folder, entry))
    random.shuffle(fnames)
    self._pkl_fnames = fnames

    self._obs_shape = list(obs_shape)
def __init__(self, pkl_folders, output_folder, env_params, labeller_params, num_processes=1):
    """Prepare the pkl-to-tfrecord conversion: env, optional labeller, and rollout hook.

    Args:
        pkl_folders: folders containing rollout pickle files to convert
        output_folder: destination folder (relative to DATA_DIR)
        env_params: dict with 'class' and 'kwargs' for constructing the env
        labeller_params: dict with 'class' (may be falsy → no labeller) and 'kwargs'
        num_processes: worker-process count; must be 1 when a labeller is used
    """
    logger.setup(display_name='pkls_to_tfrecords',
                 log_path='/tmp/bla.txt',
                 lvl='debug')

    self._pkl_folders = pkl_folders
    self._output_folder = os.path.join(DATA_DIR, output_folder)
    self._num_processes = num_processes

    ### create env
    self._env = env_params['class'](params=env_params['kwargs'])

    ### create labeller (optional)
    labeller_cls = labeller_params['class']
    if labeller_cls:
        self._labeller = labeller_cls(
            env_spec=self._env.spec,
            policy=None,  # TODO
            **labeller_params['kwargs'])
        # labelling is stateful, so it cannot be parallelized across processes
        assert self._num_processes == 1, 'can only have one process when labelling'
    else:
        self._labeller = None

    ### modify rollouts?
    if hasattr(self._env, 'create_rollout'):
        print('\nenv has create_rollout method')
        self._create_rollout = self._env.create_rollout
    else:
        print(
            '\nenv does not have create_rollout method, defaulting to identity'
        )
        # identity hook: return the rollout unchanged
        self._create_rollout = lambda rollout, labeller: rollout
# for k in ('intersection_offroad', 'intersection_otherlane'): # print('{0}: {1}'.format(k, env_info[k])) # input('') # import IPython; IPython.embed() # import matplotlib.pyplot as plt # plt.imshow(obs_im) # plt.show() if t % 10 == 0: print('time per step: {0}'.format((time.time() - start) / 10.)) start = time.time() if done: t = 0 break # env.reset() if __name__ == '__main__': logger.setup(display_name='CarlaCollSpeedEnv', log_path='/tmp/log.txt', lvl='debug') env = CarlaCollSpeedEnv(params={ 'port': 2040, 'player_start_indices': [30], 'number_of_pedestrians': 150, 'number_of_vehicles': 0, 'weather': 2, }) test_collision(env) # test_reset(env) import IPython; IPython.embed()
#normalize for color reasons # probcoll -= probcoll.min() # probcoll /= probcoll.max() for l, p in zip(lines, probcoll): l.set_color(cm.viridis(1 - p)) l.set_markerfacecolor(cm.viridis(1 - p)) axes[1].set_xlim(xlim) axes[1].set_ylim(ylim) axes[1].set_aspect('equal') axes[1].set_title('steer {0:.3f}, probcoll in [{1:.2f}, {2:.2f}]'.format(-steers[0, 0], min_probcoll, max_probcoll)) f.savefig(os.path.join(plot_folder, 'rollout_{0:03d}_t_{1:03d}.png'.format(i, j)), bbox_inches='tight', dpi=200) # break # break plt.close(f) tf_sess.close() if __name__ == '__main__': logger.setup(display_name='tmp', log_path='/tmp/log.txt', lvl='debug') # plot_test() plot_rw_rccar_var001_var016()
rollouts.append(rollouts_i) mypickle.dump({'rollouts': rollouts}, self._save_file) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('env', type=str, choices=('SquareClutteredEnv', 'SquareClutteredHoldoutEnv', 'SquareClutteredConeEnv')) parser.add_argument('steps', type=int) parser.add_argument('-H', type=int) parser.add_argument('-K', type=int) args = parser.parse_args() logger.setup(display_name='gather_mpc_data', log_path='/tmp/log.txt', lvl='debug') if args.env == 'SquareClutteredEnv': env = create_env("SquareClutteredEnv(params={'hfov': 120, 'do_back_up': True, 'collision_reward_only': True, 'collision_reward': -1, 'speed_limits': [2., 2.]})") elif args.env == 'SquareClutteredColoredEnv': env = create_env("SquareClutteredColoredEnv(params={'hfov': 120, 'do_back_up': True, 'collision_reward_only': True, 'collision_reward': -1, 'speed_limits': [2., 2.]})") elif args.env == 'SquareClutteredConeEnv': env = create_env("SquareClutteredConeEnv(params={'hfov': 120, 'do_back_up': False, 'collision_reward_only': True, 'collision_reward': -1, 'speed_limits': [2., 2.]})") else: raise NotImplementedError curr_dir = os.path.realpath(os.path.dirname(__file__)) data_dir = os.path.join(curr_dir[:curr_dir.find('gcg/src')], 'gcg/data') assert (os.path.exists(data_dir)) fname = '{0}_mpc{1:d}_H_{2:d}_K_{3:d}.pkl'.format(args.env, args.steps, args.H, args.K) save_dir = os.path.join(data_dir, 'bnn/datasets', os.path.splitext(fname)[0])