def __init__(self, env=None):
    """Create an empty manager; optionally bind it to *env* right away.

    Args:
        env (str, optional): environment name. When given, ``set_env`` is
            called immediately so the manager is ready to use.
    """
    # No environment bound yet — clear all bookkeeping fields.
    self.env_name, self.env_path = '', None
    self.instance_name, self.instance_path = None, None
    # Collaborators handling model training and configuration files.
    self.trainer = Trainer()
    self.config = ConfigManager()
    if env is not None:
        self.set_env(env)
def run_experiment(env_name, parameter_folder, result_folder, verbose=False):
    """Train one model per config file in *parameter_folder* and write a log.

    Every file in ``settings.CONFIG/parameter_folder`` is treated as a model
    configuration; a model is created and trained for each, and per-model
    timings are written to ``training_log.txt`` under the result folder.

    Args:
        env_name (str): name of the environment to train on.
        parameter_folder (str): subfolder of ``settings.CONFIG`` holding one
            config file per model.
        result_folder (str): subdirectory of the trained-models tree where
            results and the log are stored.
        verbose (bool): currently unused; kept for interface compatibility.

    Raises:
        ValueError: if *parameter_folder* contains no config files.
    """
    models = []
    log = []
    start = time.time()
    parameter_dir = os.path.join(settings.CONFIG, parameter_folder)
    for param_file in os.listdir(parameter_dir):
        parameters = os.path.join(parameter_dir, param_file)
        models.append(
            Trainer(env_name, subdir=result_folder).create_model(
                config_location=parameters))
    # Fail loudly instead of an obscure IndexError on models[0] below.
    if not models:
        raise ValueError(
            'No parameter files found in {}'.format(parameter_dir))
    models[0]._tensorboard()
    for i, model in enumerate(models):
        sep = '\n' + '=' * 50 + '\n'
        print(sep, 'Training model Nr {}/{}...\n'.format(i + 1, len(models)))
        t0 = time.time()
        model.train()
        t = time.time() - t0
        steps = model.config['main']['n_steps']
        # ':.2f' (two decimals) — the original ':2f' was a typo meaning
        # "minimum field width 2", i.e. full float precision.
        print('Training time: {:.2f} min, steps/s: {}'.format(
            t / 60, float(steps) / t), sep)
        log.append('Training time for model {}: {:.2f} min, steps/s: {}'.format(
            i, t / 60, float(steps) / t))
    end = (time.time() - start) / 60
    path = os.path.join(settings.TRAINED_MODELS, env_name, result_folder)
    # Make sure the destination exists before opening the log for writing.
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, 'training_log.txt'), 'w') as f:
        for item in log:
            f.write("%s\n" % item)
        f.write("Total runtime: {} minutes".format(end))
def run_experiment(env_name, parameter_folder, result_folder, verbose=False):
    """Train one model per config file found in *parameter_folder*.

    Args:
        env_name (str): name of the environment to train on.
        parameter_folder (str): subfolder of ``settings.CONFIG`` holding one
            config file per model.
        result_folder (str): subdirectory where trained models are stored.
        verbose (bool): currently unused; kept for interface compatibility.

    Raises:
        ValueError: if *parameter_folder* contains no config files.
    """
    models = []
    parameter_dir = os.path.join(settings.CONFIG, parameter_folder)
    for param_file in os.listdir(parameter_dir):
        parameters = os.path.join(parameter_dir, param_file)
        models.append(
            Trainer(env_name, subdir=result_folder).create_model(
                config_location=parameters))
    # Fail loudly instead of an obscure IndexError on models[0] below.
    if not models:
        raise ValueError(
            'No parameter files found in {}'.format(parameter_dir))
    models[0]._tensorboard()
    for i, model in enumerate(models):
        sep = '\n' + '=' * 50 + '\n'
        print(sep, 'Training model Nr {}/{}...\n'.format(i + 1, len(models)))
        t0 = time.time()
        model.train()
        t = time.time() - t0
        steps = model.config['main']['n_steps']
        # ':.2f' (two decimals) — the original ':2f' was a typo meaning
        # "minimum field width 2", i.e. full float precision.
        print(
            'Training time: {:.2f} min, steps/s: {}'.format(
                t / 60, float(steps) / t), sep)
    # Continuation of a parser.add_argument(...) call whose opening line is
    # outside this chunk.
    # NOTE(review): help text typo "a any gym environment" — runtime string,
    # left untouched here.
    '-e',
    '--env_name',
    type=str,
    help=
    'Name of the environment. Can be either a any gym environment or a custom one defined in rl.environments'
)
parser.add_argument(
    '-s',
    '--subdir',
    type=str,
    help=
    'Subdirectory where the model is stored: e.g. -> ../trained_models/env_type/env/[SUBDIR]/model_num/*')
parser.add_argument(
    '-n',
    '--num',
    type=int,
    help=
    'Unique identifier of the model, e.g. -> ../trained_models/env_type/env/subdir/[NUM]_/*')
parser.add_argument('-t', '--tensorboard', action='store_true',
                    help='Launch tensorboard in the current subdirectory.')
args = parser.parse_args()
# Load the previously trained model identified by (env_name, subdir, num)
# and continue training it.
# NOTE(review): this script uses '--env_name'/args.env_name while the sibling
# scripts in this source use '--environment'/args.environment — confirm the
# inconsistency is intentional.
model = Trainer(args.env_name, args.subdir).load_model(args.num)
if args.tensorboard:
    model._tensorboard()
model.train()
    # Continuation of a parser.add_argument(...) call whose opening line
    # (presumably defining the environment flag) is outside this chunk.
    type=str,
    help=
    'Name of the environment. Can be either a any gym environment or a custom one defined in rl.environments'
)
parser.add_argument(
    '-s',
    '--subdir',
    type=str,
    help=
    'Subdirectory where the trained model is going to be stored (useful for separating tensorboard logs): e.g. -> ../trained_models/env_type/env/[SUBDIR]/model_num/*')
parser.add_argument(
    '-n',
    '--num',
    type=int,
    help=
    'Unique identifier of the model, e.g. -> ../trained_models/env_type/env/subdir/[NUM]_/*')
# Playback options: number of evaluation episodes and optional rendering.
parser.add_argument('-o', '--episodes', type=int, default=200,
                    help='Number of episodes to run.')
parser.add_argument('-r', '--render', action='store_true',
                    help='Render the agents.')
args = parser.parse_args()
# NOTE(review): reads args.environment, but no '--environment' flag is visible
# in this chunk (the first add_argument is cut off above) — confirm the hidden
# flag name matches, since the sibling retrain script uses '--env_name'.
model = Trainer(args.environment, args.subdir).load_model(args.num)
model.run(episodes=args.episodes, render=args.render)
or python train.py -e TestEnv -s TestSubdirectory -n NewModel -m DQN """ if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('-e', '--environment', type=str, help='Name of the environment. Can be either a any gym environment or a custom one defined in rl.environments') parser.add_argument('-s', '--subdir', type=str, help='Subdirectory where the trained model is going to be stored (useful for separating tensorboard logs): e.g. -> ../trained_models/env_type/env/[SUBDIR]/0_model/*') parser.add_argument('-n', '--name', type=str, default=None, help='Unique identifier of the model, e.g. -> ../trained_models/env_type/env/subdir/0_[NAME]/*') parser.add_argument('-m', '--model', type=str, default=None, help='Reinforcement learning model to use. PPO / ACER / ACKTR / DQN / .') parser.add_argument('-c', '--config', type=str, default=None, help='Adusted configuration file located in config/custom folder') parser.print_help() args = parser.parse_args() path = pathlib.Path().absolute() trainer = Trainer(args.environment, args.subdir) if args.config is not None: try: config_path = join(path, 'rl', 'config', 'custom', '{}.yml'.format(args.config)) with open(config_path) as f: config = yaml.safe_load(f) print('\nLoaded config file from: {}\n'.format(config_path)) except: print('specified config is not in path, getting original config: {}.yml...'.format(args.environment)) # load config and variables needed config = get_parameters(args.environment) else: config = get_parameters(args.environment)
Run one timestep of the environment's dynamics. When end of episode is reached, call reset() to reset this environment's state. Accepts an action and returns a tuple (observation, reward, done, info). Args: action (object): an action provided by the agent Returns: observation (object): agent's observation of the current environment reward (float) : amount of reward returned after previous action done (bool): whether the episode has ended, in which case further step() calls will return undefined results info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning) """" #return next_state, reward, terminate, info def render(): """" Should render the observation based on the current state. (Pure visualization) """" if __name__ == "__main__": from rl.baselines import get_parameters, Trainer import rl.environments env = custom_env(get_parameters('custom_env')) model = Trainer('custom_env', 'models').create_model() model._tensorboard() model.train() print('Training done') input('Run trained model (Enter)') env.create_window() env.run(model)
""" self.create_window() for _ in range(10): self.done = False state = self.reset() while not self.done: action = self.action_space.sample() state, reward, self.done, _ = self.step(action) print('Reward: {:2.3f}, state: {}, action: {}'.format( reward, state, action)) self.render(True) cv2.destroyAllWindows() def create_window(self): cv2.namedWindow(self.window_name, cv2.WINDOW_NORMAL) cv2.resizeWindow(self.window_name, 300, 300) if __name__ == "__main__": from rl.baselines import get_parameters, Trainer import rl.environments env = TestEnv(get_parameters('TestEnv')) model = Trainer('TestEnv', 'models').create_model() model._tensorboard() model.train() print('Training done') input('Run trained model (Enter)') env.create_window() env.run(model)
class InstanceManager:
    """Creates dirs, manages envs and instances.

    An "instance" is one trained model: a uniquely numbered directory under
    ``settings.TRAINED_MODELS/<env>`` holding model weights plus its
    ``config.yml``.
    """

    def __init__(self, env=None):
        """Create an empty manager; optionally bind it to *env* right away."""
        self.env_name = ''
        self.env_path = None
        self.instance_name = None
        self.instance_path = None
        self.trainer = Trainer()
        self.config = ConfigManager()
        if env is not None:
            self.set_env(env)

    def set_env(self, env):
        """Select environment *env*: create its model dir and load its config.

        Returns whatever ``ConfigManager.load`` returns (the config path).
        """
        self.env_name = env
        self.env_path = os.path.join(settings.TRAINED_MODELS, env)
        os.makedirs(self.env_path, exist_ok=True)
        config_path = self.config.load(env)
        return config_path

    def new_instance(self, namestamp=None):
        """Create a uniquely numbered instance directory and a fresh model.

        The numeric prefix is the smallest free ID: gaps left by deleted
        instances are reused before a new maximum is assigned.
        """
        # Collect numeric prefixes of existing "<id>_<name>" dirs; skip stray
        # entries that don't follow the convention instead of crashing on
        # int() (the original comprehension raised ValueError for them).
        numerical_ids = []
        for entry in os.listdir(self.env_path):
            try:
                numerical_ids.append(int(entry.split('_')[0]))
            except ValueError:
                continue
        # max() of an empty list raises ValueError -> first instance gets 0.
        unique_id = max(numerical_ids) + 1 if numerical_ids else 0
        # Check if some IDs are missing (e.g. deleted) and reuse the gap.
        for num in range(len(numerical_ids)):
            if num not in numerical_ids:
                unique_id = num
                break
        if namestamp is None:
            date = datetime.datetime.now().strftime("%m-%d_%H-%M")
            namestamp = "{}_{}_{}_{}_{}".format(
                self.env_name, self.config.model_type, self.config.policy_type,
                self.config.trainer['n_workers'], date)
        self.instance_name = str(unique_id) + '_' + namestamp
        self.instance_path = os.path.join(self.env_path, self.instance_name)
        os.makedirs(self.instance_path, exist_ok=True)
        print(self.instance_path)
        self.config = ConfigManager(env_name=self.env_name)
        self.trainer.create_model(config=self.config, path=self.instance_path)

    def load_instance(self, path=None, num=None):
        """Load an instance from an explicit *path* or by numeric id *num*.

        Raises:
            ValueError: if neither *path* nor *num* is given (the original
                crashed later with a TypeError inside os.path.join).
        """
        if path is None and num is None:
            raise ValueError('load_instance requires either path or num')
        if num is not None:
            for f in os.listdir(self.env_path):
                fpath = os.path.join(self.env_path, f)
                print(f)
                if os.path.isdir(fpath) and num == int(f.split('_')[0]):
                    print(fpath)
                    path = fpath
                    break
        config_path = os.path.join(path, 'config.yml')
        self.config.load_dict(self.config.load_file(config_path))
        self.trainer.load_model(path=path, config=self.config)
        self.instance_path = path
        self.instance_name = os.path.split(path)[-1]

    def save_instance(self):
        """Saves the current instance (model weights and config files)."""
        print(self.instance_path)
        # Accumulate the total number of trained steps across saves.
        if self.config.trainer.get('steps_trained') is None:
            self.config.trainer['steps_trained'] = self.trainer.steps_trained
        else:
            self.config.trainer['steps_trained'] += self.trainer.steps_trained
        self.config.save(self.instance_path)
        self.trainer.save_model()

    def tensorboard(self, browser=True):
        """Launch tensorboard for this instance; optionally open a browser tab."""
        # Kill current session
        self._tensorboard_kill()
        print('Launching tensorboard at {}'.format(self.instance_path))
        # Argument-list form (shell=False) is robust to spaces in the path,
        # and subprocess.DEVNULL avoids leaking the hand-opened devnull fd.
        subprocess.Popen(
            ['tensorboard', '--logdir', self.instance_path],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        if browser:
            time.sleep(2)  # give tensorboard a moment to start serving
            webbrowser.open_new_tab(
                url='http://localhost:6006/#scalars&_smoothingWeight=0.995')

    def _tensorboard_kill(self):
        """Destroy all running instances of tensorboard."""
        print('Closing current session of tensorboard.')
        if sys.platform == 'win32':
            os.system("taskkill /f /im tensorboard.exe")
        elif sys.platform == 'linux':
            os.system('pkill tensorboard')
        else:
            print('No running instances of tensorboard.')