Ejemplo n.º 1
0
    def __init__(self, env=None):
        """
        Start with no environment and no instance selected.

        A new Trainer and a new ConfigManager are created on every call.
        When `env` is given it is handed straight to `set_env`.
        """
        self.env_name, self.env_path = '', None
        self.instance_name, self.instance_path = None, None

        self.trainer = Trainer()
        self.config = ConfigManager()

        # Nothing more to do when no environment was requested.
        if env is None:
            return
        self.set_env(env)
Ejemplo n.º 2
0
def run_experiment(env_name, parameter_folder, result_folder, verbose=False):
    """
    Train one model per parameter file and write a timing log.

    Every entry of ``settings.CONFIG/<parameter_folder>`` is used as the
    ``config_location`` of a model created through
    ``Trainer(env_name, subdir=result_folder)``. The models are trained one
    after another; per-model timings and the total runtime are written to
    ``training_log.txt`` in ``settings.TRAINED_MODELS/<env_name>/<result_folder>``.

    Args:
        env_name: environment name, passed to Trainer and used in the log path.
        parameter_folder: folder below ``settings.CONFIG`` holding the
            parameter files.
        result_folder: passed to Trainer as ``subdir`` and used in the log path.
        verbose: currently unused.

    Raises:
        IndexError: if the parameter folder contains no entries.
    """
    start = time.time()
    parameter_dir = os.path.join(settings.CONFIG, parameter_folder)

    # os.listdir returns entries in arbitrary order; sorting makes the model
    # numbering in the output and the log reproducible between runs.
    param_files = sorted(os.listdir(parameter_dir))
    if not param_files:
        # Same exception type the unguarded models[0] access used to raise,
        # but with a message that names the actual problem.
        raise IndexError(
            'No parameter files found in {}'.format(parameter_dir))

    models = [
        Trainer(env_name, subdir=result_folder).create_model(
            config_location=os.path.join(parameter_dir, param_file))
        for param_file in param_files
    ]
    models[0]._tensorboard()

    log = []
    sep = '\n' + '=' * 50 + '\n'
    for i, model in enumerate(models):
        print(sep, 'Training model Nr {}/{}...\n'.format(i + 1, len(models)))
        t0 = time.time()
        model.train()
        t = time.time() - t0
        steps = model.config['main']['n_steps']
        # Guard against a zero elapsed time on coarse clocks.
        steps_per_second = float(steps) / t if t > 0 else float('inf')
        # '{:.2f}' prints two decimals; the former '{:2f}' only set a minimum
        # width of 2 and therefore printed six decimals.
        print('Training time: {:.2f} min, steps/s: {}'.format(t / 60, steps_per_second), sep)
        log.append('Training time for model {}: {:.2f} min, steps/s: {}'.format(i, t / 60, steps_per_second))

    end = (time.time() - start) / 60
    path = os.path.join(settings.TRAINED_MODELS, env_name, result_folder)
    # Harmless if the directory already exists; avoids failing on the log
    # write after all training has finished.
    os.makedirs(path, exist_ok=True)
    with open(os.path.join(path, 'training_log.txt'), 'w') as f:
        for item in log:
            f.write("%s\n" % item)
        f.write("Total runtime: {} minutes".format(end))
Ejemplo n.º 3
0
def run_experiment(env_name, parameter_folder, result_folder, verbose=False):
    """
    Train one model per parameter file and print the timing of each run.

    Every entry of ``settings.CONFIG/<parameter_folder>`` is used as the
    ``config_location`` of a model created through
    ``Trainer(env_name, subdir=result_folder)``; the models are then trained
    one after another.

    Args:
        env_name: environment name, passed to Trainer.
        parameter_folder: folder below ``settings.CONFIG`` holding the
            parameter files.
        result_folder: passed to Trainer as ``subdir``.
        verbose: currently unused.

    Raises:
        IndexError: if the parameter folder contains no entries.
    """
    parameter_dir = os.path.join(settings.CONFIG, parameter_folder)

    # os.listdir returns entries in arbitrary order; sorting makes the model
    # numbering reproducible between runs.
    param_files = sorted(os.listdir(parameter_dir))
    if not param_files:
        # Same exception type the unguarded models[0] access used to raise,
        # but with a message that names the actual problem.
        raise IndexError(
            'No parameter files found in {}'.format(parameter_dir))

    models = [
        Trainer(env_name, subdir=result_folder).create_model(
            config_location=os.path.join(parameter_dir, param_file))
        for param_file in param_files
    ]
    models[0]._tensorboard()

    sep = '\n' + '=' * 50 + '\n'
    for i, model in enumerate(models):
        print(sep, 'Training model Nr {}/{}...\n'.format(i + 1, len(models)))
        t0 = time.time()
        model.train()
        t = time.time() - t0
        steps = model.config['main']['n_steps']
        # Guard against a zero elapsed time on coarse clocks.
        steps_per_second = float(steps) / t if t > 0 else float('inf')
        # '{:.2f}' prints two decimals; the former '{:2f}' only set a minimum
        # width of 2 and therefore printed six decimals.
        print(
            'Training time: {:.2f} min, steps/s: {}'.format(
                t / 60, steps_per_second), sep)
Ejemplo n.º 4
0
        '-e',
        '--env_name',
        type=str,
        help=
        'Name of the environment. Can be either a any gym environment or a custom one defined in rl.environments'
    )
    # -s/--subdir: string option, read below as args.subdir.
    parser.add_argument(
        '-s', '--subdir',
        type=str,
        help='Subdirectory where the model is stored: e.g. -> ../trained_models/env_type/env/[SUBDIR]/model_num/*',
    )
    # -n/--num: integer option, handed to load_model below.
    parser.add_argument(
        '-n', '--num',
        type=int,
        help='Unique identifier of the model, e.g. -> ../trained_models/env_type/env/subdir/[NUM]_/*',
    )
    # -t/--tensorboard: boolean flag, False unless given on the command line.
    parser.add_argument(
        '-t', '--tensorboard',
        action='store_true',
        help='Launch tensorboard in the current subdirectory.',
    )
    args = parser.parse_args()

    # Load the requested model, optionally call its _tensorboard() hook,
    # then train it.
    model = Trainer(args.env_name, args.subdir).load_model(args.num)
    if args.tensorboard:
        model._tensorboard()
    model.train()
Ejemplo n.º 5
0
        type=str,
        help=
        'Name of the environment. Can be either a any gym environment or a custom one defined in rl.environments'
    )
    # -s/--subdir: string option, read below as args.subdir.
    parser.add_argument(
        '-s', '--subdir',
        type=str,
        help='Subdirectory where the trained model is going to be stored (useful for separating tensorboard logs): e.g. -> ../trained_models/env_type/env/[SUBDIR]/model_num/*',
    )
    # -n/--num: integer option, handed to load_model below.
    parser.add_argument(
        '-n', '--num',
        type=int,
        help='Unique identifier of the model, e.g. -> ../trained_models/env_type/env/subdir/[NUM]_/*',
    )
    # -o/--episodes: integer option, 200 when not given.
    parser.add_argument(
        '-o', '--episodes',
        type=int,
        default=200,
        help='Number of episodes to run.',
    )
    # -r/--render: boolean flag, False unless given on the command line.
    parser.add_argument(
        '-r', '--render',
        action='store_true',
        help='Render the agents.',
    )
    args = parser.parse_args()

    # NOTE(review): args.environment relies on an option declared above this
    # excerpt -- confirm its long name really is --environment.
    model = Trainer(args.environment, args.subdir).load_model(args.num)
    model.run(episodes=args.episodes, render=args.render)
Ejemplo n.º 6
0
    or 
       python train.py -e TestEnv -s TestSubdirectory -n NewModel -m DQN
"""
if __name__ == "__main__":
    # Command-line entry point: parse the options, create the Trainer and
    # choose the configuration -- the custom YAML file when one is requested
    # and readable, otherwise whatever get_parameters returns for the
    # environment.

    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--environment', type=str, help='Name of the environment. Can be either a any gym environment or a custom one defined in rl.environments')
    parser.add_argument('-s', '--subdir', type=str, help='Subdirectory where the trained model is going to be stored (useful for separating tensorboard logs): e.g. -> ../trained_models/env_type/env/[SUBDIR]/0_model/*')
    parser.add_argument('-n', '--name', type=str, default=None, help='Unique identifier of the model, e.g. -> ../trained_models/env_type/env/subdir/0_[NAME]/*')
    parser.add_argument('-m', '--model', type=str, default=None, help='Reinforcement learning model to use. PPO / ACER / ACKTR / DQN / .')
    parser.add_argument('-c', '--config', type=str, default=None, help='Adusted configuration file located in config/custom folder')
    # NOTE(review): this prints the full help text on every run, before the
    # arguments are parsed -- confirm that is intended.
    parser.print_help()
    args = parser.parse_args()
    path = pathlib.Path().absolute()

    trainer = Trainer(args.environment, args.subdir)

    if args.config is not None:
        config_path = join(path, 'rl', 'config', 'custom', '{}.yml'.format(args.config))
        try:
            with open(config_path) as f:
                config = yaml.safe_load(f)
        except (OSError, yaml.YAMLError) as err:
            # Only a missing/unreadable file or invalid YAML triggers the
            # fallback; a bare except would also hide Ctrl-C and real bugs.
            print('specified config is not in path, getting original config: {}.yml...'.format(args.environment))
            print('Reason: {}'.format(err))
            # load config and variables needed
            config = get_parameters(args.environment)
        else:
            print('\nLoaded config file from: {}\n'.format(config_path))
    else:
        config = get_parameters(args.environment)
Ejemplo n.º 7
0
		Run one timestep of the environment's dynamics. When end of
        episode is reached, call reset() to reset this environment's state.
        Accepts an action and returns a tuple (observation, reward, done, info).
        Args:
            action (object): an action provided by the agent
        
        Returns:
            observation (object): agent's observation of the current environment
            reward (float) : amount of reward returned after previous action
            done (bool): whether the episode has ended, in which case further step() calls will return undefined results
            info (dict): contains auxiliary diagnostic information (helpful for debugging, and sometimes learning)
		""""
        #return next_state, reward, terminate, info

	def render(self):
		"""
		Should render the observation based on the current state. (Pure visualization)

		Template stub: it has no body besides this docstring and returns None.
		"""

if __name__ == "__main__":
    # Script entry point: build the environment, create and train a model,
    # then hand the trained model to the environment's run().
    from rl.baselines import get_parameters
    from rl.baselines import Trainer
    # Nothing from this module is referenced below; presumably it is imported
    # for its side effects (e.g. registering environments) -- confirm.
    import rl.environments

    params = get_parameters('custom_env')
    env = custom_env(params)

    trainer = Trainer('custom_env', 'models')
    model = trainer.create_model()
    model._tensorboard()
    model.train()
    print('Training done')

    # Block until the user presses Enter before showing the trained model.
    input('Run trained model (Enter)')
    env.create_window()
    env.run(model)
Ejemplo n.º 8
0
        """
        self.create_window()
        for _ in range(10):
            self.done = False
            state = self.reset()
            while not self.done:
                action = self.action_space.sample()
                state, reward, self.done, _ = self.step(action)
                print('Reward: {:2.3f}, state: {}, action: {}'.format(
                    reward, state, action))
                self.render(True)
        cv2.destroyAllWindows()

    def create_window(self):
        """
        Create the OpenCV window named `self.window_name` with the
        WINDOW_NORMAL flag and resize it to 300 x 300.
        """
        title = self.window_name
        cv2.namedWindow(title, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(title, 300, 300)


if __name__ == "__main__":
    # Script entry point: build the environment, create and train a model,
    # then hand the trained model to the environment's run().
    from rl.baselines import get_parameters
    from rl.baselines import Trainer
    # Nothing from this module is referenced below; presumably it is imported
    # for its side effects (e.g. registering environments) -- confirm.
    import rl.environments

    params = get_parameters('TestEnv')
    env = TestEnv(params)

    trainer = Trainer('TestEnv', 'models')
    model = trainer.create_model()
    model._tensorboard()
    model.train()
    print('Training done')

    # Block until the user presses Enter before showing the trained model.
    input('Run trained model (Enter)')
    env.create_window()
    env.run(model)
Ejemplo n.º 9
0
class InstanceManager:
    """
    Creates dirs, manages envs and instances.

    An environment is a directory below ``settings.TRAINED_MODELS``. An
    instance is a sub-directory of the environment directory named
    ``<id>_<namestamp>``; it is where the config and the model are saved.
    """
    def __init__(self, env=None):
        """
        Start with no environment and no instance selected.

        Args:
            env: optional environment name; when given it is passed to
                `set_env` right away.
        """
        self.env_name = ''
        self.env_path = None

        self.instance_name = None
        self.instance_path = None

        self.trainer = Trainer()
        self.config = ConfigManager()

        if env is not None:
            self.set_env(env)

    def set_env(self, env):
        """
        Select environment `env`: create its directory if needed and load
        its configuration.

        Returns:
            Whatever ``self.config.load(env)`` returns.
        """
        self.env_name = env
        self.env_path = os.path.join(settings.TRAINED_MODELS, env)
        os.makedirs(self.env_path, exist_ok=True)
        config_path = self.config.load(env)
        return config_path

    @staticmethod
    def _parse_id(entry_name):
        """Return the integer before the first '_' of `entry_name`, or None
        when that prefix is not an integer."""
        try:
            return int(entry_name.split('_')[0])
        except ValueError:
            return None

    @staticmethod
    def _next_free_id(entry_names):
        """Return the smallest non-negative integer that is not used as the
        numeric prefix of any name in `entry_names`."""
        used = set()
        for entry_name in entry_names:
            parsed = InstanceManager._parse_id(entry_name)
            if parsed is not None:
                used.add(parsed)
        candidate = 0
        while candidate in used:
            candidate += 1
        return candidate

    def new_instance(self, namestamp=None):
        """
        Create the directory for a new instance and a model inside it.

        The instance gets the lowest free numeric ID, so IDs of deleted
        instances are reused. Entries of the environment directory without
        a numeric prefix are ignored.

        Args:
            namestamp: name suffix of the instance; when None it is built
                from the environment name, the current config's model type,
                policy type and worker count, and the current date.
        """
        # Assign a unique numerical ID to an instance
        unique_id = self._next_free_id(os.listdir(self.env_path))

        if namestamp is None:
            date = datetime.datetime.now().strftime("%m-%d_%H-%M")
            namestamp = "{}_{}_{}_{}_{}".format(
                self.env_name, self.config.model_type, self.config.policy_type,
                self.config.trainer['n_workers'], date)
        self.instance_name = str(unique_id) + '_' + namestamp
        self.instance_path = os.path.join(self.env_path, self.instance_name)
        os.makedirs(self.instance_path, exist_ok=True)
        print(self.instance_path)
        self.config = ConfigManager(env_name=self.env_name)
        self.trainer.create_model(config=self.config, path=self.instance_path)

    def load_instance(self, path=None, num=None):
        """
        Loads an instance from the specified path.

        Args:
            path: directory of the instance.
            num: numeric ID of the instance; when a directory with this ID
                prefix exists in the environment directory it takes
                precedence over `path`.

        Raises:
            ValueError: if no instance directory could be determined.
        """
        if num is not None:
            for f in os.listdir(self.env_path):
                fpath = os.path.join(self.env_path, f)
                print(f)
                # _parse_id yields None for names without a numeric prefix,
                # so such directories are skipped instead of crashing.
                if os.path.isdir(fpath) and num == self._parse_id(f):
                    print(fpath)
                    path = fpath
                    break

        if path is None:
            # Fail with a clear message instead of a TypeError from
            # os.path.join(None, ...).
            if num is not None:
                raise ValueError('No instance with id {} found in {}'.format(
                    num, self.env_path))
            raise ValueError('Either path or num must be given.')

        config_path = os.path.join(path, 'config.yml')
        self.config.load_dict(self.config.load_file(config_path))
        self.trainer.load_model(path=path, config=self.config)
        self.instance_path = path
        self.instance_name = os.path.split(path)[-1]

    def save_instance(self):
        """
        Saves the current instance (model weights and config files).

        The trainer's `steps_trained` is added to the count already stored
        in the config, or stored as-is when the config has none yet.
        """
        print(self.instance_path)
        if self.config.trainer.get('steps_trained') is None:
            self.config.trainer['steps_trained'] = self.trainer.steps_trained
        else:
            self.config.trainer['steps_trained'] += self.trainer.steps_trained
        self.config.save(self.instance_path)
        self.trainer.save_model()

    def tensorboard(self, browser=True):
        """
        Restart tensorboard on the current instance directory.

        Args:
            browser: when True, wait two seconds and open the tensorboard
                page in a new browser tab.
        """
        # Kill current session
        self._tensorboard_kill()

        # Open the dir of the current instance. An argument list (no shell)
        # keeps paths containing spaces or shell metacharacters intact.
        cmd = ['tensorboard', '--logdir', self.instance_path]
        print('Launching tensorboard at {}'.format(self.instance_path))
        try:
            # subprocess.DEVNULL avoids leaking an opened os.devnull handle.
            subprocess.Popen(cmd,
                             stdout=subprocess.DEVNULL,
                             stderr=subprocess.DEVNULL)
        except OSError as err:
            # Keep the launch best-effort, e.g. when tensorboard is not
            # installed or not on PATH.
            print('Could not launch tensorboard: {}'.format(err))
            return

        if browser:
            # Give the server a moment to start before opening the page.
            time.sleep(2)
            webbrowser.open_new_tab(
                url='http://localhost:6006/#scalars&_smoothingWeight=0.995')

    def _tensorboard_kill(self):
        """
        Destroy all running instances of tensorboard
        """
        print('Closing current session of tensorboard.')
        if sys.platform == 'win32':
            os.system("taskkill /f /im  tensorboard.exe")
        elif sys.platform.startswith('linux') or sys.platform == 'darwin':
            # startswith also covers the legacy 'linux2' value; macOS ships
            # pkill as well.
            os.system('pkill tensorboard')
        else:
            print('No running instances of tensorboard.')