def show_help(args: List[str]):
    """
    Print the argument-parser help for a single named class.

    Expects exactly one argument (the fully-qualified class name); prints a
    usage message otherwise.

    :param args: Arguments.
    """

    # wrong argument count:  show usage and bail out early.
    if len(args) != 1:
        print('Usage: rlai help CLASS')
        return

    try:
        help_parser = get_argument_parser(args[0])
        parse_arguments(help_parser, ['--help'])
    except Exception as ex:
        print(f'{ex}')
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState,
        environment: Environment
) -> Tuple[List[Agent], List[str]]:
    """
    Build an action-value MDP agent from command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :param environment: Environment.
    :return: 2-tuple of a list of agents and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # resolve the configured state-action value estimator class and let it
    # consume its own arguments from the remaining list
    estimator_class = load_class(parsed.q_S_A)
    q_S_A, remaining = estimator_class.init_from_arguments(
        args=remaining,
        random_state=random_state,
        environment=environment
    )
    del parsed.q_S_A

    # noinspection PyUnboundLocalVariable
    agent = cls(
        name=f'action-value (gamma={parsed.gamma})',
        random_state=random_state,
        q_S_A=q_S_A,
        **vars(parsed)
    )

    return [agent], remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState,
        pi: Policy
) -> Tuple[List[Agent], List[str]]:
    """
    Build a stochastic MDP agent from command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :param pi: Policy.
    :return: 2-tuple of a list of agents and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # all parsed values pass straight through to the constructor
    agent = cls(
        name=f'stochastic (gamma={parsed.gamma})',
        random_state=random_state,
        pi=pi,
        **vars(parsed)
    )

    return [agent], remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState,
        environment: Environment
) -> Tuple[List[Agent], List[str]]:
    """
    Build a preference-gradient agent from command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :param environment: Environment.
    :return: 2-tuple of a list of agents and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # a single agent, named after its step size
    agent = cls(
        name=f'preference gradient (step size={parsed.step_size_alpha})',
        random_state=random_state,
        **vars(parsed)
    )

    return [agent], remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState
) -> Tuple[FunctionApproximationModel, List[str]]:
    """
    Build a function-approximation model from command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :return: 2-tuple of a model and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # the CLI uses sgd_-prefixed names to avoid colliding with alpha/epsilon
    # arguments defined elsewhere; translate them back for the constructor.
    parsed.alpha = parsed.sgd_alpha
    del parsed.sgd_alpha
    parsed.epsilon = parsed.sgd_epsilon
    del parsed.sgd_epsilon

    model = cls(
        random_state=random_state,
        **vars(parsed)
    )

    return model, remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState,
        pi: Optional[Policy]
) -> Tuple[List[Agent], List[str]]:
    """
    Build one UCB agent per configured c value from command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :param pi: Policy.
    :return: 2-tuple of a list of agents and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # pull the c values out so they aren't double-passed via **vars below
    c_values = parsed.c
    del parsed.c

    agents = []
    for c in c_values:
        agents.append(
            cls(
                name=f'UCB (c={c})',
                random_state=random_state,
                c=c,
                **vars(parsed)
            )
        )

    return agents, remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState,
        pi: Optional[Policy]
) -> Tuple[List[Agent], List[str]]:
    """
    Build one epsilon-greedy agent per configured epsilon from command-line
    arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :param pi: Policy.
    :return: 2-tuple of a list of agents and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # pull the epsilon values out so they aren't double-passed via **vars below
    epsilons = parsed.epsilon
    del parsed.epsilon

    agents = []
    for epsilon in epsilons:
        agents.append(
            cls(
                name=f'epsilon-greedy (e={epsilon:0.2f})',
                random_state=random_state,
                epsilon=epsilon,
                **vars(parsed)
            )
        )

    return agents, remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState,
        environment: MdpEnvironment
) -> Tuple[StateActionValueEstimator, List[str]]:
    """
    Build a function-approximation state-action value estimator from
    command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :param environment: Environment.
    :return: 2-tuple of a state-action value estimator and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # the configured model class consumes its own arguments
    model_class = load_class(parsed.function_approximation_model)
    model, remaining = model_class.init_from_arguments(
        args=remaining,
        random_state=random_state
    )
    del parsed.function_approximation_model

    # ...as does the configured feature extractor
    fex_class = load_class(parsed.feature_extractor)
    fex, remaining = fex_class.init_from_arguments(
        args=remaining,
        environment=environment
    )
    del parsed.feature_extractor

    estimator = cls(
        environment=environment,
        model=model,
        feature_extractor=fex,
        **vars(parsed)
    )

    return estimator, remaining
def init_from_arguments(
        cls,
        args: List[str],
        environment: ContinuousMdpEnvironment
) -> Tuple[Policy, List[str]]:
    """
    Build a policy for a continuous-state environment from command-line
    arguments.

    :param args: Arguments.
    :param environment: Environment.
    :return: 2-tuple of a policy and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # the configured feature extractor consumes its own arguments
    fex_class = load_class(parsed.policy_feature_extractor)
    feature_extractor, remaining = fex_class.init_from_arguments(
        args=remaining,
        environment=environment
    )
    del parsed.policy_feature_extractor

    policy = cls(
        environment=environment,
        feature_extractor=feature_extractor,
        **vars(parsed)
    )

    return policy, remaining
def init_from_arguments(
        cls,
        args: List[str],
        environment: MdpEnvironment
) -> Tuple[Policy, List[str]]:
    """
    Build a policy from command-line arguments.

    :param args: Arguments.
    :param environment: Environment.
    :return: 2-tuple of a policy and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # the configured feature extractor consumes its own arguments
    fex_class = load_class(parsed.policy_feature_extractor)
    feature_extractor, remaining = fex_class.init_from_arguments(
        args=remaining,
        environment=environment
    )
    del parsed.policy_feature_extractor

    # the constructor takes no other arguments, so anything left over was
    # parsed but never consumed -- that's a programming error.
    if len(vars(parsed)) > 0:  # pragma no cover
        raise ValueError('Parsed args remain. Need to pass to constructor.')

    policy = cls(
        feature_extractor=feature_extractor
    )

    return policy, remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState
) -> Tuple[Environment, List[str]]:
    """
    Build a bandit environment from command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :return: 2-tuple of an environment and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    bandit = cls(
        random_state=random_state,
        **vars(parsed)
    )

    return bandit, remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState
) -> Tuple[Environment, List[str]]:
    """
    Build a gambler's-problem environment from command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :return: 2-tuple of an environment and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # name the environment after the head probability it was configured with
    gamblers_problem = cls(
        name=f"gambler's problem (p={parsed.p_h})",
        random_state=random_state,
        **vars(parsed)
    )

    return gamblers_problem, remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState,
        environment: MdpEnvironment
) -> Tuple[StateActionValueEstimator, List[str]]:
    """
    Build a state-action value estimator from command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :param environment: Environment.
    :return: 2-tuple of a state-action value estimator and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    estimator = cls(
        environment=environment,
        **vars(parsed)
    )

    return estimator, remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState,
        environment: MdpEnvironment
) -> Tuple[StateValueEstimator, List[str]]:
    """
    Build a function-approximation state-value estimator from command-line
    arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :param environment: Environment.
    :return: 2-tuple of a state-value estimator and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # the configured model class consumes its own arguments
    model_class = load_class(parsed.function_approximation_model)
    model, remaining = model_class.init_from_arguments(
        args=remaining,
        random_state=random_state
    )
    del parsed.function_approximation_model

    # ...as does the configured feature extractor
    fex_class = load_class(parsed.feature_extractor)
    fex, remaining = fex_class.init_from_arguments(
        args=remaining,
        environment=environment
    )
    del parsed.feature_extractor

    # the constructor takes no other arguments, so anything left over was
    # parsed but never consumed -- that's a programming error.
    if len(vars(parsed)) > 0:  # pragma no cover
        raise ValueError('Parsed args remain. Need to pass to constructor.')

    estimator = cls(
        model=model,
        feature_extractor=fex
    )

    return estimator, remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState
) -> Tuple[Environment, List[str]]:
    """
    Build a trajectory-planning environment from command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :return: 2-tuple of an environment and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # planning runs against a fresh stochastic model of the environment
    planning_environment = cls(
        name='trajectory planning',
        random_state=random_state,
        model=StochasticEnvironmentModel(),
        **vars(parsed)
    )

    return planning_environment, remaining
def init_from_arguments(
        cls,
        args: List[str],
        environment: Gym
) -> Tuple[FeatureExtractor, List[str]]:
    """
    Build a Gym feature extractor from command-line arguments.

    :param args: Arguments.
    :param environment: Environment.
    :return: 2-tuple of a feature extractor and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # the constructor takes no arguments, so anything parsed but unconsumed
    # indicates a programming error.
    if len(vars(parsed)) > 0:  # pragma no cover
        raise ValueError(
            'Parsed args remain. Need to pass to constructor.')

    return cls(), remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState
) -> Tuple[Environment, List[str]]:
    """
    Build a Mancala environment from command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :return: 2-tuple of an environment and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # player 2 is part of the environment:  a stochastic agent following a
    # tabular policy.
    environmental_player = StochasticMdpAgent(
        'environmental agent',
        random_state,
        TabularPolicy(None, None),
        1
    )

    mancala = cls(
        random_state=random_state,
        player_2=environmental_player,
        **vars(parsed)
    )

    return mancala, remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState,
        environment: Environment
) -> Tuple[List[Agent], List[str]]:
    """
    Build a parameterized-policy MDP agent from command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :param environment: Environment.
    :return: 2-tuple of a list of agents and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # the state-value estimator is optional; only load one when configured.
    v_S = None
    if parsed.v_S is not None:
        estimator_class = load_class(parsed.v_S)
        v_S, remaining = estimator_class.init_from_arguments(
            args=remaining,
            random_state=random_state,
            environment=environment
        )
    del parsed.v_S

    # the parameterized policy consumes its own arguments
    policy_class = load_class(parsed.policy)
    policy, remaining = policy_class.init_from_arguments(
        args=remaining,
        environment=environment
    )
    del parsed.policy

    # noinspection PyUnboundLocalVariable
    agent = cls(
        name=f'parameterized (gamma={parsed.gamma})',
        random_state=random_state,
        pi=policy,
        v_S=v_S,
        **vars(parsed)
    )

    return [agent], remaining
def init_from_arguments(
        cls,
        args: List[str],
        random_state: RandomState
) -> Tuple[Environment, List[str]]:
    """
    Build a gridworld environment from command-line arguments.

    :param args: Arguments.
    :param random_state: Random state.
    :return: 2-tuple of an environment and a list of unparsed arguments.
    """

    parsed, remaining = parse_arguments(cls, args)

    # the id names a class-level factory attribute that constructs the
    # requested gridworld; remove it so it isn't passed to the factory.
    factory_name = parsed.id
    del parsed.id
    factory = getattr(cls, factory_name)

    gridworld = factory(
        random_state=random_state,
        **vars(parsed)
    )

    return gridworld, remaining
def run(
        args: Optional[List[str]] = None,
        thread_manager: Optional[RunThreadManager] = None,
        train_function_args_callback: Optional[Callable[[Dict], None]] = None
) -> Tuple[Optional[str], Optional[str]]:
    """
    Train an agent in an environment.

    :param args: Arguments.
    :param thread_manager: Thread manager for the thread that is executing the current function. If None, then training
    will continue until termination criteria (e.g., number of iterations) are met. If not None, then the passed manager
    will be waited upon before starting each iteration. If the manager blocks, then another thread will need to clear
    the manager before the iteration continues. If the manager aborts, then this function will return as soon as
    possible.
    :param train_function_args_callback: A callback function to be called with the arguments that will be passed to the
    training function. This gives the caller an opportunity to grab references to the internal arguments that will be
    used in training. For example, plotting from the Jupyter Lab interface grabs the state-action value estimator
    (q_S_A) from the passed dictionary to use in updating its plots. This callback is only called for fresh training. It
    is not called when resuming from a checkpoint.
    :returns: 2-tuple of the checkpoint path (if any) and the saved agent path (if any).
    """

    # initialize with flag set if not passed, so that execution will not block. since the caller will not hold a
    # reference to the manager, it cannot be cleared and execution will never block.
    if thread_manager is None:
        thread_manager = RunThreadManager(True)

    parser = get_argument_parser_for_run()
    parsed_args, unparsed_args = parse_arguments(parser, args)

    if parsed_args.train_function is None:
        raise ValueError('No training function specified. Cannot train.')

    # seed the random state if a seed was given; otherwise warn that runs will not be replicable.
    if parsed_args.random_seed is None:
        warnings.warn('No random seed provided to the trainer. Results will not be replicable. Consider passing --random-seed argument.')
        random_state = RandomState()
    else:
        random_state = RandomState(parsed_args.random_seed)

    # warn user, as training could take a long time and it'll be wasted effort if the agent is not saved.
    if parsed_args.save_agent_path is None:
        warnings.warn('No --save-agent-path has been specified, so no agent will be saved after training.')

    initial_policy = None

    # load training function and parse any arguments that it requires
    train_function = import_function(parsed_args.train_function)
    train_function_arg_parser = get_argument_parser_for_train_function(parsed_args.train_function)
    parsed_train_function_args, unparsed_args = parse_arguments(train_function_arg_parser, unparsed_args)
    train_function_args = {
        'thread_manager': thread_manager,
        **vars(parsed_train_function_args)
    }

    # convert boolean strings to booleans
    if train_function_args.get('update_upon_every_visit', None) is not None:
        train_function_args['update_upon_every_visit'] = train_function_args['update_upon_every_visit'] == 'True'

    if train_function_args.get('make_final_policy_greedy', None) is not None:
        train_function_args['make_final_policy_greedy'] = train_function_args['make_final_policy_greedy'] == 'True'

    if train_function_args.get('plot_state_value', None) is not None:
        train_function_args['plot_state_value'] = train_function_args['plot_state_value'] == 'True'

    # load environment. each init_from_arguments call below consumes its own arguments from unparsed_args, so the
    # ordering of these loads matters.
    if train_function_args.get('environment', None) is not None:
        environment_class = load_class(train_function_args['environment'])
        train_function_args['environment'], unparsed_args = environment_class.init_from_arguments(
            args=unparsed_args,
            random_state=random_state
        )

    # load planning environment
    if train_function_args.get('planning_environment', None) is not None:
        planning_environment_class = load_class(train_function_args['planning_environment'])
        train_function_args['planning_environment'], unparsed_args = planning_environment_class.init_from_arguments(
            args=unparsed_args,
            random_state=random_state
        )

    # load state-action value estimator. its initial policy becomes the agent's policy unless a parameterized policy
    # is loaded below.
    if train_function_args.get('q_S_A', None) is not None:
        estimator_class = load_class(train_function_args['q_S_A'])
        state_action_value_estimator, unparsed_args = estimator_class.init_from_arguments(
            args=unparsed_args,
            random_state=random_state,
            environment=train_function_args['environment']
        )
        train_function_args['q_S_A'] = state_action_value_estimator
        initial_policy = state_action_value_estimator.get_initial_policy()

    # load state-value estimator
    if train_function_args.get('v_S', None) is not None:
        estimator_class = load_class(train_function_args['v_S'])
        train_function_args['v_S'], unparsed_args = estimator_class.init_from_arguments(
            args=unparsed_args,
            random_state=random_state,
            environment=train_function_args['environment']
        )

    # load parameterized policy. this overrides any initial policy obtained from the q_S_A estimator above.
    if train_function_args.get('policy', None) is not None:
        policy_class = load_class(train_function_args['policy'])
        initial_policy, unparsed_args = policy_class.init_from_arguments(
            args=unparsed_args,
            environment=train_function_args['environment']
        )
        train_function_args['policy'] = initial_policy

    # load agent
    if train_function_args.get('agent', None) is not None:
        agent_class = load_class(train_function_args['agent'])
        agents, unparsed_args = agent_class.init_from_arguments(
            args=unparsed_args,
            random_state=random_state,
            pi=initial_policy
        )
        agent = agents[0]
        train_function_args['agent'] = agent
    else:
        agent = None

    # a stray --help may remain after the class-specific parsers ran; it has served its purpose.
    if '--help' in unparsed_args:
        unparsed_args.remove('--help')

    if len(unparsed_args) > 0:
        raise ValueError(f'Unparsed arguments remain: {unparsed_args}')

    new_checkpoint_path = None

    # resumption will return a trained version of the agent contained in the checkpoint file
    if parsed_args.resume:
        agent = resume_from_checkpoint(
            resume_function=train_function,
            **train_function_args
        )

    # fresh training will train the agent that was initialized above and passed in
    else:

        if train_function_args_callback is not None:
            train_function_args_callback(train_function_args)

        new_checkpoint_path = train_function(
            **train_function_args
        )

        train_function_args['environment'].close()

        if isinstance(initial_policy, ParameterizedPolicy):
            initial_policy.close()

    logging.info('Training complete.')

    # try to save agent
    if agent is None:  # pragma no cover
        warnings.warn('No agent resulting at end of training. Nothing to save.')
    elif parsed_args.save_agent_path is None:
        warnings.warn('No --save-agent-path specified. Not saving agent.')
    else:
        with open(os.path.expanduser(parsed_args.save_agent_path), 'wb') as f:
            pickle.dump(agent, f)
        logging.info(f'Saved agent to {parsed_args.save_agent_path}')

    return new_checkpoint_path, parsed_args.save_agent_path