def __init__(self, environment, function_approximator, config=None, summary=None):
    self.config = config or Config()
    assert isinstance(self.config, Config)
    """
    Parameters in config:
    Name:           Type:   Default:    Description: (Omitted when self-explanatory)
    store_summary   bool    False       store the summary of the agent (return per episode)
    """
    self.store_summary = check_attribute_else_default(self.config, 'store_summary', False)
    if self.store_summary:
        assert isinstance(summary, dict)
        self.summary = summary
        check_dict_else_default(self.summary, 'return_per_episode', [])

    " Other Parameters "
    # Function Approximator: used to approximate the Q-Values
    self.fa = function_approximator
    # Environment that the agent is interacting with
    self.env = environment
    # Summaries
    self.cumulative_reward = 0
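# A minimal usage sketch (not part of the original file): the Agent expects a populated Config,
# a shared summary dict, an environment, and a function approximator. The concrete classes below
# (MountainCar, TileCoderFA) are taken from the experiment scripts in this repo and stand in for
# any compatible pair; the additional Config fields each of them requires are omitted here.
#
#     config = Config()
#     config.store_summary = True
#     summary = {}
#     env = MountainCar(config, summary=summary)
#     fa = TileCoderFA(config=config)
#     agent = Agent(environment=env, function_approximator=fa, config=config, summary=summary)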
def __init__(self, experiment_parameters, run_results_dir):
    self.run_results_dir = run_results_dir
    self.num_tilings = check_attribute_else_default(experiment_parameters, 'num_tilings', 32)
    self.tiling_length = check_attribute_else_default(experiment_parameters, 'tiling_length', 10)
    self.learning_rate = check_attribute_else_default(experiment_parameters, 'learning_rate', 0.001)
    self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                         choices=['mountain_car', 'catcher'])
    self.verbose = experiment_parameters.verbose

    self.config = Config()
    self.config.store_summary = True
    self.summary = {}

    """ Parameters for the Environment """
    self.config.max_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['max_actions']
    self.config.norm_state = True

    """ Parameters for the Function Approximator """
    self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
    self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
    self.config.gamma = 1.0
    self.config.epsilon = 0.1
    # the effective step size is divided by the number of tilings
    self.config.lr = self.learning_rate / self.num_tilings
    self.config.num_tilings = self.num_tilings
    self.config.tiling_length = self.tiling_length
    self.config.scaling_factor = 1/2
    self.config.scaling_offset = 1

    self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
    self.fa = TileCoderFA(config=self.config)
    self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                          summary=self.summary)
def __init__(self, experiment_parameters, run_results_dir):
    self.run_results_dir = run_results_dir
    self.tnet_update_freq = check_attribute(experiment_parameters, 'tnet_update_freq', 1)
    self.buffer_size = check_attribute(experiment_parameters, 'buffer_size', 10000)
    self.learning_rate = check_attribute(experiment_parameters, 'lr', 0.001)
    self.environment_name = check_attribute(experiment_parameters, 'env', 'mountain_car',
                                            choices=['mountain_car', 'catcher', 'puddle_world'])
    self.ppa = check_attribute(experiment_parameters, 'ppa', 0.1)
    self.gated = check_attribute(experiment_parameters, 'gated', False)
    self.gate_function = check_attribute(experiment_parameters, 'gate_function', 'tanh')
    self.verbose = experiment_parameters.verbose

    self.config = Config()
    self.config.store_summary = True
    # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
    self.summary = {}
    self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

    """ Parameters for the Environment """
    self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
    self.config.norm_state = True
    self.config.current_step = 0

    """ Parameters for the Function Approximator """
    self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
    self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
    self.config.gamma = 1.0
    self.config.epsilon = 0.1
    self.config.optim = "adam"
    self.config.lr = self.learning_rate
    self.config.batch_size = 32
    self.config.ppa = self.ppa
    self.config.h1_dims = 32
    self.config.h2_dims = 256
    self.config.gate_function = self.gate_function

    # DQN parameters
    self.config.buffer_size = self.buffer_size
    self.config.tnet_update_freq = self.tnet_update_freq
    self.config.input_dims = self.config.state_dims
    self.config.gated = bool(self.gated)

    self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
    self.fa = ActionDQN(config=self.config, summary=self.summary)
    self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                          summary=self.summary)
def __init__(self, experiment_parameters, run_results_dir):
    self.run_results_dir = run_results_dir
    self.buffer_size = check_attribute_else_default(experiment_parameters, 'buffer_size', 20000)
    self.tnet_update_freq = check_attribute_else_default(experiment_parameters, 'tnet_update_freq', 10)
    self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                         choices=['mountain_car', 'catcher'])
    self.verbose = experiment_parameters.verbose

    # parameters specific to the parameter sweep
    self.learning_rate = check_attribute_else_default(experiment_parameters, 'lr', 0.001)
    self.l1_reg = check_attribute_else_default(experiment_parameters, 'l1_reg', True)
    self.weights_reg = check_attribute_else_default(experiment_parameters, 'weights_reg', True)
    self.reg_factor = check_attribute_else_default(experiment_parameters, 'reg_factor', 0.1)

    self.config = Config()
    self.config.store_summary = True
    # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
    self.summary = {}
    self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

    """ Parameters for the Environment """
    # Same for every experiment
    self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
    self.config.norm_state = True
    self.config.current_step = 0

    """ Parameters for the Function Approximator """
    # Same for every experiment
    self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
    self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
    self.config.gamma = 1.0
    self.config.epsilon = 0.1
    self.config.optim = "adam"
    self.config.batch_size = 32

    # Selected after finding the best parameter combinations for DQN with a given buffer size
    self.config.buffer_size = self.buffer_size
    self.config.tnet_update_freq = self.tnet_update_freq

    # These are the parameters that we are sweeping over
    self.config.lr = self.learning_rate
    self.config.reg_method = 'l1' if self.l1_reg else 'l2'
    self.config.weights_reg = self.weights_reg
    self.config.reg_factor = self.reg_factor

    self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
    self.fa = RegularizedNeuralNetwork(config=self.config, summary=self.summary)
    self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                          summary=self.summary)
    terminate = self.pOb.game_over()
    self.current_state = self.pOb.getGameState()
    timeout = bool(self.episode_step_count >= self.max_episode_length
                   or self.config.current_step >= self.number_of_steps)
    return self.current_state, reward, terminate, timeout

def get_current_state(self):
    return self.current_state

def close(self):
    return


if __name__ == "__main__":
    print('==== Random Policy Example ====')
    config = Config()
    config.store_summary = True
    config.max_episode_length = 100000
    config.number_of_steps = 100000
    config.current_step = 0
    summary = {}
    actions = 3
    verbose = False

    env = Catcher3(config, summary=summary)
    cumulative_reward = 0
    terminations = 0
    successful_episode_steps = []

    for i in range(config.number_of_steps):
        action = np.random.randint(actions)
def __init__(self, experiment_parameters, run_results_dir):
    self.run_results_dir = run_results_dir
    self.learning_rate = check_attribute_else_default(experiment_parameters, 'lr', 0.001)
    self.buffer_size = check_attribute_else_default(experiment_parameters, 'buffer_size', 20000)
    self.tnet_update_freq = check_attribute_else_default(experiment_parameters, 'tnet_update_freq', 10)
    self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                         choices=['mountain_car', 'catcher'])
    self.verbose = experiment_parameters.verbose

    # parameters specific to distributional regularizers
    self.beta = check_attribute_else_default(experiment_parameters, 'beta', 0.1)
    self.reg_factor = check_attribute_else_default(experiment_parameters, 'reg_factor', 0.1)
    self.use_gamma = check_attribute_else_default(experiment_parameters, 'use_gamma', False)
    self.beta_lb = check_attribute_else_default(experiment_parameters, 'beta_lb', False)

    self.config = Config()
    self.config.store_summary = True
    # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
    self.summary = {}
    self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

    """ Parameters for the Environment """
    self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
    self.config.norm_state = True
    self.config.current_step = 0

    """ Parameters for the Function Approximator """
    self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
    self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
    self.config.gamma = 1.0
    self.config.epsilon = 0.1
    self.config.optim = "adam"
    self.config.batch_size = 32

    # DQN parameters
    self.config.lr = self.learning_rate
    self.config.buffer_size = self.buffer_size
    self.config.tnet_update_freq = self.tnet_update_freq
    # distributional regularizer parameters
    self.config.beta = self.beta
    self.config.reg_factor = self.reg_factor
    self.config.use_gamma = self.use_gamma
    self.config.beta_lb = self.beta_lb

    self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
    self.fa = DistRegNeuralNetwork(config=self.config, summary=self.summary)
    self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                          summary=self.summary)
" Argument Parser " ################### parser = argparse.ArgumentParser() parser.add_argument("-minibatch_size", action='store', default=np.int8(32)) parser.add_argument("-lr", action='store', default=np.float64(0.001)) parser.add_argument("-threshold", action='store', default=1e-4, type=float) parser.add_argument('-regularization', action='store', default='none', type=str, choices=['none', 'l1', 'l2']) parser.add_argument('-reg_factor', action='store', default=0.0001, type=float) parser.add_argument('-test_copy_params', action='store_true', default=False) parser.add_argument('-init_test', action='store_true', default=False) parser.add_argument('-simple_training_test', action='store_true', default=False) parser.add_argument('-copy_parameters_test', action='store_true', default=False) parser.add_argument('-networks_comparison_test', action='store_true', default=False) args = parser.parse_args() config = Config() ############################################ " Example: initializing the neural network " ############################################ if args.init_test: config.input_dims = 2 config.h1_dims = 2 config.h2_dims = 2 config.output_dims = 1 print("Creating Two Layer Fully Connected Network...") network = TwoLayerFullyConnected(config) network.apply(weight_init) print("Printing Network...") print("\t", network, "\n")
    return temp_state

def get_current_state(self):
    if self.norm_state:
        return self.normalize(self.current_state)
    else:
        return self.current_state


if __name__ == "__main__":
    verbose = False
    random_policy_test = True
    pumping_action_test = True
    actions = 3

    config = Config()
    config.norm_state = True
    config.store_summary = True
    config.max_episode_length = 1000000
    steps = 10000

    if random_policy_test:
        print("==== Results with Random Policy ====")
        config.current_step = 0
        summary = {}
        env = MountainCar(config, summary=summary)
        cumulative_reward = 0
        terminations = 0
def __init__(self, experiment_parameters, run_results_dir):
    self.run_results_dir = run_results_dir
    self.buffer_size = check_attribute_else_default(experiment_parameters, 'buffer_size', 20000)
    self.method = check_attribute_else_default(experiment_parameters, 'method', 'DQN')
    self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                         choices=['mountain_car', 'catcher', 'puddle_world'])
    parameters_dictionary = BEST_PARAMETERS_DICTIONARY[self.environment_name][self.method][self.buffer_size]
    self.verbose = experiment_parameters.verbose

    self.config = Config()
    self.config.store_summary = True
    # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
    self.summary = {}
    self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

    """ Parameters for the Environment """
    self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
    self.config.norm_state = True
    self.config.current_step = 0

    """ Parameters for the Function Approximator """
    self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
    self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
    self.config.gamma = 1.0
    self.config.epsilon = 0.1
    self.config.optim = "adam"
    self.config.batch_size = 32

    # Parameters for any type of agent
    self.config.buffer_size = self.buffer_size
    self.config.lr = parameters_dictionary['LearningRate']
    self.config.tnet_update_freq = parameters_dictionary['Freq']

    if self.method in ['DRE', 'DRE_LB', 'DRG', 'DRG_LB']:
        self.config.beta = parameters_dictionary['Beta']
        self.config.reg_factor = parameters_dictionary['RegFactor']
        self.config.use_gamma = False
        self.config.beta_lb = False
        if self.method in ['DRG', 'DRG_LB']:
            self.config.use_gamma = True
        if self.method in ['DRE_LB', 'DRG_LB']:
            self.config.beta_lb = True
        self.fa = DistRegNeuralNetwork(config=self.config, summary=self.summary)
    elif self.method in ['L1A', 'L1W', 'L2A', 'L2W']:
        self.config.reg_factor = parameters_dictionary['RegFactor']
        self.config.reg_method = 'l1'
        if self.method in ['L2A', 'L2W']:
            self.config.reg_method = 'l2'
        self.config.weights_reg = False
        if self.method in ['L1W', 'L2W']:
            self.config.weights_reg = True
        self.fa = RegularizedNeuralNetwork(config=self.config, summary=self.summary)
    elif self.method in ['DQN']:
        self.fa = VanillaDQN(config=self.config, summary=self.summary)
    elif self.method in ['Dropout']:
        self.config.dropout_probability = parameters_dictionary['DropoutProbability']
        self.fa = DropoutNeuralNetwork(config=self.config, summary=self.summary)
    else:
        raise ValueError("No configuration available for the given method.")

    self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
    self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                          summary=self.summary)