def __init__(self, config):
    super(GatedActionNeuralNetwork, self).__init__()
    self.config = config
    # format: check_attribute(config_class, attribute_name, default_value, data_type)  # description (optional)
    input_dims = check_attribute(self.config, 'input_dims', 1, data_type=int)
    h1_dims = check_attribute(self.config, 'h1_dims', 1, data_type=int)    # neurons in hidden layer 1
    h2_dims = check_attribute(self.config, 'h2_dims', 1, data_type=int)    # neurons in hidden layer 2
    self.num_actions = check_attribute(self.config, 'num_actions', 1, data_type=int)
    self.gate_function = check_attribute(self.config, 'gate_function', 'tanh', data_type=str)  # mask gate function

    self.fc1 = nn.Linear(input_dims, h1_dims, bias=True)
    self.fc2 = nn.Linear(h1_dims, h2_dims, bias=True)
    self.fc3 = nn.Linear(h2_dims, 1, bias=False)

    if self.gate_function == 'sigmoid':
        self.gf = torch.sigmoid
    elif self.gate_function == 'tanh':
        self.gf = torch.tanh
    elif self.gate_function == 'noisy_relu':
        self.gf = lambda x: torch.relu(x + torch.empty(x.shape).normal_(mean=0, std=1))
    else:
        raise ValueError("Choose one of the following gate functions: sigmoid, tanh, noisy_relu")

    # one learnable gate vector per action, plus a shared bias over the hidden-layer-2 features
    self.action_gates = nn.Parameter(torch.randn((self.num_actions, h2_dims)), requires_grad=True)
    self.action_gates_bias = nn.Parameter(torch.randn(h2_dims), requires_grad=True)
    self.action_indices = torch.arange(start=0, end=self.num_actions, dtype=torch.int64)
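# The forward pass is not shown in this section. Below is a minimal sketch, assuming the
# gate for each action is gf(action_gates + action_gates_bias) applied elementwise to the
# second hidden layer before fc3 maps each gated feature vector to a scalar Q-value. The
# function name and the relu activations are assumptions, not the repository's definition.
import torch
import torch.nn.functional as F

def gated_forward_sketch(net, x):
    h1 = F.relu(net.fc1(x))                                    # (batch, h1_dims)
    h2 = F.relu(net.fc2(h1))                                   # (batch, h2_dims)
    gates = net.gf(net.action_gates + net.action_gates_bias)   # (num_actions, h2_dims)
    gated = h2.unsqueeze(1) * gates.unsqueeze(0)               # (batch, num_actions, h2_dims)
    return net.fc3(gated).squeeze(-1)                          # (batch, num_actions) Q-values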
def __init__(self, config):
    super(TwoLayerFullyConnected, self).__init__()
    self.config = config
    # format: check_attribute(config_class, attribute_name, default_value, data_type)  # description (optional)
    input_dims = check_attribute(self.config, 'input_dims', 1, data_type=int)
    h1_dims = check_attribute(self.config, 'h1_dims', 1, data_type=int)    # neurons in hidden layer 1
    h2_dims = check_attribute(self.config, 'h2_dims', 1, data_type=int)    # neurons in hidden layer 2
    output_dims = check_attribute(self.config, 'output_dims', 1, data_type=int)

    self.fc1 = nn.Linear(input_dims, h1_dims, bias=True)
    self.fc2 = nn.Linear(h1_dims, h2_dims, bias=True)
    self.fc3 = nn.Linear(h2_dims, output_dims, bias=False)
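# Hypothetical usage sketch: wiring a config for the two-layer network. Assumes Config is
# the plain attribute container used elsewhere in this module; the dimensions mirror the
# experiment setup below (mountain car: 2 state dims, 3 actions, h1_dims=32, h2_dims=256).
config = Config()
config.input_dims = 2
config.h1_dims = 32
config.h2_dims = 256
config.output_dims = 3      # one Q-value per action
net = TwoLayerFullyConnected(config)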
def __init__(self, experiment_parameters, run_results_dir):
    self.run_results_dir = run_results_dir
    self.tnet_update_freq = check_attribute(experiment_parameters, 'tnet_update_freq', 1)
    self.buffer_size = check_attribute(experiment_parameters, 'buffer_size', 10000)
    self.learning_rate = check_attribute(experiment_parameters, 'lr', 0.001)
    self.environment_name = check_attribute(experiment_parameters, 'env', 'mountain_car',
                                            choices=['mountain_car', 'catcher', 'puddle_world'])
    self.verbose = experiment_parameters.verbose

    self.config = Config()
    self.config.store_summary = True
    # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
    self.summary = {}
    self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

    """ Parameters for the Environment """
    self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
    self.config.norm_state = True
    self.config.current_step = 0

    """ Parameters for the Function Approximator """
    self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
    self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
    self.config.gamma = 1.0
    self.config.epsilon = 0.1
    self.config.optim = "adam"
    self.config.lr = self.learning_rate
    self.config.batch_size = 32
    self.config.h1_dims = 32
    self.config.h2_dims = 256

    # DQN parameters
    self.config.buffer_size = self.buffer_size
    self.config.tnet_update_freq = self.tnet_update_freq
    self.config.input_dims = self.config.state_dims
    self.config.output_dims = self.config.num_actions

    self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config,
                                                                      summary=self.summary)
    self.fa = VanillaDQN(config=self.config, summary=self.summary)
    self.rl_agent = Agent(environment=self.env, function_approximator=self.fa,
                          config=self.config, summary=self.summary)
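# Hypothetical invocation sketch: experiment_parameters can be any attribute container
# carrying the tunable settings read above (an argparse.Namespace is assumed here). The
# class name Experiment is an assumption; only the constructor appears in this section.
import argparse

pars = argparse.Namespace(tnet_update_freq=10, buffer_size=20000, lr=0.00025,
                          env='mountain_car', verbose=True)
experiment = Experiment(pars, run_results_dir='./results')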
def __init__(self, environment, function_approximator, config=None, summary=None):
    self.config = config or Config()
    assert isinstance(self.config, Config)   # assert on self.config so the Config() fallback is allowed
    """
    Parameters in config:
    Name:           Type:   Default:    Description: (Omitted when self-explanatory)
    store_summary   bool    False       store the summary of the agent (return per episode)
    """
    self.store_summary = check_attribute(self.config, 'store_summary', False)
    if self.store_summary:
        assert isinstance(summary, dict)
        self.summary = summary
        check_dict_else_default(self.summary, 'return_per_episode', [])

    " Other Parameters "
    # Function Approximator: used to approximate the Q-Values
    self.fa = function_approximator
    # Environment that the agent is interacting with
    self.env = environment
    # Summaries
    self.cumulative_reward = 0
def __init__(self, config):
    super(NormNeuralNetwork, self).__init__()
    self.config = config
    # format: check_attribute(config_class, attribute_name, default_value, data_type)  # description (optional)
    input_dims = check_attribute(self.config, 'input_dims', 1, data_type=int)
    h1_dims = check_attribute(self.config, 'h1_dims', 1, data_type=int)    # neurons in hidden layer 1
    h2_dims = check_attribute(self.config, 'h2_dims', 1, data_type=int)    # neurons in hidden layer 2
    self.num_actions = check_attribute(self.config, 'num_actions', 1, data_type=int)
    self.norm_type = check_attribute(self.config, 'norm_type', 'batch', choices=['batch', 'layer'])

    self.fc1 = nn.Linear(input_dims, h1_dims, bias=True)
    self.fc2 = nn.Linear(h1_dims, h2_dims, bias=True)
    if self.norm_type == 'batch':
        self.bn2 = nn.BatchNorm1d(h2_dims, affine=False)
    else:
        self.bn2 = None    # no BatchNorm module when using layer normalization
    self.fc3 = nn.Linear(h2_dims, self.num_actions, bias=False)

    # learnable scale and shift over hidden-layer-2 features (BatchNorm1d is created with affine=False)
    self.action_scales = nn.Parameter(torch.randn(h2_dims), requires_grad=True)
    self.action_shifts = nn.Parameter(torch.randn(h2_dims), requires_grad=True)
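# Minimal forward sketch (not the repository's forward): assumes the learned
# action_scales/action_shifts supply the affine transform that BatchNorm1d(affine=False)
# omits, and that 'layer' normalization is computed manually over the feature dimension.
import torch
import torch.nn.functional as F

def norm_forward_sketch(net, x):
    h1 = F.relu(net.fc1(x))
    h2 = net.fc2(h1)
    if net.norm_type == 'batch':
        h2 = net.bn2(h2)                        # normalize over the batch, no affine
    else:
        mu = h2.mean(dim=1, keepdim=True)       # normalize over the features of each sample
        sd = h2.std(dim=1, keepdim=True)
        h2 = (h2 - mu) / (sd + 1e-5)
    h2 = F.relu(h2 * net.action_scales + net.action_shifts)   # learned scale and shift
    return net.fc3(h2)                          # (batch, num_actions)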
def __init__(self, config):
    super(ActionNeuralNetwork, self).__init__()
    self.config = config
    # format: check_attribute(config_class, attribute_name, default_value, data_type)  # description (optional)
    input_dims = check_attribute(self.config, 'input_dims', 1, data_type=int)
    h1_dims = check_attribute(self.config, 'h1_dims', 1, data_type=int)    # neurons in hidden layer 1
    h2_dims = check_attribute(self.config, 'h2_dims', 1, data_type=int)    # neurons in hidden layer 2
    self.num_actions = check_attribute(self.config, 'num_actions', 1, data_type=int)
    ppa = check_attribute(self.config, 'ppa', 0.1, data_type=float)        # proportion of neurons per action

    self.fc1 = nn.Linear(input_dims, h1_dims, bias=True)
    self.fc2 = nn.Linear(h1_dims, h2_dims, bias=True)
    self.fc3 = nn.Linear(h2_dims, 1, bias=False)

    # number of hidden-layer-2 neurons exclusive to each action
    self.npa = np.int64(np.floor(h2_dims * ppa))
    assert self.npa * self.num_actions <= h2_dims, "Too many neurons per action!"

    # each action's binary mask selects the shared neurons plus its own exclusive block
    self.masks = torch.zeros((self.num_actions, h2_dims))
    shared_neurons = np.int64(h2_dims - self.npa * self.num_actions)
    exclusive_neurons = 0
    for i in range(self.num_actions):
        self.masks[i][0:shared_neurons] += 1
        self.masks[i][(shared_neurons + exclusive_neurons):
                      (shared_neurons + exclusive_neurons + self.npa)] += 1
        exclusive_neurons += self.npa
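# Worked example of the mask layout with hypothetical dimensions: h2_dims = 10,
# num_actions = 3, ppa = 0.2, so npa = floor(10 * 0.2) = 2 exclusive neurons per action
# and 10 - 3 * 2 = 4 shared neurons. The resulting masks are:
#   action 0: [1 1 1 1  1 1  0 0  0 0]
#   action 1: [1 1 1 1  0 0  1 1  0 0]
#   action 2: [1 1 1 1  0 0  0 0  1 1]
# Every action sees the first 4 (shared) neurons; the remaining 6 are partitioned into
# disjoint blocks of 2, one block per action.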
def __init__(self, config, summary=None):
    """
    Parameters:
    Name:           Type    Default:    Description(omitted when self-explanatory):
    max_actions     int     1000        The max number of actions executed before forcing a time out
    norm_state      bool    True        Normalize the state to [-1,1]
    store_summary   bool    False       Whether to store the summary of the environment
    """
    self.norm_state = check_attribute(config, 'norm_state', True)
    self.max_actions = check_attribute(config, 'max_actions', 1000)
    self.store_summary = check_attribute(config, 'store_summary', False)

    self.summary = summary
    if self.store_summary:
        assert isinstance(self.summary, dict)
        check_dict_else_default(self.summary, "steps_per_episode", [])

    self.num_actions = 3
    self.state_dims = 4

    " Inner state of the environment "
    self.step_count = 0
    self.current_state = np.float64(np.random.uniform(low=-0.5, high=0.5, size=(4,)))

    # physical constants of the acrobot: velocity/angle bounds, masses, lengths,
    # centers of mass, moments of inertia, gravity, integration time step, goal height
    self.MAX_VEL_1 = 4 * np.pi
    self.MAX_VEL_2 = 9 * np.pi
    self.MAX_THETA_1 = np.pi
    self.MAX_THETA_2 = np.pi
    self.m1 = 1.0
    self.m2 = 1.0
    self.l1 = 1.0
    self.l2 = 1.0
    self.lc1 = 0.5
    self.lc2 = 0.5
    self.I1 = 1.0
    self.I2 = 1.0
    self.g = 9.8
    self.dt = 0.05
    self.acrobotGoalPosition = 1.0
def __init__(self, config, summary=None):
    assert isinstance(config, Config)
    """
    Parameters:
    Name:               Type    Default:    Description(omitted when self-explanatory):
    # environment parameters
    max_episode_length  int     500000      The max number of actions executed before forcing a time out
    norm_state          bool    True        Normalize the state to [-1,1]
    # summary parameters
    store_summary       bool    False       Whether to store the summary of the environment
    number_of_steps     int     500000      Total number of environment steps
    """
    check_attribute(config, 'current_step', 0)
    self.config = config

    # environment related variables
    self.max_episode_length = check_attribute(config, 'max_episode_length', default_value=500000)
    self.norm_state = check_attribute(config, 'norm_state', default_value=True)

    # summary related variables
    self.store_summary = check_attribute(config, 'store_summary', default_value=False)
    self.number_of_steps = check_attribute(config, 'number_of_steps', default_value=500000)
    self.summary = summary
    if self.store_summary:
        assert isinstance(self.summary, dict)
        self.reward_per_step = np.zeros(self.number_of_steps, dtype=np.float64)
        check_dict_else_default(self.summary, "steps_per_episode", [])
        check_dict_else_default(self.summary, "reward_per_step", self.reward_per_step)

    # internal state of the environment
    self.episode_step_count = 0
    position = -0.6 + np.random.random() * 0.2
    velocity = 0.0
    self.current_state = np.array((position, velocity), dtype=np.float64)
    self.actions = np.array([0, 1, 2], dtype=int)  # 0 = backward, 1 = coast, 2 = forward
    self.high = np.array([0.5, 0.07], dtype=np.float64)
    self.low = np.array([-1.2, -0.07], dtype=np.float64)
    self.action_dictionary = {0: -1,   # accelerate backwards
                              1: 0,    # coast
                              2: 1}    # accelerate forwards
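# Hedged sketch of the [-1, 1] state normalization implied by norm_state, using the
# high/low bounds defined above. The helper name is an assumption, not the repo's API.
import numpy as np

def normalize_state_sketch(state, low, high):
    # map each coordinate from [low, high] to [-1, 1]
    return (state - low) / (high - low) * 2.0 - 1.0

# e.g., the mid-point of the position/velocity ranges maps to the origin:
# normalize_state_sketch(np.array([-0.35, 0.0]),
#                        low=np.array([-1.2, -0.07]),
#                        high=np.array([0.5, 0.07]))  ->  array([0., 0.])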
def __init__(self, config, summary=None):
    """
    Parameters:
    Name:               Type    Default:    Description(omitted when self-explanatory):
    max_episode_length  int     200000      The max number of steps executed in an episode before forcing a time out
    norm_state          bool    True        Normalize the state to [-1,1]
    store_summary       bool    False       Whether to store the summary of the environment
    number_of_steps     int     200000      Total number of environment steps
    """
    check_attribute(config, 'current_step', 0)
    self.config = config

    # environment parameters
    self.max_episode_length = check_attribute(config, 'max_episode_length', 200000)
    self.norm_state = check_attribute(config, 'norm_state', True)

    # summary parameters
    self.store_summary = check_attribute(config, 'store_summary', default_value=False)
    self.summary = summary
    self.number_of_steps = check_attribute(config, 'number_of_steps', 200000)
    if self.store_summary:
        assert isinstance(self.summary, dict)
        self.reward_per_step = np.zeros(self.number_of_steps, dtype=np.float64)
        check_dict_else_default(self.summary, "steps_per_episode", [])
        check_dict_else_default(self.summary, "reward_per_step", self.reward_per_step)

    self.num_action = 4
    self.num_state = 2

    """ Inner state of the environment """
    self.episode_step_count = 0
    self.state = np.float64(np.random.uniform(low=0.0, high=0.1, size=(2,)))
    self.puddle1 = Puddle(0.45, 0.75, 0.10, 0.75, 0.1, 0.35)
    self.puddle2 = Puddle(0.45, 0.80, 0.45, 0.40, 0.1, 0.4)
    self.pworld_min_x = 0.0
    self.pworld_max_x = 1.0
    self.pworld_min_y = 0.0
    self.pworld_max_y = 1.0
    self.goalDimension = 0.05
    self.defDisplacement = 0.05
    self.goalXCoor = self.pworld_max_x - self.goalDimension
    self.goalYCoor = self.pworld_max_y - self.goalDimension
def __init__(self, config, summary=None):
    assert isinstance(config, Config)
    """
    Parameters:
    Name:               Type    Default:    Description(omitted when self-explanatory):
    max_episode_length  int     500000      The max number of steps executed in an episode before forcing a time out
    norm_state          bool    True        Normalize the state to [-1,1]
    display             bool    False       Whether to display the screen of the game
    init_lives          int     3           Number of lives at the start of the game
    store_summary       bool    False       Whether to store the summary of the environment
    number_of_steps     int     500000      Total number of environment steps
    """
    check_attribute(config, 'current_step', 0)
    self.config = config

    # environment parameters
    self.max_episode_length = check_attribute(config, 'max_episode_length', default_value=500000)
    self.norm_state = check_attribute(config, 'norm_state', default_value=True)
    self.display = False
    self.init_lives = 3
    # self.display = check_attribute(config, 'display', default_value=False)
    # self.init_lives = check_attribute(config, 'init_lives', default_value=3)

    # summary parameters
    self.store_summary = check_attribute(config, 'store_summary', default_value=False)
    self.summary = summary
    self.number_of_steps = check_attribute(config, 'number_of_steps', 500000)
    if self.store_summary:
        assert isinstance(self.summary, dict)
        self.reward_per_step = np.zeros(self.number_of_steps, dtype=np.float64)
        check_dict_else_default(self.summary, "steps_per_episode", [])
        check_dict_else_default(self.summary, "reward_per_step", self.reward_per_step)

    # setting up original catcher environment with the specified parameters
    self.catcherOb = Catcher(init_lives=self.init_lives)
    if not self.display:
        # do not open a pygame window
        os.putenv('SDL_VIDEODRIVER', 'fbcon')
        os.environ["SDL_VIDEODRIVER"] = "dummy"
    if self.norm_state:
        self.pOb = PLE(self.catcherOb, fps=30, state_preprocessor=get_ob_normalize,
                       display_screen=self.display)
    else:
        self.pOb = PLE(self.catcherOb, fps=30, state_preprocessor=get_ob,
                       display_screen=self.display)
    self.pOb.init()

    # environment internal state
    self.actions = [97, None, 100]  # self.pOb.getActionSet() (left = 97, do nothing = None, right = 100)
    self.num_action = 3
    self.num_state = 4
    self.episode_step_count = 0
    self.pOb.reset_game()
    self.current_state = self.pOb.getGameState()