Example #1
    def __init__(self, config):
        super(GatedActionNeuralNetwork, self).__init__()
        self.config = config
        # format: check_attribute( config_class, attribute_name, default_value, data_type)  # description (optional)
        input_dims = check_attribute(self.config, 'input_dims', 1, data_type=int)
        h1_dims = check_attribute(self.config, 'h1_dims', 1, data_type=int)  # neurons in hidden layer 1
        h2_dims = check_attribute(self.config, 'h2_dims', 1, data_type=int)  # neurons in hidden layer 2
        self.num_actions = check_attribute(self.config, 'num_actions', 1, data_type=int)
        self.gate_function = check_attribute(self.config, 'gate_function', 'tanh', data_type=str)   # mask gate function

        self.fc1 = nn.Linear(input_dims, h1_dims, bias=True)
        self.fc2 = nn.Linear(h1_dims, h2_dims, bias=True)
        self.fc3 = nn.Linear(h2_dims, 1, bias=False)
        if self.gate_function == 'sigmoid':
            self.gf = torch.sigmoid
        elif self.gate_function == 'tanh':
            self.gf = torch.tanh
        elif self.gate_function == 'noisy_relu':
            self.gf = lambda x: torch.relu(x + torch.empty(x.shape).normal_(mean=0, std=1))
        else:
            raise ValueError("Choose one of the following gate functions: sigmoid, tanh, noisy_relu")

        self.action_gates = nn.Parameter(torch.randn((self.num_actions, h2_dims)), requires_grad=True)
        self.action_gates_bias = nn.Parameter(torch.randn(h2_dims), requires_grad=True)

        self.action_indices = torch.arange(start=0, end=self.num_actions, dtype=torch.int64)
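The constructor above only defines the layers and gate parameters; the forward pass is not part of the snippet. A minimal sketch of how the per-action gates might be applied is given below, assuming ReLU hidden activations and that the gate vector of the chosen action masks the second hidden layer before the scalar output head (the forward signature itself is an assumption, not part of the original code).

    def forward(self, x, actions):
        # hedged sketch: gate the second hidden layer with the learned per-action gate
        h1 = torch.relu(self.fc1(x))
        h2 = torch.relu(self.fc2(h1))
        # actions is a LongTensor of action indices; gf is sigmoid, tanh, or noisy_relu
        gates = self.gf(self.action_gates[actions] + self.action_gates_bias)
        return self.fc3(h2 * gates)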
Example #2
    def __init__(self, config):
        super(TwoLayerFullyConnected, self).__init__()
        self.config = config
        # format: check_attribute( config_class, attribute_name, default_value, data_type)  # description (optional)
        input_dims = check_attribute(self.config, 'input_dims', 1, data_type=int)
        h1_dims = check_attribute(self.config, 'h1_dims', 1, data_type=int)         # neurons in hidden layer 1
        h2_dims = check_attribute(self.config, 'h2_dims', 1, data_type=int)         # neurons in hidden layer 2
        output_dims = check_attribute(self.config, 'output_dims', 1, data_type=int)

        self.fc1 = nn.Linear(input_dims, h1_dims, bias=True)
        self.fc2 = nn.Linear(h1_dims, h2_dims, bias=True)
        self.fc3 = nn.Linear(h2_dims, output_dims, bias=False)
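Every example in this collection relies on a check_attribute helper that is not shown. Judging from its call sites (a config object, an attribute name, a default value, and optional data_type and choices arguments), a minimal compatible sketch might look like the following; the behaviour of the real helper is an assumption.

def check_attribute(config, attr_name, default_value, data_type=None, choices=None):
    # hedged sketch: store the default if the attribute is missing, then return the stored value
    if not hasattr(config, attr_name):
        setattr(config, attr_name, default_value)
    value = getattr(config, attr_name)
    if data_type is not None:
        assert isinstance(value, data_type), "wrong type for attribute: " + attr_name
    if choices is not None:
        assert value in choices, "invalid choice for attribute: " + attr_name
    return value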
Example #3
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.tnet_update_freq = check_attribute(experiment_parameters,
                                                'tnet_update_freq', 1)
        self.buffer_size = check_attribute(experiment_parameters,
                                           'buffer_size', 10000)
        self.learning_rate = check_attribute(experiment_parameters, 'lr', 0.001)
        self.environment_name = check_attribute(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher', 'puddle_world'])
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.lr = self.learning_rate
        self.config.batch_size = 32
        self.config.h1_dims = 32
        self.config.h2_dims = 256
        # DQN parameters
        self.config.buffer_size = self.buffer_size
        self.config.tnet_update_freq = self.tnet_update_freq
        self.config.input_dims = self.config.state_dims
        self.config.output_dims = self.config.num_actions

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.fa = VanillaDQN(config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)
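Everything environment-specific in this experiment class comes from ENVIRONMENT_DICTIONARY, which is defined elsewhere. Based on the keys accessed above ('class', 'number_of_steps', 'max_episode_length', 'state_dims', 'num_actions'), one entry might look roughly like the sketch below; the class name and concrete values are illustrative assumptions.

ENVIRONMENT_DICTIONARY = {
    'mountain_car': {
        'class': MountainCar,            # environment class, assumed name
        'number_of_steps': 500000,       # illustrative value
        'max_episode_length': 500000,    # illustrative value
        'state_dims': 2,
        'num_actions': 3,
    },
    # 'catcher' and 'puddle_world' entries would follow the same structure
}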
Example #4
    def __init__(self,
                 environment,
                 function_approximator,
                 config=None,
                 summary=None):
        self.config = config or Config()
        assert isinstance(self.config, Config)
        """ 
        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        store_summary           bool            False               store the summary of the agent (return per episode)
        """
        self.store_summary = check_attribute(self.config, 'store_summary',
                                             False)
        if self.store_summary:
            assert isinstance(summary, dict)
            self.summary = summary
            check_dict_else_default(self.summary, 'return_per_episode', [])

        " Other Parameters "
        # Function Approximator: used to approximate the Q-Values
        self.fa = function_approximator
        # Environment that the agent is interacting with
        self.env = environment
        # Summaries
        self.cumulative_reward = 0
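The summary bookkeeping here (and in the environment examples below) uses a check_dict_else_default helper whose definition is not included. From its call sites, a minimal compatible sketch could be:

def check_dict_else_default(dictionary, key, default_value):
    # hedged sketch: insert the default only if the key is missing, then return the stored entry
    if key not in dictionary:
        dictionary[key] = default_value
    return dictionary[key]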
Example #5
    def __init__(self, config):
        super(NormNeuralNetwork, self).__init__()
        self.config = config
        # format: check_attribute( config_class, attribute_name, default_value, data_type)  # description (optional)
        input_dims = check_attribute(self.config, 'input_dims', 1, data_type=int)
        h1_dims = check_attribute(self.config, 'h1_dims', 1, data_type=int)  # neurons in hidden layer 1
        h2_dims = check_attribute(self.config, 'h2_dims', 1, data_type=int)  # neurons in hidden layer 2
        self.num_actions = check_attribute(self.config, 'num_actions', 1, data_type=int)
        self.norm_type = check_attribute(self.config, 'norm_type', 'batch', choices=['batch', 'layer'])

        self.fc1 = nn.Linear(input_dims, h1_dims, bias=True)
        self.fc2 = nn.Linear(h1_dims, h2_dims, bias=True)
        if self.norm_type == 'batch':
            self.bn2 = nn.BatchNorm1d(h2_dims, affine=False)
        else:
            self.bn2 = None
        self.fc3 = nn.Linear(h2_dims, self.num_actions, bias=False)

        self.action_scales = nn.Parameter(torch.randn(h2_dims), requires_grad=True)
        self.action_shifts = nn.Parameter(torch.randn(h2_dims), requires_grad=True)
Example #6
    def __init__(self, config):
        super(ActionNeuralNetwork, self).__init__()
        self.config = config
        # format: check_attribute( config_class, attribute_name, default_value, data_type)  # description (optional)
        input_dims = check_attribute(self.config, 'input_dims', 1, data_type=int)
        h1_dims = check_attribute(self.config, 'h1_dims', 1, data_type=int)  # neurons in hidden layer 1
        h2_dims = check_attribute(self.config, 'h2_dims', 1, data_type=int)  # neurons in hidden layer 2
        self.num_actions = check_attribute(self.config, 'num_actions', 1, data_type=int)
        ppa = check_attribute(self.config, 'ppa', 0.1, data_type=float)     # proportion of neurons per action

        self.fc1 = nn.Linear(input_dims, h1_dims, bias=True)
        self.fc2 = nn.Linear(h1_dims, h2_dims, bias=True)
        self.fc3 = nn.Linear(h2_dims, 1, bias=False)
        self.npa = np.int64(np.floor(h2_dims * ppa))
        assert self.npa * self.num_actions <= h2_dims, "Too many neurons per action!"

        self.masks = torch.zeros((self.num_actions, h2_dims))
        shared_neurons = np.int64(h2_dims - self.npa * self.num_actions)
        exclusive_neurons = 0
        for i in range(self.num_actions):
            self.masks[i][0:shared_neurons] += 1
            self.masks[i][(shared_neurons + exclusive_neurons):(shared_neurons + exclusive_neurons + self.npa)] += 1
            exclusive_neurons += self.npa
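To make the mask construction concrete: with, say, h2_dims = 10, ppa = 0.2, and num_actions = 3, each action gets npa = 2 exclusive neurons and the remaining 4 neurons are shared by all actions, so the loop produces the masks below (illustrative values, not part of the original snippet).

        # masks[0] = [1, 1, 1, 1, 1, 1, 0, 0, 0, 0]   # 4 shared + neurons 4-5 exclusive to action 0
        # masks[1] = [1, 1, 1, 1, 0, 0, 1, 1, 0, 0]   # 4 shared + neurons 6-7 exclusive to action 1
        # masks[2] = [1, 1, 1, 1, 0, 0, 0, 0, 1, 1]   # 4 shared + neurons 8-9 exclusive to action 2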
Example #7
    def __init__(self, config, summary=None):
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_actions                 int             1000            The max number of actions executed before forcing
                                                                    a time out
        norm_state                  bool            True            Normalize the state to [-1,1]
        store_summary               bool            False           Whether to store the summary of the environment
        """
        self.norm_state = check_attribute(config, 'norm_state', True)
        self.max_actions = check_attribute(config, 'max_actions', 1000)
        self.store_summary = check_attribute(config, 'store_summary', False)
        self.summary = summary
        if self.store_summary:
            assert isinstance(self.summary, dict)
            check_dict_else_default(self.summary, "steps_per_episode", [])

        self.num_actions = 3
        self.state_dims = 4

        " Inner state of the environment "
        self.step_count = 0
        self.current_state = np.float64(np.random.uniform(low=-0.5, high=0.5, size=(4,)))
        self.MAX_VEL_1 = 4 * np.pi
        self.MAX_VEL_2 = 9 * np.pi
        self.MAX_THETA_1 = np.pi
        self.MAX_THETA_2 = np.pi
        self.m1 = 1.0
        self.m2 = 1.0
        self.l1 = 1.0
        self.l2 = 1.0
        self.lc1 = 0.5
        self.lc2 = 0.5
        self.I1 = 1.0
        self.I2 = 1.0
        self.g = 9.8
        self.dt = 0.05
        self.acrobotGoalPosition = 1.0
Example #8
    def __init__(self, config, summary=None):
        assert isinstance(config, Config)
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        # environment parameters
        max_episode_length          int             500000          The max number of actions executed before forcing
                                                                    a time out
        norm_state                  bool            True            Normalize the state to [-1,1]
        # summary parameters
        store_summary               bool            False           Whether to store the summary of the environment
        number_of_steps             int             500000          Total number of environment steps
        """
        check_attribute(config, 'current_step', 0)
        self.config = config

        # environment related variables
        self.max_episode_length = check_attribute(config,
                                                  'max_episode_length',
                                                  default_value=500000)
        self.norm_state = check_attribute(config,
                                          'norm_state',
                                          default_value=True)

        # summary related variables
        self.store_summary = check_attribute(config,
                                             'store_summary',
                                             default_value=False)
        self.number_of_steps = check_attribute(config,
                                               'number_of_steps',
                                               default_value=500000)
        self.summary = summary
        if self.store_summary:
            assert isinstance(self.summary, dict)
            self.reward_per_step = np.zeros(self.number_of_steps,
                                            dtype=np.float64)
            check_dict_else_default(self.summary, "steps_per_episode", [])
            check_dict_else_default(self.summary, "reward_per_step",
                                    self.reward_per_step)

        # internal state of the environment
        self.episode_step_count = 0
        position = -0.6 + np.random.random() * 0.2
        velocity = 0.0
        self.current_state = np.array((position, velocity), dtype=np.float64)
        self.actions = np.array(
            [0, 1, 2], dtype=int)  # 0 = backward, 1 = coast, 2 = forward
        self.high = np.array([0.5, 0.07], dtype=np.float64)
        self.low = np.array([-1.2, -0.07], dtype=np.float64)
        self.action_dictionary = {
            0: -1,  # accelerate backwards
            1: 0,   # coast
            2: 1,   # accelerate forwards
        }
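The docstring promises states normalized to [-1, 1] when norm_state is true, and the low / high arrays above give the position and velocity bounds. A minimal sketch of that normalization (the method name is an assumption) would be:

    def normalize_state(self, state):
        # hedged sketch: map [low, high] linearly onto [-1, 1] component-wise
        return -1.0 + 2.0 * (state - self.low) / (self.high - self.low)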
Example #9
    def __init__(self, config, summary=None):
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_episode_length          int             200000          The max number of steps executed in an episode
                                                                    before forcing a time out
        norm_state                  bool            True            Normalize the state to [-1,1]
        store_summary               bool            False           Whether to store the summary of the environment
        number_of_steps             int             200000          Total number of environment steps
        """
        check_attribute(config, 'current_step', 0)
        self.config = config

        # environment parameters
        self.max_episode_length = check_attribute(config, 'max_episode_length',
                                                  200000)
        self.norm_state = check_attribute(config, 'norm_state', True)

        # summary parameters
        self.store_summary = check_attribute(config,
                                             'store_summary',
                                             default_value=False)
        self.summary = summary
        self.number_of_steps = check_attribute(config, 'number_of_steps',
                                               200000)

        if self.store_summary:
            assert isinstance(self.summary, dict)
            self.reward_per_step = np.zeros(self.number_of_steps,
                                            dtype=np.float64)
            check_dict_else_default(self.summary, "steps_per_episode", [])
            check_dict_else_default(self.summary, "reward_per_step",
                                    self.reward_per_step)

        self.num_action = 4
        self.num_state = 2
        """ Inner state of the environment """
        self.episode_step_count = 0
        self.state = np.float64(
            np.random.uniform(low=0.0, high=0.1, size=(2, )))
        self.puddle1 = Puddle(0.45, 0.75, 0.10, 0.75, 0.1, 0.35)
        self.puddle2 = Puddle(0.45, 0.80, 0.45, 0.40, 0.1, 0.4)

        self.pworld_min_x = 0.0
        self.pworld_max_x = 1.0
        self.pworld_min_y = 0.0
        self.pworld_max_y = 1.0

        self.goalDimension = 0.05
        self.defDisplacement = 0.05

        self.goalXCoor = self.pworld_max_x - self.goalDimension
        self.goalYCoor = self.pworld_max_y - self.goalDimension
Example #10
    def __init__(self, config, summary=None):
        assert isinstance(config, Config)
        """ Parameters:
        Name:                       Type            Default:        Description(omitted when self-explanatory):
        max_episode_length          int             500000          The max number of steps executed in an episode
                                                                    before forcing a time out
        norm_state                  bool            True            Normalize the state to [-1,1]
        display                     bool            False           Whether to display the screen of the game
        init_lives                  int             3               Number of lives at the start of the game
        store_summary               bool            False           Whether to store the summary of the environment
        number_of_steps             int             500000          Total number of environment steps
        """
        check_attribute(config, 'current_step', 0)
        self.config = config

        # environment parameters
        self.max_episode_length = check_attribute(config,
                                                  'max_episode_length',
                                                  default_value=500000)
        self.norm_state = check_attribute(config,
                                          'norm_state',
                                          default_value=True)
        self.display = False
        self.init_lives = 3
        # self.display = check_attribute(config, 'display', default_value=False)
        # self.init_lives = check_attribute(config, 'init_lives', default_value=3)

        # summary parameters
        self.store_summary = check_attribute(config,
                                             'store_summary',
                                             default_value=False)
        self.summary = summary
        self.number_of_steps = check_attribute(config, 'number_of_steps',
                                               500000)

        if self.store_summary:
            assert isinstance(self.summary, dict)
            self.reward_per_step = np.zeros(self.number_of_steps,
                                            dtype=np.float64)
            check_dict_else_default(self.summary, "steps_per_episode", [])
            check_dict_else_default(self.summary, "reward_per_step",
                                    self.reward_per_step)

        # setting up original catcher environment with the specified parameters
        self.catcherOb = Catcher(init_lives=self.init_lives)
        if not self.display:
            # do not open a pygame window
            os.putenv('SDL_VIDEODRIVER', 'fbcon')
            os.environ["SDL_VIDEODRIVER"] = "dummy"
        if self.norm_state:
            self.pOb = PLE(self.catcherOb,
                           fps=30,
                           state_preprocessor=get_ob_normalize,
                           display_screen=self.display)
        else:
            self.pOb = PLE(self.catcherOb,
                           fps=30,
                           state_preprocessor=get_ob,
                           display_screen=self.display)
        self.pOb.init()

        # environment internal state
        self.actions = [
            97, None, 100
        ]  # self.pOb.getActionSet() (left = 97, do nothing = None, right = 100)
        self.num_action = 3
        self.num_state = 4
        self.episode_step_count = 0
        self.pOb.reset_game()
        self.current_state = self.pOb.getGameState()
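Only the constructor is shown; the step logic is not part of the snippet. Since the wrapper holds a PLE instance, a minimal sketch of one environment step using the standard PLE calls (act, game_over, getGameState) is given below; the method name, return signature, and bookkeeping are assumptions.

    def step(self, action_index):
        # hedged sketch: execute one frame and return (next_state, reward, terminal)
        reward = self.pOb.act(self.actions[action_index])
        self.current_state = self.pOb.getGameState()
        self.episode_step_count += 1
        terminal = self.pOb.game_over() or self.episode_step_count >= self.max_episode_length
        return self.current_state, reward, terminal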