Example #1
    def __init__(self,
                 environment,
                 function_approximator,
                 config=None,
                 summary=None):
        self.config = config or Config()
        assert isinstance(self.config, Config)
        """ 
        Parameters in config:
        Name:                   Type:           Default:            Description: (Omitted when self-explanatory)
        store_summary           bool            False               store the summary of the agent (return per episode)
        """
        self.store_summary = check_attribute_else_default(
            self.config, 'store_summary', False)
        if self.store_summary:
            assert isinstance(summary, dict)
            self.summary = summary
            check_dict_else_default(self.summary, 'return_per_episode', [])

        " Other Parameters "
        # Function Approximator: used to approximate the Q-Values
        self.fa = function_approximator
        # Environment that the agent is interacting with
        self.env = environment
        # Summaries
        self.cumulative_reward = 0
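The helpers check_attribute_else_default and check_dict_else_default are not shown in these excerpts (check_attribute in Example #3 appears to be the same helper under a shorter name). A minimal sketch of what they might do, inferred from the call sites; the bodies below are an assumption, not the repository's code:

    def check_attribute_else_default(config, attr_name, default_value, choices=None):
        # Hypothetical: if config lacks attr_name, install default_value;
        # optionally validate the value against a list of allowed choices.
        if not hasattr(config, attr_name):
            setattr(config, attr_name, default_value)
        value = getattr(config, attr_name)
        if choices is not None:
            assert value in choices, f"'{attr_name}' must be one of {choices}"
        return value

    def check_dict_else_default(summary, key, default_value):
        # Hypothetical: the same idea for plain dicts, used to seed summary entries.
        if key not in summary:
            summary[key] = default_value
        return summary[key]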
Example #2
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.num_tilings = check_attribute_else_default(experiment_parameters, 'num_tilings', 32)
        self.tiling_length = check_attribute_else_default(experiment_parameters, 'tiling_length', 10)
        self.learning_rate = check_attribute_else_default(experiment_parameters, 'learning_rate', 0.001)
        self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                             choices=['mountain_car', 'catcher'])
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        self.summary = {}

        """ Parameters for the Environment """
        self.config.max_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['max_actions']
        self.config.norm_state = True

        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.lr = self.learning_rate / self.num_tilings
        self.config.num_tilings = self.num_tilings
        self.config.tiling_length = self.tiling_length
        self.config.scaling_factor = 1/2
        self.config.scaling_offset = 1

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
        self.fa = TileCoderFA(config=self.config)
        self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                              summary=self.summary)
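ENVIRONMENT_DICTIONARY is also not shown. From the lookups in these examples it maps an environment name to its constructor and per-environment constants; a hypothetical entry (all numeric values below are placeholders):

    ENVIRONMENT_DICTIONARY = {
        'mountain_car': {
            'class': MountainCar,           # environment constructor, cf. Example #8
            'state_dims': 2,
            'num_actions': 3,
            'max_actions': 200000,          # placeholder, read in Example #2
            'number_of_steps': 200000,      # placeholder, read in Examples #3-#6
            'max_episode_length': 200000,   # placeholder
        },
        # 'catcher' and 'puddle_world' entries would follow the same layout.
    }

Note also that Example #2 divides the step size by the number of tilings (self.config.lr = self.learning_rate / self.num_tilings), the usual step-size normalization for tile coding.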
Example #3
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.tnet_update_freq = check_attribute(experiment_parameters,
                                                'tnet_update_freq', 1)
        self.buffer_size = check_attribute(experiment_parameters,
                                           'buffer_size', 10000)
        self.learning_rate = check_attribute(experiment_parameters, 'lr', 0.001)
        self.environment_name = check_attribute(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher', 'puddle_world'])
        self.ppa = check_attribute(experiment_parameters, 'ppa', 0.1)
        self.gated = check_attribute(experiment_parameters, 'gated', False)
        self.gate_function = check_attribute(experiment_parameters,
                                             'gate_function', 'tanh')
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.lr = self.learning_rate
        self.config.batch_size = 32
        self.config.ppa = self.ppa
        self.config.h1_dims = 32
        self.config.h2_dims = 256
        self.config.gate_function = self.gate_function
        # DQN parameters
        self.config.buffer_size = self.buffer_size
        self.config.tnet_update_freq = self.tnet_update_freq
        self.config.input_dims = self.config.state_dims
        self.config.gated = bool(self.gated)

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.fa = ActionDQN(config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)
Example #4
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.buffer_size = check_attribute_else_default(experiment_parameters, 'buffer_size', 20000)
        self.tnet_update_freq = check_attribute_else_default(experiment_parameters, 'tnet_update_freq', 10)
        self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                             choices=['mountain_car', 'catcher'])
        self.verbose = experiment_parameters.verbose
        # parameters specific to the parameter sweep
        self.learning_rate = check_attribute_else_default(experiment_parameters, 'lr', 0.001)
        self.l1_reg = check_attribute_else_default(experiment_parameters, 'l1_reg', True)
        self.weights_reg = check_attribute_else_default(experiment_parameters, 'weights_reg', True)
        self.reg_factor = check_attribute_else_default(experiment_parameters, 'reg_factor', 0.1)

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

        """ Parameters for the Environment """
        # Same for every experiment
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0

        """ Parameters for the Function Approximator """
        # Same for every experiment
        self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.batch_size = 32
        # Selected after finding the best parameter combinations for DQN with a given buffer size
        self.config.buffer_size = self.buffer_size
        self.config.tnet_update_freq = self.tnet_update_freq
        # These are the parameters that we are sweeping over
        self.config.lr = self.learning_rate
        self.config.reg_method = 'l1' if self.l1_reg else 'l2'
        self.config.weights_reg = self.weights_reg
        self.config.reg_factor = self.reg_factor

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
        self.fa = RegularizedNeuralNetwork(config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                              summary=self.summary)
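RegularizedNeuralNetwork is not shown either. A hypothetical sketch of how the reg_method, weights_reg, and reg_factor flags could combine into a penalty term, written against PyTorch-style tensors (an illustration, not the repository's implementation; the split between weight and activation penalties mirrors the L1A/L1W/L2A/L2W methods in Example #9 below):

    def regularization_term(parameters, activations, config):
        # weights_reg selects what is penalized: network weights or hidden activations.
        tensors = list(parameters) if config.weights_reg else list(activations)
        power = 1 if config.reg_method == 'l1' else 2
        return config.reg_factor * sum(t.abs().pow(power).sum() for t in tensors)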
Example #5
        terminate = self.pOb.game_over()
        self.current_state = self.pOb.getGameState()
        timeout = bool(self.episode_step_count >= self.max_episode_length
                       or self.config.current_step >= self.number_of_steps)
        return self.current_state, reward, terminate, timeout

    def get_current_state(self):
        return self.current_state

    def close(self):
        return


if __name__ == "__main__":
    print('==== Random Policy Example ====')
    config = Config()
    config.store_summary = True
    config.max_episode_length = 100000
    config.number_of_steps = 100000
    config.current_step = 0
    summary = {}

    actions = 3
    verbose = False

    env = Catcher3(config, summary=summary)
    cumulative_reward = 0
    terminations = 0
    successful_episode_steps = []
    for i in range(config.number_of_steps):
        action = np.random.randint(actions)
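The excerpt cuts off inside the loop. Assuming numpy is imported as np at the top of the file, that step() returns the (state, reward, terminate, timeout) tuple shown at the start of this example, and that the environment exposes a reset() method (all assumptions), the loop might continue along these lines:

    episode_steps = 0
    for i in range(config.number_of_steps):
        action = np.random.randint(actions)
        state, reward, terminate, timeout = env.step(action)
        cumulative_reward += reward
        episode_steps += 1
        if terminate or timeout:
            if terminate:
                terminations += 1
                successful_episode_steps.append(episode_steps)
            env.reset()
            episode_steps = 0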
Example #6
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.learning_rate = check_attribute_else_default(
            experiment_parameters, 'lr', 0.001)
        self.buffer_size = check_attribute_else_default(
            experiment_parameters, 'buffer_size', 20000)
        self.tnet_update_freq = check_attribute_else_default(
            experiment_parameters, 'tnet_update_freq', 10)
        self.environment_name = check_attribute_else_default(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher'])
        self.verbose = experiment_parameters.verbose
        # parameters specific to distributional regularizers
        self.beta = check_attribute_else_default(experiment_parameters, 'beta',
                                                 0.1)
        self.reg_factor = check_attribute_else_default(experiment_parameters,
                                                       'reg_factor', 0.1)
        self.use_gamma = check_attribute_else_default(experiment_parameters,
                                                      'use_gamma', False)
        self.beta_lb = check_attribute_else_default(experiment_parameters,
                                                    'beta_lb', False)

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.batch_size = 32
        # DQN parameters
        self.config.lr = self.learning_rate
        self.config.buffer_size = self.buffer_size
        self.config.tnet_update_freq = self.tnet_update_freq

        self.config.beta = self.beta
        self.config.reg_factor = self.reg_factor
        self.config.use_gamma = self.use_gamma
        self.config.beta_lb = self.beta_lb

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.fa = DistRegNeuralNetwork(config=self.config,
                                       summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)
Example #7
    " Argument Parser "
    ###################
    parser = argparse.ArgumentParser()
    parser.add_argument("-minibatch_size", action='store', default=np.int8(32))
    parser.add_argument("-lr", action='store', default=np.float64(0.001))
    parser.add_argument("-threshold", action='store', default=1e-4, type=float)
    parser.add_argument('-regularization', action='store', default='none', type=str, choices=['none', 'l1', 'l2'])
    parser.add_argument('-reg_factor', action='store', default=0.0001, type=float)
    parser.add_argument('-test_copy_params', action='store_true', default=False)
    parser.add_argument('-init_test', action='store_true', default=False)
    parser.add_argument('-simple_training_test', action='store_true', default=False)
    parser.add_argument('-copy_parameters_test', action='store_true', default=False)
    parser.add_argument('-networks_comparison_test', action='store_true', default=False)
    args = parser.parse_args()

    config = Config()

    ############################################
    " Example: initializing the neural network "
    ############################################
    if args.init_test:
        config.input_dims = 2
        config.h1_dims = 2
        config.h2_dims = 2
        config.output_dims = 1
        print("Creating Two Layer Fully Connected Network...")
        network = TwoLayerFullyConnected(config)
        network.apply(weight_init)

        print("Printing Network...")
        print("\t", network, "\n")
Example #8
        return temp_state

    def get_current_state(self):
        if self.norm_state:
            return self.normalize(self.current_state)
        else:
            return self.current_state


if __name__ == "__main__":
    verbose = False
    random_policy_test = True
    pumping_action_test = True
    actions = 3

    config = Config()
    config.norm_state = True
    config.store_summary = True
    config.max_episode_length = 1000000
    steps = 10000

    if random_policy_test:
        print("==== Results with Random Policy ====")

        config.current_step = 0
        summary = {}

        env = MountainCar(config, summary=summary)

        cumulative_reward = 0
        terminations = 0
Example #9
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.buffer_size = check_attribute_else_default(
            experiment_parameters, 'buffer_size', 20000)
        self.method = check_attribute_else_default(experiment_parameters,
                                                   'method', 'DQN')
        self.environment_name = check_attribute_else_default(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher', 'puddle_world'])
        parameters_dictionary = BEST_PARAMETERS_DICTIONARY[
            self.environment_name][self.method][self.buffer_size]
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.batch_size = 32

        # Parameters for any type of agent
        self.config.buffer_size = self.buffer_size
        self.config.lr = parameters_dictionary['LearningRate']
        self.config.tnet_update_freq = parameters_dictionary['Freq']

        if self.method in ['DRE', 'DRE_LB', 'DRG', 'DRG_LB']:
            self.config.beta = parameters_dictionary['Beta']
            self.config.reg_factor = parameters_dictionary['RegFactor']
            self.config.use_gamma = False
            self.config.beta_lb = False
            if self.method in ['DRG', 'DRG_LB']:
                self.config.use_gamma = True
            if self.method in ['DRE_LB', 'DRG_LB']:
                self.config.beta_lb = True
            self.fa = DistRegNeuralNetwork(config=self.config,
                                           summary=self.summary)

        elif self.method in ['L1A', 'L1W', 'L2A', 'L2W']:
            self.config.reg_factor = parameters_dictionary['RegFactor']
            self.config.reg_method = 'l1'
            if self.method in ['L2A', 'L2W']:
                self.config.reg_method = 'l2'
            self.config.weights_reg = False
            if self.method in ['L1W', 'L2W']:
                self.config.weights_reg = True
            self.fa = RegularizedNeuralNetwork(config=self.config,
                                               summary=self.summary)

        elif self.method in ['DQN']:
            self.fa = VanillaDQN(config=self.config, summary=self.summary)

        elif self.method in ['Dropout']:
            self.config.dropout_probability = parameters_dictionary[
                'DropoutProbability']
            self.fa = DropoutNeuralNetwork(config=self.config,
                                           summary=self.summary)
        else:
            raise ValueError(
                "No configuration available for the given method.")

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)
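BEST_PARAMETERS_DICTIONARY is indexed as [environment][method][buffer_size] and must supply every key read above: 'LearningRate' and 'Freq' always, plus 'Beta' and 'RegFactor' for the DR* methods, 'RegFactor' for the L1*/L2* methods, and 'DropoutProbability' for Dropout. A hypothetical entry with placeholder values:

    BEST_PARAMETERS_DICTIONARY = {
        'mountain_car': {
            'DQN':     {20000: {'LearningRate': 0.001, 'Freq': 10}},
            'DRE':     {20000: {'LearningRate': 0.001, 'Freq': 10,
                                'Beta': 0.1, 'RegFactor': 0.1}},
            'L1W':     {20000: {'LearningRate': 0.001, 'Freq': 10,
                                'RegFactor': 0.1}},
            'Dropout': {20000: {'LearningRate': 0.001, 'Freq': 10,
                                'DropoutProbability': 0.1}},
        },
        # 'catcher' and 'puddle_world' follow the same [method][buffer_size] layout.
    }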