Example #1
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.num_tilings = check_attribute_else_default(experiment_parameters, 'num_tilings', 32)
        self.tiling_length = check_attribute_else_default(experiment_parameters, 'tiling_length', 10)
        self.learning_rate = check_attribute_else_default(experiment_parameters, 'learning_rate', 0.001)
        self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                             choices=['mountain_car', 'catcher'])
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        self.summary = {}

        """ Parameters for the Environment """
        self.config.max_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['max_actions']
        self.config.norm_state = True

        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.lr = self.learning_rate / self.num_tilings
        self.config.num_tilings = self.num_tilings
        self.config.tiling_length = self.tiling_length
        self.config.scaling_factor = 1/2
        self.config.scaling_offset = 1

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
        self.fa = TileCoderFA(config=self.config)
        self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                              summary=self.summary)
Example #2
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.tnet_update_freq = check_attribute(experiment_parameters,
                                                'tnet_update_freq', 1)
        self.buffer_size = check_attribute(experiment_parameters,
                                           'buffer_size', 10000)
        self.learning_rate = check_attribute(experiment_parameters, 'lr', 0.001)
        self.environment_name = check_attribute(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher', 'puddle_world'])
        self.ppa = check_attribute(experiment_parameters, 'ppa', 0.1)
        self.gated = check_attribute(experiment_parameters, 'gated', False)
        self.gate_function = check_attribute(experiment_parameters, 'gate_function',
                                             'tanh')
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.lr = self.learning_rate
        self.config.batch_size = 32
        self.config.ppa = self.ppa
        self.config.h1_dims = 32
        self.config.h2_dims = 256
        self.config.gate_function = self.gate_function
        # DQN parameters
        self.config.buffer_size = self.buffer_size
        self.config.tnet_update_freq = self.tnet_update_freq
        self.config.input_dims = self.config.state_dims
        self.config.gated = bool(self.gated)

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.fa = ActionDQN(config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)
Example #3
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.buffer_size = check_attribute_else_default(experiment_parameters, 'buffer_size', 20000)
        self.tnet_update_freq = check_attribute_else_default(experiment_parameters, 'tnet_update_freq', 10)
        self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                             choices=['mountain_car', 'catcher'])
        self.verbose = experiment_parameters.verbose
        # parameters specific to the parameter sweep
        self.learning_rate = check_attribute_else_default(experiment_parameters, 'lr', 0.001)
        self.l1_reg = check_attribute_else_default(experiment_parameters, 'l1_reg', True)
        self.weights_reg = check_attribute_else_default(experiment_parameters, 'weights_reg', True)
        self.reg_factor = check_attribute_else_default(experiment_parameters, 'reg_factor', 0.1)

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_steps']

        """ Parameters for the Environment """
        # Same for every experiment
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0

        """ Parameters for the Function Approximator """
        # Same for every experiment
        self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.batch_size = 32
        # Selected after finding the best parameter combinations for DQN with a given buffer size
        self.config.buffer_size = self.buffer_size
        self.config.tnet_update_freq = self.tnet_update_freq
        # These are the parameters that we are sweeping over
        self.config.lr = self.learning_rate
        self.config.reg_method = 'l1' if self.l1_reg else 'l2'
        self.config.weights_reg = self.weights_reg
        self.config.reg_factor = self.reg_factor

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
        self.fa = RegularizedNeuralNetwork(config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                              summary=self.summary)
Example #4
class Experiment:

    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.num_tilings = check_attribute_else_default(experiment_parameters, 'num_tilings', 32)
        self.tiling_length = check_attribute_else_default(experiment_parameters, 'tiling_length', 10)
        self.learning_rate = check_attribute_else_default(experiment_parameters, 'learning_rate', 0.001)
        self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                             choices=['mountain_car', 'catcher'])
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        self.summary = {}

        """ Parameters for the Environment """
        self.config.max_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['max_actions']
        self.config.norm_state = True

        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.lr = self.learning_rate / self.num_tilings
        self.config.num_tilings = self.num_tilings
        self.config.tiling_length = self.tiling_length
        self.config.scaling_factor = 1/2
        self.config.scaling_offset = 1

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
        self.fa = TileCoderFA(config=self.config)
        self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                              summary=self.summary)

    def run(self):
        for i in range(ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_episodes']):
            episode_number = i + 1
            self.rl_agent.train(1)
            if self.verbose and ((episode_number % 10 == 0) or episode_number == 1):
                print("Episode Number:", episode_number)
                print('\tThe cumulative reward was:', self.summary['return_per_episode'][-1])
            if self.environment_name == 'catcher':
                assert isinstance(self.env, Catcher3)
                if self.env.timeout: break
        self.save_run_summary()
        # self.save_tilecoder()

    def save_tilecoder(self):
        tilecoder_path = os.path.join(self.run_results_dir, 'tilecoder.p')
        with open(tilecoder_path, mode='wb') as tilecoder_file:
            pickle.dump(self.rl_agent.fa, tilecoder_file)

    def save_run_summary(self):
        total_reward = np.sum(self.summary['reward_per_step'])
        tr_path = os.path.join(self.run_results_dir, 'total_reward.p')
        with open(tr_path, mode='wb') as tr_file:
            pickle.dump(total_reward, tr_file)
        config_path = os.path.join(self.run_results_dir, 'config.p')
        with open(config_path, mode='wb') as config_file:
            pickle.dump(self.config, config_file)
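
The class in Example #4 is complete enough to run directly. The snippet below is a minimal usage sketch, assuming the project's own imports are available; the SimpleNamespace argument container and the results path are illustrative assumptions, not part of the original code.

# Illustrative usage of the Experiment class from Example #4.
# The argument container and the results path are assumptions, not original code.
import os
from types import SimpleNamespace

experiment_parameters = SimpleNamespace(
    num_tilings=32,          # defaults mirrored from the constructor above
    tiling_length=10,
    learning_rate=0.001,
    env='mountain_car',      # or 'catcher'
    verbose=True,
)
run_results_dir = os.path.join('results', 'mountain_car', 'run_0')  # hypothetical path
os.makedirs(run_results_dir, exist_ok=True)

experiment = Experiment(experiment_parameters, run_results_dir)
experiment.run()  # trains episode by episode, then pickles total_reward.p and config.p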
Example #5
class Experiment:
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.tnet_update_freq = check_attribute_else_default(
            experiment_parameters, 'tnet_update_freq', 1)
        self.buffer_size = check_attribute_else_default(
            experiment_parameters, 'buffer_size', 10000)
        self.learning_rate = check_attribute_else_default(
            experiment_parameters, 'lr', 0.001)
        self.environment_name = check_attribute_else_default(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher', 'puddle_world'])
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.lr = self.learning_rate
        self.config.batch_size = 32
        # DQN parameters
        self.config.buffer_size = self.buffer_size
        self.config.tnet_update_freq = self.tnet_update_freq

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.fa = VanillaDQN(config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)

    def run(self):
        prev_idx = 0
        current_episode_number = 1
        while self.config.current_step != self.config.number_of_steps:
            self.rl_agent.train(1)
            if self.verbose and ((current_episode_number % 10 == 0) or
                                 (current_episode_number == 1)):
                print("Episode Number:", current_episode_number)
                print('\tThe cumulative reward was:',
                      self.summary['return_per_episode'][-1])
                print(
                    '\tThe cumulative loss was:',
                    np.round(np.sum(self.summary['loss_per_step'][prev_idx:]),
                             2))
                print('\tCurrent environment steps:', self.config.current_step)
                prev_idx = self.config.current_step
            current_episode_number += 1
        if self.verbose:
            print("Number of episodes completed:",
                  len(self.summary['return_per_episode']))
            print("The total cumulative reward was:",
                  np.sum(self.summary['reward_per_step']))
            print("Current environment steps:", self.config.current_step)
        self.save_network_params()
        self.save_run_summary()

    def save_network_params(self):
        params_path = os.path.join(self.run_results_dir,
                                   'final_network_weights.pt')
        torch.save(self.fa.net.state_dict(), params_path)

    def save_run_summary(self):
        summary_path = os.path.join(self.run_results_dir, 'summary.p')
        with open(summary_path, mode='wb') as summary_file:
            pickle.dump(self.summary, summary_file)
        config_path = os.path.join(self.run_results_dir, 'config.p')
        with open(config_path, mode='wb') as config_file:
            pickle.dump(self.config, config_file)
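
All of the examples read per-environment constants out of ENVIRONMENT_DICTIONARY. The entry below is a hypothetical sketch of its layout, showing only the keys the examples access; the environment class and the numeric values are placeholders, not taken from the original project.

# Hypothetical ENVIRONMENT_DICTIONARY entry; the keys mirror the lookups in
# the examples above, the values are placeholders only.
ENVIRONMENT_DICTIONARY = {
    'mountain_car': {
        'class': MountainCar,        # environment class from the project (assumed import)
        'state_dims': 2,             # e.g. position and velocity
        'num_actions': 3,
        'number_of_steps': 100000,   # step budget used by the DQN-style examples
        'max_episode_length': 1000,  # episode cutoff used by the DQN-style examples
        'max_actions': 1000,         # used by the tile-coder examples (#1 and #4)
        'number_of_episodes': 500,   # used by the run loop in Example #4
    },
}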
Example #6
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.learning_rate = check_attribute_else_default(
            experiment_parameters, 'lr', 0.001)
        self.buffer_size = check_attribute_else_default(
            experiment_parameters, 'buffer_size', 20000)
        self.tnet_update_freq = check_attribute_else_default(
            experiment_parameters, 'tnet_update_freq', 10)
        self.environment_name = check_attribute_else_default(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher'])
        self.verbose = experiment_parameters.verbose
        # parameters specific to distributional regularizers
        self.beta = check_attribute_else_default(experiment_parameters, 'beta',
                                                 0.1)
        self.reg_factor = check_attribute_else_default(experiment_parameters,
                                                       'reg_factor', 0.1)
        self.use_gamma = check_attribute_else_default(experiment_parameters,
                                                      'use_gamma', False)
        self.beta_lb = check_attribute_else_default(experiment_parameters,
                                                    'beta_lb', False)

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.batch_size = 32
        # DQN parameters
        self.config.lr = self.learning_rate
        self.config.buffer_size = self.buffer_size
        self.config.tnet_update_freq = self.tnet_update_freq

        self.config.beta = self.beta
        self.config.reg_factor = self.reg_factor
        self.config.use_gamma = self.use_gamma
        self.config.beta_lb = self.beta_lb

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.fa = DistRegNeuralNetwork(config=self.config,
                                       summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)
Example #7
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.buffer_size = check_attribute_else_default(
            experiment_parameters, 'buffer_size', 20000)
        self.method = check_attribute_else_default(experiment_parameters, 'method',
                                                   'DQN')
        self.environment_name = check_attribute_else_default(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher', 'puddle_world'])
        parameters_dictionary = BEST_PARAMETERS_DICTIONARY[
            self.environment_name][self.method][self.buffer_size]
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.batch_size = 32

        # Parameters for any type of agent
        self.config.buffer_size = self.buffer_size
        self.config.lr = parameters_dictionary['LearningRate']
        self.config.tnet_update_freq = parameters_dictionary['Freq']

        if self.method in ['DRE', 'DRE_LB', 'DRG', 'DRG_LB']:
            self.config.beta = parameters_dictionary['Beta']
            self.config.reg_factor = parameters_dictionary['RegFactor']
            self.config.use_gamma = False
            self.config.beta_lb = False
            if self.method in ['DRG', 'DRG_LB']:
                self.config.use_gamma = True
            if self.method in ['DRE_LB', 'DRG_LB']:
                self.config.beta_lb = True
            self.fa = DistRegNeuralNetwork(config=self.config,
                                           summary=self.summary)

        elif self.method in ['L1A', 'L1W', 'L2A', 'L2W']:
            self.config.reg_factor = parameters_dictionary['RegFactor']
            self.config.reg_method = 'l1'
            if self.method in ['L2A', 'L2W']:
                self.config.reg_method = 'l2'
            self.config.weights_reg = False
            if self.method in ['L1W', 'L2W']:
                self.config.weights_reg = True
            self.fa = RegularizedNeuralNetwork(config=self.config,
                                               summary=self.summary)

        elif self.method in ['DQN']:
            self.fa = VanillaDQN(config=self.config, summary=self.summary)

        elif self.method in ['Dropout']:
            self.config.dropout_probability = parameters_dictionary[
                'DropoutProbability']
            self.fa = DropoutNeuralNetwork(config=self.config,
                                           summary=self.summary)
        else:
            raise ValueError(
                "No configuration available for the given method.")

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)
Example #8
class Experiment:
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.buffer_size = check_attribute_else_default(
            experiment_parameters, 'buffer_size', 20000)
        self.method = check_attribute_else_default(experiment_parameters, 'method',
                                                   'DQN')
        self.environment_name = check_attribute_else_default(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher', 'puddle_world'])
        parameters_dictionary = BEST_PARAMETERS_DICTIONARY[
            self.environment_name][self.method][self.buffer_size]
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.batch_size = 32

        # Parameters for any type of agent
        self.config.buffer_size = self.buffer_size
        self.config.lr = parameters_dictionary['LearningRate']
        self.config.tnet_update_freq = parameters_dictionary['Freq']

        if self.method in ['DRE', 'DRE_LB', 'DRG', 'DRG_LB']:
            self.config.beta = parameters_dictionary['Beta']
            self.config.reg_factor = parameters_dictionary['RegFactor']
            self.config.use_gamma = False
            self.config.beta_lb = False
            if self.method in ['DRG', 'DRG_LB']:
                self.config.use_gamma = True
            if self.method in ['DRE_LB', 'DRG_LB']:
                self.config.beta_lb = True
            self.fa = DistRegNeuralNetwork(config=self.config,
                                           summary=self.summary)

        elif self.method in ['L1A', 'L1W', 'L2A', 'L2W']:
            self.config.reg_factor = parameters_dictionary['RegFactor']
            self.config.reg_method = 'l1'
            if self.method in ['L2A', 'L2W']:
                self.config.reg_method = 'l2'
            self.config.weights_reg = False
            if self.method in ['L1W', 'L2W']:
                self.config.weights_reg = True
            self.fa = RegularizedNeuralNetwork(config=self.config,
                                               summary=self.summary)

        elif self.method in ['DQN']:
            self.fa = VanillaDQN(config=self.config, summary=self.summary)

        elif self.method in ['Dropout']:
            self.config.dropout_probability = parameters_dictionary[
                'DropoutProbability']
            self.fa = DropoutNeuralNetwork(config=self.config,
                                           summary=self.summary)
        else:
            raise ValueError(
                "No configuration available for the given method.")

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)

    def run(self):
        prev_idx = 0
        current_episode_number = 1
        assert hasattr(self.config, 'current_step')
        while self.config.current_step != self.config.number_of_steps:
            self.rl_agent.train(1)
            if self.verbose and ((current_episode_number % 10 == 0) or
                                 (current_episode_number == 1)):
                print("Episode Number:", current_episode_number)
                print('\tThe cumulative reward was:',
                      self.summary['return_per_episode'][-1])
                print(
                    '\tThe cumulative loss was:',
                    np.round(np.sum(self.summary['loss_per_step'][prev_idx:]),
                             2))
                print('\tCurrent environment steps:', self.config.current_step)
                prev_idx = self.config.current_step
            current_episode_number += 1
        if self.verbose:
            print("Number of episodes completed:",
                  len(self.summary['return_per_episode']))
            print("The total cumulative reward was:",
                  np.sum(self.summary['reward_per_step']))
            print("Current environment steps:", self.config.current_step)
        self.save_network_params()
        self.save_run_summary()

    def save_network_params(self):
        params_path = os.path.join(self.run_results_dir,
                                   'final_network_weights.pt')
        torch.save(self.fa.net.state_dict(), params_path)

    def save_run_summary(self):
        summary_path = os.path.join(self.run_results_dir, 'summary.p')
        with open(summary_path, mode='wb') as summary_file:
            pickle.dump(self.summary, summary_file)
        config_path = os.path.join(self.run_results_dir, 'config.p')
        with open(config_path, mode='wb') as config_file:
            pickle.dump(self.config, config_file)
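
Examples #7 and #8 pull their tuned hyperparameters from BEST_PARAMETERS_DICTIONARY, indexed by environment, method, and buffer size. The sketch below shows the layout implied by those lookups; the method names come from the branches above, while every numeric value is a placeholder rather than a tuned result.

# Hypothetical BEST_PARAMETERS_DICTIONARY layout:
#   environment -> method -> buffer_size -> hyperparameters.
# The numeric values are placeholders, not tuned results.
BEST_PARAMETERS_DICTIONARY = {
    'mountain_car': {
        'DQN':     {20000: {'LearningRate': 0.001, 'Freq': 10}},
        'DRE':     {20000: {'LearningRate': 0.001, 'Freq': 10,
                            'Beta': 0.1, 'RegFactor': 0.1}},
        'L1W':     {20000: {'LearningRate': 0.001, 'Freq': 10,
                            'RegFactor': 0.1}},
        'Dropout': {20000: {'LearningRate': 0.001, 'Freq': 10,
                            'DropoutProbability': 0.1}},
    },
}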