Example no. 1
import os
import pickle

import numpy as np
import torch

# Project-level modules (Config, Agent, VanillaDQN, ENVIRONMENT_DICTIONARY,
# check_attribute_else_default) are assumed to be importable from the surrounding package.


class Experiment:
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.tnet_update_freq = check_attribute_else_default(
            experiment_parameters, 'tnet_update_freq', 1)
        self.buffer_size = check_attribute_else_default(
            experiment_parameters, 'buffer_size', 10000)
        self.learning_rate = check_attribute_else_default(
            experiment_parameters, 'lr', 0.001)
        self.environment_name = check_attribute_else_default(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher', 'puddle_world'])
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.lr = self.learning_rate
        self.config.batch_size = 32
        # DQN parameters
        self.config.buffer_size = self.buffer_size
        self.config.tnet_update_freq = self.tnet_update_freq

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.fa = VanillaDQN(config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)

    def run(self):
        prev_idx = 0
        current_episode_number = 1
        while self.config.current_step < self.config.number_of_steps:
            self.rl_agent.train(1)
            if self.verbose and ((current_episode_number % 10 == 0) or
                                 (current_episode_number == 1)):
                print("Episode Number:", current_episode_number)
                print('\tThe cumulative reward was:',
                      self.summary['return_per_episode'][-1])
                print(
                    '\tThe cumulative loss was:',
                    np.round(np.sum(self.summary['loss_per_step'][prev_idx:]),
                             2))
                print('\tCurrent environment steps:', self.config.current_step)
                prev_idx = self.config.current_step
            current_episode_number += 1
        if self.verbose:
            print("Number of episodes completed:",
                  len(self.summary['return_per_episode']))
            print("The total cumulative reward was:",
                  np.sum(self.summary['reward_per_step']))
            print("Current environment steps:", self.config.current_step)
        self.save_network_params()
        self.save_run_summary()

    def save_network_params(self):
        params_path = os.path.join(self.run_results_dir,
                                   'final_network_weights.pt')
        torch.save(self.fa.net.state_dict(), params_path)

    def save_run_summary(self):
        summary_path = os.path.join(self.run_results_dir, 'summary.p')
        with open(summary_path, mode='wb') as summary_file:
            pickle.dump(self.summary, summary_file)
        config_path = os.path.join(self.run_results_dir, 'config.p')
        with open(config_path, mode='wb') as config_file:
            pickle.dump(self.config, config_file)
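
Both listings rely on a small helper, check_attribute_else_default, and are normally driven from a command-line entry point. Below is a minimal sketch of the assumed helper behaviour and of a driver for Example no. 1; the argument names mirror the attributes read above, but the defaults and the results-directory layout are illustrative, not the repository's actual entry point.

# Assumed behaviour of the helper: return the attribute if present, otherwise the
# default, optionally validating the value against a list of allowed choices.
def check_attribute_else_default(parameters, attribute_name, default_value, choices=None):
    value = getattr(parameters, attribute_name, default_value)
    if choices is not None and value not in choices:
        raise ValueError('{} must be one of {}, got {}'.format(attribute_name, choices, value))
    return value


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--buffer_size', type=int, default=10000)
    parser.add_argument('--tnet_update_freq', type=int, default=1)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--env', default='mountain_car',
                        choices=['mountain_car', 'catcher', 'puddle_world'])
    parser.add_argument('--verbose', action='store_true')
    args = parser.parse_args()

    results_dir = os.path.join(os.getcwd(), 'results', args.env)  # illustrative location
    os.makedirs(results_dir, exist_ok=True)
    Experiment(experiment_parameters=args, run_results_dir=results_dir).run()
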
Example no. 2
import os
import pickle

import numpy as np

# Project-level modules (Config, Agent, TileCoderFA, Catcher3, ENVIRONMENT_DICTIONARY,
# check_attribute_else_default) are assumed to be importable from the surrounding package.


class Experiment:

    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.num_tilings = check_attribute_else_default(experiment_parameters, 'num_tilings', 32)
        self.tiling_length = check_attribute_else_default(experiment_parameters, 'tiling_length', 10)
        self.learning_rate = check_attribute_else_default(experiment_parameters, 'learning_rate', 0.001)
        self.environment_name = check_attribute_else_default(experiment_parameters, 'env', 'mountain_car',
                                                             choices=['mountain_car', 'catcher'])
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        self.summary = {}

        """ Parameters for the Environment """
        self.config.max_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['max_actions']
        self.config.norm_state = True

        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
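        # Dividing the step size by the number of tilings keeps the magnitude of the
        # combined tile-coding update roughly constant as num_tilings changes.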
        self.config.lr = self.learning_rate / self.num_tilings
        self.config.num_tilings = self.num_tilings
        self.config.tiling_length = self.tiling_length
        self.config.scaling_factor = 1/2
        self.config.scaling_offset = 1

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](config=self.config, summary=self.summary)
        self.fa = TileCoderFA(config=self.config)
        self.rl_agent = Agent(environment=self.env, function_approximator=self.fa, config=self.config,
                              summary=self.summary)

    def run(self):
        for i in range(ENVIRONMENT_DICTIONARY[self.environment_name]['number_of_episodes']):
            episode_number = i + 1
            self.rl_agent.train(1)
            if self.verbose and ((episode_number % 10 == 0) or episode_number == 1):
                print("Episode Number:", episode_number)
                print('\tThe cumulative reward was:', self.summary['return_per_episode'][-1])
            if self.environment_name == 'catcher':
                assert isinstance(self.env, Catcher3)
                if self.env.timeout: break
        self.save_run_summary()
        # self.save_tilecoder()

    def save_tilecoder(self):
        tilecoder_path = os.path.join(self.run_results_dir, 'tilecoder.p')
        with open(tilecoder_path, mode='wb') as tilecoder_file:
            pickle.dump(self.rl_agent.fa, tilecoder_file)

    def save_run_summary(self):
        total_reward = np.sum(self.summary['reward_per_step'])
        tr_path = os.path.join(self.run_results_dir, 'total_reward.p')
        with open(tr_path, mode='wb') as tr_file:
            pickle.dump(total_reward, tr_file)
        config_path = os.path.join(self.run_results_dir, 'config.p')
        with open(config_path, mode='wb') as config_file:
            pickle.dump(self.config, config_file)
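
The artifacts written by save_run_summary can be inspected afterwards with pickle. A short sketch, assuming a finished run directory; the path below is illustrative, and unpickling config requires the project's Config class to be importable.

import os
import pickle

run_dir = os.path.join('results', 'mountain_car', 'run_0')  # illustrative path

with open(os.path.join(run_dir, 'total_reward.p'), 'rb') as tr_file:
    total_reward = pickle.load(tr_file)
with open(os.path.join(run_dir, 'config.p'), 'rb') as config_file:
    config = pickle.load(config_file)  # requires the Config class on the import path

print('total reward:', total_reward)
print('effective learning rate:', config.lr)
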
Example no. 3
import os
import pickle

import numpy as np
import torch

# Project-level modules (Config, Agent, VanillaDQN, DistRegNeuralNetwork,
# RegularizedNeuralNetwork, DropoutNeuralNetwork, ENVIRONMENT_DICTIONARY,
# BEST_PARAMETERS_DICTIONARY, check_attribute_else_default) are assumed to be
# importable from the surrounding package.


class Experiment:
    def __init__(self, experiment_parameters, run_results_dir):
        self.run_results_dir = run_results_dir
        self.buffer_size = check_attribute_else_default(
            experiment_parameters, 'buffer_size', 20000)
        self.method = check_attribute_else_default(experiment_parameters,
                                                   'method', 'DQN')
        self.environment_name = check_attribute_else_default(
            experiment_parameters,
            'env',
            'mountain_car',
            choices=['mountain_car', 'catcher', 'puddle_world'])
        parameters_dictionary = BEST_PARAMETERS_DICTIONARY[
            self.environment_name][self.method][self.buffer_size]
        self.verbose = experiment_parameters.verbose

        self.config = Config()
        self.config.store_summary = True
        # stored in summary: 'return_per_episode', 'loss_per_step', 'steps_per_episode', 'reward_per_step'
        self.summary = {}
        self.config.number_of_steps = ENVIRONMENT_DICTIONARY[
            self.environment_name]['number_of_steps']
        """ Parameters for the Environment """
        self.config.max_episode_length = ENVIRONMENT_DICTIONARY[
            self.environment_name]['max_episode_length']
        self.config.norm_state = True
        self.config.current_step = 0
        """ Parameters for the Function Approximator """
        self.config.state_dims = ENVIRONMENT_DICTIONARY[
            self.environment_name]['state_dims']
        self.config.num_actions = ENVIRONMENT_DICTIONARY[
            self.environment_name]['num_actions']
        self.config.gamma = 1.0
        self.config.epsilon = 0.1
        self.config.optim = "adam"
        self.config.batch_size = 32

        # Parameters for any type of agent
        self.config.buffer_size = self.buffer_size
        self.config.lr = parameters_dictionary['LearningRate']
        self.config.tnet_update_freq = parameters_dictionary['Freq']

        if self.method in ['DRE', 'DRE_LB', 'DRG', 'DRG_LB']:
            self.config.beta = parameters_dictionary['Beta']
            self.config.reg_factor = parameters_dictionary['RegFactor']
            self.config.use_gamma = False
            self.config.beta_lb = False
            if self.method in ['DRG', 'DRG_LB']:
                self.config.use_gamma = True
            if self.method in ['DRE_LB', 'DRG_LB']:
                self.config.beta_lb = True
            self.fa = DistRegNeuralNetwork(config=self.config,
                                           summary=self.summary)

        elif self.method in ['L1A', 'L1W', 'L2A', 'L2W']:
            self.config.reg_factor = parameters_dictionary['RegFactor']
            self.config.reg_method = 'l1'
            if self.method in ['L2A', 'L2W']:
                self.config.reg_method = 'l2'
            self.config.weights_reg = False
            if self.method in ['L1W', 'L2W']:
                self.config.weights_reg = True
            self.fa = RegularizedNeuralNetwork(config=self.config,
                                               summary=self.summary)

        elif self.method in ['DQN']:
            self.fa = VanillaDQN(config=self.config, summary=self.summary)

        elif self.method in ['Dropout']:
            self.config.dropout_probability = parameters_dictionary[
                'DropoutProbability']
            self.fa = DropoutNeuralNetwork(config=self.config,
                                           summary=self.summary)
        else:
            raise ValueError(
                "No configuration available for the given method.")

        self.env = ENVIRONMENT_DICTIONARY[self.environment_name]['class'](
            config=self.config, summary=self.summary)
        self.rl_agent = Agent(environment=self.env,
                              function_approximator=self.fa,
                              config=self.config,
                              summary=self.summary)

    def run(self):
        prev_idx = 0
        current_episode_number = 1
        assert hasattr(self.config, 'current_step')
        while self.config.current_step < self.config.number_of_steps:
            self.rl_agent.train(1)
            if self.verbose and ((current_episode_number % 10 == 0) or
                                 (current_episode_number == 1)):
                print("Episode Number:", current_episode_number)
                print('\tThe cumulative reward was:',
                      self.summary['return_per_episode'][-1])
                print(
                    '\tThe cumulative loss was:',
                    np.round(np.sum(self.summary['loss_per_step'][prev_idx:]),
                             2))
                print('\tCurrent environment steps:', self.config.current_step)
                prev_idx = self.config.current_step
            current_episode_number += 1
        if self.verbose:
            print("Number of episodes completed:",
                  len(self.summary['return_per_episode']))
            print("The total cumulative reward was:",
                  np.sum(self.summary['reward_per_step']))
            print("Current environment steps:", self.config.current_step)
        self.save_network_params()
        self.save_run_summary()

    def save_network_params(self):
        params_path = os.path.join(self.run_results_dir,
                                   'final_network_weights.pt')
        torch.save(self.fa.net.state_dict(), params_path)

    def save_run_summary(self):
        summary_path = os.path.join(self.run_results_dir, 'summary.p')
        with open(summary_path, mode='wb') as summary_file:
            pickle.dump(self.summary, summary_file)
        config_path = os.path.join(self.run_results_dir, 'config.p')
        with open(config_path, mode='wb') as config_file:
            pickle.dump(self.config, config_file)
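
Example no. 3 indexes BEST_PARAMETERS_DICTIONARY by environment, method, and buffer size, then reads method-specific keys ('LearningRate', 'Freq', and, depending on the method, 'Beta', 'RegFactor', or 'DropoutProbability'). The sketch below shows the nesting that indexing implies; the numeric values are placeholders, not the tuned parameters from the repository.

BEST_PARAMETERS_DICTIONARY = {
    'mountain_car': {
        # placeholder values throughout; the repository ships its own tuned entries
        'DQN': {20000: {'LearningRate': 0.001, 'Freq': 10}},
        'DRE': {20000: {'LearningRate': 0.001, 'Freq': 10, 'Beta': 0.1, 'RegFactor': 0.01}},
        'Dropout': {20000: {'LearningRate': 0.001, 'Freq': 10, 'DropoutProbability': 0.1}},
    },
    # 'catcher' and 'puddle_world' follow the same nesting
}
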