Example #1
    def load_weights(self, weight_file):
        if weight_file is None:
            print_timestamp("Not loading any weight file")
        else:
            self.model.load_weights(weight_file)
            print_timestamp("Weight file loaded from '{}'".format(weight_file))
        self.update_target_model()
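The `print_timestamp` helper is called throughout these examples but never defined in the listing. A minimal sketch of what it might look like, assuming it simply prints a message prefixed with the current time (the exact format is an assumption):

import datetime

def print_timestamp(message=""):
    # Hypothetical helper: print a message prefixed with the current time.
    # The default argument covers calls like print_timestamp() with no message.
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print("[{}] {}".format(now, message))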
Example #2
    def _initialize_test(self, test_parameters):

        self.test_episodes = test_parameters['TEST_EPISODES']
        self.test_timesteps = test_parameters['TEST_TIMESTEPS']
        self.show_test_progress = test_parameters['RENDER_TEST']  # Rework
        print_timestamp(
            "Starting intermediate test with {} episodes ({} timesteps each).".
            format(self.test_episodes, self.test_timesteps))
        self.reward_list_test = []  # Has to be a report
Example #3
    def learn(self, training_parameters, run=None):
        """The agent uses his training method on the given environment"""
        self._initialize_learning(training_parameters)
        for ep in range(self.training_episodes):
            self._initialize_episode(ep)
            for t in range(self.training_timesteps):
                self._initialize_timestep()  # Just render
                self.Q = self.estimator.predict(self.state)
                self.action = epsilon_greedy(self.Q, self.epsilon,
                                             self.action_space)
                if self.reward_fnc == 'Vanilla':
                    self.next_state, self.reward, done = self._act(
                        self.action, True)
                elif self.reward_fnc == 'Heuristic1':
                    self.next_state, self.reward, done = self._act(
                        self.action, False)
                self._analyze_timestep()
                if self.update:
                    self.update_on_batch(self.memory.get_batch(
                        self.batch_size))
                if self.update_target:
                    self.update_target_model()
                    # print_timestamp('Updated the target model')
                    self.timestep_counter = 0
                if done:
                    break
                self._decrease_epsilon()
            self._analyze_episode(ep)
            print_timestamp(
                "Episode {}/{}\t| Reward: {}\t| epsilon: {:.2f}\t".format(
                    (ep + 1), self.training_episodes, self.episode_reward,
                    self.epsilon))

            if self.test:
                test_report = self.run_test(self.test_parameters)

                average_reward = test_report[1].round(2)  # dictionary
                self.average_reward_list.append(average_reward)
                print_timestamp('Test ended with average reward: {}'.format(
                    average_reward))
                print_timestamp('Plotting')
                heat = PolarHeatmapPlotter(8, self.target, self.exp_dir)
                heat.plot(ep, average_reward, run)

        # Plotting time

        self.plotter.plot_training(self.exp_dir, self.reward_list)
        self.plotter.plot_test(self.exp_dir,
                               self.average_reward_list,
                               testeach=self.test_each)

        pickle.dump(
            self.reward_list,
            open('{}/report/training_report.p'.format(self.exp_dir), 'wb'))
        pickle.dump(self.average_reward_list,
                    open('{}/report/test_report.p'.format(self.exp_dir), 'wb'))

        return self.reward_list, self.average_reward_list
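`epsilon_greedy` handles action selection in several of these examples but is not shown. A minimal sketch under the assumption that `Q` is a 1-D array of action values and `action_space` is an indexable collection of actions (both assumptions, since the real signature is not in the listing):

import numpy as np

def epsilon_greedy(Q, epsilon, action_space):
    # Hypothetical helper: with probability epsilon pick a random action,
    # otherwise pick the action with the highest predicted Q-value.
    if np.random.rand() < epsilon:
        idx = np.random.randint(len(action_space))
    else:
        idx = int(np.argmax(Q))
    return action_space[idx]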
Example #4
    def parameter_sweep(self, parameter, sweep_vector, train_parameters,
                        N_runs):
        """
        Sweeping one parameter according to the sweep_vector
        """
        # Create directories...
        actual_dir = train_parameters['ACTUAL_DIR']
        fm.create_report_dir(actual_dir)
        fm.create_plots_dir(actual_dir)
        # ... and report object
        sweepingReport = {}

        # Write into description-file
        description = train_parameters['DESCRIPTION']
        descriptionfile = open('{}/description.txt'.format(actual_dir), "w")
        descriptionfile.write(description)
        descriptionfile.close()

        # Sweep through the parameter vector
        for sweep_parameter in sweep_vector:
            # Adjust the actual directory
            train_parameters['ACTUAL_DIR'] = '{}/{}_{}'.format(
                actual_dir, parameter, sweep_parameter)
            # Adjust the sweeped parameter
            train_parameters[parameter] = sweep_parameter

            print_timestamp(
                'Parameter Sweep: Starting sweep on parameter {} with value {}'
                .format(parameter, sweep_parameter))
            multiReport = self.run_n_learning_sessions(N_runs,
                                                       train_parameters)
            # Add multiReport to the sweepingReport
            sweepingReport.update({sweep_parameter: multiReport})
        # Save the report file...
        pickle.dump(sweepingReport,
                    open('{}/report/sweepReport.p'.format(actual_dir), 'wb'))
        # ... and create plot
        return sweepingReport
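A hedged usage sketch for `parameter_sweep`, assuming a trainer object named `trainer` that exposes the method, a `train_parameters` dictionary like the one used elsewhere in these examples, and `'GAMMA'` as the key of the swept parameter (all three names are assumptions):

gamma_sweep = [0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1]
sweep_report = trainer.parameter_sweep('GAMMA', gamma_sweep,
                                       train_parameters, N_runs=5)
# sweep_report maps each swept value to the multiReport of its runs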
Example #5
    def learn(self, training_parameters):
        """The agent uses his training method on the given environment"""
        self._initialize_learning(training_parameters)
        report = None  # Default return value in case no intermediate test runs
        for ep in range(self.training_episodes):
            self._initialize_episode(ep)
            for t in range(self.training_timesteps):
                self._initialize_timestep()
                self.Q = self.estimator.predict(self.state)
                if True:  # Explore; the greedy branch below is currently disabled
                    self.action = epsilon_greedy(self.Q, self.epsilon, self.D_action)
                else:
                    self.action_idx, self.action = greedy(self.Q, self.action_space)
                # self.action = self._select_action(self.state, self.policy)
                self.next_state, self.reward, done = self._act(self.action)  # Perform action
                self._analyze_timestep()

                if done:
                    break
            self._analyze_episode(ep)
            print_timestamp("Episode {}/{}\t| Reward: {}\t| epsilon: {:.2f}\t".format(ep, self.training_episodes, self.episode_reward, self.epsilon,))
            self._decrease_epsilon()
            if self.test:
                report = self.run_test(self.test_parameters)
        return report
Example #6
    def run_n_learning_sessions(self, N_runs, train_parameters):
        # Create directories...
        actual_dir = train_parameters['ACTUAL_DIR']
        fm.create_plots_dir(actual_dir)
        fm.create_report_dir(actual_dir)
        # ... and report object
        multireport = []

        # Perform N runs
        for run in range(N_runs):
            self._reset_agent()  # reset agent models
            print_timestamp('Starting run {}'.format(run))
            training_report, test_report = self.learn(train_parameters, run)
            multireport.append([training_report, test_report, self.test_each])

        # Save the report file...
        pickle.dump(multireport,
                    open('{}/report/multiReport.p'.format(actual_dir), 'wb'))
        # ... and create plot
        self.plotter.plot_test_multireport(multireport, actual_dir,
                                           'multireport_test')
        self.plotter.plot_training_multireport(multireport, actual_dir,
                                               'multireport_training')
        return multireport
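The pickled `multiReport.p` can be reloaded later for offline analysis; a small sketch, with the experiment directory path as an assumption:

import pickle

with open('experiments/example_run/report/multiReport.p', 'rb') as f:
    multireport = pickle.load(f)
# Each entry is [training_report, test_report, test_each] from one run
for training_report, test_report, test_each in multireport:
    print(len(training_report), test_each)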
Example #7
    def learn(self, training_parameters):
        """The agent uses his training method on the given environment"""
        self._initialize_learning(training_parameters)
        for ep in range(self.training_episodes):
            self._initialize_episode(ep)
            for t in range(self.training_timesteps):
                self._initialize_timestep()  # Just render
                self.Q = self.estimator.predict(self.state)
                self.action = epsilon_greedy(self.Q, self.epsilon,
                                             self.action_space)
                self.next_state, self.reward, done = self._act(
                    self.action, False)
                self._analyze_timestep()
                if self.update:
                    self.update_on_batch(self.memory.get_batch(
                        self.batch_size))
                if done:
                    break
                self._decrease_epsilon()
            self._analyze_episode(ep)
            print_timestamp(
                "Episode {}/{}\t| Reward: {}\t| epsilon: {:.2f}\t".format(
                    ep, self.training_episodes, self.episode_reward.round(2),
                    self.epsilon))

            if self.update:
                # print_timestamp("Updating target model weights")
                self.update_target_model()
            if self.test:
                test_report = self.run_test(self.test_parameters)
                average_reward = test_report[1].round(2)  # dictionary
                self.average_reward_list.append(average_reward)
                print_timestamp('Test ended with average reward: {}'.format(
                    average_reward))
                print_timestamp('Plotting')
                heat = PolarHeatmapPlotter(2, self.target,
                                           self.experiment_name)
                heat.plot(ep, average_reward)
        return self.reward_list, self.average_reward_list
Example #8
    def _load(self, file_name):
        self.model.load_weights(file_name)
        print_timestamp()
        print("agent loaded weights from file '{}'".format(file_name))
        self.update_target_model()
Example #9
    def _save(self, file_name):
        print_timestamp()
        self.model.save_weights(file_name)
        print("agent saved weights in file '{}'".format(file_name))
Example #10
###############################################################################
# Parameter sweeps
###############################################################################

batch_size_sweep = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
gamma_sweep = [0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1]
optimizer_sweep = ['NAdam', 'Adam']  # Define other optimizers as well.
reward_function_sweep = ['Vanilla', 'Heuristic1', 'Heuristic2']

###############################################################################
# Main starts here
###############################################################################
# print_timestamp('Plotting')
# heat = PolarHeatmapPlotter(2)
# heat.plot()
print_timestamp('Started main program')

env = PendulumEnv()  # Create the environment
dankAgent = Dank_Agent(env, hyperparameters, model)  # Create the agent

# Start a training session (e.g. with a given weight_file)
training_report = dankAgent.learn(train_parameters)

# report = dankAgent.perform(model)  # perform with weights
# dankAgent.present() # Plot the results

###############################################################################
# Code dumpster
###############################################################################

# test_parameters = {
Example #11
    def _initialize_test(self, test_parameters):
        print_timestamp("Starting intermediate test.")
        self.test_episodes = test_parameters['TEST_EPISODES']
        self.test_timesteps = test_parameters['TEST_TIMESTEPS']
        self.render_test = test_parameters['RENDER_TEST']
        self.reward_list = []  # Has to be a report