def load_weights(self, weight_file):
    if weight_file is None:
        print_timestamp("Not loading any weight file")
    else:
        self.model.load_weights(weight_file)
        print_timestamp("Weight file loaded from '{}'".format(weight_file))
        self.update_target_model()
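# `update_target_model` is called above but not defined in this excerpt. A
# minimal sketch, assuming a Keras online/target network pair
# (`self.model` and a hypothetical `self.target_model`), as in standard
# DQN target syncing:
def update_target_model(self):
    """Copy the online network's weights into the target network."""
    self.target_model.set_weights(self.model.get_weights())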
def _initialize_test(self, test_parameters):
    self.test_episodes = test_parameters['TEST_EPISODES']
    self.test_timesteps = test_parameters['TEST_TIMESTEPS']
    self.show_test_progress = test_parameters['RENDER_TEST']  # Rework
    print_timestamp(
        "Starting intermediate test with {} episodes ({} timesteps each).".
        format(self.test_episodes, self.test_timesteps))
    self.reward_list_test = []  # Has to be a report
def learn(self, training_parameters, run=None):
    """The agent uses its training method on the given environment."""
    self._initialize_learning(training_parameters)
    for ep in range(self.training_episodes):
        self._initialize_episode(ep)
        for t in range(self.training_timesteps):
            self._initialize_timestep()  # Just render
            self.Q = self.estimator.predict(self.state)
            self.action = epsilon_greedy(self.Q, self.epsilon,
                                         self.action_space)
            if self.reward_fnc == 'Vanilla':
                self.next_state, self.reward, done = self._act(
                    self.action, True)
            elif self.reward_fnc == 'Heuristic1':
                self.next_state, self.reward, done = self._act(
                    self.action, False)
            self._analyze_timestep()
            if self.update:
                self.update_on_batch(self.memory.get_batch(self.batch_size))
            if self.update_target:
                self.update_target_model()
                # print_timestamp('Updated the target model')
                self.timestep_counter = 0
            if done:
                break
        self._decrease_epsilon()
        self._analyze_episode(ep)
        print_timestamp(
            "Episode {}/{}\t| Reward: {}\t| epsilon: {:.2f}\t".format(
                (ep + 1), self.training_episodes, self.episode_reward,
                self.epsilon))
        if self.test:
            test_report = self.run_test(self.test_parameters)
            average_reward = test_report[1].round(2)  # dictionary
            self.average_reward_list.append(average_reward)
            print_timestamp('Test ended with average reward: {}'.format(
                average_reward))
            print_timestamp('Plotting')
            heat = PolarHeatmapPlotter(8, self.target, self.exp_dir)
            heat.plot(ep, average_reward, run)
    # Plotting time
    self.plotter.plot_training(self.exp_dir, self.reward_list)
    self.plotter.plot_test(self.exp_dir, self.average_reward_list,
                           testeach=self.test_each)
    pickle.dump(
        self.reward_list,
        open('{}/report/training_report.p'.format(self.exp_dir), 'wb'))
    pickle.dump(
        self.average_reward_list,
        open('{}/report/test_report.p'.format(self.exp_dir), 'wb'))
    return self.reward_list, self.average_reward_list
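# `epsilon_greedy` is used by `learn` above but not shown in this excerpt.
# A minimal sketch, assuming `Q` is an array of action values and
# `action_space` is the list of discretized actions (names as used above):
# with probability `epsilon` a random action is drawn, otherwise the
# greedy one is taken.
import random

import numpy as np


def epsilon_greedy(Q, epsilon, action_space):
    if random.random() < epsilon:
        return random.choice(action_space)  # Explore: random action
    return action_space[int(np.argmax(Q))]  # Exploit: greedy action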
def parameter_sweep(self, parameter, sweep_vector, train_parameters, N_runs):
    """Sweep one parameter according to the sweep_vector."""
    # Create directories...
    actual_dir = train_parameters['ACTUAL_DIR']
    fm.create_report_dir(actual_dir)
    fm.create_plots_dir(actual_dir)
    # ... and the report object
    sweepingReport = {}
    # Write the description file
    description = train_parameters['DESCRIPTION']
    with open('{}/description.txt'.format(actual_dir), 'w') as descriptionfile:
        descriptionfile.write(description)
    # Sweep through the parameter vector
    for sweep_parameter in sweep_vector:
        # Adjust the actual directory
        train_parameters['ACTUAL_DIR'] = '{}/{}_{}'.format(
            actual_dir, parameter, sweep_parameter)
        # Adjust the swept parameter
        train_parameters[parameter] = sweep_parameter
        print_timestamp(
            'Parameter Sweep: Starting sweep on parameter {} with value {}'
            .format(parameter, sweep_parameter))
        multiReport = self.run_n_learning_sessions(N_runs, train_parameters)
        # Add the multiReport to the sweepingReport
        sweepingReport.update({sweep_parameter: multiReport})
    # Save the report file...
    pickle.dump(sweepingReport,
                open('{}/report/sweepReport.p'.format(actual_dir), 'wb'))
    # ... and create plot
    return sweepingReport
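# A usage sketch for `parameter_sweep`, assuming the `gamma_sweep` vector
# and `train_parameters` dict defined elsewhere in this file; 'GAMMA' is a
# hypothetical key name for the discount factor:
# sweep_report = dankAgent.parameter_sweep('GAMMA', gamma_sweep,
#                                          train_parameters, N_runs=5)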
def learn(self, training_parameters):
    """The agent uses its training method on the given environment."""
    self._initialize_learning(training_parameters)
    for ep in range(self.training_episodes):
        self._initialize_episode(ep)
        for t in range(self.training_timesteps):
            self._initialize_timestep()
            self.Q = self.estimator.predict(self.state)
            if True:  # Explore (the greedy branch below is currently disabled)
                self.action = epsilon_greedy(self.Q, self.epsilon,
                                             self.D_action)
            else:
                self.action_idx, self.action = greedy(self.Q,
                                                      self.action_space)
            # self.action = self._select_action(self.state, self.policy)
            self.next_state, self.reward, done = self._act(
                self.action)  # Perform action
            self._analyze_timestep()
            if done:
                break
        self._analyze_episode(ep)
        print_timestamp(
            "Episode {}/{}\t| Reward: {}\t| epsilon: {:.2f}\t".format(
                (ep + 1), self.training_episodes, self.episode_reward,
                self.epsilon))
        self._decrease_epsilon()
    if self.test:
        report = self.run_test(self.test_parameters)
        return report
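# `greedy` is referenced in the disabled branch above but not defined in
# this excerpt. A minimal sketch, assuming it returns both the index of the
# best action and the action itself, matching the
# `self.action_idx, self.action = greedy(...)` unpacking:
import numpy as np


def greedy(Q, action_space):
    """Return the index of the highest-valued action and the action itself."""
    idx = int(np.argmax(Q))
    return idx, action_space[idx]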
def run_n_learning_sessions(self, N_runs, train_parameters):
    # Create directories...
    actual_dir = train_parameters['ACTUAL_DIR']
    fm.create_plots_dir(actual_dir)
    fm.create_report_dir(actual_dir)
    # ... and the report object
    multireport = []
    # Perform N runs
    for run in range(N_runs):
        self._reset_agent()  # Reset the agent's models
        print_timestamp('Starting run {}'.format(run))
        training_report, test_report = self.learn(train_parameters, run)
        multireport.append([training_report, test_report, self.test_each])
    # Save the report file...
    pickle.dump(multireport,
                open('{}/report/multiReport.p'.format(actual_dir), 'wb'))
    # ... and create the plots
    self.plotter.plot_test_multireport(multireport, actual_dir,
                                       'multireport_test')
    self.plotter.plot_training_multireport(multireport, actual_dir,
                                           'multireport_training')
    return multireport
def learn(self, training_parameters):
    """The agent uses its training method on the given environment."""
    self._initialize_learning(training_parameters)
    for ep in range(self.training_episodes):
        self._initialize_episode(ep)
        for t in range(self.training_timesteps):
            self._initialize_timestep()  # Just render
            self.Q = self.estimator.predict(self.state)
            self.action = epsilon_greedy(self.Q, self.epsilon,
                                         self.action_space)
            self.next_state, self.reward, done = self._act(
                self.action, False)
            self._analyze_timestep()
            if self.update:
                self.update_on_batch(self.memory.get_batch(self.batch_size))
            if done:
                break
        self._decrease_epsilon()
        self._analyze_episode(ep)
        print_timestamp(
            "Episode {}/{}\t| Reward: {}\t| epsilon: {:.2f}\t".format(
                (ep + 1), self.training_episodes,
                self.episode_reward.round(2), self.epsilon))
        if self.update:
            # print_timestamp("Updating target model weights")
            self.update_target_model()
        if self.test:
            test_report = self.run_test(self.test_parameters)
            average_reward = test_report[1].round(2)  # dictionary
            self.average_reward_list.append(average_reward)
            print_timestamp('Test ended with average reward: {}'.format(
                average_reward))
            print_timestamp('Plotting')
            heat = PolarHeatmapPlotter(2, self.target, self.experiment_name)
            heat.plot(ep, average_reward)
    return self.reward_list, self.average_reward_list
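# `update_on_batch` performs the replay update but is not part of this
# excerpt. A minimal sketch, assuming the batch unpacks into arrays of
# (states, action indices, rewards, next states, done flags), a Keras
# `self.model`/`self.target_model` pair, and a discount factor
# `self.gamma` (all hypothetical names except those appearing above):
import numpy as np


def update_on_batch(self, batch):
    states, action_idxs, rewards, next_states, dones = batch
    # Bellman targets: r + gamma * max_a' Q_target(s', a'), 0 for terminals
    q_next = self.target_model.predict(next_states)
    targets = self.model.predict(states)
    targets[np.arange(len(rewards)), action_idxs] = (
        rewards + self.gamma * np.max(q_next, axis=1) * (1 - dones))
    self.model.train_on_batch(states, targets)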
def _load(self, file_name):
    self.model.load_weights(file_name)
    print_timestamp()
    print("agent loaded weights from file '{}'".format(file_name))
    self.update_target_model()
def _save(self, file_name):
    print_timestamp()
    self.model.save_weights(file_name)
    print("agent saved weights in file '{}'".format(file_name))
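# `print_timestamp` is used throughout but not defined in this excerpt. A
# minimal sketch, assuming it prefixes an optional message with the
# current wall-clock time:
from datetime import datetime


def print_timestamp(message=''):
    print('[{}] {}'.format(datetime.now().strftime('%H:%M:%S'), message))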
###############################################################################
# Parameter sweeps
###############################################################################
batch_size_sweep = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]
gamma_sweep = [0, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1]
optimizer_sweep = ['NAdam', 'Adam']  # Define other optimizers as well.
reward_function_sweep = ['Vanilla', 'Heuristic1', 'Heuristic2']

###############################################################################
# Main starts here
###############################################################################
# print_timestamp('Plotting')
# heat = PolarHeatmapPlotter(2)
# heat.plot()
print_timestamp('Started main program')
env = PendulumEnv()  # Create the environment
dankAgent = Dank_Agent(env, hyperparameters, model)  # Create the agent

# Start a training session (optionally with a given weight file)
training_report = dankAgent.learn(train_parameters)
# report = dankAgent.perform(model)  # Perform with loaded weights
# dankAgent.present()  # Plot the results

###############################################################################
# Code dumpster
###############################################################################
# test_parameters = {
def _initialize_test(self, test_parameters):
    print_timestamp("Starting intermediate test.")
    self.test_episodes = test_parameters['TEST_EPISODES']
    self.test_timesteps = test_parameters['TEST_TIMESTEPS']
    self.render_test = test_parameters['RENDER_TEST']
    self.reward_list = []  # Has to be a report
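# `run_test` is called from `learn` but is not part of this excerpt. A
# minimal sketch, assuming `_initialize_episode` resets `self.state`, that
# `_act` here takes only the action (its signature differs between the
# agents above), and that the report's second entry is the average episode
# reward, matching the `test_report[1]` access in `learn`:
import numpy as np


def run_test(self, test_parameters):
    self._initialize_test(test_parameters)
    for ep in range(self.test_episodes):
        self._initialize_episode(ep)
        episode_reward = 0.0
        for t in range(self.test_timesteps):
            Q = self.estimator.predict(self.state)
            _, action = greedy(Q, self.action_space)  # Act greedily in tests
            self.state, reward, done = self._act(action)
            episode_reward += reward
            if done:
                break
        self.reward_list.append(episode_reward)
    return self.reward_list, np.mean(self.reward_list)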