def main():
    nameOfNSP = ['1', '2', '3']
    nameOfCLASS = ['A', 'B', 'C', 'D', 'SH', 'AD', 'DE', 'LD', 'FS', 'SUSP']
    ourData = rd.ReadData()
    ourData = rd.CleanData(ourData)
    normalizer = Normalizer()
    # Group by class and count how many elements each class has, for both label types
    NSPCardiotography = ourData['NSP'].sort_values().value_counts(sort=False)
    ClassCardiotography = ourData['CLASS'].sort_values().value_counts(sort=False)
    # Two histograms
    #hoc.CreateHistNSP(ourData['NSP'])
    #hoc.CreateHistClass(ourData['CLASS'])
    # Extract the two columns used as classification targets
    classification_columns = ourData[['NSP', 'CLASS']]
    #ourData = ourData.drop(labels=['NSP', 'CLASS'], axis=1)
    # Normalization/standardization etc. plus plots happen here (currently commented out)
    # Plot a graph comparing scaled data
    #normalizer.compareStandarizationMethods(ourData)
    # normalize data
    ourDataNormalized = normalizer.normalize(ourData)
    # transform data with PCA and plot results
    dataTransformedPCA = transformPCA(normalizer.standarizeStandard(ourData))
    Plot.scatterPlotNSP(dataTransformedPCA, ourData['NSP'])
    Plot.scatterPlotCLASS(dataTransformedPCA, ourData['CLASS'])
    # transform data with LDA and plot results
    dataTransformedLDA = transformLDA(ourData, 'NSP')
    Plot.scatterPlotNSP(dataTransformedLDA, ourData['NSP'])
    dataTransformedLDA = transformLDA(ourData, 'CLASS')
    Plot.scatterPlotCLASS(dataTransformedLDA, ourData['CLASS'])
    # plot heatmap - correlation between the data columns
    Plot.heatmap(ourDataNormalized)
    ourDataNormalized = ourDataNormalized.drop(labels=['CLASS', 'NSP'], axis=1)
    #crossValidationDataNSP = CrossValidation.crossValidationNSP(ourDataNormalized)
    #crossValidationDataCLASS = CrossValidation.crossValidationCLASS(ourDataNormalized)
    #hoc.CreateHistAfterValidationNSP(crossValidationDataNSP, ourData['NSP'])
    #hoc.CreateHistAfterValidationCLASS(crossValidationDataCLASS, ourData['CLASS'])
    # Data without LDA and normalization
    allClassification(ourData, classification_columns, nameOfNSP, nameOfCLASS)
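# NOTE (illustrative sketch, not part of the original file): transformPCA and
# transformLDA are called above but defined elsewhere in the project. Assuming they
# wrap scikit-learn and reduce the feature space to two components, they could look
# roughly like this; the two-component choice and the way the label columns are
# dropped before fitting are assumptions, not the project's actual implementation.
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


def transformPCA(data, n_components=2):
    # Project the (already standardized) feature matrix onto its principal components.
    return PCA(n_components=n_components).fit_transform(data)


def transformLDA(data, label_column, n_components=2):
    # Fit LDA supervised by the chosen label column and project the remaining features.
    features = data.drop(labels=['NSP', 'CLASS'], axis=1)
    labels = data[label_column]
    return LinearDiscriminantAnalysis(n_components=n_components).fit_transform(features, labels)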
class DDPGAgent:
    def __init__(self, args, env, env_params):
        self.args = args
        self.env = env
        self.env_params = env_params
        # build up the actor/critic evaluation networks
        self.actor_net = Actor(env_params, hidden_units=256)
        self.critic_net = Critic(env_params, hidden_units=256)
        # sync the networks across the cpus for parallel training (when running at a workstation)
        sync_networks(self.actor_net)
        sync_networks(self.critic_net)
        # build up the actor/critic target networks
        self.actor_target_net = Actor(env_params, hidden_units=256)
        self.critic_target_net = Critic(env_params, hidden_units=256)
        # if the gpu is used
        if self.args.cuda:
            self.actor_net.cuda()
            self.critic_net.cuda()
            self.actor_target_net.cuda()
            self.critic_target_net.cuda()
        # the optimizers of the networks
        self.actor_optimizer = torch.optim.Adam(
            self.actor_net.parameters(), lr=self.args.learning_rate_actor)
        self.critic_optimizer = torch.optim.Adam(
            self.critic_net.parameters(), lr=self.args.learning_rate_critic)
        # HER sample function
        self.her_sample = HER(self.args.replay_strategy, self.args.replay_ratio,
                              self.env.compute_reward)
        # experience buffer
        self.exp_buffer = ReplayBuffer(self.env_params, self.args.buffer_size,
                                       self.her_sample.her_sample_transitions)
        # the normalization of the observation and goal
        self.obs_norm = Normalizer(size=env_params['obs'],
                                   clip_range=self.args.clip_range)
        self.goal_norm = Normalizer(size=env_params['d_goal'],
                                    clip_range=self.args.clip_range)
        # create the directory to save the model
        if MPI.COMM_WORLD.Get_rank() == 0:
            if not os.path.exists(self.args.save_dir):
                os.mkdir(self.args.save_dir)
            # get the model path
            self.model_path = os.path.join(self.args.save_dir, self.args.env_name)
            if not os.path.exists(self.model_path):
                os.mkdir(self.model_path)

    ###############################
    # Name: learning
    # Function: train the model
    # Comment:
    ###############################
    def learning(self):
        success_rate_history = []
        for epoch in range(self.args.n_epochs):
            for _ in range(self.args.n_cycles):
                exp_obs_buff, exp_a_goal_buff, exp_d_goal_buff, exp_actions_buff = [], [], [], []
                for _ in range(self.args.num_exp_per_mpi):
                    # reset the environment and the per-episode experience
                    exp_obs, exp_a_goal, exp_d_goal, exp_actions = [], [], [], []
                    observations = self.env.reset()
                    obs = observations['observation']
                    a_goal = observations['achieved_goal']
                    d_goal = observations['desired_goal']
                    # interact with the environment
                    for t in range(self.env_params['max_timesteps']):
                        with torch.no_grad():
                            input_tensor = self._pre_process_inputs(obs, d_goal)
                            policy_predictions = self.actor_net(input_tensor)
                            action = self._choose_action(policy_predictions)
                        # get the observations after taking the action
                        observations_next, _, _, info = self.env.step(action)
                        obs_next = observations_next['observation']
                        a_goal_next = observations_next['achieved_goal']
                        exp_obs.append(obs.copy())
                        exp_a_goal.append(a_goal.copy())
                        exp_d_goal.append(d_goal.copy())
                        exp_actions.append(action.copy())
                        # update the state
                        obs = obs_next
                        a_goal = a_goal_next
                    exp_obs.append(obs.copy())
                    exp_a_goal.append(a_goal.copy())
                    exp_obs_buff.append(exp_obs)
                    exp_a_goal_buff.append(exp_a_goal)
                    exp_d_goal_buff.append(exp_d_goal)
                    exp_actions_buff.append(exp_actions)
                exp_obs_buff = np.array(exp_obs_buff)
                exp_a_goal_buff = np.array(exp_a_goal_buff)
                exp_d_goal_buff = np.array(exp_d_goal_buff)
                exp_actions_buff = np.array(exp_actions_buff)
                # store the transitions
                self.exp_buffer.store_transition([
                    exp_obs_buff, exp_a_goal_buff, exp_d_goal_buff, exp_actions_buff
                ])
                self._update_normalizer([
                    exp_obs_buff, exp_a_goal_buff, exp_d_goal_buff, exp_actions_buff
                ])
                for _ in range(self.args.n_batches):
                    # train the network
                    self._update_network()
                # soft update the target network parameters
                self._soft_update_target_network(self.actor_target_net, self.actor_net)
                self._soft_update_target_network(self.critic_target_net, self.critic_net)
            # start evaluation
            success_rate = self._evaluate_agent()
            if MPI.COMM_WORLD.Get_rank() == 0:
                print('[{}] epoch is: {}, eval success rate is: {:.3f}'.format(
                    datetime.now(), epoch, success_rate))
                torch.save([
                    self.obs_norm.mean, self.obs_norm.std, self.goal_norm.mean,
                    self.goal_norm.std, self.actor_net.state_dict()
                ], self.model_path + '/model.pt')
                success_rate_history.append(success_rate)
        success_rate_history = np.array(success_rate_history)
        np.savetxt('Plot_Data/Pen_HER.txt', success_rate_history, fmt='%f', delimiter=',')

    ###############################
    # Name: _pre_process_inputs
    # Function: process the inputs for the actor network
    # Comment:
    ###############################
    def _pre_process_inputs(self, obs, goal):
        obs_norm = self.obs_norm.normalize(obs)
        goal_norm = self.goal_norm.normalize(goal)
        # concatenate the normalized observation and goal
        inputs = np.concatenate([obs_norm, goal_norm])
        inputs = torch.tensor(inputs, dtype=torch.float32).unsqueeze(0)
        if self.args.cuda:
            inputs = inputs.cuda()
        return inputs

    def _choose_action(self, policy_predictions):
        action = policy_predictions.cpu().numpy().squeeze()
        # add gaussian exploration noise and clip to the valid action range
        action += self.args.noise_epsilon * self.env_params['action_max'] * np.random.randn(*action.shape)
        action = np.clip(action, -self.env_params['action_max'], self.env_params['action_max'])
        random_action = np.random.uniform(low=-self.env_params['action_max'],
                                          high=self.env_params['action_max'],
                                          size=self.env_params['action'])
        # with probability random_epsilon, replace the action with a random one
        action += np.random.binomial(1, self.args.random_epsilon, 1)[0] * (random_action - action)
        return action

    def _update_normalizer(self, experience_buff):
        exp_obs, exp_a_goal, exp_d_goal, exp_actions = experience_buff
        exp_obs_next = exp_obs[:, 1:, :]
        exp_a_goal_next = exp_a_goal[:, 1:, :]
        num_exps = exp_actions.shape[1]
        buffer_temp = {
            'obs': exp_obs,
            'a_goal': exp_a_goal,
            'd_goal': exp_d_goal,
            'actions': exp_actions,
            'obs_next': exp_obs_next,
            'a_goal_next': exp_a_goal_next,
        }
        transitions = self.her_sample.her_sample_transitions(buffer_temp, num_exps)
        obs, d_goal = transitions['obs'], transitions['d_goal']
        transitions['obs'], transitions['d_goal'] = self._pre_process_obs_goal(obs, d_goal)
        # update the normalizers with the sampled transitions
        self.obs_norm.update(transitions['obs'])
        self.goal_norm.update(transitions['d_goal'])
        # recompute the statistics
        self.obs_norm.recompute_stats()
        self.goal_norm.recompute_stats()

    ###############################
    # Name: _pre_process_obs_goal
    # Function: clip the observation and desired goal before normalization
    # Comment:
    ###############################
    def _pre_process_obs_goal(self, obs, goal):
        obs_proceed = np.clip(obs, -self.args.clip_obs, self.args.clip_obs)
        goal_proceed = np.clip(goal, -self.args.clip_obs, self.args.clip_obs)
        return obs_proceed, goal_proceed

    ###############################
    # Name: _soft_update_target_network
    # Function: soft update the parameters of the target network
    # Comment:
    ###############################
    def _soft_update_target_network(self, target_net, eval_net):
        for target_param, param in zip(target_net.parameters(), eval_net.parameters()):
            target_param.data.copy_((1 - self.args.avg_coeff) * param.data +
                                    self.args.avg_coeff * target_param.data)
    ###############################
    # Name: _update_network
    # Function: train the parameters of the actor network and critic network
    # Comment:
    ###############################
    def _update_network(self):
        # sample the transitions
        transitions = self.exp_buffer.sample(self.args.batch_size)
        obs, obs_next, d_goal = transitions['obs'], transitions['obs_next'], transitions['d_goal']
        transitions['obs'], transitions['d_goal'] = self._pre_process_obs_goal(obs, d_goal)
        transitions['obs_next'], transitions['d_goal_next'] = self._pre_process_obs_goal(obs_next, d_goal)
        observation_norm = self.obs_norm.normalize(transitions['obs'])
        d_goal_norm = self.goal_norm.normalize(transitions['d_goal'])
        inputs_norm = np.concatenate([observation_norm, d_goal_norm], axis=1)
        observation_next_norm = self.obs_norm.normalize(transitions['obs_next'])
        d_goal_next_norm = self.goal_norm.normalize(transitions['d_goal_next'])
        inputs_next_norm = np.concatenate([observation_next_norm, d_goal_next_norm], axis=1)
        inputs_norm_tensor = torch.tensor(inputs_norm, dtype=torch.float32)
        inputs_next_norm_tensor = torch.tensor(inputs_next_norm, dtype=torch.float32)
        actions_tensor = torch.tensor(transitions['actions'], dtype=torch.float32)
        reward_tensor = torch.tensor(transitions['reward'], dtype=torch.float32)
        if self.args.cuda:
            inputs_norm_tensor = inputs_norm_tensor.cuda()
            inputs_next_norm_tensor = inputs_next_norm_tensor.cuda()
            actions_tensor = actions_tensor.cuda()
            reward_tensor = reward_tensor.cuda()
        # calculate the target Q value function
        with torch.no_grad():
            actions_next = self.actor_target_net(inputs_next_norm_tensor)
            q_next_value = self.critic_target_net(inputs_next_norm_tensor, actions_next)
            q_next_value = q_next_value.detach()
            target_q_value = reward_tensor + self.args.gamma * q_next_value
            target_q_value = target_q_value.detach()
            # with sparse rewards in [-1, 0] the return is bounded by -1 / (1 - gamma),
            # so clip the target Q value to that range
            clip_return = 1 / (1 - self.args.gamma)
            target_q_value = torch.clamp(target_q_value, -clip_return, 0)
        # the critic loss
        real_q_value = self.critic_net(inputs_norm_tensor, actions_tensor)
        critic_loss = (target_q_value - real_q_value).pow(2).mean()
        # the actor loss
        actions_real = self.actor_net(inputs_norm_tensor)
        actor_loss = -self.critic_net(inputs_norm_tensor, actions_real).mean()
        actor_loss += self.args.action_l2 * (actions_real / self.env_params['action_max']).pow(2).mean()
        # start to train the network
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        sync_grads(self.actor_net)
        self.actor_optimizer.step()
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        sync_grads(self.critic_net)
        self.critic_optimizer.step()

    ###############################
    # Name: _evaluate_agent
    # Function: evaluate the agent
    # Comment:
    ###############################
    def _evaluate_agent(self):
        all_success_rate = []
        for _ in range(self.args.n_eval):
            per_success_rate = []
            observations = self.env.reset()
            obs = observations['observation']
            d_goal = observations['desired_goal']
            for _ in range(self.env_params['max_timesteps']):
                with torch.no_grad():
                    input_tensor = self._pre_process_inputs(obs, d_goal)
                    policy_predictions = self.actor_net(input_tensor)
                    action = policy_predictions.detach().cpu().numpy().squeeze()
                observations_next, _, _, info = self.env.step(action)
                obs = observations_next['observation']
                d_goal = observations_next['desired_goal']
                per_success_rate.append(info['is_success'])
            all_success_rate.append(per_success_rate)
        all_success_rate = np.array(all_success_rate)
        local_success_rate = np.mean(all_success_rate[:, -1])
        global_success_rate = MPI.COMM_WORLD.allreduce(local_success_rate, op=MPI.SUM)
        return global_success_rate / MPI.COMM_WORLD.Get_size()
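# NOTE (illustrative sketch, not part of the original file): DDPGAgent expects an
# `args` namespace, a goal-based gym environment whose reset()/step() return
# dict observations, and an `env_params` dict with the keys read above
# ('obs', 'd_goal', 'action', 'action_max', 'max_timesteps'). A minimal launcher
# could look like the following; `get_args()` and the environment name are
# assumptions, not the project's actual entry point.
import gym


def get_env_params(env):
    observations = env.reset()
    return {
        'obs': observations['observation'].shape[0],
        'd_goal': observations['desired_goal'].shape[0],
        'action': env.action_space.shape[0],
        'action_max': env.action_space.high[0],
        'max_timesteps': env._max_episode_steps,
    }


if __name__ == '__main__':
    args = get_args()  # assumed argparse helper providing the fields used by the agent
    env = gym.make(args.env_name)  # e.g. a goal-based env such as 'FetchReach-v1'
    agent = DDPGAgent(args, env, get_env_params(env))
    agent.learning()  # typically launched under mpirun so MPI.COMM_WORLD has several ranks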