import glob
import os

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from tqdm import tqdm

# Project-level helpers assumed to be importable from this repository:
# IMG_SIZE, NUM_CLASSES, OUTPUT_ROOT, RAdam, Logger, BEVImageDataset,
# load_model, validate, visualize_predictions, crop_d4_transforms,
# albu_valid_tansforms


def train(model, model_name: str, data_folder: str, fold: int, debug=False,
          img_size=IMG_SIZE, epochs=15, batch_size=8, num_workers=4,
          resume_weights='', resume_epoch=0):
    """
    Model training

    Args:
        model         : PyTorch model
        model_name    : string name for the model, used for checkpoint saving
        data_folder   : directory with the BEV input images
        fold          : evaluation fold number, 0-3
        debug         : if True, runs debugging on a few images
        img_size      : size of images for training (for progressive learning)
        epochs        : number of epochs to train
        batch_size    : number of images in a batch
        num_workers   : number of workers available
        resume_weights: directory with weights for resuming training (if available)
        resume_epoch  : epoch to continue training from
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(device)

    # We weigh the loss for the background class lower to account for
    # (some of) the large class imbalance
    class_weights = torch.from_numpy(np.array([0.2] + [1.0] * NUM_CLASSES, dtype=np.float32))
    class_weights = class_weights.to(device)

    # create directories for checkpoints, tensorboard logs and predictions
    checkpoints_dir = f'{OUTPUT_ROOT}/checkpoints/{model_name}_fold_{fold}'
    history_dir = f'{OUTPUT_ROOT}/history/{model_name}_fold_{fold}'
    predictions_dir = f'{OUTPUT_ROOT}/oof/{model_name}_fold_{fold}'
    tensorboard_dir = f'{OUTPUT_ROOT}/tensorboard/{model_name}_fold_{fold}'
    validations_dir = f'{OUTPUT_ROOT}/oof/{model_name}_fold_{fold}/val'
    os.makedirs(checkpoints_dir, exist_ok=True)
    os.makedirs(history_dir, exist_ok=True)
    os.makedirs(predictions_dir, exist_ok=True)
    os.makedirs(tensorboard_dir, exist_ok=True)
    os.makedirs(validations_dir, exist_ok=True)
    logger = Logger(tensorboard_dir)
    print('\n', model_name, '\n')

    # choose inputs/targets
    input_filepaths = sorted(glob.glob(os.path.join(data_folder, "*_input.png")))
    sample_tokens = [x.split("/")[-1].replace("_input.png", "") for x in input_filepaths]
    sample_tokens = [x.replace("bev_data\\", "") for x in sample_tokens]

    # train samples
    df = pd.read_csv(f'folds/train_fold_{fold}.csv')
    train_df = df[df['samples'].isin(sample_tokens)]
    print('train samples: ', train_df.head())

    # validation samples
    df = pd.read_csv(f'folds/val_fold_{fold}.csv')
    valid_df = df[df['samples'].isin(sample_tokens)]
    print('valid samples: ', valid_df.head())

    # optimizer and schedulers
    learning_rate = 1e-3
    print(f'initial learning rate: {learning_rate}')
    # optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    optimizer = RAdam(model.parameters(), lr=learning_rate)
    for param_group in optimizer.param_groups:
        print('learning_rate:', param_group['lr'])
    # scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, patience=2, verbose=True, factor=0.2)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2)
    # scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[2, 4, 6], gamma=0.2)

    # load model weights to continue training
    if resume_weights != '':
        load_model(model, resume_weights)
    model = model.to(device)

    # datasets for train and validation
    train_dataset = BEVImageDataset(fold=fold, df=train_df, debug=debug, img_size=img_size,
                                    input_dir=data_folder, transforms=crop_d4_transforms)
    valid_dataset = BEVImageDataset(fold=fold, df=valid_df, debug=debug, img_size=img_size,
                                    input_dir=data_folder, transforms=albu_valid_tansforms)

    # dataloaders for train and validation
    dataloader_train = DataLoader(train_dataset, num_workers=num_workers,
                                  batch_size=batch_size, shuffle=True)
    dataloader_valid = DataLoader(valid_dataset, num_workers=num_workers,
                                  batch_size=8, shuffle=False)  # fixed batch size for validation
    print('{} training images, {} validation images'.format(len(train_dataset),
                                                            len(valid_dataset)))

    # training cycle
    print("Start training")
    all_losses, valid_losses = [], []
    history = {}
    for epoch in range(resume_epoch, epochs + 1):
        print("Epoch", epoch)
        epoch_losses = []
        progress_bar = tqdm(dataloader_train, total=len(dataloader_train))
        # gradients are enabled by default in training mode, so an explicit
        # torch.set_grad_enabled(True) context is not needed here
        for iter_num, (img, target, sample_ids) in enumerate(progress_bar):
            img = img.to(device)        # [N, 3, H, W]
            target = target.to(device)  # [N, H, W] with class indices
            prediction = model(img)     # [N, NUM_CLASSES + 1, H, W]
            loss = F.cross_entropy(prediction, target, weight=class_weights)
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
            optimizer.step()
            epoch_losses.append(loss.detach().cpu().numpy())
            if iter_num == 0:
                visualize_predictions(img, prediction, target, predictions_dir,
                                      model_name, epoch)

        # loss history
        print("Epoch {}, Train Loss: {}".format(epoch, np.mean(epoch_losses)))
        all_losses.append(np.mean(epoch_losses))
        logger.scalar_summary('loss_train', np.mean(epoch_losses), epoch)

        # validate model after every epoch
        valid_loss = validate(model, model_name, dataloader_valid, class_weights,
                              epoch, validations_dir, save_oof=True)
        valid_losses.append(valid_loss)
        logger.scalar_summary('loss_valid', valid_loss, epoch)
        # logger.scalar_summary('iou_valid', valid_iou, epoch)

        # print current learning rate
        for param_group in optimizer.param_groups:
            print('learning_rate:', param_group['lr'])
        scheduler.step()

        # save model, optimizer and scheduler after every epoch
        checkpoint_filename = "{}_fold_{}_epoch_{}.pth".format(model_name, fold, epoch)
        checkpoint_filepath = os.path.join(checkpoints_dir, checkpoint_filename)
        torch.save({
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': epoch,
            'loss': np.mean(epoch_losses),
            'valid_loss': valid_loss,
        }, checkpoint_filepath)
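
# Usage sketch (not part of the original training script): one way train()
# might be invoked for a single fold. `get_unet_model` is a hypothetical
# model factory -- substitute the project's actual model constructor; the
# model name and data folder below are placeholders as well.
if __name__ == '__main__':
    model = get_unet_model(num_classes=NUM_CLASSES + 1)  # hypothetical factory
    train(model,
          model_name='unet_bev',   # placeholder checkpoint prefix
          data_folder='bev_data',  # placeholder BEV image directory
          fold=0,
          epochs=15,
          batch_size=8,
          num_workers=4)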
import os
import random

import numpy as np
import torch
from torch.nn import functional

# Project-level modules assumed to be importable from this repository:
# Logger, Replay_Buffer, LocalStateEncoderCNN, MemoryDDPG
# (and LocalStateEncoderBiLSTM for the commented-out BiLSTM encoder variant)


class MA_MDDPG():
    """Multi-agent memory-based DDPG: one MemoryDDPG agent per intersection,
    all sharing a single (soft-updated) local state encoder."""

    def __init__(self, env, parameters, encoder_hyperparameters,
                 critic_hyperparameters, actor_hyperparameters):
        self.env = env
        self.name = parameters["name"]
        self.neighbor_map = env.neighbor_map
        self.state_height = int(self.env.ild_length / self.env.ver_length)
        # self.state_width = 6
        self.phase_size = 4
        self.parameters = parameters
        self.encoder_hyperparameters = encoder_hyperparameters
        self.critic_hyperparameters = critic_hyperparameters
        self.actor_hyperparameters = actor_hyperparameters
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        # self.device = torch.device("cuda:0")

        n_path = parameters["log_dir"] + self.name + '/'
        if not os.path.exists(n_path):
            os.mkdir(n_path)
        self.logger = Logger(n_path)
        self.save_path = parameters["model_dir"]
        self.set_random_seeds(parameters["random_seed"])
        self.epsilon_exploration = parameters["epsilon_exploration"]
        self.replaybuffer = Replay_Buffer(parameters["buffer_size"],
                                          parameters["batch_size"],
                                          parameters["random_seed"])
        # BiLSTM encoder variant:
        # self.encoder = LocalStateEncoderBiLSTM(encoder_hyperparameters["state_size"],
        #                                        encoder_hyperparameters["hidden_size"],
        #                                        encoder_hyperparameters["num_layers"],
        #                                        encoder_hyperparameters["output_size"],
        #                                        encoder_hyperparameters["phase_size"],
        #                                        self.device).to(self.device)
        # self.encoder_target = LocalStateEncoderBiLSTM(encoder_hyperparameters["state_size"],
        #                                               encoder_hyperparameters["hidden_size"],
        #                                               encoder_hyperparameters["num_layers"],
        #                                               encoder_hyperparameters["output_size"],
        #                                               encoder_hyperparameters["phase_size"],
        #                                               self.device).to(self.device)
        self.encoder = LocalStateEncoderCNN(encoder_hyperparameters["output_size"],
                                            encoder_hyperparameters["phase_size"],
                                            self.device).to(self.device)
        self.encoder_target = LocalStateEncoderCNN(encoder_hyperparameters["output_size"],
                                                   encoder_hyperparameters["phase_size"],
                                                   self.device).to(self.device)
        self.copy_encoder_parameters(self.encoder, self.encoder_target)
        # state_dict = self.encoder.state_dict()

        # load_encoder_parameters is expected to return the MemoryDDPG instance
        self.actor_names = env.nodes_name
        self.actors = [
            MemoryDDPG(actor, parameters, encoder_hyperparameters,
                       critic_hyperparameters, actor_hyperparameters,
                       self.device).load_encoder_parameters(self.encoder,
                                                            self.encoder_target)
            for actor in self.actor_names
        ]
        self.memory = {actor.name: actor.get_local_memory() for actor in self.actors}
        self.global_step_number = 0
        self.episode_number = 0
        self.total_reward_per_episode = []
        self.total_critic_loss = []
        self.total_actor_loss = []

    def set_random_seeds(self, random_seed):
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        torch.manual_seed(random_seed)
        random.seed(random_seed)
        np.random.seed(random_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(random_seed)
            torch.cuda.manual_seed(random_seed)

    def update_memory(self):
        # keep the name -> memory mapping consistent with __init__ and the
        # name-based lookups below (the original built a list here, which
        # would break get_local_memory / get_neighbor_memory)
        self.memory = {actor.name: actor.get_local_memory() for actor in self.actors}

    def get_neighbor_memory(self, name):
        neighbor = self.neighbor_map[name]
        return [self.memory[n_b].clone().reshape(1, -1).to(self.device)
                for n_b in neighbor]

    def get_local_memory(self, name):
        return self.memory[name].clone().reshape(1, -1).to(self.device)

    def copy_encoder_parameters(self, from_model, to_model):
        for to_param, from_param in zip(to_model.parameters(), from_model.parameters()):
            to_param.data.copy_(from_param.data.clone())

    def step(self):
        """Run one episode: collect experience, and learn once the replay
        buffer holds at least one batch."""
        self.obs = self.env.reset()
        reward = [0 for _ in range(self.parameters["n_inter"])]
        while True:
            self.actions = self._pick_action(self.obs)
            self.next_obs, self.rewards, self.dones, _ = self.env.step(self.actions)
            reward = [reward[ac_ind] + self.rewards[ac_name]
                      for ac_ind, ac_name in enumerate(self.actor_names)]
            if len(self.replaybuffer) >= self.parameters["batch_size"]:
                for _ in range(self.parameters["learning_step_per_session"]):
                    (states, phases, actions, rewards, next_states,
                     next_phases, local_memorys, dones) = self.replaybuffer.sample()
                    self.critic_learn(states, phases, actions, rewards,
                                      next_states, next_phases, local_memorys, dones)
                    self.actor_learn(states, phases, actions, local_memorys)
            states, phases, actions, rewards, n_states, n_phases, memory = \
                self._dict_to_numpy(self.obs, self.actions, self.rewards,
                                    self.next_obs, self.memory)
            self.replaybuffer.add_experience(states, phases, actions, rewards,
                                             n_states, n_phases, memory, self.dones)
            self.obs = self.next_obs
            self.global_step_number += 1
            if self.dones:
                break
        self.total_reward_per_episode.append(sum(reward))
        self.logger.scalar_summary("total_reward", self.total_reward_per_episode[-1],
                                   self.episode_number + 1)
        self.episode_number += 1

    def _pick_action(self, obs=None):
        # if obs is None: obs = self.env.reset()
        position, phase = obs
        actions = {}
        all_new_memory = []
        for actor_ind, actor_name in enumerate(self.actor_names):
            po = torch.tensor(position[actor_name], dtype=torch.float).reshape(
                1, self.state_height, -1).to(self.device)
            ph = torch.tensor(phase[actor_name], dtype=torch.float).reshape(
                1, self.phase_size).to(self.device)
            n_memory = self.get_neighbor_memory(actor_name)
            l_memory = self.get_local_memory(actor_name)
            self.actors[actor_ind].actor.eval()
            with torch.no_grad():
                action_output, new_memory = self.actors[actor_ind].actor(
                    po, ph, l_memory, n_memory)
                # action_distribution = Categorical(action_output)
                # action = action_distribution.sample().cpu().numpy()
                # sample a Bernoulli action from the network's output probability
                if random.random() <= action_output[0, 0].cpu().numpy():
                    action = 1
                else:
                    action = 0
            self.actors[actor_ind].actor.train()
            # epsilon-greedy exploration
            if random.random() <= self.epsilon_exploration:
                action = random.randint(0, 1)
            actions[actor_name] = action
            all_new_memory.append(new_memory)
        for actor_ind, actor in enumerate(self.actors):
            actor.update_local_memory(all_new_memory[actor_ind])
        self.memory = {actor.name: actor.get_local_memory() for actor in self.actors}
        return actions

    def _dict_to_numpy(self, obs, actions, rewards, next_obs, memory):
        states, phases = obs
        n_states, n_phases = next_obs
        state_numpy = np.stack([states[a_n] for a_n in self.actor_names],
                               axis=2).reshape(-1)
        phase_numpy = np.stack([phases[a_n] for a_n in self.actor_names],
                               axis=1).reshape(-1)
        action_numpy = np.stack([actions[a_n] for a_n in self.actor_names],
                                axis=0).reshape(-1)
        reward_numpy = np.stack([rewards[a_n] for a_n in self.actor_names],
                                axis=0).reshape(-1)
        n_state_numpy = np.stack([n_states[a_n] for a_n in self.actor_names],
                                 axis=2).reshape(-1)
        n_phase_numpy = np.stack([n_phases[a_n] for a_n in self.actor_names],
                                 axis=1).reshape(-1)
        memory_numpy = np.stack([memory[a_n].cpu() for a_n in self.actor_names],
                                axis=1).reshape(-1)
        return (state_numpy, phase_numpy, action_numpy, reward_numpy,
                n_state_numpy, n_phase_numpy, memory_numpy)

    def critic_learn(self, states, phases, actions, rewards, next_states,
                     next_phases, local_memorys, dones, clipping_norm=None):
        state = states.reshape(-1, self.state_height,
                               self.encoder_hyperparameters["state_size"],
                               self.parameters["n_inter"])
        phase = phases.reshape(-1, self.phase_size, self.parameters["n_inter"])
        action = actions.reshape(-1, self.parameters["n_inter"])
        reward = rewards.reshape(-1, self.parameters["n_inter"])
        next_state = next_states.reshape(-1, self.state_height,
                                         self.encoder_hyperparameters["state_size"],
                                         self.parameters["n_inter"])
        next_phase = next_phases.reshape(-1, self.phase_size, self.parameters["n_inter"])
        local_memory = local_memorys.reshape(-1, self.parameters["dim_memory"],
                                             self.parameters["n_inter"])
        neighbor_map = [[self.actor_names.index(neigh)
                         for neigh in self.env.neighbor_map[ac_name]]
                        for ac_name in self.actor_names]
        neighbor_memory = [[local_memory[:, :, i].squeeze(-1) for i in ind]
                           for ind in neighbor_map]
        done = dones.reshape(-1, 1)
        with torch.no_grad():
            actions_next = torch.cat([
                self.actors[ac_in].actor_target(next_state[:, :, :, ac_in],
                                                next_phase[:, :, ac_in],
                                                local_memory[:, :, ac_in],
                                                neighbor_memory[ac_in])[0]
                for ac_in, _ in enumerate(self.actor_names)
            ], dim=1).reshape(-1, self.parameters["n_inter"])
            critic_targets_next = torch.cat([
                self.actors[ac_in].critic_target(next_state, next_phase, actions_next)
                for ac_in, _ in enumerate(self.actor_names)
            ], dim=1).reshape(-1, self.parameters["n_inter"])
            critic_targets = reward + self.parameters["gamma"] * critic_targets_next  # * (1.0 - done)
        # critic_expected = torch.cat([self.actors[ac_in].critic(state, phase, action)
        #                              for ac_in, _ in enumerate(self.actor_names)],
        #                             dim=1).reshape(-1, self.parameters["n_inter"])
        total_loss = 0
        for i, actor in enumerate(self.actors):
            actor.load_encoder_parameters(encoder=self.encoder)
            critic_expected = actor.critic(state, phase, action).reshape(-1)
            loss = functional.mse_loss(critic_expected, critic_targets[:, i])
            total_loss += loss.item()  # accumulate a detached scalar for logging
            actor.critic_optimizer.zero_grad()
            loss.backward(retain_graph=False)
            if clipping_norm is not None:
                torch.nn.utils.clip_grad_norm_(actor.critic.parameters(), clipping_norm)
            actor.critic_optimizer.step()
            self.encoder.load_state_dict(actor.critic_encoder_parameters())
        for actor in self.actors:
            actor.load_encoder_parameters(encoder=self.encoder)
        self.total_critic_loss.append(total_loss)
        self.logger.scalar_summary("critic_loss", self.total_critic_loss[-1],
                                   self.global_step_number + 1)
        tau = self.parameters["critic_tau"]
        self._soft_update_encoder(tau)
        for actor in self.actors:
            for f_param, t_param in zip(actor.critic.parameters(),
                                        actor.critic_target.parameters()):
                t_param.data.copy_((1 - tau) * f_param.data + tau * t_param.data)

    def actor_learn(self, states, phases, actions, local_memorys,
                    clipping_norm=None):
        if self.dones:
            # update learning rate
            pass
        state = states.reshape(-1, self.state_height,
                               self.encoder_hyperparameters["state_size"],
                               self.parameters["n_inter"])
        phase = phases.reshape(-1, self.phase_size, self.parameters["n_inter"])
        action = actions.reshape(-1, self.parameters["n_inter"])
        local_memory = local_memorys.reshape(-1, self.parameters["dim_memory"],
                                             self.parameters["n_inter"])
        neighbor_map = [[self.actor_names.index(neigh)
                         for neigh in self.env.neighbor_map[ac_name]]
                        for ac_name in self.actor_names]
        neighbor_memory = [[local_memory[:, :, i].squeeze(-1) for i in ind]
                           for ind in neighbor_map]
        # actions_pred = torch.cat([self.actors[ac_in].actor(state[:, :, :, ac_in],
        #                                                    phase[:, :, ac_in],
        #                                                    local_memory[:, :, ac_in],
        #                                                    neighbor_memory[ac_in])[0]
        #                           for ac_in, _ in enumerate(self.actor_names)],
        #                          dim=1).reshape(-1, self.parameters["n_inter"])
        total_loss = 0
        for i, actor in enumerate(self.actors):
            temp = action.clone()
            actor.load_encoder_parameters(encoder=self.encoder)
            actions_pred = actor.actor(state[:, :, :, i], phase[:, :, i],
                                       local_memory[:, :, i],
                                       neighbor_memory[i])[0].reshape(-1)
            temp[:, i] = actions_pred
            action_loss = -actor.critic(state, phase, temp).mean()
            total_loss += action_loss.item()  # detached scalar for logging
            actor.actor_optimizer.zero_grad()
            action_loss.backward(retain_graph=False)
            if clipping_norm is not None:
                torch.nn.utils.clip_grad_norm_(actor.actor.parameters(), clipping_norm)
            actor.actor_optimizer.step()
            self.encoder.load_state_dict(actor.actor_encoder_parameters())
        for actor in self.actors:
            actor.load_encoder_parameters(encoder=self.encoder)
        self.total_actor_loss.append(total_loss)
        self.logger.scalar_summary("actor_loss", self.total_actor_loss[-1],
                                   self.global_step_number + 1)
        tau = self.parameters["actor_tau"]
        self._soft_update_encoder(tau)
        for actor in self.actors:
            for f_param, t_param in zip(actor.actor.parameters(),
                                        actor.actor_target.parameters()):
                t_param.data.copy_((1 - tau) * f_param.data + tau * t_param.data)

    def _soft_update_encoder(self, tau):
        for to_param, from_param in zip(self.encoder_target.parameters(),
                                        self.encoder.parameters()):
            to_param.data.copy_((1 - tau) * from_param.data + tau * to_param.data)

    def save_model(self):
        n_path = self.save_path + self.name + str(self.episode_number) + '/'
        if not os.path.exists(n_path):
            os.mkdir(n_path)
            os.mkdir(n_path + "encoder/")
            os.mkdir(n_path + "actor/")
        torch.save(self.encoder, n_path + "encoder/checkpoint")
        for i, actor in enumerate(self.actors):
            torch.save(actor, n_path + "actor/checkpoint" + str(i))
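
# Usage sketch (not part of the original module): one possible training driver
# for MA_MDDPG. `TrafficEnv`, the hyperparameter values, and the episode budget
# are hypothetical placeholders -- substitute the project's actual environment
# and configuration. The parameter keys shown are the ones MA_MDDPG reads.
if __name__ == '__main__':
    env = TrafficEnv()  # hypothetical env exposing neighbor_map, nodes_name, reset(), step()
    parameters = {
        "name": "ma_mddpg", "log_dir": "logs/", "model_dir": "models/",
        "random_seed": 0, "epsilon_exploration": 0.1,
        "buffer_size": 100000, "batch_size": 64,
        "learning_step_per_session": 1, "n_inter": len(env.nodes_name),
        "dim_memory": 32, "gamma": 0.99,
        # with the update t = (1 - tau) * f + tau * t above, tau near 1
        # means slowly-moving targets; placeholder values
        "critic_tau": 0.99, "actor_tau": 0.99,
    }
    encoder_hp = {"state_size": 6, "output_size": 64, "phase_size": 4}  # placeholders
    critic_hp, actor_hp = {}, {}  # filled per MemoryDDPG's requirements
    agent = MA_MDDPG(env, parameters, encoder_hp, critic_hp, actor_hp)
    for episode in range(1000):  # placeholder episode budget
        agent.step()
        if (episode + 1) % 100 == 0:
            agent.save_model()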