Code example #1
# Standard imports inferred from usage; project-specific helpers (RAdam, Logger,
# BEVImageDataset, load_model, visualize_predictions, validate, crop_d4_transforms,
# albu_valid_tansforms, IMG_SIZE, NUM_CLASSES, OUTPUT_ROOT) are assumed to be
# defined elsewhere in the project.
import glob
import os

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader
from tqdm import tqdm


def train(model, model_name: str, data_folder: str, fold: int, debug=False, img_size=IMG_SIZE,
          epochs=15, batch_size=8, num_workers=4, resume_weights='', resume_epoch=0):
    """
    Model training
    
    Args: 
        model : PyTorch model
        model_name : string name for model for checkpoints saving
        fold: evaluation fold number, 0-3
        debug: if True, runs the debugging on few images 
        img_size: size of images for training (for pregressive learning)
        epochs: number of epochs to train
        batch_size: number of images in batch
        num_workers: number of workers available
        resume_weights: directory with weights to resume (if avaialable)
        resume_epoch: number of epoch to continue training    
    """

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(device)
    
    # Down-weight the loss for class 0 to account for (some of) the large class imbalance
    class_weights = torch.from_numpy(np.array([0.2] + [1.0]*NUM_CLASSES, dtype=np.float32))
    class_weights = class_weights.to(device)

    # create directories for checkpoints, history, predictions and tensorboard logs
    checkpoints_dir = f'{OUTPUT_ROOT}/checkpoints/{model_name}_fold_{fold}'
    history_dir = f'{OUTPUT_ROOT}/history/{model_name}_fold_{fold}'
    predictions_dir = f'{OUTPUT_ROOT}/oof/{model_name}_fold_{fold}'
    tensorboard_dir = f'{OUTPUT_ROOT}/tensorboard/{model_name}_fold_{fold}'
    validations_dir = f'{OUTPUT_ROOT}/oof/{model_name}_fold_{fold}/val'
    os.makedirs(checkpoints_dir, exist_ok=True)
    os.makedirs(history_dir, exist_ok=True)
    os.makedirs(predictions_dir, exist_ok=True)
    os.makedirs(tensorboard_dir, exist_ok=True)
    os.makedirs(validations_dir, exist_ok=True)
    logger = Logger(tensorboard_dir)

    print('\n', model_name, '\n')

    # collect input images and derive sample tokens from the filenames
    input_filepaths = sorted(glob.glob(os.path.join(data_folder, "*_input.png")))
    sample_tokens = [x.split("/")[-1].replace("_input.png", "") for x in input_filepaths]
    sample_tokens = [x.replace("bev_data\\", "") for x in sample_tokens]  # handle Windows-style paths

    # train samples
    df = pd.read_csv(f'folds/train_fold_{fold}.csv')
    train_df = df[df['samples'].isin(sample_tokens)]
    print('train samples: ', train_df.head())

    # validation samples
    df = pd.read_csv(f'folds/val_fold_{fold}.csv')
    valid_df = df[df['samples'].isin(sample_tokens)]
    print('valid samples: ', valid_df.head())
    
    # optimizer and schedulers
    learning_rate = 1e-3
    print(f'initial learning rate: {learning_rate}')
    #optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    optimizer = RAdam(model.parameters(), lr=learning_rate)
    for param_group in optimizer.param_groups:
        print('learning_rate:', param_group['lr'])
    #scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, patience=2, verbose=True, factor=0.2)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.2)
    #scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[2, 4, 6], gamma=0.2)
    
    # load model weights to continue training
    if resume_weights != '':
        load_model(model, resume_weights)                  	
    model = model.to(device) 
   
  
    # datasets for train and validation
    train_dataset = BEVImageDataset(fold=fold, df=train_df,
                                    debug=debug, img_size=img_size,
                                    input_dir=data_folder,
                                    transforms=crop_d4_transforms)

    valid_dataset = BEVImageDataset(fold=fold, df=valid_df,
                                    debug=debug, img_size=img_size,
                                    input_dir=data_folder,
                                    transforms=albu_valid_tansforms)

    # dataloaders for train and validation
    dataloader_train = DataLoader(train_dataset, 
                                  num_workers=num_workers,
                                  batch_size=batch_size,
                                  shuffle=True)
    
    dataloader_valid = DataLoader(valid_dataset,
                                  num_workers=num_workers,
                                  batch_size=8,
                                  shuffle=False)

    print('{} training images, {} validation images'.format(len(train_dataset), len(valid_dataset)))
    
    # training cycle
    print("Start training")
    all_losses, valid_losses = [], []
    history = {}
        
    for epoch in range(resume_epoch, epochs+1):
        print("Epoch", epoch)        
        epoch_losses = []
        progress_bar = tqdm(dataloader_train, total=len(dataloader_train))

        # (some implementations wrap this loop in torch.set_grad_enabled(True); gradients are already enabled here)
        for iter_num, (img, target, sample_ids) in enumerate(progress_bar):
            img = img.to(device)  # [N, 3, H, W]
            target = target.to(device)  # [N, H, W] with class indices (0, 1)
            prediction = model(img)  # [N, 2, H, W]
            loss = F.cross_entropy(prediction, target, weight=class_weights)    
             
            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) 
            optimizer.step()   
                        
            epoch_losses.append(loss.detach().cpu().numpy())
            
            if iter_num == 0:
                visualize_predictions(img, prediction, target, predictions_dir, model_name, epoch)

        # loss history
        print("Epoch {}, Train Loss: {}".format(epoch, np.mean(epoch_losses)))
        all_losses.append(np.mean(epoch_losses))
        logger.scalar_summary('loss_train', np.mean(epoch_losses), epoch)
        
        # validate model after every epoch
        valid_loss = validate(model, model_name, dataloader_valid, class_weights,
                              epoch, validations_dir, save_oof=True)
        valid_losses.append(valid_loss)
        logger.scalar_summary('loss_valid', valid_loss, epoch)  
        #logger.scalar_summary('iou_valid', valid_iou, epoch)

        # print current learning rate
        for param_group in optimizer.param_groups:
            print('learning_rate:', param_group['lr'])
        scheduler.step()
        
        # save model, optimizer and scheduler after every epoch
        checkpoint_filename = "{}_fold_{}_epoch_{}.pth".format(model_name, fold, epoch)
        checkpoint_filepath = os.path.join(checkpoints_dir, checkpoint_filename)
        torch.save({
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'epoch': epoch,
            'loss': np.mean(epoch_losses),
            'valid_loss': valid_loss,
        }, checkpoint_filepath)
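
A minimal usage sketch of train() follows. It is included only for illustration and is an assumption, not part of the original project: the UNet constructor and the "unet_bev"/"bev_data" names are placeholders for whatever model factory and data layout the surrounding project actually defines.

# Hypothetical invocation of train(); UNet and the names below are placeholders,
# not part of the original project code.
if __name__ == "__main__":
    model = UNet(num_classes=NUM_CLASSES + 1)   # assumed segmentation model factory
    train(model,
          model_name="unet_bev",
          data_folder="bev_data",
          fold=0,
          epochs=15,
          batch_size=8,
          num_workers=4)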
Code example #2
# Standard imports inferred from usage; project-specific components (Logger,
# Replay_Buffer, LocalStateEncoderCNN, MemoryDDPG and the traffic environment)
# are assumed to be defined elsewhere in the project.
import os
import random

import numpy as np
import torch
from torch.nn import functional


class MA_MDDPG:
    def __init__(self, env, parameters, encoder_hyperparameters,
                 critic_hyperparameters, actor_hyperparameters):
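        """Multi-agent memory-augmented DDPG over the traffic network: one actor/critic
        pair per intersection, sharing a common local-state encoder and exchanging
        memory vectors with neighbouring intersections."""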
        self.env = env
        self.name = parameters["name"]
        self.neighbor_map = env.neighbor_map

        self.state_height = int(self.env.ild_length / self.env.ver_length)
        # self.state_width = 6
        self.phase_size = 4

        self.parameters = parameters
        self.encoder_hyperparameters = encoder_hyperparameters
        self.critic_hyperparameters = critic_hyperparameters
        self.actor_hyperparameters = actor_hyperparameters
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        # self.device = torch.device("cuda:0")
        n_path = parameters["log_dir"] + self.name + '/'
        if not os.path.exists(n_path):
            os.mkdir(n_path)
        self.logger = Logger(n_path)
        self.save_path = parameters["model_dir"]
        self.set_random_seeds(parameters["random_seed"])
        self.epsilon_exploration = parameters["epsilon_exploration"]
        self.replaybuffer = Replay_Buffer(parameters["buffer_size"],
                                          parameters["batch_size"],
                                          parameters["random_seed"])
        # self.encoder =  LocalStateEncoderBiLSTM(encoder_hyperparameters["state_size"],
        #                                        encoder_hyperparameters["hidden_size"],
        #                                        encoder_hyperparameters["num_layers"],
        #                                        encoder_hyperparameters["output_size"],
        #                                        encoder_hyperparameters["phase_size"],
        #                                        self.device).to(self.device)
        # self.encoder_target = LocalStateEncoderBiLSTM(encoder_hyperparameters["state_size"],
        #                                               encoder_hyperparameters["hidden_size"],
        #                                               encoder_hyperparameters["num_layers"],
        #                                               encoder_hyperparameters["output_size"],
        #                                               encoder_hyperparameters["phase_size"],
        #                                               self.device).to(self.device)
        self.encoder = LocalStateEncoderCNN(
            encoder_hyperparameters["output_size"],
            encoder_hyperparameters["phase_size"], self.device).to(self.device)
        self.encoder_target = LocalStateEncoderCNN(
            encoder_hyperparameters["output_size"],
            encoder_hyperparameters["phase_size"], self.device).to(self.device)
        self.copy_encoder_parameters(self.encoder, self.encoder_target)
        # state_dict = self.encoder.state_dict()

        self.actor_names = env.nodes_name
        self.actors = [
            MemoryDDPG(actor, parameters, encoder_hyperparameters,
                       critic_hyperparameters, actor_hyperparameters,
                       self.device).load_encoder_parameters(
                           self.encoder, self.encoder_target)
            for actor in self.actor_names
        ]
        self.memory = {
            actor.name: actor.get_local_memory()
            for actor in self.actors
        }
        self.global_step_number = 0
        self.episode_number = 0

        self.total_reward_per_epsiode = []
        self.total_critic_loss = []
        self.total_actor_loss = []

    def set_random_seeds(self, random_seed):
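        """Seed Python, NumPy and PyTorch (CPU and CUDA) RNGs and make cuDNN deterministic."""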
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        torch.manual_seed(random_seed)
        random.seed(random_seed)
        np.random.seed(random_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(random_seed)
            torch.cuda.manual_seed(random_seed)

    def update_memory(self):
        # keep self.memory a dict keyed by actor name, consistent with
        # get_local_memory / get_neighbor_memory
        self.memory = {
            actor.name: actor.get_local_memory()
            for actor in self.actors
        }

    def get_neighbor_memory(self, name):
        neighbor = self.neighbor_map[name]
        return [
            self.memory[n_b].clone().reshape(1, -1).to(self.device)
            for n_b in neighbor
        ]

    def get_local_memory(self, name):
        return self.memory[name].clone().reshape(1, -1).to(self.device)

    def copy_encoder_parameters(self, from_model, to_model):
        """Hard-copy encoder parameters from from_model into to_model."""
        for to_param, from_param in zip(to_model.parameters(),
                                        from_model.parameters()):
            to_param.data.copy_(from_param.data.clone())

    def step(self):
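        """Run one environment episode: pick actions for every intersection, store the
        transitions in the replay buffer, and run learning steps once the buffer holds
        at least one batch."""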
        self.obs = self.env.reset()
        reward = [0 for _ in range(self.parameters["n_inter"])]
        while True:
            self.actions = self._pick_action(self.obs)
            self.next_obs, self.rewards, self.dones, _ = self.env.step(
                self.actions)
            reward = [
                reward[ac_ind] + self.rewards[ac_name]
                for ac_ind, ac_name in enumerate(self.actor_names)
            ]
            if len(self.replaybuffer) >= self.parameters["batch_size"]:
                for _ in range(self.parameters["learning_step_per_session"]):
                    states, phases, actions, rewards, next_states, next_phases, local_memorys, dones = self.replaybuffer.sample(
                    )
                    self.critic_learn(states, phases, actions, rewards,
                                      next_states, next_phases, local_memorys,
                                      dones)
                    self.actor_learn(states, phases, actions, local_memorys)
            states, phases, actions, rewards, n_states, n_phases, memory = self._dict_to_numpy(
                self.obs, self.actions, self.rewards, self.next_obs,
                self.memory)
            self.replaybuffer.add_experience(states, phases, actions, rewards,
                                             n_states, n_phases, memory,
                                             self.dones)
            self.obs = self.next_obs
            self.global_step_number += 1
            if self.dones:
                break
        self.total_reward_per_epsiode.append(sum(reward))
        self.logger.scalar_summary("total_reward",
                                   self.total_reward_per_epsiode[-1],
                                   self.episode_number + 1)
        self.episode_number += 1

    def _pick_action(self, obs=None):
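        """Sample a binary action per intersection, treating the first actor output as the
        probability of action 1, apply epsilon-greedy exploration, and refresh the local
        memory of every agent."""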
        # if obs is None: obs = self.env.reset()
        position, phase = obs
        actions = {}
        all_new_memory = []
        for actor_ind, actor_name in enumerate(self.actor_names):
            po = torch.tensor(position[actor_name], dtype=torch.float).reshape(
                1, self.state_height, -1).to(self.device)
            ph = torch.tensor(phase[actor_name], dtype=torch.float).reshape(
                1, self.phase_size).to(self.device)
            n_memory = self.get_neighbor_memory(actor_name)
            l_memory = self.get_local_memory(actor_name)
            self.actors[actor_ind].actor.eval()
            with torch.no_grad():
                action_output, new_memory = self.actors[actor_ind].actor(
                    po, ph, l_memory, n_memory)
                # action_distribution = Categorical(action_output)
                # action = action_distribution.sample().cpu().numpy()
                if random.random() <= action_output[0, 0].cpu().numpy():
                    action = 1
                else:
                    action = 0
            self.actors[actor_ind].actor.train()
            if random.random() <= self.epsilon_exploration:
                action = random.randint(0, 1)
            actions[actor_name] = action
            all_new_memory.append(new_memory)
        for actor_ind, actor in enumerate(self.actors):
            actor.update_local_memory(all_new_memory[actor_ind])
        self.memory = {
            actor.name: actor.get_local_memory()
            for actor in self.actors
        }
        return actions

    def _dict_to_numpy(self, obs, actions, rewards, next_obs, memory):
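        """Flatten the per-agent observation/action/reward/memory dicts into stacked
        1-D numpy arrays for storage in the replay buffer."""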
        states, phases = obs
        n_states, n_phases = next_obs
        state_numpy = np.stack([states[a_n] for a_n in self.actor_names],
                               axis=2).reshape(-1)
        phase_numpy = np.stack([phases[a_n] for a_n in self.actor_names],
                               axis=1).reshape(-1)
        action_numpy = np.stack([actions[a_n] for a_n in self.actor_names],
                                axis=0).reshape(-1)
        reward_numpy = np.stack([rewards[a_n] for a_n in self.actor_names],
                                axis=0).reshape(-1)
        n_state_numpy = np.stack([n_states[a_n] for a_n in self.actor_names],
                                 axis=2).reshape(-1)
        n_phase_numpy = np.stack([n_phases[a_n] for a_n in self.actor_names],
                                 axis=1).reshape(-1)
        memory_numpy = np.stack(
            [memory[a_n].cpu() for a_n in self.actor_names],
            axis=1).reshape(-1)
        return state_numpy, phase_numpy, action_numpy, reward_numpy, n_state_numpy, n_phase_numpy, memory_numpy

    def critic_learn(self,
                     states,
                     phases,
                     actions,
                     rewards,
                     next_states,
                     next_phases,
                     local_memorys,
                     dones,
                     clipping_norm=None):
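        """Critic update: build TD targets from the target actors/critics, minimise the MSE
        per agent, propagate the shared encoder weights, and soft-update the critic targets."""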
        state = states.reshape(-1, self.state_height,
                               self.encoder_hyperparameters["state_size"],
                               self.parameters["n_inter"])
        phase = phases.reshape(-1, self.phase_size, self.parameters["n_inter"])
        action = actions.reshape(-1, self.parameters["n_inter"])
        reward = rewards.reshape(-1, self.parameters["n_inter"])
        next_state = next_states.reshape(
            -1, self.state_height, self.encoder_hyperparameters["state_size"],
            self.parameters["n_inter"])
        next_phase = next_phases.reshape(-1, self.phase_size,
                                         self.parameters["n_inter"])
        local_memory = local_memorys.reshape(-1, self.parameters["dim_memory"],
                                             self.parameters["n_inter"])
        neighbor_map = [[
            self.actor_names.index(neigh)
            for neigh in self.env.neighbor_map[ac_name]
        ] for ac_in, ac_name in enumerate(self.actor_names)]
        neighbor_memory = [[local_memory[:, :, i].squeeze(-1) for i in ind]
                           for ind in neighbor_map]
        done = dones.reshape(-1, 1)
        with torch.no_grad():
            actions_next = torch.cat([
                self.actors[ac_in].actor_target(
                    next_state[:, :, :, ac_in], next_phase[:, :, ac_in],
                    local_memory[:, :, ac_in], neighbor_memory[ac_in])[0]
                for ac_in, _ in enumerate(self.actor_names)
            ],
                                     dim=1).reshape(-1,
                                                    self.parameters["n_inter"])
            critic_targets_next = torch.cat([
                self.actors[ac_in].critic_target(next_state, next_phase,
                                                 actions_next)
                for ac_in, _ in enumerate(self.actor_names)
            ],
                                            dim=1).reshape(
                                                -1, self.parameters["n_inter"])
        critic_targets = reward + self.parameters[
            "gamma"] * critic_targets_next  #* (1.0 - done)
        # crititc_expected = torch.cat([self.actors[ac_in].critic(state, phase, action)
        #                               for ac_in, _ in enumerate(self.actor_names)], dim=1).reshape(-1, self.parameters["n_inter"])
        total_loss = 0
        for i, actor in enumerate(self.actors):
            actor.load_encoder_parameters(encoder=self.encoder)
            critic_expected = actor.critic(state, phase, action).reshape(-1)
            loss = functional.mse_loss(critic_expected, critic_targets[:, i])
            total_loss += loss.item()  # accumulate as a plain float so old graphs are not kept alive
            actor.critic_optimizer.zero_grad()
            loss.backward(retain_graph=False)
            if clipping_norm is not None:
                torch.nn.utils.clip_grad_norm_(actor.critic.parameters(),
                                               clipping_norm)
            actor.critic_optimizer.step()
            self.encoder.load_state_dict(actor.critic_encoder_parameters())
        for actor in self.actors:
            actor.load_encoder_parameters(encoder=self.encoder)
        self.total_critic_loss.append(total_loss)
        self.logger.scalar_summary("critic_loss", self.total_critic_loss[-1],
                                   self.global_step_number + 1)
        tau = self.parameters["critic_tau"]
        self._soft_update_encoder(tau)
        for actor in self.actors:
            for f_model, t_model in zip(actor.critic.parameters(),
                                        actor.critic_target.parameters()):
                t_model.data.copy_((1 - tau) * f_model.data +
                                   tau * t_model.data)

    def actor_learn(self,
                    states,
                    phases,
                    actions,
                    local_memorys,
                    clipping_norm=None):
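        """Actor update: substitute each agent's predicted action into the joint action and
        ascend its critic's value, propagate the shared encoder weights, and soft-update
        the actor targets."""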
        if self.dones:
            # update learning rate (placeholder, not implemented)
            pass
        state = states.reshape(-1, self.state_height,
                               self.encoder_hyperparameters["state_size"],
                               self.parameters["n_inter"])
        phase = phases.reshape(-1, self.phase_size, self.parameters["n_inter"])
        action = actions.reshape(-1, self.parameters["n_inter"])
        local_memory = local_memorys.reshape(-1, self.parameters["dim_memory"],
                                             self.parameters["n_inter"])
        neighbor_map = [[
            self.actor_names.index(neigh)
            for neigh in self.env.neighbor_map[ac_name]
        ] for ac_in, ac_name in enumerate(self.actor_names)]
        neighbor_memory = [[local_memory[:, :, i].squeeze(-1) for i in ind]
                           for ind in neighbor_map]
        # actions_pred = torch.cat([self.actors[ac_in].actor(state[:,:,:,ac_in], phase[:,:,ac_in], local_memory[:,:,ac_in], neighbor_memory[ac_in])[0]
        #                     for ac_in, _ in enumerate(self.actor_names)], dim=1).reshape(-1, self.parameters["n_inter"])
        total_loss = 0
        for i, actor in enumerate(self.actors):
            temp = action.clone()
            actor.load_encoder_parameters(encoder=self.encoder)
            actions_pred = actor.actor(state[:, :, :, i], phase[:, :, i],
                                       local_memory[:, :, i],
                                       neighbor_memory[i])[0].reshape(-1)
            temp[:, i] = actions_pred
            action_loss = -actor.critic(state, phase, temp).mean()
            total_loss += action_loss.item()  # accumulate as a plain float so old graphs are not kept alive
            actor.actor_optimizer.zero_grad()
            action_loss.backward(retain_graph=False)
            if clipping_norm is not None:
                torch.nn.utils.clip_grad_norm_(actor.actor.parameters(),
                                               clipping_norm)
            actor.actor_optimizer.step()
            self.encoder.load_state_dict(actor.actor_encoder_parameters())
        for actor in self.actors:
            actor.load_encoder_parameters(encoder=self.encoder)
        self.total_actor_loss.append(total_loss)
        self.logger.scalar_summary("actor_loss", self.total_actor_loss[-1],
                                   self.global_step_number + 1)
        tau = self.parameters["actor_tau"]
        self._soft_update_encoder(tau)
        for actor in self.actors:
            for f_model, t_model in zip(actor.actor.parameters(),
                                        actor.actor_target.parameters()):
                t_model.data.copy_((1 - tau) * f_model.data +
                                   tau * t_model.data)

    def _soft_update_encoder(self, tau):
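        """Polyak-average the online encoder parameters into the target encoder."""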
        for to_model, from_model in zip(self.encoder_target.parameters(),
                                        self.encoder.parameters()):
            to_model.data.copy_((1 - tau) * from_model.data +
                                tau * to_model.data)

    def save_model(self):
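        """Save the shared encoder and every agent's networks to an episode-indexed directory."""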
        n_path = self.save_path + self.name + str(self.episode_number) + '/'
        if not os.path.exists(n_path):
            os.mkdir(n_path)
            os.mkdir(n_path + "encoder/")
            os.mkdir(n_path + "actor/")
        torch.save(self.encoder, n_path + "encoder/checkpoint")
        for i, actor in enumerate(self.actors):
            torch.save(actor, n_path + "actor/checkpoint" + str(i))
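
A minimal driver sketch for the agent follows. It is an assumption for illustration only: env, the hyperparameter dictionaries and the "n_episodes" key are placeholders for whatever configuration the surrounding project builds.

# Hypothetical training driver; env and the configuration dicts are assumed to be
# constructed by the surrounding project, and "n_episodes" is an assumed key.
agent = MA_MDDPG(env, parameters, encoder_hyperparameters,
                 critic_hyperparameters, actor_hyperparameters)
for episode in range(parameters["n_episodes"]):
    agent.step()            # run one full episode, learning from the replay buffer as it goes
    if (episode + 1) % 50 == 0:
        agent.save_model()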