Code Example #1
def test(model_path_effi7, model_path_resnest, output_dir, test_loader, addNDVI):
    in_channels = 4
    if addNDVI:
        in_channels += 1
    model_resnest = smp.UnetPlusPlus(
        encoder_name="timm-resnest101e",
        encoder_weights="imagenet",
        in_channels=in_channels,
        classes=10,
        )
    model_effi7 = smp.UnetPlusPlus(
        encoder_name="efficientnet-b7",
        encoder_weights="imagenet",
        in_channels=in_channels,
        classes=10,   
        )
    # If the model was trained with SWA, wrap it in AveragedModel before loading
    if "swa" in model_path_resnest:
        model_resnest = AveragedModel(model_resnest)
    if "swa" in model_path_effi7:
        model_effi7 = AveragedModel(model_effi7)
    model_resnest.to(DEVICE)
    model_resnest.load_state_dict(torch.load(model_path_resnest))
    model_resnest.eval()
    model_effi7.to(DEVICE)
    model_effi7.load_state_dict(torch.load(model_path_effi7))
    model_effi7.eval()
    for image, image_stretch, image_path, ndvi in test_loader:
        with torch.no_grad():
            # image.shape: 16,4,256,256
            image_flip2 = torch.flip(image,[2])
            image_flip2 = image_flip2.cuda()
            image_flip3 = torch.flip(image,[3])
            image_flip3 = image_flip3.cuda()
            image = image.cuda()
            image_stretch = image_stretch.cuda()
            
            output1 = model_resnest(image).cpu().data.numpy()
            output2 = model_resnest(image_stretch).cpu().data.numpy()
            output3 = model_effi7(image).cpu().data.numpy()
            output4 = model_effi7(image_stretch).cpu().data.numpy()
            
            output5 = torch.flip(model_resnest(image_flip2),[2]).cpu().data.numpy()
            output6 = torch.flip(model_effi7(image_flip2),[2]).cpu().data.numpy()
            output7 = torch.flip(model_resnest(image_flip3),[3]).cpu().data.numpy()
            output8 = torch.flip(model_effi7(image_flip3),[3]).cpu().data.numpy()
            
        output = (output1 + output2 + output3 + output4 + output5 + output6 + output7 + output8) / 8.0
        # output.shape: 16,10,256,256
        for i in range(output.shape[0]):
            pred = output[i]
            # for low_ndvi in range(3,8):
            #     pred[low_ndvi][ndvi[i]>35] = 0
            # for high_ndvi in range(3):
            #     pred[high_ndvi][ndvi[i]<0.02] = 0
            pred = np.argmax(pred, axis = 0) + 1
            pred = np.uint8(pred)
            save_path = os.path.join(output_dir, image_path[i][-10:].replace('.tif', '.png'))
            print(save_path)
            cv2.imwrite(save_path, pred)
Code Example #2
File: model.py  Project: Etzelkut/eye_gaze
    def training_epoch_end(self, outputs):
        self.log('epoch_now',
                 self.current_epoch,
                 on_step=False,
                 on_epoch=True,
                 logger=True)
        oppp = self.optimizers(use_pl_optimizer=True)
        self.log('lr_now',
                 self.get_lr_inside(oppp),
                 on_step=False,
                 on_epoch=True,
                 logger=True)
        # https://github.com/PyTorchLightning/pytorch-lightning/issues/3095
        if self.learning_params["swa"] and (
                self.current_epoch >= self.learning_params["swa_start_epoch"]):
            if self.swa_model is None:
                optimizer = self.optimizers(use_pl_optimizer=True)
                print("creating_swa")
                self.swa_model = AveragedModel(self.network)
                self.new_scheduler = SWALR(
                    optimizer,
                    anneal_strategy="linear",
                    anneal_epochs=5,
                    swa_lr=self.learning_params["swa_lr"])
            # https://pytorch.org/blog/pytorch-1.6-now-includes-stochastic-weight-averaging/
            self.swa_model.update_parameters(self.network)
            self.new_scheduler.step()
Code Example #3
    def __init__(self, cfg_dir: str):
        # load config file and initialize the logger and the device
        self.cfg = get_conf(cfg_dir)
        self.logger = self.init_logger(self.cfg.logger)
        self.device = self.init_device()
        # create the dataset interface and dataloader for the training data
        self.data, self.val_data = self.init_dataloader()
        # create model and initialize its weights and move them to the device
        self.model = self.init_model()
        # initialize the optimizer
        self.optimizer, self.lr_scheduler = self.init_optimizer()
        # define loss function
        self.criterion = torch.nn.CrossEntropyLoss()
        # if resuming, load the checkpoint
        self.if_resume()

        # initialize the early_stopping object
        self.early_stopping = EarlyStopping(
            patience=self.cfg.train_params.patience,
            verbose=True,
            delta=self.cfg.train_params.early_stopping_delta,
        )

        # stochastic weight averaging
        if self.cfg.train_params.epochs > self.cfg.train_params.swa_start:
            self.swa_model = AveragedModel(self.model)
            self.swa_scheduler = SWALR(self.optimizer, **self.cfg.SWA)
Code Example #4
File: vol_mdn.py  Project: eadains/SystemsTrading
    def fit_model(self):
        """
        Fits model. Uses AdamW optimizer, model averaging, and a cosine annealing learning rate schedule.
        """
        optimizer = torch.optim.AdamW(self.model.parameters(), lr=0.001)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, 100, 2
        )

        self.swa_model = AveragedModel(self.model)
        swa_start = 750
        swa_scheduler = SWALR(
            optimizer, swa_lr=0.001, anneal_epochs=10, anneal_strategy="cos"
        )

        self.model.train()
        self.swa_model.train()
        for epoch in range(1000):
            optimizer.zero_grad()
            output = self.model(self.x)

            loss = -output.log_prob(self.y.view(-1, 1)).sum()

            loss.backward()
            optimizer.step()

            if epoch > swa_start:
                self.swa_model.update_parameters(self.model)
                swa_scheduler.step()
            else:
                scheduler.step()

            if epoch % 10 == 0:
                print(f"Epoch {epoch} complete. Loss: {loss}")
Code Example #5
    def _configure_optimizers(self) -> None:
        """Loads the optimizers."""
        if self._optimizer is not None:
            self._optimizer = self._optimizer(self._network.parameters(),
                                              **self.optimizer_args)
        else:
            self._optimizer = None

        if self._optimizer and self._lr_scheduler is not None:
            if "steps_per_epoch" in self.lr_scheduler_args:
                self.lr_scheduler_args["steps_per_epoch"] = len(
                    self.train_dataloader())

            # Assume lr scheduler should update at each epoch if not specified.
            if "interval" not in self.lr_scheduler_args:
                interval = "epoch"
            else:
                interval = self.lr_scheduler_args.pop("interval")
            self._lr_scheduler = {
                "lr_scheduler":
                self._lr_scheduler(self._optimizer, **self.lr_scheduler_args),
                "interval":
                interval,
            }

        if self.swa_args is not None:
            self._swa_scheduler = {
                "swa_scheduler": SWALR(self._optimizer,
                                       swa_lr=self.swa_args["lr"]),
                "swa_start": self.swa_args["start"],
            }
            self._swa_network = AveragedModel(self._network).to(self.device)
Code Example #6
def get_swa(optimizer,
            model,
            swa_lr=0.005,
            anneal_epochs=10,
            anneal_strategy="cos"):
    '''
    SWALR Arguments:
        optimizer (torch.optim.Optimizer): wrapped optimizer
        swa_lr (float or list): the learning rate value for all param groups
            together or separately for each group.
        anneal_epochs (int): number of epochs in the annealing phase 
            (default: 10)
        anneal_strategy (str): "cos" or "linear"; specifies the annealing 
            strategy: "cos" for cosine annealing, "linear" for linear annealing
            (default: "cos")
        last_epoch (int): the index of the last epoch (default: -1)
    
    '''
    swa_model = AveragedModel(model)
    # swa_scheduler = SWALR(optimizer, swa_lr=swa_lr)
    # swa_scheduler = torch.optim.swa_utils.SWALR(optimizer, anneal_strategy="linear", anneal_epochs=5, swa_lr=swa_lr)
    swa_scheduler = SWALR(optimizer,
                          swa_lr=swa_lr,
                          anneal_epochs=anneal_epochs,
                          anneal_strategy=anneal_strategy)

    return swa_scheduler, swa_model
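A minimal usage sketch for the helper above, assuming a plain PyTorch training loop; the toy model, optimizer, data, and the swa_start threshold are illustrative placeholders rather than code from any of the projects listed here:

import torch
import torch.nn as nn
from torch.optim.swa_utils import AveragedModel, SWALR, update_bn

model = nn.Linear(10, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()
loader = [(torch.randn(4, 10), torch.randint(0, 2, (4,))) for _ in range(8)]

swa_scheduler, swa_model = get_swa(optimizer, model, swa_lr=0.005)
swa_start = 5  # epoch index after which weights start being averaged

for epoch in range(10):
    for x, y in loader:
        optimizer.zero_grad()
        criterion(model(x), y).backward()
        optimizer.step()
    if epoch >= swa_start:
        swa_model.update_parameters(model)  # accumulate the running average
        swa_scheduler.step()                # anneal toward the SWA learning rate

# recompute BatchNorm statistics for the averaged weights (a no-op for models without BN)
update_bn(loader, swa_model)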
Code Example #7
File: train_prev.py  Project: navidkha/road_mtl
    def __init__(self, cfg_dir: str, data_loader: DataLoader, model,
                 labels_definition):
        self.cfg = get_conf(cfg_dir)
        self._labels_definition = labels_definition
        #TODO
        self.logger = self.init_logger(self.cfg.logger)
        #self.dataset = CustomDataset(**self.cfg.dataset)
        self.data = data_loader
        #self.val_dataset = CustomDatasetVal(**self.cfg.val_dataset)
        #self.val_data = DataLoader(self.val_dataset, **self.cfg.dataloader)
        # self.logger.log_parameters({"tr_len": len(self.dataset),
        #                             "val_len": len(self.val_dataset)})
        self.model = model
        #self.model._resnet.conv1.apply(init_weights_normal)
        self.device = self.cfg.train_params.device
        self.model = self.model.to(device=self.device)
        if self.cfg.train_params.optimizer.lower() == "adam":
            self.optimizer = optim.Adam(self.model.parameters(),
                                        **self.cfg.adam)
        elif self.cfg.train_params.optimizer.lower() == "rmsprop":
            self.optimizer = optim.RMSprop(self.model.parameters(),
                                           **self.cfg.rmsprop)
        else:
            raise ValueError(
                f"Unknown optimizer {self.cfg.train_params.optimizer}")

        self.lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(
            self.optimizer, T_max=100)
        self.criterion = nn.BCELoss()

        if self.cfg.logger.resume:
            # load checkpoint
            print("Loading checkpoint")
            save_dir = self.cfg.directory.load
            checkpoint = load_checkpoint(save_dir, self.device)
            self.model.load_state_dict(checkpoint["model"])
            self.optimizer.load_state_dict(checkpoint["optimizer"])
            self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
            self.epoch = checkpoint["epoch"]
            self.e_loss = checkpoint["e_loss"]
            self.best = checkpoint["best"]
            print(
                f"{datetime.now():%Y-%m-%d %H:%M:%S} "
                f"Loading checkpoint was successful, start from epoch {self.epoch}"
                f" and loss {self.best}")
        else:
            self.epoch = 1
            self.best = np.inf
            self.e_loss = []

        # initialize the early_stopping object
        self.early_stopping = EarlyStopping(
            patience=self.cfg.train_params.patience,
            verbose=True,
            delta=self.cfg.train_params.early_stopping_delta,
        )

        # stochastic weight averaging
        self.swa_model = AveragedModel(self.model)
        self.swa_scheduler = SWALR(self.optimizer, **self.cfg.SWA)
Code Example #8
def train(num_epochs, model, data_loader, val_loader, val_every, device, file_name):
    learning_rate = 0.0001
    from torch.optim.swa_utils import AveragedModel, SWALR
    from torch.optim.lr_scheduler import CosineAnnealingLR
    from segmentation_models_pytorch.losses import SoftCrossEntropyLoss, JaccardLoss
    from adamp import AdamP

    criterion = [SoftCrossEntropyLoss(smooth_factor=0.1), JaccardLoss('multiclass', classes=12)]
    optimizer = AdamP(params=model.parameters(), lr=learning_rate, weight_decay=1e-6)
    swa_scheduler = SWALR(optimizer, swa_lr=learning_rate)
    swa_model = AveragedModel(model)
    look = Lookahead(optimizer, la_alpha=0.5)

    print('Start training..')
    best_miou = 0
    for epoch in range(num_epochs):
        hist = np.zeros((12, 12))
        model.train()
        for step, (images, masks, _) in enumerate(data_loader):
            loss = 0
            images = torch.stack(images)  # (batch, channel, height, width)
            masks = torch.stack(masks).long()  # (batch, channel, height, width)

            # move tensors to the device for GPU computation
            images, masks = images.to(device), masks.to(device)

            # inference
            outputs = model(images)
            for i in criterion:
                loss += i(outputs, masks)
            # loss computation (cross entropy loss)

            look.zero_grad()
            loss.backward()
            look.step()

            outputs = torch.argmax(outputs.squeeze(), dim=1).detach().cpu().numpy()
            hist = add_hist(hist, masks.detach().cpu().numpy(), outputs, n_class=12)
            acc, acc_cls, mIoU, fwavacc = label_accuracy_score(hist)
            # print loss and mIoU at the configured step interval
            if (step + 1) % 25 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, mIoU: {:.4f}'.format(
                    epoch + 1, num_epochs, step + 1, len(data_loader), loss.item(), mIoU))

        # print validation loss and save the best model at each validation interval
        if (epoch + 1) % val_every == 0:
            avrg_loss, val_miou = validation(epoch + 1, model, val_loader, criterion, device)
            if val_miou > best_miou:
                print('Best performance at epoch: {}'.format(epoch + 1))
                print('Save model in', saved_dir)
                best_miou = val_miou
                save_model(model, file_name = file_name)

        if epoch > 3:
            swa_model.update_parameters(model)
            swa_scheduler.step()
Code Example #9
def weight_averaging(model_class, checkpoint_paths, data_loader, device):
    from torch.optim.swa_utils import AveragedModel, update_bn

    model = model_class.load_from_checkpoint(checkpoint_paths[0])
    swa_model = AveragedModel(model)

    for path in checkpoint_paths:
        model = model_class.load_from_checkpoint(path)
        swa_model.update_parameters(model)

    swa_model = swa_model.to(device)
    update_bn(data_loader, swa_model, device)
    return swa_model
Code Example #10
def build_swa_model(cfg: CfgNode, model: torch.nn.Module,
                    optimizer: torch.optim.Optimizer):
    # Instead of copying weights during initialization, the SWA model copies
    # the model weights when self.update_parameters is first called.
    # https://github.com/pytorch/pytorch/blob/1.7/torch/optim/swa_utils.py#L107

    # The SWA model needs to be constructed for all processes in distributed
    # training, otherwise the training can get stuck.
    swa_model = AveragedModel(model)
    lr = cfg.SOLVER.BASE_LR
    lr *= cfg.SOLVER.SWA.LR_FACTOR
    swa_scheduler = SWALR(optimizer, swa_lr=lr)
    return swa_model, swa_scheduler
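A self-contained sketch of how the pair returned above is typically consumed, illustrating the behavior the comments describe (averaging only begins at the first update_parameters call); the toy model, learning rates, and step counts are assumptions for illustration only:

import torch
import torch.nn as nn
from torch.optim.swa_utils import AveragedModel, SWALR

model = nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
swa_model = AveragedModel(model)   # averaged weights are seeded on the first update_parameters() call
swa_scheduler = SWALR(optimizer, swa_lr=0.05)

for step in range(100):
    x = torch.randn(8, 4)
    loss = model(x).pow(2).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if step >= 75:                          # start averaging late in training
        swa_model.update_parameters(model)  # first call copies, later calls average
        swa_scheduler.step()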
Code Example #11
def average_model_weights(checkpoint_path, average_fn, checkpoint_N):
    checkpoint_files = [
        os.path.join(checkpoint_path, file_name)
        for file_name in os.listdir(checkpoint_path)
        if file_name.endswith(".pt")
    ]

    def ckpt_key(ckpt):
        return int(ckpt.split('_')[-1].split('.')[0])

    try:
        checkpoint_files = sorted(checkpoint_files, key=ckpt_key)
    except (ValueError, IndexError):
        logging.warning(
            "Checkpoint names have changed, which may cause inconsistent ordering."
        )

    # Select the last N checkpoints
    if checkpoint_N > 0 and checkpoint_N <= len(checkpoint_files):
        checkpoint_files = checkpoint_files[-checkpoint_N:]

    # initialize averaged model with first checkpoint
    model = load_model(checkpoint_files[0])
    averaged_model = AveragedModel(model, avg_fn=average_fn)

    # loop through the checkpoints and update the averaged model
    for checkpoint in checkpoint_files:
        model = load_model(checkpoint)
        averaged_model.update_parameters(model)

    last_checkpoint = torch.load(checkpoint_files[-1])
    opts = last_checkpoint['opts']
    filename = f'{opts.model}_{opts.data}_{last_checkpoint["epoch"]}_averaged.pt'
    save_path = os.path.join(checkpoint_path, filename)

    if opts.precision[-3:] == ".16":
        model.half()
    else:
        model.float()

    torch.save(
        {
            'epoch': last_checkpoint['epoch'] + 1,
            'model_state_dict': averaged_model.module.state_dict(),
            'loss': 0,  # dummy just to work with validate script
            'train_accuracy': 0,  # dummy just to work with validate script
            'opts': opts
        },
        save_path)

    return averaged_model
Code Example #12
    def __init__(self, model, device, config, fold_num):
        self.config = config
        self.epoch = 0
        self.start_epoch = 0
        self.fold_num = fold_num
        if self.config.stage2:
            self.base_dir = f'./result/stage2/{config.dir}/{config.dir}_fold_{config.fold_num}'
        else:
            self.base_dir = f'./result/{config.dir}/{config.dir}_fold_{config.fold_num}'
        os.makedirs(self.base_dir, exist_ok=True)
        self.log_path = f'{self.base_dir}/log.txt'
        self.best_summary_loss = 10**5

        self.model = model
        self.swa_model = AveragedModel(self.model)
        self.device = device
        self.wandb = True

        self.cutmix = self.config.cutmix_ratio
        self.fmix = self.config.fmix_ratio
        self.smix = self.config.smix_ratio

        self.es = EarlyStopping(patience=8)

        self.scaler = GradScaler()
        self.amp = self.config.amp
        param_optimizer = list(self.model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.001
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        }]

        self.optimizer, self.scheduler = get_optimizer(
            self.model, self.config.optimizer_name,
            self.config.optimizer_params, self.config.scheduler_name,
            self.config.scheduler_params, self.config.n_epochs)

        self.criterion = get_criterion(self.config.criterion_name,
                                       self.config.criterion_params)
        self.log(f'Fitter prepared. Device is {self.device}')
        set_wandb(self.config, fold_num)
Code Example #13
    def before_run(self, runner):
        """Construct the averaged model which will keep track of the running
        averages of the parameters of the model."""
        model = runner.model
        self.model = AveragedModel(model)

        self.meta = runner.meta

        if self.meta is None:
            self.meta = dict()
            self.meta.setdefault('hook_msgs', dict())

        if 'hook_msgs' not in self.meta:
            self.meta.setdefault('hook_msgs', dict())
Code Example #14
    def __init__(self, config):
        self.config = config
        self.device = 'cuda' if cuda.is_available() else 'cpu'
        
        self.model = MLP(config)
        self.swa_model = AveragedModel(self.model)

        self.optimizer = make_optimizer(self.model, optimizer_name=self.config.optimizer, sam=self.config.sam)
        self.scheduler = make_scheduler(self.optimizer, decay_name=self.config.scheduler,
                                        num_training_steps=self.config.num_training_steps,
                                        num_warmup_steps=self.config.num_warmup_steps)
        self.swa_start = self.config.swa_start
        self.swa_scheduler = SWALR(self.optimizer, swa_lr=self.config.swa_lr)
        self.epoch_num = 0
        self.criterion = self.config.criterion
Code Example #15
def train_model(indep_vars, dep_var, verbose=True):
    """
    Trains MDNVol network. Uses AdamW optimizer with cosine annealing learning rate schedule.
    Outputs the averaged model over the last 25% of training epochs.

    indep_vars: n x m torch tensor containing independent variables
        n = number of data points
        m = number of input variables
    dep_var: n x 1 torch tensor containing single dependent variable
        n = number of data points
        1 = single output variable
    """
    model = MDN(indep_vars.shape[1], 1, 250, 5)
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer, 100, 2)

    swa_model = AveragedModel(model)
    swa_start = 750
    swa_scheduler = SWALR(optimizer,
                          swa_lr=0.001,
                          anneal_epochs=10,
                          anneal_strategy="cos")

    model.train()
    swa_model.train()
    for epoch in range(1000):
        optimizer.zero_grad()
        output = model(indep_vars)

        loss = -output.log_prob(dep_var).sum()

        loss.backward()
        optimizer.step()

        if epoch > swa_start:
            swa_model.update_parameters(model)
            swa_scheduler.step()
        else:
            scheduler.step()

        if epoch % 10 == 0:
            if verbose:
                print(f"Epoch {epoch} complete. Loss: {loss}")

    swa_model.eval()
    return swa_model
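A brief, hedged usage sketch for the function above; the tensor shapes are made up, and it assumes (as the training loop suggests) that MDN returns a torch.distributions object exposing log_prob:

import torch

# n = 500 observations, m = 3 predictors; dep_var must be n x 1 per the docstring
indep_vars = torch.randn(500, 3)
dep_var = torch.randn(500, 1)

swa_model = train_model(indep_vars, dep_var, verbose=False)

with torch.no_grad():
    dist = swa_model(indep_vars)          # AveragedModel forwards to the wrapped MDN
    avg_loglik = dist.log_prob(dep_var).mean()
    print(f"Average log-likelihood: {avg_loglik:.4f}")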
Code Example #16
File: classification_test.py  Project: janosh/aviary
def predict(model_class, test_set, checkpoint_path, device, robust):

    assert isfile(
        checkpoint_path), f"no checkpoint found at '{checkpoint_path}'"
    checkpoint = torch.load(checkpoint_path, map_location=device)

    chk_robust = checkpoint["model_params"]["robust"]
    assert (chk_robust == robust
            ), f"checkpoint['robust'] != robust ({chk_robust} vs {robust})"

    model = model_class(**checkpoint["model_params"], device=device)
    model.to(device)
    model.load_state_dict(checkpoint["state_dict"])

    if "swa" in checkpoint.keys():
        model.swa = checkpoint["swa"]

        model_dict = model.swa["model_state_dict"]
        model.swa["model"] = AveragedModel(model)
        model.swa["model"].load_state_dict(model_dict)

    idx, comp, y_test, output = model.predict(test_set)

    df = pd.DataFrame({"idx": idx, "comp": comp, "y_test": y_test})

    if model.robust:
        mean, log_std = output.chunk(2, dim=1)
        pre_logits_std = torch.exp(log_std).cpu().numpy()
        logits = sampled_softmax(mean, log_std, samples=10).cpu().numpy()
        pre_logits = mean.cpu().numpy()
        for idx, std_al in enumerate(pre_logits_std.T):
            df[f"class_{idx}_std_al"] = std_al

    else:
        pre_logits = output.cpu().numpy()
        logits = softmax(pre_logits, axis=1)

    for idx, (logit, pre_logit) in enumerate(zip(logits.T, pre_logits.T)):
        df[f"class_{idx}_logit"] = logit
        df[f"class_{idx}_pred"] = pre_logit

    return df, y_test, logits, pre_logits
Code Example #17
    def __init__(self, blocks, channels, features, pre_act=False,
                 radix=1, groups=1, bottleneck_width=64,
                 activation=nn.SiLU, squeeze_excitation=False,
                 bottleneck=False, bottleneck_expansion=4,
                 beta=0, val_lambda=0.333, lr=1e-2,
                 use_swa=False, swa_lr=1e-2, swa_freq=250):
        super(Network, self).__init__()
        self.save_hyperparameters()

        self.net = PolicyValueNetwork(
            blocks=blocks, channels=channels, features=features,
            pre_act=pre_act, activation=activation,
            squeeze_excitation=squeeze_excitation,
            bottleneck=bottleneck, bottleneck_expansion=bottleneck_expansion,
            radix=radix, groups=groups, bottleneck_width=bottleneck_width
        )
        if use_swa:
            self.swa_model = AveragedModel(self.net)

        self.ce = nn.CrossEntropyLoss(reduction='none')
        self.bce = nn.BCEWithLogitsLoss()
Code Example #18
def test_1(model_path, output_dir, test_loader, addNDVI):
    in_channels = 4
    if addNDVI:
        in_channels += 1
    model = smp.UnetPlusPlus(
        encoder_name="resnet101",
        encoder_weights="imagenet",
        in_channels=in_channels,
        classes=10,
    )
    # model = smp.DeepLabV3Plus(
    #         encoder_name="timm-regnety_320", #resnet101
    #         encoder_weights="imagenet",
    #         in_channels=4,
    #         classes=8,
    # )
    # If the model was trained with SWA, wrap it in AveragedModel before loading
    if "swa" in model_path:
        model = AveragedModel(model)
    model.to(DEVICE)
    model.load_state_dict(torch.load(model_path))
    model.eval()

    for image, image_stretch, image_path, ndvi in test_loader:
        with torch.no_grad():
            image = image.cuda()
            image_stretch = image_stretch.cuda()
            output1 = model(image).cpu().data.numpy()
            output2 = model(image_stretch).cpu().data.numpy()
        output = (output1 + output2) / 2.0
        for i in range(output.shape[0]):
            pred = output[i]
            pred = np.argmax(pred, axis=0) + 1
            pred = np.uint8(pred)
            save_path = os.path.join(
                output_dir,
                image_path[i].split('\\')[-1].replace('.tif', '.png'))
            #print(image_path[i][-10:])
            print(save_path)
            cv2.imwrite(save_path, pred)
Code Example #19
    def __init__(self, config: DNNConfig):
        self.config = config
        self.epochs = config.epoch_num
        self.device = config.device

        self.model = tmp_model
        #self.criterion = CustomLoss()

        self.criterion = nn.MSELoss()

        optimizer_kwargs = {
            'lr': config.lr,
            'weight_decay': config.weight_decay
        }
        self.sam = config.issam
        self.optimizer = make_optimizer(self.model,
                                        optimizer_kwargs,
                                        optimizer_name=config.optimizer_name,
                                        sam=config.issam)
        self.scheduler_name = config.scheduler_name
        self.scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer=self.optimizer, T_max=config.T_max)

        self.isswa = getattr(config, 'isswa', False)
        self.swa_start = getattr(config, 'swa_start', 0)

        if self.isswa:
            self.swa_model = AveragedModel(self.model)
            self.swa_scheduler = SWALR(self.optimizer, swa_lr=0.025)

        #self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=self.optimizer,
        #                                                      mode=config.mode, factor=config.factor)

        self.loss_log = {
            'train_loss': [],
            'train_score': [],
            'valid_loss': [],
            'valid_score': []
        }
Code Example #20
def predict(model_class, test_set, checkpoint_path, device, robust):

    assert isfile(
        checkpoint_path), f"no checkpoint found at '{checkpoint_path}'"
    checkpoint = torch.load(checkpoint_path, map_location=device)

    chk_robust = checkpoint["model_params"]["robust"]
    assert (chk_robust == robust
            ), f"checkpoint['robust'] != robust ({chk_robust} vs  {robust})"

    model = model_class(**checkpoint["model_params"], device=device)
    model.to(device)
    model.load_state_dict(checkpoint["state_dict"])

    normalizer = Normalizer()
    normalizer.load_state_dict(checkpoint["normalizer"])

    if "swa" in checkpoint.keys():
        model.swa = checkpoint["swa"]

        model_dict = model.swa["model_state_dict"]
        model.swa["model"] = AveragedModel(model)
        model.swa["model"].load_state_dict(model_dict)

    idx, comp, y_test, output = model.predict(test_set)

    df = pd.DataFrame({"idx": idx, "comp": comp, "y_test": y_test})

    output = output.cpu().squeeze()  # move preds to CPU in case model ran on GPU
    if robust:
        mean, log_std_al = (x.squeeze() for x in output.chunk(2, dim=1))
        df["pred"] = normalizer.denorm(mean).numpy()
        df["std_al"] = (log_std_al.exp() * normalizer.std).numpy()
    else:
        df["pred"] = normalizer.denorm(output).numpy()

    return df
Code Example #21
    def load_weights(self,
                     network_fn: Optional[Type[nn.Module]] = None) -> None:
        """Load the network weights."""
        logger.debug("Loading network with pretrained weights.")
        filenames = glob(self.weights_filename)
        if not filenames:
            raise FileNotFoundError(
                f"Could not find any pretrained weights at {self.weights_filename}"
            )
        filename = filenames[0]
        # Load the state dictionary.
        state_dict = torch.load(filename,
                                map_location=torch.device(self._device))
        self._network_args = state_dict["network_args"]
        weights = state_dict["model_state"]

        # Initializes the network with trained weights.
        if network_fn is not None:
            self._network = network_fn(**self._network_args)
        self._network.load_state_dict(weights)

        if "swa_network" in state_dict:
            self._swa_network = AveragedModel(self._network).to(self.device)
            self._swa_network.load_state_dict(state_dict["swa_network"])
Code Example #22
def DecoderTensorWriting(model_weight_path,
                         decoder_img_output_path,
                         image_root_path,
                         imageNames,
                         if_swa=True):
    device = "cuda:1"
    model = EncoderDecoderNet(inChannels=3,
                              encodedDimension=encodedDimension,
                              drop_ratio=0,
                              layersExpandRatio=layersExpandRatio,
                              channelsExpandRatio=channelsExpandRatio,
                              blockExpandRatio=blockExpandRatio,
                              encoderImgHeight=12,
                              encoderImgWidth=52,
                              ch=12,
                              if_add_plate_infor=True).to(device)
    if if_swa:
        model = AveragedModel(model)
    model.load_state_dict(torch.load(model_weight_path))
    model = model.eval()
    transformer = tv.transforms.Compose([tv.transforms.ToTensor()])
    for i, nameD in enumerate(imageNames):
        imgD = Image.open(os.path.join(image_root_path, nameD)).convert("RGB")
        print("Decoder : ", i)
        print(nameD)
        tImg = transformer(imgD).unsqueeze(dim=0).to(device)
        if if_add_plate_information:
            decoderTensor, encoderT = model(
                tImg,
                torch.from_numpy(np.array([img2Plates[nameD]
                                           ])).float().to(device))
        else:
            decoderTensor, encoderT = model(tImg, None)
        #print(encoderT)
        decoder = torch.sigmoid(decoderTensor).detach().cpu().squeeze(dim=0)
        decoderImg = tv.transforms.ToPILImage()(decoder)
        decoderImg.save(os.path.join(decoder_img_output_path, nameD))
Code Example #23
File: weight_avg.py  Project: muzzynine/examples-1
def average_model_weights(checkpoint_path, average_fn):
    checkpoint_files = [
        os.path.join(checkpoint_path, file_name)
        for file_name in os.listdir(checkpoint_path)
        if file_name.endswith(".pt")
    ]

    # initialize averaged model with first checkpoint
    model = load_model(checkpoint_files[0])
    averaged_model = AveragedModel(model, avg_fn=average_fn)

    # loop through the checkpoints and update the averaged model
    for checkpoint in checkpoint_files:
        model = load_model(checkpoint)
        averaged_model.update_parameters(model)

    last_checkpoint = torch.load(checkpoint_files[-1])
    opts = last_checkpoint['opts']
    filename = f'{opts.model}_{opts.data}_{last_checkpoint["epoch"]}_averaged.pt'
    save_path = os.path.join(checkpoint_path, filename)

    if opts.precision[-3:] == ".16":
        model.half()
    else:
        model.float()

    torch.save(
        {
            'epoch': last_checkpoint['epoch'] + 1,
            'model_state_dict': averaged_model.module.state_dict(),
            'loss': 0,  # dummy just to work with validate script
            'train_accuracy': 0,  # dummy just to work with validate script
            'opts': opts
        },
        save_path)

    return averaged_model
Code Example #24
def EncoderTensorWriting(model_weight_path,
                         write_path,
                         image_root_path,
                         imageNames,
                         if_swa=True):
    device = "cuda:1"
    model = EncoderDecoderNet(inChannels=3,
                              encodedDimension=encodedDimension,
                              drop_ratio=0,
                              layersExpandRatio=layersExpandRatio,
                              channelsExpandRatio=channelsExpandRatio,
                              blockExpandRatio=blockExpandRatio,
                              encoderImgHeight=12,
                              encoderImgWidth=52,
                              ch=12,
                              if_add_plate_infor=True).to(device)
    if if_swa:
        model = AveragedModel(model)
    model.load_state_dict(torch.load(model_weight_path))
    model = model.eval()
    transformer = tv.transforms.Compose([tv.transforms.ToTensor()])
    for i, nameE in enumerate(imageNames):
        imgE = Image.open(os.path.join(image_root_path, nameE)).convert("RGB")
        print("Encoder : ", i)
        print(nameE)
        tImg = transformer(imgE).unsqueeze(dim=0).to(device)
        if if_add_plate_information:
            _, encoderTensor = model(
                tImg,
                torch.from_numpy(np.array([img2Plates[nameE]
                                           ])).float().to(device))
        else:
            _, encoderTensor = model(tImg, None)
        encoderTensor = encoderTensor.detach().cpu().numpy()
        encoderTensor = np.squeeze(encoderTensor, axis=0)
        np.save(os.path.join(write_path, nameE), encoderTensor)
Code Example #25
    trainTransforms = tv.transforms.Compose([
        tv.transforms.RandomHorizontalFlip(p=0.5),
        tv.transforms.RandomVerticalFlip(p=0.5),
        tv.transforms.RandomApply(
            [tv.transforms.RandomCrop(size=randomCropSize)], p=0.5),
        tv.transforms.RandomApply([tv.transforms.RandomRotation(degrees=60)],
                                  p=0.5),
        tv.transforms.Resize(size=inputImageSize),
        tv.transforms.ToTensor(),
        #tv.transforms.RandomErasing(p=0.2, scale=(0.1, 0.15), ratio=(0.1, 1.))
    ])
    testTransforms = tv.transforms.Compose(
        [tv.transforms.Resize(size=inputImageSize),
         tv.transforms.ToTensor()])
    model = tv.models.resnet50(num_classes=3).to(device)
    swa_model = AveragedModel(model)

    if trainOrTest.lower() == "train":
        ### Optimizer
        optimizer = optim.Adam(model.parameters(), lr=LR, weight_decay=1e-5)
        cosine_scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                                epoch,
                                                                eta_min=0,
                                                                last_epoch=-1)
        scheduler = GradualWarmupScheduler(optimizer,
                                           multiplier=multiplier,
                                           total_epoch=warmEpoch,
                                           after_scheduler=cosine_scheduler)
        swa_scheduler = SWALR(optimizer,
                              swa_lr=LR,
                              anneal_epochs=15,
Code Example #26
def main():
    os.makedirs(SAVEPATH, exist_ok=True)
    print('save path:', SAVEPATH)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('device:', device)

    print('weight_decay:', WEIGHTDECAY)
    print('momentum:', MOMENTUM)
    print('batch_size:', BATCHSIZE)
    print('lr:', LR)
    print('epoch:', EPOCHS)
    print('Label smoothing:', LABELSMOOTH)
    print('Stochastic Weight Averaging:', SWA)
    if SWA:
        print('Swa lr:', SWA_LR)
        print('Swa start epoch:', SWA_START)
    print('Cutout augmentation:', CUTOUT)
    if CUTOUT:
        print('Cutout size:', CUTOUTSIZE)
    print('Activation:', ACTIVATION)

    # get model
    model = get_seresnet_cifar(activation=ACTIVATION)

    # get loss function
    if LABELSMOOTH:
        criterion = LabelSmoothingLoss(classes=10, smoothing=0.1)
    else:
        criterion = torch.nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=LR,
                                momentum=MOMENTUM,
                                weight_decay=WEIGHTDECAY,
                                nesterov=True)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer,
                                                           T_max=EPOCHS,
                                                           eta_min=0)

    model = model.to(device)
    criterion = criterion.to(device)

    # Check the number of parameters in your model
    pytorch_total_params = sum(p.numel() for p in model.parameters())
    print(f"Number of parameters: {pytorch_total_params}")
    if int(pytorch_total_params) > 2000000:
        print('Your model has more than 2 million parameters.')
        return

    if SWA:
        # apply swa
        swa_model = AveragedModel(model)
        swa_scheduler = SWALR(optimizer, swa_lr=SWA_LR)
        swa_total_params = sum(p.numel() for p in swa_model.parameters())
        print(f"Swa parameters: {swa_total_params}")

    # cinic mean, std
    normalize = transforms.Normalize(mean=[0.47889522, 0.47227842, 0.43047404],
                                     std=[0.24205776, 0.23828046, 0.25874835])

    if CUTOUT:
        train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize,
            Cutout(size=CUTOUTSIZE)
        ])
    else:
        train_transform = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ])

    train_dataset = torchvision.datasets.ImageFolder('/content/train',
                                                     transform=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCHSIZE,
                              shuffle=True,
                              num_workers=4,
                              pin_memory=True)

    # colab reload
    start_epoch = 0
    if os.path.isfile(os.path.join(SAVEPATH, 'latest_checkpoint.pth')):
        checkpoint = torch.load(os.path.join(SAVEPATH,
                                             'latest_checkpoint.pth'))
        start_epoch = checkpoint['epoch']
        scheduler.load_state_dict(checkpoint['scheduler'])
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        if SWA:
            swa_scheduler.load_state_dict(checkpoint['swa_scheduler'])
            swa_model.load_state_dict(checkpoint['swa_model'])
        print('Loaded checkpoint, resuming from epoch', start_epoch)

    for epoch in range(start_epoch, EPOCHS):
        print("\n----- epoch: {}, lr: {} -----".format(
            epoch, optimizer.param_groups[0]["lr"]))

        # train for one epoch
        start_time = time.time()
        train(train_loader, epoch, model, optimizer, criterion, device)
        elapsed_time = time.time() - start_time
        print('==> {:.2f} seconds to train this epoch\n'.format(elapsed_time))

        # learning rate scheduling
        if SWA and epoch > SWA_START:
            swa_model.update_parameters(model)
            swa_scheduler.step()
        else:
            scheduler.step()

        if SWA:
            checkpoint = {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'swa_model': swa_model.state_dict(),
                'swa_scheduler': swa_scheduler.state_dict()
            }
        else:
            checkpoint = {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict()
            }
        torch.save(checkpoint, os.path.join(SAVEPATH, 'latest_checkpoint.pth'))
        if epoch % 10 == 0:
            torch.save(checkpoint,
                       os.path.join(SAVEPATH, '%d_checkpoint.pth' % epoch))
Code Example #27
    def __init__(self):

        if args.train is not None:
            self.train_tuple = get_tuple(args.train,
                                         bs=args.batch_size,
                                         shuffle=True,
                                         drop_last=False)

        if args.valid is not None:
            valid_bsize = 2048 if args.multiGPU else 50
            self.valid_tuple = get_tuple(args.valid,
                                         bs=valid_bsize,
                                         shuffle=False,
                                         drop_last=False)
        else:
            self.valid_tuple = None

        # Select Model, X is default
        if args.model == "X":
            self.model = ModelX(args)
        elif args.model == "V":
            self.model = ModelV(args)
        elif args.model == "U":
            self.model = ModelU(args)
        elif args.model == "D":
            self.model = ModelD(args)
        elif args.model == 'O':
            self.model = ModelO(args)
        else:
            print(args.model, " is not implemented.")

        # Load pre-trained weights from paths
        if args.loadpre is not None:
            self.model.load(args.loadpre)

        # GPU options
        if args.multiGPU:
            self.model.lxrt_encoder.multi_gpu()

        self.model = self.model.cuda()

        # Losses and optimizer
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.nllloss = nn.NLLLoss()

        if args.train is not None:
            batch_per_epoch = len(self.train_tuple.loader)
            self.t_total = int(batch_per_epoch * args.epochs // args.acc)
            print("Total Iters: %d" % self.t_total)

        def is_backbone(n):
            if "encoder" in n:
                return True
            elif "embeddings" in n:
                return True
            elif "pooler" in n:
                return True
            print("F: ", n)
            return False

        no_decay = ['bias', 'LayerNorm.weight']

        params = list(self.model.named_parameters())
        if args.reg:
            optimizer_grouped_parameters = [
                {
                    "params": [p for n, p in params if is_backbone(n)],
                    "lr": args.lr
                },
                {
                    "params": [p for n, p in params if not is_backbone(n)],
                    "lr": args.lr * 500
                },
            ]

            for n, p in self.model.named_parameters():
                print(n)

            self.optim = AdamW(optimizer_grouped_parameters, lr=args.lr)
        else:
            optimizer_grouped_parameters = [{
                'params':
                [p for n, p in params if not any(nd in n for nd in no_decay)],
                'weight_decay':
                args.wd
            }, {
                'params':
                [p for n, p in params if any(nd in n for nd in no_decay)],
                'weight_decay':
                0.0
            }]

            self.optim = AdamW(optimizer_grouped_parameters, lr=args.lr)

        if args.train is not None:
            self.scheduler = get_linear_schedule_with_warmup(
                self.optim, self.t_total * 0.1, self.t_total)

        self.output = args.output
        os.makedirs(self.output, exist_ok=True)

        # SWA Method:
        if args.contrib:
            self.optim = SWA(self.optim,
                             swa_start=self.t_total * 0.75,
                             swa_freq=5,
                             swa_lr=args.lr)

        if args.swa:
            self.swa_model = AveragedModel(self.model)
            self.swa_start = self.t_total * 0.75
            self.swa_scheduler = SWALR(self.optim, swa_lr=args.lr)
Code Example #28
File: training.py  Project: VictorCallejas/FB_MMHM
def training(model,
             train_dataloader,
             valid_dataloader,
             test_dataloader,
             model_cfg,
             fold_idx=1):

    print("--------  ", str(fold_idx), "  --------")
    global model_config
    model_config = model_cfg

    device = get_device()
    model.to(device)

    if fold_idx == 1: print('CONFIG: ')
    if fold_idx == 1:
        print([(v, getattr(model_config, v)) for v in dir(model_config)
               if v[:2] != "__"])
    if fold_idx == 1: print('MODEL: ', model)

    epochs = model_config.epochs

    if model_config.optimizer == 'AdamW':
        optimizer = torch.optim.AdamW(model.parameters(),
                                      lr=float(model_config.lr),
                                      eps=float(model_config.eps),
                                      weight_decay=float(
                                          model_config.weight_decay))
    elif model_config.optimizer == 'SGD':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=float(model_config.lr))

    if model_config.scheduler == 'linear':
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=int(model_config.warmup_steps),
            num_training_steps=len(train_dataloader) * epochs)
    else:
        scheduler = None

    criterion = nn.BCEWithLogitsLoss()  #nn.CrossEntropyLoss()

    swa_model = AveragedModel(model)
    if model_config.swa_scheduler == 'linear':
        swa_scheduler = SWALR(optimizer, swa_lr=float(model_config.lr))
    else:
        swa_scheduler = CosineAnnealingLR(optimizer, T_max=100)

    print('TRAINING...')

    training_stats = []

    best_dev_auc = float('-inf')

    with tqdm(total=epochs, leave=False) as pbar:
        for epoch_i in range(0, epochs):

            if epoch_i >= int(model_config.swa_start):
                update_bn(train_dataloader, swa_model)
                train_auc, train_acc, avg_train_loss = train(
                    model, train_dataloader, device, criterion, optimizer)
                swa_model.update_parameters(model)
                swa_scheduler.step()
                update_bn(valid_dataloader, swa_model)
                valid_auc, valid_acc, avg_dev_loss, dev_d = valid(
                    swa_model, valid_dataloader, device, criterion)
            else:
                train_auc, train_acc, avg_train_loss = train(
                    model,
                    train_dataloader,
                    device,
                    criterion,
                    optimizer,
                    scheduler=scheduler)
                valid_auc, valid_acc, avg_dev_loss, dev_d = valid(
                    model, valid_dataloader, device, criterion)
            if cfg.final_train:
                valid_auc = 0
                valid_acc = 0
                avg_dev_loss = 0

            add_stats(training_stats, avg_train_loss, avg_dev_loss, train_acc,
                      train_auc, valid_acc, valid_auc)

            if (cfg.final_train and epoch_i == epochs - 1) or (
                    not cfg.final_train and valid_auc > best_dev_auc):
                best_dev_auc = valid_auc
                if epoch_i >= int(model_config.swa_start):
                    update_bn(test_dataloader, swa_model)
                    test_d = gen_test(swa_model, test_dataloader, device)
                    save(fold_idx, swa_model, optimizer, dev_d, test_d,
                         valid_auc)
                else:
                    test_d = gen_test(model, test_dataloader, device)
                    save(fold_idx, model, optimizer, dev_d, test_d, valid_auc)

            pbar.update(1)

    print('TRAINING COMPLETED')

    # Show training results
    col_names = [
        'train_loss', 'train_acc', 'train_auc', 'dev_loss', 'dev_acc',
        'dev_auc'
    ]
    training_stats = pd.DataFrame(training_stats, columns=col_names)
    print(training_stats.head(epochs))
    plot_training_results(training_stats, fold_idx)

    # If config, get best model and make submission
    if cfg.run['submission'] == True:
        make_submission(model, test_dataloader)
Code Example #29
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--batch_size",
        default=8,
        type=int,
        help="batch size of both segmentation and classification training")
    parser.add_argument(
        "--seg_epoch",
        default=100,
        type=int,
        help="the number of epoch in the segmentation training")
    parser.add_argument(
        "--cls_epoch",
        default=20,
        type=int,
        help="the number of epoch in the classification training")
    parser.add_argument("--lr",
                        default=0.01,
                        type=float,
                        help="the learning rate of training")
    parser.add_argument("--swa_lr",
                        default=0.005,
                        type=float,
                        help="the stochastic learning rate of training")
    parser.add_argument(
        "--seg_weight",
        default=[0.1, 1],
        type=float,
        nargs='+',
        help="the weight of Binary Cross Entropy in the segmentation learning")
    parser.add_argument(
        "--cls_weight",
        default=[1, 1],
        type=float,
        nargs='+',
        help="the weight of Binary Cross Entropy in the classification learning"
    )
    parser.add_argument("--seed",
                        default=2021,
                        type=int,
                        help="the random seed")
    parser.add_argument(
        "--train_dir",
        default="/train_dir",
        type=str,
        help=
        "the train data directory. it consists of the both ng and ok directorys, and they have img and mask folders."
    )
    parser.add_argument(
        "--val_dir",
        default="/val_dir",
        type=str,
        help=
        "the validation data directory. it consists of the both ng and ok directorys, and they have img and mask folders."
    )

    args = parser.parse_args()

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    segmentation_train = True
    classification_train = True

    train_dir = Path(args.train_dir)
    val_dir = Path(args.val_dir)

    train_ok_dir = str(train_dir / "ok")
    train_mask_dir = str(train_dir / "mask")
    train_ng_dir = str(train_dir / "ng")

    val_ok_dir = str(val_dir / "ok")
    val_mask_dir = str(val_dir / "mask")
    val_ng_dir = str(val_dir / "ng")

    seg_train_dataset = SegmentationDataset(img_dir=train_ng_dir,
                                            mask_dir=train_mask_dir,
                                            n_channels=3,
                                            classes=1,
                                            train=True)
    seg_val_dataset = SegmentationDataset(img_dir=val_ng_dir,
                                          mask_dir=val_mask_dir,
                                          n_channels=3,
                                          classes=1,
                                          train=False)

    cls_train_dataset = ClassificationDataset(ok_dir=train_ok_dir,
                                              ng_dir=train_ng_dir,
                                              n_channels=3,
                                              classes=1,
                                              train=True)
    cls_val_dataset = ClassificationDataset(ok_dir=val_ok_dir,
                                            ng_dir=val_ng_dir,
                                            n_channels=3,
                                            classes=1,
                                            train=False)

    seg_train_loader = DataLoader(seg_train_dataset,
                                  batch_size=8,
                                  shuffle=True)
    seg_val_loader = DataLoader(seg_val_dataset, batch_size=8, shuffle=True)
    cls_train_loader = DataLoader(cls_train_dataset,
                                  batch_size=8,
                                  shuffle=True)
    cls_val_loader = DataLoader(cls_val_dataset, batch_size=8, shuffle=True)

    my_model = DownconvUnet(in_channel=3, seg_classes=1, cls_classes=2)
    avg_model = AveragedModel(my_model)

    my_model.to(device)
    avg_model.to(device)

    with mlflow.start_run() as run:
        seg_args = Params(args.batch_size, args.seg_epoch, args.lr, args.seed,
                          args.seg_weight)
        cls_args = Params(args.batch_size, args.cls_epoch, args.lr, args.seed,
                          args.cls_weight)
        mode_list = ["seg", "cls"]
        for mode in mode_list:
            for key, value in vars(seg_args).items():
                mlflow.log_param(f"{mode}_{key}", value)

        # Segmentation train

        if segmentation_train:
            print("-" * 5 + "Segmentation training start" + "-" * 5)

            my_model.mode = 1
            train_metrics = Metrics()
            train_loss = 0.
            train_iou = 0.
            train_acc = 0.

            val_metrics = Metrics()
            val_loss = 0.
            val_iou = 0.
            val_acc = 0.

            my_model.train()

            optimizer = torch.optim.Adam(my_model.parameters(), lr=seg_args.lr)
            scheduler = CosineAnnealingLR(optimizer, T_max=100)
            bce = WeightedBCELoss(weight=seg_args.weight)
            swa_start = int(seg_args.num_epoch * 0.75)
            swa_scheduler = SWALR(optimizer,
                                  anneal_strategy='linear',
                                  anneal_epochs=swa_start,
                                  swa_lr=seg_args.swa_lr)

            for epoch in range(seg_args.num_epoch):
                for batch_idx, batch in enumerate(seg_train_loader):
                    batch = tuple(t.to(device) for t in batch)
                    seg_x, seg_y = batch

                    optimizer.zero_grad()

                    pred_y = my_model(seg_x)
                    loss = bce(pred_y, seg_y)
                    loss.backward()
                    optimizer.step()

                    train_loss += loss.item()
                    train_metrics.update(pred_y, seg_y, loss.item())
                    train_iou += train_metrics.iou
                    train_acc += train_metrics.acc

                    step = epoch * len(seg_train_loader) + batch_idx
                    for metric, value in vars(train_metrics).items():
                        mlflow.log_metric(f"seg_train_{metric}",
                                          value,
                                          step=step)

                train_loss /= len(seg_train_loader)
                train_iou /= len(seg_train_loader)
                train_acc /= len(seg_train_loader)

                my_model.eval()

                for batch_idx, batch in enumerate(seg_val_loader):
                    batch = tuple(t.to(device) for t in batch)
                    seg_x, seg_y = batch
                    pred_y = my_model(seg_x)

                    loss = bce(pred_y, seg_y)

                    val_loss += loss.item()
                    val_metrics.update(pred_y, seg_y, val_loss)
                    val_iou += val_metrics.iou
                    val_acc += val_metrics.acc

                    step = epoch * len(seg_val_loader) + batch_idx
                    for metric, value in vars(val_metrics).items():
                        mlflow.log_metric(f"seg_val_{metric}",
                                          value,
                                          step=step)

                val_loss /= len(seg_val_loader)
                val_iou /= len(seg_val_loader)
                val_acc /= len(seg_val_loader)

                print(f"Epoch {epoch + 1}:")
                print("-" * 10)
                print(
                    f"train_loss {train_loss:.3f}, train_iou: {train_iou:.3f}, "
                    f"train_accuracy: {train_acc:.3f}")
                print(f"val_loss {val_loss:.3f}, val_iou: {val_iou:.3f}, "
                      f"val_accuracy: {val_acc:.3f}")

                if epoch > swa_start:
                    print("Stochastic average start")
                    avg_model.update_parameters(my_model)
                    swa_scheduler.step()
                else:
                    scheduler.step()

            print("Segmentation train completed")

            # Classification train

            if classification_train:
                print("-" * 5 + "Classification training start" + "-" * 5)

                my_model.mode = 2

                train_metrics = Metrics()
                train_loss = 0.
                train_iou = 0.
                train_acc = 0.

                val_metrics = Metrics()
                val_loss = 0.
                val_iou = 0.
                val_acc = 0.

                my_model.train()

                optimizer = torch.optim.Adam(my_model.parameters(),
                                             lr=cls_args.lr)
                scheduler = CosineAnnealingLR(optimizer, T_max=100)
                bce = WeightedBCELoss(weight=cls_args.weight)
                swa_start = int(cls_args.num_epoch * 0.75)
                swa_scheduler = SWALR(optimizer,
                                      anneal_strategy='linear',
                                      anneal_epochs=swa_start,
                                      swa_lr=cls_args.swa_lr)

                for epoch in range(cls_args.num_epoch):
                    for batch_idx, batch in enumerate(cls_train_loader):
                        batch = tuple(t.to(device) for t in batch)
                        cls_x, cls_y = batch

                        optimizer.zero_grad()

                        pred_y = my_model(cls_x)
                        loss = bce(pred_y, cls_y)
                        loss.backward()
                        optimizer.step()

                        train_loss += loss.item()
                        train_metrics.update(pred_y, cls_y, train_loss)
                        train_acc += train_metrics.acc

                        step = epoch * len(seg_train_loader) + batch_idx
                        for metric, value in vars(train_metrics).items():
                            mlflow.log_metric(f"cls_train_{metric}",
                                              value,
                                              step=step)

                    train_loss /= len(seg_train_loader)
                    train_acc /= len(seg_train_loader)

                    my_model.eval()

                    for batch_idx, batch in enumerate(cls_val_loader):
                        batch = tuple(t.to(device) for t in batch)
                        cls_x, cls_y = batch
                        pred_y = my_model(cls_x)

                        loss = bce(pred_y, cls_y)

                        val_loss += loss.item()
                        val_metrics.update(pred_y, cls_y, loss.item())
                        val_acc += val_metrics.acc

                        step = epoch * len(seg_train_loader) + batch_idx
                        for metric, value in vars(val_metrics).items():
                            mlflow.log_metric(f"cls_train_{metric}",
                                              value,
                                              step=step)

                    val_loss /= len(seg_val_loader)
                    val_acc /= len(seg_val_loader)

                    print(f"Epoch {epoch + 1}:")
                    print("-" * 10)
                    print(
                        f"train_loss {train_loss:.3f}, train_iou: {train_iou:.3f}, "
                        f"train_accuracy: {train_acc:.3f}")
                    print(f"val_loss {val_loss:.3f}, val_iou: {val_iou:.3f}, "
                          f"val_accuracy: {val_acc:.3f}")

                print("Classification train completed")

                if epoch > swa_start:
                    print("Stochastic average start")
                    avg_model.update_parameters(my_model)
                    swa_scheduler.step()
                else:
                    scheduler.step()
    weight_path = "weights/donwconv_swa_weights.pth"
    torch.save(my_model.state_dict(), weight_path)
    print(f"model weight saved to {weight_path}")
Code Example #30
        if fold > args.nfold - 1:
            break

        max_acc = 0.
        min_loss = 1e10
        print('Training with {} started'.format(fold))

        print(len(trn_idx), len(val_idx))
        train_loader, val_loader = prepare_dataloader(train, trn_idx, val_idx)

        device = torch.device('cuda')

        model = CassvaImgClassifier(args.model,
                                    train.label.nunique(),
                                    pretrained=True).to(device)
        swa_model = AveragedModel(model).to(device)

        model = torch.nn.DataParallel(model)

        scaler = GradScaler()

        if 'vit' in args.model:
            optimizer = torch.optim.AdamW(model.parameters(),
                                          lr=args.lr,
                                          weight_decay=0.001)
            # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
        else:
            #optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
            optimizer = torch.optim.SGD(model.parameters(),
                                        lr=args.lr,
                                        momentum=0.9,