Example No. 1
 def __init__(self, batch_size, strategy, checkpoint_path, num_epochs, model, train_num_datasets, test_num_datasets,
              train_len=None, num_gpu=1, save_epoch=1, checkpoint_dir="training", num_classes=2,
              learning_rate=5e-4):
     self.num_epochs = num_epochs
     self.save_tensorboard_image = int(num_gpu) == 1
     self.checkpoint_path = checkpoint_path
     self.train_len = train_len
     self.batch_size = batch_size
     self.strategy = strategy
     self.num_gpu = int(num_gpu)
     self.train_epoch_step = (train_num_datasets // self.batch_size) - 1
     self.test_epoch_step = (test_num_datasets // self.batch_size) - 1
     self.save_epoch = int(save_epoch)
     self.model = model
     self.train_writer = tf.summary.create_file_writer('training')
     self.lr = self.multi_step_lr(initial_learning_rate=learning_rate, epochs=num_epochs)
     self.optimizer = AdamP(learning_rate=self.lr, weight_decay=1e-2)
     self.ckpt = tf.train.Checkpoint(model=self.model, optimizer=self.optimizer)
     self.ckpt_manager = tf.train.CheckpointManager(self.ckpt, checkpoint_dir, max_to_keep=5)
     if self.ckpt_manager.latest_checkpoint:
         self.ckpt.restore(self.ckpt_manager.latest_checkpoint)
         self.epoch = int(self.ckpt_manager.latest_checkpoint.split('-')[-1])
         tf.get_logger().info("Latest checkpoint restored:{}".format(self.ckpt_manager.latest_checkpoint))
     else:
         self.epoch = 0
         tf.get_logger().info('Not restoring from saved checkpoint')
     self.train_acc_metric = tf.keras.metrics.MeanIoU(
         num_classes=num_classes + 1 if num_classes == 1 else num_classes, name='train_accuracy')
     self.test_acc_metric = tf.keras.metrics.MeanIoU(
         num_classes=num_classes + 1 if num_classes == 1 else num_classes, name='test_accuracy')
     self.train_loss_metric = tf.keras.metrics.Mean(name='train_loss')
     self.test_loss_metric = tf.keras.metrics.Mean(name='test_loss')
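Note: the multi_step_lr helper called above is not included in this snippet. A minimal sketch of such a schedule, assuming a piecewise-constant decay expressed in optimizer steps (the milestones and gamma below are illustrative assumptions, not the original values):

 def multi_step_lr(self, initial_learning_rate, epochs, milestones=(0.5, 0.75), gamma=0.1):
     # Drop the learning rate by `gamma` at the given fractions of training,
     # converting epoch milestones into optimizer steps so the schedule can be
     # passed directly to the optimizer as its learning rate.
     steps_per_epoch = self.train_epoch_step + 1
     boundaries = [int(epochs * m) * steps_per_epoch for m in milestones]
     values = [initial_learning_rate * (gamma ** i) for i in range(len(boundaries) + 1)]
     return tf.keras.optimizers.schedules.PiecewiseConstantDecay(boundaries, values)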
Example No. 2
    def __init__(self, image_size, latent_dim = 512, fmap_max = 512, style_depth = 8, network_capacity = 16, transparent = False, fp16 = False, cl_reg = False, steps = 1, lr = 1e-4, fq_layers = [], fq_dict_size = 256, attn_layers = [], no_const = False):
        super().__init__()
        self.lr = lr
        self.steps = steps
        self.ema_updater = EMA(0.995)

        self.S = StyleVectorizer(latent_dim, style_depth)
        self.G = Generator(image_size, latent_dim, network_capacity, transparent = transparent, attn_layers = attn_layers, no_const = no_const, fmap_max = fmap_max)
        self.D = Discriminator(image_size, network_capacity, fq_layers = fq_layers, fq_dict_size = fq_dict_size, attn_layers = attn_layers, transparent = transparent, fmap_max = fmap_max)

        self.SE = StyleVectorizer(latent_dim, style_depth)
        self.GE = Generator(image_size, latent_dim, network_capacity, transparent = transparent, attn_layers = attn_layers, no_const = no_const)

        # experimental contrastive loss discriminator regularization
        assert not (transparent and cl_reg), 'contrastive loss regularization does not work with transparent images yet'
        self.D_cl = ContrastiveLearner(self.D, image_size, hidden_layer='flatten') if cl_reg else None

        # wrapper for augmenting all images going into the discriminator
        self.D_aug = AugWrapper(self.D, image_size)

        set_requires_grad(self.SE, False)
        set_requires_grad(self.GE, False)

        generator_params = list(self.G.parameters()) + list(self.S.parameters())
        self.G_opt = AdamP(generator_params, lr = self.lr, betas=(0.5, 0.9))
        self.D_opt = AdamP(self.D.parameters(), lr = self.lr, betas=(0.5, 0.9))

        self._init_weights()
        self.reset_parameter_averaging()

        self.cuda()
        
        if fp16:
            (self.S, self.G, self.D, self.SE, self.GE), (self.G_opt, self.D_opt) = amp.initialize([self.S, self.G, self.D, self.SE, self.GE], [self.G_opt, self.D_opt], opt_level='O2')
Example No. 3
    def configure_optimizers(self):
        if cfg['optimizer'] == 'adam':
            opt = torch.optim.Adam(self.predictor.parameters(),
                                   lr=5e-3,
                                   weight_decay=5e-4)
        elif cfg['optimizer'] == 'adamp':
            opt = AdamP(self.predictor.parameters(),
                        lr=0.0001,
                        betas=(0.9, 0.999),
                        weight_decay=1e-2)
        else:
            raise ValueError(f"Unknown optimizer: {cfg['optimizer']}")

        def lr_foo(epoch):
            if epoch < self.hparams.warm_up_step:
                # warm up lr
                lr_scale = 0.1**(self.hparams.warm_up_step - epoch)
            else:
                lr_scale = 0.95**epoch

            return lr_scale

        scheduler = LambdaLR(opt, lr_lambda=lr_foo)

        self.sched = scheduler
        self.opt = opt
        return [opt], [scheduler]
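For reference, LambdaLR multiplies the optimizer's base learning rate by whatever lr_foo returns for the current epoch, so the schedule above climbs by a factor of 10 per epoch until warm_up_step and then decays by 0.95 per epoch. A standalone sketch of that behaviour, assuming a dummy parameter and warm_up_step = 3:

import torch
from torch.optim.lr_scheduler import LambdaLR
from adamp import AdamP

warm_up_step = 3

def lr_foo(epoch):
    # Same shape as above: exponential warm-up, then exponential decay.
    return 0.1 ** (warm_up_step - epoch) if epoch < warm_up_step else 0.95 ** epoch

param = torch.nn.Parameter(torch.zeros(1))
opt = AdamP([param], lr=1e-4, betas=(0.9, 0.999), weight_decay=1e-2)
scheduler = LambdaLR(opt, lr_lambda=lr_foo)

for epoch in range(6):
    print(epoch, scheduler.get_last_lr())  # base lr (1e-4) scaled by lr_foo(epoch)
    opt.step()
    scheduler.step()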
Example No. 4
def select_optimizer(param, opt_name: str, lr: float, weight_decay: float):
    if opt_name == 'SGD':
        optimizer = SGDP(param,
                         lr=lr,
                         momentum=0.9,
                         weight_decay=weight_decay,
                         nesterov=True)
    elif opt_name == 'SGDP':
        optimizer = SGDP(param,
                         lr=lr,
                         momentum=0.9,
                         weight_decay=weight_decay,
                         nesterov=True)
    elif opt_name == 'Adam':
        optimizer = torch.optim.Adam(param,
                                     lr=lr,
                                     betas=(0.9, 0.999),
                                     eps=1e-08,
                                     weight_decay=weight_decay,
                                     amsgrad=False)
    elif opt_name == 'AdamP':
        optimizer = AdamP(param,
                          lr=lr,
                          betas=(0.9, 0.999),
                          weight_decay=weight_decay,
                          nesterov=True)
    else:
        raise NotImplementedError('The optimizer should be one of [SGD, SGDP, Adam, AdamP]')
    return optimizer
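A quick usage sketch for the helper above; the tiny linear model and single training step are purely illustrative:

import torch
import torch.nn as nn
import torch.nn.functional as F

model = nn.Linear(10, 2)
optimizer = select_optimizer(model.parameters(), opt_name='AdamP', lr=1e-3, weight_decay=1e-2)

x, y = torch.randn(8, 10), torch.randint(0, 2, (8,))
loss = F.cross_entropy(model(x), y)
optimizer.zero_grad()
loss.backward()
optimizer.step()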
Example No. 5
    def configure_optimizers(self):
        lr = float(cfg["train_params"]["lr"])
        if cfg['optimizer'] == 'adam':
            opt = torch.optim.Adam(self.predictor.parameters(),
                                   lr=lr,
                                   weight_decay=5e-4)
        elif cfg['optimizer'] == 'adamw':
            opt = torch.optim.AdamW(self.predictor.parameters(),
                                    lr=lr,
                                    weight_decay=5e-4)
        elif cfg['optimizer'] == 'adamp':
            opt = AdamP(self.predictor.parameters(),
                        lr=0.0005,
                        betas=(0.9, 0.999),
                        weight_decay=1e-2)
        else:
            raise ValueError(f"Unknown optimizer: {cfg['optimizer']}")

        def lr_foo(epoch):
            if epoch < self.hparams.warm_up_step:
                # warm up lr
                lr_scale = 0.1**(self.hparams.warm_up_step - epoch)
            else:
                lr_scale = 0.98**epoch
            lr_scale = max(1e-6, lr_scale)

            return lr_scale

        scheduler = LambdaLR(opt, lr_lambda=lr_foo)

        self.opt = opt
        return opt  # , [scheduler]
Example No. 6
def get_optimizer(optimizer_name,
                  model,
                  lr,
                  weight_decay=0.0,
                  filter=lambda x: True,
                  sparse_embedding=False):
    parameters = [p for name, p in model.named_parameters() if filter(name)]
    if not parameters:
        return None

    if optimizer_name == "sgd":
        return torch.optim.SGD(parameters, lr=lr, weight_decay=weight_decay)

    elif optimizer_name == "sgdm":
        assert not sparse_embedding
        return torch.optim.SGD(parameters,
                               lr=lr,
                               weight_decay=weight_decay,
                               momentum=0.9)

    elif optimizer_name == "adam":
        if sparse_embedding:
            sparse_parameters = []
            dense_parameters = []
            for name, p in model.named_parameters():
                if name.endswith("embedding.weight"):
                    sparse_parameters.append(p)
                else:
                    dense_parameters.append(p)
            sparse_adam = torch.optim.SparseAdam(sparse_parameters, lr=lr)
            dense_adam = torch.optim.Adam(dense_parameters,
                                          lr=lr,
                                          weight_decay=weight_decay)
            optimizer = MultipleOptimizer(sparse_adam, dense_adam)
            return optimizer
        else:
            return torch.optim.Adam(parameters,
                                    lr=lr,
                                    weight_decay=weight_decay)

    elif optimizer_name == "adame":
        assert not sparse_embedding
        return torch.optim.Adam(parameters,
                                lr=lr,
                                weight_decay=weight_decay,
                                eps=1e-3)

    elif optimizer_name == "adamw":
        assert not sparse_embedding
        return torch.optim.AdamW(parameters, lr=lr, weight_decay=weight_decay)

    elif optimizer_name == "adamp":
        assert not sparse_embedding
        return AdamP(parameters, lr=lr, weight_decay=weight_decay)

    else:
        raise NotImplementedError()
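MultipleOptimizer, used in the sparse-embedding branch above, is not defined in this snippet. Assuming it simply fans every call out to the wrapped optimizers, a minimal sketch could look like this:

class MultipleOptimizer:
    # Treat several optimizers as one: forward each call to every wrapped optimizer.
    def __init__(self, *optimizers):
        self.optimizers = optimizers

    def zero_grad(self):
        for opt in self.optimizers:
            opt.zero_grad()

    def step(self):
        for opt in self.optimizers:
            opt.step()

    def state_dict(self):
        return [opt.state_dict() for opt in self.optimizers]

    def load_state_dict(self, state_dicts):
        for opt, sd in zip(self.optimizers, state_dicts):
            opt.load_state_dict(sd)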
Example No. 7
def init_optimizer(optimizer_name,
                   model,
                   lr,
                   wd,
                   lr_restart_step=1,
                   lr_decay_gamma=0.9,
                   scheduler="step",
                   nesterov=False,
                   num_epochs=None,
                   steps_per_epoch=None):
    if optimizer_name == "sgd":
        optimizer_ft = optim.SGD(model.parameters(),
                                 lr=lr,
                                 momentum=0.9,
                                 weight_decay=wd,
                                 nesterov=nesterov)
    elif optimizer_name == "adam":
        optimizer_ft = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    elif optimizer_name == "adamp":
        from adamp import AdamP
        optimizer_ft = AdamP(model.parameters(),
                             lr=lr,
                             betas=(0.9, 0.999),
                             weight_decay=wd)  # 1e-2)
    elif optimizer_name == "sgdp":
        from adamp import SGDP
        optimizer_ft = SGDP(model.parameters(),
                            lr=lr,
                            weight_decay=wd,
                            momentum=0.9,
                            nesterov=nesterov)
    # else:
    #     opt_attr = getattr(toptim, optimizer_name)
    #     if opt_attr:
    #         optimizer_ft = opt_attr(model.parameters())
    #     else:
    #         raise Exception("unknown optimizer name", optimizer_name)

    if scheduler == "cosine":
        exp_lr_scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer_ft, lr_restart_step)
        use_lr_schedule_steps = True
    elif scheduler == "cycle":
        exp_lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(
            optimizer_ft,
            max_lr=lr,
            steps_per_epoch=steps_per_epoch,
            epochs=num_epochs,
            pct_start=0.1)
        use_lr_schedule_steps = False
    elif scheduler == "step":
        exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft,
                                               step_size=lr_restart_step,
                                               gamma=lr_decay_gamma)
        use_lr_schedule_steps = False

    return optimizer_ft, exp_lr_scheduler, use_lr_schedule_steps
Example No. 8
def train(num_epochs, model, data_loader, val_loader, val_every, device, file_name):
    learning_rate = 0.0001
    from torch.optim.swa_utils import AveragedModel, SWALR
    from torch.optim.lr_scheduler import CosineAnnealingLR
    from segmentation_models_pytorch.losses import SoftCrossEntropyLoss, JaccardLoss
    from adamp import AdamP

    criterion = [SoftCrossEntropyLoss(smooth_factor=0.1), JaccardLoss('multiclass', classes=12)]
    optimizer = AdamP(params=model.parameters(), lr=learning_rate, weight_decay=1e-6)
    swa_scheduler = SWALR(optimizer, swa_lr=learning_rate)
    swa_model = AveragedModel(model)
    look = Lookahead(optimizer, la_alpha=0.5)

    print('Start training..')
    best_miou = 0
    for epoch in range(num_epochs):
        hist = np.zeros((12, 12))
        model.train()
        for step, (images, masks, _) in enumerate(data_loader):
            loss = 0
            images = torch.stack(images)  # (batch, channel, height, width)
            masks = torch.stack(masks).long()  # (batch, channel, height, width)

            # move tensors to the device for GPU computation
            images, masks = images.to(device), masks.to(device)

            # inference
            outputs = model(images)
            for i in criterion:
                loss += i(outputs, masks)
            # loss computation (cross entropy loss)

            look.zero_grad()
            loss.backward()
            look.step()

            outputs = torch.argmax(outputs.squeeze(), dim=1).detach().cpu().numpy()
            hist = add_hist(hist, masks.detach().cpu().numpy(), outputs, n_class=12)
            acc, acc_cls, mIoU, fwavacc = label_accuracy_score(hist)
            # print loss and mIoU at the given step interval
            if (step + 1) % 25 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, mIoU: {:.4f}'.format(
                    epoch + 1, num_epochs, step + 1, len(data_loader), loss.item(), mIoU))

        # run validation at the given interval and save the best model
        if (epoch + 1) % val_every == 0:
            avrg_loss, val_miou = validation(epoch + 1, model, val_loader, criterion, device)
            if val_miou > best_miou:
                print('Best performance at epoch: {}'.format(epoch + 1))
                print('Save model in', saved_dir)
                best_miou = val_miou
                save_model(model, file_name = file_name)

        if epoch > 3:
            swa_model.update_parameters(model)
            swa_scheduler.step()
Example No. 9
def get_optim(model: nn.Module, optim_type: str, lr: float):
    if optim_type == Config.Adam:
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif optim_type == Config.SGD:
        optimizer = optim.SGD(model.parameters(), lr=lr)
    elif optim_type == Config.Momentum:
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
    elif optim_type == Config.AdamP:
        optimizer = AdamP(model.parameters(),
                          lr=lr,
                          betas=(0.9, 0.999),
                          weight_decay=1e-2)
    return optimizer
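Config is only referenced above, not shown; it appears to be a simple namespace of optimizer identifiers. A hypothetical stand-in, assuming plain string constants:

class Config:
    # Hypothetical optimizer identifiers matching the branches in get_optim above.
    Adam = 'adam'
    SGD = 'sgd'
    Momentum = 'momentum'
    AdamP = 'adamp'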
Example No. 10
def create_optimizer(args, model, filter_bias_and_bn=True):
    opt_lower = args.opt.lower()
    weight_decay = args.weight_decay
    if weight_decay and filter_bias_and_bn:
        parameters = add_weight_decay(model, weight_decay)
        weight_decay = 0.
    else:
        parameters = model.parameters()

    opt_split = opt_lower.split('_')
    opt_lower = opt_split[-1]
    if opt_lower == 'sgd' or opt_lower == 'nesterov':
        optimizer = optim.SGD(parameters,
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=weight_decay,
                              nesterov=True)
    elif opt_lower == 'momentum':
        optimizer = optim.SGD(parameters,
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=weight_decay,
                              nesterov=False)
    elif opt_lower == 'adam':
        optimizer = optim.Adam(parameters,
                               lr=args.lr,
                               weight_decay=weight_decay,
                               eps=args.opt_eps)
    elif opt_lower == 'rmsproptf':
        optimizer = RMSpropTF(parameters,
                              lr=args.lr,
                              alpha=0.9,
                              eps=args.opt_eps,
                              momentum=args.momentum,
                              weight_decay=weight_decay)
    elif opt_lower == 'adamp':
        optimizer = AdamP(parameters,
                          lr=args.lr,
                          betas=(0.9, 0.999),
                          weight_decay=weight_decay)

    else:
        raise ValueError('Invalid optimizer: {}'.format(opt_lower))

    return optimizer
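add_weight_decay and RMSpropTF are timm-style helpers that are not part of this snippet. A usage sketch that sidesteps them by passing weight_decay=0.0, so model.parameters() is used directly (the argparse namespace and model are illustrative):

import argparse
import torch.nn as nn

args = argparse.Namespace(opt='adamp', lr=3e-4, weight_decay=0.0,
                          momentum=0.9, opt_eps=1e-8)
model = nn.Linear(16, 4)
optimizer = create_optimizer(args, model)  # -> AdamP over model.parameters() with lr=3e-4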
Example No. 11
 def configure_optimizers(self):
     if self.args.optimizer == 'AdamW':
         optimizer = AdamW(self.parameters(), lr=self.args.lr)
     elif self.args.optimizer == 'AdamP':
         from adamp import AdamP
         optimizer = AdamP(self.parameters(), lr=self.args.lr)
     else:
         raise NotImplementedError('Only AdamW and AdamP are supported!')
     if self.args.lr_scheduler == 'cos':
         scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=1, T_mult=2)
     elif self.args.lr_scheduler == 'exp':
         scheduler = ExponentialLR(optimizer, gamma=0.5)
     else:
         raise NotImplementedError(
             'Only cos and exp lr schedulers are supported!')
     return {
         'optimizer': optimizer,
         'scheduler': scheduler,
     }
Example No. 12
def get_optimizer(opt, model, momentum=0.9, wd=5e-4, nesterov=False):
    optimizer = None
    if opt.optim == 'sgd':
        optimizer = optim.SGD(
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=opt.lr,
            momentum=momentum,
            weight_decay=wd,
            nesterov=nesterov
        )
    elif opt.optim == 'adam':
        optimizer = optim.Adam(
            filter(lambda p: p.requires_grad, model.parameters()),
            lr=opt.lr
        )
    elif opt.optim == 'adamp':
        optimizer = AdamP(filter(lambda p: p.requires_grad, model.parameters()),
                          lr=opt.lr, betas=(0.9, 0.999), weight_decay=1e-2)

    return optimizer
Example No. 13
 def configure_optimizers(self):
     if cfg['optimizer'] == "Adam":
         optimizer = torch.optim.Adam(self.netD.parameters(), lr=cfg['lr'])
     elif cfg['optimizer'] == "AdamP":
         optimizer = AdamP(self.netD.parameters(),
                           lr=cfg['lr'],
                           betas=(0.9, 0.999),
                           weight_decay=1e-2)
     elif cfg['optimizer'] == "SGDP":
         optimizer = SGDP(self.netD.parameters(),
                          lr=cfg['lr'],
                          weight_decay=1e-5,
                          momentum=0.9,
                          nesterov=True)
     elif cfg['optimizer'] == "MADGRAD":
         from madgrad import MADGRAD
         optimizer = MADGRAD(self.netD.parameters(),
                             lr=cfg['lr'],
                             momentum=0.9,
                             weight_decay=0.01,
                             eps=1e-6)
     return optimizer
Example No. 14
def get_optimizer(model, optimizer_name, scheduler_name):
    if optimizer_name == 'Adam':
        optimizer = Adam(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'AdamW':
        optimizer = AdamW(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'AdamP':
        optimizer = AdamP(model.parameters(), lr=learning_rate)
    elif optimizer_name == 'MADGRAD':
        optimizer = madgrad.MADGRAD(model.parameters(), lr=learning_rate)
    else:
        optimizer = optim.Ranger(model.parameters(),
                                 lr=learning_rate,
                                 alpha=0.6,
                                 k=10)

    if scheduler_name == 'step':
        scheduler = StepLR(optimizer, 10, gamma=0.5)
    elif scheduler_name == 'reduce':
        scheduler = ReduceLROnPlateau(optimizer, factor=0.1, patience=10)
    else:
        scheduler = CosineAnnealingLR(optimizer, T_max=2, eta_min=0.)

    return optimizer, scheduler
Example No. 15
def load_optimizer(args, param_group):

    if args.optim.optimizer.lower() == 'sgd':
        optimizer = optim.SGD(
            param_group,
            args.optim.lr.init,
            momentum=args.optim.momentum,
            weight_decay=args.optim.wd.base,
            nesterov=args.optim.nesterov,
        )
    elif args.optim.optimizer.lower() == 'adamp':
        optimizer = AdamP(
            param_group,
            args.optim.lr.init,
            betas=(args.optim.momentum, 0.999),
            weight_decay=args.optim.wd.base,
            nesterov=args.optim.nesterov,
        )
    else:
        raise ValueError("Unknown optimizer : {}".format(args.optim.optimizer))

    set_init_lr(optimizer.param_groups)

    return optimizer
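set_init_lr is not shown above; a plausible minimal version, assuming it only records each parameter group's starting learning rate for later warm-up/decay bookkeeping:

def set_init_lr(param_groups):
    # Stash the initial lr of every group so later schedules can scale relative to it.
    for group in param_groups:
        group.setdefault('init_lr', group['lr'])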
Example No. 16
def train(data_dir, model_dir, args):
    seed_everything(args.seed)

    save_dir = increment_path(os.path.join(model_dir, args.name))

    # -- settings
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # -- dataset
    dataset_module = getattr(import_module("dataset"),
                             args.dataset)  # MaskBaseDataset
    dataset = dataset_module(data_dir=data_dir, val_ratio=args.val_ratio)
    num_classes = dataset.num_classes  # 18

    # -- augmentation
    transform_module = getattr(import_module("dataset"),
                               args.augmentation)  # default: BaseAugmentation
    transform = transform_module(
        resize=args.resize,
        mean=dataset.mean,
        std=dataset.std,
    )
    dataset.set_transform(transform)

    # -- data_loader
    train_set, val_set = dataset.split_dataset()

    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              num_workers=8,
                              shuffle=True,
                              pin_memory=use_cuda,
                              drop_last=True)

    val_loader = DataLoader(val_set,
                            batch_size=args.batch_size,
                            num_workers=8,
                            shuffle=False,
                            pin_memory=use_cuda,
                            drop_last=True)

    # -- model
    models = []
    model_module_gender = getattr(import_module("model"),
                                  args.model_gender)  # default: BaseModel
    model_gender = model_module_gender(num_classes=args.num_classes_gender,
                                       grad_point=args.grad_point).to(device)
    model_gender = torch.nn.DataParallel(model_gender)

    # -- loss & metric

    criterion_gender = create_criterion(
        args.criterion_gender, classes=args.num_classes_gender)  # default: f1
    if args.optimizer == "AdamP":
        optimizer_gender = AdamP(filter(lambda p: p.requires_grad,
                                        model_gender.parameters()),
                                 lr=args.lr,
                                 weight_decay=5e-4)
    else:
        opt_module = getattr(import_module('torch.optim'),
                             args.optimizer)  # default: Adam
        optimizer_gender = opt_module(filter(lambda p: p.requires_grad,
                                             model_gender.parameters()),
                                      lr=args.lr,
                                      weight_decay=5e-4)
    scheduler_gender = StepLR(optimizer_gender, args.lr_decay_step, gamma=0.5)

    # -- logging
    logger_gender = SummaryWriter(log_dir=os.path.join(save_dir, 'gender'))
    with open(Path(save_dir) / 'gender' / 'config.json', 'w',
              encoding='utf-8') as f:
        json.dump(vars(args), f, ensure_ascii=False, indent=4)

    best_val_acc_gender = 0
    best_val_loss_gender = np.inf
    for epoch in range(args.epochs):
        # train loop
        model_gender.train()
        loss_value_gender = 0
        matches_gender = 0
        for idx, train_batch in enumerate(train_loader):
            inputs, labels_mask, labels_gender, labels_age = train_batch
            inputs = inputs.to(device)
            labels_gender = labels_gender.to(device)

            optimizer_gender.zero_grad()

            outs_gender = model_gender(inputs)
            preds_gender = torch.argmax(outs_gender, dim=-1)
            loss_gender = criterion_gender(outs_gender, labels_gender)

            loss_gender.backward()
            optimizer_gender.step()

            loss_value_gender += loss_gender.item()
            matches_gender += (preds_gender == labels_gender).sum().item()
            if (idx + 1) % args.log_interval == 0:
                train_loss_gender = loss_value_gender / args.log_interval
                train_acc_gender = matches_gender / args.batch_size / args.log_interval
                current_lr_gender = get_lr(optimizer_gender)
                print(
                    f"Epoch[{epoch}/{args.epochs}]({idx + 1}/{len(train_loader)}) || "
                    f"training loss {train_loss_gender:4.4} || training accuracy {train_acc_gender:4.2%} || lr {current_lr_gender}"
                )
                logger_gender.add_scalar("Train/loss", train_loss_gender,
                                         epoch * len(train_loader) + idx)
                logger_gender.add_scalar("Train/accuracy", train_acc_gender,
                                         epoch * len(train_loader) + idx)

                loss_value_gender = 0
                matches_gender = 0

        scheduler_gender.step()

        #val loop
        with torch.no_grad():
            print("Calculating validation results...")
            model_gender.eval()
            val_loss_items_gender = []
            val_acc_items_gender = []
            figure = None
            for val_batch in val_loader:
                inputs, labels_mask, labels_gender, labels_age = val_batch
                inputs = inputs.to(device)
                labels_gender = labels_gender.to(device)

                outs_gender = model_gender(inputs)
                preds_gender = torch.argmax(outs_gender, dim=-1)

                loss_item_gender = criterion_gender(outs_gender,
                                                    labels_gender).item()
                acc_item_gender = (labels_gender == preds_gender).sum().item()
                val_loss_items_gender.append(loss_item_gender)
                val_acc_items_gender.append(acc_item_gender)

                if figure is None:
                    # inputs_np = torch.clone(inputs).detach().cpu().permute(0, 2, 3, 1).numpy()
                    inputs_np = torch.clone(inputs).detach().cpu()
                    inputs_np = inputs_np.permute(0, 2, 3, 1).numpy()
                    inputs_np = dataset_module.denormalize_image(
                        inputs_np, dataset.mean, dataset.std)
                    figure = grid_image(
                        inputs_np, labels_mask, preds_gender,
                        args.dataset != "MaskSplitByProfileDataset")
                    plt.show()

            val_loss_gender = np.sum(val_loss_items_gender) / len(val_loader)
            val_acc_gender = np.sum(val_acc_items_gender) / len(val_set)
            if val_loss_gender < best_val_loss_gender or val_acc_gender > best_val_acc_gender:
                save_model(model_gender, epoch,
                           val_loss_gender, val_acc_gender,
                           os.path.join(save_dir, "gender"), args.model_gender)
                if val_loss_gender < best_val_loss_gender and val_acc_gender > best_val_acc_gender:
                    print(
                        f"New best model_gender for val acc and val loss : {val_acc_gender:4.2%} {val_loss_gender:4.2}! saving the best model_gender.."
                    )
                    best_val_loss_gender = val_loss_gender
                    best_val_acc_gender = val_acc_gender
                elif val_loss_gender < best_val_loss_gender:
                    print(
                        f"New best model_gender for val loss : {val_loss_gender:4.2}! saving the best model_gender.."
                    )
                    best_val_loss_gender = val_loss_gender
                elif val_acc_gender > best_val_acc_gender:
                    print(
                        f"New best model_gender for val accuracy : {val_acc_gender:4.2%}! saving the best model_gender.."
                    )
                    best_val_acc_gender = val_acc_gender

            print(
                f"[Val] acc: {val_acc_gender:4.2%}, loss: {val_loss_gender:4.2} || "
                f"best acc: {best_val_acc_gender:4.2%}, best loss: {best_val_loss_gender:4.2}"
            )
            logger_gender.add_scalar("Val/loss", val_loss_gender, epoch)
            logger_gender.add_scalar("Val/accuracy", val_acc_gender, epoch)
            logger_gender.add_figure("results", figure, epoch)
            print()
Example No. 17
def train(data_dir, model_dir, args):
    seed_everything(args.seed)
    # args.__dict__ == vars(args)

    save_dir = increment_path(os.path.join(model_dir, args.name))

    # -- settings
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # -- dataset
    dataset_module = getattr(import_module("dataset"),
                             args.dataset)  # MaskBaseDataset
    dataset = dataset_module(data_dir=data_dir, val_ratio=args.val_ratio)
    num_classes = dataset.num_classes  # 18

    # -- augmentation
    transform_module = getattr(import_module("dataset"),
                               args.augmentation)  # default: BaseAugmentation
    transform = transform_module(
        resize=args.resize,
        mean=dataset.mean,
        std=dataset.std,
    )
    dataset.set_transform(transform)

    # -- data_loader
    train_set, val_set = dataset.split_dataset()

    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              num_workers=8,
                              shuffle=True,
                              pin_memory=use_cuda,
                              drop_last=True)

    val_loader = DataLoader(val_set,
                            batch_size=args.batch_size,
                            num_workers=8,
                            shuffle=False,
                            pin_memory=use_cuda,
                            drop_last=True)

    # -- model
    model_module = getattr(import_module("model"),
                           args.model)  # default: BaseModel
    model = model_module(num_classes=num_classes,
                         grad_point=args.grad_point).to(device)
    model = torch.nn.DataParallel(model)
    # if want model train begin from args.continue_epoch checkpoint.
    if args.continue_train:
        try_dir = find_dir_try(args.continue_try_num, model_dir,
                               args.continue_name)
        epoch_dir = find_dir_epoch(args.continue_epoch, try_dir)
        model.load_state_dict(torch.load(epoch_dir))

    # -- loss & metric
    if args.criterion == "cross_entropy":
        criterion = create_criterion(args.criterion)  # default: cross_entropy
    else:
        criterion = create_criterion(
            args.criterion, classes=num_classes)  # default: cross_entropy
    if args.optimizer == "AdamP":
        optimizer = AdamP(filter(lambda p: p.requires_grad,
                                 model.parameters()),
                          lr=args.lr,
                          weight_decay=5e-4)
    else:
        opt_module = getattr(import_module('torch.optim'),
                             args.optimizer)  # default: Adam
        optimizer = opt_module(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=args.lr,
                               weight_decay=5e-4)
    scheduler = StepLR(optimizer, args.lr_decay_step, gamma=0.5)

    # -- logging
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    with open(Path(save_dir) / 'config.json', 'w', encoding='utf-8') as f:
        json.dump(vars(args), f, ensure_ascii=False, indent=4)

    best_val_acc = 0
    best_val_loss = np.inf
    for epoch in range(args.epochs):
        # train loop
        model.train()
        loss_value = 0
        matches = 0
        for idx, train_batch in enumerate(train_loader):
            inputs, labels = train_batch
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outs = model(inputs)
            preds = torch.argmax(outs, dim=-1)
            loss = criterion(outs, labels)

            loss.backward()
            optimizer.step()

            loss_value += loss.item()
            matches += (preds == labels).sum().item()
            if (idx + 1) % args.log_interval == 0:
                train_loss = loss_value / args.log_interval
                train_acc = matches / args.batch_size / args.log_interval
                current_lr = get_lr(optimizer)
                print(
                    f"Epoch[{epoch}/{args.epochs}]({idx + 1}/{len(train_loader)}) || "
                    f"training loss {train_loss:4.4} || training accuracy {train_acc:4.2%} || lr {current_lr}"
                )

                loss_value = 0
                matches = 0

        scheduler.step()

        #val loop
        with torch.no_grad():
            print("Calculating validation results...")
            model.eval()
            val_loss_items = []
            val_acc_items = []
            figure = None
            for val_batch in val_loader:
                inputs, labels = val_batch
                inputs = inputs.to(device)
                labels = labels.to(device)

                outs = model(inputs)
                preds = torch.argmax(outs, dim=-1)

                loss_item = criterion(outs, labels).item()
                acc_item = (labels == preds).sum().item()
                val_loss_items.append(loss_item)
                val_acc_items.append(acc_item)

                if figure is None:
                    # inputs_np = torch.clone(inputs).detach().cpu().permute(0, 2, 3, 1).numpy()
                    inputs_np = torch.clone(inputs).detach().cpu()
                    inputs_np = inputs_np.permute(0, 2, 3, 1).numpy()
                    inputs_np = dataset_module.denormalize_image(
                        inputs_np, dataset.mean, dataset.std)
                    figure = grid_image(
                        inputs_np, labels, preds,
                        args.dataset != "MaskSplitByProfileDataset")
                    plt.show()

            val_loss = np.sum(val_loss_items) / len(val_loader)
            val_acc = np.sum(val_acc_items) / len(val_set)
            if val_loss < best_val_loss or val_acc > best_val_acc:
                save_model(model, epoch, val_loss, val_acc, save_dir,
                           args.model)
                if val_loss < best_val_loss and val_acc > best_val_acc:
                    print(
                        f"New best model for val acc and val loss : {val_acc:4.2%} {val_loss:4.2}! saving the best model.."
                    )
                    best_val_loss = val_loss
                    best_val_acc = val_acc
                elif val_loss < best_val_loss:
                    print(
                        f"New best model for val loss : {val_loss:4.2}! saving the best model.."
                    )
                    save_model(model, epoch, val_loss, val_acc, save_dir,
                               args.model)
                    best_val_loss = val_loss
                elif val_acc > best_val_acc:
                    print(
                        f"New best model for val accuracy : {val_acc:4.2%}! saving the best model.."
                    )
                    save_model(model, epoch, val_loss, val_acc, save_dir,
                               args.model)
                    best_val_acc = val_acc

            print(
                f"[Val] acc: {val_acc:4.2%}, loss: {val_loss:4.2} || "
                f"best acc: {best_val_acc:4.2%}, best loss: {best_val_loss:4.2}"
            )
            print()
Example No. 18
def train(img_dir, model_dir, args):
    seed_everything(args.seed)

    start = time.time()
    get_current_time()

    save_dir = increment_path(os.path.join(model_dir, args.name))

    # settings
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # dataset
    dataset_module = getattr(import_module("dataset"), args.dataset)
    dataset = dataset_module(
        img_dir=img_dir,
        val_ratio=args.val_ratio,
    )
    num_classes = dataset.num_classes

    transform_module = getattr(import_module("dataset"), args.augmentation)
    transform = transform_module(mean=dataset.mean, std=dataset.std)

    train_dataset, val_dataset = dataset.split_dataset()
    train_dataset.dataset.set_transform(transform["train"])
    val_dataset.dataset.set_transform(transform["val"])

    train_loader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        num_workers=2,
        shuffle=True,
        pin_memory=torch.cuda.is_available(),
        drop_last=True,
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=args.valid_batch_size,
        num_workers=2,
        shuffle=False,
        pin_memory=torch.cuda.is_available(),
        drop_last=True,
    )

    model_module = getattr(import_module("model"), args.model)
    model = model_module(num_classes=num_classes).to(device)

    model = torch.nn.DataParallel(model)

    criterion = create_criterion(args.criterion)

    optimizer = None
    if args.optimizer == "AdamP":
        optimizer = AdamP(model.parameters(), lr=args.lr)
    else:
        opt_module = getattr(import_module("torch.optim"), args.optimizer)
        optimizer = opt_module(
            model.parameters(),
            # filter(lambda p: p.requires_grad, model.parameters()),
            lr=args.lr,
            # weight_decay=5e-4,
        )

    # scheduler = StepLR(optimizer, args.lr_decay_step, gamma=0.5)

    logger = SummaryWriter(log_dir=save_dir)

    best_val_acc = 0
    best_val_loss = np.inf
    best_val_f1 = 0

    for epoch in range(args.epochs):
        model.train()
        train_loss = 0
        train_acc = 0
        train_f1 = 0
        for i, data in enumerate(tqdm(train_loader)):
            imgs, labels = data
            imgs = imgs.float().to(device)
            labels = labels.long().to(device)

            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            preds = torch.argmax(outputs, 1)
            acc = (preds == labels).sum().item() / len(imgs)
            t_f1_score = f1_score(
                labels.cpu().detach().numpy(),
                preds.cpu().detach().numpy(),
                average="macro",
            )

            train_loss += loss
            train_acc += acc
            train_f1 += t_f1_score

            if (i + 1) % args.log_interval == 0:
                train_loss /= args.log_interval
                train_acc /= args.log_interval
                train_f1 /= args.log_interval
                current_lr = get_lr(optimizer)
                print(
                    f"Epoch[{epoch + 1}/{args.epochs}]({i + 1}/{len(train_loader)}) || trainin_loss {train_loss:.4f} || training acc {train_acc:.4f} || train f1_score {train_f1:.4f} || lr {current_lr}"
                )

                logger.add_scalar("Train/loss", train_loss,
                                  epoch * len(train_loader) + i)
                logger.add_scalar("Train/accuracy", train_acc,
                                  epoch * len(train_loader) + i)
                logger.add_scalar("Train/F1-score", train_f1,
                                  epoch * len(train_loader) + i)

                train_loss = 0
                train_acc = 0
                train_f1 = 0

        # scheduler.step()

        # training counts as done only once the full epoch has finished
        # after each finished epoch, keep the checkpoint with the best score
        with torch.no_grad():
            print("Validation step---------------------")
            model.eval()
            val_loss_items = []
            val_acc_items = []
            val_f1_items = []

            for data in tqdm(val_loader):
                imgs, labels = data
                imgs = imgs.float().to(device)
                labels = labels.long().to(device)

                outputs = model(imgs)
                preds = torch.argmax(outputs, 1)

                loss = criterion(outputs, labels).item()
                acc = (labels == preds).sum().item()
                val_f1 = f1_score(
                    labels.cpu().detach().numpy(),
                    preds.cpu().detach().numpy(),
                    average="macro",
                )

                val_loss_items.append(loss)
                val_acc_items.append(acc)
                val_f1_items.append(val_f1)

            val_loss = np.sum(val_loss_items) / len(val_loader)
            val_acc = np.sum(val_acc_items) / len(val_dataset)
            val_f1 = np.sum(val_f1_items) / len(val_loader)

            print(
                f"val_loader: {len(val_loader)} | val_dataset: {len(val_dataset)}"
            )

            best_val_loss = min(best_val_loss, val_loss)
            best_val_f1 = max(val_f1, best_val_f1)
            best_val_acc = max(val_acc, best_val_acc)

            # if val_acc > best_val_acc:
            # print(
            #     f"New best model for val acc: {val_acc:4.2%}! saving the best model..."
            # )
            #     torch.save(model.module.state_dict(), f"{save_dir}/best.pth")
            #     best_val_acc = val_acc

            if val_f1 > best_val_f1:
                print(
                    f"New best model for val f1: {val_f1:.4f}! saving the best model..."
                )
                torch.save(model.module.state_dict(), f"{save_dir}/best.pth")
                best_val_f1 = val_f1

            # TODO: is this the right place to save the last model??
            # torch.save(model.module.state_dict(), f"{save_dir}/last.pth")
            print(
                f"[Val] acc: {val_acc:.4f}, loss: {val_loss:.4f} || best acc: {best_val_acc:.4f}, best loss: {best_val_loss:.4f}"
            )

            logger.add_scalar("Val/loss", val_loss, epoch)
            logger.add_scalar("Val/accuracy", val_acc, epoch)
            logger.add_scalar("Val/f1-score", val_f1, epoch)
            print()

    torch.save(model.module.state_dict(), f"{save_dir}/last.pth")

    # How much time training taken
    times = time.time() - start
    minute, sec = divmod(times, 60)
    print(f"Finish Training! Taken time is {minute} minutes {sec} seconds")
Example No. 19
    return model_ft, input_size


model, input_size = initialize_model(model_name,
                                     num_classes,
                                     feature_extract,
                                     use_pretrained=True)

#model = MobileNetV3(num_classes).to(device)
#model=SuperLightMobileNet(num_classes)
#model=EfficientNet(1,1,num_classes)
#model = Xception(num_classes).to(device)
#self.criterion = nn.CrossEntropyLoss().to(self.device)
CEloss = nn.CrossEntropyLoss()
optimizer = AdamP(model.parameters(),
                  lr=0.01,
                  betas=(0.9, 0.999),
                  weight_decay=1e-2)
#optimizer = torch.optim.Adam(, lr=lr,weight_decay=)
#scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,patience=10,min_lr=0.0001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=[30, 60, 100],
                                                 gamma=0.1)
model = nn.DataParallel(model)
model.to(device)

import numpy as np
total_epoch = 50
total_iteration_per_epoch = int(np.ceil(len(train_dataset) / batch_size))

for epoch in range(1, total_epoch + 1):
    model.train()
Example No. 20
def pseudo_labeling(num_epochs, model, data_loader, val_loader,
                    unlabeled_loader, device, val_every, file_name):
    # Instead of using current epoch we use a "step" variable to calculate alpha_weight
    # This helps the model converge faster
    from torch.optim.swa_utils import AveragedModel, SWALR
    from segmentation_models_pytorch.losses import SoftCrossEntropyLoss, JaccardLoss
    from adamp import AdamP

    criterion = [
        SoftCrossEntropyLoss(smooth_factor=0.1),
        JaccardLoss('multiclass', classes=12)
    ]
    optimizer = AdamP(params=model.parameters(), lr=0.0001, weight_decay=1e-6)
    swa_scheduler = SWALR(optimizer, swa_lr=0.0001)
    swa_model = AveragedModel(model)
    optimizer = Lookahead(optimizer, la_alpha=0.5)

    step = 100
    size = 256
    best_mIoU = 0
    model.train()
    print('Start Pseudo-Labeling..')
    for epoch in range(num_epochs):
        hist = np.zeros((12, 12))
        for batch_idx, (imgs, image_infos) in enumerate(unlabeled_loader):

            # Forward Pass to get the pseudo labels
            # --------------------------------------------- pass the test (unlabeled) batch through the model
            model.eval()
            outs = model(torch.stack(imgs).to(device))
            oms = torch.argmax(outs.squeeze(), dim=1).detach().cpu().numpy()
            oms = torch.Tensor(oms)
            oms = oms.long()
            oms = oms.to(device)

            # --------------------------------------------- training

            model.train()
            # Now calculate the unlabeled loss using the pseudo label
            imgs = torch.stack(imgs)
            imgs = imgs.to(device)
            # preds_array = preds_array.to(device)

            output = model(imgs)
            loss = 0
            for each in criterion:
                loss += each(output, oms)

            unlabeled_loss = alpha_weight(step) * loss

            # Backpropogate
            optimizer.zero_grad()
            unlabeled_loss.backward()
            optimizer.step()
            output = torch.argmax(output.squeeze(),
                                  dim=1).detach().cpu().numpy()
            hist = add_hist(hist,
                            oms.detach().cpu().numpy(),
                            output,
                            n_class=12)

            if (batch_idx + 1) % 25 == 0:
                acc, acc_cls, mIoU, fwavacc = label_accuracy_score(hist)
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, mIoU:{:.4f}'.
                      format(epoch + 1, num_epochs, batch_idx + 1,
                             len(unlabeled_loader), unlabeled_loss.item(),
                             mIoU))
            # For every 50 batches, train one epoch on labeled data
            if batch_idx % 50 == 0:

                # Normal training procedure
                for batch_idx, (images, masks, _) in enumerate(data_loader):
                    labeled_loss = 0
                    images = torch.stack(images)
                    # (batch, channel, height, width)
                    masks = torch.stack(masks).long()

                    # move tensors to the device for GPU computation
                    images, masks = images.to(device), masks.to(device)

                    output = model(images)

                    for each in criterion:
                        labeled_loss += each(output, masks)

                    optimizer.zero_grad()
                    labeled_loss.backward()
                    optimizer.step()

                # Now we increment step by 1
                step += 1

        if (epoch + 1) % val_every == 0:
            avrg_loss, val_mIoU = validation(epoch + 1, model, val_loader,
                                             criterion, device)
            if val_mIoU > best_mIoU:
                print('Best performance at epoch: {}'.format(epoch + 1))
                print('Save model in', saved_dir)
                best_mIoU = val_mIoU
                save_model(model, file_name=file_name)

        model.train()

        if epoch > 3:
            swa_model.update_parameters(model)
            swa_scheduler.step()
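alpha_weight is not defined in this snippet. In the usual pseudo-labeling recipe it ramps the weight of the unlabeled loss up over the first training steps; a sketch under that assumption (T1, T2 and af are illustrative values, not the original ones):

def alpha_weight(step, T1=100, T2=700, af=3.0):
    # Linearly ramp the unlabeled-loss weight from 0 to af between steps T1 and T2.
    if step < T1:
        return 0.0
    if step > T2:
        return af
    return (step - T1) / (T2 - T1) * af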
Example No. 21
margin.to(device)
nomargin = ArcMarginForTest(in_feature=512,
                            out_feature=num_classes,
                            easy_margin=True)
nomargin.to(device)
# TensorBoard: build the network graph

# writer.add_graph(margin, (model(images.to(device)),labels.to(device)))
# writer.close()
classes = tuple([x for x in range(0, num_classes)])
#criterion = FocalLoss(gamma=2, alpha=0.25).to(device)
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = AdamP([{
    'params': model.parameters(),
    'weight_decay': 5e-6
}, {
    'params': margin.parameters(),
    'weight_decay': 5e-6
}],
                  lr=learning_rate)
#
# optimizer = torch.optim.Adam([
#     {'params': model.parameters(), 'weight_decay': 5e-6},
#     {'params': margin.parameters(), 'weight_decay': 5e-6}
# # ], lr=learning_rate)
# scheduler = lr_scheduler.ExponentialLR(optimizer, gamma= 0.99)
m_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=[15, 30],
                                                   gamma=0.33)
# co_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer,
#  T_max=10,
#  eta_min=0)
Example No. 22
    args.add_argument("--iteration", type=str, default='0')
    args.add_argument("--pause", type=int, default=0)

    config = args.parse_args()

    model = ResNet50(block=models.resnet.BasicBlock,
                      layers=[2, 2, 2, 2],
                      num_classes=config.num_classes)
    load_weight(model)
    criterion = nn.BCEWithLogitsLoss()

    model = model.cuda()
    criterion = criterion.cuda()

    optimizer = AdamP(
        [param for param in model.parameters() if param.requires_grad],
        lr=config.base_lr, weight_decay=1e-4)
    scheduler = StepLR(optimizer, step_size=20, gamma=0.1)

    if IS_ON_NSML:
        # This NSML block is mandatory. Do not change.
        bind_nsml(model)
        nsml.save('checkpoint')
        if config.pause:
            nsml.paused(scope=locals())

    if config.mode == 'train':
        # Local debugging block. This module is not mandatory.
        # But this would be quite useful for troubleshooting.
        train_loader = data_loader(root=DATASET_PATH, split='train')
        val_loader = data_loader(root=DATASET_PATH, split='val')
Example No. 23
def train(cfg):
    SEED = cfg.values.seed
    MODEL_NAME = cfg.values.model_name
    USE_KFOLD = cfg.values.val_args.use_kfold
    TRAIN_ONLY = cfg.values.train_only

    seed_everything(SEED)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # model_config_module = getattr(import_module('transformers'), cfg.values.model_arc + 'Config')
    model_config = AutoConfig.from_pretrained(MODEL_NAME)
    model_config.num_labels = 42

    whole_df = load_data("/opt/ml/input/data/train/train.tsv")
    additional_df = load_data("/opt/ml/input/data/train/additional_train.tsv")

    whole_label = whole_df['label'].values
    # additional_label = additional_df['label'].values

    if cfg.values.tokenizer_arc:
        tokenizer_module = getattr(import_module('transformers'),
                                   cfg.values.tokenizer_arc)
        tokenizer = tokenizer_module.from_pretrained(MODEL_NAME)
    else:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    early_stopping = EarlyStoppingCallback(early_stopping_patience=9999999,
                                           early_stopping_threshold=0.001)

    training_args = TrainingArguments(
        output_dir=cfg.values.train_args.output_dir,  # output directory
        save_total_limit=cfg.values.train_args.save_total_limit,  # max number of saved checkpoints to keep
        save_steps=cfg.values.train_args.save_steps,  # model saving step
        num_train_epochs=cfg.values.train_args.num_epochs,  # total number of training epochs
        learning_rate=cfg.values.train_args.lr,  # learning rate
        per_device_train_batch_size=cfg.values.train_args.train_batch_size,  # batch size per device during training
        per_device_eval_batch_size=cfg.values.train_args.eval_batch_size,  # batch size for evaluation
        warmup_steps=cfg.values.train_args.warmup_steps,  # number of warmup steps for the learning rate scheduler
        weight_decay=cfg.values.train_args.weight_decay,  # strength of weight decay
        max_grad_norm=cfg.values.train_args.max_grad_norm,
        logging_dir=cfg.values.train_args.logging_dir,  # directory for storing logs
        logging_steps=cfg.values.train_args.logging_steps,  # log saving step
        evaluation_strategy=cfg.values.train_args.evaluation_strategy,  # evaluation strategy to adopt during training
        # `no`: No evaluation during training.
        # `steps`: Evaluate every `eval_steps`.
        # `epoch`: Evaluate at the end of every epoch.
        eval_steps=cfg.values.train_args.eval_steps,  # evaluation step
        dataloader_num_workers=4,
        seed=SEED,
        label_smoothing_factor=cfg.values.train_args.label_smoothing_factor,
        load_best_model_at_end=True,
        # metric_for_best_model='accuracy'
    )

    if USE_KFOLD:
        kfold = StratifiedKFold(n_splits=cfg.values.val_args.num_k)

        k = 1
        for train_idx, val_idx in kfold.split(whole_df, whole_label):
            print('\n')
            cpprint('=' * 15 + f'{k}-Fold Cross Validation' + '=' * 15)
            train_df = whole_df.iloc[train_idx]
            # train_df = pd.concat((train_df, additional_df))
            val_df = whole_df.iloc[val_idx]

            if cfg.values.model_arc == 'Roberta':
                tokenized_train = roberta_tokenized_dataset(
                    train_df, tokenizer)
                tokenized_val = roberta_tokenized_dataset(val_df, tokenizer)
            else:
                tokenized_train = tokenized_dataset(train_df, tokenizer)
                tokenized_val = tokenized_dataset(val_df, tokenizer)

            RE_train_dataset = RE_Dataset(tokenized_train,
                                          train_df['label'].values)
            RE_val_dataset = RE_Dataset(tokenized_val, val_df['label'].values)

            try:
                if cfg.values.model_name == 'Bert':
                    model = BertForSequenceClassification.from_pretrained(
                        MODEL_NAME, config=model_config)
                else:
                    model = AutoModelForSequenceClassification.from_pretrained(
                        MODEL_NAME, config=model_config)
            except:
                # model_module = getattr(import_module('transformers'), cfg.values.model_arc)
                model_module = getattr(
                    import_module('transformers'),
                    cfg.values.model_arc + 'ForSequenceClassification')
                model = model_module.from_pretrained(MODEL_NAME,
                                                     config=model_config)

            model.parameters
            model.to(device)

            training_args.output_dir = cfg.values.train_args.output_dir + f'/{k}fold'
            training_args.logging_dir = cfg.values.train_args.output_dir + f'/{k}fold'

            optimizer = MADGRAD(model.parameters(),
                                lr=training_args.learning_rate)
            total_step = len(
                RE_train_dataset
            ) / training_args.per_device_train_batch_size * training_args.num_train_epochs
            scheduler = transformers.get_linear_schedule_with_warmup(
                optimizer,
                num_warmup_steps=training_args.warmup_steps,
                num_training_steps=total_step)
            optimizers = optimizer, scheduler

            trainer = Trainer(
                model=model,  # the instantiated 🤗 Transformers model to be trained
                args=training_args,  # training arguments, defined above
                train_dataset=RE_train_dataset,  # training dataset
                eval_dataset=RE_val_dataset,  # evaluation dataset
                compute_metrics=compute_metrics,  # define metrics function
                optimizers=optimizers,
                # callbacks=[early_stopping]
            )
            k += 1
            # train model
            trainer.train()

    else:
        cpprint('=' * 20 + f'START TRAINING' + '=' * 20)
        if not TRAIN_ONLY:
            train_df, val_df = train_test_split(
                whole_df,
                test_size=cfg.values.val_args.test_size,
                random_state=SEED)
            # train_df = pd.concat((train_df, additional_df))

            if cfg.values.model_arc == 'Roberta':
                tokenized_train = roberta_tokenized_dataset(
                    train_df, tokenizer)
                tokenized_val = roberta_tokenized_dataset(val_df, tokenizer)
            else:
                tokenized_train = tokenized_dataset(train_df, tokenizer)
                tokenized_val = tokenized_dataset(val_df, tokenizer)

            RE_train_dataset = RE_Dataset(tokenized_train,
                                          train_df['label'].values)
            RE_val_dataset = RE_Dataset(tokenized_val, val_df['label'].values)

            try:
                if cfg.values.model_name == 'Bert':
                    model = BertForSequenceClassification.from_pretrained(
                        MODEL_NAME, config=model_config)
                else:
                    model = AutoModelForSequenceClassification.from_pretrained(
                        MODEL_NAME, config=model_config)
            except Exception:
                # model_module = getattr(import_module('transformers'), cfg.values.model_arc)
                model_module = getattr(
                    import_module('transformers'),
                    cfg.values.model_arc + 'ForSequenceClassification')
                model = model_module.from_pretrained(MODEL_NAME,
                                                     config=model_config)

            model.to(device)

            optimizer = transformers.AdamW(model.parameters(),
                                           lr=training_args.learning_rate)
            total_step = (len(RE_train_dataset) /
                          training_args.per_device_train_batch_size *
                          training_args.num_train_epochs)
            # scheduler = transformers.get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=training_args.warmup_steps, num_training_steps=total_step)
            scheduler = transformers.get_linear_schedule_with_warmup(
                optimizer,
                num_warmup_steps=training_args.warmup_steps,
                num_training_steps=total_step)
            optimizers = optimizer, scheduler

            trainer = Trainer(
                model=model,  # the instantiated 🤗 Transformers model to be trained
                args=training_args,  # training arguments, defined above
                train_dataset=RE_train_dataset,  # training dataset
                eval_dataset=RE_val_dataset,  # evaluation dataset
                compute_metrics=compute_metrics,  # define metrics function
                optimizers=optimizers,
                callbacks=[early_stopping])

            # train model
            trainer.train()

        else:
            training_args.evaluation_strategy = 'no'

            if cfg.values.model_arc == 'Roberta':
                print('Roberta')
                tokenized_train = roberta_tokenized_dataset(
                    whole_df, tokenizer)
            else:
                tokenized_train = tokenized_dataset(whole_df, tokenizer)

            RE_train_dataset = RE_Dataset(tokenized_train,
                                          whole_df['label'].values)

            try:
                model = AutoModelForSequenceClassification.from_pretrained(
                    MODEL_NAME, config=model_config)
            except Exception:
                # model_module = getattr(import_module('transformers'), cfg.values.model_arc)
                model_module = getattr(
                    import_module('transformers'),
                    cfg.values.model_arc + 'ForSequenceClassification')
                model = model_module.from_pretrained(MODEL_NAME,
                                                     config=model_config)

            model.to(device)

            training_args.output_dir = cfg.values.train_args.output_dir + '/only_train'
            training_args.logging_dir = cfg.values.train_args.output_dir + '/only_train'

            optimizer = AdamP(model.parameters(),
                              lr=training_args.learning_rate)
            total_step = (len(RE_train_dataset) /
                          training_args.per_device_train_batch_size *
                          training_args.num_train_epochs)
            scheduler = transformers.get_linear_schedule_with_warmup(
                optimizer,
                num_warmup_steps=training_args.warmup_steps,
                num_training_steps=total_step)
            optimizers = optimizer, scheduler

            trainer = Trainer(
                model=model,  # the instantiated 🤗 Transformers model to be trained
                args=training_args,  # training arguments, defined above
                train_dataset=RE_train_dataset,  # training dataset
                optimizers=optimizers,
                # callbacks=[early_stopping]
            )

            # train model
            trainer.train()
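
Both branches above share one pattern: build the optimizer and scheduler by hand and pass them to the 🤗 Trainer as an (optimizer, scheduler) tuple so it does not create its default AdamW. A minimal, self-contained sketch of that pattern (the model name, step count, and hyperparameters below are illustrative, not taken from the config above):

import transformers
from transformers import (AutoModelForSequenceClassification, Trainer,
                          TrainingArguments)
from adamp import AdamP

model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')
training_args = TrainingArguments(output_dir='./results',
                                  num_train_epochs=3,
                                  per_device_train_batch_size=16,
                                  warmup_steps=500,
                                  learning_rate=5e-5)

optimizer = AdamP(model.parameters(), lr=training_args.learning_rate)
total_step = 1000  # normally len(train_dataset) / batch_size * num_train_epochs
scheduler = transformers.get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=training_args.warmup_steps,
    num_training_steps=total_step)

# Handing over the tuple stops the Trainer from building its own optimizer.
# trainer = Trainer(model=model, args=training_args,
#                   train_dataset=train_dataset, eval_dataset=val_dataset,
#                   compute_metrics=compute_metrics,
#                   optimizers=(optimizer, scheduler))
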
Esempio n. 24
0
elif args.optimizer == 'SDG' and args.sam:
    base_optimizer = optim.SGD
    optimizer = SAM(net.parameters(),
                    base_optimizer,
                    lr=initial_lr,
                    momentum=momentum,
                    weight_decay=weight_decay)
elif args.optimizer == 'SDGP':
    optimizer = SGDP(net.parameters(),
                     lr=0.1,
                     weight_decay=1e-5,
                     momentum=0.9,
                     nesterov=True)
elif args.optimizer == 'ADAMP':
    optimizer = AdamP(net.parameters(),
                      lr=0.01,
                      betas=(0.9, 0.999),
                      weight_decay=1e-2)
else:
    optimizer = optim.SGD(net.parameters(),
                          lr=initial_lr,
                          momentum=momentum,
                          weight_decay=weight_decay)

steps = 15
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, steps)

criterion = MultiBoxLoss(num_classes, 0.35, True, 0, True, 7, 0.35, False)

priorbox = PriorBox(cfg, image_size=(img_dim, img_dim))
with torch.no_grad():
    priors = priorbox.forward()
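
The branch above reflects how the projected optimizers mirror their plain counterparts: SGDP keeps SGD's momentum/nesterov interface, while AdamP takes Adam-style betas. A hedged sketch of the same selection written as a small factory (the helper name and default values are illustrative, not from the original script):

import torch.optim as optim
from adamp import AdamP, SGDP

def build_optimizer(name, params, lr, momentum=0.9, weight_decay=5e-4):
    if name == 'SGDP':
        # SGD-style interface with projected weight decay
        return SGDP(params, lr=lr, momentum=momentum,
                    weight_decay=weight_decay, nesterov=True)
    if name == 'ADAMP':
        # Adam-style betas instead of momentum
        return AdamP(params, lr=lr, betas=(0.9, 0.999),
                     weight_decay=weight_decay)
    # fallback: vanilla SGD
    return optim.SGD(params, lr=lr, momentum=momentum,
                     weight_decay=weight_decay)
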
Esempio n. 25
0
def get_adamp(lr=0.001, model=None, weight_decay=1e-6):
    params = [p for p in model.parameters() if p.requires_grad]
    return AdamP(params, lr=lr, weight_decay=weight_decay)
Esempio n. 26
0
def adamp(params, lr):
    return AdamP(params, lr=lr, betas=(0.9, 0.999), weight_decay=1e-2)
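
A short usage sketch of the two factory helpers above, with a hypothetical single-layer model (not part of the original code):

import torch.nn as nn

net = nn.Linear(128, 10)

# get_adamp builds the parameter list internally
opt_a = get_adamp(lr=1e-3, model=net, weight_decay=1e-6)

# adamp expects an iterable of parameters
opt_b = adamp(net.parameters(), lr=1e-3)
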
Esempio n. 27
0
def train_no_val(img_dir, model_dir, args):
    seed_everything(args.seed)

    start = time.time()
    get_current_time()

    save_dir = increment_path(os.path.join(model_dir, args.name))

    # settings
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # dataset
    dataset_module = getattr(import_module("dataset"), args.dataset)
    dataset = dataset_module(
        img_dir=img_dir,
        val_ratio=args.val_ratio,
    )
    num_classes = dataset.num_classes

    transform_module = getattr(import_module("dataset"), args.augmentation)
    transform = transform_module(mean=dataset.mean, std=dataset.std)

    dataset.set_transform(transform["train"])

    train_loader = DataLoader(
        dataset,
        batch_size=args.batch_size,
        num_workers=2,
        shuffle=True,
        pin_memory=torch.cuda.is_available(),
        drop_last=True,
    )

    model_module = getattr(import_module("model"), args.model)
    model = model_module(num_classes=num_classes).to(device)

    model = torch.nn.DataParallel(model)

    criterion = create_criterion(args.criterion)

    optimizer = None
    if args.optimizer == "AdamP":
        optimizer = AdamP(model.parameters())
    else:
        opt_module = getattr(import_module("torch.optim"), args.optimizer)
        optimizer = opt_module(
            model.parameters(),
            # filter(lambda p: p.requires_grad, model.parameters()),
            lr=args.lr,
            # weight_decay=5e-4,
        )

    # scheduler = StepLR(optimizer, args.lr_decay_step, gamma=0.5)

    logger = SummaryWriter(log_dir=save_dir)


    for epoch in range(args.epochs):
        model.train()
        train_loss = 0
        train_acc = 0
        train_f1 = 0
        for i, data in enumerate(tqdm(train_loader)):
            imgs, labels = data
            imgs = imgs.float().to(device)
            labels = labels.long().to(device)

            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            preds = torch.argmax(outputs, 1)
            acc = (preds == labels).sum().item() / len(imgs)
            t_f1_score = f1_score(
                labels.cpu().detach().numpy(),
                preds.cpu().detach().numpy(),
                average="macro",
            )

            # .item() keeps a Python float so the computation graph is not retained
            train_loss += loss.item()
            train_acc += acc
            train_f1 += t_f1_score

            if (i + 1) % args.log_interval == 0:
                train_loss /= args.log_interval
                train_acc /= args.log_interval
                train_f1 /= args.log_interval
                current_lr = get_lr(optimizer)
                print(
                    f"Epoch[{epoch + 1}/{args.epochs}]({i + 1}/{len(train_loader)}) || trainin_loss {train_loss:.4f} || training acc {train_acc:.4f} || train f1_score {train_f1:.4f} || lr {current_lr}"
                )

                logger.add_scalar("Train/loss", train_loss,
                                  epoch * len(train_loader) + i)
                logger.add_scalar("Train/accuracy", train_acc,
                                  epoch * len(train_loader) + i)
                logger.add_scalar("Train/F1-score", train_f1,
                                  epoch * len(train_loader) + i)

                train_loss = 0
                train_acc = 0
                train_f1 = 0

    torch.save(model.module.state_dict(), f"{save_dir}/last.pth")

    # How much time training taken
    times = time.time() - start
    minute, sec = divmod(times, 60)
    print(f"Finish Training! Taken time is {minute} minutes {sec} seconds")
Esempio n. 28
0
                                    unsup_val=unsup_val,
                                    BATCH=args.batch)

    ##########model load #########
    print(args.pre_train)
    model = two_head_net(args.model, num_class, args.p_weight_path,
                         args.pre_train)

    ############sup train##########
    if args.sup_train:
        if args.pre_train:
            print("using pre trained model, do not need to train again!")
            pass
        else:
            #optimizer = optim.SGD(model.parameters(), lr=args.sup_lr, momentum=args.sup_momentum, weight_decay=args.sup_wdecay,nesterov=True)
            optimizer = AdamP(model.parameters(), lr=args.sup_lr)
            scheduler = lr_scheduler.MultiStepLR(optimizer,
                                                 milestones=MILESTONES)
            sup_train_exp(model.cuda(), criterions, optimizer, scheduler,
                          dataset_loader, args.sup_epoch, args.sup_path)
            print("========sup train fininsh!=============")

    ############unsup train###########
    if args.unsup_train:
        checkpoint = torch.load(args.sup_path, map_location='cuda:0')
        model.load_state_dict(checkpoint)
        # AdamP has no SGD-style momentum argument (it uses Adam betas),
        # so only lr and weight_decay are passed here.
        optimizer = AdamP(model.parameters(),
                          lr=args.unsup_lr,
                          weight_decay=args.unsup_wdecay)
        scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=MILESTONES)
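
As in the fix above, AdamP exposes Adam-style betas rather than an SGD-style momentum argument. A minimal sketch of its constructor keywords with a toy model (values are illustrative):

import torch.nn as nn
from adamp import AdamP

toy_model = nn.Linear(8, 2)
optimizer = AdamP(toy_model.parameters(),
                  lr=1e-3,
                  betas=(0.9, 0.999),  # replaces SGD momentum
                  eps=1e-8,
                  weight_decay=1e-2,
                  nesterov=False)
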
Esempio n. 29
0
def main():
    ##############################################################################
    if args.server == 'server_A':
        work_dir = os.path.join('/data1/JM/lung-seg-back-up', args.exp)
        print(work_dir)
    elif args.server == 'server_B':
        work_dir = os.path.join('/data1/workspace/JM_gen/lung-seg-back-up',
                                args.exp)
        print(work_dir)
    elif args.server == 'server_D':
        work_dir = os.path.join(
            '/daintlab/home/woans0104/workspace/'
            'lung-seg-back-up', args.exp)

        print(work_dir)
    ##############################################################################

    if not os.path.exists(work_dir):
        os.makedirs(work_dir)

    # copy this file to work dir to keep training configuration
    shutil.copy(__file__, os.path.join(work_dir, 'main.py'))
    with open(os.path.join(work_dir, 'args.pkl'), 'wb') as f:
        pickle.dump(args, f)

    source_dataset, target_dataset1, target_dataset2 \
        = loader.dataset_condition(args.source_dataset)

    # 1.load_dataset
    train_loader_source, test_loader_source \
        = loader.get_loader(server=args.server,
                            dataset=source_dataset,
                            train_size=args.train_size,
                            aug_mode=args.aug_mode,
                            aug_range=args.aug_range,
                            batch_size=args.batch_size,
                            work_dir=work_dir)

    train_loader_target1, _ = loader.get_loader(server=args.server,
                                                dataset=target_dataset1,
                                                train_size=1,
                                                aug_mode=False,
                                                aug_range=args.aug_range,
                                                batch_size=1,
                                                work_dir=work_dir)
    train_loader_target2, _ = loader.get_loader(server=args.server,
                                                dataset=target_dataset2,
                                                train_size=1,
                                                aug_mode=False,
                                                aug_range=args.aug_range,
                                                batch_size=1,
                                                work_dir=work_dir)

    test_data_li = [
        test_loader_source, train_loader_target1, train_loader_target2
    ]

    trn_logger = Logger(os.path.join(work_dir, 'train.log'))
    trn_raw_logger = Logger(os.path.join(work_dir, 'train_raw.log'))
    val_logger = Logger(os.path.join(work_dir, 'validation.log'))

    trn_logger_ae = Logger(os.path.join(work_dir, 'ae_train.log'))
    val_logger_ae = Logger(os.path.join(work_dir, 'ae_validation.log'))

    # 2.model_select
    model_seg = Unet2D(in_shape=(1, 256, 256))
    model_seg = model_seg.cuda()
    model_ae = ae_lung(in_shape=(1, 256, 256))
    model_ae = model_ae.cuda()

    cudnn.benchmark = True

    # 3.gpu select
    model_seg = nn.DataParallel(model_seg)
    model_ae = nn.DataParallel(model_ae)

    # 4.optim
    if args.optim == 'adam':
        optimizer_seg = torch.optim.Adam(model_seg.parameters(),
                                         betas=(args.adam_beta1, 0.999),
                                         eps=args.eps,
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

        optimizer_ae = torch.optim.Adam(model_ae.parameters(),
                                        betas=(args.adam_beta1, 0.999),
                                        eps=args.eps,
                                        lr=args.lr,
                                        weight_decay=args.weight_decay)
    elif args.optim == 'adamp':
        optimizer_seg = AdamP(model_seg.parameters(),
                              betas=(args.adam_beta1, 0.999),
                              eps=args.eps,
                              lr=args.lr,
                              weight_decay=args.weight_decay)

        optimizer_ae = AdamP(model_ae.parameters(),
                             betas=(args.adam_beta1, 0.999),
                             eps=args.eps,
                             lr=args.lr,
                             weight_decay=args.weight_decay)

    elif args.optim == 'sgd':
        optimizer_seg = torch.optim.SGD(model_seg.parameters(),
                                        lr=args.lr,
                                        weight_decay=args.weight_decay)

        optimizer_ae = torch.optim.SGD(model_ae.parameters(),
                                       lr=args.lr,
                                       weight_decay=args.weight_decay)

    # lr decay
    lr_schedule = args.lr_schedule
    lr_scheduler_seg = optim.lr_scheduler.MultiStepLR(
        optimizer_seg, milestones=lr_schedule[:-1], gamma=0.1)

    lr_scheduler_ae = optim.lr_scheduler.MultiStepLR(
        optimizer_ae, milestones=lr_schedule[:-1], gamma=0.1)

    # 5.loss

    criterion_seg = select_loss(args.seg_loss_function)
    criterion_ae = select_loss(args.ae_loss_function)
    criterion_embedding = select_loss(args.embedding_loss_function)

    ############################################################################
    # train

    best_iou = 0
    try:
        if args.train_mode:
            for epoch in range(lr_schedule[-1]):

                train(model_seg=model_seg,
                      model_ae=model_ae,
                      train_loader=train_loader_source,
                      epoch=epoch,
                      criterion_seg=criterion_seg,
                      criterion_ae=criterion_ae,
                      criterion_embedding=criterion_embedding,
                      optimizer_seg=optimizer_seg,
                      optimizer_ae=optimizer_ae,
                      logger=trn_logger,
                      sublogger=trn_raw_logger,
                      logger_ae=trn_logger_ae)

                iou = validate(model_seg=model_seg,
                               model_ae=model_ae,
                               val_loader=test_loader_source,
                               epoch=epoch,
                               criterion_seg=criterion_seg,
                               criterion_ae=criterion_ae,
                               logger=val_logger,
                               logger_ae=val_logger_ae)

                print('validation result ************************************')

                lr_scheduler_seg.step()
                lr_scheduler_ae.step()

                if args.val_size == 0:
                    is_best = 1
                else:
                    is_best = iou > best_iou
                best_iou = max(iou, best_iou)
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model_seg.state_dict(),
                        'optimizer': optimizer_seg.state_dict()
                    },
                    is_best,
                    work_dir,
                    filename='checkpoint.pth')

        print("train end")
    except RuntimeError as e:
        print(
            '#jm_private',
            '----------------------------------- training error: '
            'please send JM a message (KakaoTalk) '
            '--------------------------'
            '\n error message: {}'.format(e))

        import ipdb
        ipdb.set_trace()

    draw_curve(work_dir, trn_logger, val_logger)
    draw_curve(work_dir, trn_logger_ae, val_logger_ae, labelname='ae')

    # here is load model for last pth
    check_best_pth(work_dir)

    # validation
    if args.test_mode:
        print('Test mode ...')
        main_test(model=model_seg, test_loader=test_data_li, args=args)
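
save_checkpoint is not shown in this listing; a typical implementation (an assumption, following the common PyTorch pattern) writes the checkpoint and copies it aside when is_best is true:

import os
import shutil
import torch

def save_checkpoint(state, is_best, work_dir, filename='checkpoint.pth'):
    checkpoint_path = os.path.join(work_dir, filename)
    torch.save(state, checkpoint_path)
    if is_best:
        # keep a separate copy of the best checkpoint so far
        shutil.copyfile(checkpoint_path,
                        os.path.join(work_dir, 'model_best.pth'))
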
save_every_iters = len(train_loader)

num_epochs = 100

criterion = nn.CrossEntropyLoss()

lr = 0.001
weight_decay = 1e-2

optimizer = AdamP(
    [
        {"params": model.backbone.parameters()},
        {"params": model.classifier.parameters()},
    ],
    lr=1.0,
    betas=(0.9, 0.999),
    weight_decay=weight_decay,
)

le = len(train_loader)


def lambda_lr_scheduler(iteration, lr0, n, a):
    return lr0 * pow((1.0 - 1.0 * iteration / n), a)


lr_scheduler = lrs.LambdaLR(
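
Because the optimizer above uses a base learning rate of 1.0, LambdaLR multiplies that base by whatever the lambda returns, so lambda_lr_scheduler effectively yields the learning rate itself. A hedged sketch of how such a per-iteration polynomial decay is usually wired up (the objects and numbers below are illustrative):

from functools import partial

import torch.nn as nn
from torch.optim import lr_scheduler as lrs
from adamp import AdamP

toy_model = nn.Linear(16, 4)
toy_optimizer = AdamP(toy_model.parameters(), lr=1.0)  # base lr of 1.0
n_iterations = 100 * 500  # e.g. num_epochs * len(train_loader)

# lambda_lr_scheduler is the polynomial-decay function defined above
toy_scheduler = lrs.LambdaLR(
    toy_optimizer,
    lr_lambda=partial(lambda_lr_scheduler, lr0=0.001, n=n_iterations, a=0.9))

# toy_scheduler.step() is then called once per iteration; the effective lr
# decays from lr0 towards 0 over n_iterations steps.
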