Example #1
def configure_optimizers(self):
    optimizer = RAdam(self.parameters(), lr=self.hparams.learning_rate)
    scheduler = LambdaLR(optimizer, self.learning_rate_warmup)
    return {
        "optimizer": optimizer,
        "lr_scheduler": {
            "scheduler": scheduler,
            "interval": "step"
        },
    }
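self.learning_rate_warmup is not shown in this snippet; a minimal sketch of the step-wise linear warmup it could implement (the warmup_steps hyperparameter is an assumption, not part of the original code):

def learning_rate_warmup(self, step):
    # LambdaLR multiplies the base lr by the returned factor; with interval="step",
    # Lightning advances the scheduler after every optimizer step
    return min(1.0, (step + 1) / self.hparams.warmup_steps)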
Example #2
def get_optimizer(config, model, filter_bias_and_bn=True):
    opt_lower = config.optimizer.name.lower()
    weight_decay = config.optimizer.params.weight_decay
    if 'adamw' in opt_lower or 'radam' in opt_lower:
        # Compensate for the way current AdamW and RAdam optimizers apply LR to the weight-decay
        # I don't believe they follow the paper or original Torch7 impl which schedules weight
        # decay based on the ratio of current_lr/initial_lr
        weight_decay /= config.optimizer.params.lr
    if weight_decay and filter_bias_and_bn:
        # add_weight_decay() is expected to return param groups where bias/BatchNorm
        # params get no weight decay, hence weight_decay is zeroed for the optimizer
        parameters = add_weight_decay(model, weight_decay)
        weight_decay = 0.
    else:
        parameters = model.parameters()

    if 'fused' in opt_lower:
        assert has_apex and torch.cuda.is_available(), \
            'APEX and CUDA required for fused optimizers'

    opt_lookahead = config.optimizer.lookahead.apply
    if opt_lower == 'sgd':
        optimizer = optim.SGD(parameters,
                              lr=config.optimizer.params.lr,
                              momentum=config.optimizer.params.momentum,
                              weight_decay=weight_decay,
                              nesterov=True)
    elif opt_lower == 'adam':
        optimizer = optim.Adam(parameters, lr=config.optimizer.params.lr)
    elif opt_lower == 'adamw':
        optimizer = AdamW(parameters,
                          lr=config.optimizer.params.lr,
                          weight_decay=weight_decay,
                          eps=config.optimizer.params.opt_eps)
    elif opt_lower == 'nadam':
        optimizer = Nadam(parameters,
                          lr=config.optimizer.params.lr,
                          weight_decay=weight_decay,
                          eps=config.optimizer.params.opt_eps)
    elif opt_lower == 'radam':
        optimizer = RAdam(parameters,
                          lr=config.optimizer.params.lr,
                          weight_decay=weight_decay,
                          eps=config.optimizer.params.opt_eps)
    else:
        raise ValueError('Invalid optimizer: {}'.format(opt_lower))

    if opt_lookahead:
        optimizer = Lookahead(optimizer)

    return optimizer
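A usage sketch for the helper above; the config layout is only inferred from the attribute accesses in the function, and OmegaConf is assumed here purely for attribute-style access, so treat this as illustrative rather than the project's actual config:

from omegaconf import OmegaConf
import torch.nn as nn

model = nn.Linear(128, 10)
config = OmegaConf.create({
    'optimizer': {
        'name': 'radam',
        'params': {'lr': 1e-3, 'weight_decay': 1e-2, 'momentum': 0.9, 'opt_eps': 1e-8},
        'lookahead': {'apply': False},
    }
})
optimizer = get_optimizer(config, model)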
Example #3
    t = Transformer.trnsfrmr_nt(seq_len=seq_len,
                                ini_len=args.ini_len,
                                final_len=model_final_len).to(device)

elif args.model == 'lstm':
    from Models import LSTM
    t = LSTM.lstm(seq_len=seq_len,
                  ini_len=args.ini_len,
                  final_len=model_final_len).to(device)

if path.exists(args.param_file):
    t.load_state_dict(torch.load(args.param_file))

if args.optimizer == 'RAdam':
    from optimizers import RAdam
    optimizer = RAdam.RAdam(t.parameters(), lr=args.lr)
elif args.optimizer == 'Adam':
    optimizer = torch.optim.Adam(t.parameters(), lr=args.lr)

t = t.double()
train_mse = []
test_mse = [10000]

for ij in range(epochs):
    loss_list = []
    for i, batch in enumerate(train_data_loader):
        optimizer.zero_grad()
        in_batch = batch['in'].to(device)
        out = t(in_batch)
        loss = lossfn(batch['out'].to(device), out)
        loss_list.append(loss)
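        # the snippet is truncated here; a standard continuation (assumed, not taken
        # from the original source) would back-propagate and step the optimizer:
        loss.backward()
        optimizer.step()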
Example #4
        hparams['resume_dir'] = args.resume_dir
        args = Namespace(**hparams)
        net = DeepConvolutionalUNet(hidden_size=args.n_fft // 2 + 1)
        net = nn.DataParallel(net)
        model_path = os.path.join(args.resume_dir, 'model_best.ckpt')
        print(f'Resume model from {model_path} ...')
        checkpoint = torch.load(model_path)
        net.load_state_dict(checkpoint['model_state_dict'])
    net = net.to(device)

    # optimization
    # optimizer = optim.SGD(net.parameters(), lr=args.learning_rate, momentum=0.9)
    # optimizer = optim.Adam(net.parameters(), lr=args.learning_rate, weight_decay=0.1)
    optimizer = RAdam(net.parameters(),
                      lr=args.learning_rate,
                      weight_decay=0.1)
    scheduler = None
    if args.use_swa:
        steps_per_epoch = len(train_dataloader) // args.batch_size
        optimizer = SWA(optimizer,
                        swa_start=20 * steps_per_epoch,
                        swa_freq=steps_per_epoch)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer.optimizer,
                                                         mode="max",
                                                         patience=5,
                                                         factor=0.5)
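If the SWA wrapper above is torchcontrib's (an assumption; its import is not shown), the averaged weights still have to be copied into the model once training finishes, roughly:

    if args.use_swa:
        optimizer.swap_swa_sgd()  # swap the running SWA average into the model parameters
        # optionally recompute BatchNorm statistics with the averaged weights,
        # e.g. via torchcontrib's SWA.bn_update(train_dataloader, net)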
Example #5
elif args.model == 'b1':
    model = EfficientNet.from_pretrained('efficientnet-b1', num_classes=6)

elif args.model == 'b4':
    model = EfficientNet.from_pretrained('efficientnet-b4', num_classes=6)
elif args.model == 'b5':
    model = EfficientNet.from_pretrained('efficientnet-b5', num_classes=6)

model = torch.nn.DataParallel(model).cuda()
#model.to(device)

criterion = torch.nn.BCEWithLogitsLoss().cuda()
sub_criterion = torch.nn.BCEWithLogitsLoss(reduction='none').cuda()

plist = [{'params': model.parameters(), 'lr': 0.00001}]
optimizer = RAdam(plist, lr=0.00001)
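# note: the per-group 'lr' set in plist above overrides the default lr passed to RAdam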


def log_loss(output, label):
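    # rev_label is 1 where label == 0, so torch.abs(sigmoid(output) - rev_label) is the
    # probability the model assigns to the true class; the loss is the negative log of
    # the batch-mean of the per-sample products of those class probabilities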
    rev_label = (label == 0).float()
    output = torch.sigmoid(output)
    loss = torch.abs(output - rev_label)
    loss = -torch.log(loss.prod(-1).mean())
    return loss


# Train
#lr_sc = lr_scheduler.StepLR(optimizer, step_size=2)
if args.keep > 0:
    checkpoint = torch.load('/home/boh001/save_model/effi/{}.pth'.format(
        args.keep))
Example #6
def main():
    print(args.work_dir, args.exp)
    work_dir = os.path.join(args.work_dir, args.exp)

    if not os.path.exists(work_dir):
        os.makedirs(work_dir)

    # copy this file to work dir to keep training configuration
    shutil.copy(__file__, os.path.join(work_dir, 'main.py'))
    with open(os.path.join(work_dir, 'args.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # 1.dataset
    train_filename = args.trn_root
    test_filename = args.test_root

    trainset = Segmentation_2d_data(train_filename)
    valiset = Segmentation_2d_data(test_filename)

    train_loader = data.DataLoader(trainset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True)
    valid_loader = data.DataLoader(valiset, batch_size=args.batch_size, num_workers=args.num_workers, shuffle=True)

    trn_logger = Logger(os.path.join(work_dir, 'train.log'))
    trn_raw_logger = Logger(os.path.join(work_dir, 'train_raw.log'))
    val_logger = Logger(os.path.join(work_dir, 'validation.log'))

    if args.model == 'unet':
        net = Unet2D(in_shape=(1, 512, 512), padding=args.padding_size, momentum=args.batchnorm_momentum)
    elif args.model == 'unetcoord':
        net = Unet2D_coordconv(in_shape=(1, 512, 512), padding=args.padding_size,
                               momentum=args.batchnorm_momentum, coordnumber=args.coordconv_no, radius=False)
    elif args.model == 'unetmultiinput':
        net = Unet2D_multiinput(in_shape=(1, 512, 512), padding=args.padding_size,
                                momentum=args.batchnorm_momentum)
    elif args.model == 'scse_block':
        net = Unet_sae(in_shape=(1, 512, 512), padding=args.padding_size, momentum=args.batchnorm_momentum)
    else:
        raise ValueError('Not supported network.')

    # loss
    if args.loss_function == 'bce':
        criterion = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([args.bce_weight])).cuda()
    elif args.loss_function == 'dice':
        criterion = DiceLoss().cuda()
    else:
        raise ValueError('{} loss is not supported yet.'.format(args.loss_function))

    # optim
    if args.optim_function == 'sgd':
        optimizer = torch.optim.SGD(net.parameters(), lr=args.initial_lr, momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.optim_function == 'adam':
        optimizer = torch.optim.Adam(net.parameters(), lr=args.initial_lr, weight_decay=args.weight_decay)
    elif args.optim_function == 'radam':
        optimizer = RAdam(net.parameters(), lr=args.initial_lr, weight_decay=args.weight_decay)
    else:
        raise ValueError('{} optimizer is not supported yet.'.format(args.optim_function))

    net = nn.DataParallel(net).cuda()

    cudnn.benchmark = True

    lr_schedule = args.lr_schedule
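    # every entry of lr_schedule except the last is a MultiStepLR milestone;
    # the last entry is the total number of epochs (see range(lr_schedule[-1]) below)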
    lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                  milestones=lr_schedule[:-1],
                                                  gamma=0.1)
    best_iou = 0

    for epoch in range(lr_schedule[-1]):
        train(train_loader, net, criterion, optimizer, epoch, trn_logger, trn_raw_logger)

        iou = validate(valid_loader, net, criterion, epoch, val_logger)
        lr_scheduler.step()

        is_best = iou > best_iou
        best_iou = max(iou, best_iou)
        checkpoint_filename = 'model_checkpoint_{:0>3}.pth'.format(epoch + 1)
        save_checkpoint({'epoch': epoch + 1,
                            'state_dict': net.state_dict(),
                            'optimizer': optimizer.state_dict()},
                        is_best,
                        work_dir,
                        checkpoint_filename)

    draw_curve(work_dir, trn_logger, val_logger)
Example #7
def main():
    print(args.work_dir, args.exp)
    work_dir = os.path.join(args.work_dir, args.exp)

    kaggle_path = "/data2/sk_data/kaggle_data/stage_1_train_images_png"
    kaggle_csv_path = "/data2/sk_data/kaggle_data/bin_dataframe.csv"
    label_data = pd.read_csv(kaggle_csv_path)

    if not os.path.exists(work_dir):
        os.makedirs(work_dir)

    # copy this file to work dir to keep training configuration
    shutil.copy(__file__, os.path.join(work_dir, 'main.py'))
    with open(os.path.join(work_dir, 'args.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # 1.dataset

    train_filename = args.trn_root
    test_filename = args.test_root

    if args.model == "efficientnet" :
        if args.kaggle == True :
            trainset = load_kaggle_data_with_balanced(kaggle_path, kaggle_csv_path)
            class_sample_count = np.array([len(np.where(label_data["any"]==t)[0]) for t in np.unique(label_data["any"])])
            weight = 1. / class_sample_count
            train_weights = np.array([weight[t] for t in label_data["any"]])
            train_sampler = torch.utils.data.WeightedRandomSampler(weights=train_weights,
                                 num_samples=len(train_weights))
        else : 
            trainset = Classification_Data(train_filename)
            class_sample_count = np.array([len(np.where(label_data["any"]==t)[0]) for t in np.unique(label_data["any"])])
            weight = 1. / class_sample_count
            train_weights = np.array([weight[t] for t in label_data["any"]])
            train_sampler = torch.utils.data.WeightedRandomSampler(weights=train_weights,
                                 num_samples=len(train_weights))
        valiset = Classification_Data(test_filename)

    elif args.model == "resnet" :
        trainset = Classification_Data(train_filename)
        valiset = Classification_Data(test_filename)
    else :
        raise ValueError('Not supported network.')

    # train_history = History(len(trainset))
    if args.kaggle == False:
        train_loader = data.DataLoader(trainset, batch_size=args.batch_size, num_workers=args.num_workers,
                                       shuffle=False, sampler=train_sampler)
    else:
        train_loader = data.DataLoader(trainset, batch_size=args.batch_size, num_workers=args.num_workers,
                                       shuffle=True)

    valid_loader = data.DataLoader(valiset, batch_size=args.batch_size, num_workers=args.num_workers)

    # save input stats for later use
    trn_logger = Logger(os.path.join(work_dir, 'train.log'))
    trn_raw_logger = Logger(os.path.join(work_dir, 'train_raw.log'))
    val_logger = Logger(os.path.join(work_dir, 'validation.log'))
    print(len(trainset))

    # model

    if args.model == 'unet':
        net = Unet2D(in_shape=(args.multi_input, 512, 512), padding=args.padding_size, momentum=args.batchnorm_momentum)
    elif args.model == 'efficientnet':
        net = EfficientNet.from_pretrained('efficientnet-' + args.number, num_classes=1)
    elif args.model == 'resnet':
        net = models.resnet50(pretrained=True)
        num_ftrs = net.fc.in_features
        net.fc = nn.Linear(num_ftrs, 1)
        print("Load Resnet-50")
    else:
        raise ValueError('Not supported network.')

    # loss
    if args.loss_function == 'bce':
        criterion = nn.BCEWithLogitsLoss(pos_weight=torch.Tensor([args.bce_weight])).cuda()
    elif args.loss_function == "cross_entropy" :
        criterion = torch.nn.CrossEntropyLoss()
    else:
        raise ValueError('{} loss is not supported yet.'.format(args.loss_function))

    # optim
    if args.optim_function == 'sgd':
        optimizer = torch.optim.SGD(net.parameters(), lr=args.initial_lr, momentum=args.momentum,
                                    weight_decay=args.weight_decay)
    elif args.optim_function == 'adam':
        optimizer = torch.optim.Adam(net.parameters(), lr=args.initial_lr)
    elif args.optim_function == 'radam':
        optimizer = RAdam(net.parameters(), lr=args.initial_lr, weight_decay=args.weight_decay)
    else:
        raise ValueError('{} optimizer is not supported yet.'.format(args.optim_function))

    net = nn.DataParallel(net).cuda()
    cudnn.benchmark = True

    lr_schedule = args.lr_schedule
    lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                  milestones=lr_schedule[:-1],
                                                  gamma=0.1)

    best_acc = 0
    for epoch in range(lr_schedule[-1]):
        train(train_loader, net, criterion, optimizer, epoch, trn_logger, sublogger=trn_raw_logger,
              trainset=trainset, val_loader=valid_loader, val_logger=val_logger, val_mode=True)

        print("Done")
        loss, acc = validate(valid_loader, net, criterion, epoch, val_logger)

        lr_scheduler.step()
        best_acc = acc if best_acc == 0 else max(acc, best_acc)
        is_best = True  # note: is_best is always True here, so every epoch is checkpointed

        if is_best:
            checkpoint_filename = 'model_checkpoint_{:0>3}.pth'.format(epoch + 1)
            save_checkpoint({'epoch': epoch + 1,
                                'state_dict': net.state_dict(),
                                'optimizer': optimizer.state_dict()},
                            is_best,
                            work_dir,
                            checkpoint_filename)

    draw_curve(work_dir, trn_logger, val_logger)
Example #8
def configure_optimizers(self):
    return RAdam(self.parameters(), lr=self.hparams.learning_rate)