Example #1
def test_loss_fixed_size(batch):
    device = torch.device('cpu')
    loss_func_mean = losses.get_criterion(device, 'mean')
    loss_func_mean_of_mean = losses.get_criterion(device, 'mean_of_mean')
    loss_mean = loss_func_mean(batch)
    loss_mean_of_mean = loss_func_mean_of_mean(batch)
    assert loss_mean.size() == loss_mean_of_mean.size()
    assert torch.allclose(loss_mean,
                          loss_mean_of_mean), loss_mean - loss_mean_of_mean
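These tests receive `batch` from a pytest fixture defined elsewhere in the project; a minimal sketch of what such a fixture could look like is below (the shape and values are assumptions, the real fixture lives in the project's conftest).

import pytest
import torch


@pytest.fixture
def batch():
    # Hypothetical fixture: a fixed-size batch of scores for the criterion.
    torch.manual_seed(0)
    return torch.rand(4, 10, 10)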
Example #2
def test_hierarchy(batch):
    correct, total = metrics.accuracy_max(batch)
    acc = correct / total
    loss_func = losses.get_criterion(DEVICE, 'mean')
    if OPT_SCALE:
        res = scipy.optimize.minimize_scalar(
            lambda x: loss_func(torch.mul(batch, x)), bracket=(1e-1, 1e2))
        scale = res.x
        if scale <= 0:
            raise RuntimeError(
                "Something went wrong during the optimization process")
    else:
        scale = 216
    loss = loss_func(torch.mul(batch, scale))
    assert loss >= (1 - acc) * math.log(2)
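Example #2 searches for a multiplicative scale that minimizes the loss before checking the entropy-based lower bound. A standalone sketch of the same minimize_scalar pattern is shown below, with a stand-in binary cross-entropy instead of the project's criterion (the data and loss are placeholders, only the scipy/torch calls are standard).

import math

import scipy.optimize
import torch
import torch.nn.functional as F

logits = torch.randn(8, 10)                      # stand-in scores
targets = torch.randint(0, 2, (8, 10)).float()   # stand-in binary labels


def scaled_loss(scale: float) -> float:
    # Loss of the scores after multiplying by a scalar temperature.
    return F.binary_cross_entropy_with_logits(logits * scale, targets).item()


res = scipy.optimize.minimize_scalar(scaled_loss, bracket=(1e-1, 1e2))
print(res.x, scaled_loss(res.x), math.log(2))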
Example #3
    def __init__(self, args, **kwargs):
        super(DecTrainer, self).__init__(args, **kwargs)

        # dataloader
        self.trainloader = get_dataloader(args, cfg, 'train')
        # self.trainloader_val = get_dataloader(args, cfg, 'train_voc')
        self.valloader = get_dataloader(args, cfg, 'val')
        self.denorm = self.trainloader.dataset.denorm
        self.use_triplet = args.use_triplet
        self.loss_3d = args.loss_3d
        self.normalize_feature = args.normalize_feature

        self.nclass = get_num_classes(args)
        self.classNames = get_class_names(args)
        assert self.nclass == len(self.classNames) - 1

        self.classIndex = {}
        for i, cname in enumerate(self.classNames):
            self.classIndex[cname] = i

        # model
        self.enc = get_model(cfg.NET, num_classes=self.nclass)
        self.criterion_cls = get_criterion(cfg.NET.LOSS)

        # optimizer using different LR
        enc_params = self.enc.parameter_groups(cfg.NET.LR, cfg.NET.WEIGHT_DECAY)
        self.optim_enc = self.get_optim(enc_params, cfg.NET)

        # checkpoint management
        self._define_checkpoint('enc', self.enc, self.optim_enc)
        self._load_checkpoint(args.resume)

        self.fixed_batch = None
        self.fixed_batch_path = args.fixed_batch_path
        if os.path.isfile(self.fixed_batch_path):
            print("Loading fixed batch from {}".format(self.fixed_batch_path))
            self.fixed_batch = torch.load(self.fixed_batch_path)

        # using cuda
        if cfg.NUM_GPUS != 0:
            self.enc = nn.DataParallel(self.enc)
            self.criterion_cls = nn.DataParallel(self.criterion_cls)
            self.enc = self.enc.cuda()
            self.criterion_cls = self.criterion_cls.cuda()

        # CHANGE: visual
        self.visual_times = 0
        self.dataset = args.dataset.lower()
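The fixed-batch handling above only loads a cached batch when the file already exists; presumably the trainer builds and saves it elsewhere on first use. A small sketch of that load-or-build-and-cache pattern (the helper name and the build callback are hypothetical):

import os

import torch


def get_fixed_batch(path, build_batch):
    # Load a cached diagnostic batch if present, otherwise build and cache it.
    if os.path.isfile(path):
        return torch.load(path)
    batch = build_batch()
    torch.save(batch, path)
    return batch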
Example #4
    def __init__(self,
                 backbone: nn.Module,
                 criterion=None,
                 device: str = 'cpu',
                 num_classes: int = 30,
                 alpha: float = .2):

        super(FacialKeypointsDetector, self).__init__()
        self.alpha = alpha
        self.backbone = backbone
        self.pool = nn.AdaptiveAvgPool2d(output_size=1)
        self.head = DoubleStageRegressor(backbone.out_features, num_classes)
        self.criterion = criterion
        self.rmse = get_criterion("RMSE")
        self.device = device
        self.name = f"fkd_{backbone.name}"
        self.to(device)
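Example #4 composes a backbone, a global average pool, and a regression head. A self-contained stand-in of the same pattern is sketched below; this is not the project's `FacialKeypointsDetector` or `DoubleStageRegressor`, just the shape of the idea with a tiny made-up backbone.

import torch
import torch.nn as nn


class TinyKeypointRegressor(nn.Module):
    # Stand-in for the detector: backbone -> global pool -> linear head.
    def __init__(self, num_keypoint_coords: int = 30):
        super().__init__()
        self.backbone = nn.Sequential(
            nn.Conv2d(1, 16, 3, padding=1), nn.ReLU(),
            nn.Conv2d(16, 32, 3, padding=1), nn.ReLU())
        self.pool = nn.AdaptiveAvgPool2d(output_size=1)
        self.head = nn.Linear(32, num_keypoint_coords)

    def forward(self, x):
        feats = self.pool(self.backbone(x)).flatten(1)
        return self.head(feats)


model = TinyKeypointRegressor()
out = model(torch.randn(2, 1, 96, 96))   # -> (2, 30) keypoint coordinates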
Example #5
    def __init__(self, args, **kwargs):
        super(DecTrainer, self).__init__(args, **kwargs)

        # dataloader
        self.trainloader = get_dataloader(args, cfg, 'train')
        self.trainloader_val = get_dataloader(args, cfg, 'train_voc')
        self.valloader = get_dataloader(args, cfg, 'val')
        self.denorm = self.trainloader.dataset.denorm

        self.nclass = get_num_classes(args)
        self.classNames = get_class_names(args)[:-1]
        assert self.nclass == len(self.classNames)

        self.classIndex = {}
        for i, cname in enumerate(self.classNames):
            self.classIndex[cname] = i

        # model
        self.enc = get_model(cfg.GENERATOR, num_classes=self.nclass)
        self.criterion_cls = get_criterion(cfg.GENERATOR.LOSS)
        print(self.enc)

        # optimizer using different LR
        enc_params = self.enc.parameter_groups(cfg.GENERATOR.LR, cfg.GENERATOR.WEIGHT_DECAY)
        self.optim_enc = self.get_optim(enc_params, cfg.GENERATOR)

        # checkpoint management
        self._define_checkpoint('enc', self.enc, self.optim_enc)
        self._load_checkpoint(args.resume)

        self.fixed_batch = None
        self.fixed_batch_path = args.fixed_batch_path
        if os.path.isfile(self.fixed_batch_path):
            print("Loading fixed batch from {}".format(self.fixed_batch_path))
            self.fixed_batch = torch.load(self.fixed_batch_path)

        # using cuda
        self.enc = nn.DataParallel(self.enc).cuda()
        self.criterion_cls = nn.DataParallel(self.criterion_cls).cuda()
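Unlike Example #3, this variant wraps the model in DataParallel and moves it to CUDA unconditionally, which fails on CPU-only machines. A guarded alternative is sketched below (an assumption about intent, not the project's code).

import torch
import torch.nn as nn


def wrap_for_gpus(module: nn.Module) -> nn.Module:
    # Only use DataParallel and .cuda() when at least one GPU is visible.
    if torch.cuda.is_available():
        return nn.DataParallel(module).cuda()
    return module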
Example #6
@pytest.mark.parametrize('func_data',
                         TEST_FUNCS,
                         ids=lambda func_data: func_data[1])
def test_torch_func(tensor_list, func_data):
    """ Test torch function """
    func, _ = func_data
    masked_tensor = maskedtensor.from_list(tensor_list, dims=(0, 1))
    res_mt = list(func(masked_tensor))
    res_lst = apply_list_tensors(tensor_list, func)
    for t_mt, t_lst in zip(res_mt, res_lst):
        assert t_mt.size() == t_lst.size()
        assert torch.allclose(t_mt, t_lst,
                              atol=ATOL), torch.norm(t_mt - t_lst,
                                                     p=float('inf'))


TEST_SCORE_FUNCS = [(get_criterion(DEVICE, 'mean'), 'loss')]


@pytest.mark.parametrize('func_data',
                         TEST_SCORE_FUNCS,
                         ids=lambda func_data: func_data[1])
def test_score_func(score_list, func_data):
    """ Test score function """
    func, _ = func_data
    masked_tensor = maskedtensor.from_list(score_list, dims=(0, 1))
    res_mt = func(masked_tensor)
    res_lst = func(torch.stack(score_list))
    assert torch.allclose(res_mt, res_lst,
                          atol=ATOL), torch.norm(res_mt - res_lst,
                                                 p=float('inf'))
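`apply_list_tensors` is a project helper that is not shown here; a plausible minimal reading, assuming it simply maps the function over each tensor in the list, is:

def apply_list_tensors(tensor_list, func):
    # Hypothetical implementation: apply `func` to every tensor independently
    # and return the per-tensor results as a plain Python list.
    return [func(t) for t in tensor_list]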
Example #7
def main():
    #     args = parse_args()
    IMAGE_PATH = 'data/images/'
    num_classes_1 = 168
    num_classes_2 = 11
    num_classes_3 = 7
    stats = (0.0692, 0.2051)

    train_df = pd.read_csv('data/train_with_folds.csv')
    # train_df = train_df.set_index(['image_id'])
    # train_df = train_df.drop(['grapheme'], axis=1)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    # Data Loaders
    # df_train, df_val = train_test_split(train_df, test_size=0.2, random_state=2021)

    # train_transform = get_transform(128)
    train_transform = A.Compose([
        A.CoarseDropout(max_holes=1, max_width=64, max_height=64, p=0.9),
        A.ShiftScaleRotate(rotate_limit=5, p=0.9),
        A.Normalize(mean=stats[0], std=stats[1], always_apply=True)
    ])
    val_transform = A.Compose(
        [A.Normalize(mean=stats[0], std=stats[1], always_apply=True)])

    BATCH_SIZE = 50
    folds = [{
        'train': [1, 2, 3, 4],
        'val': [0]
    }, {
        'train': [0, 2, 3, 4],
        'val': [1]
    }, {
        'train': [1, 0, 3, 4],
        'val': [2]
    }, {
        'train': [1, 2, 0, 4],
        'val': [3]
    }, {
        'train': [1, 2, 3, 0],
        'val': [4]
    }]

    # Loop over folds (hard-coded below to train only fold 4)
    for fld in range(1):
        fld = 4
        print(f'Train fold: {fld}')

        train_loader = get_loader(train_df,
                                  IMAGE_PATH,
                                  folds=folds[fld]['train'],
                                  batch_size=BATCH_SIZE,
                                  workers=4,
                                  shuffle=True,
                                  transform=train_transform)
        val_loader = get_loader(train_df,
                                IMAGE_PATH,
                                folds=folds[fld]['val'],
                                batch_size=BATCH_SIZE,
                                workers=4,
                                shuffle=False,
                                transform=val_transform)

        # Build Model
        model = load_model('seresnext50_32x4d', pretrained=True)
        model = model.cuda()

        # Optimizer
        optimizer = get_optimizer(model, lr=.00016)

        # Loss
        criterion1 = get_criterion()

        # Training
        history = pd.DataFrame()
        history2 = pd.DataFrame()

        torch.cuda.empty_cache()
        gc.collect()

        best = 0
        best2 = 1e10
        n_epochs = 100
        early_epoch = 0

        # Scheduler
        scheduler = get_scheduler(optimizer,
                                  train_loader=train_loader,
                                  epochs=n_epochs)

        # print('Loading previous training...')
        # state = torch.load('model.pth')
        # model.load_state_dict(state['model_state'])
        # best = state['kaggle']
        # best2 = state['loss']
        # print(f'Loaded model with kaggle score: {best}, loss: {best2}')
        # optimizer.load_state_dict(state['opt_state'])
        # scheduler.load_state_dict(state['scheduler_state'])
        # early_epoch = state['epoch'] + 1
        # print(f'Beginning at epoch {early_epoch}')
        # print('')

        for epoch in range(n_epochs - early_epoch):
            epoch += early_epoch
            torch.cuda.empty_cache()
            gc.collect()

            # ###################################################################
            # ############## TRAINING ###########################################
            # ###################################################################

            model.train()
            total_loss = 0
            total_loss_1 = 0
            total_loss_2 = 0
            total_loss_3 = 0

            # ratio = pow(.5,epoch/50)
            # ratio = 0.7
            ratio = 1.0

            t = tqdm(train_loader)
            for batch_idx, (img_batch, y_batch) in enumerate(t):
                img_batch = img_batch.cuda().float()
                y_batch = y_batch.cuda().long()

                optimizer.zero_grad()

                label1 = y_batch[:, 0]
                label2 = y_batch[:, 1]
                label3 = y_batch[:, 2]
                rand = np.random.rand()
                if rand < 0.5:
                    images, targets = mixup(img_batch, label1, label2, label3,
                                            0.4)
                    output1, output2, output3 = model(images)
                    l1, l2, l3 = mixup_criterion(output1,
                                                 output2,
                                                 output3,
                                                 targets,
                                                 rate=ratio)
                elif rand < 1:
                    images, targets = cutmix(img_batch, label1, label2, label3,
                                             0.4)
                    output1, output2, output3 = model(images)
                    l1, l2, l3 = cutmix_criterion(output1,
                                                  output2,
                                                  output3,
                                                  targets,
                                                  rate=ratio)
                # else:
                #     output1,output2,output3 = model(img_batch)
                #     l1, l2, l3 = criterion1(output1,output2,output3, y_batch)

                loss = l1 * .4 + l2 * .3 + l3 * .3
                total_loss += loss
                total_loss_1 += l1 * .4
                total_loss_2 += l2 * .3
                total_loss_3 += l3 * .3
                t.set_description(
                    f'Epoch {epoch+1}/{n_epochs}, LR: %6f, Ratio: %.4f, Loss: %.4f, Root loss: %.4f, Vowel loss: %.4f, Consonant loss: %.4f'
                    % (optimizer.state_dict()['param_groups'][0]['lr'], ratio,
                       total_loss / (batch_idx + 1), total_loss_1 /
                       (batch_idx + 1), total_loss_2 /
                       (batch_idx + 1), total_loss_3 / (batch_idx + 1)))
                # t.set_description(f'Epoch {epoch}/{n_epochs}, LR: %6f, Loss: %.4f'%(optimizer.state_dict()['param_groups'][0]['lr'],total_loss/(batch_idx+1)))

                if history is not None:
                    history.loc[epoch + batch_idx / len(train_loader),
                                'train_loss'] = loss.data.cpu().numpy()
                    history.loc[
                        epoch + batch_idx / len(train_loader),
                        'lr'] = optimizer.state_dict()['param_groups'][0]['lr']

                loss.backward()
                optimizer.step()
                # if scheduler is not None:
                #     scheduler.step()

            # ###################################################################
            # ############## VALIDATION #########################################
            # ###################################################################

            model.eval()
            loss = 0

            preds_1 = []
            preds_2 = []
            preds_3 = []
            tars_1 = []
            tars_2 = []
            tars_3 = []
            with torch.no_grad():
                for img_batch, y_batch in val_loader:
                    img_batch = img_batch.cuda().float()
                    y_batch = y_batch.cuda().long()

                    o1, o2, o3 = model(img_batch)

                    l1, l2, l3 = criterion1(o1, o2, o3, y_batch)
                    loss += l1 * .4 + l2 * .3 + l3 * .3

                    for j in range(len(o1)):
                        preds_1.append(torch.argmax(F.softmax(o1[j], dim=-1), -1))
                        preds_2.append(torch.argmax(F.softmax(o2[j], dim=-1), -1))
                        preds_3.append(torch.argmax(F.softmax(o3[j], dim=-1), -1))
                    for i in y_batch:
                        tars_1.append(i[0].data.cpu().numpy())
                        tars_2.append(i[1].data.cpu().numpy())
                        tars_3.append(i[2].data.cpu().numpy())

            preds_1 = [p.data.cpu().numpy() for p in preds_1]
            preds_2 = [p.data.cpu().numpy() for p in preds_2]
            preds_3 = [p.data.cpu().numpy() for p in preds_3]
            preds_1 = np.array(preds_1).T.reshape(-1)
            preds_2 = np.array(preds_2).T.reshape(-1)
            preds_3 = np.array(preds_3).T.reshape(-1)

            scores = []
            scores.append(
                sklearn.metrics.recall_score(tars_1, preds_1, average='macro'))
            scores.append(
                sklearn.metrics.recall_score(tars_2, preds_2, average='macro'))
            scores.append(
                sklearn.metrics.recall_score(tars_3, preds_3, average='macro'))
            final_score = np.average(scores, weights=[2, 1, 1])

            loss /= len(val_loader)

            if history2 is not None:
                history2.loc[epoch, 'val_loss'] = loss.cpu().numpy()
                history2.loc[epoch, 'acc'] = final_score
                history2.loc[epoch, 'root_acc'] = scores[0]
                history2.loc[epoch, 'vowel_acc'] = scores[1]
                history2.loc[epoch, 'consonant_acc'] = scores[2]

            if scheduler is not None:
                scheduler.step(final_score)

            print(f'Dev loss: {loss:.4f}, Kaggle: {final_score}, '
                  f'Root acc: {scores[0]}, Vowel acc: {scores[1]}, '
                  f'Consonant acc: {scores[2]}')

            if epoch > 0:
                history2['acc'].plot()
                plt.savefig(f'epoch{epoch + 1:03d}_{fld}_acc.png')
                plt.clf()

            if loss < best2:
                best2 = loss
                print(f'Saving best model... (loss)')
                torch.save(
                    {
                        'epoch': epoch,
                        'loss': loss,
                        'kaggle': final_score,
                        'model_state': model.state_dict(),
                        'opt_state': optimizer.state_dict(),
                        'scheduler_state': scheduler.state_dict()
                    }, f'model-1_{fld}.pth')

            if final_score > best:
                best = final_score
                print(f'Saving best model... (acc)')
                torch.save(
                    {
                        'epoch': epoch,
                        'loss': loss,
                        'kaggle': final_score,
                        'model_state': model.state_dict(),
                        'opt_state': optimizer.state_dict(),
                        'scheduler_state': scheduler.state_dict()
                    }, f'model_{fld}.pth')
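The training loop above mixes batches with `mixup`/`cutmix` helpers defined elsewhere in the project. For reference, a common single-head mixup formulation looks roughly like this (a sketch of the standard technique, not the project's exact three-label version):

import numpy as np
import torch
import torch.nn.functional as F


def mixup_single(images, labels, alpha=0.4):
    # Standard mixup: blend each image with a shuffled partner and keep
    # both labels together with the mixing coefficient lambda.
    lam = np.random.beta(alpha, alpha)
    perm = torch.randperm(images.size(0), device=images.device)
    mixed = lam * images + (1.0 - lam) * images[perm]
    return mixed, (labels, labels[perm], lam)


def mixup_loss_single(logits, targets):
    # Loss is the lambda-weighted sum of the losses against both labels.
    labels_a, labels_b, lam = targets
    return (lam * F.cross_entropy(logits, labels_a)
            + (1.0 - lam) * F.cross_entropy(logits, labels_b))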
Example #8
def main(**kwargs):
    training_path = kwargs.get('training_data_path')
    checkpoint_path = kwargs.get('checkpoint_path')
    tensorboard_log_dir = kwargs.get('tensorboard_log_dir')
    if not os.path.isdir(checkpoint_path):
        os.mkdir(checkpoint_path)

    backbone_name = kwargs.get('backbone')
    criterion_name = kwargs.get('criterion').upper()
    optimizer_name = kwargs.get('optimizer').upper()
    scheduler = kwargs.get('scheduler',None)

    pretrained = kwargs.get('pretrained')
    num_classes = kwargs.get('num_classes')
    device = kwargs.get('device')

    batch_size = kwargs.get('batch_size')
    epochs = kwargs.get('epochs')
    hyperparameters = kwargs.get('hyperparameters',{})
    augmentations = kwargs.get('augmentations',{})
    verbose = kwargs.get('verbose')

    train_split = kwargs.get('train_split')
    nfolds = kwargs.get('nfolds')

    val_splits = [(1-train_split) / nfolds] * nfolds

    resume = kwargs.get('resume')
    only_weights = kwargs.get('only_weights')

    seed = hyperparameters.get('seed')

    random_jitter = augmentations.get('jitter',{})
    random_horizontal_flip = augmentations.get('horizontal_flip', 0.5)
    random_rotation = augmentations.get('rotation', 20)

    writer = SummaryWriter(log_dir=tensorboard_log_dir, flush_secs=20)

    if seed: seed_everything(seed)

    # TODO calculate mean and std
    mean = hyperparameters.get('mean',0)
    std = hyperparameters.get('std',1)

    splits = [train_split] + val_splits
    assert sum(splits) <= 1, "given splits must sum to at most 1"

    original_img_size = 96

    criterion = get_criterion(criterion_name)

    backbone = get_backbone(backbone_name, pretrained=pretrained)

    model = FacialKeypointsDetector(backbone, criterion=criterion,
        device=device, num_classes=num_classes)

    optimizer = get_optimizer(optimizer_name, model.parameters(),
        kwargs=hyperparameters.get('optimizer',{}))

    scaler = GradScaler()

    val_transforms = None
    val_target_transform = TargetTransform(original_img_size)

    train_transform = train_target_transform = None
    train_transforms = transforms.TrainTransforms(model.get_input_size(), original_img_size, 
        mean=mean, std=std, brightness=random_jitter.get('brightness'),
        contrast=random_jitter.get('contrast'), saturation=random_jitter.get('saturation'),
        hue=random_jitter.get('hue'), rotation_degree=random_rotation,
        hflip=random_horizontal_flip)

    val_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(model.get_input_size()),
        transforms.ToTensor(),
        transforms.Normalize(mean,std)])

    train_dataset,*val_datasets = get_training_datasets(root_path=training_path,
        train_transforms=(train_transforms,train_transform,train_target_transform),
        val_transforms=(val_transforms,val_transform,val_target_transform),
        split_ratios=splits)

    val_dls = []
    train_dl = torch.utils.data.DataLoader(train_dataset,
        num_workers=4, batch_size=batch_size,
        pin_memory=True, collate_fn=custom_collate_fn, shuffle=True)

    for val_ds in val_datasets:
        val_dls.append( torch.utils.data.DataLoader(
            val_ds, batch_size=batch_size, num_workers=2) )

    current_epoch = 0
    best_loss = math.inf
    if resume:
        print(Fore.CYAN, f"loading checkpoint from {checkpoint_path}",Style.RESET_ALL)
        best_loss,current_epoch = load_checkpoint(model, optimizer, scheduler=scheduler,
            save_path=checkpoint_path, suffix='last', only_weights=only_weights)

    try:
        for epoch in range(current_epoch,epochs):
            training_loop(train_dl, model, epoch, epochs, optimizer, writer,scaler,
                scheduler=scheduler, verbose=verbose)

            val_losses = []
            for i,val_dl in enumerate(val_dls):
                val_loss = validation_loop(val_dl, model)
                val_losses.append(val_loss)
                print(Fore.LIGHTBLUE_EX, f"validation [{i+1}] loss:  {val_loss:.07f}",Style.RESET_ALL)
                writer.add_scalar(f'Loss/val_{i+1}', val_loss, epoch)

            mean_val_loss = sum(val_losses) / len(val_losses)
            print(Fore.LIGHTBLUE_EX, f"validation [mean] loss:  {mean_val_loss:.07f}",Style.RESET_ALL)
            writer.add_scalar(f'Loss/val_mean', mean_val_loss, epoch)
            writer.flush()
            if mean_val_loss < best_loss:
                best_loss = mean_val_loss
                print(Fore.CYAN, "saving best checkpoint...",Style.RESET_ALL)
                save_checkpoint(model,optimizer,epoch,best_loss,
                    scheduler=scheduler, suffix='best', save_path=checkpoint_path)

            print(Fore.CYAN, "saving last checkpoint...",Style.RESET_ALL)
            save_checkpoint(model,optimizer,epoch,best_loss,
                scheduler=scheduler, suffix='last', save_path=checkpoint_path)

    except KeyboardInterrupt:
        print(Fore.RED, "training interrupted with ctrl+c saving current state of the model",Style.RESET_ALL)
        save_checkpoint(model,optimizer,epoch,best_loss,
            scheduler=scheduler, suffix='last', save_path=checkpoint_path)
    writer.close()
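A hedged example of how this entry point might be invoked: the keyword names mirror the kwargs.get calls above, while the concrete values (paths, backbone and criterion names, split sizes) are placeholders and must match what the project's get_backbone/get_criterion/get_optimizer helpers actually accept.

if __name__ == "__main__":
    main(
        training_data_path="./data/training.csv",   # placeholder path
        checkpoint_path="./checkpoints",
        tensorboard_log_dir="./runs/fkd",
        backbone="resnet18",        # assumed to be a name get_backbone accepts
        criterion="mse",            # assumed to be a name get_criterion accepts
        optimizer="adam",
        pretrained=True,
        num_classes=30,
        device="cpu",               # or "cuda"
        batch_size=32,
        epochs=50,
        hyperparameters={"seed": 42},
        augmentations={},
        verbose=True,
        train_split=0.8,
        nfolds=2,
        resume=False,
        only_weights=False,
    )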