Esempio n. 1
0
def generate_submission(net,
                        config,
                        folds=1,
                        SUBM_OUT=None,
                        gen_csv=True,
                        attn=False):
    """Predict on the test set and optionally write prediction/submission CSVs.

    The network is put in eval mode, ``generate_preds`` is run ``folds`` times
    over the test loader and the results are averaged. When ``gen_csv`` is
    true, class-wise thresholds are obtained from ``find_threshold`` and two
    files are written: the raw predictions (plus a ``th`` column) and the
    submission produced by ``save_pred``.

    Args:
        net: Model to evaluate; must expose ``eval()``.
        config: Object providing ``imsize``, ``num_channels`` and
            ``batch_size`` (also forwarded to ``find_threshold``).
        folds: Number of prediction passes to average; must be >= 1.
        SUBM_OUT: Path of the submission CSV. Required when ``gen_csv`` is
            true. The raw-prediction path is derived from it by replacing
            every occurrence of ``'subm'`` with ``'preds'``.
        gen_csv: Whether to compute thresholds and write the CSV files.
        attn: Forwarded to ``generate_preds`` and ``find_threshold``.

    Returns:
        The averaged predictions as a NumPy array with 28 columns.

    Raises:
        ValueError: If ``folds`` is smaller than 1, or if ``gen_csv`` is true
            but ``SUBM_OUT`` is ``None``.
    """
    # Fail fast: without these checks the function would only crash on
    # ``SUBM_OUT.replace`` after the whole (expensive) prediction pass, and
    # ``folds=0`` would silently return NaN/inf predictions.
    if folds < 1:
        raise ValueError('folds must be a positive integer')
    if gen_csv and SUBM_OUT is None:
        raise ValueError('SUBM_OUT must be provided when gen_csv is True')

    print('Generating predictions...')

    net.eval()

    test_loader = get_test_loader(imsize=config.imsize,
                                  num_channels=config.num_channels,
                                  batch_size=config.batch_size)

    # Accumulate the per-pass predictions, then average over the passes.
    test_preds = torch.zeros(len(test_loader.dataset), 28)
    for _ in range(folds):
        test_preds += generate_preds(net, test_loader, test=True, attn=attn)
    test_preds = test_preds.numpy() / float(folds)

    if gen_csv:
        print('Generating submission with class wise thresholding...')
        best_th = find_threshold(net,
                                 config,
                                 class_wise=True,
                                 plot=True,
                                 attn=attn)

        preds_df = pd.DataFrame(data=test_preds)
        # The Series is aligned on the row index, so the thresholds occupy
        # the first len(best_th) rows of the 'th' column; remaining rows
        # are NaN.
        preds_df['th'] = pd.Series(best_th)
        preds_df.to_csv(SUBM_OUT.replace('subm', 'preds'), index=False)

        save_pred(test_preds, best_th, SUBM_OUT)

    return test_preds
Esempio n. 2
0
def subm_ensemble():
    """Ensemble the submission CSVs in ``./subm/`` by summing their labels.

    Every ``./subm/*.csv`` file is read, its ``Predicted`` column is converted
    row by row with ``label_gen_np`` (presumably into a 28-wide indicator
    vector - confirm against its definition) and the results are summed. The
    sum is passed to ``save_pred`` together with a threshold equal to half the
    number of files, and written to a timestamped file in ``./subm/``
    (optionally suffixed with ``args.outfile``).

    Raises:
        ValueError: If ``./subm/`` is empty or contains no ``*.csv`` file.
    """
    if len(os.listdir('./subm/')) == 0:
        raise ValueError('Submission directory is empty')

    # Materialise the matches so the file count is known up front; the
    # directory may be non-empty yet contain no CSV, in which case the
    # original loop index was never bound and the function hit a NameError.
    csv_paths = list(glob.iglob('./subm/*.csv'))
    if not csv_paths:
        raise ValueError('No CSV files found in submission directory')

    all_preds = np.zeros(
        (len(pd.read_csv('./data/sample_submission.csv')), 28))

    for filepath in csv_paths:
        print('Processing file', filepath.split('/')[-1])
        predi = pd.read_csv(filepath)
        # Use the builtin float: the ``np.float`` alias was deprecated in
        # NumPy 1.20 and removed in 1.24.
        all_preds += np.stack(
            predi['Predicted'].apply(label_gen_np)).astype(float)

    SUBM_OUT = './subm/subm_ensemble_{}.csv'.\
                        format(datetime.now().strftime('%Y-%m-%d_%H:%M:%S'))
    if args.outfile != '':
        SUBM_OUT = SUBM_OUT.replace('.csv', '_{}.csv'.format(args.outfile))

    # Threshold is half the number of contributing files.
    save_pred(all_preds, len(csv_paths) / 2., SUBM_OUT)
Esempio n. 3
0
def preds_ensemble():
    """Average the raw prediction CSVs in ``./preds/`` and write a submission.

    Each ``./preds/*.csv`` file is expected to hold the prediction columns
    followed by a final ``th`` column. The prediction columns are averaged
    across files, as is the first ``th`` value of each file, and both are
    handed to ``save_pred``. The output is a timestamped file in ``./subm/``
    (optionally suffixed with ``args.outfile``).

    Raises:
        ValueError: If ``./preds/`` is empty or contains no ``*.csv`` file.
    """
    if len(os.listdir('./preds/')) == 0:
        raise ValueError('Preds directory is empty')

    # Materialise the matches so the file count is known up front; a
    # directory holding only non-CSV files previously led to a NameError on
    # the unbound loop index.
    csv_paths = list(glob.iglob('./preds/*.csv'))
    if not csv_paths:
        raise ValueError('No CSV files found in preds directory')

    all_preds = np.zeros(
        (len(pd.read_csv('./data/sample_submission.csv')), 28))
    th = 0

    for filepath in csv_paths:
        print('Processing file', filepath.split('/')[-1])
        predi = pd.read_csv(filepath)
        # Add the underlying array rather than the DataFrame so the in-place
        # ndarray addition does not depend on pandas' ufunc dispatch.
        all_preds += predi[predi.columns[:-1]].values
        # NOTE(review): only row 0 of 'th' is used. If a file stores one
        # threshold per class in this column, the other classes' thresholds
        # are ignored here - confirm this is intended.
        th += predi['th'][0]

    n_files = float(len(csv_paths))
    all_preds /= n_files
    th /= n_files

    SUBM_OUT = './subm/pred_ensemble_{}.csv'.\
                        format(datetime.now().strftime('%Y-%m-%d_%H:%M:%S'))
    if args.outfile != '':
        SUBM_OUT = SUBM_OUT.replace('.csv', '_{}.csv'.format(args.outfile))

    save_pred(all_preds, th, SUBM_OUT)
Esempio n. 4
0
def main(args):
    """Build the model, optionally resume, then evaluate or train it.

    Steps, in order: create the checkpoint directory, instantiate the network
    named by ``args.netType`` wrapped in ``DataParallel`` on CUDA, build the
    MSE criterion and RMSprop optimizer, create the validation loader and
    restore state from ``args.resume`` when that file exists. With
    ``args.evaluation`` set, a single validation pass is run, its predictions
    are saved and the function returns. Otherwise the model is trained for
    epochs ``args.start_epoch .. args.epochs - 1``; after every epoch the
    metrics are logged and a checkpoint is saved, flagged as best when the
    validation AUC is at least the best seen so far.

    Args:
        args: Parsed command-line namespace carrying the model, data,
            optimisation and checkpoint options read throughout this function.
    """
    global best_acc
    global best_auc

    if not os.path.exists(args.checkpoint):
        os.makedirs(args.checkpoint)

    print("==> Creating model '{}-{}', stacks={}, blocks={}, feats={}".format(
        args.netType, args.pointType, args.nStacks, args.nModules,
        args.nFeats))

    print("=> Models will be saved at: {}".format(args.checkpoint))

    model = models.__dict__[args.netType](num_stacks=args.nStacks,
                                          num_blocks=args.nModules,
                                          num_feats=args.nFeats,
                                          use_se=args.use_se,
                                          use_attention=args.use_attention,
                                          num_classes=68)

    model = torch.nn.DataParallel(model).cuda()

    criterion = torch.nn.MSELoss(size_average=True).cuda()

    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)

    title = args.checkpoint.split('/')[-1] + ' on ' + args.data.split('/')[-1]

    Loader = get_loader(args.data)

    val_loader = torch.utils.data.DataLoader(Loader(args, 'A'),
                                             batch_size=args.val_batch,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    log_path = os.path.join(args.checkpoint, 'log.txt')
    log_columns = [
        'Epoch', 'LR', 'Train Loss', 'Valid Loss', 'Train Acc', 'Val Acc',
        'AUC'
    ]

    # Stays None only when a resume path was given but does not exist; a
    # fresh logger is then created lazily just before training starts.
    logger = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # The 'best_acc' slot of the checkpoint holds the best AUC (see
            # the save_checkpoint call below). Restore it into best_auc too,
            # since that is what the is_best comparison uses; otherwise a
            # resumed run forgets its best score.
            best_acc = checkpoint['best_acc']
            best_auc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> Loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            logger = Logger(log_path, title=title, resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(log_path, title=title)
        logger.set_names(log_columns)

    cudnn.benchmark = True
    print('=> Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / (1024. * 1024)))

    if args.evaluation:
        print('=> Evaluation only')
        D = args.data.split('/')[-1]
        save_dir = os.path.join(args.checkpoint, D)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        loss, acc, predictions, auc = validate(val_loader, model, criterion,
                                               args.netType, args.debug,
                                               args.flip)
        save_pred(predictions, checkpoint=save_dir)
        return

    if logger is None:
        # The requested checkpoint was missing: train from scratch with a
        # fresh log instead of failing later on an unbound ``logger``.
        logger = Logger(log_path, title=title)
        logger.set_names(log_columns)

    train_loader = torch.utils.data.DataLoader(Loader(args, 'train'),
                                               batch_size=args.train_batch,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)
    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, lr, args.schedule,
                                  args.gamma)
        print('=> Epoch: %d | LR %.8f' % (epoch + 1, lr))

        train_loss, train_acc = train(train_loader, model, criterion,
                                      optimizer, args.netType, args.debug,
                                      args.flip)
        # do not save predictions in model file
        valid_loss, valid_acc, predictions, valid_auc = validate(
            val_loader, model, criterion, args.netType, args.debug, args.flip)

        logger.append([
            int(epoch + 1), lr, train_loss, valid_loss, train_acc, valid_acc,
            valid_auc
        ])

        is_best = valid_auc >= best_auc
        best_auc = max(valid_auc, best_auc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'netType': args.netType,
                'state_dict': model.state_dict(),
                # Key name kept for checkpoint compatibility; the value is
                # the best validation AUC.
                'best_acc': best_auc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            predictions,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot(['AUC'])
    savefig(os.path.join(args.checkpoint, 'log.eps'))
Esempio n. 5
0
    else:
        logger = Logger(os.path.join(args.checkpoint, 'log.txt'), title=title)
        logger.set_names(['Epoch', 'LR', 'Train Loss', 'Valid Loss', 'Train Acc', 'Val Acc', 'AUC'])

    cudnn.benchmark = True
    print('=> Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / (1024. * 1024)))

    if args.evaluation:
        print('=> Evaluation only')
        D = args.data.split('/')[-1]
        save_dir = os.path.join(args.checkpoint, D)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        loss, acc, predictions, auc = validate(val_loader, model, criterion, args.netType,
                                                        args.debug, args.flip)
        save_pred(predictions, checkpoint=save_dir)
        return

    train_loader = torch.utils.data.DataLoader(
        Loader(args, 'train'),
        batch_size=args.train_batch,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)
    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, lr, args.schedule, args.gamma)
        print('=> Epoch: %d | LR %.8f' % (epoch + 1, lr))
        sys.stdout.flush()

        train_loss, train_acc, model= train(train_loader, model, criterion, optimizer, args.netType,
def main_subm(net=None, opcon=None, attn=False):
    """Load the best checkpoint and generate a submission from it.

    Without cosine annealing a single class-wise-thresholded submission is
    written by ``generate_submission``. With ``cosine_annealing`` enabled the
    predictions and thresholds of the best checkpoint are combined with those
    of every ``./model_weights/cycle*<exp_name>.pth`` snapshot, and both the
    averaged raw predictions and the final submission are written.

    Args:
        net: Model to use. When ``None`` the class named by
            ``config.model_name`` is built from ``model_list``, wrapped in
            ``DataParallel`` and moved to ``device``.
        opcon: Configuration object overriding the module-level ``config``.
            When ``None`` the module-level ``config`` is used.
        attn: Forwarded to ``generate_submission`` and ``find_threshold``.
    """
    # Bind to a separate local name: assigning to ``config`` inside this
    # function would make it function-local, so the ``opcon is None`` path
    # would raise UnboundLocalError instead of using the module-level config.
    cfg = config if opcon is None else opcon

    model_params = [cfg.model_name, cfg.exp_name]
    MODEL_CKPT = './model_weights/best_{}_{}.pth'.format(*model_params)

    if net is None:
        Net = getattr(model_list, cfg.model_name)
        net = Net(num_channels=cfg.num_channels)
        net = nn.parallel.DataParallel(net)
        net.to(device)

    print('Loading model from ' + MODEL_CKPT)

    # The checkpoint is either a bare state dict or a dict wrapping it under
    # 'state_dict'. Inspect it once instead of using a bare ``except`` that
    # would also hide unrelated failures and read the file twice.
    state = torch.load(MODEL_CKPT)
    if isinstance(state, dict) and 'state_dict' in state:
        state = state['state_dict']
    net.load_state_dict(state)

    SUBM_OUT = './subm/best_{}_{}.csv'.format(*model_params)
    if args.outfile != '':
        SUBM_OUT = SUBM_OUT.replace('.csv', '_{}.csv'.format(args.outfile))

    if not cfg.cosine_annealing:
        generate_submission(net,
                            cfg,
                            args.folds,
                            SUBM_OUT,
                            gen_csv=True,
                            attn=attn)
    else:
        test_preds_avg = generate_submission(net,
                                             cfg,
                                             args.folds,
                                             SUBM_OUT,
                                             gen_csv=False,
                                             attn=attn)
        # NOTE(review): the best model's threshold is counted twice
        # (2 * th, num_models = 2) but its predictions are added only once,
        # so test_preds_avg is divided by one more than the number of
        # prediction sets summed. Left as-is; confirm whether the
        # predictions should be doubled as well.
        best_th = 2 * find_threshold(net, cfg, plot=False, attn=attn)
        num_models = 2

        for MODEL_CKPT in glob.glob("./model_weights/cycle*{}.pth".format(
                cfg.exp_name)):
            print('Loading model from ' + MODEL_CKPT)
            net.load_state_dict(torch.load(MODEL_CKPT))
            test_preds_avg += generate_submission(net,
                                                  cfg,
                                                  args.folds,
                                                  SUBM_OUT,
                                                  gen_csv=False,
                                                  attn=attn)
            best_th += find_threshold(net, cfg, plot=False, attn=attn)
            num_models += 1

        test_preds_avg /= num_models
        best_th /= num_models

        preds_df = pd.DataFrame(data=test_preds_avg)
        preds_df['th'] = best_th
        preds_df.to_csv(SUBM_OUT.replace('subm', 'preds'), index=False)

        print("Generating submission with threshold = ", best_th)
        save_pred(test_preds_avg, best_th, SUBM_OUT)
Esempio n. 7
0
def main(args):
    """Build the model, optionally resume, then evaluate or train it.

    Steps, in order: create the checkpoint directory, instantiate the network
    named by ``args.netType`` wrapped in ``DataParallel`` on CUDA, build the
    MSE criterion and RMSprop optimizer, create the validation loader and
    restore state from ``args.resume`` when that file exists. With
    ``args.evaluation`` set, a single validation pass is run, its predictions
    are saved and the function returns. Otherwise the model is trained for
    epochs ``args.start_epoch .. args.epochs - 1``; after every epoch the
    metrics are logged and a checkpoint is saved, flagged as best when the
    validation AUC is at least the best seen so far.

    Args:
        args: Parsed command-line namespace carrying the model, data,
            optimisation and checkpoint options read throughout this function.
    """
    global best_acc
    global best_auc

    if not os.path.exists(args.checkpoint):
        os.makedirs(args.checkpoint)

    print("==> Creating model '{}-{}', stacks={}, blocks={}, feats={}".format(
        args.netType, args.pointType, args.nStacks, args.nModules, args.nFeats))

    print("=> Models will be saved at: {}".format(args.checkpoint))

    model = models.__dict__[args.netType](
        num_stacks=args.nStacks,
        num_blocks=args.nModules,
        num_feats=args.nFeats,
        use_se=args.use_se,
        use_attention=args.use_attention,
        num_classes=68)

    model = torch.nn.DataParallel(model).cuda()

    criterion = torch.nn.MSELoss(size_average=True).cuda()

    optimizer = torch.optim.RMSprop(
        model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    title = args.checkpoint.split('/')[-1] + ' on ' + args.data.split('/')[-1]

    Loader = get_loader(args.data)

    val_loader = torch.utils.data.DataLoader(
        Loader(args, 'A'),
        batch_size=args.val_batch,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=True)

    log_path = os.path.join(args.checkpoint, 'log.txt')
    log_columns = ['Epoch', 'LR', 'Train Loss', 'Valid Loss', 'Train Acc', 'Val Acc', 'AUC']

    # Stays None only when a resume path was given but does not exist; a
    # fresh logger is then created lazily just before training starts.
    logger = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> Loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # The 'best_acc' slot of the checkpoint holds the best AUC (see
            # the save_checkpoint call below). Restore it into best_auc too,
            # since that is what the is_best comparison uses; otherwise a
            # resumed run forgets its best score.
            best_acc = checkpoint['best_acc']
            best_auc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> Loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
            logger = Logger(log_path, title=title, resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(log_path, title=title)
        logger.set_names(log_columns)

    cudnn.benchmark = True
    print('=> Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / (1024. * 1024)))

    if args.evaluation:
        print('=> Evaluation only')
        D = args.data.split('/')[-1]
        save_dir = os.path.join(args.checkpoint, D)
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        loss, acc, predictions, auc = validate(val_loader, model, criterion, args.netType,
                                                        args.debug, args.flip)
        save_pred(predictions, checkpoint=save_dir)
        return

    if logger is None:
        # The requested checkpoint was missing: train from scratch with a
        # fresh log instead of failing later on an unbound ``logger``.
        logger = Logger(log_path, title=title)
        logger.set_names(log_columns)

    train_loader = torch.utils.data.DataLoader(
        Loader(args, 'train'),
        batch_size=args.train_batch,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)
    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, lr, args.schedule, args.gamma)
        print('=> Epoch: %d | LR %.8f' % (epoch + 1, lr))

        train_loss, train_acc = train(train_loader, model, criterion, optimizer, args.netType,
                                      args.debug, args.flip)
        # do not save predictions in model file
        valid_loss, valid_acc, predictions, valid_auc = validate(val_loader, model, criterion, args.netType,
                                                      args.debug, args.flip)

        logger.append([int(epoch + 1), lr, train_loss, valid_loss, train_acc, valid_acc, valid_auc])

        is_best = valid_auc >= best_auc
        best_auc = max(valid_auc, best_auc)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'netType': args.netType,
                'state_dict': model.state_dict(),
                # Key name kept for checkpoint compatibility; the value is
                # the best validation AUC.
                'best_acc': best_auc,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            predictions,
            checkpoint=args.checkpoint)

    logger.close()
    logger.plot(['AUC'])
    savefig(os.path.join(args.checkpoint, 'log.eps'))
Esempio n. 8
0
def do_epoch(
    setname,
    loader,
    model,
    criterion,
    epochno=-1,
    optimizer=None,
    num_classes=None,
    debug=False,
    checkpoint=None,
    mean=torch.Tensor([0.5, 0.5, 0.5]),
    std=torch.Tensor([1.0, 1.0, 1.0]),
    feature_dim=1024,
    save_logits=False,
    save_features=False,
    num_figs=100,
    topk=[1],
    save_feature_dir="",
    save_fig_dir="",
):
    """Run one training or validation pass over ``loader``.

    For every minibatch the model is run on ``data["rgb"]``, the loss is
    computed against ``data["class"]`` and the top-k performance is tracked.
    In ``"train"`` mode an optimizer step is taken per batch. Logits and/or
    features can be collected for the whole dataset and written with
    ``save_pred``, and prediction figures are produced for selected batches.

    Args:
        setname: ``"train"`` or ``"val"``; selects ``model.train()`` or
            ``model.eval()`` and whether the optimizer is stepped.
        loader: Data loader yielding dict minibatches with at least the keys
            ``"rgb"`` and ``"class"`` (plus ``"index"`` when saving outputs).
        model: Network returning a dict with ``"logits"`` (and ``"embds"``
            when ``save_features`` is set).
        criterion: Loss called as ``criterion(logits, targets)``.
        epochno: Epoch index, used for the progress bar and figure names.
        optimizer: Optimizer; required when ``setname == "train"``.
        num_classes: Width of the logits buffer; required with
            ``save_logits``.
        debug: Forces figure generation for every batch and is passed to
            ``viz_gt_pred`` as ``show``.
        checkpoint: Unused by this function.
        mean, std: Passed through to ``viz_gt_pred``.
        feature_dim: Width of the features buffer used with
            ``save_features``.
        save_logits: Collect the logits and save them as ``preds.mat``.
        save_features: Collect the embeddings and save them as
            ``features.mat``.
        num_figs: Passed to ``is_show`` to decide which batches are plotted.
        topk: Values of k passed to ``performance``; one meter is kept per k.
        save_feature_dir: Directory handed to ``save_pred``.
        save_fig_dir: Directory (``str`` or ``Path``) for prediction figures.

    Returns:
        ``(losses, perfs)``: a one-element list with the loss meter and a
        list with one performance meter per entry of ``topk``.

    Raises:
        ValueError: If training is requested without an optimizer, or
            ``save_logits`` is set without ``num_classes``.
    """
    # Imported locally so the block does not rely on a file-level import.
    from pathlib import Path

    assert setname == "train" or setname == "val"
    is_train = setname == "train"
    # Surface configuration mistakes before any work is done rather than as
    # an AttributeError/TypeError somewhere in the middle of the epoch.
    if is_train and optimizer is None:
        raise ValueError("optimizer is required when setname is 'train'")
    if save_logits and num_classes is None:
        raise ValueError("num_classes is required when save_logits is True")

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = [AverageMeter()]
    perfs = [AverageMeter() for _ in topk]

    if save_logits:
        all_logits = torch.Tensor(loader.dataset.__len__(), num_classes)
    if save_features:
        all_features = torch.Tensor(loader.dataset.__len__(), feature_dim)

    if is_train:
        model.train()
    else:
        model.eval()

    end = time.time()

    gt_win, pred_win, fig_gt_pred = None, None, None
    bar = Bar("E%d" % (epochno + 1), max=len(loader))
    for i, data in enumerate(loader):

        if data.get("gpu_collater", False):
            # We handle collation on the GPU to enable faster data augmentation
            with torch.no_grad():
                data["rgb"] = data["rgb"].cuda()
                collater_kwargs = {}
                if isinstance(loader.dataset, torch.utils.data.ConcatDataset):
                    cat_datasets = loader.dataset.datasets
                    # The first sub-dataset's collater is used for the whole
                    # concatenation; the others are passed in by lowercased
                    # class name.
                    collater = cat_datasets[0].gpu_collater
                    cat_datasets = {
                        type(x).__name__.lower(): x
                        for x in cat_datasets
                    }
                    collater_kwargs["concat_datasets"] = cat_datasets
                else:
                    collater = loader.dataset.gpu_collater
                data = collater(minibatch=data, **collater_kwargs)

        # measure data loading time
        data_time.update(time.time() - end)

        inputs = data["rgb"]
        targets = data["class"]

        inputs_cuda = inputs.cuda()
        targets_cuda = targets.cuda()

        # Only track gradients while training; validation then avoids
        # building (and holding memory for) an autograd graph.
        with torch.set_grad_enabled(is_train):
            # forward pass
            outputs_cuda = model(inputs_cuda)

            # compute the loss
            loss = criterion(outputs_cuda["logits"], targets_cuda)

        logits = outputs_cuda["logits"].data.cpu()
        topk_acc = performance(logits, targets, topk=topk)

        for ki, acc in enumerate(topk_acc):
            perfs[ki].update(acc, inputs.size(0))

        losses[0].update(loss.item(), inputs.size(0))

        # generate predictions
        if save_logits:
            all_logits[data["index"]] = logits
        if save_features:
            all_features[data["index"]] = outputs_cuda["embds"].squeeze(
            ).data.cpu()  # TODO

        if (debug or is_show(num_figs, i, len(loader))):
            fname = "pred_%s_epoch%02d_iter%05d" % (setname, epochno, i)
            # Wrap in Path so a plain string (including the "" default)
            # works; ``str / str`` raises TypeError.
            save_path = Path(save_fig_dir) / fname
            gt_win, pred_win, fig_gt_pred = viz_gt_pred(
                inputs,
                logits,
                targets,
                mean,
                std,
                data,
                gt_win,
                pred_win,
                fig_gt_pred,
                save_path=save_path,
                show=debug,
            )

        # compute gradient and do optim step
        if is_train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = "({batch}/{size}) Data: {data:.1f}s | Batch: {bt:.1f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:} | Perf: {perf:}".format(
            batch=i + 1,
            size=len(loader),
            data=data_time.val,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=", ".join(f"{meter.avg:.3f}" for meter in losses),
            perf=", ".join(f"{meter.avg:.3f}" for meter in perfs),
        )
        bar.next()
    bar.finish()

    # save outputs
    if save_logits or save_features:
        meta = {
            "clip_gt": np.asarray(loader.dataset.get_set_classes()),
            "clip_ix": loader.dataset.valid,
            "video_names": loader.dataset.get_all_videonames(),
        }
    if save_logits:
        save_pred(
            all_logits,
            checkpoint=save_feature_dir,
            filename="preds.mat",
            meta=meta,
        )
    if save_features:
        save_pred(
            all_features,
            checkpoint=save_feature_dir,
            filename="features.mat",
            meta=meta,
        )
    return losses, perfs
Esempio n. 9
0
        print("Doing all labels stacking")
        features = pd.concat((train_dfs[i] for i in range(len(train_dfs))),
                             axis=1)
        test_features = pd.concat((test_dfs[i] for i in range(len(test_dfs))),
                                  axis=1)

        features = np.array(features)
        test_features = np.array(test_features)

        if args.classifier == "randomforest":
            rf_random = fit_features(features,
                                     labels,
                                     "f1_macro",
                                     n_iter=2,
                                     cv=3)
            pred = rf_random.predict(test_features)
            bs = rf_random.best_score_

        if args.classifier in ["nn", "neuralnetwork"]:
            model = fit_neural_network(features, labels)
            pred = model.predict(test_features, batch_size=16)

        np.save('./stacks/{}.npy'.format(args.name), pred)
        t2 = time.time()
        print("Fitted. Best score: ", bs, ". Time taken = ", t2 - t1)

    save_pred(pred,
              th=0.5,
              SUBM_OUT='./subm/{}.csv'.format(args.name),
              fill_empty=False)