Example #1
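# Load a fold's best-loss checkpoint and run inference on the test set.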
def main(fold):
    mdl_path = "%s/%s_fold_%s_model_best_loss.pth.tar" % (config.best_models, config.model_name, str(fold))
    args = get_args(mdl_path)
    model = MultiModalNet("se_resnext101_32x4d", "dpn26", 0.5)
    model_dict = torch.load(args.model_path, map_location=device)  # also works on CPU-only hosts
    model.load_state_dict(model_dict['state_dict'])

    model.to(device)
    model.eval()

    test_files = pd.read_csv("./test.csv")
    test_gen = MultiModalDataset(test_files, config.test_data, config.test_vis, augument=False, mode="test")
    test_loader = DataLoader(test_gen, 1, shuffle=False, pin_memory=True, num_workers=1)
    test(test_loader, model, fold)
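These examples assume a module-level device; a typical definition (an assumption, not shown in the snippets) is device = torch.device("cuda" if torch.cuda.is_available() else "cpu").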
Example #2
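# Restore a checkpoint saved from an nn.DataParallel model by stripping the
# "module." key prefix, then prepare two models for ensembled inference.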
model2 = MultiModalNet('se_resnext101_32x4d', 'DPN26', 0.5)
checkpoint2 = torch.load(
    'checkpoints/se_resnext101_32x4d_fold_0_checkpoint.pth',
    map_location='cpu')  # load to CPU first; the model is moved to device below
new_state_dict = OrderedDict()
for k, v in checkpoint2['state_dict'].items():
    name = k[7:]  # strip the "module." prefix added by nn.DataParallel
    new_state_dict[name] = v
model2.load_state_dict(new_state_dict)

# if torch.cuda.device_count() > 1:
model1 = nn.DataParallel(model1)  # model1 is assumed to be built and loaded earlier, analogously to model2
model2 = nn.DataParallel(model2)
model1.to(device)
model2.to(device)
model1.eval()
model2.eval()
torch.backends.cudnn.benchmark = True

test_files = pd.read_csv("./test.csv")
test_gen = MultiModalDataset(test_files,
                             config.test_data,
                             config.test_vis,
                             augument=False,
                             mode="test")
test_loader = DataLoader(test_gen,
                         batch_size=1,
                         shuffle=False,
                         pin_memory=True,
                         num_workers=1)
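
The key-renaming loop above is needed because nn.DataParallel prefixes every
parameter name with "module." when the wrapped model's state_dict is saved. A
reusable sketch of the same idea (strip_module_prefix is a hypothetical helper
name, not part of this code):

from collections import OrderedDict

def strip_module_prefix(state_dict):
    # Drop the "module." prefix added by nn.DataParallel so the weights can
    # be loaded into an unwrapped model.
    return OrderedDict(
        (k[len("module."):] if k.startswith("module.") else k, v)
        for k, v in state_dict.items())
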
Example #3
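# Full training entry point: stratified k-fold training with early stopping,
# out-of-fold (OOF) prediction dumping, and ensembled test-time prediction.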
def main():
    # 4.1 make output directories (exist_ok avoids race-prone exists() checks)
    os.makedirs(config.submit, exist_ok=True)
    for fold in range(config.FOLD):
        os.makedirs(config.weights + config.model_name + os.sep + str(fold),
                    exist_ok=True)
    os.makedirs(config.best_models, exist_ok=True)
    os.makedirs("./logs/", exist_ok=True)
    # with open('../data/train_lgb.pkl', 'rb') as f:
    #     magic_trains = pickle.load(f)
    # with open('../data/test_lgb.pkl', 'rb') as f:
    #     magic_tests = pickle.load(f)
    # resume = False
    # if resume:
    #     checkpoint = torch.load(r'./checkpoints/best_models/seresnext101_dpn92_defrog_multimodal_fold_0_model_best_loss.pth.tar')
    #     best_acc = checkpoint['best_acc']
    #     best_loss = checkpoint['best_loss']
    #     best_f1 = checkpoint['best_f1']
    #     start_epoch = checkpoint['epoch']

    start = timer()
    # from torchsummary import summary
    # print(summary(model, [(3, 100, 100), (7*26, 24)]))
    all_files = pd.read_csv("../data/train.csv")
    all_files = all_files.sample(frac=1, random_state=666)
    test_files = pd.read_csv("../data/test.csv")
    max_epoch = config.epochs
    if config.debug:
        all_files = all_files.iloc[:1000]
        test_files = test_files.iloc[:100]
        config.batch_size = 2
        max_epoch = 1
    train_label = np.array(all_files['Target'])
    if config.OOF:
        result = np.zeros((len(all_files), 9))
        # print(result.shape)
        # random_state is only valid with shuffle=True (recent scikit-learn
        # raises an error otherwise); the rows were already shuffled above
        skf = StratifiedKFold(n_splits=config.FOLD, shuffle=False)
        for fold, (train_idx,
                   val_idx) in enumerate(skf.split(all_files, train_label)):
            print('fold:', fold)
            val_data_list = all_files.iloc[val_idx]

            # load dataset
            val_gen = MultiModalDataset(val_data_list,
                                        config.train_data,
                                        config.train_vis,
                                        augument=False,
                                        mode="train")
            val_loader = DataLoader(val_gen,
                                    batch_size=config.batch_size,
                                    shuffle=False,
                                    pin_memory=True,
                                    num_workers=1)

            best_model = torch.load(
                "%s/%s_fold_%s_model_best_acc.pth.tar" %
                (config.best_models, config.model_name, str(fold)),
                map_location=device)  # load weights onto the active device
            model = MultiModalNet(drop=0.5)
            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)
            model.to(device)
            model.load_state_dict(best_model["state_dict"])
            model.eval()
            result_oof = []
            with torch.no_grad():
                for i, (images, (visit, ),
                        target) in tqdm(enumerate(val_loader)):

                    image_var = images.to(device)
                    # magic = magic.to(device)
                    visit = visit.to(device)
                    # labels are not needed when collecting OOF predictions
                    y_oof = F.softmax(model(image_var, visit),
                                      dim=1).cpu().numpy()
                    result_oof.extend(y_oof)
            result_oof = np.array(result_oof)
            print(len(val_idx), result_oof.shape)
            result[val_idx] = result_oof
        print(result.shape)
        with open("../data/oof2.pkl", 'wb') as f:
            pickle.dump(result, f)

    if config.train and config.FOLD > 1:
        # train_data_list,val_data_list = train_test_split(all_files, test_size=0.1, random_state = 2050)
        # random_state is only valid with shuffle=True (recent scikit-learn
        # raises an error otherwise); the rows were already shuffled above
        skf = StratifiedKFold(n_splits=config.FOLD, shuffle=False)
        for fold, (train_idx,
                   val_idx) in enumerate(skf.split(all_files, train_label)):
            print('fold:', fold)
            train_data_list = all_files.iloc[train_idx]
            val_data_list = all_files.iloc[val_idx]
            # train_magic = magic_trains.iloc[train_idx]
            # val_magic = magic_trains.iloc[val_idx]
            # load dataset
            train_gen = MultiModalDataset(train_data_list,
                                          config.train_data,
                                          config.train_vis,
                                          mode="train")
            train_loader = DataLoader(
                train_gen,
                batch_size=config.batch_size,
                shuffle=True,
                pin_memory=True,
                num_workers=1
            )  # num_workers is limited by shared memory inside Docker!

            val_gen = MultiModalDataset(val_data_list,
                                        config.train_data,
                                        config.train_vis,
                                        augument=False,
                                        mode="train")
            val_loader = DataLoader(val_gen,
                                    batch_size=config.batch_size,
                                    shuffle=False,
                                    pin_memory=True,
                                    num_workers=1)

            start_epoch = 0
            best_acc = 0
            best_loss = np.inf
            best_f1 = 0
            best_results = [0, np.inf, 0]
            val_metrics = [0, np.inf, 0]
            # 4.2 get model
            model = MultiModalNet(drop=0.5)
            if fold == 0:
                total_num = sum(p.numel() for p in model.parameters())
                trainable_num = sum(p.numel() for p in model.parameters()
                                    if p.requires_grad)
                print('Total', total_num, 'Trainable', trainable_num)
            # 4.3 optim & criterion
            optimizer = Nadam(model.parameters(), lr=5e-4)
            # alternatives:
            # optimizer = torch.optim.Adamax(model.parameters(), 0.001)
            # optimizer = optim.SGD(model.parameters(), lr=config.lr, momentum=0.9, weight_decay=1e-4)
            criterion = nn.CrossEntropyLoss().to(device)
            # scheduler = lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.25)
            scheduler = lr_scheduler.MultiStepLR(optimizer, [6, 12, 18],
                                                 gamma=0.5)
            # lr_scheduler.ReduceLROnPlateau(optimizer)
            # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[4, 8, 12], gamma=0.25)
            # n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
            # scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)
            model.to(device)

            # train with early stopping on validation accuracy
            best_acc_epoch = 0
            for epoch in range(0, max_epoch):
                if epoch - best_acc_epoch > 5:
                    break
                # train for one epoch
                train_metrics = train(train_loader, model, criterion,
                                      optimizer, epoch, val_metrics,
                                      best_results, start)
                # validate
                val_metrics = evaluate(val_loader, model, criterion, epoch,
                                       train_metrics, best_results, start)
                # step the LR schedule once per epoch, after the optimizer updates
                scheduler.step()
                # check results
                is_best_acc = val_metrics[0] > best_results[0]
                if is_best_acc:
                    best_acc_epoch = epoch
                best_results[0] = max(val_metrics[0], best_results[0])
                is_best_loss = val_metrics[1] < best_results[1]
                best_results[1] = min(val_metrics[1], best_results[1])
                is_best_f1 = val_metrics[2] > best_results[2]
                best_results[2] = max(val_metrics[2], best_results[2])
                # save model
                save_checkpoint(
                    {
                        "epoch": epoch + 1,
                        "model_name": config.model_name,
                        "state_dict": model.state_dict(),
                        "best_acc": best_results[0],
                        "best_loss": best_results[1],
                        "optimizer": optimizer.state_dict(),
                        "fold": fold,
                        "best_f1": best_results[2],
                    }, is_best_acc, is_best_loss, is_best_f1, fold)
                # print logs
                print('\r', end='', flush=True)
                log.write('%s  %5.1f %6.1f      |   %0.3f   %0.3f   %0.3f     |  %0.3f   %0.3f    %0.3f    |   %s  %s  %s | %s' % (\
                        "best", epoch, epoch,
                        train_metrics[0], train_metrics[1],train_metrics[2],
                        val_metrics[0],val_metrics[1],val_metrics[2],
                        str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
                        time_to_str((timer() - start),'min'))
                    )
                log.write("\n")
                time.sleep(0.01)
    if config.train and config.FOLD == 1:
        # NOTE: this branch needs the magic_trains features whose pickle
        # loading is commented out at the top of main(); re-enable it first
        train_data_list, val_data_list, train_magic, val_magic = train_test_split(
            all_files, magic_trains, test_size=0.1, random_state=2050)
        # skf = StratifiedKFold(n_splits=config.FOLD, random_state=2019, shuffle=False)
        # for fold, (train_idx, val_idx) in enumerate(skf.split(all_files, train_label)):
        #     print('fold:', fold)
        #     train_data_list = all_files.iloc[train_idx]
        #     val_data_list = all_files.iloc[val_idx]
        # load dataset
        train_gen = MultiModalDataset(train_data_list,
                                      train_magic,
                                      config.train_data,
                                      config.train_vis,
                                      mode="train")
        train_loader = DataLoader(
            train_gen,
            batch_size=config.batch_size,
            shuffle=True,
            pin_memory=True,
            num_workers=1)  # num_workers is limited by shared memory inside Docker!

        val_gen = MultiModalDataset(val_data_list,
                                    val_magic,
                                    config.train_data,
                                    config.train_vis,
                                    augument=False,
                                    mode="train")
        val_loader = DataLoader(val_gen,
                                batch_size=config.batch_size,
                                shuffle=False,
                                pin_memory=True,
                                num_workers=1)

        fold = 0  # single split, but save_checkpoint still expects a fold index
        start_epoch = 0
        best_acc = 0
        best_loss = np.inf
        best_f1 = 0
        best_results = [0, np.inf, 0]
        val_metrics = [0, np.inf, 0]
        # 4.2 get model
        model = MultiModalNet(drop=0.5)
        # 4.3 optim & criterion
        optimizer = torch.optim.Adamax(model.parameters(), 0.001)
        # optimizer = optim.SGD(model.parameters(),lr = config.lr,momentum=0.9,weight_decay=1e-4)
        criterion = nn.CrossEntropyLoss().to(device)
        # scheduler = lr_scheduler.StepLR(optimizer,step_size=5,gamma=0.25)
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)
        # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[4, 8, 12], gamma=0.25)
        # n_batches = int(len(train_loader.dataset) // train_loader.batch_size)
        # scheduler = CosineAnnealingLR(optimizer, T_max=n_batches*2)
        if torch.cuda.device_count() > 1:
            model = nn.DataParallel(model)
        model.to(device)

        # train with early stopping on validation accuracy
        best_acc_epoch = 0
        for epoch in range(0, max_epoch):
            if epoch - best_acc_epoch > 5:
                break
            # train for one epoch
            train_metrics = train(train_loader, model, criterion, optimizer,
                                  epoch, val_metrics, best_results, start)
            # validate
            val_metrics = evaluate(val_loader, model, criterion, epoch,
                                   train_metrics, best_results, start)
            # ReduceLROnPlateau expects a monitored metric (here the
            # validation loss), not the epoch index
            scheduler.step(val_metrics[1])
            # check results
            is_best_acc = val_metrics[0] > best_results[0]
            if is_best_acc:
                best_acc_epoch = epoch
            best_results[0] = max(val_metrics[0], best_results[0])
            is_best_loss = val_metrics[1] < best_results[1]
            best_results[1] = min(val_metrics[1], best_results[1])
            is_best_f1 = val_metrics[2] > best_results[2]
            best_results[2] = max(val_metrics[2], best_results[2])
            # save model
            save_checkpoint(
                {
                    "epoch": epoch + 1,
                    "model_name": config.model_name,
                    "state_dict": model.state_dict(),
                    "best_acc": best_results[0],
                    "best_loss": best_results[1],
                    "optimizer": optimizer.state_dict(),
                    "fold": fold,
                    "best_f1": best_results[2],
                }, is_best_acc, is_best_loss, is_best_f1, fold)
            # print logs
            print('\r', end='', flush=True)
            log.write('%s  %5.1f %6.1f      |   %0.3f   %0.3f   %0.3f     |  %0.3f   %0.3f    %0.3f    |   %s  %s  %s | %s' % (\
                    "best", epoch, epoch,
                    train_metrics[0], train_metrics[1],train_metrics[2],
                    val_metrics[0],val_metrics[1],val_metrics[2],
                    str(best_results[0])[:8],str(best_results[1])[:8],str(best_results[2])[:8],
                    time_to_str((timer() - start),'min'))
                )
            log.write("\n")
            time.sleep(0.01)
    if config.predict:
        # test data
        models = []
        for fold in range(config.FOLD):
            best_model = torch.load(
                "%s/%s_fold_%s_model_best_acc.pth.tar" %
                (config.best_models, config.model_name, str(fold)),
                map_location=device)
            model = MultiModalNet(drop=0.5)
            if torch.cuda.device_count() > 1:
                model = nn.DataParallel(model)
            model.to(device)
            model.load_state_dict(best_model["state_dict"])
            model.eval()
            models.append(model)
        test_gen = MultiModalDataset(test_files,
                                     config.test_data,
                                     config.test_vis,
                                     augument=False,
                                     mode="test",
                                     TTA=True)
        test_loader = DataLoader(test_gen,
                                 batch_size=config.batch_size,
                                 shuffle=False,
                                 pin_memory=True,
                                 num_workers=1)
        # predict
        test(test_loader, models)
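
save_checkpoint is a repo-local helper not shown here. Below is a minimal
sketch that matches how it is called above, assuming the repo's config object
and the file-naming scheme implied by the load paths in these examples (the
exact layout is an assumption):

import os
import shutil
import torch

def save_checkpoint(state, is_best_acc, is_best_loss, is_best_f1, fold):
    # Hypothetical sketch; the real helper lives elsewhere in the repo.
    filename = os.path.join(
        config.weights + config.model_name + os.sep + str(fold),
        "checkpoint.pth.tar")
    torch.save(state, filename)
    # Keep one copy per "best" criterion, matching the
    # *_model_best_acc / *_model_best_loss paths loaded in the examples.
    tmpl = "%s/%s_fold_%s_model_best_%s.pth.tar"
    if is_best_acc:
        shutil.copyfile(filename, tmpl % (config.best_models,
                                          config.model_name, str(fold), "acc"))
    if is_best_loss:
        shutil.copyfile(filename, tmpl % (config.best_models,
                                          config.model_name, str(fold), "loss"))
    if is_best_f1:
        shutil.copyfile(filename, tmpl % (config.best_models,
                                          config.model_name, str(fold), "f1"))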