# Example #1: fold-wise training / evaluation entry point
def main():

    torch.manual_seed(114514)
    torch.cuda.manual_seed_all(114514)

    model = to_cuda(NetBasic(top_softmax))
    model = DataParallel(model, device_ids=[0, 1, 2, 3])
    model.apply(weights_init)
    criterion = nn.CrossEntropyLoss()  # ce_loss
    optimizer = Adam(model.parameters(), lr=args.lr)

    print "executing fold %d" % args.fold
    # import dataset

    train_dataset = ExclusionDataset(luna_dir,
                                     data_index_dir,
                                     fold=args.fold,
                                     phase='train')
    X_train, y_train = load_data(train_dataset, nodule_dir)
    unlabeled_dataset = ExclusionDataset(luna_dir,
                                         data_index_dir,
                                         fold=args.fold,
                                         phase='unlabeled')
    X_ul = load_data(unlabeled_dataset, nodule_dir)
    print "Labeled training samples: %d" % len(train_dataset)
    if args.semi_spv == 0:
        print "supervised mission"
    else:
        print "semi-supervised mission"
        print "Unlabeled training samples: %d" % len(unlabeled_dataset)
    # data argumentation
    if args.argument != 0:
        X_train, y_train = argumentation(X_train, y_train, args.argument)

    # parameters for training
    batch_size = args.batch_size
    print
    print
    ce_loss_list = []
    vat_loss_list = []
    for epoch in range(args.epochs):
        print "epoch: %d" % (epoch + 1)

        # epoch decay settings
        if epoch <= args.epochs * 0.5:
            decayed_lr = args.lr
        elif epoch <= args.epochs * 0.8:
            decayed_lr = 0.1 * args.lr
        else:
            decayed_lr = ((args.epochs - epoch) *
                          (0.1 * args.lr)) / (args.epochs -
                                              (0.8 * args.epochs))
        optimizer.lr = decayed_lr
        optimizer.betas = (0.5, 0.999)
        print "contains %d iterations." % num_iter_per_epoch
        for i in tqdm(range(num_iter_per_epoch)):
            # training in batches
            batch_indices = torch.LongTensor(
                np.random.choice(len(train_dataset), batch_size,
                                 replace=False))
            x_64 = X_train[batch_indices]
            y = y_train[batch_indices]
            x_32 = extract_half(x_64)

            # semi-supervised, we used same batch-size for both labeled and unlabeled
            if args.semi_spv == 1:
                batch_indices_unlabeled = torch.LongTensor(
                    np.random.choice(len(unlabeled_dataset),
                                     batch_size,
                                     replace=False))
                ul_x_64 = X_ul[batch_indices_unlabeled]
                ul_x_32 = extract_half(ul_x_64)
                v_loss, ce_loss = train_semi(model.train(),
                                             Variable(to_cuda(x_32)),
                                             Variable(to_cuda(x_64)),
                                             Variable(to_cuda(y)),
                                             Variable(to_cuda(ul_x_32)),
                                             Variable(to_cuda(ul_x_64)),
                                             optimizer,
                                             criterion,
                                             epsilon=args.epsilon,
                                             lamb=args.lamb)
                if i == num_iter_per_epoch - 1:
                    print "epoch %d: " % (
                        epoch + 1), "vat_loss: ", v_loss, "ce_loss: ", ce_loss
                    ce_loss_list.append(ce_loss)
                    vat_loss_list.append(v_loss)

            # supervised with cross-entropy loss
            else:
                sv_loss = train_supervise(model.train(),
                                          Variable(to_cuda(x_32)),
                                          Variable(to_cuda(x_64)),
                                          Variable(to_cuda(y)), optimizer,
                                          criterion)
                if i == num_iter_per_epoch - 1:
                    print "epoch %d: " % (epoch + 1), "sv_loss", sv_loss
                    ce_loss_list.append(sv_loss)

    # saving model
    print "saving model..."
    state_dict = model.module.state_dict()
    for key in state_dict.keys():
        state_dict[key] = state_dict[key].cpu()
    if args.semi_spv == 1:
        save_dir = os.path.join(args.save_dir, 'fold%d' % args.fold,
                                'semi_spv')
    else:
        save_dir = os.path.join(args.save_dir, 'fold%d' % args.fold,
                                'supervise')
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    torch.save(
        {
            'save_dir': args.save_dir,
            'state_dict': state_dict,
            'args': args
        }, os.path.join(save_dir, 'model.ckpt'))

    # Saving loss results
    print "Saving loss results"
    if args.semi_spv == 1:
        ce_loss_list = np.asarray(ce_loss_list, dtype=np.float64)
        vat_loss_list = np.asarray(vat_loss_list, dtype=np.float64)
        np.save(os.path.join(save_dir, 'vat_loss.npy'), vat_loss_list)
        np.save(os.path.join(save_dir, 'ce_loss.npy'), ce_loss_list)
    else:
        ce_loss_list = np.asarray(ce_loss_list, dtype=np.float64)
        np.save(os.path.join(save_dir, 'sv_loss.npy'), ce_loss_list)

    # Generating test results one by one
    print "Evaluation step..."
    test_dataset = ExclusionDataset(luna_dir,
                                    data_index_dir,
                                    fold=args.fold,
                                    phase='test')
    print "Testing samples: %d" % len(test_dataset)
    X_test, y_test, uids, center = load_data(test_dataset, nodule_dir)
    y_test = y_test.numpy()
    series_uid_list = []
    coord_x_list = []
    coord_y_list = []
    coord_z_list = []
    proba_pos_list = []
    proba_neg_list = []
    label_list = []
    print "Testing..."
    for i in tqdm(range(len(test_dataset))):
        prob_neg, prob_pos = evaluate(
            model.eval(), Variable(to_cuda(extract_half(X_test[[i]]))),
            Variable(to_cuda(X_test[[i]])))
        series_uid_list.append(uids[i])
        coord_x_list.append(center[i][0])
        coord_y_list.append(center[i][1])
        coord_z_list.append(center[i][2])
        proba_neg_list.append(prob_neg)
        proba_pos_list.append(prob_pos)
        label_list.append(y_test[i])
    print "Finished evaluation step, generating evaluation files.."
    # Saving results
    data_frame = DataFrame({
        'seriesuid': series_uid_list,
        'coordX': coord_x_list,
        'coordY': coord_y_list,
        'coordZ': coord_z_list,
        'proba_neg': proba_neg_list,
        'proba_pos': proba_pos_list,
        'label': label_list
    })
    data_frame.to_csv(os.path.join(save_dir, 'eval_results.csv'),
                      index=False,
                      sep=',')