Example #1
def default_classical_scorings(task="predict"):
    if task == 'predict':
        scorings = (mm.Accuracy(tensor=False),
                    mm.BalancedAccuracy(tensor=False),
                    mm.F1Score(average='macro', tensor=False),
                    mm.Precision(average='macro', tensor=False),
                    mm.Recall(average='macro', tensor=False),
                    mm.ROCAUC(average='macro', tensor=False))
    else:
        scorings = (mm.CIndex(tensor=False, hazard=True), )
    return scorings
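A minimal usage sketch for the function above, assuming the `mm` metrics module imported by the original project is available; the task names are only illustrative.

# Hypothetical usage of default_classical_scorings; assumes the `mm` metrics
# module used above is importable in the current environment.
classification_metrics = default_classical_scorings(task="predict")
survival_metrics = default_classical_scorings(task="survival")
print([type(m).__name__ for m in classification_metrics])
print([type(m).__name__ for m in survival_metrics])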
Example #2
def main(args):
    if args.cuda and not torch.cuda.is_available():
        raise ValueError("GPUs are not available, please run at cpu mode")
    # init
    data = tileData(args.root, args.img_rows, args.img_cols)
    evaluators = [
        metrics.OAAcc(),
        metrics.Precision(),
        metrics.Recall(),
        metrics.F1Score(),
        metrics.Kappa(),
        metrics.Jaccard()
    ]
    # prediction
    for checkpoint in args.checkpoints:
        model, is_multi = load_checkpoint(checkpoint, args.cuda)
        performs = [[] for i in range(len(evaluators))]
        for idx in range(len(data)):
            print("Handling {} by {} \r".format(data.files[idx], checkpoint))
            x, y, shapes = data.slice_by_id(idx)
            # generate prediction
            with torch.set_grad_enabled(False):
                for step in range(0, x.shape[0], args.batch_size):
                    x_batch = x[step:step + args.batch_size]
                    y_batch = y[step:step + args.batch_size]
                    if args.cuda:
                        x_batch = x_batch.cuda()
                        y_batch = y_batch.cuda()
                    if is_multi:
                        y_pred = model(x_batch)[0].detach()
                    else:
                        y_pred = model(x_batch).detach()
                    # get performance
                    for i, evaluator in enumerate(evaluators):
                        performs[i].append(
                            evaluator(y_pred, y_batch)[0].item())

        performs = [(sum(p) / len(p)) for p in performs]
        performs = pd.DataFrame(
            [[time.strftime("%h_%d"), checkpoint] + performs],
            columns=['time', 'checkpoint'] + [repr(x) for x in evaluators])
        # save performance
        log_path = os.path.join(Result_DIR, "patchPerforms.csv")
        if os.path.exists(log_path):
            perform = pd.read_csv(log_path)
        else:
            perform = pd.DataFrame([])
        perform = pd.concat([perform, performs], ignore_index=True)
        perform.to_csv(log_path, index=False, float_format="%.3f")
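A sketch of how this main() might be driven from the command line; the flag names simply mirror the attributes read from args above (root, img_rows, img_cols, batch_size, cuda, checkpoints) and are an assumption, not the original repo's CLI.

# Hypothetical CLI wrapper for the main() above; flag names mirror the
# attributes accessed on `args` and are not taken from the original repo.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--root", default="./data", help="dataset root directory")
    parser.add_argument("--img_rows", type=int, default=224)
    parser.add_argument("--img_cols", type=int, default=224)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--cuda", action="store_true", help="run on GPU if available")
    parser.add_argument("--checkpoints", nargs="+", default=[], help="checkpoint file names")
    main(parser.parse_args())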
Example #3
def main():
    warnings.filterwarnings('ignore')

    # config
    parser = argparse.ArgumentParser()
    parser.add_argument('-s',
                        '--save',
                        default='./save',
                        help='directory to save results; if the name already exists, a "-" suffix is appended to distinguish it')
    parser.add_argument('-is',
                        '--image_size',
                        default=224,
                        type=int,
                        help='size that each patch is resized to, default 224 x 224')
    parser.add_argument('-vts',
                        '--valid_test_size',
                        default=(0.1, 0.1),
                        type=float,
                        nargs=2,
                        help='validation and test set fractions, default 0.1 0.1')
    parser.add_argument('-bs',
                        '--batch_size',
                        default=32,
                        type=int,
                        help='batch size, default 32')
    parser.add_argument('-nw',
                        '--num_workers',
                        default=12,
                        type=int,
                        help='number of worker processes, default 12')
    parser.add_argument('-lr',
                        '--learning_rate',
                        default=0.0001,
                        type=float,
                        help='learning rate, default 0.0001')
    parser.add_argument('-e',
                        '--epoch',
                        default=10,
                        type=int,
                        help='number of epochs, default 10')
    parser.add_argument('--reduction',
                        default='mean',
                        help='how instances within the same bag are aggregated, default "mean"')
    parser.add_argument('--multipler',
                        default=2.0,
                        type=float,
                        help="factor (> 1) multiplied into the weights to balance positive and negative bags, default 2.0")
    args = parser.parse_args()
    save = args.save
    image_size = (args.image_size, args.image_size)
    valid_size, test_size = args.valid_test_size
    batch_size = args.batch_size
    num_workers = args.num_workers
    lr = args.learning_rate
    epoch = args.epoch
    reduction = args.reduction
    multipler = args.multipler

    # ----- load the data -----
    neg_dir = './DATA/TCT/negative'
    pos_dir = './DATA/TCT/positive'

    dat = MilData.from2dir(neg_dir, pos_dir)
    train_transfer = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    test_transfer = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    train_dat, valid_dat, test_dat = dat.split_by_bag(
        test_size,
        valid_size,
        train_transfer=train_transfer,
        valid_transfer=test_transfer,
        test_transfer=test_transfer)
    dataloaders = {
        'train':
        data.DataLoader(train_dat,
                        batch_size=batch_size,
                        num_workers=num_workers,
                        shuffle=True),
        'valid':
        data.DataLoader(
            valid_dat,
            batch_size=batch_size,
            num_workers=num_workers,
        ),
        'test':
        data.DataLoader(
            test_dat,
            batch_size=batch_size,
            num_workers=num_workers,
        )
    }

    # ----- build the network and optimizer -----
    net = NormalCnn()
    criterion = nn.BCELoss(reduction='none')
    optimizer = optim.Adam(net.parameters(), lr=lr)
    scorings = [
        mm.Loss(),
        mm.Recall(reduction=reduction),
        mm.ROCAUC(reduction=reduction),
        mm.BalancedAccuracy(reduction=reduction),
        mm.F1Score(reduction=reduction),
        mm.Precision(reduction=reduction),
        mm.Accuracy(reduction=reduction)
    ]

    # ----- train the network -----
    try:
        net, hist, weighter = train(net,
                                    criterion,
                                    optimizer,
                                    dataloaders,
                                    epoch=epoch,
                                    metrics=scorings,
                                    weighter_multipler=multipler)

        test_hist = evaluate(net, dataloaders['test'], criterion, scorings)
    except Exception as e:
        import ipdb
        ipdb.set_trace()  # XXX BREAKPOINT

    # save the results
    dirname = check_update_dirname(save)
    torch.save(net.state_dict(), os.path.join(dirname, 'model.pth'))
    torch.save(weighter, os.path.join(dirname, 'weigher.pth'))
    pd.DataFrame(hist).to_csv(os.path.join(dirname, 'train.csv'))
    with open(os.path.join(dirname, 'config.json'), 'w') as f:
        json.dump(args.__dict__, f)
    with open(os.path.join(dirname, 'test.json'), 'w') as f:
        json.dump(test_hist, f)
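The train and test transforms above use the standard ImageNet normalization statistics; below is a self-contained sketch of applying the same test-time pipeline to a single image, depending only on torchvision and PIL (the input image here is a placeholder).

# Minimal, self-contained illustration of the test-time transform used above;
# it does not depend on MilData, NormalCnn, or the rest of the example.
from PIL import Image
from torchvision import transforms

test_transfer = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

img = Image.new("RGB", (500, 400))  # stand-in for a real patch
x = test_transfer(img)              # tensor of shape (3, 224, 224)
print(x.shape)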
Example #4
def main(args):
    if args.cuda and not torch.cuda.is_available():
        raise ValueError("GPUs are not available, please run at cpu mode")
    # init
    data = tileData(args.root, args.img_rows, args.img_cols)
    evaluators = [
        metrics.OAAcc(),
        metrics.Precision(),
        metrics.Recall(),
        metrics.F1Score(),
        metrics.Kappa(),
        metrics.Jaccard()
    ]
    # prediction
    for checkpoint in args.checkpoints:
        model, is_multi = load_checkpoint(checkpoint, args.cuda)
        Save_DIR = os.path.join(Result_DIR, "area", checkpoint.split("_")[0])
        if not os.path.exists(Save_DIR):
            os.makedirs(Save_DIR)
        performs = [[] for i in range(len(evaluators))]
        for idx in range(len(data)):
            print("Handling {} by {} \r".format(data.files[idx], checkpoint))
            x, y, shapes = data.slice_by_id(idx)
            # get prediction
            y_pred = []
            with torch.set_grad_enabled(False):
                for step in range(0, x.shape[0], args.batch_size):
                    x_batch = x[step:step + args.batch_size]
                    if args.cuda:
                        x_batch = x_batch.cuda()
                    # generate prediction
                    if is_multi:
                        y_pred.append(model(x_batch)[0].detach())
                    else:
                        y_pred.append(model(x_batch).detach())
            y_pred = torch.cat(y_pred, 0)
            if args.cuda:
                y_pred = y_pred.cpu()
            assert y_pred.shape[0] == x.shape[0], "All data should be iterated."
            del x
            pred_img = vision.slices_to_img(
                vision.ytensor_to_slices(y_pred, data.cmap), shapes)
            # y_img = vision.slices_to_img(
            #     vision.ytensor_to_slices(y, data.cmap), data.shapes)
            # merge slices into image & save result image
            imsave(os.path.join(Save_DIR, data.files[idx]),
                   pred_img,
                   compress=6)
            # N tensor 2 one
            # pred_tensor = vision.tensors_to_tensor(y_pred, shapes)
            # y_tensor = vision.tensors_to_tensor(y, shapes)
            # get performance
            for i, evaluator in enumerate(evaluators):  # use i to avoid shadowing the outer idx
                # a = evaluator(pred_tensor, y_tensor)[0].item()
                # b = evaluator(y_pred, y)[0].item()
                # print("{} => One : {} ; N : {}".format(repr(evaluator), a, b))
                performs[i].append(evaluator(y_pred, y)[0].item())

        performs = [(sum(p) / len(p)) for p in performs]
        performs = pd.DataFrame(
            [[time.strftime("%h_%d"), checkpoint] + performs],
            columns=['time', 'checkpoint'] + [repr(x) for x in evaluators])
        # save performance
        log_path = os.path.join(Result_DIR, "areaPerforms.csv")
        if os.path.exists(log_path):
            perform = pd.read_csv(log_path)
        else:
            perform = pd.DataFrame([])
        perform = pd.concat([perform, performs], ignore_index=True)
        perform.to_csv(log_path, index=False, float_format="%.3f")
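Examples #2 and #4 both finish by appending one row of averaged metrics to a running CSV log; the sketch below isolates that pattern using pandas only (the path, column names, and scores are placeholders).

# Illustrative version of the "append one result row to a CSV log" pattern
# used above; the path and columns are placeholders, not from the original repo.
import os
import time
import pandas as pd

def log_row(log_path, checkpoint, scores):
    row = pd.DataFrame(
        [[time.strftime("%h_%d"), checkpoint] + list(scores.values())],
        columns=['time', 'checkpoint'] + list(scores.keys()))
    log = pd.read_csv(log_path) if os.path.exists(log_path) else pd.DataFrame([])
    log = pd.concat([log, row], ignore_index=True)
    log.to_csv(log_path, index=False, float_format="%.3f")

log_row("demo_performs.csv", "model-a_best.pth", {"OAAcc": 0.91, "F1Score": 0.88})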
Example #5
def main(args):
    if args.cuda and not torch.cuda.is_available():
        raise ValueError("GPUs are not available, please run at cpu mode")

    evaluators = [
        metrics.OAAcc(),
        metrics.Precision(),
        metrics.Recall(),
        metrics.F1Score(),
        metrics.Kappa(),
        metrics.Jaccard()
    ]

    for checkpoint in args.checkpoints:
        print("Handling by {} ...\r".format(checkpoint))
        Save_DIR = os.path.join(Result_DIR, 'single', checkpoint.split("_")[0])
        if not os.path.exists(Save_DIR):
            os.makedirs(Save_DIR)
        # initialize datasets
        infos = checkpoint.split('_')[0].split('-')
        _, valset = load_dataset(infos[2], "IM")
        print("Testing with {}-Dataset: {} examples".format(
            infos[2], len(valset)))
        # Load checkpoint
        model, is_multi = load_checkpoint(checkpoint, args.cuda)
        # load data
        data_loader = DataLoader(
            valset,
            1,
            num_workers=4,
            shuffle=False,
            pin_memory=True,
        )
        performs = [[] for i in range(len(evaluators))]
        imgsets = []
        with torch.set_grad_enabled(False):
            for idx, sample in enumerate(data_loader):
                # get tensors from sample
                x = sample["src"]
                y = sample["tar"]
                if args.cuda:
                    x = x.cuda()
                    y = y.cuda()
                if is_multi:
                    gen_y = model(x)[0]
                else:
                    gen_y = model(x)
                # get performance
                for i, evaluator in enumerate(evaluators):
                    performs[i].append(
                        evaluator(gen_y.detach(), y.detach())[0].item())
                if args.cuda:
                    x = x.detach().cpu()
                    y = y.detach().cpu()
                    gen_y = gen_y.detach().cpu()
                x = x.numpy()[0].transpose((1, 2, 0))
                y = y.numpy()[0].transpose((1, 2, 0))
                gen_y = gen_y.numpy()[0].transpose((1, 2, 0))
                x_img = valset._src2img(x, whitespace=False)
                y_img = valset._tar2img(y, whitespace=False)
                gen_img = valset._tar2img(gen_y, whitespace=False)
                canny_x = vision.canny_edge(x_img)
                canny_y = vision.canny_edge(y_img)
                canny_gen = vision.canny_edge(gen_img)
                # mask_pair = vision.pair_to_rgb(gen_img, y_img, args.color)
                canny_pair = vision.pair_to_rgb(canny_y,
                                                canny_x,
                                                args.color,
                                                use_dilation=True,
                                                disk_value=args.disk)
                edge_pair = vision.pair_to_rgb(canny_gen,
                                               canny_y,
                                               args.color,
                                               use_dilation=True,
                                               disk_value=args.disk)
                imgsets.append([
                    vision.add_barrier(x_img, args.spaces),
                    vision.add_barrier(canny_pair, args.spaces),
                    # vision.add_barrier(mask_pair, args.spaces),
                    vision.add_barrier(edge_pair, args.spaces),
                ])
                if len(imgsets) >= args.disp_cols * args.gen_nb:
                    break
            # visualization
            for i in range(args.gen_nb):
                imgset = []
                for j in range(args.disp_cols):
                    imgset.append(
                        np.concatenate(imgsets[i * args.disp_cols + j],
                                       axis=0))
                vis_img = np.concatenate(imgset, axis=1)
                name = "{}_canny_segmap_edge_{}.png".format(
                    checkpoint.split('_')[0], i)
                imsave(os.path.join(Save_DIR, name),
                       vision.add_barrier(vis_img, args.spaces))
                print("Saving {} ...".format(name))
Example #6
def main():

    # ----- load different data, loss, and metrics according to the data argument -----
    if config.args.data == 'brca':
        rna = RnaData.predicted_data(config.brca_cli, config.brca_rna,
                                     {'PAM50Call_RNAseq': 'pam50'})
        rna.transform(tf.LabelMapper(config.brca_label_mapper))
        out_shape = len(config.brca_label_mapper)
        criterion = nn.CrossEntropyLoss()
        scorings = (mm.Loss(), mm.Accuracy(), mm.BalancedAccuracy(),
                    mm.F1Score(average='macro'), mm.Precision(average='macro'),
                    mm.Recall(average='macro'), mm.ROCAUC(average='macro'))
    elif config.args.data == 'survival':
        if os.path.exists('./DATA/temp_pan.pth'):
            rna = RnaData.load('./DATA/temp_pan.pth')
        else:
            rna = RnaData.survival_data(config.pan_cli, config.pan_rna,
                                        '_OS_IND', '_OS')
        out_shape = 1
        if config.args.loss_type == 'cox':
            criterion = NegativeLogLikelihood()
        elif config.args.loss_type == 'svm':
            criterion = SvmLoss(rank_ratio=config.args.svm_rankratio)
        scorings = (mm.Loss(), mm.CIndex())
    rna.transform(tf.ZeroFilterCol(0.8))
    rna.transform(tf.MeanFilterCol(1))
    rna.transform(tf.StdFilterCol(0.5))
    norm = tf.Normalization()
    rna.transform(norm)

    # ----- build the network and optimizer -----
    inpt_shape = rna.X.shape[1]
    if config.args.net_type == 'mlp':
        net = MLP(inpt_shape, out_shape, config.args.hidden_num,
                  config.args.block_num).cuda()
    elif config.args.net_type == 'atten':
        net = SelfAttentionNet(inpt_shape, out_shape, config.args.hidden_num,
                               config.args.bottle_num, config.args.block_num,
                               config.args.no_res, config.act,
                               config.args.no_head, config.args.no_bottle,
                               config.args.no_atten,
                               config.args.dropout_rate).cuda()
    elif config.args.net_type == 'resnet':
        net = ResidualNet(inpt_shape, out_shape, config.args.hidden_num,
                          config.args.bottle_num,
                          config.args.block_num).cuda()

    # ----- train the network with cross validation -----
    split_iterator = rna.split_cv(config.args.test_size,
                                  config.args.cross_valid)
    train_hists = []
    test_hists = []
    for split_index, (train_rna, test_rna) in enumerate(split_iterator):
        print('##### save: %s, split: %d #####' %
              (config.args.save, split_index))
        # split a validation set off the training data to decide when to stop
        train_rna, valid_rna = train_rna.split(0.1)
        dats = {
            'train': train_rna.to_torchdat(),
            'valid': valid_rna.to_torchdat(),
        }
        dataloaders = {
            k: data.DataLoader(v, batch_size=config.args.batch_size)
            for k, v in dats.items()
        }
        test_dataloader = data.DataLoader(test_rna.to_torchdat(),
                                          batch_size=config.args.batch_size)
        # reset the parameters before each run so earlier splits do not influence this training
        net.reset_parameters()
        # train
        optimizer = optim.Adamax(net.parameters(),
                                 lr=config.args.learning_rate)
        lrs = config.lrs(optimizer)
        net, hist = train(
            net,
            criterion,
            optimizer,
            dataloaders,
            epoch=config.args.epoch,
            metrics=scorings,
            l2=config.args.l2,
            standard_metric_index=config.args.standard_metric_index,
            scheduler=lrs)
        # test
        test_res = evaluate(net, criterion, test_dataloader, metrics=scorings)
        # collect the training histories of all splits into one DataFrame
        hist = pd.DataFrame(hist)
        hist['split_index'] = split_index
        train_hists.append(hist)
        # keep the test results of each split
        test_res['split_index'] = split_index
        test_hists.append(test_res)
        # save the model trained on each split to its own file
        torch.save(net.state_dict(),
                   os.path.join(config.save_dir, 'model%d.pth' % split_index))
    # save the training results
    train_hists = pd.concat(train_hists)
    train_hists.to_csv(os.path.join(config.save_dir, 'train.csv'))
    # save the test results
    test_hists = pd.DataFrame(test_hists)
    test_hists.to_csv(os.path.join(config.save_dir, 'test.csv'))
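net.reset_parameters() is called above so every cross-validation split starts from fresh weights; for a plain nn.Module that does not define such a method, one common equivalent is to re-run each layer's own initializer, as in the sketch below (the helper name is hypothetical).

# Hypothetical helper mirroring the per-split parameter reset used above;
# it relies only on the reset_parameters() method most torch.nn layers expose.
import torch.nn as nn

def reset_all_parameters(module: nn.Module) -> None:
    # re-initialize every sub-layer that defines reset_parameters()
    for layer in module.modules():
        if hasattr(layer, "reset_parameters"):
            layer.reset_parameters()

net = nn.Sequential(nn.Linear(128, 64), nn.ReLU(), nn.Linear(64, 1))
reset_all_parameters(net)  # fresh weights before training the next CV split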