Example #1
def main(opt):
    logger = src.util.init_logger(is_main=True)
    tokenizer = transformers.BertTokenizerFast.from_pretrained(
        'bert-base-uncased')
    model_class = src.model.Retriever
    #model, _, _, _, _, _ = src.util.load(model_class, opt.model_path, opt)
    model = model_class.from_pretrained(opt.model_path)

    model.eval()
    model = model.to(opt.device)
    if not opt.no_fp16:
        model = model.half()

    passages = src.util.load_passages(opt.passages)

    shard_size = len(passages) // opt.num_shards
    start_idx = opt.shard_id * shard_size
    end_idx = start_idx + shard_size
    if opt.shard_id == opt.num_shards - 1:
        end_idx = len(passages)

    passages = passages[start_idx:end_idx]
    logger.info(
        f'Embedding generation for {len(passages)} passages from idx {start_idx} to {end_idx}'
    )

    allids, allembeddings = embed_passages(opt, passages, model, tokenizer)

    output_path = Path(opt.output_path)
    save_file = output_path.parent / (output_path.name +
                                      f'_{opt.shard_id:02d}')
    output_path.parent.mkdir(parents=True, exist_ok=True)
    logger.info(f'Saving {len(allids)} passage embeddings to {save_file}')
    with open(save_file, mode='wb') as f:
        pickle.dump((allids, allembeddings), f)

    logger.info(
        f'Total passages processed {len(allids)}. Written to {save_file}.')
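# A minimal sketch of the `embed_passages` helper called above, which is not
# shown in this excerpt. It assumes each passage is a dict with 'id', 'title'
# and 'text' keys, that `opt` carries `per_gpu_batch_size` and
# `passage_maxlength`, and that the retriever's forward pass returns one
# pooled embedding per passage; these are assumptions, not confirmed API.
def embed_passages(opt, passages, model, tokenizer):
    allids, allembeddings = [], []
    batch = []
    with torch.no_grad():
        for k, passage in enumerate(passages):
            batch.append(passage)
            if len(batch) == opt.per_gpu_batch_size or k == len(passages) - 1:
                text = [p['title'] + ' ' + p['text'] for p in batch]
                encoded = tokenizer(text,
                                    padding=True,
                                    truncation=True,
                                    max_length=opt.passage_maxlength,
                                    return_tensors='pt')
                embeddings = model(encoded['input_ids'].to(opt.device),
                                   encoded['attention_mask'].to(opt.device))
                allids.extend(p['id'] for p in batch)
                allembeddings.append(embeddings.cpu())
                batch = []
    allembeddings = torch.cat(allembeddings, dim=0).numpy()
    return allids, allembeddings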
Example #2
        global_rank,  # use the global rank and world size attributes to split the eval set across multiple GPUs
        world_size=opt.world_size)
    eval_dataset = src.data.Dataset(
        eval_examples,
        opt.n_context,
    )

    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=opt.per_gpu_batch_size,
                                 num_workers=20,
                                 collate_fn=collator_function)

    model_class = src.model.FiDT5
    model = model_class.from_pretrained(opt.model_path)
    model = model.to(opt.device)

    logger.info("Start eval")
    exactmatch, total = evaluate(model, eval_dataset, eval_dataloader,
                                 tokenizer, opt)

    logger.info(f'EM {100*exactmatch:.2f}, total number of examples: {total}')

    if opt.write_results and opt.is_main:
        glob_path = Path(opt.checkpoint_dir) / opt.name / 'test_results'
        write_path = Path(opt.checkpoint_dir) / opt.name / 'final_output.json'
        src.util.write_output(glob_path, write_path)
    if opt.write_crossattention_scores:
        src.util.save_distributed_dataset(eval_dataset.data, opt)
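# A minimal sketch of the `evaluate` helper called above, assuming FiD-style
# batches of (idx, _, _, context_ids, context_mask), a
# `dataset.get_example(i)['answers']` accessor, and an exact-match helper
# `src.evaluation.ems(prediction, gold_answers)`; all of these names are
# assumptions about this codebase, not confirmed API.
def evaluate(model, dataset, dataloader, tokenizer, opt):
    model.eval()
    exactmatch = []
    with torch.no_grad():
        for batch in dataloader:
            idx, _, _, context_ids, context_mask = batch
            outputs = model.generate(input_ids=context_ids.to(opt.device),
                                     attention_mask=context_mask.to(opt.device),
                                     max_length=50)
            for k, output in enumerate(outputs):
                answer = tokenizer.decode(output, skip_special_tokens=True)
                gold = dataset.get_example(idx[k])['answers']
                exactmatch.append(src.evaluation.ems(answer, gold))
    return np.mean(exactmatch), len(exactmatch)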
Example #3
    device_str = "cuda:0" if torch.cuda.is_available() else "cpu"
    print("Device: %s\n" % device_str)
    device = torch.device(device_str)

    # Hyperparameter for Cutmix
    cutmix_beta = 0.3

    # Hyperparameter
    epochs = 100
    lr = 0.01

    train_loader, valid_loader = data.load_data(batch_size=64)
    print("Train samples: %d" % len(train_loader.dataset))
    print("Valid samples: %d" % len(valid_loader.dataset))
    model = model.model()  # note: this rebinds the name `model`, shadowing the imported module
    model = model.to(device)

    criterion_lss1 = nn.BCELoss()
    criterion_lss2 = nn.KLDivLoss(reduction='batchmean')
    criterion_ce = nn.CrossEntropyLoss()

    optimizer = optim.Adam(model.parameters(), lr=lr)

    time_str = time.strftime("%m_%d-%Hh%Mm%Ss", time.localtime())
    file = open("../log/%s.csv" % time_str, 'w', newline='')  # newline='' avoids blank rows in csv output
    writer = csv.writer(file)
    headers = [
        "train_loss", "train_acc", "train_lsl", "train_lss_1", "train_lss_2",
        "train_lsd", "valid_loss", "valid_acc", "valid_lsl", "valid_lss_1",
        "valid_lss_2", "valid_lsd"
    ]
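# `cutmix_beta` is declared above but the mixing step falls outside this
# excerpt. Below is a minimal CutMix sketch showing how such a beta parameter
# is typically used in the training loop; the function name and the NCHW
# batch layout are illustrative assumptions, not this repository's code.
def cutmix(images, labels, beta):
    # Sample the mixing ratio and a random partner for every sample.
    lam = np.random.beta(beta, beta)
    perm = torch.randperm(images.size(0))
    h, w = images.size(2), images.size(3)
    # Cut a box whose area is roughly (1 - lam) of the image.
    cut_h, cut_w = int(h * np.sqrt(1 - lam)), int(w * np.sqrt(1 - lam))
    cy, cx = np.random.randint(h), np.random.randint(w)
    y1, y2 = np.clip(cy - cut_h // 2, 0, h), np.clip(cy + cut_h // 2, 0, h)
    x1, x2 = np.clip(cx - cut_w // 2, 0, w), np.clip(cx + cut_w // 2, 0, w)
    # Paste the partner's crop in place (mutates the batch in place).
    images[:, :, y1:y2, x1:x2] = images[perm, :, y1:y2, x1:x2]
    # Recompute lam from the actual (clipped) box area; the loss would then be
    # lam * criterion_ce(out, labels) + (1 - lam) * criterion_ce(out, labels[perm]).
    lam = 1 - (y2 - y1) * (x2 - x1) / (h * w)
    return images, labels, labels[perm], lam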
Example #4
def run(args):

    ### Data Loading

    if args.task == 0:
        print('Task 0: MR Dataset Prediction')
        augmentor = transforms.Compose([
            transforms.Lambda(lambda x: torch.Tensor(x)),
            mrnet.torchsample.transforms.RandomRotate(25),
            mrnet.torchsample.transforms.RandomTranslate([0.11, 0.11]),
            mrnet.torchsample.transforms.RandomFlip(),
            transforms.Lambda(
                lambda x: x.repeat(3, 1, 1, 1).permute(1, 0, 2, 3)),
        ])
        job = 'acl'
        plane = 'sagittal'
        train_ds = mrnet.mrnet_dataloader.MRDataset(
            '/data/larson2/RCC_dl/MRNet-v1.0/data/',
            job,
            plane,
            transform=augmentor,
            train=True)
        train_loader = torch.utils.data.DataLoader(train_ds,
                                                   batch_size=1,
                                                   shuffle=True,
                                                   num_workers=11,
                                                   drop_last=False)

        val_ds = mrnet.mrnet_dataloader.MRDataset(
            '/data/larson2/RCC_dl/MRNet-v1.0/data/', job, plane, train=False)
        val_loader = torch.utils.data.DataLoader(val_ds,
                                                 batch_size=1,
                                                 shuffle=False,  # validation data should not be shuffled
                                                 num_workers=11,
                                                 drop_last=False)

    elif args.task == 1:
        print('Task 1: clear cell grade prediction')
        path = '/data/larson2/RCC_dl/new/clear_cell/'

        augmentor = transforms.Compose([
            transforms.Lambda(lambda x: torch.Tensor(x)),
            src.dataloader.Rescale(-160, 240),  # reset dynamic range
            transforms.Lambda(
                lambda x: x.repeat(3, 1, 1, 1).permute(3, 0, 1, 2)),
            #             src.dataloader.Normalize(),
            #             src.dataloader.Crop(90),
            #             src.dataloader.RandomCenterCrop(90),
            src.dataloader.RandomHorizontalFlip(),
            src.dataloader.RandomRotate(25),
            src.dataloader.Resize(256),
        ])

        augmentor2 = transforms.Compose([
            transforms.Lambda(lambda x: torch.Tensor(x)),
            src.dataloader.Rescale(-160, 240),  # reset dynamic range
            transforms.Lambda(
                lambda x: x.repeat(3, 1, 1, 1).permute(3, 0, 1, 2)),
            #         src.dataloader.Normalize(),
            #         src.dataloader.Crop(90),
            src.dataloader.Resize(256),
        ])

        train_ds = src.dataloader.RCCDataset_h5(path,
                                                mode='train',
                                                transform=augmentor)
        train_loader = DataLoader(train_ds,
                                  batch_size=1,
                                  shuffle=True,
                                  num_workers=1,
                                  drop_last=False)

        val_ds = src.dataloader.RCCDataset_h5(path,
                                              mode='val',
                                              transform=augmentor2)
        val_loader = DataLoader(val_ds,
                                batch_size=1,
                                shuffle=True,
                                num_workers=1,
                                drop_last=False)
        print(f'train size: {len(train_loader)}')
        print(f'val size: {len(val_loader)}')

        pos_weight = args.weight

    ### Some Checkers
    print('Summary: ')

    print(f'\ttrain size: {len(train_loader)}')
    print(f'\tval size: {len(val_loader)}')
    print('\tDatatype = ', train_ds[1][0].dtype)
    print('\tMin = ', train_ds[1][0].min())
    print('\tMax = ', train_ds[1][0].max())
    print('\tInput size', train_ds[0][0].shape)
    print('\tweight = ', args.weight)

    ### Some trackers
    log_root_folder = "/data/larson2/RCC_dl/logs/"

    now = datetime.now()
    now = now.strftime("%Y%m%d-%H%M%S")
    logdir = os.path.join(
        log_root_folder,
        f"task_{args.task}_{args.prefix_name}_model{args.model}_{now}")
    os.makedirs(logdir)
    print(f'logdir = {logdir}')

    writer = SummaryWriter(logdir)

    ### Model Construction

    ## Select Model
    if args.model == 1:
        model = src.model.MRNet()
    elif args.model == 2:
        model = src.model.MRNet2()
    elif args.model == 3:
        model = src.model.MRNetBN()
    elif args.model == 4:
        model = src.model.MRResNet()
    elif args.model == 5:
        model = src.model.MRNetScratch()
    elif args.model == 6:
        model = src.model.TDNet()
    else:
        print('Invalid model name')
        return

    ## Weight Initialization

    ## Training Strategy
    device = torch.device(
        "cuda:{}".format(args.gpu) if torch.cuda.is_available() else "cpu")
    print('\tCuda:', torch.cuda.is_available(), f'\n\tdevice = {device}')

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.1)

    if args.lr_scheduler == "plateau":
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               patience=3,
                                                               factor=.3,
                                                               threshold=1e-4,
                                                               verbose=True)
    elif args.lr_scheduler == "step":
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                    step_size=3,
                                                    gamma=args.gamma)

    model = model.to(device)

    ### Ready?
    best_val_loss = float('inf')
    best_val_auc = float(0)
    iteration_change_loss = 0
    t_start_training = time.time()

    ### Here we go
    for epoch in range(args.epochs):
        current_lr = src.train3d.get_lr(optimizer)

        t_start = time.time()

        train_loss, train_auc = src.train3d.train_model(
            model, train_loader, device, epoch, args.epochs, optimizer, writer,
            current_lr, args.log_every, args.weight)
        val_loss, val_auc = src.train3d.evaluate_model(
            model,
            val_loader,
            device,
            epoch,
            args.epochs,
            writer,
            current_lr,
            args.log_every,
        )

        if args.lr_scheduler == 'plateau':
            scheduler.step(val_loss)
        elif args.lr_scheduler == 'step':
            scheduler.step()

        t_end = time.time()
        delta = t_end - t_start

        print(
            "train loss : {0} | train auc {1} | val loss {2} | val auc {3} | elapsed time {4} s"
            .format(train_loss, train_auc, val_loss, val_auc, delta))

        iteration_change_loss += 1
        print('-' * 30)

        model_root_dir = "/data/larson2/RCC_dl/models/"

        if val_auc > best_val_auc:
            best_val_auc = val_auc
            if bool(args.save_model):
                file_name = f'task_{args.task}_model_{args.model}_{args.prefix_name}_val_auc_{val_auc:0.4f}_train_auc_{train_auc:0.4f}_epoch_{epoch+1}_weight_{args.weight}_lr_{args.lr}_gamma_{args.gamma}_lrsche_{args.lr_scheduler}.pth'
                #                 for f in os.listdir(model_root_dir):
                #                     if  (args.prefix_name in f):
                #                         os.remove(os.path.join(model_root_dir, f))
                torch.save(model, os.path.join(model_root_dir, file_name))

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            iteration_change_loss = 0

        if iteration_change_loss == args.patience:
            print(
                'Early stopping after {0} epochs without a decrease in the val loss'
                .format(iteration_change_loss))
            break

    t_end_training = time.time()
    print(f'training took {t_end_training - t_start_training} s')
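# A minimal sketch of `src.train3d.get_lr` as used above, assuming it reads
# the learning rate off the first parameter group (the usual idiom); this is
# an assumption, not the module's confirmed implementation.
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']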
Example #5
def run(args):
    print('Task 1: clear cell grade prediction')
    path = '/data/larson2/RCC_dl/new/clear_cell/'

    transform = {
        'train':
        transforms.Compose([
            transforms.Lambda(lambda x: torch.Tensor(x)),
            src.dataloader.Rescale(-160, 240,
                                   zero_center=True),  # reset dynamic range
            transforms.Lambda(
                lambda x: x.repeat(3, 1, 1, 1).permute(3, 0, 1, 2)),
            #     src.dataloader.Normalize(),
            #     src.dataloader.Crop(110),
            #     src.dataloader.RandomCenterCrop(90),
            src.dataloader.RandomHorizontalFlip(),
            #     src.dataloader.RandomRotate(25),
            src.dataloader.Resize(256)
        ]),
        'val':
        transforms.Compose([
            transforms.Lambda(lambda x: torch.Tensor(x)),
            src.dataloader.Rescale(-160, 240,
                                   zero_center=True),  # reset dynamic range
            transforms.Lambda(
                lambda x: x.repeat(3, 1, 1, 1).permute(3, 0, 1, 2)),
            #       src.dataloader.Normalize(),
            #       src.dataloader.Crop(90),
            src.dataloader.Resize(256)
        ])
    }

    my_dataset = {
        'train':
        src.dataloader.RCCDataset_h5(path,
                                     mode='train',
                                     transform=transform['train']),
        'val':
        src.dataloader.RCCDataset_h5(path,
                                     mode='val',
                                     transform=transform['val'])
    }

    my_loader = {
        x: DataLoader(my_dataset[x], batch_size=1, shuffle=True, num_workers=4)
        for x in ['train', 'val']
    }

    print('train size: ', len(my_loader['train']))
    print('val size: ', len(my_loader['val']))

    ### Some Checkers
    print('Summary: ')
    print('\ttrain size: ', len(my_loader['train']))
    print('\tval size: ', len(my_loader['val']))
    sample_batch = next(iter(my_loader['train']))[0]  # fetch one batch instead of four
    print('\tDatatype = ', sample_batch.dtype)
    print('\tMin = ', sample_batch.min())
    print('\tMax = ', sample_batch.max())
    print('\tInput size', sample_batch.shape)
    #     print('\tweight = ', args.weight)

    ### Tensorboard Log Setup
    log_root_folder = "/data/larson2/RCC_dl/logs/"
    now = datetime.now()
    now = now.strftime("%Y%m%d-%H%M%S")
    logdir = os.path.join(
        log_root_folder,
        f"{now}_model_{args.model}_{args.prefix_name}_epoch_{args.epochs}_weight_{args.weight}_lr_{args.lr}_gamma_{args.gamma}_lrsche_{args.lr_scheduler}"
    )
    #     os.makedirs(logdir)
    print(f'\tlogdir = {logdir}')

    writer = SummaryWriter(logdir)

    ### Model Selection

    device = torch.device(
        "cuda:{}".format(args.gpu) if torch.cuda.is_available() else "cpu")

    model = src.model.TDNet()
    model = model.to(device)

    writer.add_graph(model, my_dataset['train'][0][0].to(device))

    print('\tCuda:', torch.cuda.is_available(), f'\n\tdevice = {device}')

    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=0.1)

    if args.lr_scheduler == "plateau":
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                               patience=3,
                                                               factor=.3,
                                                               threshold=1e-4,
                                                               verbose=True)
    elif args.lr_scheduler == "step":
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                    step_size=3,
                                                    gamma=args.gamma)

    pos_weight = torch.FloatTensor([args.weight]).to(device)
    criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    ### Ready?
    best_val_loss = float('inf')
    best_val_auc = float(0)
    best_model_wts = copy.deepcopy(model.state_dict())
    iteration_change_loss = 0
    t_start_training = time.time()

    ### Here we go
    for epoch in range(args.epochs):
        current_lr = get_lr(optimizer)
        t_start = time.time()

        epoch_loss = {'train': 0., 'val': 0.}
        epoch_corrects = {'train': 0., 'val': 0.}

        epoch_acc = 0.0
        epoch_AUC = 0.0

        for phase in ['train', 'val']:
            if phase == 'train':
                if args.lr_scheduler == "step":
                    scheduler.step()
                model.train()
            else:
                model.eval()

            running_losses = []
            running_corrects = 0.
            y_trues = []
            y_probs = []
            y_preds = []

            print('lr: ', current_lr)
            for i, (inputs, labels, header) in enumerate(my_loader[phase]):
                optimizer.zero_grad()

                inputs = inputs.to(device)
                labels = labels.to(device)

                # forward
                # track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs.float())  # raw logits
                    probs = torch.sigmoid(
                        outputs)  # [0, 1] probability, shape = s * 1
                    preds = torch.round(
                        probs
                    )  # 0 or 1, shape = s * 1, prediction for each slice
                    pt_pred, _ = torch.mode(
                        preds, 0
                    )  # take majority vote, shape = 1, prediction for each patient

                    count0 = (preds == 0).sum().float()
                    count1 = (preds == 1).sum().float()
                    pt_prob = count1 / (preds.shape[0])

                    # convert label to slice level
                    loss = criterion(outputs, labels.repeat(
                        inputs.shape[1], 1))  # inputs shape = 1*s*3*256*256

                    # backward + optimize only if in training phases
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # multiple loss by slice num per batch?
                running_losses.append(loss.item())  # * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

                y_trues.append(int(labels.item()))
                y_probs.append(pt_prob.item())  # use ratio to get probability
                y_preds.append(pt_pred.item())

                writer.add_scalar(f'{phase}/Loss', loss.item(),
                                  epoch * len(my_loader[phase]) + i)
                writer.add_pr_curve(f'{phase}/pr_curve', np.array(y_trues),
                                    np.array(y_probs), epoch)

                if (i % args.log_every == 0) & (i > 0):
                    print(
                        'Epoch: {0}/{1} | Single batch number : {2}/{3} | avg loss:{4} | Acc: {5:.4f} | lr: {6}'
                        .format(epoch + 1, args.epochs, i,
                                len(my_loader[phase]),
                                np.round(np.mean(running_losses), 4),
                                (running_corrects / len(my_loader[phase])),
                                current_lr))

            # epoch statistics
            epoch_loss[phase] = np.round(np.mean(running_losses), 4)
            epoch_corrects[phase] = (running_corrects / len(my_loader[phase]))

            cm = confusion_matrix(y_trues, y_preds, labels=[0, 1])
            src.helper.print_cm(cm, ['0', '1'])
            sens, spec, acc = src.helper.compute_stats(y_trues, y_preds)
            print('sens: {:.4f}'.format(sens))
            print('spec: {:.4f}'.format(spec))
            print('acc:  {:.4f}'.format(acc))
            print()

        print(
            'Summary | train loss: {0} | val loss: {1} | train acc: {2:.4f} | val acc: {3:.4f}'
            .format(epoch_loss['train'], epoch_loss['val'],
                    epoch_corrects['train'], epoch_corrects['val']))
        print('-' * 30)
Example #6
    )
    train_dataset = src.data.Dataset(train_examples, opt.n_context)
    # use global rank and world size to split the eval set across multiple GPUs
    eval_examples = src.data.load_data(
        opt.eval_data,
        global_rank=opt.global_rank,
        world_size=opt.world_size,
        maxload=opt.maxload
    )
    eval_dataset = src.data.Dataset(eval_examples, opt.n_context)

    if not checkpoint_exists and opt.model_path == "none":
        t5 = transformers.T5ForConditionalGeneration.from_pretrained(model_name)
        model = src.model.FiDT5(t5.config)
        model.load_t5(t5.state_dict())
        model = model.to(opt.local_rank)
        optimizer, scheduler = src.util.set_optim(opt, model)
        step, best_dev_em = 0, 0.0
    elif opt.model_path == "none":
        load_path = checkpoint_path / 'checkpoint' / 'latest'
        model, optimizer, scheduler, opt_checkpoint, step, best_dev_em = \
            src.util.load(model_class, load_path, opt, reset_params=False)
        logger.info(f"Model loaded from {load_path}")
    else:
        model, optimizer, scheduler, opt_checkpoint, step, best_dev_em = \
            src.util.load(model_class, opt.model_path, opt, reset_params=True)
        logger.info(f"Model loaded from {opt.model_path}")

    model.set_checkpoint(opt.use_checkpoint)

    if opt.is_distributed:
Example #7
def evaluate_model(
    model,
    val_loader,
    device,
    epoch,
    num_epochs,
    writer,
    current_lr,
    log_every=20,
):
    _ = model.eval()

    model = model.to(device)
    y_trues = []
    y_logits = []
    y_probs = []
    y_preds = []
    loss_values = []

    criterion = torch.nn.BCEWithLogitsLoss()

    for i, (image, label, header) in enumerate(val_loader):

        image = image.to(device)
        label = label.to(device)

        outputs = model(image.float())
        loss = criterion(outputs, label)

        probs = torch.sigmoid(outputs)
        preds = torch.round(probs)

        loss_values.append(loss.item())
        y_trues.append(int(label.item()))
        y_logits.append(outputs.item())
        y_probs.append(probs.item())
        y_preds.append(preds.item())

        try:
            auc = metrics.roc_auc_score(y_trues, y_probs)
        except ValueError:  # only one class present so far
            auc = 0.5

        writer.add_scalar('Val/Loss', loss.item(), epoch * len(val_loader) + i)
        writer.add_scalar('Val/AUC', auc, epoch * len(val_loader) + i)

        if i % log_every == 0 and i > 0:
            print(
                '[Epoch: {0}/{1} | batch: {2}/{3}] | avg val loss {4} | val auc: {5} | lr: {6}'
                .format(epoch + 1, num_epochs, i, len(val_loader),
                        np.round(np.mean(loss_values), 4), np.round(auc, 4),
                        current_lr))

    cm = confusion_matrix(y_trues, y_preds, labels=[0, 1])
    print_cm(cm, ['0', '1'])
    sens, spec, acc = compute_stats(y_trues, y_preds)
    print('sens: {:.4f}'.format(sens))
    print('spec: {:.4f}'.format(spec))
    print('acc:  {:.4f}'.format(acc))
    print()

    writer.add_scalar('Val/AUC_epoch', auc, epoch)

    val_loss_epoch = np.round(np.mean(loss_values), 4)
    val_auc_epoch = np.round(auc, 4)
    return val_loss_epoch, val_auc_epoch
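# Minimal sketches of the `print_cm` and `compute_stats` helpers used above,
# assuming binary labels and sklearn's 2x2 confusion matrix; these are
# plausible implementations, not the project's confirmed code.
def compute_stats(y_trues, y_preds):
    tn, fp, fn, tp = confusion_matrix(y_trues, y_preds, labels=[0, 1]).ravel()
    sens = tp / (tp + fn) if (tp + fn) else 0.0  # sensitivity (recall)
    spec = tn / (tn + fp) if (tn + fp) else 0.0  # specificity
    acc = (tp + tn) / (tp + tn + fp + fn)
    return sens, spec, acc

def print_cm(cm, class_names):
    # Rows are true classes, columns are predicted classes.
    print(' ' * 8 + ''.join('{:>8}'.format(c) for c in class_names))
    for name, row in zip(class_names, cm):
        print('{:>8}'.format(name) + ''.join('{:>8}'.format(v) for v in row))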
Example #8
def train_model(model,
                train_loader,
                device,
                epoch,
                num_epochs,
                optimizer,
                writer,
                current_lr,
                log_every=100,
                weight=1):
    _ = model.train()

    model = model.to(device)
    y_trues = []
    y_logits = []
    y_probs = []
    y_preds = []
    loss_values = []

    pos_weight = torch.FloatTensor([weight]).to(device)
    criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

    for i, (image, label, header) in enumerate(train_loader):
        optimizer.zero_grad()

        image = image.to(device)
        label = label.to(device)

        outputs = model(image.float())
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()

        probs = torch.sigmoid(outputs)
        preds = torch.round(probs)

        loss_values.append(loss.item())
        y_trues.append(int(label.item()))
        y_logits.append(outputs.item())
        y_probs.append(probs.item())
        y_preds.append(preds.item())

        try:
            auc = metrics.roc_auc_score(y_trues, y_probs)
        except ValueError:  # only one class present so far
            auc = 0.5

        writer.add_scalar('Train/Loss', loss.item(),
                          epoch * len(train_loader) + i)
        writer.add_scalar('Train/AUC', auc, epoch * len(train_loader) + i)

        if i % log_every == 0 and i > 0:
            print(
                '[Epoch: {0}/{1} | batch: {2}/{3}] | avg train loss {4} | train auc: {5} | lr: {6}'
                .format(epoch + 1, num_epochs, i, len(train_loader),
                        np.round(np.mean(loss_values), 4), np.round(auc, 4),
                        current_lr))

    cm = confusion_matrix(y_trues, y_preds, labels=[0, 1])
    print_cm(cm, ['0', '1'])
    sens, spec, acc = compute_stats(y_trues, y_preds)
    print('sens: {:.4f}'.format(sens))
    print('spec: {:.4f}'.format(spec))
    print('acc:  {:.4f}'.format(acc))
    print()

    writer.add_scalar('Train/AUC_epoch', auc, epoch)

    train_loss_epoch = np.round(np.mean(loss_values), 4)
    train_auc_epoch = np.round(auc, 4)
    return train_loss_epoch, train_auc_epoch