# Example 1
def main():
    """Train DSMIL on 20x patch features learned by SimCLR.

    Builds the instance/bag classifier pair, trains on an 80/20 split of
    the TCGA bag list, and prints per-epoch train/test metrics.
    Requires a CUDA device and the project-local ``mil``, ``train`` and
    ``test`` helpers.
    """
    parser = argparse.ArgumentParser(
        description='Train DSMIL on 20x patch features learned by SimCLR')
    parser.add_argument('--num_class',
                        default=2,
                        type=int,
                        help='Number of output classes')
    parser.add_argument('--feats_size',
                        default=512,
                        type=int,
                        help='Dimension of the feature size')
    parser.add_argument('--lr',
                        default=0.0002,
                        type=float,
                        help='Initial learning rate')
    parser.add_argument('--num_epoch',
                        default=100,
                        type=int,
                        help='Number of total training epochs')
    parser.add_argument('--weight_decay',
                        default=5e-3,
                        type=float,
                        help='Weight decay')
    args = parser.parse_args()

    # Instance-level classifier scores each patch; bag-level classifier
    # aggregates them into a slide prediction (DSMIL architecture).
    i_classifier = mil.FCLayer(in_size=args.feats_size,
                               out_size=args.num_class).cuda()
    b_classifier = mil.BClassifier(input_size=args.feats_size,
                                   output_class=args.num_class).cuda()
    milnet = mil.MILNet(i_classifier, b_classifier).cuda()
    criterion = nn.BCEWithLogitsLoss()

    optimizer = torch.optim.Adam(milnet.parameters(),
                                 lr=args.lr,
                                 betas=(0.5, 0.9),
                                 weight_decay=args.weight_decay)
    # Cosine anneal down to lr=0 over the full training run.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.num_epoch, 0)

    # First 80% of the bag list trains, remaining 20% tests.
    bags_path = pd.read_csv('datasets/tcga-dataset/TCGA.csv')
    train_path = bags_path.iloc[0:int(len(bags_path) * 0.8), :]
    test_path = bags_path.iloc[int(len(bags_path) * 0.8):, :]

    # BUG FIX: range(1, num_epoch) ran only num_epoch-1 epochs; include
    # the final epoch so --num_epoch epochs are actually trained.
    for epoch in range(1, args.num_epoch + 1):
        train_path = shuffle(train_path).reset_index(drop=True)
        test_path = shuffle(test_path).reset_index(drop=True)
        train_loss_bag = train(train_path, milnet, criterion, optimizer,
                               args)  # iterate all bags
        test_loss_bag, avg_score, aucs, thresholds_optimal = test(
            test_path, milnet, criterion, optimizer, args)
        print(
            '\r Epoch [%d/%d] train loss: %.4f test loss: %.4f, average score: %.4f, auc_LUAD: %.4f, auc_LUSC: %.4f'
            % (epoch, args.num_epoch, train_loss_bag, test_loss_bag, avg_score,
               aucs[0], aucs[1]))
        scheduler.step()
# Example 2
def main():
    """Train DSMIL on 20x patch features learned by SimCLR.

    With ``--simclr 1`` (default) the bag CSV is rebuilt from per-slide
    feature CSVs under datasets/wsi-tcga-lung; otherwise the pre-made
    TCGA.csv is used. Requires CUDA and the project-local ``mil``,
    ``train`` and ``test`` helpers.
    """
    parser = argparse.ArgumentParser(description='Train DSMIL on 20x patch features learned by SimCLR')
    parser.add_argument('--num_class', default=2, type=int, help='Number of output classes')
    parser.add_argument('--feats_size', default=512, type=int, help='Dimension of the feature size')
    parser.add_argument('--lr', default=0.0002, type=float, help='Initial learning rate')
    parser.add_argument('--num_epoch', default=40, type=int, help='Number of total training epochs')
    parser.add_argument('--weight_decay', default=5e-3, type=float, help='Weight decay')
    parser.add_argument('--simclr', default=1, type=int, help='Use newly trained features 1/0(on/off)')
    args = parser.parse_args()

    # Instance-level classifier scores patches; bag-level classifier
    # aggregates them into a slide prediction (DSMIL architecture).
    i_classifier = mil.FCLayer(in_size=args.feats_size, out_size=args.num_class).cuda()
    b_classifier = mil.BClassifier(input_size=args.feats_size, output_class=args.num_class).cuda()
    milnet = mil.MILNet(i_classifier, b_classifier).cuda()
    criterion = nn.BCEWithLogitsLoss()

    optimizer = torch.optim.Adam(milnet.parameters(), lr=args.lr, betas=(0.5, 0.9), weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epoch, 0)

    if args.simclr == 0:
        bags_csv = 'datasets/tcga-dataset/TCGA.csv'
    else:
        # Rebuild the bag list from per-slide CSVs: LUAD -> label 0, LUSC -> label 1.
        luad_list = glob.glob('datasets'+os.sep+'wsi-tcga-lung'+os.sep+'LUAD'+os.sep+'*.csv')
        lusc_list = glob.glob('datasets'+os.sep+'wsi-tcga-lung'+os.sep+'LUSC'+os.sep+'*.csv')
        luad_df = pd.DataFrame(luad_list)
        luad_df['label'] = 0
        luad_df.to_csv('datasets/wsi-tcga-lung/LUAD.csv', index=False)
        lusc_df = pd.DataFrame(lusc_list)
        lusc_df['label'] = 1
        lusc_df.to_csv('datasets/wsi-tcga-lung/LUSC.csv', index=False)
        # BUG FIX: DataFrame.append was removed in pandas 2.0; use pd.concat.
        bags_path = pd.concat([luad_df, lusc_df], ignore_index=True)
        bags_path = shuffle(bags_path)
        bags_path.to_csv('datasets/wsi-tcga-lung/TCGA.csv', index=False)
        bags_csv = 'datasets/wsi-tcga-lung/TCGA.csv'

    # First 80% of the bag list trains, remaining 20% tests.
    bags_path = pd.read_csv(bags_csv)
    train_path = bags_path.iloc[0:int(len(bags_path)*0.8), :]
    test_path = bags_path.iloc[int(len(bags_path)*0.8):, :]

    # BUG FIX: range(1, num_epoch) ran only num_epoch-1 epochs.
    for epoch in range(1, args.num_epoch + 1):
        train_path = shuffle(train_path).reset_index(drop=True)
        test_path = shuffle(test_path).reset_index(drop=True)
        train_loss_bag = train(train_path, milnet, criterion, optimizer, args) # iterate all bags
        test_loss_bag, avg_score, aucs, thresholds_optimal = test(test_path, milnet, criterion, optimizer, args)
        print('\r Epoch [%d/%d] train loss: %.4f test loss: %.4f, average score: %.4f, auc_LUAD: %.4f, auc_LUSC: %.4f' % 
              (epoch, args.num_epoch, train_loss_bag, test_loss_bag, avg_score, aucs[0], aucs[1]))
        scheduler.step()
# Example 3
def main():
    """Train DSMIL (or ABMIL) on 20x patch features learned by SimCLR.

    Selects the MIL model module at runtime, optionally warm-starts DSMIL
    from ``init.pth``, trains on a (1 - split) fraction of the bag list,
    and checkpoints the best model by (sum of AUCs + accuracy) / 2.
    Requires CUDA and the project-local ``train``/``test`` helpers.
    """
    parser = argparse.ArgumentParser(description='Train DSMIL on 20x patch features learned by SimCLR')
    parser.add_argument('--num_classes', default=2, type=int, help='Number of output classes [2]')
    parser.add_argument('--feats_size', default=512, type=int, help='Dimension of the feature size [512]')
    parser.add_argument('--lr', default=0.0002, type=float, help='Initial learning rate [0.0002]')
    parser.add_argument('--num_epochs', default=200, type=int, help='Number of total training epochs [40|200]')
    parser.add_argument('--gpu_index', type=int, nargs='+', default=(0,), help='GPU ID(s) [0]')
    parser.add_argument('--weight_decay', default=5e-3, type=float, help='Weight decay [5e-3]')
    parser.add_argument('--dataset', default='TCGA-lung-default', type=str, help='Dataset folder name')
    parser.add_argument('--split', default=0.2, type=float, help='Training/Validation split [0.2]')
    parser.add_argument('--model', default='dsmil', type=str, help='MIL model [dsmil]')
    parser.add_argument('--dropout_patch', default=0, type=float, help='Patch dropout rate [0]')
    parser.add_argument('--dropout_node', default=0, type=float, help='Bag classifier dropout rate [0]')
    parser.add_argument('--non_linearity', default=1, type=float, help='Additional nonlinear operation [0]')
    args = parser.parse_args()
    gpu_ids = tuple(args.gpu_index)
    # Restrict visible devices before any CUDA context is created.
    os.environ['CUDA_VISIBLE_DEVICES']=','.join(str(x) for x in gpu_ids)

    if args.model == 'dsmil':
        import dsmil as mil
    elif args.model == 'abmil':
        import abmil as mil

    i_classifier = mil.FCLayer(in_size=args.feats_size, out_size=args.num_classes).cuda()
    b_classifier = mil.BClassifier(input_size=args.feats_size, output_class=args.num_classes, dropout_v=args.dropout_node, nonlinear=args.non_linearity).cuda()
    milnet = mil.MILNet(i_classifier, b_classifier).cuda()
    if args.model == 'dsmil':
        # Warm-start from pretrained weights; if shapes mismatch (different
        # nonlinearity config), drop the offending layer and retry.
        state_dict_weights = torch.load('init.pth')
        try:
            milnet.load_state_dict(state_dict_weights, strict=False)
        # BUG FIX: bare ``except:`` swallowed KeyboardInterrupt/SystemExit;
        # load_state_dict raises RuntimeError on shape mismatch.
        except RuntimeError:
            del state_dict_weights['b_classifier.v.1.weight']
            del state_dict_weights['b_classifier.v.1.bias']
            milnet.load_state_dict(state_dict_weights, strict=False)
    criterion = nn.BCEWithLogitsLoss()

    optimizer = torch.optim.Adam(milnet.parameters(), lr=args.lr, betas=(0.5, 0.9), weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epochs, 0.000005)

    if args.dataset == 'TCGA-lung-default':
        bags_csv = 'datasets/tcga-dataset/TCGA.csv'
    else:
        bags_csv = os.path.join('datasets', args.dataset, args.dataset+'.csv')

    bags_path = pd.read_csv(bags_csv)
    train_path = bags_path.iloc[0:int(len(bags_path)*(1-args.split)), :]
    test_path = bags_path.iloc[int(len(bags_path)*(1-args.split)):, :]
    best_score = 0
    # Checkpoints go under weights/<MMDDYYYY>/<run>.pth, numbered by how
    # many checkpoints already exist for today.
    save_path = os.path.join('weights', datetime.date.today().strftime("%m%d%Y"))
    os.makedirs(save_path, exist_ok=True)
    run = len(glob.glob(os.path.join(save_path, '*.pth')))
    # BUG FIX: range(1, num_epochs) ran only num_epochs-1 epochs.
    for epoch in range(1, args.num_epochs + 1):
        train_path = shuffle(train_path).reset_index(drop=True)
        test_path = shuffle(test_path).reset_index(drop=True)
        train_loss_bag = train(train_path, milnet, criterion, optimizer, args) # iterate all bags
        test_loss_bag, avg_score, aucs, thresholds_optimal = test(test_path, milnet, criterion, optimizer, args)
        if args.dataset=='TCGA-lung':
            print('\r Epoch [%d/%d] train loss: %.4f test loss: %.4f, average score: %.4f, auc_LUAD: %.4f, auc_LUSC: %.4f' % 
                  (epoch, args.num_epochs, train_loss_bag, test_loss_bag, avg_score, aucs[0], aucs[1]))
        else:
            print('\r Epoch [%d/%d] train loss: %.4f test loss: %.4f, average score: %.4f, AUC: ' % 
                  (epoch, args.num_epochs, train_loss_bag, test_loss_bag, avg_score) + '|'.join('class-{}>>{}'.format(*k) for k in enumerate(aucs))) 
        scheduler.step()
        current_score = (sum(aucs) + avg_score)/2
        if current_score >= best_score:
            best_score = current_score
            save_name = os.path.join(save_path, str(run+1)+'.pth')
            torch.save(milnet.state_dict(), save_name)
            if args.dataset=='TCGA-lung':
                print('Best model saved at: ' + save_name + ' Best thresholds: LUAD %.4f, LUSC %.4f' % (thresholds_optimal[0], thresholds_optimal[1]))
            else:
                print('Best model saved at: ' + save_name)
                print('Best thresholds ===>>> '+ '|'.join('class-{}>>{}'.format(*k) for k in enumerate(thresholds_optimal)))
# Example 4
def main():
    """Train DSMIL on classical MIL benchmark datasets with k-fold CV.

    Loads one of the standard MIL datasets (musk1/2, elephant, fox, tiger),
    groups instances into bags, and reports mean/std accuracy over
    ``--cv_fold`` cross-validation folds. Requires CUDA and the
    project-local ``mil``/``get_data``/``epoch_train`` helpers.
    """
    parser = argparse.ArgumentParser(description='Train DSMIL on classfical MIL datasets')
    parser.add_argument('--datasets', default='musk1', type=str, help='Choose MIL datasets from: musk1, musk2, elephant, fox, tiger')
    parser.add_argument('--lr', default=0.0002, type=float, help='Initial learning rate')
    parser.add_argument('--num_epoch', default=40, type=int, help='Number of total training epochs')
    parser.add_argument('--cv_fold', default=10, type=int, help='Number of cross validation fold')
    parser.add_argument('--weight_decay', default=5e-3, type=float, help='Weight decay')
    args = parser.parse_args()

    # Dataset selection also fixes the per-instance feature dimension.
    if args.datasets == 'musk1':
        data_all = get_data('datasets/mil_dataset/Musk/musk1norm.svm')
        args.num_feats = 166
    elif args.datasets == 'musk2':
        data_all = get_data('datasets/mil_dataset/Musk/musk2norm.svm')
        args.num_feats = 166
    elif args.datasets == 'elephant':
        data_all = get_data('datasets/mil_dataset/Elephant/data_100x100.svm')
        args.num_feats = 230
    elif args.datasets == 'fox':
        data_all = get_data('datasets/mil_dataset/Fox/data_100x100.svm')
        args.num_feats = 230
    elif args.datasets == 'tiger':
        data_all = get_data('datasets/mil_dataset/Tiger/data_100x100.svm')
        args.num_feats = 230
    else:
        # ROBUSTNESS: previously an unknown name left data_all undefined
        # and crashed later with a confusing NameError.
        raise ValueError('Unknown dataset: ' + args.datasets)

    # Regroup flat instance rows into [bag_label, instance_vectors] pairs.
    bag_ins_list = []
    num_bag = data_all[-1][1]+1
    for i in range(num_bag):
        bag_data = get_bag(data_all, i)
        bag_label = bag_data[0, 2]
        bag_vector = bag_data[:, 3]
        bag_ins_list.append([bag_label, bag_vector])
    bag_ins_list = shuffle(bag_ins_list)

    ### check both classes exist in testing bags
    # BUG FIX: the original set valid_bags = 0 and looped ``while(valid_bags)``,
    # so this validation never ran. Re-shuffle until every fold's test split
    # contains at least one positive bag.
    valid_bags = False
    while not valid_bags:
        bag_ins_list = shuffle(bag_ins_list)
        valid_bags = True
        for k in range(0, args.cv_fold):
            bags_list, test_list = cross_validation_set(bag_ins_list, fold=args.cv_fold, index=k)
            bag_labels = 0
            for i, data in enumerate(test_list):
                bag_labels = np.clip(data[0], 0, 1) + bag_labels
            if bag_labels == 0:
                valid_bags = False
                break

    acs = []
    print('Dataset: ' + args.datasets)
    for k in range(0, args.cv_fold):
        print('Start %d-fold cross validation: fold %d ' % (args.cv_fold, k))
        bags_list, test_list = cross_validation_set(bag_ins_list, fold=args.cv_fold, index=k)
        # Single-output (binary) DSMIL; fresh model per fold.
        i_classifier = mil.FCLayer(args.num_feats, 1)
        b_classifier = mil.BClassifier(input_size=args.num_feats, output_class=1)
        milnet = mil.MILNet(i_classifier, b_classifier).cuda()
        # Weight the positive class by its inverse frequency in training bags.
        pos_weight = torch.tensor(compute_pos_weight(bags_list))
        criterion = nn.BCEWithLogitsLoss(pos_weight)
        optimizer = torch.optim.Adam(milnet.parameters(), lr=args.lr, betas=(0.5, 0.9), weight_decay=args.weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epoch, 0)
        optimal_ac = 0
        for epoch in range(0, args.num_epoch):
            train_loss = epoch_train(bags_list, optimizer, criterion, milnet, args) # iterate all bags
            test_loss, bag_labels, bag_predictions = epoch_test(test_list, criterion, milnet, args)
            accuracy, auc_value, precision, recall, fscore = five_scores(bag_labels, bag_predictions)
            sys.stdout.write('\r Epoch [%d/%d] train loss: %.4f, test loss: %.4f, accuracy: %.4f, aug score: %.4f, precision: %.4f, recall: %.4f, fscore: %.4f ' % 
                  (epoch+1, args.num_epoch, train_loss, test_loss, accuracy, auc_value, precision, recall, fscore))
            optimal_ac = max(accuracy, optimal_ac)
            scheduler.step()
        print('\n Optimal accuracy: %.4f ' % (optimal_ac))
        acs.append(optimal_ac)
    print('Cross validation accuracy mean: %.4f, std %.4f ' % (np.mean(np.array(acs)), np.std(np.array(acs))))
# Example 5
def main():
    """Train DSMIL on 20x patch features learned by SimCLR.

    With ``--new_features 1`` the bag CSV is rebuilt from per-slide
    feature CSVs under datasets/wsi-tcga-lung; otherwise the pre-made
    TCGA.csv is used. Checkpoints the best model by a blend of AUCs,
    accuracy and test loss. Requires CUDA and the project-local ``mil``,
    ``train`` and ``test`` helpers.
    """
    parser = argparse.ArgumentParser(
        description='Train DSMIL on 20x patch features learned by SimCLR')
    parser.add_argument('--num_classes',
                        default=2,
                        type=int,
                        help='Number of output classes')
    parser.add_argument('--feats_size',
                        default=512,
                        type=int,
                        help='Dimension of the feature size')
    parser.add_argument('--lr',
                        default=0.0002,
                        type=float,
                        help='Initial learning rate')
    parser.add_argument('--num_epochs',
                        default=40,
                        type=int,
                        help='Number of total training epochs')
    parser.add_argument('--weight_decay',
                        default=5e-3,
                        type=float,
                        help='Weight decay')
    parser.add_argument('--new_features',
                        default=0,
                        type=int,
                        help='Use newly trained features 1/0(on/off)')
    args = parser.parse_args()

    # Instance-level classifier scores patches; bag-level classifier
    # aggregates them into a slide prediction (DSMIL architecture).
    i_classifier = mil.FCLayer(in_size=args.feats_size,
                               out_size=args.num_classes).cuda()
    b_classifier = mil.BClassifier(input_size=args.feats_size,
                                   output_class=args.num_classes).cuda()
    milnet = mil.MILNet(i_classifier, b_classifier).cuda()
    criterion = nn.BCEWithLogitsLoss()

    optimizer = torch.optim.Adam(milnet.parameters(),
                                 lr=args.lr,
                                 betas=(0.5, 0.9),
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.num_epochs, 0.000005)

    if args.new_features == 0:
        bags_csv = 'datasets/tcga-dataset/TCGA.csv'
    else:
        # Rebuild the bag list from per-slide CSVs: LUAD -> label 0,
        # LUSC -> label 1.
        luad_list = glob.glob('datasets' + os.sep + 'wsi-tcga-lung' + os.sep +
                              'LUAD' + os.sep + '*.csv')
        lusc_list = glob.glob('datasets' + os.sep + 'wsi-tcga-lung' + os.sep +
                              'LUSC' + os.sep + '*.csv')
        luad_df = pd.DataFrame(luad_list)
        luad_df['label'] = 0
        luad_df.to_csv('datasets/wsi-tcga-lung/LUAD.csv', index=False)
        lusc_df = pd.DataFrame(lusc_list)
        lusc_df['label'] = 1
        lusc_df.to_csv('datasets/wsi-tcga-lung/LUSC.csv', index=False)
        # BUG FIX: DataFrame.append was removed in pandas 2.0; use pd.concat.
        bags_path = pd.concat([luad_df, lusc_df], ignore_index=True)
        bags_path = shuffle(bags_path)
        bags_path.to_csv('datasets/wsi-tcga-lung/TCGA.csv', index=False)
        bags_csv = 'datasets/wsi-tcga-lung/TCGA.csv'

    # First 80% of the bag list trains, remaining 20% tests.
    bags_path = pd.read_csv(bags_csv)
    train_path = bags_path.iloc[0:int(len(bags_path) * 0.8), :]
    test_path = bags_path.iloc[int(len(bags_path) * 0.8):, :]
    best_score = 0
    # Checkpoints go under weights/<MMDDYYYY>/<run>.pth, numbered by how
    # many checkpoints already exist for today.
    save_path = os.path.join('weights',
                             datetime.date.today().strftime("%m%d%Y"))
    os.makedirs(save_path, exist_ok=True)
    run = len(glob.glob(os.path.join(save_path, '*.pth')))
    # BUG FIX: range(1, num_epochs) ran only num_epochs-1 epochs.
    for epoch in range(1, args.num_epochs + 1):
        train_path = shuffle(train_path).reset_index(drop=True)
        test_path = shuffle(test_path).reset_index(drop=True)
        train_loss_bag = train(train_path, milnet, criterion, optimizer,
                               args)  # iterate all bags
        test_loss_bag, avg_score, aucs, thresholds_optimal = test(
            test_path, milnet, criterion, optimizer, args)
        print(
            '\r Epoch [%d/%d] train loss: %.4f test loss: %.4f, average score: %.4f, auc_LUAD: %.4f, auc_LUSC: %.4f'
            % (epoch, args.num_epochs, train_loss_bag, test_loss_bag,
               avg_score, aucs[0], aucs[1]))
        scheduler.step()
        # Composite model-selection score: both AUCs, accuracy, and
        # (1 - test loss), averaged.
        current_score = (aucs[0] + aucs[1] + avg_score + 1 - test_loss_bag) / 4
        if current_score >= best_score:
            best_score = current_score
            save_name = os.path.join(save_path, str(run + 1) + '.pth')
            torch.save(milnet.state_dict(), save_name)
            print('Best model saved at: ' + save_name +
                  ' Best thresholds: LUAD %.4f, LUSC %.4f' %
                  (thresholds_optimal[0], thresholds_optimal[1]))