def main():
    """Train DSMIL on 20x TCGA-lung patch features learned by SimCLR.

    Reads the bag list from ``datasets/tcga-dataset/TCGA.csv``, splits it
    80/20 into train/test, and runs a fixed-length training loop with
    cosine learning-rate annealing.
    """
    parser = argparse.ArgumentParser(
        description='Train DSMIL on 20x patch features learned by SimCLR')
    parser.add_argument('--num_class', default=2, type=int, help='Number of output classes')
    parser.add_argument('--feats_size', default=512, type=int, help='Dimension of the feature size')
    parser.add_argument('--lr', default=0.0002, type=float, help='Initial learning rate')
    parser.add_argument('--num_epoch', default=100, type=int, help='Number of total training epochs')
    parser.add_argument('--weight_decay', default=5e-3, type=float, help='Weight decay')
    args = parser.parse_args()

    # Instance-level classifier scores each patch; bag-level classifier
    # aggregates instance features into a bag prediction.
    i_classifier = mil.FCLayer(in_size=args.feats_size, out_size=args.num_class).cuda()
    b_classifier = mil.BClassifier(input_size=args.feats_size, output_class=args.num_class).cuda()
    milnet = mil.MILNet(i_classifier, b_classifier).cuda()

    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(milnet.parameters(), lr=args.lr, betas=(0.5, 0.9),
                                 weight_decay=args.weight_decay)
    # Anneal LR from args.lr down to 0 over the full run (T_max = num_epoch).
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epoch, 0)

    bags_path = pd.read_csv('datasets/tcga-dataset/TCGA.csv')
    # 80/20 split over the ordered bag list; shuffled per epoch below.
    train_path = bags_path.iloc[0:int(len(bags_path) * 0.8), :]
    test_path = bags_path.iloc[int(len(bags_path) * 0.8):, :]

    # BUGFIX: range(1, num_epoch) ran one epoch fewer than requested
    # (the progress line could never reach "Epoch [N/N]").
    for epoch in range(1, args.num_epoch + 1):
        train_path = shuffle(train_path).reset_index(drop=True)
        test_path = shuffle(test_path).reset_index(drop=True)
        train_loss_bag = train(train_path, milnet, criterion, optimizer, args)  # iterate all bags
        test_loss_bag, avg_score, aucs, thresholds_optimal = test(
            test_path, milnet, criterion, optimizer, args)
        print('\r Epoch [%d/%d] train loss: %.4f test loss: %.4f, average score: %.4f, '
              'auc_LUAD: %.4f, auc_LUSC: %.4f' %
              (epoch, args.num_epoch, train_loss_bag, test_loss_bag, avg_score,
               aucs[0], aucs[1]))
        scheduler.step()
def main():
    """Train DSMIL on TCGA-lung bags, optionally rebuilding the bag index.

    With ``--simclr 1`` (default) the bag CSV is regenerated from the
    per-slide feature CSVs under ``datasets/wsi-tcga-lung`` (LUAD -> label 0,
    LUSC -> label 1); with ``--simclr 0`` the precomputed
    ``datasets/tcga-dataset/TCGA.csv`` is used as-is.
    """
    parser = argparse.ArgumentParser(description='Train DSMIL on 20x patch features learned by SimCLR')
    parser.add_argument('--num_class', default=2, type=int, help='Number of output classes')
    parser.add_argument('--feats_size', default=512, type=int, help='Dimension of the feature size')
    parser.add_argument('--lr', default=0.0002, type=float, help='Initial learning rate')
    parser.add_argument('--num_epoch', default=40, type=int, help='Number of total training epochs')
    parser.add_argument('--weight_decay', default=5e-3, type=float, help='Weight decay')
    parser.add_argument('--simclr', default=1, type=int, help='Use newly trained features 1/0(on/off)')
    args = parser.parse_args()

    i_classifier = mil.FCLayer(in_size=args.feats_size, out_size=args.num_class).cuda()
    b_classifier = mil.BClassifier(input_size=args.feats_size, output_class=args.num_class).cuda()
    milnet = mil.MILNet(i_classifier, b_classifier).cuda()

    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(milnet.parameters(), lr=args.lr, betas=(0.5, 0.9),
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epoch, 0)

    if args.simclr == 0:
        bags_csv = 'datasets/tcga-dataset/TCGA.csv'
    else:
        # Rebuild the bag index from newly extracted per-slide feature CSVs.
        luad_list = glob.glob(os.path.join('datasets', 'wsi-tcga-lung', 'LUAD', '*.csv'))
        lusc_list = glob.glob(os.path.join('datasets', 'wsi-tcga-lung', 'LUSC', '*.csv'))
        luad_df = pd.DataFrame(luad_list)
        luad_df['label'] = 0
        luad_df.to_csv('datasets/wsi-tcga-lung/LUAD.csv', index=False)
        lusc_df = pd.DataFrame(lusc_list)
        lusc_df['label'] = 1
        lusc_df.to_csv('datasets/wsi-tcga-lung/LUSC.csv', index=False)
        # BUGFIX: DataFrame.append was deprecated in pandas 1.4 and removed
        # in pandas 2.0; pd.concat is the supported equivalent.
        bags_path = pd.concat([luad_df, lusc_df], ignore_index=True)
        bags_path = shuffle(bags_path)
        bags_path.to_csv('datasets/wsi-tcga-lung/TCGA.csv', index=False)
        bags_csv = 'datasets/wsi-tcga-lung/TCGA.csv'

    bags_path = pd.read_csv(bags_csv)
    # 80/20 train/test split over the bag list.
    train_path = bags_path.iloc[0:int(len(bags_path) * 0.8), :]
    test_path = bags_path.iloc[int(len(bags_path) * 0.8):, :]

    # BUGFIX: range(1, num_epoch) ran one epoch fewer than requested.
    for epoch in range(1, args.num_epoch + 1):
        train_path = shuffle(train_path).reset_index(drop=True)
        test_path = shuffle(test_path).reset_index(drop=True)
        train_loss_bag = train(train_path, milnet, criterion, optimizer, args)  # iterate all bags
        test_loss_bag, avg_score, aucs, thresholds_optimal = test(
            test_path, milnet, criterion, optimizer, args)
        print('\r Epoch [%d/%d] train loss: %.4f test loss: %.4f, average score: %.4f, '
              'auc_LUAD: %.4f, auc_LUSC: %.4f' %
              (epoch, args.num_epoch, train_loss_bag, test_loss_bag, avg_score,
               aucs[0], aucs[1]))
        scheduler.step()
def main():
    """Train a MIL model (dsmil or abmil) on precomputed WSI patch features.

    Selects the MIL backend via ``--model``, optionally warm-starts DSMIL
    from ``init.pth``, trains for ``--num_epochs`` epochs, and saves the
    checkpoint with the best (mean AUC + average score)/2 under
    ``weights/<MMDDYYYY>/<run>.pth``.
    """
    parser = argparse.ArgumentParser(description='Train DSMIL on 20x patch features learned by SimCLR')
    parser.add_argument('--num_classes', default=2, type=int, help='Number of output classes [2]')
    parser.add_argument('--feats_size', default=512, type=int, help='Dimension of the feature size [512]')
    parser.add_argument('--lr', default=0.0002, type=float, help='Initial learning rate [0.0002]')
    parser.add_argument('--num_epochs', default=200, type=int, help='Number of total training epochs [40|200]')
    parser.add_argument('--gpu_index', type=int, nargs='+', default=(0,), help='GPU ID(s) [0]')
    parser.add_argument('--weight_decay', default=5e-3, type=float, help='Weight decay [5e-3]')
    parser.add_argument('--dataset', default='TCGA-lung-default', type=str, help='Dataset folder name')
    parser.add_argument('--split', default=0.2, type=float, help='Training/Validation split [0.2]')
    parser.add_argument('--model', default='dsmil', type=str, help='MIL model [dsmil]')
    parser.add_argument('--dropout_patch', default=0, type=float, help='Patch dropout rate [0]')
    parser.add_argument('--dropout_node', default=0, type=float, help='Bag classifier dropout rate [0]')
    parser.add_argument('--non_linearity', default=1, type=float, help='Additional nonlinear operation [0]')
    args = parser.parse_args()

    # Restrict CUDA to the requested devices before any .cuda() call.
    gpu_ids = tuple(args.gpu_index)
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(str(x) for x in gpu_ids)

    # Late import selects the MIL aggregation backend at runtime.
    if args.model == 'dsmil':
        import dsmil as mil
    elif args.model == 'abmil':
        import abmil as mil

    i_classifier = mil.FCLayer(in_size=args.feats_size, out_size=args.num_classes).cuda()
    b_classifier = mil.BClassifier(input_size=args.feats_size, output_class=args.num_classes,
                                   dropout_v=args.dropout_node, nonlinear=args.non_linearity).cuda()
    milnet = mil.MILNet(i_classifier, b_classifier).cuda()

    if args.model == 'dsmil':
        # Warm-start from pretrained weights when available.
        state_dict_weights = torch.load('init.pth')
        try:
            milnet.load_state_dict(state_dict_weights, strict=False)
        except RuntimeError:
            # BUGFIX: was a bare `except:` (swallows everything, including
            # KeyboardInterrupt), and the `del` inside it could raise KeyError
            # if the keys were absent. Only a shape mismatch (RuntimeError) in
            # the optional nonlinear projection should trigger this fallback;
            # pop() tolerates missing keys.
            state_dict_weights.pop('b_classifier.v.1.weight', None)
            state_dict_weights.pop('b_classifier.v.1.bias', None)
            milnet.load_state_dict(state_dict_weights, strict=False)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(milnet.parameters(), lr=args.lr, betas=(0.5, 0.9),
                                 weight_decay=args.weight_decay)
    # Anneal LR down to 5e-6 over the run.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epochs, 0.000005)

    if args.dataset == 'TCGA-lung-default':
        bags_csv = 'datasets/tcga-dataset/TCGA.csv'
    else:
        bags_csv = os.path.join('datasets', args.dataset, args.dataset + '.csv')

    bags_path = pd.read_csv(bags_csv)
    train_path = bags_path.iloc[0:int(len(bags_path) * (1 - args.split)), :]
    test_path = bags_path.iloc[int(len(bags_path) * (1 - args.split)):, :]

    best_score = 0
    save_path = os.path.join('weights', datetime.date.today().strftime("%m%d%Y"))
    os.makedirs(save_path, exist_ok=True)
    # Number this run after the checkpoints already present for today.
    run = len(glob.glob(os.path.join(save_path, '*.pth')))

    # BUGFIX: range(1, num_epochs) ran one epoch fewer than requested.
    for epoch in range(1, args.num_epochs + 1):
        train_path = shuffle(train_path).reset_index(drop=True)
        test_path = shuffle(test_path).reset_index(drop=True)
        train_loss_bag = train(train_path, milnet, criterion, optimizer, args)  # iterate all bags
        test_loss_bag, avg_score, aucs, thresholds_optimal = test(
            test_path, milnet, criterion, optimizer, args)
        if args.dataset == 'TCGA-lung':
            print('\r Epoch [%d/%d] train loss: %.4f test loss: %.4f, average score: %.4f, '
                  'auc_LUAD: %.4f, auc_LUSC: %.4f' %
                  (epoch, args.num_epochs, train_loss_bag, test_loss_bag, avg_score,
                   aucs[0], aucs[1]))
        else:
            print('\r Epoch [%d/%d] train loss: %.4f test loss: %.4f, average score: %.4f, AUC: ' %
                  (epoch, args.num_epochs, train_loss_bag, test_loss_bag, avg_score) +
                  '|'.join('class-{}>>{}'.format(*k) for k in enumerate(aucs)))
        scheduler.step()
        # Model selection criterion: mean of summed AUCs and average score.
        current_score = (sum(aucs) + avg_score) / 2
        if current_score >= best_score:
            best_score = current_score
            save_name = os.path.join(save_path, str(run + 1) + '.pth')
            torch.save(milnet.state_dict(), save_name)
            if args.dataset == 'TCGA-lung':
                print('Best model saved at: ' + save_name +
                      ' Best thresholds: LUAD %.4f, LUSC %.4f' %
                      (thresholds_optimal[0], thresholds_optimal[1]))
            else:
                print('Best model saved at: ' + save_name)
                print('Best thresholds ===>>> ' +
                      '|'.join('class-{}>>{}'.format(*k) for k in enumerate(thresholds_optimal)))
def main():
    """Train DSMIL on classical MIL benchmarks with k-fold cross validation.

    Loads one of the svm-format MIL datasets (musk1/musk2/elephant/fox/tiger),
    groups instances into bags, verifies every CV fold's test split contains
    a positive bag, then trains one model per fold and reports mean/std of
    the per-fold best accuracy.
    """
    parser = argparse.ArgumentParser(description='Train DSMIL on classfical MIL datasets')
    parser.add_argument('--datasets', default='musk1', type=str,
                        help='Choose MIL datasets from: musk1, musk2, elephant, fox, tiger')
    parser.add_argument('--lr', default=0.0002, type=float, help='Initial learning rate')
    parser.add_argument('--num_epoch', default=40, type=int, help='Number of total training epochs')
    parser.add_argument('--cv_fold', default=10, type=int, help='Number of cross validation fold')
    parser.add_argument('--weight_decay', default=5e-3, type=float, help='Weight decay')
    args = parser.parse_args()

    # Dataset name -> (svm file, feature dimension); replaces the repeated if-chain
    # and fails fast (KeyError) on an unknown dataset instead of a later NameError.
    dataset_specs = {
        'musk1': ('datasets/mil_dataset/Musk/musk1norm.svm', 166),
        'musk2': ('datasets/mil_dataset/Musk/musk2norm.svm', 166),
        'elephant': ('datasets/mil_dataset/Elephant/data_100x100.svm', 230),
        'fox': ('datasets/mil_dataset/Fox/data_100x100.svm', 230),
        'tiger': ('datasets/mil_dataset/Tiger/data_100x100.svm', 230),
    }
    data_path, args.num_feats = dataset_specs[args.datasets]
    data_all = get_data(data_path)

    # Group instances into bags; per get_bag's output, column 2 holds the bag
    # label and column 3 the instance feature vector.
    bag_ins_list = []
    num_bag = data_all[-1][1] + 1
    for i in range(num_bag):
        bag_data = get_bag(data_all, i)
        bag_label = bag_data[0, 2]
        bag_vector = bag_data[:, 3]
        bag_ins_list.append([bag_label, bag_vector])
    bag_ins_list = shuffle(bag_ins_list)

    # BUGFIX: the original guard set valid_bags = 0 immediately before
    # `while(valid_bags)`, so the split check was dead code — and had it run,
    # setting the flag to 1 on finding positives would have looped forever.
    # Re-shuffle until every fold's test split contains at least one positive
    # bag (otherwise AUC/accuracy for that fold would be degenerate).
    while True:
        all_folds_valid = True
        for k in range(0, args.cv_fold):
            bags_list, test_list = cross_validation_set(bag_ins_list, fold=args.cv_fold, index=k)
            num_pos = 0
            for data in test_list:
                num_pos = np.clip(data[0], 0, 1) + num_pos
            if num_pos == 0:
                all_folds_valid = False
                break
        if all_folds_valid:
            break
        bag_ins_list = shuffle(bag_ins_list)

    acs = []
    print('Dataset: ' + args.datasets)
    for k in range(0, args.cv_fold):
        print('Start %d-fold cross validation: fold %d ' % (args.cv_fold, k))
        bags_list, test_list = cross_validation_set(bag_ins_list, fold=args.cv_fold, index=k)
        # Fresh model, optimizer and scheduler per fold.
        i_classifier = mil.FCLayer(args.num_feats, 1)
        b_classifier = mil.BClassifier(input_size=args.num_feats, output_class=1)
        milnet = mil.MILNet(i_classifier, b_classifier).cuda()
        # Weight positives to offset class imbalance in the training bags.
        pos_weight = torch.tensor(compute_pos_weight(bags_list))
        criterion = nn.BCEWithLogitsLoss(pos_weight)
        optimizer = torch.optim.Adam(milnet.parameters(), lr=args.lr, betas=(0.5, 0.9),
                                     weight_decay=args.weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epoch, 0)
        optimal_ac = 0
        for epoch in range(0, args.num_epoch):
            train_loss = epoch_train(bags_list, optimizer, criterion, milnet, args)  # iterate all bags
            test_loss, bag_labels, bag_predictions = epoch_test(test_list, criterion, milnet, args)
            accuracy, auc_value, precision, recall, fscore = five_scores(bag_labels, bag_predictions)
            sys.stdout.write('\r Epoch [%d/%d] train loss: %.4f, test loss: %.4f, accuracy: %.4f, '
                             'aug score: %.4f, precision: %.4f, recall: %.4f, fscore: %.4f ' %
                             (epoch + 1, args.num_epoch, train_loss, test_loss, accuracy,
                              auc_value, precision, recall, fscore))
            optimal_ac = max(accuracy, optimal_ac)
            scheduler.step()
        print('\n Optimal accuracy: %.4f ' % (optimal_ac))
        acs.append(optimal_ac)
    print('Cross validation accuracy mean: %.4f, std %.4f ' % (np.mean(np.array(acs)), np.std(np.array(acs))))
def main():
    """Train DSMIL on TCGA-lung bags and checkpoint the best-scoring model.

    With ``--new_features 1`` the bag CSV is rebuilt from per-slide feature
    CSVs under ``datasets/wsi-tcga-lung`` (LUAD -> label 0, LUSC -> label 1);
    otherwise the precomputed ``datasets/tcga-dataset/TCGA.csv`` is used.
    The checkpoint with the best combined score is saved under
    ``weights/<MMDDYYYY>/<run>.pth``.
    """
    parser = argparse.ArgumentParser(
        description='Train DSMIL on 20x patch features learned by SimCLR')
    parser.add_argument('--num_classes', default=2, type=int, help='Number of output classes')
    parser.add_argument('--feats_size', default=512, type=int, help='Dimension of the feature size')
    parser.add_argument('--lr', default=0.0002, type=float, help='Initial learning rate')
    parser.add_argument('--num_epochs', default=40, type=int, help='Number of total training epochs')
    parser.add_argument('--weight_decay', default=5e-3, type=float, help='Weight decay')
    parser.add_argument('--new_features', default=0, type=int, help='Use newly trained features 1/0(on/off)')
    args = parser.parse_args()

    i_classifier = mil.FCLayer(in_size=args.feats_size, out_size=args.num_classes).cuda()
    b_classifier = mil.BClassifier(input_size=args.feats_size, output_class=args.num_classes).cuda()
    milnet = mil.MILNet(i_classifier, b_classifier).cuda()

    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(milnet.parameters(), lr=args.lr, betas=(0.5, 0.9),
                                 weight_decay=args.weight_decay)
    # Anneal LR down to 5e-6 over the run.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epochs, 0.000005)

    if args.new_features == 0:
        bags_csv = 'datasets/tcga-dataset/TCGA.csv'
    else:
        # Rebuild the bag index from newly extracted per-slide feature CSVs.
        luad_list = glob.glob(os.path.join('datasets', 'wsi-tcga-lung', 'LUAD', '*.csv'))
        lusc_list = glob.glob(os.path.join('datasets', 'wsi-tcga-lung', 'LUSC', '*.csv'))
        luad_df = pd.DataFrame(luad_list)
        luad_df['label'] = 0
        luad_df.to_csv('datasets/wsi-tcga-lung/LUAD.csv', index=False)
        lusc_df = pd.DataFrame(lusc_list)
        lusc_df['label'] = 1
        lusc_df.to_csv('datasets/wsi-tcga-lung/LUSC.csv', index=False)
        # BUGFIX: DataFrame.append was deprecated in pandas 1.4 and removed
        # in pandas 2.0; pd.concat is the supported equivalent.
        bags_path = pd.concat([luad_df, lusc_df], ignore_index=True)
        bags_path = shuffle(bags_path)
        bags_path.to_csv('datasets/wsi-tcga-lung/TCGA.csv', index=False)
        bags_csv = 'datasets/wsi-tcga-lung/TCGA.csv'

    bags_path = pd.read_csv(bags_csv)
    # 80/20 train/test split over the bag list.
    train_path = bags_path.iloc[0:int(len(bags_path) * 0.8), :]
    test_path = bags_path.iloc[int(len(bags_path) * 0.8):, :]

    best_score = 0
    save_path = os.path.join('weights', datetime.date.today().strftime("%m%d%Y"))
    os.makedirs(save_path, exist_ok=True)
    # Number this run after the checkpoints already present for today.
    run = len(glob.glob(os.path.join(save_path, '*.pth')))

    # BUGFIX: range(1, num_epochs) ran one epoch fewer than requested.
    for epoch in range(1, args.num_epochs + 1):
        train_path = shuffle(train_path).reset_index(drop=True)
        test_path = shuffle(test_path).reset_index(drop=True)
        train_loss_bag = train(train_path, milnet, criterion, optimizer, args)  # iterate all bags
        test_loss_bag, avg_score, aucs, thresholds_optimal = test(
            test_path, milnet, criterion, optimizer, args)
        print('\r Epoch [%d/%d] train loss: %.4f test loss: %.4f, average score: %.4f, '
              'auc_LUAD: %.4f, auc_LUSC: %.4f' %
              (epoch, args.num_epochs, train_loss_bag, test_loss_bag, avg_score,
               aucs[0], aucs[1]))
        scheduler.step()
        # Model selection: average of both AUCs, accuracy, and (1 - test loss).
        current_score = (aucs[0] + aucs[1] + avg_score + 1 - test_loss_bag) / 4
        if current_score >= best_score:
            best_score = current_score
            save_name = os.path.join(save_path, str(run + 1) + '.pth')
            torch.save(milnet.state_dict(), save_name)
            print('Best model saved at: ' + save_name +
                  ' Best thresholds: LUAD %.4f, LUSC %.4f' %
                  (thresholds_optimal[0], thresholds_optimal[1]))