def main(argv):
    """Command-line entry point for CT-volume inference.

    Parses options, resolves the input/output paths to absolute real paths,
    restricts CUDA to the requested GPU, then dispatches to ``fit`` for a
    single model or ``fit_ensemble`` for a comma-separated model list.

    Exits with status 2 on bad options or missing paths.
    """
    usage = ('main.py --Input <CT volume path> --Output <Results output path>'
             ' --Model <Inference model name> --Lungs <Lung mask filepath>'
             ' --APG <Anatomical mask filename> --GPU <GPU id>')
    input_filename = ''
    output_prefix = ''
    model_list = ''
    lungs_mask_filename = None
    apg_mask_filename = None
    gpu_id = '-1'  # default: no visible CUDA device (CPU execution)
    try:
        opts, args = getopt.getopt(argv, "hi:o:m:l:a:g:",
                                   ["Input=", "Output=", "Model=", "Lungs=",
                                    "APG=", "GPU="])
    except getopt.GetoptError:
        print('usage: ' + usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--Input"):
            input_filename = arg
        elif opt in ("-o", "--Output"):
            output_prefix = arg
        elif opt in ("-m", "--Model"):
            model_list = arg
        elif opt in ("-l", "--Lungs"):
            lungs_mask_filename = arg
        elif opt in ("-a", "--APG"):
            apg_mask_filename = arg
        elif opt in ("-g", "--GPU"):
            # Silently keep the CPU default when the id is not numeric.
            if arg.isnumeric():
                gpu_id = arg
    if input_filename == '':
        print('usage: ' + usage)
        sys.exit()

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id

    # Resolve the input path through symlinks so downstream code sees a
    # canonical absolute filename.
    if os.path.exists(input_filename):
        real_path = os.path.realpath(os.path.dirname(input_filename))
        input_filename = os.path.join(real_path, os.path.basename(input_filename))
    else:
        print('Input filename does not exist on disk, with argument: {}'.format(input_filename))
        sys.exit(2)
    if os.path.exists(os.path.dirname(output_prefix)):
        real_path = os.path.realpath(os.path.dirname(output_prefix))
        output_prefix = os.path.join(real_path, os.path.basename(output_prefix))
    else:
        # BUG FIX: the message previously reported input_filename instead of
        # the offending output_prefix.
        print('Directory name for the output prefix does not exist on disk, with argument: {}'.format(output_prefix))
        sys.exit(2)

    model_list = model_list.split(',')
    if len(model_list) == 1:
        fit(input_filename=input_filename, output_path=output_prefix,
            selected_model=model_list[0],
            lungs_mask_filename=lungs_mask_filename,
            anatomical_priors_filename=apg_mask_filename)
    else:
        fit_ensemble(input_filename=input_filename, output_path=output_prefix,
                     model_list=model_list,
                     lungs_mask_filename=lungs_mask_filename,
                     anatomical_priors_filename=apg_mask_filename)
def main(argv):
    """Command-line entry point for MRI-volume inference.

    Parses options, resolves the input/output paths to absolute real paths,
    restricts CUDA to the requested GPU, then runs ``fit`` with the selected
    model. Exits with status 2 on bad options or missing paths.
    """
    usage = ('main.py --Input <MRI volume path> --Output <Results output path>'
             ' --Model <Inference model name> --GPU <GPU id>')
    input_filename = ''
    output_prefix = ''
    model_name = ''
    gpu_id = '-1'  # default: no visible CUDA device (CPU execution)
    try:
        opts, args = getopt.getopt(argv, "hi:o:m:g:",
                                   ["Input=", "Output=", "Model=", "GPU="])
    except getopt.GetoptError:
        print('usage: ' + usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--Input"):
            input_filename = arg
        elif opt in ("-o", "--Output"):
            output_prefix = arg
        elif opt in ("-m", "--Model"):
            model_name = arg
        elif opt in ("-g", "--GPU"):
            # Silently keep the CPU default when the id is not numeric.
            if arg.isnumeric():
                gpu_id = arg
    if input_filename == '':
        print('usage: ' + usage)
        sys.exit()

    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id

    # Resolve paths through symlinks so downstream code sees canonical
    # absolute filenames.
    if os.path.exists(input_filename):
        real_path = os.path.realpath(os.path.dirname(input_filename))
        input_filename = os.path.join(real_path, os.path.basename(input_filename))
    else:
        print('Input filename does not exist on disk, with argument: {}'.format(input_filename))
        sys.exit(2)
    if os.path.exists(os.path.dirname(output_prefix)):
        real_path = os.path.realpath(os.path.dirname(output_prefix))
        output_prefix = os.path.join(real_path, os.path.basename(output_prefix))
    else:
        # BUG FIX: the message previously reported input_filename instead of
        # the offending output_prefix.
        print('Directory name for the output prefix does not exist on disk, with argument: {}'.format(output_prefix))
        sys.exit(2)

    fit(input_filename=input_filename, output_path=output_prefix,
        selected_model=model_name)
def lr_find(start_lr=1e-7, end_lr=10, num_it: int = 49, stop_div: bool = True,
            wd: 'float | None' = None, annealing_func=annealing_exp):
    """Explore lr from `start_lr` to `end_lr` over `num_it` iterations in `learn`.

    If `stop_div`, stops when loss diverges.

    Relies on the module-level ``simple_net``, ``dl`` and ``valid_dl`` globals.
    ``wd`` (weight decay) is currently accepted but never used — NOTE(review):
    either wire it into the optimizer or drop it once callers are checked.
    The annotation was fixed from ``float`` to ``float | None`` to match the
    ``None`` default.
    """
    opt = BasicOptim(simple_net.parameters(), start_lr)
    cb = LRFinder(start_lr, end_lr, num_it, stop_div, annealing_func=annealing_func)
    learner = Learner(simple_net, mnist_loss, opt, dl, valid_dl,
                      cb=CallbackHandler([cb]))
    # Run just enough epochs to cover num_it iterations of the train loader.
    epochs = int(np.ceil(num_it / len(learner.train_dl)))
    fit(epochs, learn=learner)
# Train a small fully-connected MNIST classifier with the project's
# Learner/fit loop and callback handler.
from pathlib import Path

# BUG FIX: `nn` was used below (nn.Sequential / nn.Linear / nn.ReLU) but never
# imported, causing a NameError at runtime.
import torch.nn as nn
from torch.utils.data import DataLoader

from src.callback import CallbackHandler, BatchCounter, TimeCheck, PrintLoss, GetValAcc
from src.data import get_dsets
from src.fit import fit
from src.learner import Learner
from src.measure import mnist_loss
from src.optim import BasicOptim

if __name__ == '__main__':
    path = Path('data')
    train_dset, valid_dset = get_dsets(path)
    dl = DataLoader(train_dset, batch_size=256)
    valid_dl = DataLoader(valid_dset, batch_size=256)
    # 28x28 flattened images -> 30 hidden units -> single output logit.
    simple_net = nn.Sequential(
        nn.Linear(28 * 28, 30),
        nn.ReLU(),
        nn.Linear(30, 1))
    lr = 1e-3
    opt = BasicOptim(simple_net.parameters(), lr)
    learner = Learner(simple_net, mnist_loss, opt, dl, valid_dl,
                      cb=CallbackHandler([BatchCounter(), TimeCheck(),
                                          PrintLoss(), GetValAcc()]))
    fit(10, learn=learner)
def run(args):
    """Train and evaluate the classifier selected by ``args.model_name``.

    Supported names: ``bert``, ``gru``, ``lstm``, ``cnn``, ``hybrid`` and
    ``ovr`` (TF-IDF + classical one-vs-rest). Neural branches share the same
    recipe: build datasets/loaders, fit with Adam + BCELoss, then report test
    metrics. Configuration (epochs, batch size, learning rate, val split,
    ICD label type, word2vec path) comes from ``args``. Logs to train.log.
    """
    train_diagnosis, test_diagnosis = data(args)

    # Fix all seeds and force deterministic cuDNN kernels for reproducibility.
    SEED = 2021
    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    logging.basicConfig(filename='train.log', filemode='w', level=logging.DEBUG)
    logging.info("Model Name: %s", args.model_name.upper())
    logging.info("Device: %s", device)
    logging.info("Batch Size: %d", args.batch_size)
    logging.info("Learning Rate: %f", args.learning_rate)

    if args.model_name == "bert":
        learning_rate = args.learning_rate
        loss_fn = nn.BCELoss()
        opt_fn = torch.optim.Adam
        bert_train_dataset = BERTdataset(train_diagnosis)
        bert_test_dataset = BERTdataset(test_diagnosis)
        bert_train_loader, bert_val_loader, bert_test_loader = dataloader(
            bert_train_dataset, bert_test_dataset, args.batch_size, args.val_split)
        model = BERTclassifier().to(device)
        bert_fit(args.epochs, model, bert_train_loader, bert_val_loader,
                 args.icd_type, opt_fn, loss_fn, learning_rate, device)
        bert_test_results(model, bert_test_loader, args.icd_type, device)
    elif args.model_name == 'gru':
        learning_rate = args.learning_rate
        loss_fn = nn.BCELoss()
        opt_fn = torch.optim.Adam
        counts, vocab2index = count_vocab_index(train_diagnosis, test_diagnosis)
        rnn_train_dataset = rnndataset(train_diagnosis, vocab2index)
        # BUG FIX: the test dataset was built from train_diagnosis, so test
        # metrics were computed on training data.
        rnn_test_dataset = rnndataset(test_diagnosis, vocab2index)
        rnn_train_loader, rnn_val_loader, rnn_test_loader = dataloader(
            rnn_train_dataset, rnn_test_dataset, args.batch_size, args.val_split)
        # Initialize the embedding layer from the pretrained word2vec model.
        w2vmodel = Word2Vec.load(args.w2vmodel)
        weights = get_emb_matrix(w2vmodel, counts)
        gruw2vmodel = GRUw2vmodel(weights_matrix=weights, hidden_size=256,
                                  num_layers=2, device=device).to(device)
        fit(args.epochs, gruw2vmodel, rnn_train_loader, rnn_val_loader,
            args.icd_type, opt_fn, loss_fn, learning_rate, device)
        test_results(gruw2vmodel, rnn_test_loader, args.icd_type, device)
    elif args.model_name == 'lstm':
        learning_rate = args.learning_rate
        loss_fn = nn.BCELoss()
        opt_fn = torch.optim.Adam
        counts, vocab2index = count_vocab_index(train_diagnosis, test_diagnosis)
        rnn_train_dataset = rnndataset(train_diagnosis, vocab2index)
        # BUG FIX: was rnndataset(train_diagnosis, ...) — train/test leak.
        rnn_test_dataset = rnndataset(test_diagnosis, vocab2index)
        rnn_train_loader, rnn_val_loader, rnn_test_loader = dataloader(
            rnn_train_dataset, rnn_test_dataset, args.batch_size, args.val_split)
        w2vmodel = Word2Vec.load(args.w2vmodel)
        weights = get_emb_matrix(w2vmodel, counts)
        lstmw2vmodel = LSTMw2vmodel(weights_matrix=weights, hidden_size=256,
                                    num_layers=2, device=device).to(device)
        fit(args.epochs, lstmw2vmodel, rnn_train_loader, rnn_val_loader,
            args.icd_type, opt_fn, loss_fn, learning_rate, device)
        test_results(lstmw2vmodel, rnn_test_loader, args.icd_type, device)
    elif args.model_name == "cnn":
        learning_rate = args.learning_rate
        loss_fn = nn.BCELoss()
        opt_fn = torch.optim.Adam
        cnn_train_dataset = cnndataset(train_diagnosis)
        cnn_test_dataset = cnndataset(test_diagnosis)
        cnn_train_loader, cnn_val_loader, cnn_test_loader = dataloader(
            cnn_train_dataset, cnn_test_dataset, args.batch_size, args.val_split)
        model = character_cnn(cnn_train_dataset.vocabulary,
                              cnn_train_dataset.sequence_length).to(device)
        fit(args.epochs, model, cnn_train_loader, cnn_val_loader,
            args.icd_type, opt_fn, loss_fn, learning_rate, device)
        test_results(model, cnn_test_loader, args.icd_type, device)
    elif args.model_name == 'hybrid':
        learning_rate = args.learning_rate
        loss_fn = nn.BCELoss()
        opt_fn = torch.optim.Adam
        counts, vocab2index = count_vocab_index(train_diagnosis, test_diagnosis)
        hybrid_train_dataset = hybriddataset(train_diagnosis, vocab2index)
        # BUG FIX: was hybriddataset(train_diagnosis, ...) — train/test leak.
        hybrid_test_dataset = hybriddataset(test_diagnosis, vocab2index)
        hybrid_train_loader, hybrid_val_loader, hybrid_test_loader = dataloader(
            hybrid_train_dataset, hybrid_test_dataset, args.batch_size, args.val_split)
        w2vmodel = Word2Vec.load(args.w2vmodel)
        weights = get_emb_matrix(w2vmodel, counts)
        model = hybrid(hybrid_train_dataset.vocabulary,
                       hybrid_train_dataset.sequence_length,
                       weights_matrix=weights, hidden_size=256,
                       num_layers=2).to(device)
        hybrid_fit(args.epochs, model, hybrid_train_loader, hybrid_val_loader,
                   args.icd_type, opt_fn, loss_fn, learning_rate, device)
        hybrid_test_results(model, hybrid_test_loader, args.icd_type, device)
    elif args.model_name == 'ovr':
        # Classical baseline: TF-IDF features + one-vs-rest classifier.
        X_train, y_train = mlmodel_data(train_diagnosis, args.icd_type)
        X_test, y_test = mlmodel_data(test_diagnosis, args.icd_type)
        tfidf_vectorizer = TfidfVectorizer(max_df=0.8)
        X_train = tfidf_vectorizer.fit_transform(X_train)
        X_test = tfidf_vectorizer.transform(X_test)
        ml_model = train_classifier(X_train, y_train)
        y_predict = ml_model.predict(X_test)
        print('-' * 20 + args.icd_type + '-' * 20)
        mlmodel_result(y_test, y_predict)