def main(argv):
    """Command-line entry point: segment a CT volume with one or more models.

    Options:
        -i/--Input   path to the input CT volume (required; must exist)
        -o/--Output  output prefix; its parent directory must already exist
        -m/--Model   comma-separated model name(s); more than one runs an ensemble
        -l/--Lungs   optional lungs mask filepath
        -a/--APG     optional anatomical priors (APG) mask filepath
        -g/--GPU     numeric GPU id (default '-1', i.e. CPU)

    Exits with status 2 on invalid options or missing paths.
    """
    input_filename = ''
    output_prefix = ''
    model_list = ''
    lungs_mask_filename = None
    apg_mask_filename = None
    gpu_id = '-1'
    # Single copy of the help text; 'usage: ' is prepended where the original did.
    help_text = ('main.py --Input <CT volume path> --Output <Results output path> --Model <Inference model name>'
                 ' --Lungs <Lung mask filepath> --APG <Anatomical mask filename> --GPU <GPU id>')
    try:
        opts, args = getopt.getopt(argv, "hi:o:m:l:a:g:", ["Input=", "Output=", "Model=", "Lungs=", "APG=", "GPU="])
    except getopt.GetoptError:
        print('usage: ' + help_text)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(help_text)
            sys.exit()
        elif opt in ("-i", "--Input"):
            input_filename = arg
        elif opt in ("-o", "--Output"):
            output_prefix = arg
        elif opt in ("-m", "--Model"):
            model_list = arg
        elif opt in ("-l", "--Lungs"):
            lungs_mask_filename = arg
        elif opt in ("-a", "--APG"):
            apg_mask_filename = arg
        elif opt in ("-g", "--GPU"):
            # Non-numeric ids are ignored so gpu_id stays '-1' (CPU fallback).
            if arg.isnumeric():
                gpu_id = arg
    if input_filename == '':
        print('usage: ' + help_text)
        sys.exit()

    # Must be set before any CUDA-using framework initializes.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id

    if os.path.exists(input_filename):
        # Resolve symlinks in the directory part so downstream code gets a real path.
        real_path = os.path.realpath(os.path.dirname(input_filename))
        input_filename = os.path.join(real_path, os.path.basename(input_filename))
    else:
        print('Input filename does not exist on disk, with argument: {}'.format(input_filename))
        sys.exit(2)

    if os.path.exists(os.path.dirname(output_prefix)):
        real_path = os.path.realpath(os.path.dirname(output_prefix))
        output_prefix = os.path.join(real_path, os.path.basename(output_prefix))
    else:
        # BUGFIX: report the offending output prefix (was wrongly input_filename).
        print('Directory name for the output prefix does not exist on disk, with argument: {}'.format(output_prefix))
        sys.exit(2)

    model_list = model_list.split(',')
    if len(model_list) == 1:
        fit(input_filename=input_filename, output_path=output_prefix, selected_model=model_list[0],
            lungs_mask_filename=lungs_mask_filename, anatomical_priors_filename=apg_mask_filename)
    else:
        fit_ensemble(input_filename=input_filename, output_path=output_prefix, model_list=model_list,
                     lungs_mask_filename=lungs_mask_filename, anatomical_priors_filename=apg_mask_filename)
Ejemplo n.º 2
0
def main(argv):
    """Command-line entry point: run inference on an MRI volume with one model.

    Options:
        -i/--Input   path to the input MRI volume (required; must exist)
        -o/--Output  output prefix; its parent directory must already exist
        -m/--Model   inference model name
        -g/--GPU     numeric GPU id (default '-1', i.e. CPU)

    Exits with status 2 on invalid options or missing paths.
    """
    input_filename = ''
    output_prefix = ''
    model_name = ''
    gpu_id = '-1'
    # Single copy of the help text; 'usage: ' is prepended where the original did.
    help_text = ('main.py --Input <MRI volume path> --Output <Results output path> --Model <Inference model name>'
                 ' --GPU <GPU id>')
    try:
        opts, args = getopt.getopt(argv, "hi:o:m:g:", ["Input=", "Output=", "Model=", "GPU="])
    except getopt.GetoptError:
        print('usage: ' + help_text)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(help_text)
            sys.exit()
        elif opt in ("-i", "--Input"):
            input_filename = arg
        elif opt in ("-o", "--Output"):
            output_prefix = arg
        elif opt in ("-m", "--Model"):
            model_name = arg
        elif opt in ("-g", "--GPU"):
            # Non-numeric ids are ignored so gpu_id stays '-1' (CPU fallback).
            if arg.isnumeric():
                gpu_id = arg
    if input_filename == '':
        print('usage: ' + help_text)
        sys.exit()

    # Must be set before any CUDA-using framework initializes.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu_id

    if os.path.exists(input_filename):
        # Resolve symlinks in the directory part so downstream code gets a real path.
        real_path = os.path.realpath(os.path.dirname(input_filename))
        input_filename = os.path.join(real_path, os.path.basename(input_filename))
    else:
        print('Input filename does not exist on disk, with argument: {}'.format(input_filename))
        sys.exit(2)

    if os.path.exists(os.path.dirname(output_prefix)):
        real_path = os.path.realpath(os.path.dirname(output_prefix))
        output_prefix = os.path.join(real_path, os.path.basename(output_prefix))
    else:
        # BUGFIX: report the offending output prefix (was wrongly input_filename).
        print('Directory name for the output prefix does not exist on disk, with argument: {}'.format(output_prefix))
        sys.exit(2)

    fit(input_filename=input_filename, output_path=output_prefix, selected_model=model_name)
Ejemplo n.º 3
0
def lr_find(start_lr=1e-7,
            end_lr=10,
            num_it: int = 49,
            stop_div: bool = True,
            wd: float = None,
            annealing_func=annealing_exp):
    """Run an LR-range test: sweep the learning rate from `start_lr` to
    `end_lr` over `num_it` iterations; if `stop_div`, stop once the loss
    diverges.

    NOTE(review): `wd` is accepted but never used in this body; kept for
    interface compatibility.
    """
    finder_cb = LRFinder(start_lr, end_lr, num_it, stop_div,
                         annealing_func=annealing_func)
    optimizer = BasicOptim(simple_net.parameters(), start_lr)
    sweep_learner = Learner(simple_net, mnist_loss, optimizer, dl, valid_dl,
                            cb=CallbackHandler([finder_cb]))
    # Round up to enough whole epochs to cover at least `num_it` batches.
    n_epochs = int(np.ceil(num_it / len(sweep_learner.train_dl)))
    fit(n_epochs, learn=sweep_learner)
Ejemplo n.º 4
0
from pathlib import Path

import torch
from torch import nn
from torch.utils.data import DataLoader

from src.callback import CallbackHandler, BatchCounter, TimeCheck, PrintLoss, GetValAcc
from src.data import get_dsets
from src.fit import fit
from src.learner import Learner
from src.measure import mnist_loss
from src.optim import BasicOptim

if __name__ == '__main__':
    # Train a small fully-connected MNIST classifier for 10 epochs using the
    # project's Learner/CallbackHandler machinery.
    path = Path('data')

    train_dset, valid_dset = get_dsets(path)

    dl = DataLoader(train_dset, batch_size=256)
    valid_dl = DataLoader(valid_dset, batch_size=256)

    # Two-layer MLP: 28*28 pixels -> 30 hidden units -> 1 output logit.
    # NOTE(review): `nn` (presumably torch.nn) is not imported in this file's
    # visible import block — confirm it is brought into scope elsewhere.
    simple_net = nn.Sequential(
        nn.Linear(28 * 28, 30),
        nn.ReLU(),
        nn.Linear(30, 1))

    lr = 1e-3
    opt = BasicOptim(simple_net.parameters(), lr)

    # Callbacks: per-batch counting, timing, loss printing, validation accuracy.
    learner = Learner(simple_net, mnist_loss, opt, dl, valid_dl,
                      cb=CallbackHandler([BatchCounter(), TimeCheck(), PrintLoss(), GetValAcc()]))

    fit(10, learn=learner)
def run(args):
    """Train and evaluate the model selected by ``args.model_name``.

    Supported names: 'bert', 'gru', 'lstm', 'cnn', 'hybrid' (neural models,
    trained with BCE loss and Adam) and 'ovr' (TF-IDF features plus a
    classical one-vs-rest classifier).

    Expected ``args`` attributes: model_name, batch_size, learning_rate,
    val_split, epochs, icd_type, and w2vmodel (path to a saved Word2Vec
    model; used by 'gru', 'lstm' and 'hybrid').
    """

    train_diagnosis, test_diagnosis = data(args)

    # Fix all RNG seeds and force deterministic cuDNN for reproducibility.
    SEED = 2021
    torch.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    logging.basicConfig(filename='train.log',
                        filemode='w',
                        level=logging.DEBUG)
    logging.info("Model Name: %s", args.model_name.upper())
    logging.info("Device: %s", device)
    logging.info("Batch Size: %d", args.batch_size)
    logging.info("Learning Rate: %f", args.learning_rate)

    if args.model_name == "bert":

        learning_rate = args.learning_rate
        loss_fn = nn.BCELoss()
        opt_fn = torch.optim.Adam

        bert_train_dataset = BERTdataset(train_diagnosis)
        bert_test_dataset = BERTdataset(test_diagnosis)

        bert_train_loader, bert_val_loader, bert_test_loader = dataloader(
            bert_train_dataset, bert_test_dataset, args.batch_size,
            args.val_split)

        model = BERTclassifier().to(device)

        bert_fit(args.epochs, model, bert_train_loader, bert_val_loader,
                 args.icd_type, opt_fn, loss_fn, learning_rate, device)
        bert_test_results(model, bert_test_loader, args.icd_type, device)

    elif args.model_name == 'gru':
        learning_rate = args.learning_rate
        loss_fn = nn.BCELoss()
        opt_fn = torch.optim.Adam

        counts, vocab2index = count_vocab_index(train_diagnosis,
                                                test_diagnosis)
        rnn_train_dataset = rnndataset(train_diagnosis, vocab2index)
        # BUGFIX: the test dataset was built from train_diagnosis, so the
        # model was evaluated on its own training data.
        rnn_test_dataset = rnndataset(test_diagnosis, vocab2index)

        rnn_train_loader, rnn_val_loader, rnn_test_loader = dataloader(
            rnn_train_dataset, rnn_test_dataset, args.batch_size,
            args.val_split)

        # Initialize the embedding layer from pretrained Word2Vec vectors.
        w2vmodel = Word2Vec.load(args.w2vmodel)
        weights = get_emb_matrix(w2vmodel, counts)

        gruw2vmodel = GRUw2vmodel(weights_matrix=weights,
                                  hidden_size=256,
                                  num_layers=2,
                                  device=device).to(device)

        fit(args.epochs, gruw2vmodel, rnn_train_loader, rnn_val_loader,
            args.icd_type, opt_fn, loss_fn, learning_rate, device)
        test_results(gruw2vmodel, rnn_test_loader, args.icd_type, device)

    elif args.model_name == 'lstm':
        learning_rate = args.learning_rate
        loss_fn = nn.BCELoss()
        opt_fn = torch.optim.Adam

        counts, vocab2index = count_vocab_index(train_diagnosis,
                                                test_diagnosis)
        rnn_train_dataset = rnndataset(train_diagnosis, vocab2index)
        # BUGFIX: was rnndataset(train_diagnosis, ...) — evaluated on training data.
        rnn_test_dataset = rnndataset(test_diagnosis, vocab2index)

        rnn_train_loader, rnn_val_loader, rnn_test_loader = dataloader(
            rnn_train_dataset, rnn_test_dataset, args.batch_size,
            args.val_split)

        # Initialize the embedding layer from pretrained Word2Vec vectors.
        w2vmodel = Word2Vec.load(args.w2vmodel)
        weights = get_emb_matrix(w2vmodel, counts)

        lstmw2vmodel = LSTMw2vmodel(weights_matrix=weights,
                                    hidden_size=256,
                                    num_layers=2,
                                    device=device).to(device)

        fit(args.epochs, lstmw2vmodel, rnn_train_loader, rnn_val_loader,
            args.icd_type, opt_fn, loss_fn, learning_rate, device)
        test_results(lstmw2vmodel, rnn_test_loader, args.icd_type, device)

    elif args.model_name == "cnn":

        learning_rate = args.learning_rate
        loss_fn = nn.BCELoss()
        opt_fn = torch.optim.Adam

        cnn_train_dataset = cnndataset(train_diagnosis)
        cnn_test_dataset = cnndataset(test_diagnosis)

        cnn_train_loader, cnn_val_loader, cnn_test_loader = dataloader(
            cnn_train_dataset, cnn_test_dataset, args.batch_size,
            args.val_split)

        model = character_cnn(cnn_train_dataset.vocabulary,
                              cnn_train_dataset.sequence_length).to(device)

        fit(args.epochs, model, cnn_train_loader, cnn_val_loader,
            args.icd_type, opt_fn, loss_fn, learning_rate, device)
        test_results(model, cnn_test_loader, args.icd_type, device)

    elif args.model_name == 'hybrid':

        learning_rate = args.learning_rate
        loss_fn = nn.BCELoss()
        opt_fn = torch.optim.Adam

        counts, vocab2index = count_vocab_index(train_diagnosis,
                                                test_diagnosis)

        hybrid_train_dataset = hybriddataset(train_diagnosis, vocab2index)
        # BUGFIX: was hybriddataset(train_diagnosis, ...) — evaluated on training data.
        hybrid_test_dataset = hybriddataset(test_diagnosis, vocab2index)

        hybrid_train_loader, hybrid_val_loader, hybrid_test_loader = dataloader(
            hybrid_train_dataset, hybrid_test_dataset, args.batch_size,
            args.val_split)

        # Initialize the embedding layer from pretrained Word2Vec vectors.
        w2vmodel = Word2Vec.load(args.w2vmodel)
        weights = get_emb_matrix(w2vmodel, counts)

        model = hybrid(hybrid_train_dataset.vocabulary,
                       hybrid_train_dataset.sequence_length,
                       weights_matrix=weights,
                       hidden_size=256,
                       num_layers=2).to(device)

        hybrid_fit(args.epochs, model, hybrid_train_loader, hybrid_val_loader,
                   args.icd_type, opt_fn, loss_fn, learning_rate, device)
        hybrid_test_results(model, hybrid_test_loader, args.icd_type, device)

    elif args.model_name == 'ovr':

        X_train, y_train = mlmodel_data(train_diagnosis, args.icd_type)
        X_test, y_test = mlmodel_data(test_diagnosis, args.icd_type)

        # Fit the vectorizer on training text only; transform test with it.
        tfidf_vectorizer = TfidfVectorizer(max_df=0.8)
        X_train = tfidf_vectorizer.fit_transform(X_train)
        X_test = tfidf_vectorizer.transform(X_test)

        ml_model = train_classifier(X_train, y_train)
        y_predict = ml_model.predict(X_test)

        print('-' * 20 + args.icd_type + '-' * 20)
        mlmodel_result(y_test, y_predict)