Code example #1
File: app.py  Project: kowalskyyy999/pytorch-web-app
import os

import torch
from flask import Flask, redirect, render_template, request, url_for

import utils

# Module-level setup assumed for this excerpt; the upload folder path is a
# guess based on the /display route below.
app = Flask(__name__)
UPLOAD_FOLDER = 'static/img'


@app.route('/', methods=['GET', 'POST'])  # view name and route assumed
def index():
    if request.method == 'POST':
        image_file = request.files['image']
        if image_file:
            image_location = os.path.join(UPLOAD_FOLDER, image_file.filename)

            image_file.save(image_location)
            result = utils.predict(image_location, MODEL, MAPPING)

            return render_template("index.html",
                                   prediction=result['dog'],
                                   filename=image_file.filename)

    return render_template("home.html")


@app.route('/display/<filename>')
def display_image(filename):
    return redirect(url_for('static', filename='img/' + filename), code=301)


if __name__ == '__main__':
    checkpoint = torch.load('./results/checkpoint.pth', map_location='cpu')
    n_classes = checkpoint['n_classes']
    MAPPING = checkpoint['inv_mapping_label']
    MODEL = utils.Net(n_classes=n_classes,
                      pretrained=True)  #.to(device=utils.DEVICE)
    MODEL.load_state_dict(checkpoint['state_dict'])
    MODEL.eval()
    app.run(port=int(os.environ.get('PORT', 5000)),
            debug=True,
            extra_files=[UPLOAD_FOLDER])  # extra_files expects an iterable of paths
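
Note that `utils.predict` itself is not shown in this excerpt. A minimal sketch of what such a helper might look like, assuming a torchvision preprocessing pipeline and an index-to-name dict like the checkpoint's `inv_mapping_label` (the 'dog' key is only what the view above expects):

from PIL import Image
import torch
from torchvision import transforms

_preprocess = transforms.Compose([
    transforms.Resize((224, 224)),  # input size is an assumption
    transforms.ToTensor(),
])

def predict(image_path, model, mapping):
    """Hypothetical sketch: classify one image and map the index to a name."""
    image = Image.open(image_path).convert('RGB')
    batch = _preprocess(image).unsqueeze(0)  # add a batch dimension
    with torch.no_grad():
        logits = model(batch)
    index = logits.argmax(dim=1).item()
    return {'dog': mapping[index]}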
Code example #2
import sys

import numpy as np
import torch
from torchvision import transforms
from tqdm import tqdm

import utils

# The head of this excerpt is cut off; resize_size and batch_size are
# assumptions added so the snippet runs stand-alone.
resize_size = (380, 380)  # EfficientNet-B4 is commonly trained at 380x380
batch_size = 32           # value not shown in the excerpt

data_transforms = {
    'test': transforms.Compose([
        transforms.Resize(resize_size),
        transforms.ToTensor(),
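        # Normalizing with mean=std=0.5 rescales each channel from [0, 1] to [-1, 1]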
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ]),
}

if __name__ == '__main__':
    filename = sys.argv[1]
    testset = utils.customDataset(datatype='test', transform=data_transforms['test'], filename=filename)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # load models
    PATH = './model/efficientnet-b4_20201012-215938_0.001_9_0.4751207829210885'
    new_net = utils.Net()
    new_net.load_state_dict(torch.load(PATH, map_location=device))
    new_net.to(device)
    new_net.eval()
    
    # inference
    y_pred_test = np.array([])
    with torch.no_grad():
        for data_test in tqdm(testloader):
            images = data_test.to(device)
            outputs = new_net(images)

            _, y_pred_tag = torch.max(outputs, 1)
            y_pred_test = np.hstack([y_pred_test, y_pred_tag.cpu().detach().numpy()])

    output_filename = './classification.txt'
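
The excerpt ends right after naming the output file; presumably the predicted class indices are then written out. A plausible continuation (an assumption, not shown in the source):

    np.savetxt(output_filename, y_pred_test, fmt='%d')  # one class index per line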
Code example #3
import argparse

import torch

import utils

if __name__ == "__main__":
    # Parser
    # ---------------
    parser = argparse.ArgumentParser(description="Script to create a neural net")
    parser.add_argument("--cuda", help="use the GPU if available", action="store_true")
    parser.add_argument("--path", help="model path",
                        type=str, default="networks/neural_net.pt")

    # Parse argument
    # ---------------
    args = parser.parse_args()

    # Using CUDA if asked and available
    # ---------------
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Creating and saving the neural net
    # ---------------
    model = utils.Net().to(device)
    torch.save(model, args.path)
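
A usage note, not from the project: `torch.save(model, path)` pickles the entire module, so loading it back requires `utils.Net` to be importable (and `weights_only=False` on recent PyTorch releases, where the default flipped to True). Saving only the `state_dict` is the more portable convention:

import torch

import utils

# Reload the pickled module (utils.Net must be importable):
model = torch.load("networks/neural_net.pt", weights_only=False)

# More portable alternative: persist only the parameters.
torch.save(model.state_dict(), "networks/neural_net_state.pt")
restored = utils.Net()
restored.load_state_dict(torch.load("networks/neural_net_state.pt", weights_only=True))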
Code example #4
File: main.py  Project: bradford415/CALM
def main():
    # NOTE: this excerpt omits the module-level imports (argparse, logging,
    # os, sys, numpy as np, pandas as pd, torch, the local utils /
    # preprocessing / dataset modules, Plotter, train/test/forward,
    # f1_score) and the INPUT_DIR / OUTPUT_DIR constants.
    group = 'lung'  # unused in this excerpt; a candidate for removal

    parser = argparse.ArgumentParser(description='classifier')
    parser.add_argument('--sample_file', type=str, default='lung.emx.txt', help="name of the GEM, organized as samples (columns) by genes (rows)")
    parser.add_argument('--label_file', type=str, default='sample_condition.txt', help="name of the label file: two columns that map each sample to its label")
    parser.add_argument('--output_name', type=str, default='tissue-run-1', help="name of the output directory that stores the output files")
    #parser.add_argument('--overwrite_output', type=bool, default=False, help="overwrite the output directory file if it already exists")
    parser.add_argument('--batch_size', type=int, default=16, help="size of the batches the data is split into")
    parser.add_argument('--max_epoch', type=int, default=100, help="number of passes through the dataset")
    parser.add_argument('--learning_rate', type=float, default=0.001, help="controls the rate at which the weights of the model update")
    parser.add_argument('--test_split', type=float, default=0.3, help="fraction of the data held out for testing (e.g. 0.3 holds out 30%%); the rest is used for training")
    parser.add_argument('--continuous_discrete', type=str, default='continuous', help="type of data in the sample file; typically RNA is continuous and DNA is discrete")
    # Caution: argparse's type=bool treats any non-empty string as True,
    # so '--plot_results False' still evaluates to True.
    parser.add_argument('--plot_results', type=bool, default=True, help="plots the sample distribution, training/test accuracy/loss, and confusion matrix")
    parser.add_argument('--use_gpu', type=bool, default=False, help="true to use a GPU, false to use the CPU; if the node has no GPU it falls back to the CPU")
    args = parser.parse_args()
    args = parser.parse_args()

    # If the data is discrete, values should only range from 0-3
    #if args.continuous_discrete == "discrete":
        #args.input_num_classes = 4

    # Initialize file paths and create output folder
    LABEL_FILE = os.path.join(INPUT_DIR, args.label_file)
    SAMPLE_FILE = os.path.join(INPUT_DIR, args.sample_file)
    OUTPUT_DIR_FINAL = os.path.join(OUTPUT_DIR, args.output_name + "-" + str(datetime.today().strftime('%Y-%m-%d-%H:%M')))
    if not os.path.exists(OUTPUT_DIR_FINAL):
        os.makedirs(OUTPUT_DIR_FINAL)

    # Create log file to keep track of model parameters
    logging.basicConfig(filename=os.path.join(OUTPUT_DIR_FINAL,'classifier.log'),
                        filemode='w',
                        format='%(message)s',
                        level=logging.INFO)
    logger = logging.getLogger(__name__)
    logger.info('Classifier log file for ' + args.sample_file + ' - Started on ' + str(datetime.today().strftime('%Y-%m-%d-%H:%M')) + '\n')
    logger.info('Batch size: %d', args.batch_size)
    logger.info('Number of epochs: %d', args.max_epoch)
    logger.info('Learning Rate: %f', args.learning_rate)
    logger.info('Sample filename: ' + args.sample_file)
    logger.info('Output directory: ' + args.output_name)

    if args.continuous_discrete not in ('continuous', 'discrete'):
        logger.error("ERROR: check that the continuous_discrete argument is spelled correctly.")
        logger.error("       only continuous or discrete data can be processed.")
        sys.exit("\nCommand line argument error. Please check the log file.\n")

    # Initialize GPU usage if desired
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda and args.use_gpu else "cpu")
    train_kwargs = {'batch_size': args.batch_size}
    test_kwargs = {'batch_size': args.batch_size}
    if device.type == 'cuda':
        cuda_kwargs = {'num_workers': 1,
                       'pin_memory': True,
                       'shuffle': True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    # Load matrix, labels/weights, and number of samples
    column_names = ("sample", "label")
    matrix_df = pd.read_csv(SAMPLE_FILE, sep='\t', index_col=[0])
    labels_df = pd.read_csv(LABEL_FILE, names=column_names, delim_whitespace=True, header=None)


    # Error checking: both files cover the same samples and samples are unique
    samples_unique = set(labels_df.iloc[:, 0])
    assert len(labels_df) == len(matrix_df.columns), "label and sample files have different sample counts"
    assert len(labels_df) == len(samples_unique), "duplicate sample names in the label file"

    
    labels, class_weights = preprocessing.labels_and_weights(labels_df)
    args.output_num_classes = len(labels)
    is_binary = False
    if len(labels) == 2:
        is_binary = True
        args.output_num_classes = 1  # a single logit is used with BCEWithLogitsLoss

    # Define model paramters
    batch_size = args.batch_size
    max_epoch = args.max_epoch
    learning_rate = args.learning_rate #5e-4
    num_features = len(matrix_df.index)

    # Setup model
    model = utils.Net(input_seq_length=num_features,
                      output_num_classes=args.output_num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
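    # StepLR multiplies the learning rate by 0.1 every 50 epochs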
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)

    if is_binary:
        loss_fn = torch.nn.BCEWithLogitsLoss()
    else:
        loss_fn = torch.nn.CrossEntropyLoss()  # optionally: weight=class_weights

    logger.info('Number of samples: %d\n', len(labels_df))
    logger.info('Labels: ')
    for i, label in enumerate(labels):
        logger.info('       %d - %s', i, label)

    # Replace missing data with the global minimum of the dataset
    val_min, val_max = np.nanmin(matrix_df), np.nanmax(matrix_df)
    matrix_df.fillna(val_min, inplace=True)

    # Transposing matrix to align with label file
    matrix_transposed_df = matrix_df.T

    # Create density and tsne plot
    graphs = Plotter(OUTPUT_DIR_FINAL)
    graphs.density(matrix_df)
    graphs.tsne(matrix_transposed_df, labels_df, labels, title=args.sample_file)

    train_data, test_data = preprocessing.split_data(matrix_transposed_df, labels_df, args.test_split, args.output_num_classes)

    # Convert tuple of df's to tuple of np's
    # Allows the dataset class to access w/ data[][] instead of data[].iloc[]
    train_data_np = (train_data[0].values, train_data[1].values)
    test_data_np = (test_data[0].values, test_data[1].values)

    train_dataset = dataset.Dataset(train_data_np)
    test_dataset = dataset.Dataset(test_data_np)
    train_generator = data.DataLoader(train_dataset, **train_kwargs, drop_last=False)
    test_generator = data.DataLoader(test_dataset, **test_kwargs, drop_last=False)
    # drop_last=True would drop the last batch if the sample size is not divisible by the batch size

    logger.info('\nTraining size: %d \nTesting size: %d\n', len(train_dataset), len(test_dataset))

    # Create variables to store accuracy and loss
    loss_meter = utils.AverageMeter()
    loss_meter.reset()
    summary_file = pd.DataFrame([], columns=['Epoch', 'Training Loss', 'Accuracy', 'Accurate Count', 'Total Items'])
    train_stats = pd.DataFrame([], columns=['accuracy', 'loss'])
    test_stats = pd.DataFrame([], columns=['accuracy', 'loss'])

    # Train and test the model
    for epoch in range(args.max_epoch):
        train_stats = train(model, device, is_binary, train_generator, optimizer, loss_fn, batch_size, loss_meter, train_stats)
        test_stats = test(model, device, is_binary, test_generator, loss_fn, epoch, batch_size, loss_meter, test_stats, train_stats, logger)
        scheduler.step()

    # Training finished - Below is used for testing the network, plots and saving results
    if args.plot_results:
        y_predict_list = []
        y_target_list = []
        y_predict_list, y_target_list = forward(model, device, is_binary, test_generator, y_predict_list, y_target_list)

        graphs.accuracy(train_stats, test_stats, graphs_title=args.sample_file)
        graphs.confusion(y_predict_list, y_target_list, labels, cm_title=args.sample_file)
        logger.info("\n\nf1 score: %0.2f" % (f1_score(y_target_list, y_predict_list, average="weighted")))

    #summary_file.to_csv(RESULTS_FILE, sep='\t', index=False)
    logger.info('\nFinal Accuracy: %2.3f', test_stats.iloc[-1]['accuracy'])
    logger.info('\nFinished at ' + str(datetime.today().strftime('%Y-%m-%d-%H:%M')))


if __name__ == '__main__':  # entry point assumed for this excerpt
    main()
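
A side note on the loss selection above: with exactly two classes the model emits a single output logit and is trained with BCEWithLogitsLoss, which expects float targets, while the multiclass path uses CrossEntropyLoss with integer class indices. A standalone illustration of the difference (shapes and values here are illustrative only):

import torch

binary_loss = torch.nn.BCEWithLogitsLoss()
logits = torch.randn(8, 1)                     # one logit per sample
targets = torch.randint(0, 2, (8, 1)).float()  # float targets in {0, 1}
print(binary_loss(logits, targets))

multi_loss = torch.nn.CrossEntropyLoss()
logits = torch.randn(8, 4)                     # one logit per class
targets = torch.randint(0, 4, (8,))            # integer class indices
print(multi_loss(logits, targets))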
Code example #5
def train_model_multiclass(directory, f, n_classes, opt):
    # NOTE: this excerpt relies on module-level names not shown here:
    # os, heapq, statistics.mean, torch, torch.nn as nn, torch.optim as
    # optim, utils, plus device, n_schedule, n_epochs, n_early, plot_,
    # savedir, load_data, and plot.
    filepath = directory + f
    if f == '':
        f = '_12345'
    print('Loading data...')
    train_ldrs, test_ldrs = load_data(filepath + '.csv')

    tr_loss = []
    tr_acc = []
    vl_loss = []
    vl_acc = []
    for train_ldr, test_ldr in zip(train_ldrs, test_ldrs):
        net = utils.Net(n_classes).to(device)
        criterion = nn.CrossEntropyLoss()

        if opt == 'SGD':
            optimizer = optim.SGD(net.parameters(),
                                  lr=0.1,
                                  momentum=0.9,
                                  weight_decay=0.0005,
                                  nesterov=True)
        elif opt == 'Adam':
            optimizer = optim.Adam(net.parameters(),
                                   lr=0.01,
                                   betas=(0.9, 0.999),
                                   weight_decay=0.0005,
                                   amsgrad=False)
        else:
            raise ValueError('Invalid optimizer selected. Choose \'SGD\' or '
                             '\'Adam\'.')
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=n_schedule,
                                                   gamma=0.1)

        print('Training...')
        print('Filters per layer:', net.n_filters)
        print('Criterion:', criterion)
        print(optimizer)

        # losses[1] starts with a sentinel (100) so the first epoch's
        # validation loss has a previous value to compare against; the
        # sentinel is stripped after training.
        losses = [[], [100]]
        accs = [[], []]
        early_stopping = 0
        for epoch in range(n_epochs):
            # Training
            net.train()  # assigning net.training directly would not switch submodules
            train_correct = 0
            train_total = 0
            train_loss = 0.0
            for local_batch, local_labels in train_ldr:
                # Transfer to GPU
                local_batch = local_batch.to(device, dtype=torch.float)
                local_labels = local_labels.view(-1).to(device,
                                                        dtype=torch.long)

                # Train
                optimizer.zero_grad()

                # Forward + backward + optimize
                logits = net(local_batch)
                loss = criterion(logits, local_labels)
                loss.backward()
                optimizer.step()

                # Tracking
                train_loss += loss.item()
                predicted = torch.argmax(logits, dim=1)
                train_total += local_labels.size(0)
                train_correct += (predicted == local_labels).sum().item()

            train_acc = train_correct / train_total
            scheduler.step()

            # Validation
            net.eval()  # see the note on net.train() above
            val_correct = 0
            val_total = 0
            val_loss = 0
            with torch.no_grad():
                for local_batch, local_labels in test_ldr:
                    # Transfer to GPU
                    local_batch = local_batch.to(device, dtype=torch.float)
                    local_labels = local_labels.to(device)

                    # Test
                    logits = net(local_batch)
                    loss = criterion(logits, local_labels)

                    # Tracking
                    val_loss += loss.item()
                    predicted = torch.argmax(logits, dim=1)
                    val_total += local_labels.size(0)
                    val_correct += (predicted == local_labels).sum().item()

            val_acc = val_correct / val_total

            losses[0].append(train_loss)
            losses[1].append(val_loss)
            accs[0].append(train_acc)
            accs[1].append(val_acc)

            if val_loss >= losses[1][-2]:
                early_stopping += 1
            elif early_stopping > 0:
                early_stopping -= 1

            early = False
            if early_stopping == n_early:
                early = True

            if epoch % 10 == 9 or early:
                print('Epoch:', epoch + 1,
                      '| Train Acc:', round(train_acc, 8),
                      '| Train Loss:', round(train_loss, 8),
                      '| Val Acc:', round(val_acc, 8),
                      '| Val Loss:', round(val_loss, 8),
                      '| Early:', early_stopping)

            if early:
                print('Early stopping.')
                break

        losses[1] = losses[1][1:]  # drop the sentinel

        tr_loss.append(losses[0])
        tr_acc.append(accs[0])
        vl_loss.append(losses[1])
        vl_acc.append(accs[1])

    best = [mean(heapq.nlargest(10, a)) for a in vl_acc]  # mean of the 10 best val accuracies per run
    if plot_:
        # Plot loss and accuracy
        savedir_ = os.path.join(savedir, 'cnn-2d', f[1:]) + os.sep
        plot(savedir_, f, tr_loss, tr_acc, vl_loss, vl_acc, best)

    return best
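
The early-stopping bookkeeping above increments a patience counter whenever the validation loss fails to improve on the previous epoch, decays it when it does, and halts once the counter reaches `n_early`. A stripped-down illustration of the same pattern (all values here are made up):

# Toy trace of the patience counter; n_early is assumed to be 3.
n_early = 3
val_losses = [1.0, 0.9, 0.95, 0.97, 0.98, 0.99]

patience = 0
prev = float('inf')
for epoch, loss in enumerate(val_losses):
    if loss >= prev:       # no improvement over the previous epoch
        patience += 1
    elif patience > 0:     # improvement pays the counter back down
        patience -= 1
    if patience == n_early:
        print(f'early stop at epoch {epoch}')
        break
    prev = loss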