import os

import torch
from flask import Flask, redirect, render_template, request, url_for

import utils

UPLOAD_FOLDER = 'static/img'  # assumed location; the original defines this constant elsewhere

app = Flask(__name__)


# The original fragment began mid-function; this enclosing route is restored
# so the handler is runnable.
@app.route('/', methods=['GET', 'POST'])
def home():
    if request.method == 'POST':
        image_file = request.files['image']
        if image_file:
            image_location = os.path.join(UPLOAD_FOLDER, image_file.filename)
            image_file.save(image_location)
            result = utils.predict(image_location, MODEL, MAPPING)
            return render_template("index.html", prediction=result['dog'],
                                   filename=image_file.filename)
    return render_template("home.html")


@app.route('/display/<filename>')
def display_image(filename):
    return redirect(url_for('static', filename='img/' + filename), code=301)


if __name__ == '__main__':
    # Load the checkpoint once at startup so every request reuses the same model.
    checkpoint = torch.load('./results/checkpoint.pth', map_location='cpu')
    n_classes = checkpoint['n_classes']
    MAPPING = checkpoint['inv_mapping_label']
    MODEL = utils.Net(n_classes=n_classes, pretrained=True)  # .to(device=utils.DEVICE)
    MODEL.load_state_dict(checkpoint['state_dict'])
    MODEL.eval()
    app.run(port=int(os.environ.get('PORT', 5000)), debug=True, extra_files=UPLOAD_FOLDER)
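# The utils.predict helper called above is not shown in this file. A minimal
# sketch of what it might look like, assuming a torchvision-style preprocessing
# pipeline and that MAPPING maps class indices to label strings; the 'dog' key
# and the exact transforms are assumptions, not the project's actual code.

from PIL import Image
from torchvision import transforms


def predict(image_location, model, mapping):
    """Run a single image through the model and return the predicted label."""
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),  # assumed input size
        transforms.ToTensor(),
    ])
    image = Image.open(image_location).convert('RGB')
    batch = preprocess(image).unsqueeze(0)  # add a batch dimension
    with torch.no_grad():
        logits = model(batch)
        idx = logits.argmax(dim=1).item()
    return {'dog': mapping[idx]}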
import sys

import numpy as np
import torch
from torchvision import transforms
from tqdm import tqdm

import utils

batch_size = 32    # assumed; the original defines these constants earlier in the file
resize_size = 380  # assumed input resolution for EfficientNet-B4

# Only the tail of this dict survived in the original; the 'test' key is
# reconstructed from the surviving entries.
data_transforms = {
    'test': transforms.Compose([
        transforms.Resize(resize_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5],
                             std=[0.5, 0.5, 0.5])
    ]),
}

if __name__ == '__main__':
    filename = sys.argv[1]
    testset = utils.customDataset(datatype='test',
                                  transform=data_transforms['test'],
                                  filename=filename)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Load model (map_location added so CUDA-saved weights also load on CPU)
    PATH = './model/efficientnet-b4_20201012-215938_0.001_9_0.4751207829210885'
    new_net = utils.Net()
    new_net.load_state_dict(torch.load(PATH, map_location=device))
    new_net.to(device)
    new_net.eval()

    # Inference
    y_pred_test = np.array([])
    with torch.no_grad():
        for data_test in tqdm(testloader):
            images = data_test.to(device)
            outputs = new_net(images)
            _, y_pred_tag = torch.max(outputs, 1)
            y_pred_test = np.hstack([y_pred_test, y_pred_tag.cpu().numpy()])

    output_filename = './classification.txt'
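    # The original script ends right after defining output_filename; the write
    # step did not survive. A minimal completion, assuming one predicted class
    # index per line:
    with open(output_filename, 'w') as fout:
        for label in y_pred_test.astype(int):
            fout.write(f'{label}\n')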
import argparse

import torch

import utils

if __name__ == "__main__":
    # Parser
    # ---------------
    parser = argparse.ArgumentParser("Script to create a neural net")
    parser.add_argument("--cuda", help="If using GPU", action="store_true")
    parser.add_argument("--path", help="Model path", type=str,
                        default="networks/neural_net.pt")

    # Parse arguments
    # ---------------
    args = parser.parse_args()

    # Use CUDA if requested and available
    # ---------------
    use_cuda = args.cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Create and save the neural net
    # ---------------
    model = utils.Net().to(device)
    torch.save(model, args.path)
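    # torch.save(model, args.path) pickles the whole module, so loading it
    # later requires the same utils.Net class on the import path. A hedged
    # sketch of the more portable state_dict convention (the ".state.pt"
    # suffix is an assumption, not part of the original script):
    torch.save(model.state_dict(), args.path + ".state.pt")
    # Later: rebuild the architecture, then load the weights.
    # model = utils.Net().to(device)
    # model.load_state_dict(torch.load(args.path + ".state.pt", map_location=device))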
import argparse
import logging
import os
import sys
from datetime import datetime

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import f1_score
from torch.utils import data

import dataset
import preprocessing
import utils
from plotter import Plotter  # assumed module for the Plotter class used below

# INPUT_DIR, OUTPUT_DIR, and the train/test/forward helpers are assumed to be
# defined elsewhere in this module.


def main():
    group = 'lung'  # Maybe delete this? (unused)

    parser = argparse.ArgumentParser(description='classifier')
    parser.add_argument('--sample_file', type=str, default='lung.emx.txt',
                        help="name of the GEM, organized as samples (columns) by genes (rows)")
    parser.add_argument('--label_file', type=str, default='sample_condition.txt',
                        help="name of the label file: two columns mapping each sample to its label")
    parser.add_argument('--output_name', type=str, default='tissue-run-1',
                        help="name of the output directory that stores the output files")
    # parser.add_argument('--overwrite_output', type=bool, default=False,
    #                     help="overwrite the output directory if it already exists")
    parser.add_argument('--batch_size', type=int, default=16,
                        help="size of the batches the data is split into")
    parser.add_argument('--max_epoch', type=int, default=100,
                        help="number of passes through the dataset")
    parser.add_argument('--learning_rate', type=float, default=0.001,
                        help="controls the rate at which the model's weights update")
    parser.add_argument('--test_split', type=float, default=0.3,
                        help="fraction of data held out for testing; the rest is used for training (30% -> 0.3)")
    parser.add_argument('--continuous_discrete', type=str, default='continuous',
                        help="type of data in the sample file; RNA is typically continuous and DNA discrete")
    # Note: argparse's type=bool treats any non-empty string as True, so
    # passing e.g. --plot_results False still yields True; these flags are
    # best left unset or refactored to action='store_true'.
    parser.add_argument('--plot_results', type=bool, default=True,
                        help="plot the sample distribution, training/test accuracy/loss, and confusion matrix")
    parser.add_argument('--use_gpu', type=bool, default=False,
                        help="true to use a GPU, false to use the CPU; if the node has no GPU, the CPU is used")
    args = parser.parse_args()

    # If data is discrete, it should only range between 0-3
    # if args.continuous_discrete == "discrete":
    #     args.input_num_classes = 4

    # Initialize file paths and create the output folder
    LABEL_FILE = os.path.join(INPUT_DIR, args.label_file)
    SAMPLE_FILE = os.path.join(INPUT_DIR, args.sample_file)
    OUTPUT_DIR_FINAL = os.path.join(
        OUTPUT_DIR, args.output_name + "-" + str(datetime.today().strftime('%Y-%m-%d-%H:%M')))
    if not os.path.exists(OUTPUT_DIR_FINAL):
        os.makedirs(OUTPUT_DIR_FINAL)

    # Create a log file to keep track of model parameters
    logging.basicConfig(filename=os.path.join(OUTPUT_DIR_FINAL, 'classifier.log'),
                        filemode='w', format='%(message)s', level=logging.INFO)
    logger = logging.getLogger(__name__)
    logger.info('Classifier log file for ' + args.sample_file + ' - Started on '
                + str(datetime.today().strftime('%Y-%m-%d-%H:%M')) + '\n')
    logger.info('Batch size: %d', args.batch_size)
    logger.info('Number of epochs: %d', args.max_epoch)
    logger.info('Learning rate: %f', args.learning_rate)
    logger.info('Sample filename: ' + args.sample_file)
    logger.info('Output directory: ' + args.output_name)

    if args.continuous_discrete not in ('continuous', 'discrete'):
        logger.error("ERROR: check that the continuous_discrete argument is spelled correctly.")
        logger.error("       only continuous or discrete data can be processed.")
        sys.exit("\nCommand line argument error. Please check the log file.\n")
    # Initialize GPU usage if desired
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda and args.use_gpu else "cpu")
    train_kwargs = {'batch_size': args.batch_size}  # was hard-coded to 16, ignoring --batch_size
    test_kwargs = {'batch_size': args.batch_size}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1,
                       'pin_memory': True,
                       'shuffle': True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    # Load matrix, labels/weights, and number of samples
    column_names = ("sample", "label")
    matrix_df = pd.read_csv(SAMPLE_FILE, sep='\t', index_col=[0])
    labels_df = pd.read_csv(LABEL_FILE, names=column_names, delim_whitespace=True, header=None)

    # Error checking: both files cover the same number of samples and the samples are unique
    samples_unique = set(labels_df.iloc[:, 0])
    assert len(labels_df) == len(matrix_df.columns)
    assert len(labels_df) == len(samples_unique)

    labels, class_weights = preprocessing.labels_and_weights(labels_df)
    args.output_num_classes = len(labels)
    is_binary = False
    if len(labels) == 2:
        is_binary = True
        args.output_num_classes = 1  # fixed typo: was args.output_num_classess

    # Define model parameters
    batch_size = args.batch_size
    max_epoch = args.max_epoch
    learning_rate = args.learning_rate  # 5e-4

    num_features = len(matrix_df.index)

    # Set up the model
    model = utils.Net(input_seq_length=num_features,
                      output_num_classes=args.output_num_classes).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.1)
    if is_binary:
        loss_fn = torch.nn.BCEWithLogitsLoss()
    else:
        loss_fn = torch.nn.CrossEntropyLoss()  # (weight=class_weights)

    logger.info('Number of samples: %d\n', len(labels_df))
    logger.info('Labels: ')
    for i in range(len(labels)):
        logger.info(' %d - %s', i, labels[i])

    # Replace missing data with the global minimum of the dataset
    val_min, val_max = np.nanmin(matrix_df), np.nanmax(matrix_df)
    matrix_df.fillna(val_min, inplace=True)

    # Transpose the matrix to align with the label file
    matrix_transposed_df = matrix_df.T

    # Create density and t-SNE plots
    graphs = Plotter(OUTPUT_DIR_FINAL)
    graphs.density(matrix_df)
    graphs.tsne(matrix_transposed_df, labels_df, labels, title=args.sample_file)

    train_data, test_data = preprocessing.split_data(
        matrix_transposed_df, labels_df, args.test_split, args.output_num_classes)

    # Convert the tuples of DataFrames to tuples of NumPy arrays so the
    # dataset class can index with data[][] instead of data[].iloc[]
    train_data_np = (train_data[0].values, train_data[1].values)
    test_data_np = (test_data[0].values, test_data[1].values)

    train_dataset = dataset.Dataset(train_data_np)
    test_dataset = dataset.Dataset(test_data_np)
    # drop_last=True would drop the last batch if the sample size is not
    # divisible by the batch size
    train_generator = data.DataLoader(train_dataset, **train_kwargs, drop_last=False)
    test_generator = data.DataLoader(test_dataset, **test_kwargs, drop_last=False)

    logger.info('\nTraining size: %d \nTesting size: %d\n', len(train_dataset), len(test_dataset))

    # Create variables to store accuracy and loss
    loss_meter = utils.AverageMeter()
    loss_meter.reset()
    summary_file = pd.DataFrame([], columns=['Epoch', 'Training Loss', 'Accuracy',
                                             'Accurate Count', 'Total Items'])
    train_stats = pd.DataFrame([], columns=['accuracy', 'loss'])
    test_stats = pd.DataFrame([], columns=['accuracy', 'loss'])

    # Train and test the model
    for epoch in range(args.max_epoch):
        train_stats = train(model, device, is_binary, train_generator, optimizer,
                            loss_fn, batch_size, loss_meter, train_stats)
        test_stats = test(model, device, is_binary, test_generator, loss_fn, epoch,
                          batch_size, loss_meter, test_stats, train_stats, logger)
        scheduler.step()

    # Training finished - below tests the network, plots, and saves results
    if args.plot_results:
        y_predict_list = []
        y_target_list = []
        y_predict_list, y_target_list = forward(model, device, is_binary, test_generator,
                                                y_predict_list, y_target_list)

        graphs.accuracy(train_stats, test_stats, graphs_title=args.sample_file)
        graphs.confusion(y_predict_list, y_target_list, labels, cm_title=args.sample_file)
        logger.info("\n\nf1 score: %0.2f"
                    % (f1_score(y_target_list, y_predict_list, average="weighted")))

    # summary_file.to_csv(RESULTS_FILE, sep='\t', index=False)
    logger.info('\nFinal Accuracy: %2.3f', test_stats.iloc[epoch]['accuracy'])
    logger.info('\nFinished at ' + str(datetime.today().strftime('%Y-%m-%d-%H:%M')))


if __name__ == '__main__':
    main()
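# The forward() helper used above for collecting predictions is not shown in
# this fragment. A minimal sketch, assuming it runs the test generator through
# the model once and accumulates predicted and true labels; the thresholding
# rule for the binary case is an assumption:

def forward(model, device, is_binary, generator, y_predict_list, y_target_list):
    """Collect predicted and target labels over one pass of the generator."""
    model.eval()
    with torch.no_grad():
        for batch, targets in generator:
            batch = batch.to(device, dtype=torch.float)
            outputs = model(batch)
            if is_binary:
                # single logit: threshold the sigmoid at 0.5
                preds = (torch.sigmoid(outputs).view(-1) > 0.5).long()
            else:
                preds = torch.argmax(outputs, dim=1)
            y_predict_list.extend(preds.cpu().tolist())
            y_target_list.extend(targets.view(-1).cpu().tolist())
    return y_predict_list, y_target_list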
import heapq
import os
from statistics import mean

import torch
import torch.nn as nn
import torch.optim as optim

import utils

# device, n_schedule, n_epochs, n_early, plot_, savedir, load_data, and plot
# are assumed to be defined at module level elsewhere in this file.


def train_model_multiclass(directory, f, n_classes, opt):
    filepath = directory + f
    if f == '':
        f = '_12345'
    print('Loading data...')
    train_ldrs, test_ldrs = load_data(filepath + '.csv')

    tr_loss = []
    tr_acc = []
    vl_loss = []
    vl_acc = []

    for train_ldr, test_ldr in zip(train_ldrs, test_ldrs):
        net = utils.Net(n_classes).to(device)
        criterion = nn.CrossEntropyLoss()
        if opt == 'SGD':
            optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9,
                                  weight_decay=0.0005, nesterov=True)
        elif opt == 'Adam':
            optimizer = optim.Adam(net.parameters(), lr=0.01, betas=(0.9, 0.999),
                                   weight_decay=0.0005, amsgrad=False)
        else:
            raise ValueError("Invalid optimizer selected. Choose 'SGD' or 'Adam'.")
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=n_schedule,
                                                   gamma=0.1)

        print('Training...')
        print('Filters per layer:', net.n_filters)
        print('Criterion:', criterion)
        print(optimizer)

        # losses[1] starts with a sentinel value so the early-stopping
        # comparison works on the first epoch; it is stripped afterwards.
        losses = [[], [100]]
        accs = [[], []]
        early_stopping = 0

        for epoch in range(n_epochs):
            # Training
            net.train()  # was `net.training = True`, which does not propagate to submodules
            train_correct = 0
            train_total = 0
            train_loss = 0.0
            for local_batch, local_labels in train_ldr:
                # Transfer to GPU
                local_batch = local_batch.to(device, dtype=torch.float)
                local_labels = local_labels.view(-1).to(device, dtype=torch.long)

                optimizer.zero_grad()

                # Forward + backward + optimize
                logits = net(local_batch)
                loss = criterion(logits, local_labels)
                loss.backward()
                optimizer.step()

                # Tracking
                train_loss += loss.item()
                predicted = torch.argmax(logits, dim=1)
                train_total += local_labels.size(0)
                train_correct += (predicted == local_labels).sum().item()
            train_acc = train_correct / train_total
            scheduler.step()

            # Validation
            net.eval()  # was `net.training = False`
            val_correct = 0
            val_total = 0
            val_loss = 0
            with torch.no_grad():
                for local_batch, local_labels in test_ldr:
                    # Transfer to GPU
                    local_batch = local_batch.to(device, dtype=torch.float)
                    local_labels = local_labels.to(device)

                    # Test
                    logits = net(local_batch)
                    loss = criterion(logits, local_labels)

                    # Tracking
                    val_loss += loss.item()
                    predicted = torch.argmax(logits, dim=1)
                    val_total += local_labels.size(0)
                    val_correct += (predicted == local_labels).sum().item()
            val_acc = val_correct / val_total

            losses[0].append(train_loss)
            losses[1].append(val_loss)
            accs[0].append(train_acc)
            accs[1].append(val_acc)

            # Count epochs without improvement in validation loss
            if val_loss >= losses[1][-2]:
                early_stopping += 1
            elif early_stopping > 0:
                early_stopping -= 1
            early = early_stopping == n_early

            if epoch % 10 == 9 or early:
                print('Epoch:', epoch + 1,
                      '| Train Acc:', round(train_acc, 8),
                      '| Train Loss:', round(train_loss, 8),
                      '| Val Acc:', round(val_acc, 8),
                      '| Val Loss:', round(val_loss, 8),
                      '| Early:', early_stopping)
            if early:
                print('Early stopping.')
                break

        losses[1] = losses[1][1:]  # drop the sentinel
        tr_loss.append(losses[0])
        tr_acc.append(accs[0])
        vl_loss.append(losses[1])
        vl_acc.append(accs[1])

    # Best score per fold: mean of the 10 highest validation accuracies
    best = [mean(heapq.nlargest(10, a)) for a in vl_acc]
    if plot_:
        # was savedir + '\cnn-2d\\', which relies on unescaped backslashes
        savedir_ = os.path.join(savedir, 'cnn-2d', f[1:]) + os.sep
        plot(savedir_, f, tr_loss, tr_acc, vl_loss, vl_acc, best)
    return best
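# Example invocation, assuming the module-level configuration (device,
# n_schedule, n_epochs, n_early, plot_, savedir) has been set; the directory
# and file stem below are illustrative only:

if __name__ == '__main__':
    best_scores = train_model_multiclass(directory='./data/', f='_run1',
                                         n_classes=5, opt='SGD')
    print('Best validation accuracies per fold:', best_scores)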