"""Learning-curve driver for a k-NN classifier.

Loads JSON-encoded train/test sets from the command line, then evaluates
test accuracy while training on 10%, 20%, ..., 100% of the training data.
"""
import argparse
import json
import sys

import numpy as np

from functions import knn_lc, calculate_accuracy, predict_label

parser = argparse.ArgumentParser(description='Implement a k-NN algorithm')
parser.add_argument(
    "-k", type=int,
    help="Number of nearest neighbors to look for in choose(n,k)")
parser.add_argument('-train', type=str, help='Train data set path')
parser.add_argument('-test', type=str, help='Test data set path')
args = parser.parse_args()

k = args.k
train = args.train
test = args.test
test = test.replace('\r', '')  # Removes the carriage return cuz I use windows

# Load the data in.
# FIX: the original used json.load without ever importing json (NameError);
# the `import json` above makes these calls work.
with open(train, "r") as read_file:
    train = json.load(read_file)
with open(test, "r") as read_file:
    test = json.load(read_file)

# Loop through percentages of data: train on x% of the set each round and
# print "<fraction-marker>,<accuracy>" per line.
for x in range(10, 110, 10):
    nn = knn_lc(k, train, test, x)
    winners = predict_label(train, test, nn[0])
    accuracy = calculate_accuracy(test, winners)
    print(nn[1], accuracy, sep=',')
# Load the train / validation / test splits from their JSON files.
with open(train, "r") as read_file:
    train = json.load(read_file)
with open(test, "r") as read_file:
    test = json.load(read_file)
with open(val, "r") as read_file:
    val = json.load(read_file)

# Sweep k from 1 to kmax, scoring each candidate on the validation set.
# Slot 0 of the accuracy array is never written and stays at 0.
accuracy = np.zeros(kmax + 1)
for k in range(1, kmax + 1):
    neighbors = knn(k, train, val)
    predictions = predict_label(train, val, neighbors)
    accuracy[k] = calculate_accuracy(val, predictions)
    print(k, accuracy[k], sep=",")

# The best k is the index of the highest validation accuracy.
opt_k = np.argmax(accuracy)
print(opt_k)

# Evaluate on the test set after folding the validation data back into
# the training set.
train['data'].extend(val['data'])
neighbors = knn(opt_k, train, test)
predictions = predict_label(train, test, neighbors)
test_accuracy = calculate_accuracy(test, predictions)
print(test_accuracy)
i_batch) timers["load_data"].start() writer_train.file_writer.flush() # # Train Accuracy calculation if classifier_type != Outputs.REGRESSOR: for i_batch, sample_batched in enumerate(train_dataloader): outputs = net(sample_batched['image'].to(parameters.device).to( parameters.device)) # sample_batched['diagnosis'] = sample_batched['diagnosis'] - 1 loss = blindness_loss( outputs, sample_batched['diagnosis'].long().to(parameters.device)) predicted_labels = predict_label(outputs, classifier_type) correct_labels_train += accuracy_score( predicted_labels.cpu(), blindness_loss.converted_label.cpu(), normalize=False) train_accuracy = (correct_labels_train / len(dataset_combined)) print('Train accuracy: ' + str(train_accuracy)) writer_train.add_scalar(tag='accuracy', scalar_value=train_accuracy, global_step=epoch) ############################## Validation ################################################################### net.eval() torch.no_grad()
def model2csv(data_csv, data_dir, model_path, output_path, classifier_type):
    """Run a saved model over a data set and dump per-sample results to CSV.

    The output CSV has one row per sample with its id, predicted label,
    the loss module's converted label, the ground-truth diagnosis, and the
    raw network scores.

    :param data_csv: path of the CSV file describing the data set
    :param data_dir: directory holding the sample images
    :param model_path: path of the saved state dict (loadable by torch.load)
    :param output_path: destination path for the result CSV
    :param classifier_type: key into parameters.loss_dict selecting the loss
    :return: None (writes the CSV as a side effect)
    """
    #####################################################################################
    # loading data according to specific model and export to csv
    #####################################################################################
    batch_size = 32
    # NOTE(review): hard-coded "cuda" here while the net and loss use
    # parameters.device — confirm these refer to the same device.
    device = torch.device("cuda")
    dataset = BDDataset(csv_file=data_csv, data_dir=data_dir)
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=8)

    # Restore the network weights (strict=False tolerates missing/extra keys).
    model = torch.load(model_path)
    net = ResNet(num_outputs=5)
    net.to(parameters.device)
    net.load_state_dict(model, strict=False)
    net.eval()

    blindness_loss = parameters.loss_dict[classifier_type]().to(
        parameters.device)

    collect = []
    with torch.no_grad():
        for i_batch, sample_batched in enumerate(dataloader):
            if i_batch % 10 == 0:
                print("batch {}/{}".format(i_batch, len(dataloader)))
            outputs = net(sample_batched['image'].to(device))
            # Called for its side effect: it populates
            # blindness_loss.converted_label, which is read per sample below.
            blindness_loss(outputs,
                           sample_batched['diagnosis'].to(parameters.device))
            predicted_labels = predict_label(outputs, classifier_type)
            for i in range(len(sample_batched['name'])):
                collect.append({
                    'id_code': os.path.basename(sample_batched['name'][i]),
                    'predicted_label': predicted_labels[i].item(),
                    'converted_label': blindness_loss.converted_label[i].item(),
                    "diagnosis": sample_batched['diagnosis'][i].item(),
                    'scores': np.array(outputs[i].cpu()),
                })

    dataframe = pd.DataFrame(collect)
    # FIX: the original called os.makedirs(os.path.dirname(output_path))
    # unconditionally when the directory did not exist, which raises
    # FileNotFoundError for a bare filename (dirname == ""). The isdir-then-
    # makedirs pattern was also racy; exist_ok=True removes both problems.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    dataframe.to_csv(output_path)