Beispiel #1
0
import argparse
import json
import sys

import numpy as np

from functions import knn_lc, calculate_accuracy, predict_label

# Command-line interface: the neighbour count and the two JSON data set paths.
parser = argparse.ArgumentParser(description='Implement a k-NN algorithm')
parser.add_argument(
    "-k",
    type=int,
    help="Number of nearest neighbors to look for in choose(n,k)")
# Both paths are required: without them the script would only fail later with
# an opaque AttributeError/TypeError on None.
parser.add_argument('-train', type=str, required=True,
                    help='Train data set path')
parser.add_argument('-test', type=str, required=True,
                    help='Test data set path')
args = parser.parse_args()
k = args.k
train = args.train
test = args.test
# Strip stray carriage returns that Windows shells can leave on the last
# command-line argument.
test = test.replace('\r', '')

# Load the data in. `train` and `test` are rebound from file paths to the
# parsed JSON content.
with open(train, "r") as read_file:
    train = json.load(read_file)
with open(test, "r") as read_file:
    test = json.load(read_file)

# Loop through percentages of the data (10, 20, ..., 100) and print one
# comma-separated line per percentage.
for x in range(10, 110, 10):
    # knn_lc returns an indexable result: element 0 is passed on to
    # predict_label, element 1 is printed next to the accuracy
    # (presumably the amount of training data used -- confirm in functions.py).
    nn = knn_lc(k, train, test, x)
    winners = predict_label(train, test, nn[0])
    accuracy = calculate_accuracy(test, winners)
    print(nn[1], accuracy, sep=',')
Beispiel #2
0
# Load the data in. `train`, `test` and `val` are rebound from file paths to
# the parsed JSON content.
with open(train, "r") as read_file:
    train = json.load(read_file)
with open(test, "r") as read_file:
    test = json.load(read_file)
with open(val, "r") as read_file:
    val = json.load(read_file)

# At least one candidate k is needed, otherwise there is nothing to select.
if kmax < 1:
    raise ValueError("kmax must be at least 1")

# Loop through k = 1..kmax and record the validation accuracy of each.
# Index 0 of `accuracy` is never evaluated; it only keeps indices equal to k.
accuracy = np.zeros(kmax + 1)
for k_index in range(1, kmax + 1):
    nn = knn(k_index, train, val)
    winners = predict_label(train, val, nn)
    accuracy[k_index] = calculate_accuracy(val, winners)
    print(k_index, accuracy[k_index], sep=",")

# Print optimal k. The unused slot 0 is excluded from the search so that
# k = 0 can never be selected, even when every measured accuracy is 0.
opt_k = int(np.argmax(accuracy[1:])) + 1
print(opt_k)

# Find accuracy on test set.
# Train on train + val: the validation samples are merged into the training
# data in place.
train['data'].extend(val['data'])
nn = knn(opt_k, train, test)
winners = predict_label(train, test, nn)
test_accuracy = calculate_accuracy(test, winners)
print(test_accuracy)
Beispiel #3
0
                                       i_batch)
            timers["load_data"].start()
            writer_train.file_writer.flush()

        # # Train Accuracy calculation
        if classifier_type != Outputs.REGRESSOR:
            for i_batch, sample_batched in enumerate(train_dataloader):
                outputs = net(sample_batched['image'].to(parameters.device).to(
                    parameters.device))
                # sample_batched['diagnosis'] = sample_batched['diagnosis'] - 1

                loss = blindness_loss(
                    outputs,
                    sample_batched['diagnosis'].long().to(parameters.device))

                predicted_labels = predict_label(outputs, classifier_type)
                correct_labels_train += accuracy_score(
                    predicted_labels.cpu(),
                    blindness_loss.converted_label.cpu(),
                    normalize=False)
        train_accuracy = (correct_labels_train / len(dataset_combined))

        print('Train accuracy: ' + str(train_accuracy))
        writer_train.add_scalar(tag='accuracy',
                                scalar_value=train_accuracy,
                                global_step=epoch)

        ##############################   Validation  ###################################################################

        net.eval()
        torch.no_grad()
Beispiel #4
0
def model2csv(data_csv, data_dir, model_path, output_path, classifier_type,
              batch_size=32, num_workers=8):
    """
    Run a saved model over a data set and export per-sample results to CSV.

    Each output row holds the sample's file base name (``id_code``), the
    predicted label, the label as converted by the loss object, the raw
    diagnosis and the network's raw output scores.

    :param data_csv: CSV file passed to ``BDDataset`` as ``csv_file``.
    :param data_dir: data directory passed to ``BDDataset`` as ``data_dir``.
    :param model_path: path of the saved state dict given to ``torch.load``.
    :param output_path: destination CSV path; its parent directory is created
        if it does not exist.
    :param classifier_type: key into ``parameters.loss_dict``; also forwarded
        to ``predict_label``.
    :param batch_size: number of samples per batch (default 32).
    :param num_workers: number of ``DataLoader`` worker processes (default 8).
    :return: None; the result is written to ``output_path``.
    """
    #####################################################################################
    # loading data according to specific model and export to csv
    #####################################################################################

    # Use the configured device everywhere so inputs, network and loss always
    # live on the same device (a hard-coded "cuda" breaks on other setups).
    device = parameters.device

    dataset = BDDataset(csv_file=data_csv, data_dir=data_dir)
    dataloader = DataLoader(dataset,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers)
    # map_location lets a checkpoint saved on another device be loaded here.
    state_dict = torch.load(model_path, map_location=device)
    net = ResNet(num_outputs=5)
    net.to(device)
    # NOTE(review): strict=False silently ignores missing/unexpected keys.
    net.load_state_dict(state_dict, strict=False)
    net.eval()
    blindness_loss = parameters.loss_dict[classifier_type]().to(device)

    # analyze = parameters.analyzer_dict[classifier_type]()

    collect = []
    with torch.no_grad():
        for i_batch, sample_batched in enumerate(dataloader):
            if i_batch % 10 == 0:
                print("batch {}/{}".format(i_batch, len(dataloader)))
            outputs = net(sample_batched['image'].to(device))
            # The loss value is discarded; the call is made so that
            # blindness_loss.converted_label is available for this batch.
            blindness_loss(outputs, sample_batched['diagnosis'].to(device))
            predicted_labels = predict_label(outputs, classifier_type)
            for i, name in enumerate(sample_batched['name']):
                collect.append({
                    'id_code': os.path.basename(name),
                    'predicted_label': predicted_labels[i].item(),
                    'converted_label':
                    blindness_loss.converted_label[i].item(),
                    "diagnosis": sample_batched['diagnosis'][i].item(),
                    'scores': np.array(outputs[i].cpu()),
                })

    dataframe = pd.DataFrame(collect)
    # Only create a directory when the path actually has one; makedirs('')
    # would raise for a bare file name.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    dataframe.to_csv(output_path)