Code Example #1
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from PIL import Image
from sklearn.metrics import roc_curve, auc
from sklearn.metrics.pairwise import cosine_similarity
from torch import cuda
from torchvision import transforms

# Helper and load_model are project-local utilities; their import paths are
# not part of this excerpt.


def evaluate(filename):
    """
    Evaluate the trained model of every split on its test set and plot ROC curves.
    :param filename: Timestamp prefix of the latest training run; the split
                     number and ".pt" extension are appended to load each model.
    :return:
    """
    device = torch.device("cuda:0" if cuda.is_available() else "cpu")

    helper = Helper()

    testing_set, testing_loader = helper.get_data(mode="test",
                                                  testing_batch_size=1)

    print("Starting evaluation")

    to_tensor = transforms.ToTensor()

    true_val = {}
    score = {}
    tpr = {}
    fpr = {}
    thresh = {}
    area = {}

    for set_n in range(1, 11):
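        # Each split has its own trained model: load it, score every
        # (list_1, list_2) pair by the cosine similarity of their averaged
        # features, and collect labels/scores for the ROC computation below.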

        true_val[set_n] = []
        score[set_n] = []

        model = load_model(filename + str(set_n) + ".pt", device)

        model.eval()

        total_len = len(testing_loader[set_n])

        with torch.no_grad():
            for i, (list_1, list_2,
                    labels) in enumerate(testing_loader[set_n]):

                if not isinstance(list_1, list) or not isinstance(list_2, list):
                    print("Issues with testing file at location {0}".format(i))
                    print(list_1)
                    print(list_2)
                    continue

                # Run every image in each list through the network and average
                # the feature vectors; images whose files are missing are skipped.
                l1_avg = np.zeros([1, model.features])
                l1 = 0

                l2_avg = np.zeros([1, model.features])
                l2 = 0

                for im in list_1:
                    try:
                        image = Image.open(im[0])
                        tensor_img = to_tensor(image).to(device)
                        output = model(tensor_img.unsqueeze(0))
                        l1_avg += output.cpu().numpy()
                        l1 += 1
                    except FileNotFoundError:
                        print("File {0} not found. Skipping.".format(im))
                l1_avg /= l1

                for im in list_2:
                    try:
                        image = Image.open(im[0])
                        tensor_img = to_tensor(image).to(device)
                        output = model(tensor_img.unsqueeze(0))
                        l2_avg += output.cpu().numpy()
                        l2 += 1
                    except FileNotFoundError:
                        print("File {0} not found. Skipping.".format(im))
                l2_avg /= l2

                # Cosine similarity between the two averaged embeddings is the
                # score for this pair.
                s = cosine_similarity(l1_avg.reshape(1, -1),
                                      l2_avg.reshape(1, -1))[0, 0]
                score[set_n].append(s)
                true_val[set_n].append(labels.item())
                if (i + 1) % 500 == 0:
                    print("Step: {0}/{1}".format(i + 1, total_len))
                #     print(score[1][i], true_val[1][i])

            # Code to evaluate ROC graph is taken from the official documentation.
            # https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html#sphx-glr-auto-examples-model-selection-plot-roc-py

            fpr[set_n], tpr[set_n], thresh[set_n] = roc_curve(
                np.asarray(true_val[set_n]), np.asarray(score[set_n]))
            area[set_n] = auc(fpr[set_n], tpr[set_n])

            plt.figure()
            plt.plot(fpr[set_n],
                     tpr[set_n],
                     color='darkorange',
                     lw=2,
                     label="ROC curve (area = {0:.2f})".format(area[set_n]))

            plt.xlim([0.0, 1.05])
            plt.ylim([0.0, 1.05])
            plt.xlabel('False Positive Rate')
            plt.ylabel('True Positive Rate')
            plt.title('Receiver Operating Characteristic for Split {0}'.format(
                set_n))
            plt.legend(loc="lower right")
            plt.savefig(
                fname="images/{1}ROC{0}.jpg".format(set_n, filename[:-1]))

    color_list = [
        "aqua", "chocolate", "brown", "navy", "lime", "olive", "silver",
        "gold", "pink", "magenta"
    ]

    plt.figure()
    print("Thresholds acquired:")
    for set_n in range(1, 11):
        print("Split {0}".format(set_n), thresh[set_n])
        plt.plot(fpr[set_n],
                 tpr[set_n],
                 color=color_list[set_n - 1],
                 lw=1,
                 label="Split {0}".format(set_n))

    plt.xlim([0.0, 1.05])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (Consolidated)')
    plt.legend(loc="lower right")
    plt.savefig(fname="images/{0}ROC_ALL.jpg".format(filename[:-1]))

    # Each split can yield a different number of pairs/thresholds, so wrap the
    # values in Series and let pandas pad the shorter columns with NaN.
    pd.DataFrame({k: pd.Series(v) for k, v in true_val.items()}).to_csv(
        path_or_buf="results/{0}GT.csv".format(filename[:-1]))
    pd.DataFrame({k: pd.Series(v) for k, v in score.items()}).to_csv(
        path_or_buf="results/{0}Score.csv".format(filename[:-1]))
    pd.DataFrame({k: pd.Series(v) for k, v in tpr.items()}).to_csv(
        path_or_buf="results/{0}TPR.csv".format(filename[:-1]))
    pd.DataFrame({k: pd.Series(v) for k, v in fpr.items()}).to_csv(
        path_or_buf="results/{0}FPR.csv".format(filename[:-1]))
    pd.DataFrame({k: pd.Series(v) for k, v in thresh.items()}).to_csv(
        path_or_buf="results/{0}TH.csv".format(filename[:-1]))
    # `area` holds one scalar per split, so it needs an explicit index.
    pd.DataFrame(area, index=[0]).to_csv(
        path_or_buf="results/{0}Area.csv".format(filename[:-1]))

    print("\n\nDone")
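A minimal usage sketch (not part of the original code): the training script in Code Example #2 writes the checkpoint prefix of its latest run to latest_t_stmp.txt, so evaluation can be driven from that file. Whether load_model expects the prefix to include the "pickles/" directory is not shown in the excerpt, so the path handling below is an assumption.

if __name__ == "__main__":
    # Read the prefix written by the latest training run, e.g. "A2_T20190101_120000_S"
    with open("latest_t_stmp.txt") as f:
        run_prefix = f.read().strip()

    # Assumption: load_model resolves "<prefix><split>.pt" relative to the
    # checkpoint directory; prepend "pickles/" here if it expects a full path.
    evaluate(run_prefix)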
Code Example #2
import time

import torch
from torch import cuda

# Helper and the model factory module `mod` are project-local; their import
# paths are not part of this excerpt.


def main(batch_size, num_epochs, lr, file_write, flag_dummy, temperature,
         lr_decay, features):
    """
    Main function for training.
    :param batch_size  : Batch size to use.
    :param num_epochs  : Number of epochs for each split.
    :param lr          : Learning rate to be set at the start of each split.
    :param file_write  : Write output to stdout (default) or to a file.
    :param flag_dummy  : Create a dummy file for evaluation.
    :param temperature : Set default temperature for softmax/log_softmax layer while training.
    :param lr_decay    : Learning rate decay for every drop in min loss observed.
    :param features    : Number of nodes for penultimate feature layer.
    :return:
    """

    # --------------------- Parameters ---------------------
    device = torch.device("cuda:0" if cuda.is_available() else "cpu")

    # optim_name = 'SGD'
    # optim_name = 'RMS'
    optim_name = 'Adam'

    batch_print = 50
    op_dir = "pickles/"

    t_stmp = time.strftime("%Y%m%d_%H%M%S", time.gmtime())

    # Creating a file to store the name of the latest models
    with open("latest_t_stmp.txt", 'w') as ff:
        ff.write("A2_T{0}_S".format(t_stmp))

    helper = Helper("log/log_" + t_stmp + ".txt")
    helper.write_file(file_write)

    helper.log(msg="Starting data loading.")

    training_set, training_loader = helper.get_data(
        mode="train", training_batch_size=batch_size)
    helper.log(msg="Finished data loading. Starting main training.")

    for set_n in range(1, 11):

        init_lr = lr

        model, criterion, optimizer = mod.get_model(device,
                                                    optim_name,
                                                    lamb=0,
                                                    learning_rate=init_lr,
                                                    final_features=features)
        model.train(True)
        model.set_temperature(temperature)

        if flag_dummy:
            helper.log(msg="\nCreating dummy file.\n")
            dummy_file = {
                "model": model.state_dict(),
                "criterion": criterion.state_dict(),
                "optimizer": optimizer.state_dict(),
                "optim_name": optim_name,
                "features": features
            }
            torch.save(dummy_file, op_dir + "dummy.pt")
            flag_dummy = False

        helper.log(msg="\nStart of split {0}\n".format(set_n))

        total_len = len(training_loader[set_n])

        running_loss = 0.0
        cor = 0
        tot = 0
        cor_b = 0
        tot_b = 0
        # Baseline for the running loss; compared and updated every `batch_print` steps
        past_loss = 6.0 * batch_print

        for epoch in range(num_epochs):
            for i, (images, labels) in enumerate(training_loader[set_n]):

                # Change variable type to match GPU requirements
                inp = images.to(device)
                lab = labels.to(device)

                # Reset gradients before processing
                optimizer.zero_grad()

                # Get model output
                out = model(inp)

                # Calculate loss
                loss = criterion(out, lab)

                # Accuracy calculation (count the actual batch size so the
                # last, possibly smaller, batch is handled correctly)
                _, predicted = torch.max(out.data, 1)

                tot_b += lab.size(0)
                cor_b += (predicted == lab).sum().item()

                tot += lab.size(0)
                cor += (predicted == lab).sum().item()

                # Update weights
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                # logger.log(msg="\rLoss = {0}        ".format(l), end="")
                if (i + 1) % batch_print == 0:

                    helper.log(
                        msg="Split: {3}, Epoch: {0}, step: {1}/{2} ".format(
                            epoch + 1, i + 1, total_len, set_n),
                        end="\t")

                    helper.log(
                        msg="Running Loss (avg): {0:.06f}, Past: {1:.06f}".
                        format((running_loss / batch_print),
                               (past_loss / batch_print)),
                        end="\t")
                    helper.log(
                        msg="Accuracy: (Per {2})|(Total): {0:.03f}|{1:.03f} %".
                        format((cor_b * 100) / tot_b, (cor * 100) / tot,
                               batch_size * batch_print),
                        end="\t")

                    # Decay the learning rate whenever the running loss improves,
                    # but never let it drop below 0.001
                    if running_loss < past_loss:
                        past_loss = running_loss
                        init_lr *= lr_decay
                        for params in optimizer.param_groups:
                            params['lr'] = max(init_lr, 0.001)

                    helper.log(msg="LR: {0:.06f}".format(init_lr))

                    running_loss = 0.0
                    cor_b = 0
                    tot_b = 0

        filename = op_dir + "A2_T{1}_S{0}.pt".format(set_n, t_stmp)

        # Idea for named saved file was picked up from here:
        # https://github.com/quiltdata/pytorch-examples/blob/master/imagenet/main.py
        save_file = {
            "model": model.state_dict(),
            "criterion": criterion.state_dict(),
            "optimizer": optimizer.state_dict(),
            "optim_name": optim_name,
            "features": features
        }

        torch.save(save_file, filename)
        helper.log(
            msg="\nFile {0} saved for split {1}".format(filename, set_n))

    helper.close()
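A hypothetical entry point for the training function; the hyperparameter values below are only illustrative defaults and are not taken from the original project.

if __name__ == "__main__":
    # Illustrative settings only; the original defaults are not shown in this excerpt.
    main(batch_size=32,
         num_epochs=10,
         lr=0.01,
         file_write=False,   # passed straight to Helper.write_file; its expected type is not shown here
         flag_dummy=True,     # also saves pickles/dummy.pt once before training starts
         temperature=1.0,
         lr_decay=0.95,
         features=512)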