# Example 1
def evaluate_datasets(args, model, device, descriptor, train_acc):
    """Evaluate *model* on the validation and train splits rooted at
    ``args.dataPrefixPath`` and return formatted accuracy summary strings.

    Returns:
        (pairwise_accuracy, one_out_of_many_accuracy): human-readable result
        strings prefixed with *descriptor*.  When the model was trained on
        the "ds" dataset, the "dsplus" validation split is scored as well and
        appended to both strings.

    NOTE(review): ``train_acc`` is unused here — confirm whether it can be
    dropped at the call sites.
    """
    datapath = args.dataPrefixPath
    one_many_val = predict_one_out_of_many(args, model, device, datapath + "validate/")
    one_many_train = predict_one_out_of_many(args, model, device, datapath + "train/", 200)
    one_out_of_many_accuracy = '{}:val={:.2f}%,{:.2f}%,train={:.2f}%,{:.2f}%'.format(
        descriptor,
        one_many_val[0], one_many_val[1],
        one_many_train[0], one_many_train[1])

    rank_val = predict_batch(args, model, device, datapath + "validate/")
    rank_train = predict_batch(args, model, device, datapath + "train/", max_samples=400)
    pairwise_accuracy = '{}:val={:.2f}%,{:.2f}%,[{}/{}],train=,{:.2f}%,{:.2f}%,[{}/{}]'.format(
        descriptor,
        rank_val[0], rank_val[1], rank_val[2], rank_val[3],
        rank_train[0], rank_train[1], rank_train[2], rank_train[3])

    # A model trained on "ds" is additionally scored on the "dsplus"
    # validation split; the test-set configuration is restored afterwards.
    if ds_dataset(args.dataPrefixPath):
        update_arguments_for_dataset("./dataset/data/dsplus/test/", args)
        datapath = datapath.replace("ds", "dsplus")
        one_many_val = predict_one_out_of_many(args, model, device, datapath + "validate/")
        one_out_of_many_accuracy += ",val-ds+={:.2f}%,{:.2f}%".format(
            one_many_val[0], one_many_val[1])
        rank_val = predict_batch(args, model, device, datapath + "validate/")
        pairwise_accuracy += ",val-ds+={:.2f}%,{:.2f}%,[{}/{}]".format(
            rank_val[0], rank_val[1], rank_val[2], rank_val[3])
        update_arguments_for_dataset("./dataset/data/ds/test/", args)
    return pairwise_accuracy, one_out_of_many_accuracy
def predict_batch(args, model, device, dataset, desc=None, max_samples=-1):
    """Pairwise ranking evaluation: for every negative sample ("*_0.txt") and
    its matching positive counterpart, check whether the model scores the
    positive sample higher.

    Args:
        args: run configuration (batching / data options).
        model: trained classifier; switched to eval mode here.
        device: device the model runs on.
        dataset: directory containing the sample files.
        desc: tqdm progress-bar label; the bar is hidden when None.
        max_samples: stop after roughly this many samples (-1 = no limit).

    Returns:
        (strict accuracy %, non-strict accuracy %, #strictly correct, #total).
    """
    model.eval()
    file_list_neg = [s for s in os.listdir(dataset) if ("_0.txt" in s)]

    # Sort longest-first so batches produced by my_collate_rnn group samples
    # of similar length.  Count lines with the file handle closed again
    # immediately — the original `len(open(...).readlines())` sort key leaked
    # one open file per key invocation.
    if ds_dataset(dataset):
        def _num_lines(name):
            with open(dataset + name) as handle:
                return sum(1 for _ in handle)
        file_list_neg.sort(key=_num_lines, reverse=True)
    else:
        file_list_neg.sort(key=lambda x: int(x.split("-")[2]), reverse=True)

    file_list_pos = [good_file(s, dataset, args)[1] for s in file_list_neg]

    data_loader_evaluation_pos = get_custom_dataset(args, dataset, False, True, file_list_pos)
    data_loader_evaluation_neg = get_custom_dataset(args, dataset, False, True, file_list_neg)

    total = 0
    it_neg = iter(data_loader_evaluation_neg)
    correct_classified = 0
    correct_classified_se = 0
    for i, sample_batched in enumerate(tqdm(data_loader_evaluation_pos, desc=desc, disable=(desc is None))):
        if max_samples != -1 and i * len(sample_batched["label"]) > max_samples:
            break
        output = get_model_output(args, sample_batched, model, device)
        batch_neg = next(it_neg)
        output_neg = get_model_output(args, batch_neg, model, device)
        # Column 1 holds the positive-class score; strict (<) and non-strict
        # (<=) wins are counted separately.
        correct_classified += (output_neg[:, 1] < output[:, 1]).nonzero().size(0)
        correct_classified_se += (output_neg[:, 1] <= output[:, 1]).nonzero().size(0)
        total += len(output)
    if total == 0:
        # Empty dataset (or max_samples == 0): avoid ZeroDivisionError.
        return 0.0, 0.0, 0, 0
    return 100. * correct_classified / total, 100. * correct_classified_se / total, correct_classified, total
# Example 3
def validation_dataset(dataset):
    """Return the validation directories to evaluate for *dataset*.

    A model trained on D_S is validated on both D_S and D_S+; a model
    trained on D_S+ only on D_S+; any other dataset gets no validation sets.
    """
    # Trained on D_S: evaluate on D_S and D_S+.
    if ds_dataset(dataset):
        return ["./dataset/data/ds/validate/", "./dataset/data/dsplus/validate/"]
    # Trained on D_S+: evaluate on D_S+ only; otherwise nothing.
    return ["./dataset/data/dsplus/validate/"] if "dsplus" in dataset else []
def predict_one_out_of_many(args, model, device, dataset, maximum=-1, desc=None):
    """One-out-of-many evaluation: each positive sample ("*_1.txt") is ranked
    against its negative alternatives and counts as correct when its score
    beats all of them.

    Args:
        args: run configuration (``batch_size`` is read here).
        model: trained classifier; switched to eval mode here.
        device: device the model runs on.
        dataset: directory containing the sample files.
        maximum: cap on the number of positive samples scored (-1 = no limit).
        desc: tqdm progress-bar label; the bar is hidden when None.

    Returns:
        (strict accuracy %, non-strict accuracy %, #samples evaluated).
    """
    model.eval()
    file_list_pos = [s for s in os.listdir(dataset) if ("_1.txt" in s)]

    # Sort longest-first for my_collate_rnn batching.  Count lines with the
    # file handle closed again immediately — the original
    # `len(open(...).readlines())` sort key leaked one open file per call.
    if ds_dataset(dataset):
        def _num_lines(name):
            with open(dataset + name) as handle:
                return sum(1 for _ in handle)
        file_list_pos.sort(key=_num_lines, reverse=True)
    else:
        file_list_pos.sort(key=lambda x: int(x.split("-")[2]), reverse=True)

    pos_scores = list()
    data_loader_evaluation_pos = get_custom_dataset(args, dataset, False, True, file_list_pos)

    for batch_idx, sample_batched in enumerate(tqdm(data_loader_evaluation_pos, desc=desc, disable=(desc is None))):
        output = F.softmax(get_model_output(args, sample_batched, model, device), dim=1)
        pos_scores.extend(output[:, 1].tolist())
        if maximum != -1 and args.batch_size * batch_idx > maximum:
            break

    correct = 0
    se_correct = 0
    total = 0
    (totalFileListNeg, filelist_counter) = get_negative_list(len(pos_scores), file_list_pos, dataset, args, maximum)

    assert (len(filelist_counter) == len(pos_scores))
    data_loader_evaluation_neg = get_custom_dataset(args, dataset, False, True, totalFileListNeg)

    neg_scores = list()
    for sample_batched in data_loader_evaluation_neg:
        output = F.softmax(get_model_output(args, sample_batched, model, device), dim=1)
        neg_scores.extend(output[:, 1].tolist())

    start = 0
    for i, pos_score in enumerate(pos_scores):
        if maximum != -1 and i >= maximum:
            break
        s_correct = True
        se_t_correct = True
        # Negatives belonging to sample i occupy a contiguous slice of
        # neg_scores whose length is filelist_counter[i].
        subsamples = neg_scores[start: (start + filelist_counter[i])]
        if len(subsamples) > 0:
            if max(subsamples) >= pos_score:
                s_correct = False
            if max(subsamples) > pos_score:
                se_t_correct = False
            start = start + filelist_counter[i]
        # A sample with no negative alternatives counts as correct.
        total = total + 1
        correct = correct + int(s_correct)
        se_correct = se_correct + int(se_t_correct)
    if total == 0:
        # Nothing evaluated (empty dataset or maximum == 0): avoid
        # ZeroDivisionError.
        return 0.0, 0.0, 0
    return 100. * correct / total, 100. * se_correct / total, total
# Example 5
def getSelectedServer(name, dataset):
    """Map a model *name* and *dataset* to its server configuration tuple.

    Returns:
        A 6-tuple (model name, dataset, data kind, manual type, RNN data
        mode, extra string) describing the selected server.

    Raises:
        ValueError: if *name* matches no known model.  (The original code
        fell through and crashed with UnboundLocalError at the final print.)
    """
    if name == "MLP":
        # One-shot (OS) vs. multi-shot (MS) manual type depends on dataset.
        if ds_dataset(dataset):
            selected_server = ("MLP", dataset, Data.MANUAL,
                               ManualType.Manual_MLP_OS, DataModeRNN.UNUSED,
                               "")
        else:
            selected_server = ("MLP", dataset, Data.MANUAL,
                               ManualType.Manual_MLP_MS, DataModeRNN.UNUSED,
                               "")
    elif name == "CNN":
        selected_server = ("CNN", dataset, Data.IMAGE, ManualType.UNUSED,
                           DataModeRNN.UNUSED, "")
    elif "ensembleRnnCnn" in name:
        if ds_dataset(dataset):
            selected_server = ("EnsembleRnnCnn", dataset,
                               Data.BOTH, ManualType.Manual_RNN_OS,
                               mode_for_name(name), "")
        else:
            selected_server = ("EnsembleRnnCnn", dataset,
                               Data.BOTH, ManualType.Manual_RNN_MS,
                               mode_for_name(name), "")
    elif "RNN" in name:
        if ds_dataset(dataset):
            selected_server = ("RNN", dataset,
                               Data.MANUAL, ManualType.Manual_RNN_OS,
                               mode_for_name(name), "")
        else:
            selected_server = ("RNN", dataset,
                               Data.MANUAL, ManualType.Manual_RNN_MS,
                               mode_for_name(name), "")
    else:
        print(name)
        print("Target not found")
        # Fail loudly instead of hitting UnboundLocalError below.
        raise ValueError("Target not found: " + name)
    print("Selected server: " + selected_server[0])
    return selected_server
# Example 6
def evaluate(args, model, device, dataset_name, result_dict, optimizer):
    """Run the final test-set evaluation and store results in *result_dict*.

    For a model trained on "ds", the results on "ds" itself go into the
    dedicated '-ds' keys, while the lists stored under
    'test_pairwise_accuracy' / 'test_accuracy_one_vs_many' hold the "ds+"
    numbers; for any other dataset those lists hold its own results.

    NOTE(review): ``optimizer`` is unused here — confirm it can be dropped
    at the call sites.
    """
    pairwise_results = []
    one_vs_many_results = []
    if ds_dataset(dataset_name):
        # Results on ds (the training distribution, matching device
        # dimensions) are expected to be the better numbers and are kept in
        # separate '-ds' keys.
        ds_result = evaluate_test_dataset(args, model, device, "ds", -1.0)
        result_dict['test_pairwise_accuracy-ds'] = ds_result[0]
        result_dict['test_accuracy_one_vs_many-ds'] = ds_result[1]

        # Re-point the configuration at ds+ and evaluate there as well.
        update_arguments_for_dataset("./dataset/data/dsplus/test/", args)
        args.dataPrefixPath = "./dataset/data/dsplus/"
        dsplus_result = evaluate_test_dataset(args, model, device, "ds+", -1.0)
        pairwise_results.append(dsplus_result[0])
        one_vs_many_results.append(dsplus_result[1])
    else:
        own_result = evaluate_test_dataset(args, model, device, dataset_name, -1.0)
        pairwise_results.append(own_result[0])
        one_vs_many_results.append(own_result[1])

    result_dict['test_pairwise_accuracy'] = pairwise_results
    result_dict['test_accuracy_one_vs_many'] = one_vs_many_results