def evaluate_datasets(args, model, device, descriptor, train_acc):
    """Evaluate ``model`` on the validation and train splits of the active dataset.

    Two protocols are reported as human-readable strings:
      * one-out-of-many accuracy (the positive must outscore every negative)
      * pairwise accuracy (the positive must outscore its paired negative)

    For a model trained on the "ds" dataset, the "dsplus" validation split is
    additionally evaluated and the globally mutated dataset arguments are
    restored to "ds" afterwards.

    :param args: namespace holding ``dataPrefixPath`` and data-loader settings
    :param model: trained model to evaluate
    :param device: torch device the model runs on
    :param descriptor: label prefixed to both result strings (e.g. an epoch tag)
    :param train_acc: unused here; kept for interface compatibility
    :return: tuple ``(pairwise_accuracy, one_out_of_many_accuracy)`` of strings
    """
    datapath = args.dataPrefixPath
    val_one_many_res = predict_one_out_of_many(args, model, device, (datapath + "validate/"))
    # Cap the (slow) train-split one-vs-many evaluation at 200 samples.
    train_one_many_res = predict_one_out_of_many(args, model, device, (datapath + "train/"), 200)
    one_out_of_many_accuracy = ('{}:val={:.2f}%,{:.2f}%,train={:.2f}%,{:.2f}%'.format(
        descriptor, val_one_many_res[0], val_one_many_res[1],
        train_one_many_res[0], train_one_many_res[1]))
    eval_ranking_result = predict_batch(args, model, device, (datapath + "validate/"))
    train_ranking_result = predict_batch(args, model, device, (datapath + "train/"), max_samples=400)
    # BUG FIX: removed the stray comma after "train=" so the train numbers are
    # formatted consistently with the "val=" part of the same string.
    pairwise_accuracy = ('{}:val={:.2f}%,{:.2f}%,[{}/{}],train={:.2f}%,{:.2f}%,[{}/{}]'
                         .format(descriptor,
                                 eval_ranking_result[0], eval_ranking_result[1],
                                 eval_ranking_result[2], eval_ranking_result[3],
                                 train_ranking_result[0], train_ranking_result[1],
                                 train_ranking_result[2], train_ranking_result[3]))
    # uncomment if you are not interested in results on ds+ for models trained on ds
    if ds_dataset(args.dataPrefixPath):
        # Temporarily re-point the global dataset arguments at ds+.
        update_arguments_for_dataset("./dataset/data/dsplus/test/", args)
        datapath = datapath.replace("ds", "dsplus")
        val_one_many_res = predict_one_out_of_many(args, model, device, (datapath + "validate/"))
        one_out_of_many_accuracy += (",val-ds+={:.2f}%,{:.2f}%".format(val_one_many_res[0], val_one_many_res[1]))
        eval_ranking_result = predict_batch(args, model, device, (datapath + "validate/"))
        pairwise_accuracy += (",val-ds+={:.2f}%,{:.2f}%,[{}/{}]".format(
            eval_ranking_result[0], eval_ranking_result[1],
            eval_ranking_result[2], eval_ranking_result[3]))
        # Restore the dataset arguments that were switched to ds+ above.
        update_arguments_for_dataset("./dataset/data/ds/test/", args)
    return pairwise_accuracy, one_out_of_many_accuracy
def predict_batch(args, model, device, dataset, desc=None, max_samples=-1):
    """Compute pairwise ranking accuracy on ``dataset``.

    Each negative file (``*_0.txt``) is paired with its positive counterpart
    via ``good_file``; a pair counts as correct when the positive's class-1
    score exceeds (strictly, resp. non-strictly) the negative's.

    :param args: namespace with data-loader settings
    :param model: trained model (switched to eval mode here)
    :param device: torch device the model runs on
    :param dataset: directory of the split to evaluate (trailing slash expected)
    :param desc: tqdm progress description; ``None`` disables the bar
    :param max_samples: stop after roughly this many samples (-1 = no limit)
    :return: (strict accuracy %, non-strict accuracy %, #strictly correct, #total)
    """
    model.eval()

    def _line_count(path):
        # BUG FIX: count lines under a context manager; the original
        # len(open(...).readlines()) left file handles open until GC.
        with open(path) as f:
            return sum(1 for _ in f)

    file_list_neg = [s for s in os.listdir(dataset) if ("_0.txt" in s)]
    if ds_dataset(dataset):
        # Longest files first so similarly sized samples batch together.
        file_list_neg.sort(key=lambda x: _line_count(dataset + x), reverse=True)  # my_collate_rnn
    else:
        file_list_neg.sort(key=lambda x: int(x.split("-")[2]), reverse=True)  # my_collate_rnn
    file_list_pos = [good_file(s, dataset, args)[1] for s in file_list_neg]
    data_loader_evaluation_pos = get_custom_dataset(args, dataset, False, True, file_list_pos)
    data_loader_evaluation_neg = get_custom_dataset(args, dataset, False, True, file_list_neg)
    total = 0
    # Positive and negative loaders are iterated in lockstep, pair by pair.
    it_neg = iter(data_loader_evaluation_neg)
    correct_classified = 0
    correct_classified_se = 0
    for i, sample_batched in enumerate(tqdm(data_loader_evaluation_pos, desc=desc, disable=(desc is None))):
        if max_samples != -1 and i * len(sample_batched["label"]) > max_samples:
            break
        output = get_model_output(args, sample_batched, model, device)
        batch_neg = next(it_neg)
        output_neg = get_model_output(args, batch_neg, model, device)
        # Column 1 is the positive-class score; compare element-wise per pair.
        correct_classified = correct_classified + (output_neg[:, 1] < output[:, 1]).nonzero().size(0)
        correct_classified_se = correct_classified_se + (output_neg[:, 1] <= output[:, 1]).nonzero().size(0)
        total += len(output)
    return 100. * correct_classified / total, 100. * correct_classified_se / total, correct_classified, total
def validation_dataset(dataset):
    """Return the list of validation-split directories to evaluate for ``dataset``.

    D_S-trained models are validated on both D_S and D_S+; D_S+-trained models
    only on D_S+; any other dataset gets no validation splits.
    """
    if ds_dataset(dataset):
        # Trained on D_S: evaluate on D_S and on D_S+.
        return ["./dataset/data/ds/validate/", "./dataset/data/dsplus/validate/"]
    # Trained on D_S+: evaluate on D_S+ only; otherwise nothing.
    return ["./dataset/data/dsplus/validate/"] if "dsplus" in dataset else []
def predict_one_out_of_many(args, model, device, dataset, maximum=-1, desc=None):
    """Compute one-out-of-many accuracy on ``dataset``.

    For every positive file (``*_1.txt``) the positive-class softmax score is
    compared against the scores of its group of negative candidates (provided
    by ``get_negative_list``); the sample counts as correct when the positive
    outscores every negative (strictly, resp. non-strictly).

    :param args: namespace with ``batch_size`` and data-loader settings
    :param model: trained model (switched to eval mode here)
    :param device: torch device the model runs on
    :param dataset: directory of the split to evaluate (trailing slash expected)
    :param maximum: cap on evaluated positive samples (-1 = no limit)
    :param desc: tqdm progress description; ``None`` disables the bar
    :return: (strict accuracy %, non-strict accuracy %, #evaluated samples)
    """
    model.eval()

    def _line_count(path):
        # BUG FIX: count lines under a context manager; the original
        # len(open(...).readlines()) left file handles open until GC.
        with open(path) as f:
            return sum(1 for _ in f)

    file_list_pos = [s for s in os.listdir(dataset) if ("_1.txt" in s)]
    if ds_dataset(dataset):
        # Longest files first so similarly sized samples batch together.
        file_list_pos.sort(key=lambda x: _line_count(dataset + x), reverse=True)  # my_collate_rnn
    else:
        file_list_pos.sort(key=lambda x: int(x.split("-")[2]), reverse=True)  # my_collate_rnn
    pos_scores = list()
    data_loader_evaluation_pos = get_custom_dataset(args, dataset, False, True, file_list_pos)
    for batch_idx, sample_batched in enumerate(tqdm(data_loader_evaluation_pos, desc=desc, disable=(desc is None))):
        output = F.softmax(get_model_output(args, sample_batched, model, device), dim=1)
        pos_scores.extend(output[:, 1].tolist())
        if maximum != -1 and args.batch_size * batch_idx > maximum:
            break
    correct = 0
    se_correct = 0
    total = 0
    # filelist_counter[i] = number of negative candidates belonging to positive i.
    (totalFileListNeg, filelist_counter) = get_negative_list(len(pos_scores), file_list_pos, dataset, args, maximum)
    assert (len(filelist_counter) == len(pos_scores))
    data_loader_evaluation_neg = get_custom_dataset(args, dataset, False, True, totalFileListNeg)
    neg_scores = list()
    for sample_batched in data_loader_evaluation_neg:
        output = F.softmax(get_model_output(args, sample_batched, model, device), dim=1)
        neg_scores.extend(output[:, 1].tolist())
    start = 0
    for i, pos_score in enumerate(pos_scores):
        if maximum != -1 and i >= maximum:
            break
        s_correct = True
        se_t_correct = True
        # The negatives of positive i occupy a contiguous slice of neg_scores.
        subsamples = neg_scores[start: (start + filelist_counter[i])]
        if len(subsamples) > 0:
            if max(subsamples) >= pos_score:
                s_correct = False
            if max(subsamples) > pos_score:
                se_t_correct = False
        start = start + filelist_counter[i]
        # else:
        #     print("Only positive candidate: ", dataset, fileListPos[i], maximum, len(fileListPos), start, start + filelistCounter[i])
        total = total + 1
        correct = correct + int(s_correct)
        se_correct = se_correct + int(se_t_correct)
    return 100. * correct / total, 100. * se_correct / total, total
def getSelectedServer(name, dataset):
    """Map a model ``name`` and ``dataset`` to a server configuration tuple.

    :param name: model family identifier ("MLP", "CNN", "*ensembleRnnCnn*", "*RNN*")
    :param dataset: dataset path/name; ds vs. non-ds selects the ManualType variant
    :return: tuple (model_name, dataset, data_kind, manual_type, rnn_mode, "")
    :raises ValueError: if ``name`` matches no known model family
    """
    if name == "MLP":
        manual = ManualType.Manual_MLP_OS if ds_dataset(dataset) else ManualType.Manual_MLP_MS
        selected_server = ("MLP", dataset, Data.MANUAL, manual, DataModeRNN.UNUSED, "")
    elif name == "CNN":
        selected_server = ("CNN", dataset, Data.IMAGE, ManualType.UNUSED, DataModeRNN.UNUSED, "")
    elif "ensembleRnnCnn" in name:
        manual = ManualType.Manual_RNN_OS if ds_dataset(dataset) else ManualType.Manual_RNN_MS
        selected_server = ("EnsembleRnnCnn", dataset, Data.BOTH, manual, mode_for_name(name), "")
    elif "RNN" in name:
        manual = ManualType.Manual_RNN_OS if ds_dataset(dataset) else ManualType.Manual_RNN_MS
        selected_server = ("RNN", dataset, Data.MANUAL, manual, mode_for_name(name), "")
    else:
        # BUG FIX: previously this fell through with selected_server unbound and
        # crashed with UnboundLocalError at the print below; fail explicitly.
        print(name)
        print("Target not found")
        raise ValueError("Unknown model name: " + name)
    print("Selected server: " + selected_server[0])
    return selected_server
def evaluate(args, model, device, dataset_name, result_dict, optimizer):
    """Run the final test-set evaluation and record the scores in ``result_dict``.

    For ds-trained models the ds test results go into the '-ds'-suffixed keys,
    and the ds+ results into the plain keys; otherwise only the named dataset
    is evaluated. ``optimizer`` is unused here; kept for interface compatibility.
    """
    pairwise_scores = []
    one_of_many_scores = []
    if ds_dataset(dataset_name):
        # The results on ds are better since it is trained on ds (with these
        # specific device dimensions); with the modifications according to the
        # synthesizer we tried to be as close as possible to the changes in ds+.
        ds_result = evaluate_test_dataset(args, model, device, "ds", -1.0)
        result_dict['test_pairwise_accuracy-ds'] = ds_result[0]
        result_dict['test_accuracy_one_vs_many-ds'] = ds_result[1]
        # Then evaluate the ds-trained model on ds+.
        # NOTE(review): args stays re-pointed at dsplus after this call — confirm
        # callers do not expect the ds arguments to be restored.
        update_arguments_for_dataset("./dataset/data/dsplus/test/", args)
        args.dataPrefixPath = "./dataset/data/dsplus/"
        dsplus_result = evaluate_test_dataset(args, model, device, "ds+", -1.0)
        pairwise_scores.append(dsplus_result[0])
        one_of_many_scores.append(dsplus_result[1])
    else:
        named_result = evaluate_test_dataset(args, model, device, dataset_name, -1.0)
        pairwise_scores.append(named_result[0])
        one_of_many_scores.append(named_result[1])
    result_dict['test_pairwise_accuracy'] = pairwise_scores
    result_dict['test_accuracy_one_vs_many'] = one_of_many_scores