# Metrics, FileWriter, Evaluator, and query() are assumed to be defined
# elsewhere in this project; only their call sites appear in this section.


def queries(training_set, test_set, train_vectors, test_vectors, path):
    """Run a k-NN query for every test document and score it at thresholds 1-10."""
    threshold_start = 1
    threshold_end = 10
    thresholds = list(range(threshold_start, threshold_end + 1))
    # One Metrics accumulator per threshold value.
    metrics_obj_list = [Metrics() for _ in thresholds]

    fw = FileWriter(path)
    evaluator = Evaluator(training_set)  # renamed from `eval`, which shadows the builtin

    for i, test_vector in enumerate(test_vectors):
        # Retrieve the threshold_end most similar training vectors once,
        # then slice the same ranking at each threshold.
        scores = query(train_vectors, test_vector, threshold_end)
        query_doc = test_set[i]
        for j, threshold in enumerate(thresholds):
            evaluator.query([training_set[x] for (x, _) in scores[:threshold]], query_doc)
            evaluator.calculate()
            metrics_obj_list[j].updateConfusionMatrix(evaluator)
            metrics_obj_list[j].updateMacroAverages(evaluator)

    # Normalise the accumulated metrics over the whole test set and persist them.
    for obj in metrics_obj_list:
        obj.calculate(len(test_set))
    fw.writeToFiles(metrics_obj_list, thresholds)
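# ---------------------------------------------------------------------------
# queries() relies on a query() helper that is not shown in this section. The
# sketch below is one plausible implementation, assuming dense numpy vectors
# and cosine similarity; both the representation and the metric are
# assumptions, and the real helper may differ.

import numpy as np


def query(train_vectors, test_vector, k):
    """Sketch: return (index, similarity) pairs for the k nearest training vectors.

    Assumes `train_vectors` is a 2-D array (one row per training document)
    and `test_vector` is a 1-D array of the same dimensionality.
    """
    train = np.asarray(train_vectors, dtype=float)
    test = np.asarray(test_vector, dtype=float)
    # Cosine similarity between the test vector and every training vector;
    # the small epsilon guards against zero-norm vectors.
    sims = train @ test / (np.linalg.norm(train, axis=1) * np.linalg.norm(test) + 1e-12)
    top = np.argsort(sims)[::-1][:k]
    # Same (index, score) pair format consumed by queries() above.
    return [(int(idx), float(sims[idx])) for idx in top]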
# The block below evaluates precomputed retrieval results instead of querying
# vectors directly. Its enclosing function header and setup are missing from
# this section; the signature and the thresholds/metrics/evaluator setup used
# here are assumptions reconstructed to mirror queries() above. `doc_results`
# is taken to map a test document's pmid to its ranked retrieval hits.


def queries_from_results(training_set, test_set, doc_results, path):
    thresholds = list(range(1, 11))
    metrics_obj_list = [Metrics() for _ in thresholds]
    fw = FileWriter(path)
    evaluator = Evaluator(training_set)

    counter = 0  # retrieved docs that actually belong to the training set
    i = 0        # progress counter (incremented but never initialised in the original)
    for doc in test_set:
        results = []
        temp_results = doc_results[doc["pmid"]]
        for temp_doc in temp_results:
            # Keep only hits whose pmid belongs to the training set.
            if temp_doc["pmid"] in training_set:
                counter += 1
                results.append(temp_doc)
        for k, threshold in enumerate(thresholds):
            # Skips results[0], presumably the query document matching itself.
            evaluator.query(results[1:threshold + 1], doc)
            evaluator.calculate()
            metrics_obj_list[k].updateMacroAverages(evaluator)
        i += 1
        printProgressBar(i, len(test_set))

    print("Docs in training set: ", counter)
    for obj in metrics_obj_list:
        obj.calculate(len(test_set))
    tlog("Done getting results. Writing to files.")
    fw.writeToFiles(metrics_obj_list, thresholds)
    tlog("Done.")
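# ---------------------------------------------------------------------------
# Hypothetical usage, assuming the data sets, vectors, and doc_results have
# already been built; the output paths are placeholders, not from the source:
#
#   queries(training_set, test_set, train_vectors, test_vectors, "results/knn/")
#   queries_from_results(training_set, test_set, doc_results, "results/es/")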