def _perform_similarity_test(self, embedding: Embedding):
    """Check that the current group's pair similarities form the expected hierarchy.

    Sorts the parallel similarity/label lists by their shared ordering key,
    then verifies (a) the best similarity beats the noise level and (b) each
    consecutive pair is ordered by ``self.metric.is_better_than``.

    :param embedding: embedding used by the metric's noise comparison.
    :return: a CaseResult describing the first violation, or a passing one.
    """
    # Sort both parallel lists by the shared key (item[0]) so index i in
    # one list corresponds to index i in the other.
    self._current_group_similarities.sort(key=lambda item: item[0])
    self._current_group_labels.sort(key=lambda item: item[0])
    test_input: str = " >\n".join(
        f"[{labels[1][0]}, {labels[1][1]}]"
        for labels in self._current_group_labels)
    best_similarity: float = self._current_group_similarities[0][1]
    if not self.metric.is_better_than_noise(best_similarity, embedding):
        return CaseResult(test_input,
                          self._stringify_better_than_noise(embedding),
                          best_similarity, False)
    for i in range(1, len(self._current_group_similarities)):
        expected: float = self._current_group_similarities[i][1]
        actual: float = self._current_group_similarities[i - 1][1]
        if not self.metric.is_better_than(actual, expected):
            a: str = self._current_group_labels[i - 1][1][0]
            b: str = self._current_group_labels[i - 1][1][1]
            c: str = self._current_group_labels[i][1][0]
            d: str = self._current_group_labels[i][1][1]
            # BUG FIX: the original wrote f"[{a, b}]", which interpolates
            # the tuple (a, b) and renders as "[('x', 'y')]".  Interpolate
            # each label separately to match the "[x, y]" shape used for
            # test_input above.
            expected_output: str = f"[{a}, {b}] > [{c}, {d}]"
            actual_output: str = f"[{a}, {b}] <= [{c}, {d}]"
            return CaseResult(test_input, expected_output, actual_output, False)
    # NOTE(review): the actual-output argument here is the boolean True,
    # unlike the string outputs above — confirm CaseResult tolerates this.
    return CaseResult(test_input, "similarity hierarchy", True, True)
def _perform_test(self, embedding):
    """Identify the outlier in the current group and report the outcome.

    Groups with two or fewer members cannot contain an outlier and fail
    immediately with an explanatory message.
    """
    joined_labels = ", ".join(self._current_group_labels)
    if len(self._current_group) <= 2:
        return CaseResult(joined_labels, "No expected output",
                          "Too few elements in group", False)
    detected_outlier = self._identify_outlier(embedding)
    position = self._current_group.index(detected_outlier)
    return CaseResult(joined_labels,
                      self._current_outlier_label,
                      self._current_group_labels[position],
                      self._current_outlier == detected_outlier)
def _create_enabled_task_result(passed=True):
    """Build a finalized TaskResult holding a single case with the given outcome."""
    similarity_task = CosineSimilarityTask("CosineSimilarityTask", Path())
    result = TaskResult(similarity_task, True)
    result.add_case_result(
        CaseResult("expected", "expected", "expected", passed))
    result.finalize()
    return result
def test_add_case_result_raises_if_finalized(self):
    """Adding a case to an already-finalized TaskResult must raise."""
    finalized = TaskResult(CosineSimilarityTask("SimilarityTask", Path()), True)
    finalized.finalize()
    with pytest.raises(Exception):
        finalized.add_case_result(
            CaseResult("input", "expected", "actual", False))
def test_representation_contains_all_case_results(self):
    """The string form renders one line per case result (plus a header)."""
    result = TestTaskResult._create_enabled_task_result()
    result.add_case_result(CaseResult("Vienna", "London", "Paris", False))
    result.finalize()
    rendered_lines = str(result).split("\n")
    assert len(rendered_lines) == 3
    assert result.__str__() == result.__repr__()
def _create_enabled_task_result():
    """Build an (unfinalized) TaskResult containing one passing case."""
    similarity_task = CosineSimilarityTask("SimilarityTask", Path())
    result = TaskResult(similarity_task, True)
    passing_case = CaseResult("Berlin", "Paris", "Paris", True)
    result.add_case_result(passing_case)
    return result
def _test_neighborhood(self, embedding):
    """Score the current group's mutual similarity against the noise level.

    Computes the batch similarity of the group's vectors, compares it to
    noise, and passes when that verdict matches the expected one.

    :param embedding: mapping from word to vector, also used for the noise
        comparison.
    :return: a CaseResult with the formatted similarity as actual output.
    """
    result = self.metric.batch_compute(
        [embedding[word] for word in self._current_group])
    is_similar = self.metric.is_better_than_noise(result, embedding)
    passed = is_similar == self._current_is_expected_similar
    expected_result = self._stringify_expected_result(
        self._current_is_expected_similar)
    # f-string replaces the dated "'%06.4f' % result" — identical output.
    return CaseResult(", ".join(self._current_group_labels),
                      expected_result, f"{result:06.4f}", passed)
def test_pass_rate(self):
    """pass_rate is 0 before finalize and reflects the pass ratio afterwards."""
    result = TestTaskResult._create_enabled_task_result()
    assert result.pass_rate() == 0
    failing_case = CaseResult("Vienna", "London", "Paris", False)
    result.add_case_result(failing_case)
    assert result.pass_rate() == 0
    result.finalize()
    # one passing + one failing case -> 50 %
    assert result.pass_rate() == 50
def test_success_representation(self):
    """A passing case renders with the PASSED prefix and omits the expectation."""
    passing = CaseResult("Angela_Merkel", "AKK", "AKK", True)
    rendered = str(passing)
    assert rendered.startswith(CaseResult.PASSED_PREFIX)
    assert passing.__str__() == passing.__repr__()
    assert CaseResult.EXPECTED_OUTPUT_PREFIX not in rendered
def test_failure_representation(self):
    """A failing case renders with the NOT_PASSED prefix and shows the expectation."""
    failing = CaseResult("Angela_Merkel", "AKK", "Donald Trump", False)
    rendered = str(failing)
    assert rendered.startswith(CaseResult.NOT_PASSED_PREFIX)
    assert failing.__str__() == failing.__repr__()
    assert CaseResult.EXPECTED_OUTPUT_PREFIX in rendered
def _run(self, topn=5):
    """Yield a CaseResult for every analogy A:B :: C:[D] built from the test set.

    For each unordered pair of test-set lines, resolves the four entities
    through entity linking, adapts their casing to the embedding vocabulary,
    and checks whether D appears among the top ``topn`` analogy predictions.

    :param topn: number of predictions requested from ``most_similar``.
    :yields: one CaseResult per pair; out-of-vocabulary pairs fail with an
        "OUT OF VOCABULARY" actual output.
    """
    embedding = self._test_configuration.embedding
    linking = self._test_configuration.entity_linking
    labels = self._test_configuration.entity_labels
    vocab = embedding.word_vectors.wv

    def adjust_case(entity):
        # Prefer the upper-case variant if present in the vocabulary,
        # then the lower-case one; otherwise return the entity unchanged.
        if entity.upper() in vocab:
            return entity.upper()
        if entity.lower() in vocab:
            return entity.lower()
        return entity

    # Materialize once instead of re-invoking _test_set_lines() for every
    # outer iteration; iterate each unordered pair exactly once.
    lines = list(self._test_set_lines())
    for index_a, line_a in enumerate(lines):
        for line_b in lines[index_a + 1:]:
            a = adjust_case(linking[line_a[0]])
            b = adjust_case(linking[line_a[1]])
            c = adjust_case(linking[line_b[0]])
            d = adjust_case(linking[line_b[1]])
            label_a = labels[line_a[0]]
            label_b = labels[line_a[1]]
            label_c = labels[line_b[0]]
            label_d = labels[line_b[1]]
            test_input = f"{label_a} : {label_b} like {label_c} : [?]"
            if any(x not in vocab for x in (a, b, c, d)):
                logging.warning(
                    f"Any of {[a, b, c, d]} is not in vocabulary")
                yield CaseResult(test_input, label_d,
                                 "OUT OF VOCABULARY", False)
                continue
            predictions = embedding.word_vectors.most_similar(
                positive=[b, c], negative=[a], topn=topn)
            top_entities = [entity.lower() for entity, _ in predictions]
            top_entity_labels = ', '.join(labels[entity]
                                          for entity in top_entities)
            # BUG FIX: top_entities holds lower-cased entity identifiers,
            # so membership must be tested with the entity id d, not the
            # display label (the original's hit branch compared label_d
            # against entity ids).  The expected output is always the
            # display label label_d, consistent with the OOV branch (the
            # original's miss branch leaked the raw id d).
            passed = d.lower() in top_entities
            yield CaseResult(test_input, label_d, top_entity_labels, passed)