def test_orders_by_task_passed_to_the_query(self):
    # Similarity scores are stored for two tasks; the response order is
    # expected to follow only the task named in the query string.
    queried_task = 'the-task-id'
    other_task = 'some-other-task'
    create_tasks([queried_task, other_task])

    similar_service = ServiceBuilder(self.organization).with_location(
        self.location).create()
    dissimilar_service = ServiceBuilder(self.organization).with_location(
        self.location).create()

    lowest, low, high, highest = 0.1, 0.2, 0.8, 0.9

    score_fixtures = [
        (queried_task, similar_service, high),
        (queried_task, dissimilar_service, low),
        # Test verifies that these scores are ignored; if they were
        # considered then dissimilar_service would be returned as the
        # first element.
        (other_task, similar_service, lowest),
        (other_task, dissimilar_service, highest),
    ]
    for task, service, score in score_fixtures:
        self.set_service_similarity_score(task, service.id, score)

    url = '/v1/services_at_location/?related_to_task={0}'.format(
        queried_task)
    body = self.client.get(url).json()

    self.assertEqual(len(body), 2)
    self.assertEqual(body[0]['service']['name'], similar_service.name)
    self.assertEqual(body[1]['service']['name'], dissimilar_service.name)
def setUp(self):
    # Shared fixtures: one organization, three task ids registered via
    # create_tasks, and the ids of three services in that organization.
    self.organization = OrganizationBuilder().create()

    self.three_task_ids = [a_string() for _ in range(3)]
    create_tasks(self.three_task_ids)

    service_ids = []
    for _ in range(3):
        service = ServiceBuilder(self.organization).create()
        service_ids.append(service.id)
    self.three_service_ids = service_ids
def test_saves_required_number_of_records_for_each_row(self):
    # A 5x5 score matrix saved with two scores per row should produce
    # 5 * 2 TaskSimilarityScore records.
    task_count = 5
    scores_to_save_per_row = 2

    ids = [a_string() for _ in range(task_count)]
    create_tasks(ids)

    dense_scores = [[a_float() for _ in range(task_count)]
                    for _ in range(task_count)]
    scores = scipy.sparse.csr_matrix(dense_scores)

    save_task_similarities(ids, scores, scores_to_save_per_row)

    scores_saved_in_all = task_count * scores_to_save_per_row
    self.assertEqual(TaskSimilarityScore.objects.count(),
                     scores_saved_in_all)
def test_deletes_existing_records(self):
    # One record exists before the call; saving an empty set of
    # similarities is expected to leave the table empty.
    first_task_id, second_task_id = a_string(), a_string()
    create_tasks([first_task_id, second_task_id])

    TaskSimilarityScore(
        first_task_id=first_task_id,
        second_task_id=second_task_id,
        similarity_score=a_float(),
    ).save()

    save_task_similarities([], [], 0)

    remaining = TaskSimilarityScore.objects.count()
    self.assertEqual(remaining, 0)
def test_can_create_row(self):
    # Round-trips a TaskSimilarityScore through validate_save_and_reload
    # and checks that all three fields come back unchanged.
    first_id, second_id = a_string(), a_string()
    score = a_float()
    create_tasks([first_id, second_id])

    unsaved = TaskSimilarityScore(
        first_task_id=first_id,
        second_task_id=second_id,
        similarity_score=score,
    )
    reloaded = validate_save_and_reload(unsaved)

    self.assertEqual(reloaded.first_task_id, first_id)
    self.assertEqual(reloaded.second_task_id, second_id)
    # Float comparison, hence assertAlmostEqual rather than assertEqual.
    self.assertAlmostEqual(reloaded.similarity_score, score)
def test_saves_all_off_diagonal_scores_if_number_of_scores_to_save_is_large(
        self):
    # Asking for far more scores per row than exist should store every
    # off-diagonal element of the 5x5 matrix: 5 rows * 4 columns.
    task_count = 5
    too_many_records_to_save = 2000

    ids = [a_string() for _ in range(task_count)]
    create_tasks(ids)

    dense_scores = [[a_float() for _ in range(task_count)]
                    for _ in range(task_count)]
    scores = scipy.sparse.csr_matrix(dense_scores)

    save_task_similarities(ids, scores, too_many_records_to_save)

    number_of_off_diagonal_elements = task_count * (task_count - 1)
    self.assertEqual(TaskSimilarityScore.objects.count(),
                     number_of_off_diagonal_elements)
def test_can_create_row(self):
    # Round-trips a TaskServiceSimilarityScore through
    # validate_save_and_reload and checks task, service and score.
    organization = OrganizationBuilder().create()
    service = ServiceBuilder(organization).create()
    task_id = a_string()
    score = a_float()
    create_tasks([task_id])

    unsaved = TaskServiceSimilarityScore(
        task_id=task_id,
        service=service,
        similarity_score=score,
    )
    reloaded = validate_save_and_reload(unsaved)

    self.assertEqual(reloaded.task_id, task_id)
    self.assertEqual(reloaded.service_id, service.id)
    # Float comparison, hence assertAlmostEqual rather than assertEqual.
    self.assertAlmostEqual(reloaded.similarity_score, score)
def test_does_not_return_unrelated_services(self):
    # Two services share the location, but only one has a similarity
    # score for the task; only that one is expected in the response.
    task_id = 'the-task-id'
    create_tasks([task_id])

    related_service = ServiceBuilder(self.organization).with_location(
        self.location).create()
    self.set_service_similarity_score(task_id, related_service.id,
                                      a_float())

    # Unrelated service: created at the same location, never given a
    # similarity score, and not referenced again by the test.
    ServiceBuilder(self.organization).with_location(
        self.location).create()

    url = '/v1/services_at_location/?related_to_task={0}'.format(task_id)
    body = self.client.get(url).json()

    self.assertEqual(len(body), 1)
    self.assertEqual(body[0]['service']['name'], related_service.name)
def test_can_order_by_similarity_to_task(self):
    # The service with the higher similarity score for the task is
    # expected first in the response.
    task_id = 'the-task-id'
    create_tasks([task_id])

    similar_service = ServiceBuilder(self.organization).with_location(
        self.location).create()
    dissimilar_service = ServiceBuilder(self.organization).with_location(
        self.location).create()

    high_score, low_score = 0.9, 0.1
    self.set_service_similarity_score(task_id, similar_service.id,
                                      high_score)
    self.set_service_similarity_score(task_id, dissimilar_service.id,
                                      low_score)

    url = '/v1/services_at_location/?related_to_task={0}'.format(task_id)
    body = self.client.get(url).json()

    self.assertEqual(len(body), 2)
    self.assertEqual(body[0]['service']['name'], similar_service.name)
    self.assertEqual(body[1]['service']['name'], dissimilar_service.name)
def test_saves_elements_with_second_task_id(self):
    # Saves the two highest off-diagonal scores of each row of a 4x4
    # matrix, then checks which column's task id was stored as
    # second_task_id, reading the records in similarity_score order.
    ids = [a_string() for _ in range(4)]
    create_tasks(ids)

    scores = scipy.sparse.csr_matrix([
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
        [13, 14, 15, 16],
    ])
    records_to_save_per_row = 2

    save_task_similarities(ids, scores, records_to_save_per_row)

    records = TaskSimilarityScore.objects.order_by('similarity_score')
    self.assertEqual(len(records), 8)

    # Column index (into ids) expected for each record, lowest score
    # first: scores 3, 4, 7, 8, 10, 12, 14, 15.
    expected_columns = [2, 3, 2, 3, 1, 3, 1, 2]
    for position, column in enumerate(expected_columns):
        self.assertEqual(records[position].second_task_id, ids[column])
def test_saves_two_non_diagonal_elements_with_the_highest_scores_in_each_row(
        self):
    # For each row of a 4x4 matrix, the two largest off-diagonal values
    # are expected to be stored; records are read in score order.
    ids = [a_string() for _ in range(4)]
    create_tasks(ids)

    scores = scipy.sparse.csr_matrix([
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
        [13, 14, 15, 16],
    ])
    records_to_save_per_row = 2

    save_task_similarities(ids, scores, records_to_save_per_row)

    records = TaskSimilarityScore.objects.order_by('similarity_score')
    self.assertEqual(len(records), 8)

    # Diagonal values 1, 6, 11 and 16 never appear in this list.
    expected_scores = [3.0, 4.0, 7.0, 8.0, 10.0, 12.0, 14.0, 15.0]
    for position, expected in enumerate(expected_scores):
        self.assertEqual(records[position].similarity_score, expected)
def test_includes_related_topics_from_database_in_order_of_declining_similarity_score(
        self):
    # Two similarity records are stored for one task; the parsed task
    # map is expected to list the higher-scoring related task first.
    task_id = a_string()
    similar_task_id = a_string()
    create_tasks([task_id, similar_task_id])

    a_high_score = 0.9
    high_record = TaskSimilarityScore(
        first_task_id=task_id,
        second_task_id=similar_task_id,
        similarity_score=a_high_score,
    )
    high_record.save()

    dissimilar_task_id = a_string()
    create_tasks([dissimilar_task_id])

    a_low_score = 0.1
    low_record = TaskSimilarityScore(
        first_task_id=task_id,
        second_task_id=dissimilar_task_id,
        similarity_score=a_low_score,
    )
    low_record.save()

    path = 'some/path/chapter/topics/{0}/en.Learn_english.txt'.format(
        task_id)
    parsed = parse_task_files([[path, a_string()]])

    related = parsed['taskMap'][task_id]['relatedTasks']
    self.assertEqual(related, [similar_task_id, dissimilar_task_id])
def test_deletes_existing_records(self):
    # Two tasks exist before save_topics runs; afterwards exactly one
    # Task row is expected.
    preexisting_task_ids = [a_string(), a_string()]
    helpers.create_tasks(preexisting_task_ids)

    save_topics(self.one_task, self.counts)

    task_total = Task.objects.count()
    self.assertEqual(task_total, 1)