def test_similar_values_different_facets_then_return_no_question(self):
    """Identical NameA/ValueA facets on every document produce no question."""
    generator = QuestionGenerator()
    first_copy = Facet('NameA', 'ValueA')
    second_copy = Facet('NameA', 'ValueA')
    documents = {
        '1': [first_copy],
        '2': [second_copy],
        '3': [second_copy],
    }

    generated = generator.generate_questions(documents)

    self.assertEqual(len(generated), 0)
def test_one_discriminating_facet_then_return_one_element_question(self):
    """NameA with two distinct values over three documents yields one question.

    The single question is expected to wrap a ``question`` object naming
    ``NameA`` and listing both of its values.
    """
    generator = QuestionGenerator()
    value_one = Facet('NameA', 'ValueA1')
    value_two = Facet('NameA', 'ValueA2')
    documents = {
        '1': [value_one],
        '2': [value_two],
        '3': [value_two],
    }

    generated = generator.generate_questions(documents)

    self.assertEqual(len(generated), 1)
    self.assertEqual(generated[0].question.facet_name, "NameA")
    observed_values = sorted(generated[0].question.facet_values)
    self.assertEqual(observed_values, ['ValueA1', 'ValueA2'])
def test_similar_values_different_facets_then_return_one_question(self):
    """Only NameB differs between the two documents, so it leads the questions.

    Both documents carry an equal NameA/ValueA facet; document '1' also
    carries NameB/ValueB. The first generated question must be about NameB.
    """
    generator = QuestionGenerator()
    shared_a = Facet('NameA', 'ValueA')
    shared_a_copy = Facet('NameA', 'ValueA')
    only_b = Facet('NameB', 'ValueB')
    documents = {
        '1': [shared_a_copy, only_b],
        '2': [shared_a],
    }

    generated = generator.generate_questions(documents)

    self.assertEqual(generated[0].facet_name, "NameB")
    observed_values = sorted(generated[0].facet_values)
    self.assertEqual(observed_values, ['ValueB'])
def test_when_no_discriminating_facet_then_return_no_facet(self):
    """Three documents, each with a different single facet value, give no result."""
    algo = DiscriminatingFacetsAlgo()
    name_a_first = Facet('NameA', 'ValueA1')
    name_a_second = Facet('NameA', 'ValueA2')
    name_b = Facet('NameB', 'ValueB')
    documents = {
        '1': [name_a_first],
        '2': [name_a_second],
        '3': [name_b],
    }

    found = algo.get_discriminating_facets(documents)

    self.assertEqual(len(found), 0)
def test_create_facet_dictionary(self):
    """Check the size and ordering of the dictionary built from three documents.

    Two facet names ('@y' and '@m') are supplied; the result must hold two
    entries, with '@y' at key position 1 and '19' as the first '@y' value.
    """
    builder = FacetDictionary()
    # Written as a literal; insertion order uri1, uri2, uri3 is unchanged.
    documents = {
        'uri1': [Facet('@y', '21'), Facet('@m', '9')],
        'uri2': [Facet('@y', '19')],
        'uri3': [Facet('@m', '9')],
    }

    built = builder.create_facet_dict(documents)

    self.assertEqual(2, len(built))
    key_order = list(built.keys())
    self.assertEqual(1, key_order.index('@y'))
    year_values = list(built.get('@y'))
    self.assertEqual(0, year_values.index('19'))
def generate_data(self):
    """Return a five-document fixture mapping document names to facet lists.

    The same Facet instances are shared between documents.
    """
    facet_a = Facet("FacetA", "FacetValueA")
    facet_a2 = Facet("FacetA", "FacetValueA2")
    facet_b = Facet("FacetB", "FacetValueB")
    facet_c = Facet("FacetC", "FacetValueC")

    fixture = {}
    fixture['Document1'] = [facet_a, facet_b]
    fixture['Document2'] = [facet_b, facet_c]
    fixture['Document3'] = [facet_a, facet_c]
    fixture['Document4'] = [facet_a2, facet_a]
    fixture['Document5'] = [facet_a2, facet_c]
    return fixture
def test_similar_values_different_facets_then_return_one_question(self):
    """NameA is identical everywhere; only NameB should produce a question.

    All three documents carry an equal NameA/ValueA facet, while NameB is
    ValueB1 on two documents and ValueB2 on the third.
    """
    generator = QuestionGenerator()
    same_a_1 = Facet('NameA', 'ValueA')
    same_a_2 = Facet('NameA', 'ValueA')
    same_a_3 = Facet('NameA', 'ValueA')
    b_first = Facet('NameB', 'ValueB1')
    b_second = Facet('NameB', 'ValueB2')
    documents = {
        '1': [same_a_1, b_first],
        '2': [same_a_2, b_first],
        '3': [same_a_3, b_second],
    }

    generated = generator.generate_questions(documents)

    self.assertEqual(len(generated), 1)
    self.assertEqual(generated[0].question.facet_name, "NameB")
    observed_values = sorted(generated[0].question.facet_values)
    self.assertEqual(observed_values, ['ValueB1', 'ValueB2'])
def test_when_facet_dont_have_2_values_then_return_0_facet(self):
    """Facets that each expose a single value yield no discriminating facet.

    NameA/ValueA sits on three documents and NameB/ValueB on one.
    """
    algo = DiscriminatingFacetsAlgo()
    single_a = Facet('NameA', 'ValueA')
    single_b = Facet('NameB', 'ValueB')
    documents = {
        '1': [single_a],
        '2': [single_a],
        '3': [single_b],
        '4': [single_a],
    }

    found = algo.get_discriminating_facets(documents)

    self.assertEqual(len(found), 0)
def test_when_facet_dont_have_3_documents_then_return_0_facet(self):
    """Four NameA values spread over only two documents yield no facet."""
    algo = DiscriminatingFacetsAlgo()
    value_1 = Facet('NameA', 'ValueA1')
    value_2 = Facet('NameA', 'ValueA2')
    value_3 = Facet('NameA', 'ValueA3')
    value_4 = Facet('NameA', 'ValueA4')
    documents = {
        '1': [value_1, value_2],
        '2': [value_3, value_4],
    }

    found = algo.get_discriminating_facets(documents)

    self.assertEqual(len(found), 0)
def test_when_same_facet_then_return_no_questions(self):
    """Two documents sharing one Facet instance produce no question."""
    generator = QuestionGenerator()
    shared = Facet('NameA', 'ValueA')
    documents = {'1': [shared], '2': [shared]}

    generated = generator.generate_questions(documents)

    self.assertEqual(len(generated), 0)
def extract_facet_from_file(file_path):
    """Load one facet and the documents that carry it from a JSON file.

    The file is expected to hold an object with 'FacetName', 'FacetValue'
    and a 'Documents' list whose entries each have a 'ClickUri'.

    Returns:
        A ``(Facet, list)`` pair: the facet built from the name and value,
        and the 'ClickUri' of every listed document, in file order.
    """
    # errors='ignore' drops undecodable bytes instead of raising.
    with open(file_path, errors='ignore') as handle:
        payload = json.load(handle)

    facet_name = payload['FacetName']
    facet_value = payload['FacetValue']
    click_uris = [entry['ClickUri'] for entry in payload['Documents']]
    return Facet(facet_name, facet_value), click_uris
def test_when_1_discriminating_facet_then_rerun_algorithm_and_return_1_facet(self):
    """Only NameA is reported, with both of its values and a score of 1.

    NameA has ValueA1 on one document and ValueA2 on two; NameB/ValueB sits
    on a fourth document and must not appear in the result.
    """
    algo = DiscriminatingFacetsAlgo()
    a_first = Facet('NameA', 'ValueA1')
    a_second = Facet('NameA', 'ValueA2')
    lone_b = Facet('NameB', 'ValueB')
    documents = {
        '1': [a_first],
        '2': [a_second],
        '3': [a_second],
        '4': [lone_b],
    }

    found = algo.get_discriminating_facets(documents)

    self.assertEqual(len(found), 1)
    self.assertEqual(found[0].name, 'NameA')
    self.assertEqual(found[0].values, ['ValueA1', 'ValueA2'])
    self.assertEqual(found[0].score, 1)
def test_must_not_have_facet_a_or_b_chained(self):
    """Chaining two exclusion filters leaves only Document5.

    Documents carrying FacetA/FacetValueA are removed first, then documents
    carrying FacetB/FacetValueB are removed from what is left.
    """
    document_filter = DocumentFilter()
    documents = document_filter.keep_documents_without_facets(
        self.generate_data(), [Facet("FacetA", "FacetValueA")])
    documents = document_filter.keep_documents_without_facets(
        documents, [Facet("FacetB", "FacetValueB")])

    self.assertEqual(1, len(documents))
    # assertIn/assertNotIn report the member and the container on failure,
    # unlike assertTrue, which only says "False is not true".
    for removed in ("Document1", "Document2", "Document3", "Document4"):
        self.assertNotIn(removed, documents)
    self.assertIn("Document5", documents)
def test_multiple_values_then_return_multiple_questions(self):
    """Two facet names that differ between documents each yield a question.

    NameA differs (ValueA vs ValueB). For NameB the documents differ on
    Value1/Value2 while Value3 is on both, and the expected question lists
    only Value1 and Value2. Questions are ordered by facet name before the
    assertions.
    """
    generator = QuestionGenerator()
    a_value_a = Facet('NameA', 'ValueA')
    a_value_b = Facet('NameA', 'ValueB')
    b_value_1 = Facet('NameB', 'Value1')
    b_value_2 = Facet('NameB', 'Value2')
    b_value_3 = Facet('NameB', 'Value3')
    documents = {
        '1': [a_value_a, b_value_2, b_value_3],
        '2': [a_value_b, b_value_1, b_value_3],
    }

    ordered = list(generator.generate_questions(documents))
    ordered.sort(key=lambda question: question.facet_name)

    self.assertEqual(ordered[0].facet_name, 'NameA')
    self.assertEqual(sorted(ordered[0].facet_values), ['ValueA', 'ValueB'])
    self.assertEqual(ordered[1].facet_name, 'NameB')
    self.assertEqual(sorted(ordered[1].facet_values), ['Value1', 'Value2'])
def test_when_2_discriminating_facets_then_run_one_time_algorithm_and_return_2_facets(self):
    """NameC then NameA are reported, in that order, with their scores.

    NameC (three values) is expected first with score 1. NameA (two values)
    is expected second with a score that rounds to 0.67. NameB, which has a
    single value, is expected to be absent.
    """
    algo = DiscriminatingFacetsAlgo()
    a_first = Facet('NameA', 'ValueA1')
    a_second = Facet('NameA', 'ValueA2')
    lone_b = Facet('NameB', 'ValueB')
    c_first = Facet('NameC', 'ValueC1')
    c_second = Facet('NameC', 'ValueC2')
    c_third = Facet('NameC', 'ValueC3')
    documents = {
        '1': [a_first],
        '2': [a_second, c_first],
        '3': [a_first, c_second, c_third],
        '4': [lone_b, c_third],
    }

    found = algo.get_discriminating_facets(documents)

    self.assertEqual(len(found), 2)

    self.assertEqual(found[0].name, 'NameC')
    self.assertEqual(found[0].values, ['ValueC1', 'ValueC2', 'ValueC3'])
    self.assertEqual(found[0].score, 1)

    self.assertEqual(found[1].name, 'NameA')
    self.assertEqual(found[1].values, ['ValueA1', 'ValueA2'])
    rounded_score = round(found[1].score, 2)
    self.assertEqual(rounded_score, 0.67)
def filter_document_by_facets():
    """Filter the requested documents by required and forbidden facets.

    Reads a JSON body with 'Documents', 'MustHaveFacets' and
    'MustNotHaveFacets'. Requested documents that are missing from
    ``facets_by_document`` are dropped. Each facet list is applied only
    when it is not None, and its entries supply 'Name' and 'Value'.

    Returns:
        The JSON-encoded list of the document keys that remain.
    """
    payload = request.get_json()
    requested = payload['Documents']
    remaining = {
        doc_id: facets_by_document[doc_id]
        for doc_id in requested
        if doc_id in facets_by_document
    }

    required = payload['MustHaveFacets']
    if required is not None:
        required_facets = [
            Facet(entry['Name'], entry['Value']) for entry in required
        ]
        remaining = DocumentFilter.keep_documents_with_facets(
            remaining, required_facets)

    forbidden = payload['MustNotHaveFacets']
    if forbidden is not None:
        forbidden_facets = [
            Facet(entry['Name'], entry['Value']) for entry in forbidden
        ]
        remaining = DocumentFilter.keep_documents_without_facets(
            remaining, forbidden_facets)

    return jsonify(list(remaining.keys()))
def generate_data(self):
    """Return a two-document fixture.

    'document1' carries NameA and NameB; 'document2' carries the same NameA
    Facet instance only.
    """
    facet_a = Facet("NameA", "ValueA")
    facet_b = Facet("NameB", "ValueB")

    fixture = {}
    fixture['document1'] = [facet_a, facet_b]
    fixture['document2'] = [facet_a]
    return fixture
def test_must_have_facet_a(self):
    """Requiring FacetA/FacetValueA keeps Document1, Document3 and Document4."""
    document_filter = DocumentFilter()
    documents = document_filter.keep_documents_with_facets(
        self.generate_data(), [Facet("FacetA", "FacetValueA")])

    self.assertEqual(3, len(documents))
    # assertIn/assertNotIn report the member and the container on failure,
    # unlike assertTrue, which only says "False is not true".
    self.assertIn("Document1", documents)
    self.assertNotIn("Document2", documents)
    self.assertIn("Document3", documents)
    self.assertIn("Document4", documents)
    self.assertNotIn("Document5", documents)
def test_must_not_have_facet_d(self):
    """Excluding FacetD/FacetValueD is expected to keep all five documents."""
    document_filter = DocumentFilter()
    documents = document_filter.keep_documents_without_facets(
        self.generate_data(), [Facet("FacetD", "FacetValueD")])

    self.assertEqual(5, len(documents))
    # assertIn reports the member and the container on failure, unlike
    # assertTrue, which only says "False is not true".
    for kept in ("Document1", "Document2", "Document3", "Document4",
                 "Document5"):
        self.assertIn(kept, documents)
def test_when_document_counts_in_facet_values_have_more_than_35_in_standard_deviation_then_return_0_facet(self):
    """A heavily skewed value distribution yields no discriminating facet.

    NameA has ValueA1 on one document and ValueA2 on 55 documents. The test
    first confirms that the sample standard deviation of those counts
    exceeds 35, then expects an empty result from the algorithm.
    """
    algo = DiscriminatingFacetsAlgo()
    rare_value = Facet('NameA', 'ValueA1')
    common_value = Facet('NameA', 'ValueA2')
    other_facet = Facet('NameB', 'ValueB')

    # Document '1' carries ValueA1, documents '2'..'57' carry ValueA2 and
    # document '58' carries NameB. Key '44' is deliberately absent so that
    # ValueA2 covers exactly 55 documents, matching the counts below.
    documents = {'1': [rare_value]}
    for number in range(2, 58):
        if number == 44:
            continue
        documents[str(number)] = [common_value]
    documents['58'] = [other_facet]

    counts_per_value = [1, 55]
    spread = statistics.stdev(counts_per_value)
    self.assertTrue(spread > 35)

    found = algo.get_discriminating_facets(documents)

    self.assertEqual(len(found), 0)