Esempio n. 1
0
    def test_similar_values_different_facets_then_return_no_question(self):
        # Every document carries a facet built from the same ('NameA',
        # 'ValueA') pair, so the generator is expected to return no question.
        generator = QuestionGenerator()
        first_copy = Facet('NameA', 'ValueA')
        second_copy = Facet('NameA', 'ValueA')
        documents = {
            '1': [first_copy],
            '2': [second_copy],
            '3': [second_copy],
        }

        generated = generator.generate_questions(documents)

        self.assertEqual(len(generated), 0)
Esempio n. 2
0
    def test_one_discriminating_facet_then_return_one_element_question(self):
        # NameA takes two different values across three documents; exactly
        # one question, on NameA and listing both values, is expected.
        generator = QuestionGenerator()
        value_one = Facet('NameA', 'ValueA1')
        value_two = Facet('NameA', 'ValueA2')
        documents = {
            '1': [value_one],
            '2': [value_two],
            '3': [value_two],
        }

        generated = generator.generate_questions(documents)

        self.assertEqual(len(generated), 1)
        asked = generated[0].question
        self.assertEqual(asked.facet_name, "NameA")
        self.assertEqual(sorted(asked.facet_values), ['ValueA1', 'ValueA2'])
Esempio n. 3
0
    def test_similar_values_different_facets_then_return_one_question(self):
        # Document '1' carries NameA and NameB while document '2' carries
        # only NameA; the first question is expected to be about NameB with
        # the single value 'ValueB'.
        generator = QuestionGenerator()
        name_a = Facet('NameA', 'ValueA')
        name_a_copy = Facet('NameA', 'ValueA')
        name_b = Facet('NameB', 'ValueB')
        documents = {
            '1': [name_a_copy, name_b],
            '2': [name_a],
        }

        generated = generator.generate_questions(documents)

        first = generated[0]
        self.assertEqual(first.facet_name, "NameB")
        self.assertEqual(sorted(first.facet_values), ['ValueB'])
    def test_when_no_discriminating_facet_then_return_no_facet(self):
        # Three documents with one facet each (two NameA values and one
        # NameB value); no discriminating facet is expected.
        algo = DiscriminatingFacetsAlgo()
        value_one = Facet('NameA', 'ValueA1')
        value_two = Facet('NameA', 'ValueA2')
        other_name = Facet('NameB', 'ValueB')
        documents = {
            '1': [value_one],
            '2': [value_two],
            '3': [other_name],
        }

        found = algo.get_discriminating_facets(documents)

        self.assertEqual(len(found), 0)
    def test_create_facet_dictionary(self):
        # Builds the facet dictionary for three documents and checks the
        # number of entries, the position of '@y' among the keys, and the
        # position of '19' among the '@y' values.
        builder = FacetDictionary()

        documents = {
            'uri1': [Facet('@y', '21'), Facet('@m', '9')],
            'uri2': [Facet('@y', '19')],
            'uri3': [Facet('@m', '9')],
        }

        result = builder.create_facet_dict(documents)

        self.assertEqual(2, len(result))
        names = list(result.keys())
        self.assertEqual(1, names.index('@y'))
        year_values = list(result.get('@y'))
        self.assertEqual(0, year_values.index('19'))
    def generate_data(self):
        """Return a fixture mapping five document ids to facet lists."""
        facet_a = Facet("FacetA", "FacetValueA")
        facet_a2 = Facet("FacetA", "FacetValueA2")
        facet_b = Facet("FacetB", "FacetValueB")
        facet_c = Facet("FacetC", "FacetValueC")

        fixture = {}
        fixture['Document1'] = [facet_a, facet_b]
        fixture['Document2'] = [facet_b, facet_c]
        fixture['Document3'] = [facet_a, facet_c]
        fixture['Document4'] = [facet_a2, facet_a]
        fixture['Document5'] = [facet_a2, facet_c]
        return fixture
Esempio n. 7
0
    def test_similar_values_different_facets_then_return_one_question(self):
        # All three documents carry a ('NameA', 'ValueA') facet, while NameB
        # takes two different values; exactly one question, on NameB and
        # listing both values, is expected.
        generator = QuestionGenerator()
        same_first = Facet('NameA', 'ValueA')
        same_second = Facet('NameA', 'ValueA')
        same_third = Facet('NameA', 'ValueA')
        b_one = Facet('NameB', 'ValueB1')
        b_two = Facet('NameB', 'ValueB2')
        documents = {
            '1': [same_first, b_one],
            '2': [same_second, b_one],
            '3': [same_third, b_two],
        }

        generated = generator.generate_questions(documents)

        self.assertEqual(len(generated), 1)
        asked = generated[0].question
        self.assertEqual(asked.facet_name, "NameB")
        self.assertEqual(sorted(asked.facet_values), ['ValueB1', 'ValueB2'])
    def test_when_facet_dont_have_2_values_then_return_0_facet(self):
        # NameA and NameB each appear with a single value only, so no
        # discriminating facet is expected.
        algo = DiscriminatingFacetsAlgo()
        only_a = Facet('NameA', 'ValueA')
        only_b = Facet('NameB', 'ValueB')
        documents = {'1': [only_a], '2': [only_a], '3': [only_b], '4': [only_a]}

        found = algo.get_discriminating_facets(documents)

        self.assertEqual(len(found), 0)
    def test_when_facet_dont_have_3_documents_then_return_0_facet(self):
        # Four NameA values spread over only two documents; no discriminating
        # facet is expected.
        algo = DiscriminatingFacetsAlgo()
        value_one = Facet('NameA', 'ValueA1')
        value_two = Facet('NameA', 'ValueA2')
        value_three = Facet('NameA', 'ValueA3')
        value_four = Facet('NameA', 'ValueA4')
        documents = {'1': [value_one, value_two], '2': [value_three, value_four]}

        found = algo.get_discriminating_facets(documents)

        self.assertEqual(len(found), 0)
Esempio n. 10
0
    def test_when_same_facet_then_return_no_questions(self):
        # Both documents hold the very same facet object, so no question is
        # expected.
        generator = QuestionGenerator()
        shared = Facet('NameA', 'ValueA')
        documents = {
            '1': [shared],
            '2': [shared],
        }

        generated = generator.generate_questions(documents)

        self.assertEqual(len(generated), 0)
 def extract_facet_from_file(file_path):
     """Load one facet and the documents listed with it from a JSON file.

     The file must hold an object with ``FacetName``, ``FacetValue`` and a
     ``Documents`` list whose items each have a ``ClickUri`` entry.

     Returns a ``(Facet, list)`` tuple: the facet built from the name/value
     pair and the ``ClickUri`` of every listed document, in file order.
     Errors from ``open``/``json.load`` and ``KeyError`` for an absent key
     propagate to the caller.
     """
     # JSON text is UTF-8 by specification; decoding explicitly keeps the
     # result independent of the platform's locale encoding. Undecodable
     # bytes are still dropped, exactly as before.
     with open(file_path, encoding='utf-8', errors='ignore') as jsonfile:
         data = json.load(jsonfile)

     name = data['FacetName']
     value = data['FacetValue']
     documents = [document['ClickUri'] for document in data['Documents']]
     return Facet(name, value), documents
    def test_when_1_discriminating_facet_then_rerun_algorithm_and_return_1_facet(
            self):
        # NameA has two values spread over three documents and NameB a single
        # value on one document; one facet (NameA, both values, score 1) is
        # expected.
        algo = DiscriminatingFacetsAlgo()
        value_one = Facet('NameA', 'ValueA1')
        value_two = Facet('NameA', 'ValueA2')
        other_name = Facet('NameB', 'ValueB')
        documents = {
            '1': [value_one],
            '2': [value_two],
            '3': [value_two],
            '4': [other_name],
        }

        found = algo.get_discriminating_facets(documents)

        self.assertEqual(len(found), 1)
        best = found[0]
        self.assertEqual(best.name, 'NameA')
        self.assertEqual(best.values, ['ValueA1', 'ValueA2'])
        self.assertEqual(best.score, 1)
    def test_must_not_have_facet_a_or_b_chained(self):
        # Applies two exclusion filters in sequence: FacetA/FacetValueA first,
        # then FacetB/FacetValueB on what is left. Only Document5 is expected
        # to remain.
        document_filter = DocumentFilter()
        documents = document_filter.keep_documents_without_facets(
            self.generate_data(), [Facet("FacetA", "FacetValueA")])
        documents = document_filter.keep_documents_without_facets(
            documents, [Facet("FacetB", "FacetValueB")])

        self.assertEqual(1, len(documents))
        # assertIn/assertNotIn name the member and the container on failure,
        # where assertTrue would only report "False is not true".
        self.assertNotIn("Document1", documents)
        self.assertNotIn("Document2", documents)
        self.assertNotIn("Document3", documents)
        self.assertNotIn("Document4", documents)
        self.assertIn("Document5", documents)
Esempio n. 14
0
    def test_multiple_values_then_return_multiple_questions(self):
        # Two documents differ on NameA and on two of the three NameB values
        # ('Value3' is on both); after sorting by facet name, a NameA question
        # and a NameB question are expected.
        generator = QuestionGenerator()
        a_first = Facet('NameA', 'ValueA')
        a_second = Facet('NameA', 'ValueB')
        b_one = Facet('NameB', 'Value1')
        b_two = Facet('NameB', 'Value2')
        b_three = Facet('NameB', 'Value3')
        documents = {'1': [a_first, b_two, b_three], '2': [a_second, b_one, b_three]}

        generated = generator.generate_questions(documents)
        ordered = sorted(generated, key=lambda question: question.facet_name)

        about_a = ordered[0]
        self.assertEqual(about_a.facet_name, 'NameA')
        self.assertEqual(sorted(about_a.facet_values), ['ValueA', 'ValueB'])
        about_b = ordered[1]
        self.assertEqual(about_b.facet_name, 'NameB')
        self.assertEqual(sorted(about_b.facet_values), ['Value1', 'Value2'])
    def test_when_2_discriminating_facets_then_run_one_time_algorithm_and_return_2_facets(
            self):
        # Four documents mixing NameA (two values), NameB (one value) and
        # NameC (three values); two facets are expected, NameC first with
        # score 1 and NameA second with a score that rounds to 0.67.
        algo = DiscriminatingFacetsAlgo()
        a_one = Facet('NameA', 'ValueA1')
        a_two = Facet('NameA', 'ValueA2')
        b_only = Facet('NameB', 'ValueB')
        c_one = Facet('NameC', 'ValueC1')
        c_two = Facet('NameC', 'ValueC2')
        c_three = Facet('NameC', 'ValueC3')
        documents = {
            '1': [a_one],
            '2': [a_two, c_one],
            '3': [a_one, c_two, c_three],
            '4': [b_only, c_three],
        }

        found = algo.get_discriminating_facets(documents)

        self.assertEqual(len(found), 2)

        top = found[0]
        self.assertEqual(top.name, 'NameC')
        self.assertEqual(top.values, ['ValueC1', 'ValueC2', 'ValueC3'])
        self.assertEqual(top.score, 1)

        second = found[1]
        self.assertEqual(second.name, 'NameA')
        self.assertEqual(second.values, ['ValueA1', 'ValueA2'])
        self.assertEqual(round(second.score, 2), 0.67)
Esempio n. 16
0
def filter_document_by_facets():
    """Filter the requested documents by required and forbidden facets.

    Reads a JSON request body with a ``Documents`` list of document ids and
    two optional lists, ``MustHaveFacets`` and ``MustNotHaveFacets``, whose
    items are objects with ``Name`` and ``Value`` entries. Ids that are not
    keys of ``facets_by_document`` are dropped before filtering.

    Returns the JSON-encoded list of the ids that pass every filter given.
    """
    content = request.get_json()
    documents = {
        doc_id: facets_by_document[doc_id]
        for doc_id in content['Documents']
        if doc_id in facets_by_document
    }

    # Both facet lists are optional: an omitted key is handled like an
    # explicit null instead of raising KeyError.
    must_have = content.get('MustHaveFacets')
    if must_have is not None:
        must_have_facets = [
            Facet(val['Name'], val['Value']) for val in must_have
        ]
        documents = DocumentFilter.keep_documents_with_facets(
            documents, must_have_facets)

    must_not_have = content.get('MustNotHaveFacets')
    if must_not_have is not None:
        must_not_have_facets = [
            Facet(val['Name'], val['Value']) for val in must_not_have
        ]
        documents = DocumentFilter.keep_documents_without_facets(
            documents, must_not_have_facets)

    return jsonify(list(documents.keys()))
Esempio n. 17
0
 def generate_data(self):
     """Return a two-document fixture.

     'document1' carries both the NameA and NameB facets, 'document2' only
     the NameA one.
     """
     name_a = Facet("NameA", "ValueA")
     name_b = Facet("NameB", "ValueB")
     fixture = {}
     fixture['document1'] = [name_a, name_b]
     fixture['document2'] = [name_a]
     return fixture
    def test_must_have_facet_a(self):
        # Keeps only the documents carrying FacetA/FacetValueA; Document1,
        # Document3 and Document4 are expected to remain.
        document_filter = DocumentFilter()
        documents = document_filter.keep_documents_with_facets(
            self.generate_data(), [Facet("FacetA", "FacetValueA")])

        self.assertEqual(3, len(documents))
        # assertIn/assertNotIn name the member and the container on failure,
        # where assertTrue would only report "False is not true".
        self.assertIn("Document1", documents)
        self.assertNotIn("Document2", documents)
        self.assertIn("Document3", documents)
        self.assertIn("Document4", documents)
        self.assertNotIn("Document5", documents)
    def test_must_not_have_facet_d(self):
        # Excludes documents carrying FacetD/FacetValueD; all five documents
        # are expected to remain.
        document_filter = DocumentFilter()
        documents = document_filter.keep_documents_without_facets(
            self.generate_data(), [Facet("FacetD", "FacetValueD")])

        self.assertEqual(5, len(documents))
        # assertIn names the member and the container on failure, where
        # assertTrue would only report "False is not true".
        self.assertIn("Document1", documents)
        self.assertIn("Document2", documents)
        self.assertIn("Document3", documents)
        self.assertIn("Document4", documents)
        self.assertIn("Document5", documents)
    def test_when_document_counts_in_facet_values_have_more_than_35_in_standard_deviation_then_return_0_facet(
            self):
        # NameA is split very unevenly: one document holds ValueA1 and 55
        # hold ValueA2, while a last document holds NameB. The standard
        # deviation of [1, 55] is checked to exceed 35, and no discriminating
        # facet is expected.
        algo = DiscriminatingFacetsAlgo()
        rare_value = Facet('NameA', 'ValueA1')
        common_value = Facet('NameA', 'ValueA2')
        other_name = Facet('NameB', 'ValueB')

        documents = {'1': [rare_value]}
        # Ids '2' through '57' hold the common value; id '44' is not part of
        # the fixture, which leaves 55 such documents.
        documents.update(
            (str(doc_id), [common_value])
            for doc_id in range(2, 58)
            if doc_id != 44)
        documents['58'] = [other_name]

        counts_per_value = [1, 55]
        spread = statistics.stdev(counts_per_value)
        self.assertTrue(spread > 35)

        found = algo.get_discriminating_facets(documents)
        self.assertEqual(len(found), 0)