def test_sample_ranking_with_no_exceptions(self):
     """
     Smoke test: building the rank over three sample documents must
     complete without raising. No result is asserted.
     """
     texts = (
         'this is an indexable metadata',
         'this is an indexable super metadata',
         'this is another indexable metadata',
     )
     documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
     self.rank.build_rank(documents)
 def test_indexed_doc_count(self):
     """
     Index three sample documents and check that the engine reports
     exactly three indexed objects.
     """
     texts = (
         'this is an indexable metadata',
         'this is an indexable super metadata',
         'this is another indexable metadata',
     )
     documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
     self.build_sample_index(documents)

     indexed_total = self.engine.count()
     self.assertEqual(indexed_total, 3)
    def test_non_existent_term_search(self):
        """
        Searching for a term that occurs in none of the indexed documents
        must return an empty result list.
        """
        texts = (
            'this is an indexable metadata',
            'this is an indexable super metadata',
            'this is another indexable metadata',
        )
        documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
        self.build_sample_index(documents)

        hits = self.engine.search('asdasdasdas')

        # The query string appears in no sample document.
        self.assertListEqual(hits, [])
    def test_two_terms_search(self):
        """
        Searching for two terms must return only the documents that the
        test expects to match both of them.
        """
        texts = (
            'this is an indexable simple metadata',
            'this is an indexable super metadata',
            'this is another indexable super metadata',
        )
        documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
        self.index.build_index(documents)

        matches = self.index.search_terms(['indexable', 'super'])

        # Expected values look like zero-based positions in the list passed
        # to build_index (2nd and 3rd documents) - TODO confirm.
        # NOTE(review): assertItemsEqual exists only in Python 2's unittest;
        # Python 3 calls it assertCountEqual.
        self.assertItemsEqual(matches, [1, 2])
    def test_stop_word_search(self):
        """
        A query made only of the stop word 'this' must yield no matches,
        even though every sample document contains that word.
        """
        texts = (
            'this is an indexable metadata',
            'this is an indexable super metadata',
            'this is another indexable super metadata',
        )
        documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
        self.index.build_index(documents)

        matches = self.index.search_terms(['this'])

        # NOTE(review): assertItemsEqual exists only in Python 2's unittest;
        # Python 3 calls it assertCountEqual.
        self.assertItemsEqual(matches, [])
    def test_mixed_valid_invalid_term_search(self):
        """
        A query mixing an unknown term with a known one must return no
        matches.
        """
        texts = (
            'this is an indexable simple metadata',
            'this is an indexable super metadata',
            'this is another indexable metadata',
        )
        documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
        self.index.build_index(documents)

        # 'super' occurs in the second document; 'not_valid_term' occurs
        # in none of them.
        matches = self.index.search_terms(['not_valid_term', 'super'])

        # NOTE(review): assertItemsEqual exists only in Python 2's unittest;
        # Python 3 calls it assertCountEqual.
        self.assertItemsEqual(matches, [])
    def test_one_term_search(self):
        """
        Searching for a single term must return the documents that
        contain it.
        """
        texts = (
            "this is an indexable metadata",
            "this is an indexable super metadata",
            "this is another indexable super metadata",
        )
        documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
        self.index.build_index(documents)

        matches = self.index.search_terms(["super"])

        # Expected values look like zero-based positions in the list passed
        # to build_index (2nd and 3rd documents) - TODO confirm.
        # NOTE(review): assertItemsEqual exists only in Python 2's unittest;
        # Python 3 calls it assertCountEqual.
        self.assertItemsEqual(matches, [1, 2])
    def test_invalid_term_search(self):
        """
        Searching for a single term that no document contains must return
        no matches.
        """
        texts = (
            "this is an indexable simple metadata",
            "this is an indexable super metadata",
            "this is another indexable metadata",
        )
        documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
        self.index.build_index(documents)

        matches = self.index.search_terms(["not_valid_term"])

        # NOTE(review): assertItemsEqual exists only in Python 2's unittest;
        # Python 3 calls it assertCountEqual.
        self.assertItemsEqual(matches, [])
    def test_doc_frequency_matrix_with_sample2(self):
        """
        Check the vocabulary mapping and the term-frequency matrix built
        from two short sentences.
        """
        documents = [
            Indexable(1, 'the sky is blue'),
            Indexable(2, 'the sun is bright'),
        ]
        self.rank.build_rank(documents)

        # Expected term -> column index mapping.
        vocabulary = dict(blue=0, sky=1, sun=2, bright=3)
        # One row per document, one column per vocabulary term.
        term_counts = np.array([
            [1, 1, 0, 0],
            [0, 0, 1, 1],
        ])

        self.assertEqual(self.rank.vocabulary, vocabulary)
        np.testing.assert_array_equal(self.rank.ft_matrix.todense(),
                                      term_counts)
    def test_search_result_limit(self):
        """
        Passing 1 as the second argument of search must limit the output
        to the single expected result.
        """
        texts = (
            'this is an indexable metadata',
            'this is an indexable super metadata',
            'this is another indexable metadata',
        )
        documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
        self.build_sample_index(documents)

        # Only the first sample document is expected, with this score.
        top_hit = IndexableResult(1.414214, documents[0])

        hits = self.engine.search('indexable metadata', 1)
        self.assertListEqual(hits, [top_hit])
    def test_doc_inverse_term_frequency_vector2(self):
        """
        Check the IDF diagonal and the TF-IDF matrix built from two short
        sentences, comparing to four decimal places.
        """
        documents = [
            Indexable(1, 'the sky is blue'),
            Indexable(2, 'the sun is bright'),
        ]
        self.rank.build_rank(documents)

        # The same IDF value is expected for all four vocabulary terms.
        idf_values = [1.40546511] * 4
        tf_idf_rows = [
            [0.70710678, 0.70710678, 0, 0],
            [0, 0, 0.70710678, 0.70710678],
        ]

        actual_idf = self.rank.ifd_diag_matrix.diagonal()
        np.testing.assert_almost_equal(actual_idf, idf_values, 4)

        actual_tf_idf = self.rank.tf_idf_matrix.todense()
        np.testing.assert_almost_equal(actual_tf_idf, tf_idf_rows, 4)
    def test_existent_term_search(self):
        """
        Searching for terms present in the indexed documents must return
        every document as a scored result, in the expected order.
        """
        texts = (
            'this is an indexable metadata',
            'this is an indexable super metadata',
            'this is another indexable metadata',
        )
        documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
        self.build_sample_index(documents)

        # Expected score for each document, in expected result order.
        scores = (1.414214, 0.906589, 0.906589)
        ranked = [
            IndexableResult(score, document)
            for score, document in zip(scores, documents)
        ]

        hits = self.engine.search('indexable metadata')
        self.assertListEqual(hits, ranked)
    def test_doc_inverse_term_frequency_vector1(self):
        """
        Check the IDF diagonal and the TF-IDF matrix built from the three
        'indexable metadata' samples, comparing to four decimal places.
        """
        texts = (
            'this is an indexable metadata',
            'this is an indexable super metadata',
            'this is another indexable metadata',
        )
        documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
        self.rank.build_rank(documents)

        idf_values = [1., 1., 1.28768207, 1.69314718, 1.69314718]
        # One row per document, one column per vocabulary term.
        tf_idf_rows = [
            [0.52284231, 0.52284231, 0.67325467, 0, 0],
            [0.39148397, 0.39148397, 0.50410689, 0.66283998, 0],
            [0.45329466, 0.45329466, 0, 0, 0.76749457],
        ]

        actual_idf = self.rank.ifd_diag_matrix.diagonal()
        np.testing.assert_almost_equal(actual_idf, idf_values, 4)

        actual_tf_idf = self.rank.tf_idf_matrix.todense()
        np.testing.assert_almost_equal(actual_tf_idf, tf_idf_rows, 4)
    def test_score_computation(self):
        """
        Check compute_rank for document 0 against single-term and two-term
        queries, comparing to five decimal places.
        """
        self.rank.build_rank([Indexable(1, 'the sky is blue')])

        # (query terms, expected score) pairs, checked in this order.
        cases = (
            (['blue'], 0.707106),
            (['sky'], 0.7071067),
            (['blue', 'sky'], 1.414213),
        )
        for terms, expected_score in cases:
            np.testing.assert_almost_equal(
                self.rank.compute_rank(0, terms), expected_score, 5)
    def test_doc_frequency_matrix_with_sample1(self):
        """
        Check the vocabulary mapping and the term-frequency matrix built
        from the three 'indexable metadata' samples.
        """
        texts = (
            'this is an indexable metadata',
            'this is an indexable super metadata',
            'this is another indexable metadata',
        )
        documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
        self.rank.build_rank(documents)

        # Expected term -> column index mapping, listed by column.
        vocabulary = dict(metadata=0, indexable=1, an=2, super=3, another=4)
        # One row per document, one column per vocabulary term.
        term_counts = np.array([
            [1, 1, 1, 0, 0],
            [1, 1, 1, 1, 0],
            [1, 1, 0, 0, 1],
        ])

        self.assertEqual(self.rank.vocabulary, vocabulary)
        np.testing.assert_array_equal(self.rank.ft_matrix.todense(),
                                      term_counts)
 def __init__(self, iid, word):
     """
     Initialise the Indexable base with ``iid`` and ``word`` and keep the
     word on the instance.
     """
     Indexable.__init__(self, iid, word)
     # title/singer attributes were once assigned here; they are disabled.
     # Assigned after the base initialiser has run.
     self.word = word
Esempio n. 17
0
 def __init__(self, iid, title, author, metadata):
     """
     Initialise the Indexable base with ``iid`` and ``metadata``, then
     store the title and author on the instance.
     """
     Indexable.__init__(self, iid, metadata)
     self.title, self.author = title, author
Esempio n. 18
0
 def __init__(self, iid, word, isBinaryWord):
     """
     Initialise the Indexable base with ``iid``, ``word`` and
     ``isBinaryWord`` and keep the word on the instance.
     """
     Indexable.__init__(self, iid, word, isBinaryWord)
     # title/singer attributes were once assigned here; they are disabled.
     # Assigned after the base initialiser has run.
     self.word = word
Esempio n. 19
0
 def __init__(self, iid, title, author, metadata):
     """
     Initialise the Indexable base with ``iid`` and ``metadata``, then
     store the title and author on the instance.
     """
     Indexable.__init__(self, iid, metadata)
     self.title, self.author = title, author
 def test_sample_indexing_with_no_exceptions(self):
     """
     Smoke test: building the index over three sample documents must
     complete without raising. No result is asserted.
     """
     texts = (
         'this is an indexable metadata',
         'this is an indexable super metadata',
         'this is another indexable metadata',
     )
     documents = [Indexable(iid, text) for iid, text in enumerate(texts, 1)]
     self.index.build_index(documents)