Example #1
0
 def test_cannot_union_mixed_document_corpora_by_meta_data(self):
     """Same-named corpora whose meta-data differ cannot be unioned.

     Expects ``union`` to raise ``RallyAssertionError`` and verifies the
     exact error message.
     """
     bulk = track.Documents.SOURCE_FORMAT_BULK

     without_meta = track.DocumentCorpus(
         "test",
         documents=[track.Documents(source_format=bulk, number_of_documents=5, target_index="logs-01")],
         meta_data={"with-metadata": False},
     )
     with_meta = track.DocumentCorpus(
         "test",
         documents=[track.Documents(source_format=bulk, number_of_documents=5, target_index="logs-02")],
         meta_data={"with-metadata": True},
     )

     with self.assertRaises(exceptions.RallyAssertionError) as ae:
         without_meta.union(with_meta)

     expected_message = "Corpora meta-data differ: [{'with-metadata': False}] and [{'with-metadata': True}]."
     self.assertEqual(ae.exception.message, expected_message)
Example #2
0
    def test_filter_documents_by_format_and_indices(self):
        """Filtering by source format and target indices keeps only matching documents.

        Out of four bulk documents, only the two whose target index is listed
        in ``target_indices`` are expected to remain, in their original order.
        """
        bulk = track.Documents.SOURCE_FORMAT_BULK
        # (number_of_documents, target_index) for every document in the corpus
        specs = ((5, "logs-01"), (6, "logs-02"), (7, "logs-03"), (8, None))
        corpus = track.DocumentCorpus(
            "test",
            documents=[
                track.Documents(source_format=bulk, number_of_documents=count, target_index=index)
                for count, index in specs
            ],
        )

        filtered_corpus = corpus.filter(source_format=bulk, target_indices=["logs-01", "logs-02"])

        remaining = filtered_corpus.documents
        self.assertEqual("test", filtered_corpus.name)
        self.assertEqual(2, len(remaining))
        self.assertEqual("logs-01", remaining[0].target_index)
        self.assertEqual("logs-02", remaining[1].target_index)
Example #3
0
    def test_filter_documents_by_data_streams(self):
        """Filtering by data stream keeps only documents targeting a listed stream.

        The corpus mixes source formats; only the single document whose
        target data stream is "logs-02" is expected to remain.
        """
        bulk = track.Documents.SOURCE_FORMAT_BULK
        # (source_format, number_of_documents, target_data_stream)
        specs = (
            (bulk, 5, "logs-01"),
            ("other", 6, "logs-02"),
            (bulk, 7, "logs-03"),
            (None, 8, None),
        )
        corpus = track.DocumentCorpus(
            "test",
            documents=[
                track.Documents(source_format=fmt, number_of_documents=count, target_data_stream=stream)
                for fmt, count, stream in specs
            ],
        )

        filtered_corpus = corpus.filter(target_data_streams=["logs-02"])

        remaining = filtered_corpus.documents
        self.assertEqual("test", filtered_corpus.name)
        self.assertEqual(1, len(remaining))
        self.assertEqual("logs-02", remaining[0].target_data_stream)
Example #4
0
    def test_filter_documents_by_indices(self):
        """Filtering by target index keeps only documents targeting a listed index.

        The corpus mixes source formats; only the single document whose
        target index is "logs-02" is expected to remain.
        """
        bulk = track.Documents.SOURCE_FORMAT_BULK
        # (source_format, number_of_documents, target_index)
        specs = (
            (bulk, 5, "logs-01"),
            ("other", 6, "logs-02"),
            (bulk, 7, "logs-03"),
            (None, 8, None),
        )
        documents = [
            track.Documents(source_format=fmt, number_of_documents=count, target_index=index)
            for fmt, count, index in specs
        ]
        corpus = track.DocumentCorpus("test", documents=documents)

        filtered_corpus = corpus.filter(target_indices=["logs-02"])

        remaining = filtered_corpus.documents
        assert filtered_corpus.name == "test"
        assert len(remaining) == 1
        assert remaining[0].target_index == "logs-02"
Example #5
0
    def test_do_not_filter(self):
        """Calling ``filter()`` without arguments keeps name, documents and meta-data."""
        bulk = track.Documents.SOURCE_FORMAT_BULK
        # (source_format, number_of_documents, target_index)
        specs = (
            (bulk, 5, "logs-01"),
            ("other", 6, "logs-02"),
            (bulk, 7, "logs-03"),
            (None, 8, None),
        )
        corpus = track.DocumentCorpus(
            "test",
            documents=[
                track.Documents(source_format=fmt, number_of_documents=count, target_index=index)
                for fmt, count, index in specs
            ],
            meta_data={"average-document-size-in-bytes": 12},
        )

        unfiltered = corpus.filter()

        self.assertEqual(corpus.name, unfiltered.name)
        self.assertListEqual(corpus.documents, unfiltered.documents)
        self.assertDictEqual(corpus.meta_data, unfiltered.meta_data)
Example #6
0
 def test_union_document_corpus_is_reflexive(self):
     """The union of a corpus with itself must be that very same object."""
     corpus = track.DocumentCorpus("test", documents=[
         track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, number_of_documents=5, target_index="logs-01"),
         track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, number_of_documents=6, target_index="logs-02"),
         track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, number_of_documents=7, target_index="logs-03"),
         track.Documents(source_format=track.Documents.SOURCE_FORMAT_BULK, number_of_documents=8, target_index=None)
     ])
     # assertIs checks identity and, on failure, reports both objects instead
     # of the uninformative "False is not true" produced by assertTrue(x is y).
     self.assertIs(corpus.union(corpus), corpus)
Example #7
0
 def test_cannot_union_mixed_document_corpora(self):
     """Corpora with different names cannot be unioned.

     Expects ``RallyAssertionError`` with a message matching the
     "same name" requirement.
     """
     bulk = track.Documents.SOURCE_FORMAT_BULK
     first_docs = [track.Documents(source_format=bulk, number_of_documents=5, target_index="logs-01")]
     first = track.DocumentCorpus("test", documents=first_docs)
     second_docs = [track.Documents(source_format=bulk, number_of_documents=5, target_index="logs-02")]
     second = track.DocumentCorpus("other", documents=second_docs)

     expected_pattern = "Both document corpora must have the same name"
     with self.assertRaisesRegex(exceptions.RallyAssertionError, expected_pattern):
         first.union(second)
Example #8
0
 def test_union_document_corpora_is_symmetric(self):
     """``a.union(b)`` equals ``b.union(a)`` and contains the documents of both."""
     bulk = track.Documents.SOURCE_FORMAT_BULK
     a = track.DocumentCorpus(
         "test",
         documents=[track.Documents(source_format=bulk, number_of_documents=5, target_index="logs-01")],
     )
     b = track.DocumentCorpus(
         "test",
         documents=[track.Documents(source_format=bulk, number_of_documents=5, target_index="logs-02")],
     )

     b_then_a = b.union(a)
     a_then_b = a.union(b)
     self.assertEqual(b_then_a, a_then_b)

     merged_documents = a.union(b).documents
     self.assertEqual(2, len(merged_documents))
Example #9
0
    def test_do_not_filter(self):
        """Calling ``filter()`` without arguments keeps the name and all documents."""
        bulk = track.Documents.SOURCE_FORMAT_BULK
        # (source_format, number_of_documents, target_index)
        specs = (
            (bulk, 5, "logs-01"),
            ("other", 6, "logs-02"),
            (bulk, 7, "logs-03"),
            (None, 8, None),
        )
        documents = [
            track.Documents(source_format=fmt, number_of_documents=count, target_index=index)
            for fmt, count, index in specs
        ]
        corpus = track.DocumentCorpus("test", documents=documents)

        unfiltered = corpus.filter()

        self.assertEqual(corpus.name, unfiltered.name)
        self.assertListEqual(corpus.documents, unfiltered.documents)
Example #10
0
 def test_cannot_union_mixed_document_corpora_by_name(self):
     """Corpora with different names cannot be unioned.

     Expects ``union`` to raise ``RallyAssertionError`` and verifies the
     exact error message.
     """
     bulk = track.Documents.SOURCE_FORMAT_BULK

     named_test = track.DocumentCorpus(
         "test",
         documents=[track.Documents(source_format=bulk, number_of_documents=5, target_index="logs-01")],
     )
     named_other = track.DocumentCorpus(
         "other",
         documents=[track.Documents(source_format=bulk, number_of_documents=5, target_index="logs-02")],
     )

     with self.assertRaises(exceptions.RallyAssertionError) as ae:
         named_test.union(named_other)

     expected_message = "Corpora names differ: [test] and [other]."
     self.assertEqual(ae.exception.message, expected_message)
Example #11
0
 def test_cannot_union_mixed_document_corpora_by_name(self):
     """Corpora with different names cannot be unioned.

     Expects ``union`` to raise ``RallyAssertionError`` and verifies the
     exact error message.
     """
     bulk = track.Documents.SOURCE_FORMAT_BULK

     named_test = track.DocumentCorpus(
         "test",
         documents=[track.Documents(source_format=bulk, number_of_documents=5, target_index="logs-01")],
     )
     named_other = track.DocumentCorpus(
         "other",
         documents=[track.Documents(source_format=bulk, number_of_documents=5, target_index="logs-02")],
     )

     with pytest.raises(exceptions.RallyAssertionError) as exc:
         named_test.union(named_other)

     expected_message = "Corpora names differ: [test] and [other]."
     assert exc.value.message == expected_message