Example #1
 def test_matches_if_catch_all_pattern_is_defined(self):
     self.assertTrue(
         track.Index("test", auto_managed=True,
                     types=[]).matches(pattern="*"))
     self.assertTrue(
         track.Index("test", auto_managed=True,
                     types=[]).matches(pattern="_all"))
Example #2
 def test_index_names(self):
     idx1 = track.Index(name="foo")
     idx2 = track.Index(name="bar")
     track_ = track.Track(name="unittest",
                          description="unittest track",
                          indices=[idx1, idx2])
     assert track_.index_names() == ["foo", "bar"]
Example #3
 def test_matches_exactly(self):
     self.assertTrue(
         track.Index("test", auto_managed=True,
                     types=[]).matches("test"))
     self.assertFalse(
         track.Index("test", auto_managed=True,
                     types=[]).matches(" test"))
Example #4
    def test_passes_all_indices_by_default(self):
        index1 = track.Index(name="index1", auto_managed=True, types=[])
        index2 = track.Index(name="index2", auto_managed=True, types=[])

        source = params.BulkIndexParamSource(indices=[index1, index2],
                                             params={
                                                 "conflicts": "random",
                                                 "bulk-size": 5000,
                                                 "batch-size": 20000,
                                                 "pipeline": "test-pipeline"
                                             })

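        # With a single client (partition 0 of 1), the partition should contain all indices.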
        partition = source.partition(0, 1)
        self.assertEqual(partition.indices, [index1, index2])
Example #5
    def test_generate_two_bulks(self):
        type1 = track.Type("type1", mapping_file="", number_of_documents=10)
        index1 = track.Index(name="index1", auto_managed=True, types=[type1])

        bulks = params.bulk_data_based(num_clients=1, client_index=0, indices=[index1], action_metadata=params.ActionMetaData.NoMetaData,
                                       batch_size=5, bulk_size=5, id_conflicts=params.IndexIdConflict.NoConflicts, pipeline=None,
                                       original_params={
                                           "my-custom-parameter": "foo",
                                           "my-custom-parameter-2": True
                                       }, create_reader=BulkDataGeneratorTests.
                                       create_test_reader([["1", "2", "3", "4", "5"], ["6", "7", "8"]]))
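        # With a bulk size of 5 and 8 documents from the reader, two bulks are expected: one full, one partial.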
        all_bulks = list(bulks)
        self.assertEqual(2, len(all_bulks))
        self.assertEqual({
            "action_metadata_present": False,
            "body": ["1", "2", "3", "4", "5"],
            "bulk-id": "0-1",
            "bulk-size": 5,
            "index": index1,
            "type": type1,
            "my-custom-parameter": "foo",
            "my-custom-parameter-2": True
        }, all_bulks[0])

        self.assertEqual({
            "action_metadata_present": False,
            "body": ["6", "7", "8"],
            "bulk-id": "0-2",
            "bulk-size": 3,
            "index": index1,
            "type": type1,
            "my-custom-parameter": "foo",
            "my-custom-parameter-2": True
        }, all_bulks[1])
Example #6
    def _create_index(self, index_spec, mapping_dir, data_dir):
        index_name = self._r(index_spec, "name")
        auto_managed = self._r(index_spec,
                               "auto-managed",
                               mandatory=False,
                               default_value=True)
        types = [
            self._create_type(type_spec, mapping_dir, data_dir)
            for type_spec in self._r(
                index_spec, "types", mandatory=auto_managed, default_value=[])
        ]
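        # Warn if none of the declared types provides document data for this index.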
        valid_document_data = False
        for type in types:
            if type.has_valid_document_data():
                valid_document_data = True
                break
        if not valid_document_data:
            console.warn(
                "None of the types for index [%s] defines documents. Please check that you either don't want to index data or "
                "parameter sources are defined for indexing." % index_name,
                logger=logger)

        return track.Index(name=index_name,
                           auto_managed=auto_managed,
                           types=types)
Example #7
    def _create_index(self, index_spec, mapping_dir):
        index_name = self._r(index_spec, "name")
        if self.override_auto_manage_indices is not None:
            auto_managed = self.override_auto_manage_indices
            logger.info(
                "User explicitly forced auto-managed indices to [%s] on the command line."
                % str(auto_managed))
        else:
            auto_managed = self._r(index_spec,
                                   "auto-managed",
                                   mandatory=False,
                                   default_value=True)
            logger.info(
                "Using index auto-management setting from track which is set to [%s]."
                % str(auto_managed))

        types = [
            self._create_type(type_spec, mapping_dir) for type_spec in self._r(
                index_spec, "types", mandatory=auto_managed, default_value=[])
        ]
        valid_document_data = False
        for type in types:
            if type.has_valid_document_data():
                valid_document_data = True
                break
        if not valid_document_data:
            console.warn(
                "None of the types for index [%s] defines documents. Please check that you either don't want to index data or "
                "parameter sources are defined for indexing." % index_name,
                logger=logger)

        return track.Index(name=index_name,
                           auto_managed=auto_managed,
                           types=types)
Example #8
    def test_internal_params_take_precedence(self):
        type1 = track.Type("type1", mapping={}, number_of_documents=3)
        index1 = track.Index(name="index1", auto_managed=True, types=[type1])

        bulks = params.bulk_data_based(
            num_clients=1,
            client_index=0,
            indices=[index1],
            batch_size=3,
            bulk_size=3,
            id_conflicts=params.IndexIdConflict.NoConflicts,
            pipeline=None,
            original_params={
                "body": "foo",
                "custom-param": "bar"
            },
            create_reader=BulkDataGeneratorTests.create_test_reader(
                [["1", "2", "3"]]))
        all_bulks = list(bulks)
        self.assertEqual(1, len(all_bulks))
        # body must not contain 'foo'!
        self.assertEqual(
            {
                "action_metadata_present": True,
                "body": ["1", "2", "3"],
                "bulk-id": "0-1",
                "bulk-size": 3,
                "index": index1,
                "type": type1,
                "custom-param": "bar"
            }, all_bulks[0])
Example #9
    def test_passes_request_parameters(self):
        type1 = track.Type("type1", mapping={}, number_of_documents=3)
        index1 = track.Index(name="index1", auto_managed=True, types=[type1])

        source = params.SearchParamSource(indices=[index1],
                                          params={
                                              "request-params": {
                                                  "_source_include":
                                                  "some_field"
                                              },
                                              "body": {
                                                  "query": {
                                                      "match_all": {}
                                                  }
                                              }
                                          })
        p = source.params()

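        # A single parameter set should expose index, type, request_params, use_request_cache and body.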
        self.assertEqual(5, len(p))
        self.assertEqual("index1", p["index"])
        self.assertEqual("type1", p["type"])
        self.assertEqual({"_source_include": "some_field"},
                         p["request_params"])
        self.assertFalse(p["use_request_cache"])
        self.assertEqual({"query": {"match_all": {}}}, p["body"])
Example #10
    def test_filters_indices(self):
        index1 = track.Index(name="index1", auto_managed=True, types=[])
        index2 = track.Index(name="index2", auto_managed=True, types=[])

        source = params.BulkIndexParamSource(
            indices=[index1, index2],
            params={
                "index": "index2",
                "action-and-meta-data": "generate",
                "conflicts": "random",
                "bulk-size": 5000,
                "batch-size": 20000,
                "pipeline": "test-pipeline"
            })

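        # Only the index selected via the "index" parameter should remain in the partition.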
        partition = source.partition(0, 1)
        self.assertEqual(partition.indices, [index2])
Example #11
    def test_create_with_metadata_in_source_file_but_conflicts(self):
        type1 = track.Type("type1",
                           mapping={},
                           number_of_documents=10,
                           includes_action_and_meta_data=True)
        index1 = track.Index(name="index1", auto_managed=True, types=[type1])

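        # Requesting id conflicts is invalid when the source data already contains action and meta-data lines.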
        with self.assertRaises(exceptions.InvalidSyntax) as ctx:
            params.BulkIndexParamSource(indices=[index1],
                                        params={"conflicts": "random"})

        self.assertEqual(
            "Cannot generate id conflicts [random] as type [index1] in index [type1] already contains "
            "an action and meta-data line.", ctx.exception.args[0])
Example #12
    def test_raises_exception_if_no_index_matches(self):
        index1 = track.Index(name="index1", auto_managed=True, types=[])

        source = params.BulkIndexParamSource(indices=[index1],
                                             params={
                                                 "index": "does_not_exist",
                                                 "conflicts": "random",
                                                 "bulk-size": 5000,
                                                 "batch-size": 20000,
                                                 "pipeline": "test-pipeline"
                                             })

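        # Partitioning must fail because "does_not_exist" matches none of the track's indices.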
        with self.assertRaises(exceptions.RallyAssertionError) as ctx:
            source.partition(0, 1)
        self.assertEqual(
            "The provided index [does_not_exist] does not match any of the indices [index1].",
            ctx.exception.args[0])
Example #13
    def test_sets_absolute_path(self):
        from esrally import config
        from esrally.track import track

        cfg = config.Config()
        cfg.add(config.Scope.application, "benchmarks", "local.dataset.cache",
                "/data")

        default_challenge = track.Challenge(
            "default",
            description="default challenge",
            default=True,
            schedule=[
                track.Task(operation=track.Operation(
                    "index", operation_type=track.OperationType.Index),
                           clients=4)
            ])
        another_challenge = track.Challenge(
            "other", description="non-default challenge", default=False)
        t = track.Track(
            name="unittest",
            short_description="unittest track",
            challenges=[another_challenge, default_challenge],
            indices=[
                track.Index(name="test",
                            auto_managed=True,
                            types=[
                                track.Type(
                                    "docs",
                                    mapping={},
                                    document_file="docs/documents.json",
                                    document_archive="docs/documents.json.bz2")
                            ])
            ])

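        # Resolve the relative document paths against the configured dataset cache directory ("/data").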
        loader.set_absolute_data_path(cfg, t)

        self.assertEqual("/data/docs/documents.json",
                         t.indices[0].types[0].document_file)
        self.assertEqual("/data/docs/documents.json.bz2",
                         t.indices[0].types[0].document_archive)
Example #14
 def test_matches_if_catch_all_pattern_is_defined(self):
     self.assertTrue(track.Index("test").matches(pattern="*"))
     self.assertTrue(track.Index("test").matches(pattern="_all"))
Example #15
 def test_str(self):
     self.assertEqual("test", str(track.Index("test")))
Example #16
 def test_matches_exactly(self):
     self.assertTrue(track.Index("test").matches("test"))
     self.assertFalse(track.Index("test").matches(" test"))
Example #17
 def test_matches_if_no_pattern_is_defined(self):
     self.assertTrue(track.Index("test").matches(pattern=None))
Example #18
 def test_matches_if_no_pattern_is_defined(self):
     self.assertTrue(track.Index("test", auto_managed=True, types=[]).matches(pattern=None))
Example #19
 def test_str(self):
     self.assertEqual("test", str(track.Index("test", auto_managed=True, types=[])))
Example #20
 def test_str(self):
     assert str(track.Index("test")) == "test"
Example #21

tinyTrackSpec = track.Track(
    name="tiny",
    short_description=
    "First 2k documents of the geonames track for local tests",
    description=
    "This test indexes 8.6M documents (POIs from Geonames, total 2.8 GB json) using 8 client threads and 5000 docs per bulk "
    "request against Elasticsearch",
    source_root_url="http://benchmarks.elastic.co/corpora/tiny",
    indices=[
        track.Index(name=TINY_INDEX_NAME,
                    types=[
                        track.Type(name=TINY_TYPE_NAME,
                                   mapping_file_name="mappings.json",
                                   document_file_name="documents.json.bz2",
                                   number_of_documents=2000,
                                   compressed_size_in_bytes=28333,
                                   uncompressed_size_in_bytes=564930)
                    ])
    ],
    # Queries to use in the search benchmark
    queries=[
        DefaultQuery(),
        TermQuery(),
        PhraseQuery(),
        CountryAggQuery(use_request_cache=False),
        CountryAggQuery(suffix="_cached", use_request_cache=True),
        ScrollQuery()
    ],
    track_setups=track.track_setups)
Example #22
 def test_matches_if_catch_all_pattern_is_defined(self):
     assert track.Index("test").matches(pattern="*")
     assert track.Index("test").matches(pattern="_all")
Example #23
    }''')


percolatorTrackSpec = track.Track(
    name="percolator",
    description=
    "This test indexes 2M AOL queries and use the percolator query to match",
    source_root_url="http://benchmarks.elastic.co/corpora/percolator",
    indices=[
        track.Index(
            name="queries",
            types=[
                # The type for the percolator queries:
                track.Type(name=".percolator",
                           mapping_file_name="queries-mapping.json",
                           document_file_name="queries.json.bz2",
                           number_of_documents=2000000,
                           compressed_size_in_bytes=123502,
                           uncompressed_size_in_bytes=148039748),
                # The type used for documents being percolated:
                track.Type(name="content",
                           mapping_file_name="document-mapping.json")
            ])
    ],
    estimated_benchmark_time_in_minutes=5,
    # Queries to use in the search benchmark
    queries=[
        PercolatorQuery(content="president bush"),
        PercolatorQuery(content="saddam hussein"),
        PercolatorQuery(content="hurricane katrina"),
        PercolatorQuery(content="google"),
        PercolatorQueryNoScoring(content="google"),
Example #24
 def test_matches_exactly(self):
     assert track.Index("test").matches("test")
     assert not track.Index("test").matches(" test")