Ejemplo n.º 1
0
    def test_passes_request_parameters(self):
        """Verify that user-supplied request parameters flow through SearchParamSource unchanged."""
        doc_type = track.Type("type1", mapping={}, number_of_documents=3)
        index = track.Index(name="index1", auto_managed=True, types=[doc_type])

        source = params.SearchParamSource(
            indices=[index],
            params={
                "request-params": {"_source_include": "some_field"},
                "body": {"query": {"match_all": {}}}
            })
        generated = source.params()

        # Exactly the five expected keys, no extras.
        self.assertEqual(5, len(generated))
        self.assertEqual("index1", generated["index"])
        self.assertEqual("type1", generated["type"])
        self.assertEqual({"_source_include": "some_field"}, generated["request_params"])
        self.assertFalse(generated["use_request_cache"])
        self.assertEqual({"query": {"match_all": {}}}, generated["body"])
Ejemplo n.º 2
0
    def _create_type(self, type_spec, mapping_dir):
        """Build a track.Type from its spec, resolving document files and loading the JSON mapping.

        When the spec declares a document corpus, size/count metadata is read from
        the spec; otherwise everything defaults to "no documents".
        """
        docs = self._r(type_spec, "documents", mandatory=False)
        # Defaults for types that ship no document corpus.
        document_archive = None
        document_file = None
        number_of_documents = 0
        compressed_bytes = 0
        uncompressed_bytes = 0
        if docs:
            if io.is_archive(docs):
                document_archive = docs
                # The raw document file is the archive name without its extension.
                document_file = io.splitext(docs)[0]
            else:
                document_file = docs
            number_of_documents = self._r(type_spec, "document-count")
            compressed_bytes = self._r(type_spec, "compressed-bytes", mandatory=False)
            uncompressed_bytes = self._r(type_spec, "uncompressed-bytes", mandatory=False)

        mapping_file = os.path.join(mapping_dir, self._r(type_spec, "mapping"))
        with self.source(mapping_file, "rt") as f:
            mapping = json.load(f)

        return track.Type(name=self._r(type_spec, "name"),
                          mapping=mapping,
                          document_file=document_file,
                          document_archive=document_archive,
                          includes_action_and_meta_data=self._r(type_spec, "includes-action-and-meta-data",
                                                                mandatory=False, default_value=False),
                          number_of_documents=number_of_documents,
                          compressed_size_in_bytes=compressed_bytes,
                          uncompressed_size_in_bytes=uncompressed_bytes)
Ejemplo n.º 3
0
    def test_internal_params_take_precedence(self):
        """Internally generated bulk parameters must win over user-supplied ones (here: "body")."""
        doc_type = track.Type("type1", mapping={}, number_of_documents=3)
        index = track.Index(name="index1", auto_managed=True, types=[doc_type])

        reader = BulkDataGeneratorTests.create_test_reader([["1", "2", "3"]])
        bulk_source = params.bulk_data_based(num_clients=1,
                                             client_index=0,
                                             indices=[index],
                                             batch_size=3,
                                             bulk_size=3,
                                             id_conflicts=params.IndexIdConflict.NoConflicts,
                                             pipeline=None,
                                             original_params={"body": "foo", "custom-param": "bar"},
                                             create_reader=reader)
        produced = list(bulk_source)

        self.assertEqual(1, len(produced))
        # body must not contain 'foo'!
        self.assertEqual({
            "action_metadata_present": True,
            "body": ["1", "2", "3"],
            "bulk-id": "0-1",
            "bulk-size": 3,
            "index": index,
            "type": doc_type,
            "custom-param": "bar"
        }, produced[0])
Ejemplo n.º 4
0
    def _create_type(self, type_spec, mapping_dir, data_dir):
        """Build a track.Type from its spec, resolving document paths relative to data_dir."""
        compressed_docs = self._r(type_spec, "documents", mandatory=False)
        if compressed_docs:
            document_archive = "{}/{}".format(data_dir, compressed_docs)
            # The plain document file is the archive name with its extension stripped.
            document_file = "{}/{}".format(data_dir, io.splitext(compressed_docs)[0])
        else:
            # No corpus declared for this type.
            document_archive = None
            document_file = None

        type_name = self._r(type_spec, "name")
        mapping_file = "{}/{}".format(mapping_dir, self._r(type_spec, "mapping"))
        doc_count = self._r(type_spec, "document-count", mandatory=False, default_value=0)
        compressed_size = self._r(type_spec, "compressed-bytes", mandatory=False)
        uncompressed_size = self._r(type_spec, "uncompressed-bytes", mandatory=False)

        return track.Type(name=type_name,
                          mapping_file=mapping_file,
                          document_file=document_file,
                          document_archive=document_archive,
                          number_of_documents=doc_count,
                          compressed_size_in_bytes=compressed_size,
                          uncompressed_size_in_bytes=uncompressed_size)
Ejemplo n.º 5
0
    def test_generate_two_bulks(self):
        """Eight documents with bulk size 5 must yield one full bulk and one partial bulk."""
        doc_type = track.Type("type1", mapping_file="", number_of_documents=10)
        index = track.Index(name="index1", auto_managed=True, types=[doc_type])

        reader = BulkDataGeneratorTests.create_test_reader([["1", "2", "3", "4", "5"], ["6", "7", "8"]])
        bulk_source = params.bulk_data_based(num_clients=1,
                                             client_index=0,
                                             indices=[index],
                                             action_metadata=params.ActionMetaData.NoMetaData,
                                             batch_size=5,
                                             bulk_size=5,
                                             id_conflicts=params.IndexIdConflict.NoConflicts,
                                             pipeline=None,
                                             original_params={
                                                 "my-custom-parameter": "foo",
                                                 "my-custom-parameter-2": True
                                             },
                                             create_reader=reader)
        produced = list(bulk_source)

        self.assertEqual(2, len(produced))
        # (body, bulk id, bulk size) per expected bulk, in emission order.
        expected_bulks = [
            (["1", "2", "3", "4", "5"], "0-1", 5),
            (["6", "7", "8"], "0-2", 3),
        ]
        for actual, (body, bulk_id, size) in zip(produced, expected_bulks):
            self.assertEqual({
                "action_metadata_present": False,
                "body": body,
                "bulk-id": bulk_id,
                "bulk-size": size,
                "index": index,
                "type": doc_type,
                "my-custom-parameter": "foo",
                "my-custom-parameter-2": True
            }, actual)
Ejemplo n.º 6
0
    def test_no_distribution_version_for_source_distro(self):
        """With an empty distribution version, the mapping file name is used as-is."""
        cfg = config.Config()
        cfg.add(config.Scope.application, "source", "distribution.version", "")

        mapping_type = track.Type("test", "test-mapping.json")
        marshal = track.Marshal(cfg)

        self.assertEqual(marshal.mapping_file_name(mapping_type), "test-mapping.json")
Ejemplo n.º 7
0
    def test_create_with_metadata_in_source_file_but_conflicts(self):
        """Requesting id conflicts must be rejected when the source data already carries action/meta-data lines."""
        conflicting_type = track.Type("type1",
                                      mapping={},
                                      number_of_documents=10,
                                      includes_action_and_meta_data=True)
        index = track.Index(name="index1", auto_managed=True, types=[conflicting_type])

        with self.assertRaises(exceptions.InvalidSyntax) as ctx:
            params.BulkIndexParamSource(indices=[index], params={"conflicts": "random"})

        # NOTE(review): the expected message appears to swap the type and index
        # names ("type [index1] in index [type1]"); it mirrors the current
        # production output, so it is asserted verbatim here.
        self.assertEqual(
            "Cannot generate id conflicts [random] as type [index1] in index [type1] already contains "
            "an action and meta-data line.", ctx.exception.args[0])
Ejemplo n.º 8
0
    def test_sets_absolute_path(self):
        """set_absolute_data_path must prefix document file paths with the local dataset cache dir."""
        from esrally import config
        from esrally.track import track

        cfg = config.Config()
        cfg.add(config.Scope.application, "benchmarks", "local.dataset.cache", "/data")

        index_task = track.Task(operation=track.Operation("index", operation_type=track.OperationType.Index),
                                clients=4)
        default_challenge = track.Challenge("default",
                                            description="default challenge",
                                            default=True,
                                            schedule=[index_task])
        another_challenge = track.Challenge("other", description="non-default challenge", default=False)
        docs_type = track.Type("docs",
                               mapping={},
                               document_file="docs/documents.json",
                               document_archive="docs/documents.json.bz2")
        t = track.Track(name="unittest",
                        short_description="unittest track",
                        challenges=[another_challenge, default_challenge],
                        indices=[track.Index(name="test", auto_managed=True, types=[docs_type])])

        loader.set_absolute_data_path(cfg, t)

        resolved = t.indices[0].types[0]
        self.assertEqual("/data/docs/documents.json", resolved.document_file)
        self.assertEqual("/data/docs/documents.json.bz2", resolved.document_archive)
Ejemplo n.º 9
0

# Track spec for a tiny (2k document) cut of the geonames corpus, intended for local tests.
tinyTrackSpec = track.Track(
    name="tiny",
    short_description=
    "First 2k documents of the geonames track for local tests",
    # Fixed: the previous description was copied from the full geonames track
    # ("8.6M documents ... 2.8 GB"), contradicting number_of_documents=2000 below.
    description=
    "This test indexes the first 2k documents of the geonames track (POIs from Geonames) using 8 client threads and "
    "5000 docs per bulk request against Elasticsearch",
    source_root_url="http://benchmarks.elastic.co/corpora/tiny",
    indices=[
        track.Index(name=TINY_INDEX_NAME,
                    types=[
                        track.Type(name=TINY_TYPE_NAME,
                                   mapping_file_name="mappings.json",
                                   document_file_name="documents.json.bz2",
                                   number_of_documents=2000,
                                   compressed_size_in_bytes=28333,
                                   uncompressed_size_in_bytes=564930)
                    ])
    ],
    # Queries to use in the search benchmark
    queries=[
        DefaultQuery(),
        TermQuery(),
        PhraseQuery(),
        CountryAggQuery(use_request_cache=False),
        CountryAggQuery(suffix="_cached", use_request_cache=True),
        ScrollQuery()
    ],
    track_setups=track.track_setups)
Ejemplo n.º 10
0
    }''')


percolatorTrackSpec = track.Track(
    name="percolator",
    description=
    "This test indexes 2M AOL queries and use the percolator query to match",
    source_root_url="http://benchmarks.elastic.co/corpora/percolator",
    indices=[
        track.Index(
            name="queries",
            types=[
                # The type for the percolator queries:
                track.Type(name=".percolator",
                           mapping_file_name="queries-mapping.json",
                           document_file_name="queries.json.bz2",
                           number_of_documents=2000000,
                           compressed_size_in_bytes=123502,
                           uncompressed_size_in_bytes=148039748),
                # The used for documents being percolated:
                track.Type(name="content",
                           mapping_file_name="document-mapping.json")
            ])
    ],
    estimated_benchmark_time_in_minutes=5,
    # Queries to use in the search benchmark
    queries=[
        PercolatorQuery(content="president bush"),
        PercolatorQuery(content="saddam hussein"),
        PercolatorQuery(content="hurricane katrina"),
        PercolatorQuery(content="google"),
        PercolatorQueryNoScoring(content="google"),