def test_passes_request_parameters(self):
    """SearchParamSource must forward request parameters and the query body unchanged."""
    doc_type = track.Type("type1", mapping={}, number_of_documents=3)
    index = track.Index(name="index1", auto_managed=True, types=[doc_type])
    source = params.SearchParamSource(indices=[index],
                                      params={
                                          "request-params": {
                                              "_source_include": "some_field"
                                          },
                                          "body": {
                                              "query": {
                                                  "match_all": {}
                                              }
                                          }
                                      })

    generated = source.params()

    # Exactly five entries: index, type, request_params, use_request_cache and body.
    self.assertEqual(5, len(generated))
    self.assertEqual("index1", generated["index"])
    self.assertEqual("type1", generated["type"])
    self.assertEqual({"_source_include": "some_field"}, generated["request_params"])
    self.assertFalse(generated["use_request_cache"])
    self.assertEqual({"query": {"match_all": {}}}, generated["body"])
def _create_type(self, type_spec, mapping_dir):
    """Build a ``track.Type`` from its spec, loading the mapping file from ``mapping_dir``
    and resolving document file / archive metadata when a corpus is declared."""
    # Defaults apply when the type declares no document corpus.
    document_archive = None
    document_file = None
    number_of_documents = 0
    compressed_bytes = 0
    uncompressed_bytes = 0

    docs = self._r(type_spec, "documents", mandatory=False)
    if docs:
        if io.is_archive(docs):
            # Archived corpus: the plain document file name is the archive name minus its extension.
            document_archive = docs
            document_file = io.splitext(docs)[0]
        else:
            document_file = docs
        number_of_documents = self._r(type_spec, "document-count")
        compressed_bytes = self._r(type_spec, "compressed-bytes", mandatory=False)
        uncompressed_bytes = self._r(type_spec, "uncompressed-bytes", mandatory=False)

    mapping_file = os.path.join(mapping_dir, self._r(type_spec, "mapping"))
    with self.source(mapping_file, "rt") as f:
        mapping = json.load(f)

    return track.Type(name=self._r(type_spec, "name"),
                      mapping=mapping,
                      document_file=document_file,
                      document_archive=document_archive,
                      includes_action_and_meta_data=self._r(type_spec, "includes-action-and-meta-data",
                                                            mandatory=False, default_value=False),
                      number_of_documents=number_of_documents,
                      compressed_size_in_bytes=compressed_bytes,
                      uncompressed_size_in_bytes=uncompressed_bytes)
def test_internal_params_take_precedence(self):
    """Internally generated bulk parameters (e.g. "body") must override user-supplied ones."""
    doc_type = track.Type("type1", mapping={}, number_of_documents=3)
    index = track.Index(name="index1", auto_managed=True, types=[doc_type])
    bulks = params.bulk_data_based(num_clients=1,
                                   client_index=0,
                                   indices=[index],
                                   batch_size=3,
                                   bulk_size=3,
                                   id_conflicts=params.IndexIdConflict.NoConflicts,
                                   pipeline=None,
                                   original_params={
                                       "body": "foo",
                                       "custom-param": "bar"
                                   },
                                   create_reader=BulkDataGeneratorTests.create_test_reader([["1", "2", "3"]]))

    all_bulks = list(bulks)

    self.assertEqual(1, len(all_bulks))
    # body must not contain 'foo'!
    expected = {
        "action_metadata_present": True,
        "body": ["1", "2", "3"],
        "bulk-id": "0-1",
        "bulk-size": 3,
        "index": index,
        "type": doc_type,
        "custom-param": "bar"
    }
    self.assertEqual(expected, all_bulks[0])
def _create_type(self, type_spec, mapping_dir, data_dir):
    """Build a ``track.Type`` from its spec; mapping and document paths are resolved
    relative to ``mapping_dir`` and ``data_dir`` respectively."""
    compressed_docs = self._r(type_spec, "documents", mandatory=False)
    if compressed_docs:
        document_archive = "%s/%s" % (data_dir, compressed_docs)
        # The plain document file name is the archive name without its extension.
        document_file = "%s/%s" % (data_dir, io.splitext(compressed_docs)[0])
    else:
        # No corpus declared for this type.
        document_archive = None
        document_file = None
    return track.Type(name=self._r(type_spec, "name"),
                      mapping_file="%s/%s" % (mapping_dir, self._r(type_spec, "mapping")),
                      document_file=document_file,
                      document_archive=document_archive,
                      number_of_documents=self._r(type_spec, "document-count", mandatory=False, default_value=0),
                      compressed_size_in_bytes=self._r(type_spec, "compressed-bytes", mandatory=False),
                      uncompressed_size_in_bytes=self._r(type_spec, "uncompressed-bytes", mandatory=False))
def test_generate_two_bulks(self):
    """Eight documents with a bulk size of five must yield one full and one partial bulk,
    both carrying the user's custom parameters."""
    doc_type = track.Type("type1", mapping_file="", number_of_documents=10)
    index = track.Index(name="index1", auto_managed=True, types=[doc_type])
    bulks = params.bulk_data_based(num_clients=1,
                                   client_index=0,
                                   indices=[index],
                                   action_metadata=params.ActionMetaData.NoMetaData,
                                   batch_size=5,
                                   bulk_size=5,
                                   id_conflicts=params.IndexIdConflict.NoConflicts,
                                   pipeline=None,
                                   original_params={
                                       "my-custom-parameter": "foo",
                                       "my-custom-parameter-2": True
                                   },
                                   create_reader=BulkDataGeneratorTests.create_test_reader(
                                       [["1", "2", "3", "4", "5"], ["6", "7", "8"]]))

    all_bulks = list(bulks)

    self.assertEqual(2, len(all_bulks))
    # Second bulk is a partial one containing only the remaining three documents.
    expected = [
        ("0-1", ["1", "2", "3", "4", "5"], 5),
        ("0-2", ["6", "7", "8"], 3)
    ]
    for bulk, (bulk_id, body, size) in zip(all_bulks, expected):
        self.assertEqual({
            "action_metadata_present": False,
            "body": body,
            "bulk-id": bulk_id,
            "bulk-size": size,
            "index": index,
            "type": doc_type,
            "my-custom-parameter": "foo",
            "my-custom-parameter-2": True
        }, bulk)
def test_no_distribution_version_for_source_distro(self):
    """With an empty distribution version (source build) the mapping file name is used verbatim."""
    cfg = config.Config()
    cfg.add(config.Scope.application, "source", "distribution.version", "")

    doc_type = track.Type("test", "test-mapping.json")
    marshal = track.Marshal(cfg)

    self.assertEqual(marshal.mapping_file_name(doc_type), "test-mapping.json")
def test_create_with_metadata_in_source_file_but_conflicts(self):
    """Requesting id conflicts must be rejected when the source file already carries
    action and meta-data lines (ids cannot be rewritten then)."""
    doc_type = track.Type("type1", mapping={}, number_of_documents=10, includes_action_and_meta_data=True)
    index = track.Index(name="index1", auto_managed=True, types=[doc_type])

    with self.assertRaises(exceptions.InvalidSyntax) as ctx:
        params.BulkIndexParamSource(indices=[index], params={"conflicts": "random"})

    # NOTE(review): the message interpolates the index name into the "type" slot and the
    # type name into the "index" slot ("type [index1] in index [type1]") -- this looks
    # like swapped placeholders in the production code, but the assertion deliberately
    # mirrors the current behavior. TODO: confirm against the param source implementation.
    self.assertEqual("Cannot generate id conflicts [random] as type [index1] in index [type1] already contains "
                     "an action and meta-data line.", ctx.exception.args[0])
def test_sets_absolute_path(self):
    """set_absolute_data_path must prefix every type's document paths with the
    configured local dataset cache directory."""
    from esrally import config
    from esrally.track import track

    cfg = config.Config()
    cfg.add(config.Scope.application, "benchmarks", "local.dataset.cache", "/data")

    default_challenge = track.Challenge("default", description="default challenge", default=True, schedule=[
        track.Task(operation=track.Operation("index", operation_type=track.OperationType.Index), clients=4)
    ])
    another_challenge = track.Challenge("other", description="non-default challenge", default=False)
    doc_type = track.Type("docs",
                          mapping={},
                          document_file="docs/documents.json",
                          document_archive="docs/documents.json.bz2")
    t = track.Track(name="unittest",
                    short_description="unittest track",
                    challenges=[another_challenge, default_challenge],
                    indices=[track.Index(name="test", auto_managed=True, types=[doc_type])])

    loader.set_absolute_data_path(cfg, t)

    self.assertEqual("/data/docs/documents.json", t.indices[0].types[0].document_file)
    self.assertEqual("/data/docs/documents.json.bz2", t.indices[0].types[0].document_archive)
# Specification of the "tiny" track: a 2000-document corpus for quick local test runs.
_tiny_type = track.Type(name=TINY_TYPE_NAME,
                        mapping_file_name="mappings.json",
                        document_file_name="documents.json.bz2",
                        number_of_documents=2000,
                        compressed_size_in_bytes=28333,
                        uncompressed_size_in_bytes=564930)

# Queries to use in the search benchmark.
_tiny_queries = [
    DefaultQuery(),
    TermQuery(),
    PhraseQuery(),
    CountryAggQuery(use_request_cache=False),
    CountryAggQuery(suffix="_cached", use_request_cache=True),
    ScrollQuery()
]

tinyTrackSpec = track.Track(
    name="tiny",
    short_description="First 2k documents of the geonames track for local tests",
    description="This test indexes 8.6M documents (POIs from Geonames, total 2.8 GB json) using 8 client threads and 5000 docs per bulk "
                "request against Elasticsearch",
    source_root_url="http://benchmarks.elastic.co/corpora/tiny",
    indices=[track.Index(name=TINY_INDEX_NAME, types=[_tiny_type])],
    queries=_tiny_queries,
    track_setups=track.track_setups)
}''') percolatorTrackSpec = track.Track( name="percolator", description= "This test indexes 2M AOL queries and use the percolator query to match", source_root_url="http://benchmarks.elastic.co/corpora/percolator", indices=[ track.Index( name="queries", types=[ # The type for the percolator queries: track.Type(name=".percolator", mapping_file_name="queries-mapping.json", document_file_name="queries.json.bz2", number_of_documents=2000000, compressed_size_in_bytes=123502, uncompressed_size_in_bytes=148039748), # The used for documents being percolated: track.Type(name="content", mapping_file_name="document-mapping.json") ]) ], estimated_benchmark_time_in_minutes=5, # Queries to use in the search benchmark queries=[ PercolatorQuery(content="president bush"), PercolatorQuery(content="saddam hussein"), PercolatorQuery(content="hurricane katrina"), PercolatorQuery(content="google"), PercolatorQueryNoScoring(content="google"),