def test_read_bulk_smaller_than_number_of_docs_and_multiple_clients(self):
    """A client slice smaller than the corpus yields bulks sized from the slice, not the full data."""
    data = ['{"key": "value%d"}' % n for n in range(1, 8)]
    bulk_size = 3
    # this client indexes only the first 5 of the 7 documents
    source = params.Slice(io.StringAsFileSource, 0, 5)
    am_handler = params.GenerateActionMetaData("test_index", "test_type", conflicting_ids=None)
    reader = params.IndexDataReader(data,
                                    batch_size=bulk_size,
                                    bulk_size=bulk_size,
                                    file_source=source,
                                    action_metadata=am_handler,
                                    index_name="test_index",
                                    type_name="test_type")
    # 5 docs at bulk size 3 -> one full bulk plus one partial bulk
    expected_bulk_sizes = [3, 2]
    # line counts include the action/meta-data line that precedes each document
    expected_line_sizes = [6, 4]
    self.assert_bulks_sized(reader, expected_bulk_sizes, expected_line_sizes)
def test_read_bulks_and_assume_metadata_line_in_source_file(self):
    """Action/meta-data lines already present in the source file are consumed as-is."""
    action_line = '{"index": {"_index": "test_index", "_type": "test_type"}'
    data = []
    for n in range(1, 8):
        data.append(action_line)
        data.append('{"key": "value%d"}' % n)
    bulk_size = 3
    source = params.Slice(io.StringAsFileSource, 0, len(data))
    am_handler = params.SourceActionMetaData(source)
    reader = params.IndexDataReader(data,
                                    batch_size=bulk_size,
                                    bulk_size=bulk_size,
                                    file_source=source,
                                    action_metadata=am_handler,
                                    index_name="test_index",
                                    type_name="test_type")
    # 7 docs at bulk size 3 -> two full bulks and one partial bulk
    expected_bulk_sizes = [3, 3, 1]
    # line counts include the meta-data line paired with each document
    expected_line_sizes = [6, 6, 2]
    self.assert_bulks_sized(reader, expected_bulk_sizes, expected_line_sizes)
def test_read_bulks_and_assume_metadata_line_in_source_file(self):
    """Bulk line counts cover both the index command line and the document line."""
    meta = '{"index": {"_index": "test_index", "_type": "test_type"}'
    data = []
    for n in range(1, 8):
        data.append(meta)
        data.append('{"key": "value%d"}' % n)
    bulk_size = 3
    source = params.Slice(StringAsFileSource, 0, len(data))
    am_handler = params.SourceActionMetaData(source)
    reader = params.IndexDataReader(data,
                                    batch_size=bulk_size,
                                    bulk_size=bulk_size,
                                    file_source=source,
                                    action_metadata=am_handler,
                                    index_name="test_index",
                                    type_name="test_type")
    # always double the amount as one line contains the data and one line contains the index command
    expected_bulk_sizes = [6, 6, 2]
    self.assert_bulks_sized(reader, expected_bulk_sizes)
def test_read_bulk_with_offset(self):
    """Documents before the slice offset are skipped; only the remainder is bulked."""
    data = ['{"key": "value%d"}' % n for n in range(1, 6)]
    bulk_size = 50
    # start reading at the fourth document
    source = params.Slice(io.StringAsFileSource, 3, len(data))
    am_handler = params.GenerateActionMetaData("test_index", "test_type", conflicting_ids=None)
    reader = params.IndexDataReader(data,
                                    batch_size=bulk_size,
                                    bulk_size=bulk_size,
                                    file_source=source,
                                    action_metadata=am_handler,
                                    index_name="test_index",
                                    type_name="test_type")
    remaining = len(data) - 3
    expected_bulk_sizes = [remaining]
    # line count includes the generated meta-data line per document
    expected_line_sizes = [remaining * 2]
    self.assert_bulks_sized(reader, expected_bulk_sizes, expected_line_sizes)
def test_read_bulk_smaller_than_number_of_docs_and_multiple_clients(self):
    """Bulk line counts are derived from this client's 5-document slice, not all 7 documents."""
    data = ['{"key": "value%d"}' % n for n in range(1, 8)]
    bulk_size = 3
    # restrict this client to the first 5 documents
    source = params.Slice(StringAsFileSource, 0, 5)
    am_handler = params.GenerateActionMetaData("test_index", "test_type", conflicting_ids=None)
    reader = params.IndexDataReader(data,
                                    batch_size=bulk_size,
                                    bulk_size=bulk_size,
                                    file_source=source,
                                    action_metadata=am_handler,
                                    index_name="test_index",
                                    type_name="test_type")
    # always double the amount as one line contains the data and one line contains the index command
    expected_bulk_sizes = [6, 4]
    self.assert_bulks_sized(reader, expected_bulk_sizes)
def test_read_bulk_smaller_than_number_of_docs_and_multiple_clients(self):
    """With docs_to_index=5 out of 7, the reader emits one full and one partial bulk."""
    data = ['{"key": "value%d"}' % n for n in range(1, 8)]
    bulk_size = 3
    reader = params.IndexDataReader(data,
                                    docs_to_index=5,
                                    conflicting_ids=None,
                                    index_name="test_index",
                                    type_name="test_type",
                                    bulk_size=bulk_size,
                                    file_source=StringAsFileSource)
    # always double the amount as one line contains the data and one line contains the index command
    expected_bulk_lengths = [6, 4]
    with reader:
        for bulk_index, bulk in enumerate(reader):
            self.assertEqual(expected_bulk_lengths[bulk_index], len(bulk))
def test_read_bulk_larger_than_number_of_docs(self):
    """A bulk size exceeding the corpus yields a single bulk holding every document."""
    data = ['{"key": "value%d"}' % n for n in range(1, 6)]
    bulk_size = 50
    reader = params.IndexDataReader(data,
                                    docs_to_index=len(data),
                                    conflicting_ids=None,
                                    index_name="test_index",
                                    type_name="test_type",
                                    bulk_size=bulk_size,
                                    file_source=StringAsFileSource)
    with reader:
        for bulk in reader:
            # doubled: every document is paired with its index command line
            self.assertEqual(2 * len(data), len(bulk))
def test_read_bulk_larger_than_number_of_docs(self):
    """A bulk size exceeding the corpus produces one bulk containing all lines."""
    data = ['{"key": "value%d"}' % n for n in range(1, 6)]
    bulk_size = 50
    source = params.Slice(io.StringAsFileSource, 0, len(data))
    am_handler = params.GenerateActionMetaData("test_index", "test_type", conflicting_ids=None)
    reader = params.IndexDataReader(data,
                                    batch_size=bulk_size,
                                    bulk_size=bulk_size,
                                    file_source=source,
                                    action_metadata=am_handler,
                                    index_name="test_index",
                                    type_name="test_type")
    # doubled: one meta-data line accompanies each document
    expected_bulk_sizes = [2 * len(data)]
    self.assert_bulks_sized(reader, expected_bulk_sizes)
def test_read_bulks_and_assume_no_metadata(self):
    """Without action/meta-data lines, bulk sizes and line counts coincide."""
    data = ['{"key": "value%d"}' % n for n in range(1, 8)]
    bulk_size = 3
    source = params.Slice(io.StringAsFileSource, 0, len(data))
    am_handler = params.NoneActionMetaData()
    reader = params.IndexDataReader(data,
                                    batch_size=bulk_size,
                                    bulk_size=bulk_size,
                                    file_source=source,
                                    action_metadata=am_handler,
                                    index_name="test_index",
                                    type_name="test_type")
    # no meta-data, hence line numbers and bulk sizes need to be identical
    expected_bulk_sizes = [3, 3, 1]
    self.assert_bulks_sized(reader, expected_bulk_sizes, expected_bulk_sizes)