Example #1
    def test_read_bulk_smaller_than_number_of_docs_and_multiple_clients(self):
        data = [
            '{"key": "value1"}',
            '{"key": "value2"}',
            '{"key": "value3"}',
            '{"key": "value4"}',
            '{"key": "value5"}',
            '{"key": "value6"}',
            '{"key": "value7"}',
        ]
        bulk_size = 3

        # only 5 documents to index for this client
        source = params.Slice(io.StringAsFileSource, 0, 5)
        am_handler = params.GenerateActionMetaData("test_index",
                                                   "test_type",
                                                   conflicting_ids=None)

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        expected_bulk_sizes = [3, 2]
        # lines should include meta-data
        expected_line_sizes = [6, 4]
        self.assert_bulks_sized(reader, expected_bulk_sizes,
                                expected_line_sizes)
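
A note on the helper: every example below calls `self.assert_bulks_sized(...)`, which is not part of this excerpt. The following is a minimal sketch of what such a helper could look like, assuming the reader yields `(index, type, batch)` triples whose batches contain `(bulk_size, bulk_lines)` tuples; the exact iteration contract differs between the API versions shown in these examples, so treat this as illustrative only:

    def assert_bulks_sized(self, reader, expected_bulk_sizes, expected_line_sizes=None):
        # Drain the reader and compare each bulk's reported size (and, when
        # given, its line count) against the expectations.
        with reader:
            bulk_index = 0
            for _index, _type, batch in reader:
                for bulk_size, bulk in batch:
                    self.assertEqual(expected_bulk_sizes[bulk_index], bulk_size)
                    if expected_line_sizes is not None:
                        self.assertEqual(expected_line_sizes[bulk_index], len(bulk))
                    bulk_index += 1
            # every expected bulk must actually have been produced
            self.assertEqual(len(expected_bulk_sizes), bulk_index)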
Example #2
    def test_read_bulks_and_assume_metadata_line_in_source_file(self):
        data = [
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value1"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value2"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value3"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value4"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value5"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value6"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value7"}'
        ]
        bulk_size = 3

        source = params.Slice(io.StringAsFileSource, 0, len(data))
        am_handler = params.SourceActionMetaData(source)

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        expected_bulk_sizes = [3, 3, 1]
        # lines should include meta-data
        expected_line_sizes = [6, 6, 2]
        self.assert_bulks_sized(reader, expected_bulk_sizes,
                                expected_line_sizes)
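
Example #2 relies on `params.SourceActionMetaData`, which takes the action/meta-data line straight from the source file instead of generating one. A rough sketch of that idea (the body is an illustrative assumption, not the real implementation):

class SourceActionMetaData:
    # Hypothetical sketch: hands out the next action/meta-data line from the
    # wrapped source, so the reader pairs it with the following document line.
    def __init__(self, source):
        self.source = source

    def __iter__(self):
        return self

    def __next__(self):
        return next(self.source)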
Example #3
    def test_read_bulks_and_assume_metadata_line_in_source_file(self):
        data = [
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value1"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value2"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value3"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value4"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value5"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value6"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}}',
            '{"key": "value7"}'
        ]
        bulk_size = 3

        source = params.Slice(io.StringAsFileSource, 0, len(data))
        am_handler = params.SourceActionMetaData(source)

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        # bulk sizes are counted in lines and are therefore double the document count:
        # each document occupies one index command line plus one data line
        expected_bulk_sizes = [6, 6, 2]
        self.assert_bulks_sized(reader, expected_bulk_sizes)
Example #4
    def test_read_bulk_with_offset(self):
        data = [
            '{"key": "value1"}', '{"key": "value2"}', '{"key": "value3"}',
            '{"key": "value4"}', '{"key": "value5"}'
        ]
        bulk_size = 50

        source = params.Slice(io.StringAsFileSource, 3, len(data))
        am_handler = params.GenerateActionMetaData("test_index",
                                                   "test_type",
                                                   conflicting_ids=None)

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        expected_bulk_sizes = [(len(data) - 3)]
        # lines should include meta-data
        expected_line_sizes = [(len(data) - 3) * 2]
        self.assert_bulks_sized(reader, expected_bulk_sizes,
                                expected_line_sizes)
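
All of these tests pass `io.StringAsFileSource` as the file source: a test double that presents an in-memory list of strings as a readable, line-oriented file. One plausible minimal implementation, assuming a plain line-iteration contract (illustrative only):

class StringAsFileSource:
    # Hypothetical sketch: wraps a list of strings and serves one string per
    # iteration, mimicking a file opened in text mode.
    def __init__(self, contents, mode, encoding="utf-8"):
        self.contents = contents
        self.current_index = 0

    def open(self):
        return self

    def __iter__(self):
        return self

    def __next__(self):
        if self.current_index >= len(self.contents):
            raise StopIteration()
        line = self.contents[self.current_index]
        self.current_index += 1
        return line

    def close(self):
        pass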
Example #5
    def test_read_bulk_smaller_than_number_of_docs_and_multiple_clients(self):
        data = [
            '{"key": "value1"}',
            '{"key": "value2"}',
            '{"key": "value3"}',
            '{"key": "value4"}',
            '{"key": "value5"}',
            '{"key": "value6"}',
            '{"key": "value7"}',
        ]
        bulk_size = 3

        # only 5 documents to index for this client
        source = params.Slice(io.StringAsFileSource, 0, 5)
        am_handler = params.GenerateActionMetaData("test_index",
                                                   "test_type",
                                                   conflicting_ids=None)

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        # bulk sizes are counted in lines and are therefore double the document count:
        # each document occupies one index command line plus one data line
        expected_bulk_sizes = [6, 4]
        self.assert_bulks_sized(reader, expected_bulk_sizes)
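
The `params.Slice(source_class, offset, number_of_lines)` wrapper used above restricts one client to a window of the shared source: Example #4 skips the first three documents, and Examples #1 and #5 cap a client at five. A hedged sketch of that behavior (the constructor arguments match the calls above; everything else is assumed):

class Slice:
    # Hypothetical sketch: opens the wrapped source, skips `offset` lines,
    # then serves at most `number_of_lines` lines.
    def __init__(self, source_class, offset, number_of_lines):
        self.source_class = source_class
        self.offset = offset
        self.number_of_lines = number_of_lines
        self.source = None
        self.current_line = 0

    def open(self, *args):
        self.source = self.source_class(*args).open()
        for _ in range(self.offset):
            next(self.source)  # advance past lines owned by other clients
        return self

    def __iter__(self):
        return self

    def __next__(self):
        if self.current_line >= self.number_of_lines:
            raise StopIteration()
        self.current_line += 1
        return next(self.source)

    def close(self):
        self.source.close()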
Example #6
    def test_read_bulk_smaller_than_number_of_docs_and_multiple_clients(self):
        data = [
            '{"key": "value1"}',
            '{"key": "value2"}',
            '{"key": "value3"}',
            '{"key": "value4"}',
            '{"key": "value5"}',
            '{"key": "value6"}',
            '{"key": "value7"}',
        ]
        bulk_size = 3

        reader = params.IndexDataReader(data,
                                        docs_to_index=5,
                                        conflicting_ids=None,
                                        index_name="test_index",
                                        type_name="test_type",
                                        bulk_size=bulk_size,
                                        file_source=io.StringAsFileSource)

        # bulk lengths are counted in lines and are therefore double the document count:
        # each document occupies one index command line plus one data line
        expected_bulk_lengths = [6, 4]
        with reader:
            bulk_index = 0
            for bulk in reader:
                self.assertEqual(expected_bulk_lengths[bulk_index], len(bulk))
                bulk_index += 1
Example #7
    def test_read_bulk_larger_than_number_of_docs(self):
        data = [
            '{"key": "value1"}', '{"key": "value2"}', '{"key": "value3"}',
            '{"key": "value4"}', '{"key": "value5"}'
        ]
        bulk_size = 50

        reader = params.IndexDataReader(data,
                                        docs_to_index=len(data),
                                        conflicting_ids=None,
                                        index_name="test_index",
                                        type_name="test_type",
                                        bulk_size=bulk_size,
                                        file_source=io.StringAsFileSource)
        with reader:
            for bulk in reader:
                self.assertEqual(len(data) * 2, len(bulk))
Example #8
    def test_read_bulk_larger_than_number_of_docs(self):
        data = [
            '{"key": "value1"}',
            '{"key": "value2"}',
            '{"key": "value3"}',
            '{"key": "value4"}',
            '{"key": "value5"}'
        ]
        bulk_size = 50

        source = params.Slice(io.StringAsFileSource, 0, len(data))
        am_handler = params.GenerateActionMetaData("test_index",
                                                   "test_type",
                                                   conflicting_ids=None)

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        expected_bulk_sizes = [len(data) * 2]
        self.assert_bulks_sized(reader, expected_bulk_sizes)
Example #9
    def test_read_bulks_and_assume_no_metadata(self):
        data = [
            '{"key": "value1"}',
            '{"key": "value2"}',
            '{"key": "value3"}',
            '{"key": "value4"}',
            '{"key": "value5"}',
            '{"key": "value6"}',
            '{"key": "value7"}'
        ]
        bulk_size = 3

        source = params.Slice(io.StringAsFileSource, 0, len(data))
        am_handler = params.NoneActionMetaData()

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        # no meta-data, hence line counts and bulk sizes must be identical
        expected_bulk_sizes = [3, 3, 1]
        self.assert_bulks_sized(reader, expected_bulk_sizes, expected_bulk_sizes)
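
For contrast with Example #9's `NoneActionMetaData` and the `GenerateActionMetaData` handler used in Examples #1, #4, #5, and #8, here is a bare-bones sketch of both strategies. The names follow the `params` module as used above; the bodies are illustrative assumptions (the real generator, for instance, also honours `conflicting_ids` to simulate id conflicts):

class NoneActionMetaData:
    # Sketch: bulks carry no action/meta-data lines, so bulk sizes and line
    # counts are identical.
    def __iter__(self):
        return self

    def __next__(self):
        return None


class GenerateActionMetaData:
    # Sketch: emits the same index command ahead of every document, which is
    # why line counts above are double the document counts.
    def __init__(self, index_name, type_name, conflicting_ids=None):
        self.meta_data = '{"index": {"_index": "%s", "_type": "%s"}}' % (index_name, type_name)

    def __iter__(self):
        return self

    def __next__(self):
        return self.meta_data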