Example #1
    def test_read_bulk_smaller_than_number_of_docs_and_multiple_clients(self):
        data = [
            '{"key": "value1"}',
            '{"key": "value2"}',
            '{"key": "value3"}',
            '{"key": "value4"}',
            '{"key": "value5"}',
            '{"key": "value6"}',
            '{"key": "value7"}',
        ]
        bulk_size = 3

        # only 5 documents to index for this client
        source = params.Slice(io.StringAsFileSource, 0, 5)
        am_handler = params.GenerateActionMetaData("test_index",
                                                   "test_type",
                                                   conflicting_ids=None)

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        expected_bulk_sizes = [3, 2]
        # lines should include meta-data
        expected_line_sizes = [6, 4]
        self.assert_bulks_sized(reader, expected_bulk_sizes,
                                expected_line_sizes)
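
None of the examples shows assert_bulks_sized itself. A plausible shape for the helper, assuming IndexDataReader is a context manager that yields (index, type, batch) tuples where each batch holds (doc_count, bulk_lines) pairs; some examples pass only bulk sizes, so the line-size check is optional here. The iteration protocol is an assumption, not confirmed by the listing:

    def assert_bulks_sized(self, reader, expected_bulk_sizes, expected_line_sizes=None):
        # Hypothetical reconstruction; the reader's exact protocol is assumed.
        with reader:
            bulk_index = 0
            for index, type_name, batch in reader:
                for doc_count, bulk_lines in batch:
                    self.assertEqual(expected_bulk_sizes[bulk_index], doc_count)
                    if expected_line_sizes is not None:
                        self.assertEqual(expected_line_sizes[bulk_index], len(bulk_lines))
                    bulk_index += 1
            self.assertEqual(len(expected_bulk_sizes), bulk_index)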
Example #2
    def test_read_bulks_and_assume_metadata_line_in_source_file(self):
        data = [
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value1"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value2"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value3"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value4"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value5"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value6"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value7"}'
        ]
        bulk_size = 3

        source = params.Slice(io.StringAsFileSource, 0, len(data))
        am_handler = params.SourceActionMetaData(source)

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        expected_bulk_sizes = [3, 3, 1]
        # lines should include meta-data
        expected_line_sizes = [6, 6, 2]
        self.assert_bulks_sized(reader, expected_bulk_sizes,
                                expected_line_sizes)
Example #3
    def test_read_bulk_with_offset(self):
        data = [
            '{"key": "value1"}', '{"key": "value2"}', '{"key": "value3"}',
            '{"key": "value4"}', '{"key": "value5"}'
        ]
        bulk_size = 50

        source = params.Slice(io.StringAsFileSource, 3, len(data))
        am_handler = params.GenerateActionMetaData("test_index",
                                                   "test_type",
                                                   conflicting_ids=None)

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        expected_bulk_sizes = [(len(data) - 3)]
        # lines should include meta-data
        expected_line_sizes = [(len(data) - 3) * 2]
        self.assert_bulks_sized(reader, expected_bulk_sizes,
                                expected_line_sizes)
Example #4
    def test_read_bulks_and_assume_metadata_line_in_source_file(self):
        data = [
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value1"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value2"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value3"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value4"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value5"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value6"}',
            '{"index": {"_index": "test_index", "_type": "test_type"}',
            '{"key": "value7"}'
        ]
        bulk_size = 3

        source = params.Slice(StringAsFileSource, 0, len(data))
        am_handler = params.SourceActionMetaData(source)

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        # always double the document count: each document occupies one line for the data and one for the index command
        expected_bulk_sizes = [6, 6, 2]
        self.assert_bulks_sized(reader, expected_bulk_sizes)
Example #5
    def test_read_bulk_smaller_than_number_of_docs_and_multiple_clients(self):
        data = [
            '{"key": "value1"}',
            '{"key": "value2"}',
            '{"key": "value3"}',
            '{"key": "value4"}',
            '{"key": "value5"}',
            '{"key": "value6"}',
            '{"key": "value7"}',
        ]
        bulk_size = 3

        # only 5 documents to index for this client
        source = params.Slice(StringAsFileSource, 0, 5)
        am_handler = params.GenerateActionMetaData("test_index",
                                                   "test_type",
                                                   conflicting_ids=None)

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        # always double the document count: each document occupies one line for the data and one for the index command
        expected_bulk_sizes = [6, 4]
        self.assert_bulks_sized(reader, expected_bulk_sizes)
Example #6
    def test_slice_with_slice_larger_than_source(self):
        source = params.Slice(io.StringAsFileSource, 0, 5)
        data = [
            '{"key": "value1"}',
            '{"key": "value2"}',
            '{"key": "value3"}',
        ]

        source.open(data, "r")
        self.assertEqual(data, list(source))
        source.close()
Example #7
    def test_slice_with_source_larger_than_slice(self):
        source = params.Slice(StringAsFileSource, 2, 5)
        data = [
            '{"key": "value1"}', '{"key": "value2"}', '{"key": "value3"}',
            '{"key": "value4"}', '{"key": "value5"}', '{"key": "value6"}',
            '{"key": "value7"}', '{"key": "value8"}', '{"key": "value9"}',
            '{"key": "value10"}'
        ]

        source.open(data, "r")
        self.assertEqual(data[2:7], list(source))
        source.close()
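
Taken together, the two slice tests pin down the contract of params.Slice: on open, skip offset lines, then yield at most number_of_lines lines, stopping early if the underlying source is exhausted. A minimal sketch consistent with that contract (an illustration, not the actual implementation; the source-class interface with open/readline/close is assumed):

class Slice:
    def __init__(self, source_class, offset, number_of_lines):
        self.source_class = source_class
        self.source = None
        self.offset = offset
        self.number_of_lines = number_of_lines
        self.current_line = 0

    def open(self, file_name, mode):
        self.source = self.source_class(file_name, mode).open()
        # skip the first `offset` lines
        for _ in range(self.offset):
            self.source.readline()
        return self

    def close(self):
        self.source.close()
        self.source = None

    def __iter__(self):
        return self

    def __next__(self):
        # stop at the slice boundary or when the source runs dry,
        # whichever comes first
        if self.current_line >= self.number_of_lines:
            raise StopIteration()
        line = self.source.readline()
        if not line:
            raise StopIteration()
        self.current_line += 1
        return line.strip()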
Example #8
def create_reader(bulk_size):
    metadata = params.GenerateActionMetaData(index_name="test-idx",
                                             type_name=None)

    source = params.Slice(StaticSource, 0, sys.maxsize)
    reader = params.MetadataIndexDataReader(data_file="bogus",
                                            batch_size=bulk_size,
                                            bulk_size=bulk_size,
                                            file_source=source,
                                            action_metadata=metadata,
                                            index_name="test-idx",
                                            type_name=None)
    return reader
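
Neither StaticSource nor the consuming side appears in the listing; the sys.maxsize slice end suggests a source that fabricates document lines without a natural end. A hedged usage sketch under the same batch-protocol assumption as above:

reader = create_reader(bulk_size=5000)
with reader:
    # Pull a single batch only; with up to sys.maxsize lines available,
    # draining the reader to exhaustion would effectively never terminate.
    index, type_name, batch = next(iter(reader))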
Example #9
    def test_source_file_action_meta_data(self):
        source = params.Slice(io.StringAsFileSource, 0, 5)
        generator = params.SourceActionMetaData(source)

        data = [
            '{"index": {"_index": "test_index", "_type": "test_type", "_id": "1"}}',
            '{"index": {"_index": "test_index", "_type": "test_type", "_id": "2"}}',
            '{"index": {"_index": "test_index", "_type": "test_type", "_id": "3"}}',
            '{"index": {"_index": "test_index", "_type": "test_type", "_id": "4"}}',
            '{"index": {"_index": "test_index", "_type": "test_type", "_id": "5"}}',
        ]

        source.open(data, "r")
        self.assertEqual(data, list(generator))
        source.close()
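
The assertion implies that SourceActionMetaData adds nothing of its own: iterating it replays exactly the meta-data lines already present in the source. A sketch of that observable behavior (the real class may differ in detail):

class SourceActionMetaData:
    def __init__(self, source):
        self.source = source

    def __iter__(self):
        # replay the action lines verbatim from the underlying source
        return iter(self.source)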
Example #10
    def test_read_bulk_larger_than_number_of_docs(self):
        data = [
            '{"key": "value1"}',
            '{"key": "value2"}',
            '{"key": "value3"}',
            '{"key": "value4"}',
            '{"key": "value5"}'
        ]
        bulk_size = 50

        source = params.Slice(io.StringAsFileSource, 0, len(data))
        am_handler = params.GenerateActionMetaData("test_index", "test_type", conflicting_ids=None)

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        expected_bulk_sizes = [len(data) * 2]
        self.assert_bulks_sized(reader, expected_bulk_sizes)
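
GenerateActionMetaData explains the factor of two in expected_bulk_sizes: it synthesizes one action line per document, so each document occupies two lines in the bulk body. A simplified sketch of the no-conflicting-ids path exercised above (hypothetical; the real class also handles conflicting IDs):

class GenerateActionMetaData:
    def __init__(self, index_name, type_name, conflicting_ids=None):
        self.index_name = index_name
        self.type_name = type_name
        self.conflicting_ids = conflicting_ids

    def __iter__(self):
        return self

    def __next__(self):
        # without conflicting ids, every document gets the same index command
        return '{"index": {"_index": "%s", "_type": "%s"}}' % (self.index_name, self.type_name)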
Example #11
    def test_read_bulks_and_assume_no_metadata(self):
        data = [
            '{"key": "value1"}',
            '{"key": "value2"}',
            '{"key": "value3"}',
            '{"key": "value4"}',
            '{"key": "value5"}',
            '{"key": "value6"}',
            '{"key": "value7"}'
        ]
        bulk_size = 3

        source = params.Slice(io.StringAsFileSource, 0, len(data))
        am_handler = params.NoneActionMetaData()

        reader = params.IndexDataReader(data,
                                        batch_size=bulk_size,
                                        bulk_size=bulk_size,
                                        file_source=source,
                                        action_metadata=am_handler,
                                        index_name="test_index",
                                        type_name="test_type")

        # no meta-data, hence line counts and bulk sizes need to be identical
        expected_bulk_sizes = [3, 3, 1]
        self.assert_bulks_sized(reader, expected_bulk_sizes, expected_bulk_sizes)
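
NoneActionMetaData is presumably the degenerate case: it signals that no action line should be interleaved, which is why bulk sizes and line counts coincide. One plausible shape:

class NoneActionMetaData:
    def __iter__(self):
        return self

    def __next__(self):
        # no meta-data line for any document
        return None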