コード例 #1
0
    def test__ensure_list_no_pandas(self):
        """Check ``ElasticBuffer._ensure_list`` on plain (non-pandas) inputs.

        The first fixture document on its own is expected to come back wrapped
        in a one-element list, and the full fixture list is expected to come
        back equal to itself. A string and a set are expected to raise
        ``ValueError``.

        Every case runs inside ``self.subTest`` so that one failing case is
        reported on its own instead of aborting the cases that follow it.
        """
        class TestCase:
            """One table row: the input and the list expected back."""

            def __init__(self, docs_in, expected_docs):
                self.docs_in = docs_in
                self.expected_docs = expected_docs

        tests_success = {
            'dict': TestCase(
                docs_in=self.docs[0],
                expected_docs=[self.docs[0]],
            ),
            'list': TestCase(
                docs_in=self.docs,
                expected_docs=self.docs,
            ),
        }

        # expected_docs is never read for these cases; only the exception matters.
        tests_fail = {
            'string input': TestCase(
                docs_in='docs',
                expected_docs=None,
            ),
            'set input': TestCase(
                docs_in={'docs'},
                expected_docs=None,
            ),
        }

        for test_name, test in tests_success.items():
            with self.subTest(msg=test_name):
                docs_out = ElasticBuffer._ensure_list(test.docs_in)
                self.assertListEqual(docs_out, test.expected_docs, test_name)

        for test_name, test in tests_fail.items():
            with self.subTest(msg=test_name):
                with self.assertRaises(ValueError, msg=test_name):
                    ElasticBuffer._ensure_list(test.docs_in)
コード例 #2
0
            def __init__(self, n_success=0, bulk_errs=None, side_effect=None):
                """Build a pre-filled buffer and record the mock settings for a case.

                ``n_success`` and ``bulk_errs`` are paired into ``return_value``
                and ``side_effect`` is stored unchanged. Presumably both are
                later installed on a mocked bulk call -- confirm in the test body.
                """
                # Mock configuration carried by this case.
                self.side_effect = side_effect
                self.return_value = (n_success, bulk_errs)

                # Buffer under test, seeded from the class-level fixtures.
                # NOTE(review): _buffer refers to the same list object as
                # TestElasticBuffer.docs (no copy is made).
                seeded = ElasticBuffer()
                seeded._buffer = TestElasticBuffer.docs
                seeded._oldest_doc_timestamp = TestElasticBuffer.timestamp
                self.eb = seeded
コード例 #3
0
    def test__to_file(self, mocked_file):
        """Verify ``_to_file`` opens the expected dump path and writes each doc.

        The expected path is ``<dump_dir>/<ClassName>_buffer_dump_<timestamp>``
        opened in ``'w'`` mode, and every buffered document is expected to be
        written as its JSON serialization followed by a newline, in order.
        """
        target_dir = '/tmp'
        buffer = ElasticBuffer(dump_dir=target_dir)

        dump_name = f'{buffer.__class__.__name__}_buffer_dump_{self.timestamp}'
        expected_dump_file = os.path.join(target_dir, dump_name)

        buffer.add(self.docs)
        buffer._to_file(timestamp=self.timestamp)

        mocked_file.assert_called_once_with(expected_dump_file, 'w')

        # Calling the mock again is how the test reaches the handle whose
        # ``write`` calls were recorded; done only after the call-count check.
        file_handle = mocked_file()
        self.assertEqual(file_handle.write.call_count, len(self.docs),
                         'write should be called once for every document')

        expected_write_call_args = [
            json.dumps(doc) + '\n' for doc in self.docs
        ]
        # Each recorded call unpacks to (positional args, keyword args); the
        # written text is the first positional argument.
        write_call_args = [
            positional[0]
            for positional, _keywords in file_handle.write.call_args_list
        ]
        self.assertListEqual(
            write_call_args, expected_write_call_args,
            'write should be called with each document (json serialized and newline)'
        )
コード例 #4
0
    def test_context_error(self, mock_flush):
        """Check flush behaviour when the ``with`` block exits via an exception.

        ``mock_flush`` is set to raise ``ElasticBufferFlushError`` whenever it
        is called. For cases that expect no flush call, the ``ValueError``
        raised at the end of the ``with`` body must propagate and the mock must
        stay uncalled. For the case that expects one flush call (triggered from
        ``add``), ``ElasticBufferFlushError`` must propagate and the mock must
        not be called a second time on exit.

        Every case runs inside ``self.subTest`` so that one failing case is
        reported on its own instead of aborting the cases that follow it.
        """
        class TestCase:
            """One table row: docs to add, buffer size, expected flush calls."""

            def __init__(self, n_docs, buffer_size, n_expected_flush_calls):
                self.n_docs = n_docs
                self.buffer_size = buffer_size
                self.n_expected_flush_calls = n_expected_flush_calls

        tests = {
            'flush is not called on exit due to exception with empty buffer':
            TestCase(
                n_docs=0,
                buffer_size=10,
                n_expected_flush_calls=0,
            ),
            'flush is not called on exit due to exception with populated buffer':
            TestCase(
                n_docs=5,
                buffer_size=10,
                n_expected_flush_calls=0,
            ),
            'flush is called once when buffer is full but not again on exit':
            TestCase(
                n_docs=5,
                buffer_size=2,
                n_expected_flush_calls=1,
            ),
        }

        default_err = ValueError

        for test_name, test in tests.items():
            with self.subTest(msg=test_name):
                mock_flush.reset_mock()
                mock_flush.side_effect = ElasticBufferFlushError
                # If a flush is expected, the mocked flush raises first;
                # otherwise the explicit raise below is what escapes.
                if test.n_expected_flush_calls > 0:
                    err = ElasticBufferFlushError
                else:
                    err = default_err
                docs = [self.docs[0]] * test.n_docs

                with self.assertRaises(err, msg=test_name):
                    with ElasticBuffer(size=test.buffer_size) as eb:
                        eb.add(docs)
                        # only raised when eb.add does not result in an Exception
                        raise default_err()
                self.assertEqual(mock_flush.call_count,
                                 test.n_expected_flush_calls, test_name)
コード例 #5
0
    def test_context_success(self, mock_flush):
        """Check how often flush is called when the ``with`` block exits cleanly.

        Inside the context, ``mock_flush`` is given ``eb._clear_buffer`` as its
        side effect, so each mocked flush call runs that method. The table
        expects one flush call on exit, plus one more when the added documents
        exceed the buffer size.

        Every case runs inside ``self.subTest`` so that one failing case is
        reported on its own instead of aborting the cases that follow it.
        """
        class TestCase:
            """One table row: docs to add, buffer size, expected flush calls."""

            def __init__(self, n_docs, buffer_size, n_expected_flush_calls):
                self.n_docs = n_docs
                self.buffer_size = buffer_size
                self.n_expected_flush_calls = n_expected_flush_calls

        tests = {
            'flush is called on exit with empty buffer':
            TestCase(
                n_docs=0,
                buffer_size=10,
                n_expected_flush_calls=1,
            ),
            'flush is called on exit with populated buffer':
            TestCase(
                n_docs=5,
                buffer_size=10,
                n_expected_flush_calls=1,
            ),
            'flush is called once when buffer is full and once on exit':
            TestCase(
                n_docs=5,
                buffer_size=2,
                n_expected_flush_calls=2,
            ),
        }

        for test_name, test in tests.items():
            with self.subTest(msg=test_name):
                mock_flush.reset_mock()

                docs = ['a'] * test.n_docs
                with ElasticBuffer(size=test.buffer_size) as eb:
                    # Bound per instance so the mock clears this buffer.
                    mock_flush.side_effect = eb._clear_buffer
                    eb.add(docs)
                self.assertEqual(mock_flush.call_count,
                                 test.n_expected_flush_calls, test_name)
コード例 #6
0
            def __init__(
                self,
                documents,
                documents_timestamp,
                expected_buffer,
                expected_oldest_doc_timestamp,
                expected_flush_called,
                buffer_size=10,
                # used for multiple adds
                more_documents=None,
                more_documents_timestamp=None,
            ):
                """Record one test case and create the buffer it runs against.

                Args:
                    documents: Stored as ``self.documents``.
                    documents_timestamp: Stored as ``self.documents_timestamp``.
                    expected_buffer: Stored as ``self.expected_buffer``.
                    expected_oldest_doc_timestamp: Stored as
                        ``self.expected_oldest_doc_timestamp``.
                    expected_flush_called: Stored as
                        ``self.expected_flush_called``.
                    buffer_size: Passed as ``size`` to ``ElasticBuffer``.
                    more_documents: Optional second batch; stored as an empty
                        list when omitted.
                    more_documents_timestamp: Optional timestamp for the second
                        batch; stored as given (``None`` when omitted).
                """
                self.documents = documents
                self.documents_timestamp = documents_timestamp

                self.expected_buffer = expected_buffer
                self.expected_oldest_doc_timestamp = expected_oldest_doc_timestamp
                self.expected_flush_called = expected_flush_called

                # A fresh list per instance avoids a shared mutable default.
                self.more_documents = [] if more_documents is None else more_documents
                # No substitution is needed here: None already means "not given".
                self.more_documents_timestamp = more_documents_timestamp

                self.eb = ElasticBuffer(size=buffer_size)
コード例 #7
0
    def test_flush_success(self, mock_bulk):
        """Verify a successful ``flush`` hands the buffer to bulk and resets state.

        ``mock_bulk`` is set to return ``(len(self.docs), [])``. After the
        flush, the second positional argument of the bulk call must equal the
        fixture documents, the buffer must be empty and the oldest-document
        timestamp must be ``None``.
        """
        mock_bulk.return_value = (len(self.docs), [])

        buffer = ElasticBuffer()
        buffer._buffer = self.docs
        buffer._oldest_doc_timestamp = self.timestamp
        buffer.flush()

        # assert contents of buffer were passed to bulk
        # (the call is expected to carry exactly two positional arguments)
        positional_args, _keyword_args = mock_bulk.call_args
        _first_arg, called_docs = positional_args
        self.assertListEqual(
            called_docs, self.docs,
            'contents of buffer should have been passed to bulk')

        # assert state was cleared
        self.assertListEqual(buffer._buffer, [],
                             'buffer should be empty after successful insert')
        self.assertIsNone(buffer._oldest_doc_timestamp,
                          'timestamp should be None after successful insert')
コード例 #8
0
 def __init__(self, n_items):
     """Create a buffer whose ``_buffer`` holds ``n_items`` placeholder entries.

     When ``n_items`` is not positive the buffer's ``_buffer`` is left as
     ``ElasticBuffer()`` created it.
     """
     buffer = ElasticBuffer()
     if n_items > 0:
         buffer._buffer = ['a'] * n_items
     self.eb = buffer
     self.n_items = n_items
コード例 #9
0
 def __init__(self, buf, timestamp):
     """Create a buffer, overriding its state only with truthy arguments.

     ``buf`` replaces ``_buffer`` and ``timestamp`` replaces
     ``_oldest_doc_timestamp``; a falsy value leaves the corresponding
     attribute as ``ElasticBuffer()`` created it.
     """
     buffer = ElasticBuffer()
     if buf:
         buffer._buffer = buf
     # NOTE(review): a timestamp of 0 is falsy and is therefore not applied;
     # confirm no case relies on passing 0.
     if timestamp:
         buffer._oldest_doc_timestamp = timestamp
     self.eb = buffer
コード例 #10
0
            def __init__(self, oldest_doc_timestamp, timestamp, expected=None):
                """Record one case and a buffer with a preset oldest-doc timestamp.

                ``oldest_doc_timestamp`` is written to the new buffer's
                ``_oldest_doc_timestamp``; ``timestamp`` and ``expected`` are
                stored on the case unchanged.
                """
                buffer = ElasticBuffer()
                buffer._oldest_doc_timestamp = oldest_doc_timestamp
                self.eb = buffer

                self.expected = expected
                self.timestamp = timestamp
コード例 #11
0
    def test__ensure_list_with_pandas(self):
        """Check ``ElasticBuffer._ensure_list`` on pandas Series and DataFrames.

        The table expects each row to become one dict: an unnamed Series is
        keyed by ``0``, a named Series by its name, and a named index adds an
        entry under the index name. DataFrames built from the fixture
        documents, with or without ``'c'`` as the index, are expected to come
        back equal to those documents.

        Every case runs inside ``self.subTest`` so that one failing case is
        reported on its own instead of aborting the cases that follow it.
        """
        series_list = [doc['c'] for doc in self.docs]
        first_item = series_list[0]

        class TestCase:
            """One table row: the pandas input and the list expected back."""

            def __init__(self, docs_in, expected_docs):
                self.docs_in = docs_in
                self.expected_docs = expected_docs

        tests = {
            'series':
            TestCase(
                docs_in=pd.Series(series_list),
                expected_docs=[{0: item} for item in series_list],
            ),
            'named series':
            TestCase(
                docs_in=pd.Series(series_list).rename('my_name'),
                expected_docs=[{'my_name': item} for item in series_list],
            ),
            'series with named index':
            TestCase(
                docs_in=pd.Series(series_list).rename_axis('my_axis', axis=0),
                expected_docs=[{
                    0: item,
                    'my_axis': i
                } for i, item in enumerate(series_list)],
            ),
            'named series with named index':
            TestCase(
                docs_in=pd.Series(series_list).rename_axis(
                    'my_axis', axis=0).rename('my_name'),
                expected_docs=[{
                    'my_name': item,
                    'my_axis': i
                } for i, item in enumerate(series_list)],
            ),
            'series with single row':
            TestCase(
                docs_in=pd.Series(first_item),
                expected_docs=[{0: first_item}],
            ),
            'named series with single row':
            TestCase(
                docs_in=pd.Series(first_item).rename('my_name'),
                expected_docs=[{'my_name': first_item}],
            ),
            'named series with single row with named index':
            TestCase(
                docs_in=pd.Series(first_item).rename_axis(
                    'my_index', axis=0).rename('my_name'),
                # The only row sits at index position 0.
                expected_docs=[{'my_name': first_item, 'my_index': 0}],
            ),
            'dataframe':
            TestCase(
                docs_in=pd.DataFrame(self.docs),
                expected_docs=self.docs,
            ),
            'dataframe with named index':
            TestCase(
                docs_in=pd.DataFrame(self.docs).set_index('c'),
                expected_docs=self.docs,
            ),
            'dataframe with single row':
            TestCase(
                docs_in=pd.DataFrame(self.docs[0], index=[0]),
                expected_docs=[self.docs[0]],
            ),
            'dataframe with single row and named index':
            TestCase(
                docs_in=pd.DataFrame(self.docs[0], index=[0]).set_index('c'),
                expected_docs=[self.docs[0]],
            ),
        }

        for test_name, test in tests.items():
            with self.subTest(msg=test_name):
                docs_out = ElasticBuffer._ensure_list(test.docs_in)
                self.assertListEqual(docs_out, test.expected_docs, test_name)
コード例 #12
0
    def test__apply_metadata_funcs(self):
        """Check ``_apply_metadata_funcs`` with zero, one and two metadata funcs.

        The metadata functions are passed to ``ElasticBuffer`` as keyword
        arguments. The table expects each output document to carry the
        original fields plus one entry per function, keyed by the keyword name
        and holding that function's result for the document.

        Every case runs inside ``self.subTest`` so that one failing case is
        reported on its own instead of aborting the cases that follow it.
        """
        class TestCase:
            """One table row: input docs, metadata funcs, expected output."""

            def __init__(self, docs_in, metadata_funcs, expected_docs):
                self.docs_in = docs_in
                self.metadata_funcs = metadata_funcs
                self.expected_docs = expected_docs

        def _index(doc):
            return 'my-index'

        def _id(doc):
            return sum(doc.values())

        def _input_docs():
            # Built anew on every call so no case shares dict objects with
            # another case or with its own expected output.
            return [
                {'a': 1, 'b': 2},
                {'a': 8, 'b': 9},
            ]

        tests = {
            'no metadata funcs':
            TestCase(
                docs_in=_input_docs(),
                metadata_funcs={},
                expected_docs=[
                    {'a': 1, 'b': 2},
                    {'a': 8, 'b': 9},
                ],
            ),
            'single metadata func':
            TestCase(
                docs_in=_input_docs(),
                metadata_funcs={
                    '_index': _index,
                },
                expected_docs=[
                    {'a': 1, 'b': 2, '_index': 'my-index'},
                    {'a': 8, 'b': 9, '_index': 'my-index'},
                ],
            ),
            'multiple metadata funcs':
            TestCase(
                docs_in=_input_docs(),
                metadata_funcs={
                    '_index': _index,
                    '_id': _id,
                },
                expected_docs=[
                    {'a': 1, 'b': 2, '_index': 'my-index', '_id': 3},
                    {'a': 8, 'b': 9, '_index': 'my-index', '_id': 17},
                ],
            ),
        }

        for test_name, test in tests.items():
            with self.subTest(msg=test_name):
                eb = ElasticBuffer(**test.metadata_funcs)
                docs_out = eb._apply_metadata_funcs(test.docs_in)
                self.assertListEqual(docs_out, test.expected_docs, test_name)
コード例 #13
0
 def test_flush_empty_buffer(self, mock_bulk):
     """Flushing a freshly constructed buffer must leave ``mock_bulk`` uncalled."""
     untouched_buffer = ElasticBuffer()
     untouched_buffer.flush()
     mock_bulk.assert_not_called()