def start_agent() -> None:
    """Run :class:`FulltextRecordProcessor` inside the web app's context."""
    web_app = create_web_app()
    with web_app.app_context():
        settings = web_app.config
        # The configuration is handed over twice: once positionally and once
        # more under the ``config`` key of the ``extra`` mapping.
        process_stream(
            FulltextRecordProcessor,
            settings,
            extra={"config": settings},
        )
def process_stream(duration: Optional[int] = None) -> None:
    """
    Configure and run the record processor.

    Delegates to ``agent.process_stream`` using
    :class:`MetadataRecordProcessor` and the application configuration.

    Parameters
    ----------
    duration : int
        How long (in seconds) record processing should run. When None (the
        default), processing runs "forever".

    """
    # Configuration is taken from the Flask application instance, which is
    # also used to manage integrations with the metadata service and the
    # search index.
    # NOTE(review): ``app`` is not bound inside this function; presumably it
    # is a module-level application object -- confirm.
    run_processor = agent.process_stream
    run_processor(MetadataRecordProcessor, app.config, duration=duration)
def test_process_stream(self, mock_tasks, mock_client_factory):
    """Drive :func:`.process_stream` with a :class:`.ExtractionAgent`."""
    # The client factory hands back a mock client with a fixed shard iterator.
    client = mock.MagicMock()
    client.get_shard_iterator.return_value = {'ShardIterator': '1'}
    mock_client_factory.return_value = client

    class FakeStream(object):
        """Serve records in batches of at most 10, up to 20 in total."""

        def __init__(self):
            self.yielded = 0
            self.max_records = 20

        def get_records(self, *args, **kwargs):
            """Build the next batch and advance the ``yielded`` counter."""
            start = self.yielded
            stop = min(self.max_records, start + 10)
            batch = []
            for number in range(start, stop):
                payload = json.dumps({'document_id': f'{number}v5'})
                batch.append({
                    'SequenceNumber': str(number),
                    'Data': payload.encode('utf-8'),
                })
            self.yielded = stop
            return {"Records": batch, "NextShardIterator": str(stop + 1)}

    class FakeCheckpoint(object):
        """Remember the most recent position passed to ``checkpoint``."""

        def __init__(self):
            self.position = None

        def checkpoint(self, position):
            self.position = position

    stream = FakeStream()
    client.get_records.side_effect = stream.get_records

    # NOTE(review): the trailing ``20`` is passed positionally; its meaning
    # is defined by ``process_stream`` -- confirm against its signature.
    process_stream(consumer.ExtractionAgent, self.config, FakeCheckpoint(), 20)

    calls = mock_tasks.process_document.call_count
    self.assertEqual(calls, 20,
                     "Should call process_document for each record")
    self.assertEqual(calls, stream.yielded,
                     "Should call process_document for each record")
def start_agent() -> None:
    """Run :class:`ExtractionAgent` inside the web app's context."""
    web_app = create_web_app()
    with web_app.app_context():
        # The application's own config mapping configures the processor.
        settings = web_app.config
        process_stream(ExtractionAgent, settings)