コード例 #1
0
ファイル: __main__.py プロジェクト: arXiv/arxiv-fulltext
def start_agent() -> None:
    """
    Launch the full-text record processor.

    Builds the web application, enters its application context, and hands
    the application config to ``process_stream`` together with
    ``FulltextRecordProcessor``. The same config is also forwarded under
    the ``config`` key of the ``extra`` keyword argument.
    """
    web_app = create_web_app()
    with web_app.app_context():
        settings = web_app.config
        process_stream(
            FulltextRecordProcessor,
            settings,
            extra={'config': settings},
        )
コード例 #2
0
ファイル: __init__.py プロジェクト: katsby-skye/arxiv-search
def process_stream(duration: Optional[int] = None) -> None:
    """
    Set up and run the metadata record processor.

    Parameters
    ----------
    duration : int or None
        How long (in seconds) record processing should run. When None
        (the default), processing runs "forever".

    """
    # Configuration comes from the Flask application instance, which also
    # manages the integrations with the metadata service and search index.
    settings = app.config
    agent.process_stream(
        MetadataRecordProcessor,
        settings,
        duration=duration,
    )
コード例 #3
0
    def test_process_stream(self, mock_tasks, mock_client_factory):
        """
        Drive :func:`.process_stream` with a :class:`.ExtractionAgent`.

        A stubbed client serves 20 records in batches of at most 10; the
        test then checks that ``process_document`` was called 20 times,
        matching the number of records the stub handed out.
        """

        class Stream(object):
            """Record source that hands out ``max_records`` records in total."""

            def __init__(self):
                self.max_records = 20
                self.yielded = 0

            @staticmethod
            def _record(i):
                """Build the record dict for sequence number ``i``."""
                payload = json.dumps({'document_id': f'{i}v5'})
                return {
                    'SequenceNumber': f'{i}',
                    'Data': payload.encode('utf-8')
                }

            def get_records(self, *args, **kwargs):
                """Return the next batch (up to 10 records) and advance."""
                first = self.yielded
                to_yield = min(self.max_records, first + 10)
                self.yielded = to_yield
                # Once all records are served the range is empty, so the
                # "Records" list comes back empty on later calls.
                return {
                    "Records": [self._record(i)
                                for i in range(first, to_yield)],
                    "NextShardIterator": f"{to_yield + 1}"
                }

        class Checkpoint(object):
            """Remember the most recent position passed to ``checkpoint``."""

            def __init__(self):
                self.position = None

            def checkpoint(self, position):
                self.position = position

        # Wire the stub stream into the mocked client before handing the
        # client to the factory.
        stream = Stream()
        mock_client = mock.MagicMock()
        mock_client.get_shard_iterator.return_value = {'ShardIterator': '1'}
        mock_client.get_records.side_effect = stream.get_records
        mock_client_factory.return_value = mock_client

        process_stream(consumer.ExtractionAgent, self.config, Checkpoint(), 20)

        processed = mock_tasks.process_document.call_count
        self.assertEqual(processed, 20,
                         "Should call process_document for each record")
        self.assertEqual(processed, stream.yielded,
                         "Should call process_document for each record")
コード例 #4
0
def start_agent() -> None:
    """
    Launch the record processor.

    Builds the web application, enters its application context, and runs
    ``process_stream`` with ``ExtractionAgent`` and the application config.
    """
    web_app = create_web_app()
    with web_app.app_context():
        settings = web_app.config
        process_stream(ExtractionAgent, settings)