def create_worker_app() -> Flask:
    """Initialize an instance of the worker application."""
    # Quiet down noisy AWS SDK loggers.
    logging.getLogger('boto').setLevel(logging.ERROR)
    logging.getLogger('boto3').setLevel(logging.ERROR)
    logging.getLogger('botocore').setLevel(logging.ERROR)

    flask_app = Flask('references')
    flask_app.config.from_pyfile('config.py')

    # Propagate the Flask configuration to the Celery application.
    celery_app.conf.update(flask_app.config)

    data_store.init_app(flask_app)
    cermine.init_app(flask_app)
    grobid.init_app(flask_app)
    refextract.init_app(flask_app)
    retrieve.init_app(flask_app)
    return flask_app
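
# A minimal sketch (not part of the original module) of how a Celery worker
# entrypoint might use the factory above: build the Flask app so that
# ``celery_app`` picks up its configuration, then push an application context
# so the services initialized in create_worker_app() are available to tasks.
# The helper name is hypothetical.
def bootstrap_worker() -> Flask:
    flask_app = create_worker_app()
    flask_app.app_context().push()
    return flask_app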
def test_raw_extractions_integration(self, mock_app):
    document_id = '123.4566v8'
    extractor = 'baz_extractor'
    mock_app.config = {
        'DYNAMODB_ENDPOINT': 'https://localhost:4569',
        'DYNAMODB_VERIFY': 'false'
    }
    mock_app._get_current_object = mock.MagicMock(return_value=mock_app)
    data_store.init_app(mock_app)
    data_store.init_db()

    data_store.store_raw_extraction(document_id, extractor, valid_data)
    data = data_store.get_raw_extraction(document_id, extractor)
    self.assertEqual(data['document'], document_id)
    self.assertEqual(data['extractor'], extractor)
    self.assertListEqual(data['references'], valid_data)
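
# ``valid_data`` above is a fixture defined elsewhere in the test module and
# is not shown here. A hypothetical stand-in, assuming the data store accepts
# a list of reference metadata dicts, might look like the following; the
# variable name and field names are illustrative only.
valid_data_example = [
    {'raw': 'Doe J. (2017). An example reference. Journal of Examples 1(2).',
     'title': 'An example reference'},
]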
def create_web_app() -> Flask:
    """Initialize an instance of the extractor backend service."""
    # Quiet down noisy AWS SDK loggers.
    logging.getLogger('boto').setLevel(logging.ERROR)
    logging.getLogger('boto3').setLevel(logging.ERROR)
    logging.getLogger('botocore').setLevel(logging.ERROR)

    app = Flask('references', static_folder='static',
                template_folder='templates')
    app.config.from_pyfile('config.py')

    # Register the arXiv identifier URL converter.
    from arxiv.base.converter import ArXivConverter
    app.url_map.converters['arxiv'] = ArXivConverter

    data_store.init_app(app)
    cermine.init_app(app)
    grobid.init_app(app)
    refextract.init_app(app)
    retrieve.init_app(app)

    app.register_blueprint(routes.blueprint)
    return app
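
# A minimal sketch (not part of the original module) of serving the factory
# above with Flask's built-in development server. The helper name and port
# are arbitrary choices for illustration; a production deployment would point
# a WSGI server at create_web_app() instead.
def run_dev_server(port: int = 8000) -> None:
    app = create_web_app()
    app.run(debug=True, port=port)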
def test_process_record(self, mock_app):
    """Initiate extraction via the agent."""
    mock_app.config = {
        'DYNAMODB_ENDPOINT': DYNAMODB_ENDPOINT,
        'DYNAMODB_VERIFY': DYNAMODB_VERIFY,
        'CLOUDWATCH_ENDPOINT': CLOUDWATCH_ENDPOINT,
        'CLOUDWATCH_VERIFY': CLOUDWATCH_VERIFY,
        'AWS_REGION': AWS_REGION,
        'RAW_TABLE_NAME': RAW_TABLE_NAME,
        'EXTRACTIONS_TABLE_NAME': EXTRACTIONS_TABLE_NAME,
        'REFERENCES_TABLE_NAME': REFERENCES_TABLE_NAME,
        'INSTANCE_CREDENTIALS': '',
        'AWS_ACCESS_KEY_ID': AWS_ACCESS_KEY_ID,
        'AWS_SECRET_ACCESS_KEY': AWS_SECRET_ACCESS_KEY,
    }
    mock_app._get_current_object = mock.MagicMock(return_value=mock_app)

    from references.services import data_store
    data_store.init_app(mock_app)
    data_store.init_db()

    document_id = '1606.00123'
    payload = json.dumps({
        "document_id": document_id,
        "url": "https://arxiv.org/pdf/%s" % document_id
    }).encode('utf-8')
    self.client.put_record(StreamName='PDFIsAvailable', Data=payload,
                           PartitionKey='0')

    time.sleep(30)
    target = urljoin(EXTRACTION_ENDPOINT, '/references/%s' % document_id)
    response = self._session.get(target)
    retries = 0
    while response.status_code != 200:
        if retries > 5:
            self.fail('Record not processed')
        time.sleep(10)
        response = self._session.get(target)
        retries += 1
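
# The test above assumes ``self.client`` is a Kinesis client pointed at the
# same local/test endpoint as the agent. A sketch of how it might be
# constructed; the KINESIS_ENDPOINT and KINESIS_VERIFY names are assumptions
# for illustration, mirroring the DynamoDB settings used elsewhere in the
# suite.
def make_kinesis_client():
    return boto3.client('kinesis',
                        verify=KINESIS_VERIFY,
                        region_name=AWS_REGION,
                        endpoint_url=KINESIS_ENDPOINT,
                        aws_access_key_id=AWS_ACCESS_KEY_ID,
                        aws_secret_access_key=AWS_SECRET_ACCESS_KEY)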
def setUpClass(cls, mock_app):
    """Verify that the extraction service is up and configure the data store."""
    status_endpoint = urljoin(EXTRACTION_ENDPOINT, "/status")
    logger.debug('Check status at %s' % status_endpoint)
    cls._session = requests.Session()
    cls._adapter = requests.adapters.HTTPAdapter(
        max_retries=Retry(connect=30, read=10, backoff_factor=5))
    cls._session.mount('http://', cls._adapter)
    response = cls._session.get(status_endpoint, timeout=1)
    if response.status_code != 200:
        raise IOError('Extraction service is not available at %s'
                      % status_endpoint)

    mock_app.config = {
        'DYNAMODB_ENDPOINT': DYNAMODB_ENDPOINT,
        'DYNAMODB_VERIFY': DYNAMODB_VERIFY,
        'CLOUDWATCH_ENDPOINT': CLOUDWATCH_ENDPOINT,
        'CLOUDWATCH_VERIFY': CLOUDWATCH_VERIFY,
        'AWS_REGION': AWS_REGION,
        'RAW_TABLE_NAME': RAW_TABLE_NAME,
        'EXTRACTIONS_TABLE_NAME': EXTRACTIONS_TABLE_NAME,
        'REFERENCES_TABLE_NAME': REFERENCES_TABLE_NAME,
        'AWS_ACCESS_KEY_ID': AWS_ACCESS_KEY_ID,
        'AWS_SECRET_ACCESS_KEY': AWS_SECRET_ACCESS_KEY,
    }
    mock_app._get_current_object = mock.MagicMock(return_value=mock_app)

    from references.services import data_store
    data_store.init_app(mock_app)
    data_store.init_db()

    cls.dyn = boto3.client('dynamodb',
                           verify=DYNAMODB_VERIFY,
                           region_name=AWS_REGION,
                           endpoint_url=DYNAMODB_ENDPOINT,
                           aws_access_key_id=AWS_ACCESS_KEY_ID,
                           aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                           aws_session_token=AWS_SESSION_TOKEN)
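
# A sketch of how the DynamoDB client created above could be used to block
# until the tables created by data_store.init_db() are actually available
# before the tests run. The table names are the config values used above; the
# waiter is standard boto3, but this helper is illustrative and not part of
# the original suite.
def wait_for_tables(dyn) -> None:
    waiter = dyn.get_waiter('table_exists')
    for table_name in (RAW_TABLE_NAME, EXTRACTIONS_TABLE_NAME,
                       REFERENCES_TABLE_NAME):
        waiter.wait(TableName=table_name)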