예제 #1
0
def create_worker_app() -> Flask:
    """
    Initialize an instance of the worker application.

    A Flask application named ``references`` is created and configured from
    ``config.py``. That configuration is then copied onto the module-level
    ``celery_app``, and every service integration is bound to the Flask app.

    Returns
    -------
    :class:`Flask`
        The configured Flask application. The Celery application is only
        updated as a side effect; it is not the return value.

    """
    # Only let errors through from the AWS client libraries' loggers.
    for noisy_logger in ('boto', 'boto3', 'botocore'):
        logging.getLogger(noisy_logger).setLevel(logging.ERROR)

    flask_app = Flask('references')
    flask_app.config.from_pyfile('config.py')
    # The Celery app shares the Flask app's configuration values.
    celery_app.conf.update(flask_app.config)

    # Bind each service integration to the Flask app.
    data_store.init_app(flask_app)
    cermine.init_app(flask_app)
    grobid.init_app(flask_app)
    refextract.init_app(flask_app)
    retrieve.init_app(flask_app)
    return flask_app
예제 #2
0
 def test_raw_extractions_integration(self, mock_app):
     """Store a raw extraction, read it back, and compare the fields."""
     doc_id, extractor_name = '123.4566v8', 'baz_extractor'

     # Configuration handed to the data store via the mocked application.
     settings = {
         'DYNAMODB_ENDPOINT': 'https://localhost:4569',
         'DYNAMODB_VERIFY': 'false'
     }
     mock_app.config = settings
     # ``_get_current_object()`` hands back the mocked application itself.
     mock_app._get_current_object = mock.MagicMock(return_value=mock_app)

     data_store.init_app(mock_app)
     data_store.init_db()

     # Round trip: write the extraction, then fetch it by the same keys.
     data_store.store_raw_extraction(doc_id, extractor_name, valid_data)
     stored = data_store.get_raw_extraction(doc_id, extractor_name)

     self.assertEqual(stored['document'], doc_id)
     self.assertEqual(stored['extractor'], extractor_name)
     self.assertListEqual(stored['references'], valid_data)
예제 #3
0
def create_web_app() -> Flask:
    """
    Initialize an instance of the extractor backend service.

    A Flask application named ``references`` is created with its static and
    template folders, configured from ``config.py``, given the ``arxiv`` URL
    converter, bound to every service integration, and has the routes
    blueprint registered.

    Returns
    -------
    :class:`Flask`
        The configured web application.

    """
    # Only let errors through from the AWS client libraries' loggers.
    for noisy_logger in ('boto', 'boto3', 'botocore'):
        logging.getLogger(noisy_logger).setLevel(logging.ERROR)

    app = Flask('references', static_folder='static',
                template_folder='templates')
    app.config.from_pyfile('config.py')

    # Function-scope import; makes the ``arxiv`` converter usable in routes.
    from arxiv.base.converter import ArXivConverter
    app.url_map.converters['arxiv'] = ArXivConverter

    # Bind each service integration to the Flask app. ``retrieve`` must be
    # given the app as well, not the ``retrieve`` module itself.
    data_store.init_app(app)
    cermine.init_app(app)
    grobid.init_app(app)
    refextract.init_app(app)
    retrieve.init_app(app)

    app.register_blueprint(routes.blueprint)
    return app
예제 #4
0
    def test_process_record(self, mock_app):
        """Publish a PDF-available record and poll until references exist."""
        # Configuration handed to the data store via the mocked application.
        settings = {
            'DYNAMODB_ENDPOINT': DYNAMODB_ENDPOINT,
            'DYNAMODB_VERIFY': DYNAMODB_VERIFY,
            'CLOUDWATCH_ENDPOINT': CLOUDWATCH_ENDPOINT,
            'CLOUDWATCH_VERIFY': CLOUDWATCH_VERIFY,
            'AWS_REGION': AWS_REGION,
            'RAW_TABLE_NAME': RAW_TABLE_NAME,
            'EXTRACTIONS_TABLE_NAME': EXTRACTIONS_TABLE_NAME,
            'REFERENCES_TABLE_NAME': REFERENCES_TABLE_NAME,
            'INSTANCE_CREDENTIALS': '',
            'AWS_ACCESS_KEY_ID': AWS_ACCESS_KEY_ID,
            'AWS_SECRET_ACCESS_KEY': AWS_SECRET_ACCESS_KEY,
        }
        mock_app.config = settings
        # ``_get_current_object()`` hands back the mocked application itself.
        mock_app._get_current_object = mock.MagicMock(return_value=mock_app)

        from references.services import data_store
        data_store.init_app(mock_app)
        data_store.init_db()

        # Put a record for the document on the ``PDFIsAvailable`` stream.
        document_id = '1606.00123'
        record = {
            "document_id": document_id,
            "url": "https://arxiv.org/pdf/%s" % document_id
        }
        self.client.put_record(StreamName='PDFIsAvailable',
                               Data=json.dumps(record).encode('utf-8'),
                               PartitionKey='0')

        # Wait before the first request, then re-request every ten seconds.
        time.sleep(30)
        target = urljoin(EXTRACTION_ENDPOINT, '/references/%s' % document_id)
        reply = self._session.get(target)
        extra_polls = 0
        while reply.status_code != 200:
            # Give up once six follow-up requests have all been non-200.
            if extra_polls >= 6:
                self.fail('Record not processed')
            extra_polls += 1
            time.sleep(10)
            reply = self._session.get(target)
예제 #5
0
    def setUpClass(cls, mock_app):
        """
        Check that the extraction service responds, then set up storage.

        Requests ``/status`` on ``EXTRACTION_ENDPOINT`` through a retrying
        HTTP session, initializes ``data_store`` against a mocked application
        configuration, and creates a DynamoDB client stored on ``cls.dyn``.

        Parameters
        ----------
        mock_app
            Object that receives the ``config`` mapping and a stubbed
            ``_get_current_object``. Presumably injected by a patch decorator
            that is not visible here -- TODO confirm.

        Raises
        ------
        IOError
            If the status endpoint answers with anything other than 200.

        """
        status_endpoint = urljoin(EXTRACTION_ENDPOINT, "/status")
        logger.debug('Check status at %s' % status_endpoint)

        # Session shared via ``cls._session``; the retry policy below is
        # mounted for the ``http://`` prefix only.
        cls._session = requests.Session()
        cls._adapter = requests.adapters.HTTPAdapter(
            max_retries=Retry(connect=30, read=10, backoff_factor=5))
        cls._session.mount('http://', cls._adapter)

        # Abort the whole test class if the service does not report 200.
        response = cls._session.get(status_endpoint, timeout=1)
        if response.status_code != 200:
            raise IOError('ack!')

        # Configuration handed to the data store via the mocked application.
        mock_app.config = {
            'DYNAMODB_ENDPOINT': DYNAMODB_ENDPOINT,
            'DYNAMODB_VERIFY': DYNAMODB_VERIFY,
            'CLOUDWATCH_ENDPOINT': CLOUDWATCH_ENDPOINT,
            'CLOUDWATCH_VERIFY': CLOUDWATCH_VERIFY,
            'AWS_REGION': AWS_REGION,
            'RAW_TABLE_NAME': RAW_TABLE_NAME,
            'EXTRACTIONS_TABLE_NAME': EXTRACTIONS_TABLE_NAME,
            'REFERENCES_TABLE_NAME': REFERENCES_TABLE_NAME,
            'AWS_ACCESS_KEY_ID': AWS_ACCESS_KEY_ID,
            'AWS_SECRET_ACCESS_KEY': AWS_SECRET_ACCESS_KEY,
        }
        # ``_get_current_object()`` hands back the mocked application itself.
        mock_app._get_current_object = mock.MagicMock(return_value=mock_app)

        from references.services import data_store
        data_store.init_app(mock_app)
        data_store.init_db()
        # Direct DynamoDB client kept on the class as ``cls.dyn``.
        # NOTE(review): AWS_SESSION_TOKEN is passed here but is not part of
        # the config mapping given to ``data_store`` above -- confirm intent.
        cls.dyn = boto3.client('dynamodb', verify=DYNAMODB_VERIFY,
                               region_name=AWS_REGION,
                               endpoint_url=DYNAMODB_ENDPOINT,
                               aws_access_key_id=AWS_ACCESS_KEY_ID,
                               aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                               aws_session_token=AWS_SESSION_TOKEN)