Beispiel #1
0
    def test_with_valid_token(self, mock_controller):
        """A token with the public read scope gets a schema-valid 200."""
        # Canned controller payload: one mock document plus paging metadata.
        paging = dict(start=0, end=1, size=50, total=1)
        result_set = dict(results=[mocks.document()], metadata=paging)
        payload = {"results": result_set, "query": APIQuery()}
        mock_controller.search.return_value = (payload, HTTPStatus.OK, {})

        auth_token = helpers.generate_token(
            "1234",
            "*****@*****.**",
            "foouser",
            scope=[auth.scopes.READ_PUBLIC],
        )
        response = self.client.get("/", headers={"Authorization": auth_token})
        self.assertEqual(response.status_code, HTTPStatus.OK)

        # Resolve schema references relative to the schema's own directory.
        body = json.loads(response.data)
        schema_dir = os.path.abspath(os.path.dirname(self.SCHEMA_PATH))
        resolver = jsonschema.RefResolver("file://{}/".format(schema_dir), None)
        outcome = jsonschema.validate(body, self.schema, resolver=resolver)
        self.assertIsNone(outcome, "Response content is valid per schema")

        # Every required field must be present on the first result.
        for required in get_required_fields():
            self.assertIn(required, body["results"][0])
Beispiel #2
0
    def test_create_thing(self, mock_create_a_thing: Any) -> None:
        """A POST to /zero/api/thing answers 201 with the new Thing as JSON."""
        thing_name = 'A New Thing'
        thing_url = '/zero/api/thing/25'
        # What the mocked controller hands back to the route.
        stored = {
            'name': thing_name,
            'id': 25,
            'created': datetime.now(),
            'url': thing_url
        }
        mock_create_a_thing.return_value = (
            stored, HTTPStatus.CREATED, {'Location': thing_url}
        )

        token = generate_token(
            '1234',
            '*****@*****.**',
            'foouser',
            scope=[READ_THING, WRITE_THING])
        auth_headers = {'Authorization': token}

        response = self.client.post(
            '/zero/api/thing',
            data=json.dumps({'name': thing_name}),
            headers=auth_headers,
            content_type='application/json')

        # The expected body matches the stored data, except that the
        # creation time is compared in its ISO 8601 string form.
        created_iso = stored['created'].isoformat()  # type: ignore
        expected = dict(stored, created=created_iso)

        self.assertEqual(response.status_code, HTTPStatus.CREATED, "Created")
        self.assertDictEqual(json.loads(response.data), expected)
    def setUpClass(cls):
        """Launch the file manager in Docker and mint a token for the tests."""
        print('starting file management service')
        os.environ['JWT_SECRET'] = 'foosecret'

        # Detached container; host port 8003 is published to container port
        # 8000. On success ``docker run -d`` prints the container ID.
        docker_cmd = 'docker run -d -e JWT_SECRET=foosecret -p 8003:8000 arxiv/filemanager:0.0.3 /bin/bash -c \'python bootstrap.py; uwsgi --http-socket :8000 -M -t 3000 --manage-script-name --processes 8 --threads 1 --async 100 --ugreen --mount /=/opt/arxiv/wsgi.py --logformat "%(addr) %(addr) - %(user_id)|%(session_id) [%(rtime)] [%(uagent)] \\"%(method) %(uri) %(proto)\\" %(status) %(size) %(micros) %(ttfb)"\''
        launch = subprocess.run(docker_cmd,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)

        if launch.returncode != 0:
            print(launch.stdout, launch.stderr)
            problem = (
                f'Could not start file management service: {launch.stdout}.'
                f' Is one already running? Is port 8003 available?')
            raise RuntimeError(problem)
        # Brief pause before the tests start talking to the container.
        time.sleep(2)

        container_id = launch.stdout.decode('ascii').strip()
        cls.fm_container = container_id
        print(f'file management service started as {cls.fm_container}')

        upload_scopes = [scopes.WRITE_UPLOAD, scopes.READ_UPLOAD]
        cls.token = generate_token('1',
                                   '*****@*****.**',
                                   'theuser',
                                   scope=upload_scopes)
Beispiel #4
0
    def test_get_thing(self, mock_get_thing: Any) -> None:
        """Endpoint /zero/api/thing/<int> returns JSON about a Thing.

        The controller is mocked to return a fixed Thing. The response must
        have status 200, match the mocked data (with the creation time in
        ISO 8601 form), and validate against ``schema/thing.json``.
        """
        with open('schema/thing.json') as f:
            schema = json.load(f)

        foo_data = {'id': 4, 'name': 'First thing', 'created': datetime.now()}
        mock_get_thing.return_value = foo_data, HTTPStatus.OK, {}

        token = generate_token('1234',
                               '*****@*****.**',
                               'foouser',
                               scope=[READ_THING])

        response = self.client.get('/zero/api/thing/4',
                                   headers={'Authorization': token})

        expected_data = {
            'id': foo_data['id'],
            'name': foo_data['name'],
            'created': foo_data['created'].isoformat()  # type: ignore
        }

        self.assertEqual(response.status_code, HTTPStatus.OK)
        self.assertDictEqual(json.loads(response.data), expected_data)

        # ``validate`` raises ValidationError when the response does not
        # conform to the schema and SchemaError when the schema itself is
        # malformed; report either one as a test failure.
        try:
            jsonschema.validate(json.loads(response.data), schema)
        except (jsonschema.exceptions.ValidationError,
                jsonschema.exceptions.SchemaError) as e:
            self.fail(e)
Beispiel #5
0
    def test_auth_code_workflow(self):
        """Test the complete authorization code workflow.

        The user opens the authorization page, confirms the request, and is
        redirected to the client's redirect URI with an authorization code.
        The client then exchanges that code at ``/token`` for a bearer
        access token carrying the requested scope.
        """
        with self.app.app_context():
            user_token = generate_token(
                '1234',
                '*****@*****.**',
                'foouser',
                scope=[Scope('something', 'read'),
                       Scope('baz', 'bat')])
            user_headers = {'Authorization': user_token}
            params = {
                'response_type': 'code',
                'client_id': self.client_id,
                'redirect_uri': self.client.redirect_uri,
                'scope': 'something:read baz:bat'
            }

            # Step 1: the user requests the authorization (confirmation) page.
            response = self.user_agent.get('/authorize?%s' % urlencode(params),
                                           headers=user_headers)
            self.assertEqual(response.status_code, status.HTTP_200_OK,
                             'User can access authorization page')

            # Step 2: the user confirms, re-posting the same parameters.
            params['confirm'] = 'ok'  # Embedded in confirmation page.
            response = self.user_agent.post('/authorize',
                                            data=params,
                                            headers=user_headers)

            self.assertEqual(response.status_code, status.HTTP_302_FOUND,
                             'User is redirected to client redirect URI')
            target = urlparse(response.headers.get('Location'))
            # parse_qs maps each key to a list of values; ``get`` yields None
            # when no code was included in the redirect.
            code = parse_qs(target.query).get('code')
            self.assertEqual(target.netloc,
                             urlparse(self.client.redirect_uri).netloc,
                             'User is redirected to client redirect URI')
            self.assertEqual(target.path,
                             urlparse(self.client.redirect_uri).path,
                             'User is redirected to client redirect URI')
            self.assertIsNotNone(
                code, 'Authorization code is passed in redirect URL')

            # Step 3: the client exchanges the code for an access token.
            payload = {
                'client_id': self.client_id,
                'client_secret': self.secret,
                'code': code,
                'grant_type': 'authorization_code',
                'redirect_uri': self.client.redirect_uri
            }
            response = self.test_client.post('/token', data=payload)
            self.assertEqual(response.status_code, status.HTTP_200_OK)
            self.assertEqual(response.content_type, 'application/json')
            data = json.loads(response.data)

            self.assertIn('access_token', data,
                          'Response contains access token')
            self.assertIn('expires_in', data, 'Response contains expiration')
            self.assertGreater(data['expires_in'], 0)
            self.assertEqual(data['scope'], 'something:read baz:bat',
                             'Requested scope is granted')
            self.assertEqual(data['token_type'], 'Bearer',
                             'Access token is a bearer token')
Beispiel #6
0
 def test_with_token_lacking_scope(self):
     """A token without the required public read scope gets a 403."""
     unrelated_scope = [Scope('something', 'read')]
     token = helpers.generate_token(
         '1234', '*****@*****.**', 'foouser', scope=unrelated_scope)
     auth_headers = {'Authorization': token}
     response = self.client.get('/', headers=auth_headers)
     self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN)
Beispiel #7
0
 def test_with_token_lacking_scope(self):
     """A token without the required public read scope gets a 403."""
     unrelated_scope = [Scope("something", "read")]
     token = helpers.generate_token(
         "1234", "*****@*****.**", "foouser", scope=unrelated_scope
     )
     auth_headers = {"Authorization": token}
     response = self.client.get("/", headers=auth_headers)
     self.assertEqual(response.status_code, HTTPStatus.FORBIDDEN)
 def setUp(self) -> None:
     """Build the web app, a test client, and an upload-reading token."""
     self.app = app = create_web_app()
     app.config['JWT_SECRET'] = 'foosecret'
     self.client = app.test_client()
     # The token is generated inside an application context.
     with app.app_context():
         self.token = generate_token(
             '123',
             '*****@*****.**',
             'foouser',
             scope=[
                 scopes.READ_UPLOAD,
                 scopes.READ_UPLOAD_SERVICE_LOGS,
                 scopes.READ_UPLOAD_LOGS,
             ])
 def test_upload_package_without_authorization(self):
     """Upload a new package without authorization.

     The token carries only ``scopes.READ_UPLOAD``, so
     ``FileManager.upload_package`` is expected to raise
     ``exceptions.RequestForbidden``.
     """
     fpath = os.path.join(
         os.path.split(os.path.abspath(__file__))[0], 'data', 'test.zip')
     token = generate_token('1',
                            '*****@*****.**',
                            'theuser',
                            scope=[scopes.READ_UPLOAD])
     # Open the fixture in a context manager so the file handle is closed
     # once the upload attempt is over, instead of being left open for the
     # rest of the test run.
     with open(fpath, 'rb') as fixture:
         pointer = FileStorage(fixture,
                               filename='test.zip',
                               content_type='application/tar+gz')
         with self.assertRaises(exceptions.RequestForbidden):
             FileManager.upload_package(pointer, token)
Beispiel #10
0
def _new_auth_token():
    """Generate a token for user '10' with the scopes listed below.

    The token is created with ``endorsements=["*.*"]``.
    """
    granted = [
        scopes.READ_PUBLIC,
        scopes.CREATE_SUBMISSION,
        scopes.EDIT_SUBMISSION,
        scopes.VIEW_SUBMISSION,
        scopes.DELETE_SUBMISSION,
        scopes.READ_UPLOAD,
        scopes.WRITE_UPLOAD,
        scopes.DELETE_UPLOAD_FILE,
        scopes.READ_UPLOAD_LOGS,
        scopes.READ_COMPILE,
        scopes.CREATE_COMPILE,
    ]
    return generate_token(
        '10',
        '*****@*****.**',
        'foouser',
        scope=granted,
        endorsements=["*.*"],
    )
Beispiel #11
0
 def test_get_nonexistant_extraction(self):
     """Asking for an e-print extraction that does not exist yields 404."""
     granted = [
         scopes.READ_COMPILE,
         scopes.CREATE_COMPILE,
         scopes.READ_FULLTEXT,
         scopes.CREATE_FULLTEXT,
     ]
     token = generate_token(
         '1234', '*****@*****.**', 'foouser', scope=granted)
     auth_headers = {'Authorization': token}
     with self.app.app_context():
         response = self.client.get('/arxiv/2102.00123',
                                    headers=auth_headers)
     self.assertEqual(
         response.status_code, status.NOT_FOUND, "Returns 404 Not Found")
Beispiel #12
0
 def test_auth_confirmation_has_unauthorized_scope(self):
     """An authorization request for an unauthorized scope yields 400."""
     user_token = generate_token(
         '1234',
         '*****@*****.**',
         'foouser',
         scope=[Scope('something', 'read')])
     # The requested scope differs from the one the user's token carries.
     query = urlencode({
         'response_type': 'code',
         'client_id': self.client_id,
         'redirect_uri': self.client.redirect_uri,
         'scope': 'somethingelse:delete'
     })
     response = self.user_agent.get(
         '/authorize?%s' % query,
         headers={'Authorization': user_token})
     self.assertEqual(
         response.status_code,
         status.HTTP_400_BAD_REQUEST,
         'A 400 Bad Request is returned')
Beispiel #13
0
 def setUp(self):
     """Set up the app, its test client, test config, and a compile token."""
     self.app = factory.create_app()
     self.client = self.app.test_client()
     # Placeholder secrets and bucket name used only by the tests.
     self.app.config.update(
         JWT_SECRET='foosecret',
         S3_BUCKET='test-submission-bucket',
         AWS_ACCESS_KEY_ID='fookey',
         AWS_SECRET_ACCESS_KEY='foosecret',
     )
     self.user_id = '123'
     compile_scopes = [scopes.CREATE_COMPILE, scopes.READ_COMPILE]
     with self.app.app_context():
         self.token = generate_token(self.user_id,
                                     '*****@*****.**',
                                     'foouser',
                                     scope=compile_scopes)
    def test_get_upload_status_without_authorization(self):
        """Get the status of an upload without the right scope.

        The package is uploaded with ``self.token``; the status is then
        requested with a token that carries only ``scopes.WRITE_UPLOAD``,
        which is expected to raise ``exceptions.RequestForbidden``.
        """
        fpath = os.path.join(
            os.path.split(os.path.abspath(__file__))[0], 'data', 'test.zip')
        token = generate_token('1',
                               '*****@*****.**',
                               'theuser',
                               scope=[scopes.WRITE_UPLOAD])
        # Open the fixture in a context manager so the file handle is closed
        # as soon as the upload is done rather than leaked.
        with open(fpath, 'rb') as fixture:
            pointer = FileStorage(fixture,
                                  filename='test.zip',
                                  content_type='application/tar+gz')
            data = FileManager.upload_package(pointer, self.token)

        with self.assertRaises(exceptions.RequestForbidden):
            FileManager.get_upload_status(data.identifier, token)
Beispiel #15
0
 def test_auth_confirmation_has_invalid_client(self):
     """An authorization request with an invalid client ID yields 400."""
     user_token = generate_token(
         '1234',
         '*****@*****.**',
         'foouser',
         scope=[Scope('something', 'read')])
     query = urlencode({
         'response_type': 'code',
         'client_id': '5678',   # Invalid client ID.
         'redirect_uri': self.client.redirect_uri,
         'scope': 'something:read'
     })
     response = self.user_agent.get(
         '/authorize?%s' % query,
         headers={'Authorization': user_token})
     self.assertEqual(
         response.status_code,
         status.HTTP_400_BAD_REQUEST,
         'A 400 Bad Request is returned')
Beispiel #16
0
    def test_get_thing(self, mock_get_thing: Any) -> None:
        """GET /thing/<int> answers 200 with an HTML content type."""
        thing = dict(id=4, name='First thing', created=datetime.now())
        mock_get_thing.return_value = (thing, 200, {})

        token = generate_token(
            '1234', '*****@*****.**', 'foouser', scope=[READ_THING])
        auth_headers = {'Authorization': token}

        response = self.client.get('/thing/4', headers=auth_headers)

        self.assertEqual(response.status_code, 200)
        content_type = response.headers['Content-Type']
        self.assertEqual(content_type, 'text/html; charset=utf-8')
Beispiel #17
0
 def setUp(self):
     """Create the classic API app, a test client, and an auth header."""
     secret = "foosecret"
     # The same secret goes into the environment and the app config.
     os.environ["JWT_SECRET"] = secret
     self.app = factory.create_classic_api_web_app()
     self.app.config["JWT_SECRET"] = secret
     self.client = self.app.test_client()
     token = helpers.generate_token(
         "1234",
         "*****@*****.**",
         "foouser",
         scope=[auth.scopes.READ_PUBLIC],
     )
     self.auth_header = {"Authorization": token}
    def test_get_upload_status_nacho_upload(self):
        """Get the status of someone else's upload.

        The package is uploaded with ``self.token``; a token generated for
        a different user ID ('2') then requests its status, which is
        expected to raise ``exceptions.RequestForbidden``.
        """
        fpath = os.path.join(
            os.path.split(os.path.abspath(__file__))[0], 'data', 'test.zip')
        # Open the fixture in a context manager so the file handle is closed
        # as soon as the upload is done rather than leaked.
        with open(fpath, 'rb') as fixture:
            pointer = FileStorage(fixture,
                                  filename='test.zip',
                                  content_type='application/tar+gz')
            data = FileManager.upload_package(pointer, self.token)

        token = generate_token('2',
                               '*****@*****.**',
                               'theotheruser',
                               scope=[scopes.READ_UPLOAD])
        with self.assertRaises(exceptions.RequestForbidden):
            FileManager.get_upload_status(data.identifier, token)
Beispiel #19
0
    def test_auth_confirmation_post_missing_confirmation(self):
        """A POST to /authorize without the confirm field yields 400."""
        user_token = generate_token(
            '1234',
            '*****@*****.**',
            'foouser',
            scope=[Scope('something', 'read')])
        # Deliberately leaves out the `confirm` field.
        form = {
            'response_type': 'code',
            'client_id': self.client_id,
            'redirect_uri': self.client.redirect_uri,
            'scope': 'something:read'
        }
        response = self.user_agent.post(
            '/authorize',
            data=form,
            headers={'Authorization': user_token})

        self.assertEqual(
            response.status_code,
            status.HTTP_400_BAD_REQUEST,
            'A 400 Bad Request is returned')
    def test_all_endorsements(self):
        """A user endorsed for everything sees no endorsement messaging."""
        granted = [
            scopes.CREATE_SUBMISSION,
            scopes.EDIT_SUBMISSION,
            scopes.VIEW_SUBMISSION,
            scopes.READ_UPLOAD,
            scopes.WRITE_UPLOAD,
            scopes.DELETE_UPLOAD_FILE,
        ]
        self.token = generate_token('1234',
                                    '*****@*****.**',
                                    'foouser',
                                    scope=granted,
                                    endorsements=["*.*"])
        self.headers = {'Authorization': self.token}

        # Load the submission creation page and pull out its CSRF token.
        landing = self.client.get('/', headers=self.headers)
        self.assertEqual(landing.status_code, status.OK)
        self.assertEqual(landing.content_type, 'text/html; charset=utf-8')
        csrf = self._parse_csrf_token(landing)

        # Submit the form that creates a submission.
        form = {'new': 'new', 'csrf_token': csrf}
        created = self.client.post('/', data=form, headers=self.headers)
        self.assertEqual(created.status_code, status.SEE_OTHER)

        # The redirect should lead to the verify_user stage.
        next_path = urlparse(created.headers['Location']).path
        self.assertIn('verify_user', next_path)
        page = self.client.get(next_path, headers=self.headers)
        unwanted_phrases = (
            b'Your account does not currently have any endorsed categories.',
            b'You are currently endorsed for',
        )
        for phrase in unwanted_phrases:
            self.assertNotIn(
                phrase, page.data,
                'User should see no messaging about endorsement.')
    def test_some_archives(self):
        """A user endorsed for whole archives is told about endorsements."""
        granted = [
            scopes.CREATE_SUBMISSION,
            scopes.EDIT_SUBMISSION,
            scopes.VIEW_SUBMISSION,
            scopes.READ_UPLOAD,
            scopes.WRITE_UPLOAD,
            scopes.DELETE_UPLOAD_FILE,
        ]
        archives = [Category("cs.*"), Category("math.*")]
        self.token = generate_token('1234',
                                    '*****@*****.**',
                                    'foouser',
                                    scope=granted,
                                    endorsements=archives)
        self.headers = {'Authorization': self.token}

        # Load the submission creation page and pull out its CSRF token.
        landing = self.client.get('/', headers=self.headers)
        self.assertEqual(landing.status_code, status.OK)
        self.assertEqual(landing.content_type, 'text/html; charset=utf-8')
        csrf = self._parse_csrf_token(landing)

        # Submit the form that creates a submission.
        form = {'new': 'new', 'csrf_token': csrf}
        created = self.client.post('/', data=form, headers=self.headers)
        self.assertEqual(created.status_code, status.SEE_OTHER)

        # The redirect should lead to the verify_user stage.
        next_path = urlparse(created.headers['Location']).path
        self.assertIn('verify_user', next_path)
        page = self.client.get(next_path, headers=self.headers)
        self.assertIn(
            b'You are currently endorsed for',
            page.data,
            'User should be informed that they have some endorsements.')
 def setUp(self):
     """Create an application instance backed by a temporary SQLite file.

     Also generates a token with submission and upload scopes and two
     ``astro-ph`` endorsements, builds the matching ``Authorization``
     header, creates a test client, and creates the classic database
     tables.
     """
     self.app = create_ui_web_app()
     os.environ['JWT_SECRET'] = self.app.config.get('JWT_SECRET')
     # mkstemp returns an already-open OS-level file descriptor together
     # with the path. Only the path is needed here, so close the descriptor
     # right away; otherwise one descriptor leaks per test.
     db_fd, self.db = tempfile.mkstemp(suffix='.db')
     os.close(db_fd)
     self.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{self.db}'
     self.token = generate_token(
         '1234',
         '*****@*****.**',
         'foouser',
         scope=[
             scopes.CREATE_SUBMISSION, scopes.EDIT_SUBMISSION,
             scopes.VIEW_SUBMISSION, scopes.READ_UPLOAD,
             scopes.WRITE_UPLOAD, scopes.DELETE_UPLOAD_FILE
         ],
         endorsements=[
             Category('astro-ph.GA'),
             Category('astro-ph.CO'),
         ])
     self.headers = {'Authorization': self.token}
     self.client = self.app.test_client()
     with self.app.app_context():
         classic.create_all()
Beispiel #23
0
    def test_create_a_thing_and_mutate_it(self) -> None:
        """Create a Thing, read it back, mutate it, and check the result."""
        token = generate_token(
            '1234',
            '*****@*****.**',
            'foouser',
            scope=[READ_THING, WRITE_THING])
        auth = {'Authorization': token}
        json_type = 'application/json'
        new_thing = {'name': 'The Thing'}

        # Step 1 -- create the thing.
        #   Client -> App:       POST /thing
        #   App    -> Database:  write row, get row data back
        #   App    -> Client:    201 with data, Location: /thing/<id>
        created = self.client.post('/zero/api/thing',
                                   data=json.dumps(new_thing),
                                   headers=auth,
                                   content_type=json_type)
        self.assertEqual(created.status_code, HTTPStatus.CREATED, "Created")

        created_body = json.loads(created.data)
        self.assertEqual(created_body['name'], new_thing['name'])
        for key in ('created', 'id', 'url'):
            self.assertIn(key, created_body)

        # Step 2 -- get the thing.
        #   Client -> App:       GET /thing/<id>
        #   App    -> Database:  select by ID, get row data back
        #   App    -> Client:    200 with data
        fetched = self.client.get(created_body['url'], headers=auth)
        self.assertEqual(fetched.data, b'The Thing')

        # Step 3 -- mutate the thing.
        #   Client -> App:    POST /thing/<id>
        #   App    -> Queue:  new task, get task ID back
        #   App    -> Client: 202 with task ID, Location: /mutation/<task id>
        mutation = self.client.post(created_body['url'],
                                    data=json.dumps({}),
                                    headers=auth,
                                    content_type=json_type)
        self.assertEqual(
            mutation.status_code, HTTPStatus.ACCEPTED, "Accepted")

        # Step 4 -- get the mutation task status (not yet complete).
        #   Client -> App:      GET /mutation/<task id>
        #   App    -> Results:  get task, receive task status
        #   App    -> Client:   200 with task status
        status_path = parse.urlparse(mutation.headers['Location']).path
        pending = self.client.get(status_path, headers=auth)
        self.assertEqual(
            pending.status_code, HTTPStatus.OK, "Status resource found")

        # Meanwhile, the worker process pops the task from the queue, gets
        # the thing by ID from the database, does its work, updates the
        # thing data, and finally updates the task result.
        time.sleep(6)  # Wait for task to complete.

        # Step 5 -- get the mutation task status (complete).
        #   Client -> App:      GET /mutation/<task id>
        #   App    -> Results:  get task, receive task status
        #   App    -> Client:   303 with task status, Location: /thing/<id>
        finished = self.client.get(status_path, headers=auth)
        self.assertEqual(
            finished.status_code, HTTPStatus.SEE_OTHER, "See other")
        finished_body = json.loads(finished.data)

        self.assertIn("result", finished_body)
        self.assertIn("status", finished_body)
        self.assertIn("location", finished.headers)
        extra_ones = finished_body['result']['result'] - 9

        # Step 6 -- get the thing one last time.
        #   Client -> App:       GET /thing/<id>
        #   App    -> Database:  select by ID, get row data back
        #   App    -> Client:    200 with data
        final_path = parse.urlparse(finished.headers['Location']).path
        final = self.client.get(final_path, headers=auth)
        self.assertEqual(final.status_code, HTTPStatus.OK, "OK")
        self.assertEqual(final.data, b'The Thing' + b'1' * extra_ones)
Beispiel #24
0
    def test_request_extraction_of_submission(self):
        """Request extraction of a submission."""
        token = generate_token(self.user_id,
                               '*****@*****.**',
                               'foouser',
                               scope=[
                                   scopes.READ_COMPILE, scopes.CREATE_COMPILE,
                                   scopes.READ_FULLTEXT, scopes.CREATE_FULLTEXT
                               ])

        # Since we are running Celery in "eager" mode for these tests, the
        # extraction will block and run here.
        with self.app.app_context():
            response = self.client.post(f'/submission/{self.SUBMISSION_CASE}',
                                        headers={'Authorization': token})

        self.assertEqual(response.status_code, status.ACCEPTED,
                         "Returns 202 Accepted")
        self.assertEqual(
            response.headers['Location'],
            f'http://localhost/submission/{self.SUBMISSION_CASE}/status',
            "Redirects to task status endpoint")

        # Verify that authn/z requirements are enforced for extraction
        # endpoint.
        with self.app.app_context():
            unauthz = generate_token(self.user_id,
                                     '*****@*****.**',
                                     'foouser',
                                     scope=[
                                         scopes.READ_COMPILE,
                                         scopes.CREATE_COMPILE,
                                         scopes.READ_FULLTEXT
                                     ])
            response = self.client.post(f'/submission/{self.SUBMISSION_CASE}',
                                        headers={'Authorization': unauthz})
            self.assertEqual(response.status_code, status.FORBIDDEN,
                             "The fulltext:create scope is required")

            response = self.client.post(f'/submission/{self.SUBMISSION_CASE}')
            self.assertEqual(response.status_code, status.UNAUTHORIZED,
                             "Authentication required to request extraction")

            other = generate_token('1235',
                                   '*****@*****.**',
                                   'foouser',
                                   scope=[
                                       scopes.READ_COMPILE,
                                       scopes.CREATE_COMPILE,
                                       scopes.CREATE_FULLTEXT,
                                       scopes.READ_FULLTEXT
                                   ])
            response = self.client.post(f'/submission/{self.SUBMISSION_CASE}',
                                        headers={'Authorization': other})
            self.assertEqual(response.status_code, status.NOT_FOUND,
                             "Not the owner; pretend it does not exist")

        # Since this is happening assynchronously in these tests (see above),
        # we expect the task to have not completed.
        with self.app.app_context():
            response = self.client.get(
                f'/submission/{self.SUBMISSION_CASE}/status',
                headers={'Authorization': token})
        self.assertEqual(response.status_code, status.OK, "Returns 200 OK")

        tries = 0
        while True:
            if tries > 30:
                self.fail('Waited too long')
            time.sleep(2)
            with self.app.app_context():
                response = self.client.get(
                    f'/submission/{self.SUBMISSION_CASE}/status',
                    headers={'Authorization': token})
                if response.json['status'] == 'failed':
                    self.fail('Extraction failed')
                elif response.json['status'] == 'succeeded':
                    break
            tries += 1

        self.assertEqual(response.json['status'], 'succeeded', "Succeeded!")
        self.assertIsNone(response.json['content'], "No content is included")
        self.assertIsNotNone(response.json['started'], "Start time is set")
        self.assertIsNotNone(response.json['ended'], "End time is set")
        self.assertIsNone(response.json['exception'], "No exception occurred")
        self.assertEqual(response.json['owner'], self.user_id,
                         "This is a submission; owner is set.")
        self.assertIsNotNone(response.json['task_id'], "Task ID is set")
        self.assertEqual(response.json['version'], self.extractor_version)

        self.assertEqual(
            response.headers['Location'],
            f'http://localhost/submission/{self.SUBMISSION_CASE}',
            'Redirects to content')

        # Verify that authn/z requirements are enforced for status endpoint.
        with self.app.app_context():
            unauthz = generate_token('1234',
                                     '*****@*****.**',
                                     'foouser',
                                     scope=[
                                         scopes.READ_COMPILE,
                                         scopes.CREATE_COMPILE,
                                         scopes.CREATE_FULLTEXT
                                     ])
            response = self.client.get(
                f'/submission/{self.SUBMISSION_CASE}/status',
                headers={'Authorization': unauthz})
            self.assertEqual(response.status_code, status.FORBIDDEN,
                             "The fulltext:read scope is required for status")

            response = self.client.get(
                f'/submission/{self.SUBMISSION_CASE}/status')
            self.assertEqual(response.status_code, status.UNAUTHORIZED,
                             "Authentication is required to view status")

            other = generate_token('1235',
                                   '*****@*****.**',
                                   'foouser',
                                   scope=[
                                       scopes.READ_COMPILE,
                                       scopes.CREATE_COMPILE,
                                       scopes.CREATE_FULLTEXT,
                                       scopes.READ_FULLTEXT
                                   ])
            response = self.client.get(
                f'/submission/{self.SUBMISSION_CASE}/status',
                headers={'Authorization': other})
            self.assertEqual(response.status_code, status.NOT_FOUND,
                             "Not the owner; pretend it does not exist")

        # We should now be able to retrieve the content,
        with self.app.app_context():
            response = self.client.get(f'/submission/{self.SUBMISSION_CASE}',
                                       headers={'Authorization': token})

        self.assertEqual(response.status_code, status.OK, 'Returns 200 OK')
        self.assertEqual(response.json['status'], 'succeeded', "Succeeded!")
        self.assertIsNotNone(response.json['content'], 'Content is included')
        self.assertIsNotNone(response.json['started'], "Start time is set")
        self.assertIsNotNone(response.json['ended'], "End time is set")
        self.assertIsNone(response.json['exception'], "No exception occurred")
        self.assertEqual(response.json['owner'], self.user_id,
                         "This is a submission; owner is set.")
        self.assertIsNotNone(response.json['task_id'], "Task ID is set")
        self.assertEqual(response.json['version'], self.extractor_version)

        # We should now be able to retrieve the PSV content, as well.
        with self.app.app_context():
            response = self.client.get(
                f'/submission/{self.SUBMISSION_CASE}/format/psv',
                headers={'Authorization': token})

        self.assertEqual(response.status_code, status.OK, 'Returns 200 OK')

        with self.app.app_context():
            # These should also work.
            valid_urls = [
                f'/submission/{self.SUBMISSION_CASE}/version/0.3',
                f'/submission/{self.SUBMISSION_CASE}/version/0.3/format/plain',
                f'/submission/{self.SUBMISSION_CASE}/version/0.3/format/psv'
            ]
            for url in valid_urls:
                response = self.client.get(url,
                                           headers={'Authorization': token})
                self.assertEqual(response.status_code, status.OK,
                                 f'{url} should exist')

            # But not these.
            invalid_urls = [
                f'/submission/{self.SUBMISSION_CASE}/version/0.2',
                f'/submission/{self.SUBMISSION_CASE}/format/magic',
                f'/submission/{self.SUBMISSION_CASE}/version/0.3/format/magic'
            ]
            for url in invalid_urls:
                response = self.client.get(url,
                                           headers={'Authorization': token})
                self.assertEqual(response.status_code, status.NOT_FOUND,
                                 f'{url} should not exist')

        # Verify that authn/z requirements are enforced for content endpoint.
        with self.app.app_context():
            unauthz = generate_token('1234',
                                     '*****@*****.**',
                                     'foouser',
                                     scope=[
                                         scopes.READ_COMPILE,
                                         scopes.CREATE_COMPILE,
                                         scopes.CREATE_FULLTEXT
                                     ])
            response = self.client.get(f'/submission/{self.SUBMISSION_CASE}',
                                       headers={'Authorization': unauthz})
            self.assertEqual(response.status_code, status.FORBIDDEN,
                             "The fulltext:read scope is required for status")

            response = self.client.get(f'/submission/{self.SUBMISSION_CASE}')
            self.assertEqual(response.status_code, status.UNAUTHORIZED,
                             "Authentication is required to view status")

            other = generate_token('1235',
                                   '*****@*****.**',
                                   'foouser',
                                   scope=[
                                       scopes.READ_COMPILE,
                                       scopes.CREATE_COMPILE,
                                       scopes.CREATE_FULLTEXT,
                                       scopes.READ_FULLTEXT
                                   ])
            response = self.client.get(f'/submission/{self.SUBMISSION_CASE}',
                                       headers={'Authorization': other})
            self.assertEqual(response.status_code, status.NOT_FOUND,
                             "Not the owner; pretend it does not exist")
Beispiel #25
0
    def test_request_extraction(self):
        """
        Request extraction of an (announced) arXiv e-print.

        Exercises the whole lifecycle for ``self.SUCCESS_CASE``: the first
        POST is accepted, a repeated POST redirects to the status endpoint,
        the status endpoint is polled until it redirects to the content, and
        the content is then retrieved in the supported versions and formats.
        The authn/z requirements of the extraction, status, and content
        endpoints are verified along the way.
        """
        content_url = f'/arxiv/{self.SUCCESS_CASE}'
        status_url = f'{content_url}/status'
        token = generate_token('1234',
                               '*****@*****.**',
                               'foouser',
                               scope=[
                                   scopes.READ_COMPILE, scopes.CREATE_COMPILE,
                                   scopes.READ_FULLTEXT, scopes.CREATE_FULLTEXT
                               ])
        authorized = {'Authorization': token}

        # Request the extraction. The task is not assumed to have finished
        # when this call returns; completion is awaited by polling below.
        with self.app.app_context():
            response = self.client.post(content_url, headers=authorized)

        self.assertEqual(response.status_code, status.ACCEPTED,
                         "Returns 202 Accepted")
        self.assertEqual(response.headers['Location'],
                         f'http://localhost{status_url}',
                         "Redirects to task status endpoint")

        # Verify that we don't do the same thing twice.
        with self.app.app_context():
            response = self.client.post(content_url, headers=authorized)

        self.assertEqual(response.status_code, status.SEE_OTHER,
                         "Returns 303 See Other")
        self.assertEqual(
            response.headers['Location'],
            f'http://localhost{status_url}',
            "Redirects to task status endpoint, since the task"
            " has not yet completed.")

        # Poll the status endpoint until it answers with 303 See Other,
        # failing the test once the retry budget is used up.
        tries = 0
        response = self.client.get(status_url, headers=authorized)
        while response.status_code != status.SEE_OTHER:
            if tries > 30:
                self.fail('Waited too long for result')
            time.sleep(2)
            with self.app.app_context():
                response = self.client.get(status_url, headers=authorized)
            tries += 1

        response = self.client.get(content_url, headers=authorized)
        self.assertEqual(response.status_code, status.OK)

        # Verify that authn/z requirements are enforced for extraction
        # endpoint.
        with self.app.app_context():
            # This token lacks the fulltext:create scope.
            unauthz = generate_token('1234',
                                     '*****@*****.**',
                                     'foouser',
                                     scope=[
                                         scopes.READ_COMPILE,
                                         scopes.CREATE_COMPILE,
                                         scopes.READ_FULLTEXT
                                     ])
            response = self.client.post(content_url,
                                        headers={'Authorization': unauthz})
            self.assertEqual(response.status_code, status.FORBIDDEN,
                             "The fulltext:create scope is required")

            response = self.client.post(content_url)
            self.assertEqual(response.status_code, status.UNAUTHORIZED,
                             "Authentication required to request extraction")

        # The polling loop above only exits on a 303 from the status
        # endpoint, so the status is expected to report success here.
        with self.app.app_context():
            response = self.client.get(status_url, headers=authorized)

        self.assertEqual(response.status_code, status.SEE_OTHER,
                         "Returns 303 See Other")
        self.assertEqual(response.json['status'], 'succeeded', "Succeeded!")
        self.assertIsNone(response.json['content'], "No content is included")
        self.assertIsNotNone(response.json['started'], "Start time is set")
        self.assertIsNotNone(response.json['ended'], "End time is set")
        self.assertIsNone(response.json['exception'], "No exception occurred")
        self.assertIsNone(response.json['owner'],
                          "This is an announced e-print; owner is not set.")
        self.assertIsNotNone(response.json['task_id'], "Task ID is set")
        self.assertEqual(response.json['version'], self.extractor_version)

        self.assertEqual(response.headers['Location'],
                         f'http://localhost{content_url}',
                         'Redirects to content')

        # Verify that authn/z requirements are enforced for status endpoint.
        with self.app.app_context():
            # This token lacks the fulltext:read scope.
            unauthz = generate_token('1234',
                                     '*****@*****.**',
                                     'foouser',
                                     scope=[
                                         scopes.READ_COMPILE,
                                         scopes.CREATE_COMPILE,
                                         scopes.CREATE_FULLTEXT
                                     ])
            response = self.client.get(status_url,
                                       headers={'Authorization': unauthz})
            self.assertEqual(response.status_code, status.FORBIDDEN,
                             "The fulltext:read scope is required for status")

            response = self.client.get(status_url)
            self.assertEqual(response.status_code, status.UNAUTHORIZED,
                             "Authentication is required to view status")

        # We should now be able to retrieve the content.
        with self.app.app_context():
            response = self.client.get(content_url, headers=authorized)

        self.assertEqual(response.status_code, status.OK, 'Returns 200 OK')
        self.assertEqual(response.json['status'], 'succeeded', "Succeeded!")
        self.assertIsNotNone(response.json['content'], 'Content is included')
        self.assertIsNotNone(response.json['started'], "Start time is set")
        self.assertIsNotNone(response.json['ended'], "End time is set")
        self.assertIsNone(response.json['exception'], "No exception occurred")
        self.assertIsNone(response.json['owner'],
                          "This is an announced e-print; owner is not set.")
        self.assertIsNotNone(response.json['task_id'], "Task ID is set")
        self.assertEqual(response.json['version'], self.extractor_version)

        # We should now be able to retrieve the PSV content, as well.
        with self.app.app_context():
            response = self.client.get(f'{content_url}/format/psv',
                                       headers=authorized)

        self.assertEqual(response.status_code, status.OK, 'Returns 200 OK')

        with self.app.app_context():
            # These should also work.
            valid_urls = [
                f'{content_url}/version/0.3',
                f'{content_url}/version/0.3/format/plain',
                f'{content_url}/version/0.3/format/psv'
            ]
            for url in valid_urls:
                response = self.client.get(url, headers=authorized)
                self.assertEqual(response.status_code, status.OK,
                                 f'{url} should exist')

            # But not these.
            invalid_urls = [
                f'{content_url}/version/0.2',
                f'{content_url}/format/magic',
                f'{content_url}/version/0.3/format/magic'
            ]
            for url in invalid_urls:
                response = self.client.get(url, headers=authorized)
                self.assertEqual(response.status_code, status.NOT_FOUND,
                                 f'{url} should not exist')

        # Verify that authn/z requirements are enforced for content endpoint.
        with self.app.app_context():
            # This token lacks the fulltext:read scope.
            unauthz = generate_token('1234',
                                     '*****@*****.**',
                                     'foouser',
                                     scope=[
                                         scopes.READ_COMPILE,
                                         scopes.CREATE_COMPILE,
                                         scopes.CREATE_FULLTEXT
                                     ])
            response = self.client.get(content_url,
                                       headers={'Authorization': unauthz})
            self.assertEqual(response.status_code, status.FORBIDDEN,
                             "The fulltext:read scope is required for content")

            response = self.client.get(content_url)
            self.assertEqual(response.status_code, status.UNAUTHORIZED,
                             "Authentication is required to view content")
Beispiel #26
0
        # Stage every category object on the database session.
        for obj in categories:
            session.add(obj)
        logger.info('Added %i categories', len(categories))
        # Obtain bootstrap users (the argument 10 is presumably the number of
        # users to generate -- confirm against classic.bootstrap.users), stage
        # them on the session, and remember them for token generation below.
        users = classic.bootstrap.users(10)
        for obj in users:
            session.add(obj)
            created_users.append(obj)
        logger.info('Added %i users', len(users))
        session.commit()

        # Scopes granted to every token generated below.
        scope = [
            scopes.READ_PUBLIC, scopes.CREATE_SUBMISSION,
            scopes.EDIT_SUBMISSION, scopes.VIEW_SUBMISSION,
            scopes.DELETE_SUBMISSION, scopes.READ_UPLOAD, scopes.WRITE_UPLOAD,
            scopes.DELETE_UPLOAD_FILE, scopes.READ_UPLOAD_LOGS,
            scopes.READ_COMPILE, scopes.CREATE_COMPILE
        ]
        # Print one line per created user: user ID, e-mail address and a
        # freshly generated token.
        for user in created_users:
            # The e-mail address is passed twice: as the second and as the
            # third positional argument of generate_token().
            token = generate_token(user.user_id,
                                   user.email,
                                   user.email,
                                   scope=scope,
                                   first_name=user.first_name,
                                   last_name=user.last_name,
                                   suffix_name=user.suffix_name,
                                   endorsements=["*.*"])
            print(user.user_id, user.email, token)

        # Stop the process with exit status 0 once the tokens are printed.
        exit(0)
    # Not reached after the exit(0) above; presumably runs only when the
    # enclosing block (its header is not visible here) is skipped.
    logger.info('Nothing to do')
Beispiel #27
0
    def test_with_valid_token(self, mock_controller):
        """A token with the public read scope gets a schema-valid 200."""

        def classification(category_id, category_name):
            # Build a fresh classification in the 'foo' group and archive;
            # only the category differs between primary and secondary.
            return domain.Classification(
                group={'id': 'foo', 'name': 'Foo Group'},
                archive={'id': 'foo', 'name': 'Foo Archive'},
                category={'id': category_id, 'name': category_name},
            )

        author = domain.Person(full_name='F. Bar',
                               orcid='1234-5678-9012-3456')
        submitter = domain.Person(full_name='S. Ubmitter', author_id='su_1')
        license_info = {
            'uri': 'http://foo.license/1',
            'label': 'Notalicense 5.4',
        }
        source_info = {'flags': 'A', 'format': 'pdftotex', 'size_bytes': 2}

        # The single document that the mocked search controller will return.
        paper = domain.Document(
            submitted_date=datetime.now(),
            submitted_date_first=datetime.now(),
            announced_date_first=datetime.now(),
            id='1234.5678',
            abstract='very abstract',
            authors=[author],
            submitter=submitter,
            modified_date=datetime.now(),
            updated_date=datetime.now(),
            is_current=True,
            is_withdrawn=False,
            license=license_info,
            paper_id='1234.5678',
            paper_id_v='1234.5678v6',
            title='tiiiitle',
            source=source_info,
            version=6,
            latest='1234.5678v6',
            latest_version=6,
            report_num='somenum1',
            msc_class=['c1'],
            acm_class=['z2'],
            journal_ref='somejournal (1991): 2-34',
            doi='10.123456/7890',
            comments='very science',
            abs_categories='astro-ph.CO foo.BR',
            formats=['pdf', 'other'],
            primary_classification=classification('foo.BR', 'Foo Category'),
            secondary_classification=[
                classification('foo.BZ', 'Baz Category'),
            ],
        )
        document_set = domain.DocumentSet(
            results=[paper],
            metadata={'start': 0, 'end': 1, 'size': 50, 'total': 1},
        )
        # The controller yields a (data, status code, headers) triple.
        mock_controller.search.return_value = (
            {'results': document_set, 'query': domain.APIQuery()},
            status.HTTP_200_OK,
            {},
        )

        token = helpers.generate_token('1234',
                                       '*****@*****.**',
                                       'foouser',
                                       scope=[auth.scopes.READ_PUBLIC])
        response = self.client.get('/', headers={'Authorization': token})
        self.assertEqual(response.status_code, status.HTTP_200_OK)

        # Validate the payload against the schema, resolving references
        # relative to the directory that holds the schema file.
        payload = json.loads(response.data)
        schema_dir = os.path.abspath(os.path.dirname(self.SCHEMA_PATH))
        resolver = jsonschema.RefResolver('file://%s/' % schema_dir, None)
        validation = jsonschema.validate(payload, self.schema,
                                         resolver=resolver)
        self.assertIsNone(validation, 'Response content is valid per schema')

        # Every required field must be present on the first result.
        for required in domain.api.get_required_fields():
            self.assertIn(required, payload['results'][0])
Beispiel #28
0
    def test_extraction_fails(self):
        """
        Extraction of an e-print fails.

        Requests extraction for ``self.FAIL_CASE``, polls the status endpoint
        until the task reports failure, checks that both the status and the
        extraction endpoints reflect the failure, and finally verifies that
        the extraction can be re-started by forcing.
        """
        content_url = f'/arxiv/{self.FAIL_CASE}'
        status_url = f'{content_url}/status'
        token = generate_token('1234',
                               '*****@*****.**',
                               'foouser',
                               scope=[
                                   scopes.READ_COMPILE, scopes.CREATE_COMPILE,
                                   scopes.READ_FULLTEXT, scopes.CREATE_FULLTEXT
                               ])
        authorized = {'Authorization': token}

        with self.app.app_context():
            response = self.client.post(content_url, headers=authorized)

        self.assertEqual(response.status_code, status.ACCEPTED,
                         "Returns 202 Accepted")
        self.assertEqual(response.headers['Location'],
                         f'http://localhost{status_url}',
                         "Redirects to task status endpoint")

        # Poll the status endpoint until the task reports failure. The number
        # of polls is bounded so that a stuck task fails the test instead of
        # hanging it.
        max_polls = 31
        for _ in range(max_polls):
            time.sleep(2)
            with self.app.app_context():
                response = self.client.get(status_url, headers=authorized)
                task_state = response.json['status']
            if task_state == 'failed':
                break
            if task_state == 'succeeded':
                self.fail('Extraction should not succeed')
        else:
            # The loop ran out of polls without seeing a terminal state.
            self.fail('Waited too long for result')

        # The status endpoint will reflect the failure state.
        with self.app.app_context():
            response = self.client.get(status_url, headers=authorized)

        self.assertEqual(response.status_code, status.OK, "Returns 200 OK")
        self.assertEqual(response.json['status'], 'failed', "Failed!")
        self.assertIsNone(response.json['content'], "No content is included")
        self.assertIsNotNone(response.json['started'], "Start time is set")
        self.assertIsNotNone(response.json['ended'], "End time is set")
        self.assertEqual(response.json['exception'],
                         "1205.00123: unexpected status for PDF: 500")
        self.assertIsNone(response.json['owner'],
                          "This is an announced e-print; owner is not set.")
        self.assertIsNotNone(response.json['task_id'], "Task ID is set")
        self.assertEqual(response.json['version'], self.extractor_version)

        # The extraction endpoint will reflect the failure state.
        with self.app.app_context():
            response = self.client.get(content_url, headers=authorized)

        self.assertEqual(response.status_code, status.OK, "Returns 200 OK")
        self.assertEqual(response.json['status'], 'failed', "Failed!")
        self.assertIsNone(response.json['content'], "No content is included")
        self.assertIsNotNone(response.json['started'], "Start time is set")
        self.assertIsNotNone(response.json['ended'], "End time is set")
        self.assertEqual(response.json['exception'],
                         "1205.00123: unexpected status for PDF: 500")
        self.assertIsNone(response.json['owner'],
                          "This is an announced e-print; owner is not set.")
        self.assertIsNotNone(response.json['task_id'], "Task ID is set")
        self.assertEqual(response.json['version'], self.extractor_version)

        # We can re-start extraction by forcing.
        with self.app.app_context():
            response = self.client.post(content_url,
                                        json={'force': True},
                                        headers=authorized)

        self.assertEqual(response.status_code, status.ACCEPTED,
                         "Returns 202 Accepted")
        self.assertEqual(response.headers['Location'],
                         f'http://localhost{status_url}',
                         "Redirects to task status endpoint")
    def setUp(self):
        """
        Create an application instance and an announced submission.

        Points the app at a temporary SQLite database, prepares a user with a
        matching auth token and test client, then creates a finalized
        submission and marks it as announced directly in the classic
        database. The submission ID is kept on ``self.submission_id``.
        """
        self.app = create_ui_web_app()
        os.environ['JWT_SECRET'] = self.app.config.get('JWT_SECRET')
        # mkstemp() returns an open OS-level file descriptor together with
        # the path. Only the path is used here, so close the descriptor
        # right away instead of leaking one per test.
        db_fd, self.db = tempfile.mkstemp(suffix='.db')
        os.close(db_fd)
        self.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{self.db}'
        self.user = User('1234',
                         '*****@*****.**',
                         endorsements=['astro-ph.GA', 'astro-ph.CO'])
        self.token = generate_token(
            '1234',
            '*****@*****.**',
            'foouser',
            scope=[
                scopes.CREATE_SUBMISSION, scopes.EDIT_SUBMISSION,
                scopes.VIEW_SUBMISSION, scopes.READ_UPLOAD,
                scopes.WRITE_UPLOAD, scopes.DELETE_UPLOAD_FILE
            ],
            endorsements=[
                Category('astro-ph.GA'),
                Category('astro-ph.CO'),
            ])
        self.headers = {'Authorization': self.token}
        self.client = self.app.test_client()

        # Create and announce a submission.
        with self.app.app_context():
            classic.create_all()
            session = classic.current_session()

            cc0 = 'http://creativecommons.org/publicdomain/zero/1.0/'
            # Apply the full sequence of events in one save() call, ending
            # with FinalizeSubmission.
            self.submission, _ = save(
                CreateSubmission(creator=self.user),
                ConfirmContactInformation(creator=self.user),
                ConfirmAuthorship(creator=self.user, submitter_is_author=True),
                SetLicense(creator=self.user,
                           license_uri=cc0,
                           license_name='CC0 1.0'),
                ConfirmPolicy(creator=self.user),
                SetPrimaryClassification(creator=self.user,
                                         category='astro-ph.GA'),
                SetUploadPackage(
                    creator=self.user,
                    checksum="a9s9k342900skks03330029k",
                    source_format=SubmissionContent.Format.TEX,
                    identifier=123,
                    uncompressed_size=593992,
                    compressed_size=59392,
                ), SetTitle(creator=self.user, title='foo title'),
                SetAbstract(creator=self.user, abstract='ab stract' * 20),
                SetComments(creator=self.user, comments='indeed'),
                SetReportNumber(creator=self.user, report_num='the number 12'),
                SetAuthors(creator=self.user,
                           authors=[
                               Author(order=0,
                                      forename='Bob',
                                      surname='Paulson',
                                      email='*****@*****.**',
                                      affiliation='Fight Club')
                           ]), FinalizeSubmission(creator=self.user))

            # Mark the submission as announced by editing its database row
            # directly and attaching a document with a paper ID.
            db_submission = session.query(classic.models.Submission) \
                .get(self.submission.submission_id)
            db_submission.status = classic.models.Submission.ANNOUNCED
            db_document = classic.models.Document(paper_id='1234.5678')
            db_submission.doc_paper_id = '1234.5678'
            db_submission.document = db_document
            session.add(db_submission)
            session.add(db_document)
            session.commit()

        self.submission_id = self.submission.submission_id