def s3_upload_tmp_file(s3_bucket, tmp_file, filename,
                       headers, directory='',
                       file_type_check=True,
                       return_key_only=False,
                       conn_name=DEFAULT_CONN,
                       with_encryption=False,
                       upload_root_dir=None):
    """
    Send the contents of a temporary file to s3, then remove the file.

    When ``file_type_check`` is true, the file name is handed to
    ``check_type`` before anything is read. When ``with_encryption`` is
    true, the bytes read from the file are encrypted with ``AESWithGCM``
    using the app's ``FILE_ENCRYPTION_KEY`` before being uploaded.

    The temporary file is unlinked whether or not the upload succeeds.

    :returns: whatever ``s3_upload_file`` returns for the upload.
    """
    try:
        if file_type_check:
            check_type(tmp_file.name)

        payload = tmp_file.read()
        if with_encryption:
            encryption_key = app.config.get('FILE_ENCRYPTION_KEY')
            payload = AESWithGCM(encryption_key).encrypt(payload)

        # The ``finally`` clause still runs before this value is returned.
        return s3_upload_file(s3_bucket, BytesIO(payload), filename, headers,
                              upload_root_dir, directory, return_key_only,
                              conn_name)
    finally:
        os.unlink(tmp_file.name)
def test_proxy_key_err(self, http_get):
    """Payload proxy answers 500 when the mocked HTTP reply holds an error.

    The patched ``http_get`` returns a JSON body with an ``error`` entry
    instead of a key. A second request against a project id that is assumed
    not to exist is expected to answer 400.
    """
    fake_response = MagicMock()
    fake_response.json.return_value = {'error': 'an error occurred'}
    http_get.return_value = fake_response

    admin, owner = UserFactory.create_batch(2)
    project_info = {'ext_config': {'encryption': {'key_id': 123}}}
    project = ProjectFactory.create(owner=owner, info=project_info)

    cipher = AESWithGCM('testkey')
    plaintext = json.dumps(dict(a=1, b="2"))
    task_info = {'private_json__encrypted_payload': cipher.encrypt(plaintext)}
    task = TaskFactory.create(project=project, info=task_info)
    signature = signer.dumps({'task_id': task.id})

    # Existing project: the request is expected to fail with a server error.
    url = ('/fileproxy/encrypted/taskpayload/%s/%s?api_key=%s&task-signature=%s'
           % (project.id, task.id, admin.api_key, signature))
    with patch.dict(self.flask_app.config, self.app_config):
        res = self.app.get(url, follow_redirects=True)
    assert res.status_code == 500, res.status_code

    # Same task and signature, different project id: a bad request is expected.
    bad_project_id = 9999
    url = ('/fileproxy/encrypted/taskpayload/%s/%s?api_key=%s&task-signature=%s'
           % (bad_project_id, task.id, admin.api_key, signature))
    with patch.dict(self.flask_app.config, self.app_config):
        res = self.app.get(url, follow_redirects=True)
    assert res.status_code == 400, res.status_code
def test_proxy_owner(self, http_get):
    """The project owner receives the decrypted task payload (HTTP 200)."""
    fake_response = MagicMock()
    fake_response.json.return_value = {'key': 'testkey'}
    http_get.return_value = fake_response

    project_info = {'ext_config': {'encryption': {'key_id': 123}}}
    project = ProjectFactory.create(info=project_info)

    # Encrypt the payload with the same key the mocked response hands out.
    cipher = AESWithGCM('testkey')
    plaintext = json.dumps(dict(a=1, b="2"))
    task_info = {'private_json__encrypted_payload': cipher.encrypt(plaintext)}
    task = TaskFactory.create(project=project, info=task_info)

    owner = project.owner
    signature = signer.dumps({'task_id': task.id})
    url = ('/fileproxy/encrypted/taskpayload/%s/%s?api_key=%s&task-signature=%s'
           % (project.id, task.id, owner.api_key, signature))

    with patch.dict(self.flask_app.config, self.app_config):
        res = self.app.get(url, follow_redirects=True)
    assert res.status_code == 200, res.status_code
    assert res.data == plaintext, res.data
def test_proxy_admin(self, http_get, hdfs_get):
    """A non-owner user (named ``admin``) gets the decrypted HDFS file.

    ``hdfs_get`` is mocked to return content encrypted with ``'testkey'``,
    the same key the mocked ``http_get`` response carries.
    """
    fake_response = MagicMock()
    fake_response.json.return_value = {'key': 'testkey'}
    http_get.return_value = fake_response

    admin, owner = UserFactory.create_batch(2)
    project_info = {'ext_config': {'encryption': {'key_id': 123}}}
    project = ProjectFactory.create(owner=owner, info=project_info)

    file_url = '/fileproxy/hdfs/test/%s/file.pdf' % project.id
    task = TaskFactory.create(project=project, info={'url': file_url})
    signature = signer.dumps({'task_id': task.id})
    req_url = '%s?api_key=%s&task-signature=%s' % (
        file_url, admin.api_key, signature)

    hdfs_get.return_value = AESWithGCM('testkey').encrypt('the content')

    with patch.dict(self.flask_app.config, self.app_config):
        res = self.app.get(req_url, follow_redirects=True)
    assert res.status_code == 200, res.status_code
    assert res.data == 'the content', res.data
class TestAes(object):
    """Round-trip and fixed-vector checks for ``AESWithGCM``."""

    def setUp(self):
        """Create the shared cipher with explicit IV and tag lengths."""
        key = bytearray('very secret', 'ascii')
        # Positional arguments after the key: IV length 12, tag length 16.
        self.aes = AESWithGCM(key, 12, 16)

    def test_aes(self):
        """Encrypting changes the text and decrypting restores it."""
        plaintext = 'testing simple encrytion'
        ciphertext = self.aes.encrypt(plaintext)
        assert ciphertext != plaintext
        assert self.aes.decrypt(ciphertext) == plaintext

    def test_aes_2(self):
        """A stored ciphertext decrypts to the known plaintext."""
        expected = 'this is a test string I plan to encrypt'
        ciphertext = 'DMj4/yC2pgzgAg76TApmk7zVZlaG0B47KASCnS/TqH6fQpA9UaHjmGLHqCfvGVVQcSivX76Oy349QivZjOJ2yfXZRb0='
        key = bytearray('this is my super secret key', 'ascii')
        assert AESWithGCM(key).decrypt(ciphertext) == expected

    def test_aes_unicode(self):
        """UTF-8 encoded non-ASCII text survives an encrypt/decrypt cycle."""
        text = u'∀ z ∈ ℂ, ζ(z) = 0 ⇒ ((z ∈ -2ℕ) ∨ (Re(z) = -½))'
        ciphertext = self.aes.encrypt(text.encode('utf-8'))
        roundtrip = self.aes.decrypt(ciphertext).decode('utf-8')
        assert text == roundtrip
def hdfs_file(project_id, cluster, path):
    """Serve a task file read from HDFS, decrypting it when configured.

    Raises ``NotFound`` when the app has no ``HDFS_CONFIG`` and ``Forbidden``
    when the request has no ``task-signature`` argument. Any failure while
    reading or decrypting the file is logged and reported as
    ``InternalServerError``.
    """
    if not current_app.config.get('HDFS_CONFIG'):
        raise NotFound('Not Found')

    signature = request.args.get('task-signature')
    if not signature:
        raise Forbidden('No signature')

    project = get_project_data(project_id)
    max_age = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)
    task_id = signer.loads(signature, max_age=max_age)['task_id']
    check_allowed(current_user.id, task_id, project, request.path)

    cluster_settings = current_app.config['HDFS_CONFIG'][cluster]
    client = HDFSKerberos(**cluster_settings)

    try:
        content = client.get('/{}'.format(path))
        ext_config = project['info'].get('ext_config', {})
        encryption = ext_config.get('encryption', {})
        # Decrypt only when the encryption config is present and fully set.
        if encryption and all(encryption.values()):
            vault_secret = get_secret_from_vault(encryption)
            content = AESWithGCM(vault_secret).decrypt(content)
    except Exception:
        message = 'Project id {} get task file {}'.format(project_id, path)
        current_app.logger.exception(message)
        raise InternalServerError('An Error Occurred')

    return Response(content)
def test_file_user_key_from_vault(self, get_secret, has_lock, create_connection):
    """A user holding the lock gets a file decrypted with the mocked secret.

    The content is encrypted with ``'testkey'`` (what ``get_secret`` returns)
    while ``FILE_ENCRYPTION_KEY`` is set to a different value, so a 200 with
    the plaintext shows the key from ``get_secret`` was the one used.
    """
    has_lock.return_value = True

    admin, owner, user = UserFactory.create_batch(3)
    project = ProjectFactory.create(info={'encryption': {'key': 'abc'}})

    file_url = '/fileproxy/encrypted/s3/anothertest/%s/file.pdf' % project.id
    task = TaskFactory.create(project=project, info={'url': file_url})
    signature = signer.dumps({'task_id': task.id})
    req_url = '%s?api_key=%s&task-signature=%s' % (
        file_url, user.api_key, signature)

    secret = 'testkey'
    cipher = AESWithGCM(secret)
    s3_key = self.get_key(create_connection)
    s3_key.get_contents_as_string.return_value = cipher.encrypt('the content')
    get_secret.return_value = secret

    config_overrides = {
        'FILE_ENCRYPTION_KEY': 'another key',
        'S3_REQUEST_BUCKET': 'test',
        'ENCRYPTION_CONFIG_PATH': ['encryption']
    }
    with patch.dict(self.flask_app.config, config_overrides):
        res = self.app.get(req_url, follow_redirects=True)
    assert res.status_code == 200, res.status_code
    assert res.data == 'the content', res.data
def test_decrypts_file_from_s3(self, get_contents):
    """``get_file_from_s3(..., decrypt=True)`` yields the plaintext content."""
    settings = self.default_config.copy()
    settings.update({
        'FILE_ENCRYPTION_KEY': 'abcd',
        'ENABLE_ENCRYPTION': True,
    })

    # The mocked S3 read returns data encrypted with the configured key.
    get_contents.return_value = AESWithGCM('abcd').encrypt('hello world')

    with patch.dict(self.flask_app.config, settings):
        handle = get_file_from_s3('test_bucket', '/the/key', decrypt=True)
        data = handle.read()
    assert data == 'hello world'
def get_content_and_key_from_s3(s3_bucket, path, conn_name=DEFAULT_CONN,
                                decrypt=False, secret=None):
    """Read an S3 object and return ``(content, key)``.

    With ``decrypt`` set, the content is decrypted with ``AESWithGCM`` using
    ``secret``, or the app's ``FILE_ENCRYPTION_KEY`` when ``secret`` is falsy.
    """
    _, key = get_s3_bucket_key(s3_bucket, path, conn_name)
    raw = key.get_contents_as_string()
    if not decrypt:
        return raw, key

    passphrase = secret or app.config.get('FILE_ENCRYPTION_KEY')
    return AESWithGCM(passphrase).decrypt(raw), key
def get_file_from_s3(s3_bucket, path, conn_name=DEFAULT_CONN, decrypt=False):
    """Download an S3 object into a temporary file and return that file.

    With ``decrypt`` set, the content is decrypted with ``AESWithGCM`` using
    the app's ``FILE_ENCRYPTION_KEY`` before it is written.

    :returns: an open ``NamedTemporaryFile`` positioned at offset 0; the
        caller is responsible for closing it.
    """
    _, key = get_s3_bucket_key(s3_bucket, path, conn_name)
    content = key.get_contents_as_string()
    if decrypt:
        secret = app.config.get('FILE_ENCRYPTION_KEY')
        cipher = AESWithGCM(secret)
        content = cipher.decrypt(content)

    # Create the temporary file only once the content is ready, so a failed
    # download or decryption does not leave an open handle behind.
    temp_file = NamedTemporaryFile()
    try:
        temp_file.write(content)
        temp_file.seek(0)
    except Exception:
        # Closing the file releases the handle before re-raising.
        temp_file.close()
        raise
    return temp_file
def encrypt_task_response_data(task_id, project_id, data):
    """Encrypt ``data`` as JSON when the task carries an encrypted payload.

    :returns: ``None`` when the task is missing, its ``info`` is not a dict,
        or ``info`` has no ``private_json__encrypted_payload`` entry;
        otherwise the encrypted JSON dump of ``data`` decoded as UTF-8 text.
    """
    task = task_repo.get_task(task_id)
    has_encrypted_payload = (
        task
        and isinstance(task.info, dict)
        and 'private_json__encrypted_payload' in task.info
    )
    if not has_encrypted_payload:
        return None

    project = get_project_data(project_id)
    cipher = AESWithGCM(get_encryption_key(project))
    serialized = json.dumps(data).encode('utf8')
    return cipher.encrypt(serialized).decode('utf8')
def hdfs_file(project_id, cluster, path):
    """Serve a task file (or a byte range of it) read from HDFS.

    The request must carry a ``task-signature`` argument no longer than
    ``TASK_SIGNATURE_MAX_SIZE``. Optional ``offset`` and ``length`` request
    arguments are converted to ints and forwarded to the HDFS client. The
    content is decrypted when the project's encryption config is present and
    all of its values are truthy.

    Raises ``NotFound`` when the app has no ``HDFS_CONFIG``, ``Forbidden``
    for a missing or oversized signature, and ``InternalServerError`` for any
    failure while reading or decrypting the file.
    """
    if not current_app.config.get('HDFS_CONFIG'):
        raise NotFound('Not Found')
    signature = request.args.get('task-signature')
    if not signature:
        raise Forbidden('No signature')
    size_signature = len(signature)
    if size_signature > TASK_SIGNATURE_MAX_SIZE:
        # No exception is being handled here, so log with error() rather
        # than exception(), which would append a meaningless traceback.
        current_app.logger.error(
            'Project id {}, cluster {} path {} invalid task signature. Signature length {} exceeds max allowed length {}.' \
            .format(project_id, cluster, path, size_signature, TASK_SIGNATURE_MAX_SIZE))
        raise Forbidden('Invalid signature')

    project = get_project_data(project_id)
    timeout = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)
    payload = signer.loads(signature, max_age=timeout)
    task_id = payload['task_id']
    try:
        check_allowed(
            current_user.id, task_id, project,
            is_valid_hdfs_url(request.path, request.args.to_dict(flat=False)))
    except Exception:
        # Log the denial with its traceback, then let the original error
        # propagate unchanged.
        current_app.logger.exception(
            'Project id %s not allowed to get file %s %s',
            project_id, path, str(request.args))
        raise

    current_app.logger.info(
        "Project id %s, task id %s. Accessing hdfs cluster %s, path %s",
        project_id, task_id, cluster, path)
    client = HDFSKerberos(**current_app.config['HDFS_CONFIG'][cluster])
    offset = request.args.get('offset')
    length = request.args.get('length')

    try:
        # Empty or missing values mean "no range restriction".
        offset = int(offset) if offset else None
        length = int(length) if length else None
        content = client.get('/{}'.format(path), offset=offset, length=length)
        project_encryption = get_project_encryption(project)
        if project_encryption and all(project_encryption.values()):
            secret = get_secret_from_vault(project_encryption)
            cipher = AESWithGCM(secret)
            content = cipher.decrypt(content)
    except Exception:
        current_app.logger.exception(
            "Project id %s, task id %s, cluster %s, get task file %s, %s",
            project_id, task_id, cluster, path, str(request.args))
        raise InternalServerError('An Error Occurred')

    return Response(content)
def encrypted_task_payload(project_id, task_id):
    """Proxy to decrypt encrypted task payload.

    The request must carry a ``task-signature`` argument no longer than
    ``TASK_SIGNATURE_MAX_SIZE``. The task id used for validation and lookup
    is the one inside the signed payload, not the one from the URL.

    Raises ``Forbidden`` for a missing or oversized signature, ``BadRequest``
    when the project cannot be found, and ``InternalServerError`` for any
    failure while loading or decrypting the payload.

    :returns: a JSON ``Response`` with the decrypted payload, or an empty
        body when the task has no ``private_json__encrypted_payload`` value.
    """
    current_app.logger.info(
        'Project id {}, task id {}, decrypt task payload.'.format(
            project_id, task_id))
    signature = request.args.get('task-signature')
    if not signature:
        # The validation failures below are not raised from an exception
        # handler, so they are logged with error() instead of exception()
        # to avoid an empty traceback in the log.
        current_app.logger.error(
            'Project id {}, task id {} has no signature.'.format(
                project_id, task_id))
        raise Forbidden('No signature')

    size_signature = len(signature)
    if size_signature > TASK_SIGNATURE_MAX_SIZE:
        current_app.logger.error(
            'Project id {}, task id {} invalid task signature. Signature length {} exceeds max allowed length {}.' \
            .format(project_id, task_id, size_signature, TASK_SIGNATURE_MAX_SIZE))
        raise Forbidden('Invalid signature')

    project = get_project_data(project_id)
    if not project:
        current_app.logger.error('Invalid project id {}.'.format(project_id))
        raise BadRequest('Invalid Project')

    timeout = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)

    payload = signer.loads(signature, max_age=timeout)
    # From here on the task id comes from the signed payload.
    task_id = payload.get('task_id', 0)

    validate_task(project, task_id, current_user.id)

    ## decrypt encrypted task data under private_json__encrypted_payload
    try:
        secret = get_encryption_key(project)
        task = task_repo.get_task(task_id)
        content = task.info.get('private_json__encrypted_payload')
        if content:
            cipher = AESWithGCM(secret)
            content = cipher.decrypt(content)
        else:
            content = ''
    except Exception as e:
        current_app.logger.exception(
            'Project id {} task {} decrypt encrypted data {}'.format(
                project_id, task_id, e))
        raise InternalServerError('An Error Occurred')

    response = Response(content, content_type='application/json')
    return response
def test_count_tasks_encrypted(self, s3_get):
    """``count_tasks`` counts one task in an encrypted two-line import file.

    The mocked S3 key returns ``'req\\n1'`` encrypted with the same key that
    is configured as ``FILE_ENCRYPTION_KEY``.
    """
    k = Mock()
    s3_get.return_value = '', k
    cont = 'req\n1'
    cipher = AESWithGCM('abcd')
    k.get_contents_as_string.return_value = cipher.encrypt(cont)
    config = {
        'S3_IMPORT_BUCKET': 'aadf',
        'FILE_ENCRYPTION_KEY': 'abcd',
        'ENABLE_ENCRYPTION': True
    }
    with patch.dict(self.flask_app.config, config):
        number_of_tasks = self.importer.count_tasks()
        # Compare by value: ``is`` against an int literal tests identity and
        # only works by accident of small-integer caching.
        assert number_of_tasks == 1, number_of_tasks
def test_taskrun_with_encrypted_payload(self, encr_key, upload_from_string, set_content):
    """Posting a taskrun for an encrypted task returns an encrypted response.

    The response ``info`` is expected to hold two entries: the encrypted
    copy of the submitted data and the answer URL.
    """
    with patch.dict(self.flask_app.config, self.patch_config):
        project = ProjectFactory.create()
        owner = project.owner

        secret = 'testkey'
        encr_key.return_value = secret
        cipher = AESWithGCM(secret)
        task_info = {
            'private_json__encrypted_payload': cipher.encrypt('some data')
        }
        task = TaskFactory.create(project=project, info=task_info)

        # Request a task first, as a contributor would before answering.
        self.app.get('/api/project/%s/newtask?api_key=%s' %
                     (project.id, owner.api_key))

        submitted_info = {'another_field': 42}
        body = json.dumps(dict(project_id=project.id,
                               task_id=task.id,
                               info=submitted_info))
        post_url = '/api/taskrun?api_key=%s' % owner.api_key

        success = self.app.post(post_url, data=body)

        assert success.status_code == 200, success.data
        set_content.assert_called()

        result_info = json.loads(success.data)['info']
        assert len(result_info) == 2

        # The encrypted response must decrypt back to the submitted info.
        decrypted = cipher.decrypt(
            result_info['private_json__encrypted_response'])
        assert decrypted == json.dumps(
            submitted_info
        ), "private_json__encrypted_response decrypted data mismatch"

        answer_url = result_info['pyb_answer_url']
        expected = 'https://{host}/{bucket}/{project_id}/{task_id}/{user_id}/{filename}'.format(
            host=self.host,
            bucket=self.bucket,
            project_id=project.id,
            task_id=task.id,
            user_id=owner.id,
            filename='pyb_answer.json')
        assert answer_url == expected, answer_url
def test_proxy_regular_user_has_lock(self, http_get):
    """A regular user needs the task lock, unless listed as a project owner.

    Expected statuses: 200 with the lock, 403 without it, and 200 without
    it once the user's id is appended to ``project.owners_ids``.
    """
    fake_response = MagicMock()
    fake_response.json.return_value = {'key': 'testkey'}
    http_get.return_value = fake_response

    admin, owner, user = UserFactory.create_batch(3)
    project_info = {'ext_config': {'encryption': {'key_id': 123}}}
    project = ProjectFactory.create(owner=owner, info=project_info)

    cipher = AESWithGCM('testkey')
    plaintext = json.dumps(dict(a=1, b="2"))
    task_info = {'private_json__encrypted_payload': cipher.encrypt(plaintext)}
    task = TaskFactory.create(project=project, info=task_info)

    signature = signer.dumps({'task_id': task.id})
    url = ('/fileproxy/encrypted/taskpayload/%s/%s?api_key=%s&task-signature=%s'
           % (project.id, task.id, user.api_key, signature))

    def fetch(lock_held):
        # Issue the request with ``has_lock`` forced to the given answer.
        with patch('pybossa.view.fileproxy.has_lock') as has_lock:
            has_lock.return_value = lock_held
            with patch.dict(self.flask_app.config, self.app_config):
                return self.app.get(url, follow_redirects=True)

    res = fetch(True)
    assert res.status_code == 200, res.status_code
    assert res.data == plaintext, res.data

    res = fetch(False)
    assert res.status_code == 403, res.status_code

    # coowner can access the task
    project.owners_ids.append(user.id)
    res = fetch(False)
    assert res.status_code == 200, res.status_code
def encrypted_file(store, bucket, project_id, path):
    """Proxy encrypted task file in a cloud storage.

    Downloads ``/<project_id>/<path>`` from ``bucket`` using the
    ``S3_TASK_REQUEST`` connection settings, decrypts it with the app's
    ``FILE_ENCRYPTION_KEY`` and returns it with the stored object's content
    type, encoding and disposition. ``store`` is not used in this function.

    Raises ``Forbidden`` when the request has no ``task-signature``,
    ``NotFound`` when S3 reports ``NoSuchKey`` and ``InternalServerError``
    for any other ``S3ResponseError``.
    """
    current_app.logger.info('Project id {} decrypt file. {}'.format(
        project_id, path))
    conn_args = current_app.config.get('S3_TASK_REQUEST', {})
    signature = request.args.get('task-signature')
    if not signature:
        # No exception is being handled here, so log with error() rather
        # than exception(), which would append a meaningless traceback.
        current_app.logger.error('Project id {} no signature {}'.format(
            project_id, path))
        raise Forbidden('No signature')

    project = get_project_data(project_id)
    timeout = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)

    payload = signer.loads(signature, max_age=timeout)
    task_id = payload['task_id']

    check_allowed(current_user.id, task_id, project, request.path)

    ## download file
    try:
        key = '/{}/{}'.format(project_id, path)
        conn = create_connection(**conn_args)
        _bucket = conn.get_bucket(bucket, validate=False)
        _key = _bucket.get_key(key, validate=False)
        content = _key.get_contents_as_string()
    except S3ResponseError as e:
        current_app.logger.exception(
            'Project id {} get task file {} {}'.format(project_id, path, e))
        if e.error_code == 'NoSuchKey':
            raise NotFound('File Does Not Exist')
        else:
            raise InternalServerError('An Error Occurred')

    ## decrypt file
    secret = current_app.config.get('FILE_ENCRYPTION_KEY')
    cipher = AESWithGCM(secret)
    decrypted = cipher.decrypt(content)

    response = Response(decrypted, content_type=_key.content_type)
    response.headers.add('Content-Encoding', _key.content_encoding)
    response.headers.add('Content-Disposition', _key.content_disposition)
    return response
def test_proxy_admin(self, create_connection):
    """A non-owner user (named ``admin``) gets the decrypted S3 file.

    The mocked S3 key returns content encrypted with the same value that is
    patched in as ``FILE_ENCRYPTION_KEY``.
    """
    admin, owner = UserFactory.create_batch(2)
    project = ProjectFactory.create(owner=owner)

    file_url = '/fileproxy/encrypted/s3/test/%s/file.pdf' % project.id
    task = TaskFactory.create(project=project, info={'url': file_url})
    signature = signer.dumps({'task_id': task.id})
    req_url = '%s?api_key=%s&task-signature=%s' % (
        file_url, admin.api_key, signature)

    secret = 'testkey'
    cipher = AESWithGCM(secret)
    s3_key = self.get_key(create_connection)
    s3_key.get_contents_as_string.return_value = cipher.encrypt('the content')

    with patch.dict(self.flask_app.config, {'FILE_ENCRYPTION_KEY': secret}):
        res = self.app.get(req_url, follow_redirects=True)
    assert res.status_code == 200, res.status_code
    assert res.data == 'the content', res.data
def setUp(self):
    """Create the cipher under test with explicit IV and tag lengths."""
    key = bytearray('very secret', 'ascii')
    # Positional arguments after the key: IV length 12, tag length 16.
    self.aes = AESWithGCM(key, 12, 16)
def test_aes_2(self):
    """A stored ciphertext decrypts to the known plaintext."""
    expected = 'this is a test string I plan to encrypt'
    ciphertext = 'DMj4/yC2pgzgAg76TApmk7zVZlaG0B47KASCnS/TqH6fQpA9UaHjmGLHqCfvGVVQcSivX76Oy349QivZjOJ2yfXZRb0='
    key = bytearray('this is my super secret key', 'ascii')
    # Uses a cipher built with its default lengths, not ``self.aes``.
    assert AESWithGCM(key).decrypt(ciphertext) == expected
def test_taskrun_with_upload(self, upload_from_string, set_content):
    """Posting a taskrun with an ``__upload_url`` field uploads encrypted data.

    The first ``set_content`` call is expected to carry the encrypted file
    content. The last one is expected to carry the encrypted answer JSON, in
    which the upload field has been replaced by the uploaded file's URL.
    """
    with patch.dict(self.flask_app.config, self.patch_config):
        project = ProjectFactory.create()
        owner = project.owner
        task = TaskFactory.create(project=project)

        def expected_url(filename):
            # URL layout the test expects for files stored for this taskrun.
            return 'https://{host}/{bucket}/{project_id}/{task_id}/{user_id}/{filename}'.format(
                host=self.host,
                bucket=self.bucket,
                project_id=project.id,
                task_id=task.id,
                user_id=owner.id,
                filename=filename)

        # Request a task first, as a contributor would before answering.
        self.app.get('/api/project/%s/newtask?api_key=%s' %
                     (project.id, owner.api_key))

        submitted_info = {
            'test__upload_url': {
                'filename': 'hello.txt',
                'content': 'abc'
            },
            'another_field': 42
        }
        body = json.dumps(dict(project_id=project.id,
                               task_id=task.id,
                               info=submitted_info))
        post_url = '/api/taskrun?api_key=%s' % owner.api_key

        success = self.app.post(post_url, data=body)

        assert success.status_code == 200, success.data
        set_content.assert_called()

        result_info = json.loads(success.data)['info']
        assert len(result_info) == 1
        answer_url = result_info['pyb_answer_url']
        assert answer_url == expected_url('pyb_answer.json'), answer_url

        cipher = AESWithGCM('testkey')

        # first call
        first_args, _ = set_content.call_args_list[0]
        uploaded_bytes = first_args[0].read()
        uploaded_plain = cipher.decrypt(uploaded_bytes)
        assert uploaded_bytes != uploaded_plain
        assert uploaded_plain == 'abc'

        upload_from_string.assert_called()

        # Most recent call: the answer JSON.
        last_args, _ = set_content.call_args
        answer = json.loads(cipher.decrypt(last_args[0].read()))
        assert answer['test__upload_url'] == expected_url('hello.txt')
        assert answer['another_field'] == 42