def hdfs_file(project_id, cluster, path):
    """Return the content of an HDFS file, decrypting it when configured.

    The request must carry a ``task-signature`` query argument. It is
    loaded through ``signer`` with the project's ``timeout`` (default
    ``ContributionsGuard.STAMP_TTL``) as maximum age, and the ``task_id``
    found in it is handed to ``check_allowed`` along with the current user
    id, the project data and the request path.

    When the project's ``ext_config.encryption`` mapping is non-empty and
    all of its values are truthy, the file content is decrypted with a key
    obtained from ``get_secret_from_vault``.

    Raises:
        NotFound: if ``HDFS_CONFIG`` is missing or empty in the app config.
        Forbidden: if no ``task-signature`` argument is present.
        InternalServerError: if fetching or decrypting the file fails.
    """
    if not current_app.config.get('HDFS_CONFIG'):
        raise NotFound('Not Found')

    signature = request.args.get('task-signature')
    if not signature:
        raise Forbidden('No signature')

    project = get_project_data(project_id)
    max_age = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)
    task_id = signer.loads(signature, max_age=max_age)['task_id']
    check_allowed(current_user.id, task_id, project, request.path)

    hdfs = HDFSKerberos(**current_app.config['HDFS_CONFIG'][cluster])
    try:
        content = hdfs.get('/{}'.format(path))
        ext_config = project['info'].get('ext_config', {})
        encryption = ext_config.get('encryption', {})
        if encryption and all(encryption.values()):
            vault_secret = get_secret_from_vault(encryption)
            content = AESWithGCM(vault_secret).decrypt(content)
    except Exception:
        log_message = 'Project id {} get task file {}'.format(
            project_id, path)
        current_app.logger.exception(log_message)
        raise InternalServerError('An Error Occurred')
    return Response(content)
def _verify_auth(self, item):
    """Tell whether the current user may access ``item``.

    Unauthenticated users are rejected; admins and subadmins are accepted.
    Any other user is accepted only when the password manager of the
    item's project reports that no password is needed for them.
    """
    if not current_user.is_authenticated():
        return False

    is_privileged = current_user.admin or current_user.subadmin
    if is_privileged:
        return True

    project = Project(**get_project_data(item.project_id))
    password_required = get_pwd_manager(project).password_needed(
        project, get_user_id_or_ip())
    return not password_required
def update_gold_stats(user_id, task_id, data):
    """Update gold statistics for an answer given to a calibration task.

    Does nothing when the task is not flagged as ``calibration``. Otherwise
    the project's ``answer_fields`` (default ``{}``), the task's
    ``gold_answers`` and ``data['info']`` are forwarded to
    ``_update_gold_stats``.
    """
    task = task_repo.get_task(task_id)
    # TODO: read gold_answer from s3
    if not task.calibration:
        return

    project_info = get_project_data(task.project_id)['info']
    answer_fields = project_info.get('answer_fields', {})
    submitted = data['info']
    _update_gold_stats(
        task.project_id,
        user_id,
        answer_fields,
        task.gold_answers,
        submitted)
def _select_attributes(self, data):
    """Strip gold-answer fields unless the current user may see them.

    ``gold_answers`` and ``calibration`` are kept for admins and for
    subadmins whose id is listed in the project's ``owners_ids``. For
    everyone else both keys are removed from ``data`` in place. The same
    ``data`` object is returned.
    """
    project_id = data['project_id']
    may_see_gold = current_user.admin or (
        current_user.subadmin
        and current_user.id in get_project_data(project_id)['owners_ids'])
    if not may_see_gold:
        for hidden_field in ('gold_answers', 'calibration'):
            data.pop(hidden_field, None)
    return data
def update_gold_stats(user_id, task_id, data, gold_answers=None):
    """Update gold statistics for an answer given to a calibration task.

    Nothing is done for tasks that are not flagged as ``calibration``.
    When ``gold_answers`` is ``None`` it is obtained from
    ``get_gold_answers(task)``; the project's ``answer_fields`` (default
    ``{}``) and ``data['info']`` are then forwarded to
    ``_update_gold_stats``.
    """
    task = task_repo.get_task(task_id)
    if task.calibration:
        if gold_answers is None:
            gold_answers = get_gold_answers(task)
        project_info = get_project_data(task.project_id)['info']
        answer_fields = project_info.get('answer_fields', {})
        submitted = data['info']
        _update_gold_stats(
            task.project_id,
            user_id,
            answer_fields,
            gold_answers,
            submitted)
def encrypted_file(store, bucket, project_id, path):
    """Proxy encrypted task file in a cloud storage.

    The request must carry a ``task-signature`` query argument no longer
    than ``TASK_SIGNATURE_MAX_SIZE``. It is loaded through ``signer`` with
    the project's ``timeout`` (default ``ContributionsGuard.STAMP_TTL``) as
    maximum age, and the ``task_id`` it contains is passed to
    ``check_allowed`` with a predicate comparing a value to the request
    path.

    Args:
        store: not referenced by this function.
        bucket: bucket to read from; unless it equals the configured
            ``S3_REQUEST_BUCKET``, the project's own encryption key is
            used instead of ``FILE_ENCRYPTION_KEY``.
        project_id: project id; also the first segment of the key name.
        path: remainder of the key name.

    Returns:
        A ``Response`` with the content returned by
        ``get_content_and_key_from_s3``, the key's content type and, when
        set, its content encoding and content disposition.

    Raises:
        Forbidden: if the signature is missing or too long.
        NotFound: if S3 reports ``NoSuchKey``.
        InternalServerError: on any other ``S3ResponseError``.
    """
    current_app.logger.info('Project id {} decrypt file. {}'.format(
        project_id, path))
    signature = request.args.get('task-signature')
    if not signature:
        # No exception is active here, so logger.exception() would append
        # a meaningless "NoneType: None" traceback; log a plain error.
        current_app.logger.error('Project id {} no signature {}'.format(
            project_id, path))
        raise Forbidden('No signature')

    size_signature = len(signature)
    if size_signature > TASK_SIGNATURE_MAX_SIZE:
        template = ('Project id {}, path {} invalid task signature. '
                    'Signature length {} exceeds max allowed length {}.')
        current_app.logger.error(template.format(
            project_id, path, size_signature, TASK_SIGNATURE_MAX_SIZE))
        raise Forbidden('Invalid signature')

    project = get_project_data(project_id)
    timeout = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)
    payload = signer.loads(signature, max_age=timeout)
    task_id = payload['task_id']

    check_allowed(current_user.id, task_id, project,
                  lambda v: v == request.path)

    # Pick the decryption key according to the bucket being read.
    if bucket != current_app.config.get('S3_REQUEST_BUCKET'):
        secret = get_encryption_key(project)
    else:
        secret = current_app.config.get('FILE_ENCRYPTION_KEY')

    # Download the file.
    try:
        key_name = '/{}/{}'.format(project_id, path)
        decrypted, key = get_content_and_key_from_s3(
            bucket, key_name, 'S3_TASK_REQUEST',
            decrypt=secret, secret=secret)
    except S3ResponseError as e:
        current_app.logger.exception(
            'Project id {} get task file {} {}'.format(project_id, path, e))
        if e.error_code == 'NoSuchKey':
            raise NotFound('File Does Not Exist')
        raise InternalServerError('An Error Occurred')

    response = Response(decrypted, content_type=key.content_type)
    if key.content_encoding:
        response.headers.add('Content-Encoding', key.content_encoding)
    if key.content_disposition:
        response.headers.add('Content-Disposition', key.content_disposition)
    return response
def encrypt_task_response_data(task_id, project_id, data):
    """Encrypt ``data`` as JSON when the task carries an encrypted payload.

    Returns ``None`` unless the task exists, its ``info`` is a dict and
    that dict contains the ``private_json__encrypted_payload`` key.
    Otherwise ``data`` is serialised with ``json.dumps``, encoded as UTF-8,
    encrypted with ``AESWithGCM`` using the project's encryption key, and
    the result is returned decoded as UTF-8.
    """
    task = task_repo.get_task(task_id)
    has_encrypted_payload = (
        task
        and isinstance(task.info, dict)
        and 'private_json__encrypted_payload' in task.info)
    if not has_encrypted_payload:
        return None

    encryption_key = get_encryption_key(get_project_data(project_id))
    cipher = AESWithGCM(encryption_key)
    plaintext = json.dumps(data)
    return cipher.encrypt(plaintext.encode('utf8')).decode('utf8')
def hdfs_file(project_id, cluster, path):
    """Return (part of) an HDFS file, decrypting it when configured.

    The request must carry a ``task-signature`` query argument no longer
    than ``TASK_SIGNATURE_MAX_SIZE``. It is loaded through ``signer`` with
    the project's ``timeout`` (default ``ContributionsGuard.STAMP_TTL``) as
    maximum age, and the ``task_id`` it contains is passed to
    ``check_allowed`` together with the result of
    ``is_valid_hdfs_url(request.path, <query args>)``.

    Optional ``offset`` and ``length`` query arguments are converted to
    ``int`` and forwarded to the HDFS client. When
    ``get_project_encryption(project)`` is non-empty and all of its values
    are truthy, the content is decrypted with a key obtained from
    ``get_secret_from_vault``.

    Raises:
        NotFound: if ``HDFS_CONFIG`` is missing or empty in the app config.
        Forbidden: if the signature is missing or too long.
        InternalServerError: if parsing ``offset``/``length``, reading or
            decrypting the file fails.
        Exception: whatever ``check_allowed`` raises is logged and
            re-raised unchanged.
    """
    if not current_app.config.get('HDFS_CONFIG'):
        raise NotFound('Not Found')

    signature = request.args.get('task-signature')
    if not signature:
        raise Forbidden('No signature')

    size_signature = len(signature)
    if size_signature > TASK_SIGNATURE_MAX_SIZE:
        # No exception is active here, so logger.exception() would append
        # a meaningless "NoneType: None" traceback; log a plain error.
        template = ('Project id {}, cluster {} path {} invalid task '
                    'signature. Signature length {} exceeds max allowed '
                    'length {}.')
        current_app.logger.error(template.format(
            project_id, cluster, path, size_signature,
            TASK_SIGNATURE_MAX_SIZE))
        raise Forbidden('Invalid signature')

    project = get_project_data(project_id)
    timeout = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)
    payload = signer.loads(signature, max_age=timeout)
    task_id = payload['task_id']

    try:
        check_allowed(
            current_user.id, task_id, project,
            is_valid_hdfs_url(request.path,
                              request.args.to_dict(flat=False)))
    except Exception:
        current_app.logger.exception(
            'Project id %s not allowed to get file %s %s',
            project_id, path, str(request.args))
        raise

    current_app.logger.info(
        "Project id %s, task id %s. Accessing hdfs cluster %s, path %s",
        project_id, task_id, cluster, path)
    client = HDFSKerberos(**current_app.config['HDFS_CONFIG'][cluster])
    offset = request.args.get('offset')
    length = request.args.get('length')

    try:
        # Empty or missing values mean "no offset/length".
        offset = int(offset) if offset else None
        length = int(length) if length else None
        content = client.get('/{}'.format(path), offset=offset,
                             length=length)
        project_encryption = get_project_encryption(project)
        if project_encryption and all(project_encryption.values()):
            secret = get_secret_from_vault(project_encryption)
            cipher = AESWithGCM(secret)
            content = cipher.decrypt(content)
    except Exception:
        current_app.logger.exception(
            "Project id %s, task id %s, cluster %s, get task file %s, %s",
            project_id, task_id, cluster, path, str(request.args))
        raise InternalServerError('An Error Occurred')

    return Response(content)
def _preprocess_post_data(self, data):
    """Validate incoming task data and fill in ``n_answers`` if absent.

    Raises:
        Conflict: if ``task_repo.find_duplicate`` finds a task with the
            same project id and info; the body is a JSON message carrying
            the duplicate's id.
        BadRequest: if ``validate_required_fields`` reports invalid fields.
    """
    project_id, info = data["project_id"], data["info"]

    duplicate_id = task_repo.find_duplicate(project_id=project_id, info=info)
    if duplicate_id:
        conflict_body = json.dumps(
            {'reason': 'DUPLICATE_TASK', 'task_id': duplicate_id})
        raise Conflict(conflict_body)

    if 'n_answers' not in data:
        owning_project = Project(**get_project_data(project_id))
        data['n_answers'] = owning_project.get_default_n_answers()

    invalid_fields = validate_required_fields(info)
    if invalid_fields:
        field_list = ','.join(invalid_fields)
        raise BadRequest(
            'Missing or incorrect required fields: {}'.format(field_list))
def encrypted_task_payload(project_id, task_id):
    """Proxy to decrypt encrypted task payload.

    The request must carry a ``task-signature`` query argument no longer
    than ``TASK_SIGNATURE_MAX_SIZE``. It is loaded through ``signer`` with
    the project's ``timeout`` (default ``ContributionsGuard.STAMP_TTL``) as
    maximum age. The ``task_id`` used from then on is the one found in the
    signature payload (``0`` when absent), not the function argument; it is
    passed to ``validate_task`` with the project and the current user id.

    Returns:
        A JSON ``Response`` holding the decrypted value of the task's
        ``private_json__encrypted_payload`` info entry, or an empty string
        when that entry is missing or empty.

    Raises:
        Forbidden: if the signature is missing or too long.
        BadRequest: if no project data is found for ``project_id``.
        InternalServerError: if loading or decrypting the payload fails.
    """
    current_app.logger.info(
        'Project id {}, task id {}, decrypt task payload.'.format(
            project_id, task_id))
    signature = request.args.get('task-signature')
    if not signature:
        # No exception is active on these validation paths, so
        # logger.exception() would append a meaningless "NoneType: None"
        # traceback; log plain errors instead.
        current_app.logger.error(
            'Project id {}, task id {} has no signature.'.format(
                project_id, task_id))
        raise Forbidden('No signature')

    size_signature = len(signature)
    if size_signature > TASK_SIGNATURE_MAX_SIZE:
        template = ('Project id {}, task id {} invalid task signature. '
                    'Signature length {} exceeds max allowed length {}.')
        current_app.logger.error(template.format(
            project_id, task_id, size_signature, TASK_SIGNATURE_MAX_SIZE))
        raise Forbidden('Invalid signature')

    project = get_project_data(project_id)
    if not project:
        # The template has a single placeholder; pass only project_id.
        current_app.logger.error('Invalid project id {}.'.format(project_id))
        raise BadRequest('Invalid Project')

    timeout = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)
    payload = signer.loads(signature, max_age=timeout)
    task_id = payload.get('task_id', 0)

    validate_task(project, task_id, current_user.id)

    # Decrypt the encrypted task data under private_json__encrypted_payload.
    try:
        secret = get_encryption_key(project)
        task = task_repo.get_task(task_id)
        content = task.info.get('private_json__encrypted_payload')
        if content:
            cipher = AESWithGCM(secret)
            content = cipher.decrypt(content)
        else:
            content = ''
    except Exception as e:
        current_app.logger.exception(
            'Project id {} task {} decrypt encrypted data {}'.format(
                project_id, task_id, e))
        raise InternalServerError('An Error Occurred')

    response = Response(content, content_type='application/json')
    return response
def encrypted_file(store, bucket, project_id, path):
    """Proxy encrypted task file in a cloud storage.

    The request must carry a ``task-signature`` query argument. It is
    loaded through ``signer`` with the project's ``timeout`` (default
    ``ContributionsGuard.STAMP_TTL``) as maximum age, and the ``task_id``
    it contains is passed to ``check_allowed`` with the current user id,
    the project data and the request path.

    The object ``/<project_id>/<path>`` is read from ``bucket`` using the
    ``S3_TASK_REQUEST`` connection settings and decrypted with
    ``FILE_ENCRYPTION_KEY``.

    Args:
        store: not referenced by this function.
        bucket: name of the bucket to read from.
        project_id: project id; also the first segment of the key.
        path: remainder of the key.

    Returns:
        A ``Response`` with the decrypted content, the key's content type
        and, when set, its content encoding and content disposition.

    Raises:
        Forbidden: if no ``task-signature`` argument is present.
        NotFound: if S3 reports ``NoSuchKey``.
        InternalServerError: on any other ``S3ResponseError``.
    """
    current_app.logger.info('Project id {} decrypt file. {}'.format(
        project_id, path))
    conn_args = current_app.config.get('S3_TASK_REQUEST', {})
    signature = request.args.get('task-signature')
    if not signature:
        # No exception is active here, so logger.exception() would append
        # a meaningless "NoneType: None" traceback; log a plain error.
        current_app.logger.error('Project id {} no signature {}'.format(
            project_id, path))
        raise Forbidden('No signature')

    project = get_project_data(project_id)
    timeout = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)
    payload = signer.loads(signature, max_age=timeout)
    task_id = payload['task_id']

    check_allowed(current_user.id, task_id, project, request.path)

    # Download the file.
    try:
        key = '/{}/{}'.format(project_id, path)
        conn = create_connection(**conn_args)
        _bucket = conn.get_bucket(bucket, validate=False)
        _key = _bucket.get_key(key, validate=False)
        content = _key.get_contents_as_string()
    except S3ResponseError as e:
        current_app.logger.exception(
            'Project id {} get task file {} {}'.format(project_id, path, e))
        if e.error_code == 'NoSuchKey':
            raise NotFound('File Does Not Exist')
        raise InternalServerError('An Error Occurred')

    # Decrypt the file.
    secret = current_app.config.get('FILE_ENCRYPTION_KEY')
    cipher = AESWithGCM(secret)
    decrypted = cipher.decrypt(content)

    response = Response(decrypted, content_type=_key.content_type)
    # Only forward metadata that is actually set on the key; adding an
    # empty/None value would emit a bogus header.
    if _key.content_encoding:
        response.headers.add('Content-Encoding', _key.content_encoding)
    if _key.content_disposition:
        response.headers.add('Content-Disposition',
                             _key.content_disposition)
    return response
def _preprocess_post_data(self, data):
    """Validate incoming task data and derive defaults from it.

    Fills in ``n_answers`` from the project's default when absent. When
    ``gold_answers`` is truthy and is exactly a ``dict``, the task is
    marked with ``calibration = 1`` and ``exported = True``.

    Raises:
        Conflict: if ``task_repo.find_duplicate`` finds a task with the
            same project id and info; the body is a JSON message carrying
            the duplicate's id.
        BadRequest: if ``validate_required_fields`` reports invalid
            fields, or if handling ``gold_answers`` raises.
    """
    project_id, info = data["project_id"], data["info"]

    duplicate_id = task_repo.find_duplicate(project_id=project_id, info=info)
    if duplicate_id:
        conflict_body = json.dumps(
            {'reason': 'DUPLICATE_TASK', 'task_id': duplicate_id})
        raise Conflict(conflict_body)

    if 'n_answers' not in data:
        owning_project = Project(**get_project_data(project_id))
        data['n_answers'] = owning_project.get_default_n_answers()

    invalid_fields = validate_required_fields(info)
    if invalid_fields:
        field_list = ','.join(invalid_fields)
        raise BadRequest(
            'Missing or incorrect required fields: {}'.format(field_list))

    if not data.get('gold_answers'):
        return
    try:
        if type(data['gold_answers']) is dict:
            data['calibration'] = 1
            data['exported'] = True
    except Exception:
        raise BadRequest('Invalid gold_answers')
def encrypted_file(store, bucket, project_id, path):
    """Proxy encrypted task file in a cloud storage.

    The request must carry a ``task-signature`` query argument. It is
    loaded through ``signer`` with the project's ``timeout`` (default
    ``ContributionsGuard.STAMP_TTL``) as maximum age, and the ``task_id``
    it contains is passed to ``check_allowed`` with a predicate comparing
    a value to the request path.

    Args:
        store: not referenced by this function.
        bucket: name of the bucket to read from.
        project_id: project id; also the first segment of the key name.
        path: remainder of the key name.

    Returns:
        A ``Response`` with the content returned by
        ``get_content_and_key_from_s3`` (called with ``decrypt=True``),
        the key's content type and, when set, its content encoding and
        content disposition.

    Raises:
        Forbidden: if no ``task-signature`` argument is present.
        NotFound: if S3 reports ``NoSuchKey``.
        InternalServerError: on any other ``S3ResponseError``.
    """
    current_app.logger.info('Project id {} decrypt file. {}'.format(
        project_id, path))
    signature = request.args.get('task-signature')
    if not signature:
        # No exception is active here, so logger.exception() would append
        # a meaningless "NoneType: None" traceback; log a plain error.
        current_app.logger.error('Project id {} no signature {}'.format(
            project_id, path))
        raise Forbidden('No signature')

    project = get_project_data(project_id)
    timeout = project['info'].get('timeout', ContributionsGuard.STAMP_TTL)
    payload = signer.loads(signature, max_age=timeout)
    task_id = payload['task_id']

    check_allowed(current_user.id, task_id, project,
                  lambda v: v == request.path)

    # Download the file.
    try:
        key_name = '/{}/{}'.format(project_id, path)
        decrypted, key = get_content_and_key_from_s3(
            bucket, key_name, 'S3_TASK_REQUEST', decrypt=True)
    except S3ResponseError as e:
        current_app.logger.exception(
            'Project id {} get task file {} {}'.format(project_id, path, e))
        if e.error_code == 'NoSuchKey':
            raise NotFound('File Does Not Exist')
        raise InternalServerError('An Error Occurred')

    response = Response(decrypted, content_type=key.content_type)
    # Only forward metadata that is actually set on the key; adding an
    # empty/None value would emit a bogus header.
    if key.content_encoding:
        response.headers.add('Content-Encoding', key.content_encoding)
    if key.content_disposition:
        response.headers.add('Content-Disposition', key.content_disposition)
    return response
def _sign_item(self, item):
    """Sign ``item`` for admins and for owners of the item's project.

    The current user qualifies when flagged as admin or when their id is
    listed in the project's ``owners_ids``; in that case ``sign_task`` is
    called on ``item``. Otherwise nothing happens.
    """
    project_id = item['project_id']
    may_sign = current_user.admin
    if not may_sign:
        # Project data is only looked up for non-admin users.
        owners = get_project_data(project_id)['owners_ids']
        may_sign = current_user.id in owners
    if may_sign:
        sign_task(item)
def _select_attributes(self, data):
    """Return ``data`` after ``TaskAuth.apply_access_control`` is applied.

    The current user and the data of the project named by
    ``data['project_id']`` are supplied to the access-control call.
    """
    project_data = get_project_data(data['project_id'])
    return TaskAuth.apply_access_control(
        data,
        user=current_user,
        project_data=project_data,
    )