def tag_data_package(publisher, package):
    """
    DPR metadata put operation.
    This API is responsible for tagging data package
    ---
    tags:
        - package
    parameters:
        - in: path
          name: publisher
          type: string
          required: true
          description: publisher name
        - in: path
          name: package
          type: string
          required: true
          description: package name
        - in: body
          name: version
          type: string
          required: true
          description: version value
    responses:
        400:
            description: JWT is invalid or req body is not valid
        401:
            description: Invalid Header for JWT
        403:
            description: User not allowed for operation
        404:
            description: User not found
        500:
            description: Internal Server Error
        200:
            description: Success Message
            schema:
                id: put_package_success
                properties:
                    status:
                        type: string
                        description: Status of the operation
                        default: OK
    """
    try:
        data = request.get_json()
        if 'version' not in data:
            return handle_error('ATTRIBUTE_MISSING', 'version not found', 400)

        bitstore = BitStore(publisher, package)
        # The tag must land in both stores: the version row in the DB and a
        # copy of the package objects under the new version prefix in S3.
        status_db = MetaDataDB.create_or_update_version(publisher, package,
                                                        data['version'])
        status_bitstore = bitstore.copy_to_new_version(data['version'])
        if status_db is False or status_bitstore is False:
            raise Exception("failed to tag data package")
        return jsonify({"status": "OK"}), 200
    except Exception as e:
        app.logger.error(e)
        # str(e), not e.message: BaseException.message was removed in
        # Python 3, so the old attribute access would itself raise.
        return handle_error('GENERIC_ERROR', str(e), 500)
def test_return_none_if_no_readme_found(self):
    """An object whose key is not a README must not be reported as one."""
    with self.app.app_context():
        store = BitStore('test_pub', 'test_package')
        client = boto3.client('s3')
        bucket = self.app.config['S3_BUCKET_NAME']
        client.create_bucket(Bucket=bucket)
        # Upload a markdown file under a non-README key.
        client.put_object(Bucket=bucket,
                          Key=store.build_s3_key('test.md'),
                          Body='')
        self.assertEqual(store.get_readme_object_key(), None)
def get_s3_signed_url():
    """
    This API is responsible for generate signed url to post data to S3
    ---
    tags:
        - auth
    parameters:
        - in: body
          name: publisher
          type: string
          required: true
          description: publisher name
        - in: body
          name: package
          type: string
          required: true
          description: package name
        - in: body
          name: path
          type: string
          required: true
          description: relative path of the resources
        - in: body
          name: md5
          type: string
          required: true
          description: md5 hash of the payload to be uploaded
    responses:
        200:
            description: Success
            schema:
                id: get_signed_url
                properties:
                    key:
                        type: string
                        description: signed url for post data to S3
        400:
            description: Publisher or package can not be empty
        500:
            description: Internal Server Error
    """
    try:
        data = request.get_json()
        publisher = data.get('publisher', None)
        package = data.get('package', None)
        path = data.get('path', None)
        md5 = data.get('md5', None)
        if publisher is None or package is None:
            return handle_error('INVALID_INPUT',
                                'publisher or package can not be empty',
                                400)
        if md5 is None:
            return handle_error('INVALID_INPUT',
                                'md5 hash can not be empty',
                                400)
        metadata = BitStore(publisher=publisher, package=package)
        # The signed URL lets the client PUT the object straight to S3
        # without routing the payload through this server.
        url = metadata.generate_pre_signed_put_obj_url(path, md5)
        return jsonify({'key': url}), 200
    except Exception as e:
        app.logger.error(e)
        # str(e), not e.message: the attribute does not exist in Python 3.
        return handle_error('GENERIC_ERROR', str(e), 500)
def finalize_metadata(publisher, package):
    """
    DPR metadata finalize operation.
    This API is responsible for getting data from S3 and push it to RDS.
    ---
    tags:
        - package
    parameters:
        - in: path
          name: publisher
          type: string
          required: true
          description: publisher name
        - in: path
          name: package
          type: string
          required: true
          description: package name
    responses:
        200:
            description: Data transfer complete
        400:
            description: JWT is invalid
        401:
            description: Invalid Header for JWT
        403:
            description: User name and publisher not matched
        404:
            description: User not found
        500:
            description: Internal Server Error
    """
    try:
        # Identity comes from the JWT that the auth decorator stashed on
        # the request context.
        user = _request_ctx_stack.top.current_user
        user_id = user['user']
        user = User.query.filter_by(id=user_id).first()
        # Guard clauses instead of nested ifs: same responses, flatter flow.
        if user is None:
            return handle_error('USER_NOT_FOUND', 'user not found', 404)
        if user.name != publisher:
            return handle_error('NOT_PERMITTED',
                                'user name and publisher not matched',
                                403)
        bit_store = BitStore(publisher, package)
        body = bit_store.get_metadata_body()
        if body is None:
            raise Exception("Failed to get data from s3")
        readme = bit_store.get_s3_object(bit_store.get_readme_object_key())
        MetaDataDB.create_or_update(name=package, publisher_name=publisher,
                                    descriptor=body, readme=readme)
        return jsonify({"status": "OK"}), 200
    except Exception as e:
        app.logger.error(e)
        # str(e), not e.message: BaseException.message was removed in Python 3.
        return handle_error('GENERIC_ERROR', str(e), 500)
def test_get_metadata_body(self):
    """The body stored under the datapackage.json key round-trips intact."""
    with self.app.app_context():
        client = boto3.client('s3')
        bucket = self.app.config['S3_BUCKET_NAME']
        client.create_bucket(Bucket=bucket)
        store = BitStore(publisher="pub_test",
                         package="test_package",
                         body='hi')
        descriptor_key = store.build_s3_key('datapackage.json')
        client.put_object(Bucket=bucket, Key=descriptor_key, Body=store.body)
        self.assertEqual(store.body, store.get_metadata_body())
def test_get_empty_metadata_name_for_publisher(self):
    """Objects outside the publisher's prefix yield an empty name list."""
    with self.app.app_context():
        client = boto3.client('s3')
        bucket = self.app.config['S3_BUCKET_NAME']
        client.create_bucket(Bucket=bucket)
        store = BitStore(publisher="pub_test",
                         package="test_package",
                         body='hi')
        # Key deliberately does not match the publisher prefix.
        client.put_object(Bucket=bucket, Key='test/key.json', Body=store.body)
        names = store.get_all_metadata_name_for_publisher()
        self.assertEqual(0, len(names))
def test_save(self):
    """save() writes exactly one object at the descriptor key."""
    with self.app.app_context():
        client = boto3.client('s3')
        bucket = self.app.config['S3_BUCKET_NAME']
        client.create_bucket(Bucket=bucket)
        store = BitStore(publisher="pub_test",
                         package="test_package",
                         body='hi')
        descriptor_key = store.build_s3_key('datapackage.json')
        store.save()
        listing = client.list_objects(Bucket=bucket, Prefix=descriptor_key)
        found = list(listing.get('Contents'))
        self.assertEqual(1, len(found))
        self.assertEqual(descriptor_key, found[0]['Key'])
def test_generate_pre_signed_put_obj_url(self):
    """The signed PUT URL points at the configured region's S3 endpoint."""
    with self.app.app_context():
        client = boto3.client('s3')
        bucket = self.app.config['S3_BUCKET_NAME']
        client.create_bucket(Bucket=bucket)
        store = BitStore(publisher="pub_test",
                         package="test_package",
                         body='hi')
        signed = store.generate_pre_signed_put_obj_url('datapackage.json', 'm')
        expected_host = 's3-{region}.amazonaws.com'.format(
            region=self.app.config['AWS_REGION'])
        self.assertEqual(urlparse(signed).netloc, expected_host)
def populate_data(publisher_name):
    """Seed the demo-package fixture for *publisher_name*.

    Loads the fixture descriptor, resource CSV and README from disk,
    attaches a MetaDataDB row to the publisher, and uploads the CSV
    resource to the configured S3 bucket.
    """
    # Context managers so the fixture file handles are always closed
    # (the old open(...).read() chain leaked all three).
    with open('fixtures/datapackage.json') as descriptor_file:
        data = json.load(descriptor_file)
    with open('fixtures/data/demo-resource.csv') as csv_file:
        data_csv = csv_file.read()
    with open('fixtures/README.md') as readme_file:
        readme = readme_file.read()

    publisher = Publisher.query.filter_by(name=publisher_name).one()
    metadata = MetaDataDB(name="demo-package")
    # One assignment per field; the old 4-way tuple unpack hid these.
    metadata.descriptor = json.dumps(data)
    metadata.status = 'active'
    metadata.private = False
    metadata.readme = readme
    publisher.packages.append(metadata)
    db.session.add(publisher)
    db.session.commit()

    # Mirror the CSV resource into the bitstore bucket.
    bitstore = BitStore(publisher_name, package='demo-package')
    key = bitstore.build_s3_key('demo-resource.csv')
    bucket_name = app.config['S3_BUCKET_NAME']
    s3_client = app.config['S3']
    s3_client.put_object(Bucket=bucket_name, Key=key, Body=data_csv)
def purge_data_package(publisher, package):
    """
    DPR data package hard delete operation.
    This API is responsible for deletion of data package
    ---
    tags:
        - package
    parameters:
        - in: path
          name: publisher
          type: string
          required: true
          description: publisher name
        - in: path
          name: package
          type: string
          required: true
          description: package name
    responses:
        500:
            description: Internal Server Error
        200:
            description: Success Message
            schema:
                id: put_package_success
                properties:
                    status:
                        type: string
                        default: OK
    """
    try:
        # Hard delete touches both stores: objects in S3 and the DB rows.
        bitstore = BitStore(publisher=publisher, package=package)
        status_acl = bitstore.delete_data_package()
        status_db = MetaDataDB.delete_data_package(publisher, package)
        if not status_acl:
            raise Exception('Failed to delete from s3')
        if not status_db:
            raise Exception('Failed to delete from db')
        return jsonify({"status": "OK"}), 200
    except Exception as e:
        app.logger.error(e)
        # str(e), not e.message: BaseException.message was removed in Python 3.
        return handle_error('GENERIC_ERROR', str(e), 500)
def test_change_acl(self):
    """After flipping to private, the owner keeps FULL_CONTROL."""
    with self.app.app_context():
        store = BitStore('test_pub', 'test_package')
        client = boto3.client('s3')
        bucket = self.app.config['S3_BUCKET_NAME']
        client.create_bucket(Bucket=bucket)
        readme_key = store.build_s3_key('test.md')
        for name in ('test.md', 'data.csv', 'datapackage.json'):
            client.put_object(Bucket=bucket,
                              Key=store.build_s3_key(name),
                              Body='')
        store.change_acl("private")
        acl = client.get_object_acl(Bucket=bucket, Key=readme_key)
        self.assertEqual(acl['Grants'][0]['Permission'], 'FULL_CONTROL')
def test_throw_403_if_not_owner_or_member_of_publisher(self):
    # End-to-end scenario: a user who is neither owner nor member of the
    # publisher must get 403 when tagging, and the tag must not be created
    # in either the DB or the bitstore.

    # Arrange: seed the bucket with a complete package under the latest
    # prefix so the tag operation would have something to copy.
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket=self.bucket_name)
    bit_store = BitStore('test_pub', 'test_package')
    read_me_key = bit_store.build_s3_key('test.md')
    data_key = bit_store.build_s3_key('data.csv')
    metadata_key = bit_store.build_s3_key('datapackage.json')
    s3.put_object(Bucket=self.bucket_name, Key=read_me_key, Body='readme')
    s3.put_object(Bucket=self.bucket_name, Key=data_key, Body='data')
    s3.put_object(Bucket=self.bucket_name, Key=metadata_key, Body='metedata')

    # Obtain a JWT for the non-member user via the auth endpoint.
    response = self.client.post(self.jwt_url,
                                data=json.dumps({
                                    'username': self.user_not_allowed_name,
                                    'secret': 'super_secret'
                                }),
                                content_type='application/json')
    data = json.loads(response.data)
    jwt_not_allowed = data['token']
    auth_not_allowed = "bearer %s" % jwt_not_allowed

    # Act: attempt to tag the package as the disallowed user.
    response = self.client.post(
        self.url,
        data=json.dumps({'version': 'tag_one'}),
        content_type='application/json',
        headers=dict(Authorization=auth_not_allowed))

    # Assert: the request is rejected outright.
    self.assertEqual(response.status_code, 403)

    # Assert: no extra version row was written to the DB — only the
    # pre-existing package record remains.
    with self.app.app_context():
        data_latest = MetaDataDB.query.join(Publisher). \
            filter(Publisher.name == self.publisher_name,
                   MetaDataDB.name == self.package).all()
        self.assertEqual(1, len(data_latest))

    # Assert: nothing was copied under the would-be tag prefix in S3.
    bit_store_tagged = BitStore('test_pub', 'test_package',
                                'tag_one')
    objects_nu = s3.list_objects(
        Bucket=self.bucket_name,
        Prefix=bit_store_tagged.build_s3_versioned_prefix())
    self.assertTrue('Contents' not in objects_nu)
def test_delete_data_package(self):
    """delete_data_package removes every object and reports success."""
    with self.app.app_context():
        store = BitStore('test_pub', 'test_package')
        client = boto3.client('s3')
        bucket = self.app.config['S3_BUCKET_NAME']
        client.create_bucket(Bucket=bucket)
        readme_key = store.build_s3_key('test.md')
        data_key = store.build_s3_key('data.csv')
        descriptor_key = store.build_s3_key('datapackage.json')
        client.put_object(Bucket=bucket, Key=readme_key, Body='readme')
        client.put_object(Bucket=bucket, Key=data_key, Body='data')
        client.put_object(Bucket=bucket, Key=descriptor_key, Body='metedata')

        status = store.delete_data_package()

        # Neither the readme nor the data object should still be listed.
        for key in (readme_key, data_key):
            listing = client.list_objects(Bucket=bucket, Prefix=key)
            self.assertTrue('Contents' not in listing)
        self.assertTrue(status)
def test_metadata_s3_key(self):
    """The descriptor key is prefix/publisher/package/_v/latest/<name>."""
    store = BitStore(publisher="pub_test", package="test_package")
    expected = "{t}/pub_test/test_package/_v/latest/datapackage.json".format(
        t=store.prefix)
    self.assertEqual(expected, store.build_s3_key('datapackage.json'))
def save_metadata(publisher, package):
    """
    DPR metadata put operation.
    This API is responsible for pushing  datapackage.json to S3.
    ---
    tags:
        - package
    parameters:
        - in: path
          name: publisher
          type: string
          required: true
          description: publisher name
        - in: path
          name: package
          type: string
          required: true
          description: package name
    responses:
        400:
            description: JWT is invalid or req body is not valid
        401:
            description: Invalid Header for JWT
        403:
            description: User name and publisher not matched
        404:
            description: User not found
        500:
            description: Internal Server Error
        200:
            description: Success Message
            schema:
                id: put_package_success
                properties:
                    status:
                        type: string
                        description: Status of the operation
                        default: OK
    """
    try:
        # Identity comes from the JWT placed on the request context by the
        # auth decorator.
        user = _request_ctx_stack.top.current_user
        user_id = user['user']
        user = User.query.filter_by(id=user_id).first()
        # Guard clauses instead of nested ifs: same responses, flatter flow.
        if user is None:
            return handle_error('USER_NOT_FOUND', 'user not found', 404)
        if user.name != publisher:
            return handle_error('NOT_PERMITTED',
                                'user name and publisher not matched',
                                403)
        metadata = BitStore(publisher=publisher,
                            package=package,
                            body=request.data)
        is_valid = metadata.validate()
        if not is_valid:
            return handle_error('INVALID_DATA',
                                'Missing required field in metadata',
                                400)
        metadata.save()
        return jsonify({"status": "OK"}), 200
    except Exception as e:
        app.logger.error(e)
        # str(e), not e.message: BaseException.message was removed in Python 3.
        return handle_error('GENERIC_ERROR', str(e), 500)
def get_resource(publisher, package, resource):
    """
    DPR resource get operation.
    This API is responsible for getting resource from S3.
    ---
    tags:
        - package
    parameters:
        - in: path
          name: publisher
          type: string
          required: true
          description: publisher name
        - in: path
          name: package
          type: string
          required: true
          description: package name - to retrieve the data package metadata
        - in: path
          name: resource
          type: string
          required: true
          description: resource index or name
    responses:
        200:
            description: Get Data package for one key
            schema:
                id: get_data_package
                properties:
                    data:
                        type: string
                        description: The resource
        500:
            description: Internal Server Error
    """
    try:
        path = request.path
        metadata = BitStore(publisher, package)
        # The stored object is always the CSV; the URL suffix only selects
        # the response representation (raw CSV vs JSON array of row dicts).
        resource_key = metadata.build_s3_key(resource + '.csv')
        data = metadata.get_s3_object(resource_key)
        if path.endswith('csv'):
            def generate():
                for row in data.splitlines():
                    yield row + '\n'
            return Response(generate()), 200
        else:
            rows = csv.DictReader(data.splitlines())

            # Stream a JSON array, emitting the separator before every row
            # after the first. Unlike the old "move the first row to the
            # end" trick, this preserves row order and handles an empty
            # resource without raising StopIteration.
            def generate():
                yield '['
                for index, row in enumerate(rows):
                    if index:
                        yield ','
                    yield json.dumps(row)
                yield ']'
            return Response(generate(), content_type='application/json'), 200
    except Exception as e:
        # Log before responding, consistent with the other handlers.
        app.logger.error(e)
        # str(e), not e.message: BaseException.message was removed in Python 3.
        return handle_error('GENERIC_ERROR', str(e), 500)
def test_should_copy_all_object_from_latest_to_tag(self):
    """Tagging copies every object from the latest prefix to the new tag."""
    numeric_version = 0.8
    with self.app.app_context():
        store = BitStore('test_pub', 'test_package')
        client = boto3.client('s3')
        bucket = self.app.config['S3_BUCKET_NAME']
        client.create_bucket(Bucket=bucket)
        fixtures = {
            'test.md': 'readme',
            'data.csv': 'data',
            'datapackage.json': 'metedata',
        }
        for name, body in fixtures.items():
            client.put_object(Bucket=bucket,
                              Key=store.build_s3_key(name),
                              Body=body)

        store.copy_to_new_version(numeric_version)

        tagged_store = BitStore('test_pub', 'test_package', numeric_version)
        tagged_objects = client.list_objects(
            Bucket=bucket,
            Prefix=tagged_store.build_s3_versioned_prefix())
        latest_objects = client.list_objects(
            Bucket=bucket,
            Prefix=store.build_s3_versioned_prefix())
        self.assertEqual(len(tagged_objects['Contents']),
                         len(latest_objects['Contents']))
def test_metadata_s3_prefix(self):
    """The publisher-level prefix is prefix/<publisher>."""
    store = BitStore(publisher="pub_test", package="test_package")
    expected = "{t}/pub_test".format(t=store.prefix)
    self.assertEqual(expected, store.build_s3_prefix())