예제 #1
0
def populate_data(publisher_name):
    """Load the demo data package for a publisher into the DB and S3.

    Reads the demo descriptor, CSV resource and README from ``fixtures/``,
    deletes an existing ``demo-package`` row of that publisher (if any),
    attaches a fresh ``Package`` row to the publisher, saves the descriptor
    through ``BitStore`` and uploads the CSV with a ``public-read`` ACL.

    The publisher row is looked up by name, not created here.

    Args:
        publisher_name: Name of the publisher that owns the demo package.
    """
    # Context managers close the fixture files deterministically instead of
    # leaving the open handles to the garbage collector.
    with open('fixtures/datapackage.json') as descriptor_file:
        data = json.loads(descriptor_file.read())
    with open('fixtures/data/demo-resource.csv') as resource_file:
        data_csv = resource_file.read()
    with open('fixtures/README.md') as readme_file:
        readme = readme_file.read()

    # Serialise once; the same text goes to the DB row and to the bit store.
    descriptor = json.dumps(data)

    package = Package.query.join(Publisher)\
        .filter(Package.name == "demo-package",
                Publisher.name == publisher_name).first()
    if package:
        # Drop the stale demo package so the insert below starts clean.
        db.session.delete(Package.query.get(package.id))
        db.session.commit()

    publisher = Publisher.query.filter_by(name=publisher_name).first()
    metadata = Package(name="demo-package")
    metadata.descriptor = descriptor
    metadata.status = 'active'
    metadata.private = False
    metadata.readme = readme

    publisher.packages.append(metadata)
    db.session.add(publisher)
    db.session.commit()

    bitstore = BitStore(publisher_name,
                        package='demo-package',
                        body=descriptor)
    bitstore.save_metadata()

    key = bitstore.build_s3_key('demo-resource.csv')
    bucket_name = app.config['S3_BUCKET_NAME']
    s3_client = app.config['S3']
    s3_client.put_object(Bucket=bucket_name,
                         Key=key,
                         Body=data_csv,
                         ACL='public-read')
예제 #2
0
    def test_save(self):
        """save_metadata() writes one datapackage.json object with the expected ACL.

        Checks that exactly one object exists under the metadata key, that the
        owner holds the single FULL_CONTROL grant and that the single READ
        grant goes to the AllUsers group.
        """
        with self.app.app_context():
            s3 = boto3.client('s3')
            bucket_name = self.app.config['S3_BUCKET_NAME']
            s3.create_bucket(Bucket=bucket_name)
            metadata = BitStore(publisher="pub_test",
                                package="test_package",
                                body='hi')
            key = metadata.build_s3_key('datapackage.json')
            metadata.save_metadata()

            listing = s3.list_objects(Bucket=bucket_name, Prefix=key)
            obs_list = list(listing.get('Contents'))
            self.assertEqual(1, len(obs_list))
            self.assertEqual(key, obs_list[0]['Key'])

            res = s3.get_object_acl(Bucket=bucket_name, Key=key)
            owner_id = res['Owner']['ID']
            aws_all_user_group_url = 'http://acs.amazonaws.com/groups/global/AllUsers'

            # List comprehensions rather than filter(): on Python 3 filter()
            # returns a lazy iterator, which supports neither len() nor [0].
            full_control = [grant for grant in res['Grants']
                            if grant['Permission'] == 'FULL_CONTROL']
            self.assertEqual(len(full_control), 1)
            self.assertEqual(full_control[0].get('Grantee')['ID'], owner_id)

            read_control = [grant for grant in res['Grants']
                            if grant['Permission'] == 'READ']
            self.assertEqual(len(read_control), 1)
            self.assertEqual(read_control[0].get('Grantee')['URI'],
                             aws_all_user_group_url)
예제 #3
0
def get_s3_signed_url():
    """
    This API is responsible for generate signed url to post data to S3
    ---
    tags:
        - auth
    parameters:
        - in: body
          name: publisher
          type: string
          required: true
          description: publisher name
        - in: body
          name: package
          type: string
          required: true
          description: package name
        - in: body
          name: path
          type: string
          required: true
          description: relative path of the resources
        - in: body
          name: md5
          type: string
          required: true
          description: md5 hash, forwarded to the pre-signed post generation
    responses:
        200:
            description: Success
            schema:
                id: get_signed_url
                properties:
                    data:
                        type: string
                        description: signed url for post data to S3
        400:
            description: >
                publisher, package, md5 or path is missing,
                or path is datapackage.json
        500:
            description: Internal Server Error
    """
    try:
        # get_json() may return None when the request carries no JSON body;
        # fall back to an empty dict so the checks below answer 400 instead
        # of failing on None.get and ending up as a 500.
        data = request.get_json() or {}
        publisher = data.get('publisher', None)
        package = data.get('package', None)
        path = data.get('path', None)
        md5 = data.get('md5', None)
        if publisher is None or package is None:
            return handle_error('INVALID_INPUT',
                                'publisher or package can not be empty', 400)
        if md5 is None:
            return handle_error('INVALID_INPUT', 'md5 hash can not be empty',
                                400)
        # path is documented as required, so reject a missing one up front
        # rather than handing None to the bit store.
        if path is None:
            return handle_error('INVALID_INPUT', 'path can not be empty', 400)
        if path == 'datapackage.json':
            return handle_error(
                'INVALID_INPUT',
                'datapackage.json should not publish with this api', 400)
        metadata = BitStore(publisher=publisher, package=package)
        url = metadata.generate_pre_signed_post_object(path, md5)
        return jsonify({'data': url}), 200
    except Exception as e:
        app.logger.error(e)
        # Exception.message only exists on Python 2 and is empty when the
        # exception was raised with several arguments; fall back to str(e).
        message = getattr(e, 'message', None) or str(e)
        return handle_error('GENERIC_ERROR', message, 500)
예제 #4
0
 def test_return_none_if_no_readme_found(self):
     """With only ``test.md`` stored, get_readme_object_key() gives None."""
     with self.app.app_context():
         store = BitStore('test_pub', 'test_package')
         client = boto3.client('s3')
         bucket = self.app.config['S3_BUCKET_NAME']
         client.create_bucket(Bucket=bucket)
         # Store an empty object under the package's test.md key.
         other_key = store.build_s3_key('test.md')
         client.put_object(Bucket=bucket, Key=other_key, Body='')
         found_key = store.get_readme_object_key()
         self.assertEqual(found_key, None)
예제 #5
0
 def test_get_metadata_body(self):
     """get_metadata_body() returns what was stored at datapackage.json."""
     with self.app.app_context():
         client = boto3.client('s3')
         bucket = self.app.config['S3_BUCKET_NAME']
         client.create_bucket(Bucket=bucket)
         store = BitStore(publisher="pub_test",
                          package="test_package",
                          body='hi')
         # Upload the body directly, bypassing save_metadata().
         descriptor_key = store.build_s3_key('datapackage.json')
         client.put_object(Bucket=bucket,
                           Key=descriptor_key,
                           Body=store.body)
         self.assertEqual(store.body, store.get_metadata_body())
예제 #6
0
def undelete_data_package(publisher, package):
    """
    DPR data package un-delete operation.
    This API is responsible for un-mark the mark for delete of data package
    ---
    tags:
        - package
    parameters:
        - in: path
          name: publisher
          type: string
          required: true
          description: publisher name
        - in: path
          name: package
          type: string
          required: true
          description: package name
        - in: header
          name: Authorization
          type: string
          required: true
          description: >
            Jwt token in format of "bearer {token}".
            The token can be generated from /api/auth/token
    responses:
        500:
            description: Internal Server Error
        200:
            description: Success Message
            schema:
                id: put_package_success
                properties:
                    status:
                        type: string
                        default: OK

    """
    try:
        bitstore = BitStore(publisher=publisher, package=package)
        # Both steps are always attempted: the ACL is set back to public-read
        # and the package row is marked active again.
        status_acl = bitstore.change_acl('public-read')
        status_db = Package.change_status(publisher, package,
                                          PackageStateEnum.active)
        # Report the first step that failed; the handler below turns the
        # exception into a 500 response.
        if not status_acl:
            raise Exception('Failed to change acl')
        if not status_db:
            raise Exception('Failed to change status')
        return jsonify({"status": "OK"}), 200
    except Exception as e:
        app.logger.error(e)
        # Exception.message only exists on Python 2 and is empty when the
        # exception was raised with several arguments; fall back to str(e).
        message = getattr(e, 'message', None) or str(e)
        return handle_error('GENERIC_ERROR', message, 500)
예제 #7
0
 def test_get_empty_metadata_name_for_publisher(self):
     """An object stored under ``test/key.json`` yields no metadata names."""
     with self.app.app_context():
         client = boto3.client('s3')
         bucket = self.app.config['S3_BUCKET_NAME']
         client.create_bucket(Bucket=bucket)
         store = BitStore(publisher="pub_test",
                          package="test_package",
                          body='hi')
         # The key is a fixed literal, not one built by the bit store.
         client.put_object(Bucket=bucket,
                           Key='test/key.json',
                           Body=store.body)
         names = store.get_all_metadata_name_for_publisher()
         self.assertEqual(0, len(names))
예제 #8
0
    def test_generate_pre_signed_put_obj_url(self):
        """The pre-signed post targets the regional S3 host with a public-read acl."""
        with self.app.app_context():
            client = boto3.client('s3')
            bucket = self.app.config['S3_BUCKET_NAME']
            client.create_bucket(Bucket=bucket)

            store = BitStore(publisher="pub_test",
                             package="test_package",
                             body='hi')
            signed_post = store.generate_pre_signed_post_object(
                'datapackage.json', 123)

            url_parts = urlparse(signed_post['url'])
            expected_host = 's3-{region}.amazonaws.com'.format(
                region=self.app.config['AWS_REGION'])
            self.assertEqual(url_parts.netloc, expected_host)

            acl_field = signed_post['fields']['acl']
            self.assertEqual('public-read', acl_field)
예제 #9
0
    def test_throw_403_if_not_owner_or_member_of_publisher(self):
        """Tagging with a not-allowed user's token answers 403 and copies nothing.

        Afterwards a single Package row still matches the publisher/package
        pair and no object is listed under the ``tag_one`` versioned prefix.
        """
        client = boto3.client('s3')
        client.create_bucket(Bucket=self.bucket_name)
        latest_store = BitStore('test_pub', 'test_package')
        readme_key = latest_store.build_s3_key('test.md')
        csv_key = latest_store.build_s3_key('data.csv')
        descriptor_key = latest_store.build_s3_key('datapackage.json')
        uploads = ((readme_key, 'readme'),
                   (csv_key, 'data'),
                   (descriptor_key, 'metedata'))
        for object_key, object_body in uploads:
            client.put_object(Bucket=self.bucket_name,
                              Key=object_key,
                              Body=object_body)

        # Obtain a JWT for the user that must not be allowed to tag.
        credentials = json.dumps({
            'username': self.user_not_allowed_name,
            'secret': 'super_secret'
        })
        token_response = self.client.post(self.jwt_url,
                                          data=credentials,
                                          content_type='application/json')
        token_payload = json.loads(token_response.data)
        auth_header = "bearer %s" % token_payload['token']

        tag_response = self.client.post(
            self.url,
            data=json.dumps({'version': 'tag_one'}),
            content_type='application/json',
            headers={'Authorization': auth_header})
        self.assertEqual(tag_response.status_code, 403)

        with self.app.app_context():
            package_query = Package.query.join(Publisher).filter(
                Publisher.name == self.publisher_name,
                Package.name == self.package)
            matching_rows = package_query.all()
            self.assertEqual(1, len(matching_rows))

        tagged_store = BitStore('test_pub', 'test_package', 'tag_one')
        tagged_listing = client.list_objects(
            Bucket=self.bucket_name,
            Prefix=tagged_store.build_s3_versioned_prefix())
        self.assertFalse('Contents' in tagged_listing)
예제 #10
0
    def test_change_acl(self):
        """change_acl("private") removes the READ grant from the metadata object.

        After save_metadata() the owner holds the single FULL_CONTROL grant and
        the AllUsers group holds the single READ grant; after
        change_acl("private") only the owner's FULL_CONTROL grant remains.
        """
        with self.app.app_context():
            bit_store = BitStore('test_pub', 'test_package', body='test')
            s3 = boto3.client('s3')
            bucket_name = self.app.config['S3_BUCKET_NAME']
            s3.create_bucket(Bucket=bucket_name)
            metadata_key = bit_store.build_s3_key('datapackage.json')

            def grants_with(acl, permission):
                # A list (not filter()) so len() and [0] also work on
                # Python 3, where filter() returns a lazy iterator.
                return [grant for grant in acl['Grants']
                        if grant['Permission'] == permission]

            bit_store.save_metadata()

            res = s3.get_object_acl(Bucket=bucket_name, Key=metadata_key)
            owner_id = res['Owner']['ID']
            aws_all_user_group_url = 'http://acs.amazonaws.com/groups/global/AllUsers'

            full_control = grants_with(res, 'FULL_CONTROL')
            self.assertEqual(len(full_control), 1)
            self.assertEqual(full_control[0].get('Grantee')['ID'], owner_id)

            read_control = grants_with(res, 'READ')
            self.assertEqual(len(read_control), 1)
            self.assertEqual(read_control[0].get('Grantee')['URI'],
                             aws_all_user_group_url)

            bit_store.change_acl("private")

            res = s3.get_object_acl(Bucket=bucket_name, Key=metadata_key)
            full_control = grants_with(res, 'FULL_CONTROL')
            self.assertEqual(len(full_control), 1)
            self.assertEqual(full_control[0].get('Grantee')['ID'], owner_id)

            read_control = grants_with(res, 'READ')
            self.assertEqual(len(read_control), 0)
예제 #11
0
    def test_delete_data_package(self):
        """delete_data_package() removes the readme and data objects and is truthy."""
        with self.app.app_context():
            store = BitStore('test_pub', 'test_package')
            client = boto3.client('s3')
            bucket = self.app.config['S3_BUCKET_NAME']
            client.create_bucket(Bucket=bucket)

            readme_key = store.build_s3_key('test.md')
            csv_key = store.build_s3_key('data.csv')
            descriptor_key = store.build_s3_key('datapackage.json')
            uploads = ((readme_key, 'readme'),
                       (csv_key, 'data'),
                       (descriptor_key, 'metedata'))
            for object_key, object_body in uploads:
                client.put_object(Bucket=bucket,
                                  Key=object_key,
                                  Body=object_body)

            deleted = store.delete_data_package()

            readme_listing = client.list_objects(Bucket=bucket,
                                                 Prefix=readme_key)
            self.assertFalse('Contents' in readme_listing)

            csv_listing = client.list_objects(Bucket=bucket, Prefix=csv_key)
            self.assertFalse('Contents' in csv_listing)
            self.assertTrue(deleted)
예제 #12
0
def finalize_metadata(publisher, package):
    """
    DPR metadata finalize operation.
    This API is responsible for getting data from S3 and push it to RDS.
    ---
    tags:
        - package
    parameters:
        - in: path
          name: publisher
          type: string
          required: true
          description: publisher name
        - in: path
          name: package
          type: string
          required: true
          description: package name
        - in: header
          name: Authorization
          type: string
          required: true
          description: >
            Jwt token in format of "bearer {token}".
            The token can be generated from /api/auth/token
    responses:
        200:
            description: Data transfer complete
        400:
            description: JWT is invalid
        401:
            description: Invalid Header for JWT
        403:
            description: User name and publisher not matched
        404:
            description: User not found
        500:
            description: Internal Server Error
    """
    try:
        # current_user is read from the request context; its 'user' entry is
        # used as the primary key of the User row.
        user = _request_ctx_stack.top.current_user
        user_id = user['user']
        user = User.query.filter_by(id=user_id).first()
        if user is None:
            return handle_error('USER_NOT_FOUND', 'user not found', 404)
        # Only the publisher whose name equals the user's name may finalize.
        if user.name != publisher:
            return handle_error('NOT_PERMITTED',
                                'user name and publisher not matched', 403)

        bit_store = BitStore(publisher, package)
        body = bit_store.get_metadata_body()
        if body is None:
            raise Exception("Failed to get data from s3")

        bit_store.change_acl('public-read')
        # NOTE(review): get_readme_object_key() can return None when no readme
        # exists; confirm get_s3_object() tolerates a None key.
        readme = bit_store.get_s3_object(
            bit_store.get_readme_object_key())
        Package.create_or_update(name=package,
                                 publisher_name=publisher,
                                 descriptor=body,
                                 readme=readme)
        return jsonify({"status": "OK"}), 200
    except Exception as e:
        app.logger.error(e)
        # Exception.message only exists on Python 2 and is empty when the
        # exception was raised with several arguments; fall back to str(e).
        message = getattr(e, 'message', None) or str(e)
        return handle_error('GENERIC_ERROR', message, 500)
예제 #13
0
def save_metadata(publisher, package):
    """
    DPR metadata put operation.
    This API is responsible for pushing  datapackage.json to S3.
    ---
    tags:
        - package
    parameters:
        - in: path
          name: publisher
          type: string
          required: true
          description: publisher name
        - in: path
          name: package
          type: string
          required: true
          description: package name
        - in: header
          name: Authorization
          type: string
          required: true
          description: >
            Jwt token in format of "bearer {token}".
            The token can be generated from /api/auth/token
    responses:
        400:
            description: JWT is invalid or req body is not valid
        401:
            description: Invalid Header for JWT
        403:
            description: User name and publisher not matched
        404:
            description: User not found
        500:
            description: Internal Server Error
        200:
            description: Success Message
            schema:
                id: put_package_success
                properties:
                    status:
                        type: string
                        description: Status of the operation
                        default: OK
    """
    try:
        # current_user is read from the request context; its 'user' entry is
        # used as the primary key of the User row.
        user = _request_ctx_stack.top.current_user
        user_id = user['user']
        user = User.query.filter_by(id=user_id).first()
        if user is None:
            return handle_error('USER_NOT_FOUND', 'user not found', 404)
        # Only the publisher whose name equals the user's name may upload.
        if user.name != publisher:
            return handle_error('NOT_PERMITTED',
                                'user name and publisher not matched', 403)

        # The raw request body is taken as the datapackage.json content.
        metadata = BitStore(publisher=publisher,
                            package=package,
                            body=request.data)
        if not metadata.validate():
            return handle_error('INVALID_DATA',
                                'Missing required field in metadata',
                                400)
        metadata.save_metadata()
        return jsonify({"status": "OK"}), 200
    except Exception as e:
        app.logger.error(e)
        # Exception.message only exists on Python 2 and is empty when the
        # exception was raised with several arguments; fall back to str(e).
        message = getattr(e, 'message', None) or str(e)
        return handle_error('GENERIC_ERROR', message, 500)
예제 #14
0
    def test_should_copy_all_object_from_latest_to_tag(self):
        """copy_to_new_version() leaves as many objects under the new version prefix
        as are listed under the source store's versioned prefix."""
        numeric_version = 0.8
        with self.app.app_context():
            latest_store = BitStore('test_pub', 'test_package')
            client = boto3.client('s3')
            bucket = self.app.config['S3_BUCKET_NAME']
            client.create_bucket(Bucket=bucket)

            readme_key = latest_store.build_s3_key('test.md')
            csv_key = latest_store.build_s3_key('data.csv')
            descriptor_key = latest_store.build_s3_key('datapackage.json')
            uploads = ((readme_key, 'readme'),
                       (csv_key, 'data'),
                       (descriptor_key, 'metedata'))
            for object_key, object_body in uploads:
                client.put_object(Bucket=bucket,
                                  Key=object_key,
                                  Body=object_body)

            latest_store.copy_to_new_version(numeric_version)

            versioned_store = BitStore('test_pub', 'test_package',
                                       numeric_version)
            copied_listing = client.list_objects(
                Bucket=bucket,
                Prefix=versioned_store.build_s3_versioned_prefix())
            source_listing = client.list_objects(
                Bucket=bucket,
                Prefix=latest_store.build_s3_versioned_prefix())
            copied_count = len(copied_listing['Contents'])
            source_count = len(source_listing['Contents'])
            self.assertEqual(copied_count, source_count)
예제 #15
0
 def test_metadata_s3_key(self):
     """build_s3_key() places the file under ``<prefix>/<pub>/<pkg>/_v/latest/``."""
     store = BitStore(publisher="pub_test", package="test_package")
     key_template = "{t}/pub_test/test_package/_v/latest/datapackage.json"
     expected_key = key_template.format(t=store.prefix)
     actual_key = store.build_s3_key('datapackage.json')
     self.assertEqual(expected_key, actual_key)
예제 #16
0
def get_resource(publisher, package, resource):
    """
    DPR resource get operation.
    This API is responsible for getting resource from S3.
    ---
    tags:
        - package
    parameters:
        - in: path
          name: publisher
          type: string
          required: true
          description: publisher name
        - in: path
          name: package
          type: string
          required: true
          description: package name - to retrieve the data package metadata
        - in: path
          name: resource
          type: string
          required: true
          description: resource index or name
    responses:
        200:
            description: Get Data package for one key
            schema:
                id: get_data_package
                properties:
                    data:
                        type: string
                        description: The resource
        500:
            description: Internal Server Error
    """
    try:
        path = request.path
        metadata = BitStore(publisher, package)
        # Both output formats are produced from the same stored CSV object.
        resource_key = metadata.build_s3_key(resource + '.csv')
        data = metadata.get_s3_object(resource_key)

        if path.endswith('csv'):
            # Request path ends in "csv": stream the stored lines back as-is.
            def generate():
                for row in data.splitlines():
                    yield row + '\n'

            return Response(generate()), 200

        # Any other path: stream the rows as a JSON array of objects keyed by
        # the CSV header.
        rows = csv.DictReader(data.splitlines())
        # Pull the first row eagerly so header/parse errors still surface
        # inside this try block; None means the CSV has no data rows.
        first_row = next(rows, None)

        def generate():
            yield '['
            if first_row is not None:
                # Emit rows in source order; the separator goes before every
                # row but the first, so no trailing comma is produced.
                yield json.dumps(first_row)
                for row in rows:
                    yield ',' + json.dumps(row)
            yield ']'

        return Response(generate(), content_type='application/json'), 200
    except Exception as e:
        # Exception.message only exists on Python 2 and is empty when the
        # exception was raised with several arguments; fall back to str(e).
        message = getattr(e, 'message', None) or str(e)
        return handle_error('GENERIC_ERROR', message, 500)
예제 #17
0
def tag_data_package(publisher, package):
    """
    DPR metadata put operation.
    This API is responsible for tagging data package
    ---
    tags:
        - package
    parameters:
        - in: path
          name: publisher
          type: string
          required: true
          description: publisher name
        - in: path
          name: package
          type: string
          required: true
          description: package name
        - in: body
          name: version
          type: string
          required: true
          description: version value
        - in: header
          name: Authorization
          type: string
          required: true
          description: >
            Jwt token in format of "bearer {token}".
            The token can be generated from /api/auth/token
    responses:
        400:
            description: JWT is invalid or req body is not valid
        401:
            description: Invalid Header for JWT
        403:
            description: User not allowed for operation
        404:
            description: User not found
        500:
            description: Internal Server Error
        200:
            description: Success Message
            schema:
                id: put_package_success
                properties:
                    status:
                        type: string
                        description: Status of the operation
                        default: OK
    """
    try:
        data = request.get_json()
        # A missing body (None) or a non-object JSON payload cannot carry a
        # version; answer 400 instead of failing on the membership test or
        # the lookup below and ending up as a 500.
        if not isinstance(data, dict) or 'version' not in data:
            return handle_error('ATTRIBUTE_MISSING', 'version not found', 400)

        version = data['version']
        bitstore = BitStore(publisher, package)
        # Record the version in the database, then copy the stored objects to
        # the new version in the bit store.
        status_db = Package.create_or_update_version(publisher, package,
                                                     version)
        status_bitstore = bitstore.copy_to_new_version(version)

        # Only an explicit False counts as failure here.
        if status_db is False or status_bitstore is False:
            raise Exception("failed to tag data package")
        return jsonify({"status": "OK"}), 200
    except Exception as e:
        app.logger.error(e)
        # Exception.message only exists on Python 2 and is empty when the
        # exception was raised with several arguments; fall back to str(e).
        message = getattr(e, 'message', None) or str(e)
        return handle_error('GENERIC_ERROR', message, 500)
예제 #18
0
 def test_metadata_s3_prefix(self):
     """build_s3_base_prefix() is ``<prefix>/<publisher>/<package>``."""
     store = BitStore(publisher="pub_test", package="test_package")
     prefix_template = "{t}/pub_test/test_package"
     expected_prefix = prefix_template.format(t=store.prefix)
     actual_prefix = store.build_s3_base_prefix()
     self.assertEqual(expected_prefix, actual_prefix)