Example #1
0
    def test_call_s3select_incomplete_response(self):
        """
        Test that an incomplete response from S3 Select is
        detected and an exception is raised, and that the S3 Select
        call itself received the expected arguments.
        """
        bucket = "bucket"
        key = ".quilt/packages/manifest_hash"

        expected_sql = "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s"
        expected_args = {
            'Bucket': bucket,
            'Key': key,
            'Expression': expected_sql,
            'ExpressionType': 'SQL',
            'InputSerialization': {
                'CompressionType': 'NONE',
                'JSON': {'Type': 'LINES'}
                },
            'OutputSerialization': {'JSON': {'RecordDelimiter': '\n'}},
        }

        mock_s3 = boto3.client('s3')
        with patch.object(
                mock_s3,
                'select_object_content',
                return_value=self.s3response_incomplete
        ) as patched:
            with self.assertRaises(IncompleteResultException):
                query_manifest_content(
                    mock_s3,
                    bucket=bucket,
                    key=key,
                    sql_stmt=expected_sql
                )
            # BUG FIX: this assertion previously sat inside the assertRaises
            # block, after the raising call, so it was dead code and never ran.
            # It must execute after the exception has been caught.
            patched.assert_called_once_with(**expected_args)
Example #2
0
    def test_call_s3select(self):
        """
        Test that parameters are correctly passed to
        S3 Select (without a prefix)
        """
        bucket = "bucket"
        key = ".quilt/packages/manifest_hash"

        expected_sql = "SELECT SUBSTRING(s.logical_key, 1) AS logical_key FROM s3object s"
        # Keyword-style construction; keys are identical to the boto3 call's kwargs.
        expected_args = dict(
            Bucket=bucket,
            Key=key,
            Expression=expected_sql,
            ExpressionType='SQL',
            InputSerialization={
                'CompressionType': 'NONE',
                'JSON': {'Type': 'LINES'},
            },
            OutputSerialization={'JSON': {'RecordDelimiter': '\n'}},
        )

        mock_s3 = boto3.client('s3')
        select_patch = patch.object(
            mock_s3,
            'select_object_content',
            return_value=self.s3response,
        )
        with select_patch as patched:
            query_manifest_content(
                mock_s3,
                bucket=bucket,
                key=key,
                sql_stmt=expected_sql,
            )
            # The mocked S3 Select endpoint must have been hit exactly once
            # with the canonical argument set.
            patched.assert_called_once_with(**expected_args)
Example #3
0
def select_package_stats(s3_client, bucket, manifest_key) -> "dict | None":
    """
    Use S3 Select to generate file stats for a package manifest.

    Args:
        s3_client: boto3 S3 client used to run the select query.
        bucket: bucket holding the manifest object.
        manifest_key: key of the manifest object.

    Returns:
        A dict containing integer 'total_bytes' and 'total_files' entries,
        or None when the query fails or returns no data.
        (The previous annotation said ``-> str``, which was wrong: the
        function returns the parsed JSON dict or None, never a string.)
    """
    logger_ = get_quilt_logger()
    try:
        raw_stats = query_manifest_content(
            s3_client,
            bucket=bucket,
            key=manifest_key,
            sql_stmt=SELECT_PACKAGE_STATS
        ).read()

        if raw_stats:
            stats = json.loads(raw_stats)
            # These asserts double as validation: AssertionError is caught
            # below, so a malformed manifest degrades to a logged None
            # rather than crashing the caller.
            assert isinstance(stats['total_bytes'], int)
            assert isinstance(stats['total_files'], int)

            return stats

    except (
            AssertionError,
            botocore.exceptions.ClientError,
            json.JSONDecodeError,
            KeyError,
    ):
        # logger_.exception records the active traceback, so the previously
        # unused ``as err`` binding has been dropped.
        logger_.exception("Unable to compute package stats via S3 select")

    return None
Example #4
0
def select_manifest_meta(s3_client, bucket: str, key: str):
    """
    Wrapper around query_manifest_content for package-level metadata:
    retries are delegated to the query helper; the payload is returned
    as a string, or None when the S3 call fails.
    """
    try:
        # .read() stays inside the try so a ClientError raised while
        # draining the response body is also handled.
        response = query_manifest_content(
            s3_client,
            bucket=bucket,
            key=key,
            sql_stmt=SELECT_PACKAGE_META,
        )
        return response.read()
    except botocore.exceptions.ClientError as cle:
        print(f"Unable to S3 select manifest: {cle}")
    return None
Example #5
0
def lambda_handler(request):
    """
    Parse a manifest to return a folder-like view of its contents (logical keys).

    Expected request.args:
        bucket (required): S3 bucket holding the manifest object.
        manifest (required): key of the manifest object.
        prefix (optional): logical-key "folder" prefix to list under.
        logical_key (optional): exact logical key; when given, a single
            entry is returned instead of a folder listing.
        access_key / secret_key / session_token (optional): caller
            credentials; either all three are supplied or none.

    Returns:
        JSON response
    """
    bucket = request.args['bucket']
    key = request.args['manifest']
    prefix = request.args.get('prefix')
    logical_key = request.args.get('logical_key')
    access_key = request.args.get('access_key')
    secret_key = request.args.get('secret_key')
    session_token = request.args.get('session_token')
    # Any non-empty value of the env var enables the anonymous path below.
    allow_anonymous_access = bool(os.getenv('ALLOW_ANONYMOUS_ACCESS'))

    # If credentials are passed in, use them
    # for the client. If no credentials are supplied, test that
    # the manifest object is publicly accessible. If so, create
    # an s3 client using the underlying IAM role's permissions.

    if access_key and secret_key and session_token:
        s3_client = create_s3_client(aws_access_key_id=access_key,
                                     aws_secret_access_key=secret_key,
                                     aws_session_token=session_token)
    elif (allow_anonymous_access and access_key is None and secret_key is None
          and session_token is None):
        # Test to see if the target key is publicly accessible. If not, the call
        # below will raise an exception and return a 403 response
        anons3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
        try:
            anons3.head_object(Bucket=bucket, Key=key)
        except botocore.exceptions.ClientError as error:
            if error.response.get('Error'):
                code = error.response['Error']['Code']
                if code == '403':
                    return make_json_response(
                        403, {
                            'title': 'Access Denied',
                            'detail': f"Access denied reading manifest: {key}"
                        })
            # Anything other than a 403 is unexpected here — surface it.
            raise error

        # Use the default S3 client configuration
        s3_client = boto3.client('s3')
    else:
        # Partial credentials (or anonymous access disabled) are rejected.
        return make_json_response(
            401, {
                'title': 'Incomplete credentials',
                'detail':
                "access_key, secret_key and session_token are required"
            })
    assert s3_client

    # Get details of a single file in the package
    if logical_key is not None:
        # sql_escape guards the interpolated value against quote injection.
        sql_stmt = f"SELECT s.* FROM s3object s WHERE s.logical_key = '{sql_escape(logical_key)}' LIMIT 1"
        response_data = json.load(
            query_manifest_content(s3_client,
                                   bucket=bucket,
                                   key=key,
                                   sql_stmt=sql_stmt))
    else:
        # Call s3 select to fetch only logical keys matching the
        # desired prefix (folder path)
        prefix_length = len(prefix) if prefix is not None else 0
        # SUBSTRING strips the prefix so results are relative to the "folder".
        sql_stmt = (
            f"SELECT SUBSTRING(s.logical_key, {prefix_length + 1}) AS logical_key"
            ", s.\"size\", s.physical_keys[0] as physical_key FROM s3object s")
        if prefix:
            sql_stmt += f" WHERE SUBSTRING(s.logical_key, 1, {prefix_length}) = '{sql_escape(prefix)}'"
        result = query_manifest_content(s3_client,
                                        bucket=bucket,
                                        key=key,
                                        sql_stmt=sql_stmt)
        # Parse the response into a logical folder view
        df = pd.read_json(result, lines=True)
        response_data = file_list_to_folder(df)

        # Fetch package-level or directory-level metadata
        if prefix:
            sql_stmt = f"SELECT s.meta FROM s3object s WHERE s.logical_key = '{sql_escape(prefix)}'"
        else:
            # The row with a NULL logical_key carries package-level metadata.
            sql_stmt = "SELECT s.* FROM s3object s WHERE s.logical_key is NULL"
        result = query_manifest_content(s3_client,
                                        bucket=bucket,
                                        key=key,
                                        sql_stmt=sql_stmt)
        meta = json.load(result) if result else {}
        response_data.update(dict(meta=meta))

    ret_val = make_json_response(200, {'contents': response_data})

    return ret_val