コード例 #1
0
ファイル: app.py プロジェクト: markdboyd/thin-egress-app
def try_download_head(bucket, filename):
    """Check that an S3 object is reachable, then redirect to a presigned HEAD URL.

    The object is requested with ``get_object`` (passing along the value of
    ``get_range_header_val()`` when one is present). If that raises
    ``ClientError`` a 404 ``error.html`` page is returned; otherwise a
    presigned ``HEAD`` URL valid for 24 hours is generated and returned as a
    303 redirect.

    :param bucket: name of the S3 bucket holding the object
    :param filename: key of the object inside ``bucket``
    :return: the response built by ``make_html_response`` (404) or
        ``make_redriect`` (303)
    """
    client = get_data_dl_s3_client()
    # Check for range request
    range_header = get_range_header_val()
    try:
        if not range_header:
            download = client.get_object(Bucket=bucket, Key=filename)
        else:
            log.info("Downloading range {0}".format(range_header))
            download = client.get_object(Bucket=bucket,
                                         Key=filename,
                                         Range=range_header)
    except ClientError as e:
        # The message previously read "Could get head", which stated the
        # opposite of what happened on this failure path.
        log.warning("Could not get head for s3://{0}/{1}: {2}".format(
            bucket, filename, e))
        template_vars = {
            'contentstring': 'File not found',
            'title': 'File not found'
        }
        headers = {}
        return make_html_response(template_vars, headers, 404, 'error.html')
    log.debug(download)

    # Translate the S3 response headers (renaming via header_map and masking
    # the 'server' value). The result is only logged here: the redirect
    # below is sent with an empty header dict.
    response_headers = {'Content-Type': download['ContentType']}
    for header in download['ResponseMetadata']['HTTPHeaders']:
        name = header_map[header] if header in header_map else header
        value = download['ResponseMetadata']['HTTPHeaders'][
            header] if header != 'server' else 'egress'
        log.debug("setting header {0} to {1}.".format(name, value))
        response_headers[name] = value

    # Try Redirecting to HEAD. There should be a better way.
    cookievars = get_cookie_vars(app.current_request.headers)
    if 'urs-user-id' in cookievars:
        user_id = cookievars['urs-user-id']
    else:
        user_id = 'Unknown'

    # Generate URL
    creds = get_role_creds(user_id=user_id)
    # NOTE(review): a second client is requested although `client` from above
    # is still in scope -- presumably redundant; confirm before removing.
    client = get_data_dl_s3_client()
    bucket_region = client.get_bucket_location(
        Bucket=bucket)['LocationConstraint']
    # A falsy LocationConstraint is treated as us-east-1.
    bucket_region = 'us-east-1' if not bucket_region else bucket_region
    presigned_url = get_presigned_url(creds, bucket, filename, bucket_region,
                                      24 * 3600, user_id, 'HEAD')
    s3_host = urlparse(presigned_url).netloc

    # Return a redirect to a HEAD
    log.debug("Presigned HEAD URL host was {0}".format(s3_host))
    return make_redriect(presigned_url, {}, 303)
コード例 #2
0
def try_download_from_bucket(bucket, filename, user_profile):
    """Redirect the caller to a presigned URL for ``s3://bucket/filename``.

    The user id is read from ``user_profile`` (``urs-user-id`` first, then
    ``uid``). Role credentials and a session are obtained for that user, the
    bucket's region is looked up, and the object is requested once with
    ``get_object`` before the presigned URL (24 hour lifetime) is generated.

    :param bucket: name of the S3 bucket holding the object
    :param filename: key of the object inside ``bucket``
    :param user_profile: profile mapping; anything that is not a ``dict``
        leaves the user id as ``None``
    :return: a 303 redirect on success; a 500 error page when the bucket
        region lookup raises ``ClientError``; a bare 416 response for an
        invalid range; otherwise a 404 error page
    """
    # Attempt to pull userid from profile
    user_id = None
    if isinstance(user_profile, dict):
        if 'urs-user-id' in user_profile:
            user_id = user_profile['urs-user-id']
        elif 'uid' in user_profile:
            user_id = user_profile['uid']
    log.info("User Id for download is {0}".format(user_id))

    is_in_region = check_in_region_request(
        app.current_request.context['identity']['sourceIp'])
    creds = get_role_creds(user_id, is_in_region)

    session = get_role_session(creds=creds, user_id=user_id)

    try:
        bucket_region = get_bucket_region(session, bucket)
    except ClientError as e:
        log.error(
            f'ClientError while {user_id} tried downloading {bucket}/{filename}: {e}'
        )
        template_vars = {
            'contentstring': 'There was a problem accessing download data.',
            'title': 'Data Not Available'
        }
        headers = {}
        return make_html_response(template_vars, headers, 500, 'error.html')

    log.debug('this region: {}'.format(
        os.getenv('AWS_DEFAULT_REGION', 'env var doesnt exist')))
    if bucket_region != os.getenv('AWS_DEFAULT_REGION'):
        # The two literals are joined *before* .format() is applied.
        # Previously `a + b.format(...)` formatted only the second literal,
        # so the {0}/{1}/{2} placeholders were logged unfilled.
        log.warning(
            ("bucket {0} is in region {1}, we are in region {2}! "
             "This is double egress in Proxy mode!").format(
                 bucket, bucket_region, os.getenv('AWS_DEFAULT_REGION')))
    params = {}
    # now that we know where the bucket is, connect in THAT region
    params['config'] = bc_Config(**get_bcconfig(user_id))
    client = session.client('s3', bucket_region, **params)

    log.info("Attempting to download s3://{0}/{1}".format(bucket, filename))

    try:
        # Make sure this file exists, don't ACTUALLY download
        range_header = get_range_header_val()
        if not range_header:
            client.get_object(Bucket=bucket, Key=filename)
            redirheaders = {}
        else:
            client.get_object(Bucket=bucket, Key=filename, Range=range_header)
            # Pass the range along so the redirect target honours it too.
            redirheaders = {'Range': range_header}

        # Generate URL
        presigned_url = get_presigned_url(creds, bucket, filename,
                                          bucket_region, 24 * 3600, user_id)
        s3_host = urlparse(presigned_url).netloc
        log.debug("Presigned URL host was {0}".format(s3_host))

        log.info("Using REDIRECT because no PROXY in egresslambda")
        return make_redriect(presigned_url, redirheaders, 303)

    except ClientError as e:
        log.warning("Could not download s3://{0}/{1}: {2}".format(
            bucket, filename, e))

        # Watch for bad range request:
        if e.response['ResponseMetadata']['HTTPStatusCode'] == 416:
            return Response(body='Invalid Range', status_code=416, headers={})

        # Every other ClientError is reported to the caller as "not found".
        template_vars = {
            'contentstring': 'Could not find requested data.',
            'title': 'Data Not Available'
        }
        headers = {}
        return make_html_response(template_vars, headers, 404, 'error.html')
コード例 #3
0
def try_download_head(bucket, filename):
    """Confirm an S3 object can be fetched, then 303-redirect to a presigned HEAD URL.

    Wall-clock timestamps are taken around the main steps and reported at
    debug level just before returning.

    :param bucket: name of the S3 bucket holding the object
    :param filename: key of the object inside ``bucket``
    :return: a 404 ``error.html`` response when ``get_object`` raises
        ``ClientError``; otherwise the redirect built by ``make_redirect``
    """
    started = time.time()
    client = get_data_dl_s3_client()
    client_ready = time.time()

    # Only forward a Range argument when the request carried one.
    range_header = get_range_header_val()
    try:
        get_kwargs = {'Bucket': bucket, 'Key': filename}
        if range_header:
            log.info("Downloading range {0}".format(range_header))
            get_kwargs['Range'] = range_header
        download = client.get_object(**get_kwargs)
        object_fetched = time.time()
    except ClientError as e:
        log.warning("Could not get head for s3://{0}/{1}: {2}".format(
            bucket, filename, e))
        not_found_page = {
            'contentstring': 'File not found',
            'title': 'File not found',
            'requestid': get_request_id(),
        }
        # cumulus uses this log message for metrics purposes.
        failure_details = {
            'reason': 'Could not find requested data',
            's3': f'{bucket}/{filename}'
        }
        cumulus_log_message('failure', 404, 'HEAD', failure_details)
        return make_html_response(not_found_page, {}, 404, 'error.html')
    log.debug(download)

    # Rename headers through header_map and mask the 'server' value. The
    # mapping is only logged; the redirect below carries no extra headers.
    response_headers = {'Content-Type': download['ContentType']}
    s3_headers = download['ResponseMetadata']['HTTPHeaders']
    for header in s3_headers:
        if header in header_map:
            name = header_map[header]
        else:
            name = header
        if header == 'server':
            value = 'egress'
        else:
            value = s3_headers[header]
        log.debug("setting header {0} to {1}.".format(name, value))
        response_headers[name] = value

    # Try Redirecting to HEAD. There should be a better way.
    cookie_vars = get_cookie_vars(app.current_request.headers)
    user_id = get_jwt_field(cookie_vars, 'urs-user-id')
    log_context(user_id=user_id)

    # Generate URL
    creds_requested = time.time()
    creds, offset = get_role_creds(user_id=user_id)
    url_lifespan = 3600 - offset
    location = client.get_bucket_location(Bucket=bucket)
    # A falsy LocationConstraint is treated as us-east-1.
    bucket_region = location['LocationConstraint'] or 'us-east-1'
    region_known = time.time()
    presigned_url = get_presigned_url(creds, bucket, filename, bucket_region,
                                      url_lifespan, user_id, 'HEAD')
    url_signed = time.time()
    s3_host = urlparse(presigned_url).netloc

    # Return a redirect to a HEAD
    log.debug("Presigned HEAD URL host was {0}".format(s3_host))
    log.debug('timing for try_download_head()')
    log.debug('ET for get_data_dl_s3_client(): {}s'.format(
        client_ready - started))
    log.debug('ET for client.get_object(): {}s'.format(
        object_fetched - client_ready))
    # This interval also spans the get_bucket_location() call above.
    log.debug('ET for get_role_creds(): {}s'.format(
        region_known - creds_requested))
    log.debug('ET for get_presigned_url(): {}s'.format(
        url_signed - region_known))

    return make_redirect(presigned_url, {}, 303)
コード例 #4
0
ファイル: app.py プロジェクト: markdboyd/thin-egress-app
def try_download_from_bucket(bucket, filename, user_profile):
    """Redirect the caller to a presigned URL for ``s3://bucket/filename``.

    The user id is read from ``user_profile`` (``urs-user-id`` first, then
    ``uid``). The bucket's region is looked up with a default S3 client from
    the role session; a second client, configured with the botocore options
    built below, is then created in that region and used to request the
    object once before the presigned URL (24 hour lifetime) is generated.

    :param bucket: name of the S3 bucket holding the object
    :param filename: key of the object inside ``bucket``
    :param user_profile: profile mapping; anything that is not a ``dict``
        leaves the user id as ``None``
    :return: a 303 redirect on success; a 500 error page when the bucket
        location lookup raises ``ClientError``; a bare 416 response for an
        invalid range; otherwise a 404 error page
    """
    # Attempt to pull userid from profile
    user_id = None
    if isinstance(user_profile, dict):
        if 'urs-user-id' in user_profile:
            user_id = user_profile['urs-user-id']
        elif 'uid' in user_profile:
            user_id = user_profile['uid']
    log.info("User Id for download is {0}".format(user_id))

    is_in_region = check_in_region_request(
        app.current_request.context['identity']['sourceIp'])
    creds = get_role_creds(user_id, is_in_region)
    session = get_role_session(creds=creds, user_id=user_id)

    # Keyword arguments for bc_Config; a plain local, hence lower-case.
    bc_config_kwargs = {
        "user_agent": "RAIN Egress App for userid={0}".format(user_id),
        "s3": {
            "addressing_style": "path"
        },
        "connect_timeout": 600,
        "read_timeout": 600,
        "retries": {
            "max_attempts": 10
        }
    }

    # Only override the signature version when the environment asks for it.
    signature_version = os.getenv('S3_SIGNATURE_VERSION')
    if signature_version:
        bc_config_kwargs['signature_version'] = signature_version

    # Figure out bucket region
    try:
        bucket_region = session.client('s3').get_bucket_location(
            Bucket=bucket)['LocationConstraint']
        # A falsy LocationConstraint is treated as us-east-1.
        bucket_region = 'us-east-1' if not bucket_region else bucket_region
        log.debug("bucket {0} is in region {1}".format(bucket, bucket_region))
    except ClientError as e:
        # We hit here if the download role cannot access a bucket, or if it doesn't exist
        log.error("Could not access download bucket {0}: {1}".format(
            bucket, e))

        template_vars = {
            'contentstring': 'There was a problem accessing download data.',
            'title': 'Data Not Available'
        }
        headers = {}
        return make_html_response(template_vars, headers, 500, 'error.html')

    log.debug('this region: {}'.format(
        os.getenv('AWS_DEFAULT_REGION', 'env var doesnt exist')))
    if bucket_region != os.getenv('AWS_DEFAULT_REGION'):
        log.warning(
            "bucket {0} is in region {1}, we are in region {2}! This is double egress in Proxy mode!"
            .format(bucket, bucket_region, os.getenv('AWS_DEFAULT_REGION')))

    # now that we know where the bucket is, connect in THAT region
    client = session.client('s3',
                            bucket_region,
                            config=bc_Config(**bc_config_kwargs))

    log.info("Attempting to download s3://{0}/{1}".format(bucket, filename))

    try:
        # Make sure this file exists, don't ACTUALLY download
        range_header = get_range_header_val()
        if not range_header:
            client.get_object(Bucket=bucket, Key=filename)
            redirheaders = {}
        else:
            client.get_object(Bucket=bucket, Key=filename, Range=range_header)
            # Pass the range along so the redirect target honours it too.
            redirheaders = {'Range': range_header}

        # Generate URL
        presigned_url = get_presigned_url(creds, bucket, filename,
                                          bucket_region, 24 * 3600, user_id)
        s3_host = urlparse(presigned_url).netloc
        log.debug("Presigned URL host was {0}".format(s3_host))

        log.info("Using REDIRECT because no PROXY in egresslambda")
        return make_redriect(presigned_url, redirheaders, 303)

    except ClientError as e:
        log.warning("Could not download s3://{0}/{1}: {2}".format(
            bucket, filename, e))

        # Watch for bad range request:
        if e.response['ResponseMetadata']['HTTPStatusCode'] == 416:
            return Response(body='Invalid Range', status_code=416, headers={})

        # Every other ClientError is reported to the caller as "not found".
        template_vars = {
            'contentstring': 'Could not find requested data.',
            'title': 'Data Not Available'
        }
        headers = {}
        return make_html_response(template_vars, headers, 404, 'error.html')
コード例 #5
0
def try_download_from_bucket(bucket, filename, user_profile, headers: dict):
    """Redirect the caller to a presigned URL for ``s3://bucket/filename``.

    The user id is read from ``user_profile`` (``urs-user-id`` first, then
    ``uid``). Role credentials, a session and the bucket region are obtained,
    the object is optionally probed with ``head_object``, and a presigned URL
    whose lifetime is ``3600 - offset`` seconds is returned as a 303 redirect.
    Elapsed times of the setup steps are logged at debug level.

    :param bucket: name of the S3 bucket holding the object
    :param filename: key of the object inside ``bucket``
    :param user_profile: profile mapping; anything that is not a ``dict``
        leaves the user id as ``None``
    :param headers: extra headers merged into the redirect headers when it is
        a ``dict`` (its entries win on key collisions); ignored otherwise
    :return: a 303 redirect on success; an error page carrying the status
        code of the ``ClientError`` (400 if unavailable) when the bucket
        region lookup fails; a bare 416 response for an invalid range;
        otherwise a 404 error page
    """
    # Attempt to pull userid from profile
    user_id = None
    if isinstance(user_profile, dict):
        if 'urs-user-id' in user_profile:
            user_id = user_profile['urs-user-id']
        elif 'uid' in user_profile:
            user_id = user_profile['uid']
    log.info("User Id for download is {0}".format(user_id))
    log_context(user_id=user_id)

    t0 = time.time()
    is_in_region = check_in_region_request(
        app.current_request.context['identity']['sourceIp'])
    t1 = time.time()
    creds, offset = get_role_creds(user_id, is_in_region)
    t2 = time.time()
    session = get_role_session(creds=creds, user_id=user_id)
    t3 = time.time()

    try:
        bucket_region = get_bucket_region(session, bucket)
        t4 = time.time()
    except ClientError as e:
        # Report the status code S3 gave us, falling back to 400 when the
        # error response does not carry one.
        try:
            code = e.response['ResponseMetadata']['HTTPStatusCode']
        except (AttributeError, KeyError, IndexError):
            code = 400
        log.debug(f'response: {e.response}')
        log.error(
            f'ClientError while {user_id} tried downloading {bucket}/{filename}: {e}'
        )
        cumulus_log_message('failure', code, 'GET', {
            'reason': 'ClientError',
            's3': f'{bucket}/{filename}'
        })
        template_vars = {
            'contentstring': 'There was a problem accessing download data.',
            'title': 'Data Not Available',
            'requestid': get_request_id(),
        }

        # Error pages are sent without extra headers; pass a literal rather
        # than rebinding the `headers` parameter.
        return make_html_response(template_vars, {}, code, 'error.html')

    log.debug('this region: {}'.format(
        os.getenv('AWS_DEFAULT_REGION', 'env var doesnt exist')))
    if bucket_region != os.getenv('AWS_DEFAULT_REGION'):
        # The two literals are joined *before* .format() is applied.
        # Previously `a + b.format(...)` formatted only the second literal,
        # so the {0}/{1}/{2} placeholders were logged unfilled.
        log.warning(
            ("bucket {0} is in region {1}, we are in region {2}! "
             "This is double egress in Proxy mode!").format(
                 bucket, bucket_region, os.getenv('AWS_DEFAULT_REGION')))
    client = get_bc_config_client(user_id)

    log.debug('timing for try_download_from_bucket(): ')
    log.debug('ET for check_in_region_request(): {}s'.format(t1 - t0))
    log.debug('ET for get_role_creds(): {}s'.format(t2 - t1))
    log.debug('ET for get_role_session(): {}s'.format(t3 - t2))
    log.debug('ET for get_bucket_region(): {}s'.format(t4 - t3))
    log.debug('ET for total: {}'.format(t4 - t0))

    log.info("Attempting to download s3://{0}/{1}".format(bucket, filename))

    try:
        # Make sure this file exists, don't ACTUALLY download.
        # Setting the SUPPRESS_HEAD environment variable skips this probe.
        range_header = get_range_header_val()
        if not range_header:
            if not os.getenv("SUPPRESS_HEAD"):
                client.head_object(Bucket=bucket, Key=filename)
            redirheaders = {}
        else:
            if not os.getenv("SUPPRESS_HEAD"):
                client.head_object(Bucket=bucket,
                                   Key=filename,
                                   Range=range_header)
            # Pass the range along so the redirect target honours it too.
            redirheaders = {'Range': range_header}

        expires_in = 3600 - offset
        # Let the caller cache the redirect for one minute less than the
        # presigned URL stays valid.
        redirheaders['Cache-Control'] = 'private, max-age={0}'.format(
            expires_in - 60)
        if isinstance(headers, dict):
            log.debug(f'adding {headers} to redirheaders {redirheaders}')
            redirheaders.update(headers)

        # Generate URL
        presigned_url = get_presigned_url(creds, bucket, filename,
                                          bucket_region, expires_in, user_id)
        s3_host = urlparse(presigned_url).netloc
        log.debug("Presigned URL host was {0}".format(s3_host))

        return make_redirect(presigned_url, redirheaders, 303)

    except ClientError as e:
        # Watch for bad range request:
        if e.response['ResponseMetadata']['HTTPStatusCode'] == 416:
            # cumulus uses this log message for metrics purposes.
            log.error(
                f"Invalid Range 416, Could not get range {get_range_header_val()} s3://{bucket}/{filename}: {e}"
            )
            cumulus_log_message(
                'failure', 416, 'GET', {
                    'reason': 'Invalid Range',
                    's3': f'{bucket}/{filename}',
                    'range': get_range_header_val()
                })
            return Response(body='Invalid Range', status_code=416, headers={})

        # cumulus uses this log message for metrics purposes.
        log.warning("Could not download s3://{0}/{1}: {2}".format(
            bucket, filename, e))
        template_vars = {
            'contentstring': 'Could not find requested data.',
            'title': 'Data Not Available',
            'requestid': get_request_id(),
        }
        cumulus_log_message(
            'failure', 404, 'GET', {
                'reason': 'Could not find requested data',
                's3': f'{bucket}/{filename}'
            })
        return make_html_response(template_vars, {}, 404, 'error.html')