def try_download_head(bucket, filename):
    """Answer a HEAD-style request for an S3 object with a redirect to S3.

    The object is first requested from S3 (with the range returned by
    ``get_range_header_val()``, if any) to confirm that it is reachable.
    On success a presigned ``HEAD`` URL is generated and a 303 redirect to
    it is returned.

    Args:
        bucket: Name of the S3 bucket holding the object.
        filename: Key of the object inside ``bucket``.

    Returns:
        The redirect built by ``make_redriect`` on success, or the 404
        ``error.html`` response built by ``make_html_response`` when the
        S3 request raises ``ClientError``.
    """
    client = get_data_dl_s3_client()

    # Check for range request
    range_header = get_range_header_val()
    try:
        if not range_header:
            download = client.get_object(Bucket=bucket, Key=filename)
        else:
            log.info("Downloading range {0}".format(range_header))
            download = client.get_object(Bucket=bucket,
                                         Key=filename,
                                         Range=range_header)
    except ClientError as e:
        # The message previously read "Could get head", which said the
        # opposite of what happened.
        log.warning("Could not get head for s3://{0}/{1}: {2}".format(
            bucket, filename, e))
        template_vars = {
            'contentstring': 'File not found',
            'title': 'File not found'
        }
        return make_html_response(template_vars, {}, 404, 'error.html')
    log.debug(download)

    # NOTE: these headers are assembled and logged only; the redirect
    # returned below is sent with an empty header dict.
    response_headers = {'Content-Type': download['ContentType']}
    http_headers = download['ResponseMetadata']['HTTPHeaders']
    for header in http_headers:
        name = header_map[header] if header in header_map else header
        value = http_headers[header] if header != 'server' else 'egress'
        log.debug("setting header {0} to {1}.".format(name, value))
        response_headers[name] = value

    # Try Redirecting to HEAD. There should be a better way.
    cookievars = get_cookie_vars(app.current_request.headers)
    if 'urs-user-id' in cookievars:
        user_id = cookievars['urs-user-id']
    else:
        user_id = 'Unknown'

    # Generate URL (the client created above is reused rather than
    # requesting a second one).
    creds = get_role_creds(user_id=user_id)
    bucket_region = client.get_bucket_location(
        Bucket=bucket)['LocationConstraint']
    # An empty LocationConstraint is treated as us-east-1.
    bucket_region = 'us-east-1' if not bucket_region else bucket_region
    presigned_url = get_presigned_url(creds, bucket, filename, bucket_region,
                                      24 * 3600, user_id, 'HEAD')
    s3_host = urlparse(presigned_url).netloc

    # Return a redirect to a HEAD
    log.debug("Presigned HEAD URL host was {0}".format(s3_host))
    return make_redriect(presigned_url, {}, 303)
def try_download_from_bucket(bucket, filename, user_profile):
    """Verify an S3 object is reachable and redirect the caller to it.

    Args:
        bucket: Name of the S3 bucket holding the object.
        filename: Key of the object inside ``bucket``.
        user_profile: User profile; when it is a dict, the user id is read
            from its ``'urs-user-id'`` key, falling back to ``'uid'``.

    Returns:
        A 303 redirect to a presigned URL on success; a 500 ``error.html``
        response when the bucket region lookup raises ``ClientError``; a
        416 ``Response`` when S3 reports an invalid range; otherwise a 404
        ``error.html`` response.
    """
    # Attempt to pull userid from profile
    user_id = None
    if isinstance(user_profile, dict):
        if 'urs-user-id' in user_profile:
            user_id = user_profile['urs-user-id']
        elif 'uid' in user_profile:
            user_id = user_profile['uid']
    log.info("User Id for download is {0}".format(user_id))

    is_in_region = check_in_region_request(
        app.current_request.context['identity']['sourceIp'])
    creds = get_role_creds(user_id, is_in_region)
    session = get_role_session(creds=creds, user_id=user_id)

    try:
        bucket_region = get_bucket_region(session, bucket)
    except ClientError as e:
        log.error(
            f'ClientError while {user_id} tried downloading {bucket}/{filename}: {e}'
        )
        template_vars = {
            'contentstring': 'There was a problem accessing download data.',
            'title': 'Data Not Available'
        }
        return make_html_response(template_vars, {}, 500, 'error.html')

    log.debug('this region: {}'.format(
        os.getenv('AWS_DEFAULT_REGION', 'env var doesnt exist')))
    if bucket_region != os.getenv('AWS_DEFAULT_REGION'):
        # Format the *whole* message. Previously ``.format`` was attached
        # only to the second of two ``+``-joined literals, so the {0}/{1}/{2}
        # placeholders in the first literal were logged unfilled.
        message = ("bucket {0} is in region {1}, we are in region {2}! "
                   "This is double egress in Proxy mode!")
        log.warning(
            message.format(bucket, bucket_region,
                           os.getenv('AWS_DEFAULT_REGION')))

    # now that we know where the bucket is, connect in THAT region
    params = {'config': bc_Config(**get_bcconfig(user_id))}
    client = session.client('s3', bucket_region, **params)

    log.info("Attempting to download s3://{0}/{1}".format(bucket, filename))
    try:
        # Make sure this file exists, don't ACTUALLY download
        range_header = get_range_header_val()
        if not range_header:
            client.get_object(Bucket=bucket, Key=filename)
            redirheaders = {}
        else:
            client.get_object(Bucket=bucket, Key=filename, Range=range_header)
            redirheaders = {'Range': range_header}

        # Generate URL
        presigned_url = get_presigned_url(creds, bucket, filename,
                                          bucket_region, 24 * 3600, user_id)
        s3_host = urlparse(presigned_url).netloc
        log.debug("Presigned URL host was {0}".format(s3_host))
        log.info("Using REDIRECT because no PROXY in egresslambda")
        return make_redriect(presigned_url, redirheaders, 303)
    except ClientError as e:
        log.warning("Could not download s3://{0}/{1}: {2}".format(
            bucket, filename, e))

        # Watch for bad range request:
        if e.response['ResponseMetadata']['HTTPStatusCode'] == 416:
            return Response(body='Invalid Range', status_code=416, headers={})

        template_vars = {
            'contentstring': 'Could not find requested data.',
            'title': 'Data Not Available'
        }
        return make_html_response(template_vars, {}, 404, 'error.html')
def try_download_head(bucket, filename):
    """Answer a HEAD-style request for an S3 object with a redirect to S3.

    Requests the object from S3 (with the range from
    ``get_range_header_val()``, if any), then generates a presigned ``HEAD``
    URL and returns a 303 redirect to it. Elapsed times of the main steps
    are written to the debug log.

    Args:
        bucket: Name of the S3 bucket holding the object.
        filename: Key of the object inside ``bucket``.

    Returns:
        The redirect built by ``make_redirect`` on success, or the 404
        ``error.html`` response built by ``make_html_response`` when the
        S3 request raises ``ClientError``.
    """
    started = time.time()
    client = get_data_dl_s3_client()
    have_client = time.time()

    # Check for range request
    range_header = get_range_header_val()
    get_kwargs = {'Bucket': bucket, 'Key': filename}
    try:
        if range_header:
            log.info(f"Downloading range {range_header}")
            get_kwargs['Range'] = range_header
        download = client.get_object(**get_kwargs)
        have_object = time.time()
    except ClientError as e:
        log.warning(f"Could not get head for s3://{bucket}/{filename}: {e}")
        # cumulus uses this log message for metrics purposes.
        template_vars = {
            'contentstring': 'File not found',
            'title': 'File not found',
            'requestid': get_request_id(),
        }
        failure_details = {
            'reason': 'Could not find requested data',
            's3': f'{bucket}/{filename}'
        }
        cumulus_log_message('failure', 404, 'HEAD', failure_details)
        return make_html_response(template_vars, {}, 404, 'error.html')
    log.debug(download)

    # These headers are collected and logged; the redirect below is sent
    # with an empty header dict.
    response_headers = {'Content-Type': download['ContentType']}
    s3_headers = download['ResponseMetadata']['HTTPHeaders']
    for header in s3_headers:
        if header in header_map:
            name = header_map[header]
        else:
            name = header
        if header == 'server':
            value = 'egress'
        else:
            value = s3_headers[header]
        log.debug(f"setting header {name} to {value}.")
        response_headers[name] = value

    # Try Redirecting to HEAD. There should be a better way.
    cookie_vars = get_cookie_vars(app.current_request.headers)
    user_id = get_jwt_field(cookie_vars, 'urs-user-id')
    log_context(user_id=user_id)

    # Generate URL
    before_creds = time.time()
    creds, offset = get_role_creds(user_id=user_id)
    url_lifespan = 3600 - offset

    location = client.get_bucket_location(Bucket=bucket)
    # An empty LocationConstraint is treated as us-east-1.
    bucket_region = location['LocationConstraint'] or 'us-east-1'
    have_region = time.time()

    presigned_url = get_presigned_url(creds, bucket, filename, bucket_region,
                                      url_lifespan, user_id, 'HEAD')
    have_url = time.time()
    s3_host = urlparse(presigned_url).netloc

    # Return a redirect to a HEAD
    log.debug(f"Presigned HEAD URL host was {s3_host}")
    log.debug('timing for try_download_head()')
    log.debug(f'ET for get_data_dl_s3_client(): {have_client - started}s')
    log.debug(f'ET for client.get_object(): {have_object - have_client}s')
    # This interval also spans the get_bucket_location() call above.
    log.debug(f'ET for get_role_creds(): {have_region - before_creds}s')
    log.debug(f'ET for get_presigned_url(): {have_url - have_region}s')

    return make_redirect(presigned_url, {}, 303)
def try_download_from_bucket(bucket, filename, user_profile):
    """Verify an S3 object is reachable and redirect the caller to it.

    Args:
        bucket: Name of the S3 bucket holding the object.
        filename: Key of the object inside ``bucket``.
        user_profile: User profile; when it is a dict, the user id is read
            from its ``'urs-user-id'`` key, falling back to ``'uid'``.

    Returns:
        A 303 redirect to a presigned URL on success; a 500 ``error.html``
        response when the bucket location lookup raises ``ClientError``; a
        416 ``Response`` when S3 reports an invalid range; otherwise a 404
        ``error.html`` response.
    """
    # Attempt to pull userid from profile: first matching key wins.
    user_id = None
    if isinstance(user_profile, dict):
        for profile_key in ('urs-user-id', 'uid'):
            if profile_key in user_profile:
                user_id = user_profile[profile_key]
                break
    log.info("User Id for download is {0}".format(user_id))

    source_ip = app.current_request.context['identity']['sourceIp']
    is_in_region = check_in_region_request(source_ip)
    creds = get_role_creds(user_id, is_in_region)
    session = get_role_session(creds=creds, user_id=user_id)

    # Settings for the botocore Config used by the region-specific client.
    BCCONFIG = {
        "user_agent": "RAIN Egress App for userid={0}".format(user_id),
        "s3": {
            "addressing_style": "path"
        },
        "connect_timeout": 600,
        "read_timeout": 600,
        "retries": {
            "max_attempts": 10
        }
    }
    signature_version = os.getenv('S3_SIGNATURE_VERSION')
    if signature_version:
        BCCONFIG['signature_version'] = signature_version

    # Figure out bucket region
    try:
        location = session.client('s3').get_bucket_location(Bucket=bucket)
        # An empty LocationConstraint is treated as us-east-1.
        bucket_region = location['LocationConstraint'] or 'us-east-1'
        log.debug("bucket {0} is in region {1}".format(bucket, bucket_region))
    except ClientError as e:
        # We hit here if the download role cannot access a bucket, or if it doesn't exist
        log.error("Coud not access download bucket {0}: {1}".format(bucket, e))
        error_vars = {
            'contentstring': 'There was a problem accessing download data.',
            'title': 'Data Not Available'
        }
        return make_html_response(error_vars, {}, 500, 'error.html')

    log.debug('this region: {}'.format(
        os.getenv('AWS_DEFAULT_REGION', 'env var doesnt exist')))
    if bucket_region != os.getenv('AWS_DEFAULT_REGION'):
        double_egress_msg = (
            "bucket {0} is in region {1}, we are in region {2}! This is double egress in Proxy mode!"
        )
        log.warning(
            double_egress_msg.format(bucket, bucket_region,
                                     os.getenv('AWS_DEFAULT_REGION')))

    # now that we know where the bucket is, connect in THAT region
    client = session.client('s3',
                            bucket_region,
                            config=bc_Config(**BCCONFIG))

    log.info("Attempting to download s3://{0}/{1}".format(bucket, filename))
    try:
        # Make sure this file exists, don't ACTUALLY download
        range_header = get_range_header_val()
        object_kwargs = {'Bucket': bucket, 'Key': filename}
        redirheaders = {}
        if range_header:
            object_kwargs['Range'] = range_header
            redirheaders['Range'] = range_header
        client.get_object(**object_kwargs)

        # Generate URL
        presigned_url = get_presigned_url(creds, bucket, filename,
                                          bucket_region, 24 * 3600, user_id)
        s3_host = urlparse(presigned_url).netloc
        log.debug("Presigned URL host was {0}".format(s3_host))
        log.info("Using REDIRECT because no PROXY in egresslambda")
        return make_redriect(presigned_url, redirheaders, 303)
    except ClientError as e:
        log.warning("Could not download s3://{0}/{1}: {2}".format(
            bucket, filename, e))

        # Watch for bad range request:
        status = e.response['ResponseMetadata']['HTTPStatusCode']
        if status == 416:
            return Response(body='Invalid Range', status_code=416, headers={})

        error_vars = {
            'contentstring': 'Could not find requested data.',
            'title': 'Data Not Available'
        }
        return make_html_response(error_vars, {}, 404, 'error.html')
def try_download_from_bucket(bucket, filename, user_profile, headers: dict):
    """Verify an S3 object is reachable and redirect the caller to it.

    Args:
        bucket: Name of the S3 bucket holding the object.
        filename: Key of the object inside ``bucket``.
        user_profile: User profile; when it is a dict, the user id is read
            from its ``'urs-user-id'`` key, falling back to ``'uid'``.
        headers: Extra headers merged into the redirect response headers
            when this is a dict; ignored otherwise.

    Returns:
        A 303 redirect to a presigned URL on success. When the bucket
        region lookup raises ``ClientError``, an ``error.html`` response
        carrying the HTTP status from the error (400 if it cannot be read).
        When the object check raises ``ClientError``, a 416 ``Response``
        for an invalid range, otherwise a 404 ``error.html`` response.
    """
    # Attempt to pull userid from profile
    user_id = None
    if isinstance(user_profile, dict):
        if 'urs-user-id' in user_profile:
            user_id = user_profile['urs-user-id']
        elif 'uid' in user_profile:
            user_id = user_profile['uid']
    log.info("User Id for download is {0}".format(user_id))
    log_context(user_id=user_id)

    t0 = time.time()
    is_in_region = check_in_region_request(
        app.current_request.context['identity']['sourceIp'])
    t1 = time.time()
    creds, offset = get_role_creds(user_id, is_in_region)
    t2 = time.time()
    session = get_role_session(creds=creds, user_id=user_id)
    t3 = time.time()

    try:
        bucket_region = get_bucket_region(session, bucket)
        t4 = time.time()
    except ClientError as e:
        try:
            code = e.response['ResponseMetadata']['HTTPStatusCode']
        except (AttributeError, KeyError, IndexError):
            code = 400
        log.debug(f'response: {e.response}')
        log.error(
            f'ClientError while {user_id} tried downloading {bucket}/{filename}: {e}'
        )
        cumulus_log_message('failure', code, 'GET', {
            'reason': 'ClientError',
            's3': f'{bucket}/{filename}'
        })
        template_vars = {
            'contentstring': 'There was a problem accessing download data.',
            'title': 'Data Not Available',
            'requestid': get_request_id(),
        }
        return make_html_response(template_vars, {}, code, 'error.html')

    log.debug('this region: {}'.format(
        os.getenv('AWS_DEFAULT_REGION', 'env var doesnt exist')))
    if bucket_region != os.getenv('AWS_DEFAULT_REGION'):
        # Format the *whole* message. Previously ``.format`` was attached
        # only to the second of two ``+``-joined literals, so the {0}/{1}/{2}
        # placeholders in the first literal were logged unfilled.
        message = ("bucket {0} is in region {1}, we are in region {2}! "
                   "This is double egress in Proxy mode!")
        log.warning(
            message.format(bucket, bucket_region,
                           os.getenv('AWS_DEFAULT_REGION')))

    client = get_bc_config_client(user_id)

    log.debug('timing for try_download_from_bucket(): ')
    log.debug('ET for check_in_region_request(): {}s'.format(t1 - t0))
    log.debug('ET for get_role_creds(): {}s'.format(t2 - t1))
    log.debug('ET for get_role_session(): {}s'.format(t3 - t2))
    log.debug('ET for get_bucket_region(): {}s'.format(t4 - t3))
    log.debug('ET for total: {}'.format(t4 - t0))

    log.info("Attempting to download s3://{0}/{1}".format(bucket, filename))

    try:
        # Make sure this file exists, don't ACTUALLY download
        range_header = get_range_header_val()
        head_kwargs = {'Bucket': bucket, 'Key': filename}
        redirheaders = {}
        if range_header:
            head_kwargs['Range'] = range_header
            redirheaders['Range'] = range_header
        # The existence check is skipped when SUPPRESS_HEAD is set.
        if not os.getenv("SUPPRESS_HEAD"):
            client.head_object(**head_kwargs)

        expires_in = 3600 - offset
        # Cache lifetime is 60 seconds shorter than the presigned URL's.
        redirheaders['Cache-Control'] = 'private, max-age={0}'.format(
            expires_in - 60)
        if isinstance(headers, dict):
            log.debug(f'adding {headers} to redirheaders {redirheaders}')
            redirheaders.update(headers)

        # Generate URL
        presigned_url = get_presigned_url(creds, bucket, filename,
                                          bucket_region, expires_in, user_id)
        s3_host = urlparse(presigned_url).netloc
        log.debug("Presigned URL host was {0}".format(s3_host))

        return make_redirect(presigned_url, redirheaders, 303)

    except ClientError as e:
        # Watch for bad range request:
        if e.response['ResponseMetadata']['HTTPStatusCode'] == 416:
            # cumulus uses this log message for metrics purposes.
            log.error(
                f"Invalid Range 416, Could not get range {get_range_header_val()} s3://{bucket}/{filename}: {e}"
            )
            cumulus_log_message(
                'failure', 416, 'GET', {
                    'reason': 'Invalid Range',
                    's3': f'{bucket}/{filename}',
                    'range': get_range_header_val()
                })
            return Response(body='Invalid Range', status_code=416, headers={})

        # cumulus uses this log message for metrics purposes.
        log.warning("Could not download s3://{0}/{1}: {2}".format(
            bucket, filename, e))
        template_vars = {
            'contentstring': 'Could not find requested data.',
            'title': 'Data Not Available',
            'requestid': get_request_id(),
        }
        cumulus_log_message(
            'failure', 404, 'GET', {
                'reason': 'Could not find requested data',
                's3': f'{bucket}/{filename}'
            })
        return make_html_response(template_vars, {}, 404, 'error.html')