def index_document(self):
    """
    Fetch this file's record from the indexd service.

    Return:
        dict: parsed JSON record from indexd (contains an ``urls`` field)

    Raises:
        UnavailableError: indexd could not be reached, or returned an
            unexpected status code
        NotFound: indexd has no record for ``self.file_id``
        InternalError: the response body was not valid JSON, or the record
            is missing the ``urls`` field
    """
    indexd_server = config.get("INDEXD") or config["BASE_URL"] + "/index"
    url = indexd_server + "/index/"
    try:
        res = requests.get(url + self.file_id)
    except Exception as e:
        logger.error(
            "failed to reach indexd at {0}: {1}".format(url + self.file_id, e)
        )
        raise UnavailableError("Fail to reach id service to find data location")
    if res.status_code == 200:
        # keep the try body minimal: only the JSON parse can raise here.
        # (previously the "urls" check lived inside this try, so its
        # InternalError was swallowed and re-raised with the wrong message)
        try:
            json_response = res.json()
        except Exception as e:
            logger.error(
                "indexd response missing JSON field {}".format(url + self.file_id)
            )
            raise InternalError("internal error from indexd: {}".format(e))
        if "urls" not in json_response:
            logger.error(
                "URLs are not included in response from "
                "indexd: {}".format(url + self.file_id)
            )
            raise InternalError("URLs and metadata not found")
        # reuse the already-parsed body instead of calling res.json() again
        return json_response
    elif res.status_code == 404:
        logger.error(
            "Not Found. indexd could not find {}: {}".format(
                url + self.file_id, res.text
            )
        )
        raise NotFound("No indexed document found with id {}".format(self.file_id))
    else:
        raise UnavailableError(res.text)
def get_signed_url(self, action, expires_in, public_data=False,
                   force_signed_url=True, **kwargs):
    """
    Build an HTTP URL (presigned unless the data is public and signing is
    not forced) for this S3 file location.

    Args:
        action (str): key into ``ACTION_DICT["s3"]`` (e.g. upload/download)
        expires_in (int): url/credential lifetime in seconds
        public_data (bool): whether the data is public
        force_signed_url (bool): when False and ``public_data`` is True,
            return the raw (unsigned) url

    Return:
        str: the URL
    """
    aws_creds = get_value(
        config, "AWS_CREDENTIALS", InternalError("credentials not configured")
    )
    s3_buckets = get_value(
        config, "S3_BUCKETS", InternalError("buckets not configured")
    )
    bucket_name = self.bucket_name()
    bucket = s3_buckets.get(bucket_name)
    # custom endpoint (e.g. non-AWS S3-compatible storage) takes precedence
    # over the default amazonaws.com host
    if bucket and bucket.get("endpoint_url"):
        http_url = bucket["endpoint_url"].strip("/") + "/{}/{}".format(
            self.parsed_url.netloc, self.parsed_url.path.strip("/")
        )
    else:
        http_url = "https://{}.s3.amazonaws.com/{}".format(
            self.parsed_url.netloc, self.parsed_url.path.strip("/")
        )
    credential = S3IndexedFileLocation.get_credential_to_access_bucket(
        bucket_name, aws_creds, expires_in
    )
    aws_access_key_id = get_value(
        credential,
        "aws_access_key_id",
        InternalError("aws configuration not found"),
    )
    # `aws_access_key_id == "*"` is a special case to support public buckets
    # where we do *not* want to try signing at all. the other case is that the
    # data is public and user requested to not sign the url
    if aws_access_key_id == "*" or (public_data and not force_signed_url):
        return http_url
    region = self.get_bucket_region()
    # defensive fix: the old `bucket.get("endpoint_url")` assumed `bucket`
    # is not None; guard it the same way as the http_url branch above so an
    # unconfigured bucket cannot raise AttributeError here
    if not region and not (bucket and bucket.get("endpoint_url")):
        region = flask.current_app.boto.get_bucket_region(
            self.parsed_url.netloc, credential
        )
    user_info = _get_user_info()
    return generate_aws_presigned_url(
        http_url,
        ACTION_DICT["s3"][action],
        credential,
        "s3",
        region,
        expires_in,
        user_info,
    )
def make_signed_url(self, file_name, expires_in=None):
    """
    Works for upload only; S3 only (only supported case for data upload flow
    currently).

    Args:
        file_name (str)
        expires_in (int)

    Return:
        S3IndexedFileLocation
    """
    app_config = flask.current_app.config
    if "DATA_UPLOAD_BUCKET" not in app_config:
        raise InternalError(
            "amanuensis not configured with data upload bucket; can't create signed URL"
        )
    bucket = app_config["DATA_UPLOAD_BUCKET"]
    s3_url = "s3://{}/{}/{}".format(bucket, self.guid, file_name)
    signed_url = S3IndexedFileLocation(s3_url).get_signed_url("upload", expires_in)
    self.logger.info(
        "created presigned URL to upload file {} with ID {}".format(
            file_name, self.guid
        )
    )
    return signed_url
def generate_presigne_url_for_part_upload(self, uploadId, partNumber, expires_in):
    """
    Generate presigned url for uploading object part given uploadId and part
    number. (NOTE: the "presigne" typo in the name is preserved on purpose —
    callers reference this exact name.)

    Args:
        uploadId (str): uploadID of the multipart upload
        partNumber (int): part number
        expires_in (int): expiration time

    Returns:
        str: presigned_url
    """
    aws_creds = get_value(
        config, "AWS_CREDENTIALS", InternalError("credentials not configured")
    )
    credential = S3IndexedFileLocation.get_credential_to_access_bucket(
        self.bucket_name(), aws_creds, expires_in
    )
    # fall back to asking AWS when no region is configured for the bucket
    region = self.get_bucket_region() or flask.current_app.boto.get_bucket_region(
        self.parsed_url.netloc, credential
    )
    return multipart_upload.generate_presigned_url_for_uploading_part(
        self.parsed_url.netloc,
        self.parsed_url.path.strip("/"),
        credential,
        uploadId,
        partNumber,
        region,
        expires_in,
    )
def generate_presigned_url_for_uploading_part(
    bucket, key, credentials, uploadId, partNumber, region, expires
):
    """
    Build a presigned PUT url for one part of a multipart upload.

    Args:
        bucket (str): bucket
        key (str): key
        credentials (dict): dictionary of aws credentials
        uploadId (str): uploadID of the multipart upload
        partNumber (int): part number
        region (str): bucket region
        expires (int): expiration time

    Returns:
        str: presigned_url

    Raises:
        InternalError: signing failed
    """
    object_url = "https://{}.s3.amazonaws.com/{}".format(bucket, key)
    # both query params must be part of the signature for S3 to accept them
    extra_signed_params = {"partNumber": str(partNumber), "uploadId": uploadId}
    try:
        return generate_aws_presigned_url(
            object_url, "PUT", credentials, "s3", region, expires, extra_signed_params
        )
    except Exception as e:
        raise InternalError(
            "Can not generate presigned url for part number {} of key {}. Detail {}".format(
                partNumber, key, e
            )
        )
def initilize_multipart_upload(bucket, key, credentials):
    """
    Start a multipart upload and return its upload id.
    (NOTE: the "initilize" typo in the name is preserved — callers use it.)

    Args:
        bucket (str): bucket name
        key (str): object key
        credentials (dict): credential dictionary

    Returns:
        str: UploadId of the created multipart upload
    """
    aws_session = boto3.Session(
        aws_access_key_id=credentials["aws_access_key_id"],
        aws_secret_access_key=credentials["aws_secret_access_key"],
        aws_session_token=credentials.get("aws_session_token"),
    )
    s3 = aws_session.client("s3")
    try:
        # retry transient failures with jitter between attempts
        response = retry_call(
            s3.create_multipart_upload,
            fkwargs={"Bucket": bucket, "Key": key},
            tries=MAX_TRIES,
            jitter=10,
        )
    except ClientError as error:
        logger.error(
            "Error when create multiple part upload for object with uuid {}. Detail {}".format(
                key, error
            )
        )
        raise InternalError("Can not initilize multipart upload for {}".format(key))
    return response.get("UploadId")
def get_endpoints_descriptions(providers, session):
    """
    Map each requested provider to a human-readable description.

    Args:
        providers (list[str]): provider names; "cdis" is handled specially,
            everything else is looked up in the CloudProvider table
        session: database session used for the CloudProvider query

    Returns:
        dict: "/<provider>" -> description string

    Raises:
        InternalError: a provider has no CloudProvider row
    """
    descriptions = {}
    for name in providers:
        if name == "cdis":
            descriptions["/cdis"] = "access to Gen3 APIs"
            continue
        record = session.query(CloudProvider).filter_by(name=name).first()
        if record is None:
            raise InternalError("{} is not supported by the system!".format(name))
        descriptions["/" + name] = record.description or ""
    return descriptions
def assume_role(cls, bucket_cred, expires_in, aws_creds_config, boto=None):
    """
    Assume the role configured for a bucket and return normalized credentials.

    Args:
        bucket_cred: bucket credential config; must contain "role-arn"
        expires_in: credential lifetime
        aws_creds_config: aws credentials used to assume the role
        boto (optional): provide `boto` when calling this function outside
            of application context, to avoid errors when using
            `flask.current_app`.

    Return:
        dict: aws_access_key_id / aws_secret_access_key / aws_session_token
    """
    boto = boto or flask.current_app.boto
    role_arn = get_value(
        bucket_cred, "role-arn", InternalError("role-arn of that bucket is missing")
    )
    assumed_role = boto.assume_role(role_arn, expires_in, aws_creds_config)
    cred = get_value(
        assumed_role, "Credentials", InternalError("fail to assume role")
    )
    # map STS response keys onto the lowercase names the rest of the code expects
    key_map = [
        ("aws_access_key_id", "AccessKeyId",
         "outdated format. AccessKeyId missing"),
        ("aws_secret_access_key", "SecretAccessKey",
         "outdated format. SecretAccessKey missing"),
        ("aws_session_token", "SessionToken",
         "outdated format. Sesssion token missing"),
    ]
    return {
        out_key: get_value(cred, sts_key, InternalError(msg))
        for out_key, sts_key, msg in key_map
    }
def __init__(self, credentials, logger):
    """
    Build one storage client per configured provider.

    Args:
        credentials (dict): provider name -> provider config; each config
            must contain a "backend" key naming the client backend
        logger: logger instance used for error reporting

    Raises:
        InternalError: a provider config has no "backend" key
    """
    self.logger = logger
    self.clients = {}
    # renamed loop var (was `config`) so it no longer shadows the
    # module-level `config` used elsewhere in this file
    for provider, provider_config in credentials.items():
        if "backend" not in provider_config:
            self.logger.error(
                "Storage provider {} is not configured with backend".format(provider)
            )
            raise InternalError("Something went wrong")
        backend = provider_config["backend"]
        # NOTE(review): the original deep-copied the config and deleted
        # "backend" from the copy but then passed the ORIGINAL config to
        # get_client anyway — the copy was dead code and is removed here.
        # If passing the stripped copy was the intent, confirm against
        # get_client's contract before changing behavior.
        self.clients[provider] = get_client(config=provider_config, backend=backend)
def get_bucket_region(self):
    """
    Look up this file's bucket region from the S3_BUCKETS config.

    Return:
        Optional[str]: configured region, or None when the buckets config
            is empty, the bucket is unknown, or it has no "region" entry
    """
    s3_buckets = get_value(
        config, "S3_BUCKETS", InternalError("buckets not configured")
    )
    if not s3_buckets:
        return None
    bucket_cred = s3_buckets.get(self.bucket_name())
    if bucket_cred is None:
        return None
    # .get returns None when "region" is absent, matching the old branches
    return bucket_cred.get("region")
def bucket_name(self):
    """
    Match this file's host (netloc) against the configured bucket names.

    Return:
        Optional[str]: bucket name, or None if not in config
    """
    s3_buckets = get_value(
        flask.current_app.config,
        "S3_BUCKETS",
        InternalError("buckets not configured"),
    )
    # configured bucket names are treated as regex patterns, anchored at
    # both ends, against the url's netloc; first match wins
    matches = (
        name
        for name in s3_buckets
        if re.match("^" + name + "$", self.parsed_url.netloc)
    )
    return next(matches, None)
def get_bucket_region(self, bucket, config):
    """
    Ask AWS for the region of *bucket*.

    Args:
        bucket (str): bucket name
        config (dict): when it contains "aws_access_key_id", a fresh S3
            client is built from it

    Return:
        str: region name ("us-east-1" when AWS reports no LocationConstraint)

    Raises:
        InternalError: boto reported an error
        UnavailableError: AWS could not be reached
    """
    try:
        if "aws_access_key_id" in config:
            self.s3_client = client("s3", **config)
        # NOTE(review): without credentials in `config` this relies on a
        # previously created self.s3_client — confirm callers guarantee that
        location = self.s3_client.get_bucket_location(Bucket=bucket).get(
            "LocationConstraint"
        )
    except Boto3Error as ex:
        self.logger.exception(ex)
        raise InternalError("Fail to get bucket region: {}".format(ex))
    except Exception as ex:
        self.logger.exception(ex)
        raise UnavailableError("Fail to reach AWS: {}".format(ex))
    # AWS returns a null LocationConstraint for the legacy default region
    return "us-east-1" if location is None else location
def index_document(self):
    """
    Create a blank record in indexd for this upload.

    Return:
        dict: response from indexd (the contents of the record), containing
        ``guid`` and ``url``

    Raises:
        InternalError: indexd returned a non-200/201 status code
    """
    index_url = self.indexd.rstrip("/") + "/index/blank/"
    params = {"uploader": self.uploader, "file_name": self.file_name}
    # if attempting to set record's authz field, need to pass token
    # through
    if self.authz:
        params["authz"] = self.authz
        token = get_jwt()
        auth = None
        headers = {"Authorization": f"bearer {token}"}
        # consistency fix: was module-level `logger` here while the rest of
        # this method used self.logger
        self.logger.info(
            "passing users authorization header to create blank record")
    else:
        self.logger.info("using indexd basic auth to create blank record")
        auth = (config["INDEXD_USERNAME"], config["INDEXD_PASSWORD"])
        headers = {}
    indexd_response = requests.post(
        index_url, json=params, headers=headers, auth=auth
    )
    if indexd_response.status_code not in [200, 201]:
        # surface whatever indexd said, JSON or not, in the error log
        try:
            data = indexd_response.json()
        except ValueError:
            data = indexd_response.text
        self.logger.error(
            "could not create new record in indexd; got response: {}".format(data)
        )
        raise InternalError(
            "received error from indexd trying to create blank record"
        )
    document = indexd_response.json()
    guid = document["did"]
    self.logger.info(
        "created blank index record with GUID {} for upload".format(guid)
    )
    return document
def init_multipart_upload(self, expires_in):
    """
    Initialize a multipart upload for this file's bucket/key.

    Args:
        expires_in (int): credential expiration time

    Returns:
        str: UploadId
    """
    aws_creds = get_value(
        config, "AWS_CREDENTIALS", InternalError("credentials not configured")
    )
    credentials = S3IndexedFileLocation.get_credential_to_access_bucket(
        self.bucket_name(), aws_creds, expires_in
    )
    bucket = self.parsed_url.netloc
    key = self.parsed_url.path.strip("/")
    return multipart_upload.initilize_multipart_upload(bucket, key, credentials)
def init_multipart_upload(key, expires_in=None):
    """
    Initialize a multipart upload for *key* in the configured upload bucket.

    Args:
        key (str): object key
        expires_in (int, optional): credential expiration time

    Returns:
        str: uploadId

    Raises:
        InternalError: DATA_UPLOAD_BUCKET is not configured
    """
    app_config = flask.current_app.config
    if "DATA_UPLOAD_BUCKET" not in app_config:
        raise InternalError(
            "amanuensis not configured with data upload bucket; can't create signed URL"
        )
    s3_url = "s3://{}/{}".format(app_config["DATA_UPLOAD_BUCKET"], key)
    return S3IndexedFileLocation(s3_url).init_multipart_upload(expires_in)
def complete_multipart_upload(bucket, key, credentials, uploadId, parts):
    """
    Finish a multipart upload. Raise exception if something wrong happens;
    otherwise success.

    Args:
        bucket (str): bucket name
        key (str): object key or `GUID/filename`
        credentials (dict): aws credentials
        uploadId (str): upload id of the current upload
        parts (list(set)): List of part infos
            [{"Etag": "1234567", "PartNumber": 1},
             {"Etag": "4321234", "PartNumber": 2}]

    Return:
        None
    """
    aws_session = boto3.Session(
        aws_access_key_id=credentials["aws_access_key_id"],
        aws_secret_access_key=credentials["aws_secret_access_key"],
        aws_session_token=credentials.get("aws_session_token"),
    )
    completion_kwargs = {
        "Bucket": bucket,
        "Key": key,
        "MultipartUpload": {"Parts": parts},
        "UploadId": uploadId,
    }
    try:
        # retry transient failures with jitter between attempts
        retry_call(
            aws_session.client("s3").complete_multipart_upload,
            fkwargs=completion_kwargs,
            tries=MAX_TRIES,
            jitter=10,
        )
    except ClientError as error:
        logger.error(
            "Error when completing multiple part upload for object with uuid {}. Detail {}".format(
                key, error
            )
        )
        raise InternalError(
            "Can not complete multipart upload for {}. Detail {}".format(key, error)
        )
def assume_role(self, role_arn, duration_seconds, config=None):
    """
    Assume an IAM role via STS.

    Args:
        role_arn (str): ARN of the role to assume
        duration_seconds (int): lifetime of the temporary credentials;
            required (comes from callers' "expires_in")
        config (dict, optional): when it contains "aws_access_key_id", a
            fresh STS client is built from it

    Return:
        STS AssumeRole response

    Raises:
        ValueError: duration_seconds is missing/falsy
        InternalError: boto reported an error
        UnavailableError: AWS could not be reached
    """
    # bug fix: this was an `assert`, which is stripped under `python -O`
    # and would silently let a missing expiration through; raise explicitly
    if not duration_seconds:
        raise ValueError(
            'assume_role() cannot be called without "duration_seconds" parameter;'
            ' please check your "expires_in" parameters'
        )
    try:
        if config and "aws_access_key_id" in config:
            self.sts_client = client("sts", **config)
        # NOTE(review): without credentials in `config` this relies on a
        # previously created self.sts_client — confirm callers guarantee that
        session_name_postfix = uuid.uuid4()
        return self.sts_client.assume_role(
            RoleArn=role_arn,
            DurationSeconds=duration_seconds,
            RoleSessionName="{}-{}".format("gen3", session_name_postfix),
        )
    except Boto3Error as ex:
        self.logger.exception(ex)
        raise InternalError("Fail to assume role: {}".format(ex))
    except Exception as ex:
        self.logger.exception(ex)
        raise UnavailableError("Fail to reach AWS: {}".format(ex))
def complete_multipart_upload(self, uploadId, parts, expires_in):
    """
    Complete a multipart upload for this file's bucket/key.

    Args:
        uploadId (str): upload id of the current upload
        parts (list(set)): List of part infos
            [{"Etag": "1234567", "PartNumber": 1},
             {"Etag": "4321234", "PartNumber": 2}]
        expires_in (int): credential expiration time
    """
    aws_creds = get_value(
        config, "AWS_CREDENTIALS", InternalError("credentials not configured")
    )
    credentials = S3IndexedFileLocation.get_credential_to_access_bucket(
        self.bucket_name(), aws_creds, expires_in
    )
    bucket = self.parsed_url.netloc
    key = self.parsed_url.path.strip("/")
    multipart_upload.complete_multipart_upload(
        bucket, key, credentials, uploadId, parts
    )
def complete_multipart_upload(key, uploadId, parts, expires_in=None):
    """
    Complete a multipart upload in the configured data upload bucket.

    Args:
        key (str): object key or `GUID/filename`
        uploadId (str): upload id of the current upload
        parts (list(set)): List of part infos
            [{"Etag": "1234567", "PartNumber": 1},
             {"Etag": "4321234", "PartNumber": 2}]

    Returns:
        None if success otherwise an exception
    """
    app_config = flask.current_app.config
    if "DATA_UPLOAD_BUCKET" not in app_config:
        raise InternalError(
            "amanuensis not configured with data upload bucket; can't create signed URL"
        )
    s3_url = "s3://{}/{}".format(app_config["DATA_UPLOAD_BUCKET"], key)
    S3IndexedFileLocation(s3_url).complete_multipart_upload(
        uploadId, parts, expires_in
    )
def generate_aws_presigned_url_for_part(key, uploadId, partNumber, expires_in):
    """
    Generate a presigned url for one part of a multipart upload.

    Args:
        key (str): object key of `guid/filename`
        uploadId (str): uploadId of the current upload.
        partNumber (int): the part number
        expires_in (int): expiration time

    Returns:
        str: presigned_url

    Raises:
        InternalError: DATA_UPLOAD_BUCKET is not configured
    """
    app_config = flask.current_app.config
    if "DATA_UPLOAD_BUCKET" not in app_config:
        raise InternalError(
            "amanuensis not configured with data upload bucket; can't create signed URL"
        )
    s3_url = "s3://{}/{}".format(app_config["DATA_UPLOAD_BUCKET"], key)
    location = S3IndexedFileLocation(s3_url)
    return location.generate_presigne_url_for_part_upload(
        uploadId, partNumber, expires_in
    )
def get_credential_to_access_bucket(cls, bucket_name, aws_creds, expires_in, boto=None):
    """
    Resolve the aws credentials to use for *bucket_name*.

    Args:
        bucket_name (str): bucket to access
        aws_creds (dict): configured AWS credential sets
        expires_in (int): lifetime passed through to assume_role
        boto (optional): boto manager, forwarded to assume_role

    Return:
        dict: credentials; ``{"aws_access_key_id": "*"}`` marks a public
            bucket where no signing should happen

    Raises:
        InternalError: configuration is missing/incomplete
        Unauthorized: the bucket is not configured at all
    """
    s3_buckets = get_value(
        config, "S3_BUCKETS", InternalError("buckets not configured")
    )
    if not aws_creds and not s3_buckets:
        raise InternalError("no bucket is configured")
    if not aws_creds and s3_buckets:
        raise InternalError("credential for buckets is not configured")
    bucket_cred = s3_buckets.get(bucket_name)
    if bucket_cred is None:
        raise Unauthorized("permission denied for bucket")
    cred_key = get_value(
        bucket_cred, "cred", InternalError("credential of that bucket is missing")
    )
    # this is a special case to support public buckets where we do *not* want
    # to try signing at all
    if cred_key == "*":
        return {"aws_access_key_id": "*"}
    creds_for_bucket = get_value(
        aws_creds,
        cred_key,
        InternalError("aws credential of that bucket is not found"),
    )
    if "role-arn" not in bucket_cred:
        return creds_for_bucket
    # role-based access: exchange the configured creds for assumed-role creds
    return S3IndexedFileLocation.assume_role(
        bucket_cred, expires_in, creds_for_bucket, boto
    )
def get_user_accesses():
    """
    Fetch the current user's access records via the user data model.

    Return:
        the user accesses object from udm

    Raises:
        InternalError: udm returned nothing for the current user
    """
    user = udm.get_user_accesses()
    if user:
        return user
    raise InternalError(
        "Error: %s user does not exist" % flask.g.user.username
    )