Example #1
0
 def index_document(self):
     indexd_server = config.get("INDEXD") or config["BASE_URL"] + "/index"
     url = indexd_server + "/index/"
     try:
         res = requests.get(url + self.file_id)
     except Exception as e:
         logger.error("failed to reach indexd at {0}: {1}".format(
             url + self.file_id, e))
         raise UnavailableError(
             "Failed to reach id service to find data location")
     if res.status_code == 200:
         try:
             json_response = res.json()
         except ValueError as e:
             logger.error("indexd response is not valid JSON at {0}: {1}".format(
                 url + self.file_id, e))
             raise InternalError("internal error from indexd: {}".format(e))
         if "urls" not in json_response:
             logger.error("URLs are not included in response from "
                          "indexd: {}".format(url + self.file_id))
             raise InternalError("URLs and metadata not found")
         return json_response
     elif res.status_code == 404:
         logger.error("Not Found. indexd could not find {}: {}".format(
             url + self.file_id, res.text))
         raise NotFound("No indexed document found with id {}".format(
             self.file_id))
     else:
         raise UnavailableError(res.text)
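A minimal usage sketch of the record returned above. Only the "urls" field is guaranteed by the checks in index_document; the s3:// filter and the helper name are illustrative assumptions.

def pick_s3_url(record):
    # Hypothetical helper: return the first s3:// location listed in an
    # indexd record (already validated by index_document above), or None.
    return next(
        (u for u in record.get("urls", []) if u.startswith("s3://")), None)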
Example #2
0
    def get_signed_url(self,
                       action,
                       expires_in,
                       public_data=False,
                       force_signed_url=True,
                       **kwargs):
        aws_creds = get_value(config, "AWS_CREDENTIALS",
                              InternalError("credentials not configured"))
        s3_buckets = get_value(config, "S3_BUCKETS",
                               InternalError("buckets not configured"))

        bucket_name = self.bucket_name()
        bucket = s3_buckets.get(bucket_name)

        if bucket and bucket.get("endpoint_url"):
            http_url = bucket["endpoint_url"].strip("/") + "/{}/{}".format(
                self.parsed_url.netloc, self.parsed_url.path.strip("/"))
        else:
            http_url = "https://{}.s3.amazonaws.com/{}".format(
                self.parsed_url.netloc, self.parsed_url.path.strip("/"))

        credential = S3IndexedFileLocation.get_credential_to_access_bucket(
            bucket_name, aws_creds, expires_in)

        aws_access_key_id = get_value(
            credential,
            "aws_access_key_id",
            InternalError("aws configuration not found"),
        )
        # `aws_access_key_id == "*"` is a special case to support public buckets
        # where we do *not* want to try signing at all. The other case is public
        # data where the user requested an unsigned URL; either way, just return
        # the raw S3 HTTP URL.
        if aws_access_key_id == "*" or (public_data and not force_signed_url):
            return http_url

        region = self.get_bucket_region()
        if not region and not (bucket and bucket.get("endpoint_url")):
            region = flask.current_app.boto.get_bucket_region(
                self.parsed_url.netloc, credential)

        user_info = _get_user_info()

        url = generate_aws_presigned_url(
            http_url,
            ACTION_DICT["s3"][action],
            credential,
            "s3",
            region,
            expires_in,
            user_info,
        )

        return url
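The unsigned-URL decision above can be read as a standalone predicate; a hedged sketch, with the helper name as an assumption.

def should_sign(aws_access_key_id, public_data, force_signed_url):
    # cred "*" marks a public bucket that must never be signed; otherwise,
    # skip signing only when the data is public and the caller opted out
    # of a signed URL.
    if aws_access_key_id == "*":
        return False
    return not (public_data and not force_signed_url)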
Example #3
0
    def make_signed_url(self, file_name, expires_in=None):
        """
        Works for uploads only, and S3 only (the only case currently supported
        by the data upload flow).

        Args:
            file_name (str)
            expires_in (int)

        Return:
            S3IndexedFileLocation
        """
        try:
            bucket = flask.current_app.config["DATA_UPLOAD_BUCKET"]
        except KeyError:
            raise InternalError(
                "amanuensis not configured with data upload bucket; can't create signed URL"
            )
        s3_url = "s3://{}/{}/{}".format(bucket, self.guid, file_name)
        url = S3IndexedFileLocation(s3_url).get_signed_url(
            "upload", expires_in)
        self.logger.info(
            "created presigned URL to upload file {} with ID {}".format(
                file_name, self.guid))
        return url
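The s3:// URL built above fixes the GUID/filename key layout that the multipart helpers below rely on; a trivial sketch of the convention (helper name and bucket value are illustrative).

def data_upload_s3_url(bucket, guid, file_name):
    # Objects in the data upload bucket are keyed as GUID/filename.
    return "s3://{}/{}/{}".format(bucket, guid, file_name)

# data_upload_s3_url("upload-bucket", "dg.1234/abc", "data.bam")
# -> "s3://upload-bucket/dg.1234/abc/data.bam"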
Example #4
0
    def generate_presigned_url_for_part_upload(self, uploadId, partNumber,
                                               expires_in):
        """
        Generate presigned url for uploading object part given uploadId and part number

        Args:
            uploadId(str): uploadID of the multipart upload
            partNumber(int): part number
            expires_in(int): expiration time

        Returns:
            presigned_url(str)
        """
        aws_creds = get_value(config, "AWS_CREDENTIALS",
                              InternalError("credentials not configured"))
        credential = S3IndexedFileLocation.get_credential_to_access_bucket(
            self.bucket_name(), aws_creds, expires_in)

        region = self.get_bucket_region()
        if not region:
            region = flask.current_app.boto.get_bucket_region(
                self.parsed_url.netloc, credential)

        return multipart_upload.generate_presigned_url_for_uploading_part(
            self.parsed_url.netloc,
            self.parsed_url.path.strip("/"),
            credential,
            uploadId,
            partNumber,
            region,
            expires_in,
        )
Example #5
0
def generate_presigned_url_for_uploading_part(
    bucket, key, credentials, uploadId, partNumber, region, expires
):
    """
    Generate presigned url for uploading object part given uploadId and part number

    Args:
        bucket(str): bucket
        key(str): key
        credentials(dict): dictionary of aws credentials
        uploadId(str): uploadID of the multipart upload
        partNumber(int): part number
        region(str): bucket region
        expires(int): expiration time

    Returns:
        presigned_url(str)
    """

    url = "https://{}.s3.amazonaws.com/{}".format(bucket, key)
    additional_signed_qs = {"partNumber": str(partNumber), "uploadId": uploadId}

    try:
        return generate_aws_presigned_url(
            url, "PUT", credentials, "s3", region, expires, additional_signed_qs
        )
    except Exception as e:
        raise InternalError(
            "Cannot generate presigned URL for part number {} of key {}. Detail {}".format(
                partNumber, key, e
            )
        )
Example #6
0
def initialize_multipart_upload(bucket, key, credentials):
    """
    Initialize multipart upload

    Args:
        bucket(str): bucket name
        key(str): object key
        credentials(dict): credential dictionary

    Returns:
        UploadId(str): uploadId
    """
    session = boto3.Session(
        aws_access_key_id=credentials["aws_access_key_id"],
        aws_secret_access_key=credentials["aws_secret_access_key"],
        aws_session_token=credentials.get("aws_session_token"),
    )
    s3client = session.client("s3")

    try:
        multipart_upload = retry_call(
            s3client.create_multipart_upload,
            fkwargs={"Bucket": bucket, "Key": key},
            tries=MAX_TRIES,
            jitter=10,
        )
    except ClientError as error:
        logger.error(
            "Error when creating multipart upload for object with key {}. Detail {}".format(
                key, error
            )
        )
        raise InternalError("Cannot initialize multipart upload for {}".format(key))

    return multipart_upload.get("UploadId")
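For context, a hedged sketch of the retry pattern used above, assuming retry_call is retry.api.retry_call and that MAX_TRIES is the module's retry cap (value illustrative).

from retry.api import retry_call

MAX_TRIES = 5  # assumed value

def create_multipart_with_retries(s3client, bucket, key):
    # Re-invokes create_multipart_upload up to MAX_TRIES times; jitter=10
    # lengthens the sleep between attempts after each failure.
    return retry_call(
        s3client.create_multipart_upload,
        fkwargs={"Bucket": bucket, "Key": key},
        tries=MAX_TRIES,
        jitter=10,
    )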
Example #7
0
def get_endpoints_descriptions(providers, session):
    desc = {}
    for provider in providers:
        if provider == "cdis":
            desc["/cdis"] = "access to Gen3 APIs"
        else:
            p = session.query(CloudProvider).filter_by(name=provider).first()
            if p is None:
                raise InternalError(
                    "{} is not supported by the system!".format(provider))
            desc["/" + provider] = p.description or ""
    return desc
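An illustrative result for the function above; the "aws" entry assumes a CloudProvider row with that name and description exists in the database.

# get_endpoints_descriptions(["cdis", "aws"], session) might return:
# {"/cdis": "access to Gen3 APIs", "/aws": "Amazon Web Services storage"}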
Example #8
0
    def assume_role(cls, bucket_cred, expires_in, aws_creds_config, boto=None):
        """
        Args:
            bucket_cred
            expires_in
            aws_creds_config
            boto (optional): provide `boto` when calling this function
                outside of application context, to avoid errors when
                using `flask.current_app`.
        """
        boto = boto or flask.current_app.boto

        role_arn = get_value(
            bucket_cred, "role-arn",
            InternalError("role-arn of that bucket is missing"))
        assumed_role = boto.assume_role(role_arn, expires_in, aws_creds_config)
        cred = get_value(assumed_role, "Credentials",
                         InternalError("failed to assume role"))
        return {
            "aws_access_key_id": get_value(
                cred,
                "AccessKeyId",
                InternalError("outdated format. AccessKeyId missing"),
            ),
            "aws_secret_access_key": get_value(
                cred,
                "SecretAccessKey",
                InternalError("outdated format. SecretAccessKey missing"),
            ),
            "aws_session_token": get_value(
                cred,
                "SessionToken",
                InternalError("outdated format. SessionToken missing"),
            ),
        }
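The keys read above follow the shape of boto3's STS assume_role response; a sketch with illustrative values.

EXAMPLE_ASSUMED_ROLE = {
    "Credentials": {
        "AccessKeyId": "ASIAEXAMPLE",
        "SecretAccessKey": "example-secret",
        "SessionToken": "example-token",
    }
}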
Example #9
0
    def __init__(self, credentials, logger):
        self.logger = logger
        self.clients = {}
        for provider, config in credentials.items():
            if "backend" not in config:
                self.logger.error(
                    "Storage provider {} is not configured with backend"
                    .format(provider))
                raise InternalError(
                    "storage provider {} misconfigured".format(provider))

            backend = config["backend"]
            creds = copy.deepcopy(config)
            del creds["backend"]
            self.clients[provider] = get_client(config=creds, backend=backend)
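A hedged sketch of the credentials mapping the constructor above iterates over; the provider name and backend value are illustrative.

EXAMPLE_CREDENTIALS = {
    "local-cleversafe": {
        "backend": "cleversafe",  # removed from creds before get_client
        "aws_access_key_id": "example-key",
        "aws_secret_access_key": "example-secret",
    }
}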
Example #10
0
    def get_bucket_region(self):
        s3_buckets = get_value(config, "S3_BUCKETS",
                               InternalError("buckets not configured"))
        if len(s3_buckets) == 0:
            return None

        bucket_cred = s3_buckets.get(self.bucket_name())
        if bucket_cred is None:
            return None

        if "region" not in bucket_cred:
            return None
        else:
            return bucket_cred["region"]
Example #11
0
 def bucket_name(self):
     """
     Return:
         Optional[str]: bucket name or None if not in config
     """
     s3_buckets = get_value(
         flask.current_app.config,
         "S3_BUCKETS",
         InternalError("buckets not configured"),
     )
     for bucket in s3_buckets:
         if re.match("^" + bucket + "$", self.parsed_url.netloc):
             return bucket
     return None
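Note that the loop above treats configured bucket names as anchored regular expressions; a standalone sketch demonstrating that behavior (function name is an assumption).

import re

def matching_bucket(s3_buckets, netloc):
    # Config keys act as regex patterns, so an entry like "prefix-.*"
    # matches any bucket whose name starts with "prefix-".
    for pattern in s3_buckets:
        if re.match("^" + pattern + "$", netloc):
            return pattern
    return None

# matching_bucket({"prefix-.*": {}}, "prefix-data") -> "prefix-.*"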
Example #12
0
 def get_bucket_region(self, bucket, config):
     try:
         if "aws_access_key_id" in config:
             self.s3_client = client("s3", **config)
         response = self.s3_client.get_bucket_location(Bucket=bucket)
         region = response.get("LocationConstraint")
     except Boto3Error as ex:
         self.logger.exception(ex)
         raise InternalError("Failed to get bucket region: {}".format(ex))
     except Exception as ex:
         self.logger.exception(ex)
         raise UnavailableError("Failed to reach AWS: {}".format(ex))
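     # S3 reports LocationConstraint as None for buckets in us-east-1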
     if region is None:
         return "us-east-1"
     return region
Example #13
0
    def index_document(self):
        """
        Get the record from indexd for this index.

        Return:
            dict:
                response from indexd (the contents of the record), containing ``guid``
                and ``url``
        """
        index_url = self.indexd.rstrip("/") + "/index/blank/"
        params = {"uploader": self.uploader, "file_name": self.file_name}

        # if attempting to set record's authz field, need to pass token
        # through
        if self.authz:
            params["authz"] = self.authz
            token = get_jwt()

            auth = None
            headers = {"Authorization": f"bearer {token}"}
            logger.info(
                "passing user's authorization header to create blank record")
        else:
            logger.info("using indexd basic auth to create blank record")
            auth = (config["INDEXD_USERNAME"], config["INDEXD_PASSWORD"])
            headers = {}

        indexd_response = requests.post(index_url,
                                        json=params,
                                        headers=headers,
                                        auth=auth)
        if indexd_response.status_code not in [200, 201]:
            try:
                data = indexd_response.json()
            except ValueError:
                data = indexd_response.text
            self.logger.error(
                "could not create new record in indexd; got response: {}"
                .format(data))
            raise InternalError(
                "received error from indexd trying to create blank record")
        document = indexd_response.json()
        guid = document["did"]
        self.logger.info(
            "created blank index record with GUID {} for upload".format(guid))
        return document
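A sketch of the blank-record exchange above; the values are illustrative, but the response does carry the "did" the code reads.

EXAMPLE_PARAMS = {"uploader": "user@example.com", "file_name": "data.bam"}
EXAMPLE_RESPONSE = {"did": "dg.1234/0f6b2c5a-example"}  # new record's GUID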
Example #14
0
    def init_multipart_upload(self, expires_in):
        """
        Initialize multipart upload

        Args:
            expires_in(int): expiration time

        Returns:
            UploadId(str)
        """
        aws_creds = get_value(config, "AWS_CREDENTIALS",
                              InternalError("credentials not configured"))
        credentials = S3IndexedFileLocation.get_credential_to_access_bucket(
            self.bucket_name(), aws_creds, expires_in)

        return multipart_upload.initialize_multipart_upload(
            self.parsed_url.netloc, self.parsed_url.path.strip("/"),
            credentials)
Example #15
0
    def init_multipart_upload(key, expires_in=None):
        """
        Initialize multipart upload given key

        Args:
            key(str): object key
            expires_in(int): expiration time

        Returns:
            uploadId(str)
        """
        try:
            bucket = flask.current_app.config["DATA_UPLOAD_BUCKET"]
        except KeyError:
            raise InternalError(
                "amanuensis not configured with data upload bucket; can't create signed URL"
            )
        s3_url = "s3://{}/{}".format(bucket, key)
        return S3IndexedFileLocation(s3_url).init_multipart_upload(expires_in)
Example #16
0
def complete_multipart_upload(bucket, key, credentials, uploadId, parts):
    """
    Complete multipart upload.
    Raise exception if something wrong happens; otherwise success

    Args:
        bucket(str): bucket name
        key(str): object key or `GUID/filename`
        credentials(dict): aws credentials
        uploadId(str): upload id of the current upload
        parts(list(dict)): list of part info dicts
                [{"ETag": "1234567", "PartNumber": 1}, {"ETag": "4321234", "PartNumber": 2}]

    Return:
        None
    """
    session = boto3.Session(
        aws_access_key_id=credentials["aws_access_key_id"],
        aws_secret_access_key=credentials["aws_secret_access_key"],
        aws_session_token=credentials.get("aws_session_token"),
    )
    s3client = session.client("s3")

    try:
        retry_call(
            s3client.complete_multipart_upload,
            fkwargs={
                "Bucket": bucket,
                "Key": key,
                "MultipartUpload": {"Parts": parts},
                "UploadId": uploadId,
            },
            tries=MAX_TRIES,
            jitter=10,
        )
    except ClientError as error:
        logger.error(
            "Error when completing multipart upload for object with key {}. Detail {}".format(
                key, error
            )
        )
        raise InternalError(
            "Cannot complete multipart upload for {}. Detail {}".format(key, error)
        )
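The parts argument above must echo back the ETag S3 returned for each uploaded part, together with its 1-based part number; a shape sketch with illustrative values.

EXAMPLE_PARTS = [
    {"ETag": '"d41d8cd98f00b204e9800998ecf8427e"', "PartNumber": 1},
    {"ETag": '"9e107d9d372bb6826bd81d3542a419d6"', "PartNumber": 2},
]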
Example #17
0
 def assume_role(self, role_arn, duration_seconds, config=None):
     assert (
         duration_seconds
     ), 'assume_role() cannot be called without a "duration_seconds" parameter; please check your "expires_in" parameter'
     try:
         if config and "aws_access_key_id" in config:
             self.sts_client = client("sts", **config)
         session_name_postfix = uuid.uuid4()
         return self.sts_client.assume_role(
             RoleArn=role_arn,
             DurationSeconds=duration_seconds,
             RoleSessionName="{}-{}".format("gen3", session_name_postfix),
         )
     except Boto3Error as ex:
         self.logger.exception(ex)
         raise InternalError("Failed to assume role: {}".format(ex))
     except Exception as ex:
         self.logger.exception(ex)
         raise UnavailableError("Failed to reach AWS: {}".format(ex))
Example #18
0
    def complete_multipart_upload(self, uploadId, parts, expires_in):
        """
        Complete multipart upload.

        Args:
            uploadId(str): upload id of the current upload
            parts(list(dict)): list of part info dicts
                    [{"ETag": "1234567", "PartNumber": 1}, {"ETag": "4321234", "PartNumber": 2}]
        """
        aws_creds = get_value(config, "AWS_CREDENTIALS",
                              InternalError("credentials not configured"))

        credentials = S3IndexedFileLocation.get_credential_to_access_bucket(
            self.bucket_name(), aws_creds, expires_in)

        multipart_upload.complete_multipart_upload(
            self.parsed_url.netloc,
            self.parsed_url.path.strip("/"),
            credentials,
            uploadId,
            parts,
        )
Example #19
0
    def complete_multipart_upload(key, uploadId, parts, expires_in=None):
        """
        Complete multipart upload

        Args:
            key(str): object key or `GUID/filename`
            uploadId(str): upload id of the current upload
            parts(list(dict)): list of part info dicts
                [{"ETag": "1234567", "PartNumber": 1}, {"ETag": "4321234", "PartNumber": 2}]

        Returns:
            None; raises an exception on failure
        """
        try:
            bucket = flask.current_app.config["DATA_UPLOAD_BUCKET"]
        except KeyError:
            raise InternalError(
                "amanuensis not configured with data upload bucket; can't create signed URL"
            )
        s3_url = "s3://{}/{}".format(bucket, key)
        S3IndexedFileLocation(s3_url).complete_multipart_upload(
            uploadId, parts, expires_in)
Example #20
0
    def generate_aws_presigned_url_for_part(key, uploadId, partNumber,
                                            expires_in):
        """
        Generate presigned url for each part

        Args:
            key(str): object key of `guid/filename`
            uploadId(str): uploadId of the current upload
            partNumber(int): the part number
            expires_in(int): expiration time

        Returns:
            presigned_url(str)
        """
        try:
            bucket = flask.current_app.config["DATA_UPLOAD_BUCKET"]
        except KeyError:
            raise InternalError(
                "amanuensis not configured with data upload bucket; can't create signed URL"
            )
        s3_url = "s3://{}/{}".format(bucket, key)
        return S3IndexedFileLocation(s3_url).generate_presigned_url_for_part_upload(
            uploadId, partNumber, expires_in)
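Taken together, the wrappers above cover the whole client flow: initialize, upload each part through its presigned URL, then complete. A hedged end-to-end sketch, assuming the three wrappers are callable as shown and using requests for the part uploads.

import requests

def multipart_flow(key, chunks, expires_in=3600):
    upload_id = init_multipart_upload(key, expires_in)
    parts = []
    for number, chunk in enumerate(chunks, start=1):
        url = generate_aws_presigned_url_for_part(key, upload_id, number,
                                                  expires_in)
        resp = requests.put(url, data=chunk)
        resp.raise_for_status()
        # S3 returns each part's ETag in a response header; it must be
        # echoed back verbatim when completing the upload.
        parts.append({"ETag": resp.headers["ETag"], "PartNumber": number})
    complete_multipart_upload(key, upload_id, parts, expires_in)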
Example #21
0
    def get_credential_to_access_bucket(cls,
                                        bucket_name,
                                        aws_creds,
                                        expires_in,
                                        boto=None):
        s3_buckets = get_value(config, "S3_BUCKETS",
                               InternalError("buckets not configured"))
        if len(aws_creds) == 0 and len(s3_buckets) == 0:
            raise InternalError("no bucket is configured")
        if len(aws_creds) == 0 and len(s3_buckets) > 0:
            raise InternalError("credential for buckets is not configured")

        bucket_cred = s3_buckets.get(bucket_name)
        if bucket_cred is None:
            raise Unauthorized("permission denied for bucket")

        cred_key = get_value(
            bucket_cred, "cred",
            InternalError("credential of that bucket is missing"))

        # this is a special case to support public buckets where we do *not* want to
        # try signing at all
        if cred_key == "*":
            return {"aws_access_key_id": "*"}

        if "role-arn" not in bucket_cred:
            return get_value(
                aws_creds,
                cred_key,
                InternalError("aws credential of that bucket is not found"),
            )
        else:
            aws_creds_config = get_value(
                aws_creds,
                cred_key,
                InternalError("aws credential of that bucket is not found"),
            )
            return S3IndexedFileLocation.assume_role(bucket_cred, expires_in,
                                                     aws_creds_config, boto)
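A hedged sketch of the S3_BUCKETS shapes the branches above distinguish; bucket names, cred keys, and the role ARN are illustrative.

EXAMPLE_S3_BUCKETS = {
    "public-bucket": {"cred": "*"},  # public: URLs are never signed
    "plain-bucket": {"cred": "main-account", "region": "us-east-1"},
    "role-bucket": {
        "cred": "main-account",
        "role-arn": "arn:aws:iam::123456789012:role/example-role",
    },
}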
Example #22
0
def get_user_accesses():
    user = udm.get_user_accesses()
    if not user:
        raise InternalError("Error: %s user does not exist" %
                            flask.g.user.username)
    return user