Beispiel #1
0
 def _uri_to_url(self,
                 drs_uri: str,
                 access_id: Optional[str] = None) -> str:
     """
     Convert a hostname-based DRS URI (drs://<hostname>/<id>) into the
     corresponding HTTPS DRS URL. Any query params on the URI (eg
     '{drs_uri}?version=123') are kept on the resulting URL. Compact,
     identifier-based URIs (drs://[provider_code/]namespace:accession) are
     rejected.
     """
     scheme = 'drs'
     url = furl(drs_uri)
     require(url.scheme == scheme,
             f'The URI {drs_uri!r} does not have the {scheme!r} scheme')
     # "The colon character is not allowed in a hostname-based DRS URI".
     # https://ga4gh.github.io/data-repository-service-schemas/preview/develop/docs/#_drs_uris
     # Compact identifier-based URIs are awkward to parse under RFC3986: the
     # 'namespace:accession' part matches either the hier-part or the path
     # production, depending on whether the optional provider code and the
     # slash following it are present. Hence both places are checked.
     has_colon = ':' in url.netloc or ':' in str(url.path)
     reject(has_colon, f'The DRS URI {drs_uri!r} is not hostname-based')
     url.scheme = 'https'
     # Exactly one path segment, the object ID, is expected
     object_id = one(url.path.segments)
     url.path.set(drs_object_url_path(object_id, access_id))
     return url.url
Beispiel #2
0
 def __attrs_post_init__(self):
     """
     Validate the common prefix and, if a partition length is set, make sure
     that common prefix and partition together span at most 8 characters.
     """
     validate_uuid_prefix(self.common)
     assert ':' not in self.common, self.common
     if not self.partition:
         return
     assert isinstance(self.partition, int), self.partition
     # Version 4 UUIDs specify fixed bits in the third dash-separated group.
     # To ensure that any concatenation of common and partition_prefix is a
     # valid UUID prefix, we restrict the number of characters from the
     # concatenation to be within the first dash-separated group.
     combined_length = len(self.common) + self.partition
     reject(combined_length > 8,
            'Invalid common prefix and partition length', self)
Beispiel #3
0
 def create(cls, req: Requirement) -> Optional['PinnedRequirement']:
     """
     Create a pinned requirement from the given parsed requirement.

     A requirement with a version specifier must carry exactly one spec and
     that spec must use the `==` operator. A VCS requirement must carry a
     specific revision. A recursive requirement yields None.

     :raise RequirementError: if the specifier is not an `==` pin or if the
                              requirement is of a kind that can't be handled
     """
     if req.specifier:
         op, version = one(req.specs)
         # Deliberately not an `assert`: assertions are stripped when Python
         # runs with -O, which would let an unpinned specifier such as '>='
         # pass silently as if it were a pin.
         if op != '==':
             raise RequirementError('Requirement must be pinned using ==', req)
         return cls(name=req.name.lower(), versions=Versions(version))
     elif req.vcs:
         reject(req.revision is None,
                'VCS requirements must carry a specific revision', req)
         return cls(name=req.name.lower())
     elif req.recursive:
         return None
     else:
         raise RequirementError('Unable to handle requirement', req)
Beispiel #4
0
 def check_bundle_manifest(self):
     """
     Ensure that the bundle manifest lists the required files and that every
     indexed file contains a "describedBy" entry.
     """
     required_files = ('project_0.json', 'links.json')
     missing_files = [
         required_file
         for required_file in required_files
         if required_file not in self.manifest_entries
     ]
     reject(bool(missing_files),
            f'File(s) {missing_files} not found in bundle {self.bundle_fqid}')
     for name, content in self.indexed_files.items():
         require('describedBy' in content,
                 '"describedBy" missing from file', name, self.bundle_fqid)
Beispiel #5
0
 def _parse_staging_area(self) -> Tuple[str, str]:
     """
     Split the staging area URL in `self.args.staging_area` into a bucket
     and a path. The URL must use the `gs` scheme, name a bucket and must not
     end in a slash. The returned path has no leading '/' and, unless it is
     empty, exactly one trailing '/'.
     """
     url = parse.urlsplit(self.args.staging_area)
     is_gcs_url = url.scheme == 'gs' and url.netloc
     require(is_gcs_url,
             'Staging area URL must be in gs://<bucket>[/<path>] format')
     reject(url.path.endswith('/'),
            'Staging area URL must not end with a "/"')
     path = url.path.lstrip('/') + '/' if url.path else ''
     return url.netloc, path
Beispiel #6
0
 def _parse_file_id_column(self, file_id: Optional[str]) -> Optional[str]:
     """
     Extract the DRS path from the value of the file_id column. Returns None
     if the source is not a snapshot.
     """
     # The file_id column is present for datasets, but is usually null, may
     # contain unexpected/unusable values, and NEVER produces usable DRS URLs,
     # so we avoid parsing the column altogether for datasets.
     if not self.fqid.source.spec.is_snapshot:
         return None
     reject(file_id is None)
     # TDR stores the complete DRS URI in the file_id column, but we only
     # index the path component. The checks below guard against a mismatch
     # in the DRS domain and make sure that a change to the syntax of the
     # column does not go unnoticed.
     drs_uri = furl(file_id)
     require(drs_uri.scheme == 'drs')
     expected_netloc = furl(config.tdr_service_url).netloc
     require(drs_uri.netloc == expected_netloc)
     return str(drs_uri.path).strip('/')
Beispiel #7
0
    def parse(cls, prefix: str) -> 'Prefix':
        """
        Parse a prefix of the form `<common>[/<partition>]`.

        >>> Prefix.parse('aa/1')
        Prefix(common='aa', partition=1)

        >>> p = Prefix.parse('a')
        >>> print(p.partition)
        None
        >>> p.effective.partition == config.partition_prefix_length
        True

        >>> Prefix.parse('aa/')
        Traceback (most recent call last):
        ...
        azul.RequirementError: ('Prefix source cannot end in a delimiter.', 'aa/', '/')

        >>> Prefix.parse('8f538f53/1').partition_prefixes() # doctest: +NORMALIZE_WHITESPACE
        Traceback (most recent call last):
        ...
        azul.RequirementError: ('Invalid common prefix and partition length',
                                Prefix(common='8f538f53', partition=1))

        >>> list(Prefix.parse('8f538f53/0').partition_prefixes())
        ['']
        """
        delimiter = '/'
        reject(prefix.endswith(delimiter),
               'Prefix source cannot end in a delimiter.', prefix,
               delimiter)
        pieces = prefix.split(delimiter)
        if len(pieces) == 2:
            entry, partition = pieces
        else:
            # Either no delimiter at all (this includes the empty string) or
            # more than one. In both cases the input as a whole is treated
            # as the common prefix and left to the validation below.
            entry, partition = prefix, None
        if partition:
            try:
                partition = int(partition)
            except ValueError:
                raise ValueError(
                    'Partition prefix length must be an integer.',
                    partition)
        validate_uuid_prefix(entry)
        return cls(common=entry, partition=partition)
Beispiel #8
0
 def _parse_gcs_url(self, gcs_url: str) -> Tuple[gcs.Bucket, str]:
     """
     Split a GCS URL into a bucket object and a path. The URL must use the
     `gs` scheme, name a bucket and must not end in a slash. The returned
     path has no leading '/' and, unless it is empty, a trailing '/'.
     """
     url = parse.urlsplit(gcs_url)
     is_gcs_url = url.scheme == 'gs' and url.netloc
     require(
         is_gcs_url,
         'Google Cloud Storage URL must be in gs://<bucket>[/<path>] format'
     )
     reject(url.path.endswith('/'),
            'Google Cloud Storage URL must not end with a "/"')
     path = url.path.lstrip('/') + '/' if url.path else ''
     return gcs.Bucket(self.gcs, url.netloc), path
Beispiel #9
0
def validate_uuid_prefix(uuid_prefix: str) -> None:
    """
    Raise an exception unless the given string is a valid prefix of a UUID.

    # The empty string is a valid prefix
    >>> validate_uuid_prefix('')

    >>> validate_uuid_prefix('8f53')

    # A complete UUID is a valid prefix
    >>> validate_uuid_prefix('8f53d355-b2fa-4bab-a2f2-6852d852d2ec')

    >>> validate_uuid_prefix('8F53')
    Traceback (most recent call last):
    ...
    azul.uuids.InvalidUUIDPrefixError: '8F53' is not a valid UUID prefix.

    >>> validate_uuid_prefix('8')

    >>> validate_uuid_prefix('8f538f53')

    >>> validate_uuid_prefix('8f538f5-')
    Traceback (most recent call last):
    ...
    azul.RequirementError: UUID prefix ends with an invalid character: 8f538f5-

    >>> validate_uuid_prefix('8f538f-')
    Traceback (most recent call last):
    ...
    azul.RequirementError: UUID prefix ends with an invalid character: 8f538f-

    >>> validate_uuid_prefix('8f538f53a')
    Traceback (most recent call last):
    ...
    azul.uuids.InvalidUUIDPrefixError: '8f538f53a' is not a valid UUID prefix.
    """
    reject(uuid_prefix.endswith('-'),
           f'UUID prefix ends with an invalid character: {uuid_prefix}')
    # Complete the prefix with the tail of a known UUID string and validate
    # the result as a whole UUID.
    template = '26a8fccd-bbd2-4342-9c19-6ed7c9bb9278'
    candidate = uuid_prefix + template[len(uuid_prefix):]
    try:
        validate_uuid(candidate)
    except InvalidUUIDError:
        raise InvalidUUIDPrefixError(uuid_prefix)
Beispiel #10
0
 def _get_project(self, bundle) -> api.Project:
     """
     Return the first project of the given bundle, rejecting bundles that
     contain more than one project.
     """
     first, *extras = bundle.projects.values()
     reject(extras,
            "Azul can currently only handle a single project per bundle")
     assert isinstance(first, api.Project)
     return first
Beispiel #11
0
def _reversible_join(joiner: str, parts: Iterable[str]):
    """
    Join the given parts using the joiner. Parts that themselves contain
    the joiner are rejected first, so that splitting the result on the
    joiner can recover the parts.
    """
    parts = list(parts)
    joiner_free = all(joiner not in part for part in parts)
    reject(not joiner_free, parts)
    return joiner.join(parts)
Beispiel #12
0
 def __attrs_post_init__(self):
     super().__attrs_post_init__()
     # The leading 32 bits of a v4 or v5 UUID are pseudo-random, as are most
     # of the other bits, but the bits right after the leading 32 include a
     # couple of deterministic ones. To keep things simple, the number of
     # leaf partitions is capped at 2 ** 32.
     max_prefix_length = 32
     reject(self.prefix_length > max_prefix_length)
Beispiel #13
0
 def _parse(cls, spec: str) -> Tuple[str, Prefix]:
     """
     Split a source specification at its last colon. The part after the
     colon is parsed as a prefix, the part before it is returned as is.
     A specification without any colon is rejected.
     """
     head, colon, prefix_spec = spec.rpartition(':')
     reject(colon == '', 'Invalid source specification', spec)
     return head, Prefix.parse(prefix_spec)
    def _parse_range_request_header(
            self, range_specifier: str
    ) -> Sequence[Tuple[Optional[int], Optional[int]]]:
        """
        Parse a range specifier of the form `<unit>=<start>-<end>[,...]` into
        a list of (start, end) pairs, with None for an omitted bound. A unit
        other than `bytes` yields an empty list.

        >>> rc = RepositoryController(lambda_context=None, file_url_func=None)
        >>> rc._parse_range_request_header('bytes=100-200,300-400')
        [(100, 200), (300, 400)]

        >>> rc._parse_range_request_header('bytes=-100')
        [(None, 100)]

        >>> rc._parse_range_request_header('bytes=100-')
        [(100, None)]

        >>> rc._parse_range_request_header('foo=100')
        []

        >>> rc._parse_range_request_header('')
        Traceback (most recent call last):
        ...
        chalice.app.BadRequestError: BadRequestError: Invalid range specifier ''

        >>> rc._parse_range_request_header('100-200')
        Traceback (most recent call last):
        ...
        chalice.app.BadRequestError: BadRequestError: Invalid range specifier '100-200'

        >>> rc._parse_range_request_header('bytes=')
        Traceback (most recent call last):
        ...
        chalice.app.BadRequestError: BadRequestError: Invalid range specifier 'bytes='

        >>> rc._parse_range_request_header('bytes=100')
        Traceback (most recent call last):
        ...
        chalice.app.BadRequestError: BadRequestError: Invalid range specifier 'bytes=100'

        >>> rc._parse_range_request_header('bytes=-')
        Traceback (most recent call last):
        ...
        chalice.app.BadRequestError: BadRequestError: Invalid range specifier 'bytes=-'

        >>> rc._parse_range_request_header('bytes=--')
        Traceback (most recent call last):
        ...
        chalice.app.BadRequestError: BadRequestError: Invalid range specifier 'bytes=--'
        """

        def parse_bound(text: str) -> Optional[int]:
            return int(text) if text else None

        def parse_range(range_spec: str) -> Tuple[Optional[int], Optional[int]]:
            first, last = range_spec.split('-')
            reject(not (first or last), 'Empty range')
            return parse_bound(first), parse_bound(last)

        # Any failure while parsing, be it a malformed specifier, a bound
        # that is not a number or a rejected condition, is reported as a bad
        # request that quotes the offending specifier.
        try:
            unit, ranges = range_specifier.split('=')
            if unit == 'bytes':
                parsed_ranges = [
                    parse_range(range_spec)
                    for range_spec in ranges.split(',')
                ]
            else:
                reject(unit == '', 'Empty range unit')
                parsed_ranges = []
        except Exception as e:
            raise BadRequestError(
                f'Invalid range specifier {range_specifier!r}') from e
        return parsed_ranges
Beispiel #15
0
 def __attrs_post_init__(self):
     """
     Check that the prefix is a non-negative number that fits into
     `prefix_length` bits and that a prefix of length zero is zero.
     """
     length, prefix = self.prefix_length, self.prefix
     reject(length == 0 and prefix != 0)
     require(0 <= prefix < 2 ** length)
Beispiel #16
0
    def to_index(self, value_unit: Optional[JSON]) -> str:
        """
        Convert a dictionary with a `value` and a `unit` entry to a single
        string of the form `<value> <unit>`, or just `<value>` if the unit
        is None. A None argument is converted to the null string.

        >>> a = ValueAndUnit()
        >>> a.to_index({'value': '20', 'unit': 'year'})
        '20 year'

        >>> a.to_index({'value': '20', 'unit': None})
        '20'

        >>> a.to_index(None)
        '~null'

        >>> a.to_index({})
        Traceback (most recent call last):
        ...
        azul.RequirementError: A dictionary with entries for `value` and `unit` is required

        >>> a.to_index({'value': '1', 'unit': 'day', 'foo': 12})
        Traceback (most recent call last):
        ...
        azul.RequirementError: A dictionary with exactly two entries is required

        >>> a.to_index({'unit': 'day'})
        Traceback (most recent call last):
        ...
        azul.RequirementError: A dictionary with entries for `value` and `unit` is required

        >>> a.to_index({'value': '1'})
        Traceback (most recent call last):
        ...
        azul.RequirementError: A dictionary with entries for `value` and `unit` is required

        >>> a.to_index({'value': '', 'unit': 'year'})
        Traceback (most recent call last):
        ...
        azul.RequirementError: The `value` entry must not be empty

        >>> a.to_index({'value': '20', 'unit': ''})
        Traceback (most recent call last):
        ...
        azul.RequirementError: The `unit` entry must not be empty

        >>> a.to_index({'value': None, 'unit': 'years'})
        Traceback (most recent call last):
        ...
        azul.RequirementError: The `value` entry must not be null

        >>> a.to_index({'value': 20, 'unit': None})
        Traceback (most recent call last):
        ...
        azul.RequirementError: The `value` entry must be a string

        >>> a.to_index({'value': '20', 'unit': True})
        Traceback (most recent call last):
        ...
        azul.RequirementError: The `unit` entry must be a string

        >>> a.to_index({'value': '20 ', 'unit': None})
        Traceback (most recent call last):
        ...
        azul.RequirementError: The `value` entry must not contain space characters

        >>> a.to_index({'value': '20', 'unit': 'years '})
        Traceback (most recent call last):
        ...
        azul.RequirementError: The `unit` entry must not contain space characters
        """
        if value_unit is None:
            return NullableString.null_string
        try:
            value = value_unit['value']
            unit = value_unit['unit']
        except KeyError:
            reject(
                True,
                'A dictionary with entries for `value` and `unit` is required'
            )
        else:
            # The order of the checks below determines which message is
            # reported when more than one of them would fail.
            require(len(value_unit) == 2,
                    'A dictionary with exactly two entries is required')
            reject(value == '', 'The `value` entry must not be empty')
            reject(unit == '', 'The `unit` entry must not be empty')
            reject(value is None, 'The `value` entry must not be null')
            require(type(value) is str,
                    'The `value` entry must be a string')
            reject(' ' in value,
                   'The `value` entry must not contain space characters')
            if unit is None:
                return value
            require(type(unit) is str,
                    'The `unit` entry must be a string')
            reject(' ' in unit,
                   'The `unit` entry must not contain space characters')
            return f'{value} {unit}'
Beispiel #17
0
 def __attrs_post_init__(self):
     """
     Reject IDs that are longer than 254 characters.
     """
     id_length = len(self.id)
     reject(id_length > 254,
            'Terra requires IDs be no longer than 254 chars')