def test_init_fail():
    '''
    Pass each invalid user ID to _init_fail together with the exception it should trigger.
    '''
    # (userid, expected exception), exercised in order
    cases = (
        (None, MissingParameterError('userid')),
        (' \t ', MissingParameterError('userid')),
        ('foo \t bar', IllegalParameterError('userid contains control characters')),
        ('u' * 257, IllegalParameterError('userid exceeds maximum length of 256')),
    )
    for userid, expected in cases:
        _init_fail(userid, expected)
def test_sample_build_fail():
    '''
    Pass invalid arguments to the two sample build helpers along with the expected exceptions.
    '''
    # not testing every permutation of failing check_string here, just one test to make sure
    # it's there
    sample_id = uuid.UUID('1234567890abcdef1234567890abcdef')
    user = UserID('user')
    foo = SampleNode('foo')
    bar_tech = SampleNode('bar', SubSampleType.TECHNICAL_REPLICATE, 'foo')
    baz = SampleNode('baz')
    foo_again = SampleNode('foo')
    savetime = dt(8)

    # positional arguments for _sample_build_fail, expected exception last
    build_cases = [
        ([foo], 'a' * 257, IllegalParameterError('name exceeds maximum length of 256')),
        ([], None, MissingParameterError('At least one node per sample is required')),
        ([bar_tech, foo], 'a',
         IllegalParameterError('The first node in a sample must be a BioReplicate')),
        ([foo, bar_tech, baz], 'a',
         IllegalParameterError(
             'BioReplicates must be the first nodes in the list of sample nodes.')),
        ([foo, baz, foo_again], 'a',
         IllegalParameterError('Duplicate sample node name: foo')),
        ([baz, bar_tech], 'a',
         IllegalParameterError(
             'Parent foo of node bar does not appear in node list prior to node.')),
    ]
    for build_args in build_cases:
        _sample_build_fail(*build_args)

    # positional arguments for _sample_with_id_build_fail, expected exception last
    build_with_id_cases = [
        (None, user, [foo], savetime, None, None,
         ValueError('id_ cannot be a value that evaluates to false')),
        (sample_id, None, [foo], savetime, None, None,
         ValueError('user cannot be a value that evaluates to false')),
        (sample_id, user, [foo], None, None, None,
         ValueError('savetime cannot be a value that evaluates to false')),
        (sample_id, user, [foo], datetime.datetime.now(), None, None,
         ValueError('savetime cannot be a naive datetime')),
        (sample_id, user, [foo], savetime, None, 0,
         ValueError('version must be > 0')),
    ]
    for build_args in build_with_id_cases:
        _sample_with_id_build_fail(*build_args)
def _check_metadata_value(
        key: str, value: Dict[str, PrimitiveType], name: str) -> Dict[str, PrimitiveType]:
    '''
    Validate the value mapping stored under a single metadata key.

    The mapping must be non-empty. Each key in the mapping must be free of control characters
    and at most _META_MAX_KEY_SIZE characters long. Each string value (including instances of
    str subclasses) is checked for control characters with allow_tabs_and_lf=True and must be
    at most _META_MAX_VALUE_SIZE characters long. Values that are not strings are not inspected.

    :param key: the metadata key the mapping is stored under; only used in error messages.
    :param value: the mapping to validate.
    :param name: the kind of metadata being checked; used as the prefix of error messages.
    :returns: the mapping that was passed in.
    :raises IllegalParameterError: if any of the checks above fail.
    '''
    if not value:
        raise IllegalParameterError(
            f'{name} metadata value associated with metadata key {key} is null or empty')
    for vk, val in value.items():
        # a non-negative result is the index of the first offending character
        cc = _control_char_first_pos(vk)
        if cc >= 0:
            raise IllegalParameterError(
                f"{name} metadata value key {vk} associated with metadata key {key} has a " +
                f'character at index {cc} that is a control character.')
        if len(vk) > _META_MAX_KEY_SIZE:
            raise IllegalParameterError(
                f'{name} metadata has a value key associated with metadata key {key} starting ' +
                f'with {vk[:_META_MAX_KEY_SIZE]} that exceeds maximum length of ' +
                f'{_META_MAX_KEY_SIZE}')
        # isinstance rather than an exact type comparison so that str subclasses cannot
        # bypass the string checks
        if isinstance(val, str):
            cc = _control_char_first_pos(val, allow_tabs_and_lf=True)
            if cc >= 0:
                raise IllegalParameterError(
                    f"{name} metadata value associated with metadata key {key} and " +
                    f'value key {vk} has a character at index {cc} that is a control character.')
            if len(val) > _META_MAX_VALUE_SIZE:
                # NOTE(review): the preview of the value is cut at _META_MAX_KEY_SIZE rather
                # than _META_MAX_VALUE_SIZE; presumably to keep the message short - confirm.
                raise IllegalParameterError(
                    f'{name} metadata has a value associated with metadata key {key} ' +
                    f'and value key {vk} starting with {val[:_META_MAX_KEY_SIZE]} ' +
                    f'that exceeds maximum length of {_META_MAX_VALUE_SIZE}')
    return value
def _check_source_meta(m: List[SourceMetadata], controlled_metadata):
    '''
    Validate a list of source metadata entries against the controlled metadata.

    Fails if any entry is falsy, if two entries share a key, if an entry's key is not in the
    controlled metadata, or if the summed UTF-8 size of all keys, source keys and JSON
    serialized source values exceeds _META_MAX_SIZE_B.

    :param m: the source metadata entries.
    :param controlled_metadata: the container of controlled metadata keys to check against.
    :raises IllegalParameterError: on a duplicate key, an unknown key or excessive size.
    '''
    _not_falsy_in_iterable(m, 'source_metadata')
    # it doesn't make sense to turn the whole thing into json as a rough measure of size as the
    # user is not responsible for the field names.
    size_in_bytes = 0
    keys_so_far = set()
    for entry in m:
        meta_key = entry.key
        if meta_key in keys_so_far:
            raise IllegalParameterError(f'Duplicate source metadata key: {meta_key}')
        keys_so_far.add(meta_key)
        # could check for duplicate sm.sourcekeys too, but possibly the source data is split into
        # two metadata keys?
        if meta_key not in controlled_metadata:
            raise IllegalParameterError(
                f'Source metadata key {meta_key} does not appear in the controlled metadata')
        # Would be nice if the JSON could be streamed so we don't make a new byte array
        encoded_parts = (
            meta_key.encode('utf-8'),
            entry.sourcekey.encode('utf-8'),
            _json.dumps(dict(entry.sourcevalue), ensure_ascii=False).encode('utf-8'),
        )
        size_in_bytes += sum(len(part) for part in encoded_parts)
    # This calculation is more convoluted than I would like and hard to reproduce for users
    # but it's really unlikely the limit is ever going to be hit so YAGNI re improvements,
    # at least for now.
    if size_in_bytes > _META_MAX_SIZE_B:
        raise IllegalParameterError(
            f'Source metadata is larger than maximum of {_META_MAX_SIZE_B}B')
def _sample_node_build_fail_metadata(meta, expected):
    '''
    Build a SampleNode with the given metadata, first as controlled and then as user metadata,
    and check that each attempt raises the expected IllegalParameterError.

    :param meta: the metadata to pass to the SampleNode constructor.
    :param expected: the expected error message, containing one format placeholder that is
        filled with 'Controlled' or 'User' respectively.
    '''
    attempts = (('controlled_metadata', 'Controlled'), ('user_metadata', 'User'))
    for keyword, label in attempts:
        with raises(Exception) as got:
            SampleNode('n', SubSampleType.BIOLOGICAL_REPLICATE, **{keyword: meta})
        wanted = IllegalParameterError(expected.format(label))
        assert_exception_correct(got.value, wanted)
def test_build_fail_ownerless():
    '''
    Pass invalid admin / write / read lists to _build_fail_ownerless with the expected errors.
    '''
    # a falsy member in any of the lists
    falsy_member_cases = [
        ([u('a'), None], None, None,
         ValueError('Index 1 of iterable admin cannot be a value that evaluates to false')),
        (None, [None, None], None,
         ValueError('Index 0 of iterable write cannot be a value that evaluates to false')),
        (None, None, [u('a'), u('b'), None],
         ValueError('Index 2 of iterable read cannot be a value that evaluates to false')),
    ]
    # test that you cannot have a user in 2 acls
    repeated_user_cases = [
        ([u('a'), u('z')], [u('a'), u('c')], [u('w'), u('b')],
         IllegalParameterError('User a appears in two ACLs')),
        ([u('a'), u('z')], [u('b'), u('c')], [u('w'), u('a')],
         IllegalParameterError('User a appears in two ACLs')),
        ([u('x'), u('z')], [u('b'), u('c'), u('w')], [u('w'), u('a')],
         IllegalParameterError('User w appears in two ACLs')),
    ]
    for admin, write, read, expected in falsy_member_cases + repeated_user_cases:
        _build_fail_ownerless(admin, write, read, expected)
def test_get_user_from_object_fail_bad_args():
    '''
    Pass bad params / key combinations to _get_user_from_object_fail with the expected errors.
    '''
    cases = [
        (None, 'us', ValueError('params cannot be None')),
        ({'us': 'foo'}, None, MissingParameterError('key')),
        ({'us': []}, 'us', IllegalParameterError('us must be a string if present')),
        # probably not worth the trouble to change the key name, we'll see
        ({'us': 'baz\tbaat'}, 'us',
         IllegalParameterError('userid contains control characters')),
    ]
    for params, key, expected in cases:
        _get_user_from_object_fail(params, key, expected)
def test_get_datetime_from_epochmilliseconds_in_object_fail_bad_args():
    '''
    Pass missing params and non-integer timestamps to the failure helper with the expected
    errors.
    '''
    cases = (
        (None, 'bar', ValueError('params cannot be None')),
        ({'foo': 'a'}, 'foo', IllegalParameterError(
            "key 'foo' value of 'a' is not a valid epoch millisecond timestamp")),
        ({'ts': 1.2}, 'ts', IllegalParameterError(
            "key 'ts' value of '1.2' is not a valid epoch millisecond timestamp")),
    )
    for params, key, expected in cases:
        _get_datetime_from_epochmilliseconds_in_object_fail(params, key, expected)
def test_sample_address_init_fail():
    '''
    Pass a missing sample ID and several invalid versions to _sample_address_init_fail.
    '''
    sample_id = uuid.UUID('1234567890abcdef1234567890abcdef')
    _sample_address_init_fail(
        None, 6, ValueError('sampleid cannot be a value that evaluates to false'))
    # a missing, zero or negative version is expected to give the same error
    for bad_version in (None, 0, -5):
        _sample_address_init_fail(
            sample_id, bad_version, IllegalParameterError('version must be > 0'))
def test_acls_from_dict_fail_bad_args():
    '''
    Pass empty input and malformed 'acls' entries to _acls_from_dict_fail, then run the
    per-ACL check helper for each ACL name.
    '''
    for empty_input in (None, {}):
        _acls_from_dict_fail(
            empty_input, ValueError('d cannot be a value that evaluates to false'))

    not_a_mapping_msg = 'ACLs must be supplied in the acls key and must be a mapping'
    for bad_acls in (None, 'foo'):
        _acls_from_dict_fail({'acls': bad_acls}, IllegalParameterError(not_a_mapping_msg))

    for acl_name in ('read', 'write', 'admin'):
        _acls_from_dict_fail_acl_check(acl_name)
def create_sample_params_meta_fail(m, expected):
    '''
    Check that bad metadata is rejected both as controlled metadata on the first node and as
    user metadata on the second node of a sample's node tree.

    :param m: the metadata to place in the node.
    :param expected: the expected error message, formatted with the node index and the
        metadata description.
    '''
    controlled_on_first_node = {'sample': {'node_tree': [
        {'id': 'foo', 'type': 'BioReplicate', 'meta_controlled': m},
    ]}}
    create_sample_params_fail(
        controlled_on_first_node,
        IllegalParameterError(expected.format(0, 'controlled metadata')))

    user_on_second_node = {'sample': {'node_tree': [
        {'id': 'bar', 'type': 'BioReplicate'},
        {'id': 'foo', 'type': 'SubSample', 'parent': 'bar', 'meta_user': m},
    ]}}
    create_sample_params_fail(
        user_on_second_node,
        IllegalParameterError(expected.format(1, 'user metadata')))
def test_get_admin_request_from_object_fail_bad_args():
    '''
    Pass bad params and key names to _get_admin_request_from_object_fail with the expected
    errors.
    '''
    cases = [
        (None, '1', '2', ValueError('params cannot be None')),
        ({'a': 'b'}, None, '2', MissingParameterError('as_admin')),
        ({'a': 'b'}, '1', None, MissingParameterError('as_user')),
        ({'asa': True, 'asu': ['foo']}, 'asa', 'asu',
         IllegalParameterError('asu must be a string if present')),
        ({'asa': True, 'asu': 'whe\tee'}, 'asa', 'asu',
         IllegalParameterError('userid contains control characters')),
    ]
    for params, admin_key, user_key, expected in cases:
        _get_admin_request_from_object_fail(params, admin_key, user_key, expected)
def test_get_data_unit_id_from_object_fail_bad_args():
    '''
    Pass bad params to _get_data_unit_id_from_object_fail with the expected errors.
    '''
    cases = [
        (None, ValueError('params cannot be None')),
        ({}, MissingParameterError('upa')),
        ({'upa': '1/0/1'}, IllegalParameterError('1/0/1 is not a valid UPA')),
        ({'upa': 82}, IllegalParameterError('upa key is not a string as required')),
        ({'upa': '1/1/1', 'dataid': []},
         IllegalParameterError('dataid key is not a string as required')),
        ({'upa': '1/1/1', 'dataid': 'f\t/b'},
         IllegalParameterError('dataid contains control characters')),
    ]
    for params, expected in cases:
        _get_data_unit_id_from_object_fail(params, expected)
def test_get_id_from_object_fail_bad_args():
    '''
    Pass malformed and missing sample IDs to _get_id_from_object_fail with the expected errors.
    '''
    # positional arguments for _get_id_from_object_fail, expected exception last
    cases = [
        ({'id': 6}, True, IllegalParameterError('Sample ID 6 must be a UUID string')),
        ({'id': 'f5bd78c3-823e-40b2-9f93-20e78680e41'}, False, IllegalParameterError(
            'Sample ID f5bd78c3-823e-40b2-9f93-20e78680e41 must be a UUID string')),
        (None, True, MissingParameterError('Sample ID')),
        ({}, True, MissingParameterError('Sample ID')),
        ({'id': None}, True, MissingParameterError('Sample ID')),
    ]
    for call_args in cases:
        _get_id_from_object_fail(*call_args)
def test_get_version_from_object_fail_bad_args():
    '''
    Pass missing and illegal versions to get_version_from_object_fail with the expected errors.
    '''
    # positional arguments for get_version_from_object_fail, expected exception last
    cases = [
        (None, False, ValueError('params cannot be None')),
        ({}, True, MissingParameterError('version')),
        ({'version': None}, True, MissingParameterError('version')),
        ({'version': 'whee'}, False,
         IllegalParameterError('Illegal version argument: whee')),
        ({'version': 0}, True, IllegalParameterError('Illegal version argument: 0')),
        ({'version': -3}, False, IllegalParameterError('Illegal version argument: -3')),
    ]
    for call_args in cases:
        get_version_from_object_fail(*call_args)
def _check_metadata_key(key: str, name: str) -> str:
    '''
    Validate a single metadata key.

    The key must not be empty or whitespace only, must be at most _META_MAX_KEY_SIZE characters
    long and must not contain a control character.

    :param key: the key to validate.
    :param name: the kind of metadata being checked; used as the prefix of error messages.
    :returns: the key that was passed in.
    :raises IllegalParameterError: if any of the checks fail.
    '''
    if not (key and key.strip()):
        raise IllegalParameterError(
            f'{name} metadata keys may not be null or whitespace only')

    if len(key) > _META_MAX_KEY_SIZE:
        key_preview = key[:_META_MAX_KEY_SIZE]
        raise IllegalParameterError(
            f'{name} metadata has key starting with {key_preview} that '
            f'exceeds maximum length of {_META_MAX_KEY_SIZE}')

    # a non-negative result is the index of the first control character
    bad_index = _control_char_first_pos(key)
    if bad_index < 0:
        return key
    raise IllegalParameterError(
        f"{name} metadata key {key}'s character at index {bad_index} is a control character.")
def __init__(
        self,
        nodes: List[SampleNode],
        name: Optional[str] = None,
        ):
    '''
    Create the sample.

    :param nodes: The tree nodes in the sample. BIOLOGICAL_REPLICATES must come first in
        the list, and parents must come before children in the list.
    :param name: The name of the sample. Cannot contain control characters or be longer than
        _MAX_SAMPLE_NAME_LEN characters.
    :raises MissingParameterError: if no nodes are provided.
    :raises IllegalParameterError: if the name is too long or contains illegal characters,
        there are more than _MAX_SAMPLE_NODES nodes, the first node in the list is not a
        BIOLOGICAL_REPLICATE, all the BIOLOGICAL_REPLICATES are not at the start of this list,
        node names are not unique, or parent nodes do not appear in the list prior to their
        children.
    '''
    bio = SubSampleType.BIOLOGICAL_REPLICATE
    self.name = _check_string(name, 'name', max_len=_MAX_SAMPLE_NAME_LEN, optional=True)
    if not nodes:
        raise MissingParameterError('At least one node per sample is required')
    if len(nodes) > _MAX_SAMPLE_NODES:
        raise IllegalParameterError(
            f'At most {_MAX_SAMPLE_NODES} nodes are allowed per sample')
    if nodes[0].type != bio:
        raise IllegalParameterError(f'The first node in a sample must be a {bio.value}')

    # becomes True once the first node that is not a biological replicate has been seen
    past_bio_nodes = False
    names_so_far: _Set[str] = set()
    for node in nodes:
        node_is_bio = node.type == bio
        if past_bio_nodes and node_is_bio:
            raise IllegalParameterError(
                f'{bio.value}s must be the first ' +
                'nodes in the list of sample nodes.')
        past_bio_nodes = past_bio_nodes or not node_is_bio
        if node.name in names_so_far:
            raise IllegalParameterError(f'Duplicate sample node name: {node.name}')
        # only names seen earlier in the list count, so a parent must precede its children
        if node.parent and node.parent not in names_so_far:
            raise IllegalParameterError(
                f'Parent {node.parent} of node {node.name} does not ' +
                'appear in node list prior to node.')
        names_so_far.add(node.name)
    self.nodes = tuple(nodes)  # make hashable
def test_check_string_control_characters():
    '''
    Check that check_string rejects strings containing various control characters.
    '''
    bad_strings = ('foo \b bar', 'foo\u200bbar', 'foo\0bar', 'foo\bbar')
    for candidate in bad_strings:
        with raises(Exception) as got:
            check_string(candidate, 'var name')
        wanted = IllegalParameterError('var name contains control characters')
        assert_exception_correct(got.value, wanted)
def check_string(string: Optional[str], name: str, max_len: Optional[int] = None,
                 optional: bool = False) -> Optional[str]:
    '''
    Check that a string meets a set of criteria:
    - it is not None, empty or whitespace only (unless the optional parameter is specified)
    - it contains no control characters
    - (optional) after stripping, it is no longer than some specified maximum length

    :param string: the string to test.
    :param name: the name of the string to be used in error messages.
    :param max_len: the maximum length of the stripped string. Must be > 0 if provided.
    :param optional: True if no error should be thrown if the string is None, empty or
        whitespace only.
    :returns: the stripped string or None if the string was optional and None, empty or
        whitespace only.
    :raises ValueError: if max_len is provided and is less than 1.
    :raises MissingParameterError: if the string is not optional and is None, empty or
        whitespace only.
    :raises IllegalParameterError: if the string is too long or contains illegal characters.
    '''
    # See the IDMapping service if character classes are needed.
    # Maybe package this stuff
    if max_len is not None and max_len < 1:
        raise ValueError('max_len must be > 0 if provided')
    if not string or not string.strip():
        if optional:
            return None
        raise MissingParameterError(name)
    string = string.strip()
    _no_control_characters(string, name)
    # max_len is known to be >= 1 here if it was provided at all
    if max_len is not None and len(string) > max_len:
        raise IllegalParameterError('{} exceeds maximum length of {}'.format(name, max_len))
    return string
def test_set_key_metadata_fail_bad_args():
    '''
    Pass missing keys and an unknown key to _key_metadata_fail_ with the expected errors.
    '''
    _key_metadata_fail_([], None, ValueError('keys cannot be None'))

    # validators exist for key1 and key3 only, so key2 is the unknown key
    validators = [
        MetadataValidator('key1', [_noop]),
        MetadataValidator('key3', [_noop]),
    ]
    requested_keys = ['key1', 'key2', 'key3']
    _key_metadata_fail_(
        validators, requested_keys, IllegalParameterError('No such metadata key: key2'))
def test_check_string_long_fail():
    '''
    Check that check_string rejects strings that are longer than the supplied max_len.
    '''
    # (string, max_len) where the string is one character or more over the limit
    over_limit = (('123456789', 8), ('ab', 1), ('a' * 100, 99))
    for candidate, limit in over_limit:
        with raises(Exception) as got:
            check_string(candidate, 'var name', max_len=limit)
        wanted = IllegalParameterError(f'var name exceeds maximum length of {limit}')
        assert_exception_correct(got.value, wanted)
def test_has_permission_fail_bad_input():
    '''
    Pass bad input combinations to _has_permission_fail with the expected errors.
    '''
    read = WorkspaceAccessType.READ
    user = UserID('b')
    # arguments following the user for _has_permission_fail, expected exception last
    cases = [
        (None, None, read, ValueError('Either an UPA or a workpace ID must be supplied')),
        (0, None, read, IllegalParameterError('0 is not a valid workspace ID')),
        (1, None, None, ValueError('perm cannot be a value that evaluates to false')),
    ]
    for remaining_args in cases:
        _has_permission_fail(user, *remaining_args)
def test_sample_node_build_fail_source_metadata():
    '''
    Pass invalid source metadata lists to _sample_node_build_fail_source_metadata with the
    expected errors.
    '''
    with_falsy_entry = [SourceMetadata('k', 'k1', {'a': 'b'}), None]
    _sample_node_build_fail_source_metadata(
        with_falsy_entry,
        ValueError(
            'Index 1 of iterable source_metadata cannot be a value that evaluates to false'))

    # key k is absent from the controlled metadata supplied via cmeta
    with_uncontrolled_key = [
        SourceMetadata('f', 'k1', {'a': 'b'}),
        SourceMetadata('k', 'k1', {'c': 'd'}),
    ]
    _sample_node_build_fail_source_metadata(
        with_uncontrolled_key,
        IllegalParameterError(
            'Source metadata key k does not appear in the controlled metadata'),
        cmeta={'f': {'x': 'y'}})

    with_repeated_key = [
        SourceMetadata('k', 'k1', {'a': 'b'}),
        SourceMetadata('k', 'k2', {'a': 2}),
    ]
    _sample_node_build_fail_source_metadata(
        with_repeated_key, IllegalParameterError('Duplicate source metadata key: k'))

    # 100001 bytes when the size calculation routine is run, one over the limit
    oversized = [SourceMetadata(str(i), 'sksksk', {'x': '𐎦' * 25}) for i in range(848)]
    oversized += [SourceMetadata('a', 'b' * 36, {'x': 'y'})]
    _sample_node_build_fail_source_metadata(
        oversized,
        IllegalParameterError('Source metadata is larger than maximum of 100000B'))
def test_sample_node_address_init_fail():
    '''
    Pass a missing sample address and invalid node names to _sample_node_address_init_fail.
    '''
    address = SampleAddress(uuid.UUID('1234567890abcdef1234567890abcdef'), 5)
    cases = (
        (None, 'f', ValueError('sample cannot be a value that evaluates to false')),
        (address, None, MissingParameterError('node')),
        (address, ' \t \n ', MissingParameterError('node')),
        (address, '3' * 257, IllegalParameterError('node exceeds maximum length of 256')),
    )
    for sample, node, expected in cases:
        _sample_node_address_init_fail(sample, node, expected)
def _check_meta(m: Dict[str, Dict[str, PrimitiveType]], controlled: bool):
    '''
    Validate every key and value mapping in a metadata dictionary, then check that the
    dictionary, serialized to UTF-8 encoded JSON, is no larger than _META_MAX_SIZE_B bytes.

    :param m: the metadata to check.
    :param controlled: True to label the metadata 'Controlled' in error messages, False to
        label it 'User'.
    :raises IllegalParameterError: if a key or value is invalid or the metadata is too large.
    '''
    if controlled:
        label = 'Controlled'
    else:
        label = 'User'
    for meta_key in m:
        _check_metadata_key(meta_key, label)
        _check_metadata_value(meta_key, m[meta_key], label)
    # would be nice if that could be streamed so we don't make a new byte array
    serialized = _json.dumps(m, ensure_ascii=False).encode('utf-8')
    if len(serialized) > _META_MAX_SIZE_B:
        raise IllegalParameterError(
            f'{label} metadata is larger than maximum of {_META_MAX_SIZE_B}B')
def test_duid_init_fail():
    '''
    Check that DataUnitID rejects a missing UPA and a data ID that is too long.
    '''
    # the constructor calls are deferred so that they run inside the raises block
    attempts = (
        (lambda: DataUnitID(None),
         ValueError('upa cannot be a value that evaluates to false')),
        (lambda: DataUnitID(UPA('1/1/1'), 'a' * 257),
         IllegalParameterError('dataid exceeds maximum length of 256')),
    )
    for construct, expected in attempts:
        with raises(Exception) as got:
            construct()
        assert_exception_correct(got.value, expected)
def __init__(self, sampleid: UUID, version: int):
    '''
    Create the address.

    :param sampleid: The ID of the sample. Must not be falsy.
    :param version: The version of the sample. Must be present and at least 1.
    :raises IllegalParameterError: if the version is None or less than 1.
    '''
    # the ID is validated and stored before the version is examined
    checked_id = _not_falsy(sampleid, 'sampleid')
    self.sampleid = checked_id
    version_is_invalid = version is None or version < 1
    if version_is_invalid:
        raise IllegalParameterError('version must be > 0')
    self.version = version
def test_sample_node_build_fail():
    '''
    Pass invalid name / type / parent combinations to _sample_node_build_fail with the
    expected errors.
    '''
    # not testing every permutation of failing check_string here, just one test to make sure
    # it's there
    bio = SubSampleType.BIOLOGICAL_REPLICATE
    tech = SubSampleType.TECHNICAL_REPLICATE
    sub = SubSampleType.SUB_SAMPLE
    cases = [
        ('', bio, None, MissingParameterError('subsample name')),
        ('a' * 257, bio, None,
         IllegalParameterError('subsample name exceeds maximum length of 256')),
        ('a', None, None, ValueError('type cannot be a value that evaluates to false')),
        ('a', tech, 'b' * 257,
         IllegalParameterError('parent exceeds maximum length of 256')),
        ('a', bio, 'badparent', IllegalParameterError(
            'Node a is of type BioReplicate and therefore cannot have a parent')),
        ('a', tech, None, IllegalParameterError(
            'Node a is of type TechReplicate and therefore must have a parent')),
        ('a', sub, None, IllegalParameterError(
            'Node a is of type SubSample and therefore must have a parent')),
    ]
    for node_name, node_type, parent, expected in cases:
        _sample_node_build_fail(node_name, node_type, parent, expected)
def test_delta_build_fail():
    '''
    Pass invalid admin / write / read / remove lists to _build_delta_fail with the expected
    errors.
    '''
    # a falsy member in any of the lists
    falsy_member_cases = [
        ([u('a'), None], None, None, None,
         ValueError('Index 1 of iterable admin cannot be a value that evaluates to false')),
        (None, [None, None], None, None,
         ValueError('Index 0 of iterable write cannot be a value that evaluates to false')),
        (None, None, [u('a'), u('b'), None], None,
         ValueError('Index 2 of iterable read cannot be a value that evaluates to false')),
        (None, None, None, [None],
         ValueError('Index 0 of iterable remove cannot be a value that evaluates to false')),
    ]
    # test that you cannot have a user in 2 acls
    repeated_user_cases = [
        ([u('a'), u('z')], [u('a'), u('c')], [u('w'), u('b')], None,
         IllegalParameterError('User a appears in two ACLs')),
        ([u('a'), u('z')], [u('b'), u('c')], [u('w'), u('a')], None,
         IllegalParameterError('User a appears in two ACLs')),
        ([u('x'), u('z')], [u('b'), u('c'), u('w')], [u('w'), u('a')], None,
         IllegalParameterError('User w appears in two ACLs')),
    ]
    # test that you cannot have a user in the remove list and an acl
    removed_and_listed_cases = [
        ([u('f'), u('z')], [u('b'), u('c'), u('g')], [u('w'), u('a')], [u('m'), u('f')],
         IllegalParameterError('Users in the remove list cannot be in any other ACL')),
        ([u('a'), u('z')], [u('x'), u('c')], [u('w'), u('b')], [u('m'), u('x')],
         IllegalParameterError('Users in the remove list cannot be in any other ACL')),
        ([u('a'), u('z')], [u('b'), u('c')], [u('w'), u('y')], [u('y')],
         IllegalParameterError('Users in the remove list cannot be in any other ACL')),
    ]
    all_cases = falsy_member_cases + repeated_user_cases + removed_and_listed_cases
    for admin, write, read, remove, expected in all_cases:
        _build_delta_fail(admin, write, read, remove, expected)
def _no_control_characters(string: str, name: str) -> str:
    '''
    Return the string unchanged if it has no control characters, otherwise raise an error.
    Whether the string has control characters is decided by _contains_control_characters.

    :param string: The string to check.
    :param name: the name of the string to include in any exception.
    :raises IllegalParameterError: if the string contains control characters.
    :returns: the string.
    '''
    # make public if needed
    if not _contains_control_characters(string):
        return string
    raise IllegalParameterError(name + ' contains control characters')