def test_is_in_list():
    """Check that the `is_in_list` validator accepts listed values and flags unlisted ones."""
    allowed_values = ["one", "two"]
    check = validators.is_in_list(allowed_values)

    # The same error dictionary is shared by both invocations below
    errors = {"key": []}

    # A value taken from the list must not register an error
    check("key", {"key": "one"}, errors, {})
    assert len(errors["key"]) == 0

    # A value that is not in the list must register exactly one error
    check("key", {"key": "pants"}, errors, {})
    assert len(errors["key"]) == 1
def validate_resources(key, data, errors, context):
    """
    Validate the fields of a single resource against the NGDS rules.

    This function will be invoked once per resource, and must check to see that all the fields in the resource
    are acceptable.

    `key` is normally `('resources', some-integer, 'resource_format')`. The integer is used to find the other
    values that belong to the same resource. A single-element key is also handled, in which case every entry of
    `data` is treated as a field of the resource.

    `data` is the "flat" version of the entire Package dictionary (its keys are tuples).

    `errors` is the dictionary of error lists, keyed like `data`.

    `context` is not used by this function.
    """

    # Information about this resource that I can take from the given key
    resource_format = data[key]

    # Sometimes the function is passed a dict with complex tuples for keys, sometimes pretty simple ones
    has_resource_index = len(key) > 1

    # Simplify the resource into a dictionary of key/value pairs
    if has_resource_index:
        resource_index = key[1]
        resource = dict(
            (t[2], data[t]) for t in data
            if t[0] == 'resources' and t[1] == resource_index
        )
    else:
        resource = dict((t[0], data[t]) for t in data)

    # A way to make sure I always have the right key, regardless of the idiosyncrasies of what might get sent in
    def real_key(field_name):
        if has_resource_index:
            return key[0:2] + (field_name,)
        return (field_name,)

    # Check for fields that are always required by NGDS for resources of any type
    field_name = 'resource_format'
    # NOTE(review): the allowed values used to be passed directly as the list of validators. Every other call
    # to `validation_runner` passes callables, so the values are now wrapped in `is_in_list`, exactly like the
    # `protocol` check below -- confirm against `validation_runner`.
    validation_runner(
        field_name,
        resource,
        errors,
        [ngds_rules.is_in_list(["structured", "unstructured", "offline-resource", "data-service"])],
        real_key(field_name)
    )

    field_name = 'distributor'
    validation_runner(
        field_name,
        resource,
        errors,
        [required, ngds_rules.is_valid_json, ngds_rules.is_valid_contact],
        real_key(field_name)
    )

    # Now things depend on the current resource's `resource_format`
    if resource_format in ['structured', 'unstructured']:
        # These are uploaded files. `format` is only run through the `optional` validator
        field_name = 'format'
        validation_runner(field_name, resource, errors, [optional], real_key(field_name))

    if resource_format == 'structured':
        # These are content-model-aware file uploads
        field_name = 'content_model_uri'
        validation_runner(
            field_name,
            resource,
            errors,
            [required, ngds_rules.is_valid_model_uri],
            real_key(field_name)
        )

        field_name = 'content_model_version'
        validation_runner(
            field_name,
            resource,
            errors,
            [required, ngds_rules.is_valid_model_version],
            real_key(field_name)
        )

        # If the model version and uri are valid, then check the uploaded file against the content model.
        # A missing entry in `errors` is treated as "no errors" instead of raising a KeyError.
        uri_errors = errors.get(real_key('content_model_uri'))
        version_errors = errors.get(real_key('content_model_version'))
        if not uri_errors and not version_errors:
            ngds_rules.check_uploaded_file(resource, errors, real_key('content_model'))

    if resource_format == 'data-service':
        # These are linked data services
        field_name = 'protocol'
        validation_runner(
            field_name,
            resource,
            errors,
            [ngds_rules.is_in_list([
                'OGC:WMS', 'OGC:WFS', 'OGC:WCS', 'OGC:CSW', 'OGC:SOS', 'OPeNDAP', 'ESRI', 'other'
            ])],
            real_key(field_name)
        )

        field_name = 'layer'
        validation_runner(field_name, resource, errors, [optional], real_key(field_name))

    if resource_format == 'offline-resource':
        # These are offline things
        field_name = 'ordering_procedure'
        validation_runner(field_name, resource, errors, [required], real_key(field_name))
def validate_extras(key, data, errors, context):
    """
    Validate one extra of a package to make sure it conforms to NGDS rules.

    *Important*: This function is called for each extra in the array, so only the current extra is validated
    here. The check that all required fields are present is nevertheless repeated on every call.

    *Also Important*: `key` is a tuple like `('extras', 0, 'key')` and `data` is a "flat" dictionary whose keys
    look like that tuple and whose values are basically primitives (string, boolean, number). The first step
    remaps `data` into a simple dictionary of key/value pairs representing the set of extras.

    *More Notes*: It's really important to understand the relationship between `package_create`,
    `package_update`, and the web-based UI. The first form doesn't add any extras, and it fires
    `package_create`. The second form creates resources, and fires `package_update`. The third form adds extras
    and fires `package_update`. Fortunately, this function will not fire unless there are actually some extras
    included in the package, so it's safe to put this in the `package_create` schema. This also ensures that
    packages that are created programmatically (i.e. harvested records) must satisfy the same validation
    criteria as those created through the web interface.
    """

    # Remap the messy data dictionary into a simple key/value object representing extras, remembering for each
    # extra which entry of `data` holds its value.
    # This is messy and could be botched by future changes to CKAN's validation internals
    extra_numbers = [t[1] for t in data.keys() if t[0] == 'extras' and t[2] == 'key']
    extras = {}
    value_slots = {}
    for num in extra_numbers:
        extra_name = data[('extras', num, 'key')]
        extras[extra_name] = data[('extras', num, 'value')]
        value_slots[extra_name] = ('extras', num, 'value')

    # Validation criteria for the NGDS fields that must always be present
    required_criteria = {
        "authors": [required, ngds_rules.is_valid_json, ngds_rules.is_valid_list_of_contacts],
        "maintainer": [required, ngds_rules.is_valid_json, ngds_rules.is_valid_contact],
        "dataset_category": [
            required,
            ngds_rules.is_in_list([
                "Dataset",
                "Physical Collection",
                "Catalog",
                "Movie or Video",
                "Drawing",
                "Photograph",
                "Remotely Sensed Image",
                "Map",
                "Text Document",
                "Physical Artifact",
                "Desktop Application",
                "Web Application"
            ])
        ],
        "status": [
            required,
            ngds_rules.is_in_list(["completed", "ongoing", "deprecated"])
        ],
        "publication_date": [required, ngds_rules.is_valid_date],
        "dataset_lang": [required]
    }

    # Validation criteria for the NGDS fields that may be left out
    optional_criteria = {
        "spatial_word": [optional],
        "dataset_uri": [optional],
        "quality": [optional],
        "lineage": [optional],
        "spatial": [optional, ngds_rules.is_valid_json, ngds_rules.is_valid_rectangle],
        "non-geographic": [optional, ngds_rules.is_non_geographic]
    }

    # Make sure that required fields are all present among the extras
    if not set(required_criteria).issubset(extras):
        errors[key].append(_('Some required NGDS fields were not present'))
        return

    # Now validate the single key that we were given on this iteration
    field_name = data[key]

    # Pick the criteria for this particular key; unknown fields have no criteria at all
    if field_name in required_criteria:
        criteria = required_criteria[field_name]
    elif field_name in optional_criteria:
        criteria = optional_criteria[field_name]
    else:
        criteria = []

    validation_runner(field_name, extras, errors, criteria)

    # Send any changes that the validation_runner made to the `extras` object back into the original `data`
    # object
    data[value_slots[field_name]] = extras[field_name]
def validate_resources(key, data, errors, context):
    """
    Validate the fields of a single resource against the NGDS rules.

    This function will be invoked once per resource, and must check to see that all the fields in the resource
    are acceptable.

    `key` is normally `('resources', some-integer, 'resource_format')`. The integer is used to find the other
    values that belong to the same resource. A single-element key is also handled, in which case every entry of
    `data` is treated as a field of the resource.

    `data` is the "flat" version of the entire Package dictionary (its keys are tuples).

    `errors` is the dictionary of error lists, keyed like `data`.

    `context` is not used by this function.
    """

    # Information about this resource that I can take from the given key
    resource_format = data[key]

    # Sometimes the function is passed a dict with complex tuples for keys, sometimes pretty simple ones
    has_resource_index = len(key) > 1

    # Simplify the resource into a dictionary of key/value pairs
    if has_resource_index:
        resource_index = key[1]
        resource = dict(
            (t[2], data[t]) for t in data
            if t[0] == 'resources' and t[1] == resource_index
        )
    else:
        resource = dict((t[0], data[t]) for t in data)

    # A way to make sure I always have the right key, regardless of the idiosyncrasies of what might get sent in
    def real_key(field_name):
        if has_resource_index:
            return key[0:2] + (field_name,)
        return (field_name,)

    # Check for fields that are always required by NGDS for resources of any type
    field_name = 'resource_format'
    # NOTE(review): the allowed values used to be passed directly as the list of validators. Every other call
    # to `validation_runner` passes callables, so the values are now wrapped in `is_in_list`, exactly like the
    # `protocol` check below -- confirm against `validation_runner`.
    validation_runner(
        field_name,
        resource,
        errors,
        [ngds_rules.is_in_list(["structured", "unstructured", "offline-resource", "data-service"])],
        real_key(field_name)
    )

    field_name = 'distributor'
    validation_runner(
        field_name,
        resource,
        errors,
        [required, ngds_rules.is_valid_json, ngds_rules.is_valid_contact],
        real_key(field_name)
    )

    # Now things depend on the current resource's `resource_format`
    if resource_format in ['structured', 'unstructured']:
        # These are uploaded files. `format` is only run through the `optional` validator
        field_name = 'format'
        validation_runner(field_name, resource, errors, [optional], real_key(field_name))

    if resource_format == 'structured':
        # These are content-model-aware file uploads
        field_name = 'content_model_uri'
        validation_runner(
            field_name,
            resource,
            errors,
            [required, ngds_rules.is_valid_model_uri],
            real_key(field_name)
        )

        field_name = 'content_model_version'
        validation_runner(
            field_name,
            resource,
            errors,
            [required, ngds_rules.is_valid_model_version],
            real_key(field_name)
        )

        # If the model version and uri are valid, then check the uploaded file against the content model.
        # A missing entry in `errors` is treated as "no errors" instead of raising a KeyError.
        uri_errors = errors.get(real_key('content_model_uri'))
        version_errors = errors.get(real_key('content_model_version'))
        if not uri_errors and not version_errors:
            ngds_rules.check_uploaded_file(resource, errors, real_key('content_model'))

    if resource_format == 'data-service':
        # These are linked data services
        field_name = 'protocol'
        validation_runner(
            field_name,
            resource,
            errors,
            [ngds_rules.is_in_list([
                'OGC:WMS', 'OGC:WFS', 'OGC:WCS', 'OGC:CSW', 'OGC:SOS', 'OPeNDAP', 'ESRI', 'other'
            ])],
            real_key(field_name)
        )

        field_name = 'layer'
        validation_runner(field_name, resource, errors, [optional], real_key(field_name))

    if resource_format == 'offline-resource':
        # These are offline things
        field_name = 'ordering_procedure'
        validation_runner(field_name, resource, errors, [required], real_key(field_name))
def validate_extras(key, data, errors, context):
    """
    Validate one extra of a package to make sure it conforms to NGDS rules.

    *Important*: This function is called for each extra in the array, so only the current extra is validated
    here. The check that all required fields are present is nevertheless repeated on every call.

    *Also Important*: `key` is a tuple like `('extras', 0, 'key')` and `data` is a "flat" dictionary whose keys
    look like that tuple and whose values are basically primitives (string, boolean, number). The first step
    remaps `data` into a simple dictionary of key/value pairs representing the set of extras.

    *More Notes*: It's really important to understand the relationship between `package_create`,
    `package_update`, and the web-based UI. The first form doesn't add any extras, and it fires
    `package_create`. The second form creates resources, and fires `package_update`. The third form adds extras
    and fires `package_update`. Fortunately, this function will not fire unless there are actually some extras
    included in the package, so it's safe to put this in the `package_create` schema. This also ensures that
    packages that are created programmatically (i.e. harvested records) must satisfy the same validation
    criteria as those created through the web interface.
    """

    # Remap the messy data dictionary into a simple key/value object representing extras, remembering for each
    # extra which entry of `data` holds its value.
    # This is messy and could be botched by future changes to CKAN's validation internals
    extra_numbers = [t[1] for t in data.keys() if t[0] == 'extras' and t[2] == 'key']
    extras = {}
    value_slots = {}
    for num in extra_numbers:
        extra_name = data[('extras', num, 'key')]
        extras[extra_name] = data[('extras', num, 'value')]
        value_slots[extra_name] = ('extras', num, 'value')

    # Validation criteria for the NGDS fields that must always be present
    required_criteria = {
        "authors": [required, ngds_rules.is_valid_json, ngds_rules.is_valid_list_of_contacts],
        "maintainer": [required, ngds_rules.is_valid_json, ngds_rules.is_valid_contact],
        "dataset_category": [
            required,
            ngds_rules.is_in_list([
                "Dataset",
                "Physical Collection",
                "Catalog",
                "Movie or Video",
                "Drawing",
                "Photograph",
                "Remotely Sensed Image",
                "Map",
                "Text Document",
                "Physical Artifact",
                "Desktop Application",
                "Web Application"
            ])
        ],
        "status": [
            required,
            ngds_rules.is_in_list(["completed", "ongoing", "deprecated"])
        ],
        "publication_date": [required, ngds_rules.is_valid_date],
        "dataset_lang": [required]
    }

    # Validation criteria for the NGDS fields that may be left out
    optional_criteria = {
        "spatial_word": [optional],
        "dataset_uri": [optional],
        "quality": [optional],
        "lineage": [optional],
        "spatial": [optional, ngds_rules.is_valid_json, ngds_rules.is_valid_rectangle],
        "non-geographic": [optional, ngds_rules.is_non_geographic]
    }

    # Make sure that required fields are all present among the extras
    if not set(required_criteria).issubset(extras):
        errors[key].append(_('Some required NGDS fields were not present'))
        return

    # Now validate the single key that we were given on this iteration
    field_name = data[key]

    # Pick the criteria for this particular key; unknown fields have no criteria at all
    if field_name in required_criteria:
        criteria = required_criteria[field_name]
    elif field_name in optional_criteria:
        criteria = optional_criteria[field_name]
    else:
        criteria = []

    validation_runner(field_name, extras, errors, criteria)

    # Send any changes that the validation_runner made to the `extras` object back into the original `data`
    # object
    data[value_slots[field_name]] = extras[field_name]