def parse_dates(tree_to_parse, xpath_map):
    """
    Creates and returns a Dates Dictionary data structure given the parameters provided.
    The single XPATH is queried first, then multiple, then range begin and end combined:
    the first query that yields any values decides the result.
    :param tree_to_parse: the XML tree from which to construct the Dates data structure
    :param xpath_map: a map containing the following type-specific XPATHs:
        multiple, range, range_begin, range_end, and single
    :return: a dict holding DATE_TYPE and DATE_VALUES, or an empty dict if no query yields values
    """

    def build(date_type, date_values):
        return {DATE_TYPE: date_type, DATE_VALUES: date_values}

    # Single and multiple XPATHs are classified identically: by how many values come back
    for xpath_key in (DATE_TYPE_SINGLE, DATE_TYPE_MULTIPLE):
        found = wrap_value(parse_property(tree_to_parse, None, xpath_map, xpath_key))
        count = len(found)

        if count == 1:
            return build(DATE_TYPE_SINGLE, found)
        if count > 1:
            return build(DATE_TYPE_MULTIPLE, found)

    # Range begin and end values are pooled before being classified by count
    found = flatten_items(
        parsed
        for xpath_key in (DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END)
        for parsed in wrap_value(parse_property(tree_to_parse, None, xpath_map, xpath_key))
    )
    count = len(found)

    if count == 1:
        return build(DATE_TYPE_SINGLE, found)
    if count == 2:
        return build(DATE_TYPE_RANGE, found)
    if count > 2:
        return build(DATE_TYPE_MULTIPLE, found)

    return {}
def validate_process_steps(prop, value):
    """
    Default validation for Process Steps data structure.
    A value of None is skipped. Otherwise the value is validated as a dict or list, each
    wrapped item as a dict, and each key of an item against the complex definition for prop.
    Every sub-property is validated as a string, except 'sources', which is validated as a
    string or list with each wrapped source validated as a string.
    :param prop: the property name, used to look up the definition and to label each check
    :param value: the process step, or list of process steps, to validate
    """

    if value is None:
        return

    validate_type(prop, value, (dict, list))
    allowed_keys = set(_complex_definitions[prop])

    for step_num, step in enumerate(wrap_value(value)):
        step_label = ''.join((prop, '[', str(step_num), ']'))
        validate_type(step_label, step, dict)

        for field, field_val in iteritems(step):
            field_label = step_label + '.' + field

            if field not in allowed_keys:
                expected = 'keys: {0}'.format(','.join(allowed_keys))
                _validation_error(prop, None, value, expected)

            if field == 'sources':
                validate_type(field_label, field_val, (string_types, list))

                # Each individual source is labelled by its position in the wrapped list
                for src_num, src in enumerate(wrap_value(field_val)):
                    src_label = ''.join((field_label, '[', str(src_num), ']'))
                    validate_type(src_label, src, string_types)
            else:
                validate_type(field_label, field_val, string_types)
def parse_dates(tree_to_parse, xpath_map):
    """
    Creates and returns a Dates Dictionary data structure given the parameters provided.
    Values parsed with the single XPATH are used if there are any; otherwise those parsed
    with the multiple XPATH; otherwise the range begin and end values taken together.
    :param tree_to_parse: the XML tree from which to construct the Dates data structure
    :param xpath_map: a map containing the following type-specific XPATHs:
        multiple, range, range_begin, range_end, and single
    :return: a dict with the DATE_TYPE and DATE_VALUES keys, or an empty dict when nothing is parsed
    """

    def query(xpath_key):
        return wrap_value(parse_property(tree_to_parse, None, xpath_map, xpath_key))

    # Determine dates to query based on metadata elements

    dates = query(DATE_TYPE_SINGLE)
    if len(dates) < 1:
        # The multiple XPATH is only consulted when the single XPATH produced nothing
        dates = query(DATE_TYPE_MULTIPLE)

    if len(dates) >= 1:
        date_type = DATE_TYPE_SINGLE if len(dates) == 1 else DATE_TYPE_MULTIPLE
        return {DATE_TYPE: date_type, DATE_VALUES: dates}

    dates = flatten_items(
        date
        for range_key in (DATE_TYPE_RANGE_BEGIN, DATE_TYPE_RANGE_END)
        for date in query(range_key)
    )

    if len(dates) >= 1:
        # One value is a single date, two make a range, and any more are multiple dates
        type_by_count = {1: DATE_TYPE_SINGLE, 2: DATE_TYPE_RANGE}
        date_type = type_by_count.get(len(dates), DATE_TYPE_MULTIPLE)
        return {DATE_TYPE: date_type, DATE_VALUES: dates}

    return {}
def update_element(elem, idx, root, path, vals):
    """
    Internal helper function to encapsulate single item update.
    Existing content at path is removed first; if vals is empty the removed items are
    returned, otherwise one element is inserted (or one attribute set) per wrapped value.
    :param elem: the element under which the removals and insertions are made
    :param idx: an offset added to each value's position when inserting under root;
        rooted paths are only removed when it is 0
    :param root: an optional XPATH prefix: when path starts with it and is longer than it,
        a root element is inserted for every value and path is reduced to its branch under root
    :param path: the XPATH to update, optionally ending in an attribute: 'path/@attr'
    :param vals: the value or values to write; non-string values are decoded as UTF-8
    :return: the removed items when vals is empty; otherwise a list with the inserted
        element for each value, or the element whose attribute was set
    """

    has_root = bool(root and len(path) > len(root) and path.startswith(root))
    path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

    if attr:
        removed = [get_element(elem, path)]
        remove_element_attributes(removed[0], attr)
    elif not has_root:
        removed = wrap_value(remove_element(elem, path))
    else:
        path = get_xpath_branch(root, path)
        # Rooted elements are cleared once, on the first index only
        removed = [] if idx != 0 else [remove_element(e, path, True) for e in get_elements(elem, root)]

    if not vals:
        return removed

    items = []

    for i, val in enumerate(wrap_value(vals)):
        elem_to_update = elem

        if has_root:
            elem_to_update = insert_element(elem, (i + idx), root)

        val = val.decode('utf-8') if not isinstance(val, string_types) else val

        if not attr:
            items.append(insert_element(elem_to_update, i, path, val))
        elif path:
            items.append(insert_element(elem_to_update, i, path, **{attr: val}))
        else:
            # Attribute-only XPATH ('@attr'): there is no child path to insert,
            # so the attribute is set directly on the element being updated
            set_element_attributes(elem_to_update, **{attr: val})
            items.append(elem_to_update)

    return items
def validate_process_steps(prop, value):
    """
    Default validation for Process Steps data structure.
    Nothing is checked when value is None. Otherwise value is validated as a dict or list,
    and every wrapped process step as a dict whose keys belong to the complex definition for
    prop. All sub-properties are validated as strings, apart from 'sources': that one is
    validated as a string or list, and each of its wrapped items as a string.
    :param prop: the property name identifying the definition; also prefixes each label
    :param value: a process step dict or a list of them
    """

    def indexed(label, position):
        return label + '[' + str(position) + ']'

    if value is not None:
        validate_type(prop, value, (dict, list))

        valid_keys = set(_complex_definitions[prop])

        for step_idx, step in enumerate(wrap_value(value)):
            step_name = indexed(prop, step_idx)
            validate_type(step_name, step, dict)

            for sub_prop, sub_val in iteritems(step):
                sub_name = '.'.join((step_name, sub_prop))

                if sub_prop not in valid_keys:
                    _validation_error(prop, None, value, ('keys: {0}'.format(','.join(valid_keys))))

                has_sources = (sub_prop == 'sources')
                validate_type(sub_name, sub_val, (string_types, list) if has_sources else string_types)

                if has_sources:
                    for source_idx, source in enumerate(wrap_value(sub_val)):
                        validate_type(indexed(sub_name, source_idx), source, string_types)
def test_reparse_complex_lists(self):
    """
    Runs assert_reparsed_complex_for against the ArcGIS, FGDC and ISO parsers for each complex
    list property: first with empty values, then with every valid complex value, both as a
    single struct and as a growing list of structs.
    """

    definitions = get_complex_definitions()
    list_props = (ATTRIBUTES, CONTACTS, DIGITAL_FORMS)

    parsers = (
        ArcGISParser(self.arcgis_metadata),
        FgdcParser(self.fgdc_metadata),
        IsoParser(self.iso_metadata),
    )

    for parser in parsers:

        # Test reparsed empty complex lists
        for prop in list_props:
            blank_struct = dict.fromkeys(definitions[prop], u"")

            for empty in (None, [], [{}], [blank_struct]):
                self.assert_reparsed_complex_for(parser, prop, empty, [])

        # Test reparsed valid complex lists (strings and lists for each property in each struct)
        for prop in list_props:
            accumulated = []

            for val in self.valid_complex_values:

                # Test with single unwrapped value
                single = dict.fromkeys(definitions[prop], val)
                self.assert_reparsed_complex_for(parser, prop, single, wrap_value(single))

                # Test with accumulated list of values
                accumulated.append(dict.fromkeys(definitions[prop], val))
                self.assert_reparsed_complex_for(parser, prop, accumulated, wrap_value(accumulated))
def _update_digital_forms(self, **update_props):
    """
    Update operation for ISO Digital Forms metadata
    :see: gis_metadata.utils._complex_definitions[DIGITAL_FORMS]
    :param update_props: the keyword arguments forwarded to update_complex_list; 'values' and
        'prop' are read here, and 'values' is replaced before each of the two updates
    :return: a dict with the results of both updates, under the keys
        'distribution_formats' and 'transfer_options'
    """

    forms = wrap_value(update_props['values'])

    # Update all Digital Form properties: distributionFormat*

    form_xpaths = self._data_structures[update_props['prop']]

    format_props = ('name', 'decompression', 'version', 'specification')
    format_xroot = self._data_map['_digital_forms_root']
    format_xmap = {key: form_xpaths[key] for key in format_props}

    formats = []
    for form in forms:
        form_format = {key: form[key] for key in format_props}
        content = form.get('content')

        if content:
            # Content is appended to the specification values, after the content delimiter
            spec = wrap_value(form.get('specification'))
            spec.append(_DIGITAL_FORMS_CONTENT_DELIM)
            spec.extend(wrap_value(content))
            form_format['specification'] = spec

        formats.append(form_format)

    update_props['values'] = formats
    updated_formats = update_complex_list(
        xpath_root=format_xroot, xpath_map=format_xmap, **update_props
    )

    # Update all Network Resources: transferOptions+

    option_props = ('access_desc', 'access_instrs', 'network_resource')
    option_xroot = self._data_map['_transfer_options_root']
    option_xmap = {key: self._data_map['_' + key] for key in option_props}

    update_props['values'] = [{key: form[key] for key in option_props} for form in forms]
    updated_options = update_complex_list(
        xpath_root=option_xroot, xpath_map=option_xmap, **update_props
    )

    return {'distribution_formats': updated_formats, 'transfer_options': updated_options}
def test_reparse_process_steps(self):
    """
    Runs assert_reparsed_complex_for against the ArcGIS, FGDC and ISO parsers for process
    steps: first with empty values, then with a list that grows by one process step for
    each valid complex value.
    """

    step_keys = get_complex_definitions()[PROCESS_STEPS]

    parsers = (
        ArcGISParser(self.arcgis_metadata),
        FgdcParser(self.fgdc_metadata),
        IsoParser(self.iso_metadata),
    )

    for parser in parsers:

        # Test reparsed empty process steps
        for empty in (None, [], [{}], [dict.fromkeys(step_keys, u"")]):
            self.assert_reparsed_complex_for(parser, PROCESS_STEPS, empty, [])

        steps = []

        # Test reparsed valid process steps
        for val in self.valid_complex_values:
            step = dict.fromkeys(step_keys, val)

            # Process steps must have a single string value for all but sources
            for key in step:
                if key != "sources":
                    step[key] = ", ".join(wrap_value(val))

            steps.append(step)
            self.assert_reparsed_complex_for(parser, PROCESS_STEPS, steps, steps)
def _update_report_item(self, **update_props):
    """
    Update the text for each element at the configured path if attribute matches.
    Elements at the property's root XPATH whose 'type' attribute matches are cleared, empty
    elements are removed, and a new element holding a 'measDesc' child is inserted per value.
    :param update_props: must contain 'tree_to_update', 'prop' and 'values'
    :return: a list with the result of inserting each 'measDesc' element
    """

    tree = update_props['tree_to_update']
    prop = update_props['prop']
    values = wrap_value(update_props['values'])
    xroot = self._get_xroot_for(prop)

    # The 'type' attribute written and matched depends on the property being updated
    report_type = {
        'attribute_accuracy': 'DQQuanAttAcc',
        'dataset_completeness': 'DQCompOm'
    }.get(prop, u'')

    # Clear (make empty) all elements of the appropriate type
    for existing in get_elements(tree, xroot):
        if get_element_attributes(existing).get('type') == report_type:
            clear_element(existing)

    # Remove all empty elements, including those previously cleared
    remove_empty_element(tree, xroot)

    # Insert elements with correct attributes for each new value
    updated = []
    for position, text in enumerate(values):
        report = insert_element(tree, position, xroot, type=report_type)
        updated.append(insert_element(report, position, 'measDesc', text))

    return updated
def validate_complex_list(prop, value, xpath_map=None):
    """
    Default validation for a list of complex data structures.
    A value of None is skipped. Otherwise the value is validated as a dict or list, each
    wrapped item as a dict, and each key of an item against the allowed keys. A sub-property
    that is a list has each of its items validated as a string; any other sub-property is
    validated as a string or list.
    :param prop: the property name, used to look up the allowed keys and to label each check
    :param value: the complex structure, or list of complex structures, to validate
    :param xpath_map: supplies the allowed keys when prop has no complex definition
    """

    if value is None:
        return

    validate_type(prop, value, (dict, list))

    if prop in _complex_definitions:
        allowed_keys = _complex_definitions[prop]
    elif xpath_map is None:
        allowed_keys = {}
    else:
        allowed_keys = xpath_map

    for struct_num, struct in enumerate(wrap_value(value)):
        struct_label = ''.join((prop, '[', str(struct_num), ']'))
        validate_type(struct_label, struct, dict)

        for field, field_val in iteritems(struct):
            field_label = struct_label + '.' + field

            if field not in allowed_keys:
                expected = 'keys: {0}'.format(','.join(allowed_keys))
                _validation_error(prop, None, value, expected)

            if isinstance(field_val, list):
                for item_num, item in enumerate(field_val):
                    item_label = ''.join((field_label, '[', str(item_num), ']'))
                    validate_type(item_label, item, string_types)
            else:
                validate_type(field_label, field_val, (string_types, list))
def update_complex_list(tree_to_update, xpath_root, xpath_map, prop, values):
    """
    Updates and returns the list of updated complex Elements parsed from tree_to_update.
    :param tree_to_update: the XML tree compatible with element_utils to be updated
    :param xpath_root: the XPATH location of each complex Element
    :param xpath_map: a Dictionary of XPATHs corresponding to the complex structure definition
    :param prop: the property identifying the complex structure to be serialized
    :param values: a List containing the updated complex structures as Dictionaries
    :return: a list with the result of each call made to update_property
    """

    # Whatever is currently at the root XPATH is removed before anything is written
    remove_element(tree_to_update, xpath_root, True)

    if not values:
        # Returns the elements corresponding to property removed from the tree
        return [update_property(tree_to_update, xpath_root, xpath_root, prop, values)]

    updated = []

    for position, struct in enumerate(wrap_value(values)):

        # Insert a new complex element root for each dict in the list
        struct_root = insert_element(tree_to_update, position, xpath_root)

        for subprop, subval in iteritems(struct):
            branch = get_xpath_branch(xpath_root, xpath_map[subprop])
            subval = get_default_for_complex_sub(prop, subprop, subval, branch)
            updated.append(update_property(struct_root, None, branch, subprop, subval))

    return updated
def update_complex_list(tree_to_update, xpath_root, xpath_map, prop, values):
    """
    Updates and returns the list of updated complex Elements parsed from tree_to_update.
    Existing content at xpath_root is removed, then one root element is inserted per complex
    structure and each of its sub-properties is written beneath that element.
    :param tree_to_update: the XML tree compatible with element_utils to be updated
    :param xpath_root: the XPATH location of each complex Element
    :param xpath_map: a Dictionary of XPATHs corresponding to the complex structure definition
    :param prop: the property identifying the complex structure to be serialized
    :param values: a List containing the updated complex structures as Dictionaries
    :return: a list of the values returned by update_property, in the order they were written
    """

    def update_sub(parent, subprop, subval):
        # XPATHs in the map are reduced to their branch under the root before being applied
        xpath = get_xpath_branch(xpath_root, xpath_map[subprop])
        default = get_default_for_complex_sub(prop, subprop, subval, xpath)
        return update_property(parent, None, xpath, subprop, default)

    results = []

    remove_element(tree_to_update, xpath_root, True)

    if values:
        for elem_idx, complex_dict in enumerate(wrap_value(values)):
            # Insert a new complex element root for each dict in the list
            parent = insert_element(tree_to_update, elem_idx, xpath_root)

            for subprop, subval in iteritems(complex_dict):
                results.append(update_sub(parent, subprop, subval))
    else:
        # Returns the elements corresponding to property removed from the tree
        results.append(update_property(tree_to_update, xpath_root, xpath_root, prop, values))

    return results
def validate_complex_list(prop, value, xpath_map=None):
    """
    Default validation for a list of complex data structures.
    Nothing is checked when value is None. Otherwise value is validated as a dict or list and
    every wrapped structure as a dict whose keys are among the allowed keys. List
    sub-properties have each item validated as a string; all others are validated as a
    string or list.
    :param prop: the property name identifying the definition; also prefixes each label
    :param value: a complex structure dict or a list of them
    :param xpath_map: the source of allowed keys for a prop without a complex definition
    """

    def indexed(label, position):
        return label + '[' + str(position) + ']'

    if value is not None:
        validate_type(prop, value, (dict, list))

        # Known complex definitions take precedence over the keys of the xpath_map
        if prop in _complex_definitions:
            valid_keys = _complex_definitions[prop]
        elif xpath_map is not None:
            valid_keys = xpath_map
        else:
            valid_keys = {}

        for struct_idx, struct in enumerate(wrap_value(value)):
            struct_name = indexed(prop, struct_idx)
            validate_type(struct_name, struct, dict)

            for sub_prop, sub_val in iteritems(struct):
                sub_name = '.'.join((struct_name, sub_prop))

                if sub_prop not in valid_keys:
                    _validation_error(prop, None, value, ('keys: {0}'.format(','.join(valid_keys))))

                if isinstance(sub_val, list):
                    for item_idx, item in enumerate(sub_val):
                        validate_type(indexed(sub_name, item_idx), item, string_types)
                else:
                    validate_type(sub_name, sub_val, (string_types, list))
def test_reparse_keywords(self):
    """
    Runs assert_reparsed_complex_for against the ArcGIS, FGDC and ISO parsers for place and
    theme keywords: first with empty values, then with a single keyword and a keyword list.
    """

    parsers = (
        ArcGISParser(self.arcgis_metadata),
        FgdcParser(self.fgdc_metadata),
        IsoParser(self.iso_metadata),
    )

    for parser in parsers:

        # Test reparsed empty keywords
        for keywords in ("", u"", []):
            for keyword_prop in (KEYWORDS_PLACE, KEYWORDS_THEME):
                self.assert_reparsed_complex_for(parser, keyword_prop, keywords, [])

        # Test reparsed valid keywords
        for keywords in ("keyword", ["keyword", "list"]):
            for keyword_prop in (KEYWORDS_PLACE, KEYWORDS_THEME):
                self.assert_reparsed_complex_for(parser, keyword_prop, keywords, wrap_value(keywords))
def _parse_digital_forms(self, prop=DIGITAL_FORMS):
    """
    Concatenates a list of Digital Form data structures parsed from the metadata.
    Digital forms and transfer options are parsed separately, content is split back out of
    the specification values, and the two lists are merged position by position.
    :param prop: the property used to look up the data structure XPATHs and the ISO definition
    :return: the merged digital forms after being passed through get_default_for_complex
    """

    xpath_map = self._data_structures[prop]

    # Parse base digital form fields: 'name', 'content', 'decompression', 'version', 'specification'
    forms_xroot = self._data_map['_digital_forms_root']
    digital_forms = parse_complex_list(self._xml_tree, forms_xroot, xpath_map, prop)

    # Parse digital form transfer option fields: 'access_desc', 'access_instrs', 'network_resource'
    options_xroot = self._data_map['_transfer_options_root']
    transfer_opts = parse_complex_list(self._xml_tree, options_xroot, xpath_map, prop)

    # Split out digital form content that has been appended to specifications

    for form in digital_forms:
        raw_specs = reduce_value(form['specification'])
        if isinstance(raw_specs, string_types):
            raw_specs = raw_specs.splitlines()

        entries = wrap_value(entry.strip() for entry in raw_specs)

        content_items = []
        spec_items = []
        delim_seen = False

        # Entries before the delimiter are specifications; entries after it are content
        for entry in entries:
            if entry == _DIGITAL_FORMS_CONTENT_DELIM:
                delim_seen = True
            elif delim_seen:
                content_items.append(entry)
            else:
                spec_items.append(entry)

        # Reduce spec and content to single string values if possible
        form['content'] = reduce_value(filter_empty(content_items, u''))
        form['specification'] = reduce_value(filter_empty(spec_items, u''))

    # Combine digital forms and transfer options into a single complex struct

    forms_len = len(digital_forms)
    options_len = len(transfer_opts)
    parsed_forms = []

    for idx in xrange(0, max(forms_len, options_len)):
        merged = dict.fromkeys(_iso_definitions[prop], u'')

        # Only non-empty values replace the defaults; transfer options are applied last
        for parsed, parsed_len in ((digital_forms, forms_len), (transfer_opts, options_len)):
            if idx < parsed_len:
                merged.update(item for item in parsed[idx].items() if item[1])

        if any(merged.values()):
            parsed_forms.append(merged)

    return get_default_for_complex(prop, parsed_forms)
def get_default_for_complex(prop, value, xpath=''):
    """
    Applies get_default_for_complex_sub to every sub-property of each wrapped complex struct.
    :param prop: the complex property the structs belong to
    :param value: a complex struct, or a list of them
    :param xpath: passed through to get_default_for_complex_sub for every sub-property
    :return: the list of updated structs if prop is in _COMPLEX_LISTS; otherwise
        that list after reduce_value has been applied with a default of {}
    """

    # Ensure sub-props of complex structs and complex lists that take multiple values are wrapped as lists
    defaults = []

    for struct in wrap_value(value):
        with_defaults = {}
        for subprop, subval in iteritems(struct):
            with_defaults[subprop] = get_default_for_complex_sub(prop, subprop, subval, xpath)
        defaults.append(with_defaults)

    if prop in _COMPLEX_LISTS:
        return defaults

    return reduce_value(defaults, {})
def get_default_for_complex(prop, value, xpath=''):
    """
    Rebuilds each wrapped complex struct with get_default_for_complex_sub applied per key.
    :param prop: the complex property the structs belong to
    :param value: a complex struct, or a list of them
    :param xpath: handed unchanged to get_default_for_complex_sub for each key
    :return: the rebuilt list when prop is in _COMPLEX_LISTS; otherwise the result of
        reduce_value for that list, with {} as its default
    """

    def with_defaults(complex_struct):
        return {
            key: get_default_for_complex_sub(prop, key, sub_value, xpath)
            for key, sub_value in iteritems(complex_struct)
        }

    # Ensure sub-props of complex structs and complex lists that take multiple values are wrapped as lists
    structs = [with_defaults(complex_struct) for complex_struct in wrap_value(value)]

    is_complex_list = prop in _COMPLEX_LISTS
    return structs if is_complex_list else reduce_value(structs, {})
def update_element(elem, idx, root, path, vals):
    """
    Internal helper function to encapsulate single item update.
    Existing content at path is removed first; if vals is empty the removed items are
    returned, otherwise one element is inserted (or one attribute set) per wrapped value.
    :param elem: the element under which the removals and insertions are made
    :param idx: an offset added to each value's position when inserting under root;
        rooted paths are only removed when it is 0
    :param root: an optional XPATH prefix: when path starts with it and is longer than it,
        a root element is inserted for every value and path is reduced to its branch under root
    :param path: the XPATH to update, optionally ending in an attribute: 'path/@attr'
    :param vals: the value or values to write; non-string values are decoded as UTF-8
    :return: the removed items when vals is empty; otherwise a list with the inserted
        element for each value, or the element whose attribute was set
    """

    rooted = bool(root and len(path) > len(root) and path.startswith(root))
    path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

    if attr:
        # Only the attribute is removed: the element that carries it stays in place
        target = get_element(elem, path)
        removed = [target]
        remove_element_attributes(target, attr)
    elif rooted:
        path = get_xpath_branch(root, path)
        if idx == 0:
            removed = [remove_element(parent, path, True) for parent in get_elements(elem, root)]
        else:
            removed = []
    else:
        removed = wrap_value(remove_element(elem, path))

    if not vals:
        return removed

    updated = []

    for offset, text in enumerate(wrap_value(vals)):
        parent = insert_element(elem, (offset + idx), root) if rooted else elem

        if not isinstance(text, string_types):
            text = text.decode('utf-8')

        if not attr:
            updated.append(insert_element(parent, offset, path, text))
            continue

        attrs = {attr: text}

        if path:
            updated.append(insert_element(parent, offset, path, **attrs))
        else:
            # With no element path left, the attribute belongs to the parent itself
            set_element_attributes(parent, **attrs)
            updated.append(parent)

    return updated
def get_default_for_complex_sub(prop, subprop, value, xpath):
    """
    Normalizes the value of a single sub-property of a complex structure.
    :param prop: the complex property; underscores are stripped from both ends before lookup
    :param subprop: the sub-property; underscores are stripped from both ends before lookup
    :param value: the value to normalize, wrapped before use
    :param xpath: the XPATH of the sub-property: decides how multiple values are joined
    :return: the wrapped value when subprop is found in the _COMPLEX_WITH_MULTI entry for
        prop; otherwise the wrapped values joined into a single string
    """

    # Handle alternate props (leading underscores)
    base_prop = prop.strip('_')
    base_subprop = subprop.strip('_')

    wrapped = wrap_value(value)

    if base_subprop in _COMPLEX_WITH_MULTI.get(base_prop, ''):
        return wrapped  # Leave sub properties allowing lists wrapped

    # An '@' in the XPATH selects the comma join; otherwise values are joined on _COMPLEX_DELIM
    if '@' in xpath:
        return ','.join(wrapped)

    return _COMPLEX_DELIM.join(wrapped)
def get_default_for(prop, value):
    """
    Ensures complex property types have the correct default values
    :param prop: the property name; underscores are stripped from both ends before lookup
    :param value: the value to reduce and default
    :return: the reduced value wrapped if prop is in _COMPLEX_LISTS; the reduced value or {}
        if prop is in _COMPLEX_STRUCTS; otherwise the reduced value, with None replaced by u''
    """

    base_prop = prop.strip('_')  # Handle alternate props (leading underscores)
    reduced = reduce_value(value)  # Filtering of value happens here

    if base_prop in _COMPLEX_LISTS:
        return wrap_value(reduced)

    if base_prop in _COMPLEX_STRUCTS:
        return reduced if reduced else {}

    if reduced is None:
        return u''

    return reduced
def get_default_for_complex_sub(prop, subprop, value, xpath):
    """
    Normalizes the value of a single sub-property of a complex structure.
    :param prop: the complex property; underscores are stripped from both ends before lookup
    :param subprop: the sub-property; underscores are stripped from both ends before lookup
    :param value: the value to normalize, wrapped before use
    :param xpath: the XPATH of the sub-property: decides which join helper is applied
    :return: the wrapped value when subprop is found in the _COMPLEX_WITH_MULTI entry for
        prop; otherwise the result of _join_complex_attr when xpath contains '@', or of
        _join_complex_prop when it does not
    """

    # Handle alternate props (leading underscores)
    lookup_prop = prop.strip('_')
    lookup_subprop = subprop.strip('_')

    values = wrap_value(value)

    multi_subprops = _COMPLEX_WITH_MULTI.get(lookup_prop, '')
    if lookup_subprop in multi_subprops:
        return values  # Leave sub properties allowing lists wrapped

    # Join on comma for element attribute values; newline for element text values
    join_values = _join_complex_attr if '@' in xpath else _join_complex_prop
    return join_values(values)
def parse_complex(tree_to_parse, xpath_root, xpath_map, complex_key):
    """
    Creates and returns a Dictionary data structure parsed from the metadata.
    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: the XPATH location of the structure inside the parent element
    :param xpath_map: a dict of XPATHs corresponding to a complex definition
    :param complex_key: indicates which complex definition describes the structure
    :return: a dict with a value for every sub-property, or an empty dict if none is truthy
    """

    parsed_struct = {}

    # Sub-properties come from the complex definition, or from the xpath_map when there is none
    for subprop in _complex_definitions.get(complex_key, xpath_map):
        raw = parse_property(tree_to_parse, xpath_root, xpath_map, subprop)

        # Normalize complex values: treat values with newlines like values from separate elements
        pieces = flatten_items(item.split(_COMPLEX_DELIM) for item in wrap_value(raw))

        parsed_struct[subprop] = get_default_for_complex_sub(
            complex_key, subprop, reduce_value(pieces), xpath_map[subprop]
        )

    if any(parsed_struct.values()):
        return parsed_struct

    return {}
def parse_complex(tree_to_parse, xpath_root, xpath_map, complex_key):
    """
    Creates and returns a Dictionary data structure parsed from the metadata.
    Each sub-property is parsed, split on _COMPLEX_DELIM, flattened and reduced before being
    given its default form by get_default_for_complex_sub.
    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: the XPATH location of the structure inside the parent element
    :param xpath_map: a dict of XPATHs corresponding to a complex definition
    :param complex_key: indicates which complex definition describes the structure
    :return: the parsed dict when at least one of its values is truthy; otherwise an empty dict
    """

    def parse_sub(sub_key):
        # Normalize complex values: treat values with newlines like values from separate elements
        found = parse_property(tree_to_parse, xpath_root, xpath_map, sub_key)
        split_values = (text.split(_COMPLEX_DELIM) for text in wrap_value(found))
        return reduce_value(flatten_items(split_values))

    result = {}

    for sub_key in _complex_definitions.get(complex_key, xpath_map):
        normalized = parse_sub(sub_key)
        result[sub_key] = get_default_for_complex_sub(complex_key, sub_key, normalized, xpath_map[sub_key])

    has_values = any(result.values())
    return result if has_values else {}
def validate_any(prop, value, xpath_map=None):
    """
    Validates any metadata property, complex or simple (string or array).
    A value of None is skipped; otherwise the validator is chosen by prop.
    :param prop: the name of the property being validated
    :param value: the value to validate
    :param xpath_map: passed to the complex validators; when it is given, a prop that is not
        in _supported_props is validated as a complex list
    """

    if value is None:
        return

    if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS):
        validate_complex_list(prop, value, xpath_map)
        return

    if prop in (BOUNDING_BOX, LARGER_WORKS, RASTER_INFO):
        validate_complex(prop, value, xpath_map)
        return

    if prop == DATES:
        validate_dates(prop, value, xpath_map)
        return

    if prop == PROCESS_STEPS:
        validate_process_steps(prop, value)
        return

    if prop not in _supported_props and xpath_map is not None:
        # Validate custom data structures as complex lists by default
        validate_complex_list(prop, value, xpath_map)
        return

    # Everything else is simple: each wrapped item is validated as a string or list
    for item in wrap_value(value, include_empty=True):
        validate_type(prop, item, (string_types, list))
def validate_any(prop, value, xpath_map=None):
    """
    Validates any metadata property, complex or simple (string or array).
    Nothing is validated when value is None; otherwise prop decides which validator runs.
    :param prop: the name of the property being validated
    :param value: the value to validate
    :param xpath_map: forwarded to the complex validators; if present, any prop missing from
        _supported_props is treated as a complex list
    """

    if value is None:
        return

    is_complex_list = prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS)

    if is_complex_list:
        validate_complex_list(prop, value, xpath_map)
    elif prop in (BOUNDING_BOX, LARGER_WORKS):
        validate_complex(prop, value, xpath_map)
    elif prop == DATES:
        validate_dates(prop, value, xpath_map)
    elif prop == PROCESS_STEPS:
        validate_process_steps(prop, value)
    elif prop not in _supported_props and xpath_map is not None:
        # Validate custom data structures as complex lists by default
        validate_complex_list(prop, value, xpath_map)
    else:
        # Simple properties: every wrapped item, empty ones included, is a string or list
        simple_types = (string_types, list)
        for simple_value in wrap_value(value, include_empty=True):
            validate_type(prop, simple_value, simple_types)