def _parse_digital_forms(self, prop=DIGITAL_FORMS):
    """
    Builds the list of Digital Form structures for this metadata document.

    The same XPATH map is parsed against two roots (digital forms and transfer
    options); the two resulting lists are then merged index by index into one
    list of dictionaries, which is passed through get_default_for_complex.
    """

    structure_xpaths = self._data_structures[prop]

    # Base digital form fields: 'name', 'content', 'decompression', 'version', 'specification'
    digital_forms = parse_complex_list(
        self._xml_tree, self._data_map['_digital_forms_root'], structure_xpaths, prop
    )

    # Transfer option fields: 'access_desc', 'access_instrs', 'network_resource'
    transfer_opts = parse_complex_list(
        self._xml_tree, self._data_map['_transfer_options_root'], structure_xpaths, prop
    )

    # Content may have been appended to the specification text after a delimiter:
    # everything before the first delimiter is specification, everything after is content
    delimiter = _DIGITAL_FORMS_CONTENT_DELIM

    for form in digital_forms:
        raw_specs = reduce_value(form['specification'])
        if isinstance(raw_specs, string_types):
            raw_specs = raw_specs.splitlines()

        entries = wrap_value(line.strip() for line in raw_specs)

        spec_lines = []
        content_lines = []
        delimiter_seen = False

        for entry in entries:
            if entry == delimiter:
                # The delimiter itself is never stored in either bucket
                delimiter_seen = True
                continue

            bucket = content_lines if delimiter_seen else spec_lines
            bucket.append(entry)

        # Drop empties, then collapse each bucket to a single value where possible
        form['content'] = reduce_value(filter_empty(content_lines, u''))
        form['specification'] = reduce_value(filter_empty(spec_lines, u''))

    # Merge digital forms and transfer options, position by position, into one struct each
    sources = (digital_forms, transfer_opts)
    total = max(len(digital_forms), len(transfer_opts))
    parsed_forms = []

    for position in xrange(total):
        merged = dict.fromkeys(_iso_definitions[prop], u'')

        for source in sources:
            if position < len(source):
                # Only truthy values override the blank defaults (or earlier values)
                merged.update(
                    (key, val) for key, val in source[position].items() if val
                )

        # Skip structures in which every field is still empty
        if any(merged.values()):
            parsed_forms.append(merged)

    return get_default_for_complex(prop, parsed_forms)
def _update_property(tree_to_update, xpath_root, xpaths, prop, values):
    """
    Default update operation for a single parser property. If xpaths contains one xpath,
    then one element per value will be inserted at that location in the tree_to_update;
    otherwise, the number of values must match the number of xpaths.

    :param tree_to_update: the element tree (or element) in which values are written
    :param xpath_root: optional root XPATH shared by the xpaths; when an xpath is longer than
        and starts with this root, one root element is inserted per value
    :param xpaths: a single XPATH string, or a collection of them (passed through reduce_value)
    :param prop: the property being updated; accepted for interface compatibility,
        it is not referenced by this implementation
    :param values: the value or values to write (empties are removed with filter_empty)
    :return: a list of the items inserted or updated; for an xpath with no values,
        the list of elements that were removed or cleared instead
    """

    # Inner function to update a specific XPATH with the values provided

    def update_element(elem, idx, root, path, vals):
        """ Internal helper function to encapsulate single item update """

        # Must be computed before path is split below: compares the full xpath to the root
        has_root = bool(root and len(path) > len(root) and path.startswith(root))
        path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

        # Clear out whatever is currently stored at the target location
        if attr:
            removed = [get_element(elem, path)]
            remove_element_attributes(removed[0], attr)
        elif not has_root:
            removed = wrap_value(remove_element(elem, path))
        else:
            path = get_xpath_branch(root, path)
            # Existing branches are cleared only once, while processing the first xpath
            removed = [] if idx != 0 else [
                remove_element(e, path, True) for e in get_elements(elem, root)
            ]

        if not vals:
            return removed

        items = []

        for i, val in enumerate(wrap_value(vals)):
            elem_to_update = elem

            if has_root:
                elem_to_update = insert_element(elem, (i + idx), root)

            # Anything that is not already text is assumed to be encoded bytes
            val = val.decode('utf-8') if not isinstance(val, string_types) else val

            if not attr:
                items.append(insert_element(elem_to_update, i, path, val))
            elif path:
                items.append(insert_element(elem_to_update, i, path, **{attr: val}))
            else:
                # Attribute-only xpath: there is no child path to insert at, so the
                # attribute is applied directly to the element being updated
                set_element_attributes(elem_to_update, **{attr: val})
                items.append(elem_to_update)

        return items

    # Code to update each of the XPATHs with each of the values

    xpaths = reduce_value(xpaths)
    values = filter_empty(values)

    if isinstance(xpaths, string_types):
        return update_element(tree_to_update, 0, xpath_root, xpaths, values)
    else:
        each = []

        for index, xpath in enumerate(xpaths):
            # With multiple xpaths, values are matched to xpaths by position
            value = values[index] if values else None
            each.extend(update_element(tree_to_update, index, xpath_root, xpath, value))

        return each
def _update_property(tree_to_update, xpath_root, xpaths, values):
    """
    Writes the values of one parser property into tree_to_update.

    With a single xpath, each value is inserted as its own element at that location.
    With several xpaths, values are paired with xpaths by position, so the number of
    values is expected to equal the number of xpaths.

    Returns a list of the items written; for an xpath that has no values, the
    elements that were removed or cleared are returned in their place.
    """

    def write_xpath(target, offset, root_xpath, xpath, xpath_values):
        """ Clears the location described by xpath, then writes xpath_values to it """

        # Decide this against the full xpath, before it is split into (path, attribute)
        under_root = False
        if root_xpath:
            under_root = len(xpath) > len(root_xpath) and xpath.startswith(root_xpath)

        xpath, attribute = get_xpath_tuple(xpath)  # 'path/@attr' to ('path', 'attr')

        if attribute:
            holder = get_element(target, xpath)
            cleared = [holder]
            remove_element_attributes(holder, attribute)
        elif not under_root:
            cleared = wrap_value(remove_element(target, xpath))
        else:
            xpath = get_xpath_branch(root_xpath, xpath)
            if offset == 0:
                # Branches under every root element are cleared once, on the first xpath
                cleared = [
                    remove_element(root_elem, xpath, True)
                    for root_elem in get_elements(target, root_xpath)
                ]
            else:
                cleared = []

        if not xpath_values:
            return cleared

        written = []

        for position, item in enumerate(wrap_value(xpath_values)):
            if under_root:
                destination = insert_element(target, (position + offset), root_xpath)
            else:
                destination = target

            if not isinstance(item, string_types):
                item = item.decode('utf-8')

            if attribute and not xpath:
                # Attribute without an element path: apply it to the destination itself
                set_element_attributes(destination, **{attribute: item})
                written.append(destination)
            elif attribute:
                written.append(
                    insert_element(destination, position, xpath, **{attribute: item})
                )
            else:
                written.append(insert_element(destination, position, xpath, item))

        return written

    xpaths = reduce_value(xpaths)
    values = filter_empty(values)

    # A lone xpath receives all of the values
    if isinstance(xpaths, string_types):
        return write_xpath(tree_to_update, 0, xpath_root, xpaths, values)

    # Otherwise each xpath receives the value found at the same position
    results = []

    for position, xpath in enumerate(xpaths):
        if values:
            current = values[position]
        else:
            current = None

        results.extend(
            write_xpath(tree_to_update, position, xpath_root, xpath, current)
        )

    return results