def _parse_digital_forms(self, prop=DIGITAL_FORMS):
        """ Concatenates a list of Digital Form data structures parsed from the metadata """

        xpath_map = self._data_structures[prop]

        # Parse base digital form fields: 'name', 'content', 'decompression', 'version', 'specification'
        xpath_root = self._data_map['_digital_forms_root']
        digital_forms = parse_complex_list(self._xml_tree, xpath_root, xpath_map, prop)

        # Parse digital form transfer option fields: 'access_desc', 'access_instrs', 'network_resource'
        xpath_root = self._data_map['_transfer_options_root']
        transfer_opts = parse_complex_list(self._xml_tree, xpath_root, xpath_map, prop)

        # Split out digital form content that has been appended to specifications

        content_delim = _DIGITAL_FORMS_CONTENT_DELIM

        for digital_form in digital_forms:
            specs = reduce_value(digital_form['specification'])
            specs = specs.splitlines() if isinstance(specs, string_types) else specs

            specifications = wrap_value(s.strip() for s in specs)

            digital_form['content'] = []
            digital_form['specification'] = []
            has_content = False

            # For each specification, insert delim before appending content
            for spec in specifications:
                has_content = has_content or spec == content_delim

                if not has_content:
                    digital_form['specification'].append(spec)
                elif spec != content_delim:
                    digital_form['content'].append(spec)

            # Reduce spec and content to single string values if possible
            for form_prop in ('content', 'specification'):
                digital_form[form_prop] = reduce_value(filter_empty(digital_form[form_prop], u''))

        # Combine digital forms and transfer options into a single complex struct

        df_len = len(digital_forms)
        to_len = len(transfer_opts)
        parsed_forms = []

        for idx in xrange(0, max(df_len, to_len)):
            digital_form = {}.fromkeys(_iso_definitions[prop], u'')

            if idx < df_len:
                digital_form.update(i for i in digital_forms[idx].items() if i[1])
            if idx < to_len:
                digital_form.update(i for i in transfer_opts[idx].items() if i[1])

            if any(digital_form.values()):
                parsed_forms.append(digital_form)

        return get_default_for_complex(prop, parsed_forms)
Esempio n. 2
0
def _update_property(tree_to_update, xpath_root, xpaths, prop, values):
    """
    Default update operation for a single parser property. If xpaths contains one xpath,
    then one element per value will be inserted at that location in the tree_to_update;
    otherwise, the number of values must match the number of xpaths.
    """

    # Inner function to update a specific XPATH with the values provided

    def update_element(elem, idx, root, path, vals):
        """ Internal helper function to encapsulate single item update """

        has_root = bool(root and len(path) > len(root) and path.startswith(root))
        path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

        if attr:
            removed = [get_element(elem, path)]
            remove_element_attributes(removed[0], attr)
        elif not has_root:
            removed = wrap_value(remove_element(elem, path))
        else:
            path = get_xpath_branch(root, path)
            removed = [] if idx != 0 else [remove_element(e, path, True) for e in get_elements(elem, root)]

        if not vals:
            return removed

        items = []

        for i, val in enumerate(wrap_value(vals)):
            elem_to_update = elem

            if has_root:
                elem_to_update = insert_element(elem, (i + idx), root)

            val = val.decode('utf-8') if not isinstance(val, string_types) else val
            if not attr:
                items.append(insert_element(elem_to_update, i, path, val))
            else:
                items.append(insert_element(elem_to_update, i, path, **{attr: val}))

        return items

    # Code to update each of the XPATHs with each of the values

    xpaths = reduce_value(xpaths)
    values = filter_empty(values)

    if isinstance(xpaths, string_types):
        return update_element(tree_to_update, 0, xpath_root, xpaths, values)
    else:
        each = []

        for index, xpath in enumerate(xpaths):
            value = values[index] if values else None
            each.extend(update_element(tree_to_update, index, xpath_root, xpath, value))

        return each
    def _parse_digital_forms(self, prop=DIGITAL_FORMS):
        """ Concatenates a list of Digital Form data structures parsed from the metadata """

        xpath_map = self._data_structures[prop]

        # Parse base digital form fields: 'name', 'content', 'decompression', 'version', 'specification'
        xpath_root = self._data_map['_digital_forms_root']
        digital_forms = parse_complex_list(self._xml_tree, xpath_root, xpath_map, prop)

        # Parse digital form transfer option fields: 'access_desc', 'access_instrs', 'network_resource'
        xpath_root = self._data_map['_transfer_options_root']
        transfer_opts = parse_complex_list(self._xml_tree, xpath_root, xpath_map, prop)

        # Split out digital form content that has been appended to specifications

        content_delim = _DIGITAL_FORMS_CONTENT_DELIM

        for digital_form in digital_forms:
            specs = reduce_value(digital_form['specification'])
            specs = specs.splitlines() if isinstance(specs, string_types) else specs

            specifications = wrap_value(s.strip() for s in specs)

            digital_form['content'] = []
            digital_form['specification'] = []
            has_content = False

            # For each specification, insert delim before appending content
            for spec in specifications:
                has_content = has_content or spec == content_delim

                if not has_content:
                    digital_form['specification'].append(spec)
                elif spec != content_delim:
                    digital_form['content'].append(spec)

            # Reduce spec and content to single string values if possible
            for form_prop in ('content', 'specification'):
                digital_form[form_prop] = reduce_value(filter_empty(digital_form[form_prop], u''))

        # Combine digital forms and transfer options into a single complex struct

        df_len = len(digital_forms)
        to_len = len(transfer_opts)
        parsed_forms = []

        for idx in xrange(0, max(df_len, to_len)):
            digital_form = {}.fromkeys(_iso_definitions[prop], u'')

            if idx < df_len:
                digital_form.update(i for i in digital_forms[idx].items() if i[1])
            if idx < to_len:
                digital_form.update(i for i in transfer_opts[idx].items() if i[1])

            if any(digital_form.values()):
                parsed_forms.append(digital_form)

        return get_default_for_complex(prop, parsed_forms)
Esempio n. 4
0
def _update_property(tree_to_update, xpath_root, xpaths, values):
    """
    Default update operation for a single parser property. If xpaths contains one xpath,
    then one element per value will be inserted at that location in the tree_to_update;
    otherwise, the number of values must match the number of xpaths.
    """

    # Inner function to update a specific XPATH with the values provided

    def update_element(elem, idx, root, path, vals):
        """ Internal helper function to encapsulate single item update """

        has_root = bool(root and len(path) > len(root)
                        and path.startswith(root))
        path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

        if attr:
            removed = [get_element(elem, path)]
            remove_element_attributes(removed[0], attr)
        elif not has_root:
            removed = wrap_value(remove_element(elem, path))
        else:
            path = get_xpath_branch(root, path)
            removed = [] if idx != 0 else [
                remove_element(e, path, True)
                for e in get_elements(elem, root)
            ]

        if not vals:
            return removed

        items = []

        for i, val in enumerate(wrap_value(vals)):
            elem_to_update = elem

            if has_root:
                elem_to_update = insert_element(elem, (i + idx), root)

            val = val.decode('utf-8') if not isinstance(val,
                                                        string_types) else val
            if not attr:
                items.append(insert_element(elem_to_update, i, path, val))
            elif path:
                items.append(
                    insert_element(elem_to_update, i, path, **{attr: val}))
            else:
                set_element_attributes(elem_to_update, **{attr: val})
                items.append(elem_to_update)

        return items

    # Code to update each of the XPATHs with each of the values

    xpaths = reduce_value(xpaths)
    values = filter_empty(values)

    if isinstance(xpaths, string_types):
        return update_element(tree_to_update, 0, xpath_root, xpaths, values)
    else:
        each = []

        for index, xpath in enumerate(xpaths):
            value = values[index] if values else None
            each.extend(
                update_element(tree_to_update, index, xpath_root, xpath,
                               value))

        return each