def _update_keywords(self, **update_props):
    """
    Update operation for ISO type-specific Keywords metadata: Theme or Place.

    :param update_props: must contain 'tree_to_update' (the XML tree being
        modified), 'prop' (the keyword property name) and 'values' (the new
        keyword values to write)
    :return: the elements inserted for the updated keywords, or an empty list
        if prop is not a supported keyword property
    """

    tree_to_update = update_props['tree_to_update']
    prop = update_props['prop']
    values = update_props['values']

    keywords = []

    if prop in KEYWORD_PROPS:
        # XPATH configuration for this keyword type (Theme or Place)
        xpath_root = self._data_map['_keywords_root']
        xpath_map = self._data_structures[prop]

        xtype = xpath_map['keyword_type']
        xroot = xpath_map['keyword_root']
        xpath = xpath_map['keyword']
        ktype = KEYWORD_TYPES[prop]

        # Remove descriptiveKeyword nodes according to type
        # NOTE(review): remove_element is passed xpath_root rather than the
        # matched element — presumably it removes the node(s) at that path;
        # confirm it cannot drop keyword sections of a *different* type
        for element in get_elements(tree_to_update, xpath_root):
            if get_element_text(element, xtype).lower() == ktype.lower():
                remove_element(tree_to_update, xpath_root)

        # Recreate a single keywords section and tag it with the keyword type
        element = insert_element(tree_to_update, 0, xpath_root)
        insert_element(element, 0, xtype, ktype)  # Add the type node

        keywords.extend(update_property(element, xroot, xpath, prop, values))

    return keywords
def _update_keywords(self, **update_props):
    """ Update operation for ISO type-specific Keywords metadata: Theme or Place """

    tree_to_update = update_props['tree_to_update']
    prop = update_props['prop']
    values = update_props['values']

    # Map each supported keyword property onto its ISO keyword type value
    keyword_types = {
        KEYWORDS_PLACE: KEYWORD_TYPE_PLACE,
        KEYWORDS_THEME: KEYWORD_TYPE_THEME,
    }

    if prop not in keyword_types:
        return []

    ktype = keyword_types[prop]

    # XPATH configuration for this keyword type
    xpath_root = self._data_map['_keywords_root']
    xpath_map = self._data_structures[prop]

    xtype = xpath_map['keyword_type']
    xroot = xpath_map['keyword_root']
    xpath = xpath_map['keyword']

    # Remove descriptiveKeyword nodes according to type
    for keyword_section in get_elements(tree_to_update, xpath_root):
        if get_element_text(keyword_section, xtype).lower() == ktype.lower():
            remove_element(tree_to_update, xpath_root)

    # Recreate the keywords section, add the type node, then write the values
    new_section = insert_element(tree_to_update, 0, xpath_root)
    insert_element(new_section, 0, xtype, ktype)

    return list(update_property(new_section, xroot, xpath, prop, values))
def update_element(elem, idx, root, path, vals):
    """
    Internal helper function to encapsulate single item update.

    :param elem: the parent element under which the update takes place
    :param idx: insertion index used when new root elements are created
    :param root: optional XPATH root under which each new value is inserted
    :param path: XPATH to the target, optionally ending in '/@attr' for attributes
    :param vals: the new value(s) to write; falsy means remove-only
    :return: the removed element(s) when vals is empty, otherwise the
        inserted or updated element(s)
    """

    has_root = bool(root and len(path) > len(root) and path.startswith(root))
    path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

    if attr:
        # Attribute update: clear the existing attribute value first
        removed = [get_element(elem, path)]
        remove_element_attributes(removed[0], attr)
    elif not has_root:
        removed = wrap_value(remove_element(elem, path))
    else:
        # Remove the branch below the root, but only on the first call (idx == 0)
        path = get_xpath_branch(root, path)
        removed = [] if idx != 0 else [
            remove_element(e, path, True) for e in get_elements(elem, root)
        ]

    if not vals:
        return removed

    items = []

    for i, val in enumerate(wrap_value(vals)):
        elem_to_update = elem

        if has_root:
            elem_to_update = insert_element(elem, (i + idx), root)

        # Normalize byte strings to text before insertion
        val = val.decode('utf-8') if not isinstance(val, string_types) else val

        if not attr:
            items.append(insert_element(elem_to_update, i, path, val))
        elif path:
            items.append(insert_element(elem_to_update, i, path, **{attr: val}))
        else:
            # BUG FIX: an attribute-only XPATH ('@attr') leaves path empty after
            # get_xpath_tuple; set the attribute directly on the target element
            # instead of inserting at an empty path (matches the sibling
            # implementation of update_element in this file)
            set_element_attributes(elem_to_update, **{attr: val})
            items.append(elem_to_update)

    return items
def _update_report_item(self, **update_props):
    """ Update the text for each element at the configured path if attribute matches """

    tree_to_update = update_props['tree_to_update']
    prop = update_props['prop']
    values = wrap_value(update_props['values'])
    xroot = self._get_xroot_for(prop)

    attr_key = 'type'
    # Determine the 'type' attribute value identifying this report item
    attr_val = {
        'attribute_accuracy': 'DQQuanAttAcc',
        'dataset_completeness': 'DQCompOm',
    }.get(prop, u'')

    # Clear (make empty) all elements carrying the matching type attribute
    for report_elem in get_elements(tree_to_update, xroot):
        if get_element_attributes(report_elem).get(attr_key) == attr_val:
            clear_element(report_elem)

    # Remove all empty elements, including those previously cleared
    remove_empty_element(tree_to_update, xroot)

    # Insert a correctly attributed element for each new value
    attrs = {attr_key: attr_val}

    updated = []
    for pos, val in enumerate(values):
        parent = insert_element(tree_to_update, pos, xroot, **attrs)
        updated.append(insert_element(parent, pos, 'measDesc', val))

    return updated
def test_iso_parser(self): """ Tests behavior unique to the ISO parser """ # Remove the attribute details href attribute iso_element = get_remote_element(self.iso_file) for citation_element in get_elements(iso_element, _iso_tag_formats["_attr_citation"]): removed = remove_element_attributes(citation_element, "href") # Assert that the href attribute was removed and a different one was read in iso_parser = IsoParser(element_to_string(iso_element)) attribute_href = iso_parser._attr_details_file_url self.assertIsNotNone(removed, "ISO file URL was not removed") self.assertIsNotNone(attribute_href, "ISO href attribute was not read in") self.assertNotEqual(attribute_href, removed, "ISO href attribute is the same as the one removed") # Remove the attribute details linkage attribute iso_element = get_remote_element(self.iso_file) for linkage_element in get_elements(iso_element, _iso_tag_formats["_attr_contact_url"]): removed = get_element_text(linkage_element) clear_element(linkage_element) # Assert that the linkage URL was removed and a different one was read in iso_parser = IsoParser(element_to_string(iso_element)) linkage_url = iso_parser._attr_details_file_url self.assertIsNotNone(removed, "ISO linkage URL was not removed") self.assertIsNotNone(linkage_url, "ISO linkage URL was not read in") self.assertNotEqual(linkage_url, removed, "ISO file URL is the same as the one removed") # Change the href attribute so that it is invalid for citation_element in get_elements(iso_element, _iso_tag_formats["_attr_citation"]): removed = set_element_attributes(citation_element, href="neither url nor file") # Assert that the href attribute was removed and a different one was read in iso_parser = IsoParser(element_to_string(iso_element)) attributes = iso_parser.attributes self.assertIsNone(iso_parser._attr_details_file_url, "Invalid URL stored with parser") self.assertEqual( attributes, TEST_METADATA_VALUES[ATTRIBUTES], "Invalid parsed attributes: {0}".format(attributes) )
def test_fgdc_parser(self): """ Tests behavior unique to the FGDC parser """ # Test dates structure defaults # Remove multiple dates to ensure range is queried fgdc_element = get_remote_element(self.fgdc_file) remove_element(fgdc_element, "idinfo/timeperd/timeinfo/mdattim", True) # Assert that the backup dates are read in successfully fgdc_parser = FgdcParser(element_to_string(fgdc_element)) self.assertEqual(fgdc_parser.dates, {"type": "range", "values": ["Date Range Start", "Date Range End"]}) # Test contact data structure defaults contacts_def = get_complex_definitions()[CONTACTS] # Remove the contact organization completely fgdc_element = get_remote_element(self.fgdc_file) for contact_element in get_elements(fgdc_element, "idinfo/ptcontac"): if element_exists(contact_element, "cntinfo/cntorgp"): clear_element(contact_element) # Assert that the contact organization has been read in fgdc_parser = FgdcParser(element_to_string(fgdc_element)) for key in contacts_def: for contact in fgdc_parser.contacts: self.assertIsNotNone(contact[key], "Failed to read contact.{0}".format(key)) # Remove the contact person completely fgdc_element = get_remote_element(self.fgdc_file) for contact_element in get_elements(fgdc_element, "idinfo/ptcontac"): if element_exists(contact_element, "cntinfo/cntperp"): clear_element(contact_element) # Assert that the contact organization has been read in fgdc_parser = FgdcParser(element_to_string(fgdc_element)) for key in contacts_def: for contact in fgdc_parser.contacts: self.assertIsNotNone(contact[key], "Failed to read updated contact.{0}".format(key))
def _parse_report_item(self, prop):
    """ :return: the text for each element at the configured path if type attribute matches"""

    # The 'type' attribute value identifying this report item (None if unknown)
    item_type = {
        'attribute_accuracy': 'DQQuanAttAcc',
        'dataset_completeness': 'DQCompOm',
    }.get(prop)

    xroot = self._get_xroot_for(prop)

    # Convert each element to a dict, keeping only those with a matching type
    parsed_dicts = [element_to_dict(e) for e in get_elements(self._xml_tree, xroot)]
    children = flatten_items(d['children'] for d in parsed_dicts if d['attributes'].get('type') == item_type)

    return reduce_value([child['text'] for child in children if child['name'] == 'measDesc'])
def parse_complex_list(tree_to_parse, xpath_root, xpath_map, complex_key):
    """ Creates and returns a list of Dictionary data structures parsed from the metadata.
    :param tree_to_parse: the XML tree compatible with element_utils to be parsed
    :param xpath_root: the XPATH location of each structure inside the parent element
    :param xpath_map: a dict of XPATHs corresponding to a complex definition
    :param complex_key: indicates which complex definition describes each structure
    """

    parsed = (
        parse_complex(element, xpath_root, xpath_map, complex_key)
        for element in get_elements(tree_to_parse, xpath_root)
    )

    # Drop falsy (empty) structures from the result
    return [complex_struct for complex_struct in parsed if complex_struct]
def _parse_keywords(self, prop):
    """ Parse type-specific keywords from the metadata: Theme or Place """

    if prop not in KEYWORD_PROPS:
        return []

    # XPATH configuration for this keyword type
    xpath_root = self._data_map['_keywords_root']
    xpath_map = self._data_structures[prop]

    xtype = xpath_map['keyword_type']
    xpath = xpath_map['keyword']
    ktype = KEYWORD_TYPES[prop]

    # Collect keywords from every section whose type matches (case-insensitive)
    return [
        keyword
        for section in get_elements(self._xml_tree, xpath_root)
        if get_element_text(section, xtype).lower() == ktype.lower()
        for keyword in get_elements_text(section, xpath)
    ]
def update_element(elem, idx, root, path, vals):
    """
    Internal helper function to encapsulate single item update.

    :param elem: the parent element under which the update takes place
    :param idx: insertion index used when new root elements are created
    :param root: optional XPATH root under which each new value is inserted
    :param path: XPATH to the target, optionally ending in '/@attr' for attributes
    :param vals: the new value(s) to write; falsy means remove-only
    :return: the removed element(s) when vals is empty, otherwise the
        inserted or updated element(s)
    """

    has_root = bool(root and len(path) > len(root) and path.startswith(root))
    path, attr = get_xpath_tuple(path)  # 'path/@attr' to ('path', 'attr')

    if attr:
        # Attribute update: clear the existing attribute value first
        removed = [get_element(elem, path)]
        remove_element_attributes(removed[0], attr)
    elif not has_root:
        removed = wrap_value(remove_element(elem, path))
    else:
        # Remove the branch below the root, but only on the first call (idx == 0)
        path = get_xpath_branch(root, path)
        removed = [] if idx != 0 else [
            remove_element(e, path, True) for e in get_elements(elem, root)
        ]

    if not vals:
        return removed

    items = []

    for i, val in enumerate(wrap_value(vals)):
        elem_to_update = elem

        if has_root:
            elem_to_update = insert_element(elem, (i + idx), root)

        # Normalize byte strings to text before insertion
        val = val.decode('utf-8') if not isinstance(val, string_types) else val

        if not attr:
            items.append(insert_element(elem_to_update, i, path, val))
        elif path:
            items.append(
                insert_element(elem_to_update, i, path, **{attr: val}))
        else:
            # Attribute-only XPATH ('@attr'): set the attribute directly on the
            # target element rather than inserting at an empty path
            set_element_attributes(elem_to_update, **{attr: val})
            items.append(elem_to_update)

    return items
def _parse_keywords(self, prop):
    """ Parse type-specific keywords from the metadata: Theme or Place """

    # Map each supported keyword property onto its keyword type value
    keyword_types = {
        KEYWORDS_PLACE: KEYWORD_TYPE_PLACE,
        KEYWORDS_THEME: KEYWORD_TYPE_THEME,
    }

    if prop not in keyword_types:
        return []

    ktype = keyword_types[prop]

    # XPATH configuration for this keyword type
    xpath_root = self._data_map['_keywords_root']
    xpath_map = self._data_structures[prop]

    xtype = xpath_map['keyword_type']
    xpath = xpath_map['keyword']

    # Gather keywords from every section whose type matches (case-insensitive)
    return [
        keyword
        for section in get_elements(self._xml_tree, xpath_root)
        if get_element_text(section, xtype).lower() == ktype.lower()
        for keyword in get_elements_text(section, xpath)
    ]