def assert_parser_after_write(self, parser_type, in_file, out_file_path, use_template=False):
    """
    Sets a test value on every supported property, writes the parser out, and asserts
    that a parser built from the reopened output file equals the one that was written.

    :param parser_type: parser class, called as parser_type(in_file, out_file_path) and parser_type(out_file)
    :param in_file: first argument given to the parser under test
    :param out_file_path: second argument given to the parser; reopened after the write for comparison
    :param use_template: passed through unchanged to parser.write
    """

    parser = parser_type(in_file, out_file_path)

    # Looked up once and reused below for both sub-property names and membership checks
    complex_defs = get_complex_definitions()

    # Update each value and read the file in again
    for prop in get_supported_props():

        if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS):
            # List-valued complex properties: two structures with every sub-property populated
            value = [dict.fromkeys(complex_defs[prop], "test"), dict.fromkeys(complex_defs[prop], prop)]
        elif prop in (BOUNDING_BOX, LARGER_WORKS):
            # Single-structure complex properties
            value = dict.fromkeys(complex_defs[prop], "test " + prop)
        elif prop == DATES:
            value = {DATE_TYPE: DATE_TYPE_RANGE, DATE_VALUES: ["test", prop]}
        elif prop in (KEYWORDS_PLACE, KEYWORDS_THEME):
            value = ["test", prop]
        else:
            value = "test " + prop

        # Complex values are passed through get_default_for_complex before assignment
        if prop in complex_defs:
            value = get_default_for_complex(prop, value)

        setattr(parser, prop, value)

    parser.write(use_template=use_template)

    with open(out_file_path) as out_file:
        self.assert_parsers_are_equal(parser, parser_type(out_file))
def test_reparse_process_steps(self):
    """ Tests that empty and populated process steps survive a serialize/reparse cycle in each parser """

    step_keys = get_complex_definitions()[PROCESS_STEPS]

    parsers = (
        ArcGISParser(self.arcgis_metadata),
        FgdcParser(self.fgdc_metadata),
        IsoParser(self.iso_metadata),
    )

    for parser in parsers:

        # Every flavor of "empty" is expected to come back as an empty list
        for empty_steps in (None, [], [{}], [dict.fromkeys(step_keys, u"")]):
            self.assert_reparsed_complex_for(parser, PROCESS_STEPS, empty_steps, [])

        # One step per valid value: all keys but "sources" hold a single comma-joined string
        steps = [
            {
                key: val if key == "sources" else ", ".join(wrap_value(val))
                for key in step_keys
            }
            for val in self.valid_complex_values
        ]

        # The same list serves as both the assigned value and the expected result
        self.assert_reparsed_complex_for(parser, PROCESS_STEPS, steps, steps)
def test_reparse_complex_lists(self):
    """ Tests that attributes, contacts and digital forms survive a serialize/reparse cycle in each parser """

    definitions = get_complex_definitions()
    list_props = (ATTRIBUTES, CONTACTS, DIGITAL_FORMS)

    parsers = (
        ArcGISParser(self.arcgis_metadata),
        FgdcParser(self.fgdc_metadata),
        IsoParser(self.iso_metadata),
    )

    for parser in parsers:

        # First pass: every flavor of "empty" is expected to come back as an empty list
        for prop in list_props:
            sub_props = definitions[prop]

            for empty_list in (None, [], [{}], [dict.fromkeys(sub_props, u"")]):
                self.assert_reparsed_complex_for(parser, prop, empty_list, [])

        # Second pass: valid values (strings and lists for each property in each struct)
        for prop in list_props:
            sub_props = definitions[prop]
            accumulated = []

            for val in self.valid_complex_values:

                # A single structure assigned on its own, compared against its wrapped form
                single = dict.fromkeys(sub_props, val)
                self.assert_reparsed_complex_for(parser, prop, single, wrap_value(single))

                # The list of structures seen so far, grown by a separate structure each time
                accumulated.append(dict.fromkeys(sub_props, val))
                self.assert_reparsed_complex_for(parser, prop, accumulated, wrap_value(accumulated))
def assert_reparsed_complex_for(self, parser, prop, value, target):
    """
    Assigns value to prop on parser, builds a new parser of the same type from
    parser.serialize(), and compares the re-read property against target.

    :param parser: parser instance to update and serialize
    :param prop: name of the property to set and read back
    :param value: value assigned to the property
    :param target: expected value; passed through get_default_for_complex when prop is complex
    """

    setattr(parser, prop, value)

    parser_cls = type(parser)
    parser_name = parser_cls.__name__
    reparsed = getattr(parser_cls(parser.serialize()), prop)

    if prop in get_complex_definitions():
        target = get_default_for_complex(prop, target)

    if isinstance(reparsed, dict):
        # Reparsed is a dict: compare each value with corresponding in target
        for key, actual in iteritems(reparsed):
            label = "{0}.{1}".format(prop, key)
            self.assert_equal_for(parser_name, label, actual, target.get(key, u""))
        return

    if len(reparsed) <= 1:
        # Reparsed is empty or a single-item list: do a single value comparison
        self.assert_equal_for(parser_name, prop, reparsed, target)
        return

    # Reparsed is a multiple-item list: compare each value with corresponding in target
    for idx, item in enumerate(reparsed):
        if isinstance(item, dict):
            for key, actual in iteritems(item):
                label = "{0}.{1}".format(prop, key)
                self.assert_equal_for(parser_name, label, actual, target[idx].get(key, u""))
        else:
            label = "{0}[{1}]".format(prop, idx)
            self.assert_equal_for(parser_name, label, item, target[idx])
def test_validate_simple_values(self):
    """ Runs assert_validates_for with a range of invalid values for every non-complex property """

    complex_props = set(get_complex_definitions().keys())
    simple_props = set(get_supported_props()) - complex_props

    # None plus empty containers, each both bare and wrapped in a list
    invalid_values = (None, [None], {}, [{}], set(), [set()], (), [()])

    validated_parsers = (
        ArcGISParser().validate(),
        FgdcParser().validate(),
        IsoParser().validate(),
    )

    for parser in validated_parsers:
        for prop in simple_props:
            for bad_value in invalid_values:
                self.assert_validates_for(parser, prop, bad_value)
def test_reparse_complex_structs(self):
    """ Tests that bounding box and larger works survive a serialize/reparse cycle in each parser """

    definitions = get_complex_definitions()
    struct_props = (BOUNDING_BOX, LARGER_WORKS)

    parsers = (
        ArcGISParser(self.arcgis_metadata),
        FgdcParser(self.fgdc_metadata),
        IsoParser(self.iso_metadata),
    )

    for parser in parsers:

        # First pass: every flavor of "empty" is expected to come back as an empty dict
        for prop in struct_props:
            for empty_struct in (None, {}, dict.fromkeys(definitions[prop], u"")):
                self.assert_reparsed_complex_for(parser, prop, empty_struct, {})

        # Second pass: each valid value fills every sub-property, and is its own expected result
        for prop in struct_props:
            for val in self.valid_complex_values:
                populated = dict.fromkeys(definitions[prop], val)
                self.assert_reparsed_complex_for(parser, prop, populated, populated)
def test_fgdc_parser(self):
    """ Tests behavior unique to the FGDC parser """

    # Dates structure defaults: with the multiple dates element removed, the range is expected
    dates_element = get_remote_element(self.fgdc_file)
    remove_element(dates_element, "idinfo/timeperd/timeinfo/mdattim", True)

    dates_parser = FgdcParser(element_to_string(dates_element))
    expected_dates = {"type": "range", "values": ["Date Range Start", "Date Range End"]}
    self.assertEqual(dates_parser.dates, expected_dates)

    # Contact structure defaults: the same check runs once per contact sub-element
    contacts_def = get_complex_definitions()[CONTACTS]

    contact_cases = (
        ("cntinfo/cntorgp", "Failed to read contact.{0}"),          # organization variant
        ("cntinfo/cntperp", "Failed to read updated contact.{0}"),  # person variant
    )

    for contact_xpath, failure_message in contact_cases:

        # Start from a fresh copy and clear each contact containing the sub-element
        root = get_remote_element(self.fgdc_file)
        for contact_element in get_elements(root, "idinfo/ptcontac"):
            if element_exists(contact_element, contact_xpath):
                clear_element(contact_element)

        # Every contact key must still be present (not None) after parsing
        contact_parser = FgdcParser(element_to_string(root))
        for key in contacts_def:
            for contact in contact_parser.contacts:
                self.assertIsNotNone(contact[key], failure_message.format(key))
def _init_data_map(self):
    """ Extends the parent data map with a metadata language property and a metadata contacts property """

    super(CustomIsoParser, self)._init_data_map()

    # Basic property: text or list (with backup location referencing codeListValue attribute)

    lang_prop = "metadata_language"

    self._data_map[lang_prop] = "language/CharacterString"                    # Parse from here if present
    self._data_map["_" + lang_prop] = "language/LanguageCode/@codeListValue"  # Otherwise, try from here

    # Complex structure (reuse of contacts structure plus phone)

    ct_prop = "metadata_contacts"

    def contact_xpath(sub_path):
        # Every contact sub-property is located under the responsible party element
        return "contact/CI_ResponsibleParty/{ct_path}".format(ct_path=sub_path)

    # Start from the CONTACTS definition and add a phone placeholder to it
    ct_definition = get_complex_definitions()[CONTACTS]
    ct_definition["phone"] = "{phone}"

    # Reuse CONTACT structure to specify locations per prop (adapted only slightly from parent)
    self._data_structures[ct_prop] = format_xpaths(
        ct_definition,
        name=contact_xpath("individualName/CharacterString"),
        organization=contact_xpath("organisationName/CharacterString"),
        position=contact_xpath("positionName/CharacterString"),
        phone=contact_xpath("contactInfo/CI_Contact/phone/CI_Telephone/voice/CharacterString"),
        email=contact_xpath(
            "contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString"
        ),
    )

    # Set the root and add getter/setter (parser/updater) to the data map
    root_key = "_{prop}_root".format(prop=ct_prop)
    self._data_map[root_key] = "contact"
    self._data_map[ct_prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

    # And finally, let the parent validation logic know about the two new custom properties
    for custom_prop in (lang_prop, ct_prop):
        self._metadata_props.add(custom_prop)
def test_reparse_simple_values(self):
    """ Tests that empty and valid simple values survive a serialize/reparse cycle in each parser """

    # Simple properties: everything supported that is neither complex nor a place/theme keyword
    complex_props = set(get_complex_definitions().keys())
    simple_props = set(get_supported_props()) - complex_props - {KEYWORDS_PLACE, KEYWORDS_THEME}

    empty_vals = ("", u"", [])
    valid_vals = (u"value", [u"item", u"list"])

    # (assigned value, expected value): empties first, each expected as u""; then valid values as-is
    cases = [(val, u"") for val in empty_vals]
    cases.extend((val, val) for val in valid_vals)

    parsers = (
        ArcGISParser(self.arcgis_metadata),
        FgdcParser(self.fgdc_metadata),
        IsoParser(self.iso_metadata),
    )

    for parser in parsers:
        for assigned, expected in cases:
            self.assert_reparsed_simple_for(parser, simple_props, assigned, expected)
ISO_ROOTS = ('MD_Metadata', 'MI_Metadata') KEYWORD_PROPS = (KEYWORDS_PLACE, KEYWORDS_STRATUM, KEYWORDS_TEMPORAL, KEYWORDS_THEME) KEYWORD_TYPES = { KEYWORDS_PLACE: 'place', KEYWORDS_STRATUM: 'stratum', KEYWORDS_TEMPORAL: 'temporal', KEYWORDS_THEME: 'theme' } # For appending digital form content to ISO distribution format specs _DIGITAL_FORMS_CONTENT_DELIM = '@------------------------------@' _iso_definitions = get_complex_definitions() # Define backup locations for attribute sub-properties _iso_definitions[ATTRIBUTES].update({ '_definition_source': '{_definition_src}', '__definition_source': '{__definition_src}', '___definition_source': '{___definition_src}' }) # Define backup location for dimension type property _iso_definitions[RASTER_DIMS]['_type'] = '{_type}' _iso_tag_roots = OrderedDict(( # First process private dependency tags (order enforced by key sorting) ('_content_coverage', 'contentInfo/MD_CoverageDescription'),
from gis_metadata.utils import ATTRIBUTES from gis_metadata.utils import BOUNDING_BOX from gis_metadata.utils import CONTACTS from gis_metadata.utils import DATES from gis_metadata.utils import DIGITAL_FORMS from gis_metadata.utils import KEYWORDS_PLACE, KEYWORDS_THEME from gis_metadata.utils import LARGER_WORKS from gis_metadata.utils import PROCESS_STEPS from gis_metadata.utils import ParserProperty from gis_metadata.utils import format_xpaths, get_complex_definitions FGDC_ROOT = 'metadata' _fgdc_definitions = get_complex_definitions() # Define backup locations for contact sub-properties _fgdc_definitions[CONTACTS].update({ '_name': '{_name}', '_organization': '{_organization}' }) _fgdc_tag_formats = { '_attributes_root': 'eainfo/detailed/attr', '_bounding_box_root': 'idinfo/spdom/bounding', '_contacts_root': 'idinfo/ptcontac', '_dates_root': 'idinfo/timeperd/timeinfo', '_digital_forms_root': 'distinfo/stdorder/digform', '_larger_works_root': 'idinfo/citation/citeinfo/lworkcit/citeinfo', '_process_steps_root': 'dataqual/lineage/procstep',
from gis_metadata.utils import parse_complex_list, parse_property, update_complex_list, update_property xrange = getattr(six.moves, 'xrange') ISO_ROOTS = ('MD_Metadata', 'MI_Metadata') KEYWORD_TYPE_PLACE = 'place' KEYWORD_TYPE_THEME = 'theme' # For appending digital form content to ISO distribution format specs _DIGITAL_FORMS_CONTENT_DELIM = '@------------------------------@' _iso_definitions = get_complex_definitions() # Define backup locations for attribute sub-properties _iso_definitions[ATTRIBUTES].update({ '_definition_source': '{_definition_src}', '__definition_source': '{__definition_src}', '___definition_source': '{___definition_src}' }) _iso_tag_roots = OrderedDict(( # First process private dependency tags (order enforced by key sorting) ('_content_coverage', 'contentInfo/MD_CoverageDescription'), ('_dataqual', 'dataQualityInfo/DQ_DataQuality'), ('_dataqual_lineage', '{_dataqual}/lineage/LI_Lineage'), ('_dataqual_report', '{_dataqual}/report'), ('_distinfo', 'distributionInfo/MD_Distribution'),
from gis_metadata.utils import get_default_for_complex, get_default_for_complex_sub from gis_metadata.utils import parse_complex_list, update_complex_list from gis_metadata.utils import parse_property, update_property from parserutils.collections import flatten_items, reduce_value, wrap_value from parserutils.elements import get_elements, get_element_name, get_element_attributes from parserutils.elements import clear_element, element_to_dict, insert_element, remove_element, remove_empty_element iteritems = getattr(six, 'iteritems') six_moves = getattr(six, 'moves') xrange = getattr(six_moves, 'xrange') ARCGIS_ROOTS = ('metadata', 'Metadata') ARCGIS_NODES = ('dataIdInfo', 'distInfo', 'dqInfo', 'Esri') _agis_definitions = get_complex_definitions() _agis_tag_formats = { '_attribute_accuracy_root': 'dqInfo/report', '_attributes_root': 'eainfo/detailed/attr', '_bounding_box_root': 'dataIdInfo/dataExt/geoEle', '_contacts_root': 'dataIdInfo/idPoC', '_dataset_completeness_root': 'dqInfo/report', '_dates_root': 'dataIdInfo/dataExt/tempEle', '_digital_forms_root': 'distInfo/distFormat', '_dist_liability_root': 'dataIdInfo/resConst', '_transfer_options_root': 'distInfo/distTranOps/onLineSrc', '_larger_works_root': 'dataIdInfo/aggrInfo/aggrDSName', '_process_steps_root': 'dqInfo/dataLineage/prcStep', '_raster_info_root': 'spatRepInfo/GridSpatRep/axisDimension', '_use_constraints_root': 'dataIdInfo/resConst',
from gis_metadata.utils import format_xpaths, get_complex_definitions, get_default_for_complex from gis_metadata.utils import parse_complex_list, update_complex_list from parserutils.collections import flatten_items, reduce_value, wrap_value from parserutils.elements import get_elements, get_element_name, get_element_attributes from parserutils.elements import clear_element, element_to_dict, insert_element, remove_element, remove_empty_element xrange = getattr(six.moves, 'xrange') ARCGIS_ROOTS = ('metadata', 'Metadata') ARCGIS_NODES = ('dataIdInfo', 'distInfo', 'dqInfo', 'Esri') _agis_definitions = get_complex_definitions() _agis_tag_formats = { '_attribute_accuracy_root': 'dqInfo/report', '_attributes_root': 'eainfo/detailed/attr', '_bounding_box_root': 'dataIdInfo/dataExt/geoEle', '_contacts_root': 'dataIdInfo/idPoC', '_dataset_completeness_root': 'dqInfo/report', '_dates_root': 'dataIdInfo/dataExt/tempEle', '_digital_forms_root': 'distInfo/distFormat', '_dist_liability_root': 'dataIdInfo/resConst', '_transfer_options_root': 'distInfo/distTranOps/onLineSrc', '_larger_works_root': 'dataIdInfo/aggrInfo/aggrDSName', '_process_steps_root': 'dqInfo/dataLineage/prcStep', '_use_constraints_root': 'dataIdInfo/resConst',
from gis_metadata.utils import DIGITAL_FORMS from gis_metadata.utils import KEYWORDS_PLACE, KEYWORDS_THEME from gis_metadata.utils import LARGER_WORKS from gis_metadata.utils import PROCESS_STEPS from gis_metadata.utils import RASTER_INFO from gis_metadata.utils import ParserProperty from gis_metadata.utils import format_xpaths, get_complex_definitions, update_complex iteritems = getattr(six, 'iteritems') FGDC_ROOT = 'metadata' _fgdc_definitions = get_complex_definitions() # Define backup locations for contact sub-properties _fgdc_definitions[CONTACTS].update({ '_name': '{_name}', '_organization': '{_organization}' }) # Define backup locations for raster_info sub-properties _fgdc_definitions[RASTER_INFO].update({ '_x_resolution': '{_x_resolution}', '_y_resolution': '{_y_resolution}' }) _fgdc_tag_formats = { '_attributes_root': 'eainfo/detailed/attr',