Example #1
0
    def _init_data_map(self):
        """
        OVERRIDDEN: Extend the parent's data map with two custom properties:
            metadata_language: text read from one XPATH, with a backup XPATH to a codeListValue attribute
            metadata_contacts: a complex list built on the CONTACTS definition, with an added phone field
        """

        super(CustomIsoParser, self)._init_data_map()

        data_map = self._data_map

        # Simple property: the plain key is parsed if present; the underscore-prefixed key is the fallback

        language = "metadata_language"
        data_map[language] = "language/CharacterString"
        data_map["_" + language] = "language/LanguageCode/@codeListValue"

        # Complex property: take the CONTACTS definition and give it a phone field

        contacts = "metadata_contacts"
        party_xpath = "contact/CI_ResponsibleParty/{ct_path}"

        definition = get_complex_definitions()[CONTACTS]
        definition["phone"] = "{phone}"

        # Location of each contact field, relative to the responsible party element
        contact_paths = (
            ("name", "individualName/CharacterString"),
            ("organization", "organisationName/CharacterString"),
            ("position", "positionName/CharacterString"),
            ("phone", "contactInfo/CI_Contact/phone/CI_Telephone/voice/CharacterString"),
            ("email", "contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString"),
        )
        self._data_structures[contacts] = format_xpaths(
            definition, **{field: party_xpath.format(ct_path=path) for field, path in contact_paths}
        )

        # Record the root for the property, then its getter/setter (parser/updater) pair

        data_map["_" + contacts + "_root"] = "contact"
        data_map[contacts] = ParserProperty(self._parse_complex_list, self._update_complex_list)

        # Finally, make both custom properties known to the parent validation logic

        for custom_prop in (language, contacts):
            self._metadata_props.add(custom_prop)
    def _init_data_map(self):
        """
        OVERRIDDEN: Build the ISO-19115 data map and data structures the first time this is called.

        Data map entries are XPATH strings, except for the complex properties, which are each replaced
        by a ParserProperty pairing a parse method with an update method. The XPATHs for every complex
        property are formatted into the data structures under the same property name.

        :raises InvalidContent: if the root element name is not one of ISO_ROOTS
        """

        if self._data_map is not None:
            return  # Nothing to do: the map has already been built

        # Determine the metadata root: the first of ISO_ROOTS stands in when there is no XML tree

        root_name = ISO_ROOTS[0] if self._xml_tree is None else get_element_name(self._xml_tree)
        if root_name not in ISO_ROOTS:
            raise InvalidContent('Invalid XML root for ISO-19115 standard: {root}', root=root_name)

        data_map = {'_root': root_name}
        for tag_xpaths in (_iso_tag_roots, _iso_tag_formats):
            data_map.update(tag_xpaths)

        structures = {}

        def expand(template, placeholder, paths):
            """ :return: a dict of template formatted once per (key, path), with path filling placeholder """
            return {key: template.format(**{placeholder: path}) for key, path in paths}

        # Attribute details: several candidate XPATHs are provided for the definition source

        attr_xpath = data_map[ATTRIBUTES]
        attr_source = data_map['_attr_src']
        type_source = attr_source.replace('/carrierOfCharacteristics/FC_FeatureAttribute', '')

        structures[ATTRIBUTES] = format_xpaths(
            _iso_definitions[ATTRIBUTES],

            label=attr_xpath.format(ad_path='memberName/LocalName'),
            aliases=attr_xpath.format(ad_path='aliases/LocalName'),  # Not in spec
            definition=attr_xpath.format(ad_path='definition/CharacterString'),

            # Definition source as located under FC_FeatureAttribute
            definition_src=attr_source + '/organisationName/CharacterString',
            _definition_src=attr_source + '/individualName/CharacterString',

            # Same locations with the FC_FeatureAttribute segment removed (FC_FeatureType level)
            __definition_src=type_source + '/organisationName/CharacterString',
            ___definition_src=type_source + '/individualName/CharacterString'
        )

        bbox_xpath = data_map[BOUNDING_BOX]
        structures[BOUNDING_BOX] = format_xpaths(
            _iso_definitions[BOUNDING_BOX],
            **expand(bbox_xpath, 'bbox_path', (
                ('east', 'eastBoundLongitude/Decimal'),
                ('south', 'southBoundLatitude/Decimal'),
                ('west', 'westBoundLongitude/Decimal'),
                ('north', 'northBoundLatitude/Decimal')
            ))
        )

        contact_xpath = data_map[CONTACTS]
        structures[CONTACTS] = format_xpaths(
            _iso_definitions[CONTACTS],
            **expand(contact_xpath, 'ct_path', (
                ('name', 'individualName/CharacterString'),
                ('organization', 'organisationName/CharacterString'),
                ('position', 'positionName/CharacterString'),
                ('email', 'contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString')
            ))
        )

        # Dates: single and multiple share one XPATH; the range pairs the begin and end XPATHs

        date_xpath = data_map[DATES]
        instant = 'TimeInstant/timePosition'
        range_begin = date_xpath.format(type_path='TimePeriod/begin/TimeInstant/timePosition')
        range_end = date_xpath.format(type_path='TimePeriod/end/TimeInstant/timePosition')

        structures[DATES] = {
            DATE_TYPE_MULTIPLE: date_xpath.format(type_path=instant),
            DATE_TYPE_RANGE_BEGIN: range_begin,
            DATE_TYPE_RANGE_END: range_end,
            DATE_TYPE_SINGLE: date_xpath.format(type_path=instant),  # Same as multiple
            DATE_TYPE_RANGE: [range_begin, range_end]
        }

        forms_xpath = data_map[DIGITAL_FORMS]
        structures[DIGITAL_FORMS] = format_xpaths(
            _iso_definitions[DIGITAL_FORMS],
            name=forms_xpath.format(df_path='name/CharacterString'),
            content='',  # Not supported in ISO-19115 (appending to spec)
            decompression=forms_xpath.format(df_path='fileDecompressionTechnique/CharacterString'),
            version=forms_xpath.format(df_path='version/CharacterString'),
            specification=forms_xpath.format(df_path='specification/CharacterString'),
            access_desc=data_map['_access_desc'],
            access_instrs=data_map['_access_instrs'],
            network_resource=data_map['_network_resource']
        )

        # Keywords: every keyword property receives its own copy of the same three XPATHs

        keyword_xpaths = {
            'keyword_root': 'MD_Keywords/keyword',
            'keyword_type': 'MD_Keywords/type/MD_KeywordTypeCode',
            'keyword': 'MD_Keywords/keyword/CharacterString'
        }
        structures.update((keyword_prop, deepcopy(keyword_xpaths)) for keyword_prop in KEYWORD_PROPS)

        # Larger works: fields are spread over the citation, linkage and contact XPATH templates

        works_xpath = data_map[LARGER_WORKS]
        works_citation = data_map['_lw_citation']
        structures[LARGER_WORKS] = format_xpaths(
            _iso_definitions[LARGER_WORKS],
            title=works_xpath.format(lw_path='title/CharacterString'),
            edition=works_xpath.format(lw_path='edition/CharacterString'),
            origin=works_citation.format(lw_path='individualName/CharacterString'),
            online_linkage=data_map['_lw_linkage'].format(lw_path='linkage/URL'),
            other_citation=works_xpath.format(lw_path='otherCitationDetails/CharacterString'),
            date=works_xpath.format(lw_path='editionDate/Date'),
            place=data_map['_lw_contact'].format(lw_path='address/CI_Address/city/CharacterString'),
            info=works_citation.format(lw_path='organisationName/CharacterString')
        )

        step_xpath = data_map[PROCESS_STEPS]
        structures[PROCESS_STEPS] = format_xpaths(
            _iso_definitions[PROCESS_STEPS],
            **expand(step_xpath, 'ps_path', (
                ('description', 'description/CharacterString'),
                ('date', 'dateTime/DateTime'),
                ('sources', 'source/LI_Source/sourceCitation/CI_Citation/alternateTitle/CharacterString')
            ))
        )

        # Raster info is stored under RASTER_INFO but formatted from the RASTER_DIMS definition

        raster_xpath = data_map[RASTER_INFO]
        structures[RASTER_INFO] = format_xpaths(
            _iso_definitions[RASTER_DIMS],
            **expand(raster_xpath, 'ri_path', (
                ('type', 'dimensionName/MD_DimensionNameTypeCode'),
                ('_type', 'dimensionName/MD_DimensionNameTypeCode/@codeListValue'),
                ('size', 'dimensionSize/Integer'),
                ('value', 'resolution/Measure'),
                ('units', 'resolution/Measure/@uom')
            ))
        )

        # Swap the XPATH of each complex property for a ParserProperty; all other entries stay as they are

        handlers = {}
        for props, parser, updater in (
            ((ATTRIBUTES,), self._parse_attribute_details, self._update_attribute_details),
            ((CONTACTS, PROCESS_STEPS), self._parse_complex_list, self._update_complex_list),
            ((BOUNDING_BOX, LARGER_WORKS), self._parse_complex, self._update_complex),
            ((DATES,), self._parse_dates, self._update_dates),
            ((DIGITAL_FORMS,), self._parse_digital_forms, self._update_digital_forms),
            (KEYWORD_PROPS, self._parse_keywords, self._update_keywords),
            ((RASTER_INFO,), self._parse_raster_info, self._update_raster_info)
        ):
            for prop in props:
                handlers.setdefault(prop, (parser, updater))  # The first listing of a property wins

        for prop in list(data_map):
            if prop in handlers:
                data_map[prop] = ParserProperty(*handlers[prop])

        self._data_map = data_map
        self._data_structures = structures
    # Supported in separate file ISO-19110: FC_FeatureCatalog
    ('_attr_root', 'FC_FeatureCatalogue'),
    ('_attr_base', 'featureType/FC_FeatureType/carrierOfCharacteristics/FC_FeatureAttribute'),
    ('_attr_def', '{_attr_base}/definitionReference/FC_DefinitionReference/definitionSource/FC_DefinitionSource'),
    ('_attr_src', '{_attr_def}/source/CI_Citation/citedResponsibleParty/CI_ResponsibleParty'),

    # References to separate file ISO-19110 from: MD_Metadata
    ('_attr_citation', 'contentInfo/MD_FeatureCatalogueDescription/featureCatalogueCitation'),
    ('_attr_contact', '{_attr_citation}/CI_Citation/citedResponsibleParty/CI_ResponsibleParty/contactInfo/CI_Contact'),
    ('_attr_contact_url', '{_attr_contact}/onlineResource/CI_OnlineResource/linkage/URL')
))


# Two passes required because of self references within roots dict: an entry may refer to
# another entry that itself still contains a placeholder (e.g. _attr_src -> _attr_def -> _attr_base)
# NOTE(review): presumably each format_xpaths call resolves only one level of placeholders -- confirm
_iso_tag_roots.update(format_xpaths(_iso_tag_roots, **_iso_tag_roots))
_iso_tag_roots.update(format_xpaths(_iso_tag_roots, **_iso_tag_roots))

_iso_tag_formats = {
    # Property-specific xpath roots: the base from which each element repeats
    '_attribute_accuracy_root': '{_dataqual_report}',
    '_attributes_root': 'featureType/FC_FeatureType/carrierOfCharacteristics',
    '_bounding_box_root': '{_idinfo_extent}/geographicElement',
    '_contacts_root': '{_idinfo}/pointOfContact',
    '_dataset_completeness_root': '{_dataqual_report}',
    '_dates_root': '{_idinfo_extent}/temporalElement',
    '_digital_forms_root': '{_distinfo}/distributionFormat',
    '_dist_liability_root': '{_idinfo}/resourceConstraints',
    '_transfer_options_root': '{_distinfo}/transferOptions/MD_DigitalTransferOptions/onLine',
    '_keywords_root': '{_idinfo}/descriptiveKeywords',
    '_larger_works_root': '{_idinfo_aggregate_citation}',
    def _init_data_map(self):
        """
        OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions

        Data map entries are XPATH strings taken from _fgdc_tag_formats, except for the complex
        properties, which are each assigned a ParserProperty pairing a parse method with an update
        method. The XPATHs for every complex property are formatted into the data structures under
        the same property name.

        :raises InvalidContent: if the root element name of the XML tree is not FGDC_ROOT
        """

        if self._data_map is not None:
            return  # Initiation happens once

        # Parse and validate the FGDC metadata root

        if self._xml_tree is None:
            fgdc_root = FGDC_ROOT  # No tree to inspect: assume the expected root
        else:
            fgdc_root = get_element_name(self._xml_tree)

        if fgdc_root != FGDC_ROOT:
            # The message names the FGDC standard: this is not the ISO-19115 parser
            raise InvalidContent('Invalid XML root for FGDC standard: {root}', root=fgdc_root)

        fgdc_data_map = {'_root': FGDC_ROOT}
        fgdc_data_structures = {}

        # Capture and format other complex XPATHs

        ad_format = _fgdc_tag_formats[ATTRIBUTES]
        fgdc_data_structures[ATTRIBUTES] = format_xpaths(
            _fgdc_definitions[ATTRIBUTES],
            label=ad_format.format(ad_path='attrlabl'),
            aliases=ad_format.format(ad_path='attalias'),
            definition=ad_format.format(ad_path='attrdef'),
            definition_src=ad_format.format(ad_path='attrdefs')
        )

        bb_format = _fgdc_tag_formats[BOUNDING_BOX]
        fgdc_data_structures[BOUNDING_BOX] = format_xpaths(
            _fgdc_definitions[BOUNDING_BOX],
            east=bb_format.format(bbox_path='eastbc'),
            south=bb_format.format(bbox_path='southbc'),
            west=bb_format.format(bbox_path='westbc'),
            north=bb_format.format(bbox_path='northbc')
        )

        ct_format = _fgdc_tag_formats[CONTACTS]
        fgdc_data_structures[CONTACTS] = format_xpaths(
            _fgdc_definitions[CONTACTS],

            name=ct_format.format(ct_path='cntperp/cntper'),
            _name=ct_format.format(ct_path='cntorgp/cntper'),  # If not in cntperp

            organization=ct_format.format(ct_path='cntperp/cntorg'),
            _organization=ct_format.format(ct_path='cntorgp/cntorg'),  # If not in cntperp

            position=ct_format.format(ct_path='cntpos'),
            email=ct_format.format(ct_path='cntemail')
        )

        # Dates: the range entry pairs the begin and end XPATHs defined just before it

        dt_format = _fgdc_tag_formats[DATES]
        fgdc_data_structures[DATES] = {
            DATE_TYPE_MULTIPLE: dt_format.format(type_path='mdattim/sngdate/caldate'),
            DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='rngdates/begdate'),
            DATE_TYPE_RANGE_END: dt_format.format(type_path='rngdates/enddate'),
            DATE_TYPE_SINGLE: dt_format.format(type_path='sngdate/caldate')
        }
        fgdc_data_structures[DATES][DATE_TYPE_RANGE] = [
            fgdc_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
            fgdc_data_structures[DATES][DATE_TYPE_RANGE_END]
        ]

        df_format = _fgdc_tag_formats[DIGITAL_FORMS]
        fgdc_data_structures[DIGITAL_FORMS] = format_xpaths(
            _fgdc_definitions[DIGITAL_FORMS],
            name=df_format.format(df_path='digtinfo/formname'),
            content=df_format.format(df_path='digtinfo/formcont'),
            decompression=df_format.format(df_path='digtinfo/filedec'),
            version=df_format.format(df_path='digtinfo/formvern'),
            specification=df_format.format(df_path='digtinfo/formspec'),
            access_desc=df_format.format(df_path='digtopt/onlinopt/oncomp'),
            access_instrs=df_format.format(df_path='digtopt/onlinopt/accinstr'),
            network_resource=df_format.format(df_path='digtopt/onlinopt/computer/networka/networkr')
        )

        lw_format = _fgdc_tag_formats[LARGER_WORKS]
        fgdc_data_structures[LARGER_WORKS] = format_xpaths(
            _fgdc_definitions[LARGER_WORKS],
            title=lw_format.format(lw_path='title'),
            edition=lw_format.format(lw_path='edition'),
            origin=lw_format.format(lw_path='origin'),
            online_linkage=lw_format.format(lw_path='onlink'),
            other_citation=lw_format.format(lw_path='othercit'),
            date=lw_format.format(lw_path='pubdate'),
            place=lw_format.format(lw_path='pubinfo/pubplace'),
            info=lw_format.format(lw_path='pubinfo/publish')
        )

        ps_format = _fgdc_tag_formats[PROCESS_STEPS]
        fgdc_data_structures[PROCESS_STEPS] = format_xpaths(
            _fgdc_definitions[PROCESS_STEPS],
            description=ps_format.format(ps_path='procdesc'),
            date=ps_format.format(ps_path='procdate'),
            sources=ps_format.format(ps_path='srcused')
        )

        # Assign XPATHS and gis_metadata.utils.ParserProperties to fgdc_data_map
        # (_fgdc_tag_formats is only read here, so it is iterated directly without copying)

        for prop, xpath in iteritems(_fgdc_tag_formats):
            if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS):
                fgdc_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

            elif prop in (BOUNDING_BOX, LARGER_WORKS):
                fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

            elif prop == DATES:
                fgdc_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)

            else:
                fgdc_data_map[prop] = xpath  # Simple property: keep the XPATH as is

        self._data_map = fgdc_data_map
        self._data_structures = fgdc_data_structures
    def _init_data_map(self):
        """
        OVERRIDDEN: Initialize required ISO-19115 data map with XPATHS and specialized functions

        Data map entries are XPATH strings, except for the complex properties, which are each replaced
        by a ParserProperty pairing a parse method with an update method. The XPATHs for every complex
        property are formatted into the data structures under the same property name.

        :raises InvalidContent: if the root element name of the XML tree is not one of ISO_ROOTS
        """

        if self._data_map is not None:
            return  # Initiation happens once

        # Parse and validate the ISO metadata root

        if self._xml_tree is None:
            iso_root = ISO_ROOTS[0]  # No tree to inspect: default to the first supported root
        else:
            iso_root = get_element_name(self._xml_tree)

        if iso_root not in ISO_ROOTS:
            raise InvalidContent('Invalid XML root for ISO-19115 standard: {root}', root=iso_root)

        iso_data_map = {'_root': iso_root}
        iso_data_map.update(_iso_tag_roots)
        iso_data_map.update(_iso_tag_formats)

        iso_data_structures = {}

        # Capture and format complex XPATHs

        ad_format = iso_data_map[ATTRIBUTES]
        ft_source = iso_data_map['_attr_src'].replace('/carrierOfCharacteristics/FC_FeatureAttribute', '')

        iso_data_structures[ATTRIBUTES] = format_xpaths(
            _iso_definitions[ATTRIBUTES],
            label=ad_format.format(ad_path='memberName/LocalName'),
            aliases=ad_format.format(ad_path='aliases/LocalName'),  # Not in spec
            definition=ad_format.format(ad_path='definition/CharacterString'),

            # First try to populate attribute definition source from FC_FeatureAttribute
            definition_src=iso_data_map['_attr_src'] + '/organisationName/CharacterString',
            _definition_src=iso_data_map['_attr_src'] + '/individualName/CharacterString',

            # Then assume feature type source is the same as attribute: populate from FC_FeatureType
            __definition_src=ft_source + '/organisationName/CharacterString',
            ___definition_src=ft_source + '/individualName/CharacterString'
        )

        bb_format = iso_data_map[BOUNDING_BOX]
        iso_data_structures[BOUNDING_BOX] = format_xpaths(
            _iso_definitions[BOUNDING_BOX],
            east=bb_format.format(bbox_path='eastBoundLongitude/Decimal'),
            south=bb_format.format(bbox_path='southBoundLatitude/Decimal'),
            west=bb_format.format(bbox_path='westBoundLongitude/Decimal'),
            north=bb_format.format(bbox_path='northBoundLatitude/Decimal')
        )

        ct_format = iso_data_map[CONTACTS]
        iso_data_structures[CONTACTS] = format_xpaths(
            _iso_definitions[CONTACTS],
            name=ct_format.format(ct_path='individualName/CharacterString'),
            organization=ct_format.format(ct_path='organisationName/CharacterString'),
            position=ct_format.format(ct_path='positionName/CharacterString'),
            email=ct_format.format(
                ct_path='contactInfo/CI_Contact/address/CI_Address/electronicMailAddress/CharacterString'
            )
        )

        # Dates: the range entry pairs the begin and end XPATHs defined just before it

        dt_format = iso_data_map[DATES]
        iso_data_structures[DATES] = {
            DATE_TYPE_MULTIPLE: dt_format.format(type_path='TimeInstant/timePosition'),
            DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='TimePeriod/begin/TimeInstant/timePosition'),
            DATE_TYPE_RANGE_END: dt_format.format(type_path='TimePeriod/end/TimeInstant/timePosition'),
            DATE_TYPE_SINGLE: dt_format.format(type_path='TimeInstant/timePosition')  # Same as multiple
        }
        iso_data_structures[DATES][DATE_TYPE_RANGE] = [
            iso_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
            iso_data_structures[DATES][DATE_TYPE_RANGE_END]
        ]

        df_format = iso_data_map[DIGITAL_FORMS]
        iso_data_structures[DIGITAL_FORMS] = format_xpaths(
            _iso_definitions[DIGITAL_FORMS],
            name=df_format.format(df_path='name/CharacterString'),
            content='',  # Not supported in ISO-19115 (appending to spec)
            decompression=df_format.format(df_path='fileDecompressionTechnique/CharacterString'),
            version=df_format.format(df_path='version/CharacterString'),
            specification=df_format.format(df_path='specification/CharacterString'),
            access_desc=iso_data_map['_access_desc'],
            access_instrs=iso_data_map['_access_instrs'],
            network_resource=iso_data_map['_network_resource']
        )

        keywords_structure = {
            'keyword_root': 'MD_Keywords/keyword',
            'keyword_type': 'MD_Keywords/type/MD_KeywordTypeCode',
            'keyword': 'MD_Keywords/keyword/CharacterString'
        }
        # Each keyword property gets its own dict, so changing the structure for one cannot alter the
        # other (the values are plain strings: a shallow copy is a fully independent structure)
        iso_data_structures[KEYWORDS_PLACE] = dict(keywords_structure)
        iso_data_structures[KEYWORDS_THEME] = dict(keywords_structure)

        # Larger works: fields are spread over the citation, linkage and contact XPATH templates

        lw_format = iso_data_map[LARGER_WORKS]
        iso_data_structures[LARGER_WORKS] = format_xpaths(
            _iso_definitions[LARGER_WORKS],
            title=lw_format.format(lw_path='title/CharacterString'),
            edition=lw_format.format(lw_path='edition/CharacterString'),
            origin=iso_data_map['_lw_citation'].format(lw_path='individualName/CharacterString'),
            online_linkage=iso_data_map['_lw_linkage'].format(lw_path='linkage/URL'),
            other_citation=lw_format.format(lw_path='otherCitationDetails/CharacterString'),
            date=lw_format.format(lw_path='editionDate/Date'),
            place=iso_data_map['_lw_contact'].format(lw_path='address/CI_Address/city/CharacterString'),
            info=iso_data_map['_lw_citation'].format(lw_path='organisationName/CharacterString')
        )

        ps_format = iso_data_map[PROCESS_STEPS]
        iso_data_structures[PROCESS_STEPS] = format_xpaths(
            _iso_definitions[PROCESS_STEPS],
            description=ps_format.format(ps_path='description/CharacterString'),
            date=ps_format.format(ps_path='dateTime/DateTime'),
            sources=ps_format.format(
                ps_path='source/LI_Source/sourceCitation/CI_Citation/alternateTitle/CharacterString'
            )
        )

        # Assign XPATHS and gis_metadata.utils.ParserProperties to data map
        # (a copy of the map is iterated because entries are reassigned inside the loop)

        for prop, xpath in iteritems(dict(iso_data_map)):
            if prop == ATTRIBUTES:
                iso_data_map[prop] = ParserProperty(self._parse_attribute_details, self._update_attribute_details)

            elif prop in (CONTACTS, PROCESS_STEPS):
                iso_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

            elif prop in (BOUNDING_BOX, LARGER_WORKS):
                iso_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

            elif prop == DATES:
                iso_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)

            elif prop == DIGITAL_FORMS:
                iso_data_map[prop] = ParserProperty(self._parse_digital_forms, self._update_digital_forms)

            elif prop in (KEYWORDS_PLACE, KEYWORDS_THEME):
                iso_data_map[prop] = ParserProperty(self._parse_keywords, self._update_keywords)

            else:
                iso_data_map[prop] = xpath  # Simple property: keep the XPATH as is

        self._data_map = iso_data_map
        self._data_structures = iso_data_structures
    # Supported in separate file ISO-19110: FC_FeatureCatalog
    ('_attr_root', 'FC_FeatureCatalogue'),
    ('_attr_base', 'featureType/FC_FeatureType/carrierOfCharacteristics/FC_FeatureAttribute'),
    ('_attr_def', '{_attr_base}/definitionReference/FC_DefinitionReference/definitionSource/FC_DefinitionSource'),
    ('_attr_src', '{_attr_def}/source/CI_Citation/citedResponsibleParty/CI_ResponsibleParty'),

    # References to separate file ISO-19110 from: MD_Metadata
    ('_attr_citation', 'contentInfo/MD_FeatureCatalogueDescription/featureCatalogueCitation'),
    ('_attr_contact', '{_attr_citation}/CI_Citation/citedResponsibleParty/CI_ResponsibleParty/contactInfo/CI_Contact'),
    ('_attr_contact_url', '{_attr_contact}/onlineResource/CI_OnlineResource/linkage/URL')
))


# Two passes required because of self references within roots dict: an entry may refer to
# another entry that itself still contains a placeholder (e.g. _attr_src -> _attr_def -> _attr_base)
# NOTE(review): presumably each format_xpaths call resolves only one level of placeholders -- confirm
_iso_tag_roots.update(format_xpaths(_iso_tag_roots, **_iso_tag_roots))
_iso_tag_roots.update(format_xpaths(_iso_tag_roots, **_iso_tag_roots))

_iso_tag_formats = {
    # Property-specific xpath roots: the base from which each element repeats
    '_attribute_accuracy_root': '{_dataqual_report}',
    '_attributes_root': 'featureType/FC_FeatureType/carrierOfCharacteristics',
    '_bounding_box_root': '{_idinfo_extent}/geographicElement',
    '_contacts_root': '{_idinfo}/pointOfContact',
    '_dataset_completeness_root': '{_dataqual_report}',
    '_dates_root': '{_idinfo_extent}/temporalElement',
    '_digital_forms_root': '{_distinfo}/distributionFormat',
    '_dist_liability_root': '{_idinfo}/resourceConstraints',
    '_transfer_options_root': '{_distinfo}/transferOptions/MD_DigitalTransferOptions/onLine',
    '_keywords_root': '{_idinfo}/descriptiveKeywords',
    '_larger_works_root': '{_idinfo_aggregate_citation}',
Example #7
0
    def _init_data_map(self):
        """
        OVERRIDDEN: Build the ArcGIS data map and data structures the first time this is called.

        Data map entries are XPATH strings, except for the complex properties, which are each replaced
        by a ParserProperty pairing a parse method with an update method. The XPATHs for every complex
        property are formatted into the data structures under the same property name.

        :raises InvalidContent: if the root element name is not one of ARCGIS_ROOTS
        """

        if self._data_map is not None:
            return  # Nothing to do: the map has already been built

        # Determine the metadata root: without an XML tree, default to the first (uncapitalized) root

        root_name = ARCGIS_ROOTS[0] if self._xml_tree is None else get_element_name(self._xml_tree)
        if root_name not in ARCGIS_ROOTS:
            raise InvalidContent('Invalid XML root for ArcGIS metadata: {root}', root=root_name)

        data_map = {'_root': root_name}
        data_map.update(_agis_tag_formats)

        structures = {}

        def expand(template, placeholder, paths):
            """ :return: a dict of template formatted once per (key, path), with path filling placeholder """
            return {key: template.format(**{placeholder: path}) for key, path in paths}

        attr_xpath = data_map[ATTRIBUTES]
        structures[ATTRIBUTES] = format_xpaths(
            _agis_definitions[ATTRIBUTES],
            **expand(attr_xpath, 'ad_path', (
                ('label', 'attrlabl'),
                ('aliases', 'attalias'),
                ('definition', 'attrdef'),
                ('definition_src', 'attrdefs')
            ))
        )

        bbox_xpath = data_map[BOUNDING_BOX]
        structures[BOUNDING_BOX] = format_xpaths(
            _agis_definitions[BOUNDING_BOX],
            **expand(bbox_xpath, 'bbox_path', (
                ('east', 'eastBL'),
                ('south', 'southBL'),
                ('west', 'westBL'),
                ('north', 'northBL')
            ))
        )

        contact_xpath = data_map[CONTACTS]
        structures[CONTACTS] = format_xpaths(
            _agis_definitions[CONTACTS],
            **expand(contact_xpath, 'ct_path', (
                ('name', 'rpIndName'),
                ('organization', 'rpOrgName'),
                ('position', 'rpPosName'),
                ('email', 'rpCntInfo/cntAddress/eMailAdd')
            ))
        )

        # Dates: each date type has an element XPATH and, under the underscore-prefixed key,
        # the XPATH of that element's date attribute

        date_xpath = data_map[DATES]
        dates = {}
        for date_type, date_path in (
            (DATE_TYPE_MULTIPLE, 'TM_Instant/tmPosition'),
            (DATE_TYPE_RANGE_BEGIN, 'TM_Period/tmBegin'),
            (DATE_TYPE_RANGE_END, 'TM_Period/tmEnd'),
            (DATE_TYPE_SINGLE, 'TM_Instant/tmPosition')  # Same as multiple dates, but will contain only one
        ):
            dates[date_type] = date_xpath.format(type_path=date_path)
            dates['_' + date_type] = date_xpath.format(type_path=date_path + '/@date')

        # A range pairs the begin and end XPATHs, for the elements and for their attributes alike
        for prefix in ('', '_'):
            dates[prefix + DATE_TYPE_RANGE] = [
                dates[prefix + DATE_TYPE_RANGE_BEGIN],
                dates[prefix + DATE_TYPE_RANGE_END]
            ]
        structures[DATES] = dates

        forms_xpath = data_map[DIGITAL_FORMS]
        structures[DIGITAL_FORMS] = format_xpaths(
            _agis_definitions[DIGITAL_FORMS],
            name=forms_xpath.format(df_path='formatName'),
            content=forms_xpath.format(df_path='formatInfo'),
            decompression=forms_xpath.format(df_path='fileDecmTech'),
            version=forms_xpath.format(df_path='formatVer'),
            specification=forms_xpath.format(df_path='formatSpec'),
            access_desc=data_map['_access_desc'],
            access_instrs=data_map['_access_instrs'],
            network_resource=data_map['_network_resource']
        )

        works_xpath = data_map[LARGER_WORKS]
        structures[LARGER_WORKS] = format_xpaths(
            _agis_definitions[LARGER_WORKS],
            **expand(works_xpath, 'lw_path', (
                ('title', 'resTitle'),
                ('edition', 'resEd'),
                ('origin', 'citRespParty/rpIndName'),
                ('online_linkage', 'citRespParty/rpCntInfo/cntOnlineRes/linkage'),
                ('other_citation', 'otherCitDet'),
                ('date', 'date/pubDate'),
                ('place', 'citRespParty/rpCntInfo/cntAddress/city'),
                ('info', 'citRespParty/rpOrgName')
            ))
        )

        step_xpath = data_map[PROCESS_STEPS]
        structures[PROCESS_STEPS] = format_xpaths(
            _agis_definitions[PROCESS_STEPS],
            **expand(step_xpath, 'ps_path', (
                ('description', 'stepDesc'),
                ('date', 'stepDateTm'),
                ('sources', 'stepSrc/srcDesc')
            ))
        )

        # Raster info is stored under RASTER_INFO but formatted from the RASTER_DIMS definition

        raster_xpath = data_map[RASTER_INFO]
        structures[RASTER_INFO] = format_xpaths(
            _agis_definitions[RASTER_DIMS],
            **expand(raster_xpath, 'ri_path', (
                ('type', '@type'),
                ('size', 'dimSize'),
                ('value', 'dimResol/value'),
                ('units', 'dimResol/value/@uom')
            ))
        )

        # Swap the XPATH of each complex property for a ParserProperty; all other entries stay as they are

        handlers = {}
        for props, parser, updater in (
            ((ATTRIBUTES, CONTACTS, PROCESS_STEPS), self._parse_complex_list, self._update_complex_list),
            ((BOUNDING_BOX, LARGER_WORKS), self._parse_complex, self._update_complex),
            (('attribute_accuracy', 'dataset_completeness'), self._parse_report_item, self._update_report_item),
            ((DATES,), self._parse_dates, self._update_dates),
            ((DIGITAL_FORMS,), self._parse_digital_forms, self._update_digital_forms),
            ((RASTER_INFO,), self._parse_raster_info, self._update_raster_info)
        ):
            for prop in props:
                handlers.setdefault(prop, (parser, updater))  # The first listing of a property wins

        for prop in list(data_map):
            if prop in handlers:
                data_map[prop] = ParserProperty(*handlers[prop])

        self._data_map = data_map
        self._data_structures = structures
    def _init_data_map(self):
        """
        OVERRIDDEN: Initialize the ArcGIS data map with XPATHs and specialized parser properties.

        Does nothing if the data map has already been assigned. Otherwise the metadata root is
        validated against ARCGIS_ROOTS, and self._data_map and self._data_structures are populated.

        :raises InvalidContent: if the root element name is not one of ARCGIS_ROOTS
        """

        if self._data_map is not None:
            return  # Already initialized: this only happens once

        # Without a parsed tree, default to the first (uncapitalized) ArcGIS root
        root = ARCGIS_ROOTS[0] if self._xml_tree is None else get_element_name(self._xml_tree)

        if root not in ARCGIS_ROOTS:
            raise InvalidContent('Invalid XML root for ArcGIS metadata: {root}', root=root)

        data_map = {'_root': root}
        data_map.update(_agis_tag_formats)

        def expand(template, placeholder, **suffixes):
            """ :return: a dict of template formatted once per suffix, keyed by the suffix keyword """
            return {key: template.format(**{placeholder: suffix}) for key, suffix in iteritems(suffixes)}

        # Build the XPATH structure for each complex property

        structures = {}

        structures[ATTRIBUTES] = format_xpaths(
            _agis_definitions[ATTRIBUTES],
            **expand(
                data_map[ATTRIBUTES], 'ad_path',
                label='attrlabl',
                aliases='attalias',
                definition='attrdef',
                definition_src='attrdefs'
            )
        )

        structures[BOUNDING_BOX] = format_xpaths(
            _agis_definitions[BOUNDING_BOX],
            **expand(
                data_map[BOUNDING_BOX], 'bbox_path',
                east='eastBL',
                south='southBL',
                west='westBL',
                north='northBL'
            )
        )

        structures[CONTACTS] = format_xpaths(
            _agis_definitions[CONTACTS],
            **expand(
                data_map[CONTACTS], 'ct_path',
                name='rpIndName',
                organization='rpOrgName',
                position='rpPosName',
                email='rpCntInfo/cntAddress/eMailAdd'
            )
        )

        # Each date type gets a primary XPATH and a '_'-prefixed one targeting the @date attribute;
        # a single date uses the same location as multiple dates, but will contain only one
        date_template = data_map[DATES]
        date_xpaths = {}
        for date_type, type_path in (
            (DATE_TYPE_MULTIPLE, 'TM_Instant/tmPosition'),
            (DATE_TYPE_RANGE_BEGIN, 'TM_Period/tmBegin'),
            (DATE_TYPE_RANGE_END, 'TM_Period/tmEnd'),
            (DATE_TYPE_SINGLE, 'TM_Instant/tmPosition')
        ):
            date_xpaths[date_type] = date_template.format(type_path=type_path)
            date_xpaths['_' + date_type] = date_template.format(type_path=type_path + '/@date')

        # A date range is the [begin, end] pair of XPATHs, for both primary and '_'-prefixed keys
        for prefix in ('', '_'):
            date_xpaths[prefix + DATE_TYPE_RANGE] = [
                date_xpaths[prefix + DATE_TYPE_RANGE_BEGIN],
                date_xpaths[prefix + DATE_TYPE_RANGE_END]
            ]
        structures[DATES] = date_xpaths

        # Digital form access details come from their own data map entries, not the shared template
        digital_form_xpaths = expand(
            data_map[DIGITAL_FORMS], 'df_path',
            name='formatName',
            content='formatInfo',
            decompression='fileDecmTech',
            version='formatVer',
            specification='formatSpec'
        )
        digital_form_xpaths['access_desc'] = data_map['_access_desc']
        digital_form_xpaths['access_instrs'] = data_map['_access_instrs']
        digital_form_xpaths['network_resource'] = data_map['_network_resource']
        structures[DIGITAL_FORMS] = format_xpaths(_agis_definitions[DIGITAL_FORMS], **digital_form_xpaths)

        structures[LARGER_WORKS] = format_xpaths(
            _agis_definitions[LARGER_WORKS],
            **expand(
                data_map[LARGER_WORKS], 'lw_path',
                title='resTitle',
                edition='resEd',
                origin='citRespParty/rpIndName',
                online_linkage='citRespParty/rpCntInfo/cntOnlineRes/linkage',
                other_citation='otherCitDet',
                date='date/pubDate',
                place='citRespParty/rpCntInfo/cntAddress/city',
                info='citRespParty/rpOrgName'
            )
        )

        structures[PROCESS_STEPS] = format_xpaths(
            _agis_definitions[PROCESS_STEPS],
            **expand(
                data_map[PROCESS_STEPS], 'ps_path',
                description='stepDesc',
                date='stepDateTm',
                sources='stepSrc/srcDesc'
            )
        )

        # Swap in ParserProperties for the specialized props; every other entry keeps its XPATH

        for prop in list(data_map):
            if prop in (ATTRIBUTES, CONTACTS, PROCESS_STEPS):
                parser, updater = self._parse_complex_list, self._update_complex_list
            elif prop in (BOUNDING_BOX, LARGER_WORKS):
                parser, updater = self._parse_complex, self._update_complex
            elif prop in ('attribute_accuracy', 'dataset_completeness'):
                parser, updater = self._parse_report_item, self._update_report_item
            elif prop == DATES:
                parser, updater = self._parse_dates, self._update_dates
            elif prop == DIGITAL_FORMS:
                parser, updater = self._parse_digital_forms, self._update_digital_forms
            else:
                continue

            data_map[prop] = ParserProperty(parser, updater)

        self._data_map = data_map
        self._data_structures = structures
Example #9
0
    def _init_data_map(self):
        """
        OVERRIDDEN: Initialize required FGDC data map with XPATHS and specialized functions.

        Does nothing if the data map has already been assigned. Otherwise the metadata root is
        validated against FGDC_ROOT, and self._data_map and self._data_structures are populated.

        :raises InvalidContent: if the root element name of the parsed XML is not FGDC_ROOT
        """

        if self._data_map is not None:
            return  # Initiation happens once

        # Parse and validate the FGDC metadata root

        if self._xml_tree is None:
            fgdc_root = FGDC_ROOT
        else:
            fgdc_root = get_element_name(self._xml_tree)

        if fgdc_root != FGDC_ROOT:
            raise InvalidContent('Invalid XML root for FGDC metadata: {root}', root=fgdc_root)

        fgdc_data_map = {'_root': FGDC_ROOT}
        fgdc_data_structures = {}

        # Capture and format complex XPATHs: one structure per complex property

        ad_format = _fgdc_tag_formats[ATTRIBUTES]
        fgdc_data_structures[ATTRIBUTES] = format_xpaths(
            _fgdc_definitions[ATTRIBUTES],
            label=ad_format.format(ad_path='attrlabl'),
            aliases=ad_format.format(ad_path='attalias'),
            definition=ad_format.format(ad_path='attrdef'),
            definition_src=ad_format.format(ad_path='attrdefs')
        )

        bb_format = _fgdc_tag_formats[BOUNDING_BOX]
        fgdc_data_structures[BOUNDING_BOX] = format_xpaths(
            _fgdc_definitions[BOUNDING_BOX],
            east=bb_format.format(bbox_path='eastbc'),
            south=bb_format.format(bbox_path='southbc'),
            west=bb_format.format(bbox_path='westbc'),
            north=bb_format.format(bbox_path='northbc')
        )

        ct_format = _fgdc_tag_formats[CONTACTS]
        fgdc_data_structures[CONTACTS] = format_xpaths(
            _fgdc_definitions[CONTACTS],

            name=ct_format.format(ct_path='cntperp/cntper'),
            _name=ct_format.format(ct_path='cntorgp/cntper'),  # If not in cntperp

            organization=ct_format.format(ct_path='cntperp/cntorg'),
            _organization=ct_format.format(ct_path='cntorgp/cntorg'),  # If not in cntperp

            position=ct_format.format(ct_path='cntpos'),
            email=ct_format.format(ct_path='cntemail')
        )

        dt_format = _fgdc_tag_formats[DATES]
        fgdc_data_structures[DATES] = {
            DATE_TYPE_MULTIPLE: dt_format.format(type_path='mdattim/sngdate/caldate'),
            DATE_TYPE_RANGE_BEGIN: dt_format.format(type_path='rngdates/begdate'),
            DATE_TYPE_RANGE_END: dt_format.format(type_path='rngdates/enddate'),
            DATE_TYPE_SINGLE: dt_format.format(type_path='sngdate/caldate')
        }
        # A date range is the [begin, end] pair of XPATHs defined above
        fgdc_data_structures[DATES][DATE_TYPE_RANGE] = [
            fgdc_data_structures[DATES][DATE_TYPE_RANGE_BEGIN],
            fgdc_data_structures[DATES][DATE_TYPE_RANGE_END]
        ]

        df_format = _fgdc_tag_formats[DIGITAL_FORMS]
        fgdc_data_structures[DIGITAL_FORMS] = format_xpaths(
            _fgdc_definitions[DIGITAL_FORMS],
            name=df_format.format(df_path='digtinfo/formname'),
            content=df_format.format(df_path='digtinfo/formcont'),
            decompression=df_format.format(df_path='digtinfo/filedec'),
            version=df_format.format(df_path='digtinfo/formvern'),
            specification=df_format.format(df_path='digtinfo/formspec'),
            access_desc=df_format.format(df_path='digtopt/onlinopt/oncomp'),
            access_instrs=df_format.format(df_path='digtopt/onlinopt/accinstr'),
            network_resource=df_format.format(df_path='digtopt/onlinopt/computer/networka/networkr')
        )

        lw_format = _fgdc_tag_formats[LARGER_WORKS]
        fgdc_data_structures[LARGER_WORKS] = format_xpaths(
            _fgdc_definitions[LARGER_WORKS],
            title=lw_format.format(lw_path='title'),
            edition=lw_format.format(lw_path='edition'),
            origin=lw_format.format(lw_path='origin'),
            online_linkage=lw_format.format(lw_path='onlink'),
            other_citation=lw_format.format(lw_path='othercit'),
            date=lw_format.format(lw_path='pubdate'),
            place=lw_format.format(lw_path='pubinfo/pubplace'),
            info=lw_format.format(lw_path='pubinfo/publish')
        )

        ps_format = _fgdc_tag_formats[PROCESS_STEPS]
        fgdc_data_structures[PROCESS_STEPS] = format_xpaths(
            _fgdc_definitions[PROCESS_STEPS],
            description=ps_format.format(ps_path='procdesc'),
            date=ps_format.format(ps_path='procdate'),
            sources=ps_format.format(ps_path='srcused')
        )

        ri_format = _fgdc_tag_formats[RASTER_INFO]
        fgdc_data_structures[RASTER_INFO] = format_xpaths(
            _fgdc_definitions[RASTER_INFO],

            dimensions=ri_format.format(ri_path='rasttype'),
            row_count=ri_format.format(ri_path='rowcount'),
            column_count=ri_format.format(ri_path='colcount'),
            vertical_count=ri_format.format(ri_path='vrtcount'),

            # Resolutions are built from two separate base XPATHs: '_raster_resolution' for the
            # primary keys and '__raster_resolution' for the '_'-prefixed alternates
            x_resolution=_fgdc_tag_formats['_raster_resolution'] + '/absres',
            _x_resolution=_fgdc_tag_formats['__raster_resolution'] + '/longres',
            y_resolution=_fgdc_tag_formats['_raster_resolution'] + '/ordres',
            _y_resolution=_fgdc_tag_formats['__raster_resolution'] + '/latres',
        )

        # Assign XPATHS and gis_metadata.utils.ParserProperties to fgdc_data_map

        fgdc_data_formats = dict(_fgdc_tag_formats)  # Iterate over a plain dict copy of the formats

        for prop, xpath in iteritems(fgdc_data_formats):
            if prop in (ATTRIBUTES, CONTACTS, DIGITAL_FORMS, PROCESS_STEPS):
                fgdc_data_map[prop] = ParserProperty(self._parse_complex_list, self._update_complex_list)

            elif prop in (BOUNDING_BOX, LARGER_WORKS):
                fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_complex)

            elif prop == DATES:
                fgdc_data_map[prop] = ParserProperty(self._parse_dates, self._update_dates)

            elif prop == RASTER_INFO:
                # Raster info is read with the generic complex parser, but has its own updater
                fgdc_data_map[prop] = ParserProperty(self._parse_complex, self._update_raster_info)

            else:
                fgdc_data_map[prop] = xpath  # Simple property: keep the XPATH as is

        self._data_map = fgdc_data_map
        self._data_structures = fgdc_data_structures