def getMetadataStandards(self):
    """Return the community metadata schemas advertised by the OAI-PMH endpoint.

    Sends a ``ListMetadataFormats`` request to ``self.endpoint`` (an existing
    query string is dropped first) and reads every ``oai:metadataFormat``
    entry of the answer. Every schema URI found is appended to
    ``self.namespaces``, including the ones left out of the result.

    Returns:
        dict: ``{metadataPrefix: [schema_uri]}`` for each format whose schema
        URI contains none of the domain-agnostic markers below. Empty when
        the endpoint delivered no content or the content is not well-formed
        XML.
    """
    # Schema URIs containing one of these substrings are treated as
    # domain-agnostic standards and are only logged, not returned.
    # TODO expand filters
    agnostic_markers = ('datacite.org', 'openarchives.org', 'purl.org/dc/')
    schemas = {}

    # e.g. http://ws.pangaea.de/oai/provider?verb=ListMetadataFormats
    oai_endpoint = self.endpoint.split('?')[0]
    oai_listmetadata_url = oai_endpoint + '?verb=ListMetadataFormats'
    requestHelper = RequestHelper(url=oai_listmetadata_url, logInst=self.logger)
    requestHelper.setAcceptType(AcceptTypes.xml)
    response_type, xml = requestHelper.content_negotiate(self.metric_id)

    # NOTE(review): content_negotiate presumably hands back nothing usable
    # when the request fails - confirm against RequestHelper. Either way a
    # missing body means "no schemas" instead of an AttributeError.
    content = getattr(xml, 'content', None)
    if not content:
        self.logger.info(
            '{0} : No response content retrieved from OAI-PMH endpoint'.format(self.metric_id))
        return schemas

    try:
        root = etree.fromstring(content)
    except (etree.XMLSyntaxError, ValueError):
        self.logger.info(
            '{0} : Could not parse XML response retrieved from OAI-PMH endpoint'.format(self.metric_id))
        return schemas

    metadata_nodes = root.xpath(
        '//oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat',
        namespaces=OAIMetadataProvider.oai_namespaces)
    for node in metadata_nodes:
        ele = etree.XPathEvaluator(node, namespaces=OAIMetadataProvider.oai_namespaces).evaluate
        # <metadataPrefix>oai_dc</metadataPrefix>
        metadata_prefix = ele('string(oai:metadataPrefix/text())')
        # <schema>http://www.openarchives.org/OAI/2.0/oai_dc.xsd</schema>
        metadata_schema = ele('string(oai:schema/text())').strip()
        self.namespaces.append(metadata_schema)
        # TODO there can be more than one OAI-PMH endpoint,
        # https://www.re3data.org/repository/r3d100011221
        if any(marker in metadata_schema for marker in agnostic_markers):
            self.logger.info(
                '{0} : Skipping domain-agnostic standard listed in OAI-PMH - {1}'.format(
                    self.metric_id, metadata_prefix))
        else:
            schemas[metadata_prefix] = [metadata_schema]
    return schemas
def lookup_re3data(self):
    """Locate the repository's re3data record and load its metadata.

    Lookup chain: ``self.client_id`` -> re3data DOI (from
    ``RepositoryHelper.DATACITE_REPOSITORIES``) -> re3data search API ->
    link to the repository record -> raw record, which is stored in
    ``self.re3metadata_raw`` and then passed on through
    ``self.parseRepositoryMetadata()``.

    Nothing is done unless both ``self.client_id`` and ``self.pid_scheme``
    are set. Every step that yields no usable data is logged as a warning
    and ends the lookup.
    """
    if not (self.client_id and self.pid_scheme):
        return

    re3doi = RepositoryHelper.DATACITE_REPOSITORIES.get(self.client_id)  # {client_id,re3doi}
    if not re3doi:
        self.logger.warning('No DOI of client id is available from datacite api')
        return

    # Normalise only after the DOI is known to exist; doing it first handed
    # None to idutils for every client id without a re3data entry.
    short_re3doi = idutils.normalize_pid(re3doi, scheme='doi')  # https://doi.org/10.17616/R3XS37
    # pid -> clientId -> repo doi-> re3id, and query repository metadata from re3api
    self.logger.info('Found match re3data (DOI-based) record')
    query_url = Preprocessor.RE3DATA_API + '?query=' + short_re3doi  # https://re3data.org/api/beta/repositories?query=
    q = RequestHelper(url=query_url)
    q.setAcceptType(AcceptTypes.xml)
    re_source, xml = q.content_negotiate(metric_id='RE3DATA')

    content = getattr(xml, 'content', None)
    if not content:
        self.logger.warning('No response received from re3data api for -: ' + str(short_re3doi))
        return
    try:
        root = etree.fromstring(content)
    except (etree.XMLSyntaxError, ValueError) as e:
        self.logger.warning('Malformed re3data (DOI-based) record received: ' + str(e))
        return

    # <link href="https://www.re3data.org/api/beta/repository/r3d100010134" rel="self" />
    links = root.xpath('//link')
    # An empty result list or a link without href yields None here instead
    # of an IndexError / KeyError, so the check below is meaningful.
    re3link = links[0].get('href') if links else None
    if re3link is None:
        self.logger.warning('No re3data metadata record found for -: ' + str(short_re3doi))
        return

    self.logger.info('Found match re3data metadata record')
    # query repository metadata
    q2 = RequestHelper(url=re3link)
    q2.setAcceptType(AcceptTypes.xml)
    re3_source, re3_response = q2.content_negotiate(metric_id='RE3DATA')
    if re3_response is None:
        self.logger.warning('No response received from re3data api for -: ' + str(re3link))
        return
    self.re3metadata_raw = re3_response.content
    self.parseRepositoryMetadata()
def parse_metadata(self):
    """Fetch the XML document at ``self.target_url`` and record its schema locations.

    The document is requested with an XML accept type. When the negotiated
    source is ``'xml'`` and the body parses, every whitespace-separated token
    of every ``xsi:schemaLocation`` attribute is added to ``self.namespaces``.

    Returns:
        tuple: ``(source_name, None)``. ``source_name`` is chosen from
        ``self.link_type``; the metadata slot stays ``None`` because no
        extraction is implemented yet (see TODO below).
    """
    XSI = "http://www.w3.org/2001/XMLSchema-instance"
    sources = self.getEnumSourceNames()
    if self.link_type == 'embedded':
        source_name = sources.LINKED_DATA.value
    elif self.link_type == 'guessed':
        source_name = sources.GUESSED_XML.value
    elif self.link_type == 'negotiated':
        source_name = sources.XML_NEGOTIATED.value
    else:
        source_name = sources.TYPED_LINK.value
    dc_core_metadata = None

    requestHelper = RequestHelper(self.target_url, self.logger)
    requestHelper.setAcceptType(AcceptTypes.xml)
    neg_source, xml_response = requestHelper.content_negotiate('FsF-F2-01M')
    if requestHelper.getHTTPResponse() is None:
        return source_name, dc_core_metadata

    self.logger.info('FsF-F2-01M : Trying to extract/parse metadata from -: {}'.format(source_name))
    if neg_source != 'xml':
        self.logger.info('FsF-F2-01M : Expected XML but content negotiation responded -: ' + str(neg_source))
        return source_name, dc_core_metadata

    try:
        tree = lxml.etree.XML(xml_response)
    except (lxml.etree.XMLSyntaxError, ValueError) as e:
        # Remote content is not guaranteed to be well-formed.
        self.logger.warning('FsF-F2-01M : Could not parse XML response -: ' + str(e))
        return source_name, dc_core_metadata

    schema_locations = tree.xpath("//*/@xsi:schemaLocation", namespaces={'xsi': XSI})
    # dict.fromkeys removes duplicates but keeps document order. Tokens are
    # accumulated: re-assigning self.namespaces per attribute kept only one
    # arbitrary schemaLocation and discarded what was collected before.
    for schema_location in dict.fromkeys(schema_locations):
        for token in schema_location.split():
            if token not in self.namespaces:
                self.namespaces.append(token)
    # TODO: implement some XSLT to handle the XML..
    return source_name, dc_core_metadata
def getMetadataStandards(self):
    """Return the output schemas advertised by the OGC CSW endpoint.

    Sends a ``GetCapabilities`` request to ``self.endpoint`` (an existing
    query string is dropped first) and reads every ``ows:Value`` below an
    ``ows:Parameter`` named ``outputSchema``. Schema URIs not yet present in
    ``self.namespaces`` are appended to it.

    Returns:
        dict: ``{schema_uri: schema_uri}`` for every non-empty value found.
        Empty when nothing was returned or the response could not be
        processed.
    """
    csw_endpoint = self.endpoint.split('?')[0]
    csw_listmetadata_url = csw_endpoint + '?service=CSW&request=GetCapabilities'
    requestHelper = RequestHelper(url=csw_listmetadata_url, logInst=self.logger)
    requestHelper.setAcceptType(AcceptTypes.xml)
    response_type, xml = requestHelper.content_negotiate(self.metric_id)
    schemas = {}
    if not xml:
        return schemas

    try:
        root = etree.fromstring(requestHelper.response_content)
        metadata_nodes = root.xpath(
            '//ows:Parameter[@name="outputSchema"]/ows:Value',
            namespaces=OGCCSWMetadataProvider.csw_namespaces)
        for node in metadata_nodes:
            if not node.text:
                continue
            schema_uri = str(node.text)
            if schema_uri not in self.namespaces:
                self.namespaces.append(schema_uri)
            schemas[schema_uri] = schema_uri
    except Exception:
        # Still best effort, but no longer a bare ``except`` that would also
        # swallow KeyboardInterrupt and SystemExit.
        self.logger.info(
            '{0} : Could not parse XML response retrieved from OGC CSW endpoint'.format(self.metric_id))
    return schemas
def parse_metadata(self):
    """Retrieve DataCite JSON for ``self.pid_url`` and map it to the internal metadata dict.

    The negotiated JSON is run through the jmespath expression in
    ``self.metadata_mapping.value``. When the mapping yields data, the
    DataCite namespace is registered in ``self.namespaces``, missing creator
    names are assembled from the first/last name lists, related resources
    are stripped of ``None`` values, and list values are joined into strings
    unless their key is in ``self.exclude_conversion``.

    Returns:
        tuple: ``(source_name, dcite_metadata)``. ``source_name`` stays
        ``None`` unless the mapping produced data; any exception raised
        during mapping is logged and whatever was built so far is returned.
    """
    source_name = None
    dcite_metadata = {}
    self.logger.info('FsF-F2-01M : Trying to retrieve datacite metadata')
    helper = RequestHelper(self.pid_url, self.logger)
    helper.setAcceptType(AcceptTypes.datacite_json)
    neg_source, ext_meta = helper.content_negotiate('FsF-F2-01M')
    if not ext_meta:
        return source_name, dcite_metadata

    try:
        dcite_metadata = jmespath.search(self.metadata_mapping.value, ext_meta)
        if dcite_metadata:
            self.namespaces.append('http://datacite.org/schema/')
            source_name = self.getEnumSourceNames().DATACITE_JSON.value

            if dcite_metadata['creator'] is None:
                given_names = dcite_metadata['creator_first']
                family_names = dcite_metadata['creator_last']
                # default type of creator is []
                both_are_lists = isinstance(given_names, list) and isinstance(family_names, list)
                if both_are_lists and len(given_names) == len(family_names):
                    dcite_metadata['creator'] = [
                        given + " " + family for given, family in zip(given_names, family_names)
                    ]

            related = dcite_metadata.get('related_resources')
            if related:
                self.logger.info('FsF-I3-01M : {0} related resource(s) extracted from -: {1}'.format(
                    len(related), source_name))
                cleaned_relations = []
                for relation in related:
                    scheme_uri = relation.get('scheme_uri')
                    if scheme_uri:
                        self.namespaces.append(scheme_uri)
                    cleaned_relations.append(
                        {field: content for field, content in relation.items() if content is not None})
                dcite_metadata['related_resources'] = cleaned_relations
            else:
                self.logger.info('FsF-I3-01M : No related resource(s) found in Datacite metadata')

            # convert all values (list type) into string except 'creator','license','related_resources'
            for field, content in dcite_metadata.items():
                if field in self.exclude_conversion or not isinstance(content, list):
                    continue
                dcite_metadata[field] = ', '.join(map(str, content))
    except Exception as e:
        self.logger.exception('Failed to extract Datacite Json -: {}'.format(e))
    return source_name, dcite_metadata
def lookup_re3data(self):
    """Locate the repository's re3data record and load its metadata.

    Lookup chain: ``self.client_id`` -> re3data DOI (from
    ``RepositoryHelper.DATACITE_REPOSITORIES``) -> re3data search API ->
    link to the repository record -> raw record, which is stored in
    ``self.re3metadata_raw`` and then passed on through
    ``self.parseRepositoryMetadata()``.

    Nothing is done unless both ``self.client_id`` and ``self.pid_scheme``
    are set. A missing or syntactically invalid DOI is logged as a warning;
    any exception raised while processing the search response is logged as
    a malformed record.
    """
    if not (self.client_id and self.pid_scheme):
        return

    re3doi = RepositoryHelper.DATACITE_REPOSITORIES.get(self.client_id)  # {client_id,re3doi}
    # A registered value that is not a DOI is handled like a missing one.
    if re3doi and not idutils.is_doi(re3doi):
        re3doi = None
    if not re3doi:
        self.logger.warning('FsF-R1.3-01M : No DOI of client id is available from datacite api')
        return

    short_re3doi = idutils.normalize_pid(re3doi, scheme='doi')  # https://doi.org/10.17616/R3XS37
    # pid -> clientId -> repo doi-> re3id, and query repository metadata from re3api
    self.logger.info('FsF-R1.3-01M : Found match re3data (DOI-based) record')
    search_url = Preprocessor.RE3DATA_API + '?query=' + short_re3doi  # https://re3data.org/api/beta/repositories?query=
    search_request = RequestHelper(url=search_url)
    search_request.setAcceptType(AcceptTypes.xml)
    re_source, xml = search_request.content_negotiate(metric_id='RE3DATA')
    try:
        if isinstance(xml, bytes):
            xml = xml.decode().encode()
        root = etree.fromstring(xml)
        # <link href="https://www.re3data.org/api/beta/repository/r3d100010134" rel="self" />
        first_link = root.xpath('//link')[0]
        re3link = first_link.attrib['href']
        if re3link is not None:
            found_message = 'FsF-R1.3-01M : Found match re3data metadata record -: ' + str(re3link)
            self.logger.info(found_message)
            # query repository metadata
            record_request = RequestHelper(url=re3link)
            record_request.setAcceptType(AcceptTypes.xml)
            re3_source, re3_response = record_request.content_negotiate(metric_id='RE3DATA')
            self.re3metadata_raw = re3_response
            self.parseRepositoryMetadata()
    except Exception as e:
        self.logger.warning('FsF-R1.3-01M : Malformed re3data (DOI-based) record received: ' + str(e))
def evaluate(self):
    """Evaluate metric FsF-F1-02D (identifier persistence and resolvability).

    Steps:
      1. Pick the URL to check: ``self.fuji.pid_url`` when an identifier
         scheme is known, ``self.fuji.id`` itself when that scheme is
         ``'url'``.
      2. Request the landing page; keep the extracted embedded metadata in
         ``self.fuji.extruct_result`` only when the answer is HTML and the
         extraction produced a dict.
      3. On status 200, collect the signposting links of the ``Link``
         response header into ``self.fuji.signposting_header_links`` and,
         when no PID scheme is known yet, try to derive one from a
         ``cite-as`` link.
      4. Score: 0.5 / maturity 1 for a persistent scheme, full score /
         maturity 3 when the landing page was accessible as well.

    The outcome is written to ``self.result`` (score, maturity, metric
    tests, output); nothing is returned.
    """
    self.result = Persistence(id=self.metric_number,
                              metric_identifier=self.metric_identifier,
                              metric_name=self.metric_name)
    self.output = PersistenceOutput()
    # ======= CHECK IDENTIFIER PERSISTENCE =======
    self.logger.info(
        'FsF-F1-02D : PID schemes-based assessment supported by the assessment service - {}'.format(
            Mapper.VALID_PIDS.value))
    check_url = None
    signposting_pid = None
    if self.fuji.id_scheme is not None:
        check_url = self.fuji.pid_url
    if self.fuji.id_scheme == 'url':
        self.fuji.origin_url = self.fuji.id
        check_url = self.fuji.id

    if check_url:
        # ======= RETRIEVE METADATA FROM LANDING PAGE =======
        requestHelper = RequestHelper(check_url, self.logger)
        requestHelper.setAcceptType(AcceptTypes.html_xml)  # request
        neg_source, self.fuji.extruct_result = requestHelper.content_negotiate(
            'FsF-F1-02D', ignore_html=False)
        if 'html' not in str(requestHelper.content_type):
            self.logger.info(
                'FsF-F2-01M :Content type is ' + str(requestHelper.content_type) +
                ', therefore skipping embedded metadata (microdata, RDFa) tests')
            self.fuji.extruct_result = {}
        if not isinstance(self.fuji.extruct_result, dict):
            self.fuji.extruct_result = {}
        r = requestHelper.getHTTPResponse()
        response_status = requestHelper.response_status

        if r:
            self.fuji.landing_url = requestHelper.redirect_url
            # in case the test has been repeated because a PID has been found in metadata
            if self.fuji.repeat_pid_check == True and self.fuji.landing_url != self.fuji.input_id:
                self.logger.warning(
                    'FsF-F1-02D : Landing page URL resolved from PID found in metadata does not match with input URL'
                )
                self.logger.warning(
                    'FsF-F2-01M : Seems to be a catalogue entry or alternative representation of the data set, landing page URL resolved from PID found in metadata does not match with input URL'
                )
            if self.fuji.landing_url not in ['https://datacite.org/invalid.html']:
                if response_status == 200:
                    # identify signposting links in header
                    header_link_string = requestHelper.getHTTPResponse().getheader('Link')
                    if header_link_string is not None:
                        self.logger.info(
                            'FsF-F1-02D : Found signposting links in response header of landingpage')
                        # Split only in front of the '<' that opens the next
                        # target, so commas inside a URL or attribute value
                        # no longer break an entry apart.
                        for preparsed_link in re.split(r',\s*(?=<)', header_link_string):
                            parsed_link = preparsed_link.strip().split(';')
                            found_link = parsed_link[0].strip()
                            link_attrs = {'rel': None, 'type': None, 'formats': None}
                            for link_prop in parsed_link[1:]:
                                # Parameters normally follow '; ' - tolerate
                                # that whitespace (a plain startswith() never
                                # matched) and accept unquoted values too.
                                attr_match = re.match(
                                    r'\s*(rel|type|formats)\s*=\s*"?([^"]*)"?', link_prop)
                                if attr_match:
                                    link_attrs[attr_match[1]] = attr_match[2].strip()
                            if found_link:
                                self.fuji.signposting_header_links.append({
                                    'url': found_link[1:-1],  # drop the enclosing <>
                                    'type': link_attrs['type'],
                                    'rel': link_attrs['rel'],
                                    'profile': link_attrs['formats']
                                })

                    # check if there is a cite-as signposting link
                    if self.fuji.pid_scheme is None:
                        signposting_pid_link = self.fuji.get_signposting_links('cite-as')
                        if signposting_pid_link:
                            signposting_pid = signposting_pid_link[0].get('url')
                        if signposting_pid:
                            # Inspect this identifier with an instance; the
                            # bare class was consulted before, so the link
                            # itself was never examined.
                            signidhelper = IdentifierHelper(signposting_pid)
                            if signidhelper.is_persistent:
                                self.logger.info(
                                    'FsF-F1-02D : Found object identifier in signposting header links')
                                self.fuji.pid_scheme = signidhelper.preferred_schema

                    up = urlparse(self.fuji.landing_url)
                    self.fuji.landing_origin = '{uri.scheme}://{uri.netloc}'.format(uri=up)
                    self.fuji.landing_html = requestHelper.getResponseContent()
                    self.fuji.landing_content_type = requestHelper.content_type
                    # url is active, although the identifier is not based on a pid scheme
                    self.output.resolved_url = self.fuji.landing_url
                    self.output.resolvable_status = True
                    self.logger.info('FsF-F1-02D : Object identifier active (status code = 200)')
                    self.fuji.isMetadataAccessible = True
                else:
                    # 401/402/403 and every other non-200 status are reported
                    # the same way.
                    self.fuji.isMetadataAccessible = False
                    self.logger.warning(
                        "FsF-F1-02D : Resource inaccessible, identifier returned http status code -: {code}"
                        .format(code=response_status))
            else:
                self.logger.warning(
                    "FsF-F1-02D : Invalid DOI, identifier resolved to -: {code}".format(
                        code=self.fuji.landing_url))
        else:
            self.fuji.isMetadataAccessible = False
            self.logger.warning(
                "FsF-F1-02D :Resource inaccessible, no response received from -: {}".format(check_url))
            if response_status in [401, 402, 403]:
                self.logger.warning(
                    "FsF-F1-02D : Resource inaccessible, identifier returned http status code -: {code}"
                    .format(code=response_status))
    else:
        self.logger.warning(
            "FsF-F1-02D :Resource inaccessible, could not identify an actionable representation for the given identfier -: {}"
            .format(self.fuji.id))

    if self.fuji.pid_scheme is not None:
        if signposting_pid is None:
            idhelper = IdentifierHelper(self.fuji.id)
            self.fuji.pid_url = idhelper.identifier_url
        else:
            # signposting_pid is the cite-as URL string; indexing it with
            # [0] stored only its first character.
            self.fuji.pid_url = signposting_pid
        self.output.pid_scheme = self.fuji.pid_scheme
        self.output.pid = self.fuji.pid_url
        self.setEvaluationCriteriumScore('FsF-F1-02D-1', 0.5, 'pass')
        self.score.earned = 0.5
        self.maturity = 1
        if self.fuji.isMetadataAccessible:
            # identifier should be based on a persistence scheme and resolvable
            self.setEvaluationCriteriumScore('FsF-F1-02D-2', 0.5, 'pass')
            self.maturity = 3
            self.result.test_status = 'pass'
            self.score.earned = self.total_score
        self.logger.log(
            self.fuji.LOG_SUCCESS,
            'FsF-F1-02D : Persistence identifier scheme -: {}'.format(self.fuji.pid_scheme))
    else:
        self.score.earned = 0
        self.logger.warning(
            'FsF-F1-02D : Not a persistent identifier scheme -: {}'.format(self.fuji.id_scheme))

    self.result.score = self.score
    self.result.maturity = self.maturity
    self.result.metric_tests = self.metric_tests
    self.result.output = self.output
def evaluate(self):
    """Evaluate metric FsF-F1-02D (identifier persistence and resolvability).

    Steps:
      1. Pick the URL to check: the identifier converted to a URL when a
         PID scheme is known, otherwise ``self.fuji.id`` when its scheme is
         ``'url'``. With neither, no request is made.
      2. Request the landing page and keep the extracted embedded metadata
         in ``self.fuji.extruct_result``.
      3. On status 200, collect the signposting links of the ``Link``
         response header into ``self.fuji.signposting_header_links`` and,
         when no PID scheme is known yet, try to derive one from a
         ``cite-as`` link.
      4. Score: the test passes for a persistent scheme; the full score is
         earned only when the landing page was accessible as well.

    The outcome is written to ``self.result`` (score, metric tests,
    output); nothing is returned.
    """
    self.result = Persistence(id=self.fuji.count,
                              metric_identifier=self.metric_identifier,
                              metric_name=self.metric_name)
    self.output = PersistenceOutput()
    # ======= CHECK IDENTIFIER PERSISTENCE =======
    self.logger.info(
        'FsF-F1-02D : PID schemes-based assessment supported by the assessment service - {}'.format(
            Mapper.VALID_PIDS.value))

    # Both names are bound up front: check_url used to be undefined (and the
    # request below raised) when the identifier was neither a PID nor a URL.
    check_url = None
    signposting_pid = None
    if self.fuji.pid_scheme is not None:
        check_url = idutils.to_url(self.fuji.id, scheme=self.fuji.pid_scheme)
    elif self.fuji.id_scheme == 'url':
        check_url = self.fuji.id

    if check_url is None:
        self.logger.warning(
            'FsF-F1-02D :Resource inaccessible, could not identify an actionable representation for the given identifier -: {}'
            .format(self.fuji.id))
    else:
        # ======= RETRIEVE METADATA FROM LANDING PAGE =======
        requestHelper = RequestHelper(check_url, self.logger)
        requestHelper.setAcceptType(AcceptTypes.html)  # request
        neg_source, self.fuji.extruct_result = requestHelper.content_negotiate('FsF-F1-02D')
        r = requestHelper.getHTTPResponse()
        if r:
            self.fuji.landing_url = requestHelper.redirect_url
            # The response is read through getheader() and .status below, so
            # .status is used for every branch; the non-200 branches used to
            # read .status_code instead.
            status = r.status
            if status == 200:
                # identify signposting links in header
                header_link_string = requestHelper.getHTTPResponse().getheader('Link')
                if header_link_string is not None:
                    self.logger.info(
                        'FsF-F1-02D : Found signposting links in response header of landingpage')
                    # Split only in front of the '<' that opens the next
                    # target, so commas inside a URL or attribute value no
                    # longer break an entry apart.
                    for preparsed_link in re.split(r',\s*(?=<)', header_link_string):
                        parsed_link = preparsed_link.strip().split(';')
                        found_link = parsed_link[0].strip()
                        link_attrs = {'rel': None, 'type': None}
                        for link_prop in parsed_link[1:]:
                            # Parameters normally follow '; ' - tolerate that
                            # whitespace (a plain startswith() never matched)
                            # and accept unquoted values too.
                            attr_match = re.match(r'\s*(rel|type)\s*=\s*"?([^"]*)"?', link_prop)
                            if attr_match:
                                link_attrs[attr_match[1]] = attr_match[2].strip()
                        if found_link:
                            self.fuji.signposting_header_links.append({
                                'url': found_link[1:-1],  # drop the enclosing <>
                                'type': link_attrs['type'],
                                'rel': link_attrs['rel']
                            })

                # check if there is a cite-as signposting link
                if self.fuji.pid_scheme is None:
                    signposting_pid_link = self.fuji.get_signposting_links('cite-as')
                    if signposting_pid_link:
                        signposting_pid = signposting_pid_link[0].get('url')
                    if signposting_pid:
                        # Detect schemes on the whole URL, not on its first
                        # character.
                        found_ids = idutils.detect_identifier_schemes(signposting_pid)
                        if len(found_ids) > 1 and 'url' in found_ids:
                            found_ids.remove('url')
                        found_id = found_ids[0] if found_ids else None
                        if found_id is not None and found_id in Mapper.VALID_PIDS.value:
                            self.logger.info(
                                'FsF-F1-02D : Found object identifier in signposting header links')
                            self.fuji.pid_scheme = found_id

                up = urlparse(self.fuji.landing_url)
                self.fuji.landing_origin = '{uri.scheme}://{uri.netloc}'.format(uri=up)
                self.fuji.landing_html = requestHelper.getResponseContent()
                # url is active, although the identifier is not based on a pid scheme
                self.output.resolved_url = self.fuji.landing_url
                self.output.resolvable_status = True
                self.logger.info('FsF-F1-02D : Object identifier active (status code = 200)')
                self.fuji.isMetadataAccessible = True
            else:
                # 401/402/403 and every other non-200 status are reported
                # the same way.
                self.fuji.isMetadataAccessible = False
                self.logger.warning(
                    "Resource inaccessible, identifier returned http status code: {code}".format(
                        code=status))
        else:
            self.fuji.isMetadataAccessible = False
            self.logger.warning(
                "FsF-F1-02D :Resource inaccessible, no response received from: {}".format(check_url))

    if self.fuji.pid_scheme is not None:
        if signposting_pid is None:
            self.fuji.pid_url = idutils.to_url(self.fuji.id, scheme=self.fuji.pid_scheme)
        else:
            # signposting_pid is the cite-as URL string; indexing it with
            # [0] stored only its first character.
            self.fuji.pid_url = signposting_pid
        self.output.pid_scheme = self.fuji.pid_scheme
        self.result.test_status = 'pass'
        self.output.pid = self.fuji.pid_url
        self.setEvaluationCriteriumScore('FsF-F1-02D-1', 0, 'pass')
        if self.fuji.isMetadataAccessible:
            # identifier should be based on a persistence scheme and resolvable
            self.setEvaluationCriteriumScore('FsF-F1-02D-2', 1, 'pass')
            self.score.earned = self.total_score
        self.logger.log(
            self.fuji.LOG_SUCCESS,
            'FsF-F1-02D : Persistence identifier scheme - {}'.format(self.fuji.pid_scheme))
    else:
        self.score.earned = 0
        self.logger.warning(
            'FsF-F1-02D : Not a persistent identifier scheme - {}'.format(self.fuji.id_scheme))

    self.result.score = self.score
    self.result.metric_tests = self.metric_tests
    self.result.output = self.output
def parse_metadata(self):
    """Fetch the XML document at ``self.target_url`` and map it to the internal metadata dict.

    Processing steps:
      1. Request the document with an XML accept type.
      2. Unwrap known envelopes (OAI-PMH, METS, OGC CSW GetRecords /
         GetRecordById) to reach the actual metadata record. For METS the
         envelope itself is mapped as well.
      3. Add all ``xsi:schemaLocation`` tokens and the record's root
         namespace to ``self.namespaces``.
      4. Choose a mapping from the record's root tag (or, for DataCite, its
         namespace) and apply it through ``self.get_mapped_xml_metadata``.
      5. Fill keys missing from the record with the envelope's values.

    Returns:
        tuple: ``(source_name, xml_metadata)``. ``source_name`` is chosen
        from ``self.link_type``; ``xml_metadata`` is ``None`` when nothing
        could be mapped.
    """
    xml_metadata = None
    xml_mapping = None
    envelope_metadata = {}
    XSI = "http://www.w3.org/2001/XMLSchema-instance"

    # 'linked' (like any other unlisted link type) falls through to
    # TYPED_LINK.
    sources = self.getEnumSourceNames()
    if self.link_type == 'embedded':
        source_name = sources.LINKED_DATA.value
    elif self.link_type == 'guessed':
        source_name = sources.GUESSED_XML.value
    elif self.link_type == 'negotiated':
        source_name = sources.XML_NEGOTIATED.value
    else:
        source_name = sources.TYPED_LINK.value

    requestHelper = RequestHelper(self.target_url, self.logger)
    requestHelper.setAcceptType(AcceptTypes.xml)
    neg_source, xml_response = requestHelper.content_negotiate('FsF-F2-01M')
    if requestHelper.getHTTPResponse() is None:
        return source_name, xml_metadata

    self.logger.info('FsF-F2-01M : Trying to extract/parse metadata from -: {}'.format(source_name))
    if neg_source != 'xml':
        self.logger.info('FsF-F2-01M : Expected XML but content negotiation responded -: ' + str(neg_source))
        return source_name, xml_metadata

    try:
        parser = lxml.etree.XMLParser(strip_cdata=False)
        tree = lxml.etree.XML(xml_response, parser)
    except (lxml.etree.XMLSyntaxError, ValueError) as e:
        # Remote content is not guaranteed to be well-formed.
        self.logger.warning('FsF-F2-01M : Could not parse XML response -: ' + str(e))
        return source_name, xml_metadata

    # ---- unwrap envelopes -------------------------------------------------
    envelope_tag = tree.tag
    if envelope_tag.endswith('}OAI-PMH'):
        self.logger.info(
            'FsF-F2-01M : Found OAI-PMH type XML envelope, unpacking \'metadata\' element for further processing'
        )
        metatree = tree.find('.//{*}metadata/*')
    elif envelope_tag.endswith('}mets'):
        self.logger.info(
            'FsF-F2-01M : Found METS type XML envelope, unpacking all \'mods\' elements for further processing'
        )
        envelope_metadata = self.get_mapped_xml_metadata(tree, Mapper.XML_MAPPING_METS.value)
        metatree = tree.find('.//{*}dmdSec/{*}mdWrap/{*}xmlData/*')
    elif envelope_tag.endswith('}GetRecordsResponse'):
        self.logger.info(
            'FsF-F2-01M : Found OGC CSW GetRecords type XML envelope, unpacking \'SearchResults\' element for further processing'
        )
        metatree = tree.find('.//{*}SearchResults/*')
    elif envelope_tag.endswith('}GetRecordByIdResponse'):
        self.logger.info(
            'FsF-F2-01M : Found OGC CSW GetRecordByIdResponse type XML envelope, unpacking metadata element for further processing'
        )
        metatree = tree.find('.//*')
    else:
        metatree = tree

    # ---- identify the record type ------------------------------------------
    if metatree is not None:
        schema_locations = metatree.xpath("//*/@xsi:schemaLocation", namespaces={'xsi': XSI})
        # dict.fromkeys removes duplicates but keeps document order. Tokens
        # are accumulated: re-assigning self.namespaces per attribute kept
        # only one arbitrary schemaLocation.
        for schema_location in dict.fromkeys(schema_locations):
            for token in schema_location.split():
                if token not in self.namespaces:
                    self.namespaces.append(token)

        nsmatch = re.match(r'^\{(.+)\}(.+)$', metatree.tag)
        if nsmatch:
            root_namespace, root_element = nsmatch[1], nsmatch[2]
            if root_namespace not in self.namespaces:
                self.namespaces.append(root_namespace)
        else:
            # Record without a namespace: use its own tag, not the
            # envelope's.
            root_namespace = None
            root_element = metatree.tag

        if root_element == 'codeBook':
            xml_mapping = Mapper.XML_MAPPING_DDI_CODEBOOK.value
            self.logger.info('FsF-F2-01M : Identified DDI codeBook XML based on root tag')
        elif root_element == 'dc':
            xml_mapping = Mapper.XML_MAPPING_DUBLIN_CORE.value
            self.logger.info('FsF-F2-01M : Identified Dublin Core XML based on root tag')
        elif root_element == 'mods':
            xml_mapping = Mapper.XML_MAPPING_MODS.value
            self.logger.info('FsF-F2-01M : Identified MODS XML based on root tag')
        elif root_element == 'eml':
            xml_mapping = Mapper.XML_MAPPING_EML.value
            self.logger.info('FsF-F2-01M : Identified EML XML based on root tag')
        elif root_element == 'MD_Metadata':
            xml_mapping = Mapper.XML_MAPPING_GCMD_ISO.value
            self.logger.info('FsF-F2-01M : Identified ISO 19115 XML based on root tag')
        elif root_namespace and 'datacite.org/schema' in root_namespace:
            xml_mapping = Mapper.XML_MAPPING_DATACITE.value
            self.logger.info('FsF-F2-01M : Identified DataCite XML based on namespace')

    # ---- map record and merge envelope values -----------------------------
    if xml_mapping and metatree is not None:
        xml_metadata = self.get_mapped_xml_metadata(metatree, xml_mapping)
    if envelope_metadata:
        # The record may be absent or of an unknown type; the envelope's
        # values are still returned instead of failing on ``in None``.
        if xml_metadata is None:
            xml_metadata = {}
        for envelope_key, envelope_values in envelope_metadata.items():
            if envelope_key not in xml_metadata:
                xml_metadata[envelope_key] = envelope_values
    return source_name, xml_metadata