def test_get_record(self):
    """Check that GetRecord for a newly created dataset yields a valid,
    non-error OAI-PMH response."""
    created = factories.Dataset(name='dataset_test_api_export',
                                author='Test Plugin')
    repo = OAIPMHRepository()

    # Translate the dataset's repository id field into its OAI-PMH identifier.
    oai_id = repo.record_access._get_oaipmh_id(created.get(repo.id_field))

    params = {'identifier': oai_id, 'metadataPrefix': 'oai_dc'}
    response = repo.handle_request('GetRecord', params, 'REQUEST_URL')

    oai_pmh_format = MetadataFormats().get_metadata_formats('oai_pmh')[0]
    response_record = XMLRecord(oai_pmh_format, response)

    # validate the XML
    assert_true(
        repo._is_valid_oai_pmh_record(response_record.get_xml_dict()))
    assert_false(
        repo._is_error_oai_pmh_record(response_record.get_xml_dict()))
def test_list_identifiers(self):
    """Check that ListIdentifiers over several datasets yields a valid,
    non-error OAI-PMH response."""
    # Create the fixture datasets in a fixed order; only their existence
    # matters, the returned dicts are not needed.
    for dataset_name in ('dataset_test_api_export_01',
                         'bad_dataset_test_api_export',
                         'dataset_test_api_export_02'):
        factories.Dataset(name=dataset_name, author='Test Plugin')

    repo = OAIPMHRepository()
    response = repo.handle_request(
        'ListIdentifiers', {'metadataPrefix': 'oai_dc'}, 'REQUEST_URL')

    oai_pmh_format = MetadataFormats().get_metadata_formats('oai_pmh')[0]
    response_record = XMLRecord(oai_pmh_format, response)

    # validate the XML
    assert_true(
        repo._is_valid_oai_pmh_record(response_record.get_xml_dict()))
    assert_false(
        repo._is_error_oai_pmh_record(response_record.get_xml_dict()))
def convert(self, record):
    """Convert a dataset record into an ``oai_dc`` XML record.

    :param record: record to convert; it must be accepted by
        ``self.can_convert`` and expose ``get_json_dict()``.
    :returns: the result of ``XMLRecord.from_record`` applied to a
        ``Record`` in ``self.output_format`` holding the generated XML.
    :raises TypeError: if ``self.can_convert(record)`` is false.
    """
    if not self.can_convert(record):
        raise TypeError(
            ('Converter is not compatible with the record format {record_format}({record_version}). ' +
             'Accepted format is CKAN {input_format}.').format(
                 record_format=record.get_metadata_format().get_format_name(),
                 record_version=record.get_metadata_format().get_version(),
                 input_format=self.get_input_format().get_format_name()))

    dataset_dict = record.get_json_dict()

    dc = collections.OrderedDict()
    dc['@xmlns:oai_dc'] = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
    dc['@xmlns:dc'] = 'http://purl.org/dc/elements/1.1/'
    dc['@xmlns:xsi'] = 'http://www.w3.org/2001/XMLSchema-instance'
    dc['@xsi:schemaLocation'] = ('http://www.openarchives.org/OAI/2.0/oai_dc/ '
                                 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd')

    # The dataset name is the published identifier. The id used to be
    # assigned to the same key and immediately overwritten (dead code);
    # it now only serves as a fallback when the name is missing or empty.
    dc['dc:identifier'] = dataset_dict.get('name') or dataset_dict.get('id', '')
    dc['dc:creator'] = dataset_dict.get('author', '')
    # Only the leading component before the first '-' is kept.
    dc['dc:date'] = dataset_dict.get('metadata_modified', '2017').split('-')[0]
    dc['dc:title'] = dataset_dict.get('title', '')
    dc['dc:type'] = 'Dataset'

    oai_dc_dict = collections.OrderedDict()
    oai_dc_dict['oai_dc:dc'] = dc

    converted_record = Record(self.output_format,
                              unparse(oai_dc_dict, pretty=True))
    return XMLRecord.from_record(converted_record)
def _is_valid_oai_pmh_record(self, xmldict, metadata_prefix=''):
    """Validate an OAI-PMH response against OAI-PMH plus one metadata schema.

    :param xmldict: dict form of the response; it is serialised with
        ``unparse`` before validation.
    :param metadata_prefix: name of the embedded metadata format; an empty
        value is treated as ``'oai_dc'``.
    :returns: the result of ``XMLRecord.validate`` with the combined schema,
        or ``False`` if building or validating the record raised.
    """
    site_url = config.get('ckan.site_url', '')

    if not metadata_prefix:
        metadata_prefix = 'oai_dc'

    try:
        xml_record = unparse(xmldict)
        oai_pmh_record = XMLRecord(
            MetadataFormats().get_metadata_formats('oai_pmh')[0],
            xml_record)

        # get the format
        metadata_format = MetadataFormats().get_metadata_formats(
            metadata_prefix)[0]
        metadata_schema = metadata_format.get_xsd_url()

        # local xsd for gcmd_dif (nasa hosted is not always available)
        if metadata_prefix == 'gcmd_dif':
            metadata_schema = metadata_schema.replace(
                'http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/',
                site_url + '/package_converter_xsd/')

        # modify xsd due to library bug
        fixed_xsd = (
            '<xs:schema xmlns="http://www.openarchives.org/OAI/2.0/" '
            'xmlns:xs="http://www.w3.org/2001/XMLSchema" '
            'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
            'xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" > '
            '<xs:import namespace="http://www.openarchives.org/OAI/2.0/" '
            'schemaLocation="http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd" /> '
            '<xs:import namespace="{namespace}" schemaLocation="{schema}" /> '
            '</xs:schema>'
        ).format(namespace=metadata_format.get_namespace(),
                 schema=metadata_schema)

        return oai_pmh_record.validate(custom_xsd=fixed_xsd)
    except Exception:
        # Validation is best-effort: any failure means "not valid". The
        # traceback goes to the log instead of stdout, and interpreter-level
        # exceptions (KeyboardInterrupt, SystemExit) are no longer swallowed.
        log.exception('Failed to validate OAI-PMH for format {0}'.format(
            metadata_prefix))
    return False
def _export_dataset(self, ckan_id, oai_identifier, datestamp, format,
                    state='active', entity='package'):
    """Build the OAI-PMH envelope for one entity.

    Entities whose state is not ``'active'`` are enveloped with an empty
    metadata dict. Active entities are exported in ``format`` and enveloped
    with the exported XML as a dict.

    :raises oaipmh_error.CannotDisseminateFormatError: if the export raises
        or yields a falsy record.
    """
    if state == 'active':
        # Convert record
        xml_record = None
        try:
            exported = export_as_record(ckan_id, format, type=entity)
            xml_record = XMLRecord.from_record(exported)
        except Exception as err:
            log.exception(err)

        if xml_record:
            return self._envelop_record(oai_identifier, datestamp,
                                        xml_record.get_xml_dict(), state)
        raise oaipmh_error.CannotDisseminateFormatError()

    return self._envelop_record(oai_identifier, datestamp, {}, state)
def publish(self, doi, pkg=None, context=None, *args, **kwargs):
    """Publish a dataset's metadata for a DOI, or update an existing DOI.

    The dataset is exported with the ``package_export`` action in the
    ``datacite`` format, validated, base64-encoded and sent to
    ``self.datacite_url``: POST to create, or PUT to
    ``<datacite_url>/<doi>`` when ``update=True`` is given as a keyword.

    :param doi: DOI to publish or update.
    :param pkg: dataset dict; ``id`` is required and ``name`` is used for
        the landing URL when present.
    :param context: context passed to ``package_export``; a new empty dict
        is used when omitted.
    :param kwargs: ``update`` (default ``False``) selects update mode.
    :returns: ``(published_doi, None)`` on HTTP 200/201, otherwise
        ``(None, error_message)``.
    """
    # A mutable default would be shared between calls; build it per call.
    if context is None:
        context = {}

    # ``pkg`` defaults to None; report it in the usual error form instead of
    # failing on the subscript below.
    if not pkg:
        return None, 'No dataset provided'

    update_doi = kwargs.get('update', False)

    # dataset data
    package_id = pkg['id']
    url = config.get('ckan.site_url', '') + '/dataset/' + pkg.get(
        'name', pkg['id'])
    if self.url_prefix:
        url = self.url_prefix + pkg.get('name', pkg['id'])

    if update_doi:
        log.debug("*** Updating id = {0}, url = {1}".format(
            package_id, url))
        # check published data match
        published_ids = self.get_doi_identifiers(doi)
        if (published_ids and package_id not in published_ids
                and pkg.get('name') not in published_ids):
            return None, 'Dataset id ({0}, {1}) do not match published ids: [{2}]'.format(
                package_id, pkg.get('name'), ', '.join(published_ids))
    else:
        log.debug("Publishing id = {0}, url = {1}".format(package_id, url))

    # get converted package
    metadata_format = 'datacite'
    try:
        converted_package = toolkit.get_action('package_export')(
            context, {'id': package_id, 'format': metadata_format})
    except toolkit.ObjectNotFound:
        return None, 'Dataset not found'

    xml = converted_package.replace('\n', '').replace('\t', '')

    # Validate
    try:
        converted_record = XMLRecord.from_record(
            Record(
                MetadataFormats().get_metadata_formats(metadata_format)[0],
                xml))
        validation_result = converted_record.validate()
        log.debug("Validation result: {0}".format(validation_result))
    except Exception as e:
        # log.exception records the traceback through the logger rather
        # than printing it to stderr.
        log.exception(
            "Converted Validation FAILED, exception: {0}".format(e))
        validation_result = False

    if not validation_result:
        return None, 'Dataset XML validation failed'

    # encode 64
    xml_bytes = xml.encode('utf-8') if isinstance(xml, str) else xml
    xml_encoded = base64.b64encode(xml_bytes)

    # prepare JSON
    headers = {"Content-Type": "application/vnd.api+json"}
    auth = HTTPBasicAuth(self.account_name, self.account_password)

    attributes = collections.OrderedDict()
    # TODO check for update if this state is correct
    attributes['event'] = "" if update_doi else "publish"
    attributes['doi'] = doi
    attributes['url'] = url
    attributes['xml'] = xml_encoded.decode()

    data = collections.OrderedDict()
    data['id'] = doi
    data['type'] = 'dois'
    data['attributes'] = attributes

    # Named ``payload`` so the ``*args`` parameter is not shadowed.
    payload = {'data': data}
    payload_json = json.dumps(payload)

    datacite_url_endpoint = self.datacite_url
    if update_doi:
        datacite_url_endpoint = self.datacite_url + '/' + doi
    log.debug(
        " REST request send to URL: {0}".format(datacite_url_endpoint))

    send = requests.put if update_doi else requests.post
    r = send(datacite_url_endpoint,
             headers=headers,
             auth=auth,
             data=payload_json)

    if r.status_code == 201 or r.status_code == 200:
        published_doi = r.json().get('data').get('id')
        return published_doi, None

    # Error responses are not guaranteed to carry a JSON body.
    try:
        error_detail = r.json()
    except ValueError:
        error_detail = r.text

    action = 'updating' if update_doi else 'publishing'
    return None, 'Error {0} to DataCite: HTTP Code: {1}, error: {2}'.format(
        action, r.status_code, error_detail)
def test_list_metadata_formats(self):
    """Check that ListMetadataFormats yields a schema-valid, non-error
    OAI-PMH response."""
    response = OAIPMHRepository().handle_request(
        'ListMetadataFormats', {}, 'REQUEST_URL')

    oai_pmh_format = MetadataFormats().get_metadata_formats('oai_pmh')[0]
    response_record = XMLRecord(oai_pmh_format, response)

    # validate the XML
    assert_true(response_record.validate())

    is_error = OAIPMHRepository()._is_error_oai_pmh_record(
        response_record.get_xml_dict())
    assert_false(is_error)