def record_existing_unique_identifier(package_id, identifier):
    """
    Based on a provided identifier, checks DataCite for an existing DOI
    and saves it to the local CKAN database.
    :param package_id: string
    :param identifier: string
    :returns: the DOI object if saved, otherwise None
    """
    datacite_api = DOIDataCiteAPI()

    # Check this identifier doesn't already exist in the table
    existing_doi = Session.query(DOI).filter(DOI.identifier == identifier).first()
    if not existing_doi:
        # And check against the DataCite service
        try:
            datacite_doi = datacite_api.get(identifier)
            if datacite_doi.text:
                # If a local DOI already points to the current dataset, remove the
                # stale record so the existing identifier can replace it
                doi_for_this_pkg = Session.query(DOI).filter(DOI.package_id == package_id).first()
                if doi_for_this_pkg:
                    Session.delete(doi_for_this_pkg)
                doi = DOI(package_id=package_id, identifier=identifier)
                Session.add(doi)
                Session.commit()
                return doi
        except HTTPError:
            pass
def create_unique_identifier(package_id):
    """
    Create a unique identifier, using the prefix and a random number: 10.5072/0044634
    Checks the random number doesn't exist in the table or the DataCite repository
    @return: the newly created DOI object
    """
    datacite_api = DOIDataCiteAPI()

    while True:
        identifier = os.path.join(get_prefix(), '{0:07}'.format(random.randint(1, 100000)))

        # Check this identifier doesn't exist in the table
        if not Session.query(DOI).filter(DOI.identifier == identifier).count():
            # And check against the DataCite service
            try:
                datacite_doi = datacite_api.get(identifier)
            except HTTPError:
                pass
            else:
                if datacite_doi.text:
                    continue

            doi = DOI(package_id=package_id, identifier=identifier)
            Session.add(doi)
            Session.commit()
            return doi
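# For reference, an illustration of the identifier format built above, assuming the
# configured prefix is the DataCite test prefix '10.5072':
#
#   >>> os.path.join('10.5072', '{0:07}'.format(44634))
#   '10.5072/0044634'
#
# i.e. the prefix joined with a zero-padded seven-digit suffix, matching the example
# in the docstring.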
def check_existing_doi(key, flattened_data, errors, context):
    """
    Validator that checks DataCite for an existing DOI matching the supplied identifier.
    :param key: flattened key of the identifier field being validated
    :param flattened_data: flattened dataset dict
    :param errors: validation errors dict
    :param context: validation context
    :raises Invalid: if the DOI does not exist in DataCite, or already belongs to another package
    """
    datacite_api = DOIDataCiteAPI()
    identifier = flattened_data[key]
    identifier_type = flattened_data[('identifier_type',)]
    package_id = flattened_data[('id',)]

    existing_doi = Session.query(DOI).filter(DOI.identifier == identifier).first()
    if not existing_doi:
        # And check against the DataCite service
        try:
            datacite_doi = datacite_api.get(identifier)
            if not datacite_doi.text:
                raise Invalid("DOI %s does not exist in DataCite" % identifier)
        except HTTPError:
            raise Invalid("DOI %s does not exist in DataCite" % identifier)
    else:
        if existing_doi.package_id != package_id:
            log.error('This DOI already exists and belongs to %s' % existing_doi.package_id)
            raise Invalid('This DOI already exists and belongs to %s' % existing_doi.package_id)
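# A minimal sketch of how the validator above might be attached to a dataset schema
# via IDatasetForm. The plugin class, the 'identifier' / 'identifier_type' field names
# and the fallback behaviour are assumptions for illustration only; check_existing_doi
# itself expects the flattened dict to contain ('identifier_type',) and ('id',) keys.
import ckan.plugins as plugins
import ckan.plugins.toolkit as toolkit


class ExampleDOIDatasetForm(plugins.SingletonPlugin, toolkit.DefaultDatasetForm):
    plugins.implements(plugins.IDatasetForm)

    def _modify_package_schema(self, schema):
        # check_existing_doi uses the full (key, flattened_data, errors, context)
        # validator signature, so it receives the whole flattened dataset dict
        schema.update({
            'identifier': [toolkit.get_validator('ignore_missing'),
                           check_existing_doi,
                           toolkit.get_converter('convert_to_extras')],
            'identifier_type': [toolkit.get_validator('ignore_missing'),
                                toolkit.get_converter('convert_to_extras')],
        })
        return schema

    def create_package_schema(self):
        return self._modify_package_schema(
            super(ExampleDOIDatasetForm, self).create_package_schema())

    def update_package_schema(self):
        return self._modify_package_schema(
            super(ExampleDOIDatasetForm, self).update_package_schema())

    def is_fallback(self):
        return True

    def package_types(self):
        return []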
def get_or_create_doi(package_id):
    '''Create or retrieve the unique identifier for this package_id.

    :param package_id:
    '''
    datacite_api = DOIDataCiteAPI()

    doi = get_doi(package_id)

    if doi is None:
        while True:
            identifier = os.path.join(
                get_prefix(),
                u'{0:07}'.format(random.randint(1, 100000)))

            # Check this identifier doesn't exist in the table
            if Session.query(DOI).filter(DOI.identifier == identifier).count():
                continue

            # And check against the datacite service
            try:
                datacite_doi = datacite_api.get(identifier)
            except HTTPError:
                pass
            else:
                if datacite_doi.text:
                    continue

            doi = DOI(package_id=package_id, identifier=identifier)
            Session.add(doi)
            Session.commit()
            break

    return doi
def publish_doi(package_id, **kwargs):
    """
    Publish a DOI to DataCite.
    Creates the metadata record first, then the DOI => URI association.
    See MetadataDataCiteAPI.metadata_to_xml for param information.
    @param package_id:
    @param kwargs: metadata fields, including identifier, title, creator, publisher and publisher_year
    @return: request response
    """
    identifier = kwargs.get('identifier')

    metadata = MetadataDataCiteAPI()
    metadata.upsert(**kwargs)

    # The ID of a dataset never changes, so use that for the URL
    url = os.path.join(get_site_url(), 'dataset', package_id)

    doi = DOIDataCiteAPI()
    r = doi.upsert(doi=identifier, url=url)
    assert r.status_code == 201, 'Operation failed ERROR CODE: %s' % r.status_code

    # If DataCite accepted the DOI, record the publication date locally
    if r.text == 'OK':
        # Update the published date for this package and identifier
        num_affected = Session.query(DOI).filter_by(
            package_id=package_id, identifier=identifier).update(
            {"published": datetime.datetime.now()})

        # Raise an error if the update failed - this should never happen unless
        # DataCite and the local db get out of sync, in which case it requires investigating
        assert num_affected == 1, 'Updating local DOI failed'

    log.debug('Created new DOI for package %s' % package_id)
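# A minimal sketch of how the helpers above might be combined once a dataset is ready
# to be registered, e.g. from a plugin hook such as IPackageController's after_update.
# The mint_and_publish wrapper and the metadata values are illustrative assumptions;
# the kwarg names follow the publish_doi docstring.
def mint_and_publish(pkg_dict):
    # Reuse the package's existing local DOI, or mint and store a new identifier
    doi = get_or_create_doi(pkg_dict['id'])

    # Only push to DataCite if this identifier has not been published yet;
    # publish_doi sets DOI.published on success
    if doi.published is None:
        publish_doi(
            pkg_dict['id'],
            identifier=doi.identifier,
            title=pkg_dict.get('title', ''),
            creator=pkg_dict.get('author', ''),
            publisher='Example Publisher',  # assumed placeholder value
            publisher_year=datetime.datetime.now().year,
        )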