Exemple #1
0
def test_api_url():
    """Test client init."""
    c = DataCiteMDSClient(
        username="******",
        prefix="10.1234",
        url="https://mds.example.org",  # without slash
        test_mode=True,
    )
    assert c.api_url == "https://mds.example.org/"  # with slash
    assert c.prefix == "10.1234"
    assert c.test_mode is True
    assert c.__repr__() == "<DataCiteMDSClient: TEST>"
    def __init__(self, pid, client=None):
        """Initialize provider.

        To use the default client, just configure the following variables:

        * `PIDSTORE_DATACITE_USERNAME` as username.

        * `PIDSTORE_DATACITE_PASSWORD` as password.

        * `PIDSTORE_DATACITE_DOI_PREFIX` as DOI prefix.

        * `PIDSTORE_DATACITE_TESTMODE` to `True` if it configured in test mode.

        * `PIDSTORE_DATACITE_URL` as DataCite URL.

        :param pid: A :class:`invenio_pidstore.models.PersistentIdentifier`
            instance.
        :param client: A client to access to DataCite.
            (Default: :class:`datacite.DataCiteMDSClient` instance)
        """
        super(DataCiteProvider, self).__init__(pid)
        if client is not None:
            self.api = client
        else:
            self.api = DataCiteMDSClient(
                username=current_app.config.get('PIDSTORE_DATACITE_USERNAME'),
                password=current_app.config.get('PIDSTORE_DATACITE_PASSWORD'),
                prefix=current_app.config.get('PIDSTORE_DATACITE_DOI_PREFIX'),
                test_mode=current_app.config.get(
                    'PIDSTORE_DATACITE_TESTMODE', False),
                url=current_app.config.get('PIDSTORE_DATACITE_URL'))
Exemple #3
0
 def __init__(self, base_url, prefix, client=None):
     self.base_url = base_url
     self.prefix = prefix
     self._client = client or DataCiteMDSClient(
         url=self.base_url,
         username=settings.DATACITE_USERNAME,
         password=settings.DATACITE_PASSWORD,
         prefix=self.prefix
     )
Exemple #4
0
 def __init__(self):
     """Initialize provider."""
     self.api = DataCiteMDSClient(
         username=cfg.get('CFG_DATACITE_USERNAME'),
         password=cfg.get('CFG_DATACITE_PASSWORD'),
         prefix=cfg.get('CFG_DATACITE_DOI_PREFIX'),
         test_mode=cfg.get('CFG_DATACITE_TESTMODE', False),
         url=cfg.get('CFG_DATACITE_URL')
     )
def update_doi(doi, xml, url=''):

    password = os.environ['DATACITE']
    prefix = doi.split('/')[0]

    # Initialize the MDS client.
    d = DataCiteMDSClient(username='******',
                          password=password,
                          prefix=prefix,
                          url='https://mds.datacite.org'
                          #test_mode=True
                          )

    response = d.metadata_post(xml)
    print(response)
    if url != '':
        response = d.doi_post(doi, url)
        print(response)
Exemple #6
0
 def _create_client(self):
     datacite_kwargs = {
         'username': self.conf.dc_symbol,
         'password': self.conf.dc_password,
         'prefix': self.conf.dc_prefix,
         'test_mode': self.test_mode}
     #self.client = DataCiteMDSClient(**datacite_kwargs)
     try:
         self.client = DataCiteMDSClient(**datacite_kwargs)
     except Exception as e:
         self.errors.append("Datacite connection failed")
 def __init__(self, pid, client=None):
     """Initialize provider."""
     super(DataCiteProvider, self).__init__(pid)
     if client is not None:
         self.api = client
     else:
         self.api = DataCiteMDSClient(
             username=current_app.config.get('PIDSTORE_DATACITE_USERNAME'),
             password=current_app.config.get('PIDSTORE_DATACITE_PASSWORD'),
             prefix=current_app.config.get('PIDSTORE_DATACITE_DOI_PREFIX'),
             test_mode=current_app.config.get(
                 'PIDSTORE_DATACITE_TESTMODE', False),
             url=current_app.config.get('PIDSTORE_DATACITE_URL'))
Exemple #8
0
def get_client(username="******",
               password="******",
               prefix='10.5072',
               test_mode=False,
               timeout=None):
    """Create a API client."""
    return DataCiteMDSClient(
        username=username,
        password=password,
        prefix=prefix,
        url=APIURL,
        test_mode=test_mode,
        timeout=timeout,
    )
def generate_new_doi(*args):
    # If you want to generate XML for earlier versions, you need to use either the
    # schema31, schema40 or schema41 instead.

    data = {
        "identifier": {
            "identifier": prefix + "/RDS." + str(args[0]),
            "identifierType": "DOI"
        },
        "creators": [{
            "creatorName": organization
        }],
        "titles": [{
            "title": args[1]
        }],
        "publisher":
        "ICIMOD",
        "publicationYear":
        args[2],
        "language":
        "en-us",
        "resourceType": {
            "resourceTypeGeneral": "Dataset"
        },
        "rightsList": [{
            "rights":
            "Creative Commons Attribution 4.0 International (CC-BY-4.0)",
            "rightsURI": "https://creativecommons.org/licenses/by/4.0",
            "lang": "en-us"
        }],
        "descriptions": [{
            "descriptionType": "Abstract",
            "description": args[3]
        }],
    }

    # Validate dictionary
    # assert schema41.validate(data)

    # Generate DataCite XML from dictionary.
    doc = schema41.tostring(data)
    # print(doc)

    # Initialize the MDS client.
    d = DataCiteMDSClient(
        username=login_user,
        password=login_pass,
        url=url,
        prefix=prefix,
    )

    # Set metadata for DOI
    d.metadata_post(doc)

    # Mint new DOI
    d.doi_post(prefix + '/RDS.' + str(args[0]), args[4])

    logging.basicConfig(filename='logs/DOI_generation_Log_' + date_time +
                        '.log',
                        filemode='w',
                        format='%(asctime)s - %(message)s',
                        level=logging.INFO)
    logging.info("{} DOI Registration for id={} and title={}".format(
        args[5], args[0], args[1]))
    print(args[5], prefix + '/RDS.' + str(args[0]), args[4])
Exemple #10
0
class DataCiteProvider(BaseProvider):
    """DOI provider using DataCite API."""

    pid_type = 'doi'
    """Default persistent identifier type."""

    pid_provider = 'datacite'
    """Persistent identifier provider name."""

    default_status = PIDStatus.NEW
    """Default status for newly created PIDs by this provider."""

    @classmethod
    def create(cls, pid_value, **kwargs):
        """Create a new record identifier.

        For more information about parameters,
        see :meth:`invenio_pidstore.providers.base.BaseProvider.create`.

        :param pid_value: Persistent identifier value.
        :params ``**kwargs``: See
            :meth:`invenio_pidstore.providers.base.BaseProvider.create` extra
            parameters.
        :returns: A
            :class:`invenio_pidstore.providers.datacite.DataCiteProvider`
            instance.
        """
        return super(DataCiteProvider, cls).create(
            pid_value=pid_value, **kwargs)

    def __init__(self, pid, client=None):
        """Initialize provider.

        To use the default client, just configure the following variables:

        * `PIDSTORE_DATACITE_USERNAME` as username.

        * `PIDSTORE_DATACITE_PASSWORD` as password.

        * `PIDSTORE_DATACITE_DOI_PREFIX` as DOI prefix.

        * `PIDSTORE_DATACITE_TESTMODE` to `True` if it configured in test mode.

        * `PIDSTORE_DATACITE_URL` as DataCite URL.

        :param pid: A :class:`invenio_pidstore.models.PersistentIdentifier`
            instance.
        :param client: A client to access to DataCite.
            (Default: :class:`datacite.DataCiteMDSClient` instance)
        """
        super(DataCiteProvider, self).__init__(pid)
        if client is not None:
            self.api = client
        else:
            self.api = DataCiteMDSClient(
                username=current_app.config.get('PIDSTORE_DATACITE_USERNAME'),
                password=current_app.config.get('PIDSTORE_DATACITE_PASSWORD'),
                prefix=current_app.config.get('PIDSTORE_DATACITE_DOI_PREFIX'),
                test_mode=current_app.config.get(
                    'PIDSTORE_DATACITE_TESTMODE', False),
                url=current_app.config.get('PIDSTORE_DATACITE_URL'))

    def reserve(self, doc):
        """Reserve a DOI (amounts to upload metadata, but not to mint).

        :param doc: Set metadata for DOI.
        :returns: `True` if is reserved successfully.
        """
        # Only registered PIDs can be updated.
        try:
            self.pid.reserve()
            self.api.metadata_post(doc)
        except (DataCiteError, HttpError):
            logger.exception("Failed to reserve in DataCite",
                             extra=dict(pid=self.pid))
            raise
        logger.info("Successfully reserved in DataCite",
                    extra=dict(pid=self.pid))
        return True

    def register(self, url, doc):
        """Register a DOI via the DataCite API.

        :param url: Specify the URL for the API.
        :param doc: Set metadata for DOI.
        :returns: `True` if is registered successfully.
        """
        try:
            self.pid.register()
            # Set metadata for DOI
            self.api.metadata_post(doc)
            # Mint DOI
            self.api.doi_post(self.pid.pid_value, url)
        except (DataCiteError, HttpError):
            logger.exception("Failed to register in DataCite",
                             extra=dict(pid=self.pid))
            raise
        logger.info("Successfully registered in DataCite",
                    extra=dict(pid=self.pid))
        return True

    def update(self, url, doc):
        """Update metadata associated with a DOI.

        This can be called before/after a DOI is registered.

        :param doc: Set metadata for DOI.
        :returns: `True` if is updated successfully.
        """
        if self.pid.is_deleted():
            logger.info("Reactivate in DataCite",
                        extra=dict(pid=self.pid))

        try:
            # Set metadata
            self.api.metadata_post(doc)
            self.api.doi_post(self.pid.pid_value, url)
        except (DataCiteError, HttpError):
            logger.exception("Failed to update in DataCite",
                             extra=dict(pid=self.pid))
            raise

        if self.pid.is_deleted():
            self.pid.sync_status(PIDStatus.REGISTERED)
        logger.info("Successfully updated in DataCite",
                    extra=dict(pid=self.pid))
        return True

    def delete(self):
        """Delete a registered DOI.

        If the PID is new then it's deleted only locally.
        Otherwise, also it's deleted also remotely.

        :returns: `True` if is deleted successfully.
        """
        try:
            if self.pid.is_new():
                self.pid.delete()
            else:
                self.pid.delete()
                self.api.metadata_delete(self.pid.pid_value)
        except (DataCiteError, HttpError):
            logger.exception("Failed to delete in DataCite",
                             extra=dict(pid=self.pid))
            raise
        logger.info("Successfully deleted in DataCite",
                    extra=dict(pid=self.pid))
        return True

    def sync_status(self):
        """Synchronize DOI status DataCite MDS.

        :returns: `True` if is sync successfully.
        """
        status = None

        try:
            try:
                self.api.doi_get(self.pid.pid_value)
                status = PIDStatus.REGISTERED
            except DataCiteGoneError:
                status = PIDStatus.DELETED
            except DataCiteNoContentError:
                status = PIDStatus.REGISTERED
            except DataCiteNotFoundError:
                pass

            if status is None:
                try:
                    self.api.metadata_get(self.pid.pid_value)
                    status = PIDStatus.RESERVED
                except DataCiteGoneError:
                    status = PIDStatus.DELETED
                except DataCiteNoContentError:
                    status = PIDStatus.REGISTERED
                except DataCiteNotFoundError:
                    pass
        except (DataCiteError, HttpError):
            logger.exception("Failed to sync status from DataCite",
                             extra=dict(pid=self.pid))
            raise

        if status is None:
            status = PIDStatus.NEW

        self.pid.sync_status(status)

        logger.info("Successfully synced status from DataCite",
                    extra=dict(pid=self.pid))
        return True
Exemple #11
0
class DataCiteClient(AbstractIdentifierClient):
    def __init__(self, node):
        try:
            assert settings.DATACITE_URL and (getattr(
                node.provider, 'doi_prefix', None) or settings.DATACITE_PREFIX)
        except AssertionError:
            raise ImproperlyConfigured(
                'OSF\'Datacite client\'s settings are not configured')

        self._client = DataCiteMDSClient(
            url=settings.DATACITE_URL,
            username=settings.DATACITE_USERNAME,
            password=settings.DATACITE_PASSWORD,
            prefix=getattr(node.provider, 'doi_prefix', None)
            or settings.DATACITE_PREFIX)

    def build_metadata(self, node):
        """Return the formatted datacite metadata XML as a string.
         """

        data = {
            'identifier': {
                'identifier': self.build_doi(node),
                'identifierType': 'DOI',
            },
            'creators':
            datacite_format_creators([node.creator]),
            'contributors':
            datacite_format_contributors(node.visible_contributors),
            'titles': [{
                'title': node.title
            }],
            'publisher':
            'Open Science Framework',
            'publicationYear':
            str(datetime.datetime.now().year),
            'resourceType': {
                'resourceType':
                'Pre-registration'
                if node.type == 'osf.registration' else 'Project',
                'resourceTypeGeneral':
                'Text'
            },
            'dates': [
                {
                    'date': node.created.isoformat(),
                    'dateType': 'Created'
                },
                {
                    'date': node.modified.isoformat(),
                    'dateType': 'Updated'
                },
            ]
        }

        article_doi = node.article_doi
        if article_doi:
            data['relatedIdentifiers'] = [{
                'relatedIdentifier': article_doi,
                'relatedIdentifierType': 'DOI',
                'relationType': 'IsSupplementTo'
            }]

        if node.description:
            data['descriptions'] = [{
                'descriptionType': 'Abstract',
                'description': node.description
            }]

        if node.node_license:
            data['rightsList'] = [{
                'rights': node.node_license.name,
                'rightsURI': node.node_license.url
            }]

        data['subjects'] = datacite_format_subjects(node)

        # Validate dictionary
        assert schema40.validate(data)

        # Generate DataCite XML from dictionary.
        return schema40.tostring(data)

    def build_doi(self, object):
        return settings.DOI_FORMAT.format(
            prefix=getattr(object.provider, 'doi_prefix', None)
            or settings.DATACITE_PREFIX,
            guid=object._id)

    def get_identifier(self, identifier):
        self._client.doi_get(identifier)

    def create_identifier(self, node, category):
        if category == 'doi':
            if settings.DATACITE_ENABLED:
                metadata = self.build_metadata(node)
                resp = self._client.metadata_post(metadata)
                # Typical response: 'OK (10.70102/FK2osf.io/cq695)' to doi 10.70102/FK2osf.io/cq695
                doi = re.match(r'OK \((?P<doi>[a-zA-Z0-9 .\/]{0,})\)',
                               resp).groupdict()['doi']
                self._client.doi_post(doi, node.absolute_url)
                return {'doi': doi}
            logger.info('TEST ENV: DOI built but not minted')
            return {'doi': self.build_doi(node)}
        else:
            raise NotImplementedError(
                'Creating an identifier with category {} is not supported'.
                format(category))

    def update_identifier(self, node, category):
        if settings.DATACITE_ENABLED and not node.is_public or node.is_deleted:
            if category == 'doi':
                doi = self.build_doi(node)
                self._client.metadata_delete(doi)
                return {'doi': doi}
            else:
                raise NotImplementedError(
                    'Updating metadata not supported for {}'.format(category))
        else:
            return self.create_identifier(node, category)
Exemple #12
0
        {'title': 'DataCite PyPI Package'}
    ],
    'publisher': 'CERN',
    'publicationYear': '2015',
}

# Validate dictionary
assert schema31.validate(data)

# Generate DataCite XML from dictionary.
doc = schema31.tostring(data)

# Initialize the MDS client.
d = DataCiteMDSClient(
    username='******',
    password='******',
    prefix='10.5072',
    test_mode=True
)

# Set metadata for DOI
d.metadata_post(doc)

# Mint new DOI
d.doi_post('10.5072/test-doi', 'http://example.org/test-doi')

# Get DOI location
location = d.doi_get("10.5072/test-doi")

# Set alternate URL for content type (availble through content negotiation)
d.media_post(
    "10.5072/test-doi",
Exemple #13
0
        'resourceType': 'Dataset',
        'resourceTypeGeneral': 'Dataset'
    },
    'schemaVersion': 'http://datacite.org/schema/kernel-4',
}

# Validate dictionary
assert schema42.validate(data)

# Generate DataCite XML from dictionary.
doc = schema42.tostring(data)

# Initialize the MDS client.
d = DataCiteMDSClient(
    username='******',
    password='******',
    prefix='10.5072',
)

# Set metadata for DOI
d.metadata_post(doc)

# Mint new DOI
d.doi_post('10.5072/test-doi', 'http://example.org/test-doi')

# Get DOI location
location = d.doi_get("10.5072/test-doi")

# Set alternate URL for content type (available through content negotiation)
d.media_post(
    "10.5072/test-doi",
Exemple #14
0
class DataCite(PidProvider):

    """DOI provider using DataCite API."""

    pid_type = 'doi'

    def __init__(self):
        """Initialize provider."""
        self.api = DataCiteMDSClient(
            username=cfg.get('CFG_DATACITE_USERNAME'),
            password=cfg.get('CFG_DATACITE_PASSWORD'),
            prefix=cfg.get('CFG_DATACITE_DOI_PREFIX'),
            test_mode=cfg.get('CFG_DATACITE_TESTMODE', False),
            url=cfg.get('CFG_DATACITE_URL')
        )

    def _get_url(self, kwargs):
        try:
            return kwargs['url']
        except KeyError:
            raise Exception("url keyword argument must be specified.")

    def _get_doc(self, kwargs):
        try:
            return kwargs['doc']
        except KeyError:
            raise Exception("doc keyword argument must be specified.")

    def reserve(self, pid, *args, **kwargs):
        """Reserve a DOI (amounts to upload metadata, but not to mint)."""
        # Only registered PIDs can be updated.
        doc = self._get_doc(kwargs)

        try:
            self.api.metadata_post(doc)
        except DataCiteError as e:
            pid.log("RESERVE", "Failed with %s" % e.__class__.__name__)
            return False
        except HttpError as e:
            pid.log("RESERVE", "Failed with HttpError - %s" % unicode(e))
            return False
        else:
            pid.log("RESERVE", "Successfully reserved in DataCite")
        return True

    def register(self, pid, *args, **kwargs):
        """Register a DOI via the DataCite API."""
        url = self._get_url(kwargs)
        doc = self._get_doc(kwargs)

        try:
            # Set metadata for DOI
            self.api.metadata_post(doc)
            # Mint DOI
            self.api.doi_post(pid.pid_value, url)
        except DataCiteError as e:
            pid.log("REGISTER", "Failed with %s" % e.__class__.__name__)
            return False
        except HttpError as e:
            pid.log("REGISTER", "Failed with HttpError - %s" % unicode(e))
            return False
        else:
            pid.log("REGISTER", "Successfully registered in DataCite")
        return True

    def update(self, pid, *args, **kwargs):
        """Update metadata associated with a DOI.

        This can be called before/after a DOI is registered.
        """
        url = self._get_url(kwargs)
        doc = self._get_doc(kwargs)

        if pid.is_deleted():
            pid.log("UPDATE", "Reactivate in DataCite")

        try:
            # Set metadata
            self.api.metadata_post(doc)
            self.api.doi_post(pid.pid_value, url)
        except DataCiteError as e:
            pid.log("UPDATE", "Failed with %s" % e.__class__.__name__)
            return False
        except HttpError as e:
            pid.log("UPDATE", "Failed with HttpError - %s" % unicode(e))
            return False
        else:
            if pid.is_deleted():
                pid.log(
                    "UPDATE",
                    "Successfully updated and possibly registered in DataCite"
                )
            else:
                pid.log("UPDATE", "Successfully updated in DataCite")
        return True

    def delete(self, pid, *args, **kwargs):
        """Delete a registered DOI."""
        try:
            self.api.metadata_delete(pid.pid_value)
        except DataCiteError as e:
            pid.log("DELETE", "Failed with %s" % e.__class__.__name__)
            return False
        except HttpError as e:
            pid.log("DELETE", "Failed with HttpError - %s" % unicode(e))
            return False
        else:
            pid.log("DELETE", "Successfully deleted in DataCite")
        return True

    def sync_status(self, pid, *args, **kwargs):
        """Synchronize DOI status DataCite MDS."""
        status = None

        try:
            self.api.doi_get(pid.pid_value)
            status = cfg['PIDSTORE_STATUS_REGISTERED']
        except DataCiteGoneError:
            status = cfg['PIDSTORE_STATUS_DELETED']
        except DataCiteNoContentError:
            status = cfg['PIDSTORE_STATUS_REGISTERED']
        except DataCiteNotFoundError:
            pass
        except DataCiteError as e:
            pid.log("SYNC", "Failed with %s" % e.__class__.__name__)
            return False
        except HttpError as e:
            pid.log("SYNC", "Failed with HttpError - %s" % unicode(e))
            return False

        if status is None:
            try:
                self.api.metadata_get(pid.pid_value)
                status = cfg['PIDSTORE_STATUS_RESERVED']
            except DataCiteGoneError:
                status = cfg['PIDSTORE_STATUS_DELETED']
            except DataCiteNoContentError:
                status = cfg['PIDSTORE_STATUS_REGISTERED']
            except DataCiteNotFoundError:
                pass
            except DataCiteError as e:
                pid.log("SYNC", "Failed with %s" % e.__class__.__name__)
                return False
            except HttpError as e:
                pid.log("SYNC", "Failed with HttpError - %s" % unicode(e))
                return False

        if status is None:
            status = cfg['PIDSTORE_STATUS_NEW']

        if pid.status != status:
            pid.log(
                "SYNC", "Fixed status from %s to %s." % (pid.status, status)
            )
            pid.status = status

        return True

    @classmethod
    def is_provider_for_pid(cls, pid_str):
        """Check if DataCite is the provider for this DOI.

        Note: If you e.g. changed DataCite account and received a new prefix,
        then this provider can only update and register DOIs for the new
        prefix.
        """
        return pid_str.startswith("%s/" % cfg['CFG_DATACITE_DOI_PREFIX'])
Exemple #15
0
metadata['publisher'] = 'Caltech Library'

metadata['resourceType'] = {
    "resourceTypeGeneral": 'Other',
    'resourceType': 'Website'
}

#Get our DataCite password
infile = open('data/pw', 'r')
password = infile.readline().strip()

# Initialize the MDS client.
d = DataCiteMDSClient(
    username='******',
    password=password,
    prefix=prefix,
    #test_mode=True
)

doi_end = subprocess.check_output(['./gen-cool-doi'], universal_newlines=True)
identifier = str(prefix) + '/' + str(doi_end)

metadata['identifier'] = {'identifier': identifier, 'identifierType': 'DOI'}

assert schema40.validate(metadata)
#Debugging if this fails
#v = schema40.validator.validate(metadata)
#errors = sorted(v.iter_errors(instance), key=lambda e: e.path)
#for error in errors:
#    print(error.message)
class DataCiteProvider(BaseProvider):
    """DOI provider using DataCite API."""

    pid_type = 'doi'
    pid_provider = 'datacite'
    default_status = PIDStatus.NEW

    @classmethod
    def create(cls, pid_value, **kwargs):
        """Create a new record identifier."""
        return super(DataCiteProvider, cls).create(
            pid_value=pid_value, **kwargs)

    def __init__(self, pid, client=None):
        """Initialize provider."""
        super(DataCiteProvider, self).__init__(pid)
        if client is not None:
            self.api = client
        else:
            self.api = DataCiteMDSClient(
                username=current_app.config.get('PIDSTORE_DATACITE_USERNAME'),
                password=current_app.config.get('PIDSTORE_DATACITE_PASSWORD'),
                prefix=current_app.config.get('PIDSTORE_DATACITE_DOI_PREFIX'),
                test_mode=current_app.config.get(
                    'PIDSTORE_DATACITE_TESTMODE', False),
                url=current_app.config.get('PIDSTORE_DATACITE_URL'))

    def reserve(self, doc):
        """Reserve a DOI (amounts to upload metadata, but not to mint)."""
        # Only registered PIDs can be updated.
        try:
            self.pid.reserve()
            self.api.metadata_post(doc)
        except (DataCiteError, HttpError):
            logger.exception("Failed to reserve in DataCite",
                             extra=dict(pid=self.pid))
            raise
        logger.info("Successfully reserved in DataCite",
                    extra=dict(pid=self.pid))
        return True

    def register(self, url, doc):
        """Register a DOI via the DataCite API."""
        try:
            self.pid.register()
            # Set metadata for DOI
            self.api.metadata_post(doc)
            # Mint DOI
            self.api.doi_post(self.pid.pid_value, url)
        except (DataCiteError, HttpError):
            logger.exception("Failed to register in DataCite",
                             extra=dict(pid=self.pid))
            raise
        logger.info("Successfully registered in DataCite",
                    extra=dict(pid=self.pid))
        return True

    def update(self, url, doc):
        """Update metadata associated with a DOI.

        This can be called before/after a DOI is registered.
        """
        if self.pid.is_deleted():
            logger.info("Reactivate in DataCite",
                        extra=dict(pid=self.pid))

        try:
            # Set metadata
            self.api.metadata_post(doc)
            self.api.doi_post(self.pid.pid_value, url)
        except (DataCiteError, HttpError):
            logger.exception("Failed to update in DataCite",
                             extra=dict(pid=self.pid))
            raise

        if self.pid.is_deleted():
            self.pid.sync_status(PIDStatus.REGISTERED)
        logger.info("Successfully updated in DataCite",
                    extra=dict(pid=self.pid))
        return True

    def delete(self):
        """Delete a registered DOI."""
        try:
            if self.pid.is_new():
                self.pid.delete()
            else:
                self.pid.delete()
                self.api.metadata_delete(self.pid.pid_value)
        except (DataCiteError, HttpError):
            logger.exception("Failed to delete in DataCite",
                             extra=dict(pid=self.pid))
            raise
        logger.info("Successfully deleted in DataCite",
                    extra=dict(pid=self.pid))
        return True

    def sync_status(self):
        """Synchronize DOI status DataCite MDS."""
        status = None

        try:
            try:
                self.api.doi_get(self.pid.pid_value)
                status = PIDStatus.REGISTERED
            except DataCiteGoneError:
                status = PIDStatus.DELETED
            except DataCiteNoContentError:
                status = PIDStatus.REGISTERED
            except DataCiteNotFoundError:
                pass

            if status is None:
                try:
                    self.api.metadata_get(self.pid.pid_value)
                    status = PIDStatus.RESERVED
                except DataCiteGoneError:
                    status = PIDStatus.DELETED
                except DataCiteNoContentError:
                    status = PIDStatus.REGISTERED
                except DataCiteNotFoundError:
                    pass
        except (DataCiteError, HttpError):
            logger.exception("Failed to sync status from DataCite",
                             extra=dict(pid=self.pid))
            raise

        if status is None:
            status = PIDStatus.NEW

        self.pid.sync_status(status)

        logger.info("Successfully synced status from DataCite",
                    extra=dict(pid=self.pid))
        return True
Exemple #17
0
class DataCiteProvider(BaseProvider):
    """DOI provider using DataCite API."""

    pid_type = 'doi'
    """Default persistent identifier type."""

    pid_provider = 'datacite'
    """Persistent identifier provider name."""

    default_status = PIDStatus.NEW
    """Default status for newly created PIDs by this provider."""

    @classmethod
    def create(cls, pid_value, **kwargs):
        """Create a new record identifier.

        For more information about parameters,
        see :meth:`invenio_pidstore.providers.BaseProvider.create`.

        :param pid_value: Persistent identifier value.
        :params **kwargs: See
            :meth:`invenio_pidstore.providers.base.BaseProvider.create` extra
            parameters.
        :returns: A :class:`invenio_pidstore.providers.DataCiteProvider`
            instance.
        """
        return super(DataCiteProvider, cls).create(
            pid_value=pid_value, **kwargs)

    def __init__(self, pid, client=None):
        """Initialize provider.

        To use the default client, just configure the following variables:

        * `PIDSTORE_DATACITE_USERNAME` as username.

        * `PIDSTORE_DATACITE_PASSWORD` as password.

        * `PIDSTORE_DATACITE_DOI_PREFIX` as DOI prefix.

        * `PIDSTORE_DATACITE_TESTMODE` to `True` if it configured in test mode.

        * `PIDSTORE_DATACITE_URL` as DataCite URL.

        :param pid: A :class:`invenio_pidstore.models.PersistentIdentifier`
            instance.
        :param client: A client to access to DataCite.
            (Default: :class:`datacite.DataCiteMDSClient` instance)
        """
        super(DataCiteProvider, self).__init__(pid)
        if client is not None:
            self.api = client
        else:
            self.api = DataCiteMDSClient(
                username=current_app.config.get('PIDSTORE_DATACITE_USERNAME'),
                password=current_app.config.get('PIDSTORE_DATACITE_PASSWORD'),
                prefix=current_app.config.get('PIDSTORE_DATACITE_DOI_PREFIX'),
                test_mode=current_app.config.get(
                    'PIDSTORE_DATACITE_TESTMODE', False),
                url=current_app.config.get('PIDSTORE_DATACITE_URL'))

    def reserve(self, doc):
        """Reserve a DOI (amounts to upload metadata, but not to mint).

        :param doc: Set metadata for DOI.
        :returns: `True` if is reserved successfully.
        """
        # Only registered PIDs can be updated.
        try:
            self.pid.reserve()
            self.api.metadata_post(doc)
        except (DataCiteError, HttpError):
            logger.exception("Failed to reserve in DataCite",
                             extra=dict(pid=self.pid))
            raise
        logger.info("Successfully reserved in DataCite",
                    extra=dict(pid=self.pid))
        return True

    def register(self, url, doc):
        """Register a DOI via the DataCite API.

        :param url: Specify the URL for the API.
        :param doc: Set metadata for DOI.
        :returns: `True` if is registered successfully.
        """
        try:
            self.pid.register()
            # Set metadata for DOI
            self.api.metadata_post(doc)
            # Mint DOI
            self.api.doi_post(self.pid.pid_value, url)
        except (DataCiteError, HttpError):
            logger.exception("Failed to register in DataCite",
                             extra=dict(pid=self.pid))
            raise
        logger.info("Successfully registered in DataCite",
                    extra=dict(pid=self.pid))
        return True

    def update(self, url, doc):
        """Update metadata associated with a DOI.

        This can be called before/after a DOI is registered.

        :param doc: Set metadata for DOI.
        :returns: `True` if is updated successfully.
        """
        if self.pid.is_deleted():
            logger.info("Reactivate in DataCite",
                        extra=dict(pid=self.pid))

        try:
            # Set metadata
            self.api.metadata_post(doc)
            self.api.doi_post(self.pid.pid_value, url)
        except (DataCiteError, HttpError):
            logger.exception("Failed to update in DataCite",
                             extra=dict(pid=self.pid))
            raise

        if self.pid.is_deleted():
            self.pid.sync_status(PIDStatus.REGISTERED)
        logger.info("Successfully updated in DataCite",
                    extra=dict(pid=self.pid))
        return True

    def delete(self):
        """Delete a registered DOI.

        If the PID is new then it's deleted only locally.
        Otherwise, also it's deleted also remotely.

        :returns: `True` if is deleted successfully.
        """
        try:
            if self.pid.is_new():
                self.pid.delete()
            else:
                self.pid.delete()
                self.api.metadata_delete(self.pid.pid_value)
        except (DataCiteError, HttpError):
            logger.exception("Failed to delete in DataCite",
                             extra=dict(pid=self.pid))
            raise
        logger.info("Successfully deleted in DataCite",
                    extra=dict(pid=self.pid))
        return True

    def sync_status(self):
        """Synchronize DOI status DataCite MDS.

        :returns: `True` if is sync successfully.
        """
        status = None

        try:
            try:
                self.api.doi_get(self.pid.pid_value)
                status = PIDStatus.REGISTERED
            except DataCiteGoneError:
                status = PIDStatus.DELETED
            except DataCiteNoContentError:
                status = PIDStatus.REGISTERED
            except DataCiteNotFoundError:
                pass

            if status is None:
                try:
                    self.api.metadata_get(self.pid.pid_value)
                    status = PIDStatus.RESERVED
                except DataCiteGoneError:
                    status = PIDStatus.DELETED
                except DataCiteNoContentError:
                    status = PIDStatus.REGISTERED
                except DataCiteNotFoundError:
                    pass
        except (DataCiteError, HttpError):
            logger.exception("Failed to sync status from DataCite",
                             extra=dict(pid=self.pid))
            raise

        if status is None:
            status = PIDStatus.NEW

        self.pid.sync_status(status)

        logger.info("Successfully synced status from DataCite",
                    extra=dict(pid=self.pid))
        return True
Exemple #18
0
            }
            metadata['publicationYear'] = str(now.year)
            metadata['publisher'] = 'Caltech Library'

            #Get the prefix to use
            prefix = inputv['prefix'].split('(')[1].split(')')[0]
            #prefix = '10.5072'

            #Get our DataCite password
            infile = open('pw', 'r')
            password = infile.readline().strip()

            # Initialize the MDS client.
            d = DataCiteMDSClient(
                username='******',
                password=password,
                prefix=prefix,
            )

            identifier = str(prefix)

            metadata['identifier'] = {
                'identifier': identifier,
                'identifierType': 'DOI'
            }

            #assert schema40.validate(metadata)
            #Debugging if this fails
            #v = schema40.validator.validate(metadata)
            #errors = sorted(v.iter_errors(instance), key=lambda e: e.path)
            #for error in errors:
Exemple #19
0
class DataCite(PidProvider):
    """DOI provider using DataCite API."""

    pid_type = 'doi'

    def __init__(self):
        """Initialize provider."""
        self.api = DataCiteMDSClient(username=cfg.get('CFG_DATACITE_USERNAME'),
                                     password=cfg.get('CFG_DATACITE_PASSWORD'),
                                     prefix=cfg.get('CFG_DATACITE_DOI_PREFIX'),
                                     test_mode=cfg.get('CFG_DATACITE_TESTMODE',
                                                       False),
                                     url=cfg.get('CFG_DATACITE_URL'))

    def _get_url(self, kwargs):
        try:
            return kwargs['url']
        except KeyError:
            raise Exception("url keyword argument must be specified.")

    def _get_doc(self, kwargs):
        try:
            return kwargs['doc']
        except KeyError:
            raise Exception("doc keyword argument must be specified.")

    def reserve(self, pid, *args, **kwargs):
        """Reserve a DOI (amounts to upload metadata, but not to mint)."""
        # Only registered PIDs can be updated.
        doc = self._get_doc(kwargs)

        try:
            self.api.metadata_post(doc)
        except DataCiteError as e:
            pid.log("RESERVE", "Failed with %s" % e.__class__.__name__)
            return False
        except HttpError as e:
            pid.log("RESERVE", "Failed with HttpError - %s" % unicode(e))
            return False
        else:
            pid.log("RESERVE", "Successfully reserved in DataCite")
        return True

    def register(self, pid, *args, **kwargs):
        """Register a DOI via the DataCite API."""
        url = self._get_url(kwargs)
        doc = self._get_doc(kwargs)

        try:
            # Set metadata for DOI
            self.api.metadata_post(doc)
            # Mint DOI
            self.api.doi_post(pid.pid_value, url)
        except DataCiteError as e:
            pid.log("REGISTER", "Failed with %s" % e.__class__.__name__)
            return False
        except HttpError as e:
            pid.log("REGISTER", "Failed with HttpError - %s" % unicode(e))
            return False
        else:
            pid.log("REGISTER", "Successfully registered in DataCite")
        return True

    def update(self, pid, *args, **kwargs):
        """Update metadata associated with a DOI.

        This can be called before/after a DOI is registered.
        """
        url = self._get_url(kwargs)
        doc = self._get_doc(kwargs)

        if pid.is_deleted():
            pid.log("UPDATE", "Reactivate in DataCite")

        try:
            # Set metadata
            self.api.metadata_post(doc)
            self.api.doi_post(pid.pid_value, url)
        except DataCiteError as e:
            pid.log("UPDATE", "Failed with %s" % e.__class__.__name__)
            return False
        except HttpError as e:
            pid.log("UPDATE", "Failed with HttpError - %s" % unicode(e))
            return False
        else:
            if pid.is_deleted():
                pid.log(
                    "UPDATE",
                    "Successfully updated and possibly registered in DataCite")
            else:
                pid.log("UPDATE", "Successfully updated in DataCite")
        return True

    def delete(self, pid, *args, **kwargs):
        """Delete a registered DOI."""
        try:
            self.api.metadata_delete(pid.pid_value)
        except DataCiteError as e:
            pid.log("DELETE", "Failed with %s" % e.__class__.__name__)
            return False
        except HttpError as e:
            pid.log("DELETE", "Failed with HttpError - %s" % unicode(e))
            return False
        else:
            pid.log("DELETE", "Successfully deleted in DataCite")
        return True

    def sync_status(self, pid, *args, **kwargs):
        """Synchronize DOI status DataCite MDS."""
        status = None

        try:
            self.api.doi_get(pid.pid_value)
            status = cfg['PIDSTORE_STATUS_REGISTERED']
        except DataCiteGoneError:
            status = cfg['PIDSTORE_STATUS_DELETED']
        except DataCiteNoContentError:
            status = cfg['PIDSTORE_STATUS_REGISTERED']
        except DataCiteNotFoundError:
            pass
        except DataCiteError as e:
            pid.log("SYNC", "Failed with %s" % e.__class__.__name__)
            return False
        except HttpError as e:
            pid.log("SYNC", "Failed with HttpError - %s" % unicode(e))
            return False

        if status is None:
            try:
                self.api.metadata_get(pid.pid_value)
                status = cfg['PIDSTORE_STATUS_RESERVED']
            except DataCiteGoneError:
                status = cfg['PIDSTORE_STATUS_DELETED']
            except DataCiteNoContentError:
                status = cfg['PIDSTORE_STATUS_REGISTERED']
            except DataCiteNotFoundError:
                pass
            except DataCiteError as e:
                pid.log("SYNC", "Failed with %s" % e.__class__.__name__)
                return False
            except HttpError as e:
                pid.log("SYNC", "Failed with HttpError - %s" % unicode(e))
                return False

        if status is None:
            status = cfg['PIDSTORE_STATUS_NEW']

        if pid.status != status:
            pid.log("SYNC",
                    "Fixed status from %s to %s." % (pid.status, status))
            pid.status = status

        return True

    @classmethod
    def is_provider_for_pid(cls, pid_str):
        """Check if DataCite is the provider for this DOI.

        Note: If you e.g. changed DataCite account and received a new prefix,
        then this provider can only update and register DOIs for the new
        prefix.
        """
        return pid_str.startswith("%s/" % cfg['CFG_DATACITE_DOI_PREFIX'])
Exemple #20
0
class DoiHelper(object):

    def __init__(self, conf, errors):
        self.conf = conf
        self.errors = errors
        self.test_mode = False #True
        self._create_client()

    def safe_call(self, f, *args):
        try:
            f(*args)
        except Exception as e:
            self.errors.append("Datacite1 connection failed")

    @on_no_errors
    def _create_client(self):
        datacite_kwargs = {
            'username': self.conf.dc_symbol,
            'password': self.conf.dc_password,
            'prefix': self.conf.dc_prefix,
            'test_mode': self.test_mode}
        #self.client = DataCiteMDSClient(**datacite_kwargs)
        try:
            self.client = DataCiteMDSClient(**datacite_kwargs)
        except Exception as e:
            self.errors.append("Datacite connection failed")

    @on_no_errors
    def find_free_doi(self):
        r = "".join([random.choice(string.ascii_uppercase + string.digits)
                     for _ in range(5)])
        doi = "/".join([self.conf.dc_prefix, self.conf.dc_identifier, r])
        try:
            self.client.metadata_get(doi)
            return self.find_free_doi()
        except DataCiteServerError as e:
            if e.error_code != 404:
                self.errors.append(
                    'Not the expected result from MDS while validation: %s' % e)

        return doi

    @on_no_errors
    def _post_metadata(self, metadata):
        logging.debug("METADATA:" + metadata)
        res = self.client.metadata_post(metadata)
        logging.debug("RES: %s" % res)
        if not res.startswith("OK"):
            self.errors.append(res)

    @on_no_errors
    def _post_doi(self, doi, url):
        for testurl in [
            "https://develop.osl.tib.eu",
            "https://test.osl.tib.eu",
            "https://develop.handbuch.tib.eu",
            "https://test.handbuch.tib.eu",
            "https://handbuch.tib.eu",
            "https://handbuch.local",

        ]:
            if testurl in url:
                url = url.replace(testurl, "http://handbuch.io")

        if "handbuch.io" in url and not self.test_mode:
            logging.debug("Post DOI: Doi: %s\t%s" % (doi, url))
            self.client.doi_post(doi, url)

    def _get_creators_list(self, book):
        creators = []
        for title in ['AUTOREN', 'HERAUSGEBER', 'KONTRIBUTOREN', ]:
            namesstr = book.info.get(title, "")
            dicts = [
                {'creatorName': name.strip()}
                for name in namesstr.split(',')
                if name.strip()
            ]
            creators += [d for d in dicts if d]

        return creators

    def _get_bookdata(self, book):
        publisher = book.info.get('HERAUSGEBER', None) or \
            book.info['AUTOREN']
        data = {
            'identifier': {
                'identifier': book.info['doi'],
                'identifierType': 'DOI',
            },
            'creators': self._get_creators_list(book),
            'titles': [
                {'title': book.book_page.friendly_title}
            ],
            'publisher': publisher,
            'publicationYear': str(datetime.now().year),
            'version': str(self.conf.version)
        }
        return data

    def _create_doi(self, doi, url, data):
        metadata = None
        try:
            schema31.validate(data)
            metadata = schema31.tostring(data)
        except ValidationError as e:
            self.errors.append(str(e))
        #import pdb; pdb.set_trace()
        self._post_metadata(metadata)
        self._post_doi(doi, url)

    @on_no_errors
    def create_chapterdoi(self, doi, url, title, book, username):
        #import pdb; pdb.set_trace()
        data = self._get_bookdata(book)
        data['titles'][0]['title'] = title
        data['identifier']['identifier'] = doi
        data['relatedIdentifiers'] = [{
               'relatedIdentifier': book.info['doi'],
               'relatedIdentifierType': 'DOI',
               'relationType':'IsPartOf'}, ]
        if username:
            data['publisher'] = username

        self._create_doi(doi, url, data)

    @on_no_errors
    def create_bookdoi(self, url, book):
        data = self._get_bookdata(book)
        self._create_doi(book.info['doi'], url, data)