Exemple #1
0
def decide_doi_update(metadata):
    """Return ``[eprint_id, doi]`` when exactly one candidate DOI is found.

    Records that already carry a ``doi`` key are left alone. Otherwise every
    ``related_url`` item whose type is ``doi`` is inspected and each URL that
    holds a DOI is collected as a candidate.

    Args:
        metadata: Mapping with an ``eprint_id`` key and, optionally, ``doi``
            and ``related_url`` (a mapping with an ``items`` list whose
            entries may carry ``url``, ``type`` and ``description``).

    Returns:
        ``[eprint_id, doi]`` when a single candidate exists, else ``None``.

    Raises:
        KeyError: If ``doi`` is absent and ``eprint_id`` is missing.
    """
    if "doi" in metadata:
        return None

    eprint = metadata["eprint_id"]
    if "related_url" not in metadata or "items" not in metadata["related_url"]:
        return None

    possible = []
    for item in metadata["related_url"]["items"]:
        # Read every field afresh for each item so that a missing key can
        # neither raise a NameError nor reuse the previous item's value.
        url = item.get("url", "").strip()
        itype = item.get("type", "").strip().lower()
        description = item.get("description", "").strip().lower()
        if itype != "doi":
            continue
        if is_doi(url):
            possible.append([normalize_doi(url), description])
        elif "10." in url:
            # Dropping anything without a 10. pattern. Keep everything after
            # the first "10." so a later "10." in the suffix is not cut off.
            doi = "10." + url.split("10.", 1)[1]
            if is_doi(doi):
                possible.append([doi, description])

    if len(possible) == 1:
        # Description not really used
        return [eprint, possible[0][0]]
    return None
Exemple #2
0
def extract_doi_and_url(s):
    """Split ``s`` into a DOI and an optional URL and validate both.

    ``s`` is either a ``doi|url`` pair or a bare DOI. A leading
    ``http://doi.org/`` or ``https://doi.org/`` is removed from the DOI, as is
    a trailing ``<...>`` annotation.

    Args:
        s: The raw string to split.

    Returns:
        A ``(doi, url, error)`` tuple. ``doi`` and ``url`` are ``None`` when
        absent; ``error`` is a comma-separated message, or ``None`` when no
        problem was found.
    """
    doi, url = None, None
    if '|' in s:
        parts = s.split('|', maxsplit=2)
        doi, url = parts[0], parts[1]
    elif idutils.is_doi(s):
        doi = s
    errors = []
    # Clean up the DOI.
    if isinstance(doi, str):
        if doi.startswith('http://doi.org/'):
            doi = doi[len('http://doi.org/'):]
        elif doi.startswith('https://doi.org/'):
            doi = doi[len('https://doi.org/'):]
        if ('<' in doi) and doi.endswith('>'):
            # Keep only the text before the first '<'; taking element 0
            # cannot fail when the annotation itself contains another '<'.
            doi = doi.split('<', maxsplit=1)[0]
        doi = doi.strip()
    # Validate the DOI. ``is_doi`` may return a match object or ``None``
    # rather than a bool, so test truthiness instead of comparing to False.
    if doi is not None and not idutils.is_doi(doi):
        errors.append(f'"{doi}" does not appear to be a DOI')
    # Validate that the URL can be parsed.
    is_url, uerr = True, ''
    try:
        urlparse(url)
    except Exception as err:
        is_url = False
        # Keep the parser's message so the reported error is informative.
        uerr = str(err)
    if url is not None and not is_url:
        errors.append(f'"{url}" does not appear to be a URL, {uerr}')
    if errors:
        return doi, url, ', '.join(errors)
    return doi, url, None
Exemple #3
0
def zenodo_doi_minter(record_uuid, data):
    """Reserve a DOI persistent identifier for a record.

    When ``data`` has no DOI, one is generated from ``data['recid']`` and
    written back into ``data``. Nothing is created (``None`` is returned)
    when the DOI differs from the one generated for the record id.
    """
    doi = data.get('doi')
    reserved = PIDStatus.RESERVED
    pid_provider = None

    assert 'recid' in data
    if not doi:
        # Create a DOI if no DOI was found, and store it on the record data.
        doi = data['doi'] = doi_generator(data['recid'])

    if doi != doi_generator(data['recid']):
        return None

    assert idutils.is_doi(doi)

    # Only local DOIs are registered through the datacite provider.
    if is_local_doi(doi):
        pid_provider = 'datacite'

    return PersistentIdentifier.create(
        'doi',
        doi,
        pid_provider=pid_provider,
        object_type='rec',
        object_uuid=record_uuid,
        status=reserved,
    )
Exemple #4
0
def zenodo_doi_updater(record_uuid, data):
    """Replace the record's DOI identifier when an external DOI changed.

    Returns the newly created identifier, or ``None`` when the DOI equals the
    generated one or the one already stored for the record.

    Raises:
        PIDValueError: If the DOI is a local DOI that does not match the one
            generated from ``data['recid']``.
    """
    assert 'recid' in data
    doi = data.get('doi')
    assert doi
    assert idutils.is_doi(doi)

    generated = doi_generator(data['recid'])
    if doi == generated:
        # Same as the already generated DOI: nothing to do.
        return None
    if is_local_doi(doi):
        # A local DOI that is not derived from the recid is rejected.
        raise PIDValueError('doi', doi)

    current_pid = PersistentIdentifier.get_by_object(
        pid_type='doi', object_type='rec', object_uuid=record_uuid)
    if current_pid.pid_value == doi:
        return None

    with db.session.begin_nested():
        db.session.delete(current_pid)
        return PersistentIdentifier.create(
            'doi',
            doi,
            object_type='rec',
            object_uuid=record_uuid,
            status=PIDStatus.RESERVED,
        )
Exemple #5
0
 def validate(self, value, obj):
     """Run the parent validation, then require ``value`` to be a DOI.

     Raises:
         ValidationError: If ``idutils.is_doi`` rejects ``value``.
     """
     super().validate(value, obj)
     if idutils.is_doi(value):
         return
     raise ValidationError(
         _('%(value)s is not a valid DOI'),
         params={'value': value},
     )
Exemple #6
0
 def add_uid(self, uid):
     """Store ``uid`` in the reference field matching its identifier type.

     Checks run in a fixed order: arXiv id, DOI, handle, CNUM and finally
     ISBN. A value that fails ISBN normalisation with one of the handled
     errors is dropped without being recorded.
     """
     # Callers may pass ``None``; treat any falsy value as an empty string.
     uid = uid or ''

     if _is_arxiv(uid):
         self._ensure_reference_field('arxiv_eprints', [])
         self.obj['reference']['arxiv_eprints'].append(
             _normalize_arxiv(uid))
         return

     if idutils.is_doi(uid):
         self._ensure_reference_field('dois', [])
         self.obj['reference']['dois'].append(idutils.normalize_doi(uid))
         return

     if idutils.is_handle(uid):
         self._ensure_reference_field('persistent_identifiers', [])
         handle = idutils.normalize_handle(uid)
         # Add the 'hdl:' prefix unless normalize_handle already did.
         if not handle.startswith('hdl:'):
             handle = u'hdl:{}'.format(handle)
         self.obj['reference']['persistent_identifiers'].append(handle)
         return

     if self.RE_VALID_CNUM.match(uid):
         self._ensure_reference_field('publication_info', {})
         self.obj['reference']['publication_info']['cnum'] = uid
         return

     # idutils.is_isbn is implemented differently from normalize_isbn, so
     # normalisation itself is used as the ISBN test.
     try:
         normalized_isbn = idutils.normalize_isbn(uid)
         self._ensure_reference_field('publication_info', {})
         self.obj['reference']['publication_info']['isbn'] = normalized_isbn
     # See https://github.com/nekobcn/isbnid/issues/2 and
     # https://github.com/nekobcn/isbnid/issues/3 for the reasons behind
     # the long exception list.
     except (ISBNError, ISBNRangeError, UnicodeEncodeError):
         pass
Exemple #7
0
 def _deserialize(self, value, attr, data):
     """Deserialize, strip and normalize a DOI value.

     Calls ``self.fail('invalid_doi')`` when the stripped value is not a DOI.
     """
     raw = super(DOI, self)._deserialize(value, attr, data)
     candidate = raw.strip()
     if not idutils.is_doi(candidate):
         self.fail('invalid_doi')
     return idutils.normalize_doi(candidate)
 def add_uid(self, uid):
     """Store ``uid`` in the reference field matching its identifier type.

     Checks run in order: arXiv id, DOI, handle, URN, CNUM, then ISBN.
     Anything that cannot be turned into an ISBN is passed to ``add_misc``.
     """
     # Callers may pass ``None``; treat any falsy value as an empty string.
     uid = uid or ''

     if _is_arxiv(uid):
         self._ensure_reference_field('arxiv_eprint', _normalize_arxiv(uid))
         return

     if idutils.is_doi(uid):
         self._ensure_reference_field('dois', [])
         self.obj['reference']['dois'].append(idutils.normalize_doi(uid))
         return

     if idutils.is_handle(uid):
         self._ensure_reference_field('persistent_identifiers', [])
         identifiers = self.obj['reference']['persistent_identifiers']
         identifiers.append(
             {'schema': 'HDL', 'value': idutils.normalize_handle(uid)})
         return

     if idutils.is_urn(uid):
         self._ensure_reference_field('persistent_identifiers', [])
         identifiers = self.obj['reference']['persistent_identifiers']
         identifiers.append({'schema': 'URN', 'value': uid})
         return

     if self.RE_VALID_CNUM.match(uid):
         self._ensure_reference_field('publication_info', {})
         self.obj['reference']['publication_info']['cnum'] = uid
         return

     # ``idutils.is_isbn`` is too strict in what it accepts.
     try:
         parsed_isbn = str(ISBN(uid))
         self._ensure_reference_field('isbn', {})
         self.obj['reference']['isbn'] = parsed_isbn
     except Exception:
         self.add_misc(uid)
 def add_uid(self, uid):
     """Record ``uid`` under the reference key that fits its type.

     Recognised, in order: arXiv ids, DOIs, handles and CNUMs. Any other
     value is tried as an ISBN and ignored if normalisation raises one of
     the handled errors.
     """
     # ``None`` may arrive from anywhere; fold it into the empty string.
     uid = uid or ''
     if _is_arxiv(uid):
         self._ensure_reference_field('arxiv_eprints', [])
         eprints = self.obj['reference']['arxiv_eprints']
         eprints.append(_normalize_arxiv(uid))
     elif idutils.is_doi(uid):
         self._ensure_reference_field('dois', [])
         dois = self.obj['reference']['dois']
         dois.append(idutils.normalize_doi(uid))
     elif idutils.is_handle(uid):
         self._ensure_reference_field('persistent_identifiers', [])
         handle = idutils.normalize_handle(uid)
         if not handle.startswith('hdl:'):
             # Guards against normalize_handle one day prepending 'hdl:'.
             handle = u'hdl:{}'.format(handle)
         self.obj['reference']['persistent_identifiers'].append(handle)
     elif self.RE_VALID_CNUM.match(uid):
         self._ensure_reference_field('publication_info', {})
         publication_info = self.obj['reference']['publication_info']
         publication_info['cnum'] = uid
     else:
         # normalize_isbn and is_isbn disagree, so normalisation doubles as
         # the ISBN check.
         try:
             normalized_isbn = idutils.normalize_isbn(uid)
             self._ensure_reference_field('publication_info', {})
             publication_info = self.obj['reference']['publication_info']
             publication_info['isbn'] = normalized_isbn
         # See https://github.com/nekobcn/isbnid/issues/2 and
         # https://github.com/nekobcn/isbnid/issues/3 for the reasons
         # behind the long exception list.
         except (ISBNError, ISBNRangeError, UnicodeEncodeError):
             pass
Exemple #10
0
 def add_uid(self, uid):
     """Store ``uid`` in the object field matching its identifier type.

     Checks run in order: arXiv id, DOI, handle, CNUM, then ISBN. A value
     whose ISBN normalisation raises ``ISBNError`` is ignored.
     """
     # Callers may pass ``None``; treat any falsy value as an empty string.
     uid = uid or ""

     if _is_arxiv(uid):
         self._ensure_field("arxiv_eprints", [])
         self.obj["arxiv_eprints"].append(_normalize_arxiv(uid))
         return

     if idutils.is_doi(uid):
         self._ensure_field("publication_info", {})
         self.obj["publication_info"]["doi"] = idutils.normalize_doi(uid)
         return

     if idutils.is_handle(uid):
         self._ensure_field("persistent_identifiers", [])
         handle = idutils.normalize_handle(uid)
         # Add the 'hdl:' prefix unless normalize_handle already did.
         if not handle.startswith("hdl:"):
             handle = "hdl:{}".format(handle)
         self.obj["persistent_identifiers"].append(handle)
         return

     if self.RE_VALID_CNUM.match(uid):
         self._ensure_field("publication_info", {})
         self.obj["publication_info"]["cnum"] = uid
         return

     # idutils.is_isbn is implemented differently from normalize_isbn, so
     # normalisation itself is used as the ISBN test.
     try:
         normalized_isbn = idutils.normalize_isbn(uid)
         self._ensure_field("publication_info", {})
         self.obj["publication_info"]["isbn"] = normalized_isbn
     except ISBNError:
         pass
Exemple #11
0
def zenodo_doi_updater(record_uuid, data):
    """Swap the stored DOI identifier for a changed external DOI.

    Returns the new identifier, or ``None`` when the DOI matches the
    generated one or the value already stored.

    Raises:
        PIDValueError: For a local DOI that differs from the one generated
            from ``data['recid']``.
    """
    assert 'recid' in data
    doi = data.get('doi')
    assert doi
    assert idutils.is_doi(doi)

    # The generated DOI needs no update.
    if doi == doi_generator(data['recid']):
        return None

    # A local DOI that is not tied to the recid is an error.
    if is_local_doi(doi):
        raise PIDValueError('doi', doi)

    stored = PersistentIdentifier.get_by_object(
        pid_type='doi',
        object_type='rec',
        object_uuid=record_uuid,
    )
    if stored.pid_value != doi:
        with db.session.begin_nested():
            db.session.delete(stored)
            new_pid = PersistentIdentifier.create(
                'doi',
                doi,
                object_type='rec',
                object_uuid=record_uuid,
                status=PIDStatus.RESERVED,
            )
            return new_pid
Exemple #12
0
def format():
    """Return a formatted citation for the DOI given in the query string.

    Reads ``doi``, ``lang`` and ``style`` from the request arguments, using
    the configured defaults for language and style. Aborts with 404 when the
    DOI is invalid, when the language or style is not configured, or when
    the formatter API does not answer with status 200.
    """
    config = current_app.config
    doi = request.args.get('doi', '')
    lang = request.args.get('lang', config['CITATIONFORMATTER_DEFAULT_LANG'])
    style = request.args.get(
        'style', config['CITATIONFORMATTER_DEFAULT_STYLE'])

    # Reject bad input before contacting the formatter API.
    if not is_doi(doi):
        abort(404, "DOI not found.")
    if lang not in config['CITATIONFORMATTER_LANGS']:
        abort(404, "Language not found.")
    if style not in config['CITATIONFORMATTER_STYLES']:
        abort(404, "Style not found.")

    response = requests.get(
        config['CITATIONFORMATTER_API'],
        params={'doi': doi, 'lang': lang, 'style': style},
    )
    response.encoding = 'utf-8'

    if response.status_code != 200:
        abort(404, "DOI not found")
    else:
        return (response.text, 200, [('content-type', 'text/plain')])
Exemple #13
0
    def _add_uid(self, uid, skip_handle=False):
        """Store ``uid`` in the reference field matching its identifier type.

        Checks run in order: arXiv id, DOI, handle, URN, CNUM, CDS URL, ADS
        URL and finally ISBN. DOIs and CDS ids are only added when not
        already present.

        The ``skip_handle`` flag is used when adding a uid through the
        add_url function, since urls can easily be confused with handles.

        Raises:
            ValueError: If the value matches no type and is not an ISBN.
        """
        # Callers may pass ``None``; treat any falsy value as an empty string.
        uid = uid or ''

        if is_arxiv(uid):
            self._ensure_reference_field('arxiv_eprint', normalize_arxiv(uid))
            return

        if idutils.is_doi(uid):
            self._ensure_reference_field('dois', [])
            normalized_doi = idutils.normalize_doi(uid)
            known_dois = self.obj['reference']['dois']
            if normalized_doi not in known_dois:
                known_dois.append(normalized_doi)
            return

        if idutils.is_handle(uid) and not skip_handle:
            self._ensure_reference_field('persistent_identifiers', [])
            identifiers = self.obj['reference']['persistent_identifiers']
            identifiers.append(
                {'schema': 'HDL', 'value': idutils.normalize_handle(uid)})
            return

        if idutils.is_urn(uid):
            self._ensure_reference_field('persistent_identifiers', [])
            identifiers = self.obj['reference']['persistent_identifiers']
            identifiers.append({'schema': 'URN', 'value': uid})
            return

        if self.RE_VALID_CNUM.match(uid):
            self._ensure_reference_field('publication_info', {})
            self.obj['reference']['publication_info']['cnum'] = uid
            return

        if is_cds_url(uid):
            self._ensure_reference_field('external_system_identifiers', [])
            cds_entry = {'schema': 'CDS', 'value': extract_cds_id(uid)}
            external_ids = self.obj['reference']['external_system_identifiers']
            if cds_entry not in external_ids:
                external_ids.append(cds_entry)
            return

        if is_ads_url(uid):
            self._ensure_reference_field('external_system_identifiers', [])
            external_ids = self.obj['reference']['external_system_identifiers']
            external_ids.append(
                {'schema': 'ADS', 'value': extract_ads_id(uid)})
            return

        # ``idutils.is_isbn`` is too strict in what it accepts.
        try:
            parsed_isbn = str(ISBN(uid))
            self._ensure_reference_field('isbn', {})
            self.obj['reference']['isbn'] = parsed_isbn
        except Exception:
            raise ValueError('Unrecognized uid type')
Exemple #14
0
 def _deserialize(self, value, attr, data):
     """Deserialize, strip and normalize a DOI value.

     An empty value is returned unchanged unless the field is required or
     the context sets ``doi_required``. Any other value that is not a DOI
     triggers ``self.fail('invalid_doi')``.
     """
     cleaned = super(DOI, self)._deserialize(value, attr, data).strip()
     if cleaned == '':
         mandatory = self.required or self.context.get('doi_required')
         if not mandatory:
             return cleaned
     if not idutils.is_doi(cleaned):
         self.fail('invalid_doi')
     return idutils.normalize_doi(cleaned)
Exemple #15
0
 def _deserialize(self, value, attr, data):
     """Return the normalized DOI for ``value``.

     A blank value passes through as-is when neither ``self.required`` nor
     the ``doi_required`` context entry is set; otherwise a value that is
     not a DOI makes the field fail with ``invalid_doi``.
     """
     stripped = super(DOI, self)._deserialize(value, attr, data)
     stripped = stripped.strip()

     is_blank = stripped == ''
     if is_blank and not (
             self.required or self.context.get('doi_required')):
         return stripped

     if idutils.is_doi(stripped):
         return idutils.normalize_doi(stripped)
     self.fail('invalid_doi')
     return idutils.normalize_doi(stripped)
Exemple #16
0
def doi_minter(record_uuid, data):
    """Generate and reserve a DOI for a record that has none.

    When ``data`` already holds a DOI it is only checked and nothing is
    created, so ``None`` is returned. Otherwise a DOI is generated from
    ``data['recid']``, written to ``data['doi']`` and reserved with the
    datacite provider.
    """
    doi = data.get('doi')
    assert 'recid' in data
    assert not doi or idutils.is_doi(doi)

    if doi:
        # A DOI was supplied: nothing is minted here.
        return None

    doi = cds_doi_generator(data['recid'])
    data['doi'] = doi

    # Make sure the generated value is a proper DOI.
    assert idutils.is_doi(doi)
    return PersistentIdentifier.create(
        'doi',
        doi,
        pid_provider='datacite',
        object_type='rec',
        object_uuid=record_uuid,
        status=PIDStatus.RESERVED
    )
Exemple #17
0
def test_doi():
    """Check which strings ``idutils.is_doi`` accepts and rejects."""
    accepted = (
        '10.1000/123456',
        '10.1038/issn.1476-4687',
        '10.1000.10/123456',
    )
    rejected = (
        '10.1000/',
        '10.10O0/123456',
        '10.1.NOTGOOD.0/123456',
    )
    for candidate in accepted:
        assert idutils.is_doi(candidate)
    for candidate in rejected:
        assert not idutils.is_doi(candidate)
Exemple #18
0
def import_article(identifier):
    """Import a new article from arXiv or Crossref based on the identifier.

    The identifier is normalized as an arXiv id or a DOI. If no persistent
    identifier with that type and value exists yet, the matching importer is
    called; arXiv articles are additionally merged with Crossref data.

    Args:
        identifier(str): the ID of the record to import

    Returns:
        dict: the serialized article

    Raises:
        ExistingArticleError: if the record is already in Inspire.
        ImportArticleError: if no article is found.
        UnknownImportIdentifierError: if the identifier is neither an arXiv
            id nor a DOI.

    Note:
        The original documentation also lists ``ImportConnectionError`` and
        ``ImportParsingError``; presumably the importers raise them.
    """
    if is_arxiv(identifier):
        pid_type, pid_value = "arxiv", normalize_arxiv(identifier)
    elif is_doi(identifier):
        pid_type, pid_value = "doi", normalize_doi(identifier)
    else:
        raise UnknownImportIdentifierError(identifier)

    lookup = PersistentIdentifier.query.filter_by(
        pid_type=pid_type, pid_value=pid_value)
    pid = lookup.one_or_none()
    if pid:
        raise ExistingArticleError(
            f"Article {identifier} already in Inspire. UUID: {pid.object_uuid}"
        )

    importers = {"arxiv": import_arxiv, "doi": import_doi}
    importer = importers.get(pid_type, UnknownImportIdentifierError)
    article = importer(pid_value)
    if not article:
        raise ImportArticleError(f"No article found for {identifier}")

    if pid_type != "arxiv":
        return article
    return merge_article_with_crossref_data(article)
Exemple #19
0
def get_related_identifiers_url(record: Record, doi_prefix: str) -> List[Dict]:
    """Create related identifiers URL.

    Each entry of ``metadata.related_identifiers`` is deep-copied and given a
    ``url`` key: the identifier itself when it is a URL, a ``/records/<id>``
    path when it is a DOI under ``doi_prefix``, otherwise the result of
    ``idutils.to_url``. If resolution fails, the raw identifier is used.

    Args:
        record (Record): Record API Object from where the related
        identifiers will be extracted.

        doi_prefix (str): GEO Knowledge Hub DOI Prefix.

    Returns:
        List[Dict]: List of record related identifiers (with URL resolved)

    Raises:
        KeyError: If an entry lacks the ``scheme`` or ``identifier`` key.

    Note:
        The `doi_prefix` is used to check if the items are managed by the GEO Knowledge Hub.
    """
    # extracting related identifiers
    related_identifiers = py_.get(record, "metadata.related_identifiers", [])

    new_related_identifiers = []
    for related_identifier in related_identifiers:
        scheme = related_identifier["scheme"]
        identifier = related_identifier["identifier"]

        related_identifier_obj = py_.set_(py_.clone_deep(related_identifier),
                                          "url", "")

        try:
            if idutils.is_url(identifier):
                related_identifier_obj["url"] = identifier
            else:
                # checking if the doi is internal
                if idutils.is_doi(identifier):
                    identifier_split = identifier.split("/")

                    if doi_prefix and identifier_split[0] == doi_prefix:
                        related_identifier_obj["url"] = posixpath.join(
                            "/records", identifier_split[1])

                if not related_identifier_obj["url"]:
                    related_identifier_obj["url"] = idutils.to_url(
                        identifier, scheme, "https")
        except Exception:
            # Best effort: fall back to the raw identifier. Only ``Exception``
            # is caught so KeyboardInterrupt/SystemExit still propagate.
            related_identifier_obj["url"] = identifier
        new_related_identifiers.append(related_identifier_obj)
    return new_related_identifiers
Exemple #20
0
def import_article(identifier):
    """Import a new article from arXiv or Crossref based on the identifier.

    The identifier is normalized as an arXiv id or a lower-cased DOI. If no
    record id is found for it, the matching importer is called; arXiv
    articles are additionally merged with Crossref data.

    Args:
        identifier(str): the ID of the record to import

    Returns:
        dict: the serialized article

    Raises:
        ExistingArticleError: if the record is already in Inspire.
        ImportArticleNotFoundError: if no article is found.
        UnknownImportIdentifierError: if the identifier is neither an arXiv
            id nor a DOI.

    Note:
        The original documentation also lists ``ImportParsingError``;
        presumably the importers raise it.
    """
    if is_arxiv(identifier):
        pid_type, pid_value = "arxiv", normalize_arxiv(identifier)
    elif is_doi(identifier):
        pid_type, pid_value = "doi", normalize_doi(identifier).lower()
    else:
        raise UnknownImportIdentifierError(identifier)

    recid = get_pid_for_pid(pid_type, pid_value, provider="recid")
    if recid:
        raise ExistingArticleError(
            f"The article {identifier} already exists in Inspire", recid)

    importers = {"arxiv": import_arxiv, "doi": import_doi}
    importer = importers.get(pid_type, UnknownImportIdentifierError)
    article = importer(pid_value)
    if not article:
        raise ImportArticleNotFoundError(f"No article found for {identifier}")

    if pid_type != "arxiv":
        return article
    return merge_article_with_crossref_data(article)
    def lookup_re3data(self):
        """Fetch re3data repository metadata for the current client id.

        Does nothing unless both ``self.client_id`` and ``self.pid_scheme``
        are set. The client id is mapped to a repository DOI, the re3data
        API is queried with it, and the first ``link`` element of the answer
        is followed; the response is stored in ``self.re3metadata_raw`` and
        parsed with ``parseRepositoryMetadata``. Failures while handling the
        first answer are logged as warnings.
        """
        if not (self.client_id and self.pid_scheme):
            return

        # Mapping of {client_id: re3doi}.
        re3doi = RepositoryHelper.DATACITE_REPOSITORIES.get(self.client_id)
        if re3doi:
            if idutils.is_doi(re3doi):
                # e.g. https://doi.org/10.17616/R3XS37
                short_re3doi = idutils.normalize_pid(re3doi, scheme='doi')
            else:
                re3doi = None

        if not re3doi:
            self.logger.warning(
                'FsF-R1.3-01M : No DOI of client id is available from datacite api'
            )
            return

        # pid -> clientId -> repo doi -> re3id, then query the re3data API.
        self.logger.info(
            'FsF-R1.3-01M : Found match re3data (DOI-based) record')
        # e.g. https://re3data.org/api/beta/repositories?query=
        query_url = Preprocessor.RE3DATA_API + '?query=' + short_re3doi
        search_request = RequestHelper(url=query_url)
        search_request.setAcceptType(AcceptTypes.xml)
        re_source, xml = search_request.content_negotiate(metric_id='RE3DATA')
        try:
            if isinstance(xml, bytes):
                xml = xml.decode().encode()
            root = etree.fromstring(xml)

            # <link href="https://www.re3data.org/api/beta/repository/r3d100010134" rel="self" />
            re3link = root.xpath('//link')[0].attrib['href']
            if re3link is not None:
                self.logger.info(
                    'FsF-R1.3-01M : Found match re3data metadata record -: '
                    + str(re3link))
                # Query the repository metadata itself.
                detail_request = RequestHelper(url=re3link)
                detail_request.setAcceptType(AcceptTypes.xml)
                re3_source, re3_response = detail_request.content_negotiate(
                    metric_id='RE3DATA')
                self.re3metadata_raw = re3_response
                self.parseRepositoryMetadata()
        except Exception as e:
            self.logger.warning(
                'FsF-R1.3-01M : Malformed re3data (DOI-based) record received: '
                + str(e))
 def add_uid(self, uid):
     """Store ``uid`` in the reference field matching its identifier type.

     Checks run in order: arXiv id, DOI, handle, URN, CNUM, then ISBN. An
     ISBN is stored without spaces and hyphens; a value that fails ISBN
     normalisation with one of the handled errors goes to ``add_misc``.
     """
     # Callers may pass ``None``; treat any falsy value as an empty string.
     uid = uid or ''

     if _is_arxiv(uid):
         self._ensure_reference_field('arxiv_eprint', _normalize_arxiv(uid))
         return

     if idutils.is_doi(uid):
         self._ensure_reference_field('dois', [])
         self.obj['reference']['dois'].append(idutils.normalize_doi(uid))
         return

     if idutils.is_handle(uid):
         self._ensure_reference_field('persistent_identifiers', [])
         identifiers = self.obj['reference']['persistent_identifiers']
         identifiers.append(
             {'schema': 'HDL', 'value': idutils.normalize_handle(uid)})
         return

     if idutils.is_urn(uid):
         self._ensure_reference_field('persistent_identifiers', [])
         identifiers = self.obj['reference']['persistent_identifiers']
         identifiers.append({'schema': 'URN', 'value': uid})
         return

     if self.RE_VALID_CNUM.match(uid):
         self._ensure_reference_field('publication_info', {})
         self.obj['reference']['publication_info']['cnum'] = uid
         return

     # idutils.is_isbn is implemented differently from normalize_isbn, so
     # normalisation itself is used as the ISBN test.
     try:
         normalized_isbn = idutils.normalize_isbn(uid)
         self._ensure_reference_field('isbn', {})
         compact_isbn = normalized_isbn.replace(' ', '').replace('-', '')
         self.obj['reference']['isbn'] = compact_isbn
     # See https://github.com/nekobcn/isbnid/issues/2 and
     # https://github.com/nekobcn/isbnid/issues/3 for the reasons behind
     # the long exception list.
     except (ISBNError, ISBNRangeError, UnicodeEncodeError):
         self.add_misc(uid)
Exemple #23
0
def zenodo_doi_minter(record_uuid, data):
    """Reserve a DOI persistent identifier for a record.

    A missing DOI is generated from ``data['recid']`` and written back to
    ``data``. The generated DOI is reserved with the datacite provider; any
    other non-local DOI is reserved without a provider.

    Raises:
        PIDValueError: For a local DOI that differs from the generated one.
    """
    doi = data.get('doi')
    assert 'recid' in data

    if not doi:
        # Create a DOI if no DOI was found, and store it on the record data.
        doi = data['doi'] = doi_generator(data['recid'])

    # Make sure it's a proper DOI.
    assert idutils.is_doi(doi)

    if doi == doi_generator(data['recid']):
        # The DOI is the one generated for this recid.
        return PersistentIdentifier.create(
            'doi',
            doi,
            pid_provider='datacite',
            object_type='rec',
            object_uuid=record_uuid,
            status=PIDStatus.RESERVED,
        )

    # The DOI was supplied by the user; a local DOI must depend on the recid.
    if is_local_doi(doi):
        raise PIDValueError('doi', doi)

    return PersistentIdentifier.create(
        'doi',
        doi,
        object_type='rec',
        object_uuid=record_uuid,
        status=PIDStatus.RESERVED,
    )
Exemple #24
0
def zenodo_doi_minter(record_uuid, data):
    """Create the reserved DOI identifier for a record.

    If ``data`` carries no DOI, one is derived from ``data['recid']`` and
    stored in ``data['doi']``. A DOI equal to the derived one is created with
    the datacite provider, a non-local DOI without a provider.

    Raises:
        PIDValueError: If a local DOI does not match the derived one.
    """
    doi = data.get('doi')
    assert 'recid' in data

    # Fall back to a generated DOI when none was given.
    if not doi:
        doi = doi_generator(data['recid'])
        data['doi'] = doi

    assert idutils.is_doi(doi)

    user_provided = doi != doi_generator(data['recid'])
    if user_provided and is_local_doi(doi):
        # A custom local DOI that is independent of the recid is rejected.
        raise PIDValueError('doi', doi)

    if user_provided:
        return PersistentIdentifier.create(
            'doi',
            doi,
            object_type='rec',
            object_uuid=record_uuid,
            status=PIDStatus.RESERVED,
        )

    # The DOI is the generated one.
    return PersistentIdentifier.create(
        'doi',
        doi,
        pid_provider='datacite',
        object_type='rec',
        object_uuid=record_uuid,
        status=PIDStatus.RESERVED,
    )
Exemple #25
0
 def add_uid(self, uid):
     """Record ``uid`` under the reference key that fits its type.

     Recognised, in order: arXiv ids, DOIs, handles, URNs and CNUMs. Any
     other value is tried as an ISBN (stored without spaces or hyphens) and
     handed to ``add_misc`` when normalisation raises a handled error.
     """
     # ``None`` may arrive from anywhere; fold it into the empty string.
     uid = uid or ''
     if _is_arxiv(uid):
         self._ensure_reference_field('arxiv_eprint', _normalize_arxiv(uid))
     elif idutils.is_doi(uid):
         self._ensure_reference_field('dois', [])
         dois = self.obj['reference']['dois']
         dois.append(idutils.normalize_doi(uid))
     elif idutils.is_handle(uid):
         self._ensure_reference_field('persistent_identifiers', [])
         pids = self.obj['reference']['persistent_identifiers']
         pids.append(dict(schema='HDL', value=idutils.normalize_handle(uid)))
     elif idutils.is_urn(uid):
         self._ensure_reference_field('persistent_identifiers', [])
         pids = self.obj['reference']['persistent_identifiers']
         pids.append(dict(schema='URN', value=uid))
     elif self.RE_VALID_CNUM.match(uid):
         self._ensure_reference_field('publication_info', {})
         publication_info = self.obj['reference']['publication_info']
         publication_info['cnum'] = uid
     else:
         # normalize_isbn and is_isbn disagree, so normalisation doubles as
         # the ISBN check.
         try:
             raw_isbn = idutils.normalize_isbn(uid)
             self._ensure_reference_field('isbn', {})
             without_spaces = raw_isbn.replace(' ', '')
             self.obj['reference']['isbn'] = without_spaces.replace('-', '')
         # See https://github.com/nekobcn/isbnid/issues/2 and
         # https://github.com/nekobcn/isbnid/issues/3 for the reasons
         # behind the long exception list.
         except (ISBNError, ISBNRangeError, UnicodeEncodeError):
             self.add_misc(uid)
def validate_doi_identifier(identifier):
    """Raise ``ValidationError`` unless ``identifier`` is a DOI."""
    if idutils.is_doi(identifier):
        return
    raise ValidationError(f"'{identifier}' is not a valid DOI.")
Exemple #27
0
    # Collect the README lines that precede the table; reading stops at the
    # first line starting with "|".
    # NOTE(review): ``intro`` and the enclosing definition are outside this
    # excerpt; ``intro`` is presumably a list created earlier.
    with open("README.md", "r") as infile:
        for line in infile:
            if line.startswith("|"):  # start of table
                break
            else:
                intro.append(line)

    # generate the new README
    records = list()
    with open("maverefs.tsv", mode="r", newline="") as infile, open("README.md", mode="w", newline="") as outfile:
        # rewrite the introductory text
        for line in intro:
            print(line, end="", file=outfile)

        # regenerate the table line-by-line
        reader = csv.DictReader(infile, delimiter="\t")
        # "|" as delimiter and "|\n" as line terminator, combined with the
        # "|" printed before each row, give pipe-delimited table rows.
        writer = csv.DictWriter(outfile, fieldnames=reader.fieldnames, delimiter="|", lineterminator="|\n")
        print("|", end="", file=outfile)
        writer.writeheader()
        # separator row: one "|---" cell per column
        print("|---" * len(reader.fieldnames) + "|\n", end="", file=outfile)
        for row in reader:
            # Replace recognised identifiers with the output of
            # format_md_link (defined elsewhere; presumably a markdown link).
            if idutils.is_pmid(row["PMID"]):
                row["PMID"] = format_md_link(row["PMID"], "pmid")
            if idutils.is_doi(row["DOI"]):
                row["DOI"] = format_md_link(row["DOI"], "doi")
            raw_data_schemes = idutils.detect_identifier_schemes(row["Raw Data"])
            if len(raw_data_schemes) == 1:  # uniquely identified the raw data
                row["Raw Data"] = format_md_link(row["Raw Data"], raw_data_schemes[0])
            print("|", end="", file=outfile)
            writer.writerow(row)
Exemple #28
0
def validate_doi(doi):
    """Return the result of ``idutils.is_doi`` for ``doi``."""
    outcome = idutils.is_doi(doi)
    return outcome
Exemple #29
0
 def _is_doi(id_, type_):
     return (not type_ or type_.upper() == 'DOI') and is_doi(id_)
Exemple #30
0
def add_thesis_doi(data_collection, thesis_collection, token, production=True):
    """Add thesis DOIs to the CaltechDATA records they reference.

    Every thesis record with a DOI is scanned for ``related_url`` items of
    type ``doi`` whose DOI has the prefix ``10.22002``. For each such link the
    data record is read and, unless the thesis DOI is already listed, an
    ``IsSupplementTo`` related identifier is added through
    ``caltechdata_edit``.

    Args:
        data_collection: Collection the data records are read from.
        thesis_collection: Collection holding the thesis records.
        token: Passed through to ``caltechdata_edit``.
        production: Accepted for interface compatibility; not used here.
    """
    # Search across CaltechTHESIS DOIs
    dot_paths = ["._Key", ".doi", ".official_url", ".related_url"]
    labels = ["eprint_id", "doi", "official_url", "related_url"]
    keys = dataset.keys(thesis_collection)
    all_metadata = get_records(dot_paths, "dois", thesis_collection, keys, labels)
    dois = []
    for metadata in progressbar(all_metadata, redirect_stdout=True):
        if "doi" not in metadata:
            continue
        record_doi = metadata["doi"].strip()
        if "related_url" not in metadata or "items" not in metadata["related_url"]:
            continue
        for item in metadata["related_url"]["items"]:
            # Read both fields afresh for each item so that a missing key can
            # neither raise a NameError nor reuse the previous item's value.
            url = item.get("url", "").strip()
            itype = item.get("type", "").strip().lower()
            if itype != "doi":
                continue
            if idutils.is_doi(url):
                # Keep everything after the first "10." so a later "10." in
                # the suffix is not cut off.
                doi = "10." + url.split("10.", 1)[1]
                prefix = doi.split("/")[0]
                if prefix == "10.22002":
                    dois.append([doi, record_doi])
            else:
                print("Ignoring non-DOI")
                print(metadata["eprint_id"])
                print(url.split("10."))

    for cd_doi, thesis_doi in dois:
        print("Checking " + cd_doi)
        # Work out the record number for this DOI alone; a value left over
        # from the previous iteration would update the wrong record.
        record_number = None
        for marker in ("D1.", "d1."):
            if marker in cd_doi:
                record_number = cd_doi.split(marker)[1]
        if record_number is None:
            print("Skipping " + cd_doi + ": no D1 record number found")
            continue

        record, err = dataset.read(data_collection, record_number)
        if err != "":
            print(err)
            exit()

        if "relatedIdentifiers" in record:
            identifiers = record["relatedIdentifiers"]
        else:
            identifiers = []
        already_linked = any(
            idv["relatedIdentifier"] == thesis_doi for idv in identifiers
        )
        if already_linked:
            continue

        identifiers.append(
            {
                "relatedIdentifier": thesis_doi,
                "relatedIdentifierType": "DOI",
                "relationType": "IsSupplementTo",
            }
        )
        new_metadata = {"relatedIdentifiers": identifiers}
        print("Adding " + thesis_doi + " to " + cd_doi)
        response = caltechdata_edit(
            token, record_number, new_metadata, {}, {}, True
        )
        print(response)
Exemple #31
0
 def _is_doi(id_, type_):
     return (not type_ or type_.upper() == 'DOI') and is_doi(id_)
Exemple #32
0
 def _deserialize(self, value, attr, data, **kwargs):
     """Return the string matched by ``idutils.is_doi`` for ``value``.

     Raises:
         ValidationError: If ``idutils.is_doi`` returns a falsy result.
     """
     match = idutils.is_doi(value)
     if match:
         return match.string
     raise ValidationError(f"It is not valid doi: \"{value}\"")