def decide_doi_update(metadata):
    """Pick the DOI to set on a record that has none, if it is unambiguous.

    Looks through ``metadata["related_url"]["items"]`` for entries whose
    ``type`` is ``doi`` (case-insensitive, surrounding whitespace ignored) and
    collects the DOI found in each entry's ``url``.

    :param metadata: record metadata; must contain ``eprint_id`` whenever it
        has no ``doi`` key.
    :returns: ``[eprint_id, doi]`` when exactly one candidate DOI was found,
        otherwise ``None`` (also when the record already has a ``doi``).
    :raises KeyError: if the record has no ``doi`` and no ``eprint_id``.
    """
    if "doi" in metadata:
        return None

    eprint = metadata["eprint_id"]
    if "related_url" not in metadata or "items" not in metadata["related_url"]:
        return None

    candidates = []
    for item in metadata["related_url"]["items"]:
        # Read every field afresh for each item so that a missing key can
        # neither raise nor silently reuse the previous item's value.
        url = item.get("url", "").strip()
        item_type = item.get("type", "").strip().lower()
        if item_type != "doi":
            continue

        if is_doi(url):
            candidates.append(normalize_doi(url))
        elif "10." in url:
            # Dropping anything before the first "10."; split only once so a
            # later "10." inside the DOI suffix is kept.
            doi = "10." + url.split("10.", 1)[1]
            if is_doi(doi):
                candidates.append(doi)

    if len(candidates) == 1:
        return [eprint, candidates[0]]
    return None
def add_uid(self, uid):
    """Store ``uid`` in the reference field matching its identifier type.

    Checks run in this order: arXiv, DOI, handle, CNUM, ISBN.
    """
    # Callers may pass None (or another falsy value); treat it as empty.
    uid = uid or ''

    if _is_arxiv(uid):
        self._ensure_reference_field('arxiv_eprints', [])
        eprints = self.obj['reference']['arxiv_eprints']
        eprints.append(_normalize_arxiv(uid))
        return

    if idutils.is_doi(uid):
        self._ensure_reference_field('dois', [])
        dois = self.obj['reference']['dois']
        dois.append(idutils.normalize_doi(uid))
        return

    if idutils.is_handle(uid):
        self._ensure_reference_field('persistent_identifiers', [])
        handle = idutils.normalize_handle(uid)
        # Only add the prefix when normalize_handle did not already do so.
        if not handle.startswith('hdl:'):
            handle = u'hdl:{}'.format(handle)
        self.obj['reference']['persistent_identifiers'].append(handle)
        return

    if self.RE_VALID_CNUM.match(uid):
        self._ensure_reference_field('publication_info', {})
        self.obj['reference']['publication_info']['cnum'] = uid
        return

    # idutils.is_isbn and normalize_isbn are implemented differently, so
    # normalization itself is used as the ISBN check.
    try:
        isbn = idutils.normalize_isbn(uid)
        self._ensure_reference_field('publication_info', {})
        self.obj['reference']['publication_info']['isbn'] = isbn
    # The exception list is long on purpose, see
    # https://github.com/nekobcn/isbnid/issues/2 and
    # https://github.com/nekobcn/isbnid/issues/3
    except (ISBNError, ISBNRangeError, UnicodeEncodeError):
        pass
def add_uid(self, uid):
    """Store ``uid`` in the reference field matching its identifier type.

    Checks run in this order: arXiv, DOI, handle, URN, CNUM, ISBN. Anything
    that fails the final ISBN parse is passed to ``add_misc``.
    """
    # Callers may pass None (or another falsy value); treat it as empty.
    uid = uid or ''

    if _is_arxiv(uid):
        self._ensure_reference_field('arxiv_eprint', _normalize_arxiv(uid))
        return

    if idutils.is_doi(uid):
        self._ensure_reference_field('dois', [])
        dois = self.obj['reference']['dois']
        dois.append(idutils.normalize_doi(uid))
        return

    if idutils.is_handle(uid):
        self._ensure_reference_field('persistent_identifiers', [])
        pids = self.obj['reference']['persistent_identifiers']
        pids.append({
            'schema': 'HDL',
            'value': idutils.normalize_handle(uid),
        })
        return

    if idutils.is_urn(uid):
        self._ensure_reference_field('persistent_identifiers', [])
        pids = self.obj['reference']['persistent_identifiers']
        pids.append({
            'schema': 'URN',
            'value': uid,
        })
        return

    if self.RE_VALID_CNUM.match(uid):
        self._ensure_reference_field('publication_info', {})
        self.obj['reference']['publication_info']['cnum'] = uid
        return

    # ``idutils.is_isbn`` is too strict in what it accepts, so parsing with
    # ``ISBN`` is used as the check instead.
    try:
        isbn = str(ISBN(uid))
        self._ensure_reference_field('isbn', {})
        self.obj['reference']['isbn'] = isbn
    except Exception:
        self.add_misc(uid)
def add_doi(self, doi, source=None, material=None):
    """Append a normalized DOI entry to the ``dois`` field.

    Nothing is added when ``doi`` is ``None``, when normalizing it raises
    ``AttributeError``, or when the normalized value is empty.

    :param doi: doi for the current document.
    :type doi: string

    :param source: source for the doi.
    :type source: string

    :param material: material for the doi.
    :type material: string
    """
    if doi is None:
        return

    try:
        normalized = idutils.normalize_doi(doi)
    except AttributeError:
        return

    if not normalized:
        return

    entry = self._sourced_dict(source, value=normalized)
    if material is not None:
        entry['material'] = material

    self._append_to('dois', entry)
def add_uid(self, uid):
    """Store ``uid`` in the field of ``self.obj`` matching its type.

    Checks run in this order: arXiv, DOI, handle, CNUM, ISBN.
    """
    # Callers may pass None (or another falsy value); treat it as empty.
    uid = uid or ""

    if _is_arxiv(uid):
        self._ensure_field("arxiv_eprints", [])
        eprints = self.obj["arxiv_eprints"]
        eprints.append(_normalize_arxiv(uid))
        return

    if idutils.is_doi(uid):
        self._ensure_field("publication_info", {})
        self.obj["publication_info"]["doi"] = idutils.normalize_doi(uid)
        return

    if idutils.is_handle(uid):
        self._ensure_field("persistent_identifiers", [])
        handle = idutils.normalize_handle(uid)
        # Only add the prefix when normalize_handle did not already do so.
        if not handle.startswith("hdl:"):
            handle = "hdl:{}".format(handle)
        self.obj["persistent_identifiers"].append(handle)
        return

    if self.RE_VALID_CNUM.match(uid):
        self._ensure_field("publication_info", {})
        self.obj["publication_info"]["cnum"] = uid
        return

    # idutils.is_isbn and normalize_isbn are implemented differently, so
    # normalization itself is used as the ISBN check.
    try:
        isbn = idutils.normalize_isbn(uid)
        self._ensure_field("publication_info", {})
        self.obj["publication_info"]["isbn"] = isbn
    except ISBNError:
        pass
def add_uid(self, uid):
    """Store ``uid`` in the reference field matching its identifier type.

    Checks run in this order: arXiv, DOI, handle, CNUM, ISBN.
    """
    # Callers may pass None (or another falsy value); treat it as empty.
    uid = uid or ''

    if _is_arxiv(uid):
        self._ensure_reference_field('arxiv_eprints', [])
        eprints = self.obj['reference']['arxiv_eprints']
        eprints.append(_normalize_arxiv(uid))
        return

    if idutils.is_doi(uid):
        self._ensure_reference_field('dois', [])
        dois = self.obj['reference']['dois']
        dois.append(idutils.normalize_doi(uid))
        return

    if idutils.is_handle(uid):
        self._ensure_reference_field('persistent_identifiers', [])
        handle = idutils.normalize_handle(uid)
        # Only add the prefix when normalize_handle did not already do so.
        if not handle.startswith('hdl:'):
            handle = u'hdl:{}'.format(handle)
        self.obj['reference']['persistent_identifiers'].append(handle)
        return

    if self.RE_VALID_CNUM.match(uid):
        self._ensure_reference_field('publication_info', {})
        self.obj['reference']['publication_info']['cnum'] = uid
        return

    # idutils.is_isbn and normalize_isbn are implemented differently, so
    # normalization itself is used as the ISBN check.
    try:
        isbn = idutils.normalize_isbn(uid)
        self._ensure_reference_field('publication_info', {})
        self.obj['reference']['publication_info']['isbn'] = isbn
    # The exception list is long on purpose, see
    # https://github.com/nekobcn/isbnid/issues/2 and
    # https://github.com/nekobcn/isbnid/issues/3
    except (ISBNError, ISBNRangeError, UnicodeEncodeError):
        pass
def _deserialize(self, value, attr, data):
    """Deserialize a DOI: strip it, validate it, return it normalized.

    Calls ``self.fail('invalid_doi')`` when the stripped value is not a DOI
    according to ``idutils.is_doi``.
    """
    stripped = super(DOI, self)._deserialize(value, attr, data).strip()
    is_valid = idutils.is_doi(stripped)
    if not is_valid:
        self.fail('invalid_doi')
    return idutils.normalize_doi(stripped)
def normalize_provided_doi(obj, formdata):
    """Replace ``formdata['doi']`` with its normalized form, or ``None``.

    :param obj: unused here; kept so the signature stays unchanged.
    :param formdata: mapping holding the submitted form values; it is
        modified in place.
    :returns: the same ``formdata`` object, with ``doi`` set to the
        normalized DOI, or to ``None`` when no usable DOI was provided.
    """
    try:
        doi = formdata.get('doi')
        # A missing DOI is not an error: store None without calling the
        # normalizer, which expects a string.
        formdata['doi'] = None if doi is None else normalize_doi(doi)
    except (AttributeError, TypeError):
        # AttributeError was already treated as "not a DOI"; TypeError
        # covers non-string values handed to the normalizer.
        formdata['doi'] = None
    return formdata
def _add_uid(self, uid, skip_handle=False):
    """Store ``uid`` in the reference field matching its identifier type.

    Checks run in this order: arXiv, DOI, handle, URN, CNUM, CDS URL,
    ADS URL, ISBN.

    The ``skip_handle`` flag is used when adding a uid through the add_url
    function since urls can be easily confused with handle elements.

    Raises:
        ValueError: if ``uid`` matches none of the types and cannot be
            parsed as an ISBN.
    """
    # Callers may pass None (or another falsy value); treat it as empty.
    uid = uid or ''

    if is_arxiv(uid):
        self._ensure_reference_field('arxiv_eprint', normalize_arxiv(uid))
        return

    if idutils.is_doi(uid):
        self._ensure_reference_field('dois', [])
        doi = idutils.normalize_doi(uid)
        # Do not store the same DOI twice.
        if doi not in self.obj['reference']['dois']:
            self.obj['reference']['dois'].append(doi)
        return

    if idutils.is_handle(uid) and not skip_handle:
        self._ensure_reference_field('persistent_identifiers', [])
        pids = self.obj['reference']['persistent_identifiers']
        pids.append({
            'schema': 'HDL',
            'value': idutils.normalize_handle(uid),
        })
        return

    if idutils.is_urn(uid):
        self._ensure_reference_field('persistent_identifiers', [])
        pids = self.obj['reference']['persistent_identifiers']
        pids.append({
            'schema': 'URN',
            'value': uid,
        })
        return

    if self.RE_VALID_CNUM.match(uid):
        self._ensure_reference_field('publication_info', {})
        self.obj['reference']['publication_info']['cnum'] = uid
        return

    if is_cds_url(uid):
        self._ensure_reference_field('external_system_identifiers', [])
        cds_entry = {'schema': 'CDS', 'value': extract_cds_id(uid)}
        # Do not store the same CDS identifier twice.
        if cds_entry not in self.obj['reference'][
                'external_system_identifiers']:
            self.obj['reference']['external_system_identifiers'].append(
                cds_entry)
        return

    if is_ads_url(uid):
        self._ensure_reference_field('external_system_identifiers', [])
        external_ids = self.obj['reference']['external_system_identifiers']
        external_ids.append({
            'schema': 'ADS',
            'value': extract_ads_id(uid),
        })
        return

    # ``idutils.is_isbn`` is too strict in what it accepts, so parsing with
    # ``ISBN`` is used as the check instead.
    try:
        isbn = str(ISBN(uid))
        self._ensure_reference_field('isbn', {})
        self.obj['reference']['isbn'] = isbn
    except Exception:
        raise ValueError('Unrecognized uid type')
def _to_scolix(self, data):
    """Convert a default Crossref event to Scolix data.

    Reads ``subj_id``, ``obj_id``, ``relation_type_id`` and ``timestamp``
    from ``data``; both identifiers are normalized as DOIs.
    """
    source_doi = idutils.normalize_doi(data['subj_id'])
    target_doi = idutils.normalize_doi(data['obj_id'])
    relation_name = data['relation_type_id'].capitalize()

    def _doi_node(doi):
        # Source and target share the same shape; only the DOI differs.
        return {
            'Identifier': {'ID': doi, 'IDScheme': 'DOI'},
            'Type': {'Name': 'unknown'},
        }

    return {
        'LinkPublicationDate': data['timestamp'],
        'LinkProvider': [{'Name': 'crossref'}],
        'RelationshipType': {'Name': relation_name},
        'Source': _doi_node(source_doi),
        'Target': _doi_node(target_doi),
    }
def dois(self, key, value):
    """Populate the ``dois`` key.

    Also populates the ``persistent_identifiers`` key through side effects:
    every identifier that is not recognized as a DOI is appended there.
    """
    def _get_first_non_curator_source(sources):
        kept = [src for src in sources if src.upper() != 'CURATOR']
        return force_single_element(kept)

    def _get_material(field):
        MATERIAL_MAP = {
            'ebook': 'publication',
        }

        material = force_single_element(field.get('q', '')).lower()
        # Values without a mapping are returned lower-cased, unchanged.
        return MATERIAL_MAP.get(material, material)

    def _is_doi(id_, type_):
        type_matches = not type_ or type_.upper() == 'DOI'
        return type_matches and is_doi(id_)

    def _is_handle(id_, type_):
        type_matches = not type_ or type_.upper() == 'HDL'
        return type_matches and is_handle(id_)

    doi_entries = self.get('dois', [])
    pid_entries = self.get('persistent_identifiers', [])

    for field in force_list(value):
        id_ = force_single_element(field.get('a', ''))
        material = _get_material(field)
        schema = force_single_element(field.get('2', ''))
        source = _get_first_non_curator_source(force_list(field.get('9')))

        if _is_doi(id_, schema):
            doi_entries.append({
                'material': material,
                'source': source,
                'value': normalize_doi(id_),
            })
            continue

        if _is_handle(id_, schema):
            schema = 'HDL'
        pid_entries.append({
            'material': material,
            'schema': schema,
            'source': source,
            'value': id_,
        })

    self['persistent_identifiers'] = pid_entries
    return doi_entries
def _deserialize(self, value, attr, data):
    """Deserialize a DOI: strip it, validate it, return it normalized.

    An empty value is returned as-is when the field is not required and
    the context does not set ``doi_required``. Otherwise
    ``self.fail('invalid_doi')`` is called for anything that is not a DOI.
    """
    stripped = super(DOI, self)._deserialize(value, attr, data).strip()

    if (stripped == '' and not self.required
            and not self.context.get('doi_required')):
        return stripped

    is_valid = idutils.is_doi(stripped)
    if not is_valid:
        self.fail('invalid_doi')
    return idutils.normalize_doi(stripped)
def import_article(identifier):
    """Import a new article from arXiv or Crossref based on the identifier.

    The identifier is classified as an arXiv id or a DOI and normalized.
    If no persistent identifier with that type and value exists yet, the
    matching importer is called; arXiv results are additionally merged with
    Crossref data.

    Args:
        identifier(str): the ID of the record to import

    Returns:
        dict: the serialized article

    Raises:
        ExistingArticleError: if the record is already in Inspire.
        ImportArticleError: if no article is found.
        UnknownImportIdentifierError: if the identifier is neither an arXiv
            id nor a DOI.

    Errors raised inside the importers (not visible here) propagate to the
    caller unchanged.
    """
    if is_arxiv(identifier):
        pid_type, pid_value = "arxiv", normalize_arxiv(identifier)
    elif is_doi(identifier):
        pid_type, pid_value = "doi", normalize_doi(identifier)
    else:
        raise UnknownImportIdentifierError(identifier)

    existing_pid = PersistentIdentifier.query.filter_by(
        pid_type=pid_type, pid_value=pid_value
    ).one_or_none()
    if existing_pid:
        raise ExistingArticleError(
            f"Article {identifier} already in Inspire. UUID: {existing_pid.object_uuid}"
        )

    importers = {"arxiv": import_arxiv, "doi": import_doi}
    import_func = importers.get(pid_type, UnknownImportIdentifierError)
    article = import_func(pid_value)
    if not article:
        raise ImportArticleError(f"No article found for {identifier}")

    if pid_type != "arxiv":
        return article
    return merge_article_with_crossref_data(article)
def search(doi=None):
    """Look up a DOI and return the result as a JSON response.

    The DOI comes from the ``doi`` argument or, failing that, from the
    ``doi`` query parameter of the current request. A missing or malformed
    DOI produces a ``badrequest`` result instead of an unhandled error.

    :param doi: DOI to look up; optional.
    :returns: the JSON response, whose status code is looked up in
        ``CROSSREF_RESPONSE_CODES`` by the result's ``status`` (default 200).
    """
    provided_doi = doi or request.args.get('doi')

    bad_request = {
        'query': {},
        'source': 'inspire',
        'status': 'badrequest',
    }

    if not provided_doi:
        # No DOI at all: answer directly rather than handing None to
        # normalize_doi, which expects a string.
        result = bad_request
    else:
        try:
            normalized_doi = normalize_doi(provided_doi)
            result = get_json(normalized_doi)
        except AttributeError:
            result = bad_request

    resp = jsonify(result)
    resp.status_code = current_app.config['CROSSREF_RESPONSE_CODES'].get(
        result['status'], 200)
    return resp
def import_article(identifier):
    """Import a new article from arXiv or Crossref based on the identifier.

    The identifier is classified as an arXiv id or a DOI and normalized
    (DOIs are also lower-cased). If no record id is found for it, the
    matching importer is called; arXiv results are additionally merged with
    Crossref data.

    Args:
        identifier(str): the ID of the record to import

    Returns:
        dict: the serialized article

    Raises:
        ExistingArticleError: if the record is already in Inspire.
        ImportArticleNotFoundError: if no article is found.
        UnknownImportIdentifierError: if the identifier is neither an arXiv
            id nor a DOI.

    Errors raised inside the importers (not visible here) propagate to the
    caller unchanged.
    """
    if is_arxiv(identifier):
        pid_type, pid_value = "arxiv", normalize_arxiv(identifier)
    elif is_doi(identifier):
        pid_type, pid_value = "doi", normalize_doi(identifier).lower()
    else:
        raise UnknownImportIdentifierError(identifier)

    existing_recid = get_pid_for_pid(pid_type, pid_value, provider="recid")
    if existing_recid:
        raise ExistingArticleError(
            f"The article {identifier} already exists in Inspire",
            existing_recid,
        )

    importers = {"arxiv": import_arxiv, "doi": import_doi}
    import_func = importers.get(pid_type, UnknownImportIdentifierError)
    article = import_func(pid_value)
    if not article:
        raise ImportArticleNotFoundError(f"No article found for {identifier}")

    if pid_type != "arxiv":
        return article
    return merge_article_with_crossref_data(article)
def check_doi(self, doi):
    """Check the DOI structure and return the normalized DOI.

    A value without a ``/`` is treated as a bare suffix and gets
    ``self.prefix`` prepended. A value with a ``/`` must start with
    ``self.prefix``, otherwise ``ValueError`` is raised.
    """
    if '/' not in doi:
        # Only the suffix was given: complete it with the client prefix.
        return normalize_doi(self.prefix + '/' + doi)

    given_prefix = doi.partition('/')[0]
    if given_prefix != self.prefix:
        # Provided a DOI with the wrong prefix
        message = ('DOI prefix provided ' + given_prefix
                   + ' not prefix in rest client ' + self.prefix)
        raise ValueError(message)

    return normalize_doi(doi)
def search(doi=None):
    """Look up a DOI and return the result as a JSON response.

    The DOI comes from the ``doi`` argument or, failing that, from the
    ``doi`` query parameter of the current request. A missing or malformed
    DOI produces a ``badrequest`` result instead of an unhandled error.

    :param doi: DOI to look up; optional.
    :returns: the JSON response, whose status code is looked up in
        ``CROSSREF_RESPONSE_CODES`` by the result's ``status`` (default 200).
    """
    provided_doi = doi or request.args.get('doi')

    bad_request = {
        'query': {},
        'source': 'inspire',
        'status': 'badrequest',
    }

    if not provided_doi:
        # No DOI at all: answer directly rather than handing None to
        # normalize_doi, which expects a string.
        result = bad_request
    else:
        try:
            normalized_doi = normalize_doi(provided_doi)
            result = get_json(normalized_doi)
        except AttributeError:
            result = bad_request

    resp = jsonify(result)
    resp.status_code = current_app.config['CROSSREF_RESPONSE_CODES'].get(
        result['status'], 200)
    return resp
def check_doi(self, doi):
    """Check the DOI structure and return the normalized DOI.

    A value without a ``/`` is treated as a bare suffix and gets
    ``self.prefix`` prepended. A value with a ``/`` must start with
    ``self.prefix``, otherwise ``ValueError`` is raised.
    """
    if '/' not in doi:
        # Only the suffix was given: complete it with the client prefix.
        return normalize_doi('{}/{}'.format(self.prefix, doi))

    given_prefix = doi.partition('/')[0]
    if given_prefix != self.prefix:
        # Provided a DOI with the wrong prefix
        template = ('Wrong DOI {0} prefix provided, it should be '
                    '{1} as defined in the rest client')
        raise ValueError(template.format(given_prefix, self.prefix))

    return normalize_doi(doi)
def dois(self, key, value):
    """Populate the ``dois`` key.

    Also populates the ``persistent_identifiers`` key through side effects:
    every identifier that is not recognized as a DOI is appended there.
    """
    def _get_first_non_curator_source(sources):
        kept = [src for src in sources if src.upper() != 'CURATOR']
        return force_single_element(kept)

    def _is_doi(id_, type_):
        type_matches = not type_ or type_.upper() == 'DOI'
        return type_matches and is_doi(id_)

    def _is_handle(id_, type_):
        type_matches = not type_ or type_.upper() == 'HDL'
        return type_matches and is_handle(id_)

    doi_entries = self.get('dois', [])
    pid_entries = self.get('persistent_identifiers', [])

    for field in force_list(value):
        id_ = force_single_element(field.get('a', ''))
        material = force_single_element(field.get('q', '')).lower()
        schema = force_single_element(field.get('2', ''))
        source = _get_first_non_curator_source(force_list(field.get('9')))

        if _is_doi(id_, schema):
            doi_entries.append({
                'material': material,
                'source': source,
                'value': normalize_doi(id_),
            })
            continue

        if _is_handle(id_, schema):
            schema = 'HDL'
        pid_entries.append({
            'material': material,
            'schema': schema,
            'source': source,
            'value': id_,
        })

    self['persistent_identifiers'] = pid_entries
    return doi_entries
def add_doi(self, doi, source=None, material=None):
    """Append a normalized DOI entry to the ``dois`` field.

    Nothing is added when ``doi`` is ``None``, when normalizing it raises
    ``AttributeError``, or when the normalized value is empty.

    :param doi: doi for the current document.
    :type doi: string

    :param source: source for the doi.
    :type source: string

    :param material: material for the doi.
    :type material: string
    """
    if doi is None:
        return

    try:
        normalized = idutils.normalize_doi(doi)
    except AttributeError:
        # Not a recognizable DOI: skip it rather than fail.
        return

    if not normalized:
        return

    # Store the normalized DOI, not the raw input it was derived from.
    dois = {'value': normalized, 'source': self._get_source(source)}
    if material is not None:
        dois['material'] = material

    self._append_to('dois', dois)
def persistent_identifiers(self, key, value):
    """Persistent Standard Identifiers.

    Populates the ``persistent_identifiers`` key and, through side effects,
    the ``dois`` key: identifiers recognized as DOIs are appended to
    ``dois``, all others to ``persistent_identifiers``.
    """
    def _first_non_curator_source(sources):
        # Build a real list so the result does not depend on ``filter``
        # being eager.
        without_curator = [
            el for el in force_force_list(sources) if el.upper() != 'CURATOR'
        ]
        return force_single_element(without_curator)

    def _is_doi(type_, id_):
        return (not type_ or type_.upper() == 'DOI') and is_doi(id_)

    def _is_handle(type_, id_):
        return (not type_ or type_.upper() in ('DOI', 'HDL')) and is_handle(id_)

    dois = self.get('dois', [])
    persistent_identifiers = self.get('persistent_identifiers', [])

    for field in force_force_list(value):
        if not field:
            continue

        ids = force_force_list(field.get('a'))
        type_ = force_single_element(field.get('2'))
        source = _first_non_curator_source(field.get('9'))

        for id_ in ids:
            if _is_doi(type_, id_):
                dois.append({
                    'source': source,
                    'value': normalize_doi(id_),
                })
                continue

            # Decide the stored type per identifier. Overwriting ``type_``
            # here would leak 'HDL' into the remaining ids of this field
            # and misclassify them.
            id_type = 'HDL' if _is_handle(type_, id_) else type_
            persistent_identifiers.append({
                'source': source,
                'type': id_type,
                'value': id_,
            })

    self['dois'] = dois
    return persistent_identifiers
def add_uid(self, uid):
    """Store ``uid`` in the reference field matching its identifier type.

    Checks run in this order: arXiv, DOI, handle, URN, CNUM, ISBN. Anything
    that fails ISBN normalization is passed to ``add_misc``.
    """
    # Callers may pass None (or another falsy value); treat it as empty.
    uid = uid or ''

    if _is_arxiv(uid):
        self._ensure_reference_field('arxiv_eprint', _normalize_arxiv(uid))
        return

    if idutils.is_doi(uid):
        self._ensure_reference_field('dois', [])
        dois = self.obj['reference']['dois']
        dois.append(idutils.normalize_doi(uid))
        return

    if idutils.is_handle(uid):
        self._ensure_reference_field('persistent_identifiers', [])
        pids = self.obj['reference']['persistent_identifiers']
        pids.append({
            'schema': 'HDL',
            'value': idutils.normalize_handle(uid),
        })
        return

    if idutils.is_urn(uid):
        self._ensure_reference_field('persistent_identifiers', [])
        pids = self.obj['reference']['persistent_identifiers']
        pids.append({
            'schema': 'URN',
            'value': uid,
        })
        return

    if self.RE_VALID_CNUM.match(uid):
        self._ensure_reference_field('publication_info', {})
        self.obj['reference']['publication_info']['cnum'] = uid
        return

    # idutils.is_isbn and normalize_isbn are implemented differently, so
    # normalization itself is used as the ISBN check.
    try:
        isbn = idutils.normalize_isbn(uid)
        self._ensure_reference_field('isbn', {})
        # Stored without spaces or hyphens.
        compact_isbn = isbn.replace(' ', '').replace('-', '')
        self.obj['reference']['isbn'] = compact_isbn
    # The exception list is long on purpose, see
    # https://github.com/nekobcn/isbnid/issues/2 and
    # https://github.com/nekobcn/isbnid/issues/3
    except (ISBNError, ISBNRangeError, UnicodeEncodeError):
        self.add_misc(uid)
def add_uid(self, uid):
    """Store ``uid`` in the reference field matching its identifier type.

    Checks run in this order: arXiv, DOI, handle, URN, CNUM, ISBN. Anything
    that fails ISBN normalization is passed to ``add_misc``.
    """
    # Callers may pass None (or another falsy value); treat it as empty.
    uid = uid or ''

    if _is_arxiv(uid):
        self._ensure_reference_field('arxiv_eprint', _normalize_arxiv(uid))
        return

    if idutils.is_doi(uid):
        self._ensure_reference_field('dois', [])
        dois = self.obj['reference']['dois']
        dois.append(idutils.normalize_doi(uid))
        return

    if idutils.is_handle(uid):
        self._ensure_reference_field('persistent_identifiers', [])
        pids = self.obj['reference']['persistent_identifiers']
        pids.append({
            'schema': 'HDL',
            'value': idutils.normalize_handle(uid),
        })
        return

    if idutils.is_urn(uid):
        self._ensure_reference_field('persistent_identifiers', [])
        pids = self.obj['reference']['persistent_identifiers']
        pids.append({
            'schema': 'URN',
            'value': uid,
        })
        return

    if self.RE_VALID_CNUM.match(uid):
        self._ensure_reference_field('publication_info', {})
        self.obj['reference']['publication_info']['cnum'] = uid
        return

    # idutils.is_isbn and normalize_isbn are implemented differently, so
    # normalization itself is used as the ISBN check.
    try:
        isbn = idutils.normalize_isbn(uid)
        self._ensure_reference_field('isbn', {})
        # Stored without spaces or hyphens.
        compact_isbn = isbn.replace(' ', '').replace('-', '')
        self.obj['reference']['isbn'] = compact_isbn
    # The exception list is long on purpose, see
    # https://github.com/nekobcn/isbnid/issues/2 and
    # https://github.com/nekobcn/isbnid/issues/3
    except (ISBNError, ISBNRangeError, UnicodeEncodeError):
        self.add_misc(uid)
def clean_doi(doi):
    """Return ``doi`` normalized by idutils with every backslash removed."""
    return idutils.normalize_doi(doi).replace("\\", "")
def get_agency(self, doi: str) -> str:
    """Return the agency registered for the prefix of ``doi``.

    The DOI is normalized first; the prefix is everything before the first
    ``/`` and is resolved through ``self._agency_by_prefix``.
    """
    prefix, _, _ = idutils.normalize_doi(doi).partition('/')
    return self._agency_by_prefix(prefix)
def dois(self, key, value):
    """Build a DOI entry from subfield ``9`` (source) and ``a`` (DOI).

    The DOI read from ``a`` is normalized before it is returned.
    """
    source = value.get('9')
    doi = normalize_doi(value.get('a'))
    return {'source': source, 'value': doi}
def test_valid_doi(input_val, ctx):
    """Check that a valid DOI is loaded stripped and normalized."""
    schema = MetadataSchemaV1(partial=['doi'], context=ctx)
    data, _errors = schema.load({'doi': input_val})
    loaded_doi = data['doi']
    assert loaded_doi == idutils.normalize_doi(input_val.strip())
def test_valid_doi(input_val, ctx):
    """Check that a valid DOI is loaded stripped and normalized."""
    schema = MetadataSchemaV1(partial=['doi'], context=ctx)
    data, _errors = schema.load({'doi': input_val})
    loaded_doi = data['doi']
    assert loaded_doi == idutils.normalize_doi(input_val.strip())