def import_article(identifier):
    """Import a new article from arXiv or Crossref based on the identifier.

    This function attempts to parse and normalize the identifier as a valid
    arXiv id or DOI. If the identifier is valid and there is no record in
    Inspire matching the ID, it queries the arXiv/CrossRef APIs and parses
    the record to make it inspire compliant.

    Args:
        identifier(str): the ID of the record to import

    Returns:
        dict: the serialized article

    Raises:
        ExistingArticleError: if the record is already in Inspire.
        ImportArticleError: if no article is found.
        ImportConnectionError: if the importing request fails (not raised
            directly here; presumably propagated from the importer).
        ImportParsingError: if an error occurs while parsing the result
            (not raised directly here; presumably propagated from the
            importer).
        UnknownImportIdentifierError: if the identifier is neither an
            arXiv id nor a DOI.
    """
    if is_arxiv(identifier):
        pid_type = "arxiv"
        pid_value = normalize_arxiv(identifier)
    elif is_doi(identifier):
        pid_type = "doi"
        pid_value = normalize_doi(identifier)
    else:
        raise UnknownImportIdentifierError(identifier)

    # Refuse to import anything that already has a persistent identifier.
    pid = PersistentIdentifier.query.filter_by(
        pid_type=pid_type, pid_value=pid_value
    ).one_or_none()
    if pid:
        raise ExistingArticleError(
            f"Article {identifier} already in Inspire. UUID: {pid.object_uuid}"
        )

    importers = {"arxiv": import_arxiv, "doi": import_doi}
    # pid_type is guaranteed to be a key at this point. Index directly
    # instead of using ``.get`` with an exception class as default: that
    # default would be *called and returned* as the article, never raised.
    importer = importers[pid_type]
    article = importer(pid_value)

    if not article:
        raise ImportArticleError(f"No article found for {identifier}")

    # arXiv records are merged with Crossref data before being returned.
    if pid_type == "arxiv":
        article = merge_article_with_crossref_data(article)

    return article
def import_article(identifier):
    """Import a new article from arXiv or Crossref based on the identifier.

    This function attempts to parse and normalize the identifier as a valid
    arXiv id or DOI (DOIs are additionally lower-cased). If the identifier
    is valid and there is no record in Inspire matching the ID, it queries
    the arXiv/CrossRef APIs and parses the record to make it inspire
    compliant.

    Args:
        identifier(str): the ID of the record to import

    Returns:
        dict: the serialized article

    Raises:
        ExistingArticleError: if the record is already in Inspire.
        ImportArticleNotFoundError: if no article is found.
        ImportParsingError: if an error occurs while parsing the result
            (not raised directly here; presumably propagated from the
            importer).
        UnknownImportIdentifierError: if the identifier is neither an
            arXiv id nor a DOI.
    """
    if is_arxiv(identifier):
        pid_type = "arxiv"
        pid_value = normalize_arxiv(identifier)
    elif is_doi(identifier):
        pid_type = "doi"
        pid_value = normalize_doi(identifier).lower()
    else:
        raise UnknownImportIdentifierError(identifier)

    # Refuse to import anything that already resolves to a record id.
    recid = get_pid_for_pid(pid_type, pid_value, provider="recid")
    if recid:
        raise ExistingArticleError(
            f"The article {identifier} already exists in Inspire", recid
        )

    importers = {"arxiv": import_arxiv, "doi": import_doi}
    # pid_type is guaranteed to be a key at this point. Index directly
    # instead of using ``.get`` with an exception class as default: that
    # default would be *called and returned* as the article, never raised.
    importer = importers[pid_type]
    article = importer(pid_value)

    if not article:
        raise ImportArticleNotFoundError(f"No article found for {identifier}")

    # arXiv records are merged with Crossref data before being returned.
    if pid_type == "arxiv":
        article = merge_article_with_crossref_data(article)

    return article
def test_normalize_arxiv_handles_uppercase_with_cls():
    """A mixed-case old-style id with a subject class is expected to come
    back lower-cased and without the class."""
    normalized = utils.normalize_arxiv('MatH.AC/0312059')

    assert 'math/0312059' == normalized
def test_normalize_arxiv_handles_uppercase():
    """An upper-case old-style id is expected to come back lower-cased."""
    normalized = utils.normalize_arxiv('MATH/0312059')

    assert 'math/0312059' == normalized
def test_normalize_arxiv_handles_new_identifiers_with_cls_prefix_and_ver():
    """A new-style id carrying an ``arXiv:`` prefix, an archive with subject
    class and a version suffix is expected to reduce to the bare number."""
    normalized = utils.normalize_arxiv('arXiv:hep-th.GT/1501.00001v1')

    assert '1501.00001' == normalized
def test_normalize_arxiv_handles_new_identifiers_with_class_and_wo_version():
    """A new-style id with an ``arXiv:`` prefix and an archive, but no
    version suffix, is expected to reduce to the bare number."""
    normalized = utils.normalize_arxiv('arXiv:hep-th/1501.00001')

    assert '1501.00001' == normalized
def test_normalize_arxiv_handles_solv_int():
    """An already-normalized ``solv-int`` id is expected to pass through
    unchanged."""
    normalized = utils.normalize_arxiv('solv-int/9611008')

    assert 'solv-int/9611008' == normalized
def test_normalize_arxiv_handles_old_identifiers_with_prefix_and_version():
    """An old-style id with an ``arXiv:`` prefix, a subject class and a
    version suffix is expected to reduce to ``archive/number``."""
    normalized = utils.normalize_arxiv('arXiv:math.GT/0309136v2')

    assert 'math/0309136' == normalized
def test_normalize_arxiv_handles_new_identifiers_wo_prefix_and_with_version():
    """A bare new-style id with a version suffix is expected to lose the
    version."""
    normalized = utils.normalize_arxiv('1501.00001v1')

    assert '1501.00001' == normalized
def test_normalize_arxiv_handles_category_in_brackets():
    """A new-style id followed by a bracketed category is expected to
    reduce to the bare number (prefix, version and category dropped)."""
    normalized = utils.normalize_arxiv('arXiv:1406.1599v2[physics.ins-det]')

    assert '1406.1599' == normalized