def test_citation_to_citeproc_pubmed_book():
    """
    PubMed book records are not supported as a source of CSL metadata.

    There is no logic implemented to parse the XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29227604&rettype=full
    so the conversion is expected to raise NotImplementedError.
    """
    book_citation = 'pmid:29227604'
    with pytest.raises(NotImplementedError):
        citation_to_citeproc(book_citation)
def test_citation_to_citeproc_pmc(identifier, citation_id):
    """
    Check the CSL item produced for a PubMed Central citation.

    The citation is built by prefixing `identifier` with `pmcid:`, and
    `citation_id` is the value expected in the CSL item's `id` field.
    NOTE(review): both arguments are presumably injected by a pytest
    parametrize decorator that is not visible in this block; confirm.
    """
    csl_item = citation_to_citeproc(f'pmcid:{identifier}')
    assert csl_item['id'] == citation_id

    # Checked in the same order as the individual fields appear below.
    expected_fields = (
        ('URL', 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3041534/'),
        ('container-title', 'Summit on Translational Bioinformatics'),
        ('title', 'Secondary Use of EHR: Data Quality Issues and Informatics Opportunities'),
    )
    for field, expected in expected_fields:
        assert csl_item[field] == expected

    first_author = csl_item['author'][0]
    assert first_author['family'] == 'Botsis'

    for field, expected in (('PMID', '21347133'), ('PMCID', 'PMC3041534')):
        assert csl_item[field] == expected
def test_citation_to_citeproc_doi_datacite():
    """
    Check the CSL item produced for the DOI 10.7287/peerj.preprints.3100v1.
    """
    csl_item = citation_to_citeproc('doi:10.7287/peerj.preprints.3100v1')

    # Scalar fields, compared in a fixed order.
    expected_fields = (
        ('id', '11cb5HXoY'),
        ('URL', 'https://doi.org/10.7287/peerj.preprints.3100v1'),
        ('DOI', '10.7287/peerj.preprints.3100v1'),
        ('type', 'report'),
        ('title', 'Sci-Hub provides access to nearly all scholarly literature'),
    )
    for field, expected in expected_fields:
        assert csl_item[field] == expected

    # First and last entries of the author list.
    author_list = csl_item['author']
    assert author_list[0]['family'] == 'Himmelstein'
    assert author_list[-1]['family'] == 'Greene'
def test_citation_to_citeproc_arxiv():
    """
    Check the CSL item produced for the arXiv identifier cond-mat/0703470v2.
    """
    csl_item = citation_to_citeproc('arxiv:cond-mat/0703470v2')

    # Scalar fields, compared in a fixed order.
    expected_fields = (
        ('id', 'ES92tcdg'),
        ('URL', 'https://arxiv.org/abs/cond-mat/0703470v2'),
        ('arxiv_id', 'cond-mat/0703470v2'),
        ('version', '2'),
        ('type', 'report'),
        ('container-title', 'arXiv'),
        ('title', 'Portraits of Complex Networks'),
    )
    for field, expected in expected_fields:
        assert csl_item[field] == expected

    # Authors for this record are expected as literal names.
    first_author = csl_item['author'][0]
    assert first_author['literal'] == 'J. P. Bagrow'

    assert csl_item['DOI'] == '10.1209/0295-5075/81/68004'
def test_citation_to_citeproc_pubmed():
    """
    Check the CSL item produced for the PubMed identifier 21347133.
    """
    csl_item = citation_to_citeproc('pmid:21347133')

    # Scalar fields, compared in a fixed order.
    expected_fields = (
        ('id', 'y9ONtSZ9'),
        ('URL', 'https://www.ncbi.nlm.nih.gov/pubmed/21347133'),
        ('container-title', 'Summit on Translational Bioinformatics'),
        ('title', 'Secondary Use of EHR: Data Quality Issues and Informatics Opportunities'),
    )
    for field, expected in expected_fields:
        assert csl_item[field] == expected

    first_author = csl_item['author'][0]
    assert first_author['family'] == 'Botsis'

    for field, expected in (('PMID', '21347133'), ('PMCID', 'PMC3041534')):
        assert csl_item[field] == expected
def generate_csl_items(args, citation_df):
    """
    Generate CSL (citeproc) items for standard_citations in citation_df.
    Writes the items as JSON to args.references_path and logs problems
    encountered along the way.

    Manual references read from args.manual_references_path take precedence
    over retrieved metadata. Every other unique standard_citation is passed
    to citation_to_citeproc while requests-cache is installed at
    args.requests_cache_path; the cache is cleared first when
    args.clear_requests_cache is set. Citations whose retrieval raises an
    Exception are logged and left out of the output rather than aborting.

    Returns the list of CSL items that was written.
    """
    # Read manual references (overrides) in JSON CSL
    manual_refs = read_manual_references(args.manual_references_path)

    csl_items = []
    failures = []

    requests_cache.install_cache(args.requests_cache_path, include_get_headers=True)
    try:
        cache = requests_cache.get_cache()
        if args.clear_requests_cache:
            logging.info('Clearing requests-cache')
            requests_cache.clear()
        logging.info(
            f'requests-cache starting with {len(cache.responses)} cached responses'
        )

        for citation in citation_df.standard_citation.unique():
            if citation in manual_refs:
                csl_items.append(manual_refs[citation])
                continue
            try:
                csl_items.append(citation_to_citeproc(citation))
            except Exception:
                # Deliberate best effort: one bad citation must not prevent
                # the remaining references from being generated.
                logging.exception(f'Citeproc retrieval failure for {citation}')
                failures.append(citation)

        logging.info(
            f'requests-cache finished with {len(cache.responses)} cached responses'
        )
    finally:
        # Always remove the global cache hook, even when interrupted, so
        # later requests in this process are not silently cached.
        requests_cache.uninstall_cache()

    if failures:
        # Stringify entries: a failed citation may not be a str (e.g. a
        # missing value), and str.join would raise TypeError on it.
        message = 'Citeproc retrieval failed for:\n{}'.format(
            '\n'.join(map(str, failures)))
        logging.error(message)

    # Write JSON CSL bibliography for Pandoc.
    # ensure_ascii=False emits raw non-ASCII characters, so the encoding is
    # pinned to UTF-8 instead of depending on the platform default.
    with args.references_path.open('w', encoding='utf-8') as write_file:
        json.dump(csl_items, write_file, indent=2, ensure_ascii=False)
        write_file.write('\n')
    return csl_items
def test_citation_to_citeproc_pubmed_1():
    """
    Check the CSL item generated from the XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=21347133&rettype=full
    """
    csl_item = citation_to_citeproc('pmid:21347133')

    # Scalar fields, compared in a fixed order.
    expected_fields = (
        ('id', 'y9ONtSZ9'),
        ('type', 'article-journal'),
        ('URL', 'https://www.ncbi.nlm.nih.gov/pubmed/21347133'),
        ('container-title', 'AMIA Joint Summits on Translational Science proceedings. AMIA Joint Summits on Translational Science'),
        ('title', 'Secondary Use of EHR: Data Quality Issues and Informatics Opportunities.'),
    )
    for field, expected in expected_fields:
        assert csl_item[field] == expected

    # Publication date is expected with year, month and day parts.
    issued = csl_item['issued']
    assert issued['date-parts'] == [[2010, 3, 1]]

    author_list = csl_item['author']
    assert author_list[0]['given'] == 'Taxiarchis'
    assert author_list[0]['family'] == 'Botsis'

    for field, expected in (('PMID', '21347133'), ('PMCID', 'PMC3041534')):
        assert csl_item[field] == expected
def test_citation_to_citeproc_pubmed_2():
    """
    Check the CSL item generated from the XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=27094199&rettype=full
    """
    csl_item = citation_to_citeproc('pmid:27094199')
    # Printed output is captured by pytest and shown when the test fails.
    print(csl_item)

    # Scalar fields, compared in a fixed order.
    expected_fields = (
        ('id', 'alaFV9OY'),
        ('type', 'article-journal'),
        ('URL', 'https://www.ncbi.nlm.nih.gov/pubmed/27094199'),
        ('container-title', 'Circulation. Cardiovascular genetics'),
        ('container-title-short', 'Circ Cardiovasc Genet'),
        ('page', '179-84'),
        ('title', 'Genetic Association-Guided Analysis of Gene Networks for the Study of Complex Traits.'),
    )
    for field, expected in expected_fields:
        assert csl_item[field] == expected

    # Publication date is expected with year and month parts only.
    issued = csl_item['issued']
    assert issued['date-parts'] == [[2016, 4]]

    author_list = csl_item['author']
    assert author_list[0]['given'] == 'Casey S'
    assert author_list[0]['family'] == 'Greene'

    for field, expected in (
        ('PMID', '27094199'),
        ('DOI', '10.1161/circgenetics.115.001181'),
    ):
        assert csl_item[field] == expected