コード例 #1
0
def test_citation_to_citeproc_pubmed_book():
    """
    Extracting CSL metadata from books in PubMed is not supported.
    Logic not implemented to parse XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29227604&rettype=full
    """
    with pytest.raises(NotImplementedError):
        citation_to_citeproc('pmid:29227604')
コード例 #2
0
ファイル: process-catalog.py プロジェクト: zietzm/catalog
def process_record(record):
    """
    Expand a catalog record with retrieved metadata
    """
    output = {}
    html_url = record.pop('html_url')
    output['manubot'] = {
        'repo_url': record.pop('repo_url'),
        'url': html_url,
        'citation': f"url:{html_url}",
    }
    if 'thumbnail_url' in record:
        output['manubot']['thumbnail_url'] = record.pop('thumbnail_url')
    for publication_type in 'preprint', 'journal':
        citation = record.pop(f'{publication_type}_citation', None)
        if not citation:
            continue
        if not is_valid_citation(citation):
            continue
        output[publication_type] = {
            'citation': citation,
        }
    for item in output.values():
        citation = standardize_citation(item['citation'])
        csl_item = citation_to_citeproc(citation)
        if 'url' not in item and 'URL' in csl_item:
            item['url'] = csl_item['URL']
        item['title'] = get_title(csl_item)
        item['authors'] = get_authors_text(csl_item)
        item['journal'] = get_journal(csl_item)
        item['date_iso'] = get_date(csl_item)
        item['date_human'] = get_date_summary(csl_item)
        item['csl_item'] = csl_item
    output['extras'] = record
    return output
コード例 #3
0
ファイル: test_util.py プロジェクト: nichtich/manubot
def test_citation_to_citeproc_pubmed_with_numeric_month():
    """
    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=29028984&rettype=full

    See https://github.com/manubot/manubot/issues/69
    """
    citation = 'pmid:29028984'
    citeproc = citation_to_citeproc(citation)
    print(citeproc)
    assert citeproc['issued']['date-parts'] == [[2018, 3, 15]]
コード例 #4
0
ファイル: test_util.py プロジェクト: nichtich/manubot
def test_citation_to_citeproc_doi_datacite():
    citation = 'doi:10.7287/peerj.preprints.3100v1'
    citeproc = citation_to_citeproc(citation)
    assert citeproc['id'] == '11cb5HXoY'
    assert citeproc['URL'] == 'https://doi.org/10.7287/peerj.preprints.3100v1'
    assert citeproc['DOI'] == '10.7287/peerj.preprints.3100v1'
    assert citeproc['type'] == 'report'
    assert citeproc['title'] == 'Sci-Hub provides access to nearly all scholarly literature'
    authors = citeproc['author']
    assert authors[0]['family'] == 'Himmelstein'
    assert authors[-1]['family'] == 'Greene'
コード例 #5
0
def test_citation_to_citeproc_arxiv():
    citation = 'arxiv:cond-mat/0703470v2'
    citeproc = citation_to_citeproc(citation)
    assert citeproc['id'] == 'ES92tcdg'
    assert citeproc['URL'] == 'https://arxiv.org/abs/cond-mat/0703470v2'
    assert citeproc['number'] == 'cond-mat/0703470v2'
    assert citeproc['version'] == '2'
    assert citeproc['type'] == 'report'
    assert citeproc['container-title'] == 'arXiv'
    assert citeproc['title'] == 'Portraits of Complex Networks'
    authors = citeproc['author']
    assert authors[0]['literal'] == 'J. P. Bagrow'
    assert citeproc['DOI'] == '10.1209/0295-5075/81/68004'
コード例 #6
0
ファイル: util.py プロジェクト: nichtich/manubot
def generate_csl_items(args, citation_df):
    """
    General CSL (citeproc) items for standard_ids in citation_df.
    Writes references.json to disk and logs warnings for potential problems.
    """
    # Read manual references (overrides) in JSON CSL
    manual_refs = load_manual_references(args.manual_references_paths)

    requests_cache.install_cache(args.requests_cache_path,
                                 include_get_headers=True)
    cache = requests_cache.get_cache()
    if args.clear_requests_cache:
        logging.info('Clearing requests-cache')
        requests_cache.clear()
    logging.info(
        f'requests-cache starting with {len(cache.responses)} cached responses'
    )

    csl_items = list()
    failures = list()
    for standard_id in citation_df.standard_id.unique():
        if standard_id in manual_refs:
            csl_items.append(manual_refs[standard_id])
            continue
        elif standard_id.startswith('raw:'):
            logging.error(
                f'CSL JSON Data with a standard_id of {standard_id} not found in manual-references.json. '
                'Metadata must be provided for raw citations.')
            failures.append(standard_id)
        try:
            citeproc = citation_to_citeproc(standard_id)
            csl_items.append(citeproc)
        except Exception:
            logging.exception(f'Citeproc retrieval failure for {standard_id}')
            failures.append(standard_id)

    logging.info(
        f'requests-cache finished with {len(cache.responses)} cached responses'
    )
    requests_cache.uninstall_cache()

    if failures:
        message = 'CSL JSON Data retrieval failed for:\n{}'.format(
            '\n'.join(failures))
        logging.error(message)

    # Write JSON CSL bibliography for Pandoc.
    with args.references_path.open('w') as write_file:
        json.dump(csl_items, write_file, indent=2, ensure_ascii=False)
        write_file.write('\n')
    return csl_items
コード例 #7
0
def test_citation_to_citeproc_pmc(identifier, citation_id):
    citation = f'pmcid:{identifier}'
    citeproc = citation_to_citeproc(citation)
    assert citeproc['id'] == citation_id
    assert citeproc[
        'URL'] == 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3041534/'
    assert citeproc[
        'container-title'] == 'Summit on Translational Bioinformatics'
    assert citeproc[
        'title'] == 'Secondary Use of EHR: Data Quality Issues and Informatics Opportunities'
    authors = citeproc['author']
    assert authors[0]['family'] == 'Botsis'
    assert citeproc['PMID'] == '21347133'
    assert citeproc['PMCID'] == 'PMC3041534'
コード例 #8
0
ファイル: test_util.py プロジェクト: nichtich/manubot
def test_citation_to_citeproc_pmc():
    """
    https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pmc/?format=csl&id=3041534
    """
    citation = f'pmcid:PMC3041534'
    citeproc = citation_to_citeproc(citation)
    assert citeproc['id'] == 'RoOhUFKU'
    assert citeproc['URL'] == 'https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3041534/'
    assert citeproc['container-title-short'] == 'Summit Transl Bioinform'
    assert citeproc['title'] == 'Secondary Use of EHR: Data Quality Issues and Informatics Opportunities'
    authors = citeproc['author']
    assert authors[0]['family'] == 'Botsis'
    assert citeproc['PMID'] == '21347133'
    assert citeproc['PMCID'] == 'PMC3041534'
    assert 'generated by Manubot' in citeproc['note']
    assert 'standard_id: pmcid:PMC3041534' in citeproc['note']
コード例 #9
0
ファイル: test_util.py プロジェクト: nichtich/manubot
def test_citation_to_citeproc_pubmed_1():
    """
    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=21347133&rettype=full
    """
    citation = 'pmid:21347133'
    citeproc = citation_to_citeproc(citation)
    assert citeproc['id'] == 'y9ONtSZ9'
    assert citeproc['type'] == 'article-journal'
    assert citeproc['URL'] == 'https://www.ncbi.nlm.nih.gov/pubmed/21347133'
    assert citeproc['container-title'] == 'Summit on translational bioinformatics'
    assert citeproc['title'] == 'Secondary Use of EHR: Data Quality Issues and Informatics Opportunities.'
    assert citeproc['issued']['date-parts'] == [[2010, 3, 1]]
    authors = citeproc['author']
    assert authors[0]['given'] == 'Taxiarchis'
    assert authors[0]['family'] == 'Botsis'
    assert citeproc['PMID'] == '21347133'
    assert citeproc['PMCID'] == 'PMC3041534'
コード例 #10
0
ファイル: test_util.py プロジェクト: nichtich/manubot
def test_citation_to_citeproc_pubmed_2():
    """
    Generated from XML returned by
    https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=27094199&rettype=full
    """
    citation = 'pmid:27094199'
    citeproc = citation_to_citeproc(citation)
    print(citeproc)
    assert citeproc['id'] == 'alaFV9OY'
    assert citeproc['type'] == 'article-journal'
    assert citeproc['URL'] == 'https://www.ncbi.nlm.nih.gov/pubmed/27094199'
    assert citeproc['container-title'] == 'Circulation. Cardiovascular genetics'
    assert citeproc['container-title-short'] == 'Circ Cardiovasc Genet'
    assert citeproc['page'] == '179-84'
    assert citeproc['title'] == 'Genetic Association-Guided Analysis of Gene Networks for the Study of Complex Traits.'
    assert citeproc['issued']['date-parts'] == [[2016, 4]]
    authors = citeproc['author']
    assert authors[0]['given'] == 'Casey S'
    assert authors[0]['family'] == 'Greene'
    assert citeproc['PMID'] == '27094199'
    assert citeproc['DOI'] == '10.1161/circgenetics.115.001181'
コード例 #11
0
ファイル: test_util.py プロジェクト: nichtich/manubot
def test_citation_to_citeproc_isbn():
    csl_item = citation_to_citeproc('isbn:9780387950693')
    assert csl_item['type'] == 'book'
    assert csl_item['title'] == 'Complex analysis'