Example #1
0
def test_esearch_sra_withhistory():
    """Check an esearch made with ``userhistory=True``.

    The result must carry exactly ``RETMAX`` ids, and both its ``webenv``
    and ``query_key`` must differ from the empty string.
    """
    result = entrez.esearch(
        DB, QUERY, userhistory=True, retmax=RETMAX, api_key=API_KEY
    )
    returned_ids = result.ids
    assert len(returned_ids) == RETMAX
    # Compared against "" explicitly (not by truthiness) to keep the
    # original pass/fail behaviour for non-string values.
    assert result.webenv != ""
    assert result.query_key != ""
Example #2
0
def download_biosample_xml(biosample_ids, defaults):
    """Yield parsed BioSample documents for ``biosample_ids``, batch by batch.

    The IDs are split into chunks of ``BATCH_SIZE``. Each chunk is joined
    with ``"+OR+"`` into one query and submitted through ``entrez.esearch``;
    the resulting ``webenv`` / ``query_key`` / ``count`` are handed to
    ``biosample_efetch``, and every XML record it produces is converted with
    ``xml_to_root`` and ``parsers_biosample_xml.parse_biosample``.

    Args:
        biosample_ids: Iterable of ID strings (they are joined with
            ``str.join``).
        defaults: Mapping expanded as keyword arguments into
            ``entrez.esearch`` and passed positionally to
            ``biosample_efetch``.

    Yields:
        Whatever ``parsers_biosample_xml.parse_biosample`` returns for each
        fetched record.

    Note:
        This is a generator, so nothing (including the first log line) runs
        until it is iterated.
    """
    logger.info("BioSample - Downloading XML Records")
    for i, ids in enumerate(chunked(biosample_ids, BATCH_SIZE)):
        # Materialise the chunk so it can be both measured and joined,
        # whatever kind of iterable ``chunked`` hands back.
        batch = list(ids)
        start = i * BATCH_SIZE
        # Report the real upper bound: a chunk may hold fewer than
        # BATCH_SIZE ids (typically the last one), and (i + 1) * BATCH_SIZE
        # would then overstate how many records are being downloaded.
        end = start + len(batch)
        logger.info(f"BioSample - Downloading XML Records [Batch {start:,}-{end:,}]")
        query = "+OR+".join(batch)
        esearch_result = entrez.esearch("biosample", query, **defaults)
        webenv = esearch_result.webenv
        query_key = esearch_result.query_key
        count = esearch_result.count
        # The accession half of each pair is not needed here.
        for _accn, xml in biosample_efetch(webenv, query_key, count, defaults):
            yield parsers_biosample_xml.parse_biosample(xml_to_root(xml))
        # Pause between batches (presumably to stay within the remote
        # service's request-rate limits -- confirm against the caller).
        time.sleep(1)
Example #3
0
def check_sra_for_updated_ids(query, collection, defaults):
    """Return the ids of SRA records whose update date differs from the stored one.

    The stored dates come from ``get_sramongo_last_srx_update(collection)``
    and are looked up by each summary's ``accn``. A summary whose ``accn``
    is missing from that mapping is compared against ``None``.

    Args:
        query: Query string submitted to ``entrez.esearch`` for the
            ``"sra"`` database.
        collection: Passed to ``get_sramongo_last_srx_update``.
        defaults: Mapping expanded as keyword arguments into
            ``entrez.esearch`` and passed positionally to ``sra_esummary``.

    Returns:
        A list with the ``id`` of every summary whose ``update_date`` is not
        equal to the stored value for its ``accn``.
    """
    logger.info("SRA - Querying SraMongo for last update")
    known_updates = get_sramongo_last_srx_update(collection)

    logger.info(f"SRA - Querying SRA for: {query}")
    search = entrez.esearch("sra", query, **defaults)
    webenv, query_key = search.webenv, search.query_key

    # In debug mode only DEBUG_SIZE records are requested instead of the
    # full result count.
    if _DEBUG:
        count = DEBUG_SIZE
    else:
        count = search.count

    logger.info("SRA - Checking for Updates")
    ids_to_update = [
        summary.id
        for summary in sra_esummary(webenv, query_key, count, defaults)
        if summary.update_date != known_updates.get(summary.accn)
    ]

    logger.info(f"SRA - {len(ids_to_update):,} IDs to Update")
    return ids_to_update
Example #4
0
def small_esearch_results() -> entrez.EsearchResult:
    """Run ``entrez.esearch`` for ``QUERY`` on ``DB``, capped at ``RETMAX`` results."""
    search_options = {"retmax": RETMAX, "api_key": API_KEY}
    return entrez.esearch(DB, QUERY, **search_options)