def test_esearch_sra_withhistory():
    """Esearch with ``userhistory=True`` should return RETMAX ids plus
    non-empty server-side history tokens (webenv / query_key)."""
    result = entrez.esearch(DB, QUERY, userhistory=True, retmax=RETMAX, api_key=API_KEY)
    # History tokens must be populated when userhistory is requested.
    assert result.webenv != ""
    assert result.query_key != ""
    # The id list is capped at the requested retmax.
    assert len(result.ids) == RETMAX
def download_biosample_xml(biosample_ids, defaults):
    """Download and parse BioSample XML records in batches.

    Args:
        biosample_ids: Iterable of BioSample accession/id strings to fetch.
        defaults: Extra keyword arguments forwarded to ``entrez.esearch``
            (e.g. api_key / history settings) — schema defined by caller.

    Yields:
        Parsed BioSample documents, one per fetched XML record
        (whatever ``parsers_biosample_xml.parse_biosample`` returns).
    """
    # Plain string: no placeholders, so no f-prefix needed.
    logger.info("BioSample - Downloading XML Records")
    for i, ids in enumerate(chunked(biosample_ids, BATCH_SIZE)):
        start = i * BATCH_SIZE
        # Use the actual chunk length so the final (possibly short) batch
        # is reported accurately instead of rounded up to a full batch.
        end = start + len(ids)
        logger.info(f"BioSample - Downloading XML Records [Batch {start:,}-{end:,}]")
        # Entrez query syntax: OR together all ids in this batch.
        query = "+OR+".join(ids)
        esearch_result = entrez.esearch("biosample", query, **defaults)
        webenv = esearch_result.webenv
        query_key = esearch_result.query_key
        count = esearch_result.count
        # Accession is not used here; the parser works from the XML alone.
        for _accn, xml in biosample_efetch(webenv, query_key, count, defaults):
            root = xml_to_root(xml)
            yield parsers_biosample_xml.parse_biosample(root)
        # Pause between batches — presumably NCBI rate limiting.
        # NOTE(review): original collapsed source is ambiguous about whether
        # this slept per record or per batch; per batch assumed — confirm.
        time.sleep(1)
def check_sra_for_updated_ids(query, collection, defaults):
    """Return SRA ids whose update date differs from what SraMongo has stored.

    Args:
        query: Entrez search query string for the "sra" database.
        collection: Mongo collection handle passed to
            ``get_sramongo_last_srx_update``.
        defaults: Extra keyword arguments forwarded to ``entrez.esearch``.

    Returns:
        list: Entrez ids (``esummary_result.id``) needing an update —
        records whose ``update_date`` does not match the stored date
        (new SRX accessions always mismatch, since ``.get`` yields None).
    """
    # Static messages need no f-prefix.
    logger.info("SRA - Querying SraMongo for last update")
    last_srx_update = get_sramongo_last_srx_update(collection)
    logger.info(f"SRA - Querying SRA for: {query}")
    esearch_result = entrez.esearch("sra", query, **defaults)
    webenv = esearch_result.webenv
    query_key = esearch_result.query_key
    # In debug mode, only walk a fixed-size prefix of the results.
    count = DEBUG_SIZE if _DEBUG else esearch_result.count
    logger.info("SRA - Checking for Updates")
    ids_to_update = [
        result.id
        for result in sra_esummary(webenv, query_key, count, defaults)
        # .get() defaults to None, so unseen accessions are flagged too.
        if result.update_date != last_srx_update.get(result.accn)
    ]
    logger.info(f"SRA - {len(ids_to_update):,} IDs to Update")
    return ids_to_update
def small_esearch_results() -> entrez.EsearchResult:
    """Run a small esearch (no server history) and return the raw result."""
    result = entrez.esearch(DB, QUERY, retmax=RETMAX, api_key=API_KEY)
    return result