def query_gsm(gsm, out_file, config={}):
    """Search the NCBI ``sra`` database for a sample and build a merged FASTQ.

    The accession is looked up through the E-utilities ``esearch`` endpoint
    (fetched with ``curl``).  The last id of the returned ``idlist`` is handed
    to ``_query_info``; every entry it yields is turned into a link with
    ``_create_link``, downloaded with ``_download_srx``, converted with
    ``_convert_fastq`` and the resulting files are merged into ``out_file``.

    Args:
        gsm: Sequence whose first element is the accession to search for.
        out_file: Path of the merged FASTQ to produce.  Converted FASTQ files
            go to its directory; downloads go to a sub-directory named after
            its basename with the extension removed by
            ``utils.splitext_plus``.
        config: Forwarded untouched to ``fastq.merge``.  The shared default
            dict is kept for interface compatibility; this function never
            mutates it.

    Returns:
        Whatever ``fastq.merge`` returns, or ``None`` when the search yields
        no ids or no FASTQ file was produced.

    Raises:
        ValueError: if the ``curl`` output is not valid JSON (for example
            when the request failed and nothing was printed).
    """
    gsm = gsm[0]
    out_dir = os.path.dirname(os.path.abspath(out_file))
    name = utils.splitext_plus(os.path.basename(out_file))[0]
    url = ("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
           "?db=sra&term={0}&retmode=json").format(gsm)
    # Pass an argument list instead of a shell string: the accession is never
    # interpreted by a shell and the "&" separators need no backslash escaping.
    process = subprocess.Popen(["curl", url],
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for curl to exit, so the child
    # is reaped and cannot block on a full stderr pipe.
    out, _ = process.communicate()
    data = json.loads(out)
    ids = data.get("esearchresult", {}).get("idlist", [])
    logger.debug("Get id sample for %s" % gsm)
    if not ids:
        return None

    gsm_info = _query_info("sra", ids[-1])
    logger.debug("gsm_info:%s" % (gsm_info,))
    srrall = [_create_link(srr) for srr in gsm_info]
    logger.debug("Get FTP link for %s : %s" % (ids[-1], srrall))

    outs = []
    for srx in srrall:
        sra_dir = utils.safe_makedir(os.path.join(out_dir, name))
        srafiles = _download_srx(gsm, srx, sra_dir)
        # Only touch the download result once it is known to be non-empty, so
        # a helper that reports failure with None does not raise here.
        if srafiles:
            logger.debug("Get SRA for %s: %s" % (gsm, " ".join(srafiles)))
            for sra in srafiles:
                fastq_fn = _convert_fastq(sra, out_dir)
                if fastq_fn:
                    outs.extend(fastq_fn)
        logger.debug("Get FASTQ for %s: %s" % (gsm, " ".join(outs)))

    if not outs:
        return None
    files = combine_pairs(outs)
    return fastq.merge(files, out_file, config)
def query_gsm(gsm, out_file, config={}):
    """Search the NCBI ``gds`` database for a sample and build a merged FASTQ.

    The accession is looked up through the E-utilities ``esearch`` endpoint
    (fetched with ``curl``).  The last id of the returned ``idlist`` is handed
    to ``_query_info``; the ``targetftplink`` of each ``extrelations`` entry
    found under ``result -> <id>`` is downloaded with ``_download_srx``,
    converted with ``_convert_fastq`` and the resulting files are merged into
    ``out_file``.

    Args:
        gsm: Sequence whose first element is the accession to search for.
        out_file: Path of the merged FASTQ to produce; its directory (as
            given, not made absolute) receives downloads and FASTQ files.
        config: Forwarded untouched to ``fastq.merge``.  The shared default
            dict is kept for interface compatibility; this function never
            mutates it.

    Returns:
        Whatever ``fastq.merge`` returns, or ``None`` when the search yields
        no ids or no FASTQ file was produced.

    Raises:
        ValueError: if the ``curl`` output is not valid JSON (for example
            when the request failed and nothing was printed).
    """
    gsm = gsm[0]
    out_dir = os.path.dirname(out_file)
    url = ("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
           "?db=gds&term={0}&retmode=json").format(gsm)
    # Pass an argument list instead of a shell string: the accession is never
    # interpreted by a shell and the "&" separators need no backslash escaping.
    process = subprocess.Popen(["curl", url],
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for curl to exit, so the child
    # is reaped and cannot block on a full stderr pipe.
    out, _ = process.communicate()
    data = json.loads(out)
    ids = data.get("esearchresult", {}).get("idlist", [])
    logger.debug("Get id sample for %s" % gsm)
    if not ids:
        return None

    gsm_info = _query_info("gds", ids[-1])
    srxlist = gsm_info.get("result", {}).get(ids[-1], {}).get("extrelations", {})
    # Keep only the relations that actually carry a non-empty FTP link.
    srxall = [srxe["targetftplink"] for srxe in srxlist
              if srxe.get("targetftplink", None)]
    logger.debug("Get FTP link for %s : %s" % (ids[-1], srxall))

    outs = []
    for srx in srxall:
        srafiles = _download_srx(gsm, srx, out_dir)
        # Only touch the download result once it is known to be non-empty, so
        # a helper that reports failure with None does not raise here.
        if srafiles:
            logger.debug("Get SRA for %s: %s" % (gsm, " ".join(srafiles)))
            for sra in srafiles:
                fastq_fn = _convert_fastq(sra, out_dir)
                if fastq_fn:
                    outs.extend(fastq_fn)
        logger.debug("Get FASTQ for %s: %s" % (gsm, " ".join(outs)))

    if not outs:
        return None
    files = combine_pairs(outs)
    return fastq.merge(files, out_file, config)
def query_gsm(gsm, out_file, config={}):
    """Search the NCBI ``sra`` database for a sample and build a merged FASTQ.

    The accession is looked up through the E-utilities ``esearch`` endpoint
    (fetched with ``curl``).  The last id of the returned ``idlist`` is handed
    to ``_query_info``; every entry it yields is turned into a link with
    ``_create_link``, downloaded with ``_download_srx``, converted with
    ``_convert_fastq`` and the resulting files are merged into ``out_file``.

    Args:
        gsm: Sequence whose first element is the accession to search for.
        out_file: Path of the merged FASTQ to produce.  Converted FASTQ files
            go to its directory; downloads go to a sub-directory named after
            its basename with the extension removed by
            ``utils.splitext_plus``.
        config: Forwarded untouched to ``fastq.merge``.  The shared default
            dict is kept for interface compatibility; this function never
            mutates it.

    Returns:
        Whatever ``fastq.merge`` returns, or ``None`` when the search yields
        no ids or no FASTQ file was produced.

    Raises:
        ValueError: if the ``curl`` output is not valid JSON (for example
            when the request failed and nothing was printed).
    """
    gsm = gsm[0]
    out_dir = os.path.dirname(os.path.abspath(out_file))
    name = utils.splitext_plus(os.path.basename(out_file))[0]
    url = ("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
           "?db=sra&term={0}&retmode=json").format(gsm)
    # Pass an argument list instead of a shell string: the accession is never
    # interpreted by a shell and the "&" separators need no backslash escaping.
    process = subprocess.Popen(["curl", url],
                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for curl to exit, so the child
    # is reaped and cannot block on a full stderr pipe.
    out, _ = process.communicate()
    data = json.loads(out)
    ids = data.get("esearchresult", {}).get("idlist", [])
    logger.debug("Get id sample for %s" % gsm)
    if not ids:
        return None

    gsm_info = _query_info("sra", ids[-1])
    logger.debug("gsm_info:%s" % gsm_info)
    srrall = [_create_link(srr) for srr in gsm_info]
    logger.debug("Get FTP link for %s : %s" % (ids[-1], srrall))

    outs = []
    for srx in srrall:
        sra_dir = utils.safe_makedir(os.path.join(out_dir, name))
        srafiles = _download_srx(srx, sra_dir)
        if not srafiles:
            # Nothing was downloaded for this link; try the next one.
            continue
        logger.debug("Get SRA for %s: %s" % (gsm, " ".join(srafiles)))
        for sra in srafiles:
            fastq_fn = _convert_fastq(sra, out_dir)
            if fastq_fn:
                outs.extend(fastq_fn)
        logger.debug("Get FASTQ for %s: %s" % (gsm, " ".join(outs)))

    if not outs:
        return None
    files = combine_pairs(outs)
    return fastq.merge(files, out_file, config)
def query_srr(sra, out_file, config={}):
    """Download the requested SRA entries and merge them into one FASTQ.

    Each item of ``sra[0]`` is turned into a link with ``_create_link``,
    downloaded with ``_download_srx`` and converted with ``_convert_fastq``;
    the resulting files are paired with ``combine_pairs`` and merged into
    ``out_file`` by ``fastq.merge``.

    Args:
        sra: Sequence whose first element is iterated to obtain the entries
            to download.
        out_file: Path of the merged FASTQ to produce.  Converted FASTQ files
            go to its directory; downloads go to a sub-directory named after
            its basename with the extension removed by
            ``utils.splitext_plus``.
        config: Forwarded untouched to ``fastq.merge``.  The shared default
            dict is kept for interface compatibility; this function never
            mutates it.

    Returns:
        Whatever ``fastq.merge`` returns, or ``None`` when no FASTQ file was
        produced.
    """
    sra = sra[0]
    outs = []
    out_dir = os.path.dirname(os.path.abspath(out_file))
    name = utils.splitext_plus(os.path.basename(out_file))[0]
    # NOTE(review): ``sra`` is iterated item by item; if callers pass a plain
    # string as the first element this walks its characters -- confirm the
    # expected shape against the caller.
    srrall = [_create_link(srr) for srr in sra]
    logger.debug("Get FTP link for %s : %s" % (name, srrall))

    for srx in srrall:
        sra_dir = utils.safe_makedir(os.path.join(out_dir, name))
        srafiles = _download_srx(srx, sra_dir)
        if not srafiles:
            # Nothing was downloaded for this link; try the next one.
            continue
        logger.debug("Get SRA for %s: %s" % (sra, " ".join(srafiles)))
        # The loop variable has its own name so that ``sra`` keeps referring
        # to the requested entries in the debug messages instead of being
        # overwritten with the last downloaded file path.
        for sra_file in srafiles:
            fastq_fn = _convert_fastq(sra_file, out_dir)
            if fastq_fn:
                outs.extend(fastq_fn)
        logger.debug("Get FASTQ for %s: %s" % (sra, " ".join(outs)))

    if not outs:
        return None
    files = combine_pairs(outs)
    return fastq.merge(files, out_file, config)
def query_srr(sra, out_file, config={}):
    """Download the requested SRA entries and merge them into one FASTQ.

    Each item of ``sra[0]`` is turned into a link with ``_create_link``,
    downloaded with ``_download_srx`` and converted with ``_convert_fastq``;
    the resulting files are paired with ``combine_pairs`` and merged into
    ``out_file`` by ``fastq.merge``.

    Args:
        sra: Sequence whose first element is iterated to obtain the entries
            to download.
        out_file: Path of the merged FASTQ to produce.  Converted FASTQ files
            go to its directory; downloads go to a sub-directory named after
            its basename with the extension removed by
            ``utils.splitext_plus``.
        config: Forwarded untouched to ``fastq.merge``.  The shared default
            dict is kept for interface compatibility; this function never
            mutates it.

    Returns:
        Whatever ``fastq.merge`` returns, or ``None`` when no FASTQ file was
        produced.
    """
    sra = sra[0]
    outs = []
    out_dir = os.path.dirname(os.path.abspath(out_file))
    name = utils.splitext_plus(os.path.basename(out_file))[0]
    # NOTE(review): ``sra`` is iterated item by item; if callers pass a plain
    # string as the first element this walks its characters -- confirm the
    # expected shape against the caller.
    srrall = [_create_link(srr) for srr in sra]
    logger.debug("Get FTP link for %s : %s" % (name, srrall))

    for srx in srrall:
        sra_dir = utils.safe_makedir(os.path.join(out_dir, name))
        srafiles = _download_srx(srx, sra_dir)
        if not srafiles:
            # Nothing was downloaded for this link; try the next one.
            continue
        logger.debug("Get SRA for %s: %s" % (sra, " ".join(srafiles)))
        # The loop variable has its own name so that ``sra`` keeps referring
        # to the requested entries in the debug messages instead of being
        # overwritten with the last downloaded file path.
        for sra_file in srafiles:
            fastq_fn = _convert_fastq(sra_file, out_dir)
            if fastq_fn:
                outs.extend(fastq_fn)
        logger.debug("Get FASTQ for %s: %s" % (sra, " ".join(outs)))

    if not outs:
        return None
    files = combine_pairs(outs)
    return fastq.merge(files, out_file, config)