Example #1
0
def query_gsm(gsm, out_file, config=None):
    """Fetch the reads for an accession and merge them into ``out_file``.

    Searches the NCBI ``sra`` database (E-utilities ``esearch``) for
    ``gsm[0]``, resolves the last id returned to run entries through
    ``_query_info``, turns each entry into a link with ``_create_link``,
    downloads it with ``_download_srx``, converts the downloads with
    ``_convert_fastq`` and finally merges everything with ``fastq.merge``.

    Args:
        gsm: Sequence whose first item is the accession to search for.
        out_file: Path of the merged output. Its directory receives the
            converted files and a sub-directory named after its base name
            (extension stripped by ``utils.splitext_plus``) receives the
            downloads.
        config: Passed unchanged to ``fastq.merge``. A fresh empty dict is
            used when omitted.

    Returns:
        The value returned by ``fastq.merge``, or ``None`` when the search
        returns no ids or no converted files were produced.

    Raises:
        ValueError: from ``json.loads`` when curl's output is not valid JSON
            (for example when the request failed and nothing was printed).
        OSError: when the ``curl`` executable cannot be started.
    """
    if config is None:
        # Avoid sharing one dict object between calls.
        config = {}
    gsm = gsm[0]
    out_dir = os.path.dirname(os.path.abspath(out_file))
    name = utils.splitext_plus(os.path.basename(out_file))[0]
    url = ("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
           "?db=sra&term={0}&retmode=json").format(gsm)
    # Argument list instead of a shell string: the accession can no longer be
    # interpreted by a shell and the "&" separators need no escaping.
    process = subprocess.Popen(["curl", url],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for curl to exit.
    out, _ = process.communicate()
    data = json.loads(out)
    ids = data.get("esearchresult", {}).get("idlist", [])
    logger.debug("Get id sample for %s" % gsm)
    if not ids:
        return None

    gsm_info = _query_info("sra", ids[-1])
    logger.debug("gsm_info:%s" % (gsm_info,))
    srrall = [_create_link(srr) for srr in gsm_info]
    logger.debug("Get FTP link for %s : %s" % (ids[-1], srrall))

    outs = []
    for srx in srrall:
        sra_dir = utils.safe_makedir(os.path.join(out_dir, name))
        srafiles = _download_srx(gsm, srx, sra_dir)
        # Guard before joining: a failed download may yield nothing.
        if srafiles:
            logger.debug("Get SRA for %s: %s" % (gsm, " ".join(srafiles)))
            for sra in srafiles:
                fastq_fn = _convert_fastq(sra, out_dir)
                if fastq_fn:
                    outs.extend(fastq_fn)
        logger.debug("Get FASTQ for %s: %s" % (gsm, " ".join(outs)))

    if not outs:
        return None
    files = combine_pairs(outs)
    return fastq.merge(files, out_file, config)
Example #2
0
def query_gsm(gsm, out_file, config=None):
    """Fetch the reads for an accession and merge them into ``out_file``.

    Searches the NCBI ``gds`` database (E-utilities ``esearch``) for
    ``gsm[0]``, asks ``_query_info`` for the record of the last id returned,
    collects every ``targetftplink`` found in that record's
    ``extrelations`` entries, downloads each with ``_download_srx``,
    converts the downloads with ``_convert_fastq`` and merges the result
    with ``fastq.merge``.

    Args:
        gsm: Sequence whose first item is the accession to search for.
        out_file: Path of the merged output; downloads and converted files
            are written to its directory.
        config: Passed unchanged to ``fastq.merge``. A fresh empty dict is
            used when omitted.

    Returns:
        The value returned by ``fastq.merge``, or ``None`` when the search
        returns no ids or no converted files were produced.

    Raises:
        ValueError: from ``json.loads`` when curl's output is not valid JSON
            (for example when the request failed and nothing was printed).
        OSError: when the ``curl`` executable cannot be started.
    """
    if config is None:
        # Avoid sharing one dict object between calls.
        config = {}
    gsm = gsm[0]
    # abspath keeps out_dir non-empty when out_file is a bare file name.
    out_dir = os.path.dirname(os.path.abspath(out_file))
    url = ("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
           "?db=gds&term={0}&retmode=json").format(gsm)
    # Argument list instead of a shell string: the accession can no longer be
    # interpreted by a shell and the "&" separators need no escaping.
    process = subprocess.Popen(["curl", url],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    # communicate() drains both pipes and waits for curl to exit.
    out, _ = process.communicate()
    data = json.loads(out)
    ids = data.get("esearchresult", {}).get("idlist", [])
    logger.debug("Get id sample for %s" % gsm)
    if not ids:
        return None

    gsm_info = _query_info("gds", ids[-1])
    srxlist = gsm_info.get("result", {}).get(ids[-1], {}).get("extrelations", {})
    # Keep only the relations that actually carry a link.
    srxall = [srxe["targetftplink"] for srxe in srxlist
              if srxe.get("targetftplink", None)]
    logger.debug("Get FTP link for %s : %s" % (ids[-1], srxall))

    outs = []
    for srx in srxall:
        srafiles = _download_srx(gsm, srx, out_dir)
        # Guard before joining: a failed download may yield nothing.
        if srafiles:
            logger.debug("Get SRA for %s: %s" % (gsm, " ".join(srafiles)))
            for sra in srafiles:
                fastq_fn = _convert_fastq(sra, out_dir)
                if fastq_fn:
                    outs.extend(fastq_fn)
        logger.debug("Get FASTQ for %s: %s" % (gsm, " ".join(outs)))

    if not outs:
        return None
    files = combine_pairs(outs)
    return fastq.merge(files, out_file, config)
Example #3
0
def query_gsm(gsm, out_file, config=None):
    """Fetch the reads for an accession and merge them into ``out_file``.

    Searches the NCBI ``sra`` database (E-utilities ``esearch``) for
    ``gsm[0]``, resolves the last id returned to run entries through
    ``_query_info``, builds a link per entry with ``_create_link``,
    downloads each link with ``_download_srx``, converts the downloads with
    ``_convert_fastq`` and merges the result with ``fastq.merge``.

    Args:
        gsm: Sequence whose first item is the accession to search for.
        out_file: Path of the merged output. Converted files go to its
            directory; downloads go to a sub-directory named after its base
            name (extension stripped by ``utils.splitext_plus``).
        config: Passed unchanged to ``fastq.merge``. A fresh empty dict is
            used when omitted.

    Returns:
        The value returned by ``fastq.merge``, or ``None`` when the search
        returns no ids or no converted files were produced.

    Raises:
        ValueError: from ``json.loads`` when curl's output is not valid JSON
            (for example when the request failed and nothing was printed).
        OSError: when the ``curl`` executable cannot be started.
    """
    if config is None:
        # Avoid sharing one dict object between calls.
        config = {}
    gsm = gsm[0]
    out_dir = os.path.dirname(os.path.abspath(out_file))
    name = utils.splitext_plus(os.path.basename(out_file))[0]
    url = ("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
           "?db=sra&term={0}&retmode=json").format(gsm)
    # Run curl without a shell so the accession is passed as plain data and
    # the query-string "&" characters need no backslash escaping.
    process = subprocess.Popen(["curl", url],
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    # communicate() reads both pipes to the end and reaps the child process.
    out, _ = process.communicate()
    data = json.loads(out)
    ids = data.get("esearchresult", {}).get("idlist", [])
    logger.debug("Get id sample for %s" % gsm)
    if not ids:
        return None

    gsm_info = _query_info("sra", ids[-1])
    logger.debug("gsm_info:%s" % gsm_info)
    srrall = [_create_link(srr) for srr in gsm_info]
    logger.debug("Get FTP link for %s : %s" % (ids[-1], srrall))

    outs = []
    for srx in srrall:
        sra_dir = utils.safe_makedir(os.path.join(out_dir, name))
        srafiles = _download_srx(srx, sra_dir)
        if srafiles:
            logger.debug("Get SRA for %s: %s" % (gsm, " ".join(srafiles)))
            for sra in srafiles:
                fastq_fn = _convert_fastq(sra, out_dir)
                # Skip files whose conversion produced nothing.
                if fastq_fn:
                    outs.extend(fastq_fn)
        logger.debug("Get FASTQ for %s: %s" % (gsm, " ".join(outs)))

    if not outs:
        return None
    files = combine_pairs(outs)
    return fastq.merge(files, out_file, config)
Example #4
0
def query_srr(sra, out_file, config=None):
    """Download the given runs, convert them and merge into ``out_file``.

    Each identifier in ``sra[0]`` is turned into a link with
    ``_create_link``; every link is downloaded with ``_download_srx``, the
    downloads are converted with ``_convert_fastq`` and the collected
    results are merged with ``fastq.merge``.

    Args:
        sra: Sequence whose first item is an iterable of run identifiers.
        out_file: Path of the merged output. Converted files go to its
            directory; downloads go to a sub-directory named after its base
            name (extension stripped by ``utils.splitext_plus``).
        config: Passed unchanged to ``fastq.merge``. A fresh empty dict is
            used when omitted.

    Returns:
        The value returned by ``fastq.merge``, or ``None`` when no converted
        files were produced.
    """
    if config is None:
        # Avoid sharing one dict object between calls.
        config = {}
    sra = sra[0]
    out_dir = os.path.dirname(os.path.abspath(out_file))
    name = utils.splitext_plus(os.path.basename(out_file))[0]
    srrall = [_create_link(srr) for srr in sra]
    logger.debug("Get FTP link for %s : %s" % (name, srrall))

    outs = []
    for srx in srrall:
        sra_dir = utils.safe_makedir(os.path.join(out_dir, name))
        srafiles = _download_srx(srx, sra_dir)
        if srafiles:
            logger.debug("Get SRA for %s: %s" % (sra, " ".join(srafiles)))
            # Use a distinct loop name: reusing ``sra`` here would overwrite
            # the requested identifiers that the log messages report.
            for sra_file in srafiles:
                fastq_fn = _convert_fastq(sra_file, out_dir)
                if fastq_fn:
                    outs.extend(fastq_fn)
        logger.debug("Get FASTQ for %s: %s" % (sra, " ".join(outs)))

    if not outs:
        return None
    files = combine_pairs(outs)
    return fastq.merge(files, out_file, config)
Example #5
0
def query_srr(sra, out_file, config=None):
    """Download a set of runs and merge their converted reads.

    The identifiers in ``sra[0]`` are mapped to links via ``_create_link``.
    For each link, ``_download_srx`` fetches files into a sub-directory of
    the output directory, ``_convert_fastq`` converts every fetched file,
    and the accumulated results are paired by ``combine_pairs`` and merged
    by ``fastq.merge``.

    Args:
        sra: Sequence whose first item is an iterable of run identifiers.
        out_file: Destination of the merged output. Its directory is used
            for converted files, and its base name without extension (per
            ``utils.splitext_plus``) names the download sub-directory.
        config: Forwarded to ``fastq.merge``; replaced by a new empty dict
            when not supplied.

    Returns:
        The result of ``fastq.merge``, or ``None`` if nothing was converted.
    """
    # A None default keeps calls from sharing a single dict instance.
    config = {} if config is None else config
    requested = sra[0]
    out_dir = os.path.dirname(os.path.abspath(out_file))
    name = utils.splitext_plus(os.path.basename(out_file))[0]

    srrall = [_create_link(srr) for srr in requested]
    logger.debug("Get FTP link for %s : %s" % (name, srrall))

    outs = []
    for srx in srrall:
        sra_dir = utils.safe_makedir(os.path.join(out_dir, name))
        srafiles = _download_srx(srx, sra_dir)
        if srafiles:
            logger.debug("Get SRA for %s: %s" % (requested, " ".join(srafiles)))
            # ``requested`` is kept separate from the per-file loop variable
            # so the log lines always name the identifiers asked for.
            for downloaded in srafiles:
                fastq_fn = _convert_fastq(downloaded, out_dir)
                if fastq_fn:
                    outs.extend(fastq_fn)
        logger.debug("Get FASTQ for %s: %s" % (requested, " ".join(outs)))

    if outs:
        files = combine_pairs(outs)
        return fastq.merge(files, out_file, config)
    return None