def call_macs_peaks(exp_bed, back_bed, out_base, ref, config):
    """Peak calling using MACS (the ``macs14`` executable).

    The command is run from inside the peak directory and is skipped when
    the expected output file already exists there.

    Arguments:
    exp_bed -- BED file passed to macs14 as the treatment (``-t``).
    back_bed -- BED file passed to macs14 as the control (``-c``).
    out_base -- label used in the output name; spaces become underscores.
    ref -- reference path; its basename becomes part of the output name and
        ``_size_from_fai(_get_ref_fai(ref))`` supplies ``--gsize``.
    config -- mapping with a "peak_dir" entry and an "algorithm" section
        that may define "subpeaks", "shiftsize" and "largelambda".

    Returns the path of the expected peak file: ``*_peaks.subpeaks.bed``
    when subpeaks are requested, ``*_peaks.xls`` otherwise.

    Raises subprocess.CalledProcessError when macs14 exits non-zero.
    """
    peak_dir = config["peak_dir"]
    subpeaks = config["algorithm"].get("subpeaks", False)
    shiftsize = config["algorithm"].get("shiftsize", None)
    largelambda = config["algorithm"].get("largelambda", None)
    if not os.path.exists(peak_dir):
        os.makedirs(peak_dir)
    genome_size = _size_from_fai(_get_ref_fai(ref))
    out_name = os.path.join(peak_dir, "%s-%s-macs" %
                            (out_base.replace(" ", "_"), os.path.basename(ref)))
    ext = "peaks.subpeaks.bed" if subpeaks else "peaks.xls"
    peak_file = "%s_%s" % (out_name, ext)
    if not os.path.exists(peak_file):
        cl = ["macs14", "-t", _full_path(exp_bed), "-c", _full_path(back_bed),
              "--name=%s" % os.path.basename(out_name),
              "--format=BED", "--gsize=%s" % genome_size]
        if largelambda:
            cl += ["--llocal", str(largelambda)]
        if shiftsize:
            # A fixed shift size only makes sense with model building off.
            cl += ["--nomodel", "--shiftsize=%s" % shiftsize]
        if subpeaks:
            cl += ["--call-subpeaks", "--wig"]
        # --name is only a basename, so run from the output directory.
        with chdir(os.path.dirname(out_name)):
            # Single-argument call form prints identically on Python 2 and 3.
            print(" ".join(cl))
            subprocess.check_call(cl)
    return peak_file
def split_by_barcode(fastq1, fastq2, multiplex, base_name, config):
    """Demultiplex fastq input into one set of files per barcode.

    Without multiplex details the inputs are handed back untouched as a
    single entry with empty barcode id and name. Otherwise the configured
    barcode program is run inside the "<base_name>_barcode" directory,
    unless the unmatched-reads file is already present there.

    Returns a list of (barcode_id, name, fastq1, fastq2) tuples; the second
    fastq entry is None when no fastq2 was given.
    """
    if not multiplex:
        return [("", "", fastq1, fastq2)]

    bc_dir = "%s_barcode" % base_name
    unmatched = "%s_1_unmatched_fastq.txt" % base_name
    with utils.chdir(bc_dir):
        tags = _make_tag_file(multiplex)
        cmd = [config["program"]["barcode"], tags,
               "%s_--b--_--r--_fastq.txt" % base_name, fastq1]
        if fastq2:
            cmd.append(fastq2)
        cmd.append("--mismatch=%s" % config["algorithm"]["bc_mismatch"])
        if int(config["algorithm"]["bc_read"]) == 2:
            cmd.append("--second")
        if int(config["algorithm"]["bc_position"]) == 5:
            cmd.append("--five")
        # The unmatched file doubles as the "already split" marker.
        if not os.path.exists(unmatched):
            subprocess.check_call(cmd)

    def _split_path(barcode_id, read):
        # Location of one demultiplexed read file for a barcode.
        fname = "%s_%s_%s_fastq.txt" % (base_name, barcode_id, read)
        return os.path.join(bc_dir, fname)

    results = []
    for entry in multiplex:
        first = _split_path(entry["barcode_id"], "1")
        second = None
        if fastq2:
            second = _split_path(entry["barcode_id"], "2")
        results.append((entry["barcode_id"], entry["name"], first, second))
    return results
 def _make_blastdb(self, seq):
     """Write ``seq`` as a single FASTA record and build a BLAST database.

     The upper-cased sequence is written to "ref.fa" (record id "ref")
     inside ``self._tmpdir``; ``makeblastdb`` is then run on that file with
     ``-dbtype nucl`` and its standard output discarded.

     Returns the path of "ref.fa" within ``self._tmpdir``, which is also
     the ``-out`` name given to makeblastdb.

     Raises subprocess.CalledProcessError when makeblastdb exits non-zero.
     """
     out_file = "ref.fa"
     with chdir(self._tmpdir):
         with open(out_file, "w") as out_handle:
             out_handle.write(">ref\n%s\n" % seq.upper())
         cl = ["makeblastdb", "-in", out_file, "-dbtype", "nucl",
               "-out", out_file]
         # os.devnull keeps this portable instead of hard-coding /dev/null.
         with open(os.devnull, "w") as stdout:
             subprocess.check_call(cl, stdout=stdout)
     return os.path.join(self._tmpdir, out_file)
Example #4
0
def make_search_db(seq_recs, ids, target_org, tmp_dir):
    """Build a BLAST database from the records selected by ``ids``.

    The chosen entries of ``seq_recs`` are written as FASTA into
    ``tmp_dir``, in a file named after ``target_org`` (spaces replaced by
    underscores, suffix "-db.fa"). ``makeblastdb`` is then run on that file
    with ``-dbtype prot`` and ``target_org`` as the title.

    Returns the database name (the FASTA name without its extension)
    joined onto ``tmp_dir``.
    """
    fasta_name = "%s-db.fa" % target_org.replace(" ", "_")
    db_base, _ = os.path.splitext(fasta_name)
    with chdir(tmp_dir):
        with open(fasta_name, "w") as fasta_handle:
            selected = (seq_recs[idx] for idx in ids)
            SeqIO.write(selected, fasta_handle, "fasta")
        subprocess.check_call(["makeblastdb",
                               "-in", fasta_name,
                               "-dbtype", "prot",
                               "-out", db_base,
                               "-title", target_org])
    return os.path.join(tmp_dir, db_base)
Example #5
0
def make_search_db(seq_recs, ids, target_org, tmp_dir):
    """Create a searchable BLAST database for a target organism.

    Records ``seq_recs[i]`` for each ``i`` in ``ids`` are saved in FASTA
    format inside ``tmp_dir``, then indexed with ``makeblastdb`` using
    ``-dbtype prot`` and titled with ``target_org``.

    Returns ``tmp_dir`` joined with the database name, which is the
    organism name with spaces turned into underscores plus "-db".
    """
    org_label = target_org.replace(" ", "_")
    fasta_file = "%s-db.fa" % org_label
    database = os.path.splitext(fasta_file)[0]
    with chdir(tmp_dir):
        with open(fasta_file, "w") as handle:
            SeqIO.write((seq_recs[rec_id] for rec_id in ids), handle, "fasta")
        command = ["makeblastdb"]
        command += ["-in", fasta_file]
        command += ["-dbtype", "prot"]
        command += ["-out", database]
        command += ["-title", target_org]
        subprocess.check_call(command)
    return os.path.join(tmp_dir, database)
Example #6
0
def _generate_fastq(fc_dir):
    """Produce fastq files for a flowcell directory if they are missing.

    Nothing is run when the fastq directory is the flowcell directory
    itself or already exists. Otherwise ``solexa_qseq_to_fastq.py`` is
    called from the parent of the fastq directory with the
    "<date>_<name>" flowcell label and the comma-joined lane ids taken
    from the qseq file names found there.

    Returns the fastq directory path.
    """
    fc_name, fc_date = get_flowcell_info(fc_dir)
    short_fc_name = "%s_%s" % (fc_date, fc_name)
    fastq_dir = get_fastq_dir(fc_dir)
    if fastq_dir == fc_dir or os.path.exists(fastq_dir):
        return fastq_dir

    parent_dir, _ = os.path.split(fastq_dir)
    with utils.chdir(parent_dir):
        # The lane id is the second underscore-separated field of each name.
        lane_ids = set()
        for qseq_name in glob.glob("*qseq.txt"):
            lane_ids.add(qseq_name.split("_")[1])
        lane_arg = ",".join(sorted(lane_ids))
        subprocess.check_call(["solexa_qseq_to_fastq.py", short_fc_name,
                               lane_arg])
    return fastq_dir
Example #7
0
def _files_to_copy(directory):
    """Retrieve files that should be remotely copied.

    All patterns are globbed inside ``utils.chdir(directory)``, so the
    returned paths are presumably relative to ``directory`` (this assumes
    utils.chdir switches the working directory -- confirm against utils).
    "RunInfo.xml", the "Plots" and "reports" directories and the fastq
    directory are listed unconditionally, whether or not they exist.

    Returns a pair of sorted lists:
    - parameter files, image directories, RunInfo.xml, intensity XML files
      and qseq files;
    - base call report files (xml/xsl/htm), the Plots and reports
      directories and the fastq directory.
    """
    with utils.chdir(directory):
        # Plain concatenation avoids relying on the Python 2 builtin reduce.
        image_redo_files = (glob.glob("*.params") +
                            glob.glob("Images/L*/C*") +
                            ["RunInfo.xml"])
        qseqs = (glob.glob("Data/Intensities/*.xml") +
                 glob.glob("Data/Intensities/BaseCalls/*qseq.txt"))
        reports = (glob.glob("Data/Intensities/BaseCalls/*.xml") +
                   glob.glob("Data/Intensities/BaseCalls/*.xsl") +
                   glob.glob("Data/Intensities/BaseCalls/*.htm") +
                   ["Data/Intensities/BaseCalls/Plots", "Data/reports"])
        fastq = ["Data/Intensities/BaseCalls/fastq"]
    return sorted(image_redo_files + qseqs), sorted(reports + fastq)
Example #8
0
def _generate_fastq(fc_dir, config):
    """Generate fastq files for the current flowcell.

    Work is only done when the fastq directory differs from ``fc_dir`` and
    does not exist yet. In that case ``_generate_qseq`` is called on the
    parent of the fastq directory, then ``solexa_qseq_to_fastq.py`` is run
    there with the "<date>_<name>" flowcell label and the comma-joined
    lane ids taken from the qseq file names.

    Arguments:
    fc_dir -- flowcell directory.
    config -- configuration mapping, passed through to ``_generate_qseq``.

    Returns the fastq directory path.

    Raises subprocess.CalledProcessError when the conversion script exits
    non-zero.
    """
    fc_name, fc_date = get_flowcell_info(fc_dir)
    short_fc_name = "%s_%s" % (fc_date, fc_name)
    fastq_dir = get_fastq_dir(fc_dir)
    basecall_dir = os.path.split(fastq_dir)[0]
    if not fastq_dir == fc_dir and not os.path.exists(fastq_dir):
        log.info("Generating fastq files for %s" % fc_dir)
        _generate_qseq(basecall_dir, config)
        log.info("Qseq files generated.")
        with utils.chdir(basecall_dir):
            # The lane id is the second underscore-separated field of each
            # qseq file name.
            lanes = sorted(set(f.split("_")[1] for f in
                               glob.glob("*qseq.txt")))
            cl = ["solexa_qseq_to_fastq.py", short_fc_name,
                  ",".join(lanes)]
            log.info("Converting qseq to fastq on all lanes.")
            subprocess.check_call(cl)
            log.info("Qseq to fastq conversion completed.")
    return fastq_dir
Example #9
0
def _generate_qseq(bc_dir, config):
    """Generate qseq files from illumina bcl files if not present.

    More recent Illumina updates do not produce qseq files. These can be
    generated from bcl, intensity and filter files with tools from
    the offline base caller OLB.
    """
    qseqs = glob.glob(os.path.join(bc_dir, "*qseq.txt"))
    if len(qseqs) == 0:
	log.info("Generating qseq files at %s" % bc_dir)
        cmd = os.path.join(config["program"]["olb"], "bin", "setupBclToQseq.py")
        cl = [cmd, "-i", bc_dir, "-o", bc_dir, "-p", os.path.split(bc_dir)[0],
             "--in-place", "--overwrite"]
        subprocess.check_call(cl)
        with utils.chdir(bc_dir):
            try:
                processors = config["algorithm"]["num_cores"]
            except KeyError:
                processors = 8
            cl = config["program"].get("olb_make", "make").split() + ["-j", str(processors)]
            subprocess.check_call(cl)