Ejemplo n.º 1
0
def rmdup(align_file, out_file):
    """Remove duplicate reads from an alignment file.

    Streams ``samtools view`` into ``samtools sort -n`` and then into
    ``bammarkduplicates rmdup=1``. The pipeline is skipped entirely when
    ``out_file`` already exists.

    Args:
        align_file: Path of the alignment file to read. It is also used,
            with a ``_tmp`` suffix, as the prefix handed to ``samtools sort``.
        out_file: Path of the final output. The command itself writes to the
            path yielded by ``file_transaction(out_file)``.

    Returns:
        ``out_file``, whether or not the pipeline had to run.
    """
    if os.path.exists(out_file):
        return out_file

    sort_prefix = align_file + "_tmp"
    pipeline = "samtools view -bh {align_file} | samtools sort -o -n - {tmp} | bammarkduplicates rmdup=1   O={tx_out_file}"
    with file_transaction(out_file) as tx_out_file:
        shell_cmd = pipeline.format(
            align_file=align_file,
            tmp=sort_prefix,
            tx_out_file=tx_out_file,
        )
        do.run(shell_cmd)
    return out_file
Ejemplo n.º 2
0
def qc(sam_file):
    """Run FastQC on a SAM file unless its output directory already exists.

    The output directory is the base name of ``sam_file`` plus ``_fastq``;
    being a relative name, it is resolved against the current working
    directory.

    Args:
        sam_file: Path of the SAM file passed to ``fastqc -f sam``.

    Returns:
        The name of the output directory.

    Raises:
        Exception: Whatever ``do.run`` raises is propagated unchanged, after
            the directory created by this call has been removed.
    """
    import shutil

    out_dir = os.path.basename(sam_file) + "_fastq"
    if os.path.exists(out_dir):
        return out_dir

    cmd = "fastqc {sam_file} -f sam -o {out_dir}".format(
        sam_file=sam_file, out_dir=out_dir)
    os.mkdir(out_dir)
    try:
        do.run(cmd)
    except Exception:
        # The directory doubles as the "already done" marker. If it survived
        # a failed run, every later call would silently skip QC.
        shutil.rmtree(out_dir, ignore_errors=True)
        raise
    return out_dir
Ejemplo n.º 3
0
def _bowtie_align(fastq_file, control_index, out_file):
    """Align reads against a control index and write flagstat statistics.

    The ``bowtie2 | samtools view`` step only runs when ``out_file`` does not
    exist yet; ``samtools flagstat`` is run on every call.

    Args:
        fastq_file: Reads passed to ``bowtie2 -U``.
        control_index: Index passed to ``bowtie2 -x``.
        out_file: Path of the alignment output. The alignment command writes
            to the path yielded by ``file_transaction(out_file)``.

    Returns:
        Path of the flagstat report, ``out_file + ".flagstat"``.
    """
    if not os.path.exists(out_file):
        align_template = "bowtie2 -p 4 --no-unal -x {control_index} -U {fastq_file} | samtools view -Shb /dev/stdin > {tx_out_file} "
        with file_transaction(out_file) as tx_out_file:
            align_cmd = align_template.format(
                control_index=control_index,
                fastq_file=fastq_file,
                tx_out_file=tx_out_file,
            )
            do.run(align_cmd, "bowtie2 %s" % fastq_file)

    stat_file = out_file + ".flagstat"
    stats_cmd = "samtools flagstat {out_file} > {stat_file}".format(
        out_file=out_file, stat_file=stat_file)
    do.run(stats_cmd, "stats control sequences")
    return stat_file
Ejemplo n.º 4
0
def rmdup(align_file, out_file):
    """Write a duplicate-free copy of ``align_file`` to ``out_file``.

    Does nothing if ``out_file`` is already present. Otherwise the input is
    piped through ``samtools view``, ``samtools sort -n`` and
    ``bammarkduplicates rmdup=1``, with the last stage writing to the path
    provided by ``file_transaction``.

    Args:
        align_file: Alignment file to process; ``align_file + "_tmp"`` is
            the prefix given to ``samtools sort``.
        out_file: Destination path.

    Returns:
        ``out_file``.
    """
    already_done = os.path.exists(out_file)
    if not already_done:
        fields = {"align_file": align_file, "tmp": align_file + "_tmp"}
        with file_transaction(out_file) as tx_out_file:
            fields["tx_out_file"] = tx_out_file
            do.run(
                "samtools view -bh {align_file} | samtools sort -o -n - {tmp} | bammarkduplicates rmdup=1   O={tx_out_file}".format(**fields)
            )
    return out_file
Ejemplo n.º 5
0
def qc(data, args):
    """Run FastQC on the alignment recorded in ``data`` unless already done.

    The output directory is the base name of ``data['align']`` plus
    ``_fastq``, resolved against the current working directory. When it
    already exists the run is skipped and a message is logged.

    Args:
        data: Mapping that must contain the key ``'align'`` (path of the SAM
            file passed to ``fastqc -f sam``).
        args: Not used by this function.

    Returns:
        ``data``, unchanged.

    Raises:
        KeyError: If ``data`` has no ``'align'`` entry.
        Exception: Whatever ``do.run`` raises is propagated unchanged, after
            the directory created by this call has been removed.
    """
    import shutil

    sam_file = data['align']
    out_dir = os.path.basename(sam_file) + "_fastq"
    if os.path.exists(out_dir):
        logger.my_logger.info("%s has already been QC, skipping." % (sam_file))
        return data

    cmd = "fastqc {sam_file} -f sam -o {out_dir}".format(
        sam_file=sam_file, out_dir=out_dir)
    os.mkdir(out_dir)
    try:
        do.run(cmd)
    except Exception:
        # The directory doubles as the "already done" marker. If it survived
        # a failed run, every later call would silently skip QC.
        shutil.rmtree(out_dir, ignore_errors=True)
        raise
    return data
Ejemplo n.º 6
0
def qc(data, args):
    """Run FastQC for a sample unless its output directory already exists.

    The input is ``data['r1_path']`` and the output directory is the base
    name of ``data['sample_id']`` plus ``_fastqc``, resolved against the
    current working directory. When the directory already exists the run is
    skipped and a message is logged.

    Args:
        data: Mapping that must contain ``'r1_path'`` and ``'sample_id'``.
        args: Not used by this function.

    Returns:
        ``data``, unchanged.

    Raises:
        KeyError: If ``data`` lacks ``'r1_path'`` or ``'sample_id'``.
        Exception: Whatever ``do.run`` raises is propagated unchanged, after
            the directory created by this call has been removed.
    """
    import shutil

    # NOTE(review): the command passes ``-f sam`` although the input comes
    # from ``r1_path`` -- confirm the file really is SAM and not FASTQ.
    sam_file = data['r1_path']
    out_dir = os.path.basename(data["sample_id"]) + "_fastqc"
    if os.path.exists(out_dir):
        logger.my_logger.info("%s has already been QC, skipping." % (sam_file))
        return data

    cmd = "fastqc {sam_file} -f sam -o {out_dir}".format(
        sam_file=sam_file, out_dir=out_dir)
    os.mkdir(out_dir)
    try:
        do.run(cmd)
    except Exception:
        # The directory doubles as the "already done" marker. If it survived
        # a failed run, every later call would silently skip QC.
        shutil.rmtree(out_dir, ignore_errors=True)
        raise
    return data
Ejemplo n.º 7
0
def _bowtie_align(fastq_file, control_index, out_file):
    """Map reads to a control index, then report flagstat numbers.

    Alignment (``bowtie2`` piped into ``samtools view``) is skipped when
    ``out_file`` already exists. The ``samtools flagstat`` step is not
    guarded and runs on each call.

    Args:
        fastq_file: Input reads for ``bowtie2 -U``.
        control_index: Index name for ``bowtie2 -x``.
        out_file: Alignment output path; the command is pointed at the path
            that ``file_transaction(out_file)`` yields.

    Returns:
        ``out_file + ".flagstat"``, the file the statistics are redirected to.
    """
    needs_alignment = not os.path.exists(out_file)
    if needs_alignment:
        with file_transaction(out_file) as tx_out_file:
            placeholders = dict(
                control_index=control_index,
                fastq_file=fastq_file,
                tx_out_file=tx_out_file,
            )
            do.run(
                "bowtie2 -p 4 --no-unal -x {control_index} -U {fastq_file} | samtools view -Shb /dev/stdin > {tx_out_file} ".format(**placeholders),
                "bowtie2 %s" % fastq_file,
            )

    stat_file = out_file + ".flagstat"
    do.run(
        "samtools flagstat {out_file} > {stat_file}".format(
            out_file=out_file, stat_file=stat_file),
        "stats control sequences",
    )
    return stat_file
Ejemplo n.º 8
0
def star_align(data, args, fastq_path, out_prefix, opts=""):
    """Align reads with STAR and clean the resulting SAM file.

    STAR is only invoked when ``out_prefix + "Aligned.out.sam"`` is missing;
    ``clean_align`` is called on every invocation.

    Args:
        data: Not used by this function.
        args: Object providing ``cores_per_job`` (for ``--runThreadN``) and
            ``aligner_index`` (for ``--genomeDir``).
        fastq_path: Value for ``--readFilesIn``; the command reads it through
            ``zcat``.
        out_prefix: Value for ``--outFileNamePrefix``; also the prefix of both
            returned paths.
        opts: Extra text appended after the ``--outSAMattributes`` list.

    Returns:
        Tuple ``(out_file, clean_file)`` where ``clean_file`` is whatever
        ``clean_align`` returns.
    """
    # Resolved up front so that a missing attribute fails even on the skip path.
    threads = args.cores_per_job
    genome_dir = args.aligner_index
    multimap_limit = MAX_BEST

    out_file = out_prefix + "Aligned.out.sam"
    if not os.path.exists(out_file):
        template = (
            "STAR --genomeDir {reference_prefix} --readFilesIn {fastq_path} --readFilesCommand zcat "
            "--runThreadN {cores} --outFileNamePrefix {out_prefix} "
            "--outFilterMultimapNmax {max_best} "
            "--outSAMattributes NH HI NM MD AS {opts} "
            "--outSAMstrandField intronMotif"
        )
        do.run(template.format(
            reference_prefix=genome_dir,
            fastq_path=fastq_path,
            cores=threads,
            out_prefix=out_prefix,
            max_best=multimap_limit,
            opts=opts,
        ))

    clean_file = clean_align(out_file, out_prefix + "cleaned.sam")
    return out_file, clean_file
Ejemplo n.º 9
0
def star_align(fastq_path, reference_prefix, out_prefix, cores=1):
    """Align reads with STAR, skipping the run if the output is present.

    Args:
        fastq_path: Value for ``--readFilesIn``; the command reads it through
            ``zcat``.
        reference_prefix: Value for ``--genomeDir``.
        out_prefix: Value for ``--outFileNamePrefix``.
        cores: Value for ``--runThreadN``.

    Returns:
        ``out_prefix + "Aligned.out.sam"``.
    """
    multimap_limit = MAX_BEST
    out_file = out_prefix + "Aligned.out.sam"

    if not file_exists(out_file):
        template = (
            "STAR --genomeDir {reference_prefix} --readFilesIn {fastq_path} --readFilesCommand zcat "
            "--runThreadN {cores} --outFileNamePrefix {out_prefix} "
            "--outFilterMultimapNmax {max_best} "
            "--outSAMattributes NH HI NM MD AS "
            "--outSAMstrandField intronMotif"
        )
        do.run(template.format(
            reference_prefix=reference_prefix,
            fastq_path=fastq_path,
            cores=cores,
            out_prefix=out_prefix,
            max_best=multimap_limit,
        ))
    else:
        print("%s has already been aligned, skipping." % (fastq_path))
    return out_file
Ejemplo n.º 10
0
def star_align(data, args, fastq_path, out_prefix, opts=""):
    """Align reads with STAR into a coordinate-sorted BAM, then clean it.

    STAR is only invoked when ``out_prefix + "Aligned.sortedByCoord.out.bam"``
    is missing; ``clean_align`` is called on every invocation.

    Args:
        data: Not used by this function.
        args: Object providing ``cores_per_job`` (for ``--runThreadN``) and
            ``aligner_index`` (for ``--genomeDir``).
        fastq_path: Value for ``--readFilesIn``; the command reads it through
            ``zcat``.
        out_prefix: Value for ``--outFileNamePrefix``; also the prefix of both
            returned paths.
        opts: Extra text appended after the ``--outSAMattributes`` list.

    Returns:
        Tuple ``(out_file, clean_file)`` where ``clean_file`` is whatever
        ``clean_align`` returns.
    """
    # Resolved up front so that a missing attribute fails even on the skip path.
    threads = args.cores_per_job
    genome_dir = args.aligner_index
    multimap_limit = MAX_BEST

    out_file = out_prefix + "Aligned.sortedByCoord.out.bam"
    bam_missing = not os.path.exists(out_file)
    if bam_missing:
        pieces = (
            "STAR --genomeDir {reference_prefix} --readFilesIn {fastq_path} --readFilesCommand zcat ",
            "--runThreadN {cores} --outFileNamePrefix {out_prefix} ",
            "--outFilterMultimapNmax {max_best} --outSAMtype BAM SortedByCoordinate ",
            "--outSAMattributes NH HI NM MD AS {opts} ",
        )
        star_cmd = "".join(pieces).format(
            reference_prefix=genome_dir,
            fastq_path=fastq_path,
            cores=threads,
            out_prefix=out_prefix,
            max_best=multimap_limit,
            opts=opts,
        )
        do.run(star_cmd)

    clean_file = clean_align(out_file, out_prefix + "cleaned.sam")
    return out_file, clean_file
Ejemplo n.º 11
0
def _cmd_counts(in_file, out_file, gtf_file, cores):
    """Run featureCounts on ``in_file`` unless ``out_file`` already exists.

    Args:
        in_file: Alignment file given to featureCounts as its input.
        out_file: Path passed to ``-o``; its existence decides whether the
            command runs.
        gtf_file: Annotation file passed to ``-a``.
        cores: Thread count passed to ``-T``.

    Returns:
        ``in_file + ".featureCounts"`` (not ``out_file``). Presumably this is
        the per-read file produced by ``-R`` -- confirm against the
        featureCounts version in use.
    """
    if os.path.exists(out_file):
        return in_file + ".featureCounts"

    template = "featureCounts -R -T {cores} -a {gtf_file} -o {out_file} {in_file}"
    do.run(template.format(
        cores=cores,
        gtf_file=gtf_file,
        out_file=out_file,
        in_file=in_file,
    ))
    return in_file + ".featureCounts"
Ejemplo n.º 12
0
def _cmd_counts(in_file, out_file, gtf_file, cores):
    """Run featureCounts with ``--primary`` unless ``out_file`` exists.

    Args:
        in_file: Alignment file given to featureCounts as its input.
        out_file: Path passed to ``-o``; its existence decides whether the
            command runs.
        gtf_file: Annotation file passed to ``-a``.
        cores: Thread count passed to ``-T``.

    Returns:
        ``in_file + ".featureCounts"`` (not ``out_file``). Presumably this is
        the per-read file produced by ``-R`` -- confirm against the
        featureCounts version in use.
    """
    counts_present = os.path.exists(out_file)
    if not counts_present:
        placeholders = {
            "cores": cores,
            "gtf_file": gtf_file,
            "out_file": out_file,
            "in_file": in_file,
        }
        do.run(
            "featureCounts -R -T {cores} --primary -a {gtf_file} -o {out_file} {in_file}".format(**placeholders)
        )
    return in_file + ".featureCounts"