Ejemplo n.º 1
0
    def index(self):
        """ Link the reference into the runtime directory and provide its index.

        The reference is symlinked under ``self.rts.base`` and ``self.ref`` is
        repointed to that link. When ``self.has_index`` is true the six
        ``.bt2`` index files next to the original reference are symlinked as
        well; otherwise ``bowtie2-build`` is run on the linked reference.
        The link and all index file names are registered with ``self.rts``.
        """
        index_suffixes = (
            ".1.bt2", ".2.bt2", ".3.bt2", ".4.bt2", ".rev.1.bt2", ".rev.2.bt2",
        )

        # Link the reference and register the link before repointing self.ref.
        source_ref = self.ref
        linked_ref = os.path.join(self.rts.base, os.path.basename(source_ref))
        os.symlink(source_ref, linked_ref)
        self.rts.register(linked_ref)
        self.ref = linked_ref

        if not self.has_index:
            # No index available: build one on the linked reference.
            builder = u.Cmd(self.log,
                            prog="bowtie2-build",
                            opts=self.index_opts,
                            post_args=[self.ref, self.ref],
                            sink_err=self.sink_err)
            builder.comm()
        else:
            # Reuse the existing index by linking every index file.
            for suffix in index_suffixes:
                os.symlink(source_ref + suffix, linked_ref + suffix)

        # Register the index files, whichever way they were obtained.
        for suffix in index_suffixes:
            self.rts.register(self.ref + suffix)
Ejemplo n.º 2
0
    def sort(self, inp=None, outp=None, opts=None):
        """ Sort BAM file

        Runs ``<self.prog> sort`` through ``u.Cmd`` inside ``self.rts.base``,
        passing ``-`` and the output name as positional arguments.

        :param inp: Input handed to ``u.Cmd``. When it is a string and
            ``outp`` is None, the output name is derived from its basename
            (text before ".bam" plus "_sort").
        :param outp: Output name as a string; it is passed through
            ``self.rts.tempfile``. File objects are rejected.
        :param opts: Option dictionary for the command (defaults to empty).
        :returns: ``self.rts.base`` joined with the output name plus ".bam".
        """
        # Use a fresh dict per call instead of a shared mutable default.
        if opts is None:
            opts = {}

        # NOTE(review): the code after the fatal() calls assumes that
        # self.log.fatal() does not return -- confirm.
        out_name = ""
        if isinstance(outp, file):
            self.log.fatal("Sort does not work on pipe output")
        elif isinstance(outp, str):
            out_name = self.rts.tempfile(outp)
        elif outp is None and isinstance(inp, str):
            out_name = os.path.basename(inp).split(".bam")[0] + "_sort"
        else:
            self.log.fatal("No output available!")

        cmd = u.Cmd(log=self.log,
                    prog=self.prog,
                    pre_args=["sort"],
                    opts=opts,
                    post_args=["-", out_name],
                    inp=inp,
                    outp=None,
                    path=self.path,
                    sink_err=self.sink_err,
                    cwd=self.rts.base)
        cmd.comm()

        # The command receives the name without extension; ".bam" is added
        # here before registering and returning it.
        out_name += ".bam"
        self.rts.register(out_name)
        return os.path.join(self.rts.base, out_name)
Ejemplo n.º 3
0
 def mk_index(cls, ref, log, index_opts=None):
     """ Make a standalone index

     Runs ``bowtie2-build`` through ``u.Cmd``, passing ``ref`` twice as
     positional arguments and ``sink_err=True``.

     :param ref: Reference path, passed as both positional arguments.
     :param log: Logger handed to ``u.Cmd``.
     :param index_opts: Option dictionary for ``bowtie2-build`` (defaults
         to empty).
     """
     # Use a fresh dict per call instead of a shared mutable default.
     if index_opts is None:
         index_opts = {}
     cmd = u.Cmd(log,
                 prog="bowtie2-build",
                 opts=index_opts,
                 post_args=[ref, ref],
                 sink_err=True)
     cmd.comm()
Ejemplo n.º 4
0
    def flagstat(self, bam):
        """ Get BAM statistics

        Runs ``<self.prog> flagstat`` on ``bam`` and parses the text output
        line by line against a fixed table of patterns.

        :param bam: Path to the BAM file; a fatal error is logged if it does
            not exist.
        :returns: Dictionary mapping statistic names to floats. Only lines
            matching one of the known patterns contribute entries.
        """
        if not os.path.exists(bam):
            self.log.fatal("BAM file does not exists.")

        cmd = u.Cmd(log=self.log,
                    prog=self.prog,
                    pre_args=["flagstat"],
                    opts={},
                    post_args=[bam],
                    path=self.path,
                    sink_err=self.sink_err)
        data = cmd.comm()

        # (pattern, names) pairs: captured group i is stored under names[i].
        # Each pattern captures the first number of the "N + M" pair and,
        # where present, the first percentage.
        # NOTE(review): the "in total" count is stored under "qc_fail" --
        # confirm that this key name is intended.
        feat_map = (
            (re.compile(r"(\d+)\s\+\s\d+\sin total \(QC-passed reads \+ QC-failed reads\)"),
             ("qc_fail",)),
            (re.compile(r"(\d+)\s\+\s\d+\sduplicates"),
             ("duplicates",)),
            (re.compile(r"(\d+)\s\+\s\d+\smapped\s\((\S+)%\s\+\s\S+%\)"),
             ("nr_mapped", "percent_mapped")),
            (re.compile(r"(\d+)\s\+\s\d+\spaired in sequencing"),
             ("seq_paired",)),
            (re.compile(r"(\d+)\s\+\s\d+\sproperly paired \((\S+)%\s\+\s\S+%\)"),
             ("nr_proper_pairs", "percent_proper_pairs")),
            (re.compile(r"(\d+)\s\+\s\d+\swith itself and mate mapped"),
             ("nr_proper_with_mate",)),
            (re.compile(r"(\d+)\s\+\s\d+\ssingletons\s\((\S+)%\s\+\s\S+%\)"),
             ("singletons", "percent_singletons")),
            (re.compile(r"(\d+)\s\+\s\d+\sread1"),
             ("nr_read1",)),
            (re.compile(r"(\d+)\s\+\s\d+\sread2"),
             ("nr_read2",)),
            (re.compile(r"(\d+)\s\+\s\d+\swith mate mapped to a different chr \(mapQ.+\)\Z"),
             ("chr_mismatch_q5",)),
            (re.compile(r"(\d+)\s\+\s\d+\swith mate mapped to a different chr\Z"),
             ("chr_mismatch",)),
        )

        stats = {}
        for line in data.split("\n"):
            for pattern, names in feat_map:
                m = pattern.match(line)
                if m is None:
                    continue
                groups = m.groups()
                # Guards against the table above getting out of sync.
                if len(groups) != len(names):
                    self.log.fatal("Name/group mismatch: %s %s" %
                                   (groups, names))
                for name, value in zip(names, groups):
                    stats[name] = float(value)
        return stats
Ejemplo n.º 5
0
 def index(self, bam):
     """ Index BAM file

     Runs ``<self.prog> index`` on ``bam`` inside ``self.rts.base`` and
     registers the resulting index file with ``self.rts``.

     :param bam: BAM file name, passed as the only positional argument.
     """
     cmd = u.Cmd(log=self.log,
                 prog=self.prog,
                 pre_args=["index"],
                 opts={},
                 post_args=[bam],
                 inp=None,
                 outp=None,
                 path=self.path,
                 sink_err=self.sink_err,
                 cwd=self.rts.base)
     cmd.comm()
     # Indexing a BAM file writes "<bam>.bai"; ".sai" is not produced by
     # this step, so registering it would leave the real index untracked.
     # NOTE(review): assumes self.prog is samtools -- confirm.
     self.rts.register(bam + ".bai")
Ejemplo n.º 6
0
    def flagstat_text(self, bam):
        """ Return the unparsed ``flagstat`` output for a BAM file.

        A fatal error is logged when ``bam`` does not exist. The command is
        run through ``u.Cmd`` and whatever its ``comm()`` returns is handed
        back unchanged.
        """
        bam_missing = not os.path.exists(bam)
        if bam_missing:
            self.log.fatal("BAM file does not exists.")

        flagstat_kwargs = dict(
            log=self.log,
            prog=self.prog,
            pre_args=["flagstat"],
            opts={},
            post_args=[bam],
            path=self.path,
            sink_err=self.sink_err,
        )
        return u.Cmd(**flagstat_kwargs).comm()
Ejemplo n.º 7
0
 def view(self, inp=None, outp=None, opts=None):
     """ View SAM/BAM files

     Runs ``<self.prog> view`` through ``u.Cmd`` with ``-`` as the only
     positional argument.

     :param inp: Input handed to ``u.Cmd``.
     :param outp: Output name; when given it is joined onto
         ``self.rts.base`` and registered with ``self.rts`` after the run.
     :param opts: Option dictionary for the command; defaults to
         ``{"-S": None, "-b": None}``.
     :returns: The ``output_fh`` attribute of the command object.
     """
     # Build the default per call so no dict is shared between calls.
     if opts is None:
         opts = {"-S": None, "-b": None}
     if outp is not None:
         outp = os.path.join(self.rts.base, outp)
     cmd = u.Cmd(log=self.log,
                 prog=self.prog,
                 pre_args=["view"],
                 opts=opts,
                 post_args=["-"],
                 inp=inp,
                 outp=outp,
                 path=self.path,
                 sink_err=self.sink_err)
     cmd.comm()
     # Register output file.
     if isinstance(outp, str):
         self.rts.register(outp)
     return cmd.output_fh
Ejemplo n.º 8
0
    def sam(self):
        """ Generate sam file

        Runs ``bowtie2`` on ``self.reads`` against ``self.ref`` inside
        ``self.rts.base``, writing to a SAM file named after the first reads
        file. The SAM path is registered with ``self.rts`` before the run.

        :returns: Path of the SAM file under ``self.rts.base``.
        :raises IndexError: if more than two reads files are given, since
            only the ``-1`` and ``-2`` flags are available.
        """
        # Check input:
        if not self.reads:
            self.log.fatal("No fastq files specified")

        # Name the SAM file after the first reads file, cut at the ".fq<N>"
        # marker.
        # NOTE(review): the leading "." is unescaped, so it matches any
        # character; kept unchanged so existing output names stay the same.
        base = re.split(r".fq\d", os.path.basename(self.reads[0]))[0]
        sam = os.path.join(self.rts.base, base + ".sam")

        # Register sam file:
        self.rts.register(sam)

        # Positional arguments: index base first, then one flag per reads
        # file in order.
        inp_flags = ["-1", "-2"]
        post_args = [self.ref]
        for idx, reads_file in enumerate(self.reads):
            post_args.append(inp_flags[idx])
            post_args.append(reads_file)

        # Construct command object:
        cmd = u.Cmd(self.log,
                    prog="bowtie2",
                    opts=self.aln_opts,
                    post_args=post_args,
                    outp=sam,
                    cwd=self.rts.base,
                    sink_err=self.sink_err)
        cmd.comm()
        return sam