Example #1
0
 def exec_mapping_stage(self):
     """Run the per-sample part of the Wedring pipeline.

     The index path is checked, the reads are mapped and ``aln_file`` is
     pointed at the mapper output (``accepted_hits.bam`` inside the mapper
     output directory for *tophat*, ``<mapper_out>.sam`` otherwise).
     SAMtools is run only when ``wedr_check_program("samtools")`` succeeds,
     and BEDTools only when, in addition, ``wedr_check_program("bedtools")``
     succeeds. Progress messages are reported unless ``quiet`` is set.

     :returns: this object itself.

     """
     verbose = not self.quiet

     wedr_check_path(self.index + '.*')
     self.exec_mapping()
     if self.mapper == "tophat":
         self.aln_file = path.join(self._mapper_out, "accepted_hits.bam")
     else:
         self.aln_file = self._mapper_out + ".sam"
     wedr_check_path(self.aln_file)

     # Without SAMtools there is nothing else this stage can do.
     if not wedr_check_program("samtools"):
         return self
     if verbose:
         wedr_report("[%s] Processing aligments with SAMtools." %
                     self._out_pref)
     self.exec_samtools()
     if verbose:
         wedr_report("[%s] SAMtools - Done!." % self._out_pref)

     # The coverage step is attempted only after the SAMtools step.
     if not wedr_check_program("bedtools"):
         return self
     if verbose:
         wedr_report("[%s] Calculating mapping coverage with BEDTools." %
                     self._out_pref)
     self.exec_bedtools()
     if verbose:
         wedr_report("[%s] BEDTools - Done!" % self._out_pref)
     return self
Example #2
0
    def exec_deseq(self):
        """Run the differential expression stage of **Wedring** with DESeq.

        An ``Rscript --vanilla`` command line is assembled from the
        *diffExprStage.R* script located by ``wedr_which``, the parsed
        differential expression options, the count table, the conditions
        and the output table path. The command is run through ``BioSoft``
        and the output table is checked afterwards.

        :raises: :class:WedringError

        """
        if not self._qt:
            wedr_report("Calculating differential expression with DESeq.")

        result_table = path.join(self._o, "diffexpr.txt")
        log_path = path.join(self._ld, "diffexpr.log")
        r_script = wedr_which("diffExprStage.R")
        r_options = self.wedring_diffexpr_parse_options()
        command = "Rscript --vanilla %s %s %s %s %s" % (
            r_script, r_options, self._cnt_table, self._cnd, result_table)

        deseq = BioSoft(command, errfile=log_path)
        if not self._qt:
            wedr_report("Command line:\n    %s" % deseq.command)
        deseq.run()
        # Return codes 0 and -1 are both accepted; anything else is fatal.
        if deseq.return_code not in (0, -1):
            raise WedringError(141, "%s exitted with status %d. See log file '%s' for more details." %
                               (deseq.program_name, deseq.return_code,
                                deseq.errfile))
        wedr_clean(deseq.errfile)
        # TODO Add verification of the DESeq's output with wedr_check_path()
        #   \_ table (OK), graphics
        wedr_check_path(result_table)
        if not self._qt:
            wedr_report("DESeq - Done!")
Example #3
0
    def run(self):
        """Build the Bowtie BW index by running *bowtie-build*.

        The command line is prepared by ``parse_bb_cmd_line``; the output
        and log directories are prepared before the command is run through
        ``BioSoft`` with ``<out_pref>_build.log`` in the log directory as
        its output file.

        :raises: :class:WedringError

        """
        if not self.quiet:
            wedr_report("[%s] Building Bowtie BW index '%s'." %
                        (self._out_pref, self.index))
        self.parse_bb_cmd_line()
        if not self.quiet:
            wedr_report("[%s] Command line:\n    %s" %
                        (self._out_pref, self._bb_cmd))

        for directory in (self.out_dir, self.log_dir):
            wedr_prepare_directory(directory)

        build_log = path.join(self.log_dir, self._out_pref + "_build.log")
        builder = BioSoft(command=self._bb_cmd, outfile=build_log)
        builder.run()
        # Return codes 0 and -1 are both accepted; anything else is fatal.
        if builder.return_code not in (0, -1):
            raise WedringError(141, "[%s] %s exitted with status %d. See log file '%s' for more details." %
                               (self._out_pref, builder.program_name,
                                builder.return_code, builder.outfile))
        wedr_clean(build_log)
        if not self.quiet:
            wedr_report("[%s] BW index build - Done!." % self._out_pref)
Example #4
0
    def exec_bedtools(self):
        """Run ``bedtools coverage`` on the alignment file.

        The coverage output path is the alignment path with ``.bam``
        replaced by ``.cov``. When ``wedr_check_path`` accepts that path it
        is stored in ``cov_file``.

        :raises: :class:WedringError

        """
        coverage_path = self.aln_file.replace(".bam", ".cov")
        log_path = path.join(self.log_dir, self._out_pref + "_coverage.log")
        command = ("bedtools coverage -s -abam %s -b %s" %
                   (self.aln_file, self.annot_file))
        bedtools = BioSoft(command=command, outfile=coverage_path,
                           errfile=log_path)
        if not self.quiet:
            wedr_report("[%s] Command line:\n    %s" %
                        (self._out_pref, bedtools.command))
        bedtools.run()
        # Return codes 0 and -1 are both accepted; anything else is fatal.
        if bedtools.return_code not in (0, -1):
            raise WedringError(141, "[%s] %s exitted with status %d. See log file '%s' for more details." %
                               (self._out_pref, bedtools.program_name,
                                bedtools.return_code, bedtools.errfile))
        wedr_clean(bedtools.errfile)
        if not wedr_check_path(coverage_path):
            return
        self.cov_file = coverage_path
Example #5
0
    def exec_mapping(self):
        """Map the reads against the reference genome.

        The mapper command line is prepared by ``parse_mapper_cmd_line`` and
        run through ``BioSoft`` with ``<out_pref>_mapping.log`` in the log
        directory as its error file.

        :raises: :class:WedringError

        """
        if not self.quiet:
            wedr_report("[%s] Mapping reads against reference genome." %
                        self._out_pref)
        self.parse_mapper_cmd_line()
        if not self.quiet:
            wedr_report("[%s] Command line:\n   %s" %
                        (self._out_pref, self._mapper_cmd))

        mapping_log = path.join(self.log_dir, self._out_pref + "_mapping.log")
        mapper = BioSoft(command=self._mapper_cmd, errfile=mapping_log)
        mapper.run()
        # Return codes 0 and -1 are both accepted; anything else is fatal.
        if mapper.return_code not in (0, -1):
            raise WedringError(141, "[%s] %s exitted with status %d. See log file '%s' for more details." %
                               (self._out_pref, mapper.program_name,
                                mapper.return_code, mapper.errfile))
        wedr_clean(mapper.errfile)
        if not self.quiet:
            wedr_report("[%s] Mapping - Done!." % self._out_pref)
Example #6
0
    def wedring_parse_args(self, arguments):
        """Parse the :class:Wedring arguments and make some adjustments.

        ``--group-all`` and the stage-selection flags (``--just-indexbuild``,
        ``--just-map``, ``--just-counttable``, ``--just-de``) are handled
        first and removed from *arguments* in place; the remaining items
        are parsed with ``getopt``. Afterwards the annotation file is
        validated (unless only the index is being built), the presence of
        the experimental conditions / count table is enforced for the
        stages that need them, and the per-library lists left at their
        ``[None]`` default are padded to the number of libraries.

        :param arguments: List of arguments, usually command line ones.
        :type arguments: list
        :raises: :class:WedringError

        """
        try:
            if len(arguments) < 1:
                raise WedringError(133, "Insufficient number of arguments.")
        except TypeError:
            raise WedringError(133, "Arguments were not provided.")
        bin_path = None  # directory to append to the system PATH

        group_all = "--group-all" in arguments
        if group_all:
            arguments.remove("--group-all")

        def adjust_param(params):
            """Split a comma-separated value, or keep it whole when
            --group-all was given."""
            if group_all:
                return [params]
            return [param for param in params.split(',') if param != '']

        # Adjusting the value of the Wedring barrier to define which
        # pipeline stage will be executed.  When several flags are given the
        # last one in this sequence wins.
        for flag, barrier in (("--just-indexbuild", JUST_INDEX),
                              ("--just-map", JUST_MAP),
                              ("--just-counttable", JUST_TABLE),
                              ("--just-de", JUST_DE)):
            if flag in arguments:
                self._wb = barrier
                arguments.remove(flag)

        try:
            opts = getopt(arguments, "n:o:x:m:r:i:l:1:2:q:a:g:t:c:d:p:",
                          ["num-threads=",
                           "out-dir=",
                           "index-dir=",
                           "quiet",
                           "mapper=",
                           "ref-sequence=",
                           "bw-index=",
                           "lib-file=",
                           "pair-mate-1=",
                           "pair-mate-2=",
                           "quals=",
                           "q1=",
                           "q2=",
                           "annot-file=",
                           "coverage-files=",
                           "config-file=",
                           "count-table=",
                           "map-label=",
                           "index-label=",
                           "conditions=",
                           "path="])
            if opts[1] != []:
                raise WedringError(132, "Argument list not supported: %s." %
                                   " ".join(opts[1]))
            for opt, val in opts[0]:
                if opt in ("-n", "--num-threads"):
                    self._num_threads = int(val)
                # Equality, not ``in``: a bare string on the right-hand side
                # makes ``in`` a substring test, which also matched "-q".
                elif opt == "--quiet":
                    self._qt = True
                elif opt in ("-m", "--mapper"):
                    self._m = val
                elif opt in ("-r", "--ref-sequence"):
                    self._r = val
                elif opt in ("-i", "--bw-index"):
                    self._i = val
                elif opt in ("-a", "--annot-file"):
                    self._a = val
                elif opt in ("-c", "--config-file"):
                    self._c = val
                elif opt in ("-t", "--count-table"):
                    self._cnt_table = val
                elif opt == "--index-label":
                    self._il = val
                elif opt in ("-d", "--conditions"):
                    self._cnd = val
                elif opt in ("-x", "--index-dir"):
                    self._id = val
                elif opt in ("-o", "--out-dir"):
                    self._o = val
                elif opt in ("-l", "--lib-file"):
                    self._l = adjust_param(val)
                elif opt in ("-1", "--pair-mate-1"):
                    self._1 = adjust_param(val)
                elif opt in ("-2", "--pair-mate-2"):
                    self._2 = adjust_param(val)
                elif opt in ("-q", "--quals"):
                    self._q = adjust_param(val)
                elif opt == "--q1":
                    self._q1 = adjust_param(val)
                elif opt == "--q2":
                    self._q2 = adjust_param(val)
                elif opt in ("-g", "--coverage-files"):
                    if ',' not in val:
                        raise WedringError(134, "You must provide a list of coverage files.")
                    self._cf = [cf for cf in val.split(',') if cf != '']
                elif opt == "--map-label":
                    self._ml = adjust_param(val)
                elif opt in ("-p", "--path"):
                    bin_path = val
        except GetoptError as err:
            raise WedringError(136, "%s." % str(err).capitalize())

        if bin_path is not None:
            # Append the user-supplied directory to the search path.
            current_path = environ.get("PATH")
            if current_path:
                environ["PATH"] = path.pathsep.join((current_path, bin_path))
            else:
                environ["PATH"] = bin_path

        if self._wb == JUST_INDEX:
            return

        self._ld = path.join(self._o, "log")
        if self._a:
            if not self._qt:
                wedr_report("Validating GFF file: \'%s\'." % self._a)
            try:
                gff_out = write_validated_gff(self._a)
            except GffFormatError as gffferr:
                raise WedringError(134, "[%s] %s" %
                                   (type(gffferr).__name__, gffferr))
            except EnvironmentError as env_err:
                raise WedringError(env_err.errno, "[%s (%d)] %s%s%s." %
                                   (type(env_err).__name__,
                                    env_err.errno,
                                    env_err.strerror,
                                    ': ' if env_err.filename else '',
                                    env_err.filename if env_err.filename else ''))
            # A string result is the path of a rewritten GFF file, which
            # replaces the annotation file from here on.
            if isinstance(gff_out, str):
                if not self._qt:
                    wedr_report("Now using validated GFF file: \'%s\'." %
                                gff_out)
                self._a = gff_out
            if not self._qt:
                wedr_report("Gff validation - Done!")

        if self._wb in (TTL_PIPELINE, JUST_TABLE, JUST_DE):
            if self._cnd is None:
                raise WedringError(135, "You must set the experimental conditions.")

        if self._wb in (TTL_PIPELINE, JUST_MAP):
            # Lists still holding the ``[None]`` default are padded so they
            # have one entry per library / mate pair.
            if self._l != [None] and self._q == [None]:
                self._q += [None] * (len(self._l) - 1)
            if self._l != [None] and self._ml == [None]:
                self._ml += [None] * (len(self._l) - 1)
            if (self._1 != [None] and self._2 != [None] and
                    self._q1 == [None] and self._q2 == [None]):
                self._q1 += [None] * (len(self._1) - 1)
                self._q2 += [None] * (len(self._2) - 1)
            if self._1 != [None] and self._2 != [None] and self._ml == [None]:
                self._ml += [None] * (len(self._1) - 1)
        elif self._wb == JUST_DE and self._cnt_table is None:
            raise WedringError(135, "You must provide the counting table.")
Example #7
0
 def _write_features_and_count_table(self):
     """Write the genomic features file and the count table to ``self._o``.

     The coverage files are taken from the ``cov_file`` attribute of every
     item in ``self._wedr_list``. When ``wedr_check_path`` accepts the
     count table, its path is stored in ``self._cnt_table``.

     """
     feats_file = path.join(self._o, "genomic_features.txt")
     tbl_file = path.join(self._o, "count_table.txt")
     if not self._qt:
         wedr_report("Writing genomic features to disk.")
     write_genomic_features_to_file(self._a, feats_file)
     if wedr_check_path(feats_file) and not self._qt:
         wedr_report("Writing genomic features - Done!")
     cov_f = [wedrmast.cov_file for wedrmast in self._wedr_list]
     if not self._qt:
         wedr_report("Writing count table to disk.")
     write_count_table_to_file(self._a, cov_f, self._cnd, tbl_file)
     if wedr_check_path(tbl_file):
         self._cnt_table = tbl_file
         if not self._qt:
             wedr_report("Writing count table - Done!")

 def run(self):
     """Execute all steps of the **Wedring** pipeline."""
     # This method will execute according to the self._wb value:
     # The value are set after the command line options --just-indexbuild,
     # --just-map, --just-counttable, --just-de, and the possible values are
     # defined in the globals TTL_PIPELINE, JUST_INDEX, JUST_MAP, JUST_TABLE
     # and JUST_DE, which mean:
     # TTL_PIPELINE -- execute all steps of the pipeline
     # JUST_INDEX -- just execute the indexing stage
     # JUST_MAP -- execute the indexing stage (if needed) and the mapping
     #             stage
     # JUST_TABLE -- just build the count table
     # JUST_DE -- just execute the differential expression stage
     if self._wb in (TTL_PIPELINE, JUST_INDEX, JUST_MAP):
         self.wedring_indexbuilder_parse_params()
         if self._indexbldr is not None:
             self._indexbldr.run()
             self._i = self._indexbldr.index
         if self._wb == JUST_INDEX:
             return
         wedr_prepare_directory(self._o)
         wedr_prepare_directory(self._ld)
         self.wedring_mapping_parse_params()
         pool = Pool(self._num_threads)
         try:
             self._wedr_list = pool.map(WedringMast.exec_mapping_stage,
                                        self._wedr_list)
         finally:
             # Always release the worker processes, even when a mapping
             # stage raised.
             pool.close()
             pool.join()
         if self._wb != JUST_MAP:
             self._write_features_and_count_table()
             self.exec_deseq()
     elif self._wb == JUST_TABLE:
         self.wedring_mapping_parse_params()
         wedr_prepare_directory(self._o)
         self._write_features_and_count_table()
     elif self._wb == JUST_DE:
         wedr_prepare_directory(self._o)
         wedr_prepare_directory(self._ld)
         self.exec_deseq()
Example #8
0
    def exec_samtools(self):
        """Run the SAMtools steps on the alignment file.

        For the *bowtie* mapper the SAM file is first converted to BAM
        (the SAM file is then cleaned with ``force=True`` and ``aln_file``
        is switched to the BAM path) and the BAM file is sorted. For every
        mapper the resulting alignment file is indexed and the ``.bai``
        path is checked.

        :raises: :class:WedringError

        """
        def run_step(command, log_suffix):
            # Run one samtools command line, fail on an unexpected return
            # code and clean its log file afterwards.
            log_path = path.join(self.log_dir, self._out_pref + log_suffix)
            tool = BioSoft(command=command, errfile=log_path)
            if not self.quiet:
                wedr_report("[%s] Command line:\n    %s" %
                            (self._out_pref, tool.command))
            tool.run()
            if tool.return_code not in (0, -1):
                raise WedringError(141, "[%s] %s exitted with status %d. See log file '%s' for more details." %
                                   (self._out_pref, tool.program_name,
                                    tool.return_code, tool.errfile))
            wedr_clean(tool.errfile)

        if self.mapper == "bowtie":
            if not self.quiet:
                wedr_report("[%s] Converting SAM file to BAM file." %
                            self._out_pref)
            aln_prefix = path.splitext(self.aln_file)[0]
            bam_path = aln_prefix + ".bam"
            run_step("samtools view -bS -o %s %s.sam" %
                     (bam_path, self._mapper_out), "_view.log")
            # The SAM file is no longer needed once the BAM file exists.
            wedr_clean(self.aln_file, force=True)
            self.aln_file = bam_path
            wedr_check_path(self.aln_file)

            if not self.quiet:
                wedr_report("[%s] Sorting BAM file." % self._out_pref)
            run_step("samtools sort %s %s" % (bam_path, aln_prefix),
                     "_sort.log")
            wedr_check_path(self.aln_file)

        if not self.quiet:
            wedr_report("[%s] Indexing BAM file." % self._out_pref)
        run_step("samtools index %s" % self.aln_file, "_index.log")
        wedr_check_path(self.aln_file + ".bai")
Example #9
0
    def parse_tophat_options(self):
        """Parser for the tophat section of the configuration file.

        The section named by ``self.mapper`` is read from ``self.cfg_file``
        and laid over the built-in defaults. An option is added to the
        command line only when its value differs from the default (valued
        options) or equals ``"true"`` (boolean switches). Options are
        emitted in a fixed order, each preceded by a single space.

        Special cases:

        * ``integer_quals`` is ignored when ``color`` is ``"true"``;
        * ``library_type`` is emitted only for ``fr-firststrand`` and
          ``fr-secondstrand``;
        * the ``fusion_*`` tuning options are emitted only together with
          ``fusion_search``;
        * ``G`` adds ``-G <annot_file>``, or is reported as ignored when no
          annotation file is set.

        :returns: Additional options of *TopHat*'s command line
        :raises: ``NoSectionError`` (from the config parser) when the
                 configuration file has no section for the mapper.

        """
        # (configuration key, command-line switch, default value).  A default
        # of None marks a boolean switch, emitted without a value only when
        # the key is set to "true".
        option_table = (
            ("bowtie1", "--bowtie1", None),
            ("output_dir", "-o", "./tophat_out"),
            ("mate_inner_dist", "-r", "0"),
            ("mate_std_dev", "--mate-std-dev", "20"),
            ("min_anchor_length", "-a", "8"),
            ("splice_mismatches", "-m", "0"),
            ("min_intron_length", "-i", "70"),
            ("max_intron_length", "-I", "500000"),
            ("max_insertion_length", "--max-insertion-length", "3"),
            ("max_deletion_length", "--max-deletion-length", "3"),
            ("solexa_quals", "--solexa-quals", None),
            ("solexa1.3_quals", "--solexa1.3-quals", None),
            ("color", "-C", None),
            ("integer_quals", "--integer-quals", None),
            ("num_threads", "-p", "1"),
            ("max_multihits", "-g", "20"),
            ("report_secondary_hits", "--report-secondary-hits", None),
            ("report_discordant_pair_alignments",
             "--report-discordant-pair-alignments", None),
            ("no_coverage_search", "--no-coverage-search", None),
            ("coverage_search", "--coverage-search", None),
            ("microexon_search", "--microexon-search", None),
            ("library_type", "--library-type", "fr-unstranded"),
            ("n", "-n", "2"),
            ("genome_read_mismatches", "--genome-read-mismatches", "2"),
            ("read_mismatches", "--read-mismatches", "2"),
            ("bowtie_n", "--bowtie-n", None),
            ("segment_mismatches", "--segment-mismatches", "2"),
            ("segment_length", "--segment-length", "25"),
            ("min_coverage_intron", "--min-coverage-intron", "50"),
            ("max_coverage_intron", "--max-coverage-intron", "20000"),
            ("min_segment_intron", "--min-segment-intron", "50"),
            ("max_segment_intron", "--max-segment-intron", "500000"),
            ("keep_tmp", "--keep-tmp", None),
            ("zpacker", "-z", "gzip"),
            ("fusion_search", "--fusion-search", None),
            ("raw_juncs", "-j", "none"),
            ("no_novel_juncs", "--no-novel-juncs", None),
            # Lower-case on purpose: the config parser lower-cases option
            # names, so a "G = true" entry arrives here as "g".
            ("g", "-G", None),
            ("transcriptome_index", "--transcriptome-index", "none"),
            ("transcriptome_only", "-T", None),
            ("transcriptome_max_hits", "-x", "0"),
            ("prefilter_multihits", "-M", None),
            ("insertions", "--insertions", "none"),
            ("deletions", "--deletions", "none"),
            ("no_novel_indels", "--no-novel-indels", None),
        )
        # Tuning options that only make sense together with --fusion-search.
        fusion_table = (
            ("fusion_anchor_length", "--fusion-anchor-length", "20"),
            ("fusion_min_dist", "--fusion-min-dist", "10000000"),
            ("fusion_read_mismatches", "--fusion-read-mismatches", "2"),
            ("fusion_multireads", "--fusion-multireads", "2"),
            ("fusion_multipairs", "--fusion-multipairs", "2"),
            ("fusion_ignore_chromosomes", "--fusion-ignore-chromosomes",
             "none"),
        )

        params = dict((key, "false" if default is None else default)
                      for key, _, default in option_table + fusion_table)
        cf_parser = RawConfigParser()
        cf_parser.read(self.cfg_file)
        params.update(cf_parser.items(self.mapper))

        pieces = []
        for key, switch, default in option_table:
            val = params[key]
            if default is not None:
                # Valued option: emitted when it departs from the default.
                if key == "library_type":
                    wanted = val in ("fr-firststrand", "fr-secondstrand")
                else:
                    wanted = val != default
                if wanted:
                    pieces.append("%s %s" % (switch, val))
                continue
            # Boolean switch from here on.
            if val != "true":
                continue
            if key == "integer_quals" and params["color"] == "true":
                continue
            if key == "g":
                if self.annot_file is not None:
                    pieces.append("%s %s" % (switch, self.annot_file))
                else:
                    wedr_report("Ignoring TopHat's option -G/--GTF.")
                continue
            pieces.append(switch)
            if key == "fusion_search":
                pieces.extend("%s %s" % (sub_switch, params[sub_key])
                              for sub_key, sub_switch, sub_default
                              in fusion_table
                              if params[sub_key] != sub_default)
        return "".join(" " + piece for piece in pieces)