def exec_deseq(self):
    """Run the differential expression stage of **Wedring** with DESeq.

    An ``Rscript --vanilla`` command line is assembled from the location of
    ``diffExprStage.R`` (found with ``wedr_which``), the options returned
    by ``wedring_diffexpr_parse_options()``, ``self._cnt_table``,
    ``self._cnd`` and the result file ``diffexpr.txt`` (under ``self._o``).
    The command is run through a ``BioSoft`` object whose log file is
    ``diffexpr.log`` (under ``self._ld``).

    :raises: :class:WedringError
    """
    verbose = not self._qt
    if verbose:
        wedr_report("Calculating differential expression with DESeq.")

    result_table = path.join(self._o, "diffexpr.txt")
    log_file = path.join(self._ld, "diffexpr.log")
    script_args = (wedr_which("diffExprStage.R"),
                   self.wedring_diffexpr_parse_options(),
                   self._cnt_table,
                   self._cnd,
                   result_table)
    deseq = BioSoft("Rscript --vanilla %s %s %s %s %s" % script_args,
                    errfile=log_file)
    if verbose:
        wedr_report("Command line:\n %s" % deseq.command)

    deseq.run()
    # Return codes 0 and -1 are both accepted; anything else is a failure.
    if deseq.return_code not in (0, -1):
        raise WedringError(141,
                           "%s exitted with status %d. See log file '%s' "
                           "for more details."
                           % (deseq.program_name, deseq.return_code,
                              deseq.errfile))
    wedr_clean(deseq.errfile)

    # TODO Add verification of the DESeq's output with wedr_check_path()
    # \_ table (OK), graphics
    wedr_check_path(result_table)
    if verbose:
        wedr_report("DESeq - Done!")
def exec_mapping_stage(self):
    """Run the mapping stage for this library and return the instance.

    The steps are: check that the index files exist, run the mapper, set
    and check ``self.aln_file``, then run the SAMtools and BEDTools steps
    when the corresponding programs are found.  Progress messages are only
    reported when ``self.quiet`` is false.

    :returns: ``self``, so that ``run`` can collect the updated objects
              from ``Pool.map``.
    """
    verbose = not self.quiet

    wedr_check_path(self.index + '.*')
    self.exec_mapping()

    # TopHat writes "accepted_hits.bam" inside its output directory, while
    # for any other mapper the alignment is "<mapper_out>.sam".
    if self.mapper == "tophat":
        self.aln_file = path.join(self._mapper_out, "accepted_hits.bam")
    else:
        self.aln_file = self._mapper_out + ".sam"
    wedr_check_path(self.aln_file)

    if wedr_check_program("samtools"):
        if verbose:
            wedr_report("[%s] Processing aligments with SAMtools."
                        % self._out_pref)
        self.exec_samtools()
        if verbose:
            wedr_report("[%s] SAMtools - Done!." % self._out_pref)

    if wedr_check_program("bedtools"):
        if verbose:
            wedr_report("[%s] Calculating mapping coverage with BEDTools."
                        % self._out_pref)
        self.exec_bedtools()
        if verbose:
            wedr_report("[%s] BEDTools - Done!" % self._out_pref)

    return self
def parse_args(self, out_dir="./wedr_index", ref_seq=None, index_label=None,
               cfg_file=None, quiet=False):
    """Check the validity of the :class:IndexBuilder parameters.

    Besides validating the arguments, this method derives ``self.index``
    (the pathname prefix of the index inside the output directory) and
    ``self._out_pref`` from ``index_label`` or, when no label is given,
    from the name of the (first) reference sequence file.

    :param out_dir: Which output directory? (default: ./wedr_index)
    :type out_dir: str
    :param ref_seq: Which reference sequence(s)? Comma-separated list.
    :type ref_seq: str
    :param index_label: A personalized name to the index.
    :type index_label: str
    :param cfg_file: Which configuration file?
    :type cfg_file: str
    :param quiet: If True don't print anything besides errors.
    :type quiet: bool
    :raises: :class:WedringError
    """

    def stem(filename):
        """Return the base name of *filename* without its last extension."""
        return path.split(path.splitext(filename)[0])[1]

    # NOTE(review): assumes wedr_check_program() returns a true value or
    # raises; if it can return False, nothing below is configured.
    if not wedr_check_program("bowtie-build"):
        return

    if quiet:
        self.quiet = True

    # The default value means "keep whatever out_dir is already set".
    if out_dir != "./wedr_index":
        self.out_dir = out_dir

    if cfg_file is not None and wedr_check_path(cfg_file):
        self.cfg_file = cfg_file

    if ref_seq is None:
        raise WedringError(135, "You must provide the reference sequence "
                                "file(s).")
    # Validate the *argument*: self.ref_seq is only assigned once the
    # paths have been checked, so it cannot be used for the check itself.
    if wedr_check_path(ref_seq.split(',')):
        self.ref_seq = ref_seq

    if index_label is not None:
        self.index = path.join(self.out_dir, index_label)
        self._out_pref = index_label
    elif ',' in self.ref_seq:
        # Several references: name the index after the first non-empty one.
        index_name = [i for i in self.ref_seq.split(',') if i != ''][0]
        self.index = path.join(self.out_dir, stem(index_name) + "_andothers")
        # NOTE(review): unlike the single-file case, the prefix keeps the
        # directory and extension of the file -- confirm this is intended.
        self._out_pref = index_name
    else:
        self._out_pref = stem(self.ref_seq)
        self.index = path.join(self.out_dir, self._out_pref)
def exec_samtools(self):
    """Run the SAMtools part of the pipeline on ``self.aln_file``.

    When the mapper is Bowtie the SAM file is first converted to BAM (the
    SAM file is then removed and ``self.aln_file`` points to the BAM file)
    and the BAM file is sorted.  For every mapper the resulting BAM file is
    indexed and the presence of the ``.bai`` file is checked.

    :raises: :class:WedringError
    """
    verbose = not self.quiet
    prefix = self._out_pref

    def run_step(command, log_suffix):
        """Run one samtools command, raising WedringError if it fails."""
        tool = BioSoft(command=command,
                       errfile=path.join(self.log_dir, prefix + log_suffix))
        if verbose:
            wedr_report("[%s] Command line:\n %s" % (prefix, tool.command))
        tool.run()
        # Return codes 0 and -1 are both accepted.
        if tool.return_code not in (0, -1):
            raise WedringError(141,
                               "[%s] %s exitted with status %d. See log "
                               "file '%s' for more details."
                               % (prefix, tool.program_name,
                                  tool.return_code, tool.errfile))
        wedr_clean(tool.errfile)

    if self.mapper == "bowtie":
        if verbose:
            wedr_report("[%s] Converting SAM file to BAM file." % prefix)
        sam_root = path.splitext(self.aln_file)[0]
        bam_path = sam_root + ".bam"
        run_step("samtools view -bS -o %s %s.sam"
                 % (bam_path, self._mapper_out), "_view.log")
        # The SAM file is no longer needed once the BAM file exists.
        wedr_clean(self.aln_file, force=True)
        self.aln_file = bam_path
        wedr_check_path(self.aln_file)

        if verbose:
            wedr_report("[%s] Sorting BAM file." % prefix)
        run_step("samtools sort %s %s" % (bam_path, sam_root), "_sort.log")
        wedr_check_path(self.aln_file)

    if verbose:
        wedr_report("[%s] Indexing BAM file." % prefix)
    run_step("samtools index %s" % self.aln_file, "_index.log")
    wedr_check_path(self.aln_file + ".bai")
def exec_bedtools(self):
    """Run the BEDTools part of the pipeline.

    Runs ``bedtools coverage`` on ``self.aln_file`` against
    ``self.annot_file`` and writes the result to a ``.cov`` file placed
    next to the alignment file.  On success ``self.cov_file`` is set to
    that file.

    :raises: :class:WedringError
    """
    # Swap only the file extension.  str.replace(".bam", ".cov") would
    # also rewrite ".bam" inside directory names, and for a ".sam" input
    # it would yield the alignment file itself as the output path.
    cov_out = path.splitext(self.aln_file)[0] + ".cov"
    errfile = path.join(self.log_dir, self._out_pref + "_coverage.log")
    bt = BioSoft(command="bedtools coverage -s -abam %s -b %s"
                         % (self.aln_file, self.annot_file),
                 outfile=cov_out, errfile=errfile)
    if not self.quiet:
        wedr_report("[%s] Command line:\n %s" % (self._out_pref, bt.command))

    bt.run()
    # Return codes 0 and -1 are both accepted.
    if bt.return_code not in (0, -1):
        raise WedringError(141,
                           "[%s] %s exitted with status %d. See log file "
                           "'%s' for more details."
                           % (self._out_pref, bt.program_name,
                              bt.return_code, bt.errfile))
    wedr_clean(bt.errfile)

    if wedr_check_path(cov_out):
        self.cov_file = cov_out
def run(self):
    """Execute all steps of the **Wedring** pipeline.

    Which steps are executed depends on ``self._wb``.
    """
    # The value of self._wb is set after the command line options
    # --just-indexbuild, --just-map, --just-counttable and --just-de.  The
    # possible values are defined in the globals TTL_PIPELINE, JUST_INDEX,
    # JUST_MAP, JUST_TABLE and JUST_DE, which mean:
    #   TTL_PIPELINE -- execute all steps of the pipeline
    #   JUST_INDEX   -- just execute the indexing stage
    #   JUST_MAP     -- execute the indexing stage (if needed) and the
    #                   mapping stage
    #   JUST_TABLE   -- just build the count table
    #   JUST_DE      -- just execute the differential expression stage
    if self._wb in (TTL_PIPELINE, JUST_INDEX, JUST_MAP):
        self.wedring_indexbuilder_parse_params()
        if self._indexbldr is not None:
            self._indexbldr.run()
            self._i = self._indexbldr.index

        if self._wb != JUST_INDEX:
            wedr_prepare_directory(self._o)
            wedr_prepare_directory(self._ld)
            self.wedring_mapping_parse_params()

            pool = Pool(self._num_threads)
            try:
                # Each worker returns its updated object, so the list is
                # replaced by the results of the mapping stage.
                self._wedr_list = pool.map(WedringMast.exec_mapping_stage,
                                           self._wedr_list)
            finally:
                # Always release the worker processes, even on failure.
                pool.close()
                pool.join()

            if self._wb != JUST_MAP:
                self._write_features_and_count_table()
                self.exec_deseq()

    elif self._wb == JUST_TABLE:
        self.wedring_mapping_parse_params()
        wedr_prepare_directory(self._o)
        self._write_features_and_count_table()

    elif self._wb == JUST_DE:
        wedr_prepare_directory(self._o)
        wedr_prepare_directory(self._ld)
        self.exec_deseq()


def _write_features_and_count_table(self):
    """Write the genomic features and the count table under ``self._o``.

    The count table is built from the ``cov_file`` of every object in
    ``self._wedr_list``; on success ``self._cnt_table`` is set to the path
    of the table.
    """
    verbose = not self._qt
    feats_file = path.join(self._o, "genomic_features.txt")
    tbl_file = path.join(self._o, "count_table.txt")

    if verbose:
        wedr_report("Writing genomic features to disk.")
    write_genomic_features_to_file(self._a, feats_file)
    if wedr_check_path(feats_file) and verbose:
        wedr_report("Writing genomic features - Done!")

    cov_f = [wedrmast.cov_file for wedrmast in self._wedr_list]
    if verbose:
        wedr_report("Writing count table to disk.")
    write_count_table_to_file(self._a, cov_f, self._cnd, tbl_file)
    if wedr_check_path(tbl_file):
        self._cnt_table = tbl_file
        if verbose:
            wedr_report("Writing count table - Done!")
def parse_args(self, out_dir="./wedr_out", mapper="bowtie", index=None,
               lib_file=None, lib_mate_1=None, lib_mate_2=None,
               qual_file=None, q_mate_1=None, q_mate_2=None,
               annot_file=None, cov_file=None, cfg_file=None,
               map_label=None, quiet=False, barrier=0):
    """Verify that the **Wedring** parameters are set correctly and make
    other adjustments.

    When ``self.wedr_barrier`` is 3 only the annotation and coverage files
    are validated.  Otherwise the mapper, index and library parameters are
    validated, ``self._map_mode`` is set and the output names
    (``self._mapper_out`` and ``self._out_pref``) are derived.

    Values assigned to ``self._map_mode``:

    * 7 -- single-end library with quality file(s)
    * 8 -- single-end library without quality file(s)
    * 4 -- paired-end libraries with quality files
    * 5 -- paired-end libraries without quality files

    :param out_dir: Which output directory? (default: ./wedr_out)
    :type out_dir: str -- Pathname to an existing (or not) directory.
    :param mapper: Which mapper? (default: bowtie)
    :type mapper: str -- "bowtie" or "tophat"
    :param index: Which BW index?
    :type index: str -- Pathname to *Bowtie*'s index (just the suffix)
    :param lib_file: Which library file(s)?
    :type lib_file: str
    :param lib_mate_1: Which first mate reads file?
    :type lib_mate_1: str
    :param lib_mate_2: Which second mate reads file?
    :type lib_mate_2: str
    :param qual_file: Which quality file(s)?
    :type qual_file: str
    :param q_mate_1: Which quality file(s) for mate 1?
    :type q_mate_1: str
    :param q_mate_2: Which quality file(s) for mate 2?
    :type q_mate_2: str
    :param annot_file: Which annotation file?
    :type annot_file: str
    :param cov_file: Which coverage file? Only checked when
                     ``self.wedr_barrier`` is 3.
    :type cov_file: str
    :param cfg_file: Which configuration file?
    :type cfg_file: str
    :param map_label: A personalized name to the mapping.
    :type map_label: str
    :param barrier: When the pipeline will stop? Not read by this method,
                    which checks ``self.wedr_barrier`` instead.
    :type barrier: int
    :param quiet: If True don't print anything besides errors.
    :type quiet: bool
    :raises: :class:WedringError
    """

    def split_list(value):
        """Split a comma-separated string, dropping empty entries."""
        return [item for item in value.split(',') if item != '']

    def stem(filename):
        """Return the base name of *filename* without its last extension."""
        return path.split(path.splitext(filename)[0])[1]

    def check_annotation():
        """Validate and store the (mandatory) annotation file."""
        if annot_file is None:
            raise WedringError(135, "You must provide the annotation file.")
        if wedr_check_path(annot_file):
            self.annot_file = annot_file

    # NOTE(review): 3 is presumably the "just build the count table"
    # barrier -- confirm against the constants of the module.
    if self.wedr_barrier == 3:
        check_annotation()
        if cov_file is None:
            raise WedringError(135, "You must provide the coverage file.")
        if wedr_check_path(cov_file):
            self.cov_file = cov_file
        return

    # ----------------------------------------------------------- mapper
    mp = mapper.lower()
    if mp not in ("bowtie", "tophat"):
        raise WedringError(134, "Invalid mapper name: %s" % mapper)
    if wedr_check_program(mp):
        self.mapper = mp

    if quiet:
        self.quiet = True

    # The default value means "keep whatever out_dir is already set"; the
    # log directory is always derived from the argument.
    if out_dir != "./wedr_out":
        self.out_dir = out_dir
    self.log_dir = path.join(out_dir, "log")

    if cfg_file is not None and wedr_check_path(cfg_file):
        self.cfg_file = cfg_file

    if index is None:
        raise WedringError(135, "You must provide Bowtie's BW index.")
    if wedr_check_path(index + ".*"):
        self.index = index

    # -------------------------------------------------------- libraries
    if lib_file is not None:
        lib_temp = split_list(lib_file)
        if wedr_check_path(lib_temp):
            self.lib_file = ','.join(lib_temp)
            self._map_mode = 2
            if qual_file is not None:
                qual_temp = split_list(qual_file)
                if wedr_check_path(qual_temp):
                    if len(lib_temp) != len(qual_temp):
                        raise WedringError(140, "Unbalanced number of "
                                                "library and quality files.")
                    # Stored normalized, like every other file list.
                    self.qual_file = ','.join(qual_temp)
                    self._map_mode += 5
            else:
                self._map_mode += 6
    elif lib_mate_1 is not None and lib_mate_2 is not None:
        lib_1_temp = split_list(lib_mate_1)
        lib_2_temp = split_list(lib_mate_2)
        if wedr_check_path(lib_1_temp + lib_2_temp):
            if len(lib_1_temp) != len(lib_2_temp):
                raise WedringError(140, "Unbalanced number of pair-mate "
                                        "libraries.")
            if q_mate_1 is not None and q_mate_2 is not None:
                qual_1_temp = split_list(q_mate_1)
                qual_2_temp = split_list(q_mate_2)
                if wedr_check_path(qual_1_temp + qual_2_temp):
                    if (len(lib_1_temp) != len(qual_1_temp)
                            or len(lib_1_temp) != len(qual_2_temp)):
                        raise WedringError(140, "Unbalanced number of "
                                                "pair-mate libraries and "
                                                "its qualities.")
                    self.lib_mate_1 = ','.join(lib_1_temp)
                    self.lib_mate_2 = ','.join(lib_2_temp)
                    self.q_mate_1 = ','.join(qual_1_temp)
                    self.q_mate_2 = ','.join(qual_2_temp)
                    self._map_mode = 4
            elif (q_mate_1 is None) != (q_mate_2 is None):
                raise WedringError(135, "You must set both pair-mate "
                                        "quality files.")
            else:
                # Paired-end libraries without quality files.  This must
                # be its own branch: it may neither follow the ``raise``
                # above nor overwrite mode 4 set for the quality case.
                self.lib_mate_1 = ','.join(lib_1_temp)
                self.lib_mate_2 = ','.join(lib_2_temp)
                self._map_mode = 5
    elif (lib_mate_1 is None) != (lib_mate_2 is None):
        raise WedringError(135, "You must set both pair-mate library files.")
    else:
        raise WedringError(135, "You must provide the library file(s).")

    # ----------------------------------------------------- output names
    if map_label is not None:
        self._mapper_out = path.join(self.out_dir, map_label)
        self._out_pref = map_label
    elif self._map_mode not in (7, 8):
        # Paired-end: names are built from the first file of each mate.
        index_name = path.split(self.index)[1]
        several = ',' in self.lib_mate_1
        mate_1 = stem(self.lib_mate_1.split(',')[0])
        mate_2 = stem(self.lib_mate_2.split(',')[0])
        # Drop the last two characters of each name (presumably a mate
        # suffix such as "_1"/"_2" -- confirm) before comparing them.
        trimmed_1 = mate_1[:-2]
        trimmed_2 = mate_2[:-2]
        if several:
            # NOTE(review): the prefixes below keep the untrimmed name or a
            # trailing "_andothers_vs_", as in the original code -- confirm
            # this is intended.
            if trimmed_1 == trimmed_2:
                self._mapper_out = path.join(
                    self.out_dir, mate_1 + "_andothers_vs_" + index_name)
                self._out_pref = mate_1
            else:
                self._mapper_out = path.join(
                    self.out_dir,
                    trimmed_1 + '_' + trimmed_2 + "_andothers_vs_"
                    + index_name)
                self._out_pref = (trimmed_1 + '_' + trimmed_2
                                  + "_andothers_vs_")
        else:
            if trimmed_1 == trimmed_2:
                self._mapper_out = path.join(
                    self.out_dir, trimmed_1 + "_vs_" + index_name)
                self._out_pref = trimmed_1
            else:
                self._mapper_out = path.join(
                    self.out_dir,
                    trimmed_1 + '_' + trimmed_2 + "_vs_" + index_name)
                self._out_pref = trimmed_1 + '_' + trimmed_2
    else:
        # Single-end: names are built from the first library file.
        index_name = path.split(self.index)[1]
        map_out_pref = stem(self.lib_file.split(',')[0])
        if ',' in self.lib_file:
            self._mapper_out = path.join(
                self.out_dir, map_out_pref + "_andothers_vs_" + index_name)
            self._out_pref = map_out_pref + "_andothers_vs_"
        else:
            self._mapper_out = path.join(
                self.out_dir, map_out_pref + "_vs_" + index_name)
            self._out_pref = map_out_pref

    check_annotation()