def run(self, fastq1, fastq2, index):
    """Align one pair of FASTQ files with ``bwa mem`` and write a BAM file.

    The output of ``bwa mem`` is piped into ``samtools view -b``, which
    writes ``<out>/20_bam/<index>.bam``. Only the last stage of the
    pipeline is redirected (appended) to ``<out>/log/bwa.log``.

    Args:
        fastq1: Path of the first FASTQ file of the pair.
        fastq2: Path of the second FASTQ file of the pair.
        index: Label used in the progress messages and as the BAM stem.

    Raises:
        SystemExit: With status 1 when the shell command exits with a
            non-zero status (``call_log`` is invoked first).
    """
    print(time_stamp(),
          'start to align reads of {} by BWA.'.format(index),
          flush=True)

    cmd = 'bwa mem -t {0} {1} {2} {3} | \
           samtools view -b \
                         -o {4}/20_bam/{5}.bam \
                         >> {4}/log/bwa.log \
                         2>&1'.format(self.args.threads,
                                      self.args.ref,
                                      fastq1,
                                      fastq2,
                                      self.out,
                                      index)
    cmd = clean_cmd(cmd)

    try:
        sbp.run(cmd,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL,
                shell=True,
                check=True)
    except sbp.CalledProcessError:
        call_log(self.out, 'bwa', cmd)
        # A bare sys.exit() would report success (status 0) to the caller.
        sys.exit(1)

    print(time_stamp(),
          'alignment {} successfully finished.'.format(index),
          flush=True)
def run(self):
    """Calculate the SNP-index and then smooth it.

    The field positions returned by ``self.get_field()`` are handed to
    ``self.calc_SNPindex()``; afterwards a ``Smooth`` object built from
    ``self.args`` is run. Progress is reported on stdout around each step.
    """
    print(time_stamp(), 'start to calculate SNP-index.', flush=True)
    self.calc_SNPindex(self.get_field())
    print(time_stamp(), 'SNP-index successfully finished.', flush=True)

    print(time_stamp(), 'start to smooth SNP-index.', flush=True)
    Smooth(self.args).run()
    print(time_stamp(),
          'smoothing process successfully finished.',
          flush=True)
def run_snpEff(self):
    """Run ``snpEff ann`` on ``self.vcf`` and switch ``self.args.vcf`` to it.

    The command writes its summary to ``<out>/snpEff_summary.html``, its
    stdout to ``<out>/mutmap.snpEff.vcf`` and its stderr to
    ``<out>/snpEff.log``. On success ``self.args.vcf`` is replaced by the
    path of the annotated VCF.

    Raises:
        subprocess.CalledProcessError: Propagated unchanged when the shell
            command exits with a non-zero status (``check=True`` and no
            handler here).
    """
    template = 'snpEff ann -s {0}/snpEff_summary.html \
                {1} \
                {2} \
                1> {0}/mutmap.snpEff.vcf \
                2> {0}/snpEff.log'
    cmd = clean_cmd(template.format(self.out, self.snpEff, self.vcf))

    print(time_stamp(), 'start to run SnpEff.', flush=True)
    sbp.run(cmd,
            stdout=sbp.DEVNULL,
            stderr=sbp.DEVNULL,
            shell=True,
            check=True)
    print(time_stamp(), 'SnpEff successfully finished.', flush=True)

    # Later steps read the VCF path from args, so point it at the new file.
    annotated_vcf = '{}/mutmap.snpEff.vcf'.format(self.out)
    self.args.vcf = annotated_vcf
def check_outdir(self):
    """Create the output directory ``self.out`` unless it already exists.

    Missing parent directories are created as well. When the path already
    exists, a notice is printed and nothing is created.

    Raises:
        OSError: If the directory cannot be created for any reason other
            than the path already existing.
    """
    try:
        # Try-and-handle avoids the gap between an exists() test and the
        # actual creation.
        os.makedirs(self.out)
    except FileExistsError:
        print(time_stamp(), 'output directory already exist.', flush=True)
def run(self):
    """Filter reads for every label, merge the results, then clean the log.

    ``self.get_labels()`` supplies two sequences: ``label_with_flags`` is
    mapped over ``self.filt`` and ``labels`` over ``self.merge``. The two
    stages run one after the other, each in its own pool of
    ``self.args.threads`` workers. ``self.clean_log()`` is called once both
    stages have finished.
    """
    labels, label_with_flags = self.get_labels()

    print(time_stamp(), 'start to filter reads.', flush=True)

    stages = ((self.filt, label_with_flags), (self.merge, labels))
    for task, items in stages:
        pool = Pool(self.args.threads)
        try:
            pool.map(task, items)
        finally:
            # Release the workers even when a task raises, and wait for
            # them to exit so no worker process is left behind.
            pool.close()
            pool.join()

    self.clean_log()

    print(time_stamp(),
          'filtering process successfully finished.',
          flush=True)
def run(self):
    """Index the reference FASTA with ``bwa index`` and ``samtools faidx``.

    Output of the two commands is appended to ``<out>/log/bwa.log`` and
    ``<out>/log/samtools.log`` respectively. The commands run in that
    order; the second one is not started if the first one fails.

    Raises:
        SystemExit: With status 1 when either command exits with a
            non-zero status (``call_log`` is invoked first with the
            command that failed).
    """
    print(time_stamp(), 'start to index reference fasta.', flush=True)

    cmd1 = 'bwa index {} \
            >> {}/log/bwa.log \
            2>&1'.format(self.args.ref, self.out)

    cmd2 = 'samtools faidx {} \
            >> {}/log/samtools.log \
            2>&1'.format(self.args.ref, self.out)

    cmd1 = clean_cmd(cmd1)
    cmd2 = clean_cmd(cmd2)

    for tool, cmd in (('bwa', cmd1), ('samtools', cmd2)):
        try:
            sbp.run(cmd,
                    stdout=sbp.DEVNULL,
                    stderr=sbp.DEVNULL,
                    shell=True,
                    check=True)
        except sbp.CalledProcessError:
            # Report the command that actually failed, and exit non-zero
            # so the failure is visible to the calling process.
            call_log(self.out, tool, cmd)
            sys.exit(1)

    print(time_stamp(),
          'indexing of reference successfully finished.',
          flush=True)
def run(self, fastq1, fastq2, index):
    """Align one FASTQ pair and write the resulting BAM file.

    A single shell pipeline is executed: ``bwa mem`` ->
    ``samtools fixmate -m`` -> ``samtools sort`` -> ``samtools markdup -r``
    -> ``samtools view -b -f 2 -F 2048``. The last stage writes
    ``<out>/20_bam/<index>.bam`` and its output is appended to
    ``<out>/log/alignment.log``.

    Args:
        fastq1: Path of the first FASTQ file of the pair.
        fastq2: Path of the second FASTQ file of the pair.
        index: Label used in the progress messages and as the BAM stem.

    Raises:
        SystemExit: With status 1 when the shell command exits with a
            non-zero status (``call_log`` is invoked first).
    """
    start_msg = 'start to align reads of {} by BWA.'.format(index)
    print(time_stamp(), start_msg, flush=True)

    pipeline = 'bwa mem -t {0} \
                        {1} {2} {3} | \
                samtools fixmate -m \
                                 - \
                                 - | \
                samtools sort -m {4} \
                              -@ {0} | \
                samtools markdup -r \
                                 - \
                                 - | \
                samtools view -b \
                              -f 2 \
                              -F 2048 \
                              -o {5}/20_bam/{6}.bam \
                >> {5}/log/alignment.log \
                2>&1'
    filled = pipeline.format(self.args.threads,
                             self.args.ref,
                             fastq1,
                             fastq2,
                             self.args.mem,
                             self.out,
                             index)
    cmd = clean_cmd(filled)

    try:
        sbp.run(cmd,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL,
                shell=True,
                check=True)
    except sbp.CalledProcessError:
        call_log(self.out, 'alignment', cmd)
        sys.exit(1)

    done_msg = 'alignment {} successfully finished.'.format(index)
    print(time_stamp(), done_msg, flush=True)
def check_max_threads(self, args):
    """Clamp ``args.threads`` to the CPU count reported by the system.

    Args:
        args: Object with a numeric ``threads`` attribute; it is modified
            in place.

    Returns:
        The same ``args`` object. ``args.threads`` is set to the CPU count
        when the requested value exceeds it (a warning is written to
        stderr) or when it is smaller than 1 (silently).
    """
    max_cpu = multi.cpu_count()
    print(time_stamp(),
          'maximum number of threads which you can use is up to {}.'.format(
              max_cpu),
          flush=True)

    # Strict comparison: asking for exactly max_cpu threads is valid and
    # must not trigger the warning.
    if args.threads > max_cpu:
        sys.stderr.write(('!!WARNING!! You can use up to {0} threads. '
                          'This program will use {0} threads.\n').format(
                              max_cpu))
        sys.stderr.flush()
        args.threads = max_cpu
    elif args.threads < 1:
        args.threads = max_cpu

    return args
def get_field(self):
    """Locate the GT, AD, ADF and ADR keys in the FORMAT column of the VCF.

    ``self.vcf`` is opened with ``gzip`` when its extension is ``.gz`` and
    as plain text otherwise. Only the first record line (the first
    non-blank line that does not start with ``#``) is inspected; its ninth
    whitespace-separated column is split on ``:``.

    Returns:
        tuple: ``(GT_pos, AD_pos, ADF_pos, ADR_pos)``, the zero-based
        positions of the keys. ``ADF_pos`` and ``ADR_pos`` are both
        ``None`` unless both keys are present; in that case a notice that
        the strand bias filter will be skipped is written to stderr.

    Raises:
        SystemExit: With status 1 when the file has no record line, or
            when the GT or AD key is missing.
    """
    _, ext = os.path.splitext(self.vcf)
    opener = gzip.open if ext == '.gz' else open

    format_keys = None
    # The context manager closes the handle on every path, including the
    # early exits below.
    with opener(self.vcf, 'rt') as vcf:
        for line in vcf:
            if line.startswith('#') or not line.strip():
                continue
            columns = line.split()
            # A record without a FORMAT column has no keys at all; it is
            # then reported through the "No GT field" error below.
            format_keys = columns[8].split(':') if len(columns) > 8 else []
            break

    if format_keys is None:
        # Without this guard the return statement would fail on unbound
        # local names when the VCF has a header but no records.
        sys.stderr.write(('{} No variant record'
                          ' in your VCF!!\n').format(time_stamp()))
        sys.exit(1)

    try:
        GT_pos = format_keys.index('GT')
    except ValueError:
        sys.stderr.write(('{} No GT field'
                          ' in your VCF!!\n').format(time_stamp()))
        sys.exit(1)

    try:
        AD_pos = format_keys.index('AD')
    except ValueError:
        sys.stderr.write(('{} No AD field'
                          ' in your VCF!!\n').format(time_stamp()))
        sys.exit(1)

    if 'ADF' in format_keys and 'ADR' in format_keys:
        ADF_pos = format_keys.index('ADF')
        ADR_pos = format_keys.index('ADR')
    else:
        ADF_pos = None
        ADR_pos = None
        sys.stderr.write(('{} no ADF or ADR field'
                          ' in your VCF.\n').format(time_stamp()))
        sys.stderr.write(('{} strand bias filter '
                          'will be skipped.\n').format(time_stamp()))

    return GT_pos, AD_pos, ADF_pos, ADR_pos
def run(self):
    """Drive the plotting workflow from output-directory check to plots.

    Steps, in order: ``self.check_outdir()``; ``self.run_snpEff()`` when
    ``self.snpEff`` is not ``None``; ``Vcf2Index``; ``Plot``; and finally
    ``self.make_igv_file()`` when ``self.args.igv`` is truthy.
    """
    self.check_outdir()

    snpeff_requested = self.snpEff is not None
    if snpeff_requested:
        self.run_snpEff()

    # Built only after the optional SnpEff step has run, as in the
    # original ordering.
    Vcf2Index(self.args).run()

    print(time_stamp(), 'plotting now...', flush=True)
    Plot(self.args).run()

    if not self.args.igv:
        return
    self.make_igv_file()
def run(self):
    """Merge BAMs, call variants per sequence name, and index the VCF.

    Steps, in order: ``self.merge()``; ``self.mpileup`` mapped over the
    names returned by ``self.get_header()`` in a pool of
    ``self.args.threads`` workers; ``self.concat()``; ``self.mkindex()``.
    Progress is reported on stdout around each phase.
    """
    print(time_stamp(), 'start to merge BAMs.', flush=True)
    self.merge()
    print(time_stamp(), 'merging process successfully finished.', flush=True)

    print(time_stamp(), 'start to call variants.', flush=True)
    chr_names = self.get_header()

    pool = Pool(self.args.threads)
    try:
        pool.map(self.mpileup, chr_names)
    finally:
        # Release the workers even when a task raises, and wait for them
        # to exit so no worker process is left behind.
        pool.close()
        pool.join()

    self.concat()
    print(time_stamp(), 'variant calling successfully finished.', flush=True)

    print(time_stamp(), 'start to index VCF.', flush=True)
    self.mkindex()
    print(time_stamp(), 'indexing VCF successfully finished.', flush=True)
def main():
    """Run MutPlot with ``args`` and report start and finish on stdout.

    ``args`` is not a parameter; it is looked up in the enclosing scope.
    """
    print(time_stamp(), 'start to run MutPlot.', flush=True)
    MutPlot(args).run()
    print(time_stamp(), 'MutPlot successfully finished.', flush=True)
def run(self, fastq1, fastq2, index):
    """Trim one FASTQ pair with ``trimmomatic PE``, then align the result.

    Trimmed and unpaired reads are written under ``<out>/00_fastq/`` and
    the command output is appended to ``<out>/log/trimmomatic.log``. The
    ILLUMINACLIP step is left out when ``self.trim_params['ILLUMINACLIP']``
    is empty or still contains ``<ADAPTER_FASTA>`` (presumably an unfilled
    placeholder; confirm against the code that builds ``trim_params``).
    After trimming, ``Alignment(self.args).run()`` is called on the two
    trimmed files.

    Args:
        fastq1: Path of the first FASTQ file of the pair.
        fastq2: Path of the second FASTQ file of the pair.
        index: Label used as the stem of the output file names.

    Raises:
        SystemExit: With status 1 when the trimmomatic command exits with
            a non-zero status (``call_log`` is invoked first).
    """
    print(time_stamp(),
          'start trimming for {} and {}.'.format(fastq1, fastq2),
          flush=True)

    trim1 = '{}/00_fastq/{}.1.trim.fastq.gz'.format(self.out, index)
    trim2 = '{}/00_fastq/{}.2.trim.fastq.gz'.format(self.out, index)
    unpaired1 = '{}/00_fastq/{}.1.unpaired.fastq.gz'.format(self.out, index)
    unpaired2 = '{}/00_fastq/{}.2.unpaired.fastq.gz'.format(self.out, index)

    # Build the trimming steps once; ILLUMINACLIP is the only optional one
    # and, when present, comes first.
    steps = []
    clip = self.trim_params['ILLUMINACLIP']
    if len(clip) > 0 and '<ADAPTER_FASTA>' not in clip:
        steps.append('ILLUMINACLIP:{}'.format(clip))
    for name in ('LEADING', 'TRAILING', 'SLIDINGWINDOW', 'MINLEN'):
        steps.append('{}:{}'.format(name, self.trim_params[name]))

    cmd = ('trimmomatic PE -threads {} -phred{} {} {} {} {} {} {} {} '
           '>> {}/log/trimmomatic.log 2>&1').format(
               self.args.threads,
               self.trim_params['phred'],
               fastq1,
               fastq2,
               trim1,
               unpaired1,
               trim2,
               unpaired2,
               ' '.join(steps),
               self.out)
    cmd = clean_cmd(cmd)

    try:
        sbp.run(cmd,
                stdout=sbp.DEVNULL,
                stderr=sbp.DEVNULL,
                shell=True,
                check=True)
    except sbp.CalledProcessError:
        call_log(self.out, 'trimmomatic', cmd)
        # A bare sys.exit() would report success (status 0) to the caller.
        sys.exit(1)

    print(time_stamp(),
          'trimming for {} and {} successfully finished.'.format(
              fastq1, fastq2),
          flush=True)

    aln = Alignment(self.args)
    aln.run(trim1, trim2, index)
def main():
    """Run MutMap with ``args`` and report start and finish on stdout.

    ``args`` is not a parameter; it is looked up in the enclosing scope.
    """
    print(time_stamp(), 'start to run MutMap.', flush=True)
    pipeline = MutMap(args)
    pipeline.run()
    print(time_stamp(), 'MutMap successfully finished.\n', flush=True)