def run(self):
    """Filter each pindel input with ``grep "ChrID" | awk '$17>=N'``.

    For every target returned by ``self.input()`` the lines containing
    ``ChrID`` are selected, and of those only the rows whose 17th awk field
    is at least ``self.cfg['pindel_min_reads']`` are kept. The surviving
    bytes are written to the output target with the same index.

    The work is bracketed by ``pipeline_utils.add_thread_count`` /
    ``sub_thread_count`` on ``global_vars.thread_file``; the release is done
    in a ``finally`` block so a failing command or write cannot leave the
    counter permanently incremented.
    """
    for output in self.output():
        pipeline_utils.confirm_path(output.path)

    # Random 0-3 s delay before polling for a slot
    # (presumably to de-synchronise tasks started together -- confirm).
    time.sleep(random.uniform(0, 3))
    sys.stdout.flush()
    while not pipeline_utils.add_thread_count(global_vars.thread_file, 1):
        time.sleep(1.2)

    try:
        for i, input_file in enumerate(self.input()):
            grep_cmd = 'grep "ChrID" %s' % input_file.path
            grep_proc = subprocess.Popen(
                grep_cmd, stdout=subprocess.PIPE, shell=True)

            awk_cmd = "awk '$17>=%s'" % self.cfg['pindel_min_reads']
            awk_proc = subprocess.Popen(
                awk_cmd,
                stdout=subprocess.PIPE,
                stdin=grep_proc.stdout,
                shell=True)
            # Drop the parent's copy of the pipe so grep sees SIGPIPE if awk
            # exits early and the descriptor is not leaked per iteration.
            grep_proc.stdout.close()

            outs, _ = awk_proc.communicate()
            # Reap the upstream process. Exit codes are deliberately not
            # checked: grep exits 1 when nothing matches, which is a valid
            # (empty) result here.
            grep_proc.wait()

            with open(self.output()[i].path, 'wb') as f:
                f.write(outs)
    finally:
        # Always give the slot back, even when a step above raised.
        while not pipeline_utils.sub_thread_count(global_vars.thread_file, 1):
            time.sleep(1.2)
def run(self):
    """Run ``misc_utils.filter_pindel`` over all pindel inputs.

    All outputs except the last are treated as per-sample files; each is
    keyed by its file name (text after the final ``/``) truncated at
    ``.pindel.bed``. The last output is passed as ``all_samples_output``.
    Thresholds come from ``self.cfg`` (``pindel_min_reads``,
    ``pindel_min_qual``, ``pindel_max_inv_length``).

    The call is bracketed by ``pipeline_utils.add_thread_count`` /
    ``sub_thread_count`` on ``global_vars.thread_file``; the release is done
    in a ``finally`` block so an exception inside ``filter_pindel`` cannot
    leave the counter permanently incremented.
    """
    for output in self.output():
        pipeline_utils.confirm_path(output.path)

    # Random 0-3 s delay before polling for a slot
    # (presumably to de-synchronise tasks started together -- confirm).
    time.sleep(random.uniform(0, 3))
    sys.stdout.flush()
    while not pipeline_utils.add_thread_count(global_vars.thread_file, 1):
        time.sleep(1.2)

    try:
        # Map "<sample>" -> output path for every per-sample output.
        sample_dict = {
            output.path.split('/')[-1].split('.pindel.bed')[0]: output.path
            for output in self.output()[:-1]
        }
        misc_utils.filter_pindel(
            pindel_files=[input_file.path for input_file in self.input()],
            sample_dict=sample_dict,
            project_dir=self.project_dir,
            all_samples_output=self.output()[-1].path,
            min_reads=self.cfg['pindel_min_reads'],
            min_qual=self.cfg['pindel_min_qual'],
            max_inv_length=self.cfg['pindel_max_inv_length'])
    finally:
        # Always give the slot back, even when filtering raised.
        while not pipeline_utils.sub_thread_count(global_vars.thread_file, 1):
            time.sleep(1.2)
def run(self):
    """Annotate each BED input via ``sort-bed | bedtools intersect | bedmap``.

    Every input except the last is sorted with ``sort-bed``, reduced to the
    records that overlap ``self.cfg['exons_bed']``
    (``bedtools intersect -wa -u -sorted``), and then passed through
    ``bedmap --echo --echo-map-id-uniq`` against ``self.cfg['genmap']``.
    The output target with the same index receives a ``#gffTags`` header
    line followed by the bedmap output.

    The work is bracketed by ``pipeline_utils.add_thread_count`` /
    ``sub_thread_count`` on ``global_vars.thread_file``; the release is done
    in a ``finally`` block so a failing command or write cannot leave the
    counter permanently incremented.
    """
    for output in self.output():
        pipeline_utils.confirm_path(output.path)

    # Random 0-3 s delay before polling for a slot
    # (presumably to de-synchronise tasks started together -- confirm).
    time.sleep(random.uniform(0, 3))
    sys.stdout.flush()
    while not pipeline_utils.add_thread_count(global_vars.thread_file, 1):
        time.sleep(1.2)

    try:
        for i, input_file in enumerate(self.input()[:-1]):
            sort_cmd = 'sort-bed %s' % input_file.path
            sort_proc = subprocess.Popen(
                sort_cmd, stdout=subprocess.PIPE, shell=True)

            intersect_cmd = ' '.join([
                'bedtools', 'intersect', '-wa', '-u', '-sorted', '-a',
                'stdin', '-b', self.cfg['exons_bed']
            ])
            intersect_proc = subprocess.Popen(
                intersect_cmd,
                stdout=subprocess.PIPE,
                stdin=sort_proc.stdout,
                shell=True)
            # Close the parent's copy of each upstream pipe so an early exit
            # downstream reaches the producer as SIGPIPE and descriptors are
            # not leaked on every loop iteration.
            sort_proc.stdout.close()

            # The delimiter is single-quoted for the shell, so bedmap
            # receives the two characters backslash + t.
            bedmap_cmd = " ".join([
                "bedmap", "--echo", "--echo-map-id-uniq", "--delim", r"'\t'",
                "-", self.cfg['genmap']
            ])
            bedmap_proc = subprocess.Popen(
                bedmap_cmd,
                stdout=subprocess.PIPE,
                stdin=intersect_proc.stdout,
                shell=True)
            intersect_proc.stdout.close()

            outs, _ = bedmap_proc.communicate()
            # Reap the upstream stages; exit codes are intentionally not
            # enforced, matching the previous best-effort behaviour.
            sort_proc.wait()
            intersect_proc.wait()

            with open(self.output()[i].path, 'wb') as f:
                f.write(str.encode('#gffTags\n'))
                f.write(outs)
    finally:
        # Always give the slot back, even when a step above raised.
        while not pipeline_utils.sub_thread_count(global_vars.thread_file, 1):
            time.sleep(1.2)
def run(self):
    """Call variants with ``freebayes | vcffilter | vcf-sort``.

    freebayes is run on ``self.input()[0]`` with ``-f cfg['fasta_file']`` and
    ``-t cfg['library_bed']``; its output is filtered with
    ``vcffilter -f "QUAL > 20"``, sorted with ``vcf-sort`` and written to
    ``self.output().path``. The freebayes command line is echoed to stdout
    before it is started.

    The work is bracketed by ``pipeline_utils.add_thread_count`` /
    ``sub_thread_count`` on ``global_vars.thread_file``; the release is done
    in a ``finally`` block so a failing command or write cannot leave the
    counter permanently incremented.
    """
    pipeline_utils.confirm_path(self.output().path)

    # Random 0-3 s delay before polling for a slot
    # (presumably to de-synchronise tasks started together -- confirm).
    time.sleep(random.uniform(0, 3))
    sys.stdout.flush()
    while not pipeline_utils.add_thread_count(global_vars.thread_file, 1):
        time.sleep(1)

    try:
        freebayes_cmd = ' '.join([
            self.cfg['freebayes_location'], '-f', self.cfg['fasta_file'],
            '-t', self.cfg['library_bed'], self.input()[0].path
        ])
        print(freebayes_cmd)
        sys.stdout.flush()
        freebayes_proc = subprocess.Popen(
            freebayes_cmd, stdout=subprocess.PIPE, shell=True)

        filter_cmd = ' '.join(
            [self.cfg['vcffilter_location'], '-f', '"QUAL > 20"'])
        filter_proc = subprocess.Popen(
            filter_cmd,
            stdout=subprocess.PIPE,
            stdin=freebayes_proc.stdout,
            shell=True)
        # Close the parent's copy of each upstream pipe so an early exit
        # downstream reaches the producer as SIGPIPE instead of leaving it
        # blocked on a pipe that still has a reader.
        freebayes_proc.stdout.close()

        sort_proc = subprocess.Popen(
            'vcf-sort',
            stdout=subprocess.PIPE,
            stdin=filter_proc.stdout,
            shell=True)
        filter_proc.stdout.close()

        outs, _ = sort_proc.communicate()
        # Reap the upstream stages; exit codes are intentionally not
        # enforced, matching the previous best-effort behaviour.
        freebayes_proc.wait()
        filter_proc.wait()

        with open(self.output().path, 'wb') as f:
            f.write(outs)
    finally:
        # Always give the slot back, even when a step above raised.
        while not pipeline_utils.sub_thread_count(global_vars.thread_file, 1):
            time.sleep(1)
def run(self):
    """Run the cnvkit post-processing chain and write a sorted gene list.

    Using the single target ``self.input()`` and the list ``self.output()``,
    the following cnvkit subcommands are run in order through
    ``pipeline_utils.command_call``:

    1. ``segment``     -> ``output()[0]``
    2. ``segmetrics``  -> ``output()[1]`` (uses ``output()[0]`` as ``-s``)
    3. ``call``        -> ``output()[2]`` (reads ``output()[1]``)
    4. ``scatter``     -> ``output()[3]`` (uses ``output()[2]`` as ``-s``)
    5. ``genemetrics`` -> ``output()[4]`` (uses ``output()[2]`` as ``-s``)

    Finally ``genemetrics | tail -n+2 | cut -f1 | sort`` is run as a pipe and
    its output is written to ``'<case>_segment_genes.txt'``, a relative path
    resolved against the current working directory.

    The piped step takes a slot with ``pipeline_utils.add_thread_count``.
    Previously that slot was never returned; it is now released with
    ``sub_thread_count`` in a ``finally`` block, consistent with the other
    ``run`` methods that use the same counter.
    """
    for output in self.output():
        pipeline_utils.confirm_path(output.path)

    cmd = 'python3 %s segment %s -m %s --drop-low-coverage -o %s' % (
        self.cfg['cnvkit_location'], self.input().path,
        self.cfg['cnvkit_seg_method'], self.output()[0].path)
    pipeline_utils.command_call(cmd.split(' '), self.output())

    cmd = 'python3 %s segmetrics %s -s %s --ci --std --mean -o %s' % (
        self.cfg['cnvkit_location'], self.input().path,
        self.output()[0].path, self.output()[1].path)
    pipeline_utils.command_call(cmd.split(' '), self.output())

    cmd = 'python3 %s call %s -i %s_T -m threshold --filter ci --sample-sex female -t=-1.1,-0.4,0.3,0.7 -o %s' % (
        self.cfg['cnvkit_location'], self.output()[1].path, self.case,
        self.output()[2].path)
    pipeline_utils.command_call(cmd.split(' '), self.output())

    cmd = 'python3 %s scatter %s -s %s -o %s' % (
        self.cfg['cnvkit_location'], self.input().path,
        self.output()[2].path, self.output()[3].path)
    pipeline_utils.command_call(cmd.split(' '), self.output())

    cmd = 'python3 %s genemetrics %s -s %s -t %s -m %s -o %s' % (
        self.cfg['cnvkit_location'], self.input().path,
        self.output()[2].path, self.cfg['cnvkit_genemetrics_threshold'],
        self.cfg['cnvkit_genemetrics_minprobes'], self.output()[4].path)
    pipeline_utils.command_call(cmd.split(' '), self.output())

    # TODO: repurpose command_call for piped commands. Until then the slot
    # handling is done by hand for the pipeline below.
    time.sleep(random.uniform(0, 3))
    sys.stdout.flush()
    while not pipeline_utils.add_thread_count(global_vars.thread_file, 1):
        time.sleep(1.2)

    try:
        cmd = 'python3 %s genemetrics %s -s %s' % (
            self.cfg['cnvkit_location'], self.input().path,
            self.output()[2].path)
        genemetrics_proc = subprocess.Popen(
            cmd.split(' '), stdout=subprocess.PIPE)

        # Skip the first (header) line of the genemetrics table.
        tail_proc = subprocess.Popen(
            ['tail', '-n+2'],
            stdout=subprocess.PIPE,
            stdin=genemetrics_proc.stdout)
        # Close the parent's copy of each upstream pipe so an early exit
        # downstream reaches the producer as SIGPIPE instead of leaving it
        # blocked on a pipe that still has a reader.
        genemetrics_proc.stdout.close()

        # Keep only the first tab-separated column.
        cut_proc = subprocess.Popen(
            ['cut', '-f1'], stdout=subprocess.PIPE, stdin=tail_proc.stdout)
        tail_proc.stdout.close()

        sort_proc = subprocess.Popen(
            ['sort'], stdout=subprocess.PIPE, stdin=cut_proc.stdout)
        cut_proc.stdout.close()

        outs, _ = sort_proc.communicate()
        # Reap the upstream stages; exit codes are intentionally not
        # enforced, matching the previous best-effort behaviour.
        for upstream in (genemetrics_proc, tail_proc, cut_proc):
            upstream.wait()

        with open('%s_segment_genes.txt' % self.case, 'wb') as f:
            f.write(outs)
    finally:
        # Give the slot back; without this every run leaked one count.
        while not pipeline_utils.sub_thread_count(global_vars.thread_file, 1):
            time.sleep(1.2)