Esempio n. 1
0
    def run(self):
        """Filter every input down to its qualifying "ChrID" lines.

        For each input target this runs the shell pipeline
        ``grep "ChrID" <input> | awk '$17>=<cfg['pindel_min_reads']>'`` and
        writes the pipeline's stdout, as bytes, to the output target with the
        same index.

        The work is bracketed by ``pipeline_utils.add_thread_count`` and
        ``pipeline_utils.sub_thread_count`` on ``global_vars.thread_file``.
        The decrement lives in a ``finally`` block so that a failure inside
        the loop cannot leave the counter permanently incremented.

        Exit codes of the child processes are intentionally not checked:
        ``grep`` exits non-zero when nothing matches, which simply yields an
        empty output file.
        """
        outputs = self.output()
        for output in outputs:
            pipeline_utils.confirm_path(output.path)

        # Random start-up delay before trying to take a slot.
        # NOTE(review): presumably staggers concurrent tasks that share the
        # thread file - confirm against pipeline_utils.
        time.sleep(random.uniform(0, 3))
        sys.stdout.flush()
        while not pipeline_utils.add_thread_count(global_vars.thread_file, 1):
            time.sleep(1.2)

        try:
            for i, input_file in enumerate(self.input()):
                grep_cmd = 'grep "ChrID" %s' % input_file.path
                grep_proc = subprocess.Popen(grep_cmd,
                                             stdout=subprocess.PIPE,
                                             shell=True)

                awk_cmd = "awk '$17>=%s'" % self.cfg['pindel_min_reads']
                awk_proc = subprocess.Popen(awk_cmd,
                                            stdout=subprocess.PIPE,
                                            stdin=grep_proc.stdout,
                                            shell=True)
                # Drop the parent's copy of the pipe so the upstream process
                # sees a closed pipe if the downstream one exits early.
                grep_proc.stdout.close()

                outs, _ = awk_proc.communicate()
                # Reap the upstream child instead of leaving it as a zombie.
                grep_proc.wait()

                with open(outputs[i].path, 'wb') as f:
                    f.write(outs)
        finally:
            # Always give the slot back, even when a step above raised.
            while not pipeline_utils.sub_thread_count(global_vars.thread_file,
                                                      1):
                time.sleep(1.2)
Esempio n. 2
0
    def run(self):
        """Run ``misc_utils.filter_pindel`` over all inputs.

        Every output except the last is treated as a per-sample file: the
        sample name is the file name (text after the final ``/``) up to the
        first ``.pindel.bed``, and it is mapped to that output's path. The
        last output is passed as ``all_samples_output``. The thresholds
        ``pindel_min_reads``, ``pindel_min_qual`` and
        ``pindel_max_inv_length`` are read from ``self.cfg``.

        The call is bracketed by ``pipeline_utils.add_thread_count`` and
        ``pipeline_utils.sub_thread_count`` on ``global_vars.thread_file``.
        The decrement lives in a ``finally`` block so that an exception from
        the filter step cannot leave the counter permanently incremented.
        """
        outputs = self.output()
        for output in outputs:
            pipeline_utils.confirm_path(output.path)

        # Random start-up delay before trying to take a slot.
        # NOTE(review): presumably staggers concurrent tasks that share the
        # thread file - confirm against pipeline_utils.
        time.sleep(random.uniform(0, 3))
        sys.stdout.flush()
        while not pipeline_utils.add_thread_count(global_vars.thread_file, 1):
            time.sleep(1.2)

        try:
            # sample name -> per-sample output path (last output excluded).
            sample_dict = {
                target.path.split('/')[-1].split('.pindel.bed')[0]: target.path
                for target in outputs[:-1]
            }

            misc_utils.filter_pindel(
                pindel_files=[input_file.path for input_file in self.input()],
                sample_dict=sample_dict,
                project_dir=self.project_dir,
                all_samples_output=outputs[-1].path,
                min_reads=self.cfg['pindel_min_reads'],
                min_qual=self.cfg['pindel_min_qual'],
                max_inv_length=self.cfg['pindel_max_inv_length'])
        finally:
            # Always give the slot back, even when the filter step raised.
            while not pipeline_utils.sub_thread_count(global_vars.thread_file,
                                                      1):
                time.sleep(1.2)
Esempio n. 3
0
    def run(self):
        r"""Sort, exon-filter and annotate every input except the last.

        For each of those inputs this runs the shell pipeline
        ``sort-bed <input> | bedtools intersect -wa -u -sorted -a stdin -b
        <cfg['exons_bed']> | bedmap --echo --echo-map-id-uniq --delim '\t' -
        <cfg['genmap']>`` and writes a ``#gffTags`` header line followed by
        the pipeline's stdout, as bytes, to the output target with the same
        index.

        The work is bracketed by ``pipeline_utils.add_thread_count`` and
        ``pipeline_utils.sub_thread_count`` on ``global_vars.thread_file``.
        The decrement lives in a ``finally`` block so that a failure inside
        the loop cannot leave the counter permanently incremented.

        Exit codes of the child processes are not checked (unchanged from
        the previous behaviour).
        """
        outputs = self.output()
        for output in outputs:
            pipeline_utils.confirm_path(output.path)

        # Random start-up delay before trying to take a slot.
        # NOTE(review): presumably staggers concurrent tasks that share the
        # thread file - confirm against pipeline_utils.
        time.sleep(random.uniform(0, 3))
        sys.stdout.flush()
        while not pipeline_utils.add_thread_count(global_vars.thread_file, 1):
            time.sleep(1.2)

        try:
            # These two commands do not depend on the input, build them once.
            intersect_cmd = ' '.join([
                'bedtools', 'intersect', '-wa', '-u', '-sorted', '-a', 'stdin',
                '-b', self.cfg['exons_bed']
            ])
            bedmap_cmd = " ".join([
                "bedmap", "--echo", "--echo-map-id-uniq", "--delim", r"'\t'",
                "-", self.cfg['genmap']
            ])

            for i, input_file in enumerate(self.input()[:-1]):
                sort_proc = subprocess.Popen('sort-bed %s' % input_file.path,
                                             stdout=subprocess.PIPE,
                                             shell=True)

                intersect_proc = subprocess.Popen(intersect_cmd,
                                                  stdout=subprocess.PIPE,
                                                  stdin=sort_proc.stdout,
                                                  shell=True)
                # Drop the parent's copy of each intermediate pipe so an
                # upstream process sees a closed pipe if its consumer exits.
                sort_proc.stdout.close()

                bedmap_proc = subprocess.Popen(bedmap_cmd,
                                               stdout=subprocess.PIPE,
                                               stdin=intersect_proc.stdout,
                                               shell=True)
                intersect_proc.stdout.close()

                outs, _ = bedmap_proc.communicate()
                # Reap the upstream children instead of leaving zombies.
                sort_proc.wait()
                intersect_proc.wait()

                with open(outputs[i].path, 'wb') as f:
                    f.write(b'#gffTags\n')
                    f.write(outs)
        finally:
            # Always give the slot back, even when a step above raised.
            while not pipeline_utils.sub_thread_count(global_vars.thread_file,
                                                      1):
                time.sleep(1.2)
Esempio n. 4
0
    def run(self):
        """Call, filter and sort variants for the first input.

        Runs the shell pipeline
        ``<cfg['freebayes_location']> -f <cfg['fasta_file']> -t
        <cfg['library_bed']> <input[0]> | <cfg['vcffilter_location']> -f
        "QUAL > 20" | vcf-sort`` and writes the pipeline's stdout, as bytes,
        to the single output target. The first command line is echoed to
        stdout before it is started.

        The work is bracketed by ``pipeline_utils.add_thread_count`` and
        ``pipeline_utils.sub_thread_count`` on ``global_vars.thread_file``.
        The decrement lives in a ``finally`` block so that a failure in the
        pipeline cannot leave the counter permanently incremented.

        Exit codes of the child processes are not checked (unchanged from
        the previous behaviour).
        """
        output_path = self.output().path
        pipeline_utils.confirm_path(output_path)

        # Random start-up delay before trying to take a slot.
        # NOTE(review): presumably staggers concurrent tasks that share the
        # thread file - confirm against pipeline_utils.
        time.sleep(random.uniform(0, 3))
        sys.stdout.flush()
        while not pipeline_utils.add_thread_count(global_vars.thread_file, 1):
            time.sleep(1)

        try:
            call_cmd = ' '.join([
                self.cfg['freebayes_location'], '-f', self.cfg['fasta_file'],
                '-t', self.cfg['library_bed'],
                self.input()[0].path
            ])
            print(call_cmd)
            sys.stdout.flush()
            call_proc = subprocess.Popen(call_cmd,
                                         stdout=subprocess.PIPE,
                                         shell=True)

            filter_cmd = ' '.join(
                [self.cfg['vcffilter_location'], '-f', '"QUAL > 20"'])
            filter_proc = subprocess.Popen(filter_cmd,
                                           stdout=subprocess.PIPE,
                                           stdin=call_proc.stdout,
                                           shell=True)
            # Drop the parent's copy of each intermediate pipe so an upstream
            # process sees a closed pipe if its consumer exits early.
            call_proc.stdout.close()

            sort_proc = subprocess.Popen('vcf-sort',
                                         stdout=subprocess.PIPE,
                                         stdin=filter_proc.stdout,
                                         shell=True)
            filter_proc.stdout.close()

            outs, _ = sort_proc.communicate()
            # Reap the upstream children instead of leaving zombies.
            call_proc.wait()
            filter_proc.wait()

            with open(output_path, 'wb') as f:
                f.write(outs)
        finally:
            # Always give the slot back, even when a step above raised.
            while not pipeline_utils.sub_thread_count(global_vars.thread_file,
                                                      1):
                time.sleep(1)
Esempio n. 5
0
    def run(self):
        """Run the cnvkit steps for this case and dump a sorted gene list.

        Five commands of the form ``python3 <cfg['cnvkit_location']> <step>``
        are executed in order through ``pipeline_utils.command_call``:
        ``segment`` -> output[0], ``segmetrics`` -> output[1],
        ``call`` -> output[2], ``scatter`` -> output[3] and
        ``genemetrics`` -> output[4]. Later steps read the files written by
        earlier ones, so the order matters.

        Afterwards ``genemetrics | tail -n+2 | cut -f1 | sort`` is run as a
        process pipeline and its stdout is written to
        ``<self.case>_segment_genes.txt`` (a relative path).

        Every command string is split on single spaces, so a config value or
        path containing a space would be broken into several arguments.

        NOTE(review): a thread slot is taken with
        ``pipeline_utils.add_thread_count`` before the pipeline, but no
        matching ``sub_thread_count`` call is visible in this block - confirm
        the slot is released somewhere.
        """
        # Ensure each output path is ready (delegated to confirm_path).
        for output in self.output():
            pipeline_utils.confirm_path(output.path)

        # Step 1: segment the single input; result goes to output[0].
        cmd = 'python3 %s segment %s -m %s --drop-low-coverage -o %s' % (
            self.cfg['cnvkit_location'], self.input().path,
            self.cfg['cnvkit_seg_method'], self.output()[0].path)
        cmd = cmd.split(' ')
        pipeline_utils.command_call(cmd, self.output())

        # Step 2: segmetrics on the input using the segments from step 1;
        # result goes to output[1].
        cmd = 'python3 %s segmetrics %s -s %s --ci --std --mean -o %s' % (
            self.cfg['cnvkit_location'], self.input().path,
            self.output()[0].path, self.output()[1].path)
        cmd = cmd.split(' ')
        pipeline_utils.command_call(cmd, self.output())

        # Step 3: call on output[1] with sample id "<case>_T"; result goes to
        # output[2]. The sample sex and the -t thresholds are hard-coded.
        cmd = 'python3 %s call %s -i %s_T -m threshold --filter ci --sample-sex female -t=-1.1,-0.4,0.3,0.7 -o %s' % (
            self.cfg['cnvkit_location'], self.output()[1].path, self.case,
            self.output()[2].path)
        cmd = cmd.split(' ')
        pipeline_utils.command_call(cmd, self.output())

        # Step 4: scatter from the input and output[2]; result goes to
        # output[3].
        cmd = 'python3 %s scatter %s -s %s -o %s' % (
            self.cfg['cnvkit_location'], self.input().path,
            self.output()[2].path, self.output()[3].path)
        cmd = cmd.split(' ')
        pipeline_utils.command_call(cmd, self.output())

        # Step 5: genemetrics with the configured threshold and minimum
        # probe count; result goes to output[4].
        cmd = 'python3 %s genemetrics %s -s %s -t %s -m %s -o %s' % (
            self.cfg['cnvkit_location'], self.input().path,
            self.output()[2].path, self.cfg['cnvkit_genemetrics_threshold'],
            self.cfg['cnvkit_genemetrics_minprobes'], self.output()[4].path)
        cmd = cmd.split(' ')
        pipeline_utils.command_call(cmd, self.output())

        # The remaining work is a piped command, so the thread-slot handling
        # is done by hand here instead of going through command_call.
        # NOTE(review): presumably command_call does the same bookkeeping
        # internally - confirm against pipeline_utils.
        wait_time = random.uniform(0, 3)
        time.sleep(wait_time)
        sys.stdout.flush()
        while not pipeline_utils.add_thread_count(global_vars.thread_file, 1):
            time.sleep(1.2)

        # Pipeline stage 1: genemetrics without -t/-m/-o, written to stdout.
        cmd = 'python3 %s genemetrics %s -s %s' % (self.cfg['cnvkit_location'],
                                                   self.input().path,
                                                   self.output()[2].path)
        cmd = cmd.split(' ')
        p1 = subprocess.Popen(cmd, stdout=subprocess.PIPE)
        # Pipeline stage 2: drop the first line of the table.
        cmd = 'tail -n+2'
        cmd = cmd.split(' ')
        p2 = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=p1.stdout)
        # Pipeline stage 3: keep only the first field of each line.
        cmd = 'cut -f1'
        cmd = cmd.split(' ')
        p3 = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=p2.stdout)
        # Pipeline stage 4: sort the remaining values.
        cmd = 'sort'
        cmd = cmd.split(' ')
        p4 = subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=p3.stdout)
        # Only the last stage is read; err is None because stderr is not
        # redirected.
        outs, err = p4.communicate()
        # Relative file name: the file is created in the current working
        # directory of the process.
        with open('%s_segment_genes.txt' % self.case, 'wb') as f:
            f.write(outs)