Beispiel #1
0
    def create_plink_binary_tasks(self, parent_tasks):
        """Add one ``plink_binary`` task to the workflow per parent task.

        Parents are processed in ascending ``id`` order.  Each new task takes
        the parent's ``out_vcf`` as input and writes to
        ``<rootdir>/2-plink-binaries/<chrom>/c<chrom>``.

        Args:
            parent_tasks: iterable of tasks exposing ``id`` and a ``params``
                mapping that holds ``chrom`` and ``out_vcf``.

        Returns:
            List of the values returned by ``self.workflow.add_task``, one per
            parent, in processing order.
        """
        stage = '2-plink-binaries'
        stage_root = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        def add_for(parent):
            chrom = parent.params['chrom']
            out_path = os.path.join(stage_root, chrom, 'c{}'.format(chrom))
            return self.workflow.add_task(
                func=plink_binary,
                params={
                    'in_vcf': parent.params['out_vcf'],
                    'out_path': out_path,
                    'chrom': chrom,
                },
                stage_name=stage,
                uid='{}'.format(chrom),
                # The DRM parameters are rebuilt for every task.
                drm_params=to_json(plink_binary_lsf_params(email)),
                parents=[parent],
            )

        return [add_for(parent)
                for parent in sorted(parent_tasks, key=lambda t: t.id)]
Beispiel #2
0
    def create_plink_extract_prune_tasks(self, parent_tasks):
        """Add one ``plink_extract_prune`` task per parent task.

        Parents are processed in ascending ``id`` order.  For each parent the
        extract list is ``<parent out_path>.prune.in`` and the binary input is
        the parent's own ``in_path``; output goes to
        ``<rootdir>/4-plink-extract-prune/<chrom>/c<chrom>.extracted``.

        Args:
            parent_tasks: iterable of tasks exposing ``id`` and a ``params``
                mapping with ``out_path``, ``in_path`` and ``chrom``.

        Returns:
            List of the values returned by ``self.workflow.add_task``.
        """
        stage = '4-plink-extract-prune'
        stage_root = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        created = []
        for parent in sorted(parent_tasks, key=lambda t: t.id):
            parent_params = parent.params
            extract_list = "{}.prune.in".format(parent_params['out_path'])
            binary_prefix = parent_params['in_path']
            chrom = parent_params['chrom']
            extracted_prefix = os.path.join(
                stage_root, chrom, 'c{}.extracted'.format(chrom))

            new_task = self.workflow.add_task(
                func=plink_extract_prune,
                params={
                    'in_path': binary_prefix,
                    'in_extract': extract_list,
                    'out_path': extracted_prefix,
                    'chrom': chrom,
                },
                stage_name=stage,
                uid='{}'.format(chrom),
                drm_params=to_json(plink_extract_prune_lsf_params(email)),
                parents=[parent],
            )
            created.append(new_task)

        return created
Beispiel #3
0
    def create_plink_merge_prune_file_task(self, parent_tasks):
        """Add the single task that merges the per-parent pruned outputs.

        Parents are ordered by ascending ``id``.  The first one supplies the
        ``in_ref`` path; the remaining ones are handed to
        ``self._create_merge_list`` together with the path
        ``<rootdir>/5-plink-merge-prune-files/allfiles.txt``.

        Args:
            parent_tasks: non-empty iterable of tasks exposing ``id`` and a
                ``params`` mapping with ``out_path``.  An empty input raises
                ``IndexError``.

        Returns:
            The value returned by ``self.workflow.add_task``.
        """
        stage = '5-plink-merge-prune-files'
        stage_root = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        ordered = sorted(parent_tasks, key=lambda t: t.id)
        reference_task, others = ordered[0], ordered[1:]

        merge_list_path = os.path.join(stage_root, 'allfiles.txt')
        self._create_merge_list(merge_list_path, others)

        merge_params = {
            'in_ref': reference_task.params['out_path'],
            'in_merge_file': merge_list_path,
            'out_path': os.path.join(stage_root, 'merged'),
        }

        # Every parent, including the reference one, is a dependency.
        return self.workflow.add_task(
            func=plink_merge_pruned_files,
            params=merge_params,
            stage_name=stage,
            uid='all-chroms',
            drm_params=to_json(plink_merge_pruned_files_lsf_params(email)),
            parents=ordered,
        )
Beispiel #4
0
    def create_eigenstrat_smartpca_task(self, parent_task):
        """Add the ``eigenstrat_smartpca_analysis`` task to the workflow.

        The input files are the parent's ``out_path`` with ``.ped`` and
        ``.map`` appended; the project directory is
        ``<rootdir>/6-eigenstrat-smartpca``.

        Args:
            parent_task: task whose ``params`` mapping holds ``out_path``.

        Returns:
            The value returned by ``self.workflow.add_task``.
        """
        stage = '6-eigenstrat-smartpca'
        project_dir = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        smartpca_params = {
            'in_ped_file': "{}.ped".format(parent_task.params['out_path']),
            'in_map_file': "{}.map".format(parent_task.params['out_path']),
            'out_prj_dir': project_dir,
        }

        return self.workflow.add_task(
            func=eigenstrat_smartpca_analysis,
            params=smartpca_params,
            stage_name=stage,
            uid='all-chroms',
            drm_params=to_json(eigenstrat_smartpca_analysis_lsf_params(email)),
            parents=[parent_task],
        )
Beispiel #5
0
    def create_filter_biallelic_snps_tasks(self):
        """Add one ``filter_biallelic_snps`` task per configured chromosome.

        For every entry of ``self.config.chroms`` the input VCF comes from
        ``self.config.vcfs`` and the output is
        ``<rootdir>/1-filter-biallelic-snps/<chrom>/filtered.snps.c<chrom>.vcf.gz``.
        ``self.config.vqslod_threshold`` is passed as ``in_min_vqslod``.

        Returns:
            List of the values returned by ``self.workflow.add_task``, in
            ``self.config.chroms`` order.
        """
        stage = '1-filter-biallelic-snps'
        stage_root = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        created = []
        for chrom in self.config.chroms:
            source_vcf = self.config.vcfs[chrom]
            filtered_name = 'filtered.snps.c{0}.vcf.gz'.format(chrom)
            created.append(self.workflow.add_task(
                func=filter_biallelic_snps,
                params={
                    'chrom': chrom,
                    'in_vcf': source_vcf,
                    'out_vcf': os.path.join(stage_root, chrom, filtered_name),
                    'in_min_vqslod': self.config.vqslod_threshold,
                },
                stage_name=stage,
                uid='{}'.format(chrom),
                drm_params=to_json(filter_biallelic_snps_lsf_params(email)),
            ))

        return created
Beispiel #6
0
    def create_bcftools_stats_tasks(self, parent_tasks, step_number):
        """Add one ``bcftools_stats`` task per parent task.

        The stage name is produced by ``self._construct_task_name`` from
        ``'bcftools-stats'`` and ``step_number``.  Each task reads the
        parent's ``out_vcf`` and writes ``<chrom>.stats.out`` in a
        per-chromosome directory under the stage directory.

        Args:
            parent_tasks: iterable of tasks whose ``params`` mapping holds
                ``in_chrom`` and ``out_vcf``.
            step_number: forwarded to ``self._construct_task_name``.

        Returns:
            List of the values returned by ``self.workflow.add_task``, in
            ``parent_tasks`` order.
        """
        stage = self._construct_task_name('bcftools-stats', step_number)
        stage_root = os.path.join(self.config.rootdir, stage)

        # One serialized DRM parameter string is shared by all tasks.
        drm_json = to_json(get_lsf_params(bcftools_stats_lsf_params,
                                          self.config))

        def add_for(parent):
            chrom = parent.params['in_chrom']
            stats_name = '{}.stats.out'.format(chrom)
            return self.workflow.add_task(
                func=bcftools_stats,
                params={
                    'in_vcf': parent.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_stats': os.path.join(stage_root, chrom, stats_name),
                },
                stage_name=stage,
                uid='{}'.format(chrom),
                drm_params=drm_json,
                parents=[parent],
            )

        return [add_for(parent) for parent in parent_tasks]
Beispiel #7
0
    def create_remove_ac_0_tasks(self, step_number):
        """Add one ``gatk_select_variants_remove_ac_0`` task per chromosome.

        Iterates ``self.config.chroms``; the input VCF for each chromosome is
        taken from ``self.config.vcfs``.  The output VCF and log are written
        to a per-chromosome directory under the stage directory.

        Args:
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            List of the values returned by ``self.workflow.add_task``.
        """
        stage = self._construct_task_name('select-variants-ac-0-removal',
                                          step_number)
        stage_root = os.path.join(self.config.rootdir, stage)

        drm_json = to_json(get_lsf_params(
            gatk_select_variants_remove_ac_0_lsf_params,
            self.config.email,
            self.config.docker,
            self.config.drm_queue,
        ))

        created = []
        for chrom in self.config.chroms:
            source_vcf = self.config.vcfs[chrom]
            chrom_dir_vcf = 'combined.c{0}.vcf.gz'.format(chrom)
            chrom_dir_log = 'select-variants-chrom-{}-gatk.log'.format(chrom)
            select_params = {
                'in_chrom': chrom,
                'in_vcf': source_vcf,
                'out_vcf': os.path.join(stage_root, chrom, chrom_dir_vcf),
                'out_log': os.path.join(stage_root, chrom, chrom_dir_log),
            }
            created.append(self.workflow.add_task(
                func=gatk_select_variants_remove_ac_0,
                params=select_params,
                stage_name=stage,
                uid='{}'.format(chrom),
                drm_params=drm_json,
            ))

        return created
Beispiel #8
0
    def create_count_sample_missingness_tasks(self, step_number):
        """Add ``count_sample_missingness`` tasks for numeric chromosomes.

        Iterates ``self.config.chroms`` and skips every chromosome for which
        ``get_chrom_number(chrom)`` is not made up of digits only.  Each
        remaining chromosome gets a task that reads its VCF from
        ``self.config.vcfs`` and writes a JSON file and a log in a
        per-chromosome directory under the stage directory.

        Args:
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            List of the values returned by ``self.workflow.add_task``.
        """
        stage = self._construct_task_name('count-sample-missingness',
                                          step_number)
        stage_root = os.path.join(self.config.rootdir, stage)

        drm_json = to_json(
            get_lsf_params(count_sample_missingness_lsf_params, self.config))

        created = []
        for chrom in self.config.chroms:
            # Non-numeric chromosomes (the original note names X, Y and MT)
            # are not counted.
            if not get_chrom_number(chrom).isdigit():
                continue

            json_name = '{0}-sample-missingness-counts.json'.format(chrom)
            log_name = '{}-sample-missingness-counts.log'.format(chrom)
            created.append(self.workflow.add_task(
                func=count_sample_missingness,
                params={
                    'in_vcf': self.config.vcfs[chrom],
                    'in_chrom': chrom,
                    'out_json': os.path.join(stage_root, chrom, json_name),
                    'out_log': os.path.join(stage_root, chrom, log_name),
                },
                stage_name=stage,
                uid='{}'.format(chrom),
                drm_params=drm_json,
            ))

        return created
Beispiel #9
0
    def create_speedseq_realign_tasks(self):
        """Add one ``exec_speedseq`` task per sample in the configuration.

        For each key of ``self.config.sample_data`` the sample's ``bams``
        entries are joined with single spaces into ``input_bams``.  The output
        prefix and temporary directory both live under
        ``<rootdir>/1-exec-speedseq-realign/<sample_id>``.

        Returns:
            List of the values returned by ``self.workflow.add_task``.
        """
        stage = '1-exec-speedseq-realign'
        stage_root = os.path.join(self.config.rootdir, stage)
        email = self.config.email
        job_group = self.config.drm_job_group
        samples = self.config.sample_data

        created = []
        for sample_id in samples.keys():
            bams = samples[sample_id]['bams']
            # Read but not used further; kept so that a sample without this
            # metadata entry still fails here with ``KeyError``.
            _original_name = samples[sample_id]['meta']['original-name']

            sample_dir = os.path.join(stage_root, sample_id)
            realign_params = {
                'output_prefix': os.path.join(
                    stage_root, sample_id,
                    "{}.b38.realign".format(sample_id)),
                'tmpdir': os.path.join(sample_dir, 'tmpdir'),
                'input_bams': ' '.join(bams),
            }

            created.append(self.workflow.add_task(
                func=exec_speedseq,
                params=realign_params,
                stage_name=stage,
                uid=sample_id,
                drm_params=to_json(exec_speedseq_lsf_params(email, job_group)),
            ))

        return created
Beispiel #10
0
    def create_calculate_sample_missingness_task(self, parent_tasks,
                                                 step_number):
        """Add the single ``calculate_sample_missingness`` summary task.

        The input is the wildcard path ``<rootdir>/<parent stage>/*/*.json``,
        where the parent stage name is read from the first parent task.  The
        statistics file and its log are written to the stage directory.

        Args:
            parent_tasks: non-empty sequence of tasks; the first one must
                expose ``stage.name``.  All of them become parents.
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            The value returned by ``self.workflow.add_task``.
        """
        stage = self._construct_task_name('calculate-sample-missingness',
                                          step_number)
        stage_root = os.path.join(self.config.rootdir, stage)

        upstream_stage = parent_tasks[0].stage.name
        upstream_root = os.path.join(self.config.rootdir, upstream_stage)
        json_glob = os.path.join(upstream_root, '*', '*.json')

        drm_json = to_json(get_lsf_params(
            calculate_sample_missingness_lsf_params,
            self.config.email,
            self.config.docker,
            self.config.drm_queue,
        ))

        stats_path = os.path.join(stage_root, 'sample-missingness-pct.dat')
        log_path = os.path.join(stage_root, 'sample-missingness-pct.dat.log')

        return self.workflow.add_task(
            func=calculate_sample_missingness,
            params={
                'in_json': json_glob,
                'out_stats': stats_path,
                'out_log': log_path,
            },
            stage_name=stage,
            uid='1-22',
            drm_params=drm_json,
            parents=parent_tasks,
        )
Beispiel #11
0
    def create_remove_symbolic_deletion_tasks(self, parent_tasks, step_number):
        """Add one ``remove_symbolic_deletion_alleles`` task per parent task.

        Each task reads the parent's ``out_vcf`` and writes a VCF and a log
        into a per-chromosome directory under the stage directory.

        Args:
            parent_tasks: iterable of tasks whose ``params`` mapping holds
                ``in_chrom`` and ``out_vcf``.
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            List of the values returned by ``self.workflow.add_task``, in
            ``parent_tasks`` order.
        """
        stage = self._construct_task_name('remove-symbolic-alleles',
                                          step_number)
        stage_root = os.path.join(self.config.rootdir, stage)

        drm_json = to_json(get_lsf_params(
            remove_symbolic_deletion_alleles_lsf_params,
            self.config.email,
            self.config.docker,
        ))

        created = []
        for parent in parent_tasks:
            chrom = parent.params['in_chrom']
            vcf_name = 'combined.c{0}.vcf.gz'.format(chrom)
            log_name = 'remove-symbolic-alleles-chrom-{}.log'.format(chrom)
            created.append(self.workflow.add_task(
                func=remove_symbolic_deletion_alleles,
                params={
                    'in_vcf': parent.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(stage_root, chrom, vcf_name),
                    'out_log': os.path.join(stage_root, chrom, log_name),
                },
                stage_name=stage,
                uid='{}'.format(chrom),
                drm_params=drm_json,
                parents=[parent],
            ))

        return created
Beispiel #12
0
    def create_decompose_normalize_unique_tasks(self, parent_tasks,
                                                step_number):
        """Add one ``normalize_decompose_unique`` task per parent task.

        Each task reads the parent's ``out_vcf`` and writes a VCF and a log
        into a per-chromosome directory under the stage directory.

        Args:
            parent_tasks: iterable of tasks whose ``params`` mapping holds
                ``in_chrom`` and ``out_vcf``.
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            List of the values returned by ``self.workflow.add_task``, in
            ``parent_tasks`` order.
        """
        stage = self._construct_task_name('decompose-normalize-uniq',
                                          step_number)
        stage_root = os.path.join(self.config.rootdir, stage)

        drm_json = to_json(get_lsf_params(
            normalize_decompose_unique_lsf_params,
            self.config.email,
            self.config.docker,
            self.config.drm_queue,
        ))

        def add_for(parent):
            chrom = parent.params['in_chrom']
            vcf_name = 'combined.c{0}.vcf.gz'.format(chrom)
            log_name = 'decompose-normalize-unique-{}.log'.format(chrom)
            return self.workflow.add_task(
                func=normalize_decompose_unique,
                params={
                    'in_vcf': parent.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(stage_root, chrom, vcf_name),
                    'out_log': os.path.join(stage_root, chrom, log_name),
                },
                stage_name=stage,
                uid='{}'.format(chrom),
                drm_params=drm_json,
                parents=[parent],
            )

        return [add_for(parent) for parent in parent_tasks]
Beispiel #13
0
    def create_variant_eval_tasks(self, parent_tasks, step_number):
        """Add one ``gatk_variant_eval`` task per parent task.

        Each task reads the parent's ``out_vcf`` and writes a statistics file
        and a log into a per-chromosome directory under the stage directory.

        Args:
            parent_tasks: iterable of tasks whose ``params`` mapping holds
                ``in_chrom`` and ``out_vcf``.
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            List of the values returned by ``self.workflow.add_task``, in
            ``parent_tasks`` order.
        """
        stage = self._construct_task_name('gatk-variant-eval', step_number)
        stage_root = os.path.join(self.config.rootdir, stage)

        drm_json = to_json(get_lsf_params(
            gatk_variant_eval_lsf_params,
            self.config.email,
            self.config.docker,
            self.config.drm_queue,
        ))

        created = []
        for parent in parent_tasks:
            chrom = parent.params['in_chrom']
            stats_name = 'chrom-{}-variant-eval.out'.format(chrom)
            log_name = 'chrom-{}-variant-eval.log'.format(chrom)
            eval_params = {
                'in_vcf': parent.params['out_vcf'],
                'in_chrom': chrom,
                'out_stats': os.path.join(stage_root, chrom, stats_name),
                'out_log': os.path.join(stage_root, chrom, log_name),
            }
            created.append(self.workflow.add_task(
                func=gatk_variant_eval,
                params=eval_params,
                stage_name=stage,
                uid='{}'.format(chrom),
                drm_params=drm_json,
                parents=[parent],
            ))

        return created
Beispiel #14
0
    def create_1000G_annotation_tasks(self, parent_tasks, step_number):
        """Add one ``annotation_1000G`` task per parent task.

        Each task reads the parent's ``out_vcf`` and writes an annotated VCF
        and a log into a per-chromosome directory under the stage directory.

        Args:
            parent_tasks: iterable of tasks whose ``params`` mapping holds
                ``in_chrom`` and ``out_vcf``.
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            List of the values returned by ``self.workflow.add_task``, in
            ``parent_tasks`` order.
        """
        stage = self._construct_task_name('annotate-w-1000G', step_number)
        stage_root = os.path.join(self.config.rootdir, stage)

        drm_json = to_json(get_lsf_params(
            annotation_1000G_lsf_params,
            self.config.email,
            self.config.docker,
            self.config.drm_queue,
        ))

        def add_for(parent):
            chrom = parent.params['in_chrom']
            vcf_name = '1kg-annotated.c{}.vcf.gz'.format(chrom)
            log_name = '1000G-annotate.{}.log'.format(chrom)
            return self.workflow.add_task(
                func=annotation_1000G,
                params={
                    'in_vcf': parent.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(stage_root, chrom, vcf_name),
                    'out_log': os.path.join(stage_root, chrom, log_name),
                },
                stage_name=stage,
                uid='{}'.format(chrom),
                drm_params=drm_json,
                parents=[parent],
            )

        return [add_for(parent) for parent in parent_tasks]
Beispiel #15
0
    def create_filter_variant_missingness_tasks(self, parent_tasks,
                                                step_number):
        """Add one ``filter_variant_missingness`` task per parent task.

        Each task reads the parent's ``out_vcf`` and writes a VCF and a log
        into a per-chromosome directory under the stage directory.

        Args:
            parent_tasks: iterable of tasks whose ``params`` mapping holds
                ``in_chrom`` and ``out_vcf``.
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            List of the values returned by ``self.workflow.add_task``, in
            ``parent_tasks`` order.
        """
        stage = self._construct_task_name('filter-variant-missingness',
                                          step_number)
        stage_root = os.path.join(self.config.rootdir, stage)

        drm_json = to_json(
            get_lsf_params(filter_variant_missingness_lsf_params, self.config))

        created = []
        for parent in parent_tasks:
            chrom = parent.params['in_chrom']
            vcf_name = 'combined.c{0}.vcf.gz'.format(chrom)
            log_name = 'filter-missingness-{}.log'.format(chrom)
            created.append(self.workflow.add_task(
                func=filter_variant_missingness,
                params={
                    'in_vcf': parent.params['out_vcf'],
                    'in_chrom': chrom,
                    'out_vcf': os.path.join(stage_root, chrom, vcf_name),
                    'out_log': os.path.join(stage_root, chrom, log_name),
                },
                stage_name=stage,
                uid='{}'.format(chrom),
                drm_params=drm_json,
                parents=[parent],
            ))

        return created
Beispiel #16
0
    def create_variant_eval_summary_task(self, parent_tasks, step_number):
        """Add the single ``variant_eval_summary`` task to the workflow.

        The input directory is ``<rootdir>/<parent stage>``, where the parent
        stage name is read from the first parent task; the output directory is
        the new stage's own directory.

        Args:
            parent_tasks: non-empty sequence of tasks; the first one must
                expose ``stage.name``.  All of them become parents.
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            The value returned by ``self.workflow.add_task``.
        """
        stage = self._construct_task_name('gatk-variant-eval-summary',
                                          step_number)
        rootdir = self.config.rootdir
        summary_dir = os.path.join(rootdir, stage)
        upstream_dir = os.path.join(self.config.rootdir,
                                    parent_tasks[0].stage.name)

        drm_json = to_json(get_lsf_params(
            variant_eval_summary_lsf_params,
            self.config.email,
            self.config.docker,
            self.config.drm_queue,
        ))

        return self.workflow.add_task(
            func=variant_eval_summary,
            params={'in_dir': upstream_dir, 'out_dir': summary_dir},
            stage_name=stage,
            uid='all-chroms',
            drm_params=drm_json,
            parents=parent_tasks,
        )
Beispiel #17
0
    def create_LCR_annotation_tasks(self, parent_tasks, step_number):
        """Add one ``annotation_LCR`` task per parent task.

        Each task reads the parent's ``out_vcf`` and writes an annotated VCF
        and a log into a per-chromosome directory under the stage directory.

        Args:
            parent_tasks: iterable of tasks whose ``params`` mapping holds
                ``in_chrom`` and ``out_vcf``.
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            List of the values returned by ``self.workflow.add_task``, in
            ``parent_tasks`` order.
        """
        stage = self._construct_task_name('Low-Confidence-Region-annotation',
                                          step_number)
        stage_root = os.path.join(self.config.rootdir, stage)

        drm_json = to_json(get_lsf_params(annotation_LCR_lsf_params,
                                          self.config))

        created = []
        for parent in parent_tasks:
            chrom = parent.params['in_chrom']
            vcf_name = 'b38.LCR.annotated.c{}.vcf.gz'.format(chrom)
            log_name = 'LCR.annotation.{}.log'.format(chrom)
            lcr_params = {
                'in_vcf': parent.params['out_vcf'],
                'in_chrom': chrom,
                'out_vcf': os.path.join(stage_root, chrom, vcf_name),
                'out_log': os.path.join(stage_root, chrom, log_name),
            }
            created.append(self.workflow.add_task(
                func=annotation_LCR,
                params=lcr_params,
                stage_name=stage,
                uid='{}'.format(chrom),
                drm_params=drm_json,
                parents=[parent],
            ))

        return created
Beispiel #18
0
    def create_data_frame_task(self, parent_task):
        """Add the ``create_evec_data_frame`` task to the workflow.

        The input is ``merged.eigenstrat.pca.evec`` inside the parent's
        ``out_prj_dir``; the output is
        ``<rootdir>/7-make-data-frame/merged.eigenstrat.pca.evec.tsv``.

        Args:
            parent_task: task whose ``params`` mapping holds ``out_prj_dir``.

        Returns:
            The value returned by ``self.workflow.add_task``.
        """
        stage = '7-make-data-frame'
        stage_root = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        evec_path = os.path.join(parent_task.params['out_prj_dir'],
                                 'merged.eigenstrat.pca.evec')
        tsv_path = os.path.join(stage_root, 'merged.eigenstrat.pca.evec.tsv')

        return self.workflow.add_task(
            func=create_evec_data_frame,
            params={'in_file': evec_path, 'out_file': tsv_path},
            stage_name=stage,
            uid='all-chroms',
            drm_params=to_json(create_evec_data_frame_lsf_params(email)),
            parents=[parent_task],
        )
Beispiel #19
0
    def create_bcftools_stats_summary_task(self, parent_tasks, step_number):
        """Add the single ``bcftools_stats_summary`` task to the workflow.

        The input directory is ``<rootdir>/<parent stage>``, where the parent
        stage name is read from the first parent task; the output directory is
        the new stage's own directory.

        Args:
            parent_tasks: non-empty sequence of tasks; the first one must
                expose ``stage.name``.  All of them become parents.
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            The value returned by ``self.workflow.add_task``.
        """
        stage = self._construct_task_name('bcftools-stats-summary',
                                          step_number)
        summary_dir = os.path.join(self.config.rootdir, stage)
        upstream_dir = os.path.join(self.config.rootdir,
                                    parent_tasks[0].stage.name)

        drm_json = to_json(
            get_lsf_params(bcftools_stats_summary_lsf_params, self.config))

        return self.workflow.add_task(
            func=bcftools_stats_summary,
            params={'in_dir': upstream_dir, 'out_dir': summary_dir},
            stage_name=stage,
            uid='all-chroms',
            drm_params=drm_json,
            parents=parent_tasks,
        )
Beispiel #20
0
    def create_aggregate_mie_stats_tasks(self, parent_tasks):
        """Add one ``aggregate_mie_statistics`` task per type/method group.

        Parent tasks are grouped by their ``type`` (``snps`` or ``indels``)
        and ``method`` (``tranche`` or ``percentile``) parameters.  Each
        non-empty group gets one task that reads from the parents' stage
        directory and writes ``<type>.<method>.tsv`` under
        ``<rootdir>/3-aggregate-mie-stats``.

        Changes from the previous version:
          * the list of created tasks is now returned, as the other
            ``create_*_tasks`` methods do (it used to be built and dropped);
          * a type/method combination with no parent tasks is skipped instead
            of raising ``IndexError``; an empty ``parent_tasks`` yields an
            empty list.

        Args:
            parent_tasks: sequence of tasks exposing ``stage.name`` and a
                ``params`` mapping with ``type`` and ``method``.

        Returns:
            List of the values returned by ``self.workflow.add_task``, ordered
            snps/tranche, snps/percentile, indels/tranche, indels/percentile.
        """
        tasks = []
        if not parent_tasks:
            # No stage to read from and nothing to aggregate.
            return tasks

        stage = '3-aggregate-mie-stats'
        basedir = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        # All parents are expected to belong to one stage; the first one
        # supplies the input directory.
        input_dir = os.path.join(self.config.rootdir,
                                 parent_tasks[0].stage.name)

        for category in ('snps', 'indels'):
            for method in ('tranche', 'percentile'):
                group = [
                    ptask for ptask in parent_tasks
                    if ptask.params['type'] == category
                    and ptask.params['method'] == method
                ]
                if not group:
                    continue

                out_filename = '.'.join([category, method, 'tsv'])
                task = {
                    'func': aggregate_mie_statistics,
                    'params': {
                        'in_category': category,
                        'in_method': method,
                        'in_dir': input_dir,
                        'out_file': os.path.join(basedir, out_filename),
                    },
                    'stage_name': stage,
                    'uid': '{category}:{method}'.format(method=method,
                                                        category=category),
                    'drm_params':
                        to_json(aggregate_mie_statistics_lsf_params(email)),
                    'parents': group,
                }
                tasks.append(self.workflow.add_task(**task))

        return tasks
Beispiel #21
0
    def create_concatenate_vcfs_task(self, parent_tasks, step_number):
        """Add one ``concatenate_vcfs`` task per reference chromosome.

        Parent tasks are sorted by the ``Region`` built from their
        ``in_chrom`` parameter and then grouped by that region's ``chrom``
        attribute.  Each group's ``out_vcf`` files become the inputs of one
        concatenation task.

        Fix: ``region_key`` used to pass the bare expression
        ``all_sequences.fa.fai`` to ``Region``, which raised ``NameError`` as
        soon as the parents were sorted.  It now passes the reference index
        path, which is defined once and shared by both key functions.

        Args:
            parent_tasks: iterable of tasks whose ``params`` mapping holds
                ``in_chrom`` and ``out_vcf``.
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            List of the values returned by ``self.workflow.add_task``, one per
            chromosome group.
        """
        tasks = []
        stage = self._construct_task_name('concat-vcfs', step_number)
        output_dir = os.path.join(self.config.rootdir, stage)

        lsf_params = get_lsf_params(concatenate_vcfs_lsf_params,
                                    self.config.email, self.config.docker,
                                    self.config.drm_queue)
        lsf_params_json = to_json(lsf_params)

        reference_fai = os.path.join(
            '/gscmnt/ams1102/info/model_data/2869585698/build106942997',
            'all_sequences.fa.fai')

        def region_key(task):
            return Region(reference_fai, task.params['in_chrom'])

        def chromosome_key(task):
            return Region(reference_fai, task.params['in_chrom']).chrom

        # groupby only merges adjacent items, hence the sort first.
        # NOTE(review): this relies on Region ordering keeping regions of one
        # chromosome next to each other -- confirm against Region.
        ordered_parents = sorted(parent_tasks, key=region_key)
        for ref_chrom, chrom_tasks in groupby(ordered_parents,
                                              key=chromosome_key):
            ptasks = list(chrom_tasks)
            input_vcfs = [x.params['out_vcf'] for x in ptasks]
            output_vcf = 'concatenated.c{}.vcf.gz'.format(ref_chrom)
            output_log = 'concatenate.{}.log'.format(ref_chrom)
            task = {
                'func': concatenate_vcfs,
                'params': {
                    'in_vcfs': input_vcfs,
                    'in_chrom': ref_chrom,
                    'out_vcf': os.path.join(output_dir, ref_chrom, output_vcf),
                    'out_log': os.path.join(output_dir, ref_chrom, output_log),
                },
                'stage_name': stage,
                'uid': '{chrom}'.format(chrom=ref_chrom),
                'drm_params': lsf_params_json,
                'parents': ptasks,
            }
            tasks.append(self.workflow.add_task(**task))
        return tasks
Beispiel #22
0
    def create_plink_pipeline_tasks(self, parent_tasks):
        """Add one ``plink_pipeline`` task per parent task.

        Parents are processed in ascending ``id`` order.  Each task reads the
        parent's ``out_vcf`` and ``self.config.plink_fam_file`` and uses
        ``<rootdir>/2-plink-pipeline/<type>/<method>/<label>/<chrom>`` as its
        output directory.

        Cleanup: the params literal listed the ``'chrom'`` key twice with the
        same value; the duplicate is removed (the resulting dict is
        unchanged), along with a commented-out call.

        Args:
            parent_tasks: iterable of tasks exposing ``id`` and a ``params``
                mapping with ``in_chrom``, ``in_label``, ``in_method``,
                ``in_type`` and ``out_vcf``.

        Returns:
            List of the values returned by ``self.workflow.add_task``.
        """
        tasks = []
        stage = '2-plink-pipeline'
        basedir = os.path.join(self.config.rootdir, stage)
        email = self.config.email

        for ptask in sorted(parent_tasks, key=lambda t: t.id):
            chrom = ptask.params['in_chrom']
            label = ptask.params['in_label']
            method = ptask.params['in_method']
            category = ptask.params['in_type']

            output_dir = os.path.join(basedir, category, method, label, chrom)

            task = {
                'func': plink_pipeline,
                'params': {
                    'in_vcf': ptask.params['out_vcf'],
                    'in_trio_fam': self.config.plink_fam_file,
                    'chrom': chrom,
                    'type': category,
                    'method': method,
                    'label': label,
                    'out_dir': output_dir,
                },
                'stage_name': stage,
                'uid': '{category}:{method}:{label}:{chrom}'.format(
                    chrom=chrom, method=method, category=category,
                    label=label),
                'drm_params': to_json(plink_pipeline_lsf_params(email)),
                'parents': [ptask],
            }
            tasks.append(self.workflow.add_task(**task))

        return tasks
Beispiel #23
0
    def create_concatenate_vcfs_task(self, parent_tasks, step_number):
        """Add one ``concatenate_vcfs`` task per reference chromosome.

        Parent tasks are sorted by the ``Region`` built from their
        ``in_chrom`` parameter and then grouped by that region's ``chrom``
        attribute.  Each group's ``out_vcf`` files become the inputs of one
        concatenation task.

        Args:
            parent_tasks: iterable of tasks whose ``params`` mapping holds
                ``in_chrom`` and ``out_vcf``.
            step_number: forwarded to ``self._construct_task_name`` to build
                the stage name.

        Returns:
            List of the values returned by ``self.workflow.add_task``, one per
            chromosome group.
        """
        stage = self._construct_task_name('concat-vcfs', step_number)
        stage_root = os.path.join(self.config.rootdir, stage)

        drm_json = to_json(
            get_lsf_params(concatenate_vcfs_lsf_params, self.config))

        # Reference index handed to Region by both key functions.
        fai_path = '/gscmnt/gc2802/halllab/ccdg_resources/genomes/human/GRCh38DH/all_sequences.fa.fai'

        def region_key(task):
            return Region(fai_path, task.params['in_chrom'])

        def chromosome_key(task):
            return Region(fai_path, task.params['in_chrom']).chrom

        created = []
        # groupby only merges adjacent items, hence the sort first.
        by_region = sorted(parent_tasks, key=region_key)
        for ref_chrom, members in groupby(by_region, key=chromosome_key):
            group = list(members)
            vcf_name = 'concatenated.c{}.vcf.gz'.format(ref_chrom)
            log_name = 'concatenate.{}.log'.format(ref_chrom)
            created.append(self.workflow.add_task(
                func=concatenate_vcfs,
                params={
                    'in_vcfs': [member.params['out_vcf'] for member in group],
                    'in_chrom': ref_chrom,
                    'out_vcf': os.path.join(stage_root, ref_chrom, vcf_name),
                    'out_log': os.path.join(stage_root, ref_chrom, log_name),
                },
                stage_name=stage,
                uid='{}'.format(ref_chrom),
                drm_params=drm_json,
                parents=group,
            ))
        return created
Beispiel #24
0
    def create_vcf_partition_chromosome_tasks(self, method, label, category,
                                              interval):
        """Add one ``vcf_partition`` task per configured chromosome.

        For every entry of ``self.config.chroms`` the input VCF comes from
        ``self.config.vcfs`` and the output is written to
        ``<rootdir>/1-partition-vcfs/<category>/<method>/<label>/<chrom>/``
        as ``selected.c<chrom>.vcf.gz``.

        Args:
            method: passed to the task as ``in_method``; also a path and uid
                component.
            label: passed to the task as ``in_label``; also a path and uid
                component.
            category: passed to the task as ``in_type``; also a path and uid
                component.
            interval: indexable pair; element 0 is passed as
                ``in_min_vqslod`` and element 1 as ``in_max_vqslod``.

        Returns:
            List of the values returned by ``self.workflow.add_task``.
        """
        stage = '1-partition-vcfs'
        partition_root = os.path.join(self.config.rootdir, stage, category,
                                      method, label)
        email = self.config.email

        created = []
        for chrom in self.config.chroms:
            source_vcf = self.config.vcfs[chrom]
            selected_name = 'selected.c{0}.vcf.gz'.format(chrom)
            partition_params = {
                'in_vcf': source_vcf,
                'out_vcf': os.path.join(partition_root, chrom, selected_name),
                'in_min_vqslod': interval[0],
                'in_max_vqslod': interval[1],
                'in_samples': self.config.control_samples_file,
                'in_type': category,
                'in_method': method,
                'in_chrom': chrom,
                'in_label': label,
            }
            created.append(self.workflow.add_task(
                func=vcf_partition,
                params=partition_params,
                stage_name=stage,
                uid='{category}:{method}:{label}:{chrom}'.format(
                    category=category, method=method, label=label,
                    chrom=chrom),
                drm_params=to_json(vcf_partition_lsf_params(email)),
            ))

        return created