def Pipeline_Somatic():
    """Assemble the somatic-calling pipeline from its stage definitions.

    Reads the ``test`` and ``target`` flags from ``wga_settings`` and returns
    ``sequence_(align_to_reference, remove_dup, preprocess_alignment,
    somatic_call)``.

    * ``test`` truthy: the ``'interval'`` split tuple holds only ``20``;
      otherwise it holds 1..22 followed by ``'X'`` and ``'Y'``.
    * ``target`` truthy: ``picard.MERGE_SAMS`` is used in ``remove_dup``;
      otherwise ``picard.MarkDuplicates``.
    """
    testing = wga_settings['test']
    target = wga_settings['target']

    if testing:
        intervals = ('interval', [20])
    else:
        # list(...) is required: on Python 3 a range object cannot be
        # concatenated with a list. The value is unchanged on Python 2.
        intervals = ('interval', list(range(1, 23)) + ['X', 'Y'])

    align_to_reference = sequence_(
        apply_(
            reduce_(
                ['sample_name', 'library', 'platform', 'platform_unit',
                 'sample_type', 'chunk', 'rgid'],
                pipes.AlignAndCleanMEM,
            ),
        ),
    )

    if target:
        remove_dup = sequence_(
            reduce_(['sample_name', 'sample_type', 'rgid'], picard.MERGE_SAMS),
        )
    else:
        remove_dup = sequence_(
            reduce_(['sample_name', 'sample_type', 'rgid'],
                    picard.MarkDuplicates),
        )

    preprocess_alignment = sequence_(
        map_(samtools.IndexBam),
        # Disabled alternative from the original source:
        #   ... if not is_capture or testing
        #   else map_(gatk.RealignerTargetCreator)
        apply_(split_([intervals], gatk.RealignerTargetCreator)),
        map_(gatk.IndelRealigner),
        map_(gatk.BQSR),
        apply_(
            reduce_(['sample_name', 'sample_type', 'rgid'],
                    gatk.BQSRGatherer),
            # TODO (original author): BQSRGatherer is added as a parent with
            # a hack inside ApplyBQSR.cmd
            map_(gatk.ApplyBQSR),
        ),
    )

    somatic_call = sequence_(
        apply_(
            sequence_(
                map_(mutect.createInput),
                reduce_(['rgid', 'interval'], mutect.Somatic,
                        tag={'vcf': 'Mutect'}),
                reduce_(['vcf'], gatk.CombineVariants,
                        'Combine Into Raw VCFs'),
            ),
            sequence_(
                map_(svdetect.PreProcessing),
                map_(svdetect.link2SV),
            ),
        ),
    )

    return sequence_(
        align_to_reference,
        remove_dup,
        preprocess_alignment,
        somatic_call,
    )
def Pipeline_Somatic():
    """Build and return the somatic pipeline as one ``sequence_`` of stages.

    The stages, in order, are: alignment (``pipes.AlignAndCleanMEM``),
    duplicate handling (``picard.MERGE_SAMS`` when ``wga_settings['target']``
    is truthy, else ``picard.MarkDuplicates``), alignment preprocessing
    (index, realign, BQSR) and somatic calling (MuTect + SVDetect).

    ``wga_settings['test']`` restricts the ``'interval'`` split to ``[20]``;
    otherwise 1..22, ``'X'`` and ``'Y'`` are used.
    """
    testing = wga_settings['test']
    target = wga_settings['target']

    chromosomes = [20] if testing else list(range(1, 23)) + ['X', 'Y']
    intervals = ('interval', chromosomes)

    align_to_reference = sequence_(
        apply_(
            reduce_(
                [
                    'sample_name', 'library', 'platform', 'platform_unit',
                    'sample_type', 'chunk', 'rgid'
                ],
                pipes.AlignAndCleanMEM,
            )
        ),
    )

    # Only the tool differs between the two modes; the grouping keys match.
    dedup_tool = picard.MERGE_SAMS if target else picard.MarkDuplicates
    remove_dup = sequence_(
        reduce_(['sample_name', 'sample_type', 'rgid'], dedup_tool)
    )

    preprocess_alignment = sequence_(
        map_(samtools.IndexBam),
        apply_(split_([intervals], gatk.RealignerTargetCreator)),
        map_(gatk.IndelRealigner),
        map_(gatk.BQSR),
        apply_(
            reduce_(['sample_name', 'sample_type', 'rgid'],
                    gatk.BQSRGatherer),
            # TODO (original author): BQSRGatherer is added as a parent with
            # a hack inside ApplyBQSR.cmd
            map_(gatk.ApplyBQSR),
        ),
    )

    somatic_call = sequence_(
        apply_(
            sequence_(
                map_(mutect.createInput),
                reduce_(['rgid', 'interval'], mutect.Somatic,
                        tag={'vcf': 'Mutect'}),
                reduce_(['vcf'], gatk.CombineVariants,
                        'Combine Into Raw VCFs'),
            ),
            sequence_(map_(svdetect.PreProcessing), map_(svdetect.link2SV)),
        )
    )

    return sequence_(align_to_reference, remove_dup, preprocess_alignment,
                     somatic_call)
def Pipeline():
    """Assemble the alignment + UnifiedGenotyper/VQSR pipeline.

    Reads ``wga_settings['test']`` and ``wga_settings['target']`` and returns
    ``sequence_(align_to_reference, remove_dup, preprocess_alignment,
    call_variants)``.

    * ``test`` truthy: the ``'interval'`` split tuple holds only ``20``;
      otherwise 1..22 followed by ``'X'`` and ``'Y'``.
    * ``target`` truthy: ``picard.MERGE_SAMS`` is used in ``remove_dup``;
      otherwise ``picard.MarkDuplicates``.
    """
    testing = wga_settings['test']
    target = wga_settings['target']

    if testing:
        intervals = ('interval', [20])
    else:
        # list(...) is required: on Python 3 a range object cannot be
        # concatenated with a list. The value is unchanged on Python 2.
        intervals = ('interval', list(range(1, 23)) + ['X', 'Y'])
    glm = ('glm', ['SNP', 'INDEL'])

    align_to_reference = sequence_(
        apply_(
            reduce_(
                ['sample_name', 'library', 'platform', 'platform_unit',
                 'chunk'],
                pipes.AlignAndCleanALN,
            ),
        ),
    )

    if target:
        remove_dup = sequence_(reduce_(['sample_name'], picard.MERGE_SAMS))
    else:
        remove_dup = sequence_(reduce_(['sample_name'], picard.MarkDuplicates))

    preprocess_alignment = sequence_(
        map_(samtools.IndexBam),
        # Disabled alternative from the original source:
        #   ... if not is_capture or testing
        #   else map_(gatk.RealignerTargetCreator)
        apply_(split_([intervals], gatk.RealignerTargetCreator)),
        map_(gatk.IndelRealigner),
        map_(gatk.BQSR),
        apply_(
            reduce_(['sample_name'], gatk.BQSRGatherer),
            # TODO (original author): BQSRGatherer is added as a parent with
            # a hack inside ApplyBQSR.cmd
            map_(gatk.ApplyBQSR),
        ),
    )

    call_variants = sequence_(
        apply_(
            # A HaplotypeCaller reduce_ over 'interval' was left disabled here
            # in the original source.
            reduce_split_(['interval'], [glm], gatk.UnifiedGenotyper,
                          tag={'vcf': 'UnifiedGenotyper'}),
            combine=True,
        ),
        reduce_(['vcf'], gatk.CombineVariants, 'Combine Into Raw VCFs'),
        split_([glm], gatk.VQSR),
    )

    return sequence_(
        align_to_reference,
        remove_dup,
        preprocess_alignment,
        call_variants,
    )
def Pipeline():
    """Return the four-stage variant-calling pipeline.

    The result is ``sequence_`` applied to, in order: the alignment stage
    (``pipes.AlignAndCleanALN``), the duplicate stage, the alignment
    preprocessing stage and the variant-calling stage
    (``gatk.UnifiedGenotyper`` -> ``gatk.CombineVariants`` -> ``gatk.VQSR``).

    Settings read from ``wga_settings``:
      ``test``   - truthy limits the ``'interval'`` split to ``[20]``.
      ``target`` - truthy selects ``picard.MERGE_SAMS`` over
                   ``picard.MarkDuplicates``.
    """
    testing = wga_settings['test']
    target = wga_settings['target']

    chromosomes = [20] if testing else list(range(1, 23)) + ['X', 'Y']
    intervals = ('interval', chromosomes)
    glm = ('glm', ['SNP', 'INDEL'])

    # Stage 1: alignment.
    aligner = reduce_(
        ['sample_name', 'library', 'platform', 'platform_unit', 'chunk'],
        pipes.AlignAndCleanALN,
    )
    align_to_reference = sequence_(apply_(aligner), )

    # Stage 2: merge or mark duplicates, depending on the `target` flag.
    dedup_tool = picard.MERGE_SAMS if target else picard.MarkDuplicates
    remove_dup = sequence_(reduce_(['sample_name'], dedup_tool))

    # Stage 3: index, realign around indels, recalibrate base qualities.
    preprocess_alignment = sequence_(
        map_(samtools.IndexBam),
        apply_(split_([intervals], gatk.RealignerTargetCreator)),
        map_(gatk.IndelRealigner),
        map_(gatk.BQSR),
        apply_(
            reduce_(['sample_name'], gatk.BQSRGatherer),
            # TODO (original author): BQSRGatherer is added as a parent with
            # a hack inside ApplyBQSR.cmd
            map_(gatk.ApplyBQSR),
        ),
    )

    # Stage 4: genotype, combine, then VQSR split by `glm`.
    call_variants = sequence_(
        apply_(
            reduce_split_(
                ['interval'],
                [glm],
                gatk.UnifiedGenotyper,
                tag={'vcf': 'UnifiedGenotyper'},
            ),
            combine=True,
        ),
        reduce_(['vcf'], gatk.CombineVariants, 'Combine Into Raw VCFs'),
        split_([glm], gatk.VQSR),
    )

    return sequence_(
        align_to_reference,
        remove_dup,
        preprocess_alignment,
        call_variants,
    )
def Pipeline_split():
    """Return the FASTQ-splitting pipeline.

    Chains ``json_.Split``, ``json_.Total_json`` (reduced over
    ``'gz_output_dir'``) and ``json_.Format_json`` in one ``sequence_``, then
    wraps that in an outer ``sequence_``.
    """
    split_stage = map_(json_.Split)
    total_stage = apply_(reduce_(['gz_output_dir'], json_.Total_json))
    format_stage = map_(json_.Format_json)

    split_fastq = sequence_(split_stage, total_stage, format_stage)
    return sequence_(split_fastq)
def Pipeline_local():
    """Assemble the local variant of the pipeline (no interval splitting).

    Returns ``sequence_(align_to_reference, remove_dup, preprocess_alignment,
    call_variants)``. When ``wga_settings['target']`` is truthy the
    ``remove_dup`` stage is an empty ``sequence_()``; otherwise it runs
    ``picard.MarkDuplicates`` reduced over ``'sample_name'``.

    Side effect: prints the ``glm`` tuple to stdout (debug output kept from
    the original).
    """
    target = wga_settings['target']

    glm = ('glm', ['SNP', 'INDEL'])
    # print(...) with a single argument gives the same output on Python 2 and
    # Python 3; the original `print [glm][0]` statement fails on Python 3.
    print(glm)

    align_to_reference = sequence_(
        apply_(
            reduce_(['sample_name', 'library', 'platform', 'platform_unit'],
                    pipes.AlignAndCleanALN),
        ),
    )

    if target:
        remove_dup = sequence_()
    else:
        remove_dup = sequence_(
            reduce_(['sample_name'], picard.MarkDuplicates),
        )

    preprocess_alignment = sequence_(
        map_(samtools.IndexBam),
        map_(gatk.RealignerTargetCreator),
        map_(gatk.IndelRealigner),
        map_(gatk.BQSR),
        map_(gatk.ApplyBQSR_local),
    )

    call_variants = sequence_(
        apply_(
            map_(gatk.UnifiedGenotyper_local,
                 tag={'vcf': 'UnifiedGenotyper'}),
        ),
    )

    return sequence_(
        align_to_reference,
        remove_dup,
        preprocess_alignment,
        call_variants,
    )
def Pipeline_split():
    """Build the pipeline that splits input and produces formatted JSON.

    The inner sequence runs ``json_.Split``, then ``json_.Total_json``
    reduced over ``'gz_output_dir'``, then ``json_.Format_json``; the
    returned value is an outer ``sequence_`` containing that inner sequence.
    """
    stages = [
        map_(json_.Split),
        apply_(reduce_(['gz_output_dir'], json_.Total_json)),
        map_(json_.Format_json),
    ]
    split_fastq = sequence_(*stages)
    return sequence_(split_fastq)
def Pipeline_local():
    """Build and return the local (non-split) pipeline.

    Stages, in order: alignment via ``pipes.AlignAndCleanALN``, duplicate
    marking (skipped with an empty ``sequence_()`` when
    ``wga_settings['target']`` is truthy), alignment preprocessing ending in
    ``gatk.ApplyBQSR_local``, and calling via
    ``gatk.UnifiedGenotyper_local``.

    Side effect: prints the ``glm`` tuple to stdout (debug output kept from
    the original).
    """
    target = wga_settings['target']

    glm = ('glm', ['SNP', 'INDEL'])
    # Same output as the original `print([glm][0])`, without the throwaway
    # single-element list.
    print(glm)

    align_to_reference = sequence_(
        apply_(
            reduce_(['sample_name', 'library', 'platform', 'platform_unit'],
                    pipes.AlignAndCleanALN)
        ),
    )

    if target:
        remove_dup = sequence_()
    else:
        remove_dup = sequence_(reduce_(['sample_name'], picard.MarkDuplicates))

    preprocess_alignment = sequence_(
        map_(samtools.IndexBam),
        map_(gatk.RealignerTargetCreator),
        map_(gatk.IndelRealigner),
        map_(gatk.BQSR),
        map_(gatk.ApplyBQSR_local),
    )

    call_variants = sequence_(
        apply_(
            map_(gatk.UnifiedGenotyper_local, tag={'vcf': 'UnifiedGenotyper'})
        )
    )

    return sequence_(align_to_reference, remove_dup, preprocess_alignment,
                     call_variants)
def Pipeline():
    """Assemble the full pipeline, with or without the ReduceReads stage.

    Reads ``wga_settings['capture']`` and ``wga_settings['test']``.

    Returns ``sequence_`` over: ``align_to_reference``,
    ``preprocess_alignment``, (only when ``capture`` is falsy) a
    ``gatk.ReduceReads`` stage reduced over ``'sample_name'`` and split over
    the intervals, ``call_variants`` and the module-level
    ``massive_annotation``.

    ``test`` truthy limits the ``'interval'`` split to ``[20]``; otherwise
    1..22, ``'X'`` and ``'Y'`` are used.
    """
    is_capture = wga_settings['capture']
    testing = wga_settings['test']

    # split_ tuples
    if testing:
        intervals = ('interval', [20])
    else:
        # list(...) is required: on Python 3 a range object cannot be
        # concatenated with a list. The value is unchanged on Python 2.
        intervals = ('interval', list(range(1, 23)) + ['X', 'Y'])
    glm = ('glm', ['SNP', 'INDEL'])

    align_to_reference = sequence_(
        apply_(
            reduce_(['sample_name', 'library'], misc.FastqStats),
            reduce_(
                ['sample_name', 'library', 'platform', 'platform_unit',
                 'chunk'],
                pipes.AlignAndClean,
            ),
        ),
    )

    preprocess_alignment = sequence_(
        reduce_(['sample_name'], picard.MarkDuplicates),
        apply_(
            map_(picard.CollectMultipleMetrics),
            # Disabled alternative from the original source:
            #   ... if not is_capture or testing
            #   else map_(gatk.RealignerTargetCreator)
            split_([intervals], gatk.RealignerTargetCreator),
        ),
        map_(gatk.IndelRealigner),
        map_(gatk.BQSR),
        apply_(
            reduce_(['sample_name'], gatk.BQSRGatherer),
            # TODO (original author): BQSRGatherer is added as a parent with
            # a hack inside ApplyBQSR.cmd
            map_(gatk.ApplyBQSR),
        ),
    )

    call_variants = sequence_(
        apply_(
            # A HaplotypeCaller reduce_ over 'interval' and a capture-specific
            # reduce_split_ over [intervals, glm] were left disabled here in
            # the original source.
            reduce_split_(['interval'], [glm], gatk.UnifiedGenotyper,
                          tag={'vcf': 'UnifiedGenotyper'}),
            combine=True,
        ),
        reduce_(['vcf'], gatk.CombineVariants, 'Combine Into Raw VCFs'),
        split_([glm], gatk.VQSR),
        map_(gatk.Apply_VQSR),
        reduce_(['vcf'], gatk.CombineVariants, "Combine into Master VCFs"),
    )

    # The two modes differ only by the extra ReduceReads stage.
    stages = [align_to_reference, preprocess_alignment]
    if not is_capture:
        stages.append(
            reduce_split_(['sample_name'], [intervals], gatk.ReduceReads)
        )
    stages.append(call_variants)
    stages.append(massive_annotation)
    return sequence_(*stages)
def Pipeline():
    """Build and return the complete pipeline as one ``sequence_``.

    Settings read from ``wga_settings``:
      ``capture`` - when falsy, a ``gatk.ReduceReads`` stage (reduced over
                    ``'sample_name'``, split over the intervals) is inserted
                    between preprocessing and variant calling.
      ``test``    - when truthy, the ``'interval'`` split is ``[20]`` instead
                    of 1..22 plus ``'X'`` and ``'Y'``.

    The final stage is always the module-level ``massive_annotation``.
    """
    is_capture = wga_settings['capture']
    testing = wga_settings['test']

    # split_ tuples
    # list(range(...)) keeps the concatenation valid on Python 3, where a
    # range object cannot be added to a list; the value is the same on
    # Python 2.
    chromosomes = [20] if testing else list(range(1, 23)) + ['X', 'Y']
    intervals = ('interval', chromosomes)
    glm = ('glm', ['SNP', 'INDEL'])

    align_to_reference = sequence_(
        apply_(
            reduce_(['sample_name', 'library'], misc.FastqStats),
            reduce_(['sample_name', 'library', 'platform', 'platform_unit',
                     'chunk'],
                    pipes.AlignAndClean),
        ),
    )

    preprocess_alignment = sequence_(
        reduce_(['sample_name'], picard.MarkDuplicates),
        apply_(
            map_(picard.CollectMultipleMetrics),
            split_([intervals], gatk.RealignerTargetCreator),
        ),
        map_(gatk.IndelRealigner),
        map_(gatk.BQSR),
        apply_(
            reduce_(['sample_name'], gatk.BQSRGatherer),
            # TODO (original author): BQSRGatherer is added as a parent with
            # a hack inside ApplyBQSR.cmd
            map_(gatk.ApplyBQSR),
        ),
    )

    call_variants = sequence_(
        apply_(
            reduce_split_(['interval'], [glm], gatk.UnifiedGenotyper,
                          tag={'vcf': 'UnifiedGenotyper'}),
            combine=True,
        ),
        reduce_(['vcf'], gatk.CombineVariants, 'Combine Into Raw VCFs'),
        split_([glm], gatk.VQSR),
        map_(gatk.Apply_VQSR),
        reduce_(['vcf'], gatk.CombineVariants, "Combine into Master VCFs"),
    )

    # Capture and non-capture runs share every stage except ReduceReads.
    stages = [align_to_reference, preprocess_alignment]
    if not is_capture:
        stages.append(
            reduce_split_(['sample_name'], [intervals], gatk.ReduceReads)
        )
    stages += [call_variants, massive_annotation]
    return sequence_(*stages)