def initialize(results_path, haplotype_path, cancer_dir_path):
    """Phase the configured VCF and derive the per-event BED inputs.

    The work is delegated to helpers defined outside this function:

    1. ``phaseVCF`` / ``getVCFHaplotypes`` / ``thinVCF`` / ``convertvcftobed``
       turn the VCF returned by ``bamhelp.GetVCF()`` into one BED file per
       haplotype under ``results_path``.
    2. Each haplotype BED gets a trailing ``hap1`` / ``hap2`` column and both
       are version-sorted together into ``<results_path>/PHASED.BED``.
    3. For each of the ``gain`` and ``loss`` events, the exons BED is
       intersected with that event's CNV bed, the phased BED is intersected
       with the result, and both outputs are passed to ``splitBed``.

    Note:
        A zero-argument ``initialize()`` defined later in this file rebinds
        the same name -- NOTE(review): confirm which one callers expect.

    Args:
        results_path: Directory that receives the phasing outputs.
        haplotype_path: Directory that receives the per-event BED files.
        cancer_dir_path: Not used by this function.

    Raises:
        Exception: Anything raised by the helpers is logged and re-raised.
    """
    try:
        event_list = ['gain', 'loss']
        # Explicit mapping instead of looking names up through locals(),
        # which silently breaks if a local is renamed.
        cnv_by_event = {
            'gain': params.GetGainCNV(),
            'loss': params.GetLossCNV(),
        }

        logger.debug(' --- Initializing input files --- ')
        vcf_path = bamhelp.GetVCF()
        exons_path = bamhelp.GetExons()
        # NOTE(review): value is unused here; the call is retained because
        # the getter's side effects are not visible from this function.
        reference_path = bamhelp.GetRef()

        vcf = os.path.basename(vcf_path)
        # Dots are escaped so only a literal ".vcf.gz" suffix is rewritten.
        phasedvcf = "/".join(
            [results_path, sub(r'\.vcf\.gz$', '_phased.vcf.gz', vcf)])
        hap1vcf = "/".join([results_path, "hap1_het.vcf"])
        hap2vcf = "/".join([results_path, "hap2_het.vcf"])
        hap1vcffiltered = "/".join([results_path, "hap1_het_filtered"])
        hap2vcffiltered = "/".join([results_path, "hap2_het_filtered"])
        hap1vcffilteredtobed = "/".join(
            [results_path, "hap1_het_filtered.bed"])
        hap2vcffilteredtobed = "/".join(
            [results_path, "hap2_het_filtered.bed"])
        phased_bed = "/".join([results_path, "PHASED.BED"])

        phaseVCF(vcf_path, phasedvcf)
        getVCFHaplotypes(phasedvcf, hap1vcf, hap2vcf)
        thinVCF(hap1vcf, hap1vcffiltered)
        thinVCF(hap2vcf, hap2vcffiltered)
        # The thinned VCFs are read back from "<prefix>.recode.vcf".
        convertvcftobed(hap1vcffiltered + ".recode.vcf", hap1vcffilteredtobed)
        convertvcftobed(hap2vcffiltered + ".recode.vcf", hap2vcffilteredtobed)

        # Append a tab-separated haplotype label to every BED line.
        runCommand("sed -i 's/$/\thap1/' " + hap1vcffilteredtobed)
        runCommand("sed -i 's/$/\thap2/' " + hap2vcffilteredtobed)
        # sort accepts several input files, so the two BEDs are merged and
        # sorted in one step; no scratch "tmp.bed" is left in the working
        # directory if a later step fails.
        runCommand(
            "sort -V -k1,1 -k2,2 " + hap1vcffilteredtobed + " "
            + hap2vcffilteredtobed + " > " + phased_bed)

        for event in event_list:
            exonsinroibed = "/".join(
                [haplotype_path, event + "_exons_in_roi.bed"])
            hetsnpbed = "/".join([haplotype_path, event + "_het_snp.bed"])

            intersectBed(
                exons_path, cnv_by_event[event], exonsinroibed, wa=True)
            intersectBed(phased_bed, exonsinroibed, hetsnpbed, wa=True)
            splitBed(exonsinroibed, event + '_exons_in_roi_')
            splitBed(hetsnpbed, event + '_het_snp_')
    except Exception:
        logger.exception("Initialization error !")
        raise

    logger.debug("--- initialization complete ---")
def initialize_pipeline(phase_path, haplotype_path, cnv_path):
    """Build the exon and het-SNP BED files for a single CNV bed file.

    The event name is the base name of ``cnv_path`` without its extension.
    Using the bedtools executable from ``bamhelp.GetBedtoolsPath()``:

    1. The exons BED is intersected with ``cnv_path`` (``-wa -wb``); columns
       1-3 plus the last column are written to
       ``<haplotype_path>/exons_in_roi<event>.bed``, which is then passed to
       ``splitBed``.
    2. ``<phase_path>/PHASED.BED`` is intersected with that file; columns 1-6
       plus the last column are written to
       ``<haplotype_path>/het_snp<event>.bed``, which is then passed to
       ``splitBed``.

    Both intersections go through the scratch file
    ``<haplotype_path>/<event>_tmp.bed``.

    Args:
        phase_path: Directory containing ``PHASED.BED``.
        haplotype_path: Directory that receives the output BED files.
        cnv_path: Path to the CNV bed file for this event.

    Raises:
        Exception: Anything raised by the helpers is logged and re-raised.
    """
    exons_path = bamhelp.GetExons()
    event = os.path.splitext(os.path.basename(cnv_path))[0]
    phased_bed = "/".join([phase_path, "PHASED.BED"])
    bedtools_path = bamhelp.GetBedtoolsPath()
    tmp = "/".join([haplotype_path, event + "_tmp.bed"])

    try:
        logger.debug(' --- Initializing input files --- ')
        exonsinroibed = "/".join(
            [haplotype_path, "exons_in_roi" + event + ".bed"])
        hetsnpbed = "/".join([haplotype_path, "het_snp" + event + ".bed"])

        # Exons overlapping the CNV regions; -wb appends the CNV record.
        command = " ".join([
            bedtools_path, "intersect -a", exons_path, "-b", cnv_path,
            "-wa -wb > ", tmp
        ])
        runCommand(command)
        # Keep the exon coordinates plus the last column of the CNV record.
        cmd = "".join([
            """awk '{print $1"\t"$2"\t"$3"\t"$NF}' """, tmp, " > ",
            exonsinroibed
        ])
        runCommand(cmd)
        splitBed(exonsinroibed, '_exons_in_roi' + event)

        # Phased SNPs falling inside those exons (scratch file is reused).
        command = " ".join([
            bedtools_path, "intersect -a", phased_bed, "-b", exonsinroibed,
            "-wa -wb >", tmp
        ])
        runCommand(command)
        cmd = "".join([
            """awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$NF}' """,
            tmp, " > ", hetsnpbed
        ])
        runCommand(cmd)
        splitBed(hetsnpbed, '_het_snp' + event)

        os.remove(tmp)
    except Exception:
        logger.exception("Initialization error !")
        # Do not leave the scratch file behind when a step fails part-way.
        if os.path.exists(tmp):
            os.remove(tmp)
        raise

    logger.debug("--- initialization complete ---")
def initialize0(results_path, cancer_dir_path):
    """Phase the configured VCF into ``<results_path>/PHASED.BED``.

    Nothing is produced unless ``params.GetPhase()`` is truthy. When it is,
    the VCF returned by ``bamhelp.GetVCF()`` is phased, split per haplotype,
    thinned and converted to BED through helpers defined elsewhere; each
    haplotype BED is then tagged with a trailing ``hap1`` / ``hap2`` column
    and both are version-sorted together into ``PHASED.BED``.

    Note:
        Another ``initialize0`` with the same signature is defined later in
        this file and rebinds the name -- NOTE(review): confirm which
        definition is meant to survive.

    Args:
        results_path: Directory that receives all generated files.
        cancer_dir_path: Not used by this function.

    Raises:
        Exception: Errors from the helpers propagate unchanged (the original
            ``try: ... except: raise`` wrapper added nothing and was dropped).
    """
    vcf_path = bamhelp.GetVCF()
    # NOTE(review): the next three values are unused here; the calls are
    # retained because the getters' side effects are not visible from this
    # function.
    exons_path = bamhelp.GetExons()
    reference_path = bamhelp.GetRef()
    bedtools_path = bamhelp.GetBedtoolsPath()

    if not params.GetPhase():
        return

    vcf = os.path.basename(vcf_path)
    # Dot is escaped so only a literal ".vcf" suffix is rewritten.
    phasedvcf = "/".join(
        [results_path, sub(r'\.vcf$', '_phased.vcf.gz', vcf)])
    hap1vcf = "/".join([results_path, "hap1_het.vcf"])
    hap2vcf = "/".join([results_path, "hap2_het.vcf"])
    hap1vcffiltered = "/".join([results_path, "hap1_het_filtered"])
    hap2vcffiltered = "/".join([results_path, "hap2_het_filtered"])
    hap1vcffilteredtobed = "/".join([results_path, "hap1_het_filtered.bed"])
    hap2vcffilteredtobed = "/".join([results_path, "hap2_het_filtered.bed"])
    phased_bed = "/".join([results_path, "PHASED.BED"])

    phaseVCF(vcf_path, phasedvcf)
    getVCFHaplotypes(phasedvcf, hap1vcf, hap2vcf)
    thinVCF(hap1vcf, hap1vcffiltered)
    thinVCF(hap2vcf, hap2vcffiltered)
    # The thinned VCFs are read back from "<prefix>.recode.vcf".
    convertvcftobed(hap1vcffiltered + ".recode.vcf", hap1vcffilteredtobed)
    convertvcftobed(hap2vcffiltered + ".recode.vcf", hap2vcffilteredtobed)

    # Append a tab-separated haplotype label to every BED line.
    runCommand("sed -i 's/$/\thap1/' " + hap1vcffilteredtobed)
    runCommand("sed -i 's/$/\thap2/' " + hap2vcffilteredtobed)
    # sort accepts several input files, so the two BEDs are merged and sorted
    # in one step; no scratch "tmp.bed" is left in the working directory if a
    # step fails.
    runCommand(
        "sort -V -k1,1 -k2,2 " + hap1vcffilteredtobed + " "
        + hap2vcffilteredtobed + " > " + phased_bed)
def initialize0(results_path, cancer_dir_path):
    """Phase the configured VCF into ``<results_path>/PHASED.BED``.

    Nothing is produced unless ``params.GetPhase()`` is truthy. When it is,
    the VCF returned by ``bamhelp.GetVCF()`` is phased, split per haplotype,
    thinned and converted to BED through helpers defined elsewhere, and the
    two haplotype BEDs are handed to ``generatePhasedBed`` together with the
    ``PHASED.BED`` output path.

    Args:
        results_path: Directory that receives all generated files.
        cancer_dir_path: Not used by this function.

    Raises:
        Exception: Anything raised by the helpers is logged and re-raised.
    """
    try:
        vcf_path = bamhelp.GetVCF()
        # NOTE(review): the next three values are unused here; the calls are
        # retained because the getters' side effects are not visible from
        # this function.
        exons_path = bamhelp.GetExons()
        reference_path = bamhelp.GetRef()
        bedtools_path = bamhelp.GetBedtoolsPath()

        if params.GetPhase():
            vcf = os.path.basename(vcf_path)
            # Dot is escaped so only a literal ".vcf" suffix is rewritten.
            phasedvcf = "/".join(
                [results_path, sub(r'\.vcf$', '_phased.vcf.gz', vcf)])
            hap1vcf = "/".join([results_path, "hap1_het.vcf"])
            hap2vcf = "/".join([results_path, "hap2_het.vcf"])
            hap1vcffiltered = "/".join([results_path, "hap1_het_filtered"])
            hap2vcffiltered = "/".join([results_path, "hap2_het_filtered"])
            hap1vcffilteredtobed = "/".join(
                [results_path, "hap1_het_filtered.bed"])
            hap2vcffilteredtobed = "/".join(
                [results_path, "hap2_het_filtered.bed"])
            phased_bed = "/".join([results_path, "PHASED.BED"])

            phaseVCF(vcf_path, phasedvcf)
            getVCFHaplotypes(phasedvcf, hap1vcf, hap2vcf)
            thinVCF(hap1vcf, hap1vcffiltered)
            thinVCF(hap2vcf, hap2vcffiltered)
            # The thinned VCFs are read back from "<prefix>.recode.vcf".
            convertvcftobed(hap1vcffiltered + ".recode.vcf",
                            hap1vcffilteredtobed)
            convertvcftobed(hap2vcffiltered + ".recode.vcf",
                            hap2vcffilteredtobed)

            generatePhasedBed(hap1vcffilteredtobed, hap2vcffilteredtobed,
                              phased_bed)
    except Exception:
        logger.exception("Initialization error !")
        raise
def initialize():
    """Create the working directories, phase the VCF and build per-event BEDs.

    Relies on names that are not defined in this function and must exist at
    module level: ``results_path``, ``cancer_dir_path``, ``haplotype_path``,
    ``tmpbams_path``, ``finalbams_path`` and ``log``.

    Steps (delegated to ``utils`` helpers):

    1. Create the five working directories.
    2. Phase the VCF returned by ``bamhelp.GetVCF()``, split it per
       haplotype, thin it and convert each haplotype to BED under
       ``results_path``.
    3. Tag each haplotype BED with a trailing ``hap1`` / ``hap2`` column and
       version-sort both together into ``<results_path>/PHASED.BED``.
    4. For each of the ``gain`` and ``loss`` events whose CNV bed is truthy,
       intersect the exons BED with it, intersect the phased BED with the
       result, and pass both outputs to ``utils.splitBed``. Events without a
       CNV bed are skipped.

    Raises:
        Exception: Anything raised by the helpers is logged and re-raised.
    """
    try:
        for directory in (results_path, cancer_dir_path, haplotype_path,
                          tmpbams_path, finalbams_path):
            utils.createDirectory(directory)

        event_list = ['gain', 'loss']
        # Explicit mapping instead of looking names up through locals(),
        # which silently breaks if a local is renamed.
        cnv_by_event = {
            'gain': params.GetGainCNV(),
            'loss': params.GetLossCNV(),
        }

        pipelineHelpers.Logging(
            "INFO", log, " --- Initializing input files --- ")
        vcf_path = bamhelp.GetVCF()
        exons_path = bamhelp.GetExons()
        # NOTE(review): value is unused here; the call is retained because
        # the getter's side effects are not visible from this function.
        reference_path = bamhelp.GetRef()

        vcf = os.path.basename(vcf_path)
        # Dot is escaped so only a literal ".vcf" suffix is rewritten.
        phasedvcf = "/".join(
            [results_path, sub(r'\.vcf$', '_phased.vcf.gz', vcf)])
        hap1vcf = "/".join([results_path, "hap1_het.vcf"])
        hap2vcf = "/".join([results_path, "hap2_het.vcf"])
        hap1vcffiltered = "/".join([results_path, "hap1_het_filtered"])
        hap2vcffiltered = "/".join([results_path, "hap2_het_filtered"])
        hap1vcffilteredtobed = "/".join(
            [results_path, "hap1_het_filtered.bed"])
        hap2vcffilteredtobed = "/".join(
            [results_path, "hap2_het_filtered.bed"])
        phased_bed = "/".join([results_path, "PHASED.BED"])

        utils.phaseVCF(vcf_path, phasedvcf)
        utils.getVCFHaplotypes(phasedvcf, hap1vcf, hap2vcf)
        utils.thinVCF(hap1vcf, hap1vcffiltered)
        utils.thinVCF(hap2vcf, hap2vcffiltered)
        # The thinned VCFs are read back from "<prefix>.recode.vcf".
        utils.convertvcftobed(
            hap1vcffiltered + ".recode.vcf", hap1vcffilteredtobed)
        utils.convertvcftobed(
            hap2vcffiltered + ".recode.vcf", hap2vcffilteredtobed)

        # Append a tab-separated haplotype label to every BED line.
        utils.runCommand("sed -i 's/$/\thap1/' " + hap1vcffilteredtobed)
        utils.runCommand("sed -i 's/$/\thap2/' " + hap2vcffilteredtobed)
        # sort accepts several input files, so the two BEDs are merged and
        # sorted in one step; no scratch "tmp.bed" is left in the working
        # directory if a later step fails.
        utils.runCommand(
            "sort -V -k1,1 -k2,2 " + hap1vcffilteredtobed + " "
            + hap2vcffilteredtobed + " > " + phased_bed)

        for event in event_list:
            cnv_bed = cnv_by_event[event]
            if not cnv_bed:
                # No CNV bed configured for this event: nothing to intersect.
                continue

            exonsinroibed = "/".join(
                [haplotype_path, event + "_exons_in_roi.bed"])
            hetsnpbed = "/".join([haplotype_path, event + "_het_snp.bed"])

            utils.intersectBed(exons_path, cnv_bed, exonsinroibed, wa=True)
            utils.intersectBed(phased_bed, exonsinroibed, hetsnpbed, wa=True)
            utils.splitBed(exonsinroibed, event + '_exons_in_roi_')
            utils.splitBed(hetsnpbed, event + '_het_snp_')
    except Exception:
        # Failures are reported at ERROR severity (the original logged them
        # as "INFO" and the success message as "ERROR").
        pipelineHelpers.Logging("ERROR", log, "Initialization error !")
        raise

    pipelineHelpers.Logging("INFO", log, "--- initialization complete ---")