コード例 #1
0
def initialize(results_path, haplotype_path, cancer_dir_path):
    """Prepare PHASED.BED and the per-event exon / SNP BED files.

    The input VCF is phased, split into two haplotype VCFs, thinned and
    converted to BED. Each haplotype BED gets a trailing ``hap1`` / ``hap2``
    column and both are merged, version-sorted on columns 1 and 2, into
    ``PHASED.BED`` under ``results_path``. Then, for the gain and the loss
    CNV file in turn, the exons intersecting the CNV regions and the phased
    SNPs intersecting those exons are written under ``haplotype_path`` and
    handed to ``splitBed`` with an event-specific prefix.

    Args:
        results_path: directory receiving the phasing intermediates and
            ``PHASED.BED``.
        haplotype_path: directory receiving the ``<event>_*.bed`` files.
        cancer_dir_path: not used by this function; kept so existing callers
            keep working.

    Raises:
        Exception: any failure of a helper is logged with its traceback and
            re-raised unchanged.
    """
    try:
        # (event name, CNV regions file) pairs, processed in this order.
        cnv_files = (
            ('gain', params.GetGainCNV()),
            ('loss', params.GetLossCNV()),
        )
        logger.debug(' --- Initializing input files  --- ')
        vcf_path = bamhelp.GetVCF()
        exons_path = bamhelp.GetExons()
        vcf = os.path.basename(vcf_path)

        # Dots are escaped so that only a literal ".vcf.gz" suffix is rewritten.
        phasedvcf = "/".join(
            [results_path, sub(r'\.vcf\.gz$', '_phased.vcf.gz', vcf)])

        hap1vcf = "/".join([results_path, "hap1_het.vcf"])
        hap2vcf = "/".join([results_path, "hap2_het.vcf"])
        hap1vcffiltered = "/".join([results_path, "hap1_het_filtered"])
        hap2vcffiltered = "/".join([results_path, "hap2_het_filtered"])
        hap1vcffilteredtobed = "/".join([results_path, "hap1_het_filtered.bed"])
        hap2vcffilteredtobed = "/".join([results_path, "hap2_het_filtered.bed"])
        phased_bed = "/".join([results_path, "PHASED.BED"])

        phaseVCF(vcf_path, phasedvcf)
        getVCFHaplotypes(phasedvcf, hap1vcf, hap2vcf)
        thinVCF(hap1vcf, hap1vcffiltered)
        thinVCF(hap2vcf, hap2vcffiltered)
        # The thinned VCFs are read back as "<prefix>.recode.vcf".
        convertvcftobed(hap1vcffiltered + ".recode.vcf", hap1vcffilteredtobed)
        convertvcftobed(hap2vcffiltered + ".recode.vcf", hap2vcffilteredtobed)

        # Tag every record with its haplotype as an extra tab-separated column.
        runCommand("""sed -i 's/$/\thap1/' """ + hap1vcffilteredtobed)
        runCommand("""sed -i 's/$/\thap2/' """ + hap2vcffilteredtobed)

        # sort reads both inputs itself, so no scratch "tmp.bed" has to be
        # created in (and cleaned out of) the current working directory.
        runCommand(" ".join([
            "sort -V -k1,1 -k2,2",
            hap1vcffilteredtobed, hap2vcffilteredtobed,
            ">", phased_bed,
        ]))

        for event, cnv_path in cnv_files:
            exonsinroibed = "/".join(
                [haplotype_path, event + "_exons_in_roi.bed"])
            hetsnpbed = "/".join([haplotype_path, event + "_het_snp.bed"])

            intersectBed(exons_path, cnv_path, exonsinroibed, wa=True)
            intersectBed(phased_bed, exonsinroibed, hetsnpbed, wa=True)
            splitBed(exonsinroibed, event + '_exons_in_roi_')
            splitBed(hetsnpbed, event + '_het_snp_')

    except Exception:
        logger.exception("Initialization error !")
        raise
    logger.debug("--- initialization complete ---")
コード例 #2
0
def initialize_pipeline(phase_path, haplotype_path, cnv_path):
    """Build the exon and phased-SNP BED files for one CNV event file.

    The event label is the base name of ``cnv_path`` without its extension.
    Two files are produced under ``haplotype_path``:

    * ``exons_in_roi<event>.bed``: exons intersected with the CNV file,
      reduced to columns 1-3 plus the last column of the ``-wa -wb`` output.
    * ``het_snp<event>.bed``: ``PHASED.BED`` records intersected with the
      file above, reduced to columns 1-6 plus the last column.

    Both results are then handed to ``splitBed``.

    Args:
        phase_path: directory containing ``PHASED.BED``.
        haplotype_path: directory receiving the output and scratch files.
        cnv_path: CNV regions file; its base name provides the event label.

    Raises:
        Exception: any failure is logged with its traceback and re-raised.
    """
    exons_path = bamhelp.GetExons()
    event = os.path.splitext(os.path.basename(cnv_path))[0]
    phased_bed = "/".join([phase_path, "PHASED.BED"])
    bedtools_path = bamhelp.GetBedtoolsPath()

    try:
        logger.debug(' --- Initializing input files  --- ')
        exonsinroibed = "/".join(
            [haplotype_path, "exons_in_roi" + event + ".bed"])
        hetsnpbed = "/".join([haplotype_path, "het_snp" + event + ".bed"])
        # Scratch file shared by both intersect steps.
        tmp = "/".join([haplotype_path, event + "_tmp.bed"])

        try:
            command = " ".join([
                bedtools_path, "intersect -a", exons_path, "-b", cnv_path,
                "-wa -wb > ", tmp
            ])
            runCommand(command)

            # Keep the exon coordinates plus the last overlap column.
            cmd = "".join([
                """awk '{print $1"\t"$2"\t"$3"\t"$NF}' """, tmp, " > ",
                exonsinroibed
            ])
            runCommand(cmd)
            splitBed(exonsinroibed, '_exons_in_roi' + event)

            command = " ".join([
                bedtools_path, "intersect -a", phased_bed, "-b", exonsinroibed,
                "-wa -wb >", tmp
            ])
            runCommand(command)

            # Keep the six PHASED.BED columns plus the last overlap column.
            cmd = "".join([
                """awk '{print $1"\t"$2"\t"$3"\t"$4"\t"$5"\t"$6"\t"$NF}' """,
                tmp, " > ", hetsnpbed
            ])
            runCommand(cmd)
            splitBed(hetsnpbed, '_het_snp' + event)
        finally:
            # Remove the scratch file even when a step fails, so an aborted
            # run does not leave it behind. The existence check keeps the
            # cleanup from masking the original error.
            if os.path.exists(tmp):
                os.remove(tmp)
    except Exception:
        logger.exception("Initialization error !")
        raise
    logger.debug("--- initialization complete ---")
コード例 #3
0
def initialize0(results_path, cancer_dir_path):
    """Create PHASED.BED under ``results_path`` when phasing is enabled.

    Nothing is produced unless ``params.GetPhase()`` is truthy. Otherwise the
    input VCF is phased, split into two haplotype VCFs, thinned and converted
    to BED; each haplotype BED gets a trailing ``hap1`` / ``hap2`` column and
    both are merged, version-sorted on columns 1 and 2, into ``PHASED.BED``.

    Args:
        results_path: directory receiving the intermediates and
            ``PHASED.BED``.
        cancer_dir_path: not used by this function; kept so existing callers
            keep working.

    Raises:
        Exception: errors from the helpers propagate unchanged.
    """
    vcf_path = bamhelp.GetVCF()
    if not params.GetPhase():
        return

    vcf = os.path.basename(vcf_path)
    # The dot is escaped so that only a literal ".vcf" suffix is rewritten.
    phasedvcf = "/".join(
        [results_path, sub(r'\.vcf$', '_phased.vcf.gz', vcf)])

    hap1vcf = "/".join([results_path, "hap1_het.vcf"])
    hap2vcf = "/".join([results_path, "hap2_het.vcf"])
    hap1vcffiltered = "/".join([results_path, "hap1_het_filtered"])
    hap2vcffiltered = "/".join([results_path, "hap2_het_filtered"])
    hap1vcffilteredtobed = "/".join([results_path, "hap1_het_filtered.bed"])
    hap2vcffilteredtobed = "/".join([results_path, "hap2_het_filtered.bed"])
    phased_bed = "/".join([results_path, "PHASED.BED"])

    phaseVCF(vcf_path, phasedvcf)
    getVCFHaplotypes(phasedvcf, hap1vcf, hap2vcf)
    thinVCF(hap1vcf, hap1vcffiltered)
    thinVCF(hap2vcf, hap2vcffiltered)
    # The thinned VCFs are read back as "<prefix>.recode.vcf".
    convertvcftobed(hap1vcffiltered + ".recode.vcf", hap1vcffilteredtobed)
    convertvcftobed(hap2vcffiltered + ".recode.vcf", hap2vcffilteredtobed)

    # Tag every record with its haplotype as an extra tab-separated column.
    runCommand("""sed -i 's/$/\thap1/' """ + hap1vcffilteredtobed)
    runCommand("""sed -i 's/$/\thap2/' """ + hap2vcffilteredtobed)

    # sort reads both inputs itself, so no scratch "tmp.bed" has to be
    # created in (and cleaned out of) the current working directory.
    runCommand(" ".join([
        "sort -V -k1,1 -k2,2",
        hap1vcffilteredtobed, hap2vcffilteredtobed,
        ">", phased_bed,
    ]))
コード例 #4
0
ファイル: methods.py プロジェクト: cyclica/bamgineer
def initialize0(results_path, cancer_dir_path):
    """Create PHASED.BED under ``results_path`` when phasing is enabled.

    Nothing is produced unless ``params.GetPhase()`` is truthy. Otherwise the
    input VCF is phased, split into two haplotype VCFs, thinned and converted
    to BED, and the two haplotype BED files are passed to
    ``generatePhasedBed`` together with the ``PHASED.BED`` target path.

    Args:
        results_path: directory receiving the intermediates and
            ``PHASED.BED``.
        cancer_dir_path: not used by this function; kept so existing callers
            keep working.

    Raises:
        Exception: any failure is logged with its traceback and re-raised.
    """
    try:
        vcf_path = bamhelp.GetVCF()
        if not params.GetPhase():
            return

        vcf = os.path.basename(vcf_path)
        # The dot is escaped so that only a literal ".vcf" suffix is rewritten.
        phasedvcf = "/".join(
            [results_path, sub(r'\.vcf$', '_phased.vcf.gz', vcf)])

        hap1vcf = "/".join([results_path, "hap1_het.vcf"])
        hap2vcf = "/".join([results_path, "hap2_het.vcf"])
        hap1vcffiltered = "/".join([results_path, "hap1_het_filtered"])
        hap2vcffiltered = "/".join([results_path, "hap2_het_filtered"])
        hap1vcffilteredtobed = "/".join(
            [results_path, "hap1_het_filtered.bed"])
        hap2vcffilteredtobed = "/".join(
            [results_path, "hap2_het_filtered.bed"])
        phased_bed = "/".join([results_path, "PHASED.BED"])

        phaseVCF(vcf_path, phasedvcf)
        getVCFHaplotypes(phasedvcf, hap1vcf, hap2vcf)
        thinVCF(hap1vcf, hap1vcffiltered)
        thinVCF(hap2vcf, hap2vcffiltered)
        # The thinned VCFs are read back as "<prefix>.recode.vcf".
        convertvcftobed(hap1vcffiltered + ".recode.vcf",
                        hap1vcffilteredtobed)
        convertvcftobed(hap2vcffiltered + ".recode.vcf",
                        hap2vcffilteredtobed)

        generatePhasedBed(hap1vcffilteredtobed, hap2vcffilteredtobed,
                          phased_bed)

    except Exception:
        logger.exception("Initialization error !")
        raise
コード例 #5
0
def initialize():
    """Create the working directories, PHASED.BED and the per-event BED files.

    Reads ``results_path``, ``cancer_dir_path``, ``haplotype_path``,
    ``tmpbams_path``, ``finalbams_path`` and ``log`` from the enclosing
    scope; none of them is defined inside this function.

    Steps:
        1. Create the five working directories.
        2. Phase the input VCF, split it into two haplotype VCFs, thin them
           and convert them to BED.
        3. Append a ``hap1`` / ``hap2`` column to each haplotype BED and merge
           both, version-sorted on columns 1 and 2, into ``PHASED.BED`` under
           ``results_path``.
        4. For each of the gain and loss CNV files that is set, write the
           exons intersecting the CNV regions and the phased SNPs
           intersecting those exons under ``haplotype_path`` and hand both to
           ``utils.splitBed``.

    Raises:
        Exception: any failure is reported through
            ``pipelineHelpers.Logging`` and re-raised unchanged.
    """
    try:
        utils.createDirectory(results_path)
        utils.createDirectory(cancer_dir_path)
        utils.createDirectory(haplotype_path)
        utils.createDirectory(tmpbams_path)
        utils.createDirectory(finalbams_path)

        # (event name, CNV regions file) pairs, processed in this order.
        cnv_files = (
            ('gain', params.GetGainCNV()),
            ('loss', params.GetLossCNV()),
        )

        pipelineHelpers.Logging("INFO", log, " --- Initializing input files  --- ")
        vcf_path = bamhelp.GetVCF()
        exons_path = bamhelp.GetExons()
        vcf = os.path.basename(vcf_path)
        # The dot is escaped so that only a literal ".vcf" suffix is rewritten.
        phasedvcf = "/".join(
            [results_path, sub(r'\.vcf$', '_phased.vcf.gz', vcf)])

        hap1vcf = "/".join([results_path, "hap1_het.vcf"])
        hap2vcf = "/".join([results_path, "hap2_het.vcf"])
        hap1vcffiltered = "/".join([results_path, "hap1_het_filtered"])
        hap2vcffiltered = "/".join([results_path, "hap2_het_filtered"])
        hap1vcffilteredtobed = "/".join([results_path, "hap1_het_filtered.bed"])
        hap2vcffilteredtobed = "/".join([results_path, "hap2_het_filtered.bed"])
        phased_bed = "/".join([results_path, "PHASED.BED"])

        utils.phaseVCF(vcf_path, phasedvcf)
        utils.getVCFHaplotypes(phasedvcf, hap1vcf, hap2vcf)
        utils.thinVCF(hap1vcf, hap1vcffiltered)
        utils.thinVCF(hap2vcf, hap2vcffiltered)
        # The thinned VCFs are read back as "<prefix>.recode.vcf".
        utils.convertvcftobed(hap1vcffiltered + ".recode.vcf", hap1vcffilteredtobed)
        utils.convertvcftobed(hap2vcffiltered + ".recode.vcf", hap2vcffilteredtobed)

        # Tag every record with its haplotype as an extra tab-separated column.
        utils.runCommand("""sed -i 's/$/\thap1/' """ + hap1vcffilteredtobed)
        utils.runCommand("""sed -i 's/$/\thap2/' """ + hap2vcffilteredtobed)

        # sort reads both inputs itself, so no scratch "tmp.bed" has to be
        # created in (and cleaned out of) the current working directory.
        utils.runCommand(" ".join([
            "sort -V -k1,1 -k2,2",
            hap1vcffilteredtobed, hap2vcffilteredtobed,
            ">", phased_bed,
        ]))

        for event, cnv_path in cnv_files:
            # Skip an event whose CNV file is not set.
            if not cnv_path:
                continue
            exonsinroibed = "/".join(
                [haplotype_path, event + "_exons_in_roi.bed"])
            hetsnpbed = "/".join([haplotype_path, event + "_het_snp.bed"])

            utils.intersectBed(exons_path, cnv_path, exonsinroibed, wa=True)
            utils.intersectBed(phased_bed, exonsinroibed, hetsnpbed, wa=True)
            utils.splitBed(exonsinroibed, event + '_exons_in_roi_')
            utils.splitBed(hetsnpbed, event + '_het_snp_')

    except Exception:
        # Failures are reported at ERROR level, not INFO.
        pipelineHelpers.Logging("ERROR", log, "Initialization error !")
        raise

    # Successful completion is informational, not an error.
    pipelineHelpers.Logging("INFO", log, "--- initialization complete ---")