コード例 #1
0
ファイル: module03_geneDensity.py プロジェクト: yzqheart/ChIP
    def merge_RPKM_uniq(self, core_num=8):
        """Queue the s15 MergeRPKM shell jobs for replicate and merged samples.

        One work line is generated for every combination of window label
        ("100", "1kb") and sample type ("rep", "mrg").  Each line passes a
        quoted header, the window label, the sample type and the per-sample
        ``<dir>/<sample>/<sample>.RPKM.uniq.<window>`` paths to the script.

        Args:
            core_num: Value forwarded as ``cpu`` to ``running_multi``.  The
                default of 8 is the value that used to be hard-coded here.
        """
        m01.make_dir([self.dir_RPM_mrg])
        sh_file = "%s/s15.MergeRPKM.sh" % (self.scripts)
        sh_work_file = "%s/s15.1.MergeRPKM_work.sh" % (self.scripts)

        # Sample names and the directory holding their per-window files,
        # keyed by sample type.  Each name collection is materialised once so
        # the header columns and the file arguments share one iteration order.
        d_samples = {
            "rep": (list(self.samInfo_pd_ChIP['brief_name']),
                    self.dir_RPM_bins_rep),
            "mrg": (list(set(self.samInfo_pd_ChIP['merge_name'])),
                    self.dir_RPM_bins_mrg),
        }

        l_sh_info = self.s15_merge_RPKM()
        l_sh_work = []

        for window in ["100", "1kb"]:
            for ltype in ["rep", "mrg"]:
                l_sam, RPKM_dir = d_samples[ltype]

                # The header is wrapped in double quotes and uses a literal
                # backslash-t so the shell receives it as a single argument.
                header = "\"#chr\\tbeg\\tend\\t%s\"" % ("\\t".join(l_sam))
                l_RPKM_file = [
                    "%s/%s/%s.RPKM.uniq.%s" % (RPKM_dir, sam, sam, window)
                    for sam in l_sam
                ]
                l_sh_work.append(
                    "sh %s  %s %s %s %s" %
                    (sh_file, header, window, ltype, " ".join(l_RPKM_file))
                )

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #2
0
    def run_macs_rep(self, pvalue=0.001, ref="hs", core_num=4):
        """Queue one s06.1 macs2PeakRep job for every non-skipped replicate.

        Replicates for which ``self.__is_input`` returns a true value are
        skipped (presumably input/control samples -- confirm against that
        helper).  The shift size passed to the script is 300 unless the
        replicate's ``out/out.tab`` file exists and ``get_shiftSize`` returns
        a positive value for it.

        Args:
            pvalue: Rendered with ``%f`` into each work line.
            ref: Forwarded to ``self.s06_1_macs2PeakRep``.
            core_num: Value forwarded as ``cpu`` to ``running_multi``.
        """
        scripts_dir = self.scripts
        sh_file = "%s/s06.1.macs2PeakRep.sh" % scripts_dir
        sh_work_file = "%s/s06.1.macs2PeakRep_work.sh" % scripts_dir

        script_lines = self.s06_1_macs2PeakRep(ref)
        work_lines = []

        for sample in self.samInfo_pd_ChIP['brief_name']:
            row_mask = (self.samInfo_pd_ChIP['brief_name'] == sample)
            if not self.__is_input(row_mask):
                m01.make_dir([self.dir_Peak_rep, sample])
                control = self.samInfo_pd_ChIP[row_mask]['control'].values[0]

                # Fall back to 300 when no usable estimate is on disk.
                shift_size = 300
                shift_table = "%s/%s/out/out.tab" % (
                    self.dir_spp_rep_shiftSize, sample)
                if os.path.isfile(shift_table):
                    estimate = get_shiftSize(shift_table)
                    shift_size = estimate if estimate > 0 else shift_size

                work_lines.append(
                    "sh %s  %s %s %f %d" %
                    (sh_file, sample, control, pvalue, shift_size)
                )

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_lines)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #3
0
    def run_anno_peak(self,
                      TSS_genebody_up,
                      TSS_genebody_down,
                      TSS_promoter_up,
                      TSS_promoter_down,
                      ext_binlen=50,
                      body_bincnt=100,
                      tss_binlen=1,
                      core_num=8):
        """Queue one s12 PeakGeneRegion job per merged sample.

        Args:
            TSS_genebody_up: Forwarded to ``self.s12_PeakGeneRegion``.
            TSS_genebody_down: Forwarded to ``self.s12_PeakGeneRegion``.
            TSS_promoter_up: Forwarded to ``self.s12_PeakGeneRegion``.
            TSS_promoter_down: Forwarded to ``self.s12_PeakGeneRegion``.
            ext_binlen: Forwarded as a keyword to ``s12_PeakGeneRegion``.
            body_bincnt: Forwarded as a keyword to ``s12_PeakGeneRegion``.
            tss_binlen: Forwarded as a keyword to ``s12_PeakGeneRegion``.
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to the previously hard-coded 8.
        """
        sh_file = "%s/s12.PeakGeneRegion.sh" % (self.scripts)
        sh_work_file = "%s/s12.PeakGeneRegion_work.sh" % (self.scripts)

        # Pass the caller's bin settings through.  The literals 50, 100 and 1
        # used to be passed here, so any non-default argument was ignored.
        l_sh_info = self.s12_PeakGeneRegion(TSS_genebody_up,
                                            TSS_genebody_down,
                                            TSS_promoter_up,
                                            TSS_promoter_down,
                                            ext_binlen=ext_binlen,
                                            body_bincnt=body_bincnt,
                                            tss_binlen=tss_binlen)

        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            # Each merged sample gets a directory under both output roots.
            m01.make_dir([self.dir_Peak_mrg_TSS, merge_name])
            m01.make_dir([self.dir_Peak_mrg_Gene, merge_name])
            l_sh_work.append("sh %s %s" % (sh_file, merge_name))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #4
0
    def merge_RPKM_uniq(self, core_num=8):
        """Build and run the s15 MergeRPKM work lines.

        For each window label ("100", "1kb") and each sample type ("rep",
        "mrg") a single shell line is emitted that carries a quoted column
        header followed by one ``.RPKM.uniq.<window>`` path per sample.

        Args:
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to 8, the constant that was hard-coded before.
        """
        m01.make_dir([self.dir_RPM_mrg])
        sh_file = "%s/s15.MergeRPKM.sh" % (self.scripts)
        sh_work_file = "%s/s15.1.MergeRPKM_work.sh" % (self.scripts)

        l_brief = self.samInfo_pd_ChIP['brief_name']
        l_merge = set(self.samInfo_pd_ChIP['merge_name'])

        l_sh_info = self.s15_merge_RPKM()
        l_sh_work = []

        for window in ["100", "1kb"]:
            for ltype in ["rep", "mrg"]:
                # Choose the sample names and their bin directory together.
                if ltype == "mrg":
                    l_sam, RPKM_dir = l_merge, self.dir_RPM_bins_mrg
                else:
                    l_sam, RPKM_dir = l_brief, self.dir_RPM_bins_rep

                # Header and file list iterate the same collection, so the
                # column names line up with the file arguments.
                header = "\"#chr\\tbeg\\tend\\t%s\"" % ("\\t".join(l_sam))
                l_RPKM_file = [
                    "%s/%s/%s.RPKM.uniq.%s" % (RPKM_dir, sam, sam, window)
                    for sam in l_sam
                ]
                l_sh_work.append(
                    "sh %s  %s %s %s %s" %
                    (sh_file, header, window, ltype, " ".join(l_RPKM_file))
                )

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #5
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def make_igv_broad(self, core_num=8):
        """Queue the s11 makeIGV_broad jobs for replicates and merged samples.

        Two work files are produced from the same script: one with a line
        per replicate (pointing at ``self.dir_BroadPeak_rep``) and one with a
        line per merged sample (pointing at ``self.dir_BroadPeak_mrg``).
        Merged samples for which ``self.__is_input`` returns a true value are
        skipped (presumably input/control samples -- confirm against that
        helper).

        Args:
            core_num: Value forwarded as ``cpu`` to both ``running_multi``
                calls; defaults to the previously hard-coded 8.
        """
        sh_file = "%s/s11.makeIGV_broad.sh" % (self.scripts)
        sh_work_repPeak_file = (
            "%s/s11.3.makeIGV.repBroadPeak_work.sh" % (self.scripts))
        sh_work_mrgPeak_file = (
            "%s/s11.4.makeIGV.mrgBroadPeak_work.sh" % (self.scripts))

        l_sh_info = self.s11_makeIGV_broad()
        l_sh_work_rep = []
        l_sh_work_mrg = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_BroadPeak_TDF, merge_name])

            # One line per replicate that belongs to this merged sample.
            for brief in list(self.samInfo_pd_ChIP[idx]['brief_name']):
                l_sh_work_rep.append(
                    "sh %s   %s %s %s" %
                    (sh_file, brief, merge_name, self.dir_BroadPeak_rep))

            l_sh_work_mrg.append(
                "sh %s  %s %s %s" %
                (sh_file, merge_name, merge_name, self.dir_BroadPeak_mrg))

        my_job_rep = m_jobs.run_jobs(
            sh_file, sh_work_repPeak_file, l_sh_info, l_sh_work_rep)
        my_job_mrg = m_jobs.run_jobs(
            sh_file, sh_work_mrgPeak_file, l_sh_info, l_sh_work_mrg)

        my_job_rep.running_multi(cpu=core_num, is_debug=self.is_debug)
        my_job_mrg.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #6
0
    def make_igv_broad(self, core_num=4):
        """Prepare and run the s11 makeIGV_broad work lines.

        A replicate-level and a merged-level work file are filled from the
        same script.  Merged samples for which ``self.__is_input`` is true
        contribute nothing to either file.

        Args:
            core_num: Value forwarded as ``cpu`` to both ``running_multi``
                calls.
        """
        script_dir = self.scripts
        sh_file = "%s/s11.makeIGV_broad.sh" % script_dir
        rep_work_file = "%s/s11.3.makeIGV.repBroadPeak_work.sh" % script_dir
        mrg_work_file = "%s/s11.4.makeIGV.mrgBroadPeak_work.sh" % script_dir

        script_lines = self.s11_makeIGV_broad()
        rep_cmds = []
        mrg_cmds = []

        for group in set(self.samInfo_pd_ChIP['merge_name']):
            in_group = (self.samInfo_pd_ChIP['merge_name'] == group)
            if not self.__is_input(in_group):
                m01.make_dir([self.dir_BroadPeak_TDF, group])

                # Replicate lines use three spaces after the script path,
                # the merged line two; both are kept as in the work files.
                members = list(self.samInfo_pd_ChIP[in_group]['brief_name'])
                rep_cmds.extend([
                    "sh %s   %s %s %s" %
                    (sh_file, member, group, self.dir_BroadPeak_rep)
                    for member in members
                ])
                mrg_cmds.append(
                    "sh %s  %s %s %s" %
                    (sh_file, group, group, self.dir_BroadPeak_mrg))

        rep_job = m_jobs.run_jobs(
            sh_file, rep_work_file, script_lines, rep_cmds)
        mrg_job = m_jobs.run_jobs(
            sh_file, mrg_work_file, script_lines, mrg_cmds)

        for job in (rep_job, mrg_job):
            job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #7
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def run_macs_rep(self, pvalue=0.001, ref="hs", core_num=8):
        """Queue one s06.1 macs2PeakRep job for every non-skipped replicate.

        Replicates for which ``self.__is_input`` returns a true value are
        skipped (presumably input/control samples -- confirm against that
        helper).

        Args:
            pvalue: Rendered with ``%f`` into each work line.
            ref: Forwarded to ``self.s06_1_macs2PeakRep``.
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to the previously hard-coded 8.
        """
        sh_file = "%s/s06.1.macs2PeakRep.sh" % (self.scripts)
        sh_work_file = "%s/s06.1.macs2PeakRep_work.sh" % (self.scripts)

        l_sh_info = self.s06_1_macs2PeakRep(ref)
        l_sh_work = []

        for brief_name in self.samInfo_pd_ChIP['brief_name']:
            idx = (self.samInfo_pd_ChIP['brief_name'] == brief_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_Peak_rep, brief_name])
            ctrl_name = self.samInfo_pd_ChIP[idx]['control'].values[0]

            # Use 300 unless a positive estimate can be read from out.tab.
            shift_size = 300
            f_shiftSize = "%s/%s/out/out.tab" % (
                self.dir_spp_rep_shiftSize, brief_name)
            if os.path.isfile(f_shiftSize):
                val = get_shiftSize(f_shiftSize)
                if val > 0:
                    shift_size = val

            # NOTE(review): "%f" keeps six decimals, so a pvalue below 1e-6
            # is written as 0.000000 -- confirm that is acceptable.
            l_sh_work.append(
                "sh %s  %s %s %f %d" %
                (sh_file, brief_name, ctrl_name, pvalue, shift_size))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #8
0
    def merge_density_100bp(self, core_num=4):
        """Build and run the s17 MergeDensity work lines.

        Replicate and merged sample names are first filtered with
        ``self.__is_input`` (names for which it is true are dropped).  One
        work line is then emitted per sample type ("rep", "mrg"), listing a
        ``<sample>.1kb.norm_avg.xls`` path for every remaining sample.

        NOTE(review): the method name says 100bp but the window label and
        file suffix used here are "1kb" -- confirm which is intended.

        Args:
            core_num: Value forwarded as ``cpu`` to ``running_multi``.
        """
        m01.make_dir([self.dir_RPM_mrg])
        sh_file = "%s/s17.MergeDensity.sh" % (self.scripts)
        sh_work_file = "%s/s17.MergeDensity_work.sh" % (self.scripts)

        all_reps = self.samInfo_pd_ChIP['brief_name']
        all_groups = set(self.samInfo_pd_ChIP['merge_name'])

        # Keep only the names that the input check does not flag.
        kept_reps = [
            rep for rep in all_reps
            if not self.__is_input(self.samInfo_pd_ChIP['brief_name'] == rep)
        ]
        kept_groups = [
            grp for grp in all_groups
            if not self.__is_input(self.samInfo_pd_ChIP['merge_name'] == grp)
        ]

        script_lines = self.s17_merge_RPKM()
        work_lines = []

        for ltype in ["rep", "mrg"]:
            if ltype == "mrg":
                names, base_dir = kept_groups, self.dir_Peak_mrg
            else:
                names, base_dir = kept_reps, self.dir_Peak_rep

            header = "\"#chr\\tbeg\\tend\\t%s\"" % ("\\t".join(names))
            paths = []
            for name in names:
                paths.append(
                    "%s/%s/%s.1kb.norm_avg.xls" % (base_dir, name, name))

            work_lines.append(
                "sh %s  %s %s %s %s" %
                (sh_file, header, "1kb", ltype, " ".join(paths)))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_lines)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #9
0
ファイル: module03_geneDensity.py プロジェクト: yzqheart/ChIP
    def merge_density_100bp(self, core_num=8):
        """Queue the s17 MergeDensity jobs for replicate and merged samples.

        Sample names for which ``self.__is_input`` returns a true value are
        removed first (presumably input/control samples -- confirm against
        that helper).  One work line per sample type ("rep", "mrg") is then
        generated with a quoted header and the ``.1kb.norm_avg.xls`` paths.

        NOTE(review): the method name says 100bp but the window label and
        file suffix used here are "1kb" -- confirm which is intended.

        Args:
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to the previously hard-coded 8.
        """
        m01.make_dir([self.dir_RPM_mrg])
        sh_file = "%s/s17.MergeDensity.sh" % (self.scripts)
        sh_work_file = "%s/s17.MergeDensity_work.sh" % (self.scripts)

        l_brief = self.samInfo_pd_ChIP['brief_name']
        l_merge = set(self.samInfo_pd_ChIP['merge_name'])

        # Drop every name flagged by the input check, replicates first.
        l_brief = [
            brief_name for brief_name in l_brief
            if not self.__is_input(
                self.samInfo_pd_ChIP['brief_name'] == brief_name)
        ]
        l_merge = [
            merge_name for merge_name in l_merge
            if not self.__is_input(
                self.samInfo_pd_ChIP['merge_name'] == merge_name)
        ]

        l_sh_info = self.s17_merge_RPKM()
        l_sh_work = []

        for ltype in ["rep", "mrg"]:
            l_sam = l_brief
            RPKM_dir = self.dir_Peak_rep
            if ltype == "mrg":
                l_sam = l_merge
                RPKM_dir = self.dir_Peak_mrg

            header = "\"#chr\\tbeg\\tend\\t%s\"" % ("\\t".join(l_sam))
            l_RPKM_file = [
                "%s/%s/%s.1kb.norm_avg.xls" % (RPKM_dir, sam, sam)
                for sam in l_sam
            ]
            l_sh_work.append(
                "sh %s  %s %s %s %s" %
                (sh_file, header, "1kb", ltype, " ".join(l_RPKM_file))
            )

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #10
0
ファイル: module03_geneDensity.py プロジェクト: yzqheart/ChIP
    def div_bed_to_bins_unique_mrg(self, core_num=8):
        """Queue one s14 density job per merged sample.

        Each work line carries the merged sample name and the sum of that
        sample's ``'q30'`` entries from ``self.stat_Info.StatInfo``.

        NOTE(review): the script and work file are named
        ``s14.RPM_density_rep`` although the script text comes from
        ``s14_RPM_density_mrg`` -- confirm the file name is intentional.

        Args:
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to the previously hard-coded 8.
        """
        sh_file = "%s/s14.RPM_density_rep.sh" % (self.scripts)
        sh_work_file = "%s/s14.RPM_density_rep_work.sh" % (self.scripts)

        l_sh_info = self.s14_RPM_density_mrg()
        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            m01.make_dir([self.dir_RPM_bins_mrg, merge_name])

            # Total over all 'q30' values recorded for this merged sample.
            mapped_reads = np.sum(self.stat_Info.StatInfo[merge_name]['q30'])
            l_sh_work.append(
                "sh %s %s %d" % (sh_file, merge_name, mapped_reads))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #11
0
    def div_bed_to_bins_unique_mrg(self, core_num=8):
        """Build and run the s14 density work lines for merged samples.

        For every distinct ``merge_name`` a directory is created under
        ``self.dir_RPM_bins_mrg`` and a work line is written with the sample
        name and the summed ``'q30'`` values for that sample.

        NOTE(review): the script file is called ``s14.RPM_density_rep.sh``
        while its content comes from ``s14_RPM_density_mrg`` -- confirm.

        Args:
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to 8, the constant that was hard-coded before.
        """
        sh_file = "%s/s14.RPM_density_rep.sh" % (self.scripts)
        sh_work_file = "%s/s14.RPM_density_rep_work.sh" % (self.scripts)

        l_sh_info = self.s14_RPM_density_mrg()
        l_sh_work = []

        stat_by_sample = self.stat_Info.StatInfo
        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            m01.make_dir([self.dir_RPM_bins_mrg, merge_name])

            mapped_reads = np.sum(stat_by_sample[merge_name]['q30'])
            l_sh_work.append(
                "sh %s %s %d" % (sh_file, merge_name, mapped_reads))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #12
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def get_shift_size_mrg(self, core_num=8):
        """Queue one s05.2 spp_mrg_shiftSize job per non-skipped merged sample.

        Merged samples for which ``self.__is_input`` returns a true value
        are skipped (presumably input/control samples -- confirm against
        that helper).  A ``test`` and an ``out`` directory are created for
        every remaining sample under ``self.dir_spp_mrg_shiftSize``.

        Args:
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to the previously hard-coded 8.
        """
        sh_file = "%s/s05.2.spp_mrg_shiftSize.sh" % (self.scripts)
        sh_work_file = "%s/s05.2.spp_mrg_shiftSize_work.sh" % (self.scripts)

        l_sh_info = self.s05_2_spp_mrg_shiftSize()
        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_spp_mrg_shiftSize, merge_name, "test"])
            m01.make_dir([self.dir_spp_mrg_shiftSize, merge_name, "out"])
            l_sh_work.append("sh %s  %s" % (sh_file, merge_name))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #13
0
    def get_shift_size_mrg(self, core_num=4):
        """Build and run the s05.2 spp_mrg_shiftSize work lines.

        Every distinct ``merge_name`` that ``self.__is_input`` does not flag
        gets a ``test`` and an ``out`` directory plus one work line.

        Args:
            core_num: Value forwarded as ``cpu`` to ``running_multi``.
        """
        scripts_dir = self.scripts
        sh_file = "%s/s05.2.spp_mrg_shiftSize.sh" % scripts_dir
        sh_work_file = "%s/s05.2.spp_mrg_shiftSize_work.sh" % scripts_dir

        script_lines = self.s05_2_spp_mrg_shiftSize()
        work_lines = []

        for group in set(self.samInfo_pd_ChIP['merge_name']):
            group_mask = (self.samInfo_pd_ChIP['merge_name'] == group)
            if not self.__is_input(group_mask):
                # "test" is created before "out", as in the original order.
                for leaf in ("test", "out"):
                    m01.make_dir([self.dir_spp_mrg_shiftSize, group, leaf])
                work_lines.append("sh %s  %s" % (sh_file, group))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_lines)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #14
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def run_macs_rep_broad(self, pvalue=0.05, ref="hs", core_num=8):
        """Queue one s08 macs2BroadPeakRep job for every non-skipped replicate.

        Replicates for which ``self.__is_input`` returns a true value are
        skipped (presumably input/control samples -- confirm against that
        helper).

        Args:
            pvalue: Rendered with ``%f`` into each work line.
            ref: Forwarded to ``self.s08_macs2BroadPeakRep``.
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to the previously hard-coded 8.
        """
        sh_file = "%s/s08.macs2BroadPeakRep.sh" % (self.scripts)
        sh_work_file = "%s/s08.macs2BroadPeakRep_work.sh" % (self.scripts)

        l_sh_info = self.s08_macs2BroadPeakRep(ref)
        l_sh_work = []

        for brief_name in self.samInfo_pd_ChIP['brief_name']:
            idx = (self.samInfo_pd_ChIP['brief_name'] == brief_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_BroadPeak_rep, brief_name])
            ctrl_name = self.samInfo_pd_ChIP[idx]['control'].values[0]

            # NOTE(review): "%f" keeps six decimals, so a pvalue below 1e-6
            # is written as 0.000000 -- confirm that is acceptable.
            l_sh_work.append(
                "sh %s  %s %s %f" % (sh_file, brief_name, ctrl_name, pvalue))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #15
0
ファイル: module03_geneDensity.py プロジェクト: yzqheart/ChIP
    def div_bed_to_bins_unique_rep(self, core_num=8):
        """Queue one s13 RPM_density_rep job per replicate.

        Each work line carries the replicate name and that replicate's
        ``'q30'`` entry, looked up by its position in the ``'l_brief'`` list
        of the merged sample it belongs to.

        Args:
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to the previously hard-coded 8.
        """
        sh_file = "%s/s13.RPM_density_rep.sh" % (self.scripts)
        sh_work_file = "%s/s13.RPM_density_rep_work.sh" % (self.scripts)

        l_sh_info = self.s13_RPM_density_rep()
        l_sh_work = []

        for brief_name in self.samInfo_pd_ChIP['brief_name']:
            m01.make_dir([self.dir_RPM_bins_rep, brief_name])

            idx = (self.samInfo_pd_ChIP['brief_name'] == brief_name)
            merge_name = self.samInfo_pd_ChIP[idx]['merge_name'].values[0]

            # 'l_brief' and 'q30' are indexed in parallel for a merged sample.
            l_brief = self.stat_Info.StatInfo[merge_name]['l_brief']
            idx2 = l_brief.index(brief_name)
            mapped_reads = self.stat_Info.StatInfo[merge_name]['q30'][idx2]
            l_sh_work.append(
                "sh %s %s %d" % (sh_file, brief_name, mapped_reads))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #16
0
    def prepare_idr_input(self, top_peak=100000, core_num=4):
        """Build and run the s07.1 IDR_prepare work lines.

        One line is written per distinct ``merge_name`` that
        ``self.__is_input`` does not flag.  It holds the merged name,
        ``top_peak`` and the space-separated replicate names of that group.

        Args:
            top_peak: Rendered with ``%d`` into each work line.
            core_num: Value forwarded as ``cpu`` to ``running_multi``.
        """
        scripts_dir = self.scripts
        sh_file = "%s/s07.1.IDR_prepare.sh" % scripts_dir
        sh_work_file = "%s/s07.1.IDR_prepare_work.sh" % scripts_dir

        script_lines = self.s07_1_IDR_prepare()
        work_lines = []

        for group in set(self.samInfo_pd_ChIP['merge_name']):
            group_mask = (self.samInfo_pd_ChIP['merge_name'] == group)
            if not self.__is_input(group_mask):
                m01.make_dir([self.dir_Peak_idr, group])
                members = list(self.samInfo_pd_ChIP[group_mask]['brief_name'])
                work_lines.append(
                    "sh %s  %s %d %s" %
                    (sh_file, group, top_peak, " ".join(members)))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_lines)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #17
0
    def run_macs_rep_broad(self, pvalue=0.05, ref="hs", core_num=4):
        """Build and run the s08 macs2BroadPeakRep work lines.

        One line is written per replicate that ``self.__is_input`` does not
        flag, holding the replicate name, the first ``'control'`` value of
        its row selection and ``pvalue``.

        Args:
            pvalue: Rendered with ``%f`` into each work line.
            ref: Forwarded to ``self.s08_macs2BroadPeakRep``.
            core_num: Value forwarded as ``cpu`` to ``running_multi``.
        """
        scripts_dir = self.scripts
        sh_file = "%s/s08.macs2BroadPeakRep.sh" % scripts_dir
        sh_work_file = "%s/s08.macs2BroadPeakRep_work.sh" % scripts_dir

        script_lines = self.s08_macs2BroadPeakRep(ref)
        work_lines = []

        for sample in self.samInfo_pd_ChIP['brief_name']:
            row_mask = (self.samInfo_pd_ChIP['brief_name'] == sample)
            if not self.__is_input(row_mask):
                m01.make_dir([self.dir_BroadPeak_rep, sample])
                control = self.samInfo_pd_ChIP[row_mask]['control'].values[0]
                work_lines.append(
                    "sh %s  %s %s %f" % (sh_file, sample, control, pvalue))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_lines)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #18
0
    def div_bed_to_bins_unique_rep(self, core_num=8):
        """Build and run the s13 RPM_density_rep work lines.

        For every replicate a directory is created under
        ``self.dir_RPM_bins_rep`` and a work line is written with the
        replicate name and its ``'q30'`` value from
        ``self.stat_Info.StatInfo``.

        Args:
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to 8, the constant that was hard-coded before.
        """
        sh_file = "%s/s13.RPM_density_rep.sh" % (self.scripts)
        sh_work_file = "%s/s13.RPM_density_rep_work.sh" % (self.scripts)

        l_sh_info = self.s13_RPM_density_rep()
        l_sh_work = []

        for brief_name in self.samInfo_pd_ChIP['brief_name']:
            m01.make_dir([self.dir_RPM_bins_rep, brief_name])

            idx = (self.samInfo_pd_ChIP['brief_name'] == brief_name)
            merge_name = self.samInfo_pd_ChIP[idx]['merge_name'].values[0]

            # Find this replicate's slot in the merged sample's statistics.
            stat = self.stat_Info.StatInfo[merge_name]
            mapped_reads = stat['q30'][stat['l_brief'].index(brief_name)]
            l_sh_work.append(
                "sh %s %s %d" % (sh_file, brief_name, mapped_reads))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #19
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def run_macs_mrg_broad(self, pvalue=0.05, ref="hs", core_num=8):
        """Queue one s09 macs2BroadPeakMrg job per non-skipped merged sample.

        Merged samples for which ``self.__is_input`` returns a true value
        are skipped (presumably input/control samples -- confirm against
        that helper).

        Args:
            pvalue: Rendered with ``%f`` into each work line.
            ref: Forwarded to ``self.s09_macs2BroadPeakMrg``.
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to the previously hard-coded 8.
        """
        sh_file = "%s/s09.macs2BroadPeakMrg.sh" % (self.scripts)
        sh_work_file = "%s/s09.macs2BroadPeakMrg_work.sh" % (self.scripts)

        l_sh_info = self.s09_macs2BroadPeakMrg(ref)
        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_Peak_mrg, merge_name])
            # The replicate broad-peak step creates its directory under
            # dir_BroadPeak_rep and make_igv_broad reads dir_BroadPeak_mrg,
            # so make sure the merged broad-peak directory exists as well.
            # NOTE(review): presumably the s09 script writes there -- confirm.
            m01.make_dir([self.dir_BroadPeak_mrg, merge_name])

            # Control name is taken from the first row of this merged sample.
            ctrl_name = self.samInfo_pd_ChIP[idx]['control'].values[0]

            l_sh_work.append(
                "sh %s  %s %s %f" % (sh_file, merge_name, ctrl_name, pvalue))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #20
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def prepare_idr_input(self, top_peak=100000, core_num=8):
        """Queue one s07.1 IDR_prepare job per non-skipped merged sample.

        Merged samples for which ``self.__is_input`` returns a true value
        are skipped (presumably input/control samples -- confirm against
        that helper).  Each work line lists the merged name, ``top_peak``
        and the replicate names that belong to the merged sample.

        Args:
            top_peak: Rendered with ``%d`` into each work line.
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to the previously hard-coded 8.
        """
        sh_file = "%s/s07.1.IDR_prepare.sh" % (self.scripts)
        sh_work_file = "%s/s07.1.IDR_prepare_work.sh" % (self.scripts)

        l_sh_info = self.s07_1_IDR_prepare()
        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_Peak_idr, merge_name])
            l_brief = list(self.samInfo_pd_ChIP[idx]['brief_name'])
            l_sh_work.append(
                "sh %s  %s %d %s" %
                (sh_file, merge_name, top_peak, " ".join(l_brief))
            )

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #21
0
    def run_macs_mrg_broad(self, pvalue=0.05, ref="hs", core_num=4):
        """Build and run the s09 macs2BroadPeakMrg work lines.

        One line is written per distinct ``merge_name`` that
        ``self.__is_input`` does not flag, holding the merged name, the
        first ``'control'`` value of its row selection and ``pvalue``.

        Args:
            pvalue: Rendered with ``%f`` into each work line.
            ref: Forwarded to ``self.s09_macs2BroadPeakMrg``.
            core_num: Value forwarded as ``cpu`` to ``running_multi``.
        """
        sh_file = "%s/s09.macs2BroadPeakMrg.sh" % (self.scripts)
        sh_work_file = "%s/s09.macs2BroadPeakMrg_work.sh" % (self.scripts)

        l_sh_info = self.s09_macs2BroadPeakMrg(ref)
        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_Peak_mrg, merge_name])
            # The replicate broad-peak step creates its directory under
            # dir_BroadPeak_rep and make_igv_broad reads dir_BroadPeak_mrg,
            # so make sure the merged broad-peak directory exists as well.
            # NOTE(review): presumably the s09 script writes there -- confirm.
            m01.make_dir([self.dir_BroadPeak_mrg, merge_name])

            ctrl_name = self.samInfo_pd_ChIP[idx]['control'].values[0]

            l_sh_work.append(
                "sh %s  %s %s %f" % (sh_file, merge_name, ctrl_name, pvalue))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #22
0
ファイル: module03_geneDensity.py プロジェクト: yzqheart/ChIP
    def run_anno_peak(self,
                        TSS_genebody_up, TSS_genebody_down, TSS_promoter_up,
                        TSS_promoter_down, ext_binlen=50, body_bincnt=100,
                        tss_binlen=1, core_num=8):
        """Queue one s12 PeakGeneRegion job per merged sample.

        Args:
            TSS_genebody_up: Forwarded to ``self.s12_PeakGeneRegion``.
            TSS_genebody_down: Forwarded to ``self.s12_PeakGeneRegion``.
            TSS_promoter_up: Forwarded to ``self.s12_PeakGeneRegion``.
            TSS_promoter_down: Forwarded to ``self.s12_PeakGeneRegion``.
            ext_binlen: Forwarded as a keyword to ``s12_PeakGeneRegion``.
            body_bincnt: Forwarded as a keyword to ``s12_PeakGeneRegion``.
            tss_binlen: Forwarded as a keyword to ``s12_PeakGeneRegion``.
            core_num: Value forwarded as ``cpu`` to ``running_multi``;
                defaults to the previously hard-coded 8.
        """
        sh_file = "%s/s12.PeakGeneRegion.sh" % (self.scripts)
        sh_work_file = "%s/s12.PeakGeneRegion_work.sh" % (self.scripts)

        # Forward the caller's bin settings.  The literals 50, 100 and 1 were
        # passed here before, which made these three parameters dead.
        l_sh_info = self.s12_PeakGeneRegion(
            TSS_genebody_up, TSS_genebody_down, TSS_promoter_up,
            TSS_promoter_down, ext_binlen=ext_binlen,
            body_bincnt=body_bincnt, tss_binlen=tss_binlen)

        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            m01.make_dir([self.dir_Peak_mrg_TSS, merge_name])
            m01.make_dir([self.dir_Peak_mrg_Gene, merge_name])
            l_sh_work.append("sh %s %s" % (sh_file, merge_name))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #23
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def run_idr(self, core_num=8):
        """Queue the s07.2 IDR_usingMacs2Peak jobs in three work files.

        For every merged sample that ``self.__is_input`` does not flag
        (presumably input/control samples are flagged -- confirm against
        that helper) the following lines are generated:

        * ``rep``: one line per unordered pair of replicates,
        * ``selfPseudoReps``: one line per replicate, comparing its
          ``.pr1`` and ``.pr2`` names,
        * ``pooledPseudoReps``: one line comparing ``<merge_name>.pr1`` and
          ``<merge_name>.pr2``.

        Args:
            core_num: Value forwarded as ``cpu`` to the three
                ``running_multi`` calls; defaults to the previously
                hard-coded 8.
        """
        sh_file = "%s/s07.2.IDR_usingMacs2Peak.sh" % (self.scripts)
        sh_work_file_rep = (
            "%s/s07.2.IDR_usingMacs2Peak_rep_work.sh" % (self.scripts))
        sh_work_file_self = (
            "%s/s07.2.IDR_usingMacs2Peak_selfReps_work.sh" % (self.scripts))
        sh_work_file_pool = (
            "%s/s07.2.IDR_usingMacs2Peak_poolReps_work.sh" % (self.scripts))

        l_sh_info = self.s07_2_usingMacs2Peak()
        l_sh_work_rep = []
        l_sh_work_self = []
        l_sh_work_pool = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_Peak_idr, merge_name])
            l_brief = list(self.samInfo_pd_ChIP[idx]['brief_name'])

            # IDR analysis on original replicates: every pair (i < j).
            # With fewer than two replicates the loops simply do not run.
            subtype = "rep"
            m01.make_dir([self.dir_Peak_idr, merge_name, subtype])
            for i, in_sam1 in enumerate(l_brief):
                for in_sam2 in l_brief[i + 1:]:
                    l_sh_work_rep.append(
                        "sh %s  %s %s %s %s" %
                        (sh_file, in_sam1, in_sam2, merge_name, subtype))

            # IDR analysis on self-pseudoreplicates.
            subtype = "selfPseudoReps"
            m01.make_dir([self.dir_Peak_idr, merge_name, subtype])
            for sam in l_brief:
                in_sam1 = "%s.pr1" % (sam)
                in_sam2 = "%s.pr2" % (sam)
                l_sh_work_self.append(
                    "sh %s   %s %s %s %s" %
                    (sh_file, in_sam1, in_sam2, merge_name, subtype))

            # IDR analysis on pooled-pseudoreplicates.
            subtype = "pooledPseudoReps"
            m01.make_dir([self.dir_Peak_idr, merge_name, subtype])
            in_sam1 = "%s.pr1" % (merge_name)
            in_sam2 = "%s.pr2" % (merge_name)
            l_sh_work_pool.append(
                "sh %s   %s %s %s %s" %
                (sh_file, in_sam1, in_sam2, merge_name, subtype))

        my_job_rep = m_jobs.run_jobs(
            sh_file, sh_work_file_rep, l_sh_info, l_sh_work_rep)
        my_job_self = m_jobs.run_jobs(
            sh_file, sh_work_file_self, l_sh_info, l_sh_work_self)
        my_job_pool = m_jobs.run_jobs(
            sh_file, sh_work_file_pool, l_sh_info, l_sh_work_pool)

        my_job_rep.running_multi(cpu=core_num, is_debug=self.is_debug)
        my_job_self.running_multi(cpu=core_num, is_debug=self.is_debug)
        my_job_pool.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #24
0
    def run_idr(self, core_num=4):
        """Build and run the three s07.2 IDR_usingMacs2Peak work files.

        Merged samples flagged by ``self.__is_input`` are left out.  For the
        others, lines are collected for replicate pairs ("rep"), for each
        replicate's ``.pr1``/``.pr2`` names ("selfPseudoReps") and for the
        merged sample's ``.pr1``/``.pr2`` names ("pooledPseudoReps").

        Args:
            core_num: Value forwarded as ``cpu`` to the three
                ``running_multi`` calls.
        """
        scripts_dir = self.scripts
        sh_file = "%s/s07.2.IDR_usingMacs2Peak.sh" % scripts_dir
        rep_work_file = (
            "%s/s07.2.IDR_usingMacs2Peak_rep_work.sh" % scripts_dir)
        self_work_file = (
            "%s/s07.2.IDR_usingMacs2Peak_selfReps_work.sh" % scripts_dir)
        pool_work_file = (
            "%s/s07.2.IDR_usingMacs2Peak_poolReps_work.sh" % scripts_dir)

        script_lines = self.s07_2_usingMacs2Peak()
        rep_cmds = []
        self_cmds = []
        pool_cmds = []

        for group in set(self.samInfo_pd_ChIP['merge_name']):
            group_mask = (self.samInfo_pd_ChIP['merge_name'] == group)
            if self.__is_input(group_mask):
                continue

            idr_root = [self.dir_Peak_idr, group]
            m01.make_dir(idr_root)
            members = list(self.samInfo_pd_ChIP[group_mask]['brief_name'])
            n_members = len(members)

            # Original replicates: every pair with first index < second.
            m01.make_dir(idr_root + ["rep"])
            first = 0
            while first < n_members - 1:
                second = first + 1
                while second < n_members:
                    rep_cmds.append(
                        "sh %s  %s %s %s %s" %
                        (sh_file, members[first], members[second],
                         group, "rep"))
                    second += 1
                first += 1

            # Self-pseudoreplicates: <replicate>.pr1 against <replicate>.pr2.
            m01.make_dir(idr_root + ["selfPseudoReps"])
            self_cmds.extend([
                "sh %s   %s.pr1 %s.pr2 %s %s" %
                (sh_file, member, member, group, "selfPseudoReps")
                for member in members
            ])

            # Pooled-pseudoreplicates: <group>.pr1 against <group>.pr2.
            m01.make_dir(idr_root + ["pooledPseudoReps"])
            pool_cmds.append(
                "sh %s   %s.pr1 %s.pr2 %s %s" %
                (sh_file, group, group, group, "pooledPseudoReps"))

        rep_job = m_jobs.run_jobs(
            sh_file, rep_work_file, script_lines, rep_cmds)
        self_job = m_jobs.run_jobs(
            sh_file, self_work_file, script_lines, self_cmds)
        pool_job = m_jobs.run_jobs(
            sh_file, pool_work_file, script_lines, pool_cmds)

        for job in (rep_job, self_job, pool_job):
            job.running_multi(cpu=core_num, is_debug=self.is_debug)