コード例 #1
0
    def sort_bdg(self, core_num=4):
        """Queue and run the s10 "sortbdg" step for every non-input group.

        For each ``merge_name`` group that ``self.__is_input`` does not
        reject, one command per replicate (``brief_name``) and one command
        for the merged sample are queued, once against the
        ``self.dir_BroadPeak_*`` directories and once against the
        ``self.dir_Peak_*`` directories.  The four work lists are wrapped in
        ``m_jobs.run_jobs`` objects, which are then run one after another.

        Args:
            core_num: forwarded as ``cpu`` to every ``running_multi`` call.
        """
        script_dir = self.scripts
        sh_file = "%s/s10.sortbdg.sh" % (script_dir)
        work_rep_peak = "%s/s10.1.sortbdg.repPeak_work.sh" % (script_dir)
        work_mrg_peak = "%s/s10.2.sortbdg.mrgPeak_work.sh" % (script_dir)
        work_rep_broad = "%s/s10.3.sortbdg.repBroadPeak_work.sh" % (script_dir)
        work_mrg_broad = "%s/s10.4.sortbdg.mrgBroadPeak_work.sh" % (script_dir)

        script_lines = self.s10_sortbdg()
        cmd_fmt = "sh %s  %s %s"

        cmds_rep_peak = []
        cmds_mrg_peak = []
        cmds_rep_broad = []
        cmds_mrg_broad = []

        sam_table = self.samInfo_pd_ChIP
        for group in set(sam_table['merge_name']):
            in_group = (sam_table['merge_name'] == group)
            if self.__is_input(in_group):
                continue

            replicates = list(sam_table[in_group]['brief_name'])

            cmds_rep_broad.extend(
                [cmd_fmt % (sh_file, rep, self.dir_BroadPeak_rep)
                 for rep in replicates])
            cmds_mrg_broad.append(
                cmd_fmt % (sh_file, group, self.dir_BroadPeak_mrg))

            cmds_rep_peak.extend(
                [cmd_fmt % (sh_file, rep, self.dir_Peak_rep)
                 for rep in replicates])
            cmds_mrg_peak.append(
                cmd_fmt % (sh_file, group, self.dir_Peak_mrg))

        # All four job objects are created before any of them is run,
        # and they are run in creation order.
        jobs = [
            m_jobs.run_jobs(sh_file, work_file, script_lines, cmds)
            for work_file, cmds in (
                (work_rep_peak, cmds_rep_peak),
                (work_mrg_peak, cmds_mrg_peak),
                (work_rep_broad, cmds_rep_broad),
                (work_mrg_broad, cmds_mrg_broad),
            )
        ]
        for job in jobs:
            job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #2
0
ファイル: module03_geneDensity.py プロジェクト: yzqheart/ChIP
    def density_baseLevel(self, core_num=8):
        """Run the s16 "densityBaseLv" step per replicate and per merged sample.

        Replicates (``brief_name``) and merged samples (``merge_name``) for
        which ``self.__is_input`` is truthy are skipped.  The replicate job
        is created and run first, then the merged-sample job.

        Args:
            core_num: value passed as ``cpu`` to ``running_multi``.  Defaults
                to 8, the value that used to be hard-coded here.
        """
        sh_file = "%s/s16.densityBaseLv.sh" % (self.scripts)
        sh_rep_work_file = "%s/s16.densityBaseLvRep_work.sh" % (self.scripts)
        sh_mrg_work_file = "%s/s16.densityBaseLvMrg_work.sh" % (self.scripts)

        l_brief = self.samInfo_pd_ChIP['brief_name']
        l_merge = set(self.samInfo_pd_ChIP['merge_name'])

        l_sh_info = self.s16_densityBaselv()
        l_sh_rep_work = []
        l_sh_mrg_work = []

        for brief_name in l_brief:
            idx = (self.samInfo_pd_ChIP['brief_name'] == brief_name)
            if self.__is_input(idx):
                continue

            l_sh_rep_work.append(
                "sh %s %s %s %s" %
                (sh_file, brief_name, self.ref, self.dir_Peak_rep))

        for merge_name in l_merge:
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            l_sh_mrg_work.append(
                "sh %s %s %s %s" %
                (sh_file, merge_name, self.ref, self.dir_Peak_mrg))

        my_job_rep = m_jobs.run_jobs(sh_file, sh_rep_work_file, l_sh_info,
                                     l_sh_rep_work)
        my_job_rep.running_multi(cpu=core_num, is_debug=self.is_debug)

        my_job_mrg = m_jobs.run_jobs(sh_file, sh_mrg_work_file, l_sh_info,
                                     l_sh_mrg_work)
        my_job_mrg.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #3
0
    def make_igv_broad(self, core_num=4):
        """Queue and run the s11 "makeIGV_broad" step for non-input groups.

        For every ``merge_name`` group not rejected by ``self.__is_input`` a
        directory ``[self.dir_BroadPeak_TDF, merge_name]`` is requested via
        ``m01.make_dir``; then one command per replicate and one command for
        the merged sample are queued.

        Args:
            core_num: forwarded as ``cpu`` to both ``running_multi`` calls.
        """
        script_dir = self.scripts
        sh_file = "%s/s11.makeIGV_broad.sh" % (script_dir)
        work_rep_file = "%s/s11.3.makeIGV.repBroadPeak_work.sh" % (script_dir)
        work_mrg_file = "%s/s11.4.makeIGV.mrgBroadPeak_work.sh" % (script_dir)

        script_lines = self.s11_makeIGV_broad()
        rep_cmds = []
        mrg_cmds = []

        sam_table = self.samInfo_pd_ChIP
        for group in set(sam_table['merge_name']):
            in_group = (sam_table['merge_name'] == group)
            if self.__is_input(in_group):
                continue

            m01.make_dir([self.dir_BroadPeak_TDF, group])

            # NOTE: the replicate template has three spaces after the script
            # path, the merged one two; both are kept exactly as they were.
            rep_cmds.extend(
                ["sh %s   %s %s %s" %
                 (sh_file, rep, group, self.dir_BroadPeak_rep)
                 for rep in list(sam_table[in_group]['brief_name'])])
            mrg_cmds.append(
                "sh %s  %s %s %s" %
                (sh_file, group, group, self.dir_BroadPeak_mrg))

        rep_job = m_jobs.run_jobs(sh_file, work_rep_file, script_lines,
                                  rep_cmds)
        mrg_job = m_jobs.run_jobs(sh_file, work_mrg_file, script_lines,
                                  mrg_cmds)

        rep_job.running_multi(cpu=core_num, is_debug=self.is_debug)
        mrg_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #4
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def make_igv_broad(self, core_num=8):
        """Queue and run the s11 "makeIGV_broad" step for non-input groups.

        For every ``merge_name`` group not rejected by ``self.__is_input`` a
        directory ``[self.dir_BroadPeak_TDF, merge_name]`` is requested via
        ``m01.make_dir``; then one command per replicate (``brief_name``) and
        one command for the merged sample are queued.  Both job objects are
        created before either is run.

        Args:
            core_num: value passed as ``cpu`` to ``running_multi``.  Defaults
                to 8, the value that used to be hard-coded here.
        """
        sh_file = "%s/s11.makeIGV_broad.sh" % (self.scripts)
        sh_work_repPeak_file = \
            "%s/s11.3.makeIGV.repBroadPeak_work.sh" % (self.scripts)
        sh_work_mrgPeak_file = \
            "%s/s11.4.makeIGV.mrgBroadPeak_work.sh" % (self.scripts)

        l_sh_info = self.s11_makeIGV_broad()
        l_sh_work_rep = []
        l_sh_work_mrg = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_BroadPeak_TDF, merge_name])
            l_brief = list(self.samInfo_pd_ChIP[idx]['brief_name'])
            for brief in l_brief:
                l_sh_work_rep.append(
                    "sh %s   %s %s %s" %
                    (sh_file, brief, merge_name, self.dir_BroadPeak_rep))

            l_sh_work_mrg.append(
                "sh %s  %s %s %s" %
                (sh_file, merge_name, merge_name, self.dir_BroadPeak_mrg))

        my_job_rep = m_jobs.run_jobs(sh_file, sh_work_repPeak_file,
                                     l_sh_info, l_sh_work_rep)
        my_job_mrg = m_jobs.run_jobs(sh_file, sh_work_mrgPeak_file,
                                     l_sh_info, l_sh_work_mrg)

        my_job_rep.running_multi(cpu=core_num, is_debug=self.is_debug)
        my_job_mrg.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #5
0
    def run_anno_peak(self,
                      TSS_genebody_up,
                      TSS_genebody_down,
                      TSS_promoter_up,
                      TSS_promoter_down,
                      ext_binlen=50,
                      body_bincnt=100,
                      tss_binlen=1):
        """Queue and run the s12 "PeakGeneRegion" step for every merged sample.

        All region/bin arguments are forwarded unchanged to
        ``self.s12_PeakGeneRegion``.  For each distinct ``merge_name`` the
        directories ``[self.dir_Peak_mrg_TSS, merge_name]`` and
        ``[self.dir_Peak_mrg_Gene, merge_name]`` are requested via
        ``m01.make_dir`` and one work command is queued.

        Args:
            TSS_genebody_up: forwarded to ``s12_PeakGeneRegion``.
            TSS_genebody_down: forwarded to ``s12_PeakGeneRegion``.
            TSS_promoter_up: forwarded to ``s12_PeakGeneRegion``.
            TSS_promoter_down: forwarded to ``s12_PeakGeneRegion``.
            ext_binlen: forwarded to ``s12_PeakGeneRegion`` (default 50).
            body_bincnt: forwarded to ``s12_PeakGeneRegion`` (default 100).
            tss_binlen: forwarded to ``s12_PeakGeneRegion`` (default 1).
        """
        sh_file = "%s/s12.PeakGeneRegion.sh" % (self.scripts)
        sh_work_file = "%s/s12.PeakGeneRegion_work.sh" % (self.scripts)

        # Forward the caller's bin settings; previously the literals
        # 50/100/1 were passed here, so non-default arguments were ignored.
        l_sh_info = self.s12_PeakGeneRegion(TSS_genebody_up,
                                            TSS_genebody_down,
                                            TSS_promoter_up,
                                            TSS_promoter_down,
                                            ext_binlen=ext_binlen,
                                            body_bincnt=body_bincnt,
                                            tss_binlen=tss_binlen)

        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            m01.make_dir([self.dir_Peak_mrg_TSS, merge_name])
            m01.make_dir([self.dir_Peak_mrg_Gene, merge_name])
            l_sh_work.append("sh %s %s" % (sh_file, merge_name))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=8, is_debug=self.is_debug)
コード例 #6
0
ファイル: module03_geneDensity.py プロジェクト: yzqheart/ChIP
    def merge_RPKM_uniq(self):
        """Queue and run the s15 "MergeRPKM" step.

        One command is queued for every (window, sample-type) pair, with
        windows ``"100"`` and ``"1kb"`` and sample types ``"rep"``
        (``brief_name`` column) and ``"mrg"`` (distinct ``merge_name``
        values).  Each command carries a quoted, tab-escaped header naming
        the samples, followed by the per-sample ``.RPKM.uniq.<window>`` paths.
        """
        m01.make_dir([self.dir_RPM_mrg])

        sh_file = "%s/s15.MergeRPKM.sh" % (self.scripts)
        sh_work_file = "%s/s15.1.MergeRPKM_work.sh" % (self.scripts)

        replicate_names = self.samInfo_pd_ChIP['brief_name']
        merged_names = set(self.samInfo_pd_ChIP['merge_name'])

        script_lines = self.s15_merge_RPKM()
        work_cmds = []

        for window in ("100", "1kb"):
            for ltype in ("rep", "mrg"):
                if ltype == "mrg":
                    samples = merged_names
                    rpkm_dir = self.dir_RPM_bins_mrg
                else:
                    samples = replicate_names
                    rpkm_dir = self.dir_RPM_bins_rep

                # The header stays a literal "\t"-escaped string; it is not
                # expanded to real tab characters here.
                header = "\"#chr\\tbeg\\tend\\t%s\"" % ("\\t".join(samples))
                rpkm_files = " ".join([
                    "%s/%s/%s.RPKM.uniq.%s" % (rpkm_dir, name, name, window)
                    for name in samples
                ])
                work_cmds.append(
                    "sh %s  %s %s %s %s" %
                    (sh_file, header, window, ltype, rpkm_files))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
        job.running_multi(cpu=8, is_debug=self.is_debug)
コード例 #7
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def run_macs_rep(self, pvalue=0.001, ref="hs", core_num=8):
        """Queue and run the s06.1 "macs2PeakRep" step for each replicate.

        Replicates for which ``self.__is_input`` is truthy are skipped.  For
        the others a directory ``[self.dir_Peak_rep, brief_name]`` is
        requested via ``m01.make_dir`` and one command is queued containing
        the replicate name, its ``control`` value, ``pvalue`` and a shift
        size.

        The shift size is 300 unless
        ``<dir_spp_rep_shiftSize>/<brief_name>/out/out.tab`` exists and
        ``get_shiftSize`` returns a positive value for it.

        Args:
            pvalue: written into each command with ``%f`` formatting.
            ref: forwarded to ``self.s06_1_macs2PeakRep``.
            core_num: value passed as ``cpu`` to ``running_multi``.  Defaults
                to 8, the value that used to be hard-coded here.
        """
        sh_file = "%s/s06.1.macs2PeakRep.sh" % (self.scripts)
        sh_work_file = "%s/s06.1.macs2PeakRep_work.sh" % (self.scripts)

        l_sh_info = self.s06_1_macs2PeakRep(ref)
        l_sh_work = []

        for brief_name in self.samInfo_pd_ChIP['brief_name']:
            idx = (self.samInfo_pd_ChIP['brief_name'] == brief_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_Peak_rep, brief_name])
            ctrl_name = self.samInfo_pd_ChIP[idx]['control'].values[0]

            # Fall back to 300 when no usable estimate is available.
            shift_size = 300
            f_shiftSize = "%s/%s/out/out.tab" % \
                (self.dir_spp_rep_shiftSize, brief_name)

            if os.path.isfile(f_shiftSize):
                val = get_shiftSize(f_shiftSize)
                if val > 0:
                    shift_size = val

            l_sh_work.append(
                "sh %s  %s %s %f %d" %
                (sh_file, brief_name, ctrl_name, pvalue, shift_size))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #8
0
    def run_macs_rep(self, pvalue=0.001, ref="hs", core_num=4):
        """Queue and run the s06.1 "macs2PeakRep" step for each replicate.

        Replicates rejected by ``self.__is_input`` are skipped.  Each
        remaining replicate gets a ``[self.dir_Peak_rep, brief_name]``
        directory (via ``m01.make_dir``) and one command carrying its name,
        its ``control`` value, ``pvalue`` and a shift size.  The shift size
        is 300 unless the replicate's ``out/out.tab`` file exists under
        ``self.dir_spp_rep_shiftSize`` and ``get_shiftSize`` returns a
        positive number for it.

        Args:
            pvalue: written into each command with ``%f`` formatting.
            ref: forwarded to ``self.s06_1_macs2PeakRep``.
            core_num: forwarded as ``cpu`` to ``running_multi``.
        """
        script_dir = self.scripts
        sh_file = "%s/s06.1.macs2PeakRep.sh" % (script_dir)
        sh_work_file = "%s/s06.1.macs2PeakRep_work.sh" % (script_dir)

        script_lines = self.s06_1_macs2PeakRep(ref)
        work_cmds = []

        sam_table = self.samInfo_pd_ChIP
        for replicate in sam_table['brief_name']:
            row_mask = (sam_table['brief_name'] == replicate)
            if self.__is_input(row_mask):
                continue

            m01.make_dir([self.dir_Peak_rep, replicate])
            control = sam_table[row_mask]['control'].values[0]

            shift = 300
            shift_tab = "%s/%s/out/out.tab" % \
                (self.dir_spp_rep_shiftSize, replicate)
            if os.path.isfile(shift_tab):
                estimate = get_shiftSize(shift_tab)
                # Non-positive estimates are ignored in favour of the default.
                shift = estimate if estimate > 0 else shift

            work_cmds.append(
                "sh %s  %s %s %f %d" %
                (sh_file, replicate, control, pvalue, shift))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #9
0
    def merge_RPKM_uniq(self):
        """Queue and run the s15 "MergeRPKM" step.

        Four commands are queued: windows ``"100"`` and ``"1kb"``, each for
        the replicate list (``"rep"``, the ``brief_name`` column) and the
        merged list (``"mrg"``, distinct ``merge_name`` values).  Every
        command carries a quoted header with ``\\t``-escaped sample names and
        the matching ``.RPKM.uniq.<window>`` file paths.
        """
        m01.make_dir([self.dir_RPM_mrg])

        script_dir = self.scripts
        sh_file = "%s/s15.MergeRPKM.sh" % (script_dir)
        sh_work_file = "%s/s15.1.MergeRPKM_work.sh" % (script_dir)

        by_replicate = self.samInfo_pd_ChIP['brief_name']
        by_merge = set(self.samInfo_pd_ChIP['merge_name'])

        script_lines = self.s15_merge_RPKM()

        header_fmt = "\"#chr\\tbeg\\tend\\t%s\""
        path_fmt = "%s/%s/%s.RPKM.uniq.%s"
        cmd_fmt = "sh %s  %s %s %s %s"

        work_cmds = []
        for window in ["100", "1kb"]:
            for ltype in ["rep", "mrg"]:
                is_merged = (ltype == "mrg")
                names = by_merge if is_merged else by_replicate
                base_dir = (self.dir_RPM_bins_mrg if is_merged
                            else self.dir_RPM_bins_rep)

                header = header_fmt % ("\\t".join(names))
                paths = [path_fmt % (base_dir, sam, sam, window)
                         for sam in names]
                work_cmds.append(
                    cmd_fmt % (sh_file, header, window, ltype,
                               " ".join(paths)))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
        job.running_multi(cpu=8, is_debug=self.is_debug)
コード例 #10
0
 def __get_rmsk(self):
     """Run the db04 "rmsk" step once for ``self.ref``.

     The script content comes from ``self.db_04_rmsk()``; a single work
     command ``sh <script> <ref>`` is handed to ``m_jobs.run_jobs``.
     """
     script_dir = self.scripts
     sh_file = "%s/db04.rmsk.sh" % (script_dir)
     sh_work_file = "%s/db04.rmsk_work.sh" % (script_dir)

     script_lines = self.db_04_rmsk()
     work_cmds = ["sh %s %s" % (sh_file, self.ref)]

     job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
     job.running_multi(cpu=8, is_debug=self.is_debug)
コード例 #11
0
 def __get_ref_fasta(self):
     """Run the db01 "DownloadRef" step once for ``self.ref``.

     The script content comes from ``self.db_01_DownloadRef()``; a single
     work command ``sh <script> <ref>`` is handed to ``m_jobs.run_jobs``.
     """
     script_dir = self.scripts
     sh_file = "%s/db01.DownloadRef.sh" % (script_dir)
     sh_work_file = "%s/db01.DownloadRef_work.sh" % (script_dir)

     script_lines = self.db_01_DownloadRef()
     work_cmds = ["sh %s %s" % (sh_file, self.ref)]

     job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
     job.running_multi(cpu=8, is_debug=self.is_debug)
コード例 #12
0
 def __get_ref_index(self):
     """Run the db02 "RefIndex" step once for ``self.ref``.

     The script content comes from ``self.db_02_BuildRefIndex()``; a single
     work command ``sh <script> <ref>`` is handed to ``m_jobs.run_jobs``.
     """
     script_dir = self.scripts
     sh_file = "%s/db02.RefIndex.sh" % (script_dir)
     sh_work_file = "%s/db02.RefIndex_work.sh" % (script_dir)

     script_lines = self.db_02_BuildRefIndex()
     work_cmds = ["sh %s %s" % (sh_file, self.ref)]

     job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
     job.running_multi(cpu=8, is_debug=self.is_debug)
コード例 #13
0
    def __get_rmsk(self):
        """Run the db04 "rmsk" step once for ``self.ref``.

        ``self.db_04_rmsk()`` supplies the script content; the only work
        command is ``sh <script> <ref>``.
        """
        prefix = "%s/db04.rmsk" % (self.scripts)
        sh_file = prefix + ".sh"
        sh_work_file = prefix + "_work.sh"

        script_lines = self.db_04_rmsk()
        work_cmds = ["sh %s %s" % (sh_file, self.ref)]

        m_jobs.run_jobs(
            sh_file, sh_work_file, script_lines, work_cmds
        ).running_multi(cpu=8, is_debug=self.is_debug)
コード例 #14
0
    def __get_ref_fasta(self):
        """Run the db01 "DownloadRef" step once for ``self.ref``.

        ``self.db_01_DownloadRef()`` supplies the script content; the only
        work command is ``sh <script> <ref>``.
        """
        prefix = "%s/db01.DownloadRef" % (self.scripts)
        sh_file = prefix + ".sh"
        sh_work_file = prefix + "_work.sh"

        script_lines = self.db_01_DownloadRef()
        work_cmds = ["sh %s %s" % (sh_file, self.ref)]

        m_jobs.run_jobs(
            sh_file, sh_work_file, script_lines, work_cmds
        ).running_multi(cpu=8, is_debug=self.is_debug)
コード例 #15
0
    def __get_ref_index(self):
        """Run the db02 "RefIndex" step once for ``self.ref``.

        ``self.db_02_BuildRefIndex()`` supplies the script content; the only
        work command is ``sh <script> <ref>``.
        """
        prefix = "%s/db02.RefIndex" % (self.scripts)
        sh_file = prefix + ".sh"
        sh_work_file = prefix + "_work.sh"

        script_lines = self.db_02_BuildRefIndex()
        work_cmds = ["sh %s %s" % (sh_file, self.ref)]

        m_jobs.run_jobs(
            sh_file, sh_work_file, script_lines, work_cmds
        ).running_multi(cpu=8, is_debug=self.is_debug)
コード例 #16
0
    def __get_region(self, query):
        """Run the db05 "repeat" step once for ``self.ref``.

        The script content comes from ``self.db_05_Region(query)``; a single
        work command ``sh <script> <ref>`` is handed to ``m_jobs.run_jobs``.

        Args:
            query: forwarded to ``self.db_05_Region``.  It is not part of the
                work command itself.
        """
        # The receiver used to be misspelled "sefl", so every use of "self"
        # below raised NameError.
        sh_file = "%s/db05.repeat.sh" % (self.scripts)
        sh_work_file = "%s/db05.repeat_work.sh" % (self.scripts)

        l_sh_info = self.db_05_Region(query)
        l_sh_work = []
        l_sh_work.append("sh %s %s" % (sh_file, self.ref))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=8, is_debug=self.is_debug)


#        my_job.running_SGE(vf="400m", maxjob=100, is_debug = self.is_debug)
コード例 #17
0
    def merge_density_100bp(self, core_num=4):
        """Queue and run the s17 "MergeDensity" step.

        Replicates and merged samples rejected by ``self.__is_input`` are
        dropped first.  Two commands are then queued, one for the remaining
        replicates (``"rep"``) and one for the remaining merged samples
        (``"mrg"``); each lists the samples' ``.1kb.norm_avg.xls`` files.

        NOTE(review): despite the "100bp" in the method name, the commands
        use the ``"1kb"`` window and ``.1kb.`` file names; confirm intent.

        Args:
            core_num: forwarded as ``cpu`` to ``running_multi``.
        """
        m01.make_dir([self.dir_RPM_mrg])

        script_dir = self.scripts
        sh_file = "%s/s17.MergeDensity.sh" % (script_dir)
        sh_work_file = "%s/s17.MergeDensity_work.sh" % (script_dir)

        sam_table = self.samInfo_pd_ChIP
        all_replicates = sam_table['brief_name']
        all_merged = set(sam_table['merge_name'])

        kept_replicates = [
            name for name in all_replicates
            if not self.__is_input(sam_table['brief_name'] == name)
        ]
        kept_merged = [
            name for name in all_merged
            if not self.__is_input(sam_table['merge_name'] == name)
        ]

        script_lines = self.s17_merge_RPKM()
        work_cmds = []

        for ltype in ("rep", "mrg"):
            if ltype == "mrg":
                samples, base_dir = kept_merged, self.dir_Peak_mrg
            else:
                samples, base_dir = kept_replicates, self.dir_Peak_rep

            header = "\"#chr\\tbeg\\tend\\t%s\"" % ("\\t".join(samples))
            density_files = " ".join([
                "%s/%s/%s.1kb.norm_avg.xls" % (base_dir, name, name)
                for name in samples
            ])
            work_cmds.append(
                "sh %s  %s %s %s %s" %
                (sh_file, header, "1kb", ltype, density_files))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #18
0
ファイル: module03_geneDensity.py プロジェクト: yzqheart/ChIP
    def merge_density_100bp(self, core_num=8):
        """Queue and run the s17 "MergeDensity" step.

        Replicates (``brief_name``) and merged samples (``merge_name``) for
        which ``self.__is_input`` is truthy are dropped.  Two commands are
        then queued, ``"rep"`` and ``"mrg"``, each carrying a quoted
        ``\\t``-escaped header and the samples' ``.1kb.norm_avg.xls`` paths.

        NOTE(review): despite the "100bp" in the method name, the commands
        use the ``"1kb"`` window and ``.1kb.`` file names; confirm intent.

        Args:
            core_num: value passed as ``cpu`` to ``running_multi``.  Defaults
                to 8, the value that used to be hard-coded here.
        """
        m01.make_dir([self.dir_RPM_mrg])
        sh_file = "%s/s17.MergeDensity.sh" % (self.scripts)
        sh_work_file = "%s/s17.MergeDensity_work.sh" % (self.scripts)

        sam_table = self.samInfo_pd_ChIP
        all_brief = sam_table['brief_name']
        all_merge = set(sam_table['merge_name'])

        # Keep only the samples that are not flagged by __is_input.
        l_brief = [
            brief_name for brief_name in all_brief
            if not self.__is_input(sam_table['brief_name'] == brief_name)
        ]
        l_merge = [
            merge_name for merge_name in all_merge
            if not self.__is_input(sam_table['merge_name'] == merge_name)
        ]

        l_sh_info = self.s17_merge_RPKM()
        l_sh_work = []

        for ltype in ["rep", "mrg"]:
            l_sam = l_brief
            RPKM_dir = self.dir_Peak_rep
            if ltype == "mrg":
                l_sam = l_merge
                RPKM_dir = self.dir_Peak_mrg

            header = "\"#chr\\tbeg\\tend\\t%s\"" % ("\\t".join(l_sam))
            l_RPKM_file = [
                "%s/%s/%s.1kb.norm_avg.xls" % (RPKM_dir, sam, sam)
                for sam in l_sam
            ]
            l_sh_work.append(
                "sh %s  %s %s %s %s" %
                (sh_file, header, "1kb", ltype, " ".join(l_RPKM_file)))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #19
0
    def density_baseLevel(self, core_num=4):
        """Run the s16 "densityBaseLv" step per replicate and per merged sample.

        Samples rejected by ``self.__is_input`` are skipped.  The replicate
        job is created and run before the merged-sample job is created.

        Args:
            core_num: forwarded as ``cpu`` to both ``running_multi`` calls.
        """
        script_dir = self.scripts
        sh_file = "%s/s16.densityBaseLv.sh" % (script_dir)
        rep_work_file = "%s/s16.densityBaseLvRep_work.sh" % (script_dir)
        mrg_work_file = "%s/s16.densityBaseLvMrg_work.sh" % (script_dir)

        sam_table = self.samInfo_pd_ChIP
        replicates = sam_table['brief_name']
        merged = set(sam_table['merge_name'])

        script_lines = self.s16_densityBaselv()
        cmd_fmt = "sh %s %s %s %s"

        rep_cmds = []
        for name in replicates:
            if self.__is_input(sam_table['brief_name'] == name):
                continue
            rep_cmds.append(
                cmd_fmt % (sh_file, name, self.ref, self.dir_Peak_rep))

        mrg_cmds = []
        for name in merged:
            if self.__is_input(sam_table['merge_name'] == name):
                continue
            mrg_cmds.append(
                cmd_fmt % (sh_file, name, self.ref, self.dir_Peak_mrg))

        rep_job = m_jobs.run_jobs(sh_file, rep_work_file, script_lines,
                                  rep_cmds)
        rep_job.running_multi(cpu=core_num, is_debug=self.is_debug)

        mrg_job = m_jobs.run_jobs(sh_file, mrg_work_file, script_lines,
                                  mrg_cmds)
        mrg_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #20
0
 def run_QC(self, core_num=8):
     """Queue and run the s01 "QC" step for every sample.

     For each value in the ``sample`` column a directory
     ``[self.dir_clean_data, sample]`` is requested via ``make_dir`` and one
     command is queued with the sample name and the ``self.M_CvtEnd``
     translation of its ``end_type``.

     Args:
         core_num: value passed as ``cpu`` to ``running_multi``.  Defaults
             to 8, the value that used to be hard-coded here.
     """
     sh_file = "%s/s01.QC.sh" % (self.scripts)
     sh_work_file = "%s/s01.QC_work.sh" % (self.scripts)

     l_sh_info = self.s01_QC()
     l_sh_work = []
     for samp in self.samInfo_pd_ChIP['sample']:
         make_dir([self.dir_clean_data, samp])
         idx = (self.samInfo_pd_ChIP['sample'] == samp)
         end = self.samInfo_pd_ChIP[idx]['end_type'].values[0]
         data_type = self.M_CvtEnd[end]
         l_sh_work.append("sh %s %s %d" % (sh_file, samp, data_type))

     my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
     my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #21
0
ファイル: module03_geneDensity.py プロジェクト: yzqheart/ChIP
    def div_bed_to_bins_unique_mrg(self):
        """Queue and run the s14 RPM-density step for every merged sample.

        For each distinct ``merge_name`` a directory
        ``[self.dir_RPM_bins_mrg, merge_name]`` is requested via
        ``m01.make_dir``, and one command is queued with the sample name and
        the sum of its ``q30`` entries from ``self.stat_Info.StatInfo``.

        NOTE(review): the script file names say ``s14.RPM_density_rep``
        although this is the merged-sample variant; confirm that is intended.
        """
        script_dir = self.scripts
        sh_file = "%s/s14.RPM_density_rep.sh" % (script_dir)
        sh_work_file = "%s/s14.RPM_density_rep_work.sh" % (script_dir)

        script_lines = self.s14_RPM_density_mrg()
        work_cmds = []

        for group in set(self.samInfo_pd_ChIP['merge_name']):
            m01.make_dir([self.dir_RPM_bins_mrg, group])

            q30_total = np.sum(self.stat_Info.StatInfo[group]['q30'])
            work_cmds.append("sh %s %s %d" % (sh_file, group, q30_total))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
        job.running_multi(cpu=8, is_debug=self.is_debug)
コード例 #22
0
    def run_QC(self, core_num=4):
        """Queue and run the s01 "QC" step for every sample.

        Each value in the ``sample`` column gets a
        ``[self.dir_clean_data, sample]`` directory (via ``make_dir``) and
        one command carrying the sample name and the ``self.M_CvtEnd``
        translation of its ``end_type``.

        Args:
            core_num: forwarded as ``cpu`` to ``running_multi``.
        """
        script_dir = self.scripts
        sh_file = "%s/s01.QC.sh" % (script_dir)
        sh_work_file = "%s/s01.QC_work.sh" % (script_dir)

        script_lines = self.s01_QC()
        work_cmds = []

        sam_table = self.samInfo_pd_ChIP
        for sample in sam_table['sample']:
            make_dir([self.dir_clean_data, sample])
            row_mask = (sam_table['sample'] == sample)
            end_type = sam_table[row_mask]['end_type'].values[0]
            work_cmds.append(
                "sh %s %s %d" % (sh_file, sample, self.M_CvtEnd[end_type]))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #23
0
    def div_bed_to_bins_unique_mrg(self):
        """Queue and run the s14 RPM-density step for every merged sample.

        Every distinct ``merge_name`` gets a
        ``[self.dir_RPM_bins_mrg, merge_name]`` directory (via
        ``m01.make_dir``) and one command carrying the name and the sum of
        its ``q30`` entries in ``self.stat_Info.StatInfo``.

        NOTE(review): the script file names say ``s14.RPM_density_rep``
        although this is the merged-sample variant; confirm that is intended.
        """
        prefix = "%s/s14.RPM_density_rep" % (self.scripts)
        sh_file = prefix + ".sh"
        sh_work_file = prefix + "_work.sh"

        script_lines = self.s14_RPM_density_mrg()
        work_cmds = []

        merged_names = set(self.samInfo_pd_ChIP['merge_name'])
        for name in merged_names:
            m01.make_dir([self.dir_RPM_bins_mrg, name])

            per_rep_q30 = self.stat_Info.StatInfo[name]['q30']
            work_cmds.append(
                "sh %s %s %d" % (sh_file, name, np.sum(per_rep_q30)))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
        job.running_multi(cpu=8, is_debug=self.is_debug)
コード例 #24
0
    def get_shift_size_mrg(self, core_num=4):
        """Queue and run the s05.2 "spp_mrg_shiftSize" step per merged sample.

        Groups rejected by ``self.__is_input`` are skipped.  Every other
        ``merge_name`` gets ``test`` and ``out`` sub-directories under
        ``self.dir_spp_mrg_shiftSize`` (via ``m01.make_dir``) and one command.

        Args:
            core_num: forwarded as ``cpu`` to ``running_multi``.
        """
        script_dir = self.scripts
        sh_file = "%s/s05.2.spp_mrg_shiftSize.sh" % (script_dir)
        sh_work_file = "%s/s05.2.spp_mrg_shiftSize_work.sh" % (script_dir)

        script_lines = self.s05_2_spp_mrg_shiftSize()
        work_cmds = []

        sam_table = self.samInfo_pd_ChIP
        for group in set(sam_table['merge_name']):
            if self.__is_input(sam_table['merge_name'] == group):
                continue

            for subdir in ("test", "out"):
                m01.make_dir([self.dir_spp_mrg_shiftSize, group, subdir])
            work_cmds.append("sh %s  %s" % (sh_file, group))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #25
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def get_shift_size_mrg(self, core_num=8):
        """Queue and run the s05.2 "spp_mrg_shiftSize" step per merged sample.

        Groups for which ``self.__is_input`` is truthy are skipped.  For each
        remaining ``merge_name`` the ``test`` and ``out`` sub-directories
        under ``self.dir_spp_mrg_shiftSize`` are requested via
        ``m01.make_dir`` and one command is queued.

        Args:
            core_num: value passed as ``cpu`` to ``running_multi``.  Defaults
                to 8, the value that used to be hard-coded here.
        """
        sh_file = "%s/s05.2.spp_mrg_shiftSize.sh" % (self.scripts)
        sh_work_file = "%s/s05.2.spp_mrg_shiftSize_work.sh" % (self.scripts)

        l_sh_info = self.s05_2_spp_mrg_shiftSize()
        l_sh_work = []
        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_spp_mrg_shiftSize, merge_name, "test"])
            m01.make_dir([self.dir_spp_mrg_shiftSize, merge_name, "out"])
            l_sh_work.append("sh %s  %s" % (sh_file, merge_name))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #26
0
    def run_bwa(self, core_num=2):
        """Queue and run the s02 "bwa" step for every sample.

        For each value in the ``sample`` column the matching ``brief_name``
        and ``end_type`` are looked up, a ``[self.dir_bam, brief_name]``
        directory is requested via ``make_dir``, and one command is queued
        with the sample, its brief name and the ``self.M_CvtEnd``
        translation of the end type.

        Args:
            core_num: forwarded as ``cpu`` to ``running_multi``.
        """
        script_dir = self.scripts
        sh_file = "%s/s02.bwa.sh" % (script_dir)
        sh_work_file = "%s/s02.bwa_work.sh" % (script_dir)

        script_lines = self.s02_bwa()
        work_cmds = []

        sam_table = self.samInfo_pd_ChIP
        for sample in sam_table['sample']:
            row_mask = (sam_table['sample'] == sample)
            short_name = sam_table[row_mask]['brief_name'].values[0]
            end_type = sam_table[row_mask]['end_type'].values[0]
            data_type = self.M_CvtEnd[end_type]

            make_dir([self.dir_bam, short_name])
            work_cmds.append(
                "sh %s %s %s %d" % (sh_file, sample, short_name, data_type))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #27
0
    def run_bwa(self, core_num=8):
        """Queue and run the s02 "bwa" step for every sample.

        For each value in the ``sample`` column the matching ``brief_name``
        and ``end_type`` are looked up, a ``[self.dir_bam, brief_name]``
        directory is requested via ``make_dir``, and one command is queued
        with the sample, its brief name and the ``self.M_CvtEnd``
        translation of the end type.

        Args:
            core_num: value passed as ``cpu`` to ``running_multi``.  Defaults
                to 8, the value that used to be hard-coded here.
        """
        sh_file = "%s/s02.bwa.sh" % (self.scripts)
        sh_work_file = "%s/s02.bwa_work.sh" % (self.scripts)

        l_sh_info = self.s02_bwa()
        l_sh_work = []
        for samp in self.samInfo_pd_ChIP['sample']:
            idx = (self.samInfo_pd_ChIP['sample'] == samp)
            brief_name = self.samInfo_pd_ChIP[idx]['brief_name'].values[0]
            end = self.samInfo_pd_ChIP[idx]['end_type'].values[0]
            data_type = self.M_CvtEnd[end]
            make_dir([self.dir_bam, brief_name])
            l_sh_work.append(
                "sh %s %s %s %d" % (sh_file, samp, brief_name, data_type)
            )

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #28
0
 def bam2repbed(self, ext_len=300, core_num=8):
     """Queue and run the s03 "bam2bedrep" step for every sample.

     ``self.dir_bed_rep`` is requested once via ``make_dir``.  For each
     value in the ``sample`` column one command is queued with the sample's
     ``brief_name``, the ``self.M_CvtEnd`` translation of its ``end_type``
     and ``ext_len``.

     Args:
         ext_len: written into each command with ``%d`` formatting.
         core_num: value passed as ``cpu`` to ``running_multi``.  Defaults
             to 8, the value that used to be hard-coded here.
     """
     sh_file = "%s/s03.bam2bedrep.sh" % (self.scripts)
     sh_work_file = "%s/s03.bam2bedrep_work.sh" % (self.scripts)

     l_sh_info = self.s03_bam2bedrep()
     l_sh_work = []

     make_dir([self.dir_bed_rep])
     for samp in self.samInfo_pd_ChIP['sample']:
         idx = (self.samInfo_pd_ChIP['sample'] == samp)
         brief_name = self.samInfo_pd_ChIP[idx]['brief_name'].values[0]
         end = self.samInfo_pd_ChIP[idx]['end_type'].values[0]
         data_type = self.M_CvtEnd[end]

         l_sh_work.append(
             "sh %s  %s %s  %d" %
             (sh_file, brief_name, data_type, ext_len))

     my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
     my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #29
0
    def bam2repbed(self, ext_len=300, core_num=4):
        """Queue and run the s03 "bam2bedrep" step for every sample.

        ``self.dir_bed_rep`` is requested once via ``make_dir``; then one
        command per ``sample`` value is queued with the sample's
        ``brief_name``, the ``self.M_CvtEnd`` translation of its
        ``end_type`` and ``ext_len``.

        Args:
            ext_len: written into each command with ``%d`` formatting.
            core_num: forwarded as ``cpu`` to ``running_multi``.
        """
        script_dir = self.scripts
        sh_file = "%s/s03.bam2bedrep.sh" % (script_dir)
        sh_work_file = "%s/s03.bam2bedrep_work.sh" % (script_dir)

        script_lines = self.s03_bam2bedrep()
        work_cmds = []

        make_dir([self.dir_bed_rep])

        sam_table = self.samInfo_pd_ChIP
        for sample in sam_table['sample']:
            row = sam_table[(sam_table['sample'] == sample)]
            short_name = row['brief_name'].values[0]
            data_type = self.M_CvtEnd[row['end_type'].values[0]]

            work_cmds.append(
                "sh %s  %s %s  %d" %
                (sh_file, short_name, data_type, ext_len))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #30
0
    def run_macs_rep_broad(self, pvalue=0.05, ref="hs", core_num=4):
        """Queue and run the s08 "macs2BroadPeakRep" step for each replicate.

        Replicates rejected by ``self.__is_input`` are skipped.  Each
        remaining replicate gets a ``[self.dir_BroadPeak_rep, brief_name]``
        directory (via ``m01.make_dir``) and one command carrying its name,
        its ``control`` value and ``pvalue``.

        Args:
            pvalue: written into each command with ``%f`` formatting.
            ref: forwarded to ``self.s08_macs2BroadPeakRep``.
            core_num: forwarded as ``cpu`` to ``running_multi``.
        """
        script_dir = self.scripts
        sh_file = "%s/s08.macs2BroadPeakRep.sh" % (script_dir)
        sh_work_file = "%s/s08.macs2BroadPeakRep_work.sh" % (script_dir)

        script_lines = self.s08_macs2BroadPeakRep(ref)
        work_cmds = []

        sam_table = self.samInfo_pd_ChIP
        for replicate in sam_table['brief_name']:
            row_mask = (sam_table['brief_name'] == replicate)
            if self.__is_input(row_mask):
                continue

            m01.make_dir([self.dir_BroadPeak_rep, replicate])
            control = sam_table[row_mask]['control'].values[0]
            work_cmds.append(
                "sh %s  %s %s %f" % (sh_file, replicate, control, pvalue))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #31
0
    def prepare_idr_input(self, top_peak=100000, core_num=4):
        """Queue and run the s07.1 "IDR_prepare" step per merged sample.

        Groups rejected by ``self.__is_input`` are skipped.  Every other
        ``merge_name`` gets a ``[self.dir_Peak_idr, merge_name]`` directory
        (via ``m01.make_dir``) and one command carrying the group name,
        ``top_peak`` and the space-separated ``brief_name`` values of the
        group.

        Args:
            top_peak: written into each command with ``%d`` formatting.
            core_num: forwarded as ``cpu`` to ``running_multi``.
        """
        script_dir = self.scripts
        sh_file = "%s/s07.1.IDR_prepare.sh" % (script_dir)
        sh_work_file = "%s/s07.1.IDR_prepare_work.sh" % (script_dir)

        script_lines = self.s07_1_IDR_prepare()
        work_cmds = []

        sam_table = self.samInfo_pd_ChIP
        for group in set(sam_table['merge_name']):
            in_group = (sam_table['merge_name'] == group)
            if self.__is_input(in_group):
                continue

            m01.make_dir([self.dir_Peak_idr, group])
            replicates = " ".join(list(sam_table[in_group]['brief_name']))
            work_cmds.append(
                "sh %s  %s %d %s" % (sh_file, group, top_peak, replicates))

        job = m_jobs.run_jobs(sh_file, sh_work_file, script_lines, work_cmds)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #32
0
ファイル: module03_geneDensity.py プロジェクト: yzqheart/ChIP
    def div_bed_to_bins_unique_rep(self, core_num=8):
        """Queue one s13.RPM_density_rep.sh command per replicate and run them.

        For every ``brief_name`` in ``self.samInfo_pd_ChIP`` a directory is
        created under ``self.dir_RPM_bins_rep``.  The replicate's position in
        its merge group's ``'l_brief'`` list selects the matching entry of
        the group's ``'q30'`` list (both read from
        ``self.stat_Info.StatInfo``), which is written as the second script
        argument.

        Args:
            core_num: Passed as ``cpu`` to ``running_multi``.  Defaults to 8,
                the value that used to be hard-coded.
        """
        sh_file = "%s/s13.RPM_density_rep.sh" % (self.scripts)
        sh_work_file = "%s/s13.RPM_density_rep_work.sh" % (self.scripts)

        l_sh_info = self.s13_RPM_density_rep()
        l_sh_work = []

        for brief_name in self.samInfo_pd_ChIP['brief_name']:
            m01.make_dir([self.dir_RPM_bins_rep, brief_name])

            idx = (self.samInfo_pd_ChIP['brief_name'] == brief_name)
            merge_name = self.samInfo_pd_ChIP[idx]['merge_name'].values[0]

            # 'l_brief' and 'q30' are parallel lists within one merge group.
            group_stat = self.stat_Info.StatInfo[merge_name]
            idx2 = group_stat['l_brief'].index(brief_name)
            mapped_reads = group_stat['q30'][idx2]
            l_sh_work.append(
                "sh %s %s %d" % (sh_file, brief_name, mapped_reads)
            )

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #33
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def run_macs_rep_broad(self, pvalue=0.05, ref="hs", core_num=8):
        """Build and run one s08.macs2BroadPeakRep.sh command per replicate.

        Every ``brief_name`` in ``self.samInfo_pd_ChIP`` is visited; names
        for which ``self.__is_input`` returns true are skipped.  For the
        others a directory is created under ``self.dir_BroadPeak_rep`` and
        the command ``sh <script>  <brief_name> <control> <pvalue>`` is
        queued.

        Args:
            pvalue: Numeric cutoff appended to each command line.
            ref: Passed unchanged to ``self.s08_macs2BroadPeakRep``.
            core_num: Passed as ``cpu`` to ``running_multi``.  Defaults to 8,
                the value that used to be hard-coded.
        """
        sh_file = "%s/s08.macs2BroadPeakRep.sh" % (self.scripts)
        sh_work_file = "%s/s08.macs2BroadPeakRep_work.sh" % (self.scripts)

        l_sh_info = self.s08_macs2BroadPeakRep(ref)
        l_sh_work = []

        for brief_name in self.samInfo_pd_ChIP['brief_name']:
            idx = (self.samInfo_pd_ChIP['brief_name'] == brief_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_BroadPeak_rep, brief_name])
            ctrl_name = self.samInfo_pd_ChIP[idx]['control'].values[0]

            # "%g" instead of "%f": "%f" keeps only six decimals, so a small
            # cutoff such as 1e-7 would be written out as "0.000000".
            l_sh_work.append(
                "sh %s  %s %s %g" % (sh_file, brief_name, ctrl_name, pvalue)
            )

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #34
0
    def mrgbed_multi(self, core_num=4):
        """Queue one s04.2.bedmrg.multi.sh command per merge group and run them.

        Each command receives the merged path ``<dir_bed_mrg>/<merge_name>``
        followed by the ``<dir_bed_rep>/<brief_name>`` path of every row that
        belongs to that merge group.

        Args:
            core_num: Passed as ``cpu`` to ``running_multi``.
        """
        script = "%s/s04.2.bedmrg.multi.sh" % (self.scripts)
        work_script = "%s/s04.2.bedmrg.multi_work.sh" % (self.scripts)
        script_lines = self.s04_2_bedmrg_multi()

        commands = []
        for group in set(self.samInfo_pd_ChIP['merge_name']):
            in_group = (self.samInfo_pd_ChIP['merge_name'] == group)
            target = "%s/%s" % (self.dir_bed_mrg, group)

            rep_paths = []
            for replicate in self.samInfo_pd_ChIP[in_group]['brief_name']:
                rep_paths.append("%s/%s" % (self.dir_bed_rep, replicate))

            commands.append(
                "sh %s  %s %s" % (script, target, " ".join(rep_paths))
            )

        job = m_jobs.run_jobs(script, work_script, script_lines, commands)
        job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #35
0
    def mrgbed_multi(self, core_num=8):
        """Queue one s04.2.bedmrg.multi.sh command per merge group and run them.

        Each command receives the merged path ``<dir_bed_mrg>/<merge_name>``
        followed by the ``<dir_bed_rep>/<brief_name>`` path of every row that
        belongs to that merge group.

        Args:
            core_num: Passed as ``cpu`` to ``running_multi``.  Defaults to 8,
                the value that used to be hard-coded.
        """
        sh_file = "%s/s04.2.bedmrg.multi.sh" % (self.scripts)
        sh_work_file = "%s/s04.2.bedmrg.multi_work.sh" % (self.scripts)

        l_sh_info = self.s04_2_bedmrg_multi()
        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)

            merged = "%s/%s" % (self.dir_bed_mrg, merge_name)
            l_bed_rep = [
                "%s/%s" % (self.dir_bed_rep, brief_name)
                for brief_name in self.samInfo_pd_ChIP[idx]['brief_name']
            ]

            l_sh_work.append(
                "sh %s  %s %s" % (sh_file, merged, " ".join(l_bed_rep))
            )

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #36
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def prepare_idr_input(self, top_peak=100000, core_num=8):
        """Queue one s07.1.IDR_prepare.sh command per merge group and run them.

        Merge groups for which ``self.__is_input`` returns true are skipped.
        Each remaining group gets a directory under ``self.dir_Peak_idr`` and
        a command carrying the group name, ``top_peak`` and the
        space-separated ``brief_name`` values of the group.

        Args:
            top_peak: Integer written as the second script argument.
            core_num: Passed as ``cpu`` to ``running_multi``.  Defaults to 8,
                the value that used to be hard-coded.
        """
        sh_file = "%s/s07.1.IDR_prepare.sh" % (self.scripts)
        sh_work_file = "%s/s07.1.IDR_prepare_work.sh" % (self.scripts)

        l_sh_info = self.s07_1_IDR_prepare()
        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_Peak_idr, merge_name])
            list_brief = " ".join(self.samInfo_pd_ChIP[idx]['brief_name'])
            l_sh_work.append(
                "sh %s  %s %d %s"
                % (sh_file, merge_name, top_peak, list_brief)
            )

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #37
0
    def div_bed_to_bins_unique_rep(self, core_num=8):
        """Queue one s13.RPM_density_rep.sh command per replicate and run them.

        For every ``brief_name`` in ``self.samInfo_pd_ChIP`` a directory is
        created under ``self.dir_RPM_bins_rep``.  The replicate's position in
        its merge group's ``'l_brief'`` list selects the matching entry of
        the group's ``'q30'`` list (both read from
        ``self.stat_Info.StatInfo``), which is written as the second script
        argument.

        Args:
            core_num: Passed as ``cpu`` to ``running_multi``.  Defaults to 8,
                the value that used to be hard-coded.
        """
        sh_file = "%s/s13.RPM_density_rep.sh" % (self.scripts)
        sh_work_file = "%s/s13.RPM_density_rep_work.sh" % (self.scripts)

        l_sh_info = self.s13_RPM_density_rep()
        l_sh_work = []

        for brief_name in self.samInfo_pd_ChIP['brief_name']:
            m01.make_dir([self.dir_RPM_bins_rep, brief_name])

            idx = (self.samInfo_pd_ChIP['brief_name'] == brief_name)
            merge_name = self.samInfo_pd_ChIP[idx]['merge_name'].values[0]

            # 'l_brief' and 'q30' are parallel lists within one merge group.
            group_stat = self.stat_Info.StatInfo[merge_name]
            idx2 = group_stat['l_brief'].index(brief_name)
            mapped_reads = group_stat['q30'][idx2]
            l_sh_work.append(
                "sh %s %s %d" % (sh_file, brief_name, mapped_reads)
            )

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #38
0
    def run_macs_mrg_broad(self, pvalue=0.05, ref="hs", core_num=4):
        """Build and run one s09.macs2BroadPeakMrg.sh command per merge group.

        Merge groups for which ``self.__is_input`` returns true are skipped.
        For the others the command
        ``sh <script>  <merge_name> <control> <pvalue>`` is queued, where the
        control is taken from the first row of the group.

        Args:
            pvalue: Numeric cutoff appended to each command line.
            ref: Passed unchanged to ``self.s09_macs2BroadPeakMrg``.
            core_num: Passed as ``cpu`` to ``running_multi``.
        """
        sh_file = "%s/s09.macs2BroadPeakMrg.sh" % (self.scripts)
        sh_work_file = "%s/s09.macs2BroadPeakMrg_work.sh" % (self.scripts)

        l_sh_info = self.s09_macs2BroadPeakMrg(ref)
        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            # NOTE(review): the directory is created under dir_Peak_mrg even
            # though this is the broad-peak step -- confirm that a
            # broad-peak directory was not intended here.
            m01.make_dir([self.dir_Peak_mrg, merge_name])
            ctrl_name = self.samInfo_pd_ChIP[idx]['control'].values[0]

            # "%g" instead of "%f": "%f" keeps only six decimals, so a small
            # cutoff such as 1e-7 would be written out as "0.000000".
            l_sh_work.append(
                "sh %s  %s %s %g" % (sh_file, merge_name, ctrl_name, pvalue)
            )

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #39
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def run_macs_mrg_broad(self, pvalue=0.05, ref="hs", core_num=8):
        """Build and run one s09.macs2BroadPeakMrg.sh command per merge group.

        Merge groups for which ``self.__is_input`` returns true are skipped.
        For the others the command
        ``sh <script>  <merge_name> <control> <pvalue>`` is queued, where the
        control is taken from the first row of the group.

        Args:
            pvalue: Numeric cutoff appended to each command line.
            ref: Passed unchanged to ``self.s09_macs2BroadPeakMrg``.
            core_num: Passed as ``cpu`` to ``running_multi``.  Defaults to 8,
                the value that used to be hard-coded.
        """
        sh_file = "%s/s09.macs2BroadPeakMrg.sh" % (self.scripts)
        sh_work_file = "%s/s09.macs2BroadPeakMrg_work.sh" % (self.scripts)

        l_sh_info = self.s09_macs2BroadPeakMrg(ref)
        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            # NOTE(review): the directory is created under dir_Peak_mrg even
            # though this is the broad-peak step -- confirm that a
            # broad-peak directory was not intended here.
            m01.make_dir([self.dir_Peak_mrg, merge_name])
            ctrl_name = self.samInfo_pd_ChIP[idx]['control'].values[0]

            # "%g" instead of "%f": "%f" keeps only six decimals, so a small
            # cutoff such as 1e-7 would be written out as "0.000000".
            l_sh_work.append(
                "sh %s  %s %s %g" % (sh_file, merge_name, ctrl_name, pvalue)
            )

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #40
0
ファイル: module03_geneDensity.py プロジェクト: yzqheart/ChIP
    def run_anno_peak(self,
                      TSS_genebody_up, TSS_genebody_down, TSS_promoter_up,
                      TSS_promoter_down, ext_binlen=50, body_bincnt=100,
                      tss_binlen=1, core_num=8):
        """Queue one s12.PeakGeneRegion.sh command per merge group and run them.

        All region and bin parameters are forwarded to
        ``self.s12_PeakGeneRegion``, which produces the script content.  For
        every merge group a directory is created under both
        ``self.dir_Peak_mrg_TSS`` and ``self.dir_Peak_mrg_Gene``.

        Args:
            TSS_genebody_up, TSS_genebody_down, TSS_promoter_up,
            TSS_promoter_down: Forwarded positionally to
                ``self.s12_PeakGeneRegion``.
            ext_binlen, body_bincnt, tss_binlen: Forwarded by keyword to
                ``self.s12_PeakGeneRegion``.
            core_num: Passed as ``cpu`` to ``running_multi``.  Defaults to 8,
                the value that used to be hard-coded.
        """
        sh_file = "%s/s12.PeakGeneRegion.sh" % (self.scripts)
        sh_work_file = "%s/s12.PeakGeneRegion_work.sh" % (self.scripts)

        # Forward the caller's bin settings; the literals 50/100/1 used to be
        # passed here, which silently discarded any non-default argument.
        l_sh_info = self.s12_PeakGeneRegion(
            TSS_genebody_up, TSS_genebody_down, TSS_promoter_up,
            TSS_promoter_down, ext_binlen=ext_binlen,
            body_bincnt=body_bincnt, tss_binlen=tss_binlen)

        l_sh_work = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            m01.make_dir([self.dir_Peak_mrg_TSS, merge_name])
            m01.make_dir([self.dir_Peak_mrg_Gene, merge_name])
            l_sh_work.append("sh %s %s" % (sh_file, merge_name))

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #41
0
    def get_idr_Peak(self, core_num=4):
        """Queue one s07.3.IDR_pass_Peaks.sh command per row of the IDR table.

        The table ``<dir_StatInfo>/IDR_result.<prefix>.xls`` is read, where
        ``<prefix>`` is ``self.sam_ChIPinfo`` without its last dot-separated
        component.  The first line is discarded (presumably a header); on
        each following line the first whitespace-separated field is used as
        the merge name and the second as an integer peak count.

        Args:
            core_num: Passed as ``cpu`` to ``running_multi``.

        Raises:
            IndexError: If a non-blank row has fewer than two fields.
            ValueError: If the second field of a row is not an integer.
        """
        prefix = ".".join(self.sam_ChIPinfo.split(".")[:-1])
        file_idr_out = "%s/IDR_result.%s.xls" % (self.dir_StatInfo, prefix)

        sh_file = "%s/s07.3.IDR_pass_Peaks.sh" % (self.scripts)
        sh_work_file = "%s/s07.3.IDR_pass_Peaks_work.sh" % (self.scripts)

        l_sh_work = []
        # The context manager closes the table even when a row fails to
        # parse; the handle used to be left open.
        with open(file_idr_out, "r") as f_idr_out:
            f_idr_out.readline()  # discard the first line
            for line in f_idr_out:
                f = line.split()
                if not f:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                merge_name = f[0]
                peak_cnt = int(f[1])
                l_sh_work.append(
                    "sh %s  %s %d" % (sh_file, merge_name, peak_cnt)
                )

        l_sh_info = self.s07_3_IDR_passPeaks()

        my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
        my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #42
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
 def get_idr_Peak(self, core_num=8):
     """Queue one s07.3.IDR_pass_Peaks.sh command per row of the IDR table.

     The table ``<dir_StatInfo>/IDR_result.<prefix>.xls`` is read, where
     ``<prefix>`` is ``self.sam_ChIPinfo`` without its last dot-separated
     component.  The first line is discarded (presumably a header); on each
     following line the first whitespace-separated field is used as the
     merge name and the second as an integer peak count.

     Args:
         core_num: Passed as ``cpu`` to ``running_multi``.  Defaults to 8,
             the value that used to be hard-coded.

     Raises:
         IndexError: If a non-blank row has fewer than two fields.
         ValueError: If the second field of a row is not an integer.
     """
     prefix = ".".join(self.sam_ChIPinfo.split(".")[:-1])
     file_idr_out = "%s/IDR_result.%s.xls" % (self.dir_StatInfo, prefix)

     sh_file = "%s/s07.3.IDR_pass_Peaks.sh" % (self.scripts)
     sh_work_file = "%s/s07.3.IDR_pass_Peaks_work.sh" % (self.scripts)

     l_sh_work = []
     # The context manager closes the table even when a row fails to parse;
     # the handle used to be left open.
     with open(file_idr_out, "r") as f_idr_out:
         f_idr_out.readline()  # discard the first line
         for line in f_idr_out:
             f = line.split()
             if not f:
                 # Tolerate blank lines (e.g. a trailing newline).
                 continue
             merge_name = f[0]
             peak_cnt = int(f[1])
             l_sh_work.append(
                 "sh %s  %s %d" % (sh_file, merge_name, peak_cnt)
             )

     l_sh_info = self.s07_3_IDR_passPeaks()

     my_job = m_jobs.run_jobs(sh_file, sh_work_file, l_sh_info, l_sh_work)
     my_job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #43
0
    def run_idr(self, core_num=4):
        """Queue and run the three families of IDR comparisons.

        For every merge group that ``self.__is_input`` does not flag, three
        kinds of s07.2.IDR_usingMacs2Peak.sh commands are collected:

        * ``rep``: every unordered pair of the group's replicates;
        * ``selfPseudoReps``: ``<replicate>.pr1`` against ``<replicate>.pr2``;
        * ``pooledPseudoReps``: ``<merge_name>.pr1`` against
          ``<merge_name>.pr2``.

        All three job objects are built first and then run in that order.

        Args:
            core_num: Passed as ``cpu`` to each ``running_multi`` call.
        """
        script = "%s/s07.2.IDR_usingMacs2Peak.sh" % (self.scripts)
        work_rep = (
            "%s/s07.2.IDR_usingMacs2Peak_rep_work.sh" % (self.scripts)
        )
        work_self = (
            "%s/s07.2.IDR_usingMacs2Peak_selfReps_work.sh" % (self.scripts)
        )
        work_pool = (
            "%s/s07.2.IDR_usingMacs2Peak_poolReps_work.sh" % (self.scripts)
        )

        script_lines = self.s07_2_usingMacs2Peak()
        cmds_rep, cmds_self, cmds_pool = [], [], []

        for group in set(self.samInfo_pd_ChIP['merge_name']):
            mask = (self.samInfo_pd_ChIP['merge_name'] == group)
            if self.__is_input(mask):
                continue

            m01.make_dir([self.dir_Peak_idr, group])
            replicates = list(self.samInfo_pd_ChIP[mask]['brief_name'])

            # IDR analysis on original replicates: all pairs (i < j).
            kind = "rep"
            m01.make_dir([self.dir_Peak_idr, group, kind])
            for pos, first in enumerate(replicates):
                for second in replicates[pos + 1:]:
                    cmds_rep.append(
                        "sh %s  %s %s %s %s"
                        % (script, first, second, group, kind)
                    )

            # IDR analysis on self-pseudoreplicates.
            kind = "selfPseudoReps"
            m01.make_dir([self.dir_Peak_idr, group, kind])
            cmds_self.extend(
                "sh %s   %s %s %s %s"
                % (script, "%s.pr1" % (rep), "%s.pr2" % (rep), group, kind)
                for rep in replicates
            )

            # IDR analysis on pooled-pseudoreplicates.
            kind = "pooledPseudoReps"
            m01.make_dir([self.dir_Peak_idr, group, kind])
            cmds_pool.append(
                "sh %s   %s %s %s %s"
                % (script, "%s.pr1" % (group), "%s.pr2" % (group),
                   group, kind)
            )

        # Build every job object before starting the first one.
        jobs = [
            m_jobs.run_jobs(script, work_file, script_lines, commands)
            for work_file, commands in (
                (work_rep, cmds_rep),
                (work_self, cmds_self),
                (work_pool, cmds_pool),
            )
        ]
        for job in jobs:
            job.running_multi(cpu=core_num, is_debug=self.is_debug)
コード例 #44
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def sort_bdg(self, core_num=8):
        """Queue and run s10.sortbdg.sh for every peak directory.

        For each merge group that ``self.__is_input`` does not flag, a
        command ``sh <script>  <name> <directory>`` is collected for

        * every replicate with ``self.dir_Peak_rep``,
        * the merge group with ``self.dir_Peak_mrg``,
        * every replicate with ``self.dir_BroadPeak_rep``,
        * the merge group with ``self.dir_BroadPeak_mrg``.

        The four job objects are built first and then run in that order.

        Args:
            core_num: Passed as ``cpu`` to each ``running_multi`` call.
                Defaults to 8, the value that used to be hard-coded.
        """
        sh_file = "%s/s10.sortbdg.sh" % (self.scripts)
        sh_work_repPeak_file = (
            "%s/s10.1.sortbdg.repPeak_work.sh" % (self.scripts)
        )
        sh_work_mrgPeak_file = (
            "%s/s10.2.sortbdg.mrgPeak_work.sh" % (self.scripts)
        )
        sh_work_repBroadPeak_file = (
            "%s/s10.3.sortbdg.repBroadPeak_work.sh" % (self.scripts)
        )
        sh_work_mrgBroadPeak_file = (
            "%s/s10.4.sortbdg.mrgBroadPeak_work.sh" % (self.scripts)
        )

        l_sh_info = self.s10_sortbdg()
        l_sh_work_repPeak = []
        l_sh_work_mrgPeak = []
        l_sh_work_repBroadPeak = []
        l_sh_work_mrgBroadPeak = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            # The two replicate lists are independent, so one pass over the
            # replicates fills both while keeping each list's order.
            for brief in list(self.samInfo_pd_ChIP[idx]['brief_name']):
                l_sh_work_repBroadPeak.append(
                    "sh %s  %s %s" % (sh_file, brief, self.dir_BroadPeak_rep)
                )
                l_sh_work_repPeak.append(
                    "sh %s  %s %s" % (sh_file, brief, self.dir_Peak_rep)
                )

            l_sh_work_mrgBroadPeak.append(
                "sh %s  %s %s" % (sh_file, merge_name, self.dir_BroadPeak_mrg)
            )
            l_sh_work_mrgPeak.append(
                "sh %s  %s %s" % (sh_file, merge_name, self.dir_Peak_mrg)
            )

        my_job_repPeak = m_jobs.run_jobs(
            sh_file, sh_work_repPeak_file, l_sh_info, l_sh_work_repPeak)
        my_job_mrgPeak = m_jobs.run_jobs(
            sh_file, sh_work_mrgPeak_file, l_sh_info, l_sh_work_mrgPeak)
        my_job_repBroadPeak = m_jobs.run_jobs(
            sh_file, sh_work_repBroadPeak_file, l_sh_info,
            l_sh_work_repBroadPeak)
        my_job_mrgBroadPeak = m_jobs.run_jobs(
            sh_file, sh_work_mrgBroadPeak_file, l_sh_info,
            l_sh_work_mrgBroadPeak)

        my_job_repPeak.running_multi(cpu=core_num, is_debug=self.is_debug)
        my_job_mrgPeak.running_multi(cpu=core_num, is_debug=self.is_debug)
        my_job_repBroadPeak.running_multi(
            cpu=core_num, is_debug=self.is_debug)
        my_job_mrgBroadPeak.running_multi(
            cpu=core_num, is_debug=self.is_debug)
コード例 #45
0
ファイル: module02_call_peaks.py プロジェクト: yzqheart/ChIP
    def run_idr(self, core_num=8):
        """Queue and run the three families of IDR comparisons.

        For every merge group that ``self.__is_input`` does not flag, three
        kinds of s07.2.IDR_usingMacs2Peak.sh commands are collected:

        * ``rep``: every unordered pair of the group's replicates;
        * ``selfPseudoReps``: ``<replicate>.pr1`` against ``<replicate>.pr2``;
        * ``pooledPseudoReps``: ``<merge_name>.pr1`` against
          ``<merge_name>.pr2``.

        All three job objects are built first and then run in that order.

        Args:
            core_num: Passed as ``cpu`` to each ``running_multi`` call.
                Defaults to 8, the value that used to be hard-coded.
        """
        sh_file = "%s/s07.2.IDR_usingMacs2Peak.sh" % (self.scripts)
        sh_work_file_rep = (
            "%s/s07.2.IDR_usingMacs2Peak_rep_work.sh" % (self.scripts)
        )
        sh_work_file_self = (
            "%s/s07.2.IDR_usingMacs2Peak_selfReps_work.sh" % (self.scripts)
        )
        sh_work_file_pool = (
            "%s/s07.2.IDR_usingMacs2Peak_poolReps_work.sh" % (self.scripts)
        )

        l_sh_info = self.s07_2_usingMacs2Peak()
        l_sh_work_rep = []
        l_sh_work_self = []
        l_sh_work_pool = []

        for merge_name in set(self.samInfo_pd_ChIP['merge_name']):
            idx = (self.samInfo_pd_ChIP['merge_name'] == merge_name)
            if self.__is_input(idx):
                continue

            m01.make_dir([self.dir_Peak_idr, merge_name])
            l_brief = list(self.samInfo_pd_ChIP[idx]['brief_name'])

            # IDR analysis on original replicates.  With fewer than two
            # replicates the loops simply produce no pair, so no explicit
            # length check is needed.
            subtype = "rep"
            m01.make_dir([self.dir_Peak_idr, merge_name, subtype])
            for i in range(len(l_brief) - 1):
                for j in range(i + 1, len(l_brief)):
                    l_sh_work_rep.append(
                        "sh %s  %s %s %s %s"
                        % (sh_file, l_brief[i], l_brief[j],
                           merge_name, subtype)
                    )

            # IDR analysis on self-pseudoreplicates.
            subtype = "selfPseudoReps"
            m01.make_dir([self.dir_Peak_idr, merge_name, subtype])
            for sam in l_brief:
                in_sam1 = "%s.pr1" % (sam)
                in_sam2 = "%s.pr2" % (sam)
                l_sh_work_self.append(
                    "sh %s   %s %s %s %s"
                    % (sh_file, in_sam1, in_sam2, merge_name, subtype)
                )

            # IDR analysis on pooled-pseudoreplicates.
            subtype = "pooledPseudoReps"
            m01.make_dir([self.dir_Peak_idr, merge_name, subtype])
            in_sam1 = "%s.pr1" % (merge_name)
            in_sam2 = "%s.pr2" % (merge_name)
            l_sh_work_pool.append(
                "sh %s   %s %s %s %s"
                % (sh_file, in_sam1, in_sam2, merge_name, subtype)
            )

        my_job_rep = m_jobs.run_jobs(
            sh_file, sh_work_file_rep, l_sh_info, l_sh_work_rep)
        my_job_self = m_jobs.run_jobs(
            sh_file, sh_work_file_self, l_sh_info, l_sh_work_self)
        my_job_pool = m_jobs.run_jobs(
            sh_file, sh_work_file_pool, l_sh_info, l_sh_work_pool)

        my_job_rep.running_multi(cpu=core_num, is_debug=self.is_debug)
        my_job_self.running_multi(cpu=core_num, is_debug=self.is_debug)
        my_job_pool.running_multi(cpu=core_num, is_debug=self.is_debug)