def step1_generate_matrix(conf_dict, logfile):
    '''
    Build the two per-peak matrices consumed by the later steps.

    1. Co-binding matrix: one ``bedtools intersect -c`` stage per entry of
       conf_dict['General']['peakfilenames'] is chained into a single shell
       pipeline.  The first stage reads the HMR peak file, every following
       stage reads the previous stage from stdin, and the final output is
       redirected to ``<outname>_peakov.bed``.
    2. HM signal matrix: for every line of the HMR peak file a window of
       conf_dict['options']['ext'] bp on each side of the peak centre is
       passed to bwsigAve() once per file in
       conf_dict['General']['signalfile']; the returned values are appended
       to the original columns and written to ``<outname>_HMsig.bed``.

    conf_dict -- configuration dictionary (reads the 'General' and 'options'
                 sections; nothing is written back)
    logfile   -- log destination forwarded to wlog()/rlogonly()

    Returns conf_dict unchanged.
    Raises ValueError when 'peakfilenames' is empty, because no intersect
    pipeline can be built.
    '''
    general = conf_dict['General']

    ### generate TF overlap matrix using bed tools
    wlog("generate TF overlap matrix", logfile)
    peaknames = general['peakfilenames']
    if not peaknames:
        raise ValueError(
            "conf_dict['General']['peakfilenames'] is empty, "
            "cannot build the bedtools intersect pipeline")
    # NOTE(review): paths are interpolated unquoted into a shell pipeline;
    # this assumes they contain no spaces or shell metacharacters -- confirm
    # against how rlogonly() executes the command.
    stages = []
    for idx, name in enumerate(peaknames):
        peakbed = general['peakFolder'] + name + ".bed"
        if idx == 0:
            stages.append('%s intersect -a %s -b %s -c ' %
                          (general['bedtools'], general['HMRpeak'], peakbed))
        else:
            # "-a -" makes this stage read the previous stage's output
            stages.append('| %s intersect -a - -b %s -c ' %
                          (general['bedtools'], peakbed))
    cmd = "".join(stages) + '> %s' % (general['outname'] + "_peakov.bed")
    rlogonly(cmd, logfile)

    ### generate HMsig
    wlog("generate HMsignal matrix", logfile)
    ext = conf_dict['options']['ext']
    with open(general['HMRpeak']) as inf:
        with open(general['outname'] + "_HMsig.bed", 'w') as outf:
            for line in inf:
                ll = line.split()
                # floor division keeps the centre an integer coordinate on
                # both Python 2 and Python 3
                center = (int(ll[1]) + int(ll[2])) // 2
                start = max(0, center - ext)  # never extend left of position 0
                end = center + ext
                signals = [
                    bwsigAve(bwsigfile, ll[0], start, end,
                             general['bwsummary'])
                    for bwsigfile in general['signalfile']
                ]
                outf.write("\t".join(map(str, ll + signals)) + "\n")
    return conf_dict
def step0_check_data(conf_dict, logfile):
    '''
    step0: validate the input data and complete the run parameters.

    Checks, in order:
      * the HMR peak file (no "~", made absolute with 'startdir', exists,
        ends with .bed, accepted by checkbedformat());
      * every signal bigWig listed in conf_dict['General']['signal'];
        missing files are skipped with a warning, the rest are collected in
        'signalfile' / 'signalname' (at least 1 and at most 4 are accepted);
      * the peak folder and, in "signal" mode, the bw folder; any problem
        with the bw folder falls back to "binary" mode;
      * the candidate peak files (*.bed in the peak folder that pass
        checkbedformat()), stored in 'peakfilenames'; the names finally
        used are stored in 'usefilename' and written to
        ``<outname>_cofactor_candidate_list.txt``;
      * the options 'ext', 'Pvalue', 'Alpha', 'Lambda', 'TopNcofactors',
        replacing invalid values by defaults (1000, 0.001, 0.5, -, 5);
      * the helper programs: bigWigSummary build for the platform,
        bedtools (bundled copy if present) and pdflatex.

    conf_dict -- configuration dictionary, updated in place
    logfile   -- log destination forwarded to wlog()/ewlog()

    Returns the updated conf_dict.
    '''
    general = conf_dict['General']
    options = conf_dict['options']

    ### check data path , format ,
    if "~" in general['HMRpeak']:
        ewlog(
            'require absolute path for HMRpeak bed file, HMRpeak file cannot contain "~", current HMRpeak file is %s'
            % (general['HMRpeak']), logfile)
    # 'signal' is iterated as a collection of paths below, so every path has
    # to be inspected; a plain `"~" in signal` would only test membership.
    if any("~" in bwpath for bwpath in general['signal']):
        ewlog(
            'require absolute path for HMsignal bigwig file, signal file cannot contain "~", current signal file(s): %s'
            % (general['signal'], ), logfile)
    if not general['HMRpeak'].startswith('/'):
        general['HMRpeak'] = general['startdir'] + general['HMRpeak']
    if not os.path.isfile(general['HMRpeak']):
        ewlog("HMRpeak file %s not found" % (general['HMRpeak']), logfile)
    if not general['HMRpeak'].endswith('.bed'):
        ewlog('extenion of HMR peak file is not .bed', logfile)
    checkbed = checkbedformat(general['HMRpeak'], 1000)
    if checkbed == "fail":
        ewlog("HMRpeak file is not a bed file", logfile)
    elif checkbed == "lesspeak":
        # logfile passed like every other ewlog() call in this function
        ewlog("HMRpeak file contains less than 1000 peaks", logfile)

    general['signalname'] = []
    general['signalfile'] = []
    for bwsignalfile in general['signal']:
        if not bwsignalfile.startswith('/'):
            bwsignalfile = general['startdir'] + bwsignalfile
        if not os.path.isfile(bwsignalfile):
            wlog("signal bw file %s not found, ignored" % (bwsignalfile),
                 logfile)
            continue
        basename = bwsignalfile.split("/")[-1]
        if bwsignalfile.endswith(".bw"):
            signalname = basename[:-3]
        elif bwsignalfile.endswith(".bigwig"):
            signalname = basename[:-7]
        else:
            wlog('[WARNING] extension of signal bw file is not bw/bigwig',
                 logfile)
            signalname = basename
        general['signalfile'].append(bwsignalfile)
        general['signalname'].append(signalname)
    if not general['signalfile']:
        ewlog("no signal bw file valid, exit", logfile)
    elif len(general['signalfile']) > 4:
        ewlog(
            "maximum signal bw file is limited to 4. There were %s signal file inputed, exit"
            % (len(general['signalfile'])), logfile)

    ### check TFpeak folder
    if "~" in general['peakFolder']:
        ewlog(
            'require absolute path for peak/track Folder, Folder cannot contain "~", current Folder is %s'
            % (general['peakFolder']), logfile)
    if not general['peakFolder'].startswith('/'):
        general['peakFolder'] = general['startdir'] + general['peakFolder']
    if not general['peakFolder'].endswith('/'):
        general['peakFolder'] += "/"
    if not os.path.isdir(general['peakFolder']):
        ewlog("Folder %s not found" % (general['peakFolder']), logfile)

    if general['mode'] == "signal":
        wlog("signal mode is activated", logfile)
        if general['bwfolder']:
            wlog("bwFolder is specified, checking data for signal mode",
                 logfile)
            if "~" in general['bwfolder']:
                wlog(
                    'require absolute path for bwFolder, bwFolder cannot contain "~", current Folder is %s, use peak mode'
                    % (general['bwfolder']), logfile)
                general['mode'] = "binary"
            else:
                if not general['bwfolder'].startswith('/'):
                    general['bwfolder'] = (general['startdir'] +
                                           general['bwfolder'])
                if not general['bwfolder'].endswith('/'):
                    general['bwfolder'] += "/"
                if not os.path.isdir(general['bwfolder']):
                    # report the folder that is actually missing
                    wlog(
                        "bwFolder %s not found, use binary mode" %
                        (general['bwfolder']), logfile)
                    general['mode'] = "binary"
        else:
            wlog("bwfolder is not specified, use binary mode", logfile)
            general['mode'] = "binary"
    else:
        wlog("binary mode is activaed", logfile)
        # any value other than "signal" is handled as binary mode; storing it
        # guarantees that 'usefilename' is filled in below
        general['mode'] = "binary"

    wlog(
        "Check the peak.bed files in the Folder, only '.bed' files with >1000 peaks are included in the following analysis",
        logfile)
    general['peakfilenames'] = []
    for f in os.listdir(general['peakFolder']):
        peakpath = general['peakFolder'] + f
        if f.endswith(".bed") and os.path.isfile(peakpath):
            # validate the candidate file itself, not the HMR peak file
            if checkbedformat(peakpath, 1000) == "pass":
                general['peakfilenames'].append(f[:-4])
    if not general['peakfilenames']:
        ewlog(
            "no peak file (cofactor candidate) in (bed format & >1000peaks) are included, exit",
            logfile)

    if general['mode'] == "signal":
        general['bwfilenames'] = []
        for f in os.listdir(general['bwfolder']):
            if f.endswith(".bw") and os.path.isfile(general['bwfolder'] + f):
                general['bwfilenames'].append(f[:-3])
        ### compare the name from bwfiles and peak files
        peaknameset = set(general['peakfilenames'])
        general['usefilename'] = [
            name for name in general['bwfilenames'] if name in peaknameset
        ]
        ### if less than 50% peakfiles share name with bwfiles, change back to peak mode
        if len(general['usefilename']) < len(general['peakfilenames']) * 0.5:
            general['mode'] = "binary"
            wlog(
                "the number of shared peak&bw files is less than half of the number of peakfiles, use binary mode",
                logfile)
        else:
            wlog("all checks for signal mode passed, use signal mode",
                 logfile)
    if general['mode'] == "binary":
        general['usefilename'] = general['peakfilenames']
    wlog(
        "%s cofactor candidates are included" %
        (len(general['usefilename'])), logfile)

    with open(general['outname'] + "_cofactor_candidate_list.txt",
              'w') as outf:
        for cofactor in general['usefilename']:
            outf.write(cofactor + "\n")

    ### check options
    wlog('check option: ', logfile)
    try:
        ext = int(options['ext'])
    except (ValueError, TypeError):
        wlog(
            "extend length %s is not valid, use default value: 1000bp" %
            (options['ext']), logfile)
        ext = 1000
    else:
        wlog("extend length for HMsignal is %s bp" % (ext), logfile)
    options['ext'] = ext

    try:
        pvalue = float(options['Pvalue'])
    except (ValueError, TypeError):
        wlog(
            "input Pvalue %s is not recognized, use default Pvalue=0.001" %
            (options['Pvalue']), logfile)
        options['Pvalue'] = 0.001
    else:
        wlog("use Pvalue = %s as cutoff" % (str(pvalue)), logfile)
    # a valid value is left stored exactly as it was supplied
    if float(options['Pvalue']) >= 1:
        wlog(
            "input Pvalue %s is not valid, use default Pvalue=0.001" %
            (options['Pvalue']), logfile)
        options['Pvalue'] = 0.001

    try:
        usealpha = float(options['Alpha'])
    except (ValueError, TypeError):
        wlog(
            "input alpha (for elastic-net) %s is not valid, use alpha=0.5" %
            (options['Alpha']), logfile)
        # actually apply the default announced in the message
        options['Alpha'] = 0.5
    else:
        if usealpha >= 1:
            wlog("alpha (for elastic-net) cannot be >=1, use alpha=0.5",
                 logfile)
            options['Alpha'] = 0.5
        else:
            wlog("Alpha (for elastic-net) = %s" % (str(usealpha)), logfile)
            options['Alpha'] = usealpha

    wlog("Lambda choice is %s" % (options['Lambda']), logfile)

    if options['TopNcofactors'] == "all":
        wlog("all significant co-factors will be output", logfile)
    else:
        try:
            topTF = int(options['TopNcofactors'])
        except (ValueError, TypeError):
            wlog(
                "the topN number %s is not valid, output top5 co-factors" %
                (options['TopNcofactors']), logfile)
            options['TopNcofactors'] = 5
        else:
            wlog(
                "the topN number %s will be output" %
                (options['TopNcofactors']), logfile)
            options['TopNcofactors'] = topTF

    ### select the bigWigSummary build shipped inside the HMRpipe package
    OS = platform.system()
    if OS == "Linux":
        bwsum_software = "bigWigSummary_linux"
    elif OS == "Darwin":
        bwsum_software = "bigWigSummary_mac"
    else:
        wlog(
            "detected system is nither linux nor mac, try linux version of bigWigSummary",
            logfile)
        bwsum_software = "bigWigSummary_linux"
    general['bwsummary'] = HMRpipe.__path__[0] + "/%s" % bwsum_software

    # prefer a bedtools binary inside the package, else rely on PATH
    if os.path.isfile(HMRpipe.__path__[0] + "/bedtools"):
        general['bedtools'] = HMRpipe.__path__[0] + "/bedtools"
    else:
        general['bedtools'] = "bedtools"

    ### check pdflatex
    # an empty first element of sp()'s result is treated as "not installed"
    if sp('pdflatex --help')[0] == "":
        wlog(
            'pdflatex was not installed, ncHMR_detector is still processing but no summary report generated',
            logfile)
        general['latex'] = 0
    else:
        general['latex'] = 1
    return conf_dict
def step3_summary(conf_dict, logfile):
    '''
    step3: collect the result files and write the LaTeX summary report.

    * moves ``<outname>_NCsummary.txt``, ``<outname>_elnet_lambdaSelection.pdf``
      and, if present, ``<outname>_cofactor_HMsignal.pdf`` into ``summary/``;
    * moves the intermediate files (HMsig / peakov bed files, candidate list,
      filterNC table) into ``tmpResults/``;
    * writes ``summary/<outname>_summary.tex`` describing the parameters, the
      elastic-net lambda selection, the detected co-factors (read from the
      NCsummary table) and the output files;
    * when conf_dict['General']['latex'] == 1, changes into ``summary/``,
      runs pdflatex twice, copies the pdf to the parent directory and removes
      the .aux/.log/.toc files.

    conf_dict -- configuration dictionary (read only)
    logfile   -- log destination forwarded to wlog()

    Returns conf_dict unchanged.

    NOTE(review): when pdflatex is run the working directory is left at
    ``summary/`` on return -- confirm that callers expect this.
    '''
    general = conf_dict['General']
    options = conf_dict['options']
    outname = general['outname']

    wlog('collect results', logfile)
    summarydir = 'summary/'
    createDIR(summarydir)
    sp("mv %s_NCsummary.txt %s" % (outname, summarydir))
    sp("mv %s_elnet_lambdaSelection.pdf %s" % (outname, summarydir))
    if os.path.isfile("%s_cofactor_HMsignal.pdf" % outname):
        sp("mv %s_cofactor_HMsignal.pdf %s" % (outname, summarydir))

    tmpresult = 'tmpResults/'
    createDIR(tmpresult)
    for suffix in ("_HMsig.bed", "_peakov.bed",
                   "_cofactor_candidate_list.txt", "_filterNC.txt"):
        sp("mv %s%s %s" % (outname, suffix, tmpresult))

    wlog('generate summary documents', logfile)
    # The LaTeX templates are raw strings so that every backslash reaches the
    # .tex file literally; in a normal literal "\u", "\t", "\b" ... would be
    # read as (or rejected as) Python escape sequences.
    doc = []

    ### initiate
    doc.append(r"""\documentclass[11pt,a4paper]{article}
\usepackage{tabularx}
\usepackage[english]{babel}
\usepackage{array}
\usepackage{graphicx}
\usepackage{color}
\DeclareGraphicsExtensions{.eps,.png,.pdf,.ps}
\begin{document}
\title{Summary reports of non-classical function detection of : %s}
\vspace{-1cm}
\maketitle
\tableofcontents
\newpage
\newpage
\section{Data description}
\begin{quotation}
Table 1 mainly describes the input files, parameters and options.
\end{quotation}
\begin{table}[h]
\small
\caption{parameter description}\label{bstable}
\begin{tabularx}{\textwidth}{ |X|l| }
""" % (strlatexformat(outname)))

    ### table1 prepare parameter
    NcoTF = len(general['peakfilenames'])
    doc.append(r"""
\hline
parameter & value \\
\hline
output name & %s \\
\hline
HMRpeak(peak filename) & %s \\
\hline
mode & %s \\
\hline
HM signal(bw filename) & \begin{tabular}[c]{@{}l@{}}%s\end{tabular} \\
\hline
\#cofactor candidates & %s \\
\hline
options & value \\
\hline
extend size & %sbp \\
\hline
Alpha (Elastic net) & %s \\
\hline
Pvalue cutoff & %s \\
\hline
topN cofactors & %s \\
\hline
""" % (strlatexformat(outname),
       strlatexformat(general['HMRpeak'].split("/")[-1]),
       general['mode'],
       # signal names are stacked in one cell, separated by LaTeX "\\"
       strlatexformat(r"\\ ".join(general['signalname'])),
       str(NcoTF),
       str(options['ext']),
       str(options['Alpha']),
       str(options['Pvalue']),
       str(options['TopNcofactors'])))
    doc.append(r"""
\end{tabularx}
\end{table}
""")

    ### cross validation in elastic net
    doc.append(r"""
\newpage
\newpage
\section{ElasticNet co-factor selection}
In this step we use a feature selection (elastic-net. Zou, H. and Hastie T. (2005) to select potential co-factors which corresponded to the non-classical function. Below shows the cross-validation curve for the decison of lambda in elastic-net for each histone modification substrate.
\begin{figure}[h]
\caption{cross-validation curve for lambda decision} \label{fig:profileunion}
\setlength{\abovecaptionskip}{0pt}
\setlength{\belowcaptionskip}{10pt}
\centering
{\includegraphics[width=0.8\textwidth]{%s}}
\end{figure}
""" % (outname + "_elnet_lambdaSelection.pdf"))

    # the context manager closes the table in both branches
    with open("summary/" + outname + "_NCsummary.txt") as inf_ncsummary:
        line = inf_ncsummary.readline()
        if line.startswith("no non-classical function detected"):
            doc.append(r"""
\newpage
\newpage
\section{potential co-factors corresponded to non-classical function}
No significant co-factor was detected, indicating that the non-classical function of the HMR was not exist or none of the existing factor candidates act as a co-factor of the non-classical function.
""")
        else:
            # both counts are "number of lines minus one" of the moved tables
            n_predicted = int(
                sp("wc -l tmpResults/%s_filterNC.txt" %
                   (outname))[0].split()[0]) - 1
            n_listed = int(
                sp("wc -l summary/%s_NCsummary.txt" %
                   (outname))[0].split()[0]) - 1
            doc.append(r"""
\newpage
\newpage
\section{potential co-factors corresponded to non-classical function}
In summary, %s factors were predicted to potentially act as a co-factor of the non-classical function. The top%s co-factors were listed.
\subsection{summary of co-factors}
\begin{quotation}
The corresponded histone modification substrate (HMsubstrate), empirical P-value, R-square (ordered) and the number of non-classical (NC) sites for each potential co-factor were listed below. The empirical P-value was calculated based on the comparison of foreground (observed) R-square and background R-square (distribution of random R-square generated from the 1,000 permutations of co-binding events) for each potential co-factor. The non-classical (NC) sites were defined by lower HMsubstrate signal (using Otus' method) and co-binding events of each potential co-factor.
\end{quotation}
\begin{table}[h]
\small
\caption{cofactor summary}\label{bstable}
\begin{tabular}{ |l|l|l|l|l| }
\hline
co-factor & HMsubstrate & Pval & Rsquare & NCsites \\
""" % (n_predicted, n_listed))
            # one table row per remaining line of the NCsummary table
            for line in inf_ncsummary:
                if line.startswith("TFname"):
                    continue
                ll = line.split()
                doc.append(r"""\hline
%s & %s & %s & %s & %s \\
""" % (strlatexformat(ll[0]), strlatexformat(ll[1]), ll[2],
                    round(float(ll[3]), 3), ll[5]))
            doc.append(r"""
\hline
\end{tabular}
\end{table}
\newpage
\newpage
\subsection{Boxplot of HM on non-classical and classic sites}
\begin{quotation}
Boxplot was generated to compare the difference of the histone mark (HM) signal on either non-classical or classic sites(peak). The non-classical sites were defined by lower HM signal (using Otus' method) and co-binding events of each potential co-factor. The boxplot corresponded to top co-factors were displayed.
\end{quotation}
\begin{figure}[h]
\caption{boxplot cofactor HMsignal} \label{fig:profileunion}
\setlength{\abovecaptionskip}{0pt}
\setlength{\belowcaptionskip}{10pt}
\centering
{\includegraphics[width=0.8\textwidth]{%s}}
\end{figure}
""" % (outname + "_cofactor_HMsignal.pdf"))

    doc.append(r"""
\newpage
\newpage
\section{Output list}
\begin{quotation}
All the main output files were described in the following table
\end{quotation}
\begin{table}[h]
\small
\caption{output list}\label{bstable}
\begin{tabular}{ |l|l| }
\hline
description & filename \\
\hline
summary table of non-classical (NC) function & summary/%s \\
\hline
summary report (this doc) & summary/%s \\
\hline
cobinding matrix on HMR peaks & tmpResults/%s \\
\hline
histone mark signal on HMR peaks & tmpResults/%s \\
\hline
\end{tabular}
\end{table}
\end{document}
""" % (strlatexformat(outname + "_NCsummary.txt"),
       strlatexformat(outname + "_summary.pdf"),
       strlatexformat(outname + "_peakov.bed"),
       strlatexformat(outname + "_HMsig.bed")))

    latexfile = outname + '_summary.tex'
    with open(summarydir + latexfile, 'w') as outf:
        outf.write("".join(doc))

    if general['latex'] == 1:
        wlog(
            'pdflatex was detected in default PATH, generate summary report %s'
            % (outname + '_summary.pdf'), logfile)
        os.chdir(summarydir)
        pdflatex_cmd = "pdflatex %s" % (latexfile)
        # run twice; presumably the second pass fills in \tableofcontents
        sp(pdflatex_cmd)
        sp(pdflatex_cmd)
        sp('cp %s ../' % (outname + '_summary.pdf'))
        for aux_ext in ("aux", "log", "toc"):
            sp("rm %s_summary.%s" % (outname, aux_ext))
    else:
        wlog(
            'pdflatex was not detected in default PATH, generate summary report .tex file in summary/ folder, you can move the whole summary/ folder to the environment with pdflatex installed and run cmd in the summary/ folder: "pdflatex %s"'
            % (outname + '_summary.tex'), logfile)

    wlog('Step3 summary DONE, check %s for final outputs' % (summarydir),
         logfile)
    return conf_dict
def step0_check_data(conf_dict, logfile):
    '''
    step0: validate the input data and complete the run parameters
    (single signal-file variant).

    Checks, in order:
      * the HMR peak file and the signal bigWig file (no "~", made absolute
        with 'startdir', must exist; the peak file must end with .bed and be
        accepted by checkbedformat());
      * 'signalname' is derived from the signal file name (extension
        .bw/.bigwig stripped, full file name for any other extension);
      * the peak folder; every *.bed file in it that passes
        checkbedformat() becomes a co-factor candidate in 'peakfilenames'
        and is listed in ``<outname>_cofactor_candidate_list.txt``;
      * the options 'ext', 'Pvalue', 'Alpha', 'Lambda', 'TopNcofactors',
        replacing invalid values by defaults (1000, 0.001, 0.5, -, 5);
      * the bigWigSummary build name for the platform ('software') and the
        availability of pdflatex ('latex').

    conf_dict -- configuration dictionary, updated in place
    logfile   -- log destination forwarded to wlog()/ewlog()

    Returns the updated conf_dict.
    '''
    general = conf_dict['General']
    options = conf_dict['options']

    ### check data path , format ,
    if "~" in general['HMRpeak']:
        ewlog(
            'require absolute path for HMRpeak bed file, HMRpeak file cannot contain "~", current HMRpeak file is %s'
            % (general['HMRpeak']), logfile)
    if "~" in general['signal']:
        ewlog(
            'require absolute path for HMsignal bigwig file, signal file cannot contain "~", current signal file is %s'
            % (general['signal']), logfile)
    if not general['HMRpeak'].startswith('/'):
        general['HMRpeak'] = general['startdir'] + general['HMRpeak']
    if not general['signal'].startswith('/'):
        general['signal'] = general['startdir'] + general['signal']
    if not os.path.isfile(general['HMRpeak']):
        ewlog("HMRpeak file %s not found" % (general['HMRpeak']), logfile)
    if not os.path.isfile(general['signal']):
        ewlog("signal bw file %s not found" % (general['signal']), logfile)
    if not general['HMRpeak'].endswith('.bed'):
        ewlog('extenion of HMR peak file is not .bed', logfile)
    checkbed = checkbedformat(general['HMRpeak'], 1000)
    if checkbed == "fail":
        ewlog("HMRpeak file is not a bed file", logfile)
    elif checkbed == "lesspeak":
        # logfile passed like every other ewlog() call in this function
        ewlog("HMRpeak file contains less than 1000 peaks", logfile)

    signalbase = general['signal'].split("/")[-1]
    if general['signal'].endswith('.bw'):
        general['signalname'] = signalbase[:-3]
    elif general['signal'].endswith('.bigwig'):
        general['signalname'] = signalbase[:-7]
    else:
        wlog('[WARNING] extension of signal bw file is not bw/bigwig',
             logfile)
        # keep 'signalname' defined for files with another extension
        general['signalname'] = signalbase

    ### check TFpeak folder
    if "~" in general['peakFolder']:
        ewlog(
            'require absolute path for peakFolder, peakFolder cannot contain "~", current peakFolder is %s'
            % (general['peakFolder']), logfile)
    if not general['peakFolder'].startswith('/'):
        general['peakFolder'] = general['startdir'] + general['peakFolder']
    if not general['peakFolder'].endswith('/'):
        general['peakFolder'] += "/"
    if not os.path.isdir(general['peakFolder']):
        ewlog("peakFolder %s not found" % (general['peakFolder']), logfile)

    wlog(
        "Check the peak.bed files in the peakFolder, only '.bed' files with >1000 peaks are included in the following analysis",
        logfile)
    general['peakfilenames'] = []
    for f in os.listdir(general['peakFolder']):
        peakpath = general['peakFolder'] + f
        if f.endswith(".bed") and os.path.isfile(peakpath):
            # validate the candidate file itself, not the HMR peak file
            if checkbedformat(peakpath, 1000) == "pass":
                general['peakfilenames'].append(f[:-4])
    if not general['peakfilenames']:
        ewlog(
            "no peak file (cofactor candidate) in bed format & >1000peaks are included, exit",
            logfile)
    else:
        wlog(
            "%s peak files (cofactor candidates) are included" %
            (len(general['peakfilenames'])), logfile)

    with open(general['outname'] + "_cofactor_candidate_list.txt",
              'w') as outf:
        for cofactor in general['peakfilenames']:
            outf.write(cofactor + "\n")

    ### check options
    wlog('check option: ', logfile)
    try:
        ext = int(options['ext'])
    except (ValueError, TypeError):
        wlog(
            "extend length %s is not valid, use default value: 1000bp" %
            (options['ext']), logfile)
        ext = 1000
    else:
        wlog("extend length is %s bp" % (ext), logfile)
    options['ext'] = ext

    try:
        pvalue = float(options['Pvalue'])
    except (ValueError, TypeError):
        wlog(
            "input Pvalue %s is not recognized, use default Pvalue=0.001" %
            (options['Pvalue']), logfile)
        options['Pvalue'] = 0.001
    else:
        wlog("use Pvalue = %s as cutoff" % (str(pvalue)), logfile)
    # a valid value is left stored exactly as it was supplied
    if float(options['Pvalue']) >= 1:
        wlog(
            "input Pvalue %s is not valid, use default Pvalue=0.001" %
            (options['Pvalue']), logfile)
        options['Pvalue'] = 0.001

    try:
        usealpha = float(options['Alpha'])
    except (ValueError, TypeError):
        wlog(
            "input alpha %s is not valid, use alpha=0.5" %
            (options['Alpha']), logfile)
        # actually apply the default announced in the message
        options['Alpha'] = 0.5
    else:
        if usealpha >= 1:
            wlog("alpha cannot be >=1, use alpha=0.5", logfile)
            options['Alpha'] = 0.5
        else:
            wlog("Alpha = %s" % (str(usealpha)), logfile)
            options['Alpha'] = usealpha

    wlog("Lambda choice is %s" % (options['Lambda']), logfile)

    if options['TopNcofactors'] == "all":
        wlog("all significant co-factors will be output", logfile)
    else:
        try:
            topTF = int(options['TopNcofactors'])
        except (ValueError, TypeError):
            wlog(
                "the topN number %s is not valid, output top5 co-factors" %
                (options['TopNcofactors']), logfile)
            options['TopNcofactors'] = 5
        else:
            wlog(
                "the topN number %s will be output" %
                (options['TopNcofactors']), logfile)
            options['TopNcofactors'] = topTF

    ### select the bigWigSummary build for this platform
    OS = platform.system()
    if OS == "Linux":
        bwsum_software = "bigWigSummary_linux"
    elif OS == "Darwin":
        bwsum_software = "bigWigSummary_mac"
    else:
        wlog("detected system is nither linux nor mac, try linux version",
             logfile)
        bwsum_software = "bigWigSummary_linux"
    # NOTE(review): only the program name is stored, under 'software';
    # confirm which key the signal-extraction step reads.
    general['software'] = bwsum_software

    ### check pdflatex
    # an empty first element of sp()'s result is treated as "not installed"
    if sp('pdflatex --help')[0] == "":
        wlog(
            'pdflatex was not installed, HMR is still processing but no summary report generated',
            logfile)
        general['latex'] = 0
    else:
        general['latex'] = 1
    return conf_dict