def step4_SCpeakbias(conf_dict, logfile):
    """Load chromosome sequences and run the peak-level bias scan.

    The chromosome names are taken from the first whitespace-separated
    column of the file at ``conf_dict['options']['csize']``.  For each name
    the sequence is fetched with ``fetchseq_2bit_chrom`` and the resulting
    ``{chromosome: sequence}`` mapping is stored in
    ``conf_dict['results']['seqdict']``.  ``bias_peakXcell_mat`` is then
    called with the run settings; its return value is not used here.

    Args:
        conf_dict: nested configuration dict; the 'General', 'options' and
            'results' sections are read, and 'results' must already hold
            'biasMat' and 'finalcells'.
        logfile: log destination forwarded to ``wlog``.

    Returns:
        The same ``conf_dict`` object, updated in place.
    """
    general = conf_dict['General']
    options = conf_dict['options']
    results = conf_dict['results']

    wlog('readin sequence from 2bit', logfile)
    seq_dict = {}
    # The context manager closes the file even if a sequence fetch raises.
    with open(options['csize']) as inf:
        for line in inf:
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing newline): no chromosome name.
                continue
            chrm = fields[0]
            seq_dict[chrm] = fetchseq_2bit_chrom(
                general['twoBitToFa'], general['sequence'], chrm)
    results['seqdict'] = seq_dict

    wlog('scan peak level bias', logfile)
    bias_peakXcell_mat(
        general['outname'],
        general['bedtools'],
        options['chromosome'],
        options['kmer'],
        results['biasMat'],
        results['seqdict'],
        results['finalcells'],
        general['datatype'],
        options['peakminreads'],
        options['peakmaxreads'],
    )
    return conf_dict
def step1_QC_format(conf_dict, logfile):
    """Summarise per-chromosome read counts and, in "sc" mode, select cells.

    ``split_chromosome_reads`` returns a mapping of chromosome name to read
    count.  The "chrM" count (0 when absent) is stored in
    ``conf_dict['QC']['chrM_reads']`` and the sum over every other
    chromosome in ``conf_dict['QC']['chromatin_reads']``.

    When ``conf_dict['General']['mode'] == "sc"`` the result of
    ``filter_highQcell_reads`` is unpacked by index into
    ``results['finalcells']`` ([0]), ``QC['highQcellnum']`` ([1]),
    ``QC['finalreadnum']`` ([2]) and ``QC['totalcellnum']`` ([3]); element
    [4] is compared with "highQ" to decide which message is logged.

    Args:
        conf_dict: nested configuration dict ('General', 'options', 'QC',
            'results' sections).
        logfile: log destination forwarded to ``wlog`` / ``ewlog``.

    Returns:
        The same ``conf_dict`` object, updated in place.
    """
    general = conf_dict['General']
    options = conf_dict['options']

    wlog('summarize reads count distribution', logfile)
    chrom_reads = split_chromosome_reads(
        general['fragments'],
        general['outname'],
        options['scATAC10x'],
        options['chromosome'],
    )

    qc = conf_dict['QC']
    qc["chrM_reads"] = chrom_reads["chrM"] if "chrM" in chrom_reads else 0
    qc["chromatin_reads"] = sum(
        chrom_reads[chrom] for chrom in chrom_reads.keys() if chrom != "chrM")

    # Everything below only applies to single-cell runs.
    if general['mode'] != "sc":
        return conf_dict

    wlog('filter high quality single cells', logfile)
    filtered = filter_highQcell_reads(
        general['outname'], int(options['readcutoff']), options["usecells"])

    if filtered == "fail":
        # NOTE(review): ewlog presumably aborts the run; otherwise the
        # indexing below would operate on the string "fail" - confirm.
        ewlog(
            'obtain < 100 high quality cell with reads >= %s.' %
            (options['readcutoff']), logfile)

    if len(options["usecells"]) == 0:
        wlog('no specified cellname list inputed', logfile)
    elif filtered[4] == "highQ":
        wlog(
            'obtain < 100 cell left after highQ + cellname filtering, use highQ cell only',
            logfile)

    wlog(
        'obtain %s cells from filtering, containing %s reads' %
        (filtered[1], filtered[2]), logfile)

    results = conf_dict['results']
    results['finalcells'] = filtered[0]
    qc['totalcellnum'] = filtered[3]
    qc['highQcellnum'] = filtered[1]
    qc['finalusecellnum'] = len(results['finalcells'])
    qc['finalreadnum'] = filtered[2]
    return conf_dict
def step2_biasMat(conf_dict, logfile):
    """Populate ``conf_dict['results']['biasMat']`` from one of two sources.

    * ``options['bias'] == "naked"``: read the matrix from
      ``options['biasfile']`` with ``readBias``.
    * otherwise, if ``QC['chrM_reads']`` is below 500000: also read
      ``options['biasfile']``, but report through ``ewlog`` when that file
      does not exist.
    * otherwise: estimate a k-mer bias with ``naive_kmerBias_chrM`` (stored
      as ``results['biasMatNaive']``), set ``results['biasfile']`` to
      ``<outname>_bias.txt`` and pass both to ``simplex_encoding``.

    Args:
        conf_dict: nested configuration dict ('General', 'options', 'QC',
            'results' sections).
        logfile: log destination forwarded to ``wlog`` / ``ewlog``.

    Returns:
        The same ``conf_dict`` object, updated in place.
    """
    min_chrm_reads = 500000  # below this, chrM data is not used for estimation
    options = conf_dict['options']

    if options['bias'] == "naked":
        wlog('obtain pre-processed bias matrix from naked DNA data', logfile)
        conf_dict['results']['biasMat'] = readBias(options['biasfile'])
        return conf_dict

    if conf_dict['QC']['chrM_reads'] < min_chrm_reads:
        wlog(
            'chrM reads number < 500k, obtain pre-processed bias matrix from naked DNA data',
            logfile)
        if os.path.isfile(options['biasfile']):
            conf_dict['results']['biasMat'] = readBias(options['biasfile'])
        else:
            ewlog("no naked DNA bias matrix, cannot estimate bias", logfile)
        return conf_dict

    wlog('estimate bias matrix from mtDNA(chrM) data', logfile)
    general = conf_dict['General']
    results = conf_dict['results']
    results['biasMatNaive'] = naive_kmerBias_chrM(
        general['outname'],
        general['sequence'],
        options['kmer'],
        general['twoBitToFa'],
        general['format'],
    )
    results['biasfile'] = "%s_bias.txt" % (general['outname'])
    results['biasMat'] = simplex_encoding(
        results['biasMatNaive'], results['biasfile'])
    return conf_dict
def step3_callpeak(conf_dict, logfile):
    """Produce the extended peak file, from external peaks or from macs3.

    ``results['peakfile']`` is set to ``<outname>_summitEXT.bed``.  When
    ``options['peak']`` is not "NA" the external peaks are extended with
    ``extExternal``; if that yields at least 1000 peaks they are used as is.
    Otherwise (no external peaks, or fewer than 1000) ``macs3 callpeak`` is
    run on ``<outname>_chromatin.bed`` and the summits are extended with
    ``extsummit``.  The resulting count is stored in
    ``QC['peaknumTotal']``; fewer than 1000 macs3 peaks is reported through
    ``ewlog``.

    Args:
        conf_dict: nested configuration dict ('General', 'options', 'QC',
            'results' sections).
        logfile: log destination forwarded to ``wlog`` / ``ewlog``.

    Returns:
        The same ``conf_dict`` object, updated in place.
    """
    general = conf_dict['General']
    options = conf_dict['options']
    outname = general['outname']

    conf_dict['results']['peakfile'] = outname + "_summitEXT.bed"
    qc = conf_dict['QC']

    need_macs3 = True
    if options['peak'] != "NA":
        qc['peaknumTotal'] = extExternal(
            options['peak'],
            conf_dict['results']['peakfile'],
            int(options['extend']),
        )
        if qc['peaknumTotal'] < 1000:
            wlog(
                "obtain < 1000 (%s) external inputted peaks, use macs3 to detect peaks"
                % qc['peaknumTotal'], logfile)
        else:
            wlog(
                "obtain %s peaks from (-p) inputted" % qc['peaknumTotal'],
                logfile)
            need_macs3 = False

    if not need_macs3:
        return conf_dict

    if general['macs3'] == "NA":
        ewlog(
            "macs3 was not installed. SELMA requires macs3 installed in the default path ($PATH) for peak calling",
            logfile)

    # macs3 genome-size shortcut: "hs" for hg38, "mm" for anything else.
    gtag = "hs" if general['genome'] == "hg38" else "mm"

    if general['format'] == "PE":
        cmd_template = "macs3 callpeak -t %s -n %s -f BEDPE -g %s -q %s --keep-dup 1"
    else:
        cmd_template = "macs3 callpeak -t %s -n %s -f BED -g %s -q %s --keep-dup 1 --nomodel --extsize 100"
    macs3cmd = cmd_template % (
        outname + "_chromatin.bed", outname, gtag, options['peakqval'])

    wlog("peak calling with macs3: %s" % macs3cmd, logfile)
    peaklog = sp(macs3cmd)

    # Extend each summit by the configured number of bp on both sides.
    wlog("extend peak summits to +/- %sbp" % options['extend'], logfile)
    summit_bed = outname + "_summits.bed"
    if not os.path.isfile(summit_bed):
        ewlog(
            "no macs3 results detected, check whether macs3 was correctly installed.",
            logfile)
    qc['peaknumTotal'] = extsummit(
        summit_bed, conf_dict['results']['peakfile'], int(options['extend']))

    if qc['peaknumTotal'] < 1000:
        ewlog(
            "obtain < 1000 (%s) peaks, SELMA terminated" %
            qc['peaknumTotal'], logfile)
    else:
        wlog("obtain %s peaks" % qc['peaknumTotal'], logfile)
    return conf_dict
def step4_BULKcleavageBias(conf_dict, logfile):
    """Build observed and bias-expected cleavage tracks for bulk data.

    Steps, in order:

    1. Split ``<outname>_chromatin.bed`` into plus/minus strand cleavage
       BED files with ``awk`` (fragment ends for "PE" format, strand column
       filter otherwise).
    2. For each strand: ``macs3 pileup``, ``sort`` and bedGraphToBigWig to
       produce ``<outname>_cleavage_<strand>.bw``.
    3. Sort and merge the extended peak file into
       ``<outname>_summitEXTmerge.bed`` with bedtools.
    4. Load chromosome sequences (names from the first column of
       ``options['csize']``) into ``results['seqdict']``.
    5. Call ``bias_exp_cleavage_DNase`` or ``bias_exp_cleavage_ATAC``
       depending on ``General['datatype']``.
    6. Convert ``<outname>_biasExpCuts_<strand>_sorted.bdg`` to bigWig.

    Args:
        conf_dict: nested configuration dict ('General', 'options',
            'results' sections); 'results' must already hold 'peakfile'
            and 'biasMat'.
        logfile: log destination forwarded to ``wlog``.

    Returns:
        The same ``conf_dict`` object, updated in place.
    """
    general = conf_dict['General']
    options = conf_dict['options']
    results = conf_dict['results']
    outname = general['outname']
    chromatin_bed = outname + "_chromatin.bed"
    strands = ("plus", "minus")

    wlog('split fragments to strand specific cleavage sites', logfile)
    if general['format'] == "PE":
        # Paired-end: the fragment start is the "+" cut, the end the "-" cut.
        split_templates = {
            "plus": """awk '{OFS="\\t";print $1,$2,$2+1,".",".","+"}' %s > %s""",
            "minus": """awk '{OFS="\\t";print $1,$3-1,$3,".",".","-"}' %s > %s""",
        }
    else:
        # Single-end: keep whole records, split on the strand column.
        split_templates = {
            "plus": """awk '{if($6=="+") print $0}' %s > %s""",
            "minus": """awk '{if($6=="-") print $0}' %s > %s""",
        }
    for strand in strands:
        sp(split_templates[strand] %
           (chromatin_bed, "%s_cleavage_%s.bed" % (outname, strand)))

    wlog('pile up cleavage sites', logfile)
    for strand in strands:
        prefix = "%s_cleavage_%s" % (outname, strand)
        sp("macs3 pileup -i %s -f BED --extsize 1 -o %s " %
           (prefix + ".bed", prefix + ".bdg"))
        sp("sort -k1,1 -k2,2n %s > %s" %
           (prefix + ".bdg", prefix + "_sorted.bdg"))
        sp("%s %s %s %s" % (general['bedGraphToBigWig'],
                            prefix + "_sorted.bdg",
                            options['csize'],
                            prefix + ".bw"))

    wlog("remove redundant position from the extended peak file", logfile)
    merged_peaks = outname + "_summitEXTmerge.bed"
    sp("""sort -k 1,1 -k 2,2g -k 3,3g %s | %s merge -i - > %s""" %
       (results['peakfile'], general['bedtools'], merged_peaks))

    wlog('readin sequence from 2bit', logfile)
    seq_dict = {}
    # The context manager closes the file even if a sequence fetch raises.
    with open(options['csize']) as inf:
        for line in inf:
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing newline): no chromosome name.
                continue
            chrm = fields[0]
            seq_dict[chrm] = fetchseq_2bit_chrom(
                general['twoBitToFa'], general['sequence'], chrm)
    results['seqdict'] = seq_dict

    wlog('calculate bias expected cleavages', logfile)
    # Both helpers take the same arguments; only the data type picks one.
    if general['datatype'] == "DNase":
        bias_exp_cleavage = bias_exp_cleavage_DNase
    else:
        bias_exp_cleavage = bias_exp_cleavage_ATAC
    bias_exp_cleavage(
        outname,
        merged_peaks,
        results['biasMat'],
        options['kmer'],
        general['bigWigSummary'],
        general['bedGraphToBigWig'],
        results['seqdict'],
    )

    # No sort step here: the *_sorted.bdg inputs are expected to exist
    # already (presumably written by the helper above - TODO confirm).
    for strand in strands:
        prefix = "%s_biasExpCuts_%s" % (outname, strand)
        sp("%s %s %s %s" % (general['bedGraphToBigWig'],
                            prefix + "_sorted.bdg",
                            options['csize'],
                            prefix + ".bw"))
    return conf_dict
def step5_SCcellClustering(conf_dict, logfile):
    """Run the configured single-cell clustering method and count clusters.

    ``options['clustermethod']`` selects one of the ``scClustering_*``
    helpers (PCAkm, Seurat, ArchR, APEC, Cicero).  The helper's return value
    is stored in ``General['scPackage']``; when it equals "noPackage" a
    method-specific message is logged.  Any other method name runs nothing.

    Afterwards ``<outname>_scClusters.txt`` is read if it exists:
    ``QC['scClusters']`` is the number of distinct values in the second
    tab-separated column, ignoring the header value "cluster".  It is 0 when
    the file is absent.

    Args:
        conf_dict: nested configuration dict ('General', 'options', 'QC'
            sections).
        logfile: log destination forwarded to ``wlog``.

    Returns:
        The same ``conf_dict`` object, updated in place.
    """
    wlog('single-cell clustering analysis', logfile)
    general = conf_dict['General']
    options = conf_dict['options']
    outname = general['outname']

    # method name -> (deferred helper call, message when packages are missing).
    # Lambdas keep option lookups lazy, so only the chosen method's options
    # are read.
    methods = {
        "PCAkm": (
            lambda: scClustering_PCAkm(outname,
                                       options['lowbiaspeak'],
                                       options['clusterNum'],
                                       options['topDim'],
                                       int(options['UMAP'])),
            "umap was not installed, UMAP scatter plot will not be generated",
        ),
        "Seurat": (
            lambda: scClustering_Seurat(outname,
                                        options['lowbiaspeak'],
                                        options['topDim'],
                                        int(options['UMAP'])),
            "Seurat related packages were not installed, skip single-cell clustering step",
        ),
        "ArchR": (
            lambda: scClustering_ArchR(outname,
                                       general['genome'],
                                       options['lowbiaspeak'],
                                       options['topDim'],
                                       int(options['UMAP'])),
            "ArchR related packages were not installed, skip single-cell clustering step",
        ),
        "APEC": (
            lambda: scClustering_APEC(outname,
                                      options['lowbiaspeak'],
                                      int(options['UMAP'])),
            "APEC related packages were not installed, skip single-cell clustering step",
        ),
        "Cicero": (
            lambda: scClustering_Cicero(outname,
                                        options['lowbiaspeak'],
                                        int(options['UMAP'])),
            "Cicero related packages were not installed, skip single-cell clustering step",
        ),
    }

    method = options['clustermethod']
    if method in methods:
        run_clustering, missing_msg = methods[method]
        general['scPackage'] = run_clustering()
        if general['scPackage'] == "noPackage":
            wlog(missing_msg, logfile)

    cluster_file = "%s_scClusters.txt" % (outname)
    if os.path.isfile(cluster_file):
        labels = set()
        with open(cluster_file) as inf:
            for line in inf:
                ll = line.strip().split("\t")
                # Skip blank / single-column lines and the header row.
                if len(ll) > 1 and ll[1] != "cluster":
                    labels.add(ll[1])
        conf_dict['QC']['scClusters'] = len(labels)
    else:
        conf_dict['QC']['scClusters'] = 0
    return conf_dict
# LaTeX templates are raw strings so every backslash reaches the .tex file
# unchanged and no Python escape sequence is involved.
_SUMMARY_LATEX_PREAMBLE = r"""\documentclass[11pt,a4paper]{article}
\usepackage{tabularx}
\usepackage[english]{babel}
\usepackage{array}
\usepackage{graphicx}
\usepackage{color}
\DeclareGraphicsExtensions{.eps,.png,.pdf,.ps}
\begin{document}
\title{SELMA summary reports for: %s}
\vspace{-1cm}
\maketitle
\tableofcontents
"""

# Arguments: section title, intro sentence, caption, label, rendered rows.
_SUMMARY_LATEX_SECTION = r"""\newpage
\newpage
\section{%s}
\begin{quotation}
%s
\end{quotation}
\begin{table}[h]
\small
\caption{%s}\label{%s}
\begin{tabularx}{\textwidth}{ |X|l| }
\hline
parameter & value \\
\hline
%s\end{tabularx}
\end{table}
"""

# Arguments: plot type (three times) and the path of the plot file.
_SUMMARY_LATEX_FIGURE = r"""\newpage
\newpage
\section{%s scatter plot}
The 2-dim scatter plot represent the %s results.
Each dot represents an individual cell and the color represents cluster labels
\begin{figure}[h]
\caption{%s scatter plot of the single-cell clusters}
\label{fig:profileunion}
\setlength{\abovecaptionskip}{0pt}
\setlength{\belowcaptionskip}{10pt}
\centering
{\includegraphics[width=0.8\textwidth]{%s}}
\end{figure}
"""


def _summary_collect_results(conf_dict, logfile, summarydir):
    """Move final outputs into summarydir and intermediates into tmpResults/."""
    general = conf_dict['General']
    options = conf_dict['options']
    results = conf_dict['results']
    outname = general['outname']
    bulk = general['mode'] == "bulk"

    createDIR(summarydir)
    if "biasfile" in results and os.path.isfile(results['biasfile']):
        sp("mv %s %s" % (results['biasfile'], summarydir))
    sp("mv %s_summitEXT.bed %s" % (outname, summarydir))

    # "NA" means no dimension-reduction plot was found.
    results['umap'] = "NA"
    if bulk:
        for suffix in ("_cleavage_plus.bw", "_cleavage_minus.bw",
                       "_biasExpCuts_plus.bw", "_biasExpCuts_minus.bw"):
            sp("mv %s%s %s" % (outname, suffix, summarydir))
    else:
        sp("gzip %s_peakXcellMat.txt" % (outname))
        sp("gzip %s_peakFeatures.txt" % (outname))
        for suffix in ("_peakXcellMat.txt.gz", "_peakFeatures.txt.gz",
                       "_scClusters.txt"):
            sp("mv %s%s %s" % (outname, suffix, summarydir))
        if os.path.isfile("%s_clusteringUMAP.pdf" % (outname)):
            if options['clustermethod'] in ["APEC", "Cicero"]:
                # For these two methods the plot is renamed to *TSNE.pdf.
                sp("mv %s_clusteringUMAP.pdf %s/%s_clusteringTSNE.pdf" %
                   (outname, summarydir, outname))
                results['umap'] = "%s_clusteringTSNE.pdf" % (outname)
            else:
                sp("mv %s_clusteringUMAP.pdf %s" % (outname, summarydir))
                results['umap'] = "%s_clusteringUMAP.pdf" % (outname)

    tmpresult = 'tmpResults/'
    createDIR(tmpresult)
    tmp_suffixes = ["_chromatin.bed", "_chrM.bed", "_summits.bed",
                    "_peaks.xls", "_peaks.narrowPeak"]
    if bulk:
        tmp_suffixes += [
            "_cleavage_plus.bed", "_cleavage_minus.bed",
            "_cleavage_plus.bdg", "_cleavage_minus.bdg",
            "_cleavage_plus_sorted.bdg", "_cleavage_minus_sorted.bdg",
            "_biasExpCuts_plus.bdg", "_biasExpCuts_minus.bdg",
            "_biasExpCuts_plus_sorted.bdg", "_biasExpCuts_minus_sorted.bdg",
            "_summitEXTmerge.bed",
        ]
    else:
        tmp_suffixes += ["_highQcellReads.bed", "_tmpSCreads.bed",
                         "_tmpSCpeaks.bed", "_scOVcleavage.bed",
                         "_scRscript.r"]
        # These methods leave a working directory named after themselves.
        if options['clustermethod'] in ("ArchR", "APEC", "Cicero"):
            tmp_suffixes.append("_" + options['clustermethod'])
    for suffix in tmp_suffixes:
        sp("mv %s%s %s" % (outname, suffix, tmpresult))

    if options['keeptmp']:
        wlog('--keeptmp was set, keep intermediate results', logfile)
    else:
        wlog('--keeptmp was not setting, remove intermediate results',
             logfile)
        sp("rm -r tmpResults/")


def _summary_write_text_report(conf_dict, summarydir):
    """Write the tab-separated <outname>_summaryReports.txt report."""
    general = conf_dict['General']
    options = conf_dict['options']
    qc = conf_dict['QC']
    outname = general['outname']
    sc_mode = general['mode'] == "sc"

    def row(label, value):
        return "%s\t%s\n" % (label, value)

    lines = [
        "#settings\n",
        row("mode", general['mode']),
        row("fragments", general['fragments']),
        row("data format", general['format']),
        row("data type", general['datatype']),
        row("genome version", general['genome']),
        row("output name", outname),
        "\n#parameters\n",
        row("peak extend size", options['extend']),
        row("peak qvalue", options['peakqval']),
        row("bias source", options['bias']),
        row("k-mer", options['kmer']),
    ]
    if sc_mode:
        lines += [
            row("[sc]reads cutoff", options['readcutoff']),
            row("[sc]%low biaspeak", options['lowbiaspeak']),
            row("[sc]peak min reads", options['peakminreads']),
            row("[sc]peak max reads", options['peakmaxreads']),
            row("[sc]topN dimensions", options['topDim']),
            row("[sc]clustering method", options['clustermethod']),
        ]

    lines += [
        "\n#QC\n",
        row("total reads", qc['chrM_reads'] + qc['chromatin_reads']),
        row("chromatin reads", qc['chromatin_reads']),
        row("mtDNA reads", qc['chrM_reads']),
        row("total peaks", qc['peaknumTotal']),
    ]
    if sc_mode:
        lines += [
            row("total single-cells", qc['totalcellnum']),
            row("high quality single-cells", qc['highQcellnum']),
            row("single-cells for clustering", qc['finalusecellnum']),
            row("reads in single-cells for clustering", qc['finalreadnum']),
        ]
        if qc['scClusters'] > 0:
            lines.append(row("cluster number", qc['scClusters']))

    lines += [
        "\n#output results\n",
        row("peaks (accessible regions)", outname + "_summitEXT.bed"),
    ]
    if general['mode'] == "bulk":
        lines += [
            row("observed cleavage (+ strand)", outname + "_cleavage_plus.bw"),
            row("observed cleavage (- strand)", outname + "_cleavage_minus.bw"),
            row("bias expected cleavage (+ strand)",
                outname + "_biasExpCuts_plus.bw"),
            row("bias expected cleavage (- strand)",
                outname + "_biasExpCuts_minus.bw"),
        ]
    else:
        lines += [
            row("peak bias features", outname + "_peakFeatures.txt.gz"),
            row("peakXcell count", outname + "_peakXcellMat.txt.gz"),
        ]
        if os.path.isfile("%s%s_scClusters.txt" % (summarydir, outname)):
            lines.append(row("single-cell cluster",
                             outname + "_scClusters.txt"))
        umap = conf_dict['results']['umap']
        if os.path.isfile(summarydir + umap):
            if options['clustermethod'] in ["APEC", "Cicero"]:
                lines.append(row("sc-cluster t-SNE", umap))
            else:
                lines.append(row("sc-cluster UMAP", umap))

    with open("%s_summaryReports.txt" % outname, 'w') as outf:
        outf.writelines(lines)


def _summary_latex_section(title, intro, caption, label, rows):
    """Render one report section: heading, intro sentence and a 2-column table.

    ``rows`` is a list of (parameter label, value) pairs; values are passed
    through ``strlatexformat`` and labels are emitted verbatim.
    """
    body = "".join(
        "%s & %s \\\\\n\\hline\n" % (name, strlatexformat(value))
        for name, value in rows)
    return _SUMMARY_LATEX_SECTION % (title, intro, caption, label, body)


def _summary_build_latex(conf_dict, summarydir):
    """Return the full LaTeX source of the summary report as one string."""
    general = conf_dict['General']
    options = conf_dict['options']
    qc = conf_dict['QC']
    results = conf_dict['results']
    outname = general['outname']
    sc_mode = general['mode'] == "sc"

    has_umap = os.path.isfile(summarydir + results['umap'])
    if has_umap:
        if options['clustermethod'] in ["APEC", "Cicero"]:
            dimRedTerm = "t-SNE"
        else:
            dimRedTerm = "UMAP"

    parts = [_SUMMARY_LATEX_PREAMBLE % (strlatexformat(outname))]

    # Table 1: settings.
    parts.append(_summary_latex_section(
        "Summary description",
        "Table 1 describes the input files and settings.",
        "settings", "tab:settings",
        [
            ("mode", general['mode']),
            # Only the file name, not the directory, goes into the report.
            ("fragment file", general['fragments'].split("/")[-1]),
            ("data format", general['format']),
            ("data type", general['datatype']),
            ("genome version", general['genome']),
            ("output name", outname),
        ]))

    # Table 2: parameters and options.
    param_rows = [
        ("peak extend size", options['extend']),
        ("peak qvalue", options['peakqval']),
        ("bias source", options['bias']),
        ("k-mer", options['kmer']),
    ]
    if sc_mode:
        param_rows += [
            ("[sc]reads cutoff", options['readcutoff']),
            ("[sc]lowbias peak percent", options['lowbiaspeak']),
            ("[sc]peak min reads", options['peakminreads']),
            ("[sc]peak max reads", options['peakmaxreads']),
            ("[sc]topN dimensions", options['topDim']),
            ("[sc]cluster methods", options['clustermethod']),
        ]
    parts.append(_summary_latex_section(
        "parameters and options",
        "Table 2 describes the parameters and options.",
        "parameters and options", "tab:parameters",
        param_rows))

    # Table 3: data quality.
    qc_rows = [
        ("total reads", qc['chrM_reads'] + qc['chromatin_reads']),
        ("chromatin reads", qc['chromatin_reads']),
        ("mtDNA reads", qc['chrM_reads']),
        ("total peaks", qc['peaknumTotal']),
    ]
    if sc_mode:
        qc_rows += [
            ("[sc]total single-cells(sc)", qc['totalcellnum']),
            ("[sc]high quality sc", qc['highQcellnum']),
            ("[sc]sc for clustering", qc['finalusecellnum']),
            ("[sc]reads in sc for clustering", qc['finalreadnum']),
        ]
        if qc['scClusters'] > 0:
            qc_rows.append(("[sc]number of cluster", qc['scClusters']))
    parts.append(_summary_latex_section(
        "data quality",
        "Table 3 describes data Quality.",
        "data quality", "tab:quality",
        qc_rows))

    # Table 4: output files.
    out_rows = [("peaks (accessible regions)", outname + "_summitEXT.bed")]
    if general['mode'] == "bulk":
        out_rows += [
            ("observed cleavage(+)", outname + "_cleavage_plus.bw"),
            ("observed cleavage(-)", outname + "_cleavage_minus.bw"),
            ("bias expected cleavage(+)", outname + "_biasExpCuts_plus.bw"),
            ("bias expected cleavage(-)", outname + "_biasExpCuts_minus.bw"),
        ]
    else:
        out_rows += [
            ("peak bias feature", outname + "_peakFeatures.txt.gz"),
            ("peakXcell count", outname + "_peakXcellMat.txt.gz"),
        ]
        if os.path.isfile("%s%s_scClusters.txt" % (summarydir, outname)):
            out_rows.append(("single-cell cluster",
                             outname + "_scClusters.txt"))
        if has_umap:
            # Same file name the text report lists for this entry.
            out_rows.append(("sc-cluster %s" % dimRedTerm, results['umap']))
    parts.append(_summary_latex_section(
        "output results",
        "Table 4 describes output results (in the summary/ folder).",
        "output results", "tab:outputs",
        out_rows))

    if has_umap:
        parts.append(_SUMMARY_LATEX_FIGURE % (
            dimRedTerm, dimRedTerm, dimRedTerm,
            summarydir + results['umap']))

    parts.append("\\end{document}\n")
    return "".join(parts)


def _summary_compile_latex(conf_dict, logfile, latexfile):
    """Compile latexfile with pdflatex when available, then remove by-products."""
    outname = conf_dict['General']['outname']
    check_latex = sp('which pdflatex')
    # Empty first element from `which` is treated as "pdflatex not found".
    if check_latex[0].decode("ascii") == "":
        wlog(
            'pdflatex was not installed, SELMA will not generate pdf version of summary report. Please copy the %s to an environment with pdflatex installed and complie the pdf file'
            % (outname + '_summaryReports.tex'), logfile)
        return
    cmd = "pdflatex %s" % (latexfile)
    # Run twice: the document has a table of contents, which LaTeX fills in
    # on the second pass.
    sp(cmd)
    sp(cmd)
    for ext in ("aux", "log", "toc"):
        sp("rm %s_summaryReports.%s" % (outname, ext))


def stepFinal_summary(conf_dict, logfile):
    """Collect result files and write the text, LaTeX and PDF summary reports.

    Steps, in order:

    1. Move final outputs into ``summary/`` and intermediate files into
       ``tmpResults/``; the latter is deleted unless ``options['keeptmp']``
       is true.  ``results['umap']`` is set to the name of the clustering
       plot moved into ``summary/``, or "NA" when there is none.
    2. Write ``<outname>_summaryReports.txt`` (tab-separated).
    3. Write ``<outname>_summaryReports.tex`` and, if ``which pdflatex``
       finds an executable, compile it and remove the .aux/.log/.toc files.

    Args:
        conf_dict: nested configuration dict ('General', 'options', 'QC',
            'results' sections) filled in by the earlier pipeline steps.
        logfile: log destination forwarded to ``wlog``.

    Returns:
        The same ``conf_dict`` object, with ``results['umap']`` set.
    """
    summarydir = 'summary/'

    wlog('Collect results', logfile)
    _summary_collect_results(conf_dict, logfile, summarydir)

    wlog('Generate summary reports', logfile)
    _summary_write_text_report(conf_dict, summarydir)

    latexfile = conf_dict['General']['outname'] + '_summaryReports.tex'
    latex_source = _summary_build_latex(conf_dict, summarydir)
    with open(latexfile, 'w') as outf:
        outf.write(latex_source)

    _summary_compile_latex(conf_dict, logfile, latexfile)
    return conf_dict