def scanSeqMotif(sequencefile, motiffile, outputdir, prefix):
    """Scan a FASTA file with a motif file using the external ``findMotifs.pl``.

    The scan result (the program's standard output) is written to
    ``<outputdir>/<prefix>_<motifname>.txt`` and a working directory
    ``<outputdir>/<prefix>_<motifname>/`` is created for the program itself.
    ``<motifname>`` is whatever ``getPrefix(motiffile)`` returns.

    sequencefile -- FASTA file to scan
    motiffile    -- motif file handed to ``-find``
    outputdir    -- directory receiving the result file and working directory
    prefix       -- prefix used to name the outputs

    Returns the path of the result text file.

    Raises OSError if ``findMotifs.pl`` cannot be started and IOError if the
    result file cannot be opened for writing. The exit status of the program
    itself is not inspected.
    """
    motifname = getPrefix(motiffile)
    scanbase = outputdir + "/" + prefix + "_" + motifname
    outfile = scanbase + ".txt"
    createOdir(scanbase)

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and prevents them from being interpreted as
    # shell syntax; the former "> outfile" redirection is done with stdout=.
    command = [
        "findMotifs.pl",
        sequencefile,
        "fasta",
        scanbase + "/",
        "-find",
        motiffile,
    ]
    with open(outfile, "w") as handle:
        subprocess.call(command, stdout=handle)
    return outfile
def mainMd(argv):
    """Command-line entry point of the motif discovery workflow.

    Options are read from ``argv[1:]``. When ``argv`` holds a single element
    (no option given) the usage message is printed and the program exits
    with status 1.

    Steps: the region file is turned into a FASTA file, then motif discovery
    is run on it -- ``motifFull`` (which also receives the database) when
    ``scan`` is 'ON', ``motifDeNovo`` otherwise.
    """
    if len(argv) == 1:
        # No option on the command line: show usage and stop
        usageMd()
        sys.exit(1)

    # Start from the defaults, then let the command line override them
    defaults = initParamMd()
    (outputdir, selectodir, regionfile, genomefile,
     database, scan, prefix) = readOptMd(argv[1:], *defaults)
    checkRequiredMd(regionfile, genomefile)

    if selectodir == 'false':
        # No output directory was specified: create the default one
        createOdir(outputdir)

    welcomeMd()
    # Summary of all parameters used
    parametersMd(outputdir, regionfile, genomefile, database, scan, prefix)
    running()

    print("")
    print("Step1: Transforming region bed file to fasta file...")
    fastafile = toFastaFile(regionfile, genomefile, outputdir, prefix)
    print("Step1: Transforming region bed file to fasta file achieved...")

    print("")
    print("Step2: Motif Discovery performed by MEME suite...")
    if scan != 'ON':
        motifDeNovo(fastafile, outputdir, prefix)
    else:
        motifFull(fastafile, outputdir, database, prefix)
    print("Step2: Motif Discovery performed by MEME suite achieved...")

    goodbyeCp()
def mainCa(argv):
    """Command-line entry point of the read alignment workflow.

    Options are read from ``argv[1:]``. When ``argv`` holds a single element
    (no option given) the usage message is printed and the program exits
    with status 1.

    After the parameter summary, the reads are aligned and the resulting
    file name is threaded through the successive processing helpers
    (unmapped filter, minimum quality filter, duplicate removal, coordinate
    filter, sorting/indexing); each helper returns the name of the file the
    next one works on.
    """
    if len(argv) == 1:
        # No option on the command line: show usage and stop
        usageCa()
        sys.exit(1)

    # Defaults first, then the values given on the command line
    defaults = initParamCa()
    (outputdir, selectodir, filterqual, unmapped, filtercoord, indexGenome,
     rmvdup, sorting, indexBam, coordinateFile, prefix, genome, minqual,
     fastqfile, fastqfile1, fastqfile2, seq) = readOptCa(argv[1:], *defaults)

    # Make sure the required options have been specified
    checkRequiredCa(seq, fastqfile2, genome)

    if selectodir == 'false':
        # No output directory was specified: create the default one
        createOdir(outputdir)

    welcomeCa()

    # Without a prefix on the command line, derive it from the (first)
    # fastq file; then print the summary matching the sequencing layout.
    single_end = seq == 'SE'
    if prefix == "":
        prefix = getPrefix(fastqfile if single_end else fastqfile1)
    if single_end:
        parametersSe(fastqfile, genome, outputdir, filterqual, minqual,
                     unmapped, filtercoord, coordinateFile, indexGenome,
                     rmvdup, sorting, indexBam, prefix)
    else:
        parametersPe(fastqfile1, fastqfile2, genome, outputdir, filterqual,
                     minqual, unmapped, filtercoord, coordinateFile,
                     indexGenome, rmvdup, sorting, indexBam, prefix)

    running()

    # Analysis proper: every step hands its output name to the next one
    genomeIndex(indexGenome, genome)
    bamname = readsAlignment(seq, fastqfile, fastqfile1, fastqfile2,
                             outputdir, genome, prefix)
    bamname = unmappedFilter(unmapped, outputdir, bamname, prefix)
    bamname = minQFilter(filterqual, minqual, outputdir, bamname, prefix)
    bamname = removeDup(rmvdup, outputdir, bamname, prefix)
    bamname = coordFilter(filtercoord, coordinateFile, outputdir, bamname,
                          prefix)
    bamname = sortAndIndexFile(sorting, indexBam, outputdir, bamname, prefix)

    # Final message, given the name of the last file produced
    goodbyeCa(bamname)
def mainSm(argv):
    """Command-line entry point of the motif scanning / enrichment workflow.

    Options are read from ``argv[1:]``. When ``argv`` holds a single element
    (no option given) the usage message is printed and the program exits
    with status 1.

    Steps: build a background region file, convert foreground and background
    regions to FASTA, scan both with the motif file, build and sort a score
    file from the two scans, then compute the AUC and plot the ROC curve.
    Background outputs use the prefix ``"BG_" + prefix``.
    """
    if len(argv) == 1:
        # No option on the command line: show usage and stop
        usageSm()
        sys.exit(1)

    # Defaults first, then the values given on the command line
    defaults = initParamSm()
    (outputdir, selectodir, regionfile, genomefile,
     motiffile, exclude, prefix) = readOptSm(argv[1:], *defaults)
    checkRequiredSm(regionfile, genomefile, motiffile)

    if selectodir == 'false':
        # No output directory was specified: create the default one
        createOdir(outputdir)

    welcomeSm()
    # Summary of all parameters used
    parametersSm(outputdir, regionfile, genomefile, motiffile, exclude,
                 prefix)
    running()

    bgprefix = "BG_" + prefix

    print("")
    print("Step1: Generate random background region...")
    backgroundfile = createBackground(regionfile, exclude, outputdir, prefix)
    print("Step1: Generate random background region achieved...")

    print("")
    print("Step2: Transforming region bed file to fasta file...")
    fastafile = toFastaFile(regionfile, genomefile, outputdir, prefix)
    print("Step2: Transforming region bed file to fasta file achieved...")

    print("")
    print("Step3: Transforming background bed file to fasta file...")
    bgfastafile = toFastaFile(backgroundfile, genomefile, outputdir, bgprefix)
    print("Step3: Transforming background bed file to fasta file achieved...")

    print("")
    print("Step4: Scanning foreground sequence with motif file...")
    scanfile = scanSeqMotif(fastafile, motiffile, outputdir, prefix)
    print("Step4: Scanning foreground sequence with motif file achieved...")

    print("")
    print("Step5: Scanning background sequence with motif file...")
    scanbgfile = scanSeqMotif(bgfastafile, motiffile, outputdir, bgprefix)
    print("Step5: Scanning background sequence with motif file achieved...")

    print("")
    print("Step6: Creating score file for motif enrichment...")
    scorefile = createScoreFile(scanfile, scanbgfile, motiffile, outputdir,
                                prefix)
    print("Step6: Creating score file for motif enrichment achieved...")

    print("")
    print("Step7: Sorting score file...")
    # From here on, scorefile designates the sorted score file
    scorefile = sortScoreFile(scorefile, outputdir)
    print("Step7: Sorting score file achieved...")

    print("")
    print("Step8: Calculate AUC for motif enrichment...")
    scoreAUC(scorefile, outputdir)
    print("Step8: Calculate AUC for motif enrichment achieved...")

    print("Step9: plotting ROC curve...")
    plotRocCurve(scorefile, outputdir)
    print("Step9: plotting ROC curve achieved...")

    goodbyeCp()
    print("")
def mainAp(argv):
    """Command-line entry point of the peak annotation workflow.

    Options are read from ``argv[1:]``. When ``argv`` holds a single element
    (no option given) the usage message is printed and the program exits
    with status 1. The prefix defaults to 'AnnoPeaks'.

    Steps: convert the peak file to a coordinate file, export promoter and
    enhancer regions from the annotation file, annotate the peaks against
    ``<outputdir>/promoter_region.bed`` and ``<outputdir>/enhancer_region.bed``,
    print a summary of the line counts and, unless ``graph`` is 'OFF', plot
    them.
    """
    if len(argv) == 1:
        # No option on the command line: show usage and stop
        usageAp()
        sys.exit(1)

    # Defaults first, then the values given on the command line
    defaults = initParamAp()
    (outputdir, selectodir, peakfile, annofile,
     peakcaller, prefix, graph) = readOptAp(argv[1:], *defaults)
    checkRequiredAp(peakfile, annofile, peakcaller)

    if selectodir == 'false':
        # No output directory was specified: create the default one
        createOdir(outputdir)

    welcomeAp()

    if prefix == '':
        # No prefix on the command line: fall back to the default one
        prefix = 'AnnoPeaks'

    # Summary of all parameters used
    parametersAp(outputdir, peakfile, annofile, peakcaller, prefix)
    running()

    print("")
    print("Step1 : Transforming peak file to coordinate file...")
    # peakfile now designates the coordinate file
    peakfile = toCoordFile(peakfile, outputdir, prefix)
    print("Step1 : Transforming peak file to coordinate file achieved...")

    print("")
    print("Step2 : Extract promoter regions from annotation file...")
    exportProm(annofile, outputdir)
    print("Step2 : Extract promoter regions from annotation file achieved...")

    print("")
    print("Step3 : Extract enhancer regions from annotation file...")
    exportEnh(annofile, outputdir)
    print("Step3 : Extract enhancer regions from annotation file achieved...")

    print("")
    print("Step4 : Annotate peaks that falls into promoter regions...")
    promoter_bed = outputdir + "/promoter_region.bed"
    promfile = annotatePeaks(peakfile, outputdir, promoter_bed, prefix)
    print("Step4 : Annotate peaks that falls into promoter regions achieved...")

    print("")
    print("Step5 : Annotate peaks that falls into enhancer regions...")
    enhancer_bed = outputdir + "/enhancer_region.bed"
    enhfile = annotatePeaks(peakfile, outputdir, enhancer_bed, prefix)
    print("Step5 : Annotate peaks that falls into enhancer regions achieved...")

    print("")
    # Line counts of the peak file and of the two annotated files
    nbpeak = countLines(peakfile)
    nbprom = countLines(promfile)
    nbenh = countLines(enhfile)
    summaryAp(nbpeak, nbprom, nbenh)

    print("")
    print("Step 6 : Plotting annotation results...")
    if graph != 'OFF':
        plotAnno(nbpeak, nbprom, nbenh, outputdir)
        print("Step 6 : Plotting annotation results achieved...")
    else:
        print("Skipped")

    goodbyeCp()
def mainAp(argv):
    """Command-line entry point of the peak annotation workflow.

    Options are read from ``argv[1:]``. When ``argv`` holds a single element
    (no option given) the usage message is printed and the program exits
    with status 1. The prefix defaults to 'AnnoPeaks'.

    Steps: convert the peak file to a coordinate file, export promoter and
    enhancer regions from the annotation file, annotate the peaks against
    ``<outputdir>/promoter_region.bed`` and ``<outputdir>/enhancer_region.bed``,
    print a summary of the line counts and, unless ``graph`` is 'OFF', plot
    them.
    """
    if len(argv) == 1:
        # No option on the command line: show usage and stop
        usageAp()
        sys.exit(1)

    # Defaults first, then the values given on the command line
    defaults = initParamAp()
    (outputdir, selectodir, peakfile, annofile,
     peakcaller, prefix, graph) = readOptAp(argv[1:], *defaults)
    checkRequiredAp(peakfile, annofile, peakcaller)

    if selectodir == 'false':
        # No output directory was specified: create the default one
        createOdir(outputdir)

    welcomeAp()

    if prefix == '':
        # No prefix on the command line: fall back to the default one
        prefix = 'AnnoPeaks'

    # Summary of all parameters used
    parametersAp(outputdir, peakfile, annofile, peakcaller, prefix)
    running()

    print("")
    print("Step1 : Transforming peak file to coordinate file...")
    # peakfile now designates the coordinate file
    peakfile = toCoordFile(peakfile, outputdir, prefix)
    print("Step1 : Transforming peak file to coordinate file achieved...")

    print("")
    print("Step2 : Extract promoter regions from annotation file...")
    exportProm(annofile, outputdir)
    print("Step2 : Extract promoter regions from annotation file achieved...")

    print("")
    print("Step3 : Extract enhancer regions from annotation file...")
    exportEnh(annofile, outputdir)
    print("Step3 : Extract enhancer regions from annotation file achieved...")

    print("")
    print("Step4 : Annotate peaks that falls into promoter regions...")
    promoter_bed = outputdir + "/promoter_region.bed"
    promfile = annotatePeaks(peakfile, outputdir, promoter_bed, prefix)
    print("Step4 : Annotate peaks that falls into promoter regions achieved...")

    print("")
    print("Step5 : Annotate peaks that falls into enhancer regions...")
    enhancer_bed = outputdir + "/enhancer_region.bed"
    enhfile = annotatePeaks(peakfile, outputdir, enhancer_bed, prefix)
    print("Step5 : Annotate peaks that falls into enhancer regions achieved...")

    print("")
    # Line counts of the peak file and of the two annotated files
    nbpeak = countLines(peakfile)
    nbprom = countLines(promfile)
    nbenh = countLines(enhfile)
    summaryAp(nbpeak, nbprom, nbenh)

    print("")
    print("Step 6 : Plotting annotation results...")
    if graph != 'OFF':
        plotAnno(nbpeak, nbprom, nbenh, outputdir)
        print("Step 6 : Plotting annotation results achieved...")
    else:
        print("Skipped")

    goodbyeCp()
def mainCa(argv):
    """Command-line entry point of the read alignment workflow.

    Options are read from ``argv[1:]``. When ``argv`` holds a single element
    (no option given) the usage message is printed and the program exits
    with status 1.

    After the parameter summary, the reads are aligned and the resulting
    file name is threaded through the successive processing helpers
    (unmapped filter, minimum quality filter, duplicate removal, coordinate
    filter, sorting/indexing); each helper returns the name of the file the
    next one works on.
    """
    if len(argv) == 1:
        # No option on the command line: show usage and stop
        usageCa()
        sys.exit(1)

    # Defaults first, then the values given on the command line
    defaults = initParamCa()
    (outputdir, selectodir, filterqual, unmapped, filtercoord, indexGenome,
     rmvdup, sorting, indexBam, coordinateFile, prefix, genome, minqual,
     fastqfile, fastqfile1, fastqfile2, seq) = readOptCa(argv[1:], *defaults)

    # Make sure the required options have been specified
    checkRequiredCa(seq, fastqfile2, genome)

    if selectodir == 'false':
        # No output directory was specified: create the default one
        createOdir(outputdir)

    welcomeCa()

    # Without a prefix on the command line, derive it from the (first)
    # fastq file; then print the summary matching the sequencing layout.
    single_end = seq == 'SE'
    if prefix == "":
        prefix = getPrefix(fastqfile if single_end else fastqfile1)
    if single_end:
        parametersSe(fastqfile, genome, outputdir, filterqual, minqual,
                     unmapped, filtercoord, coordinateFile, indexGenome,
                     rmvdup, sorting, indexBam, prefix)
    else:
        parametersPe(fastqfile1, fastqfile2, genome, outputdir, filterqual,
                     minqual, unmapped, filtercoord, coordinateFile,
                     indexGenome, rmvdup, sorting, indexBam, prefix)

    running()

    # Analysis proper: every step hands its output name to the next one
    genomeIndex(indexGenome, genome)
    bamname = readsAlignment(seq, fastqfile, fastqfile1, fastqfile2,
                             outputdir, genome, prefix)
    bamname = unmappedFilter(unmapped, outputdir, bamname, prefix)
    bamname = minQFilter(filterqual, minqual, outputdir, bamname, prefix)
    bamname = removeDup(rmvdup, outputdir, bamname, prefix)
    bamname = coordFilter(filtercoord, coordinateFile, outputdir, bamname,
                          prefix)
    bamname = sortAndIndexFile(sorting, indexBam, outputdir, bamname, prefix)

    # Final message, given the name of the last file produced
    goodbyeCa(bamname)
def mainTq(argv):
    """Command-line entry point of the trimming / quality check workflow.

    Options are read from ``argv[1:]``. When ``argv`` holds a single element
    (no option given) the usage message is printed and the program exits
    with status 1.

    Steps: quality check of the input fastq file(s), trimming, quality check
    of the trimmed file(s). A non-empty ``fastqfile`` selects the single
    file path (``trimmoSe``); otherwise the pair ``fastqfile1`` /
    ``fastqfile2`` is processed (``trimmoPe``).
    """
    if len(argv) == 1:
        # No option on the command line: show usage and stop
        usageTq()
        sys.exit(1)

    # Defaults first, then the values given on the command line
    defaults = initParamTq()
    (outputdir, selectodir, fastqfile1, fastqfile2,
     fastqfile, seq1, seq2, lib) = readOptTq(argv[1:], *defaults)

    # Make sure the required options have been specified
    checkRequiredTq(fastqfile, fastqfile1, fastqfile2, lib)

    if selectodir == 'false':
        # No output directory was specified: create the default one
        createOdir(outputdir)

    welcomeTq()
    adaptaters = setAdapts(seq1, seq2, lib)
    parametersTq(outputdir, fastqfile, fastqfile1, fastqfile2, adaptaters,
                 lib, seq1, seq2)
    running()

    single_file = fastqfile != ''

    print("Step1 : Quality check before trimming...")
    createOdir(outputdir + "/fastqc_report")
    raw_inputs = (fastqfile,) if single_file else (fastqfile1, fastqfile2)
    for raw in raw_inputs:
        fastQc(raw, outputdir)
    print("Step1 : Quality check before trimming achieved...")

    print("")
    print("Step2 : Trimming...")
    createOdir(outputdir + "/fastq_trim")
    if single_file:
        trimmed_outputs = (trimmoSe(fastqfile, outputdir, adaptaters),)
    else:
        fastqtrim1, fastqtrim2 = trimmoPe(fastqfile1, fastqfile2, outputdir,
                                          adaptaters)
        trimmed_outputs = (fastqtrim1, fastqtrim2)
    print("Step2 : Trimming achieved...")

    print("")
    print("Step3 : Quality check after trimming...")
    for trimmed in trimmed_outputs:
        fastQc(trimmed, outputdir)
    print("Step3 : Quality check after trimming achieved...")

    # End-of-analysis message
    goodbyeCp()
def mainCpnr(argv):
    """Command-line entry point of peak calling without replicates.

    Options are read from ``argv[1:]``. When ``argv`` holds a single element
    (no option given) the usage message is printed and the program exits
    with status 1. The prefix defaults to 'CallPeaks_norep'.

    Steps:
      1. convert the sample and control files to tagAlign files;
      2. cross-correlation quality check (only when ``qc`` is 'ON' and
         ``spp`` is 'OFF');
      3. peak calling with ``peakCallMacs`` (only when ``spp`` is 'OFF');
      4. when ``spp`` is 'ON': split the sample into two pseudo-replicates,
         call peaks on the sample and both pseudo-replicates, run the IDR
         analysis between the pseudo-replicates (threshold fixed at 0.01),
         plot the IDR results and create the final peak set.

    Fixes over the previous version: the Step2 completion message now says
    "achieved" (it used to repeat the start message) and the last sub-step
    of Step4 is labelled "Step4e" instead of "Step5e".
    """
    if len(argv) == 1:
        # No option on the command line: show usage and stop
        usageCpnr()
        sys.exit(1)

    # Defaults first, then the values given on the command line
    defaults = initParamCpnr()
    (outputdir, selectodir, bamfile, ctrlfile, thresh, pvalue, qvalue, qc,
     prefix, spp) = readOptCpnr(argv[1:], *defaults)

    # Make sure the required options have been specified
    checkRequiredCpnr(bamfile, ctrlfile)

    if selectodir == 'false':
        # No output directory was specified: create the default one
        createOdir(outputdir)

    welcomeCpnr()

    if prefix == '':
        # No prefix on the command line: fall back to the default one
        prefix = 'CallPeaks_norep'

    # Summary of all parameters used
    parametersCpnr(outputdir, bamfile, ctrlfile, thresh, pvalue, qvalue, qc,
                   prefix, spp)
    running()

    tagdir = outputdir + "/tagAlignfiles/"

    print("")
    print("Step1 : Transformation from bam file to tagAlign file...")
    createOdir(outputdir + "/tagAlignfiles")
    for file_in in (bamfile, ctrlfile):
        toTagAlign(file_in, tagdir)
    # From here on the variables designate the converted files
    chipfile = newName(bamfile, tagdir)
    ctrlfile = newName(ctrlfile, tagdir)
    print("Step1 : Transformation from bam file to tagAlign file achieved...")

    print("")
    print("Step2 : Cross-correlation by phantomPeaksQualTools before calling peaks...")
    createOdir(outputdir + "/PeakCalling")
    if qc == 'ON' and spp == 'OFF':
        qualCheck(chipfile, outputdir + "/PeakCalling", prefix)
        print("Step2 : Cross-correlation by phantomPeaksQualTools before calling peaks achieved...")
    else:
        print("skipped")
    print("")

    print("Step3 : PeakCalling using macs2...")
    if spp == 'OFF':
        peakCallMacs(chipfile, ctrlfile, outputdir, prefix, pvalue, qvalue,
                     thresh)
        print("Step3 : PeakCalling using macs2 achieved...")
    else:
        print("skipped")
    print("")

    if spp == 'ON':
        print("Step4 : PeakCalling based on adaptated IDR analysis...")
        print("\tStep4a : Splitting file into pseudo-replicates...")
        splitFile(chipfile, tagdir)
        print("\tStep4a : Splitting file into pseudo-replicates achieved...")

        # Paths of the two pseudo-replicates produced by the split
        prefixchip = tagdir + getPrefix(chipfile)
        pseudo1 = prefixchip + "_PR1.tagAlign.gz"
        pseudo2 = prefixchip + "_PR2.tagAlign.gz"

        print("Step4b : Peak Calling for each files (replicates and pseudo replicates)...")
        # Kept from the original flow although Step2 already created it
        createOdir(outputdir + "/PeakCalling")
        for sample in (chipfile, pseudo1, pseudo2):
            peakCall(sample, ctrlfile, outputdir + "/PeakCalling/")
        print("Step4b : Peak Calling for each files (replicates and pseudo replicates) achieved...")

        print("Step4c : IDR analysis...")
        createOdir(outputdir + "/IDR")
        consistency(pseudo1, pseudo2, ctrlfile, outputdir)
        idrthresh = 0.01
        nb_consistent = countConsistentPeaks(pseudo1, pseudo2, outputdir,
                                             idrthresh)
        print("number of consistent peaks between the two pseudo replicates: " + str(nb_consistent))
        print("Step4c : IDR analysis achieved...")

        print("Step4d : Plotting IDR results...")
        createOdir(outputdir + "/IDR/plots")
        plotResults2(outputdir, pseudo1, pseudo2)
        print("Step4d : Plotting IDR results achieved...")

        # This sub-step belongs to Step4 (it was mislabelled "Step5e")
        print("Step4e : Creating final sets of peaks...")
        createOdir(outputdir + "/finalsets")
        createFinalSets2(chipfile, ctrlfile, nb_consistent, outputdir, prefix)

    # End-of-analysis message
    goodbyeCp()
def mainCpnr(argv):
    """Command-line entry point of peak calling without replicates.

    Options are read from ``argv[1:]``. When ``argv`` holds a single element
    (no option given) the usage message is printed and the program exits
    with status 1. The prefix defaults to 'CallPeaks_norep'.

    Steps:
      1. convert the sample and control files to tagAlign files;
      2. cross-correlation quality check (only when ``qc`` is 'ON' and
         ``spp`` is 'OFF');
      3. peak calling with ``peakCallMacs`` (only when ``spp`` is 'OFF');
      4. when ``spp`` is 'ON': split the sample into two pseudo-replicates,
         call peaks on the sample and both pseudo-replicates, run the IDR
         analysis between the pseudo-replicates (threshold fixed at 0.01),
         plot the IDR results and create the final peak set.

    Fixes over the previous version: the Step2 completion message now says
    "achieved" (it used to repeat the start message) and the last sub-step
    of Step4 is labelled "Step4e" instead of "Step5e".
    """
    if len(argv) == 1:
        # No option on the command line: show usage and stop
        usageCpnr()
        sys.exit(1)

    # Defaults first, then the values given on the command line
    defaults = initParamCpnr()
    (outputdir, selectodir, bamfile, ctrlfile, thresh, pvalue, qvalue, qc,
     prefix, spp) = readOptCpnr(argv[1:], *defaults)

    # Make sure the required options have been specified
    checkRequiredCpnr(bamfile, ctrlfile)

    if selectodir == 'false':
        # No output directory was specified: create the default one
        createOdir(outputdir)

    welcomeCpnr()

    if prefix == '':
        # No prefix on the command line: fall back to the default one
        prefix = 'CallPeaks_norep'

    # Summary of all parameters used
    parametersCpnr(outputdir, bamfile, ctrlfile, thresh, pvalue, qvalue, qc,
                   prefix, spp)
    running()

    tagdir = outputdir + "/tagAlignfiles/"

    print("")
    print("Step1 : Transformation from bam file to tagAlign file...")
    createOdir(outputdir + "/tagAlignfiles")
    for file_in in (bamfile, ctrlfile):
        toTagAlign(file_in, tagdir)
    # From here on the variables designate the converted files
    chipfile = newName(bamfile, tagdir)
    ctrlfile = newName(ctrlfile, tagdir)
    print("Step1 : Transformation from bam file to tagAlign file achieved...")

    print("")
    print("Step2 : Cross-correlation by phantomPeaksQualTools before calling peaks...")
    createOdir(outputdir + "/PeakCalling")
    if qc == 'ON' and spp == 'OFF':
        qualCheck(chipfile, outputdir + "/PeakCalling", prefix)
        print("Step2 : Cross-correlation by phantomPeaksQualTools before calling peaks achieved...")
    else:
        print("skipped")
    print("")

    print("Step3 : PeakCalling using macs2...")
    if spp == 'OFF':
        peakCallMacs(chipfile, ctrlfile, outputdir, prefix, pvalue, qvalue,
                     thresh)
        print("Step3 : PeakCalling using macs2 achieved...")
    else:
        print("skipped")
    print("")

    if spp == 'ON':
        print("Step4 : PeakCalling based on adaptated IDR analysis...")
        print("\tStep4a : Splitting file into pseudo-replicates...")
        splitFile(chipfile, tagdir)
        print("\tStep4a : Splitting file into pseudo-replicates achieved...")

        # Paths of the two pseudo-replicates produced by the split
        prefixchip = tagdir + getPrefix(chipfile)
        pseudo1 = prefixchip + "_PR1.tagAlign.gz"
        pseudo2 = prefixchip + "_PR2.tagAlign.gz"

        print("Step4b : Peak Calling for each files (replicates and pseudo replicates)...")
        # Kept from the original flow although Step2 already created it
        createOdir(outputdir + "/PeakCalling")
        for sample in (chipfile, pseudo1, pseudo2):
            peakCall(sample, ctrlfile, outputdir + "/PeakCalling/")
        print("Step4b : Peak Calling for each files (replicates and pseudo replicates) achieved...")

        print("Step4c : IDR analysis...")
        createOdir(outputdir + "/IDR")
        consistency(pseudo1, pseudo2, ctrlfile, outputdir)
        idrthresh = 0.01
        nb_consistent = countConsistentPeaks(pseudo1, pseudo2, outputdir,
                                             idrthresh)
        print("number of consistent peaks between the two pseudo replicates: " + str(nb_consistent))
        print("Step4c : IDR analysis achieved...")

        print("Step4d : Plotting IDR results...")
        createOdir(outputdir + "/IDR/plots")
        plotResults2(outputdir, pseudo1, pseudo2)
        print("Step4d : Plotting IDR results achieved...")

        # This sub-step belongs to Step4 (it was mislabelled "Step5e")
        print("Step4e : Creating final sets of peaks...")
        createOdir(outputdir + "/finalsets")
        createFinalSets2(chipfile, ctrlfile, nb_consistent, outputdir, prefix)

    # End-of-analysis message
    goodbyeCp()
def mainCp(argv):
    """Command-line entry point of peak calling with two replicates.

    Options are read from ``argv[1:]``. When ``argv`` holds a single element
    (no option given) the usage message is printed and the program exits
    with status 1. The prefix defaults to 'CallPeaks'.

    Steps:
      1. convert replicate and control files to tagAlign files;
      2. merge the two control files when ``ctrlsup`` is 'true';
      3. merge both replicates into a pool;
      4. split each replicate and the pool into two pseudo-replicates;
      5. call peaks on the replicates, the pool and all pseudo-replicates;
      6. IDR analysis (skipped when ``idr`` is 'OFF');
      7. IDR plots (skipped when ``plot`` is 'OFF');
      8. final peak sets (skipped when ``finalsets`` is 'OFF').

    Fix over the previous version: the peak counts needed by Step8 are only
    computed during Step6, so requesting the final sets while the IDR
    analysis is disabled used to crash with an unbound local variable.
    Step8 is now skipped with an explanatory message in that case.
    """
    if len(argv) == 1:
        # No option on the command line: show usage and stop
        usageCp()
        sys.exit(1)

    # Defaults first, then the values given on the command line
    defaults = initParamCp()
    (outputdir, selectodir, rep1, rep2, ctrl1, ctrl2, ctrlsup, idr,
     idrthresh, finalsets, plot, prefix) = readOptCp(argv[1:], *defaults)

    # Make sure the required parameters have been specified
    checkRequiredCp(rep1, rep2, ctrl1)

    if selectodir == 'false':
        # No output directory was specified: create the default one
        createOdir(outputdir)

    welcomeCp()

    if prefix == '':
        # No prefix on the command line: fall back to the default one
        prefix = 'CallPeaks'

    # Summary of all parameters used
    parametersCp(outputdir, selectodir, rep1, rep2, ctrl1, ctrl2, ctrlsup,
                 idr, idrthresh, finalsets, plot, prefix)
    running()

    tagdir = outputdir + "/tagAlignfiles/"
    two_controls = ctrlsup == 'true'

    print("")
    print("Step1 : Transformation from bam file to tagAlign file...")
    createOdir(outputdir + "/tagAlignfiles")
    for file_in in (rep1, rep2, ctrl1):
        toTagAlign(file_in, tagdir)
    if two_controls:
        # A second control file was given: convert it too
        toTagAlign(ctrl2, tagdir)
        ctrl2 = newName(ctrl2, tagdir)
    # From here on the variables designate the converted files
    rep1 = newName(rep1, tagdir)
    rep2 = newName(rep2, tagdir)
    ctrl1 = newName(ctrl1, tagdir)
    print("Step1 : Transformation from bam file to tagAlign file achieved...")

    print("")
    print("Step2 : Merging Control files (if two files are given)...")
    if two_controls:
        ctrlfile = mergeFile(ctrl1, ctrl2, tagdir, 'Control')
        print("Step2 : Merging Control files achieved...")
    else:
        print("skipped")
        ctrlfile = ctrl1

    print("")
    print("Step3 : Creating Pool of replicates...")
    poolfile = mergeFile(rep1, rep2, tagdir, 'Pool')
    print("Step3 : Creating Pool of replicates achieved...")

    print("")
    print("Step4 : Splitting samples files into pseudo replicates...")
    for file_in in (rep1, rep2, poolfile):
        splitFile(file_in, tagdir)
    print("Step4 : Splitting samples files into pseudo replicates achieved...")
    print("")

    # Paths of the pseudo-replicates produced by the split
    prefixr1 = tagdir + getPrefix(rep1)
    prefixr2 = tagdir + getPrefix(rep2)
    prefixpool = tagdir + getPrefix(poolfile)
    r1_pr1 = prefixr1 + "_PR1.tagAlign.gz"
    r1_pr2 = prefixr1 + "_PR2.tagAlign.gz"
    r2_pr1 = prefixr2 + "_PR1.tagAlign.gz"
    r2_pr2 = prefixr2 + "_PR2.tagAlign.gz"
    pool_pr1 = prefixpool + "_PR1.tagAlign.gz"
    pool_pr2 = prefixpool + "_PR2.tagAlign.gz"

    print("Step5 : Peak Calling for each files (replicates, pool and pseudo replicates)...")
    createOdir(outputdir + "/PeakCalling")
    for sample in (rep1, rep2, poolfile, r1_pr1, r1_pr2, r2_pr1, r2_pr2,
                   pool_pr1, pool_pr2):
        peakCall(sample, ctrlfile, outputdir + "/PeakCalling/")
    print("Step5 : Peak Calling for each files achieved...")

    print("")
    print("Step6 : IDR analysis...")
    # nt / np are only available once the IDR analysis has run
    nt = np = None
    if idr == 'OFF':
        print("Skipped")
    else:
        createOdir(outputdir + "/IDR")
        # IDR analysis between the replicates and between each pair of
        # pseudo-replicates
        consistency(rep1, rep2, ctrlfile, outputdir)
        consistency(r1_pr1, r1_pr2, ctrlfile, outputdir)
        consistency(r2_pr1, r2_pr2, ctrlfile, outputdir)
        consistency(pool_pr1, pool_pr2, ctrlfile, outputdir)
        # Peak counts under the IDR threshold for each comparison
        nt = countConsistentPeaks(rep1, rep2, outputdir, idrthresh)
        np = countConsistentPeaks(pool_pr1, pool_pr2, outputdir, idrthresh)
        n1 = countConsistentPeaks(r1_pr1, r1_pr2, outputdir, idrthresh)
        n2 = countConsistentPeaks(r2_pr1, r2_pr2, outputdir, idrthresh)
        exportResults(nt, np, n1, n2, outputdir)
        print("Step6 : IDR analysis achieved...")

    print("")
    print("Step7 : Plotting IDR results...")
    if plot == 'OFF':
        print("Skipped")
    else:
        createOdir(outputdir + "/IDR/plots")
        plotResults(outputdir, rep1, rep2, r1_pr1, r1_pr2, r2_pr1, r2_pr2,
                    pool_pr1, pool_pr2)
        print("Step7 : Plotting IDR results achieved...")

    print("")
    print("Step8 : Creating final sets of peaks...")
    if finalsets == 'OFF':
        print("Skipped")
    elif idr == 'OFF':
        # The final sets are built from the counts computed in Step6;
        # without them the call below would fail on unbound variables.
        print("Skipped (final peak sets require the IDR analysis, which is disabled)")
    else:
        createOdir(outputdir + "/finalsets")
        createFinalSets(poolfile, ctrlfile, nt, np, outputdir, prefix)
        print("Step8 : Creating final sets of peaks achieved...")

    # End message
    goodbyeCp()
def mainCp(argv):
    """Command-line entry point of peak calling with two replicates.

    Options are read from ``argv[1:]``. When ``argv`` holds a single element
    (no option given) the usage message is printed and the program exits
    with status 1. The prefix defaults to 'CallPeaks'.

    Steps:
      1. convert replicate and control files to tagAlign files;
      2. merge the two control files when ``ctrlsup`` is 'true';
      3. merge both replicates into a pool;
      4. split each replicate and the pool into two pseudo-replicates;
      5. call peaks on the replicates, the pool and all pseudo-replicates;
      6. IDR analysis (skipped when ``idr`` is 'OFF');
      7. IDR plots (skipped when ``plot`` is 'OFF');
      8. final peak sets (skipped when ``finalsets`` is 'OFF').

    Fix over the previous version: the peak counts needed by Step8 are only
    computed during Step6, so requesting the final sets while the IDR
    analysis is disabled used to crash with an unbound local variable.
    Step8 is now skipped with an explanatory message in that case.
    """
    if len(argv) == 1:
        # No option on the command line: show usage and stop
        usageCp()
        sys.exit(1)

    # Defaults first, then the values given on the command line
    defaults = initParamCp()
    (outputdir, selectodir, rep1, rep2, ctrl1, ctrl2, ctrlsup, idr,
     idrthresh, finalsets, plot, prefix) = readOptCp(argv[1:], *defaults)

    # Make sure the required parameters have been specified
    checkRequiredCp(rep1, rep2, ctrl1)

    if selectodir == 'false':
        # No output directory was specified: create the default one
        createOdir(outputdir)

    welcomeCp()

    if prefix == '':
        # No prefix on the command line: fall back to the default one
        prefix = 'CallPeaks'

    # Summary of all parameters used
    parametersCp(outputdir, selectodir, rep1, rep2, ctrl1, ctrl2, ctrlsup,
                 idr, idrthresh, finalsets, plot, prefix)
    running()

    tagdir = outputdir + "/tagAlignfiles/"
    two_controls = ctrlsup == 'true'

    print("")
    print("Step1 : Transformation from bam file to tagAlign file...")
    createOdir(outputdir + "/tagAlignfiles")
    for file_in in (rep1, rep2, ctrl1):
        toTagAlign(file_in, tagdir)
    if two_controls:
        # A second control file was given: convert it too
        toTagAlign(ctrl2, tagdir)
        ctrl2 = newName(ctrl2, tagdir)
    # From here on the variables designate the converted files
    rep1 = newName(rep1, tagdir)
    rep2 = newName(rep2, tagdir)
    ctrl1 = newName(ctrl1, tagdir)
    print("Step1 : Transformation from bam file to tagAlign file achieved...")

    print("")
    print("Step2 : Merging Control files (if two files are given)...")
    if two_controls:
        ctrlfile = mergeFile(ctrl1, ctrl2, tagdir, 'Control')
        print("Step2 : Merging Control files achieved...")
    else:
        print("skipped")
        ctrlfile = ctrl1

    print("")
    print("Step3 : Creating Pool of replicates...")
    poolfile = mergeFile(rep1, rep2, tagdir, 'Pool')
    print("Step3 : Creating Pool of replicates achieved...")

    print("")
    print("Step4 : Splitting samples files into pseudo replicates...")
    for file_in in (rep1, rep2, poolfile):
        splitFile(file_in, tagdir)
    print("Step4 : Splitting samples files into pseudo replicates achieved...")
    print("")

    # Paths of the pseudo-replicates produced by the split
    prefixr1 = tagdir + getPrefix(rep1)
    prefixr2 = tagdir + getPrefix(rep2)
    prefixpool = tagdir + getPrefix(poolfile)
    r1_pr1 = prefixr1 + "_PR1.tagAlign.gz"
    r1_pr2 = prefixr1 + "_PR2.tagAlign.gz"
    r2_pr1 = prefixr2 + "_PR1.tagAlign.gz"
    r2_pr2 = prefixr2 + "_PR2.tagAlign.gz"
    pool_pr1 = prefixpool + "_PR1.tagAlign.gz"
    pool_pr2 = prefixpool + "_PR2.tagAlign.gz"

    print("Step5 : Peak Calling for each files (replicates, pool and pseudo replicates)...")
    createOdir(outputdir + "/PeakCalling")
    for sample in (rep1, rep2, poolfile, r1_pr1, r1_pr2, r2_pr1, r2_pr2,
                   pool_pr1, pool_pr2):
        peakCall(sample, ctrlfile, outputdir + "/PeakCalling/")
    print("Step5 : Peak Calling for each files achieved...")

    print("")
    print("Step6 : IDR analysis...")
    # nt / np are only available once the IDR analysis has run
    nt = np = None
    if idr == 'OFF':
        print("Skipped")
    else:
        createOdir(outputdir + "/IDR")
        # IDR analysis between the replicates and between each pair of
        # pseudo-replicates
        consistency(rep1, rep2, ctrlfile, outputdir)
        consistency(r1_pr1, r1_pr2, ctrlfile, outputdir)
        consistency(r2_pr1, r2_pr2, ctrlfile, outputdir)
        consistency(pool_pr1, pool_pr2, ctrlfile, outputdir)
        # Peak counts under the IDR threshold for each comparison
        nt = countConsistentPeaks(rep1, rep2, outputdir, idrthresh)
        np = countConsistentPeaks(pool_pr1, pool_pr2, outputdir, idrthresh)
        n1 = countConsistentPeaks(r1_pr1, r1_pr2, outputdir, idrthresh)
        n2 = countConsistentPeaks(r2_pr1, r2_pr2, outputdir, idrthresh)
        exportResults(nt, np, n1, n2, outputdir)
        print("Step6 : IDR analysis achieved...")

    print("")
    print("Step7 : Plotting IDR results...")
    if plot == 'OFF':
        print("Skipped")
    else:
        createOdir(outputdir + "/IDR/plots")
        plotResults(outputdir, rep1, rep2, r1_pr1, r1_pr2, r2_pr1, r2_pr2,
                    pool_pr1, pool_pr2)
        print("Step7 : Plotting IDR results achieved...")

    print("")
    print("Step8 : Creating final sets of peaks...")
    if finalsets == 'OFF':
        print("Skipped")
    elif idr == 'OFF':
        # The final sets are built from the counts computed in Step6;
        # without them the call below would fail on unbound variables.
        print("Skipped (final peak sets require the IDR analysis, which is disabled)")
    else:
        createOdir(outputdir + "/finalsets")
        createFinalSets(poolfile, ctrlfile, nt, np, outputdir, prefix)
        print("Step8 : Creating final sets of peaks achieved...")

    # End message
    goodbyeCp()