Ejemplo n.º 1
0
def scanSeqMotif(sequencefile, motiffile, outputdir, prefix):
    """Scan a fasta file with a motif file by running ``findMotifs.pl``.

    A working directory ``<outputdir>/<prefix>_<motifname>`` is created with
    ``createOdir`` and passed to ``findMotifs.pl``; the standard output of the
    tool is written to ``<outputdir>/<prefix>_<motifname>.txt``.  The motif
    name is whatever ``getPrefix`` returns for ``motiffile``.

    Parameters:
        sequencefile: path of the fasta file to scan.
        motiffile: path of the motif file given to the ``-find`` option.
        outputdir: directory receiving the result file and working directory.
        prefix: prefix used to build the output names.

    Returns:
        The path of the text file holding the standard output of the scan.

    Raises:
        OSError: if ``findMotifs.pl`` cannot be started (for example when it
            is not on the PATH) or the result file cannot be opened.
    """
    motifname = getPrefix(motiffile)
    workdir = outputdir + "/" + prefix + "_" + motifname
    outfile = workdir + ".txt"
    createOdir(workdir)
    # The arguments are passed as a list and the redirection is done in
    # Python, so file names containing spaces or shell metacharacters are
    # handed to the tool verbatim instead of being interpreted by a shell.
    command = [
        "findMotifs.pl", sequencefile, "fasta", workdir + "/",
        "-find", motiffile,
    ]
    with open(outfile, "w") as result:
        subprocess.call(command, stdout=result)
    return outfile
Ejemplo n.º 2
0
def mainMd(argv):
    """Drive the motif discovery workflow for one command line.

    ``argv`` is the full argument list; its first item is skipped when the
    options are parsed (presumably the program name).  When ``argv`` holds a
    single item, the usage message is printed and the program exits with
    status 1.  Otherwise the region bed file is turned into a fasta file and
    motif discovery is run on it, with ``motifFull`` when ``scan`` is ``'ON'``
    and with ``motifDeNovo`` in every other case.
    """
    if len(argv) == 1:
        # No option was supplied: show the usage text and stop.
        usageMd()
        sys.exit(1)

    # Default settings first, then whatever the command line overrides.
    defaults = initParamMd()
    (outputdir, selectodir, regionfile, genomefile,
     database, scan, prefix) = readOptMd(argv[1:], *defaults)
    checkRequiredMd(regionfile, genomefile)

    if selectodir == 'false':
        # No output directory was chosen by the user: create the default one.
        createOdir(outputdir)

    welcomeMd()
    # Summary of every parameter used for this run.
    parametersMd(outputdir, regionfile, genomefile, database, scan, prefix)
    running()

    print("")
    print("Step1: Transforming region bed file to fasta file...")
    fastafile = toFastaFile(regionfile, genomefile, outputdir, prefix)
    print("Step1: Transforming region bed file to fasta file achieved...")

    print("")
    print("Step2: Motif Discovery performed by MEME suite...")
    if scan != 'ON':
        motifDeNovo(fastafile, outputdir, prefix)
    else:
        motifFull(fastafile, outputdir, database, prefix)
    print("Step2: Motif Discovery performed by MEME suite achieved...")

    goodbyeCp()
Ejemplo n.º 3
0
def mainCa(argv):
    """Drive the alignment and bam filtering workflow for one command line.

    ``argv`` is the full argument list; its first item is skipped when the
    options are parsed (presumably the program name).  When ``argv`` holds a
    single item, the usage message is printed and the program exits with
    status 1.  Otherwise the reads are aligned and the resulting bam name is
    passed through a fixed sequence of filtering, sorting and indexing steps.
    """
    if len(argv) == 1:
        # No option was supplied: show the usage text and stop.
        usageCa()
        sys.exit(1)

    # Default settings first, then whatever the command line overrides.
    defaults = initParamCa()
    (outputdir, selectodir, filterqual, unmapped, filtercoord, indexGenome,
     rmvdup, sorting, indexBam, coordinateFile, prefix, genome, minqual,
     fastqfile, fastqfile1, fastqfile2, seq) = readOptCa(argv[1:], *defaults)
    checkRequiredCa(seq, fastqfile2, genome)

    if selectodir == 'false':
        # No output directory was chosen by the user: create the default one.
        createOdir(outputdir)

    welcomeCa()

    # 'SE' selects the single fastq file; any other value uses the pair.
    single_end = seq == 'SE'
    if prefix == "":
        # Without an explicit prefix, derive it from the (first) fastq file.
        prefix = getPrefix(fastqfile if single_end else fastqfile1)
    if single_end:
        parametersSe(fastqfile, genome, outputdir, filterqual, minqual,
                     unmapped, filtercoord, coordinateFile, indexGenome,
                     rmvdup, sorting, indexBam, prefix)
    else:
        parametersPe(fastqfile1, fastqfile2, genome, outputdir, filterqual,
                     minqual, unmapped, filtercoord, coordinateFile,
                     indexGenome, rmvdup, sorting, indexBam, prefix)

    running()

    genomeIndex(indexGenome, genome)
    bamname = readsAlignment(seq, fastqfile, fastqfile1, fastqfile2,
                             outputdir, genome, prefix)

    # Every post-processing step takes its own switches followed by the
    # output directory, the current bam name and the prefix; its return
    # value becomes the bam name given to the next step.
    pipeline = (
        (unmappedFilter, (unmapped,)),
        (minQFilter, (filterqual, minqual)),
        (removeDup, (rmvdup,)),
        (coordFilter, (filtercoord, coordinateFile)),
        (sortAndIndexFile, (sorting, indexBam)),
    )
    for step, switches in pipeline:
        bamname = step(*(switches + (outputdir, bamname, prefix)))

    goodbyeCa(bamname)
Ejemplo n.º 4
0
def mainSm(argv):
    """Drive the motif scanning / enrichment workflow for one command line.

    ``argv`` is the full argument list; its first item is skipped when the
    options are parsed (presumably the program name).  When ``argv`` holds a
    single item, the usage message is printed and the program exits with
    status 1.  Otherwise nine steps run in order: background generation,
    fasta conversion of the regions and of the background, motif scanning of
    both, score file creation and sorting, AUC computation and ROC plotting.
    """
    if len(argv) == 1:
        # No option was supplied: show the usage text and stop.
        usageSm()
        sys.exit(1)

    # Default settings first, then whatever the command line overrides.
    defaults = initParamSm()
    (outputdir, selectodir, regionfile, genomefile,
     motiffile, exclude, prefix) = readOptSm(argv[1:], *defaults)
    checkRequiredSm(regionfile, genomefile, motiffile)

    if selectodir == 'false':
        # No output directory was chosen by the user: create the default one.
        createOdir(outputdir)

    welcomeSm()
    # Summary of every parameter used for this run.
    parametersSm(outputdir, regionfile, genomefile, motiffile, exclude, prefix)
    running()

    # Outputs derived from the background regions carry this prefix.
    bg_prefix = "BG_" + prefix

    print("")
    print("Step1: Generate random background region...")
    backgroundfile = createBackground(regionfile, exclude, outputdir, prefix)
    print("Step1: Generate random background region achieved...")

    print("")
    print("Step2: Transforming region bed file to fasta file...")
    fastafile = toFastaFile(regionfile, genomefile, outputdir, prefix)
    print("Step2: Transforming region bed file to fasta file achieved...")

    print("")
    print("Step3: Transforming background bed file to fasta file...")
    bgfastafile = toFastaFile(backgroundfile, genomefile, outputdir, bg_prefix)
    print("Step3: Transforming background bed file to fasta file achieved...")

    print("")
    print("Step4: Scanning foreground sequence with motif file...")
    scanfile = scanSeqMotif(fastafile, motiffile, outputdir, prefix)
    print("Step4: Scanning foreground sequence with motif file achieved...")

    print("")
    print("Step5: Scanning background sequence with motif file...")
    scanbgfile = scanSeqMotif(bgfastafile, motiffile, outputdir, bg_prefix)
    print("Step5: Scanning background sequence with motif file achieved...")

    print("")
    print("Step6: Creating score file for motif enrichment...")
    scorefile = createScoreFile(scanfile, scanbgfile, motiffile, outputdir,
                                prefix)
    print("Step6: Creating score file for motif enrichment achieved...")

    print("")
    print("Step7: Sorting score file...")
    # From here on the sorted file replaces the unsorted one.
    scorefile = sortScoreFile(scorefile, outputdir)
    print("Step7: Sorting score file achieved...")

    print("")
    print("Step8: Calculate AUC for motif enrichment...")
    scoreAUC(scorefile, outputdir)
    print("Step8: Calculate AUC for motif enrichment achieved...")

    print("Step9: plotting ROC curve...")
    plotRocCurve(scorefile, outputdir)
    print("Step9: plotting ROC curve achieved...")

    goodbyeCp()
    print("")
Ejemplo n.º 5
0
def mainAp(argv):
    """Drive the peak annotation workflow for one command line.

    ``argv`` is the full argument list; its first item is skipped when the
    options are parsed (presumably the program name).  When ``argv`` holds a
    single item, the usage message is printed and the program exits with
    status 1.  Otherwise the peak file is converted to a coordinate file,
    promoter and enhancer regions are exported from the annotation file, the
    peaks are annotated against both, the line counts are summarised and,
    unless ``graph`` is ``'OFF'``, the result is plotted.
    """
    if len(argv) == 1:
        # No option was supplied: show the usage text and stop.
        usageAp()
        sys.exit(1)

    # Default settings first, then whatever the command line overrides.
    defaults = initParamAp()
    (outputdir, selectodir, peakfile, annofile,
     peakcaller, prefix, graph) = readOptAp(argv[1:], *defaults)
    checkRequiredAp(peakfile, annofile, peakcaller)

    if selectodir == 'false':
        # No output directory was chosen by the user: create the default one.
        createOdir(outputdir)

    welcomeAp()
    if prefix == '':
        # Fall back to a fixed prefix when none was given.
        prefix = 'AnnoPeaks'
    # Summary of every parameter used for this run.
    parametersAp(outputdir, peakfile, annofile, peakcaller, prefix)
    running()

    # Region files the peaks are annotated against in steps 4 and 5.
    promoter_bed = outputdir + "/promoter_region.bed"
    enhancer_bed = outputdir + "/enhancer_region.bed"

    print("")
    print("Step1 : Transforming peak file to coordinate file...")
    # The coordinate file replaces the original peak file from here on.
    peakfile = toCoordFile(peakfile, outputdir, prefix)
    print("Step1 : Transforming peak file to coordinate file achieved...")

    print("")
    print("Step2 : Extract promoter regions from annotation file...")
    exportProm(annofile, outputdir)
    print("Step2 : Extract promoter regions from annotation file achieved...")

    print("")
    print("Step3 : Extract enhancer regions from annotation file...")
    exportEnh(annofile, outputdir)
    print("Step3 : Extract enhancer regions from annotation file achieved...")

    print("")
    print("Step4 : Annotate peaks that falls into promoter regions...")
    promfile = annotatePeaks(peakfile, outputdir, promoter_bed, prefix)
    print("Step4 : Annotate peaks that falls into promoter regions achieved...")

    print("")
    print("Step5 : Annotate peaks that falls into enhancer regions...")
    enhfile = annotatePeaks(peakfile, outputdir, enhancer_bed, prefix)
    print("Step5 : Annotate peaks that falls into enhancer regions achieved...")

    print("")
    nbpeak, nbprom, nbenh = [
        countLines(path) for path in (peakfile, promfile, enhfile)
    ]
    summaryAp(nbpeak, nbprom, nbenh)

    print("")
    print("Step 6 : Plotting annotation results...")
    if graph != 'OFF':
        plotAnno(nbpeak, nbprom, nbenh, outputdir)
        print("Step 6 : Plotting annotation results achieved...")
    else:
        print("Skipped")

    goodbyeCp()
Ejemplo n.º 6
0
def mainAp(argv):
    """Run the peak annotation pipeline described by ``argv``.

    The options are read from ``argv[1:]``.  If ``argv`` contains only one
    item the usage text is shown and the program exits with status 1.  The
    pipeline converts the peak file to coordinates, exports promoter and
    enhancer regions from the annotation file, annotates the peaks against
    each of them, prints a summary of the line counts and plots the result
    unless ``graph`` equals ``'OFF'``.
    """
    if len(argv) == 1:
        # Nothing to parse after the first item: print usage and leave.
        usageAp()
        sys.exit(1)

    # Defaults are computed once and handed to the option reader, which
    # returns the effective value of every setting.
    settings = initParamAp()
    settings = readOptAp(argv[1:], *settings)
    outputdir, selectodir, peakfile, annofile, peakcaller, prefix, graph = settings
    checkRequiredAp(peakfile, annofile, peakcaller)

    if selectodir == 'false':
        # The user selected no output directory: create the default one.
        createOdir(outputdir)

    welcomeAp()
    if prefix == '':
        # Default prefix used when the command line gives none.
        prefix = 'AnnoPeaks'
    parametersAp(outputdir, peakfile, annofile, peakcaller, prefix)
    running()

    print("")
    print("Step1 : Transforming peak file to coordinate file...")
    peakfile = toCoordFile(peakfile, outputdir, prefix)
    print("Step1 : Transforming peak file to coordinate file achieved...")

    print("")
    print("Step2 : Extract promoter regions from annotation file...")
    exportProm(annofile, outputdir)
    print("Step2 : Extract promoter regions from annotation file achieved...")

    print("")
    print("Step3 : Extract enhancer regions from annotation file...")
    exportEnh(annofile, outputdir)
    print("Step3 : Extract enhancer regions from annotation file achieved...")

    print("")
    print("Step4 : Annotate peaks that falls into promoter regions...")
    promfile = annotatePeaks(
        peakfile, outputdir, outputdir + "/promoter_region.bed", prefix)
    print("Step4 : Annotate peaks that falls into promoter regions achieved...")

    print("")
    print("Step5 : Annotate peaks that falls into enhancer regions...")
    enhfile = annotatePeaks(
        peakfile, outputdir, outputdir + "/enhancer_region.bed", prefix)
    print("Step5 : Annotate peaks that falls into enhancer regions achieved...")

    print("")
    # Line counts of the peak file and of both annotated files.
    nbpeak = countLines(peakfile)
    nbprom = countLines(promfile)
    nbenh = countLines(enhfile)
    summaryAp(nbpeak, nbprom, nbenh)

    print("")
    print("Step 6 : Plotting annotation results...")
    plotting_disabled = graph == 'OFF'
    if plotting_disabled:
        print("Skipped")
    else:
        plotAnno(nbpeak, nbprom, nbenh, outputdir)
        print("Step 6 : Plotting annotation results achieved...")

    goodbyeCp()
Ejemplo n.º 7
0
def mainCa(argv):
    """Run the read alignment and bam post-processing pipeline.

    The options are read from ``argv[1:]``.  If ``argv`` contains only one
    item the usage text is shown and the program exits with status 1.  After
    the optional genome indexing and the alignment, the bam name returned by
    each step is fed to the next one: unmapped filter, quality filter,
    duplicate removal, coordinate filter, then sorting and indexing.
    """
    if len(argv) == 1:
        # Nothing to parse after the first item: print usage and leave.
        usageCa()
        sys.exit(1)

    # Defaults are computed once and handed to the option reader, which
    # returns the effective value of every setting.
    settings = initParamCa()
    settings = readOptCa(argv[1:], *settings)
    (outputdir, selectodir, filterqual, unmapped, filtercoord, indexGenome,
     rmvdup, sorting, indexBam, coordinateFile, prefix, genome, minqual,
     fastqfile, fastqfile1, fastqfile2, seq) = settings

    # Stop early if a required option is missing.
    checkRequiredCa(seq, fastqfile2, genome)

    if selectodir == 'false':
        # The user selected no output directory: create the default one.
        createOdir(outputdir)

    welcomeCa()

    # The parameter summary differs between single-end ('SE') data and
    # everything else, which is handled as paired-end.
    if seq != 'SE':
        if prefix == "":
            # No prefix given: derive it from the first fastq file.
            prefix = getPrefix(fastqfile1)
        parametersPe(
            fastqfile1, fastqfile2, genome, outputdir, filterqual, minqual,
            unmapped, filtercoord, coordinateFile, indexGenome, rmvdup,
            sorting, indexBam, prefix)
    else:
        if prefix == "":
            # No prefix given: derive it from the fastq file.
            prefix = getPrefix(fastqfile)
        parametersSe(
            fastqfile, genome, outputdir, filterqual, minqual, unmapped,
            filtercoord, coordinateFile, indexGenome, rmvdup, sorting,
            indexBam, prefix)

    running()

    # Analysis proper: each call returns the bam name used by the next one.
    genomeIndex(indexGenome, genome)
    bamname = readsAlignment(
        seq, fastqfile, fastqfile1, fastqfile2, outputdir, genome, prefix)
    bamname = unmappedFilter(unmapped, outputdir, bamname, prefix)
    bamname = minQFilter(filterqual, minqual, outputdir, bamname, prefix)
    bamname = removeDup(rmvdup, outputdir, bamname, prefix)
    bamname = coordFilter(
        filtercoord, coordinateFile, outputdir, bamname, prefix)
    bamname = sortAndIndexFile(sorting, indexBam, outputdir, bamname, prefix)

    # Final message, given the name of the last bam file produced.
    goodbyeCa(bamname)
Ejemplo n.º 8
0
def mainTq(argv):
    """Drive the fastq quality check and trimming workflow.

    ``argv`` is the full argument list; its first item is skipped when the
    options are parsed (presumably the program name).  When ``argv`` holds a
    single item, the usage message is printed and the program exits with
    status 1.  Otherwise ``fastQc`` is run on the input file(s), the reads are
    trimmed, and ``fastQc`` is run again on the trimmed file(s).  A non-empty
    ``fastqfile`` selects the single-file path; otherwise ``fastqfile1`` and
    ``fastqfile2`` are processed as a pair.
    """
    if len(argv) == 1:
        # No option was supplied: show the usage text and stop.
        usageTq()
        sys.exit(1)

    # Default settings first, then whatever the command line overrides.
    defaults = initParamTq()
    (outputdir, selectodir, fastqfile1, fastqfile2,
     fastqfile, seq1, seq2, lib) = readOptTq(argv[1:], *defaults)
    checkRequiredTq(fastqfile, fastqfile1, fastqfile2, lib)

    if selectodir == 'false':
        # No output directory was chosen by the user: create the default one.
        createOdir(outputdir)

    welcomeTq()
    adaptaters = setAdapts(seq1, seq2, lib)
    parametersTq(outputdir, fastqfile, fastqfile1, fastqfile2, adaptaters,
                 lib, seq1, seq2)
    running()

    # The same test decides the input set of all three steps.
    single_file = fastqfile != ''

    print("Step1 : Quality check before trimming...")
    createOdir(outputdir + "/fastqc_report")
    raw_files = (fastqfile,) if single_file else (fastqfile1, fastqfile2)
    for raw in raw_files:
        fastQc(raw, outputdir)
    print("Step1 : Quality check before trimming achieved...")

    print("")
    print("Step2 : Trimming...")
    createOdir(outputdir + "/fastq_trim")
    if single_file:
        trimmed_files = (trimmoSe(fastqfile, outputdir, adaptaters),)
    else:
        trimmed1, trimmed2 = trimmoPe(fastqfile1, fastqfile2, outputdir,
                                      adaptaters)
        trimmed_files = (trimmed1, trimmed2)
    print("Step2 : Trimming achieved...")

    print("")
    print("Step3 : Quality check after trimming...")
    for trimmed in trimmed_files:
        fastQc(trimmed, outputdir)
    print("Step3 : Quality check after trimming achieved...")

    goodbyeCp()
Ejemplo n.º 9
0
def mainCpnr(argv):
    """Drive the peak calling workflow for a sample without replicates.

    ``argv`` is the full argument list; its first item is skipped when the
    options are parsed (presumably the program name).  When ``argv`` holds a
    single item, the usage message is printed and the program exits with
    status 1.

    Workflow:
        1. The bam file and the control file are converted to tagAlign files.
        2. Cross-correlation check, only when ``qc`` is ``'ON'`` and ``spp``
           is ``'OFF'``.
        3. Peak calling with macs2, only when ``spp`` is ``'OFF'``.
        4. When ``spp`` is ``'ON'``: pseudo-replicate split, peak calling on
           the sample and both pseudo-replicates, IDR analysis with a fixed
           threshold of 0.01, IDR plots and final peak sets.

    Fixes compared to the previous version: the Step2 completion message now
    says "achieved" instead of repeating the start message, and the last
    sub-step of Step4 is labelled "Step4e" (it was "Step5e") and reports its
    completion like the other sub-steps.
    """
    if len(argv) == 1:
        # No option was supplied: show the usage text and stop.
        usageCpnr()
        sys.exit(1)

    # Default settings first, then whatever the command line overrides.
    defaults = initParamCpnr()
    (outputdir, selectodir, bamfile, ctrlfile, thresh, pvalue, qvalue, qc,
     prefix, spp) = readOptCpnr(argv[1:], *defaults)
    checkRequiredCpnr(bamfile, ctrlfile)

    if selectodir == 'false':
        # No output directory was chosen by the user: create the default one.
        createOdir(outputdir)

    welcomeCpnr()
    if prefix == '':
        # Fall back to a fixed prefix when none was given.
        prefix = 'CallPeaks_norep'
    # Summary of every parameter used for this run.
    parametersCpnr(outputdir, bamfile, ctrlfile, thresh, pvalue, qvalue, qc,
                   prefix, spp)
    running()

    tagalign_dir = outputdir + "/tagAlignfiles/"
    peak_dir = outputdir + "/PeakCalling"

    print("")
    print("Step1 : Transformation from bam file to tagAlign file...")
    createOdir(outputdir + "/tagAlignfiles")
    for file_in in (bamfile, ctrlfile):
        toTagAlign(file_in, tagalign_dir)
    # From here on the variables refer to the converted files.
    chipfile = newName(bamfile, tagalign_dir)
    ctrlfile = newName(ctrlfile, tagalign_dir)
    print("Step1 : Transformation from bam file to tagAlign file achieved...")

    print("")
    print("Step2 : Cross-correlation by phantomPeaksQualTools before calling peaks...")
    createOdir(peak_dir)
    if qc == 'ON' and spp == 'OFF':
        qualCheck(chipfile, peak_dir, prefix)
        print("Step2 : Cross-correlation by phantomPeaksQualTools before calling peaks achieved...")
        print("")
    else:
        print("skipped")
        print("")

    print("Step3 : PeakCalling using macs2...")
    if spp == 'OFF':
        peakCallMacs(chipfile, ctrlfile, outputdir, prefix, pvalue, qvalue,
                     thresh)
        print("Step3 : PeakCalling using macs2 achieved...")
        print("")
    else:
        print("skipped")
        print("")

    if spp == 'ON':
        print("Step4 : PeakCalling based on adaptated IDR analysis...")
        print("\tStep4a : Splitting file into pseudo-replicates...")
        splitFile(chipfile, tagalign_dir)
        print("\tStep4a : Splitting file into pseudo-replicates achieved...")

        # Names of the two pseudo-replicate files used by the next sub-steps.
        prefixchip = tagalign_dir + getPrefix(chipfile)
        pseudo1 = prefixchip + "_PR1.tagAlign.gz"
        pseudo2 = prefixchip + "_PR2.tagAlign.gz"

        print("Step4b : Peak Calling for each files (replicates and pseudo replicates)...")
        createOdir(peak_dir)
        for sample in (chipfile, pseudo1, pseudo2):
            peakCall(sample, ctrlfile, peak_dir + "/")
        print("Step4b : Peak Calling for each files (replicates and pseudo replicates) achieved...")

        print("Step4c : IDR analysis...")
        createOdir(outputdir + "/IDR")
        consistency(pseudo1, pseudo2, ctrlfile, outputdir)
        idrthresh = 0.01
        nb_consistent = countConsistentPeaks(pseudo1, pseudo2, outputdir,
                                             idrthresh)
        print("number of consistent peaks between the two pseudo replicates: " + str(nb_consistent))
        print("Step4c : IDR analysis achieved...")

        print("Step4d : Plotting IDR results...")
        createOdir(outputdir + "/IDR/plots")
        plotResults2(outputdir, pseudo1, pseudo2)
        print("Step4d : Plotting IDR results achieved...")

        print("Step4e : Creating final sets of peaks...")
        createOdir(outputdir + "/finalsets")
        createFinalSets2(chipfile, ctrlfile, nb_consistent, outputdir, prefix)
        print("Step4e : Creating final sets of peaks achieved...")

    goodbyeCp()
    return
Ejemplo n.º 10
0
def mainCpnr(argv):
    """Run peak calling on a single sample (no replicate) and its control.

    The options are read from ``argv[1:]``.  If ``argv`` contains only one
    item the usage text is shown and the program exits with status 1.

    Steps:
        1. Convert the bam file and the control file to tagAlign files.
        2. Cross-correlation check (only if ``qc`` is ``'ON'`` and ``spp`` is
           ``'OFF'``).
        3. macs2 peak calling (only if ``spp`` is ``'OFF'``).
        4. If ``spp`` is ``'ON'``: split into pseudo-replicates, call peaks on
           the sample and on each pseudo-replicate, run the IDR analysis with
           a fixed threshold of 0.01, plot it and build the final peak sets.

    Fixes compared to the previous version: the message printed after the
    Step2 cross-correlation now ends with "achieved..." instead of repeating
    the start message, and the final sub-step of Step4 is labelled "Step4e"
    (previously "Step5e") and prints a completion message.
    """
    if len(argv) == 1:
        # Nothing to parse after the first item: print usage and leave.
        usageCpnr()
        sys.exit(1)

    # Defaults are computed once and handed to the option reader, which
    # returns the effective value of every setting.
    settings = initParamCpnr()
    settings = readOptCpnr(argv[1:], *settings)
    (outputdir, selectodir, bamfile, ctrlfile, thresh, pvalue, qvalue, qc,
     prefix, spp) = settings
    # Stop early if a required option is missing.
    checkRequiredCpnr(bamfile, ctrlfile)

    if selectodir == 'false':
        # The user selected no output directory: create the default one.
        createOdir(outputdir)

    welcomeCpnr()
    if prefix == '':
        # Default prefix used when the command line gives none.
        prefix = 'CallPeaks_norep'
    parametersCpnr(outputdir, bamfile, ctrlfile, thresh, pvalue, qvalue, qc,
                   prefix, spp)
    running()

    print("")
    print("Step1 : Transformation from bam file to tagAlign file...")
    createOdir(outputdir + "/tagAlignfiles")
    for file_in in (bamfile, ctrlfile):
        toTagAlign(file_in, outputdir + "/tagAlignfiles/")
    # The variables now refer to the converted files.
    chipfile = newName(bamfile, outputdir + "/tagAlignfiles/")
    ctrlfile = newName(ctrlfile, outputdir + "/tagAlignfiles/")
    print("Step1 : Transformation from bam file to tagAlign file achieved...")

    print("")
    print("Step2 : Cross-correlation by phantomPeaksQualTools before calling peaks...")
    createOdir(outputdir + "/PeakCalling")
    if qc == 'ON' and spp == 'OFF':
        qualCheck(chipfile, outputdir + "/PeakCalling", prefix)
        print("Step2 : Cross-correlation by phantomPeaksQualTools before calling peaks achieved...")
        print("")
    else:
        print("skipped")
        print("")

    print("Step3 : PeakCalling using macs2...")
    if spp == 'OFF':
        peakCallMacs(chipfile, ctrlfile, outputdir, prefix, pvalue, qvalue,
                     thresh)
        print("Step3 : PeakCalling using macs2 achieved...")
        print("")
    else:
        print("skipped")
        print("")

    if spp == 'ON':
        print("Step4 : PeakCalling based on adaptated IDR analysis...")
        print("\tStep4a : Splitting file into pseudo-replicates...")
        splitFile(chipfile, outputdir + "/tagAlignfiles/")
        print("\tStep4a : Splitting file into pseudo-replicates achieved...")

        # Both pseudo-replicate names share this path prefix.
        prefixchip = outputdir + "/tagAlignfiles/" + getPrefix(chipfile)
        first_pseudo = prefixchip + "_PR1.tagAlign.gz"
        second_pseudo = prefixchip + "_PR2.tagAlign.gz"

        print("Step4b : Peak Calling for each files (replicates and pseudo replicates)...")
        createOdir(outputdir + "/PeakCalling")
        for tagalign in (chipfile, first_pseudo, second_pseudo):
            peakCall(tagalign, ctrlfile, outputdir + "/PeakCalling/")
        print("Step4b : Peak Calling for each files (replicates and pseudo replicates) achieved...")

        print("Step4c : IDR analysis...")
        createOdir(outputdir + "/IDR")
        consistency(first_pseudo, second_pseudo, ctrlfile, outputdir)
        idrthresh = 0.01
        consistent_peaks = countConsistentPeaks(first_pseudo, second_pseudo,
                                                outputdir, idrthresh)
        print("number of consistent peaks between the two pseudo replicates: " + str(consistent_peaks))
        print("Step4c : IDR analysis achieved...")

        print("Step4d : Plotting IDR results...")
        createOdir(outputdir + "/IDR/plots")
        plotResults2(outputdir, first_pseudo, second_pseudo)
        print("Step4d : Plotting IDR results achieved...")

        print("Step4e : Creating final sets of peaks...")
        createOdir(outputdir + "/finalsets")
        createFinalSets2(chipfile, ctrlfile, consistent_peaks, outputdir,
                         prefix)
        print("Step4e : Creating final sets of peaks achieved...")

    goodbyeCp()
    return
Ejemplo n.º 11
0
def mainCp(argv):
    """Drive the peak calling workflow for two replicates.

    ``argv`` is the full argument list; its first item is skipped when the
    options are parsed (presumably the program name).  When ``argv`` holds a
    single item, the usage message is printed and the program exits with
    status 1.

    Workflow:
        1. Convert the replicate and control bam files to tagAlign files.
        2. Merge the two control files when ``ctrlsup`` is ``'true'``.
        3. Pool the two replicates.
        4. Split both replicates and the pool into pseudo-replicates.
        5. Call peaks on every file produced so far.
        6. IDR analysis, unless ``idr`` is ``'OFF'``.
        7. IDR plots, unless ``plot`` is ``'OFF'``.
        8. Final peak sets, unless ``finalsets`` is ``'OFF'``.

    Step 8 needs the peak counts computed in step 6.  When the IDR analysis
    is switched off those counts do not exist, so the final sets are skipped
    with an explicit message; the previous version raised ``NameError`` in
    that situation.
    """
    if len(argv) == 1:
        # No option was supplied: show the usage text and stop.
        usageCp()
        sys.exit(1)

    # Default settings first, then whatever the command line overrides.
    defaults = initParamCp()
    (outputdir, selectodir, rep1, rep2, ctrl1, ctrl2, ctrlsup, idr, idrthresh,
     finalsets, plot, prefix) = readOptCp(argv[1:], *defaults)
    checkRequiredCp(rep1, rep2, ctrl1)

    if selectodir == 'false':
        # No output directory was chosen by the user: create the default one.
        createOdir(outputdir)

    welcomeCp()
    if prefix == '':
        # Fall back to a fixed prefix when none was given.
        prefix = 'CallPeaks'
    # Summary of every parameter used for this run.
    parametersCp(outputdir, selectodir, rep1, rep2, ctrl1, ctrl2, ctrlsup, idr,
                 idrthresh, finalsets, plot, prefix)
    running()

    tagalign_dir = outputdir + "/tagAlignfiles/"
    has_second_control = ctrlsup == 'true'

    print("")
    print("Step1 : Transformation from bam file to tagAlign file...")
    createOdir(outputdir + "/tagAlignfiles")
    for file_in in (rep1, rep2, ctrl1):
        toTagAlign(file_in, tagalign_dir)
    if has_second_control:
        toTagAlign(ctrl2, tagalign_dir)
        ctrl2 = newName(ctrl2, tagalign_dir)
    # From here on the variables refer to the converted files.
    rep1 = newName(rep1, tagalign_dir)
    rep2 = newName(rep2, tagalign_dir)
    ctrl1 = newName(ctrl1, tagalign_dir)
    print("Step1 : Transformation from bam file to tagAlign file achieved...")

    print("")
    print("Step2 : Merging Control files (if two files are given)...")
    if has_second_control:
        ctrlfile = mergeFile(ctrl1, ctrl2, tagalign_dir, 'Control')
        print("Step2 : Merging Control files achieved...")
    else:
        print("skipped")
        ctrlfile = ctrl1

    print("")
    print("Step3 : Creating Pool of replicates...")
    poolfile = mergeFile(rep1, rep2, tagalign_dir, 'Pool')
    print("Step3 : Creating Pool of replicates achieved...")

    print("")
    print("Step4 : Splitting samples files into pseudo replicates...")
    for file_in in (rep1, rep2, poolfile):
        splitFile(file_in, tagalign_dir)
    print("Step4 : Splitting samples files into pseudo replicates achieved...")
    print("")

    # Names of the pseudo-replicate files used by the remaining steps.
    prefixr1 = tagalign_dir + getPrefix(rep1)
    prefixr2 = tagalign_dir + getPrefix(rep2)
    prefixpool = tagalign_dir + getPrefix(poolfile)
    r1_pr1 = prefixr1 + "_PR1.tagAlign.gz"
    r1_pr2 = prefixr1 + "_PR2.tagAlign.gz"
    r2_pr1 = prefixr2 + "_PR1.tagAlign.gz"
    r2_pr2 = prefixr2 + "_PR2.tagAlign.gz"
    pool_pr1 = prefixpool + "_PR1.tagAlign.gz"
    pool_pr2 = prefixpool + "_PR2.tagAlign.gz"

    print("Step5 : Peak Calling for each files (replicates, pool and pseudo replicates)...")
    createOdir(outputdir + "/PeakCalling")
    for sample in (rep1, rep2, poolfile, r1_pr1, r1_pr2, r2_pr1, r2_pr2,
                   pool_pr1, pool_pr2):
        peakCall(sample, ctrlfile, outputdir + "/PeakCalling/")
    print("Step5 : Peak Calling for each files achieved...")

    print("")
    print("Step6 : IDR analysis...")
    # The counts below only exist when the IDR analysis actually runs.
    idr_done = idr != 'OFF'
    nb_true = None
    nb_pool = None
    if not idr_done:
        print("Skipped")
    else:
        createOdir(outputdir + "/IDR")
        consistency(rep1, rep2, ctrlfile, outputdir)
        consistency(r1_pr1, r1_pr2, ctrlfile, outputdir)
        consistency(r2_pr1, r2_pr2, ctrlfile, outputdir)
        consistency(pool_pr1, pool_pr2, ctrlfile, outputdir)
        # Peak counts for each compared pair, using the idrthresh setting.
        nb_true = countConsistentPeaks(rep1, rep2, outputdir, idrthresh)
        nb_pool = countConsistentPeaks(pool_pr1, pool_pr2, outputdir,
                                       idrthresh)
        nb_rep1 = countConsistentPeaks(r1_pr1, r1_pr2, outputdir, idrthresh)
        nb_rep2 = countConsistentPeaks(r2_pr1, r2_pr2, outputdir, idrthresh)
        exportResults(nb_true, nb_pool, nb_rep1, nb_rep2, outputdir)
        print("Step6 : IDR analysis achieved...")

    print("")
    print("Step7 : Plotting IDR results...")
    if plot == 'OFF':
        print("Skipped")
    else:
        createOdir(outputdir + "/IDR/plots")
        plotResults(outputdir, rep1, rep2, r1_pr1, r1_pr2, r2_pr1, r2_pr2,
                    pool_pr1, pool_pr2)
        print("Step7 : Plotting IDR results achieved...")

    print("")
    print("Step8 : Creating final sets of peaks...")
    if finalsets == 'OFF':
        print("Skipped")
    elif not idr_done:
        # Without step 6 there are no peak counts to build the sets from.
        print("Skipped (final sets need the peak counts from the IDR analysis)")
    else:
        createOdir(outputdir + "/finalsets")
        createFinalSets(poolfile, ctrlfile, nb_true, nb_pool, outputdir,
                        prefix)
        print("Step8 : Creating final sets of peaks achieved...")

    goodbyeCp()
    return
Ejemplo n.º 12
0
def mainCp(argv):
    """Run the peak-calling pipeline (Step1 to Step8) driven by command-line options.

    The steps, in order: convert the input files to tagAlign, optionally merge
    two control files, pool the two replicates, split replicates and pool into
    pseudo replicates, call peaks on every file, run the IDR analysis, plot the
    IDR results and create the final peak sets. Steps 6, 7 and 8 are skipped
    when the ``idr``, ``plot`` or ``finalsets`` option equals ``'OFF'``.

    Step 8 needs the peak counts computed by Step 6; when Step 6 was skipped,
    Step 8 is skipped too (with a message) instead of failing on undefined
    variables.

    Args:
        argv: argument list; everything after the first element is handed to
            ``readOptCp``. When it holds a single element, the usage message
            is printed and the program exits with status 1.
    """
    if len(argv) == 1:  # no option follows the first element: print usage and exit
        usageCp()
        sys.exit(1)
    # Initialize every parameter to its default, then let the command line override them
    (outputdir, selectodir, rep1, rep2, ctrl1, ctrl2, ctrlsup, idr, idrthresh,
     finalsets, plot, prefix) = initParamCp()
    (outputdir, selectodir, rep1, rep2, ctrl1, ctrl2, ctrlsup, idr, idrthresh,
     finalsets, plot, prefix) = readOptCp(
        argv[1:], outputdir, selectodir, rep1, rep2, ctrl1, ctrl2, ctrlsup,
        idr, idrthresh, finalsets, plot, prefix)
    checkRequiredCp(rep1, rep2, ctrl1)  # check that the required parameters have been specified
    if selectodir == 'false':  # no output directory specified: create the default one
        createOdir(outputdir)
    welcomeCp()  # print welcome message
    if prefix == '':  # no prefix given on the command line: fall back to a default one
        prefix = 'CallPeaks'
    parametersCp(outputdir, selectodir, rep1, rep2, ctrl1, ctrl2, ctrlsup, idr,
                 idrthresh, finalsets, plot, prefix)  # print a summary of all parameters used
    running()  # print running message

    tagdir = outputdir + "/tagAlignfiles/"

    print("")
    print("Step1 : Transformation from bam file to tagAlign file...")
    createOdir(outputdir + "/tagAlignfiles")  # folder receiving the transformed files
    for file_in in (rep1, rep2, ctrl1):
        toTagAlign(file_in, tagdir)
    if ctrlsup == 'true':  # a second control file was given: convert it too
        toTagAlign(ctrl2, tagdir)
        ctrl2 = newName(ctrl2, tagdir)
    # From here on the variables refer to the converted files
    rep1 = newName(rep1, tagdir)
    rep2 = newName(rep2, tagdir)
    ctrl1 = newName(ctrl1, tagdir)
    print("Step1 : Transformation from bam file to tagAlign file achieved...")

    print("")
    print("Step2 : Merging Control files (if two files are given)...")
    if ctrlsup == 'true':  # two control files: merge them into one
        ctrlfile = mergeFile(ctrl1, ctrl2, tagdir, 'Control')
        print("Step2 : Merging Control files achieved...")
    else:
        print("skipped")
        ctrlfile = ctrl1

    print("")
    print("Step3 : Creating Pool of replicates...")
    poolfile = mergeFile(rep1, rep2, tagdir, 'Pool')  # merge the two sample files into the pool
    print("Step3 : Creating Pool of replicates achieved...")

    print("")
    print("Step4 : Splitting samples files into pseudo replicates...")
    for file_in in (rep1, rep2, poolfile):
        splitFile(file_in, tagdir)
    print("Step4 : Splitting samples files into pseudo replicates achieved...")
    print("")

    # Names of the two pseudo-replicate files expected for each split input
    prefixr1 = tagdir + getPrefix(rep1)
    prefixr2 = tagdir + getPrefix(rep2)
    prefixpool = tagdir + getPrefix(poolfile)
    r1_pr1, r1_pr2 = prefixr1 + "_PR1.tagAlign.gz", prefixr1 + "_PR2.tagAlign.gz"
    r2_pr1, r2_pr2 = prefixr2 + "_PR1.tagAlign.gz", prefixr2 + "_PR2.tagAlign.gz"
    pool_pr1, pool_pr2 = prefixpool + "_PR1.tagAlign.gz", prefixpool + "_PR2.tagAlign.gz"

    print("Step5 : Peak Calling for each files (replicates, pool and pseudo replicates)...")
    createOdir(outputdir + "/PeakCalling")  # folder receiving the peak calling output
    for sample in (rep1, rep2, poolfile, r1_pr1, r1_pr2, r2_pr1, r2_pr2,
                   pool_pr1, pool_pr2):
        peakCall(sample, ctrlfile, outputdir + "/PeakCalling/")
    print("Step5 : Peak Calling for each files achieved...")

    print("")
    print("Step6 : IDR analysis...")
    idr_done = False  # Step8 depends on the counts computed here
    nt = n_pool = None
    if idr == 'OFF':
        print("Skipped")
    else:
        createOdir(outputdir + "/IDR")  # folder receiving the IDR output
        # IDR analysis between the replicates and within each pseudo-replicate pair
        consistency(rep1, rep2, ctrlfile, outputdir)
        consistency(r1_pr1, r1_pr2, ctrlfile, outputdir)
        consistency(r2_pr1, r2_pr2, ctrlfile, outputdir)
        consistency(pool_pr1, pool_pr2, ctrlfile, outputdir)
        # Number of peaks with an IDR lower than idrthresh for each comparison
        nt = countConsistentPeaks(rep1, rep2, outputdir, idrthresh)
        n_pool = countConsistentPeaks(pool_pr1, pool_pr2, outputdir, idrthresh)
        n1 = countConsistentPeaks(r1_pr1, r1_pr2, outputdir, idrthresh)
        n2 = countConsistentPeaks(r2_pr1, r2_pr2, outputdir, idrthresh)
        exportResults(nt, n_pool, n1, n2, outputdir)  # export metrics in a tab delimited file
        idr_done = True
        print("Step6 : IDR analysis achieved...")

    print("")
    print("Step7 : Plotting IDR results...")
    if plot == 'OFF':
        print("Skipped")
    else:
        # NOTE(review): this step still runs when Step6 was skipped; presumably
        # plotResults needs the Step6 output -- confirm and guard if so.
        createOdir(outputdir + "/IDR/plots")  # folder receiving the IDR plots
        plotResults(outputdir, rep1, rep2, r1_pr1, r1_pr2, r2_pr1, r2_pr2,
                    pool_pr1, pool_pr2)
        print("Step7 : Plotting IDR results achieved...")

    print("")
    print("Step8 : Creating final sets of peaks...")
    if finalsets == 'OFF':
        print("Skipped")
    elif not idr_done:
        # Without Step6 there are no peak counts to build the final sets from
        print("Skipped (IDR analysis is required to create final sets)")
    else:
        createOdir(outputdir + "/finalsets")  # folder receiving the final peak sets
        createFinalSets(poolfile, ctrlfile, nt, n_pool, outputdir, prefix)
        print("Step8 : Creating final sets of peaks achieved...")
    goodbyeCp()  # print end message
    return