def main():
    '''
    Command-line entry point for the ROSE region stitching / ranking run.

    Steps, in order:
      1. parse the flags (-i, -r, -o and -g are required)
      2. set up the output folder and its gff/ and mappedGFF/ subfolders
      3. stage the input regions as a .gff (a .bed input is converted)
      4. stitch the regions and write the stitched .gff
      5. launch ROSE_bamToGFF.py in the background for every bam file against
         both the stitched and the original .gff, then poll for the outputs
      6. build the enhancer region map and run ROSE_callSuper.R on it

    Exits with status 1 when a required flag is missing or when the mapped
    output files have not appeared before the polling limit is reached.
    '''
    import sys
    from optparse import OptionParser

    #flip to True to also write the stitching debug table to disk
    debug = False

    usage = "usage: %prog [options] -g [INPUT_GENOME] -i [INPUT_REGION_GFF] -r [RANKBY_BAM_FILE] -o [OUTPUT_FOLDER] [OPTIONAL_FLAGS]"
    parser = OptionParser(usage=usage)
    #required flags
    parser.add_option("-i", "--i", dest="input", nargs=1, default=None,
                      help="Enter a .gff or .bed file of binding sites used to make enhancers")
    parser.add_option("-r", "--rankby", dest="rankby", nargs=1, default=None,
                      help="bamfile to rank enhancer by")
    parser.add_option("-o", "--out", dest="out", nargs=1, default=None,
                      help="Enter an output folder")
    parser.add_option("-g", "--genome", dest="genome", nargs=1, default=None,
                      help="Reference genome file: example- hg18_refseq.ucsc")

    #optional flags
    parser.add_option("-b", "--bams", dest="bams", nargs=1, default=None,
                      help="Enter a comma separated list of additional bam files to map to")
    parser.add_option("-c", "--control", dest="control", nargs=1, default=None,
                      help="bamfile to rank enhancer by")
    parser.add_option("-s", "--stitch", dest="stitch", nargs=1, default=12500,
                      help="Enter a max linking distance for stitching")
    parser.add_option("-t", "--tss", dest="tss", nargs=1, default=0,
                      help="Enter a distance from TSS to exclude. 0 = no TSS exclusion")

    #RETRIEVING FLAGS
    (options, args) = parser.parse_args()

    if not (options.input and options.rankby and options.out and options.genome):
        print('hi there')
        parser.print_help()
        #a usage error must not look like a successful run to the caller
        sys.exit(1)

    #making the out folder if it doesn't exist
    outFolder = ROSE_utils.formatFolder(options.out, True)

    #figuring out folder schema
    gffFolder = ROSE_utils.formatFolder(outFolder + 'gff/', True)
    mappedFolder = ROSE_utils.formatFolder(outFolder + 'mappedGFF/', True)

    #GETTING INPUT FILE
    inputExtension = options.input.split('.')[-1]
    if inputExtension == 'bed':
        #CONVERTING A BED TO GFF
        inputGFFName = options.input.split('/')[-1][0:-4]
        inputGFFFile = '%s%s.gff' % (gffFolder, inputGFFName)
        ROSE_utils.bedToGFF(options.input, inputGFFFile)
    else:
        if inputExtension != 'gff':
            print('WARNING: INPUT FILE DOES NOT END IN .gff or .bed. ASSUMING .gff FILE FORMAT')
        #COPY THE INPUT GFF TO THE GFF FOLDER
        inputGFFFile = options.input
        os.system('cp %s %s' % (inputGFFFile, gffFolder))

    #GETTING THE LIST OF BAMFILES TO PROCESS
    bamFileList = [options.rankby]
    if options.control:
        bamFileList.append(options.control)
    if options.bams:
        bamFileList += options.bams.split(',')
        #the result used to be bound to a misspelled name and thrown away,
        #so duplicated bam files were mapped more than once
        #NOTE(review): assumes uniquify returns a list - confirm in ROSE_utils
        bamFileList = ROSE_utils.uniquify(bamFileList)

    #Stitch parameter
    stitchWindow = int(options.stitch)

    #tss options
    tssWindow = int(options.tss)
    removeTSS = tssWindow != 0

    #GETTING THE BOUND REGION FILE USED TO DEFINE ENHANCERS
    print('USING %s AS THE INPUT GFF' % (inputGFFFile))
    inputName = inputGFFFile.split('/')[-1].split('.')[0]

    #GETTING THE GENOME
    #in this variant -g is the annotation file itself, not a build name
    genome = options.genome
    print('USING %s AS THE GENOME' % genome)
    annotFile = genome

    #MAKING THE START DICT
    print('MAKING START DICT')
    #the return value is not used below; the call is kept so that behavior
    #is unchanged
    ROSE_utils.makeStartDict(annotFile)

    #LOADING IN THE BOUND REGION REFERENCE COLLECTION
    print('LOADING IN GFF REGIONS')
    referenceCollection = ROSE_utils.gffToLocusCollection(inputGFFFile)

    #CHECKING INPUT REGIONS FOR FORMATTING
    print('CHECKING INPUT TO MAKE SURE EACH REGION HAS A UNIQUE IDENTIFIER')
    checkRefCollection(referenceCollection)

    #NOW STITCH REGIONS
    print('STITCHING REGIONS TOGETHER')
    stitchedCollection, debugOutput = regionStitching(inputGFFFile, stitchWindow, tssWindow, annotFile, removeTSS)

    #NOW MAKE A STITCHED COLLECTION GFF
    print('MAKING GFF FROM STITCHED COLLECTION')
    stitchedGFF = ROSE_utils.locusCollectionToGFF(stitchedCollection)

    #floor division keeps the KB tag an integer (12500 -> 12KB) on both
    #Python 2 and Python 3; true division would give 12.5KB on Python 3
    stitchedGFFName = '%s_%sKB_STITCHED' % (inputName, stitchWindow // 1000)
    if removeTSS:
        stitchedGFFName += '_TSS_DISTAL'
    stitchedGFFFile = '%s%s.gff' % (gffFolder, stitchedGFFName)
    debugOutFile = '%s%s.debug' % (gffFolder, stitchedGFFName)

    #WRITING DEBUG OUTPUT TO DISK
    if debug:
        print('WRITING DEBUG OUTPUT TO DISK AS %s' % (debugOutFile))
        ROSE_utils.unParseTable(debugOutput, debugOutFile, '\t')

    #WRITE THE GFF TO DISK
    print('WRITING STITCHED GFF TO DISK AS %s' % (stitchedGFFFile))
    ROSE_utils.unParseTable(stitchedGFF, stitchedGFFFile, '\t')

    #SETTING UP THE OVERALL OUTPUT FILE
    outputFile1 = outFolder + stitchedGFFName + '_ENHANCER_REGION_MAP.txt'
    print('OUTPUT WILL BE WRITTEN TO  %s' % (outputFile1))

    # bin for bam mapping
    nBin = 1

    #IMPORTANT
    #CHANGE mapCmd TO PARALLELIZE OUTPUT FOR BATCH SUBMISSION
    #e.g. if using LSF mapCmd = "bsub python bamToGFF.py -f 1 -e 200 -r -m %s -b %s -i %s -o %s"
    #the trailing & makes the shell run each mapping as a background process
    mapCmd = "python /usr/local/'bin'/ROSE_bamToGFF.py -f 1 -e 200 -r -m %s -b %s -i %s -o %s &"

    #every bam is mapped to the stitched gff first, then to the original gff
    mappingTargets = ((stitchedGFFFile, stitchedGFFName), (inputGFFFile, inputName))
    expectedOutputs = []
    for bamFile in bamFileList:
        bamFileName = bamFile.split('/')[-1]
        for gffFile, gffName in mappingTargets:
            mappedOut = '%s%s_%s_MAPPED.gff' % (mappedFolder, gffName, bamFileName)
            cmd = mapCmd % (nBin, bamFile, gffFile, mappedOut)
            print(cmd)
            os.system(cmd)
            expectedOutputs.append(mappedOut)

    print('PAUSING TO MAP')
    time.sleep(10)

    #CHECK FOR MAPPING OUTPUT
    #check every 5 minutes for completed output
    #NOTE(review): only the existence of each file is tested, not whether
    #the background job has finished writing it
    pollMinutes = 5
    #CHANGE THIS PARAMETER TO ALLOW MORE TIME TO MAP
    maxPolls = 144
    polls = 0
    print('WAITING FOR MAPPING TO COMPLETE. ELAPSED TIME (MIN):')
    while True:
        if polls % 6 == 0:
            print(polls * pollMinutes)
        if all(os.path.isfile(mappedOut) for mappedOut in expectedOutputs):
            break
        polls += 1
        if polls == maxPolls:
            print('ERROR: OPERATION TIME OUT. MAPPING OUTPUT NOT DETECTED')
            sys.exit(1)
        time.sleep(pollMinutes * 60)
    #polls counts completed waits, so this is the time actually spent waiting
    print('MAPPING TOOK %s MINUTES' % (polls * pollMinutes))

    print('BAM MAPPING COMPLETED NOW MAPPING DATA TO REGIONS')
    #CALCULATE DENSITY BY REGION
    mapCollection(stitchedCollection, referenceCollection, bamFileList, mappedFolder, outputFile1, refName=stitchedGFFName)

    time.sleep(10)

    print('CALLING AND PLOTTING SUPER-ENHANCERS')

    #the R script is told 'NONE' when no control bam was given
    if options.control:
        controlName = options.control.split('/')[-1]
    else:
        controlName = 'NONE'
    cmd = 'R --no-save %s %s %s %s < /usr/local/bin/ROSE_callSuper.R' % (outFolder, outputFile1, inputName, controlName)
    print(cmd)
    os.system(cmd)
# Example #2
# 0
def main():
    '''
    Command-line entry point that maps ROSE enhancers to genes.

    Parses the flags (-i and -g are required), resolves the annotation file
    for the requested genome build under <cwd>/annotation/, calls
    mapEnhancerToGene and writes the two resulting tables next to the input
    file (or into the folder given with -o) as
    <input>_ENHANCER_TO_GENE.txt and <input>_GENE_TO_ENHANCER.txt.

    Exits with status 1 when a required flag is missing and with status 2
    (via OptionParser.error) when the genome build is not supported.
    '''
    import sys
    from optparse import OptionParser

    usage = "usage: %prog [options] -g [GENOME] -i [INPUT_ENHANCER_FILE]"
    parser = OptionParser(usage=usage)
    #required flags
    parser.add_option(
        "-i",
        "--i",
        dest="input",
        nargs=1,
        default=None,
        help="Enter a ROSE ranked enhancer or super-enhancer file")
    parser.add_option("-g",
                      "--genome",
                      dest="genome",
                      nargs=1,
                      default=None,
                      help="Enter the genome build (MM9,MM8,HG18,HG19,HG38)")

    #optional flags
    parser.add_option("-l",
                      "--list",
                      dest="geneList",
                      nargs=1,
                      default=None,
                      help="Enter a gene list to filter through")
    parser.add_option(
        "-o",
        "--out",
        dest="out",
        nargs=1,
        default=None,
        help="Enter an output folder. Default will be same folder as input file"
    )
    parser.add_option(
        "-r",
        "--refseq",
        dest="refseq",
        action='store_true',
        default=False,
        help="If flagged will write output by refseq ID and not common name")

    #RETRIEVING FLAGS
    (options, args) = parser.parse_args()

    if not (options.input and options.genome):
        parser.print_help()
        #a usage error must not look like a successful run to the caller
        sys.exit(1)

    #GETTING THE INPUT
    enhancerFile = options.input

    #making the out folder if it doesn't exist
    if options.out:
        outFolder = ROSE_utils.formatFolder(options.out, True)
    else:
        #default to the folder holding the input file; a bare file name has
        #no folder part and must resolve to './', not to the root folder '/'
        outFolder = os.path.join(os.path.dirname(enhancerFile) or '.', '')

    #GETTING THE GENOME
    genome = options.genome
    print('USING %s AS THE GENOME' % genome)

    #GETTING THE CORRECT ANNOT FILE
    cwd = os.getcwd()
    genomeDict = {
        'HG18': '%s/annotation/hg18_refseq.ucsc' % (cwd),
        'MM9': '%s/annotation/mm9_refseq.ucsc' % (cwd),
        'HG19': '%s/annotation/hg19_refseq.ucsc' % (cwd),
        'HG38': '%s/annotation/hg38_refseq.ucsc' % (cwd),
        'MM8': '%s/annotation/mm8_refseq.ucsc' % (cwd),
        'MM10': '%s/annotation/mm10_refseq.ucsc' % (cwd),
    }

    #report an unknown build as a usage error instead of a KeyError traceback
    genomeKey = genome.upper()
    if genomeKey not in genomeDict:
        parser.error('unsupported genome build "%s"; expected one of: %s' %
                     (genome, ', '.join(sorted(genomeDict))))
    annotFile = genomeDict[genomeKey]

    #GETTING THE TRANSCRIBED LIST
    #an empty string is passed on when no gene list was given
    transcribedFile = options.geneList if options.geneList else ''

    enhancerToGeneTable, geneToEnhancerTable = mapEnhancerToGene(
        annotFile,
        enhancerFile,
        uniqueGenes=True,
        byRefseq=options.refseq,
        transcribedFile=transcribedFile)

    #Writing enhancer output
    enhancerFileName = enhancerFile.split('/')[-1].split('.')[0]

    #writing the enhancer table
    out1 = '%s%s_ENHANCER_TO_GENE.txt' % (outFolder, enhancerFileName)
    ROSE_utils.unParseTable(enhancerToGeneTable, out1, '\t')

    #writing the gene table
    out2 = '%s%s_GENE_TO_ENHANCER.txt' % (outFolder, enhancerFileName)
    ROSE_utils.unParseTable(geneToEnhancerTable, out2, '\t')
# Example #3
# 0
def main():
    '''
    Command-line entry point for the ROSE region stitching / ranking run.

    This variant parses the flags (-i, -r, -o and -g are required), sets up
    the output folder with its gff/ and mappedGFF/ subfolders and stages the
    input regions as a .gff (a .bed input is converted, anything else is
    copied into the gff folder).

    NOTE(review): the function ends after staging the input file; the bam
    list, stitching and mapping steps are not part of this variant.

    Exits with status 1 when a required flag is missing.
    '''
    import sys
    from optparse import OptionParser

    usage = "usage: %prog [options] -g [GENOME] -i [INPUT_REGION_GFF] -r [RANKBY_BAM_FILE] -o [OUTPUT_FOLDER] [OPTIONAL_FLAGS]"
    parser = OptionParser(usage=usage)
    #required flags
    parser.add_option("-i", "--i", dest="input", nargs=1, default=None,
                      help="Enter a .gff or .bed file of binding sites used to make enhancers")
    parser.add_option("-r", "--rankby", dest="rankby", nargs=1, default=None,
                      help="bamfile to rank enhancer by")
    parser.add_option("-o", "--out", dest="out", nargs=1, default=None,
                      help="Enter an output folder")
    parser.add_option("-g", "--genome", dest="genome", nargs=1, default=None,
                      help="Enter the genome build (MM9,MM8,HG18,HG19)")

    #optional flags
    parser.add_option("-b", "--bams", dest="bams", nargs=1, default=None,
                      help="Enter a comma separated list of additional bam files to map to")
    parser.add_option("-c", "--control", dest="control", nargs=1, default=None,
                      help="bamfile to rank enhancer by")
    parser.add_option("-s", "--stitch", dest="stitch", nargs=1, default=12500,
                      help="Enter a max linking distance for stitching")
    parser.add_option("-t", "--tss", dest="tss", nargs=1, default=0,
                      help="Enter a distance from TSS to exclude. 0 = no TSS exclusion")

    #RETRIEVING FLAGS
    (options, args) = parser.parse_args()

    if not (options.input and options.rankby and options.out and options.genome):
        print('hi there')
        parser.print_help()
        #a usage error must not look like a successful run to the caller
        sys.exit(1)

    #making the out folder if it doesn't exist
    outFolder = ROSE_utils.formatFolder(options.out, True)

    #figuring out folder schema
    gffFolder = ROSE_utils.formatFolder(outFolder + 'gff/', True)
    #not referenced again in this variant; the call is kept so the
    #mappedGFF/ folder is still set up as before
    mappedFolder = ROSE_utils.formatFolder(outFolder + 'mappedGFF/', True)

    #GETTING INPUT FILE
    inputExtension = options.input.split('.')[-1]
    if inputExtension == 'bed':
        #CONVERTING A BED TO GFF
        inputGFFName = options.input.split('/')[-1][0:-4]
        inputGFFFile = '%s%s.gff' % (gffFolder, inputGFFName)
        ROSE_utils.bedToGFF(options.input, inputGFFFile)
    else:
        if inputExtension != 'gff':
            print('WARNING: INPUT FILE DOES NOT END IN .gff or .bed. ASSUMING .gff FILE FORMAT')
        #COPY THE INPUT GFF TO THE GFF FOLDER
        inputGFFFile = options.input
        os.system('cp %s %s' % (inputGFFFile, gffFolder))
def main():
    '''
    Command-line entry point that maps ROSE enhancers to genes.

    Parses the flags (-i and -g are required), resolves the annotation file
    for the requested genome build under <cwd>/annotation/, calls
    mapEnhancerToGene with the search window and writes the two resulting
    tables next to the input file (or into the folder given with -o). When
    the window differs from the 50000 default, the file names carry a
    _<window in KB>KB suffix.

    Exits with status 1 when a required flag is missing and with status 2
    (via OptionParser.error) when the genome build is not supported.
    '''
    import sys
    from optparse import OptionParser

    usage = "usage: %prog [options] -g [GENOME] -i [INPUT_ENHANCER_FILE]"
    parser = OptionParser(usage=usage)
    #required flags
    parser.add_option(
        "-i",
        "--i",
        dest="input",
        nargs=1,
        default=None,
        help="Enter a ROSE ranked enhancer or super-enhancer file")
    parser.add_option("-g",
                      "--genome",
                      dest="genome",
                      nargs=1,
                      default=None,
                      help="Enter the genome build (MM9,MM8,HG18,HG19)")

    #optional flags
    parser.add_option("-l",
                      "--list",
                      dest="geneList",
                      nargs=1,
                      default=None,
                      help="Enter a gene list to filter through")
    parser.add_option(
        "-o",
        "--out",
        dest="out",
        nargs=1,
        default=None,
        help="Enter an output folder. Default will be same folder as input file"
    )
    parser.add_option(
        "-w",
        "--window",
        dest="window",
        nargs=1,
        default=50000,
        help="Enter a search distance for genes. Default is 50,000bp")
    parser.add_option(
        "-f",
        "--format",
        dest="formatTable",
        action="store_true",
        default=False,
        help="If flagged, maintains original formatting of input table")

    #RETRIEVING FLAGS
    (options, args) = parser.parse_args()

    if not (options.input and options.genome):
        parser.print_help()
        #a usage error must not look like a successful run to the caller
        sys.exit(1)

    #GETTING THE INPUT
    enhancerFile = options.input
    window = int(options.window)

    #making the out folder if it doesn't exist
    if options.out:
        outFolder = ROSE_utils.formatFolder(options.out, True)
    else:
        #default to the folder holding the input file; a bare file name has
        #no folder part and must resolve to './', not to the root folder '/'
        outFolder = os.path.join(os.path.dirname(enhancerFile) or '.', '')

    #GETTING THE GENOME
    genome = options.genome
    print('USING %s AS THE GENOME' % genome)

    #CHECK FORMATTING FLAG
    #NOTE(review): despite its name this is True exactly when -f was given
    noFormatTable = options.formatTable

    #GETTING THE CORRECT ANNOT FILE
    cwd = os.getcwd()
    genomeDict = {
        'HG18': '%s/annotation/hg18_refseq.ucsc' % (cwd),
        'MM9': '%s/annotation/mm9_refseq.ucsc' % (cwd),
        'HG19': '%s/annotation/hg19_refseq.ucsc' % (cwd),
        'MM8': '%s/annotation/mm8_refseq.ucsc' % (cwd),
        'MM10': '%s/annotation/mm10_refseq.ucsc' % (cwd),
    }

    #str.upper() replaces the Python 2 only string-module upper() function;
    #an unknown build is reported as a usage error, not a KeyError traceback
    genomeKey = genome.upper()
    if genomeKey not in genomeDict:
        parser.error('unsupported genome build "%s"; expected one of: %s' %
                     (genome, ', '.join(sorted(genomeDict))))
    annotFile = genomeDict[genomeKey]

    #GETTING THE TRANSCRIBED LIST
    #an empty string is passed on when no gene list was given
    transcribedFile = options.geneList if options.geneList else ''

    enhancerToGeneTable, geneToEnhancerTable = mapEnhancerToGene(
        annotFile, enhancerFile, transcribedFile, True, window, noFormatTable)

    #Writing enhancer output
    enhancerFileName = enhancerFile.split('/')[-1].split('.')[0]

    #only a non-default window is recorded in the file names; floor division
    #keeps the KB value an integer on both Python 2 and Python 3
    if window != 50000:
        windowTag = '_%sKB' % (window // 1000)
    else:
        windowTag = ''

    #writing the enhancer table
    out1 = '%s%s_ENHANCER_TO_GENE%s.txt' % (outFolder, enhancerFileName,
                                           windowTag)
    ROSE_utils.unParseTable(enhancerToGeneTable, out1, '\t')

    #writing the gene table
    out2 = '%s%s_GENE_TO_ENHANCER%s.txt' % (outFolder, enhancerFileName,
                                           windowTag)
    ROSE_utils.unParseTable(geneToEnhancerTable, out2, '\t')
# Example #5
# 0
def main():
    '''
    Command-line entry point for the ROSE region stitching / ranking run.

    Steps, in order:
      1. parse the flags (-i, -r, -o and -g are required) and resolve the
         annotation file for the genome build under <cwd>/annotation/
      2. set up the output folder and its gff/ and mappedGFF/ subfolders
      3. stage the input regions as a .gff (a .bed input is converted)
      4. stitch the regions and write the stitched .gff
      5. launch ROSE_bamToGFF_turbo.py in the background for every bam file
         against both the stitched and the original .gff, then poll once a
         minute for the outputs
      6. build the enhancer region map and run ROSE_callSuper.R on it
      7. run ROSE_geneMapper.py on the super-enhancer table

    Exits with status 1 when a required flag is missing or when the mapped
    output files have not appeared within the polling limit, and with
    status 2 (via OptionParser.error) when the genome build is unsupported.
    '''
    import sys
    from optparse import OptionParser

    #flip to True to also write the stitching debug table to disk
    debug = False

    usage = "usage: %prog [options] -g [GENOME] -i [INPUT_REGION_GFF] -r [RANKBY_BAM_FILE] -o [OUTPUT_FOLDER] [OPTIONAL_FLAGS]"
    parser = OptionParser(usage=usage)
    #required flags
    parser.add_option(
        "-i",
        "--i",
        dest="input",
        nargs=1,
        default=None,
        help="Enter a .gff or .bed file of binding sites used to make enhancers"
    )
    parser.add_option("-r",
                      "--rankby",
                      dest="rankby",
                      nargs=1,
                      default=None,
                      help="bamfile to rank enhancer by")
    parser.add_option("-o",
                      "--out",
                      dest="out",
                      nargs=1,
                      default=None,
                      help="Enter an output folder")
    parser.add_option(
        "-g",
        "--genome",
        dest="genome",
        nargs=1,
        default=None,
        help="Enter the genome build (MM9,MM8,HG18,HG19,MM10,HG38)")

    #optional flags
    parser.add_option(
        "-b",
        "--bams",
        dest="bams",
        nargs=1,
        default=None,
        help="Enter a comma separated list of additional bam files to map to")
    parser.add_option("-c",
                      "--control",
                      dest="control",
                      nargs=1,
                      default=None,
                      help="bamfile to rank enhancer by")
    parser.add_option("-s",
                      "--stitch",
                      dest="stitch",
                      nargs=1,
                      default=12500,
                      help="Enter a max linking distance for stitching")
    parser.add_option(
        "-t",
        "--tss",
        dest="tss",
        nargs=1,
        default=0,
        help="Enter a distance from TSS to exclude. 0 = no TSS exclusion")

    #RETRIEVING FLAGS
    (options, args) = parser.parse_args()

    if not (options.input and options.rankby and options.out
            and options.genome):
        print('hi there')
        parser.print_help()
        #a usage error must not look like a successful run to the caller
        sys.exit(1)

    #GETTING THE CORRECT ANNOT FILE
    #resolved before any folder is created so that an unsupported build
    #fails fast and leaves nothing behind on disk
    genome = options.genome
    cwd = os.getcwd()
    genomeDict = {
        'HG18': '%s/annotation/hg18_refseq.ucsc' % (cwd),
        'MM9': '%s/annotation/mm9_refseq.ucsc' % (cwd),
        'HG19': '%s/annotation/hg19_refseq.ucsc' % (cwd),
        'MM8': '%s/annotation/mm8_refseq.ucsc' % (cwd),
        'MM10': '%s/annotation/mm10_refseq.ucsc' % (cwd),
        'HG38': '%s/annotation/hg38_refseq.ucsc' % (cwd),
    }
    #str.upper() replaces the Python 2 only string-module upper() function
    genomeKey = genome.upper()
    if genomeKey not in genomeDict:
        parser.error('unsupported genome build "%s"; expected one of: %s' %
                     (genome, ', '.join(sorted(genomeDict))))
    annotFile = genomeDict[genomeKey]

    #making the out folder if it doesn't exist
    outFolder = ROSE_utils.formatFolder(options.out, True)

    #figuring out folder schema
    gffFolder = ROSE_utils.formatFolder(outFolder + 'gff/', True)
    mappedFolder = ROSE_utils.formatFolder(outFolder + 'mappedGFF/', True)

    #GETTING INPUT FILE
    inputExtension = options.input.split('.')[-1]
    if inputExtension == 'bed':
        #CONVERTING A BED TO GFF
        inputGFFName = options.input.split('/')[-1][0:-4]
        inputGFFFile = '%s%s.gff' % (gffFolder, inputGFFName)
        ROSE_utils.bedToGFF(options.input, inputGFFFile)
    else:
        if inputExtension != 'gff':
            print(
                'WARNING: INPUT FILE DOES NOT END IN .gff or .bed. ASSUMING .gff FILE FORMAT'
            )
        #COPY THE INPUT GFF TO THE GFF FOLDER
        inputGFFFile = options.input
        os.system('cp %s %s' % (inputGFFFile, gffFolder))

    #GETTING THE LIST OF BAMFILES TO PROCESS
    bamFileList = [options.rankby]
    if options.control:
        bamFileList.append(options.control)
    if options.bams:
        bamFileList += options.bams.split(',')
        #the result used to be bound to a misspelled name and thrown away,
        #so duplicated bam files were mapped more than once
        #NOTE(review): assumes uniquify returns a list - confirm in ROSE_utils
        bamFileList = ROSE_utils.uniquify(bamFileList)

    #Stitch parameter
    stitchWindow = int(options.stitch)

    #tss options
    tssWindow = int(options.tss)
    removeTSS = tssWindow != 0

    #GETTING THE BOUND REGION FILE USED TO DEFINE ENHANCERS
    print('USING %s AS THE INPUT GFF' % (inputGFFFile))
    inputName = inputGFFFile.split('/')[-1].split('.')[0]

    #GETTING THE GENOME
    print('USING %s AS THE GENOME' % genome)

    #MAKING THE START DICT
    print('MAKING START DICT')
    #the return value is not used below; the call is kept so that behavior
    #is unchanged
    ROSE_utils.makeStartDict(annotFile)

    #LOADING IN THE BOUND REGION REFERENCE COLLECTION
    print('LOADING IN GFF REGIONS')
    referenceCollection = ROSE_utils.gffToLocusCollection(inputGFFFile)

    #NOW STITCH REGIONS
    print('STITCHING REGIONS TOGETHER')
    stitchedCollection, debugOutput = regionStitching(inputGFFFile,
                                                      stitchWindow, tssWindow,
                                                      annotFile, removeTSS)

    #NOW MAKE A STITCHED COLLECTION GFF
    print('MAKING GFF FROM STITCHED COLLECTION')
    stitchedGFF = ROSE_utils.locusCollectionToGFF(stitchedCollection)

    #floor division keeps the KB tag an integer (12500 -> 12KB) on both
    #Python 2 and Python 3; true division would give 12.5KB on Python 3
    stitchedGFFName = '%s_%sKB_STITCHED' % (inputName, stitchWindow // 1000)
    if removeTSS:
        stitchedGFFName += '_TSS_DISTAL'
    stitchedGFFFile = '%s%s.gff' % (gffFolder, stitchedGFFName)
    debugOutFile = '%s%s.debug' % (gffFolder, stitchedGFFName)

    #WRITING DEBUG OUTPUT TO DISK
    if debug:
        print('WRITING DEBUG OUTPUT TO DISK AS %s' % (debugOutFile))
        ROSE_utils.unParseTable(debugOutput, debugOutFile, '\t')

    #WRITE THE GFF TO DISK
    print('WRITING STITCHED GFF TO DISK AS %s' % (stitchedGFFFile))
    ROSE_utils.unParseTable(stitchedGFF, stitchedGFFFile, '\t')

    #SETTING UP THE OVERALL OUTPUT FILE
    outputFile1 = outFolder + stitchedGFFName + '_ENHANCER_REGION_MAP.txt'
    print('OUTPUT WILL BE WRITTEN TO  %s' % (outputFile1))

    # bin for bam mapping
    nBin = 1

    #IMPORTANT
    #CHANGE mapCmd TO PARALLELIZE OUTPUT FOR BATCH SUBMISSION
    #e.g. if using LSF mapCmd = "bsub python bamToGFF.py -f 1 -e 200 -r -m %s -b %s -i %s -o %s"
    #the trailing & makes the shell run each mapping as a background process;
    #both mappings now share one template (the second command used to carry
    #a stray positional "1" before the flags)
    mapCmd = "python ROSE_bamToGFF_turbo.py -e 200 -r -m %s -b %s -i %s -o %s &"

    #every bam is mapped to the stitched gff first, then to the original gff
    mappingTargets = ((stitchedGFFFile, stitchedGFFName),
                      (inputGFFFile, inputName))
    expectedOutputs = []
    for bamFile in bamFileList:
        bamFileName = bamFile.split('/')[-1]
        for gffFile, gffName in mappingTargets:
            mappedOut = '%s%s_%s_MAPPED.gff' % (mappedFolder, gffName,
                                                bamFileName)
            cmd = mapCmd % (nBin, bamFile, gffFile, mappedOut)
            print(cmd)
            os.system(cmd)
            expectedOutputs.append(mappedOut)

    print('PAUSING TO MAP')
    time.sleep(10)

    #CHECK FOR MAPPING OUTPUT
    #check every 1 minutes for completed output
    #NOTE(review): only the existence of each file is tested, not whether
    #the background job has finished writing it
    pollMinutes = 1
    #CHANGE THIS PARAMETER TO ALLOW MORE TIME TO MAP
    maxPolls = 120
    polls = 0
    print('WAITING FOR MAPPING TO COMPLETE. ELAPSED TIME (MIN):')
    while True:
        #progress is reported in real minutes (it used to be multiplied by 5
        #although the loop sleeps for one minute per poll)
        if polls % 6 == 0:
            print(polls * pollMinutes)
        if all(os.path.isfile(mappedOut) for mappedOut in expectedOutputs):
            break
        polls += 1
        if polls == maxPolls:
            print(
                'ERROR: OPERATION TIME OUT. MAPPING OUTPUT NOT DETECTED AFTER 2 HOURS'
            )
            sys.exit(1)
        time.sleep(pollMinutes * 60)
    #polls counts completed waits, so this is the time actually spent waiting
    print('MAPPING TOOK %s MINUTES' % (polls * pollMinutes))

    print('BAM MAPPING COMPLETED NOW MAPPING DATA TO REGIONS')
    #CALCULATE DENSITY BY REGION
    mapCollection(stitchedCollection,
                  referenceCollection,
                  bamFileList,
                  mappedFolder,
                  outputFile1,
                  refName=stitchedGFFName)

    time.sleep(10)

    print('CALLING AND PLOTTING SUPER-ENHANCERS')

    #the R script is told 'NONE' when no control bam was given
    if options.control:
        controlName = options.control.split('/')[-1]
    else:
        controlName = 'NONE'
    cmd = 'R --no-save %s %s %s %s < ROSE_callSuper.R' % (
        outFolder, outputFile1, inputName, controlName)
    print(cmd)
    os.system(cmd)

    #calling the gene mapper
    time.sleep(20)
    superTableFile = "%s_SuperEnhancers.table.txt" % (inputName)
    cmd = "python ROSE_geneMapper.py -g %s -i %s%s" % (genome, outFolder,
                                                       superTableFile)
    os.system(cmd)
def main():
    '''
    Command-line entry point that maps ROSE enhancers to genes.

    Parses the flags (-i and -g are required), resolves the annotation file
    for the requested genome build under <cwd>/annotation/, calls
    mapEnhancerToGene with the search window and writes the two resulting
    tables next to the input file (or into the folder given with -o). When
    the window differs from the 50000 default, the file names carry a
    _<window in KB>KB suffix.

    Exits with status 1 when a required flag is missing and with status 2
    (via OptionParser.error) when the genome build is not supported.
    '''
    import sys
    from optparse import OptionParser

    usage = "usage: %prog [options] -g [GENOME] -i [INPUT_ENHANCER_FILE]"
    parser = OptionParser(usage = usage)
    #required flags
    parser.add_option("-i","--i", dest="input",nargs = 1, default=None,
                      help = "Enter a ROSE ranked enhancer or super-enhancer file")
    parser.add_option("-g","--genome", dest="genome",nargs = 1, default=None,
                      help = "Enter the genome build (MM9,MM8,HG18,HG19)")

    #optional flags
    parser.add_option("-l","--list", dest="geneList",nargs = 1, default=None,
                      help = "Enter a gene list to filter through")
    parser.add_option("-o","--out", dest="out",nargs = 1, default=None,
                      help = "Enter an output folder. Default will be same folder as input file")
    parser.add_option("-w","--window", dest="window",nargs = 1, default=50000,
                      help = "Enter a search distance for genes. Default is 50,000bp")
    parser.add_option("-f","--format", dest="formatTable",action= "store_true", default=False,
                      help = "If flagged, maintains original formatting of input table")

    #RETRIEVING FLAGS
    (options,args) = parser.parse_args()

    if not (options.input and options.genome):
        parser.print_help()
        #a usage error must not look like a successful run to the caller
        sys.exit(1)

    #GETTING THE INPUT
    enhancerFile = options.input
    window = int(options.window)

    #making the out folder if it doesn't exist
    if options.out:
        outFolder = ROSE_utils.formatFolder(options.out,True)
    else:
        #default to the folder holding the input file; a bare file name has
        #no folder part and must resolve to './', not to the root folder '/'
        outFolder = os.path.join(os.path.dirname(enhancerFile) or '.', '')

    #GETTING THE GENOME
    genome = options.genome
    print('USING %s AS THE GENOME' % genome)

    #CHECK FORMATTING FLAG
    #NOTE(review): despite its name this is True exactly when -f was given
    noFormatTable = options.formatTable

    #GETTING THE CORRECT ANNOT FILE
    cwd = os.getcwd()
    genomeDict = {
        'HG18':'%s/annotation/hg18_refseq.ucsc' % (cwd),
        'MM9': '%s/annotation/mm9_refseq.ucsc' % (cwd),
        'HG19':'%s/annotation/hg19_refseq.ucsc' % (cwd),
        'MM8': '%s/annotation/mm8_refseq.ucsc' % (cwd),
        'MM10':'%s/annotation/mm10_refseq.ucsc' % (cwd),
        }

    #str.upper() replaces the Python 2 only string-module upper() function;
    #an unknown build is reported as a usage error, not a KeyError traceback
    genomeKey = genome.upper()
    if genomeKey not in genomeDict:
        parser.error('unsupported genome build "%s"; expected one of: %s'
                     % (genome, ', '.join(sorted(genomeDict))))
    annotFile = genomeDict[genomeKey]

    #GETTING THE TRANSCRIBED LIST
    #an empty string is passed on when no gene list was given
    transcribedFile = options.geneList if options.geneList else ''

    enhancerToGeneTable,geneToEnhancerTable = mapEnhancerToGene(annotFile,enhancerFile,transcribedFile,True,window,noFormatTable)

    #Writing enhancer output
    enhancerFileName = enhancerFile.split('/')[-1].split('.')[0]

    #only a non-default window is recorded in the file names; floor division
    #keeps the KB value an integer on both Python 2 and Python 3
    if window != 50000:
        windowTag = '_%sKB' % (window // 1000)
    else:
        windowTag = ''

    #writing the enhancer table
    out1 = '%s%s_ENHANCER_TO_GENE%s.txt' % (outFolder,enhancerFileName,windowTag)
    ROSE_utils.unParseTable(enhancerToGeneTable,out1,'\t')

    #writing the gene table
    out2 = '%s%s_GENE_TO_ENHANCER%s.txt' % (outFolder,enhancerFileName,windowTag)
    ROSE_utils.unParseTable(geneToEnhancerTable,out2,'\t')