Ejemplo n.º 1
0
def renameCollTrack(genome, oldTn, newTn):
    """Move the collected-tracks path of oldTn to the path of newTn.

    Both paths are built with createCollectedPath(genome, <track name>).
    Nothing is moved when the old path does not exist, or when the
    module-level ONLY_SIMULATION flag is set (the intended move is only
    printed in that case).

    Raises:
        AssertionError: if the target path already exists.
    """
    oldPath = createCollectedPath(genome, oldTn)
    if not os.path.exists(oldPath):
        print('(TN did not exist in collTracks..)')
        return

    print('(renaming track in collTracks..)')
    newPath = createCollectedPath(genome, newTn)
    if ONLY_SIMULATION:
        print('Would move %s to %s' % (oldPath, newPath))
        return

    # Raised explicitly instead of using an assert statement, so that the
    # check is still enforced when Python runs with -O (asserts stripped).
    # The exception type is unchanged for existing callers.
    if os.path.exists(newPath):
        raise AssertionError('ERROR: Target path already exists: ' + newPath)
    ensurePathExists(newPath)
    shutil.move(oldPath, newPath)
def renameCollTrack(genome, oldTn, newTn):
    """Move the collected-tracks path of oldTn to the path of newTn.

    Both paths are built with createCollectedPath(genome, <track name>).
    Nothing is moved when the old path does not exist, or when the
    module-level ONLY_SIMULATION flag is set (the intended move is only
    printed in that case).

    Raises:
        AssertionError: if the target path already exists.
    """
    oldPath = createCollectedPath(genome, oldTn)
    if not os.path.exists(oldPath):
        print('(TN did not exist in collTracks..)')
        return

    print('(renaming track in collTracks..)')
    newPath = createCollectedPath(genome, newTn)
    if ONLY_SIMULATION:
        print('Would move %s to %s' % (oldPath, newPath))
        return

    # Raised explicitly instead of using an assert statement, so that the
    # check is still enforced when Python runs with -O (asserts stripped).
    # The exception type is unchanged for existing callers.
    if os.path.exists(newPath):
        raise AssertionError('ERROR: Target path already exists: ' + newPath)
    ensurePathExists(newPath)
    shutil.move(oldPath, newPath)
Ejemplo n.º 3
0
    def writeTrackData(choices, genome, tn):
        """Write a simulated category BED track for track name `tn`.

        The file 'simulatedTracks.category.bed' is created at the collected
        path of (genome, tn). For each experiment a set of positions is
        sampled without replacement, partly from a "high probability" prefix
        of the possible positions and partly from the remainder, and written
        as one BED line per segment on 'chr1' with category 'T<experiment>'.

        Fields read from `choices`:
            choices[0]: number of experiments
            choices[1]: comma-separated number of positions per experiment
            choices[2]: number of possible positions
            choices[3]: number of high-probability positions
            choices[4]: proportion of each experiment's positions taken from
                        the high-probability positions
            choices[5]: fixed number of extra high-probability positions
                        added per experiment
        """
        from gold.util.RandomUtil import random
        from gold.util.CommonFunctions import createCollectedPath
        from quick.util.CommonFunctions import ensurePathExists

        trackFn = createCollectedPath(genome, tn,
                                      'simulatedTracks.category.bed')
        ensurePathExists(trackFn)

        # The context manager guarantees the file is closed even if the
        # sanity check or the sampling below raises.
        with open(trackFn, 'w') as trackFile:
            # Determine the possible positions.
            numPossiblePositions = int(choices[2])
            spacingBetweenPositions = 1e3
            # NOTE(review): range(1, n) yields n - 1 positions, not n --
            # confirm this is intended.
            possiblePositions = [
                i * spacingBetweenPositions
                for i in range(1, numPossiblePositions)
            ]
            numHighProbPositions = int(choices[3])
            highProbPossiblePositions = possiblePositions[:numHighProbPositions]
            lowProbPossiblePositions = possiblePositions[numHighProbPositions:]

            largestPossiblePosition = possiblePositions[-1]
            print('largestPossiblePosition:  %s M' %
                  (largestPossiblePosition / 1e6))
            # Just due to a hardcoded analysis region elsewhere.
            assert largestPossiblePosition < 1.5e8

            sizePerPosition = 591  # empirical across all VDR binding sites
            print('Total BpCoverage:  %s' %
                  (len(possiblePositions) * sizePerPosition))

            # Make the samples.
            numExperiments = int(choices[0])
            proportionFromHighProbPositions = float(choices[4])
            fixedNumFromHighProbPositions = int(choices[5])
            # Example: [3073, 7118, 5290, 3059, 4051, 1021, 200, 610, 573]
            numPositionsPerExperiment = [
                int(x) for x in choices[1].split(',')
            ]
            for experimentIndex in range(numExperiments):
                numPositions = numPositionsPerExperiment[experimentIndex]
                numHighProbSamples = (
                    int(numPositions * proportionFromHighProbPositions) +
                    fixedNumFromHighProbPositions)
                numLowProbSamples = numPositions - numHighProbSamples
                print('numHighProbSamples: %i, out of numHighProbPossiblePositions: %i' % (
                    numHighProbSamples, len(highProbPossiblePositions)))
                sampledPositions = (
                    random.sample(highProbPossiblePositions,
                                  numHighProbSamples) +
                    random.sample(lowProbPossiblePositions,
                                  numLowProbSamples))
                for position in sampledPositions:
                    trackFile.write('\t'.join([
                        'chr1',
                        '%i' % position,
                        '%i' % (position + sizePerPosition),
                        'T%i' % experimentIndex
                    ]) + '\n')
Ejemplo n.º 4
0
    def execute(cls, choices, galaxyFn=None, username=''):
        """Write the expanded elements of a track to the file `galaxyFn`.

        Fields read from `choices`:
            choices[0]: genome
            choices[1]: 'history' to read the track from a history item,
                        anything else to read the track named in choices[3]
            choices[2]: history item, colon-separated; its second field is
                        used as the track type / file suffix
            choices[3]: track name, colon-separated

        For a history item the source file is copied to a temporary file in
        the collected-tracks area, read from there and removed afterwards.
        Otherwise the track is read chromosome by chromosome, and the header
        flag is only set for the first chromosome.
        """
        from gold.util.RandomUtil import random

        # Context manager: the output file is closed even if writing fails.
        with open(galaxyFn, 'w') as outputFile:
            genome = choices[0]
            histItem = choices[2]
            trackItem = choices[3]
            # Result is unused here; call kept in case it has side effects
            # -- TODO confirm and remove.
            chromRegsPath = GenomeInfo.getChrRegsFn(genome)

            chrSizeDict = dict([(chrom, GenomeInfo.getChrLen(genome, chrom))
                                for chrom in GenomeInfo.getChrList(genome)])
            geSource = headLinesStr = None
            if choices[1] == 'history':
                trackType = histItem.split(':')[1]
                # Random lowercase prefix to make the temp file name unique
                # (a local name, so the `username` argument is not clobbered).
                randomPrefix = ''.join(
                    [chr(random.randint(97, 122)) for i in range(6)])
                tempFn = createCollectedPath(
                    genome, [],
                    randomPrefix +
                    '_'.join([str(v) for v in time.localtime()[:6]]) +
                    '.' + trackType)
                fnSource = ExternalTrackManager.extractFnFromGalaxyTN(
                    histItem.split(':'))

                try:
                    with open(tempFn, 'w') as tempFile:
                        with open(fnSource, 'r') as sourceFile:
                            tempFile.write(sourceFile.read())

                    # For any other track type geSource stays None (gtrack
                    # support is currently disabled).
                    if trackType in ['valued.bed', 'category.bed', 'bed']:
                        geSource = GenomeElementSorter(
                            BedGenomeElementSource(
                                tempFn, genome=genome)).__iter__()

                    cls.WriteExpandedElementsToFile(geSource,
                                                    chrSizeDict,
                                                    outputFile,
                                                    headLinesStr,
                                                    writeHeaderFlag=True)
                finally:
                    # Always clean up the temporary copy, also on failure.
                    if os.path.exists(tempFn):
                        os.remove(tempFn)

            else:
                writeHeaderFlag = True
                for chrom in GenomeInfo.getChrList(genome):
                    gRegion = GenomeRegion(genome, chrom, 0,
                                           chrSizeDict[chrom])
                    plTrack = PlainTrack(trackItem.split(':'))
                    geSource = GenomeElementTvWrapper(
                        plTrack.getTrackView(gRegion)).__iter__()
                    cls.WriteExpandedElementsToFile(geSource, chrSizeDict,
                                                    outputFile, headLinesStr,
                                                    writeHeaderFlag)
                    writeHeaderFlag = False
Ejemplo n.º 5
0
def createChromosomeFile(genome, chromNames, referToCollected=False):
    """genome chromNames"""
    # python quick/extra/CustomFuncCatalog.py CreateChromosomeFile mm9 'chr1, chr2, ...'"
    #
    # NOTE(review): the docstring above looks like a command-line usage
    # string (argument names) and may be read at runtime -- left unchanged.
    #
    # Writes 'chromosomes.category.bed' with one line per chromosome:
    #   <chr> TAB 0 TAB <chromosome length> TAB <chr>
    # chromNames is a comma-separated string; spaces are ignored.
    # The file is placed under the collected path of the genome's chromosome
    # track when referToCollected is true, otherwise under its original path.

    chrList = chromNames.replace(' ', '').split(',')
    chrTrackName = GenomeInfo.getChrTrackName(genome)
    if referToCollected:
        from gold.util.CommonFunctions import createCollectedPath
        basePath = createCollectedPath(genome, chrTrackName)
    else:
        basePath = gcf.createOrigPath(genome, chrTrackName)

    # Why is this file a category.bed file?
    outFn = basePath + os.sep + 'chromosomes.category.bed'
    qcf.ensurePathExists(outFn)
    print('Creating: ' + outFn)

    # The file is opened in text mode, so lines are terminated with '\n':
    # writing os.linesep here would give '\r\r\n' on Windows.
    with open(outFn, 'w') as outFile:
        for chrom in chrList:
            chrLen = str(GenomeInfo.getChrLen(genome, chrom))
            outFile.write('\t'.join([chrom, '0', chrLen, chrom]) + '\n')
 def execute(cls, choices, galaxyFn=None, username=''):
     """Write the expanded elements of a track to the file `galaxyFn`.

     Fields read from `choices`:
         choices[0]: genome
         choices[1]: 'History' to read the track from a history item,
                     anything else to read the track named in choices[3]
         choices[2]: history item, colon-separated; its second field is
                     used as the track type / file suffix
         choices[3]: track name, colon-separated

     For a history item the source file is copied to a temporary file in
     the collected-tracks area, read from there and removed afterwards.
     Otherwise the track is read chromosome by chromosome, and the header
     flag is only set for the first chromosome.
     """
     # Context manager: the output file is closed even if writing fails.
     with open(galaxyFn, 'w') as outputFile:
         genome = choices[0]
         histItem = choices[2]
         trackItem = choices[3]
         # Result is unused here; call kept in case it has side effects
         # -- TODO confirm and remove.
         chromRegsPath = GenomeInfo.getChrRegsFn(genome)

         chrSizeDict = dict([(chrom, GenomeInfo.getChrLen(genome, chrom))
                             for chrom in GenomeInfo.getChrList(genome)])
         geSource = headLinesStr = None
         if choices[1] == 'History':
             trackType = histItem.split(':')[1]
             # Random lowercase prefix to make the temp file name unique
             # (a local name, so the `username` argument is not clobbered).
             randomPrefix = ''.join(
                 [chr(random.randint(97, 122)) for i in range(6)])
             tempFn = createCollectedPath(
                 genome, [],
                 randomPrefix +
                 '_'.join([str(v) for v in time.localtime()[:6]]) +
                 '.' + trackType)
             fnSource = ExternalTrackManager.extractFnFromGalaxyTN(
                 histItem.split(':'))

             try:
                 with open(tempFn, 'w') as tempFile:
                     with open(fnSource, 'r') as sourceFile:
                         tempFile.write(sourceFile.read())

                 # For any other track type geSource stays None.
                 if trackType in ['marked.bed', 'category.bed', 'bed']:
                     geSource = GenomeElementSorter(
                         BedGenomeElementSource(
                             tempFn, genome=genome)).__iter__()

                 elif trackType == 'gtrack':
                     geSource = GenomeElementSorter(
                         GtrackGenomeElementSource(
                             tempFn, genome=genome)).__iter__()
                     # NOTE(review): getHeaderLines() is called on the
                     # object returned by __iter__() -- confirm that object
                     # provides it.
                     headLinesStr = geSource.getHeaderLines().replace(
                         '##', '\n##')

                 cls.WriteExpandedElementsToFile(geSource, chrSizeDict,
                                                 outputFile, headLinesStr,
                                                 writeHeaderFlag=True)
             finally:
                 # Always clean up the temporary copy, also on failure.
                 if os.path.exists(tempFn):
                     os.remove(tempFn)

         else:
             writeHeaderFlag = True
             for chrom in GenomeInfo.getChrList(genome):
                 gRegion = GenomeRegion(genome, chrom, 0, chrSizeDict[chrom])
                 plTrack = PlainTrack(trackItem.split(':'))
                 geSource = GenomeElementTvWrapper(
                     plTrack.getTrackView(gRegion)).__iter__()
                 cls.WriteExpandedElementsToFile(geSource, chrSizeDict,
                                                 outputFile, headLinesStr,
                                                 writeHeaderFlag)
                 writeHeaderFlag = False
 def getCollectedPathGFF(cls, abbrv):
     """Return the collected path of 'genome_annotations.gff' for genome `abbrv`.

     The path is built from the genome-annotations track name of `abbrv`.
     """
     annotationsTrackName = GenomeInfo.getGenomeAnnotationsTrackName(abbrv)
     return createCollectedPath(abbrv, annotationsTrackName,
                                'genome_annotations.gff')
 def getBasePathGFF(cls, abbrv):
     """Return the collected path of the genome-annotations track of `abbrv`."""
     annotationsTrackName = GenomeInfo.getGenomeAnnotationsTrackName(abbrv)
     return createCollectedPath(abbrv, annotationsTrackName)
 def getBasePathSequence(cls, abbrv):
     """Return the collected path of the sequence track of genome `abbrv`."""
     sequenceTrackName = GenomeInfo.getSequenceTrackName(abbrv)
     return createCollectedPath(abbrv, sequenceTrackName)
Ejemplo n.º 10
0
 def getTableData(self, genome, trackName):
     """Download table data for `trackName` and store it in the collected-tracks area.

     The last element of `trackName` is used both as the value of the
     'hgta_outputType' parameter and as the suffix of the output file
     'fromUcsc.<suffix>', which is written at the collected path of
     (genome, trackName).
     """
     outputType = trackName[-1]
     # Only the form is needed; the web page object is not used here.
     webObject, paramForm = self._getWebPageAndForm(
         self._makeUrlstreng(self._sessionId, 'hgta_outputType', outputType))
     response = mechanize.urlopen(paramForm.click('hgta_doTopSubmit'))
     fn = createCollectedPath(genome, trackName, 'fromUcsc.' + outputType)
     ensurePathExists(fn)
     # Context manager: the file is flushed and closed deterministically,
     # also if reading the response fails.
     with open(fn, 'w') as outFile:
         outFile.write(response.read())
Ejemplo n.º 11
0
 def getTableData(self, genome, trackName):
     """Download table data for `trackName` and store it in the collected-tracks area.

     The last element of `trackName` is used both as the value of the
     'hgta_outputType' parameter and as the suffix of the output file
     'fromUcsc.<suffix>', which is written at the collected path of
     (genome, trackName).
     """
     outputType = trackName[-1]
     # Only the form is needed; the web page object is not used here.
     webObject, paramForm = self._getWebPageAndForm(
         self._makeUrlstreng(self._sessionId, 'hgta_outputType', outputType))
     response = mechanize.urlopen(paramForm.click('hgta_doTopSubmit'))
     fn = createCollectedPath(genome, trackName, 'fromUcsc.' + outputType)
     ensurePathExists(fn)
     # Context manager: the file is flushed and closed deterministically,
     # also if reading the response fails.
     with open(fn, 'w') as outFile:
         outFile.write(response.read())