def renameCollTrack(genome, oldTn, newTn):
    """Move a track's collected path from the old track name to the new one.

    Both paths are obtained from createCollectedPath(genome, <track name>).
    If the old path does not exist, nothing is moved. When the module-level
    flag ONLY_SIMULATION is true, the intended move is only printed.

    Args:
        genome: genome identifier, passed through to createCollectedPath.
        oldTn: current track name.
        newTn: track name to rename to.

    Raises:
        AssertionError: if the target path already exists (checked only when
            actually moving, i.e. when ONLY_SIMULATION is false).
    """
    oldPath = createCollectedPath(genome, oldTn)
    if not os.path.exists(oldPath):
        print('(TN did not exist in collTracks..)')
        return

    print('(renaming track in collTracks..)')
    newPath = createCollectedPath(genome, newTn)
    if ONLY_SIMULATION:
        print('Would move %s to %s' % (oldPath, newPath))
        return

    # Raised explicitly instead of using an `assert` statement: asserts are
    # stripped when Python runs with -O, which would let the move proceed
    # against an existing target. The exception type and message are the
    # same as before, so existing callers are unaffected.
    if os.path.exists(newPath):
        raise AssertionError('ERROR: Target path already exists: ' + newPath)
    ensurePathExists(newPath)
    shutil.move(oldPath, newPath)
def renameCollTrack(genome, oldTn, newTn):
    """Move a track's collected path from the old track name to the new one.

    Both paths are obtained from createCollectedPath(genome, <track name>).
    If the old path does not exist, nothing is moved. When the module-level
    flag ONLY_SIMULATION is true, the intended move is only printed.

    Args:
        genome: genome identifier, passed through to createCollectedPath.
        oldTn: current track name.
        newTn: track name to rename to.

    Raises:
        AssertionError: if the target path already exists (checked only when
            actually moving, i.e. when ONLY_SIMULATION is false).
    """
    oldPath = createCollectedPath(genome, oldTn)
    if not os.path.exists(oldPath):
        print('(TN did not exist in collTracks..)')
        return

    print('(renaming track in collTracks..)')
    newPath = createCollectedPath(genome, newTn)
    if ONLY_SIMULATION:
        print('Would move %s to %s' % (oldPath, newPath))
        return

    # Raised explicitly instead of using an `assert` statement: asserts are
    # stripped when Python runs with -O, which would let the move proceed
    # against an existing target. The exception type and message are the
    # same as before, so existing callers are unaffected.
    if os.path.exists(newPath):
        raise AssertionError('ERROR: Target path already exists: ' + newPath)
    ensurePathExists(newPath)
    shutil.move(oldPath, newPath)
def writeTrackData(choices, genome, tn):
    """Write randomly sampled segments for several experiments to a bed file.

    The output file is createCollectedPath(genome, tn,
    'simulatedTracks.category.bed'). Every line is
    'chr1<TAB>start<TAB>end<TAB>T<experimentIndex>'.

    Entries of `choices` as read by this function:
        choices[0]: number of experiments (int).
        choices[1]: comma-separated number of positions per experiment.
        choices[2]: number of possible positions (int).
        choices[3]: number of high-probability positions (int).
        choices[4]: proportion of each experiment's positions that is drawn
            from the high-probability positions (float).
        choices[5]: fixed number of extra high-probability samples (int).

    Raises:
        AssertionError: if the largest possible position is not below 1.5e8.
        IndexError: if no possible positions are generated, or choices[1]
            has fewer entries than choices[0] experiments.
        ValueError: from random.sample when a sample size is negative or
            larger than the population it is drawn from.
    """
    from gold.util.RandomUtil import random
    from gold.util.CommonFunctions import createCollectedPath
    from quick.util.CommonFunctions import ensurePathExists

    trackFn = createCollectedPath(genome, tn, 'simulatedTracks.category.bed')
    ensurePathExists(trackFn)

    # The `with` block guarantees the file is closed even when one of the
    # checks or random.sample calls below raises.
    with open(trackFn, 'w') as trackFile:
        # Determine possibilities. Positions are multiples of the spacing,
        # starting at 1 * spacing; range(1, n) yields n - 1 positions.
        numPossiblePositions = int(choices[2])
        spacingBetweenPositions = 1e3
        possiblePositions = [i * spacingBetweenPositions
                             for i in range(1, numPossiblePositions)]

        numHighProbPositions = int(choices[3])
        highProbPossiblePositions = possiblePositions[:numHighProbPositions]
        lowProbPossiblePositions = possiblePositions[numHighProbPositions:]

        largestPossiblePosition = possiblePositions[-1]
        print('largestPossiblePosition:  %s M' % (largestPossiblePosition / 1e6))
        # Just due to a hardcoded analysis region used elsewhere.
        assert largestPossiblePosition < 1.5e8

        sizePerPosition = 591  # empirical across all VDR binding sites..
        print('Total BpCoverage:  %s' % (len(possiblePositions) * sizePerPosition))

        # Make samples.
        numExperiments = int(choices[0])
        proportionFromHighProbPositions = float(choices[4])
        fixedNumFromHighProbPositions = int(choices[5])
        # Example of per-experiment counts:
        # 3073,7118,5290,3059,4051,1021,200,610,573
        numPositionsPerExperiment = [int(x) for x in choices[1].split(',')]

        for experimentIndex in range(numExperiments):
            numPositions = numPositionsPerExperiment[experimentIndex]
            numHighProbSamples = (
                int(numPositions * proportionFromHighProbPositions)
                + fixedNumFromHighProbPositions)
            numLowProbSamples = numPositions - numHighProbSamples
            print('numHighProbSamples: %i, out of numHighProbPossiblePositions: %i'
                  % (numHighProbSamples, len(highProbPossiblePositions)))

            sampledPositions = (
                random.sample(highProbPossiblePositions, numHighProbSamples)
                + random.sample(lowProbPossiblePositions, numLowProbSamples))

            for position in sampledPositions:
                # Positions are floats; '%i' writes them as integers.
                trackFile.write('chr1\t%i\t%i\tT%i\n' % (
                    position, position + sizePerPosition, experimentIndex))
def execute(cls, choices, galaxyFn=None, username=''):
    """Write expanded elements of a track to the file `galaxyFn`.

    Entries of `choices` as read by this method:
        choices[0]: genome.
        choices[1]: track source; the string 'history' selects the
            history branch, anything else the plain-track branch.
        choices[2]: colon-separated history item (history branch); its
            second field is used as the track type / file suffix.
        choices[3]: colon-separated track name (plain-track branch).

    In the history branch the history file is copied to a temporary
    collected path, read back sorted and passed to
    cls.WriteExpandedElementsToFile; the temporary file is removed
    afterwards, also when an error occurs. In the other branch each
    chromosome of the genome is processed in turn and the header flag is
    only true for the first one.

    The `username` argument is not used; a random six-letter prefix is
    generated for the temporary file name instead.
    """
    from gold.util.RandomUtil import random

    # `with` guarantees the output file is closed even if writing fails.
    with open(galaxyFn, 'w') as outputFile:
        genome = choices[0]
        trackItem = choices[3]
        # NOTE(review): the result is unused in this method; the call is
        # kept because any side effects of getChrRegsFn are not visible here.
        GenomeInfo.getChrRegsFn(genome)
        chrSizeDict = dict((chrom, GenomeInfo.getChrLen(genome, chrom))
                           for chrom in GenomeInfo.getChrList(genome))
        geSource = headLinesStr = None

        if choices[1] == 'history':
            galaxyTn = choices[2].split(':')
            trackType = galaxyTn[1]
            # Random lowercase prefix plus a timestamp makes the temporary
            # file name unlikely to clash with another run.
            randomPrefix = ''.join(
                [chr(random.randint(97, 122)) for i in range(6)])
            timestamp = '_'.join([str(v) for v in time.localtime()[:6]])
            tempFn = createCollectedPath(
                genome, [], randomPrefix + timestamp + '.' + trackType)
            fnSource = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTn)
            try:
                with open(fnSource, 'r') as sourceFile:
                    content = sourceFile.read()
                with open(tempFn, 'w') as tempFile:
                    tempFile.write(content)

                # Only bed variants are handled here; for any other track
                # type geSource is passed on as None.
                if trackType in ['valued.bed', 'category.bed', 'bed']:
                    geSource = GenomeElementSorter(
                        BedGenomeElementSource(tempFn, genome=genome)).__iter__()

                cls.WriteExpandedElementsToFile(
                    geSource, chrSizeDict, outputFile, headLinesStr,
                    writeHeaderFlag=True)
            finally:
                # Remove the temporary copy on failure as well as success.
                if os.path.exists(tempFn):
                    os.remove(tempFn)
        else:
            writeHeaderFlag = True
            for chrom in GenomeInfo.getChrList(genome):
                gRegion = GenomeRegion(genome, chrom, 0, chrSizeDict[chrom])
                plTrack = PlainTrack(trackItem.split(':'))
                geSource = GenomeElementTvWrapper(
                    plTrack.getTrackView(gRegion)).__iter__()
                cls.WriteExpandedElementsToFile(
                    geSource, chrSizeDict, outputFile, headLinesStr,
                    writeHeaderFlag)
                # The header is written for the first chromosome only.
                writeHeaderFlag = False
def createChromosomeFile(genome, chromNames, referToCollected=False):
    """genome chromNames"""
    # NOTE(review): the docstring above is left unchanged on purpose; it
    # looks like a command-line usage string and may be read at runtime.
    # Example invocation:
    # python quick/extra/CustomFuncCatalog.py CreateChromosomeFile mm9 'chr1, chr2, ...'
    #
    # Writes 'chromosomes.category.bed' with one line per chromosome:
    #   <chr> TAB 0 TAB <chromosome length> TAB <chr>
    # chromNames is a comma-separated string; spaces are ignored.
    # referToCollected selects the collected path instead of the original
    # path as the base directory.
    chrList = chromNames.replace(' ', '').split(',')
    chrTrackName = GenomeInfo.getChrTrackName(genome)

    if referToCollected:
        from gold.util.CommonFunctions import createCollectedPath
        basePath = createCollectedPath(genome, chrTrackName)
    else:
        basePath = gcf.createOrigPath(genome, chrTrackName)

    # Why is this file a category.bed file?
    outFn = basePath + os.sep + 'chromosomes.category.bed'
    qcf.ensurePathExists(outFn)
    print('Creating: ' + outFn)

    # `with` closes the file even if a chromosome length lookup fails.
    with open(outFn, 'w') as outFile:
        for chrom in chrList:
            chromLen = str(GenomeInfo.getChrLen(genome, chrom))
            # '\n' rather than os.linesep: the file is opened in text mode,
            # which already translates newlines, so os.linesep would give
            # '\r\r\n' on Windows.
            outFile.write('\t'.join([chrom, '0', chromLen, chrom]) + '\n')
def execute(cls, choices, galaxyFn=None, username=''):
    """Write expanded elements of a track to the file `galaxyFn`.

    Entries of `choices` as read by this method:
        choices[0]: genome.
        choices[1]: track source; the string 'History' selects the
            history branch, anything else the plain-track branch.
        choices[2]: colon-separated history item (history branch); its
            second field is used as the track type / file suffix.
        choices[3]: colon-separated track name (plain-track branch).

    In the history branch the history file is copied to a temporary
    collected path, read back sorted (bed variants or gtrack) and passed
    to cls.WriteExpandedElementsToFile; the temporary file is removed
    afterwards, also when an error occurs. In the other branch each
    chromosome of the genome is processed in turn and the header flag is
    only true for the first one.

    The `username` argument is not used; a random six-letter prefix is
    generated for the temporary file name instead.
    """
    # `with` guarantees the output file is closed even if writing fails.
    with open(galaxyFn, 'w') as outputFile:
        genome = choices[0]
        trackItem = choices[3]
        # NOTE(review): the result is unused in this method; the call is
        # kept because any side effects of getChrRegsFn are not visible here.
        GenomeInfo.getChrRegsFn(genome)
        chrSizeDict = dict((chrom, GenomeInfo.getChrLen(genome, chrom))
                           for chrom in GenomeInfo.getChrList(genome))
        geSource = headLinesStr = None

        if choices[1] == 'History':
            galaxyTn = choices[2].split(':')
            trackType = galaxyTn[1]
            # Random lowercase prefix plus a timestamp makes the temporary
            # file name unlikely to clash with another run.
            randomPrefix = ''.join(
                [chr(random.randint(97, 122)) for i in range(6)])
            timestamp = '_'.join([str(v) for v in time.localtime()[:6]])
            tempFn = createCollectedPath(
                genome, [], randomPrefix + timestamp + '.' + trackType)
            fnSource = ExternalTrackManager.extractFnFromGalaxyTN(galaxyTn)
            try:
                with open(fnSource, 'r') as sourceFile:
                    content = sourceFile.read()
                with open(tempFn, 'w') as tempFile:
                    tempFile.write(content)

                # For any track type other than those below, geSource is
                # passed on as None.
                if trackType in ['marked.bed', 'category.bed', 'bed']:
                    geSource = GenomeElementSorter(
                        BedGenomeElementSource(tempFn, genome=genome)).__iter__()
                elif trackType == 'gtrack':
                    geSource = GenomeElementSorter(
                        GtrackGenomeElementSource(tempFn, genome=genome)).__iter__()
                    # Put every '##' header marker on its own line.
                    headLinesStr = geSource.getHeaderLines().replace('##', '\n##')

                cls.WriteExpandedElementsToFile(
                    geSource, chrSizeDict, outputFile, headLinesStr,
                    writeHeaderFlag=True)
            finally:
                # Remove the temporary copy on failure as well as success.
                if os.path.exists(tempFn):
                    os.remove(tempFn)
        else:
            writeHeaderFlag = True
            for chrom in GenomeInfo.getChrList(genome):
                gRegion = GenomeRegion(genome, chrom, 0, chrSizeDict[chrom])
                plTrack = PlainTrack(trackItem.split(':'))
                geSource = GenomeElementTvWrapper(
                    plTrack.getTrackView(gRegion)).__iter__()
                cls.WriteExpandedElementsToFile(
                    geSource, chrSizeDict, outputFile, headLinesStr,
                    writeHeaderFlag)
                # The header is written for the first chromosome only.
                writeHeaderFlag = False
def getCollectedPathGFF(cls, abbrv):
    """Return the collected path of 'genome_annotations.gff' for a genome.

    The path is built by createCollectedPath from the genome abbreviation
    `abbrv`, the genome-annotations track name reported by GenomeInfo, and
    the fixed file name 'genome_annotations.gff'.
    """
    annotationsTrackName = GenomeInfo.getGenomeAnnotationsTrackName(abbrv)
    gffPath = createCollectedPath(
        abbrv, annotationsTrackName, 'genome_annotations.gff')
    return gffPath
def getBasePathGFF(cls, abbrv):
    """Return the collected path of the genome-annotations track.

    The path is built by createCollectedPath from the genome abbreviation
    `abbrv` and the genome-annotations track name reported by GenomeInfo.
    """
    annotationsTrackName = GenomeInfo.getGenomeAnnotationsTrackName(abbrv)
    basePath = createCollectedPath(abbrv, annotationsTrackName)
    return basePath
def getBasePathSequence(cls, abbrv):
    """Return the collected path of the sequence track.

    The path is built by createCollectedPath from the genome abbreviation
    `abbrv` and the sequence track name reported by GenomeInfo.
    """
    sequenceTrackName = GenomeInfo.getSequenceTrackName(abbrv)
    basePath = createCollectedPath(abbrv, sequenceTrackName)
    return basePath
def getTableData(self, genome, trackName):
    """Submit the output form for a track and save the response to disk.

    The form is fetched from the URL built by self._makeUrlstreng for the
    'hgta_outputType' setting, using the last element of `trackName` as
    the value. Its 'hgta_doTopSubmit' control is clicked and the response
    body is written to
    createCollectedPath(genome, trackName, 'fromUcsc.' + trackName[-1]).

    Args:
        genome: genome identifier, passed through to createCollectedPath.
        trackName: track name as a sequence; its last element is used both
            as the output type and as the output file suffix.
    """
    outputType = trackName[-1]
    formUrl = self._makeUrlstreng(
        self._sessionId, 'hgta_outputType', outputType)
    # Only the form is needed; the first returned element is not used.
    _webObject, paramForm = self._getWebPageAndForm(formUrl)
    response = mechanize.urlopen(paramForm.click('hgta_doTopSubmit'))

    fn = createCollectedPath(genome, trackName, 'fromUcsc.' + outputType)
    ensurePathExists(fn)

    # Read the whole response before opening the target, so a failed
    # download does not leave a truncated file behind. `with` closes the
    # file handle deterministically.
    data = response.read()
    with open(fn, 'w') as outFile:
        outFile.write(data)
def getTableData(self, genome, trackName):
    """Submit the output form for a track and save the response to disk.

    The form is fetched from the URL built by self._makeUrlstreng for the
    'hgta_outputType' setting, using the last element of `trackName` as
    the value. Its 'hgta_doTopSubmit' control is clicked and the response
    body is written to
    createCollectedPath(genome, trackName, 'fromUcsc.' + trackName[-1]).

    Args:
        genome: genome identifier, passed through to createCollectedPath.
        trackName: track name as a sequence; its last element is used both
            as the output type and as the output file suffix.
    """
    outputType = trackName[-1]
    formUrl = self._makeUrlstreng(
        self._sessionId, 'hgta_outputType', outputType)
    # Only the form is needed; the first returned element is not used.
    _webObject, paramForm = self._getWebPageAndForm(formUrl)
    response = mechanize.urlopen(paramForm.click('hgta_doTopSubmit'))

    fn = createCollectedPath(genome, trackName, 'fromUcsc.' + outputType)
    ensurePathExists(fn)

    # Read the whole response before opening the target, so a failed
    # download does not leave a truncated file behind. `with` closes the
    # file handle deterministically.
    data = response.read()
    with open(fn, 'w') as outFile:
        outFile.write(data)