def _removeAllTrackData(self, trackName):
    """Remove all directories belonging to ``trackName`` in ``self.GENOME``.

    Passes, in order, the ``createDirPath`` result without overlaps, the
    ``createDirPath`` result with overlaps, and the ``createOrigPath``
    result to ``self._removeDir``.
    """
    for overlapsAllowed in (False, True):
        procDir = createDirPath(trackName, self.GENOME,
                                allowOverlaps=overlapsAllowed)
        self._removeDir(procDir, trackName)

    origDir = createOrigPath(self.GENOME, trackName)
    self._removeDir(origDir, trackName)
def _getDirPaths(self, trackName):
    """Return the pair ``(noOverlapsPath, withOverlapsPath)`` for a track.

    Both paths come from ``createDirPath`` for ``self.GENOME``; the first
    is built with ``allowOverlaps=False``, the second with ``True``.
    """
    return tuple(
        createDirPath(trackName, self.GENOME, allowOverlaps=overlapsAllowed)
        for overlapsAllowed in (False, True)
    )
def _preProcess(self, trackName):
    """Clear both ``createDirPath`` directories of a track, then preprocess it.

    The directory without overlaps is removed first, then the one with
    overlaps; afterwards a ``PreProcessAllTracksJob`` is run for
    ``self.GENOME`` and ``trackName``.
    """
    for overlapsAllowed in (False, True):
        procDir = createDirPath(trackName, self.GENOME,
                                allowOverlaps=overlapsAllowed)
        self._removeDir(procDir, trackName)

    job = PreProcessAllTracksJob(self.GENOME, trackName, username="******")
    job.process()
def _removePreprocessedTrackData(self, trackName):
    """Remove both ``createDirPath`` directories of a track and its shelve entries.

    The directory without overlaps is removed first, then the one with
    overlaps; finally ``TrackInfo.removeFilteredEntriesFromShelve`` is
    called for ``self.GENOME`` and ``trackName``.
    """
    for overlapsAllowed in (False, True):
        procDir = createDirPath(trackName, self.GENOME,
                                allowOverlaps=overlapsAllowed)
        self._removeDir(procDir, trackName)

    TrackInfo.removeFilteredEntriesFromShelve(self.GENOME, trackName)
def testCreateDirPath(self):
    """Check ``createDirPath`` output for four chr/allowOverlaps combinations.

    Every expected path is ``'BASE/' + <index bin size> + <suffix>``; the
    cases differ in the chromosome argument (``self.chr``, ``None``, ``''``)
    and in the ``allowOverlaps`` flag.
    """
    trackName = ['melting', 'discr']

    # (expected path suffix, chr argument, allowOverlaps argument)
    cases = [
        ('/noOverlaps/hg18/melting/discr/chr1', self.chr, False),
        ('/withOverlaps/hg18/melting/discr/chr1', self.chr, True),
        ('/noOverlaps/hg18/melting/discr', None, False),
        ('/noOverlaps/hg18/melting/discr/', '', False),
    ]

    for suffix, chrArg, overlapsAllowed in cases:
        expected = 'BASE/' + str(CompBinManager.getIndexBinSize()) + suffix
        actual = createDirPath(trackName, self.genome, chrArg,
                               overlapsAllowed, 'BASE')
        self.assertEqual(expected, actual)
def renameProcTrack(genome, oldTn, newTn):
    """Move the ``createDirPath`` directories of a track to a new track name.

    Handles the directory without overlaps first, then the one with
    overlaps. A missing source directory only produces a warning. When
    ``ONLY_SIMULATION`` is true, the intended move is printed and nothing
    is changed on disk.

    Raises:
        AssertionError: if the target path already exists (real runs only).
    """
    for overlapsAllowed in (False, True):
        srcPath = createDirPath(oldTn, genome, allowOverlaps=overlapsAllowed)
        overlapsText = 'with overlaps' if overlapsAllowed else ' without overlaps'

        if not os.path.exists(srcPath):
            print('Warning: TN did not exist as preproc ' + overlapsText)
            continue

        print('(renaming TN in preproc ' + overlapsText + ')')
        dstPath = createDirPath(newTn, genome, allowOverlaps=overlapsAllowed)

        if ONLY_SIMULATION:
            print('Would move %s to %s' % (srcPath, dstPath))
            continue

        assert not os.path.exists(dstPath), \
            'ERROR: Target path already exists: ' + dstPath
        ensurePathExists(dstPath)
        shutil.move(srcPath, dstPath)
def _compute(self):
    """Compute the raw statistic for every pair of subtypes of the two tracks.

    Rows are the subtypes of ``self._track`` and columns those of
    ``self._track2``, as returned by ``ProcTrackOptions.getSubtypes``. If
    ``'minimal'`` is among ``self._kwArgs``, only the first subtype of each
    track is used.

    Returns:
        ``{'Result': OrderedDict}`` with the keys ``'Matrix'`` (nested list
        of results), ``'Rows'`` and ``'Cols'`` (the subtype lists).

    Raises:
        AssertionError: if either track has no subtypes.
        IncompatibleTracksError: if the ``createDirPath`` directory of any
            subtype track does not exist.
    """
    # 'rawStatistic' is consumed here and must not be forwarded.
    kwArgs = copy(self._kwArgs)
    kwArgs.pop('rawStatistic', None)

    rowSubtypes = ProcTrackOptions.getSubtypes(
        self.getGenome(), self._track.trackName, True)
    colSubtypes = ProcTrackOptions.getSubtypes(
        self.getGenome(), self._track2.trackName, True)
    assert len(rowSubtypes) > 0, str(self._track.trackName)
    assert len(colSubtypes) > 0, str(self._track2.trackName)

    if 'minimal' in self._kwArgs:
        rowSubtypes = rowSubtypes[:1]
        colSubtypes = colSubtypes[:1]

    cellResults = []
    for rowSubtype in rowSubtypes:
        for colSubtype in colSubtypes:
            rowTn = self._track.trackName + [rowSubtype]
            colTn = self._track2.trackName + [colSubtype]

            bothExist = os.path.exists(createDirPath(rowTn, self.getGenome())) \
                and os.path.exists(createDirPath(colTn, self.getGenome()))
            if not bothExist:
                raise IncompatibleTracksError

            rowTrack = Track(rowTn)
            rowTrack.formatConverters = self._track.formatConverters
            colTrack = Track(colTn)
            colTrack.formatConverters = self._track2.formatConverters

            cellStat = self._rawStatistic(self._region, rowTrack, colTrack,
                                          **kwArgs)
            cellResults.append(cellStat.getResult())
            # NOTE(review): flushing once per matrix cell is assumed here;
            # confirm the nesting level against version control.
            ResultsMemoizer.flushStoredResults()

    # Results were appended row by row, so a plain reshape restores the matrix.
    matrix = array(cellResults).reshape((len(rowSubtypes), len(colSubtypes)))
    return {
        'Result': OrderedDict([('Matrix', matrix.tolist()),
                               ('Rows', rowSubtypes),
                               ('Cols', colSubtypes)])
    }
def getTrackData(self, trackName, genome, chr, allowOverlaps, forceChrFolders=False):
    """Collect the files of a track directory into a ``TrackData`` object.

    If a bounding region shelve file exists and ``forceChrFolders`` is
    false, ``chr`` is replaced by ``None`` before the directory path is
    built. Entries whose name starts with ``'.'`` and sub-directories are
    skipped. A bounding region file is attached as
    ``trackData.boundingRegionShelve`` (shared through ``self._fileDict``);
    every other file is parsed with ``parseMemmapFileFn`` and stored under
    its prefix.

    Raises:
        AssertionError: if two files yield the same prefix.
    """
    trackData = TrackData()
    brShelve = BoundingRegionShelve(genome, trackName, allowOverlaps)

    useChrFolder = forceChrFolders or not brShelve.fileExists()
    if not useChrFolder:
        chr = None

    dirPath = createDirPath(trackName, genome, chr, allowOverlaps)
    for entry in os.listdir(dirPath):
        entryPath = dirPath + os.sep + entry
        if entry[0] == '.' or os.path.isdir(entryPath):
            continue

        if not isBoundingRegionFileName(entry):
            prefix, elementDim, dtypeDim, dtype = parseMemmapFileFn(entry)
            assert prefix not in trackData
            trackData[prefix] = self._getFile(chr, dirPath, entryPath,
                                              elementDim, dtype, dtypeDim)
            continue

        # Register the shelve once per path, then hand out the stored object.
        if entryPath not in self._fileDict:
            self._fileDict[entryPath] = brShelve
        trackData.boundingRegionShelve = self._fileDict[entryPath]

    return trackData
def _compute(self):
    """Compute the raw statistic for each subtype of track 1 against '0'/'1' of track 2.

    Rows are the subtypes of ``self._track`` as returned by
    ``ProcTrackOptions.getSubtypes``; the two columns are the fixed
    subtypes ``'0'`` and ``'1'`` of ``self._track2``.

    Returns:
        OrderedDict with the keys ``'Matrix'`` (nested list, two columns),
        ``'Rows'`` (subtypes of track 1) and ``'Cols'``
        (``['Case','Control']``).

    Raises:
        AssertionError: if track 1 has no subtypes.
        IncompatibleTracksError: if the ``createDirPath`` directory of any
            subtype track does not exist.
    """
    # 'rawStatistic' is consumed here and must not be forwarded.
    kwArgs = copy(self._kwArgs)
    kwArgs.pop('rawStatistic', None)

    rowSubtypes = ProcTrackOptions.getSubtypes(
        self.getGenome(), self._track.trackName, True)
    assert len(rowSubtypes) > 0

    cellResults = []
    for rowSubtype in rowSubtypes:
        for colSubtype in ['0', '1']:
            rowTn = self._track.trackName + [rowSubtype]
            colTn = self._track2.trackName + [colSubtype]

            bothExist = os.path.exists(createDirPath(rowTn, self.getGenome())) \
                and os.path.exists(createDirPath(colTn, self.getGenome()))
            if not bothExist:
                raise IncompatibleTracksError

            rowTrack = Track(rowTn)
            rowTrack.formatConverters = self._track.formatConverters
            colTrack = Track(colTn)
            colTrack.formatConverters = self._track2.formatConverters

            cellStat = self._rawStatistic(self._region, rowTrack, colTrack,
                                          **kwArgs)
            cellResults.append(cellStat.getResult())
            # NOTE(review): flushing once per matrix cell is assumed here;
            # confirm the nesting level against version control.
            ResultsMemoizer.flushStoredResults()

    # Two results were appended per row, hence the fixed column count.
    matrix = array(cellResults).reshape((-1, 2))
    return OrderedDict([('Matrix', matrix.tolist()),
                        ('Rows', rowSubtypes),
                        ('Cols', ['Case', 'Control'])])
def removeOutdatedPreProcessedFiles(cls, genome, trackName, allowOverlaps, mode): collector = PreProcMetaDataCollector(genome, trackName) if cls.preProcFilesExist(genome, trackName, allowOverlaps) and not \ collector.hasRemovedPreProcFiles(allowOverlaps): dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps) assert (dirPath.startswith(PROCESSED_DATA_PATH)) if mode == 'Real': print 'Removing outdated preprocessed data: ', dirPath for fn in os.listdir(dirPath): fullFn = os.path.join(dirPath, fn) if os.path.isfile(fullFn): os.unlink(fullFn) if os.path.isdir(fullFn): if cls._isOldTypeChromDirectory(fullFn, genome): shutil.rmtree(fullFn) else: print 'Would now have removed outdated preprocessed data if real run: ', dirPath collector.updateRemovedPreProcFilesFlag(allowOverlaps, True) if mode == 'Real': ti = TrackInfo(genome, trackName) ti.resetTimeOfPreProcessing()
def _createChildren(self):
    """Register the child statistics: a format check, the full statistic, and one per subtype.

    Children are added in this order: a ``FormatSpecStat`` on
    ``self._track``, the raw statistic on the unsplit track(s), and then
    the raw statistic for the sub-tracks ``'0'`` and ``'1'`` of
    ``self._track``. ``self._indexOfFirstSubCatChild`` records where the
    per-subtype children start. ``self._track2`` is optional; ``None`` is
    passed on when it is not set.

    Raises:
        IncompatibleTracksError: if the ``createDirPath`` directory of a
            sub-track does not exist.
    """
    # 'rawStatistic' is consumed here and must not be forwarded.
    kwArgs = copy(self._kwArgs)
    kwArgs.pop('rawStatistic', None)

    track2 = getattr(self, '_track2', None)

    formatReq = TrackFormatReq(dense=False, val='tc')
    self._addChild(FormatSpecStat(self._region, self._track, formatReq))

    # This will actually compute, without any use for it.
    self._addChild(
        self._rawStatistic(self._region, self._track, track2, **kwArgs))

    self._indexOfFirstSubCatChild = len(self._children)

    for subtype in ['0', '1']:
        subTrackName = self._track.trackName + [subtype]
        subTrackDir = createDirPath(subTrackName, self.getGenome())
        if not os.path.exists(subTrackDir):
            raise IncompatibleTracksError

        subTrack = Track(subTrackName)
        subTrack.formatConverters = self._track.formatConverters
        self._addChild(
            self._rawStatistic(self._region, subTrack, track2, **kwArgs))
def removeChrMemmapFolders(genome, trackName, allowOverlaps):
    """Recursively delete the ``createDirPath`` directory of every preprocessed chromosome.

    The chromosome list comes from
    ``TrackInfoDataCollector.getPreProcessedChrs``.

    Raises:
        AssertionError: if a chromosome path is missing or is not a
            directory.
    """
    collector = TrackInfoDataCollector(genome, trackName)
    for chrName in collector.getPreProcessedChrs(allowOverlaps):
        chrDir = createDirPath(trackName, genome, chrName, allowOverlaps)
        assert os.path.exists(chrDir), 'Path does not exist: ' + chrDir
        assert os.path.isdir(chrDir), 'Path is not a directory: ' + chrDir
        shutil.rmtree(chrDir)
def _createPreProcFiles(self):
    """Write the elements of ``self._geSource`` for ``self._chr`` to an ``OutputDirectory``.

    The collector meta data is always updated from the source. Nothing is
    written if the collector reports zero elements for the chromosome. When
    ``self._mode`` is not ``'Real'``, the source is iterated to the end and
    no output is written.
    """
    source = self._geSource
    collector = TrackInfoDataCollector(self._genome, self._trackName)
    collector.updateMetaDataForFinalization(
        source.getFileSuffix(), source.getPrefixList(),
        source.getValDataType(), source.getValDim(),
        source.getEdgeWeightDataType(), source.getEdgeWeightDim(),
        source.hasUndirectedEdges(), source.getVersion(),
        PreProcessUtils.constructId(source))

    if collector.getNumElements(self._chr, self._allowOverlaps) == 0:
        return

    if self._mode != 'Real':
        # Still iterate through the whole source, but write nothing.
        for _ in self._geSource:
            pass
        return

    dirPath = createDirPath(self._trackName, self._genome, self._chr,
                            self._allowOverlaps)
    outputDir = OutputDirectory(
        dirPath,
        collector.getPrefixList(self._allowOverlaps),
        collector.getNumElements(self._chr, self._allowOverlaps),
        GenomeInfo.getChrLen(self._genome, self._chr),
        collector.getValDataType(),
        collector.getValDim(),
        collector.getEgdeWeightDataType(),
        collector.getEgdeWeightDim(),
        collector.getMaxNumEdges(self._chr, self._allowOverlaps),
        collector.getMaxStrLens(self._chr, self._allowOverlaps))

    if self._geSource.isSliceSource():
        writeFunc = outputDir.writeRawSlice
    else:
        writeFunc = outputDir.writeElement

    for element in self._geSource:
        writeFunc(element)

    collector.appendPreProcessedChr(self._allowOverlaps, self._chr)
    outputDir.close()
def getTrackData(self, trackName, genome, chr, allowOverlaps, forceChrFolders=False):
    """Collect the files of a track directory into a ``TrackData`` object.

    If a bounding region shelve file exists and ``forceChrFolders`` is
    false, ``chr`` is replaced by ``None`` before the directory path is
    built. Entries whose name starts with ``'.'`` and sub-directories are
    skipped. A bounding region file is attached as
    ``trackData.boundingRegionShelve`` (shared through ``self._fileDict``);
    every other file is parsed with ``parseMemmapFileFn`` and stored under
    its prefix.

    Raises:
        AssertionError: if two files yield the same prefix.
    """
    trackData = TrackData()
    brShelve = BoundingRegionShelve(genome, trackName, allowOverlaps)

    useChrFolder = forceChrFolders or not brShelve.fileExists()
    if not useChrFolder:
        chr = None

    dirPath = createDirPath(trackName, genome, chr, allowOverlaps)
    for entry in os.listdir(dirPath):
        entryPath = dirPath + os.sep + entry
        if entry[0] == '.' or os.path.isdir(entryPath):
            continue

        if not isBoundingRegionFileName(entry):
            prefix, elementDim, dtypeDim, dtype = parseMemmapFileFn(entry)
            assert prefix not in trackData
            trackData[prefix] = self._getFile(chr, dirPath, entryPath,
                                              elementDim, dtype, dtypeDim)
            continue

        # Register the shelve once per path, then hand out the stored object.
        if entryPath not in self._fileDict:
            self._fileDict[entryPath] = brShelve
        trackData.boundingRegionShelve = self._fileDict[entryPath]

    return trackData
def merge(genome, trackName, allowOverlaps):
    """Merge the per-chromosome memmap arrays of a track into one file per array.

    For each array name found in the first existing chromosome folder, the
    arrays of all existing chromosomes are combined with
    ``ChrMemmapFolderMerger.mergeArrays`` (in ``existingChrList`` order) and
    written as a new memmap file in the track directory.

    Raises:
        EmptyGESourceError: if no chromosome folder is yielded by
            ``ChrMemmapFolderMerger._existingChrIter``.
    """
    path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
    chrList = TrackInfoDataCollector(genome, trackName).getPreProcessedChrs(allowOverlaps)
    existingChrList = [chr for chr in ChrMemmapFolderMerger._existingChrIter(path, chrList)]
    if len(existingChrList) == 0:
        raise EmptyGESourceError('No data lines has been read from source file (probably because it is empty).')

    # The first chromosome determines which arrays are merged.
    firstChrTrackData = TrackSource().getTrackData(trackName, genome, existingChrList[0], allowOverlaps, forceChrFolders=True)
    # The key list is taken up front; entries are deleted inside the loop.
    arrayList = firstChrTrackData.keys()
    for arrayName in arrayList:
        mergedArray = firstChrTrackData[arrayName][:]
        # Items 1 and 2 of the parsed file name are elementDim and dtypeDim.
        elementDim, dtypeDim = parseMemmapFileFn(firstChrTrackData[arrayName].filename)[1:3]
        # presumably dropped early to release the underlying file - TODO confirm
        del firstChrTrackData[arrayName]
        for chr in existingChrList[1:]:
            chrTrackData = TrackSource().getTrackData(trackName, genome, chr, allowOverlaps, forceChrFolders=True)
            mergedArray = ChrMemmapFolderMerger.mergeArrays(mergedArray, np.array(chrTrackData[arrayName][:]))
            elementDimNew, dtypeDimNew = parseMemmapFileFn(chrTrackData[arrayName].filename)[1:3]
            # The merged file uses the largest dimensions seen in any chromosome.
            elementDim = max(elementDim, elementDimNew)
            dtypeDim = max(dtypeDim, dtypeDimNew)
            del chrTrackData[arrayName]
        mergedFn = createMemmapFileFn(path, arrayName, elementDim, dtypeDim, str(mergedArray.dtype))
        # Create (or overwrite) the merged memmap file and copy the data into it.
        f = np.memmap(mergedFn, dtype=mergedArray.dtype, mode='w+', shape=mergedArray.shape)
        f[:] = mergedArray
        f.flush()
        # Drop both references before the next array is merged.
        del f
        del mergedArray
def renameExistingStdTrackIfNeeded(cls, genome, stdTrackName):
    """Make ``stdTrackName`` available as a symlink to an existing sibling directory.

    For both overlap variants, the parent directory of the track is
    inspected. If it contains real (non-symlink) directories but no entry
    named after the last element of ``stdTrackName``, a symlink with that
    name is created pointing to the first real directory. A symlink with
    the requested name that does not lead to a directory is removed first.
    If a link was created, the ``TrackInfo`` of the old track name is
    stored under ``stdTrackName``.

    Args:
        genome: genome passed on to ``createDirPath`` and ``TrackInfo``.
        stdTrackName: track name as a list; the last element is the
            directory name that must exist.
    """
    oldTrackName = None
    for allowOverlaps in [False, True]:
        parentDir = createDirPath(stdTrackName[:-1], genome, allowOverlaps=allowOverlaps)
        if not os.path.exists(parentDir):
            continue

        dirContents = os.listdir(parentDir)
        realDirs = [
            x for x in dirContents
            if os.path.isdir(os.path.join(parentDir, x))
            and not os.path.islink(os.path.join(parentDir, x))
        ]
        reqDirName = stdTrackName[-1]
        reqDirPath = os.path.join(parentDir, reqDirName)

        from gold.application.LogSetup import logMessage
        logMessage('Checking ' + reqDirPath)

        # This is to fix a bug that ended in the symlink pointing to a file.
        # os.path.isdir() follows the link itself, so relative link targets
        # (as created by os.symlink below) are resolved against parentDir.
        # Checking os.readlink()'s result directly would resolve them against
        # the current working directory and delete perfectly valid links.
        if os.path.islink(reqDirPath) and not os.path.isdir(reqDirPath):
            os.remove(reqDirPath)
            logMessage('Removed ' + reqDirPath)

        # NOTE(review): dirContents was listed before a possible removal
        # above, so a link removed in this pass is only recreated on the next
        # call - confirm this is intended.
        if realDirs and reqDirName not in dirContents:
            oldTrackName = stdTrackName[:-1] + [realDirs[0]]
            # The target is relative, i.e. resolved within parentDir.
            os.symlink(realDirs[0], reqDirPath)

    if oldTrackName is not None:
        ti = TrackInfo(genome, oldTrackName)
        ti.trackName = stdTrackName
        ti.store()
def removeOutdatedPreProcessedFiles(trackName, geSource, allowOverlaps, mode): genome = geSource.getGenome() if PreProcessUtils.preProcFilesExist(trackName, geSource, allowOverlaps) and not \ TrackInfoDataCollector(genome, trackName).hasRemovedPreProcFiles(allowOverlaps): dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps) assert( dirPath.startswith(PROCESSED_DATA_PATH) ) if mode == 'Real': print 'Removing outdated preprocessed data: ', dirPath for fn in os.listdir(dirPath): fullFn = os.path.join(dirPath, fn) if os.path.isfile(fullFn): os.unlink(fullFn) if os.path.isdir(fullFn): if not PreProcessUtils._isSubTrackDirectory(fullFn): shutil.rmtree(fullFn) else: print 'Would now have removed outdated preprocessed data if real run: ', dirPath TrackInfoDataCollector(genome, trackName).updateRemovedPreProcFilesFlag(allowOverlaps, True) if mode == 'Real': ti = TrackInfo(genome, trackName) ti.resetTimeOfPreProcessing()
def _createChildren(self):
    """Register the child statistics: a format check, the full statistic, and one per subtype.

    Children are added in this order: a ``FormatSpecStat`` on
    ``self._track``, the raw statistic on the unsplit track(s), and then
    the raw statistic for the sub-tracks ``'0'`` and ``'1'`` of
    ``self._track``. ``self._indexOfFirstSubCatChild`` records where the
    per-subtype children start. ``self._track2`` is optional; ``None`` is
    passed on when it is not set.

    Raises:
        IncompatibleTracksError: if the ``createDirPath`` directory of a
            sub-track does not exist.
    """
    # 'rawStatistic' is consumed here and must not be forwarded.
    kwArgs = copy(self._kwArgs)
    kwArgs.pop('rawStatistic', None)

    track2 = getattr(self, '_track2', None)

    formatReq = TrackFormatReq(dense=False, val='tc')
    self._addChild(FormatSpecStat(self._region, self._track, formatReq))

    # This will actually compute, without any use for it.
    self._addChild(
        self._rawStatistic(self._region, self._track, track2, **kwArgs))

    self._indexOfFirstSubCatChild = len(self._children)

    for subtype in ['0', '1']:
        subTrackName = self._track.trackName + [subtype]
        subTrackDir = createDirPath(subTrackName, self.getGenome())
        if not os.path.exists(subTrackDir):
            raise IncompatibleTracksError

        subTrack = Track(subTrackName)
        subTrack.formatConverters = self._track.formatConverters
        self._addChild(
            self._rawStatistic(self._region, subTrack, track2, **kwArgs))
def removeChrMemmapFolders(genome, trackName, allowOverlaps):
    """Recursively delete the ``createDirPath`` directory of every preprocessed chromosome.

    The chromosome list comes from
    ``PreProcMetaDataCollector.getPreProcessedChrs``.

    Raises:
        AssertionError: if a chromosome path is missing or is not a
            directory.
    """
    collector = PreProcMetaDataCollector(genome, trackName)
    for chrName in collector.getPreProcessedChrs(allowOverlaps):
        chrDir = createDirPath(trackName, genome, chrName, allowOverlaps)
        assert os.path.exists(chrDir), 'Path does not exist: ' + chrDir
        assert os.path.isdir(chrDir), 'Path is not a directory: ' + chrDir
        shutil.rmtree(chrDir)
def _renameTrackNameIfIllegal(self, trackName):
    """Return ``trackName`` with each element passed through ``replaceIllegalElements``.

    If the result differs from ``trackName`` and the ``createDirPath``
    directory of the original name exists, the track is renamed via
    ``renameTrack`` as well.
    """
    from gold.description.AnalysisDefHandler import replaceIllegalElements

    legalTrackName = []
    for part in trackName:
        legalTrackName.append(replaceIllegalElements(part))

    if legalTrackName != trackName:
        existingDir = createDirPath(trackName, self._genome)
        if os.path.exists(existingDir):
            renameTrack(self._genome, trackName, legalTrackName)

    return legalTrackName
def renameProcTrack(genome, oldTn, newTn):
    """Move the ``createDirPath`` directories of a track to a new track name.

    Handles the directory without overlaps first, then the one with
    overlaps. A missing source directory only produces a warning. When
    ``ONLY_SIMULATION`` is true, the intended move is printed and nothing
    is changed on disk.

    Raises:
        AssertionError: if the target path already exists (real runs only).
    """
    for overlapsAllowed in (False, True):
        srcPath = createDirPath(oldTn, genome, allowOverlaps=overlapsAllowed)
        overlapsText = 'with overlaps' if overlapsAllowed else ' without overlaps'

        if not os.path.exists(srcPath):
            print('Warning: TN did not exist as preproc ' + overlapsText)
            continue

        print('(renaming TN in preproc ' + overlapsText + ')')
        dstPath = createDirPath(newTn, genome, allowOverlaps=overlapsAllowed)

        if ONLY_SIMULATION:
            print('Would move %s to %s' % (srcPath, dstPath))
            continue

        assert not os.path.exists(dstPath), \
            'ERROR: Target path already exists: ' + dstPath
        ensurePathExists(dstPath)
        shutil.move(srcPath, dstPath)
def merge(genome, trackName, allowOverlaps):
    """Merge the per-chromosome memmap arrays of a track into one file per array.

    For each array name found in the first existing chromosome folder, the
    arrays of all existing chromosomes are combined with
    ``ChrMemmapFolderMerger.mergeArrays`` (in ``existingChrList`` order) and
    written as a new memmap file in the track directory. The chromosome
    list is sorted first unless the track format reports a dense
    representation.

    Raises:
        EmptyGESourceError: if no chromosome folder is yielded by
            ``ChrMemmapFolderMerger._existingChrIter``.
    """
    path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
    collector = PreProcMetaDataCollector(genome, trackName)
    chrList = collector.getPreProcessedChrs(allowOverlaps)
    # Non-dense formats are merged in sorted chromosome order.
    if not collector.getTrackFormat().reprIsDense():
        chrList = sorted(chrList)
    existingChrList = [
        chr for chr in ChrMemmapFolderMerger._existingChrIter(path, chrList)
    ]
    if len(existingChrList) == 0:
        raise EmptyGESourceError(
            'No data lines has been read from source file (probably because it is empty).'
        )

    # The first chromosome determines which arrays are merged.
    firstChrTrackData = TrackSource().getTrackData(trackName,
                                                   genome,
                                                   existingChrList[0],
                                                   allowOverlaps,
                                                   forceChrFolders=True)
    # The key list is taken up front; entries are deleted inside the loop.
    arrayList = firstChrTrackData.keys()
    for arrayName in arrayList:
        mergedArray = firstChrTrackData[arrayName][:]
        # Items 1 and 2 of the parsed file name are elementDim and dtypeDim.
        elementDim, dtypeDim = parseMemmapFileFn(
            firstChrTrackData[arrayName].filename)[1:3]
        # presumably dropped early to release the underlying file - TODO confirm
        del firstChrTrackData[arrayName]
        for chr in existingChrList[1:]:
            chrTrackData = TrackSource().getTrackData(trackName,
                                                      genome,
                                                      chr,
                                                      allowOverlaps,
                                                      forceChrFolders=True)
            mergedArray = ChrMemmapFolderMerger.mergeArrays(
                mergedArray, np.array(chrTrackData[arrayName][:]))
            elementDimNew, dtypeDimNew = parseMemmapFileFn(
                chrTrackData[arrayName].filename)[1:3]
            # The merged file uses the largest dimensions seen in any chromosome.
            elementDim = max(elementDim, elementDimNew)
            dtypeDim = max(dtypeDim, dtypeDimNew)
            del chrTrackData[arrayName]
        mergedFn = createMemmapFileFn(path, arrayName, elementDim, dtypeDim,
                                      str(mergedArray.dtype))
        # Create (or overwrite) the merged memmap file and copy the data into it.
        f = np.memmap(mergedFn,
                      dtype=mergedArray.dtype,
                      mode='w+',
                      shape=mergedArray.shape)
        f[:] = mergedArray
        f.flush()
        # Drop both references before the next array is merged.
        del f
        del mergedArray
def _preProcess(self, trackName, noOverlapsFileCount=None, withOverlapsFileCount=None, \
                noOverlapsChrElCount=None, withOverlapsChrElCount=None, customBins={}):
    """Preprocess a test track under profiling and check the resulting files.

    Both ``createDirPath`` directories of the track are removed first, then
    ``PreProcessAllTracksJob(...).process()`` is run through
    ``self._runWithProfiling``. Each check is performed only if the
    corresponding argument is not ``None``.

    Args:
        trackName: track name; ``self.TRACK_NAME_PREFIX`` is prepended.
        noOverlapsFileCount: expected number of non-hidden entries in the
            directory without overlaps.
        withOverlapsFileCount: same for the directory with overlaps.
        noOverlapsChrElCount: passed to ``self.assertChrElCounts`` with
            ``False`` as the overlaps flag.
        withOverlapsChrElCount: passed to ``self.assertChrElCounts`` with
            ``True`` as the overlaps flag.
        customBins: forwarded unchanged to ``self.assertChrElCounts``; it is
            not modified here.
    """
    trackName = self.TRACK_NAME_PREFIX + trackName
    noOverlapsPath = createDirPath(trackName, self.GENOME, allowOverlaps=False)
    withOverlapsPath = createDirPath(trackName, self.GENOME, allowOverlaps=True)
    self._removeDir(noOverlapsPath, trackName)
    self._removeDir(withOverlapsPath, trackName)

    # The second argument is the symbol table for the statement string. It
    # has to be a dict mapping the name to the class; the former
    # ``{'name', cls}`` was a set literal and cannot be used as a namespace.
    self._runWithProfiling(
        'PreProcessAllTracksJob(' + repr(self.GENOME) + ',' + repr(trackName) +
        ', username="******").process()',
        {'PreProcessAllTracksJob': PreProcessAllTracksJob})

    if noOverlapsFileCount is not None:
        self.assertEquals(
            noOverlapsFileCount,
            len([x for x in os.listdir(noOverlapsPath)
                 if not x.startswith('.')]))
    if withOverlapsFileCount is not None:
        self.assertEquals(
            withOverlapsFileCount,
            len([x for x in os.listdir(withOverlapsPath)
                 if not x.startswith('.')]))
    if noOverlapsChrElCount is not None:
        self.assertChrElCounts(trackName, noOverlapsChrElCount, False,
                               customBins)
    if withOverlapsChrElCount is not None:
        self.assertChrElCounts(trackName, withOverlapsChrElCount, True,
                               customBins)
    self._storeProfile()
def _renameTrackNameIfIllegal(self, trackName):
    """Return ``trackName`` with each element passed through ``replaceIllegalElementsInTrackNames``.

    If the result differs from ``trackName`` and the ``createDirPath``
    directory of the original name exists, the track is renamed via
    ``renameTrack`` as well.
    """
    from gold.util.CommonFunctions import replaceIllegalElementsInTrackNames

    legalTrackName = []
    for part in trackName:
        legalTrackName.append(replaceIllegalElementsInTrackNames(part))

    if legalTrackName != trackName:
        existingDir = createDirPath(trackName, self._genome)
        if os.path.exists(existingDir):
            renameTrack(self._genome, trackName, legalTrackName)

    return legalTrackName
def setUp(self):
    """Silence stdout, remove the test track directory and patch module globals.

    The original ``sys.stdout`` is kept in ``self.stdout``. The test track
    is ``['intensity_test']`` in genome ``'TestGenome'`` on chromosome
    ``'chrM'``.
    """
    # Keep the real stream so it can be restored later.
    self.stdout = sys.stdout
    sys.stdout = open('/dev/null', 'w')

    trackName = ['intensity_test']
    genome = 'TestGenome'
    self._trackName = trackName
    self._genome = genome
    self._chr = 'chrM'

    trackDir = createDirPath(trackName, genome)
    self._path = trackDir
    # Guard before deleting: the path must end with the track's own name.
    assert trackDir.endswith(trackName[-1])
    removeDirectoryTree(trackDir)

    gold.util.CompBinManager.COMP_BIN_SIZE = config.Config.COMP_BIN_SIZE
    gold.statistic.CreateFunctionTrackStat.GenomeInfo = MyGenomeInfo
def __init__(self, genome, trackName, allowOverlaps):
    """Set up the shelve file name and the empty in-memory state.

    The file name is ``BR_SHELVE_FILE_NAME`` inside the ``createDirPath``
    directory of the track. ``self._minimalRegion`` is the first element of
    ``MinimalBinSource(genome)``.

    Raises:
        AssertionError: if ``allowOverlaps`` is neither ``False`` nor
            ``True``.
    """
    assert allowOverlaps in [False, True]

    self._genome = genome
    self._trackName = trackName

    trackDir = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
    self._fn = trackDir + os.sep + BR_SHELVE_FILE_NAME

    self._contents = {}  # None
    self._updatedChrs = set()

    from quick.application.UserBinSource import MinimalBinSource
    minimalBins = MinimalBinSource(genome)
    self._minimalRegion = minimalBins[0]
def getSubtypes(genome, trackName, fullAccess=False):
    """Return the sub-track names of ``trackName``, sorted case-insensitively.

    Directory entries are dropped if they start with ``'.'`` or ``'_'``,
    are plain files, are valid chromosome names, or are named
    ``'external'`` or ``'ucsc'``. Unless ``fullAccess`` is true or the
    track is a literature track, sub-tracks whose ``TrackInfo`` is marked
    private are dropped as well.
    """
    dirPath = createDirPath(trackName, genome)

    candidates = []
    for entry in ProcTrackOptions._getDirContents(genome, trackName):
        if entry[0] in ['.', '_']:
            continue
        if os.path.isfile(dirPath + os.sep + entry):
            continue
        if GenomeInfo.isValidChr(genome, entry):
            continue
        candidates.append(entry)

    # fixme, just temporarily:, these dirs should start with _
    subtypes = [name for name in candidates
                if name not in ['external', 'ucsc']]

    if not (fullAccess or ProcTrackOptions._isLiteratureTrack(genome, trackName)):
        publicSubtypes = []
        for name in subtypes:
            if not TrackInfo(genome, trackName + [name]).private:
                publicSubtypes.append(name)
        subtypes = publicSubtypes

    return sorted(subtypes, key=str.lower)
def renameTrack(genome, oldTn, newTn):
    """Rename a track: first its track info, then its standardized data.

    An exception raised by ``renameStdTrack`` is printed and not
    propagated.

    Raises:
        AssertionError: if ``newTn`` equals the leading part of ``oldTn``,
            or if the ``createDirPath`` directory of ``oldTn`` does not
            exist.
    """
    sameLengthPrefix = oldTn[:len(newTn)]
    assert newTn != sameLengthPrefix, \
        'ERROR: it is not allowed to move a track into itself (%s -> %s)' % (
            ':'.join(oldTn), ':'.join(newTn))

    # First check to filter out misspellings..
    processedDir = createDirPath(oldTn, genome)
    assert os.path.exists(processedDir), \
        'ERROR: TN did not exist in processed tracks: ' + processedDir

    # renaming TI first, in case of problems, such as incomplete records..
    renameTrackInfo(genome, oldTn, newTn)

    try:
        renameStdTrack(genome, oldTn, newTn)
    except Exception as err:
        print(err)
def getSubtypes(genome, trackName, fullAccess=False):
    """Return the sub-track names of ``trackName``, sorted with ``smartStrLower`` as key.

    Directory entries are dropped if they start with ``'.'`` or ``'_'``,
    are plain files, or are valid chromosome names. Unless ``fullAccess``
    is true or the track is a literature track, ``'external'`` and
    sub-tracks whose ``TrackInfo`` is marked private are dropped as well.
    """
    dirPath = createDirPath(trackName, genome)

    subtypes = []
    for entry in ProcTrackOptions._getDirContents(genome, trackName):
        if entry[0] in ['.', '_']:
            continue
        if os.path.isfile(dirPath + os.sep + entry):
            continue
        if GenomeInfo.isValidChr(genome, entry):
            continue
        subtypes.append(entry)

    if not (fullAccess or ProcTrackOptions._isLiteratureTrack(genome, trackName)):
        visibleSubtypes = []
        for name in subtypes:
            if name in ['external']:
                continue
            if TrackInfo(genome, trackName + [name]).private:
                continue
            visibleSubtypes.append(name)
        subtypes = visibleSubtypes

    return sorted(subtypes, key=smartStrLower)
def _createOutputDirectory(self, genome, chr, trackName, allowOverlaps, geSourceManager):
    """Build the ``OutputDirectory`` for one chromosome of a track.

    The directory path comes from ``createDirPath``, the chromosome length
    from ``GenomeInfo.getChrLen``, and all remaining constructor arguments
    from ``geSourceManager``.
    """
    dirPath = createDirPath(trackName, genome, chr, allowOverlaps)

    from quick.util.GenomeInfo import GenomeInfo

    manager = geSourceManager
    outputDirArgs = (
        dirPath,
        manager.getPrefixList(),
        manager.getNumElementsForChr(chr),
        GenomeInfo.getChrLen(genome, chr),
        manager.getValDataType(),
        manager.getValDim(),
        manager.getEdgeWeightDataType(),
        manager.getEdgeWeightDim(),
        manager.getMaxNumEdgesForChr(chr),
        manager.getMaxStrLensForChr(chr),
        manager.isSorted(),
    )
    return OutputDirectory(*outputDirArgs)
def _preProcess(self, trackName, noOverlapsFileCount=None, withOverlapsFileCount=None, \
                noOverlapsChrElCount=None, withOverlapsChrElCount=None, customBins={}):
    """Preprocess a test track under profiling and check the resulting files.

    Both ``createDirPath`` directories of the track are removed first, then
    ``PreProcessAllTracksJob(...).process()`` is run through
    ``self._runWithProfiling``. Each check is performed only if the
    corresponding argument is not ``None``; file counts ignore entries
    starting with ``'.'``. ``customBins`` is forwarded unchanged to
    ``self.assertChrElCounts``.
    """
    trackName = self.TRACK_NAME_PREFIX + trackName

    noOverlapsPath = createDirPath(trackName, self.GENOME, allowOverlaps=False)
    withOverlapsPath = createDirPath(trackName, self.GENOME, allowOverlaps=True)
    for procDir in (noOverlapsPath, withOverlapsPath):
        self._removeDir(procDir, trackName)

    runStr = 'PreProcessAllTracksJob(' + repr(self.GENOME) + ',' + \
        repr(trackName) + ', username="******").process()'
    self._runWithProfiling(runStr, globals(), locals())

    # Directory-level checks: (expected count, directory).
    for expectedCount, procDir in ((noOverlapsFileCount, noOverlapsPath),
                                   (withOverlapsFileCount, withOverlapsPath)):
        if expectedCount is None:
            continue
        visibleEntries = [x for x in os.listdir(procDir)
                          if not x.startswith('.')]
        self.assertEquals(expectedCount, len(visibleEntries))

    # Element-level checks: (expected counts, allowOverlaps flag).
    for expectedElCounts, overlapsAllowed in ((noOverlapsChrElCount, False),
                                              (withOverlapsChrElCount, True)):
        if expectedElCounts is None:
            continue
        self.assertChrElCounts(trackName, expectedElCounts, overlapsAllowed,
                               customBins)

    self._storeProfile()
def renameExistingStdTrackIfNeeded(cls, genome, stdTrackName):
    """Rename a lone sibling directory to the last element of ``stdTrackName``.

    For both overlap variants: if the parent directory of the track holds
    exactly one entry and that entry has a different name, it is renamed.
    If a rename took place, the ``TrackInfo`` of the old track name is
    stored under ``stdTrackName``.
    """
    oldTrackName = None
    wantedName = stdTrackName[-1]

    for overlapsAllowed in (False, True):
        parentDir = createDirPath(stdTrackName[:-1], genome,
                                  allowOverlaps=overlapsAllowed)
        if not os.path.exists(parentDir):
            continue

        entries = os.listdir(parentDir)
        if len(entries) != 1:
            continue
        existingName = entries[0]
        if existingName == wantedName:
            continue

        oldTrackName = stdTrackName[:-1] + [existingName]
        os.rename(parentDir + os.sep + existingName,
                  parentDir + os.sep + wantedName)

    if oldTrackName is not None:
        trackInfo = TrackInfo(genome, oldTrackName)
        trackInfo.trackName = stdTrackName
        trackInfo.store()
def preProcFilesExist(genome, trackName, allowOverlaps):
    """Return whether preprocessed files exist for the track, using the collector flag.

    If the collector already holds a value that is not ``None``, it is
    returned as is. Otherwise the answer is ``True`` when a bounding region
    shelve file exists, the result of
    ``PreProcessUtils._hasOldTypeChromSubDirs`` when only the track
    directory exists, and ``False`` when it does not. The computed value is
    written back to the collector.
    """
    collector = PreProcMetaDataCollector(genome, trackName)
    cachedFlag = collector.preProcFilesExist(allowOverlaps)
    if cachedFlag is not None:
        return cachedFlag

    dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
    if BoundingRegionShelve(genome, trackName, allowOverlaps).fileExists():
        filesExist = True
    elif os.path.exists(dirPath):
        filesExist = PreProcessUtils._hasOldTypeChromSubDirs(dirPath, genome)
    else:
        filesExist = False

    collector.updatePreProcFilesExistFlag(allowOverlaps, filesExist)
    return filesExist
def preProcFilesExist(trackName, geSource, allowOverlaps):
    """Return whether preprocessed files exist for the track, using the collector flag.

    If the collector already holds a value that is not ``None``, it is
    returned as is. Otherwise: with a bounding region shelve file present,
    the answer is whether the track directory holds a plain file whose name
    before the first ``'.'`` is ``start``, ``end``, ``val`` or ``edges``;
    without one, it is whether the directory exists and contains a
    sub-directory rejected by ``PreProcessUtils._isSubTrackDirectory``. The
    computed value is written back to the collector.
    """
    genome = geSource.getGenome()
    cachedFlag = TrackInfoDataCollector(genome, trackName).preProcFilesExist(allowOverlaps)
    if cachedFlag is not None:
        return cachedFlag

    dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
    filesExist = False

    if BoundingRegionShelve(genome, trackName, allowOverlaps).fileExists():
        for entry in os.listdir(dirPath):
            if not os.path.isfile(os.path.join(dirPath, entry)):
                continue
            if entry.split('.')[0] in ['start', 'end', 'val', 'edges']:
                filesExist = True
                break
    elif os.path.exists(dirPath):
        for entry in os.listdir(dirPath):
            if not os.path.isdir(os.path.join(dirPath, entry)):
                continue
            if not PreProcessUtils._isSubTrackDirectory(os.path.join(dirPath, entry)):
                filesExist = True
                break

    TrackInfoDataCollector(genome, trackName).updatePreProcFilesExistFlag(allowOverlaps, filesExist)
    return filesExist
def __init__(self, genome, trackName, allowOverlaps):
    """Set up the shelve file name and the empty in-memory state.

    The file name is ``BR_SHELVE_FILE_NAME`` inside the ``createDirPath``
    directory of the track. ``self._minimalRegion`` is the first element of
    ``MinimalBinSource(genome)``, or ``None`` if that call returns
    ``None``.

    Raises:
        AssertionError: if ``allowOverlaps`` is neither ``False`` nor
            ``True``.
    """
    assert allowOverlaps in [False, True]

    self._genome = genome
    self._trackName = trackName

    trackDir = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
    self._fn = trackDir + os.sep + BR_SHELVE_FILE_NAME

    self._contents = {}  # None
    self._updatedChrs = set()

    from quick.application.UserBinSource import MinimalBinSource
    minimalBinList = MinimalBinSource(genome)
    if minimalBinList is not None:
        self._minimalRegion = minimalBinList[0]
    else:
        self._minimalRegion = None
def renameTrack(genome, oldTn, newTn):
    """Rename a track: first its track info, then its standardized data.

    An exception raised by ``renameStdTrack`` is printed and not
    propagated.

    Raises:
        AssertionError: if ``newTn`` equals the leading part of ``oldTn``,
            or if the ``createDirPath`` directory of ``oldTn`` does not
            exist.
    """
    sameLengthPrefix = oldTn[:len(newTn)]
    assert newTn != sameLengthPrefix, \
        'ERROR: it is not allowed to move a track into itself (%s -> %s)' % (
            ':'.join(oldTn), ':'.join(newTn))

    # First check to filter out misspellings..
    processedDir = createDirPath(oldTn, genome)
    assert os.path.exists(processedDir), \
        'ERROR: TN did not exist in processed tracks: ' + processedDir

    # renaming TI first, in case of problems, such as incomplete records..
    renameTrackInfo(genome, oldTn, newTn)

    try:
        renameStdTrack(genome, oldTn, newTn)
    except Exception as err:
        print(err)
def _createChildren(self):
    """Register the child statistics: raw data for both tracks, the full statistic, and one per subtype pair.

    Children are added in this order: a ``RawDataStat`` for
    ``self._track``, one for ``self._track2``, the raw statistic on the
    unsplit tracks, and then the raw statistic for the subtype pairs
    ``('0','0')``, ``('0','1')``, ``('1','0')``, ``('1','1')``.

    Raises:
        IncompatibleTracksError: if the ``createDirPath`` directory of any
            sub-track does not exist.
    """
    # 'rawStatistic' is consumed here and must not be forwarded.
    kwArgs = copy(self._kwArgs)
    kwArgs.pop('rawStatistic', None)

    for track in (self._track, self._track2):
        self._addChild(
            RawDataStat(self._region, track,
                        TrackFormatReq(dense=False, val='tc')))

    self._addChild(
        self._rawStatistic(self._region, self._track, self._track2, **kwArgs))

    for subtype1 in ['0', '1']:
        for subtype2 in ['0', '1']:
            subName1 = self._track.trackName + [subtype1]
            subName2 = self._track2.trackName + [subtype2]

            bothExist = os.path.exists(createDirPath(subName1, self.getGenome())) \
                and os.path.exists(createDirPath(subName2, self.getGenome()))
            if not bothExist:
                raise IncompatibleTracksError

            subTrack1 = Track(subName1)
            subTrack1.formatConverters = self._track.formatConverters
            subTrack2 = Track(subName2)
            subTrack2.formatConverters = self._track2.formatConverters

            self._addChild(
                self._rawStatistic(self._region, subTrack1, subTrack2,
                                   **kwArgs))
def _createChildren(self):
    """Register the child statistics: raw data for both tracks, the full statistic, and one per subtype pair.

    Children are added in this order: a ``RawDataStat`` for
    ``self._track``, one for ``self._track2``, the raw statistic on the
    unsplit tracks, and then the raw statistic for the subtype pairs
    ``('0','0')``, ``('0','1')``, ``('1','0')``, ``('1','1')``.

    Raises:
        IncompatibleTracksError: if the ``createDirPath`` directory of any
            sub-track does not exist.
    """
    # 'rawStatistic' is consumed here and must not be forwarded.
    kwArgs = copy(self._kwArgs)
    kwArgs.pop('rawStatistic', None)

    for track in (self._track, self._track2):
        self._addChild(
            RawDataStat(self._region, track,
                        TrackFormatReq(dense=False, val='tc')))

    self._addChild(
        self._rawStatistic(self._region, self._track, self._track2, **kwArgs))

    for subtype1 in ['0', '1']:
        for subtype2 in ['0', '1']:
            subName1 = self._track.trackName + [subtype1]
            subName2 = self._track2.trackName + [subtype2]

            bothExist = os.path.exists(createDirPath(subName1, self.getGenome())) \
                and os.path.exists(createDirPath(subName2, self.getGenome()))
            if not bothExist:
                raise IncompatibleTracksError

            subTrack1 = Track(subName1)
            subTrack1.formatConverters = self._track.formatConverters
            subTrack2 = Track(subName2)
            subTrack2.formatConverters = self._track2.formatConverters

            self._addChild(
                self._rawStatistic(self._region, subTrack1, subTrack2,
                                   **kwArgs))
def execute(choices, galaxyFn=None, username=''):
    """Move the selected genome into a ``.trash`` folder under every data path.

    ``choices[0]`` is the genome. The paths are ``NONSTANDARD_DATA_PATH``,
    ``ORIG_DATA_PATH``, ``PARSING_ERROR_DATA_PATH``, ``NMER_CHAIN_DATA_PATH``
    and the two ``createDirPath('', '', ...)`` roots (without overlaps,
    then with overlaps). Paths that do not contain the genome are skipped.
    Progress is printed to standard out. ``galaxyFn`` and ``username`` are
    not used.
    """
    genome = choices[0]
    print('Executing... starting to remove ' + choices[0] + os.linesep)

    basePaths = [NONSTANDARD_DATA_PATH, ORIG_DATA_PATH,
                 PARSING_ERROR_DATA_PATH, NMER_CHAIN_DATA_PATH]
    for overlapsAllowed in [False, True]:
        basePaths.append(createDirPath('', '', allowOverlaps=overlapsAllowed))

    for basePath in basePaths:
        origPath = os.sep.join([basePath, genome])
        trashPath = os.sep.join([basePath, ".trash", genome])
        if not os.path.exists(origPath):
            continue

        print('Moving ' + genome + ' to .trash in folder: ' + basePath + os.linesep)
        ensurePathExists(trashPath)
        shutil.move(origPath, trashPath)
def execute(choices, galaxyFn=None, username=''):
    """Rename a track in the main genome and in every additionally selected genome.

    ``choices[0]`` is the main genome, ``choices[1]`` and ``choices[2]``
    are the old and new track names (``':'``-separated), and ``choices[3]``
    maps genome keys to a selection flag. An additional genome is included
    only if it is selected and the ``createDirPath`` directory of the old
    track name exists for it. One line is printed per renamed genome.
    ``galaxyFn`` is not used.
    """
    oldTn = choices[1]
    newTn = choices[2]
    selection = choices[3]

    genomes = [choices[0]]
    for genomeEntry in GalaxyInterface.getAllGenomes(username):
        # genomeEntry[0] is the key used in the selection box, genomeEntry[1]
        # the genome handed to createDirPath / renameTrack.
        if not selection.get(genomeEntry[0]):
            continue
        if isdir(createDirPath(oldTn.split(':'), genomeEntry[1])):
            genomes.append(genomeEntry[1])

    for genome in genomes:
        renameTrack(genome, oldTn.split(':'), newTn.split(':'))
        print('%s renamed to %s in genome %s.' % (oldTn, newTn, genome))
def extract(cls, trackName, regionList, fn, fileFormatName=DEFAULT_FILE_FORMAT_NAME, globalCoords=True, \
            addSuffix=False, asOriginal=False, allowOverlaps=False, ignoreEmpty=False):
    """Compose the elements of a track within ``regionList`` to the file ``fn``.

    Args:
        trackName: track to extract.
        regionList: non-empty collection of regions; the genome is taken
            from its first region.
        fn: output file name.
        fileFormatName: used to look up the composer class, unless a
            composer was found from the original file suffix.
        globalCoords: forwarded to ``TrackGenomeElementSource``.
        addSuffix: if true, the extension of ``fn`` is replaced by the
            composer's default file name suffix.
        asOriginal: if true, first try the composer for the file type
            stored in the track's ``TrackInfo``.
        allowOverlaps: requested overlap mode; silently turned off if no
            directory with overlaps exists for the track.
        ignoreEmpty: forwarded to ``composer.composeToFile``.

    Returns:
        The file name written to if ``composeToFile`` reports success,
        otherwise ``None``.

    Raises:
        AssertionError: if ``regionList`` is empty.
    """
    from gold.origdata.TrackGenomeElementSource import TrackGenomeElementSource

    assert len(regionList) > 0
    # Only the first region is needed to find the genome.
    for region in regionList:
        genome = region.genome
        break

    #To silently extract correctly if track type is dense
    if allowOverlaps:
        allowOverlaps = os.path.exists(
            createDirPath(trackName, genome, allowOverlaps=True))

    trackGESource = TrackGenomeElementSource(genome, trackName, regionList, globalCoords=globalCoords, \
                                             allowOverlaps=allowOverlaps, printWarnings=False)

    composerCls = None
    if asOriginal:
        ti = TrackInfo(genome, trackName)
        if ti.fileType != '':
            try:
                composerCls = getComposerClsFromFileSuffix(ti.fileType)
            except Exception:
                # Best effort: fall back to the composer for fileFormatName.
                # Only Exception is caught, so KeyboardInterrupt and
                # SystemExit are no longer swallowed here.
                pass

    if composerCls is None:
        composerCls = getComposerClsFromFileFormatName(fileFormatName)

    if addSuffix:
        fn = os.path.splitext(fn)[0] + '.' + composerCls.getDefaultFileNameSuffix()

    composer = composerCls(trackGESource)
    ok = composer.composeToFile(fn, ignoreEmpty=ignoreEmpty)

    if ok:
        return fn
def extract(cls, trackName, regionList, fn, fileFormatName=DEFAULT_FILE_FORMAT_NAME, globalCoords=True,
            addSuffix=False, asOriginal=False, allowOverlaps=False, ignoreEmpty=False):
    '''
    Compose the elements of a track within the given regions to the file fn.

    trackName      - track name, passed on to createDirPath, TrackInfo and
                     TrackGenomeElementSource
    regionList     - non-empty collection of regions; the genome is read from
                     the first region only
    fn             - path of the output file
    fileFormatName - format used to look up the composer class, unless
                     asOriginal selects one from the stored file type
    globalCoords   - passed on to TrackGenomeElementSource
    addSuffix      - if true, the extension of fn is replaced by the default
                     file name suffix of the selected composer
    asOriginal     - if true, first try the composer matching the non-empty
                     fileType stored in the TrackInfo of the track
    allowOverlaps  - only honoured if the with-overlaps directory of the
                     track exists; otherwise silently treated as False
    ignoreEmpty    - passed on to composer.composeToFile

    Returns the (possibly suffix-adjusted) file name if composeToFile reports
    success, otherwise None. Raises AssertionError if regionList is empty.
    '''
    from gold.origdata.TrackGenomeElementSource import TrackGenomeElementSource
    from gold.origdata.FileFormatComposer import getComposerClsFromFileFormatName, getComposerClsFromFileSuffix

    assert len(regionList) > 0
    genome = next(iter(regionList)).genome

    #To silently extract correctly if track type is dense
    if allowOverlaps:
        allowOverlaps = os.path.exists(createDirPath(trackName, genome, allowOverlaps=True))

    trackGESource = TrackGenomeElementSource(genome, trackName, regionList, globalCoords=globalCoords,
                                             allowOverlaps=allowOverlaps, printWarnings=False)

    composerCls = None
    if asOriginal:
        ti = TrackInfo(genome, trackName)
        if ti.fileType != '':
            try:
                composerCls = getComposerClsFromFileSuffix(ti.fileType)
            except Exception:
                # Best effort: a failed suffix lookup falls back to the
                # composer for fileFormatName below. Only Exception is
                # caught, so KeyboardInterrupt and SystemExit still propagate.
                pass

    if composerCls is None:
        composerCls = getComposerClsFromFileFormatName(fileFormatName)

    if addSuffix:
        fn = os.path.splitext(fn)[0] + '.' + composerCls.getDefaultFileNameSuffix()

    composer = composerCls(trackGESource)
    ok = composer.composeToFile(fn, ignoreEmpty=ignoreEmpty)
    return fn if ok else None
# RemoveGenomeTool: GUI tool (subclass of GeneralGuiTool) that removes a genome by
# moving its directory into a ".trash" folder under each selected base path.
#
# - ALL_PATHS maps a display label to a base directory; the two
#   'preProcessedTracks' entries are built with createDirPath('', '', allowOverlaps=...).
# - getOptionsBoxPaths returns an OrderedDict mapping every ALL_PATHS label to True.
# - execute: for each selected label whose <base>/<genome> exists, prints a message,
#   calls ensurePathExists on <base>/.trash/<genome> and then moves the genome
#   directory there with shutil.move. Base paths without that genome are skipped.
# - validateAndReturnErrors returns an error text when no genome or no path is
#   selected; otherwise it falls through and returns None.
# NOTE(review): the string literal starting with 'Executing... continues on the next
# physical line in this copy of the code - check it against the original file.
# NOTE(review): execute reads the genome both as choices[0] and as choices.genome;
# presumably these are the same value - confirm.
class RemoveGenomeTool(GeneralGuiTool): ALL_PATHS = OrderedDict([('collectedTracks', NONSTANDARD_DATA_PATH), ('standardizedTracks', ORIG_DATA_PATH), ('parsingErrorTracks', PARSING_ERROR_DATA_PATH), ('nmerChains', NMER_CHAIN_DATA_PATH), ('preProcessedTracks (noOverlaps)', createDirPath('', '', allowOverlaps=False)), ('preProcessedTracks (withOverlaps)', createDirPath('', '', allowOverlaps=True))]) @staticmethod def getToolName(): return "Remove genome" @staticmethod def getInputBoxNames(): return [('Genome', 'genome'), ('From which paths to remove the genome', 'paths')] @staticmethod def getOptionsBoxGenome(): return "__genome__" @classmethod def getOptionsBoxPaths(cls, prevChoices): return OrderedDict([(key, True) for key in cls.ALL_PATHS.keys()]) #@staticmethod #def getOptionsBox3(prevChoices): # return [''] #@staticmethod #def getOptionsBox4(prevChoices): # return [''] #@staticmethod #def getDemoSelections(): # return ['testChoice1','..'] @classmethod def execute(cls, choices, galaxyFn=None, username=''): '''Is called when execute-button is pushed by web-user. Should print output as HTML to standard out, which will be directed to a results page in Galaxy history. If getOutputFormat is anything else than HTML, the output should be written to the file with path galaxyFn.gtr If needed, StaticFile can be used to get a path where additional files can be put (e.g. generated image files). choices is a list of selections made by web-user in each options box. ''' print 'Executing... 
starting to remove ' + choices[0] + os.linesep paths = [ cls.ALL_PATHS[key] for key, val in choices.paths.iteritems() if val ] for p in paths: genome = choices.genome origPath = os.sep.join([p, genome]) trashPath = os.sep.join([p, ".trash", genome]) if os.path.exists(origPath): print 'Moving ' + genome + ' to .trash in folder: ' + p + os.linesep ensurePathExists(trashPath) shutil.move(origPath, trashPath) @staticmethod def validateAndReturnErrors(choices): ''' Should validate the selected input parameters. If the parameters are not valid, an error text explaining the problem should be returned. The GUI then shows this text to the user (if not empty) and greys out the execute button (even if the text is empty). If all parameters are valid, the method should return None, which enables the execute button. ''' if not choices.genome: return 'Please select a genome' if not any([val for val in choices.paths.values()]): return 'Please select at least one path' #@staticmethod #def isPublic(): # return False # #@staticmethod #def isRedirectTool(): # return False # @staticmethod def getToolDescription(): return 'This tool will remove a genome and associated tracks. '+\ '(Note: Genome is not deleted, but moved to .trash directories)' @staticmethod def isDynamic(): return False
def setUp(self):
    '''
    Store the directory path that createDirPath gives for the track
    ['testBoundingRegionShelve'] in 'TestGenome' (allowOverlaps=False), and
    the path of the 'boundingRegions.shelve' file inside that directory.
    '''
    shelveDir = createDirPath(['testBoundingRegionShelve'], 'TestGenome', allowOverlaps=False)
    self._path = shelveDir
    self._fn = os.sep.join([self._path, 'boundingRegions.shelve'])
# Script fragment that sets up TestGenome:
#  1. runs 'tar xfz <testGenomeFn> --keep-newer-files -C <ORIG_DATA_PATH>' through
#     executeShellCmd,
#  2. runs PreProcessAllTracksJob for 'TestGenome', first without a track name and
#     then for the track name given by GenomeInfo.getChrTrackName('TestGenome');
#     an exception here prints a 'FAILED: ...' message and exits with status 1,
#  3. for allowOverlaps False and True, copies the directory that createDirPath
#     gives for ['GESourceTracks'] in 'TestGenome' to the one it gives for [] in
#     'ModelsForExternalTracks' unless the target already exists; a copy error
#     prints a 'FAILED: ...' message and exits with status 1,
#  4. re-stores the TrackInfo of every subtype of ['GESourceTracks'] with
#     trackName = [track] and genome = 'ModelsForExternalTracks'.
# NOTE(review): PASS_ON_EXCEPTIONS = True presumably makes the job re-raise errors
# so that the except clause below can report them - confirm.
# NOTE(review): statement nesting cannot be told from this single-line form (e.g.
# whether the final 'for track' loop sits inside the allowOverlaps loop) - confirm
# against the original file before restructuring.
executeShellCmd('tar xfz %s --keep-newer-files -C %s' % (testGenomeFn, ORIG_DATA_PATH), \ pipe=False, printError=True, onError='exit') print 'OK: Extracted TestGenome files.' PreProcessAllTracksJob.PASS_ON_EXCEPTIONS = True try: PreProcessAllTracksJob('TestGenome').process() PreProcessAllTracksJob('TestGenome', GenomeInfo.getChrTrackName('TestGenome')).process() print 'OK: Finished preprocessing TestGenome.' except Exception, e: print 'FAILED: Error when preprocessing TestGenome. Error:' print ' ' + str(e).strip() sys.exit(1) for allowOverlaps in [False, True]: fromDir = createDirPath(['GESourceTracks'], 'TestGenome', allowOverlaps=allowOverlaps) toDir = createDirPath([], 'ModelsForExternalTracks', allowOverlaps=allowOverlaps) try: if not os.path.exists(toDir): shutil.copytree(fromDir, toDir) print 'OK: Copied from %s to %s.' % (fromDir, toDir) except Exception, e: print 'FAILED: Error occurred copying from %s to %s: ' % (fromDir, toDir) + str(e).strip() sys.exit(1) for track in ProcTrackOptions.getSubtypes('TestGenome', ['GESourceTracks']): ti = TrackInfo('TestGenome', ['GESourceTracks', track]) ti.trackName = [track] ti.genome = 'ModelsForExternalTracks' ti.store()
def setUp(self):
    '''
    Store the directory path that createDirPath gives for the track
    ['testBoundingRegionShelve'] in 'TestGenome' (allowOverlaps=False), and
    the path of the 'boundingRegions.shelve' file inside that directory.
    '''
    shelveDir = createDirPath(['testBoundingRegionShelve'], 'TestGenome', allowOverlaps=False)
    self._path = shelveDir
    self._fn = os.sep.join([self._path, 'boundingRegions.shelve'])
def oldTypePreProcFilesExist(cls, genome, trackName, allowOverlaps):
    '''
    Tell whether the directory that createDirPath gives for the track exists
    and is reported by cls._hasOldTypeChromSubDirs.

    Returns False when the directory does not exist; otherwise returns
    whatever cls._hasOldTypeChromSubDirs(dirPath, genome) returns.
    '''
    preProcDir = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
    if not os.path.exists(preProcDir):
        return False
    return cls._hasOldTypeChromSubDirs(preProcDir, genome)
def _getDirContents(genome, trackName):
    '''
    List the entries of the directory that createDirPath gives for the track.

    Returns the os.listdir result, or an empty list when the path does not
    exist.
    '''
    trackDir = createDirPath(trackName, genome)
    if not os.path.exists(trackDir):
        return []
    return os.listdir(trackDir)
# Script fragment that preprocesses TestGenome and copies its 'GESourceTracks'
# directories: runs PreProcessAllTracksJob for 'TestGenome' (first without a track
# name, then for GenomeInfo.getChrTrackName('TestGenome')), then for allowOverlaps
# False and True copies the createDirPath directory of ['GESourceTracks'] in
# 'TestGenome' to that of [] in 'ModelsForExternalTracks' unless the target already
# exists. A failure in either step prints a 'FAILED: ...' message and exits with
# status 1.
# NOTE(review): this copy ends with a 'for track in ...:' header that has no body,
# and the statement nesting cannot be told from the single-line form - confirm
# against the original file.
print 'OK: Extracted TestGenome files.' PreProcessAllTracksJob.PASS_ON_EXCEPTIONS = True try: PreProcessAllTracksJob('TestGenome').process() PreProcessAllTracksJob( 'TestGenome', GenomeInfo.getChrTrackName('TestGenome')).process() print 'OK: Finished preprocessing TestGenome.' except Exception, e: print 'FAILED: Error when preprocessing TestGenome. Error:' print ' ' + str(e).strip() sys.exit(1) for allowOverlaps in [False, True]: fromDir = createDirPath(['GESourceTracks'], 'TestGenome', allowOverlaps=allowOverlaps) toDir = createDirPath([], 'ModelsForExternalTracks', allowOverlaps=allowOverlaps) try: if not os.path.exists(toDir): shutil.copytree(fromDir, toDir) print 'OK: Copied from %s to %s.' % (fromDir, toDir) except Exception, e: print 'FAILED: Error occurred copying from %s to %s: ' % ( fromDir, toDir) + str(e).strip() sys.exit(1) for track in ProcTrackOptions.getSubtypes('TestGenome', ['GESourceTracks']):
def _preProcess(self, trackName):
    '''
    Remove the track's directories for both overlap modes and run
    PreProcessAllTracksJob for the track in self.GENOME.

    The directory for allowOverlaps=False is removed first, then the one for
    allowOverlaps=True, each through self._removeDir.
    '''
    for withOverlaps in (False, True):
        procDir = createDirPath(trackName, self.GENOME, allowOverlaps=withOverlaps)
        self._removeDir(procDir, trackName)

    job = PreProcessAllTracksJob(self.GENOME, trackName, username="******")
    job.process()
def _getDirContents(genome, trackName):
    '''
    List the entries of the directory that createDirPath gives for the track.

    Returns the os.listdir result, or an empty list when the path does not
    exist.
    '''
    trackDir = createDirPath(trackName, genome)
    if not os.path.exists(trackDir):
        return []
    return os.listdir(trackDir)