Ejemplo n.º 1
0
 def _removeAllTrackData(self, trackName):
     """Remove every stored directory of `trackName` for `self.GENOME`.

     Passes the two createDirPath locations (allowOverlaps False, then
     True) and finally the createOrigPath location to `self._removeDir`.
     """
     for overlapsFlag in (False, True):
         procPath = createDirPath(trackName, self.GENOME,
                                  allowOverlaps=overlapsFlag)
         self._removeDir(procPath, trackName)

     origPath = createOrigPath(self.GENOME, trackName)
     self._removeDir(origPath, trackName)
Ejemplo n.º 2
0
 def _getDirPaths(self, trackName):
     """Return the createDirPath results for `trackName` on `self.GENOME`
     as a (noOverlapsPath, withOverlapsPath) tuple."""
     return tuple(
         createDirPath(trackName, self.GENOME, allowOverlaps=overlapsFlag)
         for overlapsFlag in (False, True))
Ejemplo n.º 3
0
 def _preProcess(self, trackName):
     """Remove both createDirPath directories of `trackName` (without and
     with overlaps) and then run PreProcessAllTracksJob for the track."""
     for overlapsFlag in (False, True):
         procPath = createDirPath(trackName, self.GENOME,
                                  allowOverlaps=overlapsFlag)
         self._removeDir(procPath, trackName)

     job = PreProcessAllTracksJob(self.GENOME, trackName, username="******")
     job.process()
    def _removePreprocessedTrackData(self, trackName):
        """Remove both createDirPath directories of `trackName` and then
        call TrackInfo.removeFilteredEntriesFromShelve for the track."""
        for overlapsFlag in (False, True):
            procPath = createDirPath(trackName, self.GENOME,
                                     allowOverlaps=overlapsFlag)
            self._removeDir(procPath, trackName)

        TrackInfo.removeFilteredEntriesFromShelve(self.GENOME, trackName)
    def testCreateDirPath(self):
        """Check createDirPath for a given chromosome (both overlap
        settings), for chr=None and for chr=''."""
        trackName = ['melting', 'discr']

        # (expected path after the bin-size component, chr argument, allowOverlaps)
        cases = [
            ('/noOverlaps/hg18/melting/discr/chr1', self.chr, False),
            ('/withOverlaps/hg18/melting/discr/chr1', self.chr, True),
            ('/noOverlaps/hg18/melting/discr', None, False),
            ('/noOverlaps/hg18/melting/discr/', '', False),
        ]

        for expectedTail, chrArg, allowOverlaps in cases:
            expected = 'BASE/' + str(CompBinManager.getIndexBinSize()) + expectedTail
            actual = createDirPath(trackName, self.genome, chrArg, allowOverlaps, 'BASE')
            self.assertEqual(expected, actual)
Ejemplo n.º 6
0
def renameProcTrack(genome, oldTn, newTn):
    for allowOverlaps in [False, True]:
        oldPath = createDirPath(oldTn, genome, allowOverlaps=allowOverlaps)
        if not os.path.exists(oldPath):
            print 'Warning: TN did not exist as preproc ' + ('with overlaps' if allowOverlaps else ' without overlaps')
        else:
            print '(renaming TN in preproc ' + ('with overlaps' if allowOverlaps else ' without overlaps') + ')'
            newPath = createDirPath(newTn, genome, allowOverlaps=allowOverlaps)
            if not ONLY_SIMULATION:    
                assert not os.path.exists(newPath), 'ERROR: Target path already exists: ' + newPath
                ensurePathExists(newPath)
                shutil.move(oldPath, newPath)
            else:
                print 'Would move %s to %s' %  (oldPath, newPath)
    def _compute(self):
        """Evaluate the raw statistic for every pair of subtype tracks.

        The subtypes of both tracks are fetched with
        ProcTrackOptions.getSubtypes; with 'minimal' among the keyword
        arguments only the first subtype of each track is used. Raises
        IncompatibleTracksError if the createDirPath location of any subtype
        track is missing.

        Returns a dict with the single key 'Result', holding an OrderedDict
        with 'Matrix' (nested list, one row per subtype of the first track),
        'Rows' and 'Cols' (the subtype lists).
        """
        statKwArgs = copy(self._kwArgs)
        statKwArgs.pop('rawStatistic', None)

        rowSubtypes = ProcTrackOptions.getSubtypes(
            self.getGenome(), self._track.trackName, True)
        colSubtypes = ProcTrackOptions.getSubtypes(
            self.getGenome(), self._track2.trackName, True)
        assert len(rowSubtypes) > 0, str(self._track.trackName)
        assert len(colSubtypes) > 0, str(self._track2.trackName)

        if 'minimal' in self._kwArgs:
            rowSubtypes, colSubtypes = rowSubtypes[:1], colSubtypes[:1]

        cellResults = []
        for rowSubtype in rowSubtypes:
            for colSubtype in colSubtypes:
                rowTn = self._track.trackName + [rowSubtype]
                colTn = self._track2.trackName + [colSubtype]

                bothExist = os.path.exists(createDirPath(rowTn, self.getGenome())) \
                    and os.path.exists(createDirPath(colTn, self.getGenome()))
                if not bothExist:
                    raise IncompatibleTracksError

                rowTrack = Track(rowTn)
                rowTrack.formatConverters = self._track.formatConverters
                colTrack = Track(colTn)
                colTrack.formatConverters = self._track2.formatConverters

                cellResults.append(
                    self._rawStatistic(self._region, rowTrack, colTrack,
                                       **statKwArgs).getResult())
            # Flushed once per completed row of the matrix.
            ResultsMemoizer.flushStoredResults()

        matrix = array(cellResults).reshape((len(rowSubtypes), len(colSubtypes)))
        resultDict = OrderedDict([('Matrix', matrix.tolist()),
                                  ('Rows', rowSubtypes),
                                  ('Cols', colSubtypes)])
        return {'Result': resultDict}
Ejemplo n.º 8
0
    def getTrackData(self, trackName, genome, chr, allowOverlaps, forceChrFolders=False):
        """Build a TrackData object from the files in a track directory.

        If a bounding region shelve file exists and `forceChrFolders` is not
        set, the directory is looked up with chr=None instead of the given
        chromosome. Hidden files and sub-directories are skipped. A bounding
        region file attaches the (cached) shelve to the result; every other
        file is parsed with parseMemmapFileFn and stored under its prefix.
        """
        trackData = TrackData()

        brShelve = BoundingRegionShelve(genome, trackName, allowOverlaps)
        if not forceChrFolders:
            if brShelve.fileExists():
                chr = None

        dirPath = createDirPath(trackName, genome, chr, allowOverlaps)

        for fn in os.listdir(dirPath):
            fullFn = dirPath + os.sep + fn

            if fn.startswith('.') or os.path.isdir(fullFn):
                continue

            if not isBoundingRegionFileName(fn):
                prefix, elementDim, dtypeDim, dtype = parseMemmapFileFn(fn)

                assert prefix not in trackData
                trackData[prefix] = self._getFile(chr, dirPath, fullFn, elementDim, dtype, dtypeDim)
                continue

            # Reuse an already registered shelve for this file, if any.
            if fullFn not in self._fileDict:
                self._fileDict[fullFn] = brShelve
            trackData.boundingRegionShelve = self._fileDict[fullFn]

        return trackData
 def _compute(self):
     """Evaluate the raw statistic for every subtype of the first track
     against the '0' and '1' subtypes of the second track.

     Raises IncompatibleTracksError if the createDirPath location of any
     subtype track is missing. Returns an OrderedDict with 'Matrix' (nested
     list with two columns), 'Rows' (subtypes of the first track) and 'Cols'
     (['Case','Control']).
     """
     statKwArgs = copy(self._kwArgs)
     statKwArgs.pop('rawStatistic', None)

     rowSubtypes = ProcTrackOptions.getSubtypes(self.getGenome(), self._track.trackName, True)
     assert len(rowSubtypes) > 0

     cellResults = []
     for rowSubtype in rowSubtypes:
         for colSubtype in ('0', '1'):
             rowTn = self._track.trackName + [rowSubtype]
             colTn = self._track2.trackName + [colSubtype]

             bothExist = os.path.exists(createDirPath(rowTn, self.getGenome())) \
                 and os.path.exists(createDirPath(colTn, self.getGenome()))
             if not bothExist:
                 raise IncompatibleTracksError

             rowTrack = Track(rowTn)
             rowTrack.formatConverters = self._track.formatConverters
             colTrack = Track(colTn)
             colTrack.formatConverters = self._track2.formatConverters

             cellResults.append(
                 self._rawStatistic(self._region, rowTrack, colTrack, **statKwArgs).getResult())
             ResultsMemoizer.flushStoredResults()

     matrix = array(cellResults).reshape((-1,2))
     return OrderedDict([('Matrix', matrix.tolist()), ('Rows', rowSubtypes), ('Cols', ['Case','Control'])])
    def removeOutdatedPreProcessedFiles(cls, genome, trackName, allowOverlaps,
                                        mode):
        collector = PreProcMetaDataCollector(genome, trackName)
        if cls.preProcFilesExist(genome, trackName, allowOverlaps) and not \
            collector.hasRemovedPreProcFiles(allowOverlaps):
            dirPath = createDirPath(trackName,
                                    genome,
                                    allowOverlaps=allowOverlaps)

            assert (dirPath.startswith(PROCESSED_DATA_PATH))
            if mode == 'Real':
                print 'Removing outdated preprocessed data: ', dirPath
                for fn in os.listdir(dirPath):
                    fullFn = os.path.join(dirPath, fn)
                    if os.path.isfile(fullFn):
                        os.unlink(fullFn)
                    if os.path.isdir(fullFn):
                        if cls._isOldTypeChromDirectory(fullFn, genome):
                            shutil.rmtree(fullFn)
            else:
                print 'Would now have removed outdated preprocessed data if real run: ', dirPath

            collector.updateRemovedPreProcFilesFlag(allowOverlaps, True)

        if mode == 'Real':
            ti = TrackInfo(genome, trackName)
            ti.resetTimeOfPreProcessing()
    def _createChildren(self):
        """Register the child statistics.

        Children, in order: a FormatSpecStat on the first track, the raw
        statistic on the full tracks, and the raw statistic for the '0' and
        '1' subtype tracks of the first track. `_indexOfFirstSubCatChild`
        records where the subtype children start. Raises
        IncompatibleTracksError if the createDirPath location of a subtype
        track is missing.
        """
        statKwArgs = copy(self._kwArgs)
        statKwArgs.pop('rawStatistic', None)

        track2 = None
        if hasattr(self, '_track2'):
            track2 = self._track2

        formatReq = TrackFormatReq(dense=False, val='tc')
        self._addChild(FormatSpecStat(self._region, self._track, formatReq))

        # This child will actually be computed, without any use for its result.
        self._addChild(self._rawStatistic(self._region, self._track, track2, **statKwArgs))
        self._indexOfFirstSubCatChild = len(self._children)

        for subtype in ('0', '1'):
            subTn = self._track.trackName + [subtype]
            if not os.path.exists(createDirPath(subTn, self.getGenome())):
                raise IncompatibleTracksError

            subTrack = Track(subTn)
            subTrack.formatConverters = self._track.formatConverters
            self._addChild(self._rawStatistic(self._region, subTrack, track2, **statKwArgs))
Ejemplo n.º 12
0
 def removeChrMemmapFolders(genome, trackName, allowOverlaps):
     """Delete the per-chromosome directory of every chromosome that the
     TrackInfoDataCollector lists as preprocessed.

     Each path must exist and be a directory (assertions).
     """
     collector = TrackInfoDataCollector(genome, trackName)
     for chrName in collector.getPreProcessedChrs(allowOverlaps):
         chrPath = createDirPath(trackName, genome, chrName, allowOverlaps)
         assert os.path.exists(chrPath), 'Path does not exist: ' + chrPath
         assert os.path.isdir(chrPath), 'Path is not a directory: ' + chrPath
         shutil.rmtree(chrPath)
    def _createPreProcFiles(self):
        """Write the elements of `self._geSource` to an OutputDirectory.

        The collector's meta data is always updated first. Nothing is
        written if the collector reports zero elements for the chromosome.
        If `self._mode` is not 'Real', the source is only iterated through.
        Otherwise each element is written (as raw slice or as element,
        depending on the source) and the chromosome is registered as
        preprocessed before the directory is closed.
        """
        collector = TrackInfoDataCollector(self._genome, self._trackName)
        geSource = self._geSource
        collector.updateMetaDataForFinalization(
            geSource.getFileSuffix(), geSource.getPrefixList(),
            geSource.getValDataType(), geSource.getValDim(),
            geSource.getEdgeWeightDataType(), geSource.getEdgeWeightDim(),
            geSource.hasUndirectedEdges(),
            geSource.getVersion(), PreProcessUtils.constructId(geSource))

        if collector.getNumElements(self._chr, self._allowOverlaps) == 0:
            return

        if self._mode != 'Real':
            # Run through the source without writing anything.
            for _ in geSource:
                pass
            return

        dirPath = createDirPath(self._trackName, self._genome, self._chr, self._allowOverlaps)

        outputDir = OutputDirectory(
            dirPath,
            collector.getPrefixList(self._allowOverlaps),
            collector.getNumElements(self._chr, self._allowOverlaps),
            GenomeInfo.getChrLen(self._genome, self._chr),
            collector.getValDataType(),
            collector.getValDim(),
            collector.getEgdeWeightDataType(),
            collector.getEgdeWeightDim(),
            collector.getMaxNumEdges(self._chr, self._allowOverlaps),
            collector.getMaxStrLens(self._chr, self._allowOverlaps))

        if geSource.isSliceSource():
            writeFunc = outputDir.writeRawSlice
        else:
            writeFunc = outputDir.writeElement

        for ge in geSource:
            writeFunc(ge)

        collector.appendPreProcessedChr(self._allowOverlaps, self._chr)

        outputDir.close()
Ejemplo n.º 14
0
    def getTrackData(self, trackName, genome, chr, allowOverlaps, forceChrFolders=False):
        """Build a TrackData object from the files in a track directory.

        If a bounding region shelve file exists and `forceChrFolders` is not
        set, the directory is looked up with chr=None instead of the given
        chromosome. Hidden files and sub-directories are skipped. A bounding
        region file attaches the (cached) shelve to the result; every other
        file is parsed with parseMemmapFileFn and stored under its prefix.
        """
        trackData = TrackData()

        brShelve = BoundingRegionShelve(genome, trackName, allowOverlaps)
        if not forceChrFolders:
            if brShelve.fileExists():
                chr = None

        dirPath = createDirPath(trackName, genome, chr, allowOverlaps)

        for fn in os.listdir(dirPath):
            fullFn = dirPath + os.sep + fn

            if fn.startswith('.') or os.path.isdir(fullFn):
                continue

            if not isBoundingRegionFileName(fn):
                prefix, elementDim, dtypeDim, dtype = parseMemmapFileFn(fn)

                assert prefix not in trackData
                trackData[prefix] = self._getFile(chr, dirPath, fullFn, elementDim, dtype, dtypeDim)
                continue

            # Reuse an already registered shelve for this file, if any.
            if fullFn not in self._fileDict:
                self._fileDict[fullFn] = brShelve
            trackData.boundingRegionShelve = self._fileDict[fullFn]

        return trackData
    def merge(genome, trackName, allowOverlaps):
        """Merge the per-chromosome arrays of a track into one memmap file
        per array in the track's createDirPath directory.

        The arrays of the first existing chromosome define which arrays are
        merged; the arrays of the remaining chromosomes are combined with
        ChrMemmapFolderMerger.mergeArrays. The element and dtype dimensions
        used in the merged file name are the maxima over all chromosomes.

        Raises EmptyGESourceError if no chromosome directory exists.
        """
        path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)

        collector = TrackInfoDataCollector(genome, trackName)
        chrList = collector.getPreProcessedChrs(allowOverlaps)
        existingChrList = list(ChrMemmapFolderMerger._existingChrIter(path, chrList))
        if not existingChrList:
            raise EmptyGESourceError('No data lines has been read from source file (probably because it is empty).')

        firstChr, remainingChrs = existingChrList[0], existingChrList[1:]

        firstChrTrackData = TrackSource().getTrackData(trackName, genome, firstChr, allowOverlaps, forceChrFolders=True)
        for arrayName in firstChrTrackData.keys():
            mergedArray = firstChrTrackData[arrayName][:]
            elementDim, dtypeDim = parseMemmapFileFn(firstChrTrackData[arrayName].filename)[1:3]
            # Entries are dropped as soon as their data has been picked up.
            del firstChrTrackData[arrayName]

            for chrName in remainingChrs:
                chrTrackData = TrackSource().getTrackData(trackName, genome, chrName, allowOverlaps, forceChrFolders=True)

                mergedArray = ChrMemmapFolderMerger.mergeArrays(mergedArray, np.array(chrTrackData[arrayName][:]))
                chrElementDim, chrDtypeDim = parseMemmapFileFn(chrTrackData[arrayName].filename)[1:3]
                elementDim = max(elementDim, chrElementDim)
                dtypeDim = max(dtypeDim, chrDtypeDim)

                del chrTrackData[arrayName]

            mergedFn = createMemmapFileFn(path, arrayName, elementDim, dtypeDim, str(mergedArray.dtype))

            outMemmap = np.memmap(mergedFn, dtype=mergedArray.dtype, mode='w+', shape=mergedArray.shape)
            outMemmap[:] = mergedArray
            outMemmap.flush()
            del outMemmap
            del mergedArray
Ejemplo n.º 16
0
    def renameExistingStdTrackIfNeeded(cls, genome, stdTrackName):
        """Make the last element of `stdTrackName` available as a directory
        entry in the parent track directories, by symlinking if needed.

        For both overlap settings, if the parent directory (createDirPath of
        `stdTrackName[:-1]`) exists:

        - a symlink at the requested path that does not lead to a directory
          is removed;
        - if the parent contains at least one real (non-symlink) directory
          and the requested name was not among the listed entries, a
          relative symlink with the requested name is created, pointing to
          the first real directory.

        If a symlink was created, the TrackInfo of the linked track name is
        stored again with `stdTrackName` as its track name.
        """
        oldTrackName = None
        for allowOverlaps in [False, True]:
            parentDir = createDirPath(stdTrackName[:-1],
                                      genome,
                                      allowOverlaps=allowOverlaps)
            if not os.path.exists(parentDir):
                continue

            dirContents = os.listdir(parentDir)
            realDirs = [
                x for x in dirContents
                if os.path.isdir(os.path.join(parentDir, x))
                and not os.path.islink(os.path.join(parentDir, x))
            ]

            reqDirName = stdTrackName[-1]
            reqDirPath = os.path.join(parentDir, reqDirName)

            from gold.application.LogSetup import logMessage
            logMessage('Checking ' + reqDirPath)

            # This is to fix a bug that ended in the symlink pointing to a
            # file. os.path.isdir follows the link itself, so a relative
            # target (as created below) is resolved against the directory of
            # the link. Testing os.readlink()'s result directly would resolve
            # it against the current working directory and wrongly remove
            # valid links.
            if os.path.islink(reqDirPath) and not os.path.isdir(reqDirPath):
                os.remove(reqDirPath)
                logMessage('Removed ' + reqDirPath)

            if realDirs and reqDirName not in dirContents:
                oldTrackName = stdTrackName[:-1] + [realDirs[0]]
                os.symlink(realDirs[0], reqDirPath)

        if oldTrackName is not None:
            ti = TrackInfo(genome, oldTrackName)
            ti.trackName = stdTrackName
            ti.store()
Ejemplo n.º 17
0
 def removeOutdatedPreProcessedFiles(trackName, geSource, allowOverlaps, mode):
     genome = geSource.getGenome()
 
     if PreProcessUtils.preProcFilesExist(trackName, geSource, allowOverlaps) and not \
         TrackInfoDataCollector(genome, trackName).hasRemovedPreProcFiles(allowOverlaps):
             dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
             
             assert( dirPath.startswith(PROCESSED_DATA_PATH) )
             if mode == 'Real':
                 print 'Removing outdated preprocessed data: ', dirPath
                 for fn in os.listdir(dirPath):
                     fullFn = os.path.join(dirPath, fn)
                     if os.path.isfile(fullFn):
                         os.unlink(fullFn)
                     if os.path.isdir(fullFn):
                         if not PreProcessUtils._isSubTrackDirectory(fullFn):
                             shutil.rmtree(fullFn)
             else:
                 print 'Would now have removed outdated preprocessed data if real run: ', dirPath
             
             TrackInfoDataCollector(genome, trackName).updateRemovedPreProcFilesFlag(allowOverlaps, True)
     
     if mode == 'Real':
         ti = TrackInfo(genome, trackName)
         ti.resetTimeOfPreProcessing()
    def _createChildren(self):
        """Register the child statistics.

        Children, in order: a FormatSpecStat on the first track, the raw
        statistic on the full tracks, and the raw statistic for the '0' and
        '1' subtype tracks of the first track. `_indexOfFirstSubCatChild`
        records where the subtype children start. Raises
        IncompatibleTracksError if the createDirPath location of a subtype
        track is missing.
        """
        statKwArgs = copy(self._kwArgs)
        statKwArgs.pop('rawStatistic', None)

        track2 = None
        if hasattr(self, '_track2'):
            track2 = self._track2

        formatReq = TrackFormatReq(dense=False, val='tc')
        self._addChild(FormatSpecStat(self._region, self._track, formatReq))

        # This child will actually be computed, without any use for its result.
        self._addChild(
            self._rawStatistic(self._region, self._track, track2,
                               **statKwArgs))
        self._indexOfFirstSubCatChild = len(self._children)

        for subtype in ('0', '1'):
            subTn = self._track.trackName + [subtype]
            if not os.path.exists(createDirPath(subTn, self.getGenome())):
                raise IncompatibleTracksError

            subTrack = Track(subTn)
            subTrack.formatConverters = self._track.formatConverters
            self._addChild(
                self._rawStatistic(self._region, subTrack, track2,
                                   **statKwArgs))
Ejemplo n.º 19
0
 def removeChrMemmapFolders(genome, trackName, allowOverlaps):
     """Delete the per-chromosome directory of every chromosome that the
     PreProcMetaDataCollector lists as preprocessed.

     Each path must exist and be a directory (assertions).
     """
     collector = PreProcMetaDataCollector(genome, trackName)
     for chrName in collector.getPreProcessedChrs(allowOverlaps):
         chrPath = createDirPath(trackName, genome, chrName, allowOverlaps)
         assert os.path.exists(chrPath), 'Path does not exist: ' + chrPath
         assert os.path.isdir(chrPath), 'Path is not a directory: ' + chrPath
         shutil.rmtree(chrPath)
 def _renameTrackNameIfIllegal(self, trackName):
     """Return `trackName` with replaceIllegalElements applied to every
     element.

     If that changes the name and the createDirPath location of the
     original name exists, the track is renamed via renameTrack first.
     """
     from gold.description.AnalysisDefHandler import replaceIllegalElements

     legalTrackName = []
     for part in trackName:
         legalTrackName.append(replaceIllegalElements(part))

     if legalTrackName == trackName:
         return legalTrackName

     if os.path.exists(createDirPath(trackName, self._genome)):
         renameTrack(self._genome, trackName, legalTrackName)

     return legalTrackName
def renameProcTrack(genome, oldTn, newTn):
    for allowOverlaps in [False, True]:
        oldPath = createDirPath(oldTn, genome, allowOverlaps=allowOverlaps)
        if not os.path.exists(oldPath):
            print 'Warning: TN did not exist as preproc ' + (
                'with overlaps' if allowOverlaps else ' without overlaps')
        else:
            print '(renaming TN in preproc ' + ('with overlaps'
                                                if allowOverlaps else
                                                ' without overlaps') + ')'
            newPath = createDirPath(newTn, genome, allowOverlaps=allowOverlaps)
            if not ONLY_SIMULATION:
                assert not os.path.exists(
                    newPath), 'ERROR: Target path already exists: ' + newPath
                ensurePathExists(newPath)
                shutil.move(oldPath, newPath)
            else:
                print 'Would move %s to %s' % (oldPath, newPath)
    def merge(genome, trackName, allowOverlaps):
        """Merge the per-chromosome arrays of a track into one memmap file
        per array in the track's createDirPath directory.

        The preprocessed chromosomes are sorted first unless the track
        format reports a dense representation. The arrays of the first
        existing chromosome define which arrays are merged; the arrays of
        the remaining chromosomes are combined with
        ChrMemmapFolderMerger.mergeArrays. The element and dtype dimensions
        used in the merged file name are the maxima over all chromosomes.

        Raises EmptyGESourceError if no chromosome directory exists.
        """
        path = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)

        collector = PreProcMetaDataCollector(genome, trackName)
        chrList = collector.getPreProcessedChrs(allowOverlaps)
        if not collector.getTrackFormat().reprIsDense():
            chrList = sorted(chrList)

        existingChrList = list(
            ChrMemmapFolderMerger._existingChrIter(path, chrList))
        if not existingChrList:
            raise EmptyGESourceError(
                'No data lines has been read from source file (probably because it is empty).'
            )

        firstChr, remainingChrs = existingChrList[0], existingChrList[1:]

        firstChrTrackData = TrackSource().getTrackData(
            trackName, genome, firstChr, allowOverlaps, forceChrFolders=True)
        for arrayName in firstChrTrackData.keys():
            mergedArray = firstChrTrackData[arrayName][:]
            elementDim, dtypeDim = parseMemmapFileFn(
                firstChrTrackData[arrayName].filename)[1:3]
            # Entries are dropped as soon as their data has been picked up.
            del firstChrTrackData[arrayName]

            for chrName in remainingChrs:
                chrTrackData = TrackSource().getTrackData(
                    trackName, genome, chrName, allowOverlaps,
                    forceChrFolders=True)

                mergedArray = ChrMemmapFolderMerger.mergeArrays(
                    mergedArray, np.array(chrTrackData[arrayName][:]))
                chrElementDim, chrDtypeDim = parseMemmapFileFn(
                    chrTrackData[arrayName].filename)[1:3]
                elementDim = max(elementDim, chrElementDim)
                dtypeDim = max(dtypeDim, chrDtypeDim)

                del chrTrackData[arrayName]

            mergedFn = createMemmapFileFn(path, arrayName, elementDim,
                                          dtypeDim, str(mergedArray.dtype))

            outMemmap = np.memmap(mergedFn,
                                  dtype=mergedArray.dtype,
                                  mode='w+',
                                  shape=mergedArray.shape)
            outMemmap[:] = mergedArray
            outMemmap.flush()
            del outMemmap
            del mergedArray
Ejemplo n.º 23
0
    def _preProcess(self, trackName, noOverlapsFileCount=None, withOverlapsFileCount=None, \
                    noOverlapsChrElCount=None, withOverlapsChrElCount=None, customBins=None):
        """Preprocess a test track under profiling and check the outcome.

        `trackName` is prefixed with `self.TRACK_NAME_PREFIX`, both
        createDirPath directories of the track are removed, and
        PreProcessAllTracksJob is run through `self._runWithProfiling`.

        Each of the optional expectations is only checked when not None:
        - noOverlapsFileCount / withOverlapsFileCount: number of non-hidden
          entries in the respective track directory.
        - noOverlapsChrElCount / withOverlapsChrElCount: passed to
          `self.assertChrElCounts` together with `customBins`.

        `customBins` defaults to a fresh empty dict on every call.
        """
        # A new dict per call; a `{}` default would be shared between calls.
        if customBins is None:
            customBins = {}

        trackName = self.TRACK_NAME_PREFIX + trackName
        noOverlapsPath = createDirPath(trackName,
                                       self.GENOME,
                                       allowOverlaps=False)
        withOverlapsPath = createDirPath(trackName,
                                         self.GENOME,
                                         allowOverlaps=True)
        self._removeDir(noOverlapsPath, trackName)
        self._removeDir(withOverlapsPath, trackName)

        # The second argument maps the name used in the command string to
        # the object it refers to; it has to be a dict (`:`), not a set (`,`).
        self._runWithProfiling(
            'PreProcessAllTracksJob(' + repr(self.GENOME) + ',' +
            repr(trackName) + ', username="******").process()',
            {'PreProcessAllTracksJob': PreProcessAllTracksJob})

        expectedFileCounts = [(noOverlapsFileCount, noOverlapsPath),
                              (withOverlapsFileCount, withOverlapsPath)]
        for expectedCount, dirPath in expectedFileCounts:
            if expectedCount is not None:
                visibleEntries = [
                    x for x in os.listdir(dirPath) if not x.startswith('.')
                ]
                self.assertEqual(expectedCount, len(visibleEntries))

        if noOverlapsChrElCount is not None:
            self.assertChrElCounts(trackName, noOverlapsChrElCount, False,
                                   customBins)

        if withOverlapsChrElCount is not None:
            self.assertChrElCounts(trackName, withOverlapsChrElCount, True,
                                   customBins)

        self._storeProfile()
    def _renameTrackNameIfIllegal(self, trackName):
        """Return `trackName` with replaceIllegalElementsInTrackNames applied
        to every element.

        If that changes the name and the createDirPath location of the
        original name exists, the track is renamed via renameTrack first.
        """
        from gold.util.CommonFunctions import replaceIllegalElementsInTrackNames

        legalTrackName = []
        for part in trackName:
            legalTrackName.append(replaceIllegalElementsInTrackNames(part))

        if legalTrackName == trackName:
            return legalTrackName

        if os.path.exists(createDirPath(trackName, self._genome)):
            renameTrack(self._genome, trackName, legalTrackName)

        return legalTrackName
    def setUp(self):
        """Silence stdout, define the test track and remove its directory.

        The original stdout is kept in `self.stdout`. Also sets
        CompBinManager.COMP_BIN_SIZE from the config and replaces the
        GenomeInfo used by CreateFunctionTrackStat with MyGenomeInfo.
        """
        self.stdout = sys.stdout
        devNull = open('/dev/null', 'w')
        sys.stdout = devNull

        self._trackName = ['intensity_test']
        self._genome = 'TestGenome'
        self._chr = 'chrM'

        trackPath = createDirPath(self._trackName, self._genome)
        self._path = trackPath
        # Make sure that only the test track's own directory gets removed.
        assert trackPath.endswith(self._trackName[-1])
        removeDirectoryTree(trackPath)

        gold.util.CompBinManager.COMP_BIN_SIZE = config.Config.COMP_BIN_SIZE
        gold.statistic.CreateFunctionTrackStat.GenomeInfo = MyGenomeInfo
 def __init__(self, genome, trackName, allowOverlaps):
     """Set up the state for the shelve file of a track.

     The file name is BR_SHELVE_FILE_NAME inside the track's createDirPath
     directory. Contents and the set of updated chromosomes start out
     empty; the minimal region is the first element of
     MinimalBinSource(genome).
     """
     assert allowOverlaps in [False, True]

     self._genome = genome
     self._trackName = trackName

     trackDir = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
     self._fn = os.sep.join([trackDir, BR_SHELVE_FILE_NAME])
     self._contents = dict()
     self._updatedChrs = set()

     from quick.application.UserBinSource import MinimalBinSource
     minimalBins = MinimalBinSource(genome)
     self._minimalRegion = minimalBins[0]
Ejemplo n.º 27
0
    def getSubtypes(genome, trackName, fullAccess=False):
        """Return the subtype names of a track, sorted case-insensitively.

        Entries of the track directory are skipped if they start with '.' or
        '_', are plain files, are valid chromosome names, or are called
        'external' or 'ucsc'. Unless `fullAccess` is set or the track is a
        literature track, subtypes whose TrackInfo is private are skipped
        as well.
        """
        dirPath = createDirPath(trackName, genome)

        subtypes = []
        for fn in ProcTrackOptions._getDirContents(genome, trackName):
            if fn[0] in ['.','_']:
                continue
            if os.path.isfile(dirPath + os.sep + fn):
                continue
            if GenomeInfo.isValidChr(genome, fn):
                continue
            # FIXME: temporary exclusion; these directories should start with '_'.
            if fn in ['external','ucsc']:
                continue
            subtypes.append(fn)

        if not (fullAccess or ProcTrackOptions._isLiteratureTrack(genome, trackName)):
            subtypes = [x for x in subtypes if not TrackInfo(genome, trackName+[x]).private]

        return sorted(subtypes, key=str.lower)
Ejemplo n.º 28
0
def renameTrack(genome, oldTn, newTn):
    """Rename the track oldTn to newTn within genome.

    oldTn and newTn are track names given as sequences of name parts (they
    are joined with ':' in the error message).  The TrackInfo record is
    renamed first, then the standardized track.  A failure while renaming the
    standardized track is printed and otherwise ignored (best effort).

    Raises AssertionError if newTn equals the leading part of oldTn, or if
    oldTn has no directory among the processed tracks.
    """
    # NOTE(review): this rejects newTn being a prefix of oldTn; confirm whether
    # the opposite direction (oldTn a prefix of newTn) should be rejected too.
    assert newTn != oldTn[:len(newTn)], 'ERROR: it is not allowed to move a track into itself (%s -> %s)' % (':'.join(oldTn), ':'.join(newTn))

    # First check to filter out misspellings..
    oldPath = createDirPath(oldTn, genome)
    assert os.path.exists(oldPath), 'ERROR: TN did not exist in processed tracks: ' + oldPath

    # Renaming TI first, in case of problems, such as incomplete records..
    renameTrackInfo(genome, oldTn, newTn)
    try:
        renameStdTrack(genome, oldTn, newTn)
    except Exception as e:
        # "as" form and parenthesised single-argument print behave the same
        # on Python 2.6+ and also parse on Python 3.
        print(e)
    def getSubtypes(genome, trackName, fullAccess=False):
        """Return the sorted subtype names found below a track.

        Directory entries that start with '.' or '_', that are regular files,
        or that are valid chromosome names for the genome are not subtypes.
        Unless fullAccess is set or the track is a literature track, the
        'external' entry and entries whose TrackInfo is private are removed.
        The result is sorted with smartStrLower as the key.
        """
        dirPath = createDirPath(trackName, genome)

        def _isSubtype(name):
            # Same three exclusion tests as before, evaluated in the same order.
            if name[0] in ['.', '_']:
                return False
            if os.path.isfile(dirPath + os.sep + name):
                return False
            return not GenomeInfo.isValidChr(genome, name)

        subtypes = [
            fn for fn in ProcTrackOptions._getDirContents(genome, trackName)
            if _isSubtype(fn)
        ]

        if not (fullAccess
                or ProcTrackOptions._isLiteratureTrack(genome, trackName)):
            accessible = []
            for x in subtypes:
                if x in ['external']:
                    continue
                if TrackInfo(genome, trackName + [x]).private:
                    continue
                accessible.append(x)
            subtypes = accessible

        return sorted(subtypes, key=smartStrLower)
Ejemplo n.º 30
0
    def _createOutputDirectory(self, genome, chr, trackName, allowOverlaps,
                               geSourceManager):
        """Build the OutputDirectory for one chromosome of a track.

        The target path comes from createDirPath; every other constructor
        argument is read from geSourceManager, except the chromosome length,
        which is looked up through GenomeInfo.
        """
        dirPath = createDirPath(trackName, genome, chr, allowOverlaps)

        from quick.util.GenomeInfo import GenomeInfo

        # Collected in the positional order expected by OutputDirectory.
        outputDirArgs = [
            dirPath,
            geSourceManager.getPrefixList(),
            geSourceManager.getNumElementsForChr(chr),
            GenomeInfo.getChrLen(genome, chr),
            geSourceManager.getValDataType(),
            geSourceManager.getValDim(),
            geSourceManager.getEdgeWeightDataType(),
            geSourceManager.getEdgeWeightDim(),
            geSourceManager.getMaxNumEdgesForChr(chr),
            geSourceManager.getMaxStrLensForChr(chr),
            geSourceManager.isSorted(),
        ]
        return OutputDirectory(*outputDirArgs)
 def _preProcess(self, trackName, noOverlapsFileCount=None, withOverlapsFileCount=None, \
                 noOverlapsChrElCount=None, withOverlapsChrElCount=None, customBins={}):
     """Preprocess a test track from scratch and optionally verify the result.

     trackName is prefixed with self.TRACK_NAME_PREFIX, both preprocessed
     directories are removed, and PreProcessAllTracksJob is run through
     self._runWithProfiling.  Each expectation that is not None is then
     asserted: the number of non-hidden files per directory, and the
     per-chromosome element counts (via self.assertChrElCounts, which also
     receives customBins).  Finally the profile is stored.
     """
     # NOTE: locals() is handed to the profiler below, so the local names
     # defined up to that call are deliberately left as they were.
     trackName = self.TRACK_NAME_PREFIX + trackName
     noOverlapsPath = createDirPath(trackName, self.GENOME, allowOverlaps=False)
     withOverlapsPath = createDirPath(trackName, self.GENOME, allowOverlaps=True)
     self._removeDir(noOverlapsPath, trackName)
     self._removeDir(withOverlapsPath, trackName)

     self._runWithProfiling('PreProcessAllTracksJob(' + repr(self.GENOME) + ',' + repr(trackName) + ', username="******").process()',\
                                globals(), locals())

     # File-count expectations: noOverlaps first, then withOverlaps.
     for expectedFileCount, path in ((noOverlapsFileCount, noOverlapsPath),
                                     (withOverlapsFileCount, withOverlapsPath)):
         if expectedFileCount is None:
             continue
         visibleFiles = [x for x in os.listdir(path) if not x.startswith('.')]
         self.assertEquals(expectedFileCount, len(visibleFiles))

     # Element-count expectations, in the same order.
     for expectedChrElCount, overlapsFlag in ((noOverlapsChrElCount, False),
                                              (withOverlapsChrElCount, True)):
         if expectedChrElCount is not None:
             self.assertChrElCounts(trackName, expectedChrElCount, overlapsFlag, customBins)

     self._storeProfile()
 def renameExistingStdTrackIfNeeded(cls, genome, stdTrackName):
     """Rename a lone sibling directory to the last part of stdTrackName.

     For both the noOverlaps and the withOverlaps variant, the parent
     directory of stdTrackName is inspected.  If it exists and holds exactly
     one entry whose name differs from stdTrackName[-1], that entry is renamed
     on disk.  If any rename happened, the TrackInfo stored under the old
     track name is updated to stdTrackName and stored.
     """
     oldTrackName = None
     for allowOverlaps in [False, True]:
         parentDir = createDirPath(stdTrackName[:-1], genome, allowOverlaps=allowOverlaps)
         if not os.path.exists(parentDir):
             continue

         dirContents = os.listdir(parentDir)
         if len(dirContents) != 1 or dirContents[0] == stdTrackName[-1]:
             continue

         existingName = dirContents[0]
         oldTrackName = stdTrackName[:-1] + [existingName]
         os.rename(parentDir + os.sep + existingName,
                   parentDir + os.sep + stdTrackName[-1])

     if oldTrackName is None:
         return

     ti = TrackInfo(genome, oldTrackName)
     ti.trackName = stdTrackName
     ti.store()
Ejemplo n.º 33
0
 def preProcFilesExist(genome, trackName, allowOverlaps):
     """Tell whether preprocessed files exist for the track.

     The flag stored in PreProcMetaDataCollector is returned when it is not
     None.  Otherwise it is computed: True if the bounding-region shelve file
     exists; else, if the track directory exists, whatever
     PreProcessUtils._hasOldTypeChromSubDirs reports; else False.  The
     computed value is written back to the collector before returning.
     """
     collector = PreProcMetaDataCollector(genome, trackName)
     storedFlag = collector.preProcFilesExist(allowOverlaps)
     if storedFlag is not None:
         return storedFlag

     dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
     if BoundingRegionShelve(genome, trackName, allowOverlaps).fileExists():
         filesExist = True
     elif os.path.exists(dirPath):
         filesExist = PreProcessUtils._hasOldTypeChromSubDirs(dirPath, genome)
     else:
         filesExist = False

     collector.updatePreProcFilesExistFlag(allowOverlaps, filesExist)
     return filesExist
Ejemplo n.º 34
0
 def preProcFilesExist(trackName, geSource, allowOverlaps):
     """Tell whether preprocessed files exist for the track of geSource's genome.

     The flag stored in TrackInfoDataCollector is returned when it is not
     None.  Otherwise it is computed and written back:
     - with a bounding-region shelve file present: True if the track
       directory holds a regular file whose name, up to the first '.', is
       'start', 'end', 'val' or 'edges';
     - without one: True if the track directory exists and contains a
       sub-directory that PreProcessUtils._isSubTrackDirectory rejects.
     """
     genome = geSource.getGenome()

     filesExist = TrackInfoDataCollector(genome, trackName).preProcFilesExist(allowOverlaps)
     if filesExist is not None:
         return filesExist

     dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
     filesExist = False
     if BoundingRegionShelve(genome, trackName, allowOverlaps).fileExists():
         for fn in os.listdir(dirPath):
             if not os.path.isfile(os.path.join(dirPath, fn)):
                 continue
             if fn.split('.')[0] in ['start', 'end', 'val', 'edges']:
                 filesExist = True
                 break
     elif os.path.exists(dirPath):
         for fn in os.listdir(dirPath):
             if not os.path.isdir(os.path.join(dirPath, fn)):
                 continue
             if not PreProcessUtils._isSubTrackDirectory(os.path.join(dirPath, fn)):
                 filesExist = True
                 break

     TrackInfoDataCollector(genome, trackName).updatePreProcFilesExistFlag(allowOverlaps, filesExist)
     return filesExist
    def __init__(self, genome, trackName, allowOverlaps):
        """Initialise the in-memory state for one track's bounding-region shelve.

        Stores genome and track name, derives the shelve file path from
        createDirPath and BR_SHELVE_FILE_NAME, starts with empty contents and
        no updated chromosomes, and remembers the first bin of
        MinimalBinSource(genome) (or None if that source is None).
        """
        assert allowOverlaps in [False, True]

        self._genome = genome
        self._trackName = trackName

        trackDir = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
        self._fn = trackDir + os.sep + BR_SHELVE_FILE_NAME
        self._contents = dict()
        self._updatedChrs = set()

        from quick.application.UserBinSource import MinimalBinSource
        minimalBinList = MinimalBinSource(genome)
        if minimalBinList is None:
            self._minimalRegion = None
        else:
            self._minimalRegion = minimalBinList[0]
def renameTrack(genome, oldTn, newTn):
    """Rename the track oldTn to newTn within genome.

    oldTn and newTn are track names given as sequences of name parts (they
    are joined with ':' in the error message).  The TrackInfo record is
    renamed first, then the standardized track.  A failure while renaming the
    standardized track is printed and otherwise ignored (best effort).

    Raises AssertionError if newTn equals the leading part of oldTn, or if
    oldTn has no directory among the processed tracks.
    """
    # NOTE(review): this rejects newTn being a prefix of oldTn; confirm whether
    # the opposite direction (oldTn a prefix of newTn) should be rejected too.
    assert newTn != oldTn[:len(
        newTn
    )], 'ERROR: it is not allowed to move a track into itself (%s -> %s)' % (
        ':'.join(oldTn), ':'.join(newTn))

    # First check to filter out misspellings..
    oldPath = createDirPath(oldTn, genome)
    assert os.path.exists(
        oldPath), 'ERROR: TN did not exist in processed tracks: ' + oldPath

    # Renaming TI first, in case of problems, such as incomplete records..
    renameTrackInfo(genome, oldTn, newTn)
    try:
        renameStdTrack(genome, oldTn, newTn)
    except Exception as e:
        # "as" form and parenthesised single-argument print behave the same
        # on Python 2.6+ and also parse on Python 3.
        print(e)
 def _createChildren(self):
     """Register the child statistics of this statistic.

     Children are added in this order: a RawDataStat for each of the two
     tracks, the raw statistic on the two tracks themselves, and then the raw
     statistic for every combination of the subtypes '0' and '1' of the two
     tracks.  'rawStatistic' is stripped from the keyword arguments that are
     passed on.

     Raises IncompatibleTracksError if the directory of a required subtype
     track does not exist.
     """
     kwArgs = copy(self._kwArgs)
     kwArgs.pop('rawStatistic', None)

     for track in (self._track, self._track2):
         self._addChild(RawDataStat(self._region, track, TrackFormatReq(dense=False, val='tc')))
     self._addChild(self._rawStatistic(self._region, self._track, self._track2, **kwArgs))

     subtypePairs = [(s1, s2) for s1 in ['0', '1'] for s2 in ['0', '1']]
     for subtype1, subtype2 in subtypePairs:
         tn1 = self._track.trackName + [subtype1]
         tn2 = self._track2.trackName + [subtype2]

         bothExist = os.path.exists(createDirPath(tn1, self.getGenome())) \
             and os.path.exists(createDirPath(tn2, self.getGenome()))
         if not bothExist:
             raise IncompatibleTracksError

         # Subtype tracks inherit the format converters of their parent track.
         track1 = Track(tn1)
         track1.formatConverters = self._track.formatConverters
         track2 = Track(tn2)
         track2.formatConverters = self._track2.formatConverters
         self._addChild(self._rawStatistic(self._region, track1, track2, **kwArgs))
 def _createChildren(self):
     """Add the raw-data children and the raw statistic for all subtype pairs.

     First a RawDataStat child is added per track, then the raw statistic on
     the two original tracks, then one raw statistic per ('0'|'1', '0'|'1')
     subtype combination.  The 'rawStatistic' entry is removed from the
     forwarded keyword arguments.

     Raises IncompatibleTracksError when a subtype track directory is missing.
     """
     kwArgs = copy(self._kwArgs)
     kwArgs.pop('rawStatistic', None)

     rawDataReq1 = TrackFormatReq(dense=False, val='tc')
     self._addChild(RawDataStat(self._region, self._track, rawDataReq1))
     rawDataReq2 = TrackFormatReq(dense=False, val='tc')
     self._addChild(RawDataStat(self._region, self._track2, rawDataReq2))
     self._addChild(self._rawStatistic(self._region, self._track, self._track2, **kwArgs))

     for subtype1 in ['0', '1']:
         for subtype2 in ['0', '1']:
             tn1 = self._track.trackName + [subtype1]
             tn2 = self._track2.trackName + [subtype2]

             if not (os.path.exists(createDirPath(tn1, self.getGenome()))
                     and os.path.exists(createDirPath(tn2, self.getGenome()))):
                 raise IncompatibleTracksError

             # Each subtype track reuses the format converters of its parent.
             track1 = Track(tn1)
             track1.formatConverters = self._track.formatConverters
             track2 = Track(tn2)
             track2.formatConverters = self._track2.formatConverters
             self._addChild(self._rawStatistic(self._region, track1, track2, **kwArgs))
Ejemplo n.º 39
0
    def execute(choices, galaxyFn=None, username=''):
        '''Move every data directory of the selected genome to a .trash folder.

        Called when the execute button is pushed by the web user; progress is
        printed to standard out.

        choices  -- selections made by the web user; choices[0] is the genome
        galaxyFn -- not used by this implementation
        username -- not used by this implementation

        For each data root (the collected, standardized, parsing-error and
        nmer-chain paths plus both preprocessed roots), the sub-directory named
        after the genome is moved to <root>/.trash/<genome> if it exists.
        '''
        # The genome does not change between data roots, so look it up once.
        genome = choices[0]
        # Parenthesised single-argument print behaves the same on Python 2
        # and also parses on Python 3.
        print('Executing... starting to remove ' + genome + os.linesep)

        paths = [NONSTANDARD_DATA_PATH, ORIG_DATA_PATH, PARSING_ERROR_DATA_PATH, NMER_CHAIN_DATA_PATH] +\
                 [createDirPath('', '', allowOverlaps=x) for x in [False, True]]

        for p in paths:
            origPath = os.sep.join([p, genome])
            trashPath = os.sep.join([p, ".trash", genome])

            if os.path.exists(origPath):
                print('Moving ' + genome + ' to .trash in folder: ' + p + os.linesep)
                # NOTE(review): if ensurePathExists creates trashPath itself as a
                # directory, shutil.move places the genome inside it - confirm.
                ensurePathExists(trashPath)
                shutil.move(origPath, trashPath)
Ejemplo n.º 40
0
    def execute(choices, galaxyFn=None, username=''):
        '''Rename a track in the selected genome and in further ticked genomes.

        Called when the execute button is pushed by the web user; one line per
        renamed genome is printed to standard out.

        choices  -- selections made by the web user:
                    choices[0] genome, choices[1] old track name and
                    choices[2] new track name (both ':'-separated),
                    choices[3] mapping that is looked up by v[0] of each entry
                    returned by GalaxyInterface.getAllGenomes
        galaxyFn -- not used by this implementation
        username -- passed to GalaxyInterface.getAllGenomes

        A further genome is included only if its entry in choices[3] is truthy
        and the old track's directory exists for it.
        '''
        oldTn = choices[1]
        newTn = choices[2]

        genomes = [choices[0]]
        for v in GalaxyInterface.getAllGenomes(username):
            # One .get() covers both "key present" and "value truthy".
            if choices[3].get(v[0]) and isdir(createDirPath(oldTn.split(':'), v[1])):
                genomes.append(v[1])

        for genome in genomes:
            renameTrack(genome, oldTn.split(':'), newTn.split(':'))
            # Parenthesised single-argument print behaves the same on Python 2
            # and also parses on Python 3.
            print('%s renamed to %s in genome %s.' % (oldTn, newTn, genome))
Ejemplo n.º 41
0
    def extract(cls, trackName, regionList, fn, fileFormatName=DEFAULT_FILE_FORMAT_NAME, globalCoords=True, \
                addSuffix=False, asOriginal=False, allowOverlaps=False, ignoreEmpty=False):
        """Compose the track data of the given regions into the file fn.

        trackName      -- track to extract
        regionList     -- non-empty collection of regions; the genome is taken
                          from the first region
        fn             -- output file name
        fileFormatName -- format used to pick the composer, unless asOriginal
                          yields one
        globalCoords   -- forwarded to TrackGenomeElementSource
        addSuffix      -- replace the extension of fn with the composer's
                          default file name suffix
        asOriginal     -- try to pick the composer from the file type stored
                          in the track's TrackInfo
        allowOverlaps  -- honoured only if the withOverlaps directory of the
                          track exists
        ignoreEmpty    -- forwarded to the composer's composeToFile

        Returns the (possibly suffix-adjusted) file name if composeToFile
        reports success, otherwise None.
        """
        from gold.origdata.TrackGenomeElementSource import TrackGenomeElementSource

        assert len(regionList) > 0
        # Only iteration is relied upon, as regionList may not be indexable.
        genome = next(iter(regionList)).genome

        #To silently extract correctly if track type is dense
        if allowOverlaps:
            allowOverlaps = os.path.exists(
                createDirPath(trackName, genome, allowOverlaps=True))

        trackGESource = TrackGenomeElementSource(genome, trackName, regionList, globalCoords=globalCoords, \
                                                 allowOverlaps=allowOverlaps, printWarnings=False)

        composerCls = None
        if asOriginal:
            ti = TrackInfo(genome, trackName)
            if ti.fileType != '':
                try:
                    composerCls = getComposerClsFromFileSuffix(ti.fileType)
                except Exception:
                    # Best effort: fall back to fileFormatName below.  Narrowed
                    # from a bare except so that SystemExit and
                    # KeyboardInterrupt are no longer swallowed.
                    pass

        if composerCls is None:
            composerCls = getComposerClsFromFileFormatName(fileFormatName)

        if addSuffix:
            fn = os.path.splitext(
                fn)[0] + '.' + composerCls.getDefaultFileNameSuffix()

        composer = composerCls(trackGESource)
        ok = composer.composeToFile(fn, ignoreEmpty=ignoreEmpty)

        if ok:
            return fn
        return None
Ejemplo n.º 42
0
    def extract(cls, trackName, regionList, fn, fileFormatName=DEFAULT_FILE_FORMAT_NAME, globalCoords=True, \
                addSuffix=False, asOriginal=False, allowOverlaps=False, ignoreEmpty=False):
        """Compose the track data of the given regions into the file fn.

        trackName      -- track to extract
        regionList     -- non-empty collection of regions; the genome is taken
                          from the first region
        fn             -- output file name
        fileFormatName -- format used to pick the composer, unless asOriginal
                          yields one
        globalCoords   -- forwarded to TrackGenomeElementSource
        addSuffix      -- replace the extension of fn with the composer's
                          default file name suffix
        asOriginal     -- try to pick the composer from the file type stored
                          in the track's TrackInfo
        allowOverlaps  -- honoured only if the withOverlaps directory of the
                          track exists
        ignoreEmpty    -- forwarded to the composer's composeToFile

        Returns the (possibly suffix-adjusted) file name if composeToFile
        reports success, otherwise None.
        """
        from gold.origdata.TrackGenomeElementSource import TrackGenomeElementSource
        from gold.origdata.FileFormatComposer import getComposerClsFromFileFormatName, getComposerClsFromFileSuffix

        assert len(regionList) > 0
        # Only iteration is relied upon, as regionList may not be indexable.
        genome = next(iter(regionList)).genome

        #To silently extract correctly if track type is dense
        if allowOverlaps:
            allowOverlaps = os.path.exists(createDirPath(trackName, genome, allowOverlaps=True))

        trackGESource = TrackGenomeElementSource(genome, trackName, regionList, globalCoords=globalCoords, \
                                                 allowOverlaps=allowOverlaps, printWarnings=False)

        composerCls = None
        if asOriginal:
            ti = TrackInfo(genome, trackName)
            if ti.fileType != '':
                try:
                    composerCls = getComposerClsFromFileSuffix(ti.fileType)
                except Exception:
                    # Best effort: fall back to fileFormatName below.  Narrowed
                    # from a bare except so that SystemExit and
                    # KeyboardInterrupt are no longer swallowed.
                    pass

        if composerCls is None:
            composerCls = getComposerClsFromFileFormatName(fileFormatName)

        if addSuffix:
            fn = os.path.splitext(fn)[0] + '.' + composerCls.getDefaultFileNameSuffix()

        composer = composerCls(trackGESource)
        ok = composer.composeToFile(fn, ignoreEmpty=ignoreEmpty)

        if ok:
            return fn
        return None
class RemoveGenomeTool(GeneralGuiTool):
    """GUI tool that moves the data of a genome to per-root .trash directories."""

    # Label shown in the GUI -> data root from which the genome can be removed.
    ALL_PATHS = OrderedDict([('collectedTracks', NONSTANDARD_DATA_PATH),
                             ('standardizedTracks', ORIG_DATA_PATH),
                             ('parsingErrorTracks', PARSING_ERROR_DATA_PATH),
                             ('nmerChains', NMER_CHAIN_DATA_PATH),
                             ('preProcessedTracks (noOverlaps)',
                              createDirPath('', '', allowOverlaps=False)),
                             ('preProcessedTracks (withOverlaps)',
                              createDirPath('', '', allowOverlaps=True))])

    @staticmethod
    def getToolName():
        """Return the display name of the tool."""
        return "Remove genome"

    @staticmethod
    def getInputBoxNames():
        """Return (label, key) pairs for the input boxes: 'genome' and 'paths'."""
        return [('Genome', 'genome'),
                ('From which paths to remove the genome', 'paths')]

    @staticmethod
    def getOptionsBoxGenome():
        """Return the option specification of the genome box."""
        return "__genome__"

    @classmethod
    def getOptionsBoxPaths(cls, prevChoices):
        """Return one entry per data root label, each preselected (True)."""
        return OrderedDict((key, True) for key in cls.ALL_PATHS)

    #@staticmethod
    #def getOptionsBox3(prevChoices):
    #    return ['']

    #@staticmethod
    #def getOptionsBox4(prevChoices):
    #    return ['']

    #@staticmethod
    #def getDemoSelections():
    #    return ['testChoice1','..']

    @classmethod
    def execute(cls, choices, galaxyFn=None, username=''):
        '''Move the selected genome out of every selected data root.

        Called when the execute button is pushed by the web user; progress is
        printed to standard out.

        choices  -- selections made by the web user; choices.genome is the
                    genome and choices.paths maps data root labels to a
                    selected flag
        galaxyFn -- not used by this implementation
        username -- not used by this implementation

        For each selected root, <root>/<genome> is moved to
        <root>/.trash/<genome> if it exists.
        '''

        # Parenthesised single-argument print behaves the same on Python 2
        # and also parses on Python 3.
        print('Executing... starting to remove ' + choices[0] + os.linesep)

        # items() instead of the Python-2-only iteritems(); order is the same.
        paths = [
            cls.ALL_PATHS[key] for key, val in choices.paths.items() if val
        ]

        # The genome does not change between data roots, so look it up once.
        genome = choices.genome
        for p in paths:
            origPath = os.sep.join([p, genome])
            trashPath = os.sep.join([p, ".trash", genome])

            if os.path.exists(origPath):
                print('Moving ' + genome + ' to .trash in folder: ' + p + os.linesep)
                ensurePathExists(trashPath)
                shutil.move(origPath, trashPath)

    @staticmethod
    def validateAndReturnErrors(choices):
        '''
        Validate the selected input parameters.

        Returns an error text if no genome is selected or if no path is
        selected; the GUI shows this text to the user and greys out the
        execute button. Returns None if all parameters are valid, which
        enables the execute button.
        '''
        if not choices.genome:
            return 'Please select a genome'

        if not any(choices.paths.values()):
            return 'Please select at least one path'

        return None

    #@staticmethod
    #def isPublic():
    #    return False
    #
    #@staticmethod
    #def isRedirectTool():
    #    return False
    #
    @staticmethod
    def getToolDescription():
        """Return the description text of the tool."""
        return 'This tool will remove a genome and associated tracks. '+\
               '(Note: Genome is not deleted, but moved to .trash directories)'

    @staticmethod
    def isDynamic():
        """Return False."""
        return False
 def setUp(self):
     """Compute the track directory and the shelve file path used by the tests."""
     trackName = ['testBoundingRegionShelve']
     self._path = createDirPath(trackName, 'TestGenome', allowOverlaps=False)
     self._fn = os.sep.join([self._path, 'boundingRegions.shelve'])
Ejemplo n.º 45
0
    # Unpack the TestGenome archive into ORIG_DATA_PATH; with
    # --keep-newer-files, tar does not replace existing files that are newer
    # than their archive copies.
    executeShellCmd('tar xfz %s --keep-newer-files -C %s' % (testGenomeFn, ORIG_DATA_PATH), \
                    pipe=False, printError=True, onError='exit')
    print 'OK: Extracted TestGenome files.'

    # NOTE(review): presumably makes the job re-raise errors so that the
    # except clause below sees them - confirm in PreProcessAllTracksJob.
    PreProcessAllTracksJob.PASS_ON_EXCEPTIONS = True
    try:
        # Preprocess once without a track name and once for the chromosome track.
        PreProcessAllTracksJob('TestGenome').process()
        PreProcessAllTracksJob('TestGenome', GenomeInfo.getChrTrackName('TestGenome')).process()
        print 'OK: Finished preprocessing TestGenome.'
    except Exception, e:
        # Any preprocessing failure aborts the script with exit status 1.
        print 'FAILED: Error when preprocessing TestGenome. Error:'
        print '        ' + str(e).strip()
        sys.exit(1)
    
    # Copy the preprocessed 'GESourceTracks' directory of TestGenome to the
    # root directory of the 'ModelsForExternalTracks' genome, for both overlap
    # variants, unless the target already exists.
    for allowOverlaps in [False, True]:
        fromDir = createDirPath(['GESourceTracks'], 'TestGenome', allowOverlaps=allowOverlaps)
        toDir = createDirPath([], 'ModelsForExternalTracks', allowOverlaps=allowOverlaps)
        try:
            if not os.path.exists(toDir):
                shutil.copytree(fromDir, toDir)
                print 'OK: Copied from %s to %s.' % (fromDir, toDir)
        except Exception, e:
            print 'FAILED: Error occurred copying from %s to %s: ' % (fromDir, toDir) + str(e).strip()
            sys.exit(1)
            
    # Store the TrackInfo of every GESourceTracks subtype again, now under the
    # genome 'ModelsForExternalTracks' and with the subtype as the whole track name.
    for track in ProcTrackOptions.getSubtypes('TestGenome', ['GESourceTracks']):
        ti = TrackInfo('TestGenome', ['GESourceTracks', track])
        ti.trackName = [track]
        ti.genome = 'ModelsForExternalTracks'
        ti.store()
            
 def setUp(self):
     """Derive the test track directory and the path of its shelve file."""
     trackDir = createDirPath(['testBoundingRegionShelve'], 'TestGenome', allowOverlaps=False)
     self._path = trackDir
     self._fn = os.sep.join([self._path, 'boundingRegions.shelve'])
 def oldTypePreProcFilesExist(cls, genome, trackName, allowOverlaps):
     """Report old-type preprocessed data for the track.

     Returns False if the track directory does not exist; otherwise returns
     the result of cls._hasOldTypeChromSubDirs for that directory.
     """
     dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
     if not os.path.exists(dirPath):
         return False
     return cls._hasOldTypeChromSubDirs(dirPath, genome)
Ejemplo n.º 48
0
 def _getDirContents(genome, trackName):
     """Return the entries of the track's directory, or [] if it does not exist."""
     dirPath = createDirPath(trackName, genome)
     if os.path.exists(dirPath):
         return os.listdir(dirPath)
     return []
Ejemplo n.º 49
0
    print 'OK: Extracted TestGenome files.'

    # NOTE(review): presumably makes the job re-raise errors so that the
    # except clause below sees them - confirm in PreProcessAllTracksJob.
    PreProcessAllTracksJob.PASS_ON_EXCEPTIONS = True
    try:
        # Preprocess once without a track name and once for the chromosome track.
        PreProcessAllTracksJob('TestGenome').process()
        PreProcessAllTracksJob(
            'TestGenome', GenomeInfo.getChrTrackName('TestGenome')).process()
        print 'OK: Finished preprocessing TestGenome.'
    except Exception, e:
        # Any preprocessing failure aborts the script with exit status 1.
        print 'FAILED: Error when preprocessing TestGenome. Error:'
        print '        ' + str(e).strip()
        sys.exit(1)

    # Copy the preprocessed 'GESourceTracks' directory of TestGenome to the
    # root directory of the 'ModelsForExternalTracks' genome, for both overlap
    # variants, unless the target already exists.
    for allowOverlaps in [False, True]:
        fromDir = createDirPath(['GESourceTracks'],
                                'TestGenome',
                                allowOverlaps=allowOverlaps)
        toDir = createDirPath([],
                              'ModelsForExternalTracks',
                              allowOverlaps=allowOverlaps)
        try:
            if not os.path.exists(toDir):
                shutil.copytree(fromDir, toDir)
                print 'OK: Copied from %s to %s.' % (fromDir, toDir)
        except Exception, e:
            print 'FAILED: Error occurred copying from %s to %s: ' % (
                fromDir, toDir) + str(e).strip()
            sys.exit(1)

    # Iterate over the subtypes found below the 'GESourceTracks' track.
    for track in ProcTrackOptions.getSubtypes('TestGenome',
                                              ['GESourceTracks']):
 def _preProcess(self, trackName):
     """Remove both preprocessed directories of the track, then preprocess it again."""
     # noOverlaps first, then withOverlaps.
     for allowOverlaps in (False, True):
         trackDir = createDirPath(trackName, self.GENOME, allowOverlaps=allowOverlaps)
         self._removeDir(trackDir, trackName)

     job = PreProcessAllTracksJob(self.GENOME, trackName, username="******")
     job.process()
Ejemplo n.º 51
0
    def _getDirContents(genome, trackName):
        """List the track's directory; a missing directory gives an empty list."""
        dirPath = createDirPath(trackName, genome)
        if not os.path.exists(dirPath):
            return []
        return os.listdir(dirPath)