Ejemplo n.º 1
0
 def removeOutdatedPreProcessedFiles(genome, trackName, allowOverlaps, mode):
     collector = PreProcMetaDataCollector(genome, trackName)
     if PreProcessUtils.preProcFilesExist(genome, trackName, allowOverlaps) and not \
         collector.hasRemovedPreProcFiles(allowOverlaps):
             dirPath = createDirPath(trackName, genome, allowOverlaps=allowOverlaps)
             
             assert dirPath.startswith(Config.PROCESSED_DATA_PATH), \
                 "Processed data path '%s' does not start with '%s'" % \
                 (dirPath, Config.PROCESSED_DATA_PATH)
             if mode == 'Real':
                 print 'Removing outdated preprocessed data: ', dirPath
                 for fn in os.listdir(dirPath):
                     fullFn = os.path.join(dirPath, fn)
                     if os.path.isfile(fullFn):
                         os.unlink(fullFn)
                     if os.path.isdir(fullFn):
                         if PreProcessUtils._isOldTypeChromDirectory(fullFn, genome):
                             shutil.rmtree(fullFn)
             else:
                 print 'Would now have removed outdated preprocessed data if real run: ', dirPath
             
             collector.updateRemovedPreProcFilesFlag(allowOverlaps, True)
     
     if mode == 'Real':
         ti = TrackInfo(genome, trackName)
         ti.resetTimeOfPreProcessing()
Ejemplo n.º 2
0
 def _calcAndStoreSubTrackCount(self, trackName):
     """
     Compute and store the sub-track count of ``trackName``.

     The count is the sum of the (truthy) subTrackCount values stored for
     every subtype returned by ProcTrackOptions.getSubtypes, plus one if
     the track itself is valid. The result is written to the track's
     TrackInfo and stored.
     """
     trackInfo = TrackInfo(self._genome, trackName)

     childCounts = (TrackInfo(self._genome, trackName + [child]).subTrackCount
                    for child in ProcTrackOptions.getSubtypes(self._genome, trackName, True))
     # Falsy counts (e.g. None or 0) contribute nothing.
     total = sum(count for count in childCounts if count)

     if trackInfo.isValid():
         total += 1

     trackInfo.subTrackCount = total
     trackInfo.store()
Ejemplo n.º 3
0
def modifyTnRecord(genome, oldTn, newTn, verbose):
    trackInfo = TrackInfo(genome, oldTn)
    assert trackInfo.trackName == oldTn
    assert trackInfo.timeOfPreProcessing is not None, 'ERROR: trackInfo-object not complete for TN (is this track preprocessed?): ' + str(oldTn)
    #if trackInfo.timeOfPreProcessing is None:
        #print 'WARNING: timeOfPreProcessing is None for: ',oldTn
        
    trackInfo.trackName = newTn
    if not ONLY_SIMULATION:
        trackInfo.store()
        if verbose:
            print '(Storing track-info with new tn: %s)' % str(newTn)
    else:
        if verbose:
            print 'Would now store track-info with new tn: %s' % str(newTn)
Ejemplo n.º 4
0
    def getTrackExtractionOptions(genome, trackName):
        from gtrackcore.track.core.Track import PlainTrack
        from gtrackcore.input.userbins.UserBinSource import MinimalBinSource
        from gtrackcore.extract.fileformats.FileFormatComposer import \
            findMatchingFileFormatComposers, getComposerClsFromFileSuffix

        tf = PlainTrack(trackName).getTrackView(
            MinimalBinSource(genome)[0]).trackFormat

        extractionOptions = []
        matchingComposers = findMatchingFileFormatComposers(tf)
        for composerInfo in matchingComposers:
            allOverlapRules = tf.getAllOverlapRules()
            for allowOverlaps in allOverlapRules:
                extractionOptions.append( \
                    (composerInfo.trackFormatName.capitalize() + \
                        ' ' + TrackExtractor.getFileFormatText(composerInfo.fileFormatName) + \
                        (', ' + (TrackExtractor.ALLOW_OVERLAPS_TRUE_TEXT if allowOverlaps else \
                                 TrackExtractor.ALLOW_OVERLAPS_FALSE_TEXT) \
                                 if len(allOverlapRules) > 1 else ''), \
                     composerInfo.fileSuffix) )

        ti = TrackInfo(genome, trackName)
        if ti.fileType != '':
            try:
                extractionOptions.append(
                    (TrackExtractor.ORIG_FILE_FORMAT_TEXT.capitalize() + \
                        ' ' + TrackExtractor.getFileSuffixText(ti.fileType), \
                     getComposerClsFromFileSuffix(ti.fileType).getDefaultFileNameSuffix()))
            except Exception, e:
                print 'Error:', e
Ejemplo n.º 5
0
def modifyTnRecord(genome, oldTn, newTn, verbose):
    trackInfo = TrackInfo(genome, oldTn)
    assert trackInfo.trackName == oldTn
    assert trackInfo.timeOfPreProcessing is not None, 'ERROR: trackInfo-object not complete for TN (is this track preprocessed?): ' + str(
        oldTn)
    #if trackInfo.timeOfPreProcessing is None:
    #print 'WARNING: timeOfPreProcessing is None for: ',oldTn

    trackInfo.trackName = newTn
    if not ONLY_SIMULATION:
        trackInfo.store()
        if verbose:
            print '(Storing track-info with new tn: %s)' % str(newTn)
    else:
        if verbose:
            print 'Would now store track-info with new tn: %s' % str(newTn)
Ejemplo n.º 6
0
 def constructId(geSource):
     """
     Return the id for a genome element source.

     Sources without an original file report their own id via getId().
     Otherwise the id is built by TrackInfo.constructIdFromPath from the
     genome, a path, the source version and PreProcessTracksJob.VERSION.
     The path is the file name itself for external sources and the
     directory of the file name for all others.
     """
     from gtrackcore.preprocess.PreProcessTracksJob import PreProcessTracksJob

     if not geSource.hasOrigFile():
         return geSource.getId()

     if geSource.isExternal():
         sourcePath = geSource.getFileName()
     else:
         sourcePath = os.path.dirname(geSource.getFileName())

     return TrackInfo.constructIdFromPath(geSource.getGenome(), sourcePath,
                                          geSource.getVersion(), PreProcessTracksJob.VERSION)
Ejemplo n.º 7
0
    def getUniqueKey(self, genome):
        """
        Return a hash identifying this track together with its format requirements.

        The key combines the track name, the track id, and the allowOverlaps
        and borderHandling settings of the track format requirement. The
        track id is looked up through TrackInfo and cached on the instance
        the first time it is needed.

        Raises AssertionError if allowOverlaps or borderHandling is None.
        """
        req = self._trackFormatReq
        assert None not in (req.allowOverlaps(), req.borderHandling())

        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id

        keyParts = (tuple(self.trackName), self._trackId,
                    req.allowOverlaps(), req.borderHandling())
        return hash(keyParts)
Ejemplo n.º 8
0
    def isValidTrack(genome, trackName, fullAccess=False):
        """
        Tell whether ``trackName`` is a valid track with usable directory contents.

        Returns False when the track's TrackInfo is not valid for the given
        ``fullAccess`` setting. Otherwise returns True as soon as one entry
        of the track's directory contents is either a valid chromosome name
        for the genome or a bounding region file name, and False if no entry
        qualifies.
        """
        if not TrackInfo(genome, trackName).isValid(fullAccess):
            return False

        return any(GenomeInfo.isValidChr(genome, entry) or isBoundingRegionFileName(entry)
                   for entry in ProcTrackOptions._getDirContents(genome, trackName))
Ejemplo n.º 9
0
    def getUniqueKey(self, genome):
        """
        Return a hash identifying this track, its first format converter and
        its format requirements.

        The track id is looked up through TrackInfo and cached on the
        instance when not already set. Every falsy component (missing track
        id, no format converters, falsy allowOverlaps or borderHandling) is
        represented by the empty string in the hashed tuple.
        """
        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id

        if self.formatConverters:
            firstConverter = self.formatConverters[0]
            converterName = getClassName(firstConverter)
            converterVersion = firstConverter.VERSION
        else:
            converterName = ''
            converterVersion = ''

        req = self._trackFormatReq
        keyParts = (
            tuple(self.trackName),
            self._trackId or '',
            converterName,
            converterVersion,
            req.allowOverlaps() if req.allowOverlaps() else '',
            req.borderHandling() if req.borderHandling() else '',
        )
        return hash(keyParts)
Ejemplo n.º 10
0
    def getUniqueKey(self, genome):
        """
        Return a hash identifying this track, its single format converter and
        its format requirements.

        The hashed tuple holds the track name, the track id (looked up via
        TrackInfo and cached on the instance when not already set), the
        class name and VERSION of the format converter, and the
        allowOverlaps and borderHandling settings.

        Raises AssertionError unless exactly one format converter is set and
        neither allowOverlaps nor borderHandling is None.
        """
        converters = self.formatConverters
        assert converters is not None and len(converters) == 1, \
            'FC: ' + str(converters)

        req = self._trackFormatReq
        assert None not in (req.allowOverlaps(), req.borderHandling())

        if not self._trackId:
            self._trackId = TrackInfo(genome, self.trackName).id

        keyParts = (
            tuple(self.trackName),
            self._trackId,
            getClassName(self.formatConverters[0]),
            self.formatConverters[0].VERSION,
            req.allowOverlaps(),
            req.borderHandling(),
        )
        return hash(keyParts)
Ejemplo n.º 11
0
 def _findTrackInfoBasedMetaData(self):
     """
     Load file suffix, preprocessing version, id and edge directedness.

     Does nothing when self._foundTrackInfoBasedMetaData is already true.
     Otherwise the values are read from a PreProcMetaDataCollector if one
     has a key for this genome and track, and from the track's TrackInfo
     if not. The undirected-edges value is normalised to a bool.

     NOTE(review): the guard flag is not set in this method as shown here;
     presumably it is set elsewhere - confirm.
     """
     if self._foundTrackInfoBasedMetaData:
         return

     if PreProcMetaDataCollector.hasKey(self._genome, self._trackName):
         metaData = PreProcMetaDataCollector(self._genome, self._trackName)
         self._fileSuffix = metaData.getFileSuffix()
         self._preProcVersion = metaData.getPreProcVersion()
         self._id = metaData.getId()
         self._undirectedEdges = bool(metaData.hasUndirectedEdges())
     else:
         trackInfo = TrackInfo(self._genome, self._trackName)
         self._fileSuffix = trackInfo.fileType
         self._preProcVersion = trackInfo.preProcVersion
         self._id = trackInfo.id
         self._undirectedEdges = bool(trackInfo.undirectedEdges)
Ejemplo n.º 12
0
 def _calcAndStoreSubTrackCount(self, trackName):
     """
     Recalculate the number of sub-tracks below ``trackName`` and store it.

     Adds up the subTrackCount already stored for each subtype listed by
     ProcTrackOptions.getSubtypes (falsy counts are skipped), counts the
     track itself when its TrackInfo is valid, and stores the total on the
     track's TrackInfo.
     """
     ownInfo = TrackInfo(self._genome, trackName)

     subtypeNames = ProcTrackOptions.getSubtypes(self._genome, trackName, True)
     storedCounts = [TrackInfo(self._genome, trackName + [name]).subTrackCount
                     for name in subtypeNames]
     # filter(None, ...) drops counts that are None or 0.
     total = sum(filter(None, storedCounts))

     if ownInfo.isValid():
         total += 1

     ownInfo.subTrackCount = total
     ownInfo.store()
Ejemplo n.º 13
0
 def shouldPreProcessGESource(trackName, geSource, allowOverlaps):
     """
     Decide whether the genome element source needs to be preprocessed.

     For sources without an original file, preprocessing is needed only
     when no valid preprocessed files exist and the source is not external.

     For sources with an original file, preprocessing is skipped only when
     valid preprocessed files exist and both the id constructed from the
     source and the source version match what is stored in TrackInfo.
     """
     genome = geSource.getGenome()
     trackInfo = TrackInfo(genome, trackName)

     hasValidFiles = PreProcessUtils.preProcFilesExist(genome, trackName, allowOverlaps) and \
         trackInfo.isValid()

     if not geSource.hasOrigFile():
         return not (hasValidFiles or geSource.isExternal())

     # Stored metadata must agree with the source on both id and version.
     matchesStoredInfo = \
         (PreProcessUtils.constructId(geSource) == trackInfo.id and \
          geSource.getVersion() == trackInfo.preProcVersion)

     return (not hasValidFiles) or (not matchesStoredInfo)
Ejemplo n.º 14
0
    def extract(cls, trackName, regionList, fn, fileFormatName=DEFAULT_FILE_FORMAT_NAME, globalCoords=True, \
                addSuffix=False, asOriginal=False, allowOverlaps=False, ignoreEmpty=False):
        """
        Compose the given regions of a track into the file ``fn``.

        The genome is taken from the first region of ``regionList``.
        ``allowOverlaps`` is silently switched off when the corresponding
        overlap-allowing preprocessed directory does not exist.

        The composer class is chosen from the track's stored file type when
        ``asOriginal`` is set and that lookup succeeds; otherwise from
        ``fileFormatName``. With ``addSuffix``, the extension of ``fn`` is
        replaced by the composer's default file name suffix.

        Returns the (possibly suffix-adjusted) file name when composeToFile
        reports success, and None otherwise.

        Raises AssertionError if ``regionList`` is empty.
        """
        from gtrackcore.input.adapters.TrackGenomeElementSource import TrackGenomeElementSource
        from gtrackcore.extract.fileformats.FileFormatComposer import getComposerClsFromFileFormatName, getComposerClsFromFileSuffix

        assert len(regionList) > 0
        # Only the first region is needed to find the genome.
        for region in regionList:
            genome = region.genome
            break

        #To silently extract correctly if track type is dense
        if allowOverlaps:
            allowOverlaps = os.path.exists(
                createDirPath(trackName, genome, allowOverlaps=True))

        trackGESource = TrackGenomeElementSource(genome, trackName, regionList, globalCoords=globalCoords, \
                                                 allowOverlaps=allowOverlaps, printWarnings=False)

        composerCls = None
        if asOriginal:
            ti = TrackInfo(genome, trackName)
            if ti.fileType != '':
                try:
                    composerCls = getComposerClsFromFileSuffix(ti.fileType)
                except Exception:
                    # Best effort: fall back to fileFormatName below. Catching
                    # Exception (not a bare except) lets KeyboardInterrupt and
                    # SystemExit propagate.
                    pass

        if composerCls is None:
            composerCls = getComposerClsFromFileFormatName(fileFormatName)

        if addSuffix:
            fn = os.path.splitext(fn)[0] + '.' + composerCls.getDefaultFileNameSuffix()

        composer = composerCls(trackGESource)
        ok = composer.composeToFile(fn, ignoreEmpty=ignoreEmpty)

        if ok:
            return fn
        return None
Ejemplo n.º 15
0
    def finalize(self, username, printMsg):
        ti = TrackInfo(self._genome, self._trackName)

        ti.fileType = self._fileSuffix
        trackFormat = self.getTrackFormat()
        ti.trackFormatName = trackFormat.getFormatName()
        ti.markType = trackFormat.getValTypeName()
        ti.weightType = trackFormat.getWeightTypeName()
        ti.undirectedEdges = self._undirectedEdges
        ti.preProcVersion = self._preProcVersion

        ti.origElCount = self._numElements[True]
        ti.clusteredElCount = self._numElements[False]

        if trackFormat.isDense() and trackFormat.isInterval():
            ti.origElCount -= len(self._boundingRegionTuples[True])
            ti.clusteredElCount -= len(self._boundingRegionTuples[False])

        if True in self._valCategories:
            ti.numValCategories = len(self._valCategories[True])

        if False in self._valCategories:
            ti.numClusteredValCategories = len(self._valCategories[False])

        if True in self._edgeWeightCategories:
            ti.numEdgeWeightCategories = len(self._edgeWeightCategories[True])

        ti.id = self._id
        ti.timeOfPreProcessing = datetime.datetime.now()

        ti.lastUpdatedBy = username
        if ti.hbContact == '':
            ti.hbContact = username

        ti.store()

        if printMsg:
            print "Finished preprocessing track '%s'." % ':'.join(
                self._trackName)
            print

        self.removeEntry()
Ejemplo n.º 16
0
 def _calcAndStoreSubTrackCount(self, trackName):
     """
     Store a sub-track count of 1 for ``trackName`` if its TrackInfo is valid.

     Nothing is written for tracks whose TrackInfo is not valid.
     """
     trackInfo = TrackInfo(self._genome, trackName)
     if not trackInfo.isValid():
         return

     trackInfo.subTrackCount = 1
     trackInfo.store()
Ejemplo n.º 17
0
 def _calcAndStoreSubTrackCount(self, trackName):
     """
     Record that a valid track counts as exactly one sub-track.

     The TrackInfo of ``trackName`` is only modified and stored when it
     reports itself as valid.
     """
     info = TrackInfo(self._genome, trackName)
     trackIsValid = info.isValid()
     if trackIsValid:
         info.subTrackCount = 1
         info.store()
Ejemplo n.º 18
0
    def finalize(self, username, printMsg):
        ti = TrackInfo(self._genome, self._trackName)
        
        ti.fileType = self._fileSuffix
        trackFormat = self.getTrackFormat()
        ti.trackFormatName = trackFormat.getFormatName()
        ti.markType = trackFormat.getValTypeName()
        ti.weightType = trackFormat.getWeightTypeName()
        ti.undirectedEdges = self._undirectedEdges
        ti.preProcVersion = self._preProcVersion

        ti.origElCount = self._numElements[True]
        ti.clusteredElCount = self._numElements[False]
        
        if trackFormat.isDense() and trackFormat.isInterval():
            ti.origElCount -= len(self._boundingRegionTuples[True])
            ti.clusteredElCount -= len(self._boundingRegionTuples[False])

        if True in self._valCategories:
            ti.numValCategories = len(self._valCategories[True])
        
        if False in self._valCategories:
            ti.numClusteredValCategories = len(self._valCategories[False])

        if True in self._edgeWeightCategories:
            ti.numEdgeWeightCategories = len(self._edgeWeightCategories[True])
        
        ti.id = self._id
        ti.timeOfPreProcessing = datetime.datetime.now()
    
        ti.lastUpdatedBy = username
        if ti.hbContact == '':
            ti.hbContact = username
        
        ti.store()
        
        if printMsg:
            print "Finished preprocessing track '%s'." % ':'.join(self._trackName)
            print
        
        self.removeEntry()