def _createPreProcFiles(self):
        """Record the track's metadata and, in 'Real' mode, write its elements.

        Steps, in order:

        1. Fetch the GESource from ``self._geSourceManager`` and hand its
           metadata (file suffix, prefixes, value/edge-weight types, version,
           id, element count, bounding regions, categories, overlap rule) to a
           ``PreProcMetaDataCollector`` for later finalization.
        2. Return immediately if the manager reports zero elements.
        3. If ``self._mode`` is not ``'Real'``, iterate the GESource to the end
           without writing anything, then return.
        4. Otherwise write every element through an ``OutputManager``, flag
           the manager's chromosomes as preprocessed, and close the output.

        Returns None. Exceptions raised by the GESource, the collector or the
        output propagate to the caller; the output is closed first.
        """
        geSource = self._geSourceManager.getGESource()
        genome = geSource.genome

        collector = PreProcMetaDataCollector(genome, self._trackName)

        collector.updateMetaDataForFinalization(
            geSource.getFileSuffix(), geSource.getPrefixList(),
            geSource.getValDataType(), geSource.getValDim(),
            geSource.getEdgeWeightDataType(), geSource.getEdgeWeightDim(),
            geSource.hasUndirectedEdges(),
            geSource.getVersion(), PreProcessUtils.constructId(geSource),
            self._geSourceManager.getNumElements(),
            self._geSourceManager.getBoundingRegionTuples(),
            self._geSourceManager.getValCategories(),
            self._geSourceManager.getEdgeWeightCategories(),
            self._allowOverlaps)

        if self._geSourceManager.getNumElements() == 0:
            return

        if self._mode != 'Real':
            # The source is still consumed in full even though nothing is
            # written -- presumably so that parsing errors surface in this
            # mode too (TODO confirm).
            for ge in geSource:
                pass
            return

        output = OutputManager(genome, self._trackName, self._allowOverlaps, self._geSourceManager)

        # Close the output even when writing or flagging fails, so that a
        # failing source does not leave the OutputManager open.
        try:
            # Slice sources are written slice by slice, all others element by element.
            writeFunc = output.writeRawSlice if geSource.isSliceSource() else output.writeElement

            for ge in geSource:
                writeFunc(ge)

            collector.flagChrsAsPreProcessed(self._allowOverlaps, self._geSourceManager.getAllChrs())
        finally:
            output.close()
    def _allGESourceManagers(self, trackName, allowOverlaps):
        """Generate the GESourceManagers to preprocess for one overlap rule.

        trackName     -- sequence of strings; joined with ':' for status text.
        allowOverlaps -- the overlap rule being prepared.

        When allowOverlaps equals False and the collector reports that the
        overlap rule queried with ``True`` has been finalized, exactly one
        manager is yielded, obtained from the track itself. Otherwise the
        GESources of the track are walked and a manager is yielded for each
        one that ``PreProcessUtils.shouldPreProcessGESource`` accepts.

        ``self._status`` is updated before each candidate is handled.
        """
        metaData = PreProcMetaDataCollector(self._genome, trackName)

        # The '==' comparisons against the bool literals are kept as-is so
        # that non-bool arguments are treated exactly as before.
        if allowOverlaps == False and metaData.overlapRuleHasBeenFinalized(True):
            trackPart = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
            overlapPart = ' (allowOverlaps: %s)' % allowOverlaps
            self._status = trackPart + overlapPart
            yield self._getGESourceManagerFromTrack(trackName)
            return

        for geSource in self._allGESources(trackName):
            if allowOverlaps == True:
                trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
                if trackFormat.isDense() or geSource.hasNoOverlappingElements():
                    # Ends the whole generator, not only this GESource.
                    return

            trackPart = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
            if geSource.hasOrigFile():
                filePart = ' (filename: "%s")' % geSource.getFileName()
            else:
                filePart = ''
            overlapPart = ' (allowOverlaps: %s)' % allowOverlaps
            self._status = trackPart + filePart + overlapPart

            if PreProcessUtils.shouldPreProcessGESource(trackName, geSource, allowOverlaps):
                yield self._getGESourceManagerFromGESource(geSource)
Example #3
0
    def _allGESourceManagers(self, trackName, allowOverlaps):
        """Generate the GESourceManagers to preprocess for one overlap rule.

        trackName     -- sequence of strings; joined with ':' for status text.
        allowOverlaps -- the overlap rule being prepared.

        When allowOverlaps equals False and the collector reports that the
        overlap rule queried with ``True`` has been finalized, exactly one
        manager is yielded, obtained from the track itself. Otherwise the
        GESources of the track are walked and a manager is yielded for each
        one that ``PreProcessUtils.shouldPreProcessGESource`` accepts.

        ``self._status`` is updated before each candidate is handled.
        """
        metaData = PreProcMetaDataCollector(self._genome, trackName)

        # The '==' comparisons against the bool literals are kept as-is so
        # that non-bool arguments are treated exactly as before.
        if allowOverlaps == False and metaData.overlapRuleHasBeenFinalized(True):
            statusHead = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
            statusTail = ' (allowOverlaps: %s)' % allowOverlaps
            self._status = statusHead + statusTail
            yield self._getGESourceManagerFromTrack(trackName)
            return

        for geSource in self._allGESources(trackName):
            if allowOverlaps == True:
                trackFormat = TrackFormat.createInstanceFromGeSource(geSource)
                if trackFormat.isDense() or geSource.hasNoOverlappingElements():
                    # Ends the whole generator, not only this GESource.
                    return

            statusHead = 'Trying to prepare preprocessing for track "%s"' % ':'.join(trackName)
            if geSource.hasOrigFile():
                statusFile = ' (filename: "%s")' % geSource.getFileName()
            else:
                statusFile = ''
            statusTail = ' (allowOverlaps: %s)' % allowOverlaps
            self._status = statusHead + statusFile + statusTail

            if PreProcessUtils.shouldPreProcessGESource(trackName, geSource, allowOverlaps):
                yield self._getGESourceManagerFromGESource(geSource)
    def process(self):
        """Preprocess and finalize every track from ``self._allTrackNames()``.

        For each track name both overlap rules are handled in turn
        (``allowOverlaps=True`` first, then ``False``):

        * Every manager yielded by ``self._allGESourceManagers`` is run through
          a ``PreProcessGeSourceJob`` when ``self._shouldPreProcess()`` is true.
          Tracks whose job reports warnings are collected in
          ``self._warningTrackNames`` if ``self._raiseIfAnyWarnings`` is set.
        * The overlap rule is finalized when at least one manager was yielded,
          ``self._shouldFinalize()`` is true and the collector reports dirty
          data. In 'Real' mode with ``self._shouldMergeChrFolders()`` true,
          the chromosome folders are merged and then removed first.

        Afterwards, if ``self._shouldFinalize()`` is true, the track is
        finalized when dirty; otherwise its collector entry is removed.

        Any exception raised while handling a track removes the collector
        entry. It is then re-raised when ``self.PASS_ON_EXCEPTIONS`` is true;
        otherwise it is reported through ``self._printExceptionMsg`` (with
        ``Error=False`` for NotSupportedError, ``Error=True`` for anything
        else) and processing continues with the next track.

        Raises AssertionError if no genome is set or a track name is [''].
        """
        assert self._genome is not None, 'Error: genome must be specified when preprocessing tracks.'

        for trackName in self._allTrackNames():
            assert trackName != ['']
            overlapRulesProcessedForTrackName = []
            # NOTE(review): the collector is created from the track name as it
            # is *before* _renameTrackNameIfIllegal runs below -- confirm that
            # this is intended.
            collector = PreProcMetaDataCollector(self._genome, trackName)

            try:
                trackName = self._renameTrackNameIfIllegal(trackName)

                for allowOverlaps in [True, False]:
                    anyGeSourceManagers = False

                    for geSourceManager in self._allGESourceManagers(trackName, allowOverlaps):
                        anyGeSourceManagers = True

                        # PreProcess if needed
                        if not self._shouldPreProcess():
                            continue

                        PreProcessUtils.removeOutdatedPreProcessedFiles(self._genome, trackName, allowOverlaps, self._mode)

                        # Print the track message only once per overlap rule.
                        if self._shouldPrintProcessMessages() and allowOverlaps not in overlapRulesProcessedForTrackName:
                            self._printProcessTrackMessage(trackName, allowOverlaps)
                            overlapRulesProcessedForTrackName.append(allowOverlaps)

                        self._status = 'Trying to preprocess geSource...'
                        geSourceJob = PreProcessGeSourceJob(trackName, geSourceManager, allowOverlaps, self._mode)
                        anyWarnings = geSourceJob.process()

                        if self._raiseIfAnyWarnings and anyWarnings and trackName not in self._warningTrackNames:
                            self._warningTrackNames.append(trackName)

                        collector.updatePreProcDirtyStatus(geSourceJob.hasModifiedData())

                    # Finalize overlapRule output if needed
                    if anyGeSourceManagers and self._shouldFinalize() and collector.preProcIsDirty():
                        if self._mode == 'Real' and self._shouldMergeChrFolders():
                            self._status = 'Trying to combine chromosome vectors into combined vectors.'
                            PreProcessUtils.createBoundingRegionShelve(self._genome, trackName, allowOverlaps)
                            ChrMemmapFolderMerger.merge(self._genome, trackName, allowOverlaps)

                            self._status = 'Trying to remove chromosome folders'
                            PreProcessUtils.removeChrMemmapFolders(self._genome, trackName, allowOverlaps)

                        self._status = 'Trying to check whether 3D data is correct'
                        PreProcessUtils.checkIfEdgeIdsExist(self._genome, trackName, allowOverlaps)
                        # NOTE(review): checkUndirectedEdges is called twice with
                        # identical arguments; this looks like a copy/paste
                        # duplicate -- confirm and drop one call.
                        PreProcessUtils.checkUndirectedEdges(self._genome, trackName, allowOverlaps)
                        PreProcessUtils.checkUndirectedEdges(self._genome, trackName, allowOverlaps)
                        collector.markOverlapRuleAsFinalized(allowOverlaps)

                # Finalize track if needed
                if self._shouldFinalize():
                    if collector.preProcIsDirty():
                        self._status = 'Trying to finalize.'
                        collector.finalize(self._username, self._shouldPrintProcessMessages())
                    else:
                        collector.removeEntry()

            except (NotSupportedError, Exception) as e:
                # NotSupportedError is listed explicitly so it is still caught
                # even if it does not derive from Exception.
                collector.removeEntry()
                if self.PASS_ON_EXCEPTIONS:
                    raise
                else:
                    # Unsupported input is reported as a non-error; everything else as an error.
                    self._printExceptionMsg(e, trackName, Error=not isinstance(e, NotSupportedError))
Example #5
0
    def process(self):
        """Preprocess and finalize every track from ``self._allTrackNames()``.

        For each track name both overlap rules are handled in turn
        (``allowOverlaps=True`` first, then ``False``):

        * Every manager yielded by ``self._allGESourceManagers`` is run through
          a ``PreProcessGeSourceJob`` when ``self._shouldPreProcess()`` is true.
          Tracks whose job reports warnings are collected in
          ``self._warningTrackNames`` if ``self._raiseIfAnyWarnings`` is set.
        * The overlap rule is finalized when at least one manager was yielded,
          ``self._shouldFinalize()`` is true and the collector reports dirty
          data. In 'Real' mode with ``self._shouldMergeChrFolders()`` true,
          the chromosome folders are merged and then removed first.

        Afterwards, if ``self._shouldFinalize()`` is true, the track is
        finalized when dirty; otherwise its collector entry is removed.

        Any exception raised while handling a track removes the collector
        entry. It is then re-raised when ``self.PASS_ON_EXCEPTIONS`` is true;
        otherwise it is reported through ``self._printExceptionMsg`` (with
        ``Error=False`` for NotSupportedError, ``Error=True`` for anything
        else) and processing continues with the next track.

        Raises AssertionError if no genome is set or a track name is [''].
        """
        assert self._genome is not None, 'Error: genome must be specified when preprocessing tracks.'

        for trackName in self._allTrackNames():
            assert trackName != ['']
            overlapRulesProcessedForTrackName = []
            # NOTE(review): the collector is created from the track name as it
            # is *before* _renameTrackNameIfIllegal runs below -- confirm that
            # this is intended.
            collector = PreProcMetaDataCollector(self._genome, trackName)

            try:
                trackName = self._renameTrackNameIfIllegal(trackName)

                for allowOverlaps in [True, False]:
                    anyGeSourceManagers = False

                    for geSourceManager in self._allGESourceManagers(
                            trackName, allowOverlaps):
                        anyGeSourceManagers = True

                        # PreProcess if needed
                        if not self._shouldPreProcess():
                            continue

                        PreProcessUtils.removeOutdatedPreProcessedFiles(
                            self._genome, trackName, allowOverlaps,
                            self._mode)

                        # Print the track message only once per overlap rule.
                        if (self._shouldPrintProcessMessages()
                                and allowOverlaps not in overlapRulesProcessedForTrackName):
                            self._printProcessTrackMessage(
                                trackName, allowOverlaps)
                            overlapRulesProcessedForTrackName.append(
                                allowOverlaps)

                        self._status = 'Trying to preprocess geSource...'
                        geSourceJob = PreProcessGeSourceJob(
                            trackName, geSourceManager, allowOverlaps,
                            self._mode)
                        anyWarnings = geSourceJob.process()

                        if self._raiseIfAnyWarnings and anyWarnings and trackName not in self._warningTrackNames:
                            self._warningTrackNames.append(trackName)

                        collector.updatePreProcDirtyStatus(
                            geSourceJob.hasModifiedData())

                    # Finalize overlapRule output if needed
                    if (anyGeSourceManagers and self._shouldFinalize()
                            and collector.preProcIsDirty()):
                        if self._mode == 'Real' and self._shouldMergeChrFolders():
                            self._status = 'Trying to combine chromosome vectors into combined vectors.'
                            PreProcessUtils.createBoundingRegionShelve(
                                self._genome, trackName, allowOverlaps)
                            ChrMemmapFolderMerger.merge(
                                self._genome, trackName, allowOverlaps)

                            self._status = 'Trying to remove chromosome folders'
                            PreProcessUtils.removeChrMemmapFolders(
                                self._genome, trackName, allowOverlaps)

                        self._status = 'Trying to check whether 3D data is correct'
                        PreProcessUtils.checkIfEdgeIdsExist(
                            self._genome, trackName, allowOverlaps)
                        # NOTE(review): checkUndirectedEdges is called twice with
                        # identical arguments; this looks like a copy/paste
                        # duplicate -- confirm and drop one call.
                        PreProcessUtils.checkUndirectedEdges(
                            self._genome, trackName, allowOverlaps)
                        PreProcessUtils.checkUndirectedEdges(
                            self._genome, trackName, allowOverlaps)
                        collector.markOverlapRuleAsFinalized(allowOverlaps)

                # Finalize track if needed
                if self._shouldFinalize():
                    if collector.preProcIsDirty():
                        self._status = 'Trying to finalize.'
                        collector.finalize(self._username,
                                           self._shouldPrintProcessMessages())
                    else:
                        collector.removeEntry()

            except (NotSupportedError, Exception) as e:
                # NotSupportedError is listed explicitly so it is still caught
                # even if it does not derive from Exception.
                collector.removeEntry()
                if self.PASS_ON_EXCEPTIONS:
                    raise
                else:
                    # Unsupported input is reported as a non-error; everything else as an error.
                    self._printExceptionMsg(
                        e, trackName,
                        Error=not isinstance(e, NotSupportedError))