def run(self):
    """ Run the dummy OPF task for this model.

    Iterates self._iterations times (forever when negative), "writing" a
    dummy prediction on each pass and honoring the unit-test hooks
    (_sysExitModelRange, _delayModelRange, _errModelRange, _busyWaitTime,
    _jobFailErr) that simulate orphaned, slow, and failed models.

    Returns:        (completionReason, completionMsg) tuple; completionMsg
                    is always None for the dummy model.
    Raises:         RuntimeError if this model falls in _errModelRange;
                    utils.JobFailException if _jobFailErr is set.
    """
    self._logger.debug("Starting Dummy Model: modelID=%s;" % (self._modelID))

    # =========================================================================
    # Initialize periodic activities (e.g., for model result updates)
    # =========================================================================
    periodic = self._initPeriodicActivities()

    self._optimizedMetricLabel = self._optimizeKeyPattern
    self._reportMetricLabels = [self._optimizeKeyPattern]

    # =========================================================================
    # Create our top-level loop-control iterator
    # =========================================================================
    if self._iterations >= 0:
      iterTracker = iter(xrange(self._iterations))
    else:
      # Negative iteration count means run until killed/canceled.
      iterTracker = iter(itertools.count())

    # =========================================================================
    # These ranges get set in the unit tests. They tell the worker to
    # sys.exit, stall, or raise for specific models; this is how orphaned
    # and failed models are generated for testing.
    doSysExit = self._modelIDInRange(self._sysExitModelRange)

    if self._modelIDInRange(self._delayModelRange):
      time.sleep(10)  # DEBUG delay used by the unit tests

    if self._modelIDInRange(self._errModelRange):
      raise RuntimeError(
          "Exiting with error due to errModelRange parameter")

    # =========================================================================
    # Delay, if necessary
    if self._delay is not None:
      time.sleep(self._delay)

    # =========================================================================
    # Run it!
    # =========================================================================
    self._currentRecordIndex = 0
    while True:
      # =======================================================================
      # Check if the model should be stopped
      # =======================================================================

      # If killed by a terminator, stop running
      if self._isKilled:
        break

      # If job stops or hypersearch ends, stop running
      if self._isCanceled:
        break

      # If model is mature, stop running ONLY IF we are not the best model
      # for the job. Otherwise, keep running so we can keep returning
      # predictions to the user
      if self._isMature:
        if not self._isBestModel:
          self._cmpReason = self._jobsDAO.CMPL_REASON_STOPPED
          break
        else:
          self._cmpReason = self._jobsDAO.CMPL_REASON_EOF

      # =======================================================================
      # Get the next record, and "write it"
      # =======================================================================
      try:
        self._currentRecordIndex = next(iterTracker)
      except StopIteration:
        break

      # "Write" a dummy output value. This is used to test that the batched
      # writing works properly
      self._writePrediction(ModelResult(None, None, None, None))

      periodic.tick()

      # =======================================================================
      # Compute wait times. See if model should exit
      # =======================================================================
      if self.__shouldSysExit(self._currentRecordIndex):
        sys.exit(1)

      # Simulate computation time
      if self._busyWaitTime is not None:
        time.sleep(self._busyWaitTime)
        self.__computeWaitTime()

      # Asked to abort after so many iterations?
      if doSysExit:
        sys.exit(1)

      # Asked to raise a jobFailException?
      if self._jobFailErr:
        raise utils.JobFailException("E10000",
            "dummyModel's jobFailErr was True.")

    # =========================================================================
    # Handle final operations
    # =========================================================================
    if self._doFinalize:
      if not self._makeCheckpoint:
        self._model = None

      # Delay finalization operation
      if self._finalDelay is not None:
        time.sleep(self._finalDelay)

      self._finalize()

    self._logger.info("Finished: modelID=%r " % (self._modelID))

    return (self._cmpReason, None)


def _modelIDInRange(self, modelRange):
    """ Return True if this model's ID falls within the given test range.

    Used to deduplicate the _sysExitModelRange / _delayModelRange /
    _errModelRange checks: each range is a (beg, end) pair of indices into
    the job's sorted list of model IDs.

    modelRange:     None, or a (beg, end) pair (values may be strings;
                    they are coerced with int()).
    Returns:        True if modelRange is set and this model's ID is in
                    the selected slice of the job's sorted model IDs.
    """
    if modelRange is None:
      return False

    modelAndCounters = self._jobsDAO.modelsGetUpdateCounters(self._jobID)
    modelIDs = sorted(x[0] for x in modelAndCounters)
    (beg, end) = modelRange
    return self._modelID in modelIDs[int(beg):int(end)]
def __runTaskMainLoop(self, numIters, learningOffAt=None): """ Main loop of the OPF Model Runner. Parameters: ----------------------------------------------------------------------- recordIterator: Iterator for counting number of records (see _runTask) learningOffAt: If not None, learning is turned off when we reach this iteration number """ ## Reset sequence states in the model, so it starts looking for a new ## sequence self._model.resetSequenceStates() self._currentRecordIndex = -1 while True: # If killed by a terminator, stop running if self._isKilled: break # If job stops or hypersearch ends, stop running if self._isCanceled: break # If the process is about to be killed, set as orphaned if self._isInterrupted.isSet(): self.__setAsOrphaned() break # If model is mature, stop running ONLY IF we are not the best model # for the job. Otherwise, keep running so we can keep returning # predictions to the user if self._isMature: if not self._isBestModel: self._cmpReason = self._jobsDAO.CMPL_REASON_STOPPED break else: self._cmpReason = self._jobsDAO.CMPL_REASON_EOF # Turn off learning? if learningOffAt is not None \ and self._currentRecordIndex == learningOffAt: self._model.disableLearning() # Read input record. Note that any failure here is a critical JOB failure # and results in the job being immediately canceled and marked as # failed. The runModelXXX code in hypesearch.utils, if it sees an # exception of type utils.JobFailException, will cancel the job and # copy the error message into the job record. try: inputRecord = self._inputSource.getNextRecordDict() if self._currentRecordIndex < 0: self._inputSource.setTimeout(10) except Exception as e: raise utils.JobFailException(ErrorCodes.streamReading, str(e.args), traceback.format_exc()) if inputRecord is None: # EOF self._cmpReason = self._jobsDAO.CMPL_REASON_EOF break if inputRecord: # Process input record self._currentRecordIndex += 1 result = self._model.run(inputRecord=inputRecord) # Compute metrics. 
result.metrics = self.__metricMgr.update(result) # If there are None, use defaults. see MetricsManager.getMetrics() # TODO remove this when JAVA API server is gone if not result.metrics: result.metrics = self.__metricMgr.getMetrics() # Write the result to the output cache. Don't write encodings, if they # were computed if InferenceElement.encodings in result.inferences: result.inferences.pop(InferenceElement.encodings) result.sensorInput.dataEncodings = None self._writePrediction(result) # Run periodic activities self._periodic.tick() if numIters >= 0 and self._currentRecordIndex >= numIters - 1: break else: # Input source returned an empty record. # # NOTE: This is okay with Stream-based Source (when it times out # waiting for next record), but not okay with FileSource, which should # always return either with a valid record or None for EOF. raise ValueError("Got an empty record from FileSource: %r" % inputRecord)