Example #1
    def run(self):
        """ Runs the given OPF task against the given Model instance """

        self._logger.debug("Starting Dummy Model: modelID=%s;" %
                           (self._modelID))

        # =========================================================================
        # Initialize periodic activities (e.g., for model result updates)
        # =========================================================================
        periodic = self._initPeriodicActivities()

        self._optimizedMetricLabel = self._optimizeKeyPattern
        self._reportMetricLabels = [self._optimizeKeyPattern]

        # =========================================================================
        # Create our top-level loop-control iterator
        # =========================================================================
        if self._iterations >= 0:
            iterTracker = iter(xrange(self._iterations))
        else:
            iterTracker = iter(itertools.count())

        # =========================================================================
        # This gets set in the unit tests. It tells the worker to sys.exit while
        #  running one of the first N models; this is how we generate orphaned models
        doSysExit = False
        if self._sysExitModelRange is not None:
            modelAndCounters = self._jobsDAO.modelsGetUpdateCounters(
                self._jobID)
            modelIDs = [x[0] for x in modelAndCounters]
            modelIDs.sort()
            (beg, end) = self._sysExitModelRange
            if self._modelID in modelIDs[int(beg):int(end)]:
                doSysExit = True

        if self._delayModelRange is not None:
            modelAndCounters = self._jobsDAO.modelsGetUpdateCounters(
                self._jobID)
            modelIDs = [x[0] for x in modelAndCounters]
            modelIDs.sort()
            (beg, end) = self._delayModelRange
            if self._modelID in modelIDs[int(beg):int(end)]:
                time.sleep(10)

            # DEBUG!!!! infinite wait if we have 50 models
            #if len(modelIDs) >= 50:
            #  jobCancel = self._jobsDAO.jobGetFields(self._jobID, ['cancel'])[0]
            #  while not jobCancel:
            #    time.sleep(1)
            #    jobCancel = self._jobsDAO.jobGetFields(self._jobID, ['cancel'])[0]

        if self._errModelRange is not None:
            modelAndCounters = self._jobsDAO.modelsGetUpdateCounters(
                self._jobID)
            modelIDs = [x[0] for x in modelAndCounters]
            modelIDs.sort()
            (beg, end) = self._errModelRange
            if self._modelID in modelIDs[int(beg):int(end)]:
                raise RuntimeError(
                    "Exiting with error due to errModelRange parameter")

        # =========================================================================
        # Delay, if necessary
        if self._delay is not None:
            time.sleep(self._delay)

        # =========================================================================
        # Run it!
        # =========================================================================
        self._currentRecordIndex = 0
        while True:

            # =========================================================================
            # Check if the model should be stopped
            # =========================================================================

            # If killed by a terminator, stop running
            if self._isKilled:
                break

            # If job stops or hypersearch ends, stop running
            if self._isCanceled:
                break

            # If the model is mature, stop running ONLY IF we are not the best
            # model for the job. Otherwise, keep running so we can keep returning
            # predictions to the user
            if self._isMature:
                if not self._isBestModel:
                    self._cmpReason = self._jobsDAO.CMPL_REASON_STOPPED
                    break
                else:
                    self._cmpReason = self._jobsDAO.CMPL_REASON_EOF

            # =========================================================================
            # Get the next record, and "write" it
            # =========================================================================
            try:
                self._currentRecordIndex = next(iterTracker)
            except StopIteration:
                break

            # "Write" a dummy output value. This is used to test that the batched
            # writing works properly

            self._writePrediction(ModelResult(None, None, None, None))

            periodic.tick()

            # =========================================================================
            # Compute wait times. See if model should exit
            # =========================================================================

            if self.__shouldSysExit(self._currentRecordIndex):
                sys.exit(1)

            # Simulate computation time
            if self._busyWaitTime is not None:
                time.sleep(self._busyWaitTime)
                self.__computeWaitTime()

            # Asked to abort after so many iterations?
            if doSysExit:
                sys.exit(1)

            # Asked to raise a jobFailException?
            if self._jobFailErr:
                raise utils.JobFailException(
                    "E10000", "dummyModel's jobFailErr was True.")

        # =========================================================================
        # Handle final operations
        # =========================================================================
        if self._doFinalize:
            if not self._makeCheckpoint:
                self._model = None

            # Delay finalization operation
            if self._finalDelay is not None:
                time.sleep(self._finalDelay)

            self._finalize()

        self._logger.info("Finished: modelID=%r " % (self._modelID))

        return (self._cmpReason, None)
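
The three *ModelRange blocks in Example #1 all use the same positional selection: the job's model IDs are fetched, sorted, and the (beg, end) pair is applied as a slice. The sketch below restates that pattern in isolation; the helper name modelInRange and the sample IDs are hypothetical and not part of the original class.

    # Hypothetical helper (an assumption, not in the original code) showing how a
    # *ModelRange pair selects models by their position in the sorted ID list.
    def modelInRange(modelID, allModelIDs, modelRange):
        beg, end = modelRange
        selected = sorted(allModelIDs)[int(beg):int(end)]
        return modelID in selected

    # e.g. with model IDs [12, 7, 9, 30] and range (0, 2), the two lowest IDs
    # (7 and 9) would trigger the simulated sys.exit / delay / error.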
Example #2
    def __runTaskMainLoop(self, numIters, learningOffAt=None):
        """ Main loop of the OPF Model Runner.

    Parameters:
    -----------------------------------------------------------------------

    recordIterator:    Iterator for counting number of records (see _runTask)
    learningOffAt:     If not None, learning is turned off when we reach this
                        iteration number

    """

        ## Reset sequence states in the model, so it starts looking for a new
        ## sequence
        self._model.resetSequenceStates()

        self._currentRecordIndex = -1
        while True:

            # If killed by a terminator, stop running
            if self._isKilled:
                break

            # If job stops or hypersearch ends, stop running
            if self._isCanceled:
                break

            # If the process is about to be killed, set as orphaned
            if self._isInterrupted.isSet():
                self.__setAsOrphaned()
                break

            # If the model is mature, stop running ONLY IF we are not the best
            # model for the job. Otherwise, keep running so we can keep returning
            # predictions to the user
            if self._isMature:
                if not self._isBestModel:
                    self._cmpReason = self._jobsDAO.CMPL_REASON_STOPPED
                    break
                else:
                    self._cmpReason = self._jobsDAO.CMPL_REASON_EOF

            # Turn off learning?
            if learningOffAt is not None \
                    and self._currentRecordIndex == learningOffAt:
                self._model.disableLearning()

            # Read input record. Note that any failure here is a critical JOB failure
            #  and results in the job being immediately canceled and marked as
            #  failed. The runModelXXX code in hypersearch.utils, if it sees an
            #  exception of type utils.JobFailException, will cancel the job and
            #  copy the error message into the job record.
            try:
                inputRecord = self._inputSource.getNextRecordDict()
                if self._currentRecordIndex < 0:
                    self._inputSource.setTimeout(10)
            except Exception as e:
                raise utils.JobFailException(ErrorCodes.streamReading,
                                             str(e.args),
                                             traceback.format_exc())

            if inputRecord is None:
                # EOF
                self._cmpReason = self._jobsDAO.CMPL_REASON_EOF
                break

            if inputRecord:
                # Process input record
                self._currentRecordIndex += 1

                result = self._model.run(inputRecord=inputRecord)

                # Compute metrics.
                result.metrics = self.__metricMgr.update(result)
                # If they are None, use the defaults; see MetricsManager.getMetrics()
                # TODO remove this when JAVA API server is gone
                if not result.metrics:
                    result.metrics = self.__metricMgr.getMetrics()

                # Write the result to the output cache. Don't write the encodings,
                # even if they were computed
                if InferenceElement.encodings in result.inferences:
                    result.inferences.pop(InferenceElement.encodings)
                result.sensorInput.dataEncodings = None
                self._writePrediction(result)

                # Run periodic activities
                self._periodic.tick()

                if numIters >= 0 and self._currentRecordIndex >= numIters - 1:
                    break

            else:
                # Input source returned an empty record.
                #
                # NOTE: This is okay with Stream-based Source (when it times out
                # waiting for next record), but not okay with FileSource, which should
                # always return either with a valid record or None for EOF.
                raise ValueError("Got an empty record from FileSource: %r" %
                                 inputRecord)