def run(self):
    """ Run the dummy OPF task for this model.

    Iterates self._iterations times (forever when negative), "writing" a
    dummy prediction on each pass and honoring the unit-test hooks
    (_sysExitModelRange, _delayModelRange, _errModelRange, _busyWaitTime,
    _jobFailErr) that simulate orphaned, slow, and failed models.

    Returns:        (completionReason, completionMsg) tuple; completionMsg
                    is always None for the dummy model.
    Raises:         RuntimeError if this model falls in _errModelRange;
                    utils.JobFailException if _jobFailErr is set.
    """
    self._logger.debug("Starting Dummy Model: modelID=%s;" % (self._modelID))

    # =========================================================================
    # Initialize periodic activities (e.g., for model result updates)
    # =========================================================================
    periodic = self._initPeriodicActivities()

    self._optimizedMetricLabel = self._optimizeKeyPattern
    self._reportMetricLabels = [self._optimizeKeyPattern]

    # =========================================================================
    # Create our top-level loop-control iterator
    # =========================================================================
    if self._iterations >= 0:
      iterTracker = iter(xrange(self._iterations))
    else:
      # Negative iteration count means run until killed/canceled.
      iterTracker = iter(itertools.count())

    # =========================================================================
    # These ranges get set in the unit tests. They tell the worker to
    # sys.exit, stall, or raise for specific models; this is how orphaned
    # and failed models are generated for testing.
    doSysExit = self._modelIDInRange(self._sysExitModelRange)

    if self._modelIDInRange(self._delayModelRange):
      time.sleep(10)  # DEBUG delay used by the unit tests

    if self._modelIDInRange(self._errModelRange):
      raise RuntimeError(
          "Exiting with error due to errModelRange parameter")

    # =========================================================================
    # Delay, if necessary
    if self._delay is not None:
      time.sleep(self._delay)

    # =========================================================================
    # Run it!
    # =========================================================================
    self._currentRecordIndex = 0
    while True:
      # =======================================================================
      # Check if the model should be stopped
      # =======================================================================

      # If killed by a terminator, stop running
      if self._isKilled:
        break

      # If job stops or hypersearch ends, stop running
      if self._isCanceled:
        break

      # If model is mature, stop running ONLY IF we are not the best model
      # for the job. Otherwise, keep running so we can keep returning
      # predictions to the user
      if self._isMature:
        if not self._isBestModel:
          self._cmpReason = self._jobsDAO.CMPL_REASON_STOPPED
          break
        else:
          self._cmpReason = self._jobsDAO.CMPL_REASON_EOF

      # =======================================================================
      # Get the next record, and "write it"
      # =======================================================================
      try:
        self._currentRecordIndex = next(iterTracker)
      except StopIteration:
        break

      # "Write" a dummy output value. This is used to test that the batched
      # writing works properly
      self._writePrediction(ModelResult(None, None, None, None))

      periodic.tick()

      # =======================================================================
      # Compute wait times. See if model should exit
      # =======================================================================
      if self.__shouldSysExit(self._currentRecordIndex):
        sys.exit(1)

      # Simulate computation time
      if self._busyWaitTime is not None:
        time.sleep(self._busyWaitTime)
        self.__computeWaitTime()

      # Asked to abort after so many iterations?
      if doSysExit:
        sys.exit(1)

      # Asked to raise a jobFailException?
      if self._jobFailErr:
        raise utils.JobFailException("E10000",
            "dummyModel's jobFailErr was True.")

    # =========================================================================
    # Handle final operations
    # =========================================================================
    if self._doFinalize:
      if not self._makeCheckpoint:
        self._model = None

      # Delay finalization operation
      if self._finalDelay is not None:
        time.sleep(self._finalDelay)

      self._finalize()

    self._logger.info("Finished: modelID=%r " % (self._modelID))

    return (self._cmpReason, None)


def _modelIDInRange(self, modelRange):
    """ Return True if this model's ID falls within the given test range.

    Used to deduplicate the _sysExitModelRange / _delayModelRange /
    _errModelRange checks: each range is a (beg, end) pair of indices into
    the job's sorted list of model IDs.

    modelRange:     None, or a (beg, end) pair (values may be strings;
                    they are coerced with int()).
    Returns:        True if modelRange is set and this model's ID is in
                    the selected slice of the job's sorted model IDs.
    """
    if modelRange is None:
      return False

    modelAndCounters = self._jobsDAO.modelsGetUpdateCounters(self._jobID)
    modelIDs = sorted(x[0] for x in modelAndCounters)
    (beg, end) = modelRange
    return self._modelID in modelIDs[int(beg):int(end)]
def __runTaskMainLoop(self, numIters, learningOffAt=None): """ Main loop of the OPF Model Runner. Parameters: ----------------------------------------------------------------------- recordIterator: Iterator for counting number of records (see _runTask) learningOffAt: If not None, learning is turned off when we reach this iteration number """ ## Reset sequence states in the model, so it starts looking for a new ## sequence self._model.resetSequenceStates() self._currentRecordIndex = -1 while True: # If killed by a terminator, stop running if self._isKilled: break # If job stops or hypersearch ends, stop running if self._isCanceled: break # If the process is about to be killed, set as orphaned if self._isInterrupted.isSet(): self.__setAsOrphaned() break # If model is mature, stop running ONLY IF we are not the best model # for the job. Otherwise, keep running so we can keep returning # predictions to the user if self._isMature: if not self._isBestModel: self._cmpReason = self._jobsDAO.CMPL_REASON_STOPPED break else: self._cmpReason = self._jobsDAO.CMPL_REASON_EOF # Turn off learning? if learningOffAt is not None \ and self._currentRecordIndex == learningOffAt: self._model.disableLearning() # Read input record. Note that any failure here is a critical JOB failure # and results in the job being immediately canceled and marked as # failed. The runModelXXX code in hypesearch.utils, if it sees an # exception of type utils.JobFailException, will cancel the job and # copy the error message into the job record. try: inputRecord = self._inputSource.getNextRecordDict() if self._currentRecordIndex < 0: self._inputSource.setTimeout(10) except Exception as e: raise utils.JobFailException(ErrorCodes.streamReading, str(e.args), traceback.format_exc()) if inputRecord is None: # EOF self._cmpReason = self._jobsDAO.CMPL_REASON_EOF break if inputRecord: # Process input record self._currentRecordIndex += 1 result = self._model.run(inputRecord=inputRecord) # Compute metrics. 
result.metrics = self.__metricMgr.update(result) # If there are None, use defaults. see MetricsManager.getMetrics() # TODO remove this when JAVA API server is gone if not result.metrics: result.metrics = self.__metricMgr.getMetrics() # Write the result to the output cache. Don't write encodings, if they # were computed if InferenceElement.encodings in result.inferences: result.inferences.pop(InferenceElement.encodings) result.sensorInput.dataEncodings = None self._writePrediction(result) # Run periodic activities self._periodic.tick() if numIters >= 0 and self._currentRecordIndex >= numIters - 1: break else: # Input source returned an empty record. # # NOTE: This is okay with Stream-based Source (when it times out # waiting for next record), but not okay with FileSource, which should # always return either with a valid record or None for EOF. raise ValueError("Got an empty record from FileSource: %r" % inputRecord)