def CreateNested(cls, client, method, args):
    """Spawn a nested operation derived from the operation that is currently executing.

    The nested operation reuses the running (parent) operation's user id, device id and
    timestamp. Its id is the parent's id with a "+" prefix, for example:
      current op_id: o12345
      nested op_id:  +o12345

    Deriving the id from the parent's id is meant to guarantee that at most one nested
    operation exists per parent at a time, that the nested operation sorts (and therefore
    runs) ahead of its parent, and that nested operations are easy to spot when debugging.

    This is a generator: it yields a gen.Task wrapping Operation.CreateAndExecute and resumes
    with the resulting nested operation. It never finishes normally -- it always ends by
    raising one of the exceptions listed below.

    Args:
      cls: not referenced by the body.
      client: passed through unchanged to Operation.CreateAndExecute.
      method: passed through unchanged to Operation.CreateAndExecute.
      args: dict of arguments for the nested operation. It must not already contain a
        'headers' key; one is added to this very dict (the caller's object is modified).

    Raises:
      AssertionError: no operation is currently executing, or args already has 'headers'.
      TooManyRetriesError: the nested operation is in quarantine, so the parent can never
        be resumed.
      StopOperationError: raised in every other case, to stop the parent operation so that
        the nested operation can run first.
    """
    ctx = OpContext.current()
    assert not (ctx is None or ctx.executing_op is None), \
        'outer operation must be running in order to execute a nested operation'
    parent_op = ctx.executing_op

    # The headers are always synthesized from the parent; callers may not supply their own.
    assert 'headers' not in args, 'headers are derived from the current operation'
    nested_headers = {
        'op_id': '+%s' % parent_op.operation_id,
        'op_timestamp': parent_op.timestamp,
    }
    args['headers'] = nested_headers

    child_op = yield gen.Task(Operation.CreateAndExecute,
                              client,
                              parent_op.user_id,
                              parent_op.device_id,
                              method,
                              args)

    # A quarantined nested op will never complete, and the parent cannot start until it
    # does, so the parent has to fail as well.
    if child_op.quarantine:
        raise TooManyRetriesError('Nested operation "%s" already exists and is in quarantine.' %
                                  child_op.operation_id)

    raise StopOperationError()
def GetCurrent(cls):
    """Return the operation that is executing in the current OpContext.

    When there is no current OpContext, or the context has no executing operation, a
    freshly constructed Operation() is returned instead. (The original notes describe that
    default as having user_id and device_id set to None; that is not verifiable from this
    block alone.)

    Args:
      cls: not referenced by the body.
    """
    ctx = OpContext.current()
    # Guard clause: nothing is running, so hand back a default operation.
    if ctx is None or ctx.executing_op is None:
        return Operation()
    return ctx.executing_op
def Execute(self, operation_id=None, wait_callback=None): """Starts execution of all operations for the managed user. Once all operations have been completed, or if another server is already executing the operations, then the callback passed to __init__ is invoked. If the "operation_id" argument is provided, it is used as a hint as to where to start execution. However, if an operation with a lower id is found in the database, that is executed first, in order to ensure that the server executes operations in the same order that a device submitted them. If "wait_callback" is specified, then it is invoked when the "operation_id" operation is complete (but other operations for the user may still be running). If "operation_id" is None in this case, then "wait_callback" will only be invoked once all operations for this user are complete. """ def _OnCompletedOp(type=None, value=None, tb=None): """Wraps the caller's callback so that it is called in the original context, and any exception is raised in the original context. """ if (type, value, tb) != (None, None, None): raise type, value, tb wait_callback() @gen.engine def _ExecuteAll(): """Executes all ops within the scope of an OpContext. "yield" is not supported in the static scope of OpContext, which is why this is a separate function. """ try: self._is_executing = True self._requery = True while self._requery: yield self._ExecuteAll(operation_id=operation_id) finally: # Notify any remaining listeners that their operations are complete (since all operations are now complete). for cb_op_id in self._sync_cb_map.keys(): self._InvokeSyncCallbacks(cb_op_id) # Complete execution. self._is_executing = False self._callback() # Add callbacks for synchronous case. 
if wait_callback is not None: self._sync_cb_map[operation_id].append(stack_context.wrap(_OnCompletedOp)) if not self._is_executing: # Establish op context, and then call another func, since it is not safe to use a yield in the static scope # of the "with stack_context" statement. with stack_context.StackContext(OpContext()): _ExecuteAll() else: # Sets flag so that once all operations are executed, the list of operations is re-queried # in order to find any newly added operations. self._requery = True
def CreateNested(cls, client, method, args):
    """Create and launch a nested operation on behalf of the currently executing operation.

    The currently executing (outer) operation supplies the user id, device id and timestamp
    for the nested one. The nested operation's id is formed by prefixing the outer
    operation's id with "+", for example:
      current op_id: o12345
      nested op_id:  +o12345

    Building the id this way is intended to ensure that at most one nested operation runs
    at a time for a given outer operation, that it sorts (and therefore runs) before the
    outer operation, and that nested operations are easy to identify when debugging.

    The body is a generator that yields a gen.Task for Operation.CreateAndExecute and is
    resumed with the nested operation. It always terminates by raising: the outer
    operation is stopped so that the nested one can run, and must be continued only after
    the nested operation has completed successfully.

    Args:
      cls: unused in the body.
      client: forwarded as-is to Operation.CreateAndExecute.
      method: forwarded as-is to Operation.CreateAndExecute.
      args: argument dict for the nested operation. Must not contain 'headers'; that key
        is written into the passed-in dict itself.

    Raises:
      AssertionError: there is no executing outer operation, or 'headers' is already
        present in args.
      TooManyRetriesError: the nested operation is in quarantine.
      StopOperationError: in all remaining cases, to halt the outer operation.
    """
    op_context = OpContext.current()
    assert op_context is not None and op_context.executing_op is not None, \
        'outer operation must be running in order to execute a nested operation'
    outer_op = op_context.executing_op

    # Headers come exclusively from the outer operation.
    assert 'headers' not in args, 'headers are derived from the current operation'
    derived_op_id = '+%s' % outer_op.operation_id
    args['headers'] = {'op_id': derived_op_id, 'op_timestamp': outer_op.timestamp}

    create_task = gen.Task(Operation.CreateAndExecute,
                           client,
                           outer_op.user_id,
                           outer_op.device_id,
                           method,
                           args)
    inner_op = yield create_task

    # The outer operation cannot start until the nested op has completed successfully,
    # which a quarantined op never will -- so fail the outer operation too.
    if inner_op.quarantine:
        raise TooManyRetriesError('Nested operation "%s" already exists and is in quarantine.' %
                                  inner_op.operation_id)

    raise StopOperationError()
def _ExecuteOp(self, op):
    """Execute one operation by unpacking its JSON-encoded data into handler arguments.

    This is a generator that yields gen.Task objects and the results of other self._*
    helpers. The work happens inside the scope returned by OpContext.current().Enter(op),
    which makes the op reachable through OpContext and, per the original notes, captures
    op-specific logging for the duration of the run.

    Outcomes:
      * success: waiting sync callbacks are invoked and the op is deleted;
      * StopOperationError: swallowed, so that a nested operation can run first;
      * FailpointError: logged as a warning and otherwise ignored (immediate retry);
      * any other Exception: waiting sync callbacks are told about the failure, then the op
        is aborted (first attempt with an abortable exception type) or failed with a
        back-off.

    Args:
      op: the operation to run. The attributes read here are backoff, method, json,
        operation_id, user_id, device_id, attempts and timestamp.
    """
    # If a back-off is pending, wait for it to expire before doing anything else.
    if op.backoff is not None:
        yield gen.Task(IOLoop.current().add_timeout, op.backoff)

    # Enter the execution scope for this op so that it is visible through OpContext and
    # op-specific logging is started.
    with OpContext.current().Enter(op):
        entry = self._op_map[op.method]
        handler_args = json.loads(op.json)

        # Record on the lock which op is being run (unless it already says so). If this
        # server fails, whichever server takes over the lock will know where to start.
        lock = self._lock
        if lock.resource_data != op.operation_id:
            lock.resource_data = op.operation_id
            yield gen.Task(lock.Update, self._client)

        # The stored arguments may be in an out-dated format, so migrate them to the
        # current server message version.
        # NOTE(review): Migrate presumably updates handler_args in place, since that same
        # dict is what gets passed to the handler below -- confirm.
        stored_message = message.Message(handler_args)
        yield gen.Task(stored_message.Migrate,
                       self._client,
                       migrate_version=message.MAX_MESSAGE_VERSION,
                       migrators=entry.migrators)

        try:
            # The headers object is not an expected argument to the handler method.
            del handler_args['headers']

            # Log a scrubbed copy of the args to keep personal information out of the logs.
            if entry.scrubber is None:
                loggable_args = handler_args
            else:
                loggable_args = deepcopy(handler_args)
                entry.scrubber(loggable_args)
            args_text = pprint.pformat(loggable_args)

            # Multi-line argument dumps start on their own line.
            separator = '\n' if '\n' in args_text else ' '
            logging.info('EXECUTE: user: %d, device: %d, op: %s, method: %s:%s%s' %
                         (op.user_id, op.device_id, op.operation_id, op.method,
                          separator, args_text))

            _ops_per_min.increment()
            if op.attempts > 0:
                _retries_per_min.increment()

            # The op starts from the beginning, so reset the modified-db state in the
            # OpMgrDBClient wrapper; that way it is known whether any modification
            # happened before an abort.
            self._client.ResetDBModified()

            # Actually run the operation by invoking its handler method.
            results = yield gen.Task(entry.handler, self._client, **handler_args)

            elapsed_secs = time.time() - op.timestamp
            if results:
                results_suffix = ': %s' % pprint.pformat(results)
            else:
                results_suffix = ''
            logging.info('SUCCESS: user: %d, device: %d, op: %s, method: %s in %.3fs%s' %
                         (op.user_id, op.device_id, op.operation_id, op.method,
                          elapsed_secs, results_suffix))
            _avg_op_time.add(elapsed_secs)

            # Tell anyone waiting on this op that it is now complete.
            self._InvokeSyncCallbacks(op.operation_id)

            # The op ran successfully, so it can be deleted.
            yield self._DeleteOp(op)
        except StopOperationError:
            # The current operation is being stopped so that a nested operation can run.
            pass
        except FailpointError:
            # A failpoint triggered the retry: just log it; the op is retried immediately.
            failpoint = sys.exc_info()[1]
            logging.warning('restarting op due to failpoint: %s (%d)',
                            failpoint.filename, failpoint.lineno)
        except Exception:
            exc_type, exc_value, exc_tb = sys.exc_info()

            # Tell anyone waiting on this op that it failed (without waiting for retries).
            self._InvokeSyncCallbacks(op.operation_id, exc_type, exc_value, exc_tb)

            # Abortable exceptions are only honored on the first attempt.
            if op.attempts == 0 and issubclass(exc_type, _ABORTABLE_EXCEPTIONS):
                yield self._AbortOp(op, exc_type, exc_value, exc_tb)
            else:
                if issubclass(exc_type, _SMALLER_RETRY_EXCEPTIONS):
                    first_backoff = UserOpManager._SMALL_INITIAL_BACKOFF_SECS
                else:
                    first_backoff = UserOpManager._INITIAL_BACKOFF_SECS
                yield self._FailOp(op, exc_type, exc_value, exc_tb,
                                   initial_backoff_secs=first_backoff)