def package_request(self, workflow, job, step, handler, arguments, context, is_blocking=False):
    """Prepare an incoming request to be processed.

    Persists a root MAP invocation, writes the request arguments as that
    invocation's ARGUMENTS dataset, persists the request record, and
    returns the bundled queue-message parameters for the consumer.
    """

    # The root invocation of a request always starts as a MAP step.
    root_invocation = mr.models.kv.invocation.Invocation(
            invocation_id=None,
            workflow_name=workflow.workflow_name,
            step_name=step.step_name,
            direction=mr.constants.D_MAP)

    root_invocation.save()

    _flow_logger.debug(
        "+ Writing ARGUMENTS dataset for root invocation: "
        "[%s]", root_invocation)

    argument_queue = mr.models.kv.queues.dataset.DatasetQueue(
            workflow,
            root_invocation,
            mr.models.kv.queues.dataset.DT_ARGUMENTS)

    # Each argument pair is stored under the 'p' ("pair") key.
    for key, value in arguments:
        argument_queue.add({'p': (key, value)})

    request = mr.models.kv.request.Request(
            request_id=None,
            workflow_name=workflow.workflow_name,
            job_name=job.job_name,
            invocation_id=root_invocation.invocation_id,
            context=context,
            is_blocking=is_blocking)

    request.save()

    _logger.debug("Received request: [%s]", request)

    return mr.shared_types.QUEUE_MESSAGE_PARAMETERS_CLS(
            workflow=workflow,
            invocation=root_invocation,
            request=request,
            job=job,
            step=step,
            handler=handler)
def package_request(self, workflow, job, step, handler, arguments, context):
    """Prepare an incoming request to be processed.

    Persists a root MAP invocation, writes the request arguments as that
    invocation's ARGUMENTS dataset, persists the request record, and
    returns the bundled queue-message parameters for the consumer.
    """

    # The root invocation of a request always starts as a MAP step.
    root_invocation = mr.models.kv.invocation.Invocation(
            invocation_id=None,
            workflow_name=workflow.workflow_name,
            step_name=step.step_name,
            direction=mr.constants.D_MAP)

    root_invocation.save()

    _flow_logger.debug(
        "+ Writing ARGUMENTS dataset for root invocation: "
        "[%s]", root_invocation)

    argument_queue = mr.models.kv.queues.dataset.DatasetQueue(
            workflow,
            root_invocation,
            mr.models.kv.queues.dataset.DT_ARGUMENTS)

    # Each argument pair is stored under the 'p' ("pair") key.
    for key, value in arguments:
        argument_queue.add({'p': (key, value)})

    request = mr.models.kv.request.Request(
            request_id=None,
            workflow_name=workflow.workflow_name,
            job_name=job.job_name,
            invocation_id=root_invocation.invocation_id,
            context=context)

    request.save()

    _logger.debug("Received request: [%s]", request)

    return mr.shared_types.QUEUE_MESSAGE_PARAMETERS_CLS(
            workflow=workflow,
            invocation=root_invocation,
            request=request,
            job=job,
            step=step,
            handler=handler)
def __store_reduction_result(self, message_parameters, reduce_result_gen, store_to_invocation, decrement_invocation=None):
    """Store the reduction result.

    This is code common to both/all kinds of reduction.
    """

    workflow = message_parameters.workflow
    request = message_parameters.request

    # Store result.

    _logger.debug("Writing reduction result: [%s] [%s]",
                  store_to_invocation, store_to_invocation.direction)

    _flow_logger.debug(
        "+ Writing POST-REDUCE dataset from [%s] to [%s] "
        "and decrementing [%s].",
        message_parameters.invocation, store_to_invocation,
        decrement_invocation)

    result_queue = mr.models.kv.queues.dataset.DatasetQueue(
            workflow,
            store_to_invocation,
            mr.models.kv.queues.dataset.DT_POST_REDUCE)

    # Push every reduced pair, counting as we go so we can detect an
    # (invalid) empty reduction.
    stored_count = 0
    for key, value in reduce_result_gen:
        result_queue.add({'p': (key, value)})
        stored_count += 1

    assert stored_count > 0, \
        "No reduction results to store by [%s] to [%s]." % \
        (message_parameters.invocation, store_to_invocation)

    _logger.debug(
        "We've posted the reduction result to invocation: "
        "[%s]", store_to_invocation)

    if decrement_invocation is None:
        # We've reduced our way back up to the original request.

        _logger.debug("No further parents on request: %s", request)

        # If we're a non-blocking request, This will be the last
        # opportunity to handle the result before sending the request into
        # oblivion.
        if request.is_blocking is False:
            _logger.debug("Writing result for non-blocking request: %s",
                          request)

            rr = get_request_receiver()
            rr.render_result(request)

        _logger.debug("Marking request as complete: [%s]",
                      request.request_id)

        request.is_done = True
        request.save()

        # We allow for the result to be written into the request response,
        # and *then* cleanup. However, if the request was non-blocking,
        # we'll queue it for cleanup, now.
        if request.is_blocking is False:
            _logger.debug(
                "Request is non-blocking, so we'll clean it up "
                "immediately: %s", request)

            wm = mr.workflow_manager.get_wm()
            managed_workflow = wm.get(workflow.workflow_name)
            managed_workflow.cleanup_queue.add_request(request)

        return

    _logger.debug("Decrementing invocation: [%s] WAITING=(%d)",
                  decrement_invocation,
                  decrement_invocation.mapped_waiting)

    # Decrement the "waiting" counter on the parent of the parent (the
    # step that mapped the steps that produced the results that we're
    # reducing), or notify that the job is done (if there is no parent's
    # parent).
    decrement_invocation = self.__decrement_invocation(
                            workflow, decrement_invocation)

    if decrement_invocation.mapped_waiting == 0:
        # We've posted the reduction of the results of our map step to
        # its parent, and all mapped steps of that parent have now been
        # reported.

        _logger.debug(
            "Invocation [%s] mapped-waiting count has "
            "dropped to (0), and will be reduced.", decrement_invocation)

        pusher = _get_pusher()

        # Queue a reduction with our parent's parent (the parent of the
        # original mapping). It will access all of the results that have
        # been posted back to it.
        pusher.queue_reduce_step_from_parameters(
            message_parameters, decrement_invocation)
    else:
        _logger.debug(
            "Invocation [%s] mapped-waiting "
            "count after REDUCE: (%d)",
            decrement_invocation, decrement_invocation.mapped_waiting)
def handle_reduce(self, message_parameters):
    """Corresponds to steps received with a type of mr.constants.D_REDUCE.

    As we work our way down from the request/job/original-step to
    successive mappings, we link them by way of the parent_invocation_id.
    When we're working our way up through reduction, the
    parent_invocation_id of each reduction invocation points to the
    invocation record that we're reducing. We'll then continue to queue
    successive invocation for the parents of parents, until we make it all
    of the way to the original step (which will have no parent).
    """

    step = message_parameters.step
    reduce_invocation = message_parameters.invocation
    workflow = message_parameters.workflow
    request = message_parameters.request

    assert step.reduce_handler_name is not None

    try:
        # The parent of the current invocation is the invocation that had
        # all of the mappings to be reduced.
        map_invocation = mr.models.kv.invocation.get(
                            workflow,
                            reduce_invocation.parent_invocation_id)

        # A MAP invocation with no "mapped_waiting" counter rendered a
        # plain dataset; otherwise it mapped downstream steps.
        if map_invocation.mapped_waiting is None:
            _logger.debug(
                "Processing REDUCE [%s] -of- original MAP "
                "invocation [%s] that rendered a DATASET.",
                reduce_invocation, map_invocation)

            return self.__handle_mapped_dataset_reduce(
                    message_parameters, step, map_invocation, workflow,
                    request)

        _logger.debug(
            "Processing REDUCE [%s] -of- original MAP "
            "invocation [%s] that rendered DOWNSTREAM "
            "MAPPINGS.", reduce_invocation, map_invocation)

        return self.__handle_mapped_mapping_reduce(
                message_parameters, step, map_invocation, workflow,
                request)
    except Exception as e:
        _logger.exception(
            "Exception while processing REDUCE under "
            "request: %s", request)

        if isinstance(e, mr.handlers.general.HandlerException):
            # TODO(dustin): Finish debugging this.
            print("REDUCE ERROR STDOUT >>>>>>>>>>>>>")
            print(e.stdout)
            print("REDUCE ERROR STDERR >>>>>>>>>>>>>")
            print(e.stderr)
            print("REDUCE ERROR <<<<<<<<<<<<<<<<<<<<")

        # Formally mark the request as failed but finished. In the event
        # that request-cleanup is disabled, forensics will be intact.

        reduce_invocation.error = traceback.format_exc()
        reduce_invocation.save()

        request.failed_invocation_id = reduce_invocation.invocation_id
        request.is_done = True
        request.save()

        # Send notification.

        notify = mr.log.get_notify()
        notify.exception(
            "Reducer invocation [%s] under request [%s] "
            "failed. HANDLER=[%s]",
            reduce_invocation.invocation_id, request.request_id,
            step.reduce_handler_name)

        # Schedule the request for destruction.

        wm = mr.workflow_manager.get_wm()
        managed_workflow = wm.get(workflow.workflow_name)
        managed_workflow.cleanup_queue.add_request(request)

        raise
def handle_map(self, message_parameters):
    """Handle one dequeued map job."""

    request = message_parameters.request
    step = message_parameters.step
    invocation = message_parameters.invocation
    workflow = message_parameters.workflow

    _logger.debug("Processing MAP: [%s] [%s]",
                  invocation, invocation.created_timestamp)

    try:
        ## Call the handler.

        _flow_logger.debug(
            " Reading ARGUMENTS dataset for (and from) "
            "mapper: [%s]", invocation)

        argument_queue = mr.models.kv.queues.dataset.DatasetQueue(
                            workflow,
                            invocation,
                            mr.models.kv.queues.dataset.DT_ARGUMENTS)

        # Enumerate the 'p' member of every record.
        arguments = (record['p'] for record in argument_queue.list_data())

        if mr.config.IS_DEBUG is True:
            # Materialize the generator so it can be logged (it's still
            # iterable downstream).
            arguments = list(arguments)

            _logger.debug("Sending arguments to mapper:\n%s",
                          pprint.pformat(arguments))

        wrapped_arguments = {
            'arguments': arguments,
        }

        construction_context = mr.handlers.general.HANDLER_CONTEXT_CLS(
                                request=request,
                                invocation=invocation)

        handler_result_gen = self.__call_handler(
                                construction_context, workflow,
                                step.map_handler_name, wrapped_arguments)

        # The first yielded value configures which path the handler took.
        path_type = next(handler_result_gen)

        _logger.debug("Mapper [%s] path-type: [%s]",
                      invocation, path_type.__class__.__name__)

        assert isinstance(path_type, mr.handlers.scope.MrConfigure)

        # Manage downstream steps that were mapped to (the handler was a
        # generator).
        if isinstance(path_type, mr.handlers.scope.MrConfigureToMap):
            self.__map_to_downstream(
                path_type.next_step_name, step.map_handler_name,
                handler_result_gen, workflow, invocation,
                message_parameters)
        elif isinstance(path_type, mr.handlers.scope.MrConfigureToReturn):
            self.__map_collect_result(
                step.map_handler_name, handler_result_gen, workflow,
                invocation, message_parameters)
    except Exception as e:
        _logger.exception(
            "Exception while processing MAP under request: "
            "%s", request)

        if isinstance(e, mr.handlers.general.HandlerException):
            # TODO(dustin): Finish debugging this.
            print("MAP ERROR STDOUT >>>>>>>>>>>>>")
            print(e.stdout)
            print("MAP ERROR STDERR >>>>>>>>>>>>>")
            print(e.stderr)
            print("MAP ERROR <<<<<<<<<<<<<<<<<<<<")

        invocation.error = traceback.format_exc()
        invocation.save()

        # Formally mark the request as failed but finished. In the event
        # that request-cleanup is disabled, forensics will be intact.

        request.failed_invocation_id = invocation.invocation_id
        request.is_done = True
        request.save()

        # Send notification.

        notify = mr.log.get_notify()
        notify.exception(
            "Mapper invocation [%s] under request [%s] "
            "failed. HANDLER=[%s]",
            invocation.invocation_id, request.request_id,
            step.map_handler_name)

        # Schedule the request for destruction.

        wm = mr.workflow_manager.get_wm()
        managed_workflow = wm.get(workflow.workflow_name)
        managed_workflow.cleanup_queue.add_request(request)

        raise
def __store_reduction_result(self, message_parameters, reduce_result_gen,
                             store_to_invocation,
                             decrement_invocation=None):
    """Store the reduction result.

    This is code common to both/all kinds of reduction.
    """

    workflow = message_parameters.workflow
    request = message_parameters.request

    # Store result.

    _logger.debug("Writing reduction result: [%s] [%s]",
                  store_to_invocation, store_to_invocation.direction)

    _flow_logger.debug("+ Writing POST-REDUCE dataset from [%s] to [%s] "
                       "and decrementing [%s].",
                       message_parameters.invocation, store_to_invocation,
                       decrement_invocation)

    dataset_queue = mr.models.kv.queues.dataset.DatasetQueue(
                        workflow,
                        store_to_invocation,
                        mr.models.kv.queues.dataset.DT_POST_REDUCE)

    # Write every reduced pair; track how many we saw so that an empty
    # reduction can be flagged.
    written = 0
    for pair in reduce_result_gen:
        key, value = pair

        # Pair.
        dataset_queue.add({"p": (key, value)})

        written += 1

    assert written > 0, \
        "No reduction results to store by [%s] to [%s]." % \
        (message_parameters.invocation, store_to_invocation)

    _logger.debug("We've posted the reduction result to invocation: "
                  "[%s]", store_to_invocation)

    if decrement_invocation is not None:
        _logger.debug("Decrementing invocation: [%s] WAITING=(%d)",
                      decrement_invocation,
                      decrement_invocation.mapped_waiting)

        # Decrement the "waiting" counter on the parent of the parent
        # (the step that mapped the steps that produced the results
        # that we're reducing), or notify that the job is done (if
        # there is no parent's parent).
        decrement_invocation = self.__decrement_invocation(
                                workflow,
                                decrement_invocation)

        if decrement_invocation.mapped_waiting == 0:
            # We've posted the reduction of the results of our map step
            # to its parent, and all mapped steps of that parent have
            # now been reported.

            _logger.debug("Invocation [%s] mapped-waiting count has "
                          "dropped to (0), and will be reduced.",
                          decrement_invocation)

            pusher = _get_pusher()

            # Queue a reduction with our parent's parent (the parent of
            # the original mapping). It will access all of the results
            # that have been posted back to it.
            pusher.queue_reduce_step_from_parameters(
                message_parameters, decrement_invocation)
        else:
            _logger.debug("Invocation [%s] mapped-waiting "
                          "count after REDUCE: (%d)",
                          decrement_invocation,
                          decrement_invocation.mapped_waiting)
    else:
        # We've reduced our way back up to the original request.

        _logger.debug("No further parents on request: %s", request)

        # If we're a non-blocking request, This will be the last
        # opportunity to handle the result before sending the request into
        # oblivion.
        if request.is_blocking is False:
            _logger.debug("Writing result for non-blocking request: %s",
                          request)

            receiver = get_request_receiver()
            receiver.render_result(request)

        _logger.debug("Marking request as complete: [%s]",
                      request.request_id)

        request.is_done = True
        request.save()

        # We allow for the result to be written into the request response,
        # and *then* cleanup. However, if the request was non-blocking,
        # we'll queue it for cleanup, now.
        if request.is_blocking is False:
            _logger.debug("Request is non-blocking, so we'll clean it up "
                          "immediately: %s", request)

            manager = mr.workflow_manager.get_wm()
            managed_workflow = manager.get(workflow.workflow_name)
            managed_workflow.cleanup_queue.add_request(request)
def handle_reduce(self, message_parameters):
    """Corresponds to steps received with a type of mr.constants.D_REDUCE.

    As we work our way down from the request/job/original-step to
    successive mappings, we link them by way of the parent_invocation_id.
    When we're working our way up through reduction, the
    parent_invocation_id of each reduction invocation points to the
    invocation record that we're reducing. We'll then continue to queue
    successive invocation for the parents of parents, until we make it all
    of the way to the original step (which will have no parent).
    """

    step = message_parameters.step
    reduce_invocation = message_parameters.invocation
    workflow = message_parameters.workflow
    request = message_parameters.request

    assert step.reduce_handler_name is not None

    try:
        # The parent of the current invocation is the invocation that had
        # all of the mappings to be reduced.
        map_invocation = mr.models.kv.invocation.get(
                            workflow,
                            reduce_invocation.parent_invocation_id)

        # A missing "mapped_waiting" counter means the original MAP
        # rendered a plain dataset rather than downstream mappings.
        if map_invocation.mapped_waiting is None:
            _logger.debug("Processing REDUCE [%s] -of- original MAP "
                          "invocation [%s] that rendered a DATASET.",
                          reduce_invocation, map_invocation)

            handler = self.__handle_mapped_dataset_reduce
        else:
            _logger.debug("Processing REDUCE [%s] -of- original MAP "
                          "invocation [%s] that rendered DOWNSTREAM "
                          "MAPPINGS.", reduce_invocation, map_invocation)

            handler = self.__handle_mapped_mapping_reduce

        return handler(message_parameters, step, map_invocation, workflow,
                       request)
    except Exception as e:
        _logger.exception("Exception while processing REDUCE under "
                          "request: %s", request)

        if isinstance(e, mr.handlers.general.HandlerException):
            # TODO(dustin): Finish debugging this.
            print("REDUCE ERROR STDOUT >>>>>>>>>>>>>")
            print(e.stdout)
            print("REDUCE ERROR STDERR >>>>>>>>>>>>>")
            print(e.stderr)
            print("REDUCE ERROR <<<<<<<<<<<<<<<<<<<<")

        # Formally mark the request as failed but finished. In the event
        # that request-cleanup is disabled, forensics will be intact.

        reduce_invocation.error = traceback.format_exc()
        reduce_invocation.save()

        request.failed_invocation_id = reduce_invocation.invocation_id
        request.is_done = True
        request.save()

        # Send notification.

        notify = mr.log.get_notify()
        notify.exception("Reducer invocation [%s] under request [%s] "
                         "failed. HANDLER=[%s]",
                         reduce_invocation.invocation_id,
                         request.request_id, step.reduce_handler_name)

        # Schedule the request for destruction.

        manager = mr.workflow_manager.get_wm()
        managed_workflow = manager.get(workflow.workflow_name)
        managed_workflow.cleanup_queue.add_request(request)

        raise
def handle_map(self, message_parameters):
    """Handle one dequeued map job."""

    request = message_parameters.request
    step = message_parameters.step
    invocation = message_parameters.invocation
    workflow = message_parameters.workflow

    _logger.debug("Processing MAP: [%s] [%s]", invocation,
                  invocation.created_timestamp)

    try:
        ## Call the handler.

        _flow_logger.debug(" Reading ARGUMENTS dataset for (and from) "
                           "mapper: [%s]", invocation)

        dataset_queue = mr.models.kv.queues.dataset.DatasetQueue(
                            workflow,
                            invocation,
                            mr.models.kv.queues.dataset.DT_ARGUMENTS)

        # Enumerate the 'p' member of every record.
        arguments = (record["p"] for record in dataset_queue.list_data())

        if mr.config.IS_DEBUG is True:
            # Materialize so we can log the full set (it's still iterable
            # downstream).
            arguments = list(arguments)

            _logger.debug("Sending arguments to mapper:\n%s",
                          pprint.pformat(arguments))

        wrapped_arguments = {"arguments": arguments}

        construction_context = mr.handlers.general.HANDLER_CONTEXT_CLS(
                                request=request,
                                invocation=invocation)

        handler_result_gen = self.__call_handler(
                                construction_context,
                                workflow,
                                step.map_handler_name,
                                wrapped_arguments)

        # The handler's first yield declares which path it took.
        path_type = next(handler_result_gen)

        _logger.debug("Mapper [%s] path-type: [%s]", invocation,
                      path_type.__class__.__name__)

        assert isinstance(path_type, mr.handlers.scope.MrConfigure)

        # Manage downstream steps that were mapped to (the handler was a
        # generator).
        if isinstance(path_type, mr.handlers.scope.MrConfigureToMap):
            self.__map_to_downstream(path_type.next_step_name,
                                     step.map_handler_name,
                                     handler_result_gen,
                                     workflow,
                                     invocation,
                                     message_parameters)
        elif isinstance(path_type, mr.handlers.scope.MrConfigureToReturn):
            self.__map_collect_result(step.map_handler_name,
                                      handler_result_gen,
                                      workflow,
                                      invocation,
                                      message_parameters)
    except Exception as e:
        _logger.exception("Exception while processing MAP under request: "
                          "%s", request)

        if isinstance(e, mr.handlers.general.HandlerException):
            # TODO(dustin): Finish debugging this.
            print("MAP ERROR STDOUT >>>>>>>>>>>>>")
            print(e.stdout)
            print("MAP ERROR STDERR >>>>>>>>>>>>>")
            print(e.stderr)
            print("MAP ERROR <<<<<<<<<<<<<<<<<<<<")

        invocation.error = traceback.format_exc()
        invocation.save()

        # Formally mark the request as failed but finished. In the event
        # that request-cleanup is disabled, forensics will be intact.

        request.failed_invocation_id = invocation.invocation_id
        request.is_done = True
        request.save()

        # Send notification.

        notify = mr.log.get_notify()
        notify.exception("Mapper invocation [%s] under request [%s] "
                         "failed. HANDLER=[%s]",
                         invocation.invocation_id, request.request_id,
                         step.map_handler_name)

        # Schedule the request for destruction.

        manager = mr.workflow_manager.get_wm()
        managed_workflow = manager.get(workflow.workflow_name)
        managed_workflow.cleanup_queue.add_request(request)

        raise
def __store_reduction_result(self, message_parameters, reduce_result_gen,
                             store_to_invocation,
                             decrement_invocation=None):
    """Store the reduction result.

    This is code common to both/all kinds of reduction.
    """

    workflow = message_parameters.workflow
    request = message_parameters.request

    # Store result.

    _logger.debug("Writing reduction result: [%s] [%s]",
                  store_to_invocation, store_to_invocation.direction)

    _flow_logger.debug("+ Writing POST-REDUCE dataset from [%s] to [%s] "
                       "and decrementing [%s].",
                       message_parameters.invocation, store_to_invocation,
                       decrement_invocation)

    dataset_queue = mr.models.kv.queues.dataset.DatasetQueue(
                        workflow,
                        store_to_invocation,
                        mr.models.kv.queues.dataset.DT_POST_REDUCE)

    # Write each reduced pair, counting so we can flag an (invalid) empty
    # reduction.
    written = 0
    for key, value in reduce_result_gen:
        # Pair.
        dataset_queue.add({'p': (key, value)})

        written += 1

    assert written > 0, \
        "No reduction results to store by [%s] to [%s]." % \
        (message_parameters.invocation, store_to_invocation)

    _logger.debug("We've posted the reduction result to invocation: "
                  "[%s]", store_to_invocation)

    if decrement_invocation is None:
        # We've reduced our way back up to the original request.

        _logger.debug("No further parents. Marking request as "
                      "complete: [%s]", request.request_id)

        request.done = True
        request.save()

        return

    _logger.debug("Decrementing invocation: [%s] WAITING=(%d)",
                  decrement_invocation,
                  decrement_invocation.mapped_waiting)

    # Decrement the "waiting" counter on the parent of the parent (the
    # step that mapped the steps that produced the results that we're
    # reducing), or notify that the job is done (if there is no parent's
    # parent).
    decrement_invocation = self.__decrement_invocation(
                            workflow, decrement_invocation)

    if decrement_invocation.mapped_waiting == 0:
        # We've posted the reduction of the results of our map step to
        # its parent, and all mapped steps of that parent have now been
        # reported.

        _logger.debug("Invocation [%s] mapped-waiting count has "
                      "dropped to (0), and will be reduced.",
                      decrement_invocation)

        pusher = _get_pusher()

        # Queue a reduction with our parent's parent (the parent of the
        # original mapping). It will access all of the results that have
        # been posted back to it.
        pusher.queue_reduce_step_from_parameters(
            message_parameters, decrement_invocation)
    else:
        _logger.debug("Invocation [%s] mapped-waiting "
                      "count after REDUCE: (%d)",
                      decrement_invocation,
                      decrement_invocation.mapped_waiting)
def handle_reduce(self, message_parameters):
    """Corresponds to steps received with a type of mr.constants.D_REDUCE.

    As we work our way down from the request/job/original-step to
    successive mappings, we link them by way of the parent_invocation_id.
    When we're working our way up through reduction, the
    parent_invocation_id of each reduction invocation points to the
    invocation record that we're reducing. We'll then continue to queue
    successive invocation for the parents of parents, until we make it all
    of the way to the original step (which will have no parent).
    """

    step = message_parameters.step
    reduce_invocation = message_parameters.invocation
    workflow = message_parameters.workflow
    request = message_parameters.request

    assert step.reduce_handler_name is not None

    try:
        # The parent of the current invocation is the invocation that had
        # all of the mappings to be reduced.
        map_invocation = mr.models.kv.invocation.get(
                            workflow,
                            reduce_invocation.parent_invocation_id)

        # A missing "mapped_waiting" counter means the original MAP
        # rendered a plain dataset rather than downstream mappings.
        if map_invocation.mapped_waiting is None:
            _logger.debug("Processing REDUCE [%s] -of- original MAP "
                          "invocation [%s] that rendered a DATASET.",
                          reduce_invocation, map_invocation)

            return self.__handle_mapped_dataset_reduce(
                    message_parameters, step, map_invocation, workflow,
                    request)
        else:
            _logger.debug("Processing REDUCE [%s] -of- original MAP "
                          "invocation [%s] that rendered DOWNSTREAM "
                          "MAPPINGS.", reduce_invocation, map_invocation)

            return self.__handle_mapped_mapping_reduce(
                    message_parameters, step, map_invocation, workflow,
                    request)
    # BUGFIX: was a bare `except:`, which also intercepted BaseException
    # subclasses such as KeyboardInterrupt/SystemExit and recorded them as
    # handler failures (mutating the invocation/request) before re-raising.
    # Only genuine errors should mark the request as failed.
    except Exception:
        _logger.exception("Exception while processing REDUCE under "
                          "request: %s", request)

# TODO(dustin): We might have to remove the chain of invocations, on error.

        # Record the failure for forensics, and formally mark the request
        # as failed but finished.
        reduce_invocation.error = traceback.format_exc()
        reduce_invocation.save()

        request.failed_invocation_id = reduce_invocation.invocation_id
        request.done = True
        request.save()

        raise
def handle_map(self, message_parameters):
    """Handle one dequeued map job."""

    request = message_parameters.request
    step = message_parameters.step
    invocation = message_parameters.invocation
    workflow = message_parameters.workflow

    _logger.debug("Processing MAP: [%s]", invocation)

    try:
        ## Call the handler.

        _flow_logger.debug(" Reading ARGUMENTS dataset for (and from) "
                           "mapper: [%s]", invocation)

        dq = mr.models.kv.queues.dataset.DatasetQueue(
                workflow,
                invocation,
                mr.models.kv.queues.dataset.DT_ARGUMENTS)

        # Enumerate the 'p' member of every record.
        arguments = (d['p'] for d in dq.list_data())

        if mr.config.IS_DEBUG is True:
            # Materialize so the full argument list can be logged (it's
            # still iterable downstream).
            arguments = list(arguments)

            _logger.debug("Sending arguments to mapper:\n%s",
                          pprint.pformat(arguments))

        handler_ctx = self.__get_handler_context(workflow, invocation)

        wrapped_arguments = {
            'arguments': arguments,
            'ctx': handler_ctx,
        }

        handler_result_gen = self.__call_handler(
                                workflow, step.map_handler_name,
                                wrapped_arguments)

        # The handler's first yield declares which path it took.
        path_type = next(handler_result_gen)

        _logger.debug("Mapper [%s] path-type: [%s]", invocation,
                      path_type.__class__.__name__)

        assert isinstance(path_type, mr.handlers.scope.MrConfigure)

        # Manage downstream steps that were mapped to (the handler was a
        # generator).
        if isinstance(path_type, mr.handlers.scope.MrConfigureToMap):
            self.__map_to_downstream(path_type.next_step_name,
                                     step.map_handler_name,
                                     handler_result_gen, workflow,
                                     invocation, message_parameters)
        elif isinstance(path_type, mr.handlers.scope.MrConfigureToReturn):
            self.__map_collect_result(step.map_handler_name,
                                      handler_result_gen, workflow,
                                      invocation, message_parameters)
    # BUGFIX: was a bare `except:`, which also intercepted BaseException
    # subclasses such as KeyboardInterrupt/SystemExit and recorded them as
    # handler failures (mutating the invocation/request) before re-raising.
    # Only genuine errors should mark the request as failed.
    except Exception:
        _logger.exception("Exception while processing MAP under request: "
                          "%s", request)

# TODO(dustin): We might have to remove the chain of invocations, on error.

        # Record the failure for forensics, and formally mark the request
        # as failed but finished.
        invocation.error = traceback.format_exc()
        invocation.save()

        request.failed_invocation_id = invocation.invocation_id
        request.done = True
        request.save()

        raise