def handle_stranded_tasks(self, engine):
        """Deal with jobs resident in an engine that died."""
        lost = self.pending[engine]
        for msg_id in list(lost.keys()):
            if msg_id not in self.pending[engine]:
                # prevent double-handling of messages
                continue

            raw_msg = lost[msg_id].raw_msg
            idents,msg = self.session.feed_identities(raw_msg, copy=False)
            parent = self.session.unpack(msg[1].bytes)
            idents = [engine, idents[0]]

            # build fake error reply
            try:
                raise error.EngineError("Engine %r died while running task %r"%(engine, msg_id))
            except:
                content = error.wrap_exception()
            # build fake header
            header = dict(
                status='error',
                engine=engine,
                date=datetime.now(),
            )
            msg = self.session.msg('apply_reply', content, parent=parent, subheader=header)
            raw_reply = list(map(zmq.Message, self.session.serialize(msg, ident=idents)))
            # and dispatch it
            self.dispatch_result(raw_reply)

        # finally scrub completed/failed lists
        self.completed.pop(engine)
        self.failed.pop(engine)
Example #2
0
    def _handle_stranded_msgs(self, eid, uuid):
        """Handle messages known to be on an engine when the engine unregisters.
        
        It is possible that this will fire prematurely - that is, an engine will
        go down after completing a result, and the client will be notified
        of the unregistration and later receive the successful result.
        """

        outstanding = self._outstanding_dict[uuid]

        for msg_id in list(outstanding):
            if msg_id in self.results:
                # we already
                continue
            try:
                raise error.EngineError(
                    "Engine %r died while running task %r" % (eid, msg_id))
            except:
                content = error.wrap_exception()
            # build a fake message:
            parent = {}
            header = {}
            parent['msg_id'] = msg_id
            header['engine'] = uuid
            header['date'] = datetime.now().strftime(util.ISO8601)
            msg = dict(parent_header=parent, header=header, content=content)
            self._handle_apply_reply(msg)
Example #3
0
    def handle_stranded_tasks(self, engine):
        """Deal with jobs resident in an engine that died."""
        lost = self.pending.pop(engine)

        for msg_id, (raw_msg, targets, MET, follow,
                     timeout) in lost.iteritems():
            self.all_failed.add(msg_id)
            self.all_done.add(msg_id)
            idents, msg = self.session.feed_identities(raw_msg, copy=False)
            msg = self.session.unpack_message(msg, copy=False, content=False)
            parent = msg['header']
            idents = [idents[0], engine] + idents[1:]
            # print (idents)
            try:
                raise error.EngineError(
                    "Engine %r died while running task %r" % (engine, msg_id))
            except:
                content = error.wrap_exception()
            msg = self.session.send(self.client_stream,
                                    'apply_reply',
                                    content,
                                    parent=parent,
                                    ident=idents)
            self.session.send(self.mon_stream, msg, ident=['outtask'] + idents)
            self.update_graph(msg_id)
Example #4
0
    def _handle_stranded_msgs(self, eid, uuid):
        """Handle messages known to be on an engine when the engine unregisters.
        
        It is possible that this will fire prematurely - that is, an engine will
        go down after completing a result, and the client will be notified
        that the result failed and later receive the actual result.
        """

        outstanding = self.queues[eid]

        for msg_id in outstanding:
            self.pending.remove(msg_id)
            self.all_completed.add(msg_id)
            try:
                raise error.EngineError(
                    "Engine %r died while running task %r" % (eid, msg_id))
            except:
                content = error.wrap_exception()
            # build a fake header:
            header = {}
            header['engine'] = uuid
            header['date'] = datetime.now()
            rec = dict(result_content=content,
                       result_header=header,
                       result_buffers=[])
            rec['completed'] = header['date']
            rec['engine_uuid'] = uuid
            try:
                self.db.update_record(msg_id, rec)
            except Exception:
                self.log.error("DB Error handling stranded msg %r" % msg_id,
                               exc_info=True)