Example #1
def _get_cached(path):
    # 1/ memory cache
    if path in JUMBO_FIELDS_MEMORY_CACHE:
        return JUMBO_FIELDS_MEMORY_CACHE[path]

    # 2/ disk cache
    if SIMPLEFLOW_ENABLE_DISK_CACHE:
        try:
            # NB: this cache may also be triggered on activity workers, where it's not that
            # useful. The performance hit should be minimal. To be improved later.
            # NB2: cache has to be lazily instantiated here, cache objects do not survive forks,
            # see DiskCache docs.
            cache = Cache(constants.CACHE_DIR)
            # generate a dedicated cache key because this cache may be shared with other
            # features of simpleflow at some point
            cache_key = "jumbo_fields/" + path.split("/")[-1]
            if cache_key in cache:
                logger.debug(
                    "diskcache: getting key={} from cache_dir={}".format(
                        cache_key, constants.CACHE_DIR))
                return cache[cache_key]
        except OperationalError:
            logger.warning(
                "diskcache: got an OperationalError, skipping cache usage")

    # nothing to return, but better be explicit here
    return
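The two lookups above form a classic two-tier cache: a per-process dict backed by DiskCache. A minimal, self-contained sketch of the same pattern (the cache directory and key prefix here are illustrative, not simpleflow's actual values; Example #7 below shows the project's own write path):

from diskcache import Cache

MEMORY_CACHE = {}
CACHE_DIR = "/tmp/jumbo-cache"  # illustrative; simpleflow uses constants.CACHE_DIR


def get_cached(path):
    # tier 1: process-local memory, fastest
    if path in MEMORY_CACHE:
        return MEMORY_CACHE[path]
    # tier 2: disk; the Cache object is created lazily because it does not survive forks
    cache = Cache(CACHE_DIR)
    key = "jumbo_fields/" + path.split("/")[-1]
    if key in cache:
        return cache[key]
    return None


def set_cached(path, content):
    MEMORY_CACHE[path] = content
    Cache(CACHE_DIR).set("jumbo_fields/" + path.split("/")[-1], content, expire=3 * 3600)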
Example #2
    def schedule(self, *args, **kwargs):
        input = {
            'args': self.args,
            'kwargs': self.kwargs,
        }
        if self.extra_input:
            input.update(self.extra_input)
        logger.debug(
            'scheduling signal name={name}, workflow_id={workflow_id}, run_id={run_id}, control={control}, '
            'extra_input={extra_input}'.format(
                name=self.name,
                workflow_id=self.workflow_id,
                run_id=self.run_id,
                control=self.control,
                extra_input=self.extra_input,
            ))

        decision = swf.models.decision.ExternalWorkflowExecutionDecision()
        decision.signal(
            signal_name=self.name,
            input=input,
            workflow_id=self.workflow_id,
            run_id=self.run_id,
            control=self.control,
        )

        return [decision]
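For reference, a SignalExternalWorkflowExecution decision like the one built above should map onto the following SWF API structure (field names follow the AWS SWF API; the values here are illustrative placeholders):

decision = {
    "decisionType": "SignalExternalWorkflowExecution",
    "signalExternalWorkflowExecutionDecisionAttributes": {
        "signalName": "my_signal",              # self.name
        "workflowId": "my-workflow-id",         # self.workflow_id
        "runId": "a-run-id",                    # self.run_id
        "input": '{"args": [], "kwargs": {}}',  # serialized input dict
        "control": "caller metadata",           # self.control, optional
    },
}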
Example #3
def spawn(poller, decision_response):
    logger.debug("spawn() pid={}".format(os.getpid()))
    worker = multiprocessing.Process(
        target=process_decision, args=(poller, decision_response),
    )
    worker.start()
    worker.join()
Example #4
    def schedule(self, *args, **kwargs):
        input = {
            'args': self.args,
            'kwargs': self.kwargs,
        }
        if self.extra_input:
            input.update(self.extra_input)
        logger.debug(
            'scheduling signal name={name}, workflow_id={workflow_id}, run_id={run_id}, control={control}, '
            'extra_input={extra_input}'.format(
                name=self.name,
                workflow_id=self.workflow_id,
                run_id=self.run_id,
                control=self.control,
                extra_input=self.extra_input,
            )
        )

        decision = swf.models.decision.ExternalWorkflowExecutionDecision()
        decision.signal(
            signal_name=self.name,
            input=input,
            workflow_id=self.workflow_id,
            run_id=self.run_id,
            control=self.control,
        )

        return [decision]
Example #5
def spawn(poller, decision_response):
    logger.debug("spawn() pid={}".format(os.getpid()))
    worker = multiprocessing.Process(
        target=process_decision,
        args=(poller, decision_response),
    )
    worker.start()
    worker.join()
Example #6
    def process(self, poller, token, task):
        """

        :param poller:
        :type poller: ActivityPoller
        :param token:
        :type token: str
        :param task:
        :type task: swf.models.ActivityTask
        """
        logger.debug('ActivityWorker.process() pid={}'.format(os.getpid()))
        try:
            activity = self.dispatch(task)
            input = format.decode(task.input)
            args = input.get('args', ())
            kwargs = input.get('kwargs', {})
            context = sanitize_activity_context(task.context)
            context['domain_name'] = poller.domain.name
            if input.get('meta', {}).get('binaries'):
                download_binaries(input['meta']['binaries'])
            result = ActivityTask(activity, *args, context=context, **kwargs).execute()
        except Exception:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            logger.exception("process error: {}".format(str(exc_value)))
            if isinstance(exc_value, ExecutionError) and len(exc_value.args):
                details = exc_value.args[0]
                reason = format_exc(exc_value)  # FIXME json.loads and rebuild?
            else:
                tb = traceback.format_tb(exc_traceback)
                reason = format_exc(exc_value)
                details = json_dumps(
                    {
                        'error': exc_type.__name__,
                        'message': str(exc_value),
                        'traceback': tb,
                    },
                    default=repr
                )
            return poller.fail_with_retry(
                token,
                task,
                reason=reason,
                details=details
            )

        try:
            logger.info('completing activity')
            poller.complete_with_retry(token, result)
        except Exception as err:
            logger.exception("complete error")
            reason = 'cannot complete task {}: {} {}'.format(
                task.activity_id,
                err.__class__.__name__,
                err,
            )
            poller.fail_with_retry(token, task, reason)
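The except branch above packs the failure into a reason plus a JSON details payload. A stripped-down, runnable sketch of that pattern using only the standard library (simpleflow's format_exc and json_dumps helpers are replaced with stdlib equivalents):

import json
import sys
import traceback

try:
    1 / 0  # stand-in for any failing activity code
except Exception:
    exc_type, exc_value, exc_traceback = sys.exc_info()
    reason = "{}: {}".format(exc_type.__name__, exc_value)
    details = json.dumps(
        {
            "error": exc_type.__name__,
            "message": str(exc_value),
            "traceback": traceback.format_tb(exc_traceback),
        },
        default=repr,  # make non-serializable values printable
    )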
Example #7
def _set_cached(path, content):
    # 1/ memory cache
    JUMBO_FIELDS_MEMORY_CACHE[path] = content

    # 2/ disk cache
    if SIMPLEFLOW_ENABLE_DISK_CACHE:
        try:
            cache = Cache(constants.CACHE_DIR)
            cache_key = "jumbo_fields/" + path.split("/")[-1]
            logger.debug("diskcache: setting key={} on cache_dir={}".format(cache_key, constants.CACHE_DIR))
            cache.set(cache_key, content, expire=3 * constants.HOUR)
        except OperationalError:
            logger.warning("diskcache: got an OperationalError on write, skipping cache write")
Example #8
def process_decision(poller, decision_response):
    # type: (DeciderPoller, Response) -> None
    workflow_id = decision_response.execution.workflow_id
    workflow_str = "workflow {} ({})".format(workflow_id, poller.workflow_name)
    logger.debug("process_decision() pid={}".format(os.getpid()))
    logger.info("taking decision for {}".format(workflow_str))
    format.JUMBO_FIELDS_MEMORY_CACHE.clear()
    decisions = poller.decide(decision_response)
    try:
        logger.info("completing decision for {}".format(workflow_str))
        poller.complete_with_retry(decision_response.token, decisions)
    except Exception as err:
        logger.error("cannot complete decision for {}: {}".format(workflow_str, err))
Example #9
def process_task(poller, token, task):
    """

    :param poller:
    :type poller: ActivityPoller
    :param token:
    :type token: str
    :param task:
    :type task: swf.models.ActivityTask
    """
    logger.debug('process_task() pid={}'.format(os.getpid()))
    format.JUMBO_FIELDS_MEMORY_CACHE.clear()
    worker = ActivityWorker()
    worker.process(poller, token, task)
Example #10
def process_task(poller, token, task):
    """

    :param poller:
    :type poller: ActivityPoller
    :param token:
    :type token: str
    :param task:
    :type task: swf.models.ActivityTask
    """
    logger.debug("process_task() pid={}".format(os.getpid()))
    format.JUMBO_FIELDS_MEMORY_CACHE.clear()
    worker = ActivityWorker()
    worker.process(poller, token, task)
Example #11
def activity_rerun(domain, workflow_id, run_id, input, scheduled_id,
                   activity_id):
    # handle params
    if not activity_id and not scheduled_id:
        logger.error("Please supply --scheduled-id or --activity-id.")
        sys.exit(1)

    input_override = None
    if input:
        input_override = format.decode(input)

    # find workflow execution
    try:
        wfe = helpers.get_workflow_execution(domain, workflow_id, run_id)
    except (swf.exceptions.DoesNotExistError, IndexError):
        logger.error("Couldn't find execution, exiting.")
        sys.exit(1)
    logger.info("Found execution: workflowId={} runId={}".format(
        wfe.workflow_id, wfe.run_id))

    # now rerun the specified activity
    history = History(wfe.history())
    history.parse()
    task, args, kwargs, meta, params = helpers.find_activity(
        history,
        scheduled_id=scheduled_id,
        activity_id=activity_id,
        input=input_override,
    )
    kwargs["context"].update({
        "workflow_id": wfe.workflow_id,
        "run_id": wfe.run_id,
    })
    logger.debug("Found activity. Last execution:")
    for line in json_dumps(params, pretty=True).split("\n"):
        logger.debug(line)
    if input_override:
        logger.info("NB: input will be overriden with the passed one!")
    logger.info("Will re-run: {}(*{}, **{}) [+meta={}]".format(
        task, args, kwargs, meta))

    # download binaries if needed
    download_binaries(meta.get("binaries", {}))

    # execute the activity task with the correct arguments
    instance = ActivityTask(task, *args, **kwargs)
    result = instance.execute()
    if hasattr(instance, "post_execute"):
        instance.post_execute()
    logger.info("Result (JSON): {}".format(json_dumps(result, compact=False)))
Example #12
def load_workflow_executor(
    domain,
    workflow_name,
    task_list=None,
    repair_with=None,
    force_activities=None,
    repair_workflow_id=None,
    repair_run_id=None,
):
    """
    Load a workflow executor.

    :param domain:
    :type domain: str | swf.models.Domain
    :param workflow_name:
    :type workflow_name: str
    :param task_list:
    :type task_list: Optional[str]
    :param repair_with:
    :type repair_with: Optional[simpleflow.history.History]
    :param force_activities:
    :type force_activities: Optional[str]
    :param repair_workflow_id: workflow ID to repair
    :type repair_workflow_id: Optional[str]
    :param repair_run_id: run ID to repair
    :type repair_run_id: Optional[str]
    :return: Executor for this workflow
    :rtype: Executor
    """
    logger.debug(
        'load_workflow_executor(workflow_name="{}")'.format(workflow_name))
    module_name, object_name = workflow_name.rsplit(".", 1)
    module = __import__(module_name, fromlist=["*"])

    workflow = getattr(module, object_name)

    # TODO: find the cause of this differentiated behaviour
    if not isinstance(domain, swf.models.Domain):
        domain = swf.models.Domain(domain)

    return Executor(
        domain,
        workflow,
        task_list,
        repair_with=repair_with,
        force_activities=force_activities,
        repair_workflow_id=repair_workflow_id,
        repair_run_id=repair_run_id,
    )
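The rsplit-plus-__import__ dance above is the standard dotted-path loader; importlib offers an equivalent that is arguably clearer (a sketch, not simpleflow's actual code):

import importlib


def load_object(dotted_path):
    # "package.module.MyWorkflow" -> the MyWorkflow attribute of package.module
    module_name, object_name = dotted_path.rsplit(".", 1)
    return getattr(importlib.import_module(module_name), object_name)


# e.g. load_object("collections.OrderedDict") is the OrderedDict class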
Example #13
    def __init__(self, *args, **kwargs):
        self.region = (SETTINGS.get('region') or
                       kwargs.get('region') or
                       boto.swf.layer1.Layer1.DefaultRegionName)
        # Use settings-provided keys if available, otherwise pass empty
        # dictionary to boto SWF client, which will use its default credentials
        # chain provider.
        cred_keys = ['aws_access_key_id', 'aws_secret_access_key']
        creds_ = {k: SETTINGS[k] for k in cred_keys if SETTINGS.get(k, None)}
        self.connection = (kwargs.pop('connection', None) or
                           boto.swf.connect_to_region(self.region, **creds_))
        if self.connection is None:
            raise ValueError('invalid region: {}'.format(self.region))

        logger.debug("initiated connection to region={}".format(self.region))
Example #14
    def __init__(self, *args, **kwargs):
        self.region = (SETTINGS.get('region') or kwargs.get('region')
                       or boto.swf.layer1.Layer1.DefaultRegionName)
        # Use settings-provided keys if available, otherwise pass empty
        # dictionary to boto SWF client, which will use its default credentials
        # chain provider.
        cred_keys = ['aws_access_key_id', 'aws_secret_access_key']
        creds_ = {k: SETTINGS[k] for k in cred_keys if SETTINGS.get(k, None)}
        self.connection = (kwargs.pop('connection', None)
                           or boto.swf.connect_to_region(
                               self.region, **creds_))
        if self.connection is None:
            raise ValueError('invalid region: {}'.format(self.region))

        logger.debug("initiated connection to region={}".format(self.region))
Example #15
def _set_cached(path, content):
    # 1/ memory cache
    JUMBO_FIELDS_MEMORY_CACHE[path] = content

    # 2/ disk cache
    if SIMPLEFLOW_ENABLE_DISK_CACHE:
        try:
            cache = Cache(constants.CACHE_DIR)
            cache_key = "jumbo_fields/" + path.split("/")[-1]
            logger.debug("diskcache: setting key={} on cache_dir={}".format(
                cache_key, constants.CACHE_DIR))
            cache.set(cache_key, content, expire=3 * constants.HOUR)
        except OperationalError:
            logger.warning(
                "diskcache: got an OperationalError on write, skipping cache write"
            )
Example #16
    def __init__(self, *args, **kwargs):
        settings_ = {
            key: SETTINGS.get(key, kwargs.get(key))
            for key in ('aws_access_key_id', 'aws_secret_access_key')
        }

        self.region = (SETTINGS.get('region') or kwargs.get('region')
                       or boto.swf.layer1.Layer1.DefaultRegionName)

        self.connection = (kwargs.pop('connection', None)
                           or boto.swf.connect_to_region(
                               self.region, **settings_))
        if self.connection is None:
            raise ValueError('invalid region: {}'.format(self.region))

        logger.debug("initiated connection to region={}".format(self.region))
Example #17
def activity_rerun(domain,
                   workflow_id,
                   run_id,
                   input,
                   scheduled_id,
                   activity_id):
    # handle params
    if not activity_id and not scheduled_id:
        logger.error("Please supply --scheduled-id or --activity-id.")
        sys.exit(1)

    input_override = None
    if input:
        input_override = format.decode(input)

    # find workflow execution
    try:
        wfe = helpers.get_workflow_execution(domain, workflow_id, run_id)
    except (swf.exceptions.DoesNotExistError, IndexError):
        logger.error("Couldn't find execution, exiting.")
        sys.exit(1)
    logger.info("Found execution: workflowId={} runId={}".format(wfe.workflow_id, wfe.run_id))

    # now rerun the specified activity
    history = History(wfe.history())
    history.parse()
    task, args, kwargs, meta, params = helpers.find_activity(
        history, scheduled_id=scheduled_id, activity_id=activity_id, input=input_override,
    )
    logger.debug("Found activity. Last execution:")
    for line in json_dumps(params, pretty=True).split("\n"):
        logger.debug(line)
    if input_override:
        logger.info("NB: input will be overriden with the passed one!")
    logger.info("Will re-run: {}(*{}, **{}) [+meta={}]".format(task, args, kwargs, meta))

    # download binaries if needed
    download_binaries(meta.get("binaries", {}))

    # execute the activity task with the correct arguments
    instance = ActivityTask(task, *args, **kwargs)
    result = instance.execute()
    if hasattr(instance, 'post_execute'):
        instance.post_execute()
    logger.info("Result (JSON): {}".format(json_dumps(result, compact=False)))
Example #18
    def _cleanup_worker_processes(self):
        # cleanup children
        to_remove = []
        for pid, child in self._processes.items():
            try:
                name, status = child.name(), child.status()
            except psutil.NoSuchProcess:  # May be untimely deceased
                name, status = "unknown", "unknown"
            logger.debug("  child: name=%s pid=%d status=%s" % (name, child.pid, status))
            if status in (psutil.STATUS_ZOMBIE, "unknown"):
                logger.debug("  process {} is zombie, will cleanup".format(child.pid))
                # join process to clean it up
                child.wait()
                # set the process to be removed from self._processes
                to_remove.append(pid)

        # cleanup our internal state (self._processes)
        for pid in to_remove:
            del self._processes[pid]
Example #19
def load_workflow_executor(domain, workflow_name, task_list=None, repair_with=None,
                           force_activities=None,
                           repair_workflow_id=None, repair_run_id=None,
                           ):
    """
    Load a workflow executor.

    :param domain:
    :type domain: str | swf.models.Domain
    :param workflow_name:
    :type workflow_name: str
    :param task_list:
    :type task_list: Optional[str]
    :param repair_with:
    :type repair_with: Optional[simpleflow.history.History]
    :param force_activities:
    :type force_activities: Optional[str]
    :param repair_workflow_id: workflow ID to repair
    :type repair_workflow_id: Optional[str]
    :param repair_run_id: run ID to repair
    :type repair_run_id: Optional[str]
    :return: Executor for this workflow
    :rtype: Executor
    """
    logger.debug('load_workflow_executor(workflow_name="{}")'.format(workflow_name))
    module_name, object_name = workflow_name.rsplit('.', 1)
    module = __import__(module_name, fromlist=['*'])

    workflow = getattr(module, object_name)

    # TODO: find the cause of this differentiated behaviour
    if not isinstance(domain, swf.models.Domain):
        domain = swf.models.Domain(domain)

    return Executor(
        domain, workflow, task_list,
        repair_with=repair_with,
        force_activities=force_activities,
        repair_workflow_id=repair_workflow_id,
        repair_run_id=repair_run_id,
    )
Example #20
    def _cleanup_worker_processes(self):
        # cleanup children
        to_remove = []
        for pid, child in self._processes.items():
            try:
                name, status = child.name(), child.status()
            except psutil.NoSuchProcess:  # May be untimely deceased
                name, status = "unknown", "unknown"
            logger.debug("  child: name=%s pid=%d status=%s" %
                         (name, child.pid, status))
            if status in (psutil.STATUS_ZOMBIE, "unknown"):
                logger.debug("  process {} is zombie, will cleanup".format(
                    child.pid))
                # join process to clean it up
                child.wait()
                # set the process to be removed from self._processes
                to_remove.append(pid)

        # cleanup our internal state (self._processes)
        for pid in to_remove:
            del self._processes[pid]
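Both _cleanup_worker_processes variants above reap zombie children by joining them. A self-contained sketch of that idiom with psutil, scanning the current process's own children rather than a tracked pool:

import psutil


def reap_zombie_children():
    for child in psutil.Process().children():
        try:
            status = child.status()
        except psutil.NoSuchProcess:  # died between listing and inspection
            continue
        if status == psutil.STATUS_ZOMBIE:
            child.wait()  # join it so the OS can release the process entry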
Example #21
def _get_cached(path):
    # 1/ memory cache
    if path in JUMBO_FIELDS_MEMORY_CACHE:
        return JUMBO_FIELDS_MEMORY_CACHE[path]

    # 2/ disk cache
    if SIMPLEFLOW_ENABLE_DISK_CACHE:
        try:
            # NB: this cache may also be triggered on activity workers, where it's not that
            # useful. The performance hit should be minimal. To be improved later.
            # NB2: cache has to be lazily instantiated here, cache objects do not survive forks,
            # see DiskCache docs.
            cache = Cache(constants.CACHE_DIR)
            # generate a dedicated cache key because this cache may be shared with other
            # features of simpleflow at some point
            cache_key = "jumbo_fields/" + path.split("/")[-1]
            if cache_key in cache:
                logger.debug("diskcache: getting key={} from cache_dir={}".format(cache_key, constants.CACHE_DIR))
                return cache[cache_key]
        except OperationalError:
            logger.warning("diskcache: got an OperationalError, skipping cache usage")

    # nothing to return, but better be explicit here
    return
Example #22
    def poll_with_retry(self):
        """
        Poll for a task (a token plus its data), retrying on response errors
        with an exponential delay. Uses long-polling with a timeout of one
        minute.

        See also
        http://docs.aws.amazon.com/amazonswf/latest/apireference/API_PollForDecisionTask.html#API_PollForDecisionTask_RequestSyntax
        http://docs.aws.amazon.com/amazonswf/latest/apireference/API_PollForActivityTask.html#API_PollForActivityTask_RequestSyntax

        :returns:
        :rtype: swf.responses.Response
        """
        task_list = self.task_list
        identity = self.identity

        logger.debug("polling task on %s", task_list)
        poll = utils.retry.with_delay(
            nb_times=self.nb_retries,
            delay=utils.retry.exponential,
            log_with=logger.exception,
            on_exceptions=swf.exceptions.ResponseError,
        )(self.poll)
        response = poll(task_list, identity=identity)
        return response
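utils.retry.with_delay is simpleflow-internal and its implementation is not shown here; the following is a hypothetical reimplementation inferred from the call site above, not the actual simpleflow code:

import functools
import time


def with_delay(nb_times, delay, log_with=None, on_exceptions=Exception):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(nb_times):
                try:
                    return func(*args, **kwargs)
                except on_exceptions:
                    if attempt == nb_times - 1:
                        raise  # out of retries, propagate
                    if log_with:
                        log_with("attempt %d failed, retrying", attempt + 1)
                    time.sleep(delay(attempt))
        return wrapper
    return decorator


def exponential(attempt):  # hypothetical delay policy: 1s, 2s, 4s, ...
    return 2 ** attempt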
Example #23
    def wrapped(self, *args, **kwargs):
        logger.debug("entering state {}: {}(args={}, kwargs={})".format(
            state, method.__name__, args, kwargs))
        self.state = state
        return method(self, *args, **kwargs)
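This fragment is presumably the inner function of a state-transition decorator. A sketch of the likely enclosing pattern (the name transition and the logger setup are assumptions, not simpleflow's source):

import functools
import logging

logger = logging.getLogger(__name__)


def transition(state):
    def decorator(method):
        @functools.wraps(method)
        def wrapped(self, *args, **kwargs):
            logger.debug("entering state {}: {}(args={}, kwargs={})".format(
                state, method.__name__, args, kwargs))
            self.state = state
            return method(self, *args, **kwargs)
        return wrapped
    return decorator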
Example #24
def spawn(poller, token, task, heartbeat=60):
    """
    Spawn a process and wait for it to end, sending heartbeats to SWF.

    On activity timeouts and termination, we reap the worker process and its
    children.

    :param poller:
    :type poller: ActivityPoller
    :param token:
    :type token: str
    :param task:
    :type task: swf.models.ActivityTask
    :param heartbeat: heartbeat delay (seconds)
    :type heartbeat: int
    """
    logger.info(
        "spawning new activity worker pid={} heartbeat={}".format(
            os.getpid(), heartbeat
        )
    )
    worker = multiprocessing.Process(target=process_task, args=(poller, token, task))
    worker.start()

    def worker_alive():
        return psutil.pid_exists(worker.pid)

    while worker_alive():
        worker.join(timeout=heartbeat)
        if not worker_alive():
            # Most certainly unneeded: we'll see
            if worker.exitcode is None:
                # race condition, try and re-join
                worker.join(timeout=0)
                if worker.exitcode is None:
                    logger.warning(
                        "process {} is dead but multiprocessing doesn't know it (simpleflow bug)".format(
                            worker.pid
                        )
                    )
            if worker.exitcode != 0:
                poller.fail_with_retry(
                    token,
                    task,
                    reason="process {} died: exit code {}".format(
                        worker.pid, worker.exitcode
                    ),
                )
            return
        try:
            logger.debug("heartbeating for pid={} (token={})".format(worker.pid, token))
            response = poller.heartbeat(token)
        except swf.exceptions.DoesNotExistError as error:
            # Either the task or the workflow execution no longer exists,
            # let's kill the worker process.
            logger.warning("heartbeat failed: {}".format(error))
            logger.warning("killing (KILL) worker with pid={}".format(worker.pid))
            reap_process_tree(worker.pid)
            return
        except swf.exceptions.RateLimitExceededError as error:
            # ignore rate limit errors: high chances the next heartbeat will be
            # ok anyway, so it would be stupid to break the task for that
            logger.warning(
                'got a "ThrottlingException / Rate exceeded" when heartbeating for task {}: {}'.format(
                    task.activity_type.name, error
                )
            )
            continue
        except Exception as error:
            # Let's crash if we cannot notify SWF that the heartbeat failed.
            # The subprocess will become an orphan and the heartbeat timeout
            # may eventually trigger on the Amazon SWF side.
            logger.error(
                "cannot send heartbeat for task {}: {}".format(
                    task.activity_type.name, error
                )
            )
            raise

        # Task cancelled.
        if response and response.get("cancelRequested"):
            reap_process_tree(worker.pid)
            return
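A minimal, runnable sketch of the supervise-and-heartbeat loop above, with a print standing in for poller.heartbeat(token) and is_alive() standing in for the psutil liveness check:

import multiprocessing
import time


def work():
    time.sleep(5)  # stand-in for the real activity


def supervise(heartbeat=2):
    worker = multiprocessing.Process(target=work)
    worker.start()
    while worker.is_alive():
        # join() with a timeout doubles as the heartbeat interval
        worker.join(timeout=heartbeat)
        if worker.is_alive():
            print("heartbeat for pid={}".format(worker.pid))


if __name__ == "__main__":
    supervise()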
Example #25
def spawn(poller, token, task, heartbeat=60):
    """
    Spawn a process and wait for it to end, sending heartbeats to SWF.

    On activity timeouts and termination, we reap the worker process and its
    children.

    :param poller:
    :type poller: ActivityPoller
    :param token:
    :type token: str
    :param task:
    :type task: swf.models.ActivityTask
    :param heartbeat: heartbeat delay (seconds)
    :type heartbeat: int
    """
    logger.info('spawning new activity worker pid={} heartbeat={}'.format(os.getpid(), heartbeat))
    worker = multiprocessing.Process(
        target=process_task,
        args=(poller, token, task),
    )
    worker.start()

    def worker_alive():
        return psutil.pid_exists(worker.pid)

    while worker_alive():
        worker.join(timeout=heartbeat)
        if not worker_alive():
            # Most certainly unneeded: we'll see
            if worker.exitcode is None:
                # race condition, try and re-join
                worker.join(timeout=0)
                if worker.exitcode is None:
                    logger.warning("process {} is dead but multiprocessing doesn't know it (simpleflow bug)".format(
                        worker.pid
                    ))
            if worker.exitcode != 0:
                poller.fail_with_retry(
                    token,
                    task,
                    reason='process {} died: exit code {}'.format(
                        worker.pid,
                        worker.exitcode)
                )
            return
        try:
            logger.debug(
                'heartbeating for pid={} (token={})'.format(worker.pid, token)
            )
            response = poller.heartbeat(token)
        except swf.exceptions.DoesNotExistError as error:
            # Either the task or the workflow execution no longer exists,
            # let's kill the worker process.
            logger.warning('heartbeat failed: {}'.format(error))
            logger.warning('killing (KILL) worker with pid={}'.format(worker.pid))
            reap_process_tree(worker.pid)
            return
        except swf.exceptions.RateLimitExceededError as error:
            # ignore rate limit errors: high chances the next heartbeat will be
            # ok anyway, so it would be stupid to break the task for that
            logger.warning(
                'got a "ThrottlingException / Rate exceeded" when heartbeating for task {}: {}'.format(
                    task.activity_type.name,
                    error))
            continue
        except Exception as error:
            # Let's crash if we cannot notify SWF that the heartbeat failed.
            # The subprocess will become an orphan and the heartbeat timeout
            # may eventually trigger on the Amazon SWF side.
            logger.error('cannot send heartbeat for task {}: {}'.format(
                task.activity_type.name,
                error))
            raise

        # Task cancelled.
        if response and response.get('cancelRequested'):
            reap_process_tree(worker.pid)
            return