def abandon_stale_jobs(cls):
    """
    Mark unfinished jobs that have been inactive for too long as
    ``ABANDONED``.

    On rare occasions, nommers crash so hard that no ``ERROR`` state change
    is made, and the job just gets stuck in a permanent unfinished state
    (``DOWNLOADING``, ``ENCODING``, ``UPLOADING``, etc). Rather than hang
    on to these indefinitely, abandon them by setting their state
    to ``ABANDONED``.

    The threshold for which jobs are considered abandoned is configurable
    via the :py:data:`FEEDERD_ABANDON_INACTIVE_JOBS_THRESH <media_nommer.conf.settings.FEEDERD_ABANDON_INACTIVE_JOBS_THRESH>`
    setting. It is compared against the number of seconds elapsed since
    the job's ``last_modified_dtime``.
    """
    logger.debug("JobCache.abandon_stale_jobs(): "
                 "Looking for stale jobs.")

    # Iterate over a snapshot of the cached jobs: remove_job() is called
    # from inside the loop and presumably mutates the cache, which would
    # raise "dictionary changed size during iteration" on a live dict view.
    for job in list(cls.get_cached_jobs().values()):
        if job.is_finished():
            # Finished jobs are never considered stale.
            continue

        tdelta = datetime.datetime.now() - job.last_modified_dtime
        inactive_seconds = total_seconds(tdelta)

        if inactive_seconds >= settings.FEEDERD_ABANDON_INACTIVE_JOBS_THRESH:
            # Drop the job from the cache first, then record the new state,
            # carrying over whatever state details the job already had.
            cls.remove_job(job)
            job.set_job_state('ABANDONED', job.job_state_details)
def contemplate_termination(cls, thread_count_mod=0):
    """
    Looks at how long it's been since this worker has done something, and
    decides whether to self-terminate.

    :param int thread_count_mod: Add this to the amount returned by the call
        to :py:meth:`get_num_active_threads`. This is useful when calling
        this method from a non-encoder thread.
    :rtype: bool
    :returns: ``True`` if this instance terminated itself (``terminate()``
        was issued for at least one matching EC2 instance), ``False``
        if not.
    """
    if not cls.is_ec2_instance():
        # Developing locally, don't go here.
        return False

    # The raw thread count may include the thread that is doing this
    # contemplation. Callers compensate for that through thread_count_mod
    # (presumably by passing a negative value -- confirm against callers);
    # otherwise the count would never reach zero, even with no jobs encoding.
    num_active_threads = cls.get_num_active_threads() + thread_count_mod
    if num_active_threads > 0:
        # Encoding right now, don't terminate.
        return False

    # Total seconds of inactivity.
    tdelt = datetime.datetime.now() - cls.last_dtime_i_did_something
    inactive_secs = total_seconds(tdelt)

    if inactive_secs <= settings.NOMMERD_MAX_INACTIVITY:
        # Still within the inactivity threshold. Continue existence.
        return False

    instance_id = cls.get_instance_id()
    conn = cls._aws_ec2_connection()
    # Find this particular EC2 instance via boto.
    reservations = conn.get_all_instances(instance_ids=[instance_id])

    # Only report a termination if one was actually requested; an empty
    # lookup result must not be reported as a successful self-termination.
    terminated = False
    # This should only be one match, but in the interest of
    # playing along...
    for reservation in reservations:
        for instance in reservation.instances:
            # Here's the instance, terminate it.
            logger.info("Goodbye, cruel world.")
            cls.send_instance_state_update(state='TERMINATED')
            instance.terminate()
            terminated = True

    # Seeya later! (Or, if nothing matched, continue existence.)
    return terminated