def stop_job(self, job):
    """This is intended to be called once a job has been completed
    (not cancelled, but completed)
    """
    self._log.info("stopping job: {}".format(job.id))

    if str(job.id) in self._job_handlers:
        with self._job_queue_lock:
            handler = self._job_handlers[str(job.id)]
            queue = self._job_amqp_queues[handler.queue_name]

            # rebuild the backing list without this job's handler
            # (queued_handler avoids shadowing the handler looked up above)
            new_queue = []
            for priority, queued_handler in queue.queue:
                if queued_handler.job.id == job.id:
                    continue
                new_queue.append((priority, queued_handler))
            queue.queue = new_queue

        Master.instance().update_status(queues=self._get_queues())

    AmqpManager.instance().queue_msg(
        json.dumps(dict(
            type="cancel",
            job=str(job.id),
        )),
        "",
        exchange=Master.AMQP_BROADCAST_XCHG,
    )

    job.reload()
    job.status = {"name": "finished"}
    job.timestamps["finished"] = time.time()
    job.save()

    self._log.info("stopped job: {}".format(job.id))
    self._cleanup_job(job)
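# --- Illustrative sketch (not part of the original module) ---
# The direct ``queue.queue = new_queue`` assignment above is worth a note:
# Queue.PriorityQueue keeps its backing list heap-ordered via heapq, and
# filtering arbitrary entries out of a heap can break the heap invariant,
# so a later get() may not return the lowest-priority entry. A safer
# pattern, assuming the same (priority, handler) tuples, is to re-heapify
# the filtered list. The job ids below are hypothetical.
import heapq
import Queue as Q  # stdlib "queue" on Python 3

def remove_job_from_queue(queue, job_id):
    """Drop every (priority, job_id) entry from ``queue`` in place."""
    with queue.mutex:
        kept = [item for item in queue.queue if item[1] != job_id]
        heapq.heapify(kept)       # restore the heap invariant
        queue.queue[:] = kept

q = Q.PriorityQueue()
for entry in [(0, "job-a"), (5, "job-b"), (2, "job-c")]:
    q.put(entry)
remove_job_from_queue(q, "job-b")
assert q.get() == (0, "job-a")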
def cancel_job(self, job):
    """Cancel the job ``job``

    :job: The job object to cancel
    :returns: None
    """
    # TODO forcefully cancel the job (notify all slaves via amqp that
    # this job.id needs to be forcefully cancelled)
    self._log.info("cancelling job: {}".format(job.id))

    if str(job.id) in self._job_handlers:
        with self._job_queue_lock:
            handler = self._job_handlers[str(job.id)]
            queue = self._job_amqp_queues[handler.queue_name]

            # drain the queue and requeue everything except the handler
            # belonging to the job being cancelled
            new_queue = []
            while queue.qsize() > 0:
                priority, queued_handler = queue.get()
                # leave this one out (the one we're cancelling)
                if queued_handler.job.id == job.id:
                    continue
                new_queue.append((priority, queued_handler))
            for item in new_queue:
                queue.put(item)

        Master.instance().update_status(queues=self._get_queues())
    else:
        self._log.debug(
            "job to cancel ({}) not in job handlers, sending cancel "
            "message to amqp anyways".format(job.id)
        )

    AmqpManager.instance().queue_msg(
        json.dumps(dict(
            type="cancel",
            job=str(job.id),
        )),
        "",
        exchange=Master.AMQP_BROADCAST_XCHG,
    )

    job.reload()
    job.status = {"name": "cancelled"}
    job.timestamps["cancelled"] = time.time()
    job.save()

    self._log.info("cancelled job: {}".format(job.id))
    self._cleanup_job(job)
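# --- Hypothetical sketch (assumption, not shown in this section) ---
# Both stop_job and cancel_job broadcast the same {"type": "cancel"}
# message on Master.AMQP_BROADCAST_XCHG. A consumer of that broadcast
# might dispatch on the "type" field roughly like this; the names
# ``on_broadcast_msg`` and ``running_handlers`` are invented for the
# example and do not come from the original code.
import json

def on_broadcast_msg(body, running_handlers):
    """Stop the local handler for a broadcast-cancelled job, if any."""
    data = json.loads(body)
    if data.get("type") == "cancel":
        handler = running_handlers.pop(data["job"], None)
        if handler is not None:
            handler.stop()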
def __init__(self, intf):
    """Initialize the Master"""
    super(Master, self).__init__()
    self._log = logging.getLogger(self.__class__.__name__)

    # this will be set when the docker container is linked to talus-db
    self._db_conn_info = os.environ["TALUS_DB_PORT_27017_TCP"].replace("tcp://", "")

    self._running = threading.Event()
    self._watcher = None
    self._amqp_man = AmqpManager.instance()
    self._intf = intf

    # TODO need a better way than just eth0
    # 2 == netifaces.AF_INET, i.e. the first IPv4 address on the interface
    self._ip = netifaces.ifaddresses(intf)[2][0]['addr']

    # delete all the previous master entries - there should
    # only be _ONE_ master document in the DB
    MasterModel.objects().delete()

    self._master_obj_lock = threading.Lock()
    self._master_obj = MasterModel()
    self._master_obj.hostname = socket.gethostname()
    self._master_obj.ip = self._ip
    self._master_obj.vms = []
    self._master_obj.queue = []
    self._master_obj.save()

    self._log.info("ready")
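# --- Illustrative note (not part of the original module) ---
# The bare index in ``netifaces.ifaddresses(intf)[2][0]['addr']`` above is
# the address-family key: 2 is AF_INET, so the expression picks the first
# IPv4 address on the interface. The named constant reads better; "eth0"
# here is just an example interface name.
import netifaces

addrs = netifaces.ifaddresses("eth0")
# addrs is keyed by address family, e.g.:
# {2: [{'addr': '10.0.0.5', 'netmask': '255.255.255.0', ...}], ...}
ipv4 = addrs[netifaces.AF_INET][0]['addr']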
def __init__(self, drip_size=25):
    """Init the job manager

    :drip_size: The number of jobs to be added to the queue at once
    """
    super(JobManager, self).__init__()
    self._drip_size = drip_size
    self._running = threading.Event()
    self._job_queue_lock = threading.Lock()
    self._amqp_man = AmqpManager.instance()
    self._log = logging.getLogger("JobMan")

    # each job can potentially specify its own queue; this will be a
    # dict of Q.PriorityQueue()s keyed by queue name
    self._job_amqp_queues = {}

    # dict of {<jobid>: JobHandler}
    self._job_handlers = {}
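# --- Hedged sketch (assumption, not the project's actual drip code) ---
# The docstring above implies a "drip" pattern: per pass, at most
# ``drip_size`` entries are pulled off a per-queue PriorityQueue and
# published. A minimal standalone version of that loop might look like
# this; ``drip`` and ``publish`` are invented names for the example.
import Queue as Q  # stdlib "queue" on Python 3

def drip(queue, publish, drip_size):
    """Hand at most ``drip_size`` queued entries to ``publish``."""
    moved = 0
    while moved < drip_size and queue.qsize() > 0:
        priority, handler = queue.get()
        publish(handler)
        moved += 1
    return moved

q = Q.PriorityQueue()
for entry in [(1, "handler-a"), (0, "handler-b")]:
    q.put(entry)
drip(q, lambda handler: None, drip_size=25)  # drains both, lowest priority first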
def _shutdown_singletons(self):
    self._log.info("shutting down singletons")
    AmqpManager.instance().stop()