Example #1
    def stop_job(self, job):
        """This is intended to be called once a job has been completed
        (not cancelled, but completed)
        """
        self._log.info("stopping job: {}".format(job.id))

        if str(job.id) in self._job_handlers:
            with self._job_queue_lock:
                handler = self._job_handlers[str(job.id)]
                queue = self._job_amqp_queues[handler.queue_name]

                new_queue = []
                for priority, handler in queue.queue:
                    if handler.job.id == job.id:
                        continue
                    new_queue.append((priority, handler))

                queue.queue = new_queue

                Master.instance().update_status(queues=self._get_queues())

        AmqpManager.instance().queue_msg(
            json.dumps(dict(type="cancel", job=str(job.id))),
            "",
            exchange=Master.AMQP_BROADCAST_XCHG)

        job.reload()
        job.status = {"name": "finished"}
        job.timestamps["finished"] = time.time()
        job.save()

        self._log.info("stopped job: {}".format(job.id))

        self._cleanup_job(job)
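
The broadcast published above is a small JSON payload sent to Master.AMQP_BROADCAST_XCHG with an empty routing key. The sketch below shows how a slave-side consumer might react to it; the callback name and the running_jobs mapping are assumptions for illustration and are not part of the original code.

import json

def on_broadcast(body, running_jobs):
    """Hypothetical slave-side callback for the broadcast exchange."""
    msg = json.loads(body)
    # stop_job() and cancel_job() both publish {"type": "cancel", "job": "<id>"}
    if msg.get("type") == "cancel" and msg.get("job") in running_jobs:
        running_jobs[msg["job"]].stop()

# example payload matching what stop_job() publishes
on_broadcast(json.dumps({"type": "cancel", "job": "example-job-id"}), {})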
Example #2
	def cancel_job(self, job):
		"""Cancel the job ``job``

		:job: The job object to cancel
		:returns: None

		"""
		# TODO forcefully cancel the job (notify all slaves via amqp that
		# this job.id needs to be forcefully cancelled)
		self._log.info("cancelling job: {}".format(job.id))

		if str(job.id) in self._job_handlers:
			with self._job_queue_lock:
				handler = self._job_handlers[str(job.id)]
				queue = self._job_amqp_queues[handler.queue_name]

				new_queue = []
				while queue.qsize() > 0:
					priority, handler = queue.get()
					# leave this one out (the one we're cancelling)
					if handler.job.id == job.id:
						continue
					new_queue.append((priority, handler))

				for item in new_queue:
					queue.put(item)

				Master.instance().update_status(queues=self._get_queues())
		else:
			self._log.debug("job to cancel ({}) not in job handlers, sending cancel message to amqp anyways".format(job.id))

		AmqpManager.instance().queue_msg(
			json.dumps(dict(
				type	= "cancel",
				job		= str(job.id)
			)),
			"",
			exchange=Master.AMQP_BROADCAST_XCHG
		)

		job.reload()
		job.status = {
			"name": "cancelled"
		}
		job.timestamps["cancelled"] = time.time()
		job.save()

		self._log.info("cancelled job: {}".format(job.id))

		self._cleanup_job(job)
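
The cancellation above removes the job's handler by draining the PriorityQueue and re-inserting everything else. The same pattern, isolated as a standalone sketch against the stdlib queue module (the helper name and the stand-in Job/Handler tuples are illustrative assumptions, not original code):

try:
    import Queue as Q  # Python 2, as the original code appears to target
except ImportError:
    import queue as Q  # Python 3

def remove_job_from_queue(q, job_id):
    """Drain a PriorityQueue and put back every entry except the
    handler belonging to job_id (illustrative helper)."""
    kept = []
    while q.qsize() > 0:
        priority, handler = q.get()
        if handler.job.id == job_id:
            continue  # drop the cancelled job's handler
        kept.append((priority, handler))
    for item in kept:
        q.put(item)

# usage sketch with stand-in objects
from collections import namedtuple
Job = namedtuple("Job", "id")
Handler = namedtuple("Handler", "job")

q = Q.PriorityQueue()
q.put((0, Handler(Job("a"))))
q.put((1, Handler(Job("b"))))
remove_job_from_queue(q, "a")
assert q.qsize() == 1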
Example #3
    def __init__(self, intf):
        """docstring for Master constructor"""
        super(Master, self).__init__()

        self._log = logging.getLogger(self.__class__.__name__)

        # this will be set when the docker container is linked to talus-db
        self._db_conn_info = os.environ["TALUS_DB_PORT_27017_TCP"].replace("tcp://", "")

        self._running = threading.Event()
        self._watcher = None
        self._amqp_man = AmqpManager.instance()

        self._intf = intf
        # TODO need a better way than just eth0
        self._ip = netifaces.ifaddresses(intf)[2][0]['addr']

        # delete all the previous master entries - there should
        # only be _ONE_ master document in the DB
        MasterModel.objects().delete()
        self._master_obj_lock = threading.Lock()
        self._master_obj = MasterModel()
        self._master_obj.hostname = socket.gethostname()
        self._master_obj.ip = self._ip
        self._master_obj.vms = []
        self._master_obj.queue = []
        self._master_obj.save()

        self._log.info("ready")
Example #4
	def __init__(self, intf):
		"""docstring for Master constructor"""
		super(Master, self).__init__()

		self._log = logging.getLogger(self.__class__.__name__)

		# this will be set when the docker container is linked to talus_db
		self._db_conn_info = os.environ["TALUS_DB_PORT_27017_TCP"].replace("tcp://", "")

		self._running = threading.Event()
		self._watcher = None
		self._amqp_man = AmqpManager.instance()
		
		self._intf = intf
		# TODO need a better way than just eth0
		self._ip = netifaces.ifaddresses(intf)[2][0]['addr']

		# delete all the previous master entries - there should
		# only be _ONE_ master document in the DB
		MasterModel.objects().delete()
		self._master_obj_lock = threading.Lock()
		self._master_obj = MasterModel()
		self._master_obj.hostname = socket.gethostname()
		self._master_obj.ip = self._ip
		self._master_obj.vms = []
		self._master_obj.queue = []
		self._master_obj.save()

		self._log.info("ready")
Example #5
	def stop_job(self, job):
		"""This is intended to be called once a job has been completed
		(not cancelled, but completed)
		"""
		self._log.info("stopping job: {}".format(job.id))

		if str(job.id) in self._job_handlers:
			with self._job_queue_lock:
				handler = self._job_handlers[str(job.id)]
				queue = self._job_amqp_queues[handler.queue_name]

				new_queue = []
				for priority, handler in queue.queue:
					if handler.job.id == job.id:
						continue
					new_queue.append((priority, handler))

				queue.queue = new_queue

				Master.instance().update_status(queues=self._get_queues())

		AmqpManager.instance().queue_msg(
			json.dumps(dict(
				type	= "cancel",
				job		= str(job.id)
			)),
			"",
			exchange=Master.AMQP_BROADCAST_XCHG
		)

		job.reload()
		job.status = {
			"name": "finished"
		}
		job.timestamps["finished"] = time.time()
		job.save()

		self._log.info("stopped job: {}".format(job.id))

		self._cleanup_job(job)
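
stop_job and cancel_job both end with the same reload/mutate/save sequence; reloading first pulls in any fields other components may have written since the job object was loaded. A small helper capturing that pattern (the helper itself is not in the original code, which inlines it):

import time

def mark_job(job, status_name):
    """Reload the job document, then record a terminal status and
    timestamp -- hypothetical helper mirroring the inline code above."""
    job.reload()  # pick up any fields written by other components
    job.status = {"name": status_name}
    job.timestamps[status_name] = time.time()
    job.save()

# stop_job() would use mark_job(job, "finished"); cancel_job(), "cancelled"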
Example #6
	def __init__(self, drip_size=25):
		"""init the job manager
		
		:drip_size: The number of jobs to be added to the queue at once"""
		super(JobManager, self).__init__()

		self._drip_size = drip_size

		self._running = threading.Event()
		self._job_queue_lock = threading.Lock()

		self._amqp_man = AmqpManager.instance()

		self._log = logging.getLogger("JobMan")
		
		# each job can potentially specify its own queue; this
		# will be a dict of Q.PriorityQueue()s
		self._job_amqp_queues = {}
		# dict of {<jobid>: JobHandler}
		self._job_handlers = {}
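
The enqueue path that fills these two dicts is not shown in these examples, but the comments (one Q.PriorityQueue per queue name, one handler per job id) imply something along the lines of the sketch below. The method name, the priority default, and the handler attributes are assumptions.

try:
    import Queue as Q  # Python 2, matching the Q.PriorityQueue() comment
except ImportError:
    import queue as Q  # Python 3

def _queue_job(self, handler, priority=100):
    """Hypothetical enqueue path implied by __init__'s comments."""
    with self._job_queue_lock:
        # one priority queue per named AMQP queue, created lazily
        q = self._job_amqp_queues.setdefault(handler.queue_name, Q.PriorityQueue())
        q.put((priority, handler))
        # remember the handler so stop_job/cancel_job can find its queue later
        self._job_handlers[str(handler.job.id)] = handler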
Example #7
    def _shutdown_singletons(self):
        self._log.info("shutting down singletons")
        AmqpManager.instance().stop()
Example #8
	def _shutdown_singletons(self):
		self._log.info("shutting down singletons")
		AmqpManager.instance().stop()
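
How _shutdown_singletons() gets invoked is not shown in these examples; one plausible arrangement, purely as an assumption, is to hook it into process termination:

import signal
import sys

def install_shutdown_handler(master):
    """Illustrative only: run the master's singleton shutdown on SIGTERM."""
    def _on_sigterm(signum, frame):
        master._shutdown_singletons()
        sys.exit(0)
    signal.signal(signal.SIGTERM, _on_sigterm)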