def test_validate(self):
    x = HippoTask(definition={'id': 'foo'}, redis_client=self.redis_client)
    errors = x.validate()
    self.assertIsNotNone(errors)
    x.definition = {'mem': 32,
                    'cmd': "echo 'foo'",
                    'container': {'docker': {'image': 'busybox:latest'}},
                    'id': 'fooface',
                    'cpus': 0.1}
    errors = x.validate()
    self.assertIsNone(errors)
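# validate() itself is not shown in this section. The test above pins down its
# contract: an error value for an incomplete definition, None once mem, cpus,
# cmd, container, and id are all present. A minimal sketch under exactly that
# assumption (hypothetical helper, not the repo's actual implementation):
def _validate_definition_sketch(definition):
    required = ('id', 'cmd', 'mem', 'cpus', 'container')
    missing = [k for k in required if k not in definition]
    if missing:
        return 'missing required fields: %s' % ', '.join(missing)
    return None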
def test_statusUpdate(self):
    x = HippoTask(definition={'id': 'foo'}, redis_client=self.redis_client)
    scheduler = HippoScheduler(self.redis_client)
    update = Dict()
    update.task_id.value = x.mesos_id
    update.state = 'TASK_FAILED'
    scheduler.statusUpdate(None, update)
    x.load()
    self.assertEqual(x.definition['mesos_state'], 'TASK_FAILED')
def resourceOffers(self, driver, offers):
    filters = {'refuse_seconds': 5}

    try:
        working_count_by_id = HippoTask.working_task_count_by_id(self.redis)
        waiting_tasks = HippoTask.waiting_tasks(self.redis)
        waiting_tasks.reverse()
    except redis.exceptions.ConnectionError:
        logging.warning('Redis Connection Error in Scheduler resourceOffers')
        # without redis we can't match tasks, so decline every offer and bail
        for offer in offers:
            driver.launchTasks(offer.id, [], filters)
        return

    # drop tasks that are already running at their concurrency limit
    waiting_tasks = [t for t in waiting_tasks
                     if t.max_concurrent() > working_count_by_id.get(t.definition_id(), 0)]

    used_mem_by_offer_id = {}
    used_cpu_by_offer_id = {}
    this_run_host_queue_count = {}
    matched_tasks_by_offer_id = {}
    host_by_offer_id = {}
    offers = list(offers)

    for task in waiting_tasks:
        # prefer the hosts we have queued onto least recently
        offers.sort(key=lambda o: self.__get_recent_queue_count(o.hostname))
        for offer in offers:
            if this_run_host_queue_count.get(offer.hostname, 0) > 1:
                # don't launch more than two tasks per host in one offer cycle
                continue
            host_by_offer_id[offer.id.value] = offer.hostname
            cpus_available = (self.getResource(offer.resources, 'cpus')
                              - used_cpu_by_offer_id.get(offer.id.value, 0))
            mem_available = (self.getResource(offer.resources, 'mem')
                             - used_mem_by_offer_id.get(offer.id.value, 0))
            if (task.cpus() <= cpus_available and
                    task.mem() <= mem_available and
                    working_count_by_id.get(task.definition_id(), 0) < task.max_concurrent() and
                    task.constraints_ok(offer)):
                matched_tasks_by_offer_id.setdefault(offer.id.value, []).append(
                    task.mesos_launch_definition(offer))
                task.work()
                working_count_by_id.setdefault(task.definition_id(), 0)
                working_count_by_id[task.definition_id()] += 1
                used_cpu_by_offer_id.setdefault(offer.id.value, 0)
                used_cpu_by_offer_id[offer.id.value] += task.cpus()
                used_mem_by_offer_id.setdefault(offer.id.value, 0)
                used_mem_by_offer_id[offer.id.value] += task.mem()
                this_run_host_queue_count.setdefault(offer.hostname, 0)
                this_run_host_queue_count[offer.hostname] += 1
                self.__log_recent_queue(offer.hostname)
                break

    for offer_id in matched_tasks_by_offer_id:
        logging.info("Launching %d tasks on offer id %s, host %s" %
                     (len(matched_tasks_by_offer_id[offer_id]), offer_id,
                      host_by_offer_id[offer_id]))
        driver.launchTasks({'value': offer_id}, matched_tasks_by_offer_id[offer_id], filters)
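# getResource() is called above but defined elsewhere. In pymesos-style
# frameworks it typically sums the scalar value of the named resource across
# the offer's resource list; a minimal sketch under that assumption
# (hypothetical helper, not necessarily the method used above):
def _get_resource_sketch(resources, name):
    # each resource is a dict-like object carrying a name and a scalar value
    return sum(r.scalar.value for r in resources if r.name == name)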
def test_mesos_launch_definition(self):
    x = HippoTask(definition={'mem': 32,
                              'cmd': "echo 'foo'",
                              'container': {'docker': {'image': 'busybox:latest'}},
                              'id': 'fooface',
                              'cpus': 0.1},
                  redis_client=self.redis_client)
    offer = Dict()
    offer.agent_id.value = '999'
    ld = x.mesos_launch_definition(offer)
    self.assertEqual(ld['agent_id'], {'value': '999'})
    self.assertTrue(hasattr(ld.command.environment, 'variables'))
    self.assertIn({'name': 'HIPPO_TASK_NAME', 'value': 'fooface'},
                  ld.command.environment.variables)
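# mesos_launch_definition() is asserted against above but defined elsewhere.
# A minimal sketch of the Mesos TaskInfo shape the test implies; everything
# beyond agent_id and the HIPPO_TASK_NAME variable is an assumption drawn
# from the standard TaskInfo layout, not from the repo:
def _launch_definition_sketch(definition, offer, mesos_id):
    return {
        'task_id': {'value': mesos_id},
        'agent_id': {'value': offer.agent_id.value},
        'name': definition['id'],
        'resources': [
            {'name': 'cpus', 'type': 'SCALAR', 'scalar': {'value': definition['cpus']}},
            {'name': 'mem', 'type': 'SCALAR', 'scalar': {'value': definition['mem']}},
        ],
        'command': {
            'value': definition['cmd'],
            'environment': {'variables': [
                {'name': 'HIPPO_TASK_NAME', 'value': definition['id']},
            ]},
        },
        'container': definition['container'],
    }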
def test_init(self):
    x = HippoTask(definition={'id': 'foo'}, redis_client=self.redis_client)
    self.assertIsNotNone(x.mesos_id)
    self.assertIn('task_retries', x.definition)
    self.assertIn('system_retries', x.definition)
    self.assertIn('mesos_id', x.definition)
    y = HippoTask(mesos_id=x.mesos_id, redis_client=self.redis_client)
    self.assertIn('id', y.definition)
    self.assertIn('task_retries', y.definition)
    self.assertIn('system_retries', y.definition)
def _reconcile():
    # give time for driver to connect first
    time.sleep(5)
    while True:
        try:
            # delete any ancient tasks so that we don't have them clog things up forever
            HippoTask.cleanup_old_tasks(redis_client)
            running_task_ids = [dict(task_id={'value': t.mesos_id})
                                for t in HippoTask.working_tasks(redis_client)]
            if running_task_ids:
                logging.info('Reconciling %d tasks' % len(running_task_ids))
                driver.reconcileTasks(running_task_ids)
        except redis.exceptions.ConnectionError:
            logging.warning('Redis Connection Error in Reconcile Thread')
        time.sleep(60 * 15)
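# _reconcile (like the kill loop below) presumably runs as a daemon thread
# alongside the scheduler driver. A minimal sketch of that wiring using the
# standard library; the actual startup code is not shown in this section:
import threading

threading.Thread(target=_reconcile, daemon=True).start()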
def test_constraints_ok(self):
    x = HippoTask(definition={'id': 'foo'}, redis_client=self.redis_client)
    x.definition['constraints'] = [['foo', 'EQUAL', 'bar']]
    offer1 = {'attributes': [{'name': 'foo', 'text': 'bar'}]}
    offer2 = {'attributes': [{'name': 'foo', 'text': 'baz'}]}
    self.assertTrue(x.constraints_ok(offer1))
    self.assertFalse(x.constraints_ok(offer2))
    x.definition['constraints'] = [['foo', 'UNLIKE', 'bar']]
    self.assertFalse(x.constraints_ok(offer1))
    self.assertTrue(x.constraints_ok(offer2))
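# constraints_ok() is exercised above but defined elsewhere. The test implies
# each constraint is an [attribute, operator, value] triple matched against
# the offer's attributes, with at least EQUAL and UNLIKE operators. A minimal
# sketch under those assumptions; treating UNLIKE as a regex mismatch is a
# guess based on Marathon-style constraints, not the repo's actual code:
import re

def _constraints_ok_sketch(constraints, offer):
    attrs = {a['name']: a.get('text') for a in offer.get('attributes', [])}
    for name, operator, value in constraints:
        if operator == 'EQUAL' and attrs.get(name) != value:
            return False
        if operator == 'UNLIKE' and re.match(value, str(attrs.get(name))):
            return False
    return True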
def statusUpdate(self, driver, update):
    try:
        t = HippoTask(mesos_id=update.task_id.value, redis_client=self.redis)
        t.definition['mesos_state'] = update.state
        t.save()
        if update.state in ['TASK_FINISHED', 'TASK_FAILED', 'TASK_LOST', 'TASK_ERROR',
                            'TASK_DROPPED', 'TASK_KILLED', 'TASK_UNREACHABLE',
                            'TASK_GONE', 'TASK_GONE_BY_OPERATOR']:
            # terminal state: take the task off the working list
            t.finish()
            if update.state != 'TASK_FINISHED':
                # any terminal state other than success is eligible for retry
                t.retry()
    except redis.exceptions.ConnectionError:
        logging.warning('Redis Connection Error in Scheduler statusUpdate')

    logging.info('Status update TID %s %s', update.task_id.value, update.state)
def _kill_tasks():
    while True:
        try:
            kill_tasks = HippoTask.kill_tasks(redis_client)
            for t in kill_tasks:
                logging.info('Killing task %s' % t.mesos_id)
                driver.killTask({'value': t.mesos_id})
                t.kill_complete()
        except redis.exceptions.ConnectionError:
            logging.warning('Redis Connection Error in Kill Task Thread')
        time.sleep(2)
def tasks():
    tasks = HippoTask.all_tasks(redis_client=app.redis)
    if request.method == 'POST':
        data = request.get_json()
        # a task with the same cmd and image that is still queued or running
        # counts as a duplicate; return its id instead of re-queueing
        existing = [t for t in tasks
                    if (t.definition['cmd'] == data['cmd'] and
                        t.definition['mesos_state'] in ['TASK_STAGING', 'TASK_RUNNING',
                                                        'WAITING_ON_OFFERS'] and
                        t.definition['container']['docker']['image'] ==
                        data['container']['docker']['image'])]
        if existing:
            logging.info('skipping duplicate task %s', data)
            return jsonify({"mesos_id": existing[0].mesos_id})

        # create a new task
        t = HippoTask(definition=data, redis_client=app.redis)
        validation_error = t.validate()
        if validation_error:
            return jsonify({"error": validation_error}), 400
        t.queue()
        return jsonify({"mesos_id": t.mesos_id})

    return jsonify([t.definition for t in tasks])
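# A hedged usage sketch for the view above, assuming it is mounted at /tasks
# and served on port 5000 (the route decorator and host are not shown in this
# section). A valid definition comes back as {"mesos_id": ...}; one that
# fails validate() comes back as a 400 with {"error": ...}:
import requests

resp = requests.post('http://localhost:5000/tasks', json={
    'id': 'fooface',
    'cmd': "echo 'foo'",
    'mem': 32,
    'cpus': 0.1,
    'container': {'docker': {'image': 'busybox:latest'}},
})
print(resp.status_code, resp.json())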
def test_hippodatasource(self):
    q = HippoQueue(definition={'id': 'foo',
                               'cmd': 'echo $HIPPO_DATA',
                               'env': {'foo': '$HIPPO_DATA_BASE64'},
                               'queue': {'name': 'fooname',
                                         'last_run_tstamp': time.time(),
                                         'frequency_seconds': 1}},
                   redis_client=self.redis_client)
    hds = HippoDataSource(q, 0, HippoTask, self.redis_client)
    self.assertTrue(hds.too_soon())
    hds.process_source()
    hds.create_tasks(['footask1', 'footask2'])
    waiting_tasks = HippoTask.waiting_tasks(self.redis_client)
    self.assertEqual(len(waiting_tasks), 2)
    self.assertEqual(waiting_tasks[0].definition['cmd'], 'echo footask2')
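# The test shows create_tasks() expanding $HIPPO_DATA in cmd with each data
# item ('echo $HIPPO_DATA' becomes 'echo footask2'), and the env in the
# definition suggests a base64 variant for $HIPPO_DATA_BASE64. A minimal
# sketch of that substitution, inferred from the test rather than taken from
# the actual implementation (the longer token must be replaced first):
import base64

def _expand_data_sketch(template, data):
    b64 = base64.b64encode(data.encode()).decode()
    return template.replace('$HIPPO_DATA_BASE64', b64).replace('$HIPPO_DATA', data)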
def feed_work():
    while workers_alive() and not cls.__stop_processing:
        try:
            hippo_queues = HippoQueue.all_queues(redis_client)
            working_count_by_id = HippoTask.working_task_count_by_id(redis_client)
            for hippo_q in hippo_queues:
                if not is_running(hippo_q.id):
                    q.put((hippo_q, working_count_by_id.get(hippo_q.id, 0)))
                    mark_is_running(hippo_q.id, True)
        except redis.exceptions.ConnectionError:
            logging.warning('Redis Connection Error in Queue Processing Thread')
        time.sleep(1)
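# is_running() and mark_is_running() are referenced above but not shown. One
# plausible implementation is a plain dict guarded by a lock, so the feeder
# never enqueues the same HippoQueue twice concurrently; names and structure
# here are assumptions, not the repo's actual code:
import threading

_running = {}
_running_lock = threading.Lock()

def is_running(queue_id):
    with _running_lock:
        return _running.get(queue_id, False)

def mark_is_running(queue_id, value):
    with _running_lock:
        _running[queue_id] = value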
def single_task(task_id):
    t = HippoTask(mesos_id=task_id, redis_client=app.redis)
    if not t.definition:
        return jsonify({"error": task_id + " not found"}), 404

    if request.method == 'DELETE':
        t.delete()
        return jsonify({"deleted": task_id})

    return jsonify(t.definition)
def test_resourceOffers(self):
    driver = Mock()
    x = HippoTask(definition={'id': 'foo'}, redis_client=self.redis_client)
    x.queue()
    x.work()
    scheduler = HippoScheduler(self.redis_client)
    offer = Dict()
    cres = Dict()
    cres.name = 'cpus'
    cres.scalar.value = 1.0
    mres = Dict()
    mres.name = 'mem'
    mres.scalar.value = 1024
    offer.resources = [cres, mres]
    offer.id.value = 'foooffer'
    offer.agent_id.value = 'fooagent'
    offers = [offer]
    scheduler.resourceOffers(driver, offers)
def test_retry(self):
    x = HippoTask(definition={'id': 'foo'}, redis_client=self.redis_client)
    x.queue()
    x.work()
    x.definition['task_retries'] = 1
    x.definition['mesos_state'] = 'TASK_FAILED'
    x.finish()
    x.retry()
    self.assertEqual(len(HippoTask.all_tasks(self.redis_client)), 2)
    waiting_tasks = HippoTask.waiting_tasks(self.redis_client)
    self.assertEqual(len(waiting_tasks), 1)
    w = waiting_tasks[0]
    self.assertEqual(w.definition['task_retries'], 0)
    w.finish()
    w.definition['mesos_state'] = 'TASK_LOST'
    w.retry()
    self.assertEqual(len(HippoTask.waiting_tasks(self.redis_client)), 2)
def test_task_lists(self):
    x = HippoTask(definition={'id': 'foo'}, redis_client=self.redis_client)
    x.queue()
    self.assertEqual(len(HippoTask.all_tasks(self.redis_client)), 1)
    self.assertEqual(len(HippoTask.waiting_tasks(self.redis_client)), 1)
    x.work()
    self.assertEqual(len(HippoTask.working_tasks(self.redis_client)), 1)
    self.assertEqual(len(HippoTask.waiting_tasks(self.redis_client)), 0)
    x.finish()
    self.assertEqual(len(HippoTask.working_tasks(self.redis_client)), 0)
    x.delete()
    self.assertEqual(len(HippoTask.all_tasks(self.redis_client)), 0)
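# The test above pins down the task lifecycle: queue() puts a task on the
# waiting list, work() moves it to the working list, finish() takes it off
# working, and delete() removes it entirely. A minimal sketch of how those
# lists could be kept in redis; the key names are assumptions, not the repo's
# actual schema:
import redis as redis_lib

r = redis_lib.Redis()

def queue_sketch(task_id):
    r.lpush('hippo:waiting', task_id)

def work_sketch(task_id):
    r.lrem('hippo:waiting', 0, task_id)
    r.lpush('hippo:working', task_id)

def finish_sketch(task_id):
    r.lrem('hippo:working', 0, task_id)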
def kill_task(task_id):
    t = HippoTask(mesos_id=task_id, redis_client=app.redis)
    if not t.definition:
        return jsonify({"error": task_id + " not found"}), 404
    t.kill()
    return jsonify({"killed": task_id})
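# Hedged usage sketch: t.kill() only marks the task for death here; the kill
# loop (_kill_tasks above) later issues driver.killTask for it. Assuming this
# view is mounted at /tasks/<task_id>/kill (the route decorator and method are
# not shown in this section):
import requests

resp = requests.post('http://localhost:5000/tasks/sometaskid/kill')
print(resp.status_code, resp.json())  # {"killed": "sometaskid"} or a 404 payload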