def resourceOffer(self, driver, oid, slave_offers):
    """Answer offer ``oid``, launching one server task per acceptable slot.

    A slot is rejected when:
      * its host is already a value in ``self.servers``;
      * ``self.overloaded`` is false and at least one server is recorded;
      * it advertises less than 1024 (``mem``) or less than 1 (``cpus``).

    Each accepted slot gets a task requesting 1 CPU and 1024 MB, is recorded
    in ``self.servers`` under the current ``self.id``, increments ``self.id``
    and clears ``self.overloaded``.  The offer is always answered through
    ``driver.replyToOffer`` (possibly with an empty task list).

    ``self.lock`` is held while the offers are processed and is released in
    a ``finally`` clause, so an exception raised while handling an offer
    (e.g. a non-numeric ``mem`` value) cannot leave the lock held.
    """
    print("Got resource offer %s with %s slots." % (oid, len(slave_offers)))
    self.lock.acquire()
    try:
        tasks = []
        for offer in slave_offers:
            if offer.host in self.servers.values():
                print("Rejecting slot on host " + offer.host + " because we've launched a server on that machine already.")
            elif not self.overloaded and len(self.servers) > 0:
                print("Rejecting slot because we've launched enough tasks.")
            elif int(offer.params['mem']) < 1024:
                print("Rejecting offer because it doesn't contain enough memory (it has " + offer.params['mem'] + " and we need 1024mb.")
            elif int(offer.params['cpus']) < 1:
                print("Rejecting offer because it doesn't contain enough CPUs.")
            else:
                print("Offer is for " + offer.params['cpus'] + " CPUS and " + offer.params["mem"] + " MB on host " + offer.host)
                # Every server task asks for the same fixed resources.
                params = {"cpus": "1", "mem": "1024"}
                td = mesos.TaskDescription(self.id, offer.slaveId, "server %s" % self.id, params, "")
                print("Accepting task, id=" + str(self.id) + ", params: " + params['cpus'] + " CPUS, and " + params['mem'] + " MB, on node " + offer.host)
                tasks.append(td)
                self.servers[self.id] = offer.host
                self.id += 1
                # Launching a server resets the overloaded flag.
                self.overloaded = False
        driver.replyToOffer(oid, tasks, {"timeout": "1"})
        print("done with resourceOffer()")
    finally:
        # Release even on failure so later callbacks are not blocked forever.
        self.lock.release()
def resourceOffer(self, driver, oid, offers):
    """Answer offer ``oid`` by starting MPD tasks until TOTAL_TASKS are launched.

    If ``self.tasksLaunched`` already equals ``TOTAL_TASKS`` the offer is
    answered with no tasks and a timeout of "-1".  Otherwise each slot with
    at least ``CPUS`` cpus and ``MEM`` mem receives one task (while tasks are
    still needed) and the offer is answered with a timeout of "1".

    Once the launch count reaches ``TOTAL_TASKS`` this call polls
    ``countMPDs()`` once a second until it exceeds ``TOTAL_TASKS`` and then
    starts ``mpiexec`` on a new thread, passing it ``driver``.
    """
    print("Got offer %s" % oid)
    accepted = []

    # Everything was started on an earlier offer: decline and stop here.
    if self.tasksLaunched == TOTAL_TASKS:
        print("Rejecting permanently because we have already started")
        driver.replyToOffer(oid, accepted, {"timeout": "-1"})
        return

    for slot in offers:
        print("Considering slot on %s" % slot.host)
        slot_cpus = int(slot.params["cpus"])
        slot_mem = int(slot.params["mem"])

        if slot_cpus < CPUS or slot_mem < MEM:
            print("Rejecting slot due to too few resources")
            continue

        if not (self.tasksLaunched < TOTAL_TASKS):
            print("Rejecting slot because we've launched enough tasks")
            continue

        # The running launch count doubles as the task id.
        tid = self.tasksLaunched
        print("Accepting slot to start mpd %d" % tid)
        resources = {"cpus": "%d" % CPUS, "mem": "%d" % MEM}
        accepted.append(
            mesos.TaskDescription(tid, slot.slaveId, "task %d" % tid, resources, ""))
        self.tasksLaunched += 1

    driver.replyToOffer(oid, accepted, {"timeout": "1"})

    if self.tasksLaunched != TOTAL_TASKS:
        return

    print("We've launched all our MPDs; waiting for them to come up")
    while countMPDs() <= TOTAL_TASKS:
        print("...waiting on MPD(s)...")
        time.sleep(1)
    runner = threading.Thread(target=mpiexec, args=[driver])
    runner.start()
def resourceOffer(self, driver, oid, slave_offers):
    """Answer offer ``oid``, registering new compute nodes while more are needed.

    Stores ``driver`` on ``self.driver``.  A slot is skipped when
    ``self.numToRegister`` is not positive, when its host is already a value
    in ``self.servers``, or when ``len(self.servers)`` has reached
    ``SAFE_ALLOCATION["cpus"]``.  For every other slot a task requesting
    1 CPU / 1024 MB is created, the host is recorded in ``self.servers`` and
    passed to ``self.regComputeNode``, the counters are updated, and a
    "<timestamp> <server count>" line is written to ``eventlog``.  The offer
    is always answered through ``driver.replyToOffer`` with a timeout of "1".

    ``self.lock`` is held while the offers are processed and is released in
    a ``finally`` clause, so a failure in ``regComputeNode``, logging or the
    reply cannot leave the lock held.
    """
    self.driver = driver
    driverlog.debug("Got slot offer %d" % oid)
    self.lock.acquire()
    try:
        driverlog.debug("resourceOffer() acquired lock")
        tasks = []
        for offer in slave_offers:
            # If we haven't registered this node, accept the slot and
            # register it with pbs_server.
            # TODO: check to see if slot is big enough
            if self.numToRegister <= 0:
                driverlog.debug("Rejecting slot, no need for more slaves")
                continue
            if offer.host in self.servers.values():
                driverlog.debug("Rejecting slot, already registered node " + offer.host)
                continue
            if len(self.servers) >= SAFE_ALLOCATION["cpus"]:
                driverlog.debug("Rejecting slot, already at safe allocation (i.e. %d CPUS)" % SAFE_ALLOCATION["cpus"])
                continue
            driverlog.info("Need %d more nodes, so accepting slot, setting up params for it..." % self.numToRegister)
            params = {"cpus": "1", "mem": "1024"}
            td = mesos.TaskDescription(
                self.id, offer.slaveId, "task %d" % self.id, params, "")
            tasks.append(td)
            self.servers[self.id] = offer.host
            self.regComputeNode(offer.host)
            self.numToRegister -= 1
            self.id += 1
            driverlog.info("writing logfile")
            eventlog.info("%d %d" % (time.time(), len(self.servers)))
            driverlog.info("done writing logfile")
            driverlog.info("self.id now set to " + str(self.id))
        driver.replyToOffer(oid, tasks, {"timeout": "1"})
    finally:
        # Release even on failure so later callbacks are not blocked forever.
        self.lock.release()
    driverlog.debug("resourceOffer() finished, released lock\n\n")
def resourceOffer(self, driver, oid, offers):
    """Launch one task per offered slot for as long as ``self.tid != self.todo``.

    ``self.tid`` is incremented *before* it is used as the task id.  Each
    task requests ``CPUS`` cpus and ``MEM`` mem and carries the pickled
    ``self.duration`` as its argument.  The collected tasks (possibly none)
    are handed to ``driver.launchTasks`` together with ``oid``.
    """
    launch = []
    for slot in offers:
        if self.todo == self.tid:
            # Counter has met the target; this slot gets no task.
            continue
        self.tid += 1
        resources = {"cpus": "%d" % CPUS, "mem": "%d" % MEM}
        launch.append(
            mesos.TaskDescription(
                self.tid,
                slot.slaveId,
                "task %d" % self.tid,
                resources,
                pickle.dumps(self.duration),
            )
        )
        # Disabled in the original:
        # msg = mesos.FrameworkMessage(-1, , "")
        # executor.sendFrameworkMessage("")
    driver.launchTasks(oid, launch)
def resourceOffer(self, driver, oid, offers):
    """Start one task per offered slot until ``TOTAL_TASKS`` have been launched.

    The pre-increment value of ``self.tasksLaunched`` is used as the task
    id.  Each task requests ``TASK_CPUS`` cpus and ``TASK_MEM`` mem.  The
    offer is always answered through ``driver.replyToOffer`` with an empty
    parameter dict, whether or not any task was created.
    """
    print("Got a resource offer!")
    launched = []
    for slot in offers:
        if not (self.tasksLaunched < TOTAL_TASKS):
            # Quota reached; remaining slots get no task.
            continue
        task_id = self.tasksLaunched
        self.tasksLaunched = task_id + 1
        print("Accepting offer on %s to start task %d" % (slot.host, task_id))
        resources = {"cpus": "%d" % TASK_CPUS, "mem": "%d" % TASK_MEM}
        launched.append(
            mesos.TaskDescription(
                task_id, slot.slaveId, "task %d" % task_id, resources, ""))
    driver.replyToOffer(oid, launched, {})
def resourceOffer(self, driver, oid, offers):
    """Launch the single task on the first slot that is large enough.

    * If ``self.task_launched`` is already set, the offer is answered with
      no tasks and a timeout of "-1".
    * Otherwise the first slot offering at least ``self.cpus`` cpus and
      ``self.mem`` mem receives task 0, whose argument is the pickled list
      ``[self.fid, self.framework_name, self.master, self.command]``; the
      offer is answered with that task and ``self.task_launched`` is set.
    * If no slot qualifies (including an empty ``offers``), the offer is
      now explicitly declined with an empty task list instead of being left
      unanswered.
    """
    if self.task_launched:
        # Since we already launched our task, we reject the offer.
        driver.replyToOffer(oid, [], {"timeout": "-1"})
        return

    for offer in offers:
        cpus = int(offer.params["cpus"])
        mem = int(offer.params["mem"])
        if cpus >= self.cpus and mem >= self.mem:
            print("Accepting slot on slave %s (%s)" % (offer.slaveId, offer.host))
            params = {"cpus": "%d" % self.cpus, "mem": "%d" % self.mem}
            arg = [self.fid, self.framework_name, self.master, self.command]
            task = mesos.TaskDescription(0, offer.slaveId, "task", params,
                                         pickle.dumps(arg))
            driver.replyToOffer(oid, [task], {"timeout": "1"})
            self.task_launched = True
            return

    # Nothing fit: still answer the offer so it does not stay outstanding.
    # NOTE(review): timeout "1" mirrors the accept path above - confirm the
    # desired re-offer delay.
    driver.replyToOffer(oid, [], {"timeout": "1"})
def resourceOffer(self, driver, oid, offers):
    """Farm out the nested schedulers described by ``config``, one per slot.

    While ``self.tid`` differs from ``len(config)``, each offered slot gets
    a task whose argument is the pickled tuple
    ``(self.master, (todo, duration))`` taken from ``config[self.tid]``.
    The entry is recorded in ``self.running`` under the task id and
    ``self.tid`` is advanced.  The collected tasks (possibly none) are
    handed to ``driver.launchTasks`` together with ``oid``.
    """
    # Disabled in the original: make sure the nested schedulers can actually
    # run their tasks.
    # if len(offers) <= len(config) and len(config) != self.tid:
    #     print "Need at least one spare slave to do this work ... exiting!"
    #     driver.stop()
    #     return

    launch = []
    for slot in offers:
        if len(config) == self.tid:
            # Every config entry has been handed out already.
            continue
        entry = config[self.tid]
        (todo, duration) = entry
        payload = pickle.dumps((self.master, (todo, duration)))
        resources = {"cpus": "%d" % CPUS, "mem": "%d" % MEM}
        launch.append(
            mesos.TaskDescription(
                self.tid, slot.slaveId, "task %d" % self.tid, resources, payload))
        self.running[self.tid] = (todo, duration)
        self.tid += 1
        print("Launching (%d, %d) on slave %s" % (todo, duration, slot.slaveId))
    driver.launchTasks(oid, launch)