Ejemplo n.º 1
0
def create_tasks(MV, available_offers, driver, command, ns):
    """
    Launch up to `MV` mesos tasks, depending on availability of mesos
    resources.

    `MV` max number of mesos tasks to spin up.  Relay chooses this number
    `available_offers` an iterable of (offer, ntasks) pairs, where ntasks is
        the number of tasks the offer can support
    `driver` a mesos driver instance
    `command` passed through unchanged to `_create_task`
    `ns` passed through to `_create_task`; its `mesos_framework_name` is
        attached to the log records

    Offers reached after the `MV` budget is used up are declined.  Every
    other offer is answered with `driver.launchTasks`.

    Returns the number of tasks that were launched.
    """
    n_fulfilled = 0
    for offer, ntasks in available_offers:
        if n_fulfilled >= MV:
            # budget already spent: hand the untouched offer back
            driver.declineOffer(offer.id)
            continue
        tasks = []
        for ID in range(ntasks):
            if n_fulfilled >= MV:
                break
            n_fulfilled += 1

            # sys.maxsize is available on Python 2.6+ and Python 3;
            # sys.maxint was removed in Python 3.
            tid = "%s.%s.%s" % (
                ID, offer.id.value, random.randint(1, sys.maxsize))
            log.debug(
                "Accepting offer to start a task", extra=dict(
                    offer_host=offer.hostname, task_id=tid,
                    mesos_framework_name=ns.mesos_framework_name))
            task = _create_task(
                tid, offer, command, ns)
            tasks.append(task)
        driver.launchTasks(offer.id, tasks)
    return n_fulfilled
Ejemplo n.º 2
0
def create_tasks(MV, available_offers, driver, command, ns):
    """
    Launch up to `MV` mesos tasks, depending on availability of mesos
    resources.

    `MV` max number of mesos tasks to spin up.  Relay chooses this number
    `available_offers` an iterable of (offer, ntasks) pairs; ntasks is how
        many tasks that offer can support
    `driver` a mesos driver instance
    `command` forwarded as-is to `_create_task`
    `ns` forwarded to `_create_task`; `ns.mesos_framework_name` is also
        included in the log records

    Once `MV` tasks have been created, any remaining offers are declined.
    All other offers are passed to `driver.launchTasks` with the tasks
    created for them.

    Returns the number of tasks that were launched.
    """
    n_fulfilled = 0
    for offer, ntasks in available_offers:
        if n_fulfilled >= MV:
            # nothing left to launch, so return this offer unused
            driver.declineOffer(offer.id)
            continue
        tasks = []
        for ID in range(ntasks):
            if n_fulfilled >= MV:
                break
            n_fulfilled += 1

            # Use sys.maxsize rather than sys.maxint: the latter does not
            # exist on Python 3, the former exists on 2.6+ and 3.
            tid = "%s.%s.%s" % (ID, offer.id.value,
                                random.randint(1, sys.maxsize))
            log.debug("Accepting offer to start a task",
                      extra=dict(offer_host=offer.hostname,
                                 task_id=tid,
                                 mesos_framework_name=ns.mesos_framework_name))
            task = _create_task(tid, offer, command, ns)
            tasks.append(task)
        driver.launchTasks(offer.id, tasks)
    return n_fulfilled
Ejemplo n.º 3
0
    def _get_and_update_relay(self, available_offers):
        """
        Get num tasks I should create and evaluate whether to use Relay's
        warmer or cooler command.  Update the MV with number of commands about
        to be created.

        Competes for the MV with these other threads, and will wait
        indefinitely for it:

          - other Mesos resourceOffers(...) calls to the Framework scheduler
          - Relay warmer and cooler functions attempting to ask the Framework
            to execute more tasks.
        """
        command = None
        with self.MV.get_lock():
            MV, t = self.MV
            # create tasks that fulfill relay's requests or return
            if MV == 0:
                log.debug(
                    'mesos scheduler has received no requests from relay',
                    extra=dict(
                        mesos_framework_name=self.ns.mesos_framework_name))
            else:
                if MV > 0 and self.ns.warmer:
                    command = self.ns.warmer
                elif MV < 0 and self.ns.cooler:
                    command = self.ns.cooler
                if abs(MV) < len(available_offers):
                    self.MV[:] = [0, time.time()]
                else:
                    new_MV = MV - (MV > 0 or -1) * max(abs(MV),
                                                       len(available_offers))
                    self.MV[:] = [new_MV, time.time()]
        return (MV, command)
Ejemplo n.º 4
0
    def _get_and_update_relay(self, available_offers):
        """
        Get num tasks I should create and evaluate whether to use Relay's
        warmer or cooler command.  Update the MV with number of commands about
        to be created.

        Competes for the MV with these other threads, and will wait
        indefinitely for it:

          - other Mesos resourceOffers(...) calls to the Framework scheduler
          - Relay warmer and cooler functions attempting to ask the Framework
            to execute more tasks.
        """
        command = None
        with self.MV.get_lock():
            MV, t = self.MV
            # create tasks that fulfill relay's requests or return
            if MV == 0:
                log.debug(
                    'mesos scheduler has received no requests from relay',
                    extra=dict(
                        mesos_framework_name=self.ns.mesos_framework_name))
            else:
                if MV > 0 and self.ns.warmer:
                    command = self.ns.warmer
                elif MV < 0 and self.ns.cooler:
                    command = self.ns.cooler
                if abs(MV) < len(available_offers):
                    self.MV[:] = [0, time.time()]
                else:
                    new_MV = MV - (MV > 0 or -1) * max(abs(MV),
                                                       len(available_offers))
                    self.MV[:] = [new_MV, time.time()]
        return (MV, command)
Ejemplo n.º 5
0
def init_relay(ns_relay, mesos_ready, mesos_framework_name):
    """
    Block until notified that the mesos framework is registered, then run
    Relay.

    `ns_relay` passed unchanged to `relay_main`
    `mesos_ready` a condition object (supports `with` and `wait()`) that is
        notified once the mesos framework is registered
    `mesos_framework_name` attached to the log records

    NOTE(review): if the notification is sent before this function starts
    waiting, `wait()` has nothing to wake it up -- confirm the caller
    guarantees the ordering.
    """
    log.debug(
        'Relay waiting to start until mesos framework is registered',
        extra=dict(mesos_framework_name=mesos_framework_name))
    # Hold the condition's lock only for the wait itself.  Releasing it
    # afterwards keeps other users of `mesos_ready` from blocking for as
    # long as relay runs.
    with mesos_ready:
        mesos_ready.wait()
    log.debug(
        'Relay notified that mesos framework is registered',
        extra=dict(mesos_framework_name=mesos_framework_name))
    relay_main(ns_relay)
Ejemplo n.º 6
0
 def offerRescinded(self, driver, offerId):
     """
     Record that an offer was rescinded.

     Per the Mesos scheduler API this callback is invoked when an offer is
     no longer valid.  This implementation takes no action on the offer:
     it only writes a debug log entry carrying the offer id and the
     framework name.  `driver` is unused.
     """
     details = {
         'offer_id': offerId.value,
         'mesos_framework_name': self.ns.mesos_framework_name,
     }
     log.debug('offer rescinded', extra=details)
Ejemplo n.º 7
0
 def offerRescinded(self, driver, offerId):
     """
     Log that the offer identified by `offerId` has been rescinded.

     According to the Mesos scheduler API, this callback fires when an
     offer is no longer valid.  Nothing is done here beyond emitting a
     debug log record with the offer id and the framework name; the
     `driver` argument is not used.
     """
     rescinded_id = offerId.value
     framework = self.ns.mesos_framework_name
     log.debug(
         'offer rescinded',
         extra={'offer_id': rescinded_id, 'mesos_framework_name': framework})
Ejemplo n.º 8
0
 def _warmer_cooler_wrapper(n):
     # Tell mesos to spin up n tasks.  Relay picks the warmer or the cooler
     # function based on the sign of n, so the sign alone is enough here to
     # label which kind of task is being requested.
     def _log_extra():
         # built fresh for each log call
         return {
             'mesos_framework_name': ns.mesos_framework_name,
             'task_num': n,
             'task_type': "warmer" if n > 0 else "cooler",
         }

     log.debug('asking mesos to spawn tasks', extra=_log_extra())
     requested_at = time.time()
     with MV.get_lock():
         # only overwrite the stored request if it is older than this one
         if MV[1] < requested_at:
             MV[:] = (n, requested_at)
     log.debug('...finished asking mesos to spawn tasks', extra=_log_extra())
Ejemplo n.º 9
0
    def _statusUpdate(self, driver, update):
        """
        Log a task status update and keep the failure counter current.

        When `ns.max_failures` is -1 the counter is not touched and the
        scheduler never stops because of failures.  Otherwise a TASK_FAILED
        or TASK_LOST update increments `self.failures`, and a TASK_FINISHED
        or TASK_STARTING update decrements it (never below zero).  Once the
        counter reaches `ns.max_failures` the driver is stopped.

        Raises MaxFailuresReached when the failure limit is reached.
        """
        message = 'task status update: %s' % str(update.message)
        details = {
            'task_id': update.task_id.value,
            'task_state': update.state,
            'slave_id': update.slave_id.value,
            'timestamp': update.timestamp,
            'mesos_framework_name': self.ns.mesos_framework_name,
        }
        log.debug(message, extra=details)

        if self.ns.max_failures == -1:
            return  # don't quit even if you are getting failures

        bad_states = (mesos_pb2.TASK_FAILED, mesos_pb2.TASK_LOST)
        if update.state in bad_states:
            self.failures += 1
        elif update.state in (
                mesos_pb2.TASK_FINISHED, mesos_pb2.TASK_STARTING):
            self.failures = max(self.failures - 1, 0)

        if self.failures >= self.ns.max_failures:
            log.error(
                "Max allowable number of failures reached",
                extra={
                    'max_failures': self.failures,
                    'mesos_framework_name': self.ns.mesos_framework_name,
                })
            driver.stop()
            raise MaxFailuresReached(self.failures)
Ejemplo n.º 10
0
    def _statusUpdate(self, driver, update):
        """
        Handle a task status update: log it, then track failures.

        A `ns.max_failures` of -1 disables failure tracking entirely.
        Otherwise `self.failures` goes up by one for TASK_FAILED / TASK_LOST
        and down by one (floored at zero) for TASK_FINISHED / TASK_STARTING.
        If the counter is at or above `ns.max_failures` afterwards, the
        error is logged, the driver is stopped and MaxFailuresReached is
        raised.
        """
        text = 'task status update: %s' % str(update.message)
        log.debug(text, extra={
            'task_id': update.task_id.value,
            'task_state': update.state,
            'slave_id': update.slave_id.value,
            'timestamp': update.timestamp,
            'mesos_framework_name': self.ns.mesos_framework_name})
        if self.ns.max_failures == -1:
            return  # don't quit even if you are getting failures

        pb = mesos_pb2
        if update.state in (pb.TASK_FAILED, pb.TASK_LOST):
            self.failures = self.failures + 1
        elif update.state in (pb.TASK_FINISHED, pb.TASK_STARTING):
            # a healthy task pays back one earlier failure
            self.failures = max(self.failures - 1, 0)

        if self.failures >= self.ns.max_failures:
            error_details = {
                'max_failures': self.failures,
                'mesos_framework_name': self.ns.mesos_framework_name,
            }
            log.error("Max allowable number of failures reached",
                      extra=error_details)
            driver.stop()
            raise MaxFailuresReached(self.failures)
Ejemplo n.º 11
0
    def _resourceOffers(self, driver, offers):
        """
        Respond to resource offers made to this framework.

        Offers are split by `filter_offers` into usable and unusable ones,
        using `ns.mesos_task_resources`.  Unusable offers are declined
        immediately.  If nothing usable remains the method returns.
        Otherwise Relay's current request is read through
        `_get_and_update_relay`:

          - when no command applies, every usable offer is declined;
          - otherwise `create_tasks` is asked to launch up to abs(MV) tasks
            on the usable offers and the driver is asked to revive offers.

        Mesos background (from the scheduler API): a single offer only
        contains resources from a single slave.  Resources in an offer are
        not re-offered to _this_ framework until it has rejected them (see
        SchedulerDriver.launchTasks) or they have been rescinded (see
        Scheduler.offerRescinded).  Resources may be offered to several
        frameworks concurrently, depending on the allocator; the first
        framework to launch tasks with them wins, and the others have the
        resources rescinded or see their tasks fail with TASK_LOST.
        """
        log.debug(
            "Got resource offers",
            extra={
                'num_offers': len(offers),
                'mesos_framework_name': self.ns.mesos_framework_name,
            })

        required = dict(self.ns.mesos_task_resources)
        available_offers, decline_offers = filter_offers(offers, required)
        for rejected in decline_offers:
            driver.declineOffer(rejected.id)

        if not available_offers:
            log.debug(
                'None of the mesos offers had enough relevant resources',
                extra={'mesos_framework_name': self.ns.mesos_framework_name})
            return

        log.debug(
            'Mesos has offers available',
            extra={
                'available_offers': len(available_offers),
                'max_runnable_tasks': sum(
                    pair[1] for pair in available_offers),
                'mesos_framework_name': self.ns.mesos_framework_name,
            })
        MV, command = self._get_and_update_relay(available_offers)

        if command is not None:
            create_tasks(MV=abs(MV),
                         available_offers=available_offers,
                         driver=driver,
                         command=command,
                         ns=self.ns)
            driver.reviveOffers()
            return

        # nothing to run: give every usable offer back to mesos
        for offer, _ in available_offers:
            driver.declineOffer(offer.id)
Ejemplo n.º 12
0
    def _resourceOffers(self, driver, offers):
        """
        Handle a batch of resource offers from Mesos.

        Steps, in order:

          1. `filter_offers` separates the offers that satisfy
             `ns.mesos_task_resources` from those that do not; the latter
             are declined.
          2. If no usable offer is left, return.
          3. `_get_and_update_relay` supplies Relay's request (MV) and the
             command to run.  With no command, all usable offers are
             declined.
          4. Otherwise `create_tasks` launches up to abs(MV) tasks and
             `driver.reviveOffers()` is called.

        Notes carried over from the Mesos scheduler API: an offer contains
        resources from a single slave only.  Its resources are not offered
        to _this_ framework again until the framework rejects them (see
        SchedulerDriver.launchTasks) or they are rescinded (see
        Scheduler.offerRescinded).  Depending on the allocator, resources
        may be offered to more than one framework at once; whichever
        launches tasks first gets them, while the others see the offer
        rescinded or their tasks fail with a TASK_LOST status.
        """
        log.debug("Got resource offers", extra={
            'num_offers': len(offers),
            'mesos_framework_name': self.ns.mesos_framework_name})

        task_resources = dict(self.ns.mesos_task_resources)
        available_offers, decline_offers = filter_offers(
            offers, task_resources)
        for unusable in decline_offers:
            driver.declineOffer(unusable.id)

        if not available_offers:
            log.debug(
                'None of the mesos offers had enough relevant resources',
                extra={'mesos_framework_name': self.ns.mesos_framework_name})
            return

        log.debug('Mesos has offers available', extra={
            'available_offers': len(available_offers),
            'max_runnable_tasks': sum(
                entry[1] for entry in available_offers),
            'mesos_framework_name': self.ns.mesos_framework_name})
        MV, command = self._get_and_update_relay(available_offers)

        if command is None:
            # relay has nothing to run, so hand the usable offers back
            for usable, _ in available_offers:
                driver.declineOffer(usable.id)
            return

        launch_kwargs = {
            'MV': abs(MV),
            'available_offers': available_offers,
            'driver': driver,
            'command': command,
            'ns': self.ns,
        }
        create_tasks(**launch_kwargs)
        driver.reviveOffers()