Exemple #1
0
 def kill_children(signal, frame):
     """Signal handler: terminate the mesos and relay children, then exit.

     `mesos`, `relay` and `ns` are not defined in this block; they are
     resolved from the enclosing scope.  Each child is terminated on a
     best-effort basis: a failure to terminate one is logged and does not
     prevent the attempt on the other.  The process always exits with
     status 1.

     :param signal: the signal number passed in by the signal machinery;
         only used for logging
     :param frame: the interrupted stack frame (unused)
     """
     log.error(
         'Received a signal that is trying to terminate this process.'
         ' Terminating mesos and relay child processes!', extra=dict(
             mesos_framework_name=ns.mesos_framework_name,
             signal=signal))
     children = (
         (mesos, 'terminated mesos scheduler',
          'could not terminate mesos scheduler'),
         (relay, 'terminated relay', 'could not terminate relay'),
     )
     for child, done_msg, failed_msg in children:
         try:
             child.terminate()
             log.info(
                 done_msg,
                 extra=dict(mesos_framework_name=ns.mesos_framework_name))
         # Deliberately not a bare ``except:`` so that SystemExit and
         # KeyboardInterrupt are never swallowed while shutting down.
         except Exception:
             log.exception(
                 failed_msg,
                 extra=dict(mesos_framework_name=ns.mesos_framework_name))
     sys.exit(1)
Exemple #2
0
 def reregistered(self, driver, masterInfo):
     """Log the identity of the master this framework re-registered with.

     :param driver: the scheduler driver (unused here)
     :param masterInfo: object exposing `pid`, `hostname`, `id`, `ip` and
         `port` attributes, all of which are attached to the log record
     """
     master_details = {
         'master_pid': masterInfo.pid,
         'master_hostname': masterInfo.hostname,
         'master_id': masterInfo.id,
         'master_ip': masterInfo.ip,
         'master_port': masterInfo.port,
         'mesos_framework_name': self.ns.mesos_framework_name,
     }
     log.info("Re-registered with master", extra=master_details)
Exemple #3
0
 def reregistered(self, driver, masterInfo):
     """Emit an info-level log entry describing the re-registered master.

     :param driver: the scheduler driver (unused here)
     :param masterInfo: source of the `pid`, `hostname`, `id`, `ip` and
         `port` values recorded in the log entry
     """
     # Gather the master's fields first, in the order they are logged.
     pid, hostname = masterInfo.pid, masterInfo.hostname
     master_id = masterInfo.id
     ip, port = masterInfo.ip, masterInfo.port
     framework_name = self.ns.mesos_framework_name

     context = dict(
         master_pid=pid, master_hostname=hostname, master_id=master_id,
         master_ip=ip, master_port=port,
         mesos_framework_name=framework_name)
     log.info("Re-registered with master", extra=context)
Exemple #4
0
    def _registered(self, driver, frameworkId, masterInfo):
        """Signal `self.mesos_ready` and log the registration details.

        :param driver: the scheduler driver (unused here)
        :param frameworkId: object whose `value` attribute is logged as the
            framework id
        :param masterInfo: object exposing `pid`, `hostname`, `id`, `ip`
            and `port` attributes, all of which are logged
        """
        # Use the condition as a context manager so its lock is released
        # even if notify() raises.
        with self.mesos_ready:
            self.mesos_ready.notify()

        log.info(
            "Registered with master", extra=dict(
                framework_id=frameworkId.value, master_pid=masterInfo.pid,
                master_hostname=masterInfo.hostname, master_id=masterInfo.id,
                master_ip=masterInfo.ip, master_port=masterInfo.port,
                mesos_framework_name=self.ns.mesos_framework_name,
            ))
Exemple #5
0
    def _registered(self, driver, frameworkId, masterInfo):
        """Notify a waiter on `self.mesos_ready`, then log the registration.

        :param driver: the scheduler driver (unused here)
        :param frameworkId: object whose `value` attribute is logged as the
            framework id
        :param masterInfo: object exposing `pid`, `hostname`, `id`, `ip`
            and `port` attributes, all of which are logged
        """
        # ``with`` guarantees the condition's lock is released even when
        # notify() raises, unlike a manual acquire()/release() pair.
        with self.mesos_ready:
            self.mesos_ready.notify()

        log.info("Registered with master",
                 extra=dict(
                     framework_id=frameworkId.value,
                     master_pid=masterInfo.pid,
                     master_hostname=masterInfo.hostname,
                     master_id=masterInfo.id,
                     master_ip=masterInfo.ip,
                     master_port=masterInfo.port,
                     mesos_framework_name=self.ns.mesos_framework_name,
                 ))
Exemple #6
0
def init_mesos_scheduler(ns, MV, exception_sender, mesos_ready):
    """Build a Mesos framework and scheduler driver, run it, then exit.

    This function never returns normally: it ends with ``sys.exit``, using
    status 0 when the driver's run() result equals
    ``mesos_pb2.DRIVER_STOPPED`` and status 1 otherwise.

    :param ns: options namespace; reads `mesos_framework_name`,
        `mesos_framework_principal`, `mesos_framework_role`,
        `mesos_checkpoint` and `mesos_master`
    :param MV: forwarded unchanged to `Scheduler`
    :param exception_sender: forwarded unchanged to `Scheduler`
    :param mesos_ready: forwarded unchanged to `Scheduler`
    :raises ImportError: when the mesos packages cannot be imported
    """
    import mesos.interface
    from mesos.interface import mesos_pb2
    try:
        import mesos.native
    except ImportError:
        # Log a hint for the operator, then let the ImportError propagate.
        log.error(
            "Oops! Mesos native bindings are not installed.  You can download"
            " these binaries from mesosphere.",
            extra=dict(mesos_framework_name=ns.mesos_framework_name))
        raise

    log.info(
        'starting mesos scheduler',
        extra=dict(mesos_framework_name=ns.mesos_framework_name))

    # Describe this framework to Mesos.
    framework_info = mesos_pb2.FrameworkInfo()
    framework_info.user = ""  # Have Mesos fill in the current user.
    framework_info.name = "Relay.Mesos: %s" % ns.mesos_framework_name
    # Optional settings are only applied when the option is truthy.
    if ns.mesos_framework_principal:
        framework_info.principal = ns.mesos_framework_principal
    if ns.mesos_framework_role:
        framework_info.role = ns.mesos_framework_role
    if ns.mesos_checkpoint:
        framework_info.checkpoint = True

    # Wire the scheduler into a native driver pointed at the master.
    scheduler = Scheduler(
        MV=MV,
        exception_sender=exception_sender,
        mesos_ready=mesos_ready,
        ns=ns)
    scheduler_driver = mesos.native.MesosSchedulerDriver(
        scheduler, framework_info, ns.mesos_master)
    atexit.register(scheduler_driver.stop)

    # Run the driver and translate its result into an exit code.
    run_result = scheduler_driver.run()
    if run_result == mesos_pb2.DRIVER_STOPPED:
        exit_code = 0
    else:
        exit_code = 1
    scheduler_driver.stop()  # Ensure that the driver process terminates.
    sys.exit(exit_code)
Exemple #7
0
def main(ns):
    """
    Run Relay as a Mesos framework.
    Relay's event loop and the Mesos scheduler each run in separate processes
    and communicate through a multiprocessing.Pipe.

    These two processes bounce control back and forth between mesos
    resourceOffers and Relay's warmer/cooler functions.  Relay warmer/cooler
    functions request that mesos tasks get spun up, but those requests are only
    filled if the mesos scheduler receives enough relevant offers.  Relay's
    requests don't build up: only the largest request since the last fulfilled
    request is fulfilled at the moment enough mesos resources are available.

    This function never returns normally.  It exits with status 1 when
    `ns.mesos_master` is None, and also after the supervision loop ends
    (a child reported an exception or is no longer alive) and both child
    processes have been terminated.

    :param ns: options namespace; reads `mesos_master`,
        `mesos_task_resources`, `mesos_framework_name`, `warmer`, `cooler`
        and `delay`
    """
    if ns.mesos_master is None:
        log.error(
            "Oops!  You didn't define --mesos_master",
            extra=dict(mesos_framework_name=ns.mesos_framework_name))
        build_arg_parser().print_usage()
        sys.exit(1)
    if not ns.mesos_task_resources:
        # `warn` is a deprecated alias of `warning` on stdlib loggers.
        log.warning(
            "You didn't define '--mesos_task_resources'."
            "  Tasks may not start on slaves",
            extra=dict(mesos_framework_name=ns.mesos_framework_name))
    log.info(
        "Starting Relay Mesos!",
        extra={k: str(v) for k, v in ns.__dict__.items()})

    # a distributed value storing the num and type of tasks mesos scheduler
    # should create at any given moment in time.
    # Sign of MV determines task type: warmer or cooler
    # ie. A positive value of n means n warmer tasks
    # Typecode 'd' makes this a shared array of two C doubles.
    # NOTE(review): the meaning of each of the two slots is defined by the
    # code that reads/writes MV, which is not visible here -- confirm.
    MV = mp.Array('d', [0, 0])

    # store exceptions that may be raised
    exception_receiver, exception_sender = mp.Pipe(False)
    # notify relay when mesos framework is ready
    mesos_ready = mp.Condition()

    # copy and then override warmer and cooler
    ns_relay = ns.__class__(**{k: v for k, v in ns.__dict__.items()})
    if ns.warmer:
        ns_relay.warmer = warmer_cooler_wrapper(MV, ns)
    if ns.cooler:
        ns_relay.cooler = warmer_cooler_wrapper(MV, ns)

    mesos_name = "Relay.Mesos Scheduler"
    mesos = mp.Process(
        target=catch(init_mesos_scheduler, exception_sender),
        kwargs=dict(ns=ns, MV=MV, exception_sender=exception_sender,
                    mesos_ready=mesos_ready),
        name=mesos_name)
    relay_name = "Relay.Runner Event Loop"
    relay = mp.Process(
        target=catch(init_relay, exception_sender),
        args=(ns_relay, mesos_ready, ns.mesos_framework_name),
        name=relay_name)
    mesos.start()  # start mesos framework
    relay.start()  # start relay's loop
    set_signals(mesos, relay, ns)

    # Supervise the children until one reports an exception or dies.
    while True:
        if exception_receiver.poll():
            # Drain the message; its content is not used here.
            exception_receiver.recv()
            log.error(
                'Terminating child processes because one of them raised'
                ' an exception', extra=dict(
                    is_relay_alive=relay.is_alive(),
                    is_mesos_alive=mesos.is_alive(),
                    mesos_framework_name=ns.mesos_framework_name))
            break
        if not relay.is_alive():
            log.error(
                "Relay died.  Check logs to see why.",
                extra=dict(mesos_framework_name=ns.mesos_framework_name))
            break
        if not mesos.is_alive():
            log.error(
                "Mesos Scheduler died and didn't notify me of its exception."
                "  This may be a code bug.  Check logs.",
                extra=dict(mesos_framework_name=ns.mesos_framework_name))
            break
        # save cpu cycles by checking for subprocess failures less often:
        # poll every ns.delay seconds, but never wait longer than 5 seconds
        time.sleep(min(ns.delay, 5))

    relay.terminate()
    mesos.terminate()
    sys.exit(1)