Ejemplo n.º 1
0
def proxy_main(args, opts):
    """Build a ``TaskRunner`` from parsed options and run the task.

    ``args`` is accepted but not used in this function. ``opts`` must carry
    ``thermos_json`` (a path that exists), ``sandbox`` and
    ``checkpoint_root``; ``task_id``, ``setuid``, ``prebound_ports`` and
    ``chroot`` are read as well and forwarded to the runner.
    """
    # Options the runner cannot work without.
    assert opts.thermos_json and os.path.exists(opts.thermos_json)
    assert opts.sandbox
    assert opts.checkpoint_root

    thermos_task = get_task_from_options(opts)
    prebound_ports = opts.prebound_ports

    # Every port the task asks for must already have a binding.
    unbound = set(thermos_task.ports()).difference(prebound_ports)
    if unbound:
        app.error('ERROR!  Unbound ports: %s' % ' '.join(unbound))

    task_runner = TaskRunner(
        thermos_task.task,
        opts.checkpoint_root,
        opts.sandbox,
        task_id=opts.task_id,
        user=opts.setuid,
        portmap=prebound_ports,
        chroot=opts.chroot,
        planner_class=CappedTaskPlanner,
    )

    # SIGUSR1/SIGUSR2 invoke runner_teardown(task_runner, signum, frame).
    for signum in (signal.SIGUSR1, signal.SIGUSR2):
        on_signal = functools.partial(runner_teardown, task_runner)
        signal.signal(signum, on_signal)

    try:
        task_runner.run()
    except TaskRunner.InternalError as exc:
        app.error('Internal error: %s' % exc)
    except TaskRunner.InvalidTask as exc:
        app.error(str(exc))
    except TaskRunner.StateError:
        app.error('Task appears to already be in a terminal state.')
    except KeyboardInterrupt:
        # Ctrl-C gets the same teardown path as the signal handlers.
        runner_teardown(task_runner)
Ejemplo n.º 2
0
def _really_run(task,
                root,
                sandbox,
                task_id=None,
                user=None,
                prebound_ports=None,
                chroot=None,
                daemon=False):
    """Construct a ``TaskRunner`` for ``task`` and run it.

    A falsy ``prebound_ports`` is treated as an empty mapping. Ports that
    the task requests but that are absent from ``prebound_ports`` are
    reported through ``app.error``. When ``daemon`` is true the process is
    daemonized before the runner starts; a failure to daemonize exits with
    status 1. A keyboard interrupt during the run closes the runner's
    checkpoint and kills the task.
    """
    if not prebound_ports:
        prebound_ports = {}

    unbound = set(task.ports()).difference(prebound_ports.keys())
    if unbound:
        app.error('ERROR!  Unbound ports: %s' % ' '.join(unbound))

    task_runner = TaskRunner(
        task.task,
        root,
        sandbox,
        task_id=task_id,
        user=user,
        portmap=prebound_ports,
        chroot=chroot,
    )

    if daemon:
        print('Daemonizing and starting runner.')
        try:
            # stderr logging is torn down before daemonizing.
            log.teardown_stderr_logging()
            daemonize()
        except Exception as exc:
            print("Failed to daemonize: %s" % exc)
            sys.exit(1)

    try:
        task_runner.run()
    except KeyboardInterrupt:
        print('Got keyboard interrupt, killing job!')
        task_runner.close_ckpt()
        task_runner.kill()
Ejemplo n.º 3
0
def proxy_main(args, opts):
  """Entry point: run the thermos task described by ``opts``.

  Requires ``opts.thermos_json`` (an existing file), ``opts.sandbox`` and
  ``opts.checkpoint_root``. ``args`` is not used by this function.
  """
  assert opts.thermos_json and os.path.exists(opts.thermos_json)
  assert opts.sandbox
  assert opts.checkpoint_root

  thermos_task = get_task_from_options(opts)
  prebound_ports = opts.prebound_ports

  # Ports requested by the task that have no pre-bound entry.
  requested = set(thermos_task.ports())
  unbound = requested - set(prebound_ports)
  if unbound:
    port_list = ' '.join(unbound)
    app.error('ERROR!  Unbound ports: %s' % port_list)

  task_runner = TaskRunner(thermos_task.task,
                           opts.checkpoint_root,
                           opts.sandbox,
                           task_id=opts.task_id,
                           user=opts.setuid,
                           portmap=prebound_ports,
                           chroot=opts.chroot,
                           planner_class=CappedTaskPlanner)

  # Both user signals trigger runner_teardown with the runner pre-bound.
  for signum in (signal.SIGUSR1, signal.SIGUSR2):
    signal.signal(signum, functools.partial(runner_teardown, task_runner))

  try:
    task_runner.run()
  except TaskRunner.InternalError as internal_error:
    app.error('Internal error: %s' % internal_error)
  except TaskRunner.InvalidTask as invalid_task:
    app.error(str(invalid_task))
  except TaskRunner.StateError:
    app.error('Task appears to already be in a terminal state.')
  except KeyboardInterrupt:
    # An interactive interrupt is handled like the teardown signals.
    runner_teardown(task_runner)
Ejemplo n.º 4
0
def make_executor(proxy_driver,
                  checkpoint_root,
                  task,
                  ports=None,
                  fast_status=False,
                  runner_class=ThermosTaskRunner,
                  status_providers=()):
    """Launch ``task`` on a ``FastThermosExecutor`` and wait until it runs.

    Args:
        proxy_driver: driver double whose ``method_calls`` mapping records
            the ``sendStatusUpdate`` calls made by the executor.
        checkpoint_root: checkpoint directory handed to the runner provider
            and used to look the runner up afterwards.
        task: task definition passed to ``make_task``.
        ports: mapping passed to ``make_task`` as ``assigned_ports``;
            ``None`` (the default) means an empty mapping.
        fast_status: selects ``FastStatusManager`` instead of
            ``StatusManager``.
        runner_class: runner class given to ``make_provider``.
        status_providers: forwarded to the executor unchanged.

    Returns:
        A ``(runner, executor)`` tuple.

    Note:
        The polling loops below have no timeout; if the executor never
        reports two status updates, or the runner never appears, this call
        does not return.
    """
    # A fresh dict per call: a ``{}`` default would be one object shared by
    # every invocation that omits ``ports``.
    if ports is None:
        ports = {}

    status_manager_class = FastStatusManager if fast_status else StatusManager
    runner_provider = make_provider(checkpoint_root, runner_class)
    te = FastThermosExecutor(
        runner_provider=runner_provider,
        status_manager_class=status_manager_class,
        sandbox_provider=DefaultTestSandboxProvider,
        status_providers=status_providers,
    )

    ExecutorTimeout(te.launched,
                    proxy_driver,
                    timeout=Amount(100, Time.MILLISECONDS)).start()
    task_description = make_task(task, assigned_ports=ports, instanceId=0)
    te.launchTask(proxy_driver, task_description)

    te.runner_started.wait()

    # Poll until the driver has recorded at least two status updates.
    while len(proxy_driver.method_calls['sendStatusUpdate']) < 2:
        time.sleep(0.1)

    # make sure startup was kosher
    updates = proxy_driver.method_calls['sendStatusUpdate']
    assert len(updates) == 2
    # Each recorded call's first positional argument is the status update.
    status_updates = [arg_tuple[0][0] for arg_tuple in updates]
    assert status_updates[0].state == mesos_pb.TASK_STARTING
    assert status_updates[1].state == mesos_pb.TASK_RUNNING

    # wait for the runner to bind to a task
    while True:
        runner = TaskRunner.get(task_description.task_id.value,
                                checkpoint_root)
        if runner:
            break
        time.sleep(0.1)

    assert te.launched.is_set()
    return runner, te
def make_executor(
    proxy_driver,
    checkpoint_root,
    task,
    ports=None,
    fast_status=False,
    runner_class=ThermosTaskRunner,
    status_providers=()):
  """Launch ``task`` on a ``FastThermosExecutor`` and wait until it runs.

  Args:
    proxy_driver: driver double whose ``method_calls`` mapping records the
      ``sendStatusUpdate`` calls made by the executor.
    checkpoint_root: checkpoint directory handed to the runner provider and
      used to look the runner up afterwards.
    task: task definition passed to ``make_task``.
    ports: mapping passed to ``make_task`` as ``assigned_ports``; ``None``
      (the default) means an empty mapping.
    fast_status: selects ``FastStatusManager`` instead of ``StatusManager``.
    runner_class: runner class given to ``make_provider``.
    status_providers: forwarded to the executor unchanged.

  Returns:
    A ``(runner, executor)`` tuple.

  Note:
    The polling loops below have no timeout; if the executor never reports
    two status updates, or the runner never appears, this call does not
    return.
  """
  # A fresh dict per call: a ``{}`` default would be one object shared by
  # every invocation that omits ``ports``.
  if ports is None:
    ports = {}

  status_manager_class = FastStatusManager if fast_status else StatusManager
  runner_provider = make_provider(checkpoint_root, runner_class)
  te = FastThermosExecutor(
      runner_provider=runner_provider,
      status_manager_class=status_manager_class,
      sandbox_provider=DefaultTestSandboxProvider,
      status_providers=status_providers,
  )

  ExecutorTimeout(te.launched, proxy_driver, timeout=Amount(100, Time.MILLISECONDS)).start()
  task_description = make_task(task, assigned_ports=ports, instanceId=0)
  te.launchTask(proxy_driver, task_description)

  te.runner_started.wait()

  # Poll until the driver has recorded at least two status updates.
  while len(proxy_driver.method_calls['sendStatusUpdate']) < 2:
    time.sleep(0.1)

  # make sure startup was kosher
  updates = proxy_driver.method_calls['sendStatusUpdate']
  assert len(updates) == 2
  # Each recorded call's first positional argument is the status update.
  status_updates = [arg_tuple[0][0] for arg_tuple in updates]
  assert status_updates[0].state == mesos_pb.TASK_STARTING
  assert status_updates[1].state == mesos_pb.TASK_RUNNING

  # wait for the runner to bind to a task
  while True:
    runner = TaskRunner.get(task_description.task_id.value, checkpoint_root)
    if runner:
      break
    time.sleep(0.1)

  assert te.launched.is_set()
  return runner, te
Ejemplo n.º 6
0
    def test_coordinator_dead_kill(self):
        """SIGKILL runner, coordinator and process, then force-kill the task.

        Expects the single "ignorant_process" run to be recorded as LOST.
        """
        runner = self.start_runner()
        monitor = TaskMonitor(runner.pathspec, runner.task_id)
        self.wait_until_running(monitor)

        active = monitor.get_active_processes()
        process_state, run_number = active[0]
        assert process_state.process == "ignorant_process"
        assert run_number == 0

        # Take everything down the hard way, in order: the runner, the
        # coordinator, and finally the process itself.
        os.kill(runner.po.pid, signal.SIGKILL)
        os.kill(process_state.coordinator_pid, signal.SIGKILL)
        os.kill(process_state.pid, signal.SIGKILL)

        # A new runner bound to the same task performs the forced kill.
        killer = TaskRunner.get(runner.task_id, runner.root)
        assert killer is not None
        killer.kill(force=True)

        runs = monitor.get_state().processes["ignorant_process"]
        assert len(runs) == 1
        assert runs[0].state == ProcessState.LOST
Ejemplo n.º 7
0
    def test_coordinator_dead_kill(self):
        """A force kill after every pid was SIGKILLed marks the run LOST."""
        runner = self.start_runner()
        task_monitor = TaskMonitor(runner.pathspec, runner.task_id)
        self.wait_until_running(task_monitor)

        first_active = task_monitor.get_active_processes()[0]
        process_state, run_number = first_active
        assert process_state.process == 'ignorant_process'
        assert run_number == 0

        # Runner first, then the coordinator, then the process.
        os.kill(runner.po.pid, signal.SIGKILL)
        os.kill(process_state.coordinator_pid, signal.SIGKILL)
        os.kill(process_state.pid, signal.SIGKILL)

        killer = TaskRunner.get(runner.task_id, runner.root)
        assert killer is not None
        killer.kill(force=True)

        final_state = task_monitor.get_state()
        history = final_state.processes['ignorant_process']
        assert len(history) == 1
        only_run = history[0]
        assert only_run.state == ProcessState.LOST
Ejemplo n.º 8
0
    def test_preemption_wait(self):
        """A forced kill with a 1s preemption wait takes roughly 1s.

        Also checks that both the task and "ignorant_process" end up KILLED.
        """
        runner = self.start_runner()
        monitor = TaskMonitor(runner.pathspec, runner.task_id)
        self.wait_until_running(monitor)

        active = monitor.get_active_processes()
        process_state, run_number = active[0]
        assert process_state.process == "ignorant_process"
        assert run_number == 0

        preempter = TaskRunner.get(runner.task_id, runner.root)
        assert preempter is not None

        wait = Amount(1, Time.SECONDS)
        started = time.time()
        preempter.kill(force=True, preemption_wait=wait)
        elapsed = time.time() - started

        # The tolerance is arbitrary: the kill must complete within half a
        # second of the requested preemption wait.
        assert abs(elapsed - 1.0) < 0.5

        last_status = preempter.state.statuses[-1]
        assert last_status.state == TaskState.KILLED
        last_run = preempter.state.processes["ignorant_process"][-1]
        assert last_run.state == ProcessState.KILLED
Ejemplo n.º 9
0
    def test_preemption_wait(self):
        """Killing with ``preemption_wait`` of 1s returns after about 1s."""
        runner = self.start_runner()
        task_monitor = TaskMonitor(runner.pathspec, runner.task_id)
        self.wait_until_running(task_monitor)

        first_active = task_monitor.get_active_processes()[0]
        process_state, run_number = first_active
        assert process_state.process == 'ignorant_process'
        assert run_number == 0

        preempter = TaskRunner.get(runner.task_id, runner.root)
        assert preempter is not None

        # Time only the kill call itself.
        t0 = time.time()
        preempter.kill(force=True, preemption_wait=Amount(1, Time.SECONDS))
        duration = time.time() - t0

        # Arbitrary bound: finish within half a second of the requested
        # preemption wait.
        assert abs(duration - 1.0) < 0.5

        final_state = preempter.state
        assert final_state.statuses[-1].state == TaskState.KILLED
        process_runs = final_state.processes['ignorant_process']
        assert process_runs[-1].state == ProcessState.KILLED
Ejemplo n.º 10
0
def _really_run(
    task,
    root,
    sandbox,
    task_id=None,
    user=None,
    prebound_ports=None,
    chroot=None,
    daemon=False):
  """Run ``task`` under a ``TaskRunner``, optionally as a daemon.

  A falsy ``prebound_ports`` is replaced by an empty dict. Requested ports
  that have no entry in ``prebound_ports`` are reported via ``app.error``.
  If ``daemon`` is set, stderr logging is torn down and the process is
  daemonized first; any exception there prints a message and exits with
  status 1. A ``KeyboardInterrupt`` while running closes the checkpoint and
  kills the task.
  """
  prebound_ports = prebound_ports if prebound_ports else {}

  requested = set(task.ports())
  missing_ports = requested - set(prebound_ports.keys())
  if missing_ports:
    port_list = ' '.join(missing_ports)
    app.error('ERROR!  Unbound ports: %s' % port_list)

  task_runner = TaskRunner(
      task.task,
      root,
      sandbox,
      task_id=task_id,
      user=user,
      portmap=prebound_ports,
      chroot=chroot)

  if daemon:
    print('Daemonizing and starting runner.')
    try:
      log.teardown_stderr_logging()
      daemonize()
    except Exception as failure:
      print("Failed to daemonize: %s" % failure)
      sys.exit(1)

  try:
    task_runner.run()
  except KeyboardInterrupt:
    # Interactive abort: close the checkpoint, then kill the task.
    print('Got keyboard interrupt, killing job!')
    task_runner.close_ckpt()
    task_runner.kill()