Exemple #1
0
  def __init__(self, task, checkpoint_root, sandbox, log_dir=None,
               task_id=None, portmap=None, user=None, chroot=False, clock=time,
               universal_handler=None, planner_class=TaskPlanner):
    """
      required:
        task (config.Task) = the task to run
        checkpoint_root (path) = the checkpoint root
        sandbox (path) = the sandbox in which the path will be run
                         [if None, cwd will be assumed, but garbage collection will be
                          disabled for this task.]

      optional:
        log_dir (string)  = directory to house stdout/stderr logs. If not specified, logs will be
                            written into the sandbox directory under .logs/
        task_id (string)  = bind to this task id.  if not specified, will synthesize an id based
                            upon task.name()
        portmap (dict)    = a map (string => integer) from name to port, e.g. { 'http': 80 }
        user (string)     = the user to run the task as.  if not current user, requires setuid
                            privileges.
        chroot (boolean)  = whether or not to chroot into the sandbox prior to exec.
        clock (time interface) = the clock to use throughout
        universal_handler = checkpoint record handler (only used for testing)
        planner_class (TaskPlanner class) = TaskPlanner class to use for constructing the task
                            planning policy.
    """
    if not issubclass(planner_class, TaskPlanner):
      raise TypeError('planner_class must be a TaskPlanner.')
    self._clock = clock
    launch_time = self._clock.time()
    launch_time_ms = '%06d' % int((launch_time - int(launch_time)) * 10**6)
    if not task_id:
      self._task_id = '%s-%s.%s' % (task.name(),
                                    time.strftime('%Y%m%d-%H%M%S', time.localtime(launch_time)),
                                    launch_time_ms)
    else:
      self._task_id = task_id
    current_user = TaskRunnerHelper.get_actual_user()
    self._user = user or current_user
    # TODO(wickman) This should be delegated to the ProcessPlatform / Helper
    if self._user != current_user:
      if os.geteuid() != 0:
        raise ValueError('task specifies user as %s, but %s does not have setuid permission!' % (
          self._user, current_user))
    self._portmap = portmap or {}
    self._launch_time = launch_time
    self._log_dir = log_dir or os.path.join(sandbox, '.logs')
    self._pathspec = TaskPath(root=checkpoint_root, task_id=self._task_id, log_dir=self._log_dir)
    try:
      ThermosTaskValidator.assert_valid_task(task)
      ThermosTaskValidator.assert_valid_ports(task, self._portmap)
    except ThermosTaskValidator.InvalidTaskError as e:
      raise self.InvalidTask('Invalid task: %s' % e)
    context = ThermosContext(
        task_id=self._task_id,
        ports=self._portmap,
        user=self._user)
    self._task, uninterp = (task % Environment(thermos=context)).interpolate()
    if len(uninterp) > 0:
      raise self.InvalidTask('Failed to interpolate task, missing: %s' %
          ', '.join(str(ref) for ref in uninterp))
    try:
      ThermosTaskValidator.assert_same_task(self._pathspec, self._task)
    except ThermosTaskValidator.InvalidTaskError as e:
      raise self.InvalidTask('Invalid task: %s' % e)
    self._plan = None # plan currently being executed (updated by Handlers)
    self._regular_plan = planner_class(self._task, clock=clock,
        process_filter=lambda proc: proc.final().get() == False)
    self._finalizing_plan = planner_class(self._task, clock=clock,
        process_filter=lambda proc: proc.final().get() == True)
    self._chroot = chroot
    self._sandbox = sandbox
    self._terminal_state = None
    self._ckpt = None
    self._process_map = dict((p.name().get(), p) for p in self._task.processes())
    self._task_processes = {}
    self._stages = dict((state, stage(self)) for state, stage in self.STAGES.items())
    self._finalization_start = None
    self._preemption_deadline = None
    self._watcher = ProcessMuxer(self._pathspec)
    self._state   = RunnerState(processes = {})

    # create runner state
    universal_handler = universal_handler or TaskRunnerUniversalHandler
    self._dispatcher = CheckpointDispatcher()
    self._dispatcher.register_handler(universal_handler(self))
    self._dispatcher.register_handler(TaskRunnerProcessHandler(self))
    self._dispatcher.register_handler(TaskRunnerTaskHandler(self))

    # recover checkpointed runner state and update plan
    self._recovery = True
    self._replay_runner_ckpt()
Exemple #2
0
def convert(job, packages=frozenset(), ports=frozenset()):
  """Convert a Pystachio MesosJob to an Aurora Thrift JobConfiguration."""

  owner = Identity(role=fully_interpolated(job.role()), user=getpass.getuser())
  key = JobKey(
    role=assert_valid_field('role', fully_interpolated(job.role())),
    environment=assert_valid_field('environment', fully_interpolated(job.environment())),
    name=assert_valid_field('name', fully_interpolated(job.name())))

  task_raw = job.task()

  MB = 1024 * 1024
  task = TaskConfig()

  def not_empty_or(item, default):
    return default if item is Empty else fully_interpolated(item)

  # job components
  task.jobName = fully_interpolated(job.name())
  task.environment = fully_interpolated(job.environment())
  task.production = fully_interpolated(job.production(), bool)
  task.isService = select_service_bit(job)
  task.maxTaskFailures = fully_interpolated(job.max_task_failures())
  task.priority = fully_interpolated(job.priority())
  task.contactEmail = not_empty_or(job.contact(), None)

  # Add package tuples to a task, to display in the scheduler UI.
  task.packages = frozenset(
      Package(role=str(role), name=str(package_name), version=int(version))
      for role, package_name, version in packages)

  # task components
  if not task_raw.has_resources():
    raise InvalidConfig('Task must specify resources!')

  if (fully_interpolated(task_raw.resources().ram()) == 0
      or fully_interpolated(task_raw.resources().disk()) == 0):
    raise InvalidConfig('Must specify ram and disk resources, got ram:%r disk:%r' % (
      fully_interpolated(task_raw.resources().ram()),
      fully_interpolated(task_raw.resources().disk())))

  task.numCpus = fully_interpolated(task_raw.resources().cpu())
  task.ramMb = fully_interpolated(task_raw.resources().ram()) / MB
  task.diskMb = fully_interpolated(task_raw.resources().disk()) / MB
  if task.numCpus <= 0 or task.ramMb <= 0 or task.diskMb <= 0:
    raise InvalidConfig('Task has invalid resources.  cpu/ramMb/diskMb must all be positive: '
        'cpu:%r ramMb:%r diskMb:%r' % (task.numCpus, task.ramMb, task.diskMb))

  task.owner = owner
  task.requestedPorts = ports
  task.taskLinks = not_empty_or(job.task_links(), {})
  task.constraints = constraints_to_thrift(not_empty_or(job.constraints(), {}))

  underlying, refs = job.interpolate()

  # need to fake an instance id for the sake of schema checking
  underlying_checked = underlying.bind(mesos = {'instance': 31337})
  try:
    ThermosTaskValidator.assert_valid_task(underlying_checked.task())
  except ThermosTaskValidator.InvalidTaskError as e:
    raise InvalidConfig('Task is invalid: %s' % e)
  if not underlying_checked.check().ok():
    raise InvalidConfig('Job not fully specified: %s' % underlying.check().message())

  unbound = []
  for ref in refs:
    if ref == THERMOS_TASK_ID_REF or ref == MESOS_INSTANCE_REF or (
        Ref.subscope(THERMOS_PORT_SCOPE_REF, ref)):
      continue
    unbound.append(ref)

  if unbound:
    raise InvalidConfig('Config contains unbound variables: %s' % ' '.join(map(str, unbound)))

  cron_schedule = not_empty_or(job.cron_schedule(), '')
  cron_policy = select_cron_policy(job.cron_policy(), job.cron_collision_policy())

  task.executorConfig = ExecutorConfig(
      name=AURORA_EXECUTOR_NAME,
      data=filter_aliased_fields(underlying).json_dumps())

  return JobConfiguration(
      key=key,
      owner=owner,
      cronSchedule=cron_schedule,
      cronCollisionPolicy=cron_policy,
      taskConfig=task,
      instanceCount=fully_interpolated(job.instances()))
Exemple #3
0
 def on_initialization(self, header):
   log.debug('_on_initialization: %s' % header)
   ThermosTaskValidator.assert_valid_task(self._runner.task)
   ThermosTaskValidator.assert_valid_ports(self._runner.task, header.ports)
   self._checkpoint(RunnerCkpt(runner_header=header))
Exemple #4
0
def convert(job, packages=frozenset(), ports=frozenset()):
    """Convert a Pystachio MesosJob to an Aurora Thrift JobConfiguration."""

    owner = Identity(role=fully_interpolated(job.role()),
                     user=getpass.getuser())
    key = JobKey(
        role=assert_valid_field('role', fully_interpolated(job.role())),
        environment=assert_valid_field('environment',
                                       fully_interpolated(job.environment())),
        name=assert_valid_field('name', fully_interpolated(job.name())))

    task_raw = job.task()

    MB = 1024 * 1024
    task = TaskConfig()

    def not_empty_or(item, default):
        return default if item is Empty else fully_interpolated(item)

    # job components
    task.jobName = fully_interpolated(job.name())
    task.environment = fully_interpolated(job.environment())
    task.production = fully_interpolated(job.production(), bool)
    task.isService = select_service_bit(job)
    task.maxTaskFailures = fully_interpolated(job.max_task_failures())
    task.priority = fully_interpolated(job.priority())
    task.contactEmail = not_empty_or(job.contact(), None)

    # Add package tuples to a task, to display in the scheduler UI.
    task.packages = frozenset(
        Package(role=str(role), name=str(package_name), version=int(version))
        for role, package_name, version in packages)

    # task components
    if not task_raw.has_resources():
        raise InvalidConfig('Task must specify resources!')

    if (fully_interpolated(task_raw.resources().ram()) == 0
            or fully_interpolated(task_raw.resources().disk()) == 0):
        raise InvalidConfig(
            'Must specify ram and disk resources, got ram:%r disk:%r' %
            (fully_interpolated(task_raw.resources().ram()),
             fully_interpolated(task_raw.resources().disk())))

    task.numCpus = fully_interpolated(task_raw.resources().cpu())
    task.ramMb = fully_interpolated(task_raw.resources().ram()) / MB
    task.diskMb = fully_interpolated(task_raw.resources().disk()) / MB
    if task.numCpus <= 0 or task.ramMb <= 0 or task.diskMb <= 0:
        raise InvalidConfig(
            'Task has invalid resources.  cpu/ramMb/diskMb must all be positive: '
            'cpu:%r ramMb:%r diskMb:%r' %
            (task.numCpus, task.ramMb, task.diskMb))

    task.owner = owner
    task.requestedPorts = ports
    task.taskLinks = not_empty_or(job.task_links(), {})
    task.constraints = constraints_to_thrift(
        not_empty_or(job.constraints(), {}))

    underlying, refs = job.interpolate()

    # need to fake an instance id for the sake of schema checking
    underlying_checked = underlying.bind(mesos={'instance': 31337})
    try:
        ThermosTaskValidator.assert_valid_task(underlying_checked.task())
    except ThermosTaskValidator.InvalidTaskError as e:
        raise InvalidConfig('Task is invalid: %s' % e)
    if not underlying_checked.check().ok():
        raise InvalidConfig('Job not fully specified: %s' %
                            underlying.check().message())

    unbound = []
    for ref in refs:
        if ref == THERMOS_TASK_ID_REF or ref == MESOS_INSTANCE_REF or (
                Ref.subscope(THERMOS_PORT_SCOPE_REF, ref)):
            continue
        unbound.append(ref)

    if unbound:
        raise InvalidConfig('Config contains unbound variables: %s' %
                            ' '.join(map(str, unbound)))

    cron_schedule = not_empty_or(job.cron_schedule(), '')
    cron_policy = select_cron_policy(job.cron_policy(),
                                     job.cron_collision_policy())

    task.executorConfig = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=filter_aliased_fields(underlying).json_dumps())

    return JobConfiguration(key=key,
                            owner=owner,
                            cronSchedule=cron_schedule,
                            cronCollisionPolicy=cron_policy,
                            taskConfig=task,
                            instanceCount=fully_interpolated(job.instances()))