def substitute_thermos(cls, command, task, cluster, **kw):
  prefix_command = 'cd %s;' % cls.thermos_sandbox(cluster, **kw)
  thermos_namespace = ThermosContext(
      task_id=task.assignedTask.taskId,
      ports=task.assignedTask.assignedPorts)
  mesos_namespace = MesosContext(instance=task.assignedTask.instanceId)
  command = String(prefix_command + command) % Environment(
      thermos=thermos_namespace,
      mesos=mesos_namespace)
  return command.get()
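# --- Usage sketch (illustrative, not part of the original source) ---
# A minimal demonstration of the pystachio binding that substitute_thermos
# performs, assuming ThermosContext and MesosContext are importable from the
# Aurora schema modules; the sandbox path and port values here are invented.
from pystachio import Environment, String

from apache.aurora.config.schema.base import MesosContext
from apache.thermos.config.schema import ThermosContext

thermos = ThermosContext(task_id='task-1234', ports={'http': 8080})
mesos = MesosContext(instance=0)
cmd = String('cd /var/sandbox; curl -f localhost:{{thermos.ports[http]}}') % Environment(
    thermos=thermos, mesos=mesos)
print(cmd.get())  # -> 'cd /var/sandbox; curl -f localhost:8080'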
def context(self, task_id):
  state = self.state
  if state.header is None:
    return None
  return ThermosContext(
      ports=state.header.ports if state.header.ports else {},
      task_id=state.header.task_id,
      user=state.header.user,
  )
def interpolate_cmd(task, cmd):
  """
  :param task: Assigned task passed from Mesos Agent
  :param cmd: Command defined inside shell_command inside config.
  :return: Interpolated cmd with filled in values, for example ports.
  """
  thermos_namespace = ThermosContext(
      task_id=task.taskId,
      ports=task.assignedPorts)
  mesos_namespace = MesosContext(instance=task.instanceId)
  command = String(cmd) % Environment(
      thermos=thermos_namespace,
      mesos=mesos_namespace)
  return command.get()
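# --- Usage sketch (illustrative, not part of the original source) ---
# Shows interpolate_cmd filling a shell_command template from an assigned
# task. FakeTask is a hypothetical stand-in for the thrift AssignedTask that
# real callers receive from the Mesos Agent; the port value is invented.
from collections import namedtuple

FakeTask = namedtuple('FakeTask', ['taskId', 'instanceId', 'assignedPorts'])

assigned = FakeTask(taskId='task-1234', instanceId=0, assignedPorts={'health': 31337})
print(interpolate_cmd(assigned, 'curl -f localhost:{{thermos.ports[health]}}'))
# -> 'curl -f localhost:31337'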
def _build_path(context, target):
  (task_instance, path) = ScpCommand._extract_task_instance_and_path(context, target)

  # No jobkey is specified therefore we are using a local path.
  if task_instance is None:
    return path

  # Jobkey specified, we want to convert to the user@host:file scp format
  (cluster, role, env, name) = task_instance.jobkey
  instance = set([task_instance.instance])

  api = context.get_api(cluster)
  resp = api.query(api.build_query(role, name, env=env, instances=instance))
  context.log_response_and_raise(
      resp,
      err_msg=('Unable to get information about instance: %s' % combine_messages(resp)))
  if (resp.result.scheduleStatusResult.tasks is None
      or len(resp.result.scheduleStatusResult.tasks) == 0):
    raise context.CommandError(
        EXIT_INVALID_PARAMETER,
        ScpCommand.JOB_NOT_FOUND_ERROR_MSG % (task_instance.jobkey, task_instance.instance))
  first_task = resp.result.scheduleStatusResult.tasks[0]
  assigned = first_task.assignedTask
  role = assigned.task.job.role
  slave_host = assigned.slaveHost

  # If path is absolute, use that. Else if it is a tilde expansion, throw an error.
  # Otherwise, use sandbox as relative root.
  normalized_input_path = os.path.normpath(path)
  if os.path.isabs(normalized_input_path):
    final_path = normalized_input_path
  elif normalized_input_path.startswith('~/') or normalized_input_path == '~':
    raise context.CommandError(EXIT_INVALID_PARAMETER, ScpCommand.TILDE_USAGE_ERROR_MSG % path)
  else:
    sandbox_path_pre_format = DistributedCommandRunner.thermos_sandbox(
        api.cluster, executor_sandbox=context.options.executor_sandbox)
    thermos_namespace = ThermosContext(
        task_id=assigned.taskId,
        ports=assigned.assignedPorts)
    sandbox_path = String(sandbox_path_pre_format) % Environment(thermos=thermos_namespace)
    # Join the individual folders to the sandbox path to build safely
    final_path = os.path.join(str(sandbox_path), *normalized_input_path.split(os.sep))

  return '%s@%s:%s' % (role, slave_host, final_path)
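# --- Illustration (not part of the original source) ---
# A hedged sketch of the relative-path branch above: the normalized input
# path is split into components and joined onto the interpolated sandbox
# root. The sandbox root below is invented; behavior shown is POSIX.
import os

sandbox_root = '/var/lib/mesos/slaves/abc/frameworks/xyz/runs/latest/sandbox'
normalized = os.path.normpath('logs/../files/stdout')  # -> 'files/stdout'
final_path = os.path.join(sandbox_root, *normalized.split(os.sep))
print(final_path)  # -> '<sandbox_root>/files/stdout'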
def assert_all_refs_bound(cls, task):
  port_names = PortExtractor.extract(task)
  # Create fake bindings and make sure that there are no unbound refs afterwards. If
  # there are unbound refs, that could indicate improper scoping, e.g.
  # {{array[{{mesos.instance}}]}}, which is disallowed.
  thermos_bindings = ThermosContext(
      task_id='dummy_task_id',
      user='dummy_user',
      ports=dict((name, random.randrange(30000, 40000)) for name in port_names),
  )
  task_instance, unbindable_refs = (task % dict(thermos=thermos_bindings)).interpolate()
  if len(unbindable_refs) != 0:
    raise cls.InvalidTaskError(
        'Unexpected unbound refs: %s. Make sure you are not nesting template variables.'
        % ' '.join(map(str, unbindable_refs)))
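# --- Illustration (not part of the original source) ---
# A minimal sketch of the fake-binding technique above, assuming
# ThermosContext is importable from the Thermos schema module: bind dummy
# values, then ask pystachio which refs remain unbound. The template string
# and port name are invented for illustration.
import random

from pystachio import Environment, String

from apache.thermos.config.schema import ThermosContext

template = String('run --port={{thermos.ports[http]}} --id={{thermos.task_id}}')
bindings = ThermosContext(
    task_id='dummy_task_id',
    ports={'http': random.randrange(30000, 40000)})
interpolated, unbound = (template % Environment(thermos=bindings)).interpolate()
assert unbound == []  # any leftover refs would indicate improper scoping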
def __init__(self, task, checkpoint_root, sandbox, log_dir=None,
             task_id=None, portmap=None, user=None, chroot=False, clock=time,
             universal_handler=None, planner_class=TaskPlanner, hostname=None,
             process_logger_destination=None, process_logger_mode=None,
             rotate_log_size_mb=None, rotate_log_backups=None, preserve_env=False):
  """
    required:
      task (config.Task) = the task to run
      checkpoint_root (path) = the checkpoint root
      sandbox (path) = the sandbox in which the task will be run
                       [if None, cwd will be assumed, but garbage collection will be
                        disabled for this task.]

    optional:
      log_dir (string) = directory to house stdout/stderr logs. If not specified, logs will
                         be written into the sandbox directory under .logs/
      task_id (string) = bind to this task id. If not specified, will synthesize an id based
                         upon task.name()
      portmap (dict) = a map (string => integer) from name to port, e.g. {'http': 80}
      user (string) = the user to run the task as. If not the current user, requires setuid
                      privileges.
      chroot (boolean) = whether or not to chroot into the sandbox prior to exec.
      clock (time interface) = the clock to use throughout
      universal_handler = checkpoint record handler (only used for testing)
      planner_class (TaskPlanner class) = TaskPlanner class to use for constructing the task
                                          planning policy.
      process_logger_destination (string) = the destination of the logger to use for all
                                            processes.
      process_logger_mode (string) = the mode of the logger to use for all processes.
      rotate_log_size_mb (integer) = the maximum size of the rotated stdout/stderr logs in MiB.
      rotate_log_backups (integer) = the maximum number of rotated stdout/stderr log backups.
      preserve_env (boolean) = whether or not env variables for the runner should be in the
                               env for the task being run
  """
  if not issubclass(planner_class, TaskPlanner):
    raise TypeError('planner_class must be a TaskPlanner.')
  self._clock = clock
  launch_time = self._clock.time()
  launch_time_ms = '%06d' % int((launch_time - int(launch_time)) * 10 ** 6)
  if not task_id:
    self._task_id = '%s-%s.%s' % (
        task.name(),
        time.strftime('%Y%m%d-%H%M%S', time.localtime(launch_time)),
        launch_time_ms)
  else:
    self._task_id = task_id
  current_user = TaskRunnerHelper.get_actual_user()
  self._user = user or current_user
  # TODO(wickman) This should be delegated to the ProcessPlatform / Helper
  if self._user != current_user:
    if os.geteuid() != 0:
      raise ValueError('task specifies user as %s, but %s does not have setuid permission!'
                       % (self._user, current_user))
  self._portmap = portmap or {}
  self._launch_time = launch_time
  self._log_dir = log_dir or os.path.join(sandbox, '.logs')
  self._process_logger_destination = process_logger_destination
  self._process_logger_mode = process_logger_mode
  self._rotate_log_size_mb = rotate_log_size_mb
  self._rotate_log_backups = rotate_log_backups
  self._pathspec = TaskPath(root=checkpoint_root, task_id=self._task_id, log_dir=self._log_dir)
  self._hostname = hostname or socket.gethostname()
  try:
    ThermosTaskValidator.assert_valid_task(task)
    ThermosTaskValidator.assert_valid_ports(task, self._portmap)
  except ThermosTaskValidator.InvalidTaskError as e:
    raise self.InvalidTask('Invalid task: %s' % e)
  context = ThermosContext(
      task_id=self._task_id,
      ports=self._portmap,
      user=self._user)
  self._task, uninterp = (task % Environment(thermos=context)).interpolate()
  if len(uninterp) > 0:
    raise self.InvalidTask('Failed to interpolate task, missing: %s' %
                           ', '.join(str(ref) for ref in uninterp))
  try:
    ThermosTaskValidator.assert_same_task(self._pathspec, self._task)
  except ThermosTaskValidator.InvalidTaskError as e:
    raise self.InvalidTask('Invalid task: %s' % e)
  self._plan = None  # plan currently being executed (updated by Handlers)
  self._regular_plan = planner_class(
      self._task, clock=clock, process_filter=lambda proc: proc.final().get() is False)
  self._finalizing_plan = planner_class(
      self._task, clock=clock, process_filter=lambda proc: proc.final().get() is True)
  self._chroot = chroot
  self._sandbox = sandbox
  self._terminal_state = None
  self._ckpt = None
  self._process_map = dict((p.name().get(), p) for p in self._task.processes())
  self._task_processes = {}
  self._stages = dict((state, stage(self)) for state, stage in self.STAGES.items())
  self._finalization_start = None
  self._preemption_deadline = None
  self._watcher = ProcessMuxer(self._pathspec)
  self._state = RunnerState(processes={})
  self._preserve_env = preserve_env

  # create runner state
  universal_handler = universal_handler or TaskRunnerUniversalHandler
  self._dispatcher = CheckpointDispatcher()
  self._dispatcher.register_handler(universal_handler(self))
  self._dispatcher.register_handler(TaskRunnerProcessHandler(self))
  self._dispatcher.register_handler(TaskRunnerTaskHandler(self))

  # recover checkpointed runner state and update plan
  self._recovery = True
  self._replay_runner_ckpt()
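# --- Illustration (not part of the original source) ---
# A hedged sketch of the task id synthesis performed in __init__ when no
# task_id is supplied: task name, local timestamp, and a zero-padded
# sub-second suffix (microseconds, despite the _ms name). The task name
# below is invented.
import time

launch_time = time.time()
launch_time_ms = '%06d' % int((launch_time - int(launch_time)) * 10 ** 6)
task_id = '%s-%s.%s' % (
    'hello_world',
    time.strftime('%Y%m%d-%H%M%S', time.localtime(launch_time)),
    launch_time_ms)
print(task_id)  # e.g. 'hello_world-20240101-120000.000123'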