def log_response_and_raise(self, resp, err_code=EXIT_API_ERROR, err_msg="Command failure:"):
    """Log a successful scheduler response, or report a failed one and raise.

    An OK response has its combined messages logged at info level and the
    method returns normally. For any other response code, ``err_msg`` and the
    combined messages are written through ``self.print_err`` (followed by
    ``self.LOCK_ERROR_MSG`` when the code is LOCK_ERROR), and
    ``self.CommandErrorLogged(err_code, err_msg)`` is raised.
    """
    if resp.responseCode == ResponseCode.OK:
        logging.info(combine_messages(resp))
        return

    # Failure path: everything is printed before the exception is raised.
    self.print_err(err_msg)
    self.print_err("\t%s" % combine_messages(resp))
    if resp.responseCode == ResponseCode.LOCK_ERROR:
        self.print_err("\t%s" % self.LOCK_ERROR_MSG)
    raise self.CommandErrorLogged(err_code, err_msg)
def resolve(self):
    """Yield each task returned by the scheduler for this role/env/job/instances.

    Raises ValueError (after logging at ERROR level through ``self._log``) when
    the query response code is not OK. Because this is a generator, the query
    is only issued once iteration starts.
    """
    task_query = self.query_from(self._role, self._env, self._job, self.instances)
    resp = self._api.query(task_query)

    # Deal with the failure case first so the happy path stays flat.
    if resp.responseCode != ResponseCode.OK:
        self._log(
            logging.ERROR,
            'Error: could not retrieve task information for run command: %s'
            % combine_messages(resp))
        raise ValueError('Could not retrieve task information: %s' % combine_messages(resp))

    for task in resp.result.scheduleStatusResult.tasks:
        yield task
def test_combine_messages(self):
    """Check base.combine_messages for missing, empty, single and multiple details."""
    # (response, expected combined message) pairs, checked in order.
    cases = [
        (Response(responseCode=ResponseCode.ERROR), ''),
        (Response(responseCode=ResponseCode.ERROR, details=[]), ''),
        (Response(responseCode=ResponseCode.ERROR,
                  details=[ResponseDetail(message='Error')]),
         'Error'),
        (Response(responseCode=ResponseCode.ERROR,
                  details=[ResponseDetail(message='Error1'),
                           ResponseDetail(message='Error2')]),
         'Error1, Error2'),
    ]
    for resp, expected in cases:
        assert base.combine_messages(resp) == expected
def resolve(self):
    """Generate the tasks matching this object's role, env, job and instances.

    The scheduler is queried through ``self._api``. A non-OK response is
    logged at ERROR level via ``self._log`` and then surfaced as a ValueError;
    otherwise every task in the schedule status result is yielded in order.
    """
    resp = self._api.query(
        self.query_from(self._role, self._env, self._job, self.instances))

    query_succeeded = resp.responseCode == ResponseCode.OK
    if not query_succeeded:
        self._log(
            logging.ERROR,
            'Error: could not retrieve task information for run command: %s'
            % combine_messages(resp))
        raise ValueError('Could not retrieve task information: %s' % combine_messages(resp))

    for task in resp.result.scheduleStatusResult.tasks:
        yield task
def execute(self, context):
    """Start a job update and print the page where it can be followed.

    Cron jobs are rejected with EXIT_COMMAND_FAILURE and an UpdateConfigError
    from the API is re-raised as a CommandError with
    EXIT_INVALID_CONFIGURATION. When ``context.options.wait`` is set and the
    response carries a result, the return value is whatever
    ``wait_for_update`` returns; otherwise EXIT_OK.
    """
    options = context.options
    job = options.instance_spec.jobkey
    if options.instance_spec.instance == ALL_INSTANCES:
        instances = None
    else:
        instances = options.instance_spec.instance

    config = context.get_job_config(job, options.config_file)
    if config.raw().has_cron_schedule():
        raise context.CommandError(
            EXIT_COMMAND_FAILURE,
            "Cron jobs may only be updated with \"aurora cron schedule\" command")

    api = context.get_api(config.cluster())
    try:
        resp = api.start_job_update(config, options.message, instances)
    except AuroraClientAPI.UpdateConfigError as e:
        raise context.CommandError(EXIT_INVALID_CONFIGURATION, e.message)

    context.log_response_and_raise(
        resp,
        err_code=EXIT_API_ERROR,
        err_msg="Failed to start update due to error:")

    # Without a result there is no update key to report; echo the messages.
    if not resp.result:
        context.print_out(combine_messages(resp))
        return EXIT_OK

    update_key = resp.result.startJobUpdateResult.key
    updated_job = AuroraJobKey.from_thrift(config.cluster(), update_key.job)
    url = get_update_page(api, updated_job, update_key.id)
    context.print_out(self.UPDATE_MSG_TEMPLATE % url)

    if options.wait:
        return wait_for_update(context, self._clock, api, update_key)
    return EXIT_OK
def execute(self, context):
    """Start a job update and print its update page URL.

    Cron jobs are rejected with EXIT_COMMAND_FAILURE. A non-OK scheduler
    response is handled by ``context.log_response_and_raise``. Returns EXIT_OK.
    """
    spec = context.options.instance_spec
    job = spec.jobkey
    if spec.instance == ALL_INSTANCES:
        instances = None
    else:
        instances = spec.instance

    config = context.get_job_config(job, context.options.config_file)
    if config.raw().has_cron_schedule():
        raise context.CommandError(
            EXIT_COMMAND_FAILURE,
            "Cron jobs may only be updated with \"aurora cron schedule\" command")

    api = context.get_api(config.cluster())
    resp = api.start_job_update(config, instances)
    context.log_response_and_raise(
        resp,
        err_code=EXIT_API_ERROR,
        err_msg="Failed to start update due to error:")

    # Pick the single line of output, then print it.
    if resp.result:
        url = context.get_update_page(api, job, resp.result.startJobUpdateResult.updateId)
        output = self.UPDATE_MSG_TEMPLATE % url
    else:
        output = combine_messages(resp)
    context.print_out(output)
    return EXIT_OK
def execute(self, context):
    """Run ssh against the host of the first task matching the instance spec.

    Builds an ``ssh -t`` command line with any extra ssh options, one ``-L``
    forward per requested ``PORT:NAME`` tunnel, the ``user@host`` target and
    the substituted remote command (``bash`` when none is given). Returns the
    exit status of ``subprocess.call``.

    Raises context.CommandError with EXIT_INVALID_PARAMETER when --command is
    given without an instance, when no task is found, or when a tunnel cannot
    be parsed or names a port the task does not have.
    """
    options = context.options
    cluster, role, env, name = options.instance_spec.jobkey
    if options.instance_spec.instance == ALL_INSTANCES:
        instance = None
    else:
        instance = set(options.instance_spec.instance)

    if instance is None and options.command:
        raise context.CommandError(
            EXIT_INVALID_PARAMETER,
            'INSTANCE must be specified when --command option is given')

    api = context.get_api(cluster)
    resp = api.query(api.build_query(role, name, env=env, instances=instance))
    context.log_response_and_raise(
        resp,
        err_msg=('Unable to get information about instance: %s' % combine_messages(resp)))

    tasks = resp.result.scheduleStatusResult.tasks
    if not tasks:
        raise context.CommandError(
            EXIT_INVALID_PARAMETER,
            "Job %s not found" % options.instance_spec.jobkey)

    first_task = tasks[0]
    command = DistributedCommandRunner.substitute(
        options.command or 'bash',
        first_task,
        api.cluster,
        executor_sandbox=options.executor_sandbox)

    ssh_command = ['ssh', '-t']
    ssh_command.extend(options.ssh_options or [])

    assigned = first_task.assignedTask
    task_role = assigned.task.job.role
    slave_host = assigned.slaveHost

    for tunnel in options.tunnels:
        # Both a wrong number of ':' separated fields and a non-integer
        # port surface as ValueError.
        try:
            local_port, port_name = tunnel.split(':')
            local_port = int(local_port)
        except ValueError:
            raise context.CommandError(
                EXIT_INVALID_PARAMETER,
                'Could not parse tunnel: %s. Must be of form PORT:NAME' % tunnel)
        if port_name not in assigned.assignedPorts:
            raise context.CommandError(
                EXIT_INVALID_PARAMETER,
                'Task %s has no port named %s' % (assigned.taskId, port_name))
        forward = '%d:%s:%d' % (local_port, slave_host, assigned.assignedPorts[port_name])
        ssh_command.extend(['-L', forward])

    ssh_command.append('%s@%s' % (options.ssh_user or task_role, slave_host))
    ssh_command.append(command)
    return subprocess.call(ssh_command)
def cancel_update(self, job_key):
    """Cancel the update represented by job_key.

    The job key is validated first. Returns the response produced by
    ``Updater.cancel_update``; a non-OK response is also logged as an error.
    """
    self._assert_valid_job_key(job_key)

    log.info("Canceling update on job %s" % job_key)
    resp = Updater.cancel_update(self._scheduler_proxy, job_key)

    cancelled = resp.responseCode == ResponseCode.OK
    if not cancelled:
        log.error('Error cancelling the update: %s' % combine_messages(resp))
    return resp
def _finish(self):
    """Finish an update by releasing the lock held in ``self._lock``.

    On an OK response ``self._lock`` is reset to None; otherwise the failure
    is logged and the lock attribute is left untouched.

    Returns the Response from the scheduler's releaseLock call.
    """
    resp = self._scheduler.releaseLock(self._lock, LockValidation.CHECKED)

    if resp.responseCode != ResponseCode.OK:
        log.error('There was an error finalizing the update: %s' % combine_messages(resp))
        return resp

    self._lock = None
    return resp
def execute(self, context):
    """Start ssh towards the first matching task's host and wait for it to exit.

    The command line is ``ssh -t``, any extra ssh options, one ``-L`` forward
    per ``PORT:NAME`` tunnel, ``user@host`` and the substituted remote command
    (``bash`` by default). When ``context.options.pid_file`` is set, the pid
    of the spawned process is written to that file. Returns the value of
    ``process.wait()``.

    Raises context.CommandError with EXIT_INVALID_PARAMETER when --command is
    given without an instance, when no task is found, or when a tunnel cannot
    be parsed or names a port the task does not have.
    """
    options = context.options
    cluster, role, env, name = options.instance_spec.jobkey
    if options.instance_spec.instance == ALL_INSTANCES:
        instance = None
    else:
        instance = set(options.instance_spec.instance)

    if instance is None and options.command:
        raise context.CommandError(
            EXIT_INVALID_PARAMETER,
            'INSTANCE must be specified when --command option is given')

    api = context.get_api(cluster)
    resp = api.query(api.build_query(role, name, env=env, instances=instance))
    context.log_response_and_raise(
        resp,
        err_msg=('Unable to get information about instance: %s' % combine_messages(resp)))

    tasks = resp.result.scheduleStatusResult.tasks
    if not tasks:
        raise context.CommandError(
            EXIT_INVALID_PARAMETER,
            "Job %s not found" % options.instance_spec.jobkey)

    first_task = tasks[0]
    command = DistributedCommandRunner.substitute(
        options.command or 'bash',
        first_task,
        api.cluster,
        executor_sandbox=options.executor_sandbox)

    ssh_command = ['ssh', '-t']
    ssh_command.extend(options.ssh_options or [])

    assigned = first_task.assignedTask
    task_role = assigned.task.job.role
    slave_host = assigned.slaveHost

    for tunnel in options.tunnels:
        try:
            local_port, port_name = tunnel.split(':')
            local_port = int(local_port)
        except ValueError:
            raise context.CommandError(
                EXIT_INVALID_PARAMETER,
                'Could not parse tunnel: %s. Must be of form PORT:NAME' % tunnel)
        if port_name not in assigned.assignedPorts:
            raise context.CommandError(
                EXIT_INVALID_PARAMETER,
                'Task %s has no port named %s' % (assigned.taskId, port_name))
        forward = '%d:%s:%d' % (local_port, slave_host, assigned.assignedPorts[port_name])
        ssh_command.extend(['-L', forward])

    ssh_command.append('%s@%s' % (options.ssh_user or task_role, slave_host))
    ssh_command.append(command)

    process = subprocess.Popen(ssh_command)

    # The pid is recorded only after the process has been spawned.
    if options.pid_file:
        with open(options.pid_file, "w") as pid_out:
            pid_out.write(str(process.pid))

    return process.wait()
def restart(self, instances):
    """Restart the given instances in batches, re-queuing those that fail.

    When ``instances`` is empty, the instance ids of the job's currently
    active tasks are used instead. Batches of ``batch_size`` are restarted
    until no instances remain or the failure threshold reports a failed
    update; instances reported back by the instance watcher are appended to
    the remaining work.

    Returns the status-query response when that query fails or when there is
    nothing to restart, the first non-OK restartShards response, or otherwise
    the response of the last restartShards call.
    """
    # Verify that this operates on a valid job.
    active_query = self._job_key.to_thrift_query()
    active_query.statuses = ACTIVE_STATES
    status = self._scheduler.getTasksWithoutConfigs(active_query)
    if status.responseCode != ResponseCode.OK:
        return status

    settings = self._restart_settings
    failure_threshold = FailureThreshold(
        settings.max_per_instance_failures,
        settings.max_total_failures)

    if not instances:
        active_tasks = status.result.scheduleStatusResult.tasks
        instances = sorted(task.assignedTask.instanceId for task in active_tasks)
        if not instances:
            log.info("No instances specified, and no active instances found in job %s"
                     % self._job_key)
            log.info("Nothing to do.")
            return status

    log.info("Performing rolling restart of job %s (instances: %s)" % (self._job_key, instances))

    while instances and not failure_threshold.is_failed_update():
        batch_size = self._restart_settings.batch_size
        batch, instances = instances[:batch_size], instances[batch_size:]

        log.info("Restarting instances: %s", batch)
        resp = self._scheduler.restartShards(self._job_key.to_thrift(), batch, self._lock)
        if resp.responseCode != ResponseCode.OK:
            log.error('Error restarting instances: %s', combine_messages(resp))
            return resp

        # Failed instances go back on the queue and count against the threshold.
        failed_instances = self._instance_watcher.watch(batch)
        instances += failed_instances
        failure_threshold.update_failure_counts(failed_instances)

    if failure_threshold.is_failed_update():
        log.info("Restart failures threshold reached. Aborting")
    else:
        log.info("All instances were restarted successfully")

    return resp
def _build_path(context, target):
    """Turn an scp target into a local path or a ``user@host:path`` string.

    A target without a task instance is returned as its plain path. Otherwise
    the first task matching the instance is looked up and the path is resolved
    on its host: absolute paths are used as given, paths starting with a tilde
    are rejected, and any other path is joined onto the task's sandbox path.

    Raises context.CommandError with EXIT_INVALID_PARAMETER when no task is
    found or when the path uses tilde expansion.
    """
    task_instance, path = ScpCommand._extract_task_instance_and_path(context, target)

    # No jobkey is specified therefore we are using a local path.
    if task_instance is None:
        return path

    # Jobkey specified, we want to convert to the user@host:file scp format
    cluster, role, env, name = task_instance.jobkey
    instance = set([task_instance.instance])
    api = context.get_api(cluster)
    resp = api.query(api.build_query(role, name, env=env, instances=instance))
    context.log_response_and_raise(
        resp,
        err_msg=('Unable to get information about instance: %s' % combine_messages(resp)))

    tasks = resp.result.scheduleStatusResult.tasks
    if not tasks:
        raise context.CommandError(
            EXIT_INVALID_PARAMETER,
            ScpCommand.JOB_NOT_FOUND_ERROR_MSG % (task_instance.jobkey, task_instance.instance))

    assigned = tasks[0].assignedTask
    role = assigned.task.job.role
    slave_host = assigned.slaveHost

    # If path is absolute, use that. Else if it is a tilde expansion, throw an error.
    # Otherwise, use sandbox as relative root.
    normalized = os.path.normpath(path)
    if os.path.isabs(normalized):
        return '%s@%s:%s' % (role, slave_host, normalized)

    if normalized.startswith('~/') or normalized == '~':
        raise context.CommandError(
            EXIT_INVALID_PARAMETER,
            ScpCommand.TILDE_USAGE_ERROR_MSG % path)

    sandbox_template = DistributedCommandRunner.thermos_sandbox(
        api.cluster,
        executor_sandbox=context.options.executor_sandbox)
    thermos_namespace = ThermosContext(task_id=assigned.taskId, ports=assigned.assignedPorts)
    sandbox_path = String(sandbox_template) % Environment(thermos=thermos_namespace)

    # Join the individual folders to the sandbox path to build safely
    path_parts = normalized.split(os.sep)
    final_path = os.path.join(str(sandbox_path), *path_parts)
    return '%s@%s:%s' % (role, slave_host, final_path)
def execute(self, context):
    """Show the update diff, start a job update and report its page.

    A fresh UUID is sent as CLIENT_UPDATE_ID metadata with the request. The
    response is passed to ``context.log_response_and_raise`` unless
    ``self._is_update_already_in_progress`` accepts it. Cron jobs are rejected
    with EXIT_COMMAND_FAILURE and an UpdateConfigError is re-raised as a
    CommandError with EXIT_INVALID_CONFIGURATION.

    Returns the result of ``wait_for_update`` when a result is present and
    ``context.options.wait`` is set, otherwise EXIT_OK.
    """
    options = context.options
    job = options.instance_spec.jobkey
    if options.instance_spec.instance == ALL_INSTANCES:
        instances = None
    else:
        instances = options.instance_spec.instance
    update_id = str(uuid.uuid4())

    config = context.get_job_config(job, options.config_file)
    if config.raw().has_cron_schedule():
        raise context.CommandError(
            EXIT_COMMAND_FAILURE,
            "Cron jobs may only be updated with \"aurora cron schedule\" command")

    api = context.get_api(config.cluster())
    DiffFormatter(context, config).show_job_update_diff(instances)

    metadata = {CLIENT_UPDATE_ID: update_id}
    try:
        resp = api.start_job_update(config, options.message, instances, metadata)
    except AuroraClientAPI.UpdateConfigError as e:
        raise context.CommandError(EXIT_INVALID_CONFIGURATION, e.message)

    if not self._is_update_already_in_progress(resp, update_id):
        context.log_response_and_raise(
            resp,
            err_code=EXIT_API_ERROR,
            err_msg=self.FAILED_TO_START_UPDATE_ERROR_MSG)

    # Without a result there is no update key to report; echo the messages.
    if not resp.result:
        context.print_out(combine_messages(resp))
        return EXIT_OK

    update_key = resp.result.startJobUpdateResult.key
    updated_job = AuroraJobKey.from_thrift(config.cluster(), update_key.job)
    url = get_update_page(api, updated_job, update_key.id)
    context.print_out(self.UPDATE_MSG_TEMPLATE % url)

    if options.open_browser:
        webbrowser.open_new_tab(url)

    if options.wait:
        return wait_for_update(context, self._clock, api, update_key, update_state_to_err_code)
    return EXIT_OK
def validate_quota_from_requested(self, job_key, production, released, acquired):
    """Validates requested change will not exceed the available quota.

    Arguments:
    job_key -- job key.
    production -- production flag.
    released -- production CapacityRequest to be released (in case of job update).
    acquired -- production CapacityRequest to be acquired.

    Returns: a Response with ResponseCode.OK when the check passes or the job
    is not production, the scheduler's own response when getQuota fails, or a
    synthesized INVALID_REQUEST Response when the quota would be exceeded.
    """
    # TODO(wfarner): Avoid synthesizing scheduler responses.
    resp_ok = Response(
        responseCode=ResponseCode.OK,
        details=[ResponseDetail(message='Quota check successful.')])
    if not production:
        return resp_ok

    resp = self._scheduler.getQuota(job_key.role)
    if resp.responseCode != ResponseCode.OK:
        log.error('Failed to get quota from scheduler: %s' % combine_messages(resp))
        return resp

    quota_result = resp.result.getQuotaResult
    allocated = CapacityRequest(quota_result.quota)
    consumed = CapacityRequest(quota_result.prodSharedConsumption)
    requested = acquired - released
    effective = allocated - consumed - requested

    if effective.valid():
        return resp_ok

    # Quota would be exceeded: print the breakdown before reporting failure.
    log.info('Not enough quota to create/update job.')
    print_quota(allocated.quota(), 'Total allocated quota', job_key.role)
    print_quota(consumed.quota(), 'Consumed quota', job_key.role)
    print_quota(requested.quota(), 'Requested', job_key.name)
    print_quota(effective.invert_or_reset().quota(), 'Additional quota required', job_key.role)

    # TODO(wfarner): Avoid synthesizing scheduler responses.
    return Response(
        responseCode=ResponseCode.INVALID_REQUEST,
        details=[ResponseDetail(message='Failed quota check.')])
def restart(self, instances):
    """Restart the given instances in batches, re-queuing those that fail.

    When ``instances`` is empty, the instance ids of the job's currently
    active tasks are used instead. Batches of ``batch_size`` are restarted
    (scheduler calls are made with ``retry=True``) until no instances remain
    or the failure threshold reports a failed update; instances reported back
    by the instance watcher are appended to the remaining work.

    Returns the status-query response when that query fails or when there is
    nothing to restart, the first non-OK restartShards response, or otherwise
    the response of the last restartShards call.
    """
    # Verify that this operates on a valid job.
    active_query = self._job_key.to_thrift_query()
    active_query.statuses = ACTIVE_STATES
    status = self._scheduler.getTasksWithoutConfigs(active_query, retry=True)
    if status.responseCode != ResponseCode.OK:
        return status

    settings = self._restart_settings
    failure_threshold = FailureThreshold(
        settings.max_per_instance_failures,
        settings.max_total_failures)

    if not instances:
        active_tasks = status.result.scheduleStatusResult.tasks
        instances = sorted(task.assignedTask.instanceId for task in active_tasks)
        if not instances:
            log.info("No instances specified, and no active instances found in job %s"
                     % self._job_key)
            log.info("Nothing to do.")
            return status

    log.info("Performing rolling restart of job %s (instances: %s)" % (self._job_key, instances))

    while instances and not failure_threshold.is_failed_update():
        batch_size = self._restart_settings.batch_size
        batch, instances = instances[:batch_size], instances[batch_size:]

        log.info("Restarting instances: %s", batch)
        resp = self._scheduler.restartShards(self._job_key.to_thrift(), batch, retry=True)
        if resp.responseCode != ResponseCode.OK:
            log.error('Error restarting instances: %s', combine_messages(resp))
            return resp

        # Failed instances go back on the queue and count against the threshold.
        failed_instances = self._instance_watcher.watch(batch)
        instances += failed_instances
        failure_threshold.update_failure_counts(failed_instances)

    if failure_threshold.is_failed_update():
        log.info("Restart failures threshold reached. Aborting")
    else:
        log.info("All instances were restarted successfully")

    return resp
def _build_path(context, target):
    """Resolve an scp target to a local path or a ``user@host:path`` string.

    Targets with no task instance are local and returned as the bare path.
    For remote targets the first matching task supplies the role and host;
    absolute paths are kept, tilde paths are refused, and relative paths are
    rooted at the task's sandbox path.

    Raises context.CommandError with EXIT_INVALID_PARAMETER when no task is
    found or when the path uses tilde expansion.
    """
    (task_instance, path) = ScpCommand._extract_task_instance_and_path(context, target)

    # No jobkey is specified therefore we are using a local path.
    if task_instance is None:
        return path

    # Jobkey specified, we want to convert to the user@host:file scp format
    (cluster, role, env, name) = task_instance.jobkey
    api = context.get_api(cluster)
    task_query = api.build_query(
        role, name, env=env, instances=set([task_instance.instance]))
    resp = api.query(task_query)
    context.log_response_and_raise(
        resp,
        err_msg=('Unable to get information about instance: %s' % combine_messages(resp)))

    found_tasks = resp.result.scheduleStatusResult.tasks
    if not found_tasks:
        raise context.CommandError(
            EXIT_INVALID_PARAMETER,
            ScpCommand.JOB_NOT_FOUND_ERROR_MSG % (task_instance.jobkey, task_instance.instance))

    assigned = found_tasks[0].assignedTask
    role = assigned.task.job.role
    slave_host = assigned.slaveHost

    # If path is absolute, use that. Else if it is a tilde expansion, throw an error.
    # Otherwise, use sandbox as relative root.
    normalized_input_path = os.path.normpath(path)
    is_tilde_path = (
        normalized_input_path.startswith('~/') or normalized_input_path == '~')

    if os.path.isabs(normalized_input_path):
        final_path = normalized_input_path
    elif is_tilde_path:
        raise context.CommandError(
            EXIT_INVALID_PARAMETER,
            ScpCommand.TILDE_USAGE_ERROR_MSG % path)
    else:
        sandbox_template = DistributedCommandRunner.thermos_sandbox(
            api.cluster,
            executor_sandbox=context.options.executor_sandbox)
        thermos_namespace = ThermosContext(
            task_id=assigned.taskId,
            ports=assigned.assignedPorts)
        sandbox_path = String(sandbox_template) % Environment(thermos=thermos_namespace)
        # Join the individual folders to the sandbox path to build safely
        final_path = os.path.join(str(sandbox_path), *normalized_input_path.split(os.sep))

    return '%s@%s:%s' % (role, slave_host, final_path)
def execute(self, context):
    """Display the pending diff, start the job update and print its page URL.

    The request carries a newly generated UUID under CLIENT_UPDATE_ID. Unless
    ``self._is_update_already_in_progress`` accepts the response, it is passed
    to ``context.log_response_and_raise``. Cron jobs raise a CommandError with
    EXIT_COMMAND_FAILURE; an UpdateConfigError becomes a CommandError with
    EXIT_INVALID_CONFIGURATION.

    Returns the result of ``wait_for_update`` when a result is present and
    ``context.options.wait`` is set, otherwise EXIT_OK.
    """
    spec = context.options.instance_spec
    job = spec.jobkey
    instances = None if spec.instance == ALL_INSTANCES else spec.instance
    update_id = str(uuid.uuid4())

    config = context.get_job_config(job, context.options.config_file)
    if config.raw().has_cron_schedule():
        raise context.CommandError(
            EXIT_COMMAND_FAILURE,
            "Cron jobs may only be updated with \"aurora cron schedule\" command")

    api = context.get_api(config.cluster())
    formatter = DiffFormatter(context, config)
    formatter.show_job_update_diff(instances)

    try:
        resp = api.start_job_update(
            config,
            context.options.message,
            instances,
            {CLIENT_UPDATE_ID: update_id})
    except AuroraClientAPI.UpdateConfigError as e:
        raise context.CommandError(EXIT_INVALID_CONFIGURATION, e.message)

    already_in_progress = self._is_update_already_in_progress(resp, update_id)
    if not already_in_progress:
        context.log_response_and_raise(
            resp,
            err_code=EXIT_API_ERROR,
            err_msg=self.FAILED_TO_START_UPDATE_ERROR_MSG)

    if resp.result:
        update_key = resp.result.startJobUpdateResult.key
        url = get_update_page(
            api,
            AuroraJobKey.from_thrift(config.cluster(), update_key.job),
            update_key.id)
        context.print_out(self.UPDATE_MSG_TEMPLATE % url)

        if context.options.open_browser:
            webbrowser.open_new_tab(url)

        if context.options.wait:
            return wait_for_update(
                context, self._clock, api, update_key, update_state_to_err_code)
    else:
        context.print_out(combine_messages(resp))

    return EXIT_OK
def validate_quota_from_requested(self, job_key, production, released, acquired):
    """Validates requested change will not exceed the available quota.

    Arguments:
    job_key -- job key.
    production -- production flag.
    released -- production CapacityRequest to be released (in case of job update).
    acquired -- production CapacityRequest to be acquired.

    Returns: a Response with ResponseCode.OK when the check passes or the job
    is not production, the scheduler's own response when getQuota fails, or a
    synthesized INVALID_REQUEST Response when the quota would be exceeded.
    """
    # TODO(wfarner): Avoid synthesizing scheduler responses.
    resp_ok = Response(
        responseCode=ResponseCode.OK,
        details=[ResponseDetail(message='Quota check successful.')])
    if not production:
        return resp_ok

    resp = self._scheduler.getQuota(job_key.role)
    if resp.responseCode != ResponseCode.OK:
        log.error('Failed to get quota from scheduler: %s' % combine_messages(resp))
        return resp

    quota_result = resp.result.getQuotaResult
    allocated = CapacityRequest(quota_result.quota)
    consumed = CapacityRequest(quota_result.prodConsumption)
    requested = acquired - released
    effective = allocated - consumed - requested

    if effective.valid():
        return resp_ok

    # Quota would be exceeded: print the breakdown before reporting failure.
    log.info('Not enough quota to create/update job.')
    print_quota(allocated.quota(), 'Total allocated quota', job_key.role)
    print_quota(consumed.quota(), 'Consumed quota', job_key.role)
    print_quota(requested.quota(), 'Requested', job_key.name)
    print_quota(effective.invert_or_reset().quota(), 'Additional quota required', job_key.role)

    # TODO(wfarner): Avoid synthesizing scheduler responses.
    return Response(
        responseCode=ResponseCode.INVALID_REQUEST,
        details=[ResponseDetail(message='Failed quota check.')])
def execute(self, context):
    """Run ssh against the host of the task for the requested instance.

    Builds an ``ssh -t`` command line with any extra ssh options, one ``-L``
    forward per requested ``PORT:NAME`` tunnel, the ``user@host`` target and
    the substituted remote command (``bash`` when none is given). The login
    role comes from the task's job when set, else from the task's owner.
    Returns the exit status of ``subprocess.call``.

    Raises context.CommandError with EXIT_INVALID_PARAMETER when no task is
    found, or when a tunnel cannot be parsed or names a port the task does
    not have.
    """
    options = context.options
    cluster, role, env, name = options.task_instance.jobkey
    instance = options.task_instance.instance

    api = context.get_api(cluster)
    task_query = api.build_query(role, name, env=env, instances=set([int(instance)]))
    resp = api.query(task_query)
    context.log_response_and_raise(
        resp,
        err_msg=("Unable to get information about instance: %s" % combine_messages(resp)))

    tasks = resp.result.scheduleStatusResult.tasks
    if not tasks:
        raise context.CommandError(
            EXIT_INVALID_PARAMETER,
            "Job %s not found" % options.task_instance.jobkey)

    first_task = tasks[0]
    command = DistributedCommandRunner.substitute(
        options.command or "bash",
        first_task,
        api.cluster,
        executor_sandbox=options.executor_sandbox)

    ssh_command = ["ssh", "-t"]
    ssh_command.extend(options.ssh_options or [])

    assigned = first_task.assignedTask
    if assigned.task.job:
        task_role = assigned.task.job.role
    else:
        task_role = assigned.task.owner.role
    slave_host = assigned.slaveHost

    for tunnel in options.tunnels:
        try:
            local_port, port_name = tunnel.split(":")
            local_port = int(local_port)
        except ValueError:
            raise context.CommandError(
                EXIT_INVALID_PARAMETER,
                "Could not parse tunnel: %s. Must be of form PORT:NAME" % tunnel)
        if port_name not in assigned.assignedPorts:
            raise context.CommandError(
                EXIT_INVALID_PARAMETER,
                "Task %s has no port named %s" % (assigned.taskId, port_name))
        forward = "%d:%s:%d" % (local_port, slave_host, assigned.assignedPorts[port_name])
        ssh_command.extend(["-L", forward])

    ssh_command.append("%s@%s" % (options.ssh_user or task_role, slave_host))
    ssh_command.append(command)
    return subprocess.call(ssh_command)
def prune_tasks(args, options):
    # Builds a TaskQuery from whichever of states/role/environment/limit are
    # set, sends it to the admin client's prune_tasks for the cluster named in
    # args[0], and dies with the combined messages on a non-OK response.
    if not args:
        die('Must specify at least cluster.')
    cluster = args[0]

    task_query = TaskQuery()
    if options.states:
        to_status = ScheduleStatus._NAMES_TO_VALUES.get
        task_query.statuses = set(to_status(state) for state in options.states.split(','))
    if options.role:
        task_query.role = options.role
    if options.environment:
        task_query.environment = options.environment
    if options.limit:
        task_query.limit = options.limit

    api = make_admin_client_with_options(cluster)
    rsp = api.prune_tasks(task_query)
    if rsp.responseCode == ResponseCode.OK:
        print("Tasks pruned.")
    else:
        die('Failed to prune tasks: %s' % combine_messages(rsp))
def execute(self, context):
    """Kick off a job update and print where to follow it.

    Raises a CommandError with EXIT_COMMAND_FAILURE for cron jobs. A non-OK
    scheduler response is handled by ``context.log_response_and_raise``.
    Prints the update page URL when the response carries a result, otherwise
    the combined response messages. Returns EXIT_OK.
    """
    options = context.options
    job = options.instance_spec.jobkey
    instances = options.instance_spec.instance
    if instances == ALL_INSTANCES:
        instances = None

    config = context.get_job_config(job, options.config_file)
    is_cron = config.raw().has_cron_schedule()
    if is_cron:
        raise context.CommandError(
            EXIT_COMMAND_FAILURE,
            "Cron jobs may only be updated with \"aurora cron schedule\" command")

    api = context.get_api(config.cluster())
    resp = api.start_job_update(config, instances)
    context.log_response_and_raise(
        resp,
        err_code=EXIT_API_ERROR,
        err_msg="Failed to start update due to error:")

    if not resp.result:
        context.print_out(combine_messages(resp))
        return EXIT_OK

    update_id = resp.result.startJobUpdateResult.updateId
    url = context.get_update_page(api, job, update_id)
    context.print_out(self.UPDATE_MSG_TEMPLATE % url)
    return EXIT_OK
def prune_tasks(args, options):
    # Prunes tasks on the cluster named by args[0]. Only the filters that are
    # set on `options` (states, role, environment, limit) are copied onto the
    # TaskQuery; a non-OK response ends in die() with the combined messages.
    if not args:
        die('Must specify at least cluster.')
    cluster = args[0]

    query = TaskQuery()
    if options.states:
        state_names = options.states.split(',')
        query.statuses = set(map(ScheduleStatus._NAMES_TO_VALUES.get, state_names))
    if options.role:
        query.role = options.role
    if options.environment:
        query.environment = options.environment
    if options.limit:
        query.limit = options.limit

    rsp = make_admin_client_with_options(cluster).prune_tasks(query)
    pruned = rsp.responseCode == ResponseCode.OK
    if not pruned:
        die('Failed to prune tasks: %s' % combine_messages(rsp))
    else:
        print("Tasks pruned.")
def query(args, options):
    """usage: query [--force]
                    [--listformat=FORMAT]
                    [--shards=N[,N,...]]
                    [--states=State[,State,...]]
                    cluster [role [job]]

    Query Mesos about jobs and tasks.
    """
    # NOTE: the docstring above is kept as usage text only; implementation
    # notes live in comments.
    #
    # args    -- positional arguments: cluster, then optional role and job.
    # options -- parsed flags; reads .states, .shards, .listformat and .force.
    # Prints one line per returned task, formatted with options.listformat.
    # Calls die() on invalid input, on an expensive query without --force, on
    # a non-OK scheduler response, or on an unknown key in the format string.

    def _convert_fmt_string(fmtstr):
        # Rewrite every %name% placeholder as a %(name)s mapping key so the
        # result can be used with the % operator and a dict.
        import re

        def convert(match):
            return "%%(%s)s" % match.group(1)
        return re.sub(r'%(\w+)%', convert, fmtstr)

    def flatten_task(t, d=None):
        # Recursively copy attributes of `t` (and of any attribute value that
        # itself has a __dict__) into one flat dict. A fresh dict is created
        # per top-level call: a mutable default here would be shared by every
        # task flattened during this query.
        if d is None:
            d = {}
        for key in t.__dict__.keys():
            val = getattr(t, key)
            try:
                val.__dict__.keys()
            except AttributeError:
                d[key] = val
            else:
                flatten_task(val, d)
        return d

    def map_values(d):
        # Translate raw values into display values; only 'status' is mapped
        # (numeric value -> name), everything else passes through unchanged.
        default_value = lambda v: v
        mapping = {
            'status': lambda v: ScheduleStatus._VALUES_TO_NAMES[v],
        }
        return dict(
            (k, mapping.get(k, default_value)(v)) for (k, v) in d.items()
        )

    # Validate state names only when some were given, so that an unset or
    # empty --states reaches the "all states" fallback below instead of
    # failing on the split/validation.
    if options.states:
        for state in options.states.split(','):
            if state not in ScheduleStatus._NAMES_TO_VALUES:
                msg = "Unknown state '%s' specified.  Valid states are:\n" % state
                msg += ','.join(ScheduleStatus._NAMES_TO_VALUES.keys())
                die(msg)

    # Role, Job, Instances, States, and the listformat
    if len(args) == 0:
        die('Must specify at least cluster.')

    cluster = args[0]
    role = args[1] if len(args) > 1 else None
    job = args[2] if len(args) > 2 else None
    instances = set(map(int, options.shards.split(','))) if options.shards else set()

    if options.states:
        states = set(map(ScheduleStatus._NAMES_TO_VALUES.get, options.states.split(',')))
    else:
        states = ACTIVE_STATES | TERMINAL_STATES
    listformat = _convert_fmt_string(options.listformat)

    # Figure out "expensive" queries here and bone if they do not have --force
    #  - Does not specify role
    if not role and not options.force:
        die('--force is required for expensive queries (no role specified)')

    #  - Does not specify job
    if not job and not options.force:
        die('--force is required for expensive queries (no job specified)')

    #  - Specifies status outside of ACTIVE_STATES
    if not (states <= ACTIVE_STATES) and not options.force:
        die('--force is required for expensive queries (states outside ACTIVE states')

    api = make_admin_client(cluster)
    query_info = api.query(TaskQuery(role=role,
                                     jobName=job,
                                     instanceIds=instances,
                                     statuses=states))
    if query_info.responseCode != ResponseCode.OK:
        die('Failed to query scheduler: %s' % combine_messages(query_info))

    tasks = query_info.result.scheduleStatusResult.tasks
    if tasks is None:
        return

    try:
        for task in tasks:
            d = flatten_task(task)
            print(listformat % map_values(d))
    except KeyError:
        # The format string referenced a key the flattened task does not have.
        msg = "Unknown key in format string.  Valid keys are:\n"
        msg += ','.join(d.keys())
        die(msg)
def __str__(self):
    """Render as '<class name>: <response code name>: <combined messages>'.

    The code name falls back to 'UNKNOWN' when the response code is not in
    ResponseCode._VALUES_TO_NAMES.
    """
    response = self.response
    code_name = ResponseCode._VALUES_TO_NAMES.get(response.responseCode, 'UNKNOWN')
    return '%s: %s: %s' % (self.__class__.__name__, code_name, combine_messages(response))
def __str__(self):
    """Return the class name, response code name and combined messages.

    The three parts are joined with ': '; a response code missing from
    ResponseCode._VALUES_TO_NAMES is shown as 'UNKNOWN'.
    """
    parts = (
        self.__class__.__name__,
        ResponseCode._VALUES_TO_NAMES.get(self.response.responseCode, 'UNKNOWN'),
        combine_messages(self.response),
    )
    return '%s: %s: %s' % parts