def really_killall(args, options):
  """Helper for testing purposes: make it easier to mock out the actual kill process,
  while testing hooks in the command dispatch process.
  """
  maybe_disable_hooks(options)
  job_key = AuroraJobKey.from_path(args[0])
  config_file = args[1] if len(args) > 1 else None  # the config for hooks
  new_cmd = ["job", "killall", args[0]]
  if config_file is not None:
    new_cmd.append("--config=%s" % config_file)
  if options.open_browser:
    new_cmd.append("--open-browser")
  if options.batch_size is not None:
    new_cmd.append("--batch-size=%s" % options.batch_size)
  if options.max_total_failures is not None:
    new_cmd.append("--max-total-failures=%s" % options.max_total_failures)
  v1_deprecation_warning("killall", new_cmd)

  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  api = make_client(job_key.cluster)
  if options.batch_size is not None:
    kill_in_batches(api, job_key, None, options.batch_size, options.max_failures_option)
  else:
    resp = api.kill_job(job_key, None, config=config)
    check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env,
              job_key.name)
  wait_kill_tasks(api.scheduler_proxy, job_key)

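# The batching path above delegates to kill_in_batches, which lives in
# apache.aurora.client.commands.core. The standalone sketch below only
# illustrates the general pattern (fixed-size batches with a failure budget);
# every name in it is hypothetical and it is not the Aurora helper itself.
def kill_in_fixed_batches(kill_fn, instance_ids, batch_size, max_failures):
  """Illustrative only: apply kill_fn to instance_ids in slices of batch_size,
  giving up once more than max_failures batches have failed."""
  failures = 0
  for start in range(0, len(instance_ids), batch_size):
    batch = instance_ids[start:start + batch_size]
    if not kill_fn(batch):  # kill_fn is assumed to return True on success
      failures += 1
      if failures > max_failures:
        raise RuntimeError('too many failed kill batches, aborting')
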
def test_make_client_hooks_disabled():
  with patch('apache.aurora.client.factory.CLUSTERS', new=TEST_CLUSTERS):
    client = make_client(TEST_CLUSTER, 'some-user-agent', enable_hooks=False)
    assert not isinstance(client, HookedAuroraClientAPI)
    assert isinstance(client, AuroraClientAPI)

def do_open(args, _):
  """usage: open cluster[/role[/env/job]]

  Opens the scheduler page for a cluster, role or job in the default web browser.
  """
  cluster_name = role = env = job = None
  args = args[0].split("/")
  if len(args) > 0:
    cluster_name = args[0]
  if len(args) > 1:
    role = args[1]
  if len(args) > 2:
    env = args[2]
    if len(args) > 3:
      job = args[3]
    else:
      # TODO(ksweeney): Remove this after MESOS-2945 is completed.
      die('env scheduler pages are not yet implemented, please specify job')

  if not cluster_name:
    die('cluster is required')

  api = make_client(cluster_name)

  import webbrowser
  webbrowser.open_new_tab(
      synthesize_url(api.scheduler_proxy.scheduler_client().url, role, env, job))

def really_update(job_spec, config_file, options):
  def warn_if_dangerous_change(api, job_spec, config):
    # Get the current job status, so that we can check if there's anything
    # dangerous about this update.
    resp = api.query_no_configs(api.build_query(config.role(),
        config.name(),
        statuses=ACTIVE_STATES,
        env=config.environment()))
    if resp.responseCode != ResponseCode.OK:
      die('Could not get job status from server for comparison: %s' % resp.messageDEPRECATED)
    remote_tasks = [t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks]
    resp = api.populate_job_config(config)
    if resp.responseCode != ResponseCode.OK:
      die('Server could not populate job config for comparison: %s' % resp.messageDEPRECATED)
    local_task_count = len(resp.result.populateJobResult.populated)
    remote_task_count = len(remote_tasks)
    # Warn when the update is a large change: the task count grows by 4x or
    # more, shrinks to a quarter or less, or drops to zero.
    if (local_task_count >= 4 * remote_task_count or
        local_task_count <= remote_task_count / 4 or
        local_task_count == 0):
      print('Warning: this update is a large change. Press ^c within 5 seconds to abort')
      time.sleep(5)

  maybe_disable_hooks(options)
  config = get_job_config(job_spec, config_file, options)
  api = make_client(config.cluster())
  if not options.force:
    warn_if_dangerous_change(api, job_spec, config)
  resp = api.update_job(config, options.health_check_interval_seconds, options.shards)
  check_and_log_response(resp)

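# Worked example of the "dangerous change" check above: with a remote task
# count of 8, the warning fires when the local config populates 32 or more
# tasks (>= 4x growth), 2 or fewer tasks (<= 1/4 shrinkage), or 0 tasks.
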
def do_open(args, _):
  """usage: open cluster[/role[/env/job]]

  Opens the scheduler page for a cluster, role or job in the default web browser.
  """
  cluster_name = role = env = job = None
  if len(args) == 0:
    print('Open command requires a jobkey parameter.')
    exit(1)
  v1_deprecation_warning("open", ["job", "open"])
  args = args[0].split("/")
  if len(args) > 0:
    cluster_name = args[0]
  if len(args) > 1:
    role = args[1]
  if len(args) > 2:
    env = args[2]
    if len(args) > 3:
      job = args[3]
    else:
      # TODO(ksweeney): Remove this after MESOS-2945 is completed.
      die('env scheduler pages are not yet implemented, please specify job')

  if not cluster_name:
    die('cluster is required')

  api = make_client(cluster_name)

  import webbrowser
  webbrowser.open_new_tab(
      synthesize_url(api.scheduler_proxy.scheduler_client().url, role, env, job))

def list_jobs(cluster_and_role):
  """usage: list_jobs [--show-cron] cluster/role/env/job

  Shows all jobs that match the job-spec known by the scheduler.
  If --show-cron is specified, then also shows the registered cron schedule.
  """
  def show_job_simple(job):
    if options.show_cron_schedule:
      print(('{0}/{1.key.role}/{1.key.environment}/{1.key.name}' +
          '\t\'{1.cronSchedule}\'\t{1.cronCollisionPolicy}').format(cluster, job))
    else:
      print('{0}/{1.key.role}/{1.key.environment}/{1.key.name}'.format(cluster, job))

  def show_job_pretty(job):
    print("Job %s/%s/%s/%s:" %
        (cluster, job.key.role, job.key.environment, job.key.name))
    print('\tcron schedule: %s' % job.cronSchedule)
    print('\tcron policy: %s' % job.cronCollisionPolicy)

  options = app.get_options()
  if options.show_cron_schedule and options.pretty:
    print_fn = show_job_pretty
  else:
    print_fn = show_job_simple
  # Take the cluster_and_role parameter, and split it into its two components.
  if cluster_and_role.count('/') != 1:
    die('list_jobs parameter must be in cluster/role format')
  cluster, role = cluster_and_role.split('/')
  api = make_client(cluster)
  resp = api.get_jobs(role)
  check_and_log_response(resp)
  for job in resp.result.getJobsResult.configs:
    print_fn(job)

def restart_job(self, cluster, role, environment, jobname, jobspec=None, instances=[]):
  """Method to restart aurora job"""
  job_key = AuroraJobKey(cluster, role, environment, jobname)
  logger.info("request to restart => %s", job_key.to_path())
  instances = self.pack_instance_list(instances)
  try:
    config = self.make_job_config(job_key, jobspec)
  except Exception as e:
    return (job_key.to_path(),
            ["Failed to restart Aurora job",
             "Can not create job configuration object because", str(e)])
  # these are the default values from apache.aurora.client.commands.core.restart()
  updater_config = UpdaterConfig(
      1,   # options.batch_size
      60,  # options.restart_threshold
      30,  # options.watch_secs
      0,   # options.max_per_shard_failures
      0    # options.max_total_failures
  )
  api = make_client(job_key.cluster)
  # instances = all shards, health check = 3 sec
  resp = api.restart(job_key, instances, updater_config, 3, config=config)
  if resp.responseCode != ResponseCode.OK:
    logger.warning("aurora -- restart job failed")
    responseStr = self.response_string(resp)
    logger.warning(responseStr)
    return (job_key.to_path(),
            ["Error reported by aurora client:", responseStr])
  logger.info("aurora -- restart job successful")
  return (job_key.to_path(), None)

def ssh(args, options):
  """usage: ssh cluster/role/env/job shard [args...]

  Initiate an SSH session on the machine that a shard is running on.
  """
  if not args:
    die('Job path is required')
  job_path = args.pop(0)
  try:
    cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
  except AuroraJobKey.Error as e:
    die('Invalid job path "%s": %s' % (job_path, e))
  if not args:
    die('Shard is required')
  try:
    shard = int(args.pop(0))
  except ValueError:
    die('Shard must be an integer')

  newcmd = ["task", "ssh", "%s/%s" % (job_path, shard)]
  if len(options.tunnels) > 0:
    newcmd.append("--tunnels=%s" % options.tunnels)
  if options.ssh_user is not None:
    newcmd.append("--ssh-user=%s" % options.ssh_user)
  if options.executor_sandbox:
    newcmd.append("--executor-sandbox")
  if len(args) > 0:
    newcmd.append("--command=\"%s\"" % " ".join(args))
  v1_deprecation_warning("ssh", newcmd)

  api = make_client(cluster_name)
  resp = api.query(api.build_query(role, name, set([int(shard)]), env=env))
  check_and_log_response(resp)

  first_task = resp.result.scheduleStatusResult.tasks[0]
  remote_cmd = 'bash' if not args else ' '.join(args)
  command = DistributedCommandRunner.substitute(remote_cmd, first_task,
      api.cluster, executor_sandbox=options.executor_sandbox)

  ssh_command = ['ssh', '-t']

  role = first_task.assignedTask.task.owner.role
  slave_host = first_task.assignedTask.slaveHost

  for tunnel in options.tunnels:
    try:
      port, name = tunnel.split(':')
      port = int(port)
    except ValueError:
      die('Could not parse tunnel: %s. Must be of form PORT:NAME' % tunnel)
    if name not in first_task.assignedTask.assignedPorts:
      die('Task %s has no port named %s' % (first_task.assignedTask.taskId, name))
    ssh_command += [
        '-L', '%d:%s:%d' % (port, slave_host, first_task.assignedTask.assignedPorts[name])]

  ssh_command += ['%s@%s' % (options.ssh_user or role, slave_host), command]
  return subprocess.call(ssh_command)

def get_api(self, cluster):
  """Gets an API object for a specified cluster

  Keeps the API handle cached, so that only one handle for each cluster
  will be created in a session.
  """
  if cluster not in self.apis:
    api = make_client(cluster)
    self.apis[cluster] = api
  return self.apis[cluster]

def diff(job_spec, config_file):
  """usage: diff cluster/role/env/job config

  Compares a job configuration against a running job.
  By default the diff will be displayed using 'diff', though you may choose an
  alternate diff program by specifying the DIFF_VIEWER environment variable.
  """
  options = app.get_options()
  config = get_job_config(job_spec, config_file, options)
  if options.rename_from:
    cluster, role, env, name = options.rename_from
  else:
    cluster = config.cluster()
    role = config.role()
    env = config.environment()
    name = config.name()
  api = make_client(cluster)
  resp = api.query(api.build_query(role, name, statuses=ACTIVE_STATES, env=env))
  if resp.responseCode != ResponseCode.OK:
    die('Request failed, server responded with "%s"' % resp.messageDEPRECATED)
  remote_tasks = [t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks]
  resp = api.populate_job_config(config)
  if resp.responseCode != ResponseCode.OK:
    die('Request failed, server responded with "%s"' % resp.messageDEPRECATED)
  local_tasks = resp.result.populateJobResult.populated

  pp = pprint.PrettyPrinter(indent=2)

  def pretty_print_task(task):
    # The raw configuration is not interesting - we only care about what gets parsed.
    task.configuration = None
    task.executorConfig = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=json.loads(task.executorConfig.data))
    return pp.pformat(vars(task))

  def pretty_print_tasks(tasks):
    return ',\n'.join([pretty_print_task(t) for t in tasks])

  def dump_tasks(tasks, out_file):
    out_file.write(pretty_print_tasks(tasks))
    out_file.write('\n')
    out_file.flush()

  diff_program = os.environ.get('DIFF_VIEWER', 'diff')
  with NamedTemporaryFile() as local:
    dump_tasks(local_tasks, local)
    with NamedTemporaryFile() as remote:
      dump_tasks(remote_tasks, remote)
      result = subprocess.call([diff_program, remote.name, local.name])
      # Unlike most commands, diff doesn't return zero on success; it returns
      # 1 when a successful diff is non-empty.
      if result != 0 and result != 1:
        return result
      else:
        return 0

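# Example invocation of the DIFF_VIEWER hook described in the docstring above;
# the job key and config file name are made up for illustration:
#   DIFF_VIEWER=vimdiff aurora diff devcluster/www-data/prod/hello hello.aurora
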
def get_api(self, cluster, enable_hooks=True):
  """Gets an API object for a specified cluster

  Keeps the API handle cached, so that only one handle for each cluster
  will be created in a session.
  """
  apis = self.apis if enable_hooks else self.unhooked_apis

  if cluster not in apis:
    api = make_client(cluster, AURORA_V2_USER_AGENT_NAME, enable_hooks)
    apis[cluster] = api
  return apis[cluster]

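# Hypothetical usage of the cached accessor above, assuming a context object
# that exposes the apis/unhooked_apis dicts; the class name is illustrative:
context = AuroraCommandContext()
hooked = context.get_api('devcluster')
assert context.get_api('devcluster') is hooked           # cached: same handle reused
raw = context.get_api('devcluster', enable_hooks=False)  # kept in a separate cache
assert raw is not hooked
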
def get_quota(role):
  """usage: get_quota --cluster=CLUSTER role

  Prints the production quota that has been allocated to a user.
  """
  options = app.get_options()
  resp = make_client(options.cluster).get_quota(role)
  print_quota(resp.result.getQuotaResult.quota, 'Total allocated quota', role)

  if resp.result.getQuotaResult.consumed:
    print_quota(resp.result.getQuotaResult.consumed, 'Consumed quota', role)

def killall(args, options):
  """usage: killall cluster/role/env/job

  Kills all tasks in a running job, blocking until all specified tasks have been terminated.
  """
  job_key = AuroraJobKey.from_path(args[0])
  config_file = args[1] if len(args) > 1 else None  # the config for hooks
  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  api = make_client(job_key.cluster)
  resp = api.kill_job(job_key, None, config=config)
  check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env,
              job_key.name)

def really_create(job_spec, config_file, options):
  try:
    config = get_job_config(job_spec, config_file, options)
  except ValueError as v:
    print("Error: %s" % v)
    sys.exit(1)
  api = make_client(config.cluster())
  resp = api.create_job(config)
  check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, config.role(),
              config.environment(), config.name())
  if options.wait_until == 'RUNNING':
    JobMonitor(api.scheduler_proxy, config.job_key()).wait_until(JobMonitor.running_or_finished)
  elif options.wait_until == 'FINISHED':
    JobMonitor(api.scheduler_proxy, config.job_key()).wait_until(JobMonitor.terminal)

def get_quota(role):
  """usage: get_quota --cluster=CLUSTER role

  Prints the production quota that has been allocated to a user.
  """
  options = app.get_options()
  resp = make_client(options.cluster).get_quota(role)
  quota = resp.result.getQuotaResult.quota

  quota_fields = [
      ('CPU', quota.numCpus),
      ('RAM', '%f GB' % (float(quota.ramMb) / 1024)),
      ('Disk', '%f GB' % (float(quota.diskMb) / 1024))
  ]
  log.info('Quota for %s:\n\t%s' %
           (role, '\n\t'.join(['%s\t%s' % (k, v) for (k, v) in quota_fields])))

def killall(args, options):
  """usage: killall cluster/role/env/job

  Kills all tasks in a running job, blocking until all specified tasks have been terminated.
  """
  maybe_disable_hooks(options)
  job_key = AuroraJobKey.from_path(args[0])
  config_file = args[1] if len(args) > 1 else None  # the config for hooks
  config = get_job_config(job_key.to_path(), config_file, options) if config_file else None
  api = make_client(job_key.cluster)
  if options.batch_size is not None:
    kill_in_batches(api, job_key, None, options.batch_size, options.max_failures_option)
  else:
    resp = api.kill_job(job_key, None, config=config)
    check_and_log_response(resp)
  handle_open(api.scheduler_proxy.scheduler_client().url, job_key.role, job_key.env,
              job_key.name)
  wait_kill_tasks(api.scheduler_proxy, job_key)

def update(job_spec, config_file):
  """usage: update cluster/role/env/job config

  Performs a rolling upgrade on a running job, using the update configuration
  within the config file as a control for update velocity and failure tolerance.

  Updates are fully controlled client-side, so aborting an update halts the
  update and leaves the job in a 'locked' state on the scheduler.
  Subsequent update attempts will fail until the update is 'unlocked' using the
  'cancel_update' command.

  The updater only takes action on shards in a job that have changed, meaning
  that changing a single shard will only induce a restart on the changed shard.

  You may want to consider using the 'diff' subcommand before updating,
  to preview what changes will take effect.
  """
  def warn_if_dangerous_change(api, job_spec, config):
    # Get the current job status, so that we can check if there's anything
    # dangerous about this update.
    resp = api.query_no_configs(api.build_query(config.role(),
        config.name(),
        statuses=ACTIVE_STATES,
        env=config.environment()))
    if resp.responseCode != ResponseCode.OK:
      die('Could not get job status from server for comparison: %s' % resp.messageDEPRECATED)
    remote_tasks = [t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks]
    resp = api.populate_job_config(config)
    if resp.responseCode != ResponseCode.OK:
      die('Server could not populate job config for comparison: %s' % resp.messageDEPRECATED)
    local_task_count = len(resp.result.populateJobResult.populated)
    remote_task_count = len(remote_tasks)
    # Warn when the update is a large change: the task count grows by 4x or
    # more, shrinks to a quarter or less, or drops to zero.
    if (local_task_count >= 4 * remote_task_count or
        local_task_count <= remote_task_count / 4 or
        local_task_count == 0):
      print('Warning: this update is a large change. Press ^c within 5 seconds to abort')
      time.sleep(5)

  options = app.get_options()
  CoreCommandHook.run_hooks("update", options, job_spec, config_file)
  maybe_disable_hooks(options)
  config = get_job_config(job_spec, config_file, options)
  api = make_client(config.cluster())
  if not options.force:
    warn_if_dangerous_change(api, job_spec, config)
  resp = api.update_job(config, options.health_check_interval_seconds, options.shards)
  check_and_log_response(resp)

def get_quota(role):
  """usage: get_quota --cluster=CLUSTER role

  Prints the production quota that has been allocated to a user.
  """
  options = app.get_options()
  resp = make_client(options.cluster).get_quota(role)
  quota_result = resp.result.getQuotaResult
  print_quota(quota_result.quota, 'Total allocated quota', role)

  if resp.result.getQuotaResult.prodConsumption:
    print_quota(quota_result.prodConsumption, 'Resources consumed by production jobs', role)

  if resp.result.getQuotaResult.nonProdConsumption:
    print_quota(quota_result.nonProdConsumption, 'Resources consumed by non-production jobs', role)

def get_quota(role):
  """usage: get_quota --cluster=CLUSTER role

  Prints the production quota that has been allocated to a user.
  """
  options = app.get_options()
  v1_deprecation_warning(
      "get_quota", ["quota", "get", "%s/%s" % (options.cluster, role)])
  resp = make_client(options.cluster).get_quota(role)
  quota_result = resp.result.getQuotaResult
  print_quota(quota_result.quota, 'Total allocated quota', role)

  if resp.result.getQuotaResult.prodConsumption:
    print_quota(quota_result.prodConsumption, 'Resources consumed by production jobs', role)

  if resp.result.getQuotaResult.nonProdConsumption:
    print_quota(quota_result.nonProdConsumption, 'Resources consumed by non-production jobs', role)

def create(job_spec, config_file):
  """usage: create cluster/role/env/job config

  Creates a job based on a configuration file.
  """
  options = app.get_options()
  try:
    config = get_job_config(job_spec, config_file, options)
  except ValueError as v:
    print("Error: %s" % v)
    sys.exit(1)
  api = make_client(config.cluster())
  monitor = JobMonitor(api, config.role(), config.environment(), config.name())
  resp = api.create_job(config)
  check_and_log_response(resp)
  handle_open(api.scheduler.scheduler().url, config.role(), config.environment(), config.name())
  if options.wait_until == 'RUNNING':
    monitor.wait_until(monitor.running_or_finished)
  elif options.wait_until == 'FINISHED':
    monitor.wait_until(monitor.terminal)

def cancel_update_job(self, cluster, role, environment, jobname, jobspec=None):
  """Method to cancel an update of aurora job"""
  job_key = AuroraJobKey(cluster, role, environment, jobname)
  logger.info("request to cancel update of => %s", job_key.to_path())
  try:
    config = self.make_job_config(job_key, jobspec)
  except Exception as e:
    return (job_key.to_path(),
            ["Failed to cancel update of Aurora job",
             "Can not create job configuration object because", str(e)])
  api = make_client(cluster)
  resp = api.cancel_update(job_key, config=config)
  if resp.responseCode != ResponseCode.OK:
    logger.warning("aurora -- cancel the update of job failed")
    responseStr = self.response_string(resp)
    logger.warning(responseStr)
    return (job_key.to_path(),
            ["Error reported by aurora client:", responseStr])
  logger.info("aurora -- cancel of update job successful")
  return (job_key.to_path(), None)

def really_update(job_spec, config_file, options):
  def warn_if_dangerous_change(api, job_spec, config):
    # Get the current job status, so that we can check if there's anything
    # dangerous about this update.
    resp = api.query_no_configs(api.build_query(config.role(),
        config.name(),
        statuses=ACTIVE_STATES,
        env=config.environment()))
    if resp.responseCode != ResponseCode.OK:
      die('Could not get job status from server for comparison: %s' % resp.messageDEPRECATED)
    remote_tasks = [t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks]
    resp = api.populate_job_config(config)
    if resp.responseCode != ResponseCode.OK:
      die('Server could not populate job config for comparison: %s' % resp.messageDEPRECATED)
    local_task_count = len(resp.result.populateJobResult.populatedDEPRECATED)
    remote_task_count = len(remote_tasks)
    # Warn when the update is a large change: the task count grows by 4x or
    # more, shrinks to a quarter or less, or drops to zero.
    if (local_task_count >= 4 * remote_task_count or
        local_task_count <= remote_task_count / 4 or
        local_task_count == 0):
      print('Warning: this update is a large change. Press ^c within 5 seconds to abort')
      time.sleep(5)

  maybe_disable_hooks(options)
  config = get_job_config(job_spec, config_file, options)
  api = make_client(config.cluster())
  if not options.force:
    warn_if_dangerous_change(api, job_spec, config)
  resp = api.update_job(config, options.health_check_interval_seconds, options.shards)
  check_and_log_response(resp)

def delete_job(self, cluster, role, environment, jobname, jobspec=None, instances=[]):
  """Method to delete aurora job"""
  job_key = AuroraJobKey(cluster, role, environment, jobname)
  logger.info("request to delete => %s", job_key.to_path())
  instances = self.pack_instance_list(instances)
  try:
    config = self.make_job_config(job_key, jobspec)
  except Exception as e:
    return (job_key.to_path(), [],
            ["Failed to delete Aurora job",
             "Can not create job configuration object because", str(e)])
  api = make_client(job_key.cluster)
  resp = api.kill_job(job_key, config=config, instances=instances)
  if resp.responseCode != ResponseCode.OK:
    logger.warning("aurora -- kill job failed")
    responseStr = self.response_string(resp)
    logger.warning(responseStr)
    return (job_key.to_path(), [],
            ["Error reported by aurora client:", responseStr])
  logger.info("aurora -- kill job successful")
  return (job_key.to_path(), [job_key.to_path()], None)

def list_jobs(cluster_and_role):
  """usage: list_jobs [--show-cron] cluster/role/env/job

  Shows all jobs that match the job-spec known by the scheduler.
  If --show-cron is specified, then also shows the registered cron schedule.
  """
  def show_job_simple(job):
    if options.show_cron_schedule:
      print(('{0}/{1.key.role}/{1.key.environment}/{1.key.name}' +
          '\t\'{1.cronSchedule}\'\t{1.cronCollisionPolicy}').format(cluster, job))
    else:
      print('{0}/{1.key.role}/{1.key.environment}/{1.key.name}'.format(cluster, job))

  def show_job_pretty(job):
    print("Job %s/%s/%s/%s:" %
        (cluster, job.key.role, job.key.environment, job.key.name))
    print('\tcron schedule: %s' % job.cronSchedule)
    print('\tcron policy: %s' % job.cronCollisionPolicy)

  options = app.get_options()
  v1_deprecation_warning("list_jobs", ["job", "list", cluster_and_role])

  if options.show_cron_schedule and options.pretty:
    print_fn = show_job_pretty
  else:
    print_fn = show_job_simple
  # Take the cluster_and_role parameter, and split it into its two components.
  if cluster_and_role.count('/') != 1:
    die('list_jobs parameter must be in cluster/role format')
  cluster, role = cluster_and_role.split('/')
  api = make_client(cluster)
  resp = api.get_jobs(role)
  check_and_log_response(resp)
  for job in resp.result.getJobsResult.configs:
    print_fn(job)

def list_jobs(self, cluster, role):
  """Method to execute [ aurora list_jobs cluster/role command ]"""
  def job_string(cluster, job):
    return '{0}/{1.key.role}/{1.key.environment}/{1.key.name}'.format(cluster, job)

  jobkey = self.make_job_key(cluster, role)
  logger.info("request to list jobs = %s" % jobkey)
  api = make_client(cluster)
  resp = api.get_jobs(role)
  if resp.responseCode != ResponseCode.OK:
    logger.warning("Failed to list Aurora jobs")
    responseStr = self.response_string(resp)
    logger.warning(responseStr)
    return (jobkey, [], ["Failed to list Aurora jobs", responseStr])
  jobs = [job_string(cluster, job) for job in resp.result.getJobsResult.configs]
  if len(jobs) == 0:
    logger.info("no jobs found for key = %s" % jobkey)
  for s in jobs:
    logger.info("> %s" % s)
  return (jobkey, jobs, None)

def diff(job_spec, config_file):
  """usage: diff cluster/role/env/job config

  Compares a job configuration against a running job.
  By default the diff will be displayed using 'diff', though you may choose an
  alternate diff program by specifying the DIFF_VIEWER environment variable.
  """
  options = app.get_options()

  newcmd = ["job", "diff", job_spec, config_file]
  if options.json:
    newcmd.append("--read-json")
  v1_deprecation_warning("diff", newcmd)

  config = get_job_config(job_spec, config_file, options)
  if options.rename_from:
    cluster, role, env, name = options.rename_from
  else:
    cluster = config.cluster()
    role = config.role()
    env = config.environment()
    name = config.name()
  api = make_client(cluster)
  resp = api.query(api.build_query(role, name, statuses=ACTIVE_STATES, env=env))
  if resp.responseCode != ResponseCode.OK:
    die('Request failed, server responded with "%s"' % resp.messageDEPRECATED)
  remote_tasks = [t.assignedTask.task for t in resp.result.scheduleStatusResult.tasks]
  resp = api.populate_job_config(config)
  if resp.responseCode != ResponseCode.OK:
    die('Request failed, server responded with "%s"' % resp.messageDEPRECATED)
  local_tasks = resp.result.populateJobResult.populatedDEPRECATED

  pp = pprint.PrettyPrinter(indent=2)

  def pretty_print_task(task):
    # The raw configuration is not interesting - we only care about what gets parsed.
    task.configuration = None
    task.executorConfig = ExecutorConfig(
        name=AURORA_EXECUTOR_NAME,
        data=json.loads(task.executorConfig.data))
    return pp.pformat(vars(task))

  def pretty_print_tasks(tasks):
    return ',\n'.join([pretty_print_task(t) for t in tasks])

  def dump_tasks(tasks, out_file):
    out_file.write(pretty_print_tasks(tasks))
    out_file.write('\n')
    out_file.flush()

  diff_program = os.environ.get('DIFF_VIEWER', 'diff')
  with NamedTemporaryFile() as local:
    dump_tasks(local_tasks, local)
    with NamedTemporaryFile() as remote:
      dump_tasks(remote_tasks, remote)
      result = subprocess.call([diff_program, remote.name, local.name])
      # Unlike most commands, diff doesn't return zero on success; it returns
      # 1 when a successful diff is non-empty.
      if result != 0 and result != 1:
        return result
      else:
        return 0

def test_make_client_defaults_to_hooks_enabled():
  with patch('apache.aurora.client.factory.CLUSTERS', new=TEST_CLUSTERS):
    assert isinstance(make_client(TEST_CLUSTER, 'some-user-agent'), HookedAuroraClientAPI)

def ssh(args, options):
  """usage: ssh cluster/role/env/job shard [args...]

  Initiate an SSH session on the machine that a shard is running on.
  """
  if not args:
    die('Job path is required')
  job_path = args.pop(0)
  try:
    cluster_name, role, env, name = AuroraJobKey.from_path(job_path)
  except AuroraJobKey.Error as e:
    die('Invalid job path "%s": %s' % (job_path, e))
  if not args:
    die('Shard is required')
  try:
    shard = int(args.pop(0))
  except ValueError:
    die('Shard must be an integer')

  newcmd = ["task", "ssh", "%s/%s" % (job_path, shard)]
  if len(options.tunnels) > 0:
    newcmd.append("--tunnels=%s" % options.tunnels)
  if options.ssh_user is not None:
    newcmd.append("--ssh-user=%s" % options.ssh_user)
  if options.executor_sandbox:
    newcmd.append("--executor-sandbox")
  if len(args) > 0:
    newcmd.append("--command=\"%s\"" % " ".join(args))
  v1_deprecation_warning("ssh", newcmd)

  api = make_client(cluster_name)
  resp = api.query(api.build_query(role, name, set([int(shard)]), env=env))
  check_and_log_response(resp)

  if (resp.result.scheduleStatusResult.tasks is None or
      len(resp.result.scheduleStatusResult.tasks) == 0):
    die("Job %s not found" % job_path)
  first_task = resp.result.scheduleStatusResult.tasks[0]
  remote_cmd = 'bash' if not args else ' '.join(args)
  command = DistributedCommandRunner.substitute(remote_cmd, first_task,
      api.cluster, executor_sandbox=options.executor_sandbox)

  ssh_command = ['ssh', '-t']

  role = first_task.assignedTask.task.owner.role
  slave_host = first_task.assignedTask.slaveHost

  for tunnel in options.tunnels:
    try:
      port, name = tunnel.split(':')
      port = int(port)
    except ValueError:
      die('Could not parse tunnel: %s. Must be of form PORT:NAME' % tunnel)
    if name not in first_task.assignedTask.assignedPorts:
      die('Task %s has no port named %s' % (first_task.assignedTask.taskId, name))
    ssh_command += [
        '-L', '%d:%s:%d' % (port, slave_host, first_task.assignedTask.assignedPorts[name])]

  ssh_command += ['%s@%s' % (options.ssh_user or role, slave_host), command]
  return subprocess.call(ssh_command)

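# Example of the tunnel syntax parsed above: --tunnels=8080:http requests
# 'ssh -L 8080:<slave_host>:<assigned http port>', i.e. local port 8080 is
# forwarded to whatever port the scheduler assigned under the name 'http'.
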
def get_api(self, cluster):
  """Creates an API object for a specified cluster"""
  return make_client(cluster)

def make_admin_client(cluster):
  return make_client(cluster, AURORA_ADMIN_USER_AGENT_NAME)

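# Every snippet above funnels through make_client. The reconstruction below is
# a sketch of the factory's observable contract only -- cluster name in, plain
# or hook-wrapped API out -- as inferred from the call sites and the two tests
# above; it is not the actual apache.aurora.client.factory source, and CLUSTERS
# here stands in for the real cluster registry.
def make_client_sketch(cluster_name, user_agent, enable_hooks=True):
  cluster = CLUSTERS[cluster_name]  # resolve the name against the registry
  api_class = HookedAuroraClientAPI if enable_hooks else AuroraClientAPI
  return api_class(cluster, user_agent)  # constructor argument order assumed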