def load_config(self):
    """Reload this scheduler's native job configuration from disk.

    Empties the yaml cache first so the config is genuinely re-read, then
    refreshes the drain method and constraints and validates the result.
    """
    # Drop any cached yaml so changes on disk are picked up.
    service_configuration_lib._yaml_cache = {}
    self.service_config = load_paasta_native_job_config(
        service=self.service_name,
        instance=self.instance_name,
        instance_type=self.instance_type,
        cluster=self.cluster,
        soa_dir=self.soa_dir,
        config_overrides=self.service_config_overrides,
    )
    # Derived state must be rebuilt whenever the config changes.
    self.recreate_drain_method()
    self.reload_constraints()
    self.validate_config()
def remote_run_start(args):
    """Run a service instance as a one-off task on Mesos.

    Builds a task config from the service's native job config plus any
    command-line overrides, schedules it on a task_processing mesos executor
    stack, and blocks until the task reaches a terminal state. Exits the
    process: 0 on task success, 1 on any failure, 143 on SIGTERM.

    :param args: parsed command-line namespace (constraints_json, cmd,
        instances, run_id, detach, docker_image, staging_timeout, ...)
    """
    system_paasta_config, service, cluster, \
        soa_dir, instance, instance_type = extract_args(args)

    # Accumulate config overrides supplied on the command line.
    overrides_dict = {}
    constraints_json = args.constraints_json
    if constraints_json:
        # Pre-initialize so a parse failure stays best-effort instead of
        # raising NameError at the `if constraints` check below.
        constraints = None
        try:
            constraints = json.loads(constraints_json)
        except Exception as e:
            # BUGFIX: the %s placeholder was previously never interpolated
            # (the exception was passed as a second print argument).
            paasta_print("Error while parsing constraints: %s" % e)
        if constraints:
            overrides_dict['constraints'] = constraints
    if args.cmd:
        overrides_dict['cmd'] = args.cmd
    if args.instances:
        overrides_dict['instances'] = args.instances

    run_id = args.run_id
    if run_id is None:
        run_id = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(8)
        )
        paasta_print("Assigned random run-id: %s" % run_id)

    if args.detach:
        # Classic double-fork + setsid so the scheduler fully detaches from
        # the controlling terminal; stdout/stderr are discarded.
        paasta_print("Running in background")
        if os.fork() > 0:
            return
        os.setsid()
        if os.fork() > 0:
            return
        sys.stdout = open('/dev/null', 'w')
        sys.stderr = open('/dev/null', 'w')

    paasta_print('Scheduling a task on Mesos')
    processor = TaskProcessor()
    processor.load_plugin(provider_module='task_processing.plugins.mesos')
    processor.load_plugin(provider_module='task_processing.plugins.stateful')
    MesosExecutor = processor.executor_cls(provider='mesos')

    native_job_config = load_paasta_native_job_config(
        service,
        instance,
        cluster,
        soa_dir=soa_dir,
        instance_type=instance_type,
        config_overrides=overrides_dict,
        load_deployments=not args.docker_image,
    )
    try:
        task_config = MesosExecutor.TASK_CONFIG_INTERFACE(
            **paasta_to_task_config_kwargs(
                service=service,
                instance=instance,
                system_paasta_config=system_paasta_config,
                native_job_config=native_job_config,
                config_overrides=overrides_dict,
                docker_image=args.docker_image,
                offer_timeout=args.staging_timeout,
            ),
        )
    except InvariantException as e:
        if len(e.missing_fields) > 0:
            paasta_print(
                PaastaColors.red(
                    "Mesos task config is missing following fields: {}".format(
                        ', '.join(e.missing_fields),
                    ),
                ),
            )
        elif len(e.invariant_errors) > 0:
            paasta_print(
                PaastaColors.red(
                    "Mesos task config is failing following checks: {}".format(
                        ', '.join(str(ie) for ie in e.invariant_errors),
                    ),
                ),
            )
        else:
            paasta_print(PaastaColors.red(f"Mesos task config error: {e}"))
        traceback.print_exc()
        emit_counter_metric('paasta.remote_run.start.failed', service, instance)
        sys.exit(1)
    except PTypeError as e:
        paasta_print(
            PaastaColors.red(
                f"Mesos task config is failing a type check: {e}",
            ),
        )
        traceback.print_exc()
        emit_counter_metric('paasta.remote_run.start.failed', service, instance)
        sys.exit(1)

    # BUGFIX: bind `runner` before the signal handlers are installed, so an
    # early signal hits the `is not None` guard instead of a NameError.
    runner = None

    def handle_interrupt(_signum, _frame):
        # Stop the runner (if it exists yet) and exit with the conventional
        # code: 143 for SIGTERM, 1 otherwise.
        paasta_print(
            PaastaColors.red("Signal received, shutting down scheduler."),
        )
        if runner is not None:
            runner.stop()
        if _signum == signal.SIGTERM:
            sys.exit(143)
        else:
            sys.exit(1)

    signal.signal(signal.SIGINT, handle_interrupt)
    signal.signal(signal.SIGTERM, handle_interrupt)

    default_role = system_paasta_config.get_remote_run_config().get(
        'default_role',
    )
    if not default_role:
        # Explicit failure instead of `assert`: asserts are stripped under
        # `python -O`, which would let a missing role slip through.
        paasta_print(
            PaastaColors.red("Error: default_role is missing from the remote_run config"),
        )
        sys.exit(1)

    try:
        executor_stack = build_executor_stack(
            processor=processor,
            service=service,
            instance=instance,
            role=native_job_config.get_role() or default_role,
            pool=native_job_config.get_pool(),
            cluster=cluster,
            run_id=run_id,
            system_paasta_config=system_paasta_config,
            framework_staging_timeout=args.staging_timeout,
        )
        runner = Sync(executor_stack)
        terminal_event = runner.run(task_config)
        runner.stop()
    # ValueError was listed redundantly alongside Exception; it is a subclass.
    except Exception as e:
        # BUGFIX: interpolate the exception into the message (was passed as a
        # second argument, leaving a literal %s in the output).
        paasta_print("Exception while running executor stack: %s" % e)
        traceback.print_exc()
        emit_counter_metric('paasta.remote_run.start.failed', service, instance)
        sys.exit(1)

    if terminal_event.success:
        paasta_print("Task finished successfully")
        sys.exit(0)
    else:
        paasta_print(PaastaColors.red(f"Task failed: {terminal_event.raw}"))
        # This is not necessarily an infrastructure failure. It may just be a
        # application failure.
        emit_counter_metric('paasta.remote_run.start.failed', service, instance)
        sys.exit(1)
def remote_run_start(args):
    """ Start a task in Mesos
    Steps:
    1. Accumulate overrides
    2. Create task configuration
    3. Build executor stack
    4. Run the task on the executor stack

    Exits the process with the task's resulting exit code (or returns early
    when detaching into the background or doing a dry run).
    """
    # accumulate all configuration needed to build what we need to run a task
    system_paasta_config, service, cluster, \
        soa_dir, instance, instance_type = extract_args(args)
    # TODO: move run_id into task identifier?
    run_id = args.run_id or generate_run_id(length=10)
    framework_name = create_framework_name(service, instance, run_id)
    overrides = accumulate_config_overrides(args, service, instance)
    # TODO: implement DryRunExecutor?
    taskproc_config = system_paasta_config.get_taskproc()
    native_job_config = load_paasta_native_job_config(
        service,
        instance,
        cluster,
        soa_dir=soa_dir,
        instance_type=instance_type,
        config_overrides=overrides,
        load_deployments=not args.docker_image,
    )
    region = args.aws_region or taskproc_config.get('aws_region')
    default_role = system_paasta_config.get_remote_run_config().get(
        'default_role',
    )
    if not default_role:
        # Explicit check instead of `assert`: asserts are removed under
        # `python -O` and a missing role should fail loudly either way.
        paasta_print(
            PaastaColors.red("Error: default_role is missing from the remote_run config"),
        )
        sys.exit(1)
    role = native_job_config.get_role() or default_role
    pool = native_job_config.get_pool()
    processor = TaskProcessor()
    processor.load_plugin(provider_module='task_processing.plugins.stateful')
    processor.load_plugin(provider_module='task_processing.plugins.mesos')

    if args.detach:
        # Double-fork + setsid detaches from the controlling terminal;
        # stdout/stderr are discarded afterwards.
        paasta_print("Running in background")
        if os.fork() > 0:
            return
        os.setsid()
        if os.fork() > 0:
            return
        sys.stdout = open('/dev/null', 'w')
        sys.stderr = open('/dev/null', 'w')

    # create factory functions for task_config and executors, which makes it
    # easier to recreate them for retry purposes
    def task_config_factory():
        return create_mesos_task_config(
            processor=processor,
            service=service,
            instance=instance,
            system_paasta_config=system_paasta_config,
            native_job_config=native_job_config,
            offer_timeout=args.staging_timeout,
            docker_image=args.docker_image,
        )

    framework_config = dict(
        cluster=cluster,
        framework_name=framework_name,
        framework_staging_timeout=args.staging_timeout,
        role=role,
        pool=pool,
    )
    executor_kwargs = dict(  # used to create mesos executor
        processor=processor,
        system_paasta_config=system_paasta_config,
        taskproc_config=taskproc_config,
        **framework_config,
    )

    def executor_factory():
        mesos_executor = create_mesos_executor(**executor_kwargs)
        return build_executor_stack(
            processor, mesos_executor, taskproc_config, cluster, region,
        )

    if args.dry_run:
        # Show what would run without touching Mesos at all.
        task_config_dict = task_config_to_dict(task_config_factory())
        pp = pprint.PrettyPrinter(indent=2)
        paasta_print(
            PaastaColors.green("Would have run task with:"),
            PaastaColors.green("Framework config:"),
            pp.pformat(framework_config),
            PaastaColors.green("Task config:"),
            pp.pformat(task_config_dict),
            sep='\n',
        )
        return

    terminals = run_tasks_with_retries(
        executor_factory,
        task_config_factory,
        retries=args.retries,
    )
    # Only the last attempt determines the reported outcome.
    final_event, final_task_config = terminals[-1]
    exit_code = handle_terminal_event(
        event=final_event,
        service=service,
        instance=instance,
        run_id=run_id,
        email_address=args.notification_email,
        framework_config=framework_config,
        task_config=final_task_config,
    )
    sys.exit(exit_code)
def paasta_to_task_config_kwargs(
    service,
    instance,
    cluster,
    system_paasta_config,
    instance_type,
    soa_dir=DEFAULT_SOA_DIR,
    config_overrides=None,
):
    """Build the kwargs dict for a task_processing mesos task config.

    Loads the native job config for the given service instance and maps its
    docker/resource settings into the flat kwarg structure expected by the
    mesos TASK_CONFIG_INTERFACE, including a hashed, bounce-aware task name.
    """
    job_config = load_paasta_native_job_config(
        service,
        instance,
        cluster,
        soa_dir=soa_dir,
        instance_type=instance_type,
        config_overrides=config_overrides,
    )
    docker_url = job_config.get_docker_url()

    parameters = [
        {'key': param['key'], 'value': param['value']}
        for param in job_config.format_docker_parameters()
    ]
    volumes = [
        {
            'container_path': vol['containerPath'],
            'host_path': vol['hostPath'],
            'mode': vol['mode'].upper(),
        }
        for vol in job_config.get_volumes(
            system_volumes=system_paasta_config.get_volumes(),
        )
    ]

    # Key insertion order is preserved from the original implementation since
    # this dict is fed to get_config_hash below.
    task_kwargs = {
        'image': str(docker_url),
        'cpus': job_config.get_cpus(),
        'mem': float(job_config.get_mem()),
        'disk': float(job_config.get_disk(10)),
        'volumes': volumes,
        'uris': [system_paasta_config.get_dockercfg_location()],
        'docker_parameters': parameters,
        'containerizer': 'DOCKER',
        'environment': job_config.get_env_dictionary(),
    }

    command = job_config.get_cmd()
    if command:
        task_kwargs['cmd'] = command
    gpus = job_config.get_gpus()
    if gpus > 0:
        task_kwargs['gpus'] = int(gpus)
        # GPU tasks are launched with the MESOS containerizer, not DOCKER.
        task_kwargs['containerizer'] = 'MESOS'

    # Hash everything assembled so far so the task name changes whenever the
    # config (or a forced bounce) does.
    config_hash = get_config_hash(
        task_kwargs,
        force_bounce=job_config.get_force_bounce(),
    )
    task_kwargs['name'] = str(
        compose_job_id(
            service,
            instance,
            git_hash=get_code_sha_from_dockerurl(docker_url),
            config_hash=config_hash,
            spacer=MESOS_TASK_SPACER,
        ),
    )
    return task_kwargs