async def subscribe(self):
    # This connection should live ~forever, so disable some timeouts.
    timeout = aiohttp.ClientTimeout(
        total=None,
        sock_read=None,
        connect=30,
        sock_connect=30,
    )
    async with aiohttp.ClientSession(timeout=timeout) as session:
        payload = '{"type":"SUBSCRIBE"}'
        master_host_port = mesos_tools.find_mesos_leader(cluster=self.cluster)
        async with session.post(
            f"http://{master_host_port}/api/v1",
            data=payload,
            # allow_redirects=True,
            headers={"Content-Type": "application/json"},
            timeout=timeout,
        ) as resp:
            while True:
                _size = await resp.content.readline()
                if not _size:
                    break
                size = int(_size)
                record = await resp.content.readexactly(size)
                yield json.loads(record)
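# A minimal, hedged sketch of consuming the event stream above. It assumes an
# object exposing subscribe() with a `cluster` attribute, as the method above
# implies; `listener` below is a hypothetical instance, not part of the source.
import asyncio

async def _drain_events(listener):
    # subscribe() is an async generator: each iteration yields one decoded
    # Mesos operator-API event parsed from the RecordIO stream.
    async for event in listener.subscribe():
        print(event.get("type"))

# asyncio.run(_drain_events(listener))  # uncomment with a real instance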
def create_mesos_executor(
    processor,
    system_paasta_config,
    taskproc_config,
    cluster,
    framework_name,
    framework_staging_timeout,
    role='*',
    pool='default',
):
    """Create a Mesos executor specific to our cluster"""
    MesosExecutor = processor.executor_cls('mesos_task')
    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=cluster,
    )
    mesos_address = '{}:{}'.format(
        mesos_tools.find_mesos_leader(cluster_fqdn),
        mesos_tools.MESOS_MASTER_PORT,
    )
    return MesosExecutor(
        role=role,
        pool=pool,
        principal=taskproc_config.get('principal'),
        secret=taskproc_config.get('secret'),
        mesos_address=mesos_address,
        framework_name=framework_name,
        framework_staging_timeout=framework_staging_timeout,
        initial_decline_delay=0.5,
    )
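# Self-contained illustration of the FQDN templating used above:
# get_cluster_fqdn_format() returns a format string containing a {cluster}
# placeholder. The format string here is an assumed example, not PaaSTA's
# actual configuration value.
fqdn_format = "mesos-{cluster}.example.com"
assert fqdn_format.format(cluster="test-cluster") == "mesos-test-cluster.example.com"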
def create_mesos_executor(
    processor,
    system_paasta_config,
    taskproc_config,
    cluster,
    framework_name,
    framework_staging_timeout,
    role="*",
    pool="default",
):
    """Create a Mesos executor specific to our cluster"""
    MesosExecutor = processor.executor_cls("mesos_task")
    mesos_address = mesos_tools.find_mesos_leader(cluster)
    return MesosExecutor(
        role=role,
        pool=pool,
        principal=taskproc_config.get("principal"),
        secret=taskproc_config.get("secret"),
        mesos_address=mesos_address,
        framework_name=framework_name,
        framework_staging_timeout=framework_staging_timeout,
        initial_decline_delay=0.5,
    )
def get_spark_configuration(
    args,
    container_name,
    spark_ui_port,
    docker_img,
    system_paasta_config,
):
    spark_conf = {}
    spark_conf['APP_NAME'] = container_name
    spark_conf['SPARK_UI_PORT'] = spark_ui_port

    creds = Session().get_credentials()
    spark_conf['AWS_ACCESS_KEY_ID'] = creds.access_key
    spark_conf['AWS_SECRET_ACCESS_KEY'] = creds.secret_key

    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=args.cluster,
    )
    mesos_address = '{}:{}'.format(
        find_mesos_leader(cluster_fqdn),
        MESOS_MASTER_PORT,
    )
    spark_conf['SPARK_MASTER'] = 'mesos://%s' % mesos_address

    spark_conf['SPARK_CORES_MAX'] = args.max_cores
    spark_conf['SPARK_EXECUTOR_CORES'] = args.executor_cores
    spark_conf['SPARK_EXECUTOR_MEMORY'] = '%dg' % args.executor_memory

    if args.build:
        spark_conf['SPARK_EXECUTOR_IMAGE'] = docker_img

    return spark_conf
def get_env(self):
    env = super().get_env()
    spark_env = {}
    if self.get_executor() == "spark":
        spark_env = get_mesos_spark_env(
            spark_app_name=f"tron_spark_{self.get_service()}_{self.get_instance()}",
            spark_ui_port=pick_random_port(
                f"{self.get_service()}{self.get_instance()}".encode()
            ),
            mesos_leader=find_mesos_leader(self.get_cluster()),
            mesos_secret=load_mesos_secret_for_spark(),
            paasta_cluster=self.get_cluster(),
            paasta_pool=self.get_pool(),
            paasta_service=self.get_service(),
            paasta_instance=self.get_instance(),
            docker_img=self.get_docker_url(),
            volumes=format_volumes(
                self.get_volumes(load_system_paasta_config().get_volumes())
            ),
            user_spark_opts=self.config_dict.get("spark_args"),
            event_log_dir=get_default_event_log_dir(
                service=self.get_service(),
                aws_credentials_yaml=self.config_dict.get("aws_credentials"),
            ),
        )
        env["SPARK_OPTS"] = stringify_spark_env(spark_env)
    return env
def get_spark_config_dict(self):
    if self.get_spark_cluster_manager() == "mesos":
        spark_env = get_mesos_spark_env(
            spark_app_name=f"tron_spark_{self.get_service()}_{self.get_instance()}",
            spark_ui_port=self.spark_ui_port,
            mesos_leader=(
                find_mesos_leader(self.get_spark_paasta_cluster())
                if not self.for_validation
                else "N/A"
            ),
            paasta_cluster=self.get_spark_paasta_cluster(),
            paasta_pool=self.get_spark_paasta_pool(),
            paasta_service=self.get_service(),
            paasta_instance=self.get_instance(),
            docker_img=self.get_docker_url(),
            volumes=[
                f"{v['hostPath']}:{v['containerPath']}:{v['mode'].lower()}"
                for v in self.get_volumes(load_system_paasta_config().get_volumes())
            ],
            user_spark_opts=self.config_dict.get("spark_args", {}),
            event_log_dir=get_default_event_log_dir(
                service=self.get_service(),
                aws_credentials_yaml=self.config_dict.get("aws_credentials_yaml"),
            ),
            needs_docker_cfg=True,
        )
    else:
        spark_env = get_k8s_spark_env(
            spark_app_name=f"tron_spark_{self.get_service()}_{self.get_instance()}",
            spark_ui_port=self.spark_ui_port,
            paasta_cluster=self.get_spark_paasta_cluster(),
            paasta_service=self.get_service(),
            paasta_instance=self.get_instance(),
            docker_img=self.get_docker_url(),
            volumes=self.get_volumes(load_system_paasta_config().get_volumes()),
            user_spark_opts=self.config_dict.get("spark_args", {}),
            event_log_dir=get_default_event_log_dir(
                service=self.get_service(),
                aws_credentials_yaml=self.config_dict.get("aws_credentials_yaml"),
            ),
        )
    return spark_env
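# Small, runnable illustration of the Docker volume formatting used in the
# mesos branch above; the sample dict mirrors the hostPath / containerPath /
# mode keys that the list comprehension reads (values are illustrative).
volume = {"hostPath": "/nail/etc", "containerPath": "/etc/paasta", "mode": "RO"}
formatted = f"{volume['hostPath']}:{volume['containerPath']}:{volume['mode'].lower()}"
assert formatted == "/nail/etc:/etc/paasta:ro"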
def build_executor_stack(
    # TODO: rename to registry?
    processor,
    service,
    instance,
    cluster,
    role,
    pool,
    # TODO: move run_id into task identifier?
    run_id,
    system_paasta_config,
    framework_staging_timeout,
):
    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=cluster,
    )
    mesos_address = '{}:{}'.format(
        mesos_tools.find_mesos_leader(cluster_fqdn),
        mesos_tools.MESOS_MASTER_PORT,
    )

    # TODO: implement DryRunExecutor?
    taskproc_config = system_paasta_config.get_taskproc()

    MesosExecutor = processor.executor_cls('mesos')
    mesos_executor = MesosExecutor(
        role=role,
        pool=pool,
        principal=taskproc_config.get('principal'),
        secret=taskproc_config.get('secret'),
        mesos_address=mesos_address,
        framework_name="paasta-remote {} {} {}".format(
            compose_job_id(service, instance),
            datetime.utcnow().strftime('%Y%m%d%H%M%S%f'),
            run_id,
        ),
        framework_staging_timeout=framework_staging_timeout,
        initial_decline_delay=0.5,
    )

    task_logging_executor = processor.executor_from_config(
        provider='logging',
        provider_config={
            'downstream_executor': mesos_executor,
        },
    )

    credentials_file = taskproc_config.get('boto_credential_file')
    if credentials_file:
        with open(credentials_file) as f:
            credentials = json.loads(f.read())
    else:
        raise ValueError("Required aws credentials")

    region = taskproc_config.get('aws_region')
    endpoint = taskproc_config.get('dynamodb_endpoint')
    session = Session(
        region_name=region,
        aws_access_key_id=credentials['accessKeyId'],
        aws_secret_access_key=credentials['secretAccessKey'],
    )

    StatefulExecutor = processor.executor_cls(provider='stateful')
    stateful_executor = StatefulExecutor(
        downstream_executor=task_logging_executor,
        persister=DynamoDBPersister(
            table_name="taskproc_events_%s" % cluster,
            session=session,
            endpoint_url=endpoint,
        ),
    )

    return stateful_executor
def get_spark_config(
    args,
    spark_app_name,
    spark_ui_port,
    docker_img,
    system_paasta_config,
    volumes,
):
    # User configurable Spark options
    user_args = {
        'spark.app.name': spark_app_name,
        'spark.cores.max': '4',
        'spark.executor.cores': '2',
        'spark.executor.memory': '4g',
        # Use \; for multiple constraints. e.g.
        # instance_type:m4.10xlarge\;pool:default
        'spark.mesos.constraints': 'pool:%s' % args.pool,
        'spark.mesos.executor.docker.forcePullImage': 'true',
    }

    # Spark options managed by PaaSTA
    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=args.cluster,
    )
    mesos_address = '{}:{}'.format(
        find_mesos_leader(cluster_fqdn),
        MESOS_MASTER_PORT,
    )
    non_user_args = {
        'spark.master': 'mesos://%s' % mesos_address,
        'spark.ui.port': spark_ui_port,
        'spark.executorEnv.PAASTA_SERVICE': args.service,
        'spark.executorEnv.PAASTA_INSTANCE': '{}_{}'.format(args.instance, get_username()),
        'spark.executorEnv.PAASTA_CLUSTER': args.cluster,
        'spark.mesos.executor.docker.parameters':
            'label=paasta_service={},label=paasta_instance={}_{}'.format(
                args.service, args.instance, get_username(),
            ),
        'spark.mesos.executor.docker.volumes': ','.join(volumes),
        'spark.mesos.executor.docker.image': docker_img,
        'spark.mesos.principal': 'spark',
        'spark.mesos.secret': _load_mesos_secret(),
        # derby.system.home property defaulting to '.',
        # which requires directory permission changes.
        'spark.driver.extraJavaOptions': '-Dderby.system.home=/tmp/derby',
    }

    if not args.build and not args.image:
        non_user_args['spark.mesos.uris'] = 'file:///root/.dockercfg'

    if args.spark_args:
        spark_args = args.spark_args.split()
        for spark_arg in spark_args:
            fields = spark_arg.split('=')
            if len(fields) != 2:
                paasta_print(
                    PaastaColors.red(
                        "Spark option %s is not in format option=value." % spark_arg,
                    ),
                    file=sys.stderr,
                )
                sys.exit(1)

            if fields[0] in non_user_args:
                paasta_print(
                    PaastaColors.red(
                        "Spark option {} is set by PaaSTA with {}.".format(
                            fields[0],
                            non_user_args[fields[0]],
                        ),
                    ),
                    file=sys.stderr,
                )
                sys.exit(1)
            # Update default configuration
            user_args[fields[0]] = fields[1]

    if int(user_args['spark.cores.max']) < int(user_args['spark.executor.cores']):
        paasta_print(
            PaastaColors.red(
                "Total number of cores {} is less than per-executor cores {}.".format(
                    user_args['spark.cores.max'],
                    user_args['spark.executor.cores'],
                ),
            ),
            file=sys.stderr,
        )
        sys.exit(1)

    exec_mem = user_args['spark.executor.memory']
    if exec_mem[-1] != 'g' or not exec_mem[:-1].isdigit() or int(exec_mem[:-1]) > 32:
        paasta_print(
            PaastaColors.red(
                "Executor memory {} not in format dg (d<=32).".format(
                    user_args['spark.executor.memory'],
                ),
            ),
            file=sys.stderr,
        )
        sys.exit(1)

    # Limit a container's cpu usage
    non_user_args['spark.mesos.executor.docker.parameters'] += ',cpus={}'.format(
        user_args['spark.executor.cores'],
    )

    return dict(non_user_args, **user_args)
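# Why the later revision of this function splits differently: split('=')
# rejects any option whose value itself contains '=' (e.g. an
# extraJavaOptions value like -Dkey=val), while split('=', 1) splits only on
# the first '='. Runnable demonstration:
assert "a.option=-Dkey=val".split("=") == ["a.option", "-Dkey", "val"]   # len 3: rejected here
assert "a.option=-Dkey=val".split("=", 1) == ["a.option", "-Dkey=val"]   # len 2: accepted below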
def get_spark_config(
    args,
    spark_app_name,
    spark_ui_port,
    docker_img,
    system_paasta_config,
    volumes,
    access_key,
    secret_key,
):
    # User configurable Spark options
    user_args = {
        "spark.app.name": spark_app_name,
        "spark.cores.max": "4",
        "spark.executor.cores": "2",
        "spark.executor.memory": "4g",
        # Use \; for multiple constraints. e.g.
        # instance_type:m4.10xlarge\;pool:default
        "spark.mesos.constraints": "pool:%s" % args.pool,
        "spark.mesos.executor.docker.forcePullImage": "true",
    }

    default_event_log_dir = get_default_event_log_dir(access_key, secret_key)
    if default_event_log_dir is not None:
        user_args["spark.eventLog.enabled"] = "true"
        user_args["spark.eventLog.dir"] = default_event_log_dir

    # Spark options managed by PaaSTA
    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=args.cluster,
    )
    mesos_address = "{}:{}".format(find_mesos_leader(cluster_fqdn), MESOS_MASTER_PORT)

    paasta_instance = get_smart_paasta_instance_name(args)
    non_user_args = {
        "spark.master": "mesos://%s" % mesos_address,
        "spark.ui.port": spark_ui_port,
        "spark.executorEnv.PAASTA_SERVICE": args.service,
        "spark.executorEnv.PAASTA_INSTANCE": paasta_instance,
        "spark.executorEnv.PAASTA_CLUSTER": args.cluster,
        "spark.executorEnv.PAASTA_INSTANCE_TYPE": "spark",
        "spark.mesos.executor.docker.parameters":
            f"label=paasta_service={args.service},label=paasta_instance={paasta_instance}",
        "spark.mesos.executor.docker.volumes": ",".join(volumes),
        "spark.mesos.executor.docker.image": docker_img,
        "spark.mesos.principal": "spark",
        "spark.mesos.secret": _load_mesos_secret(),
    }

    if not args.build and not args.image:
        non_user_args["spark.mesos.uris"] = "file:///root/.dockercfg"

    if args.spark_args:
        spark_args = args.spark_args.split()
        for spark_arg in spark_args:
            fields = spark_arg.split("=", 1)
            if len(fields) != 2:
                paasta_print(
                    PaastaColors.red(
                        "Spark option %s is not in format option=value." % spark_arg
                    ),
                    file=sys.stderr,
                )
                sys.exit(1)

            if fields[0] in non_user_args:
                paasta_print(
                    PaastaColors.red(
                        "Spark option {} is set by PaaSTA with {}.".format(
                            fields[0], non_user_args[fields[0]]
                        )
                    ),
                    file=sys.stderr,
                )
                sys.exit(1)
            # Update default configuration
            user_args[fields[0]] = fields[1]

    if "spark.sql.shuffle.partitions" not in user_args:
        num_partitions = str(2 * int(user_args["spark.cores.max"]))
        user_args["spark.sql.shuffle.partitions"] = num_partitions
        paasta_print(
            PaastaColors.yellow(
                f"Warning: spark.sql.shuffle.partitions has been set to"
                f" {num_partitions} to be equal to twice the number of "
                f"requested cores, but you should consider setting a "
                f"higher value if necessary."
            )
        )

    if int(user_args["spark.cores.max"]) < int(user_args["spark.executor.cores"]):
        paasta_print(
            PaastaColors.red(
                "Total number of cores {} is less than per-executor cores {}.".format(
                    user_args["spark.cores.max"], user_args["spark.executor.cores"]
                )
            ),
            file=sys.stderr,
        )
        sys.exit(1)

    exec_mem = user_args["spark.executor.memory"]
    if exec_mem[-1] != "g" or not exec_mem[:-1].isdigit() or int(exec_mem[:-1]) > 32:
        paasta_print(
            PaastaColors.red(
                "Executor memory {} not in format dg (d<=32).".format(
                    user_args["spark.executor.memory"]
                )
            ),
            file=sys.stderr,
        )
        sys.exit(1)

    # Limit a container's cpu usage
    non_user_args["spark.mesos.executor.docker.parameters"] += ",cpus={}".format(
        user_args["spark.executor.cores"]
    )

    return dict(non_user_args, **user_args)
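# Minimal demonstration of the merge on the final line above:
# dict(non_user_args, **user_args) gives user-supplied keys precedence when
# the same key appears in both (the earlier checks normally prevent overlap).
# Keys and values below are illustrative.
non_user = {"spark.ui.port": "4040", "spark.master": "mesos://leader:5050"}
user = {"spark.ui.port": "33000"}
merged = dict(non_user, **user)
assert merged == {"spark.ui.port": "33000", "spark.master": "mesos://leader:5050"}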
def get_spark_conf_str(
    args,
    container_name,
    spark_ui_port,
    docker_img,
    system_paasta_config,
    volumes,
):
    spark_conf = list()
    spark_conf.append('--conf spark.app.name=%s' % container_name)
    spark_conf.append('--conf spark.ui.port=%d' % spark_ui_port)

    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=args.cluster,
    )
    mesos_address = '{}:{}'.format(
        find_mesos_leader(cluster_fqdn),
        MESOS_MASTER_PORT,
    )
    spark_conf.append('--conf spark.master=mesos://%s' % mesos_address)

    spark_conf.append('--conf spark.cores.max=%d' % args.max_cores)
    spark_conf.append('--conf spark.executor.memory=%dg' % args.executor_memory)
    spark_conf.append('--conf spark.executor.cores=%d' % args.executor_cores)
    if args.driver_max_result_size:
        spark_conf.append(
            '--conf spark.driver.maxResultSize=%dg' % args.driver_max_result_size)
    if args.driver_memory:
        spark_conf.append('--conf spark.driver.memory=%dg' % args.driver_memory)
    if args.driver_cores:
        spark_conf.append('--conf spark.driver.cores=%d' % args.driver_cores)

    spark_conf.append('--conf spark.mesos.executor.docker.image=%s' % docker_img)
    if not args.build and not args.image:
        spark_conf.append('--conf spark.mesos.uris=file:///root/.dockercfg')

    if args.jars:
        spark_conf.append('--conf spark.jars=%s' % args.jars)

    spark_conf.append('--conf spark.mesos.principal=%s' % args.mesos_principal)
    if not args.mesos_secret:
        try:
            with open(DEFAULT_SPARK_MESOS_SECRET_FILE, 'r') as f:
                mesos_secret = f.read()
                spark_conf.append('--conf spark.mesos.secret=%s' % mesos_secret)
        except IOError:
            paasta_print(
                'Cannot load mesos secret from %s' % DEFAULT_SPARK_MESOS_SECRET_FILE,
                file=sys.stderr,
            )
            sys.exit(1)
    else:
        spark_conf.append('--conf spark.mesos.secret=%s' % args.mesos_secret)

    # derby.system.home property defaulting to '.',
    # which requires directory permission changes.
    spark_conf.append(
        '--conf spark.driver.extraJavaOptions=-Dderby.system.home=/tmp/derby')

    spark_conf.append('--conf spark.mesos.constraints=pool:%s' % args.pool)
    spark_conf.append(
        '--conf spark.mesos.executor.docker.volumes=%s' % ','.join(volumes))

    return ' '.join(spark_conf)