Code Example #1
async def subscribe(self):
    # This connection should live ~forever, so disable some timeouts.
    timeout = aiohttp.ClientTimeout(
        total=None,
        sock_read=None,
        connect=30,
        sock_connect=30,
    )
    async with aiohttp.ClientSession(timeout=timeout) as session:
        payload = '{"type":"SUBSCRIBE"}'
        master_host_port = mesos_tools.find_mesos_leader(
            cluster=self.cluster)
        async with session.post(
                f"http://{master_host_port}/api/v1",
                data=payload,
                # allow_redirects=True,
                headers={"Content-Type": "application/json"},
                timeout=timeout,
        ) as resp:
            while True:
                _size = await resp.content.readline()
                if not _size:
                    break
                size = int(_size)
                record = await resp.content.readexactly(size)
                yield json.loads(record)
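A minimal consumption sketch for the generator above, assuming `listener` is an instance of the (unshown) class that defines subscribe(); only the async-iteration pattern is taken from the snippet itself:

import asyncio

async def consume(listener):
    # Each yielded item is one decoded record from the Mesos operator
    # API event stream (RecordIO framing: a size line, then the payload).
    async for event in listener.subscribe():
        print(event.get("type"))

# asyncio.run(consume(listener))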
Code Example #2
File: paasta_remote_run.py  Project: rajacsp/paasta
def create_mesos_executor(
    processor,
    system_paasta_config,
    taskproc_config,
    cluster,
    framework_name,
    framework_staging_timeout,
    role='*',
    pool='default',
):
    """ Create a Mesos executor specific to our cluster """
    MesosExecutor = processor.executor_cls('mesos_task')

    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=cluster)
    mesos_address = '{}:{}'.format(
        mesos_tools.find_mesos_leader(cluster_fqdn),
        mesos_tools.MESOS_MASTER_PORT,
    )

    return MesosExecutor(
        role=role,
        pool=pool,
        principal=taskproc_config.get('principal'),
        secret=taskproc_config.get('secret'),
        mesos_address=mesos_address,
        framework_name=framework_name,
        framework_staging_timeout=framework_staging_timeout,
        initial_decline_delay=0.5,
    )
Code Example #3
def create_mesos_executor(
    processor,
    system_paasta_config,
    taskproc_config,
    cluster,
    framework_name,
    framework_staging_timeout,
    role="*",
    pool="default",
):
    """ Create a Mesos executor specific to our cluster """
    MesosExecutor = processor.executor_cls("mesos_task")

    mesos_address = mesos_tools.find_mesos_leader(cluster)

    return MesosExecutor(
        role=role,
        pool=pool,
        principal=taskproc_config.get("principal"),
        secret=taskproc_config.get("secret"),
        mesos_address=mesos_address,
        framework_name=framework_name,
        framework_staging_timeout=framework_staging_timeout,
        initial_decline_delay=0.5,
    )
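Note that this version passes the cluster name straight to mesos_tools.find_mesos_leader() and uses its return value as the full Mesos address, whereas Example #2 formats a cluster FQDN first and appends mesos_tools.MESOS_MASTER_PORT; the two snippets appear to come from different revisions of the helper.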
Code Example #4
File: spark_run.py  Project: chlgit/paasta
def get_spark_configuration(
    args,
    container_name,
    spark_ui_port,
    docker_img,
    system_paasta_config,
):
    spark_conf = {}
    spark_conf['APP_NAME'] = container_name
    spark_conf['SPARK_UI_PORT'] = spark_ui_port

    creds = Session().get_credentials()
    spark_conf['AWS_ACCESS_KEY_ID'] = creds.access_key
    spark_conf['AWS_SECRET_ACCESS_KEY'] = creds.secret_key

    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=args.cluster)
    mesos_address = '{}:{}'.format(
        find_mesos_leader(cluster_fqdn),
        MESOS_MASTER_PORT,
    )
    spark_conf['SPARK_MASTER'] = 'mesos://%s' % mesos_address
    spark_conf['SPARK_CORES_MAX'] = args.max_cores
    spark_conf['SPARK_EXECUTOR_CORES'] = args.executor_cores
    spark_conf['SPARK_EXECUTOR_MEMORY'] = '%dg' % args.executor_memory

    if args.build:
        spark_conf['SPARK_EXECUTOR_IMAGE'] = docker_img

    return spark_conf
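A short usage sketch, assuming the mapping returned by get_spark_configuration() is injected into the Spark container as environment variables (the docker-style flags below are hypothetical):

spark_conf = get_spark_configuration(
    args, container_name, spark_ui_port, docker_img, system_paasta_config,
)
env_flags = []
for key, value in spark_conf.items():
    # e.g. --env SPARK_MASTER=mesos://<leader-host>:<port>
    env_flags.extend(['--env', f'{key}={value}'])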
Code Example #5
    def get_env(self):
        env = super().get_env()
        spark_env = {}
        if self.get_executor() == "spark":
            spark_env = get_mesos_spark_env(
                spark_app_name=f"tron_spark_{self.get_service()}_{self.get_instance()}",
                spark_ui_port=pick_random_port(
                    f"{self.get_service()}{self.get_instance()}".encode()),
                mesos_leader=find_mesos_leader(self.get_cluster()),
                mesos_secret=load_mesos_secret_for_spark(),
                paasta_cluster=self.get_cluster(),
                paasta_pool=self.get_pool(),
                paasta_service=self.get_service(),
                paasta_instance=self.get_instance(),
                docker_img=self.get_docker_url(),
                volumes=format_volumes(
                    self.get_volumes(
                        load_system_paasta_config().get_volumes())),
                user_spark_opts=self.config_dict.get("spark_args"),
                event_log_dir=get_default_event_log_dir(
                    service=self.get_service(),
                    aws_credentials_yaml=self.config_dict.get(
                        "aws_credentials"),
                ),
            )
            env["SPARK_OPTS"] = stringify_spark_env(spark_env)

        return env
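The SPARK_OPTS value is produced by stringify_spark_env(), which is not shown here; a plausible sketch of such a helper (an assumption, not the project's actual implementation) would join the mapping into spark-submit --conf flags:

def stringify_spark_env_sketch(spark_env):
    # Assumption: render each key/value pair as a --conf flag.
    return ' '.join(f'--conf {key}={value}' for key, value in spark_env.items())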
Code Example #6
File: tron_tools.py  Project: ymilki/paasta
def get_spark_config_dict(self):
    if self.get_spark_cluster_manager() == "mesos":
        spark_env = get_mesos_spark_env(
            spark_app_name=f"tron_spark_{self.get_service()}_{self.get_instance()}",
            spark_ui_port=self.spark_ui_port,
            mesos_leader=(find_mesos_leader(self.get_spark_paasta_cluster())
                          if not self.for_validation else "N/A"),
            paasta_cluster=self.get_spark_paasta_cluster(),
            paasta_pool=self.get_spark_paasta_pool(),
            paasta_service=self.get_service(),
            paasta_instance=self.get_instance(),
            docker_img=self.get_docker_url(),
            volumes=[
                f"{v['hostPath']}:{v['containerPath']}:{v['mode'].lower()}"
                for v in self.get_volumes(
                    load_system_paasta_config().get_volumes())
            ],
            user_spark_opts=self.config_dict.get("spark_args", {}),
            event_log_dir=get_default_event_log_dir(
                service=self.get_service(),
                aws_credentials_yaml=self.config_dict.get(
                    "aws_credentials_yaml"),
            ),
            needs_docker_cfg=True,
        )
    else:
        spark_env = get_k8s_spark_env(
            spark_app_name=f"tron_spark_{self.get_service()}_{self.get_instance()}",
            spark_ui_port=self.spark_ui_port,
            paasta_cluster=self.get_spark_paasta_cluster(),
            paasta_service=self.get_service(),
            paasta_instance=self.get_instance(),
            docker_img=self.get_docker_url(),
            volumes=self.get_volumes(
                load_system_paasta_config().get_volumes()),
            user_spark_opts=self.config_dict.get("spark_args", {}),
            event_log_dir=get_default_event_log_dir(
                service=self.get_service(),
                aws_credentials_yaml=self.config_dict.get(
                    "aws_credentials_yaml"),
            ),
        )
    return spark_env
Code Example #7
def build_executor_stack(
    # TODO: rename to registry?
    processor,
    service,
    instance,
    cluster,
    role,
    pool,
    # TODO: move run_id into task identifier?
    run_id,
    system_paasta_config,
    framework_staging_timeout,
):

    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=cluster)
    mesos_address = '{}:{}'.format(
        mesos_tools.find_mesos_leader(cluster_fqdn),
        mesos_tools.MESOS_MASTER_PORT,
    )

    # TODO: implement DryRunExecutor?
    taskproc_config = system_paasta_config.get_taskproc()

    MesosExecutor = processor.executor_cls('mesos')
    mesos_executor = MesosExecutor(
        role=role,
        pool=pool,
        principal=taskproc_config.get('principal'),
        secret=taskproc_config.get('secret'),
        mesos_address=mesos_address,
        framework_name="paasta-remote {} {} {}".format(
            compose_job_id(service, instance),
            datetime.utcnow().strftime('%Y%m%d%H%M%S%f'),
            run_id,
        ),
        framework_staging_timeout=framework_staging_timeout,
        initial_decline_delay=0.5,
    )

    task_logging_executor = processor.executor_from_config(
        provider='logging',
        provider_config={
            'downstream_executor': mesos_executor,
        },
    )

    credentials_file = taskproc_config.get('boto_credential_file')
    if credentials_file:
        with open(credentials_file) as f:
            credentials = json.loads(f.read())
    else:
        raise ValueError("Required aws credentials")

    region = taskproc_config.get('aws_region')

    endpoint = taskproc_config.get('dynamodb_endpoint')
    session = Session(
        region_name=region,
        aws_access_key_id=credentials['accessKeyId'],
        aws_secret_access_key=credentials['secretAccessKey'],
    )

    StatefulExecutor = processor.executor_cls(provider='stateful')
    stateful_executor = StatefulExecutor(
        downstream_executor=task_logging_executor,
        persister=DynamoDBPersister(
            table_name="taskproc_events_%s" % cluster,
            session=session,
            endpoint_url=endpoint,
        ),
    )

    return stateful_executor
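This builds a three-layer stack: the Mesos executor is wrapped by a logging executor, which is in turn wrapped by a stateful executor that persists task events to a DynamoDB table named taskproc_events_<cluster>.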
Code Example #8
File: spark_run.py  Project: rajacsp/paasta
def get_spark_config(
    args,
    spark_app_name,
    spark_ui_port,
    docker_img,
    system_paasta_config,
    volumes,
):
    # User configurable Spark options
    user_args = {
        'spark.app.name': spark_app_name,
        'spark.cores.max': '4',
        'spark.executor.cores': '2',
        'spark.executor.memory': '4g',
        # Use \; for multiple constraints. e.g.
        # instance_type:m4.10xlarge\;pool:default
        'spark.mesos.constraints': 'pool:%s' % args.pool,
        'spark.mesos.executor.docker.forcePullImage': 'true',
    }

    # Spark options managed by PaaSTA
    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=args.cluster)
    mesos_address = '{}:{}'.format(
        find_mesos_leader(cluster_fqdn),
        MESOS_MASTER_PORT,
    )
    non_user_args = {
        'spark.master': 'mesos://%s' % mesos_address,
        'spark.ui.port': spark_ui_port,
        'spark.executorEnv.PAASTA_SERVICE': args.service,
        'spark.executorEnv.PAASTA_INSTANCE': '{}_{}'.format(
            args.instance, get_username()),
        'spark.executorEnv.PAASTA_CLUSTER': args.cluster,
        'spark.mesos.executor.docker.parameters':
            'label=paasta_service={},label=paasta_instance={}_{}'.format(
                args.service,
                args.instance,
                get_username(),
            ),
        'spark.mesos.executor.docker.volumes': ','.join(volumes),
        'spark.mesos.executor.docker.image': docker_img,
        'spark.mesos.principal': 'spark',
        'spark.mesos.secret': _load_mesos_secret(),
        # derby.system.home property defaulting to '.',
        # which requires directory permission changes.
        'spark.driver.extraJavaOptions': '-Dderby.system.home=/tmp/derby',
    }

    if not args.build and not args.image:
        non_user_args['spark.mesos.uris'] = 'file:///root/.dockercfg'

    if args.spark_args:
        spark_args = args.spark_args.split()
        for spark_arg in spark_args:
            fields = spark_arg.split('=')
            if len(fields) != 2:
                paasta_print(
                    PaastaColors.red(
                        "Spark option %s is not in format option=value." %
                        spark_arg, ),
                    file=sys.stderr,
                )
                sys.exit(1)

            if fields[0] in non_user_args:
                paasta_print(
                    PaastaColors.red(
                        "Spark option {} is set by PaaSTA with {}.".format(
                            fields[0],
                            non_user_args[fields[0]],
                        ), ),
                    file=sys.stderr,
                )
                sys.exit(1)
            # Update default configuration
            user_args[fields[0]] = fields[1]

    if int(user_args['spark.cores.max']) < int(
            user_args['spark.executor.cores']):
        paasta_print(
            PaastaColors.red(
                "Total number of cores {} is less than per-executor cores {}.".
                format(
                    user_args['spark.cores.max'],
                    user_args['spark.executor.cores'],
                ), ),
            file=sys.stderr,
        )
        sys.exit(1)

    exec_mem = user_args['spark.executor.memory']
    if exec_mem[-1] != 'g' or not exec_mem[:-1].isdigit() or int(
            exec_mem[:-1]) > 32:
        paasta_print(
            PaastaColors.red(
                "Executor memory {} not in format dg (d<=32).".format(
                    user_args['spark.executor.memory'], ), ),
            file=sys.stderr,
        )
        sys.exit(1)

    # Limit a container's cpu usage
    non_user_args['spark.mesos.executor.docker.parameters'] += ',cpus={}'.format(
        user_args['spark.executor.cores'])

    return dict(non_user_args, **user_args)
Code Example #9
def get_spark_config(
    args,
    spark_app_name,
    spark_ui_port,
    docker_img,
    system_paasta_config,
    volumes,
    access_key,
    secret_key,
):
    # User configurable Spark options
    user_args = {
        "spark.app.name": spark_app_name,
        "spark.cores.max": "4",
        "spark.executor.cores": "2",
        "spark.executor.memory": "4g",
        # Use \; for multiple constraints. e.g.
        # instance_type:m4.10xlarge\;pool:default
        "spark.mesos.constraints": "pool:%s" % args.pool,
        "spark.mesos.executor.docker.forcePullImage": "true",
    }

    default_event_log_dir = get_default_event_log_dir(access_key, secret_key)
    if default_event_log_dir is not None:
        user_args["spark.eventLog.enabled"] = "true"
        user_args["spark.eventLog.dir"] = default_event_log_dir

    # Spark options managed by PaaSTA
    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=args.cluster)
    mesos_address = "{}:{}".format(find_mesos_leader(cluster_fqdn),
                                   MESOS_MASTER_PORT)
    paasta_instance = get_smart_paasta_instance_name(args)
    non_user_args = {
        "spark.master": "mesos://%s" % mesos_address,
        "spark.ui.port": spark_ui_port,
        "spark.executorEnv.PAASTA_SERVICE": args.service,
        "spark.executorEnv.PAASTA_INSTANCE": paasta_instance,
        "spark.executorEnv.PAASTA_CLUSTER": args.cluster,
        "spark.executorEnv.PAASTA_INSTANCE_TYPE": "spark",
        "spark.mesos.executor.docker.parameters":
        f"label=paasta_service={args.service},label=paasta_instance={paasta_instance}",
        "spark.mesos.executor.docker.volumes": ",".join(volumes),
        "spark.mesos.executor.docker.image": docker_img,
        "spark.mesos.principal": "spark",
        "spark.mesos.secret": _load_mesos_secret(),
    }

    if not args.build and not args.image:
        non_user_args["spark.mesos.uris"] = "file:///root/.dockercfg"

    if args.spark_args:
        spark_args = args.spark_args.split()
        for spark_arg in spark_args:
            fields = spark_arg.split("=", 1)
            if len(fields) != 2:
                paasta_print(
                    PaastaColors.red(
                        "Spark option %s is not in format option=value." %
                        spark_arg),
                    file=sys.stderr,
                )
                sys.exit(1)

            if fields[0] in non_user_args:
                paasta_print(
                    PaastaColors.red(
                        "Spark option {} is set by PaaSTA with {}.".format(
                            fields[0], non_user_args[fields[0]])),
                    file=sys.stderr,
                )
                sys.exit(1)
            # Update default configuration
            user_args[fields[0]] = fields[1]

    if "spark.sql.shuffle.partitions" not in user_args:
        num_partitions = str(2 * int(user_args["spark.cores.max"]))
        user_args["spark.sql.shuffle.partitions"] = num_partitions
        paasta_print(
            PaastaColors.yellow(
                f"Warning: spark.sql.shuffle.partitions has been set to"
                f" {num_partitions} to be equal to twice the number of "
                f"requested cores, but you should consider setting a "
                f"higher value if necessary."))

    if int(user_args["spark.cores.max"]) < int(
            user_args["spark.executor.cores"]):
        paasta_print(
            PaastaColors.red(
                "Total number of cores {} is less than per-executor cores {}.".
                format(user_args["spark.cores.max"],
                       user_args["spark.executor.cores"])),
            file=sys.stderr,
        )
        sys.exit(1)

    exec_mem = user_args["spark.executor.memory"]
    if exec_mem[-1] != "g" or not exec_mem[:-1].isdigit() or int(
            exec_mem[:-1]) > 32:
        paasta_print(
            PaastaColors.red(
                "Executor memory {} not in format dg (d<=32).".format(
                    user_args["spark.executor.memory"])),
            file=sys.stderr,
        )
        sys.exit(1)

    # Limit a container's cpu usage
    non_user_args["spark.mesos.executor.docker.parameters"] += ",cpus={}".format(
        user_args["spark.executor.cores"])

    return dict(non_user_args, **user_args)
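As the validation above shows, keys in user_args can be overridden through args.spark_args, while options managed by PaaSTA cannot; a hedged illustration (the argument values are made up):

# Accepted: overrides user-configurable defaults and passes the
# memory ("dg", d <= 32) and cores checks above.
args.spark_args = "spark.executor.memory=8g spark.cores.max=8"

# Rejected: "spark.master" is in non_user_args, so the function prints
# "Spark option spark.master is set by PaaSTA with ..." and exits.
# args.spark_args = "spark.master=local[*]"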
Code Example #10
File: spark_run.py  Project: kkellyy/paasta
def get_spark_conf_str(
    args,
    container_name,
    spark_ui_port,
    docker_img,
    system_paasta_config,
    volumes,
):
    spark_conf = list()
    spark_conf.append('--conf spark.app.name=%s' % container_name)
    spark_conf.append('--conf spark.ui.port=%d' % spark_ui_port)

    cluster_fqdn = system_paasta_config.get_cluster_fqdn_format().format(
        cluster=args.cluster)
    mesos_address = '{}:{}'.format(
        find_mesos_leader(cluster_fqdn),
        MESOS_MASTER_PORT,
    )
    spark_conf.append('--conf spark.master=mesos://%s' % mesos_address)

    spark_conf.append('--conf spark.cores.max=%d' % args.max_cores)
    spark_conf.append('--conf spark.executor.memory=%dg' %
                      args.executor_memory)
    spark_conf.append('--conf spark.executor.cores=%d' % args.executor_cores)

    if args.driver_max_result_size:
        spark_conf.append('--conf spark.driver.maxResultSize=%dg' %
                          args.driver_max_result_size)
    if args.driver_memory:
        spark_conf.append('--conf spark.driver.memory=%dg' %
                          args.driver_memory)
    if args.driver_cores:
        spark_conf.append('--conf spark.driver.cores=%d' % args.driver_cores)

    spark_conf.append('--conf spark.mesos.executor.docker.image=%s' %
                      docker_img)
    if not args.build and not args.image:
        spark_conf.append('--conf spark.mesos.uris=file:///root/.dockercfg')

    if args.jars:
        spark_conf.append('--conf spark.jars=%s' % args.jars)

    spark_conf.append('--conf spark.mesos.principal=%s' % args.mesos_principal)
    if not args.mesos_secret:
        try:
            with open(DEFAULT_SPARK_MESOS_SECRET_FILE, 'r') as f:
                mesos_secret = f.read()
                spark_conf.append('--conf spark.mesos.secret=%s' %
                                  mesos_secret)
        except IOError:
            paasta_print(
                'Cannot load mesos secret from %s' %
                DEFAULT_SPARK_MESOS_SECRET_FILE,
                file=sys.stderr,
            )
            sys.exit(1)
    else:
        spark_conf.append('--conf spark.mesos.secret=%s' % args.mesos_secret)

    # derby.system.home property defaulting to '.',
    # which requires directory permission changes.
    spark_conf.append(
        '--conf spark.driver.extraJavaOptions=-Dderby.system.home=/tmp/derby')

    spark_conf.append('--conf spark.mesos.constraints=pool:%s' % args.pool)

    spark_conf.append('--conf spark.mesos.executor.docker.volumes=%s' %
                      ','.join(volumes))

    return ' '.join(spark_conf)
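The returned value is a single string of --conf flags; a hedged sketch of splicing it into a command line (the spark-shell invocation is hypothetical, not taken from this file):

conf_str = get_spark_conf_str(
    args, container_name, spark_ui_port, docker_img,
    system_paasta_config, volumes,
)
cmd = 'spark-shell {}'.format(conf_str)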