Example #1
def get_local_run_environment_vars(instance_config, port0, framework):
    """Returns a dictionary of environment variables to simulate what would be available to
    a paasta service running in a container"""
    hostname = socket.getfqdn()
    docker_image = instance_config.get_docker_image()
    if docker_image == "":
        # In a local_run environment, the docker_image may not be available
        # so we can fall back to the injected DOCKER_TAG per the paasta contract
        docker_image = os.environ["DOCKER_TAG"]
    fake_taskid = uuid.uuid4()
    env = {
        "HOST": hostname,
        "MESOS_SANDBOX": "/mnt/mesos/sandbox",
        "MESOS_CONTAINER_NAME": "localrun-%s" % fake_taskid,
        "MESOS_TASK_ID": str(fake_taskid),
        "PAASTA_DOCKER_IMAGE": docker_image,
        "PAASTA_LAUNCHED_BY": get_possible_launched_by_user_variable_from_env(),
    }
    if framework == "marathon":
        env["MARATHON_PORT"] = str(port0)
        env["MARATHON_PORT0"] = str(port0)
        env["MARATHON_PORTS"] = str(port0)
        env["MARATHON_PORT_%d" % instance_config.get_container_port()] = str(port0)
        env["MARATHON_APP_VERSION"] = "simulated_marathon_app_version"
        env["MARATHON_APP_RESOURCE_CPUS"] = str(instance_config.get_cpus())
        env["MARATHON_APP_DOCKER_IMAGE"] = docker_image
        env["MARATHON_APP_RESOURCE_MEM"] = str(instance_config.get_mem())
        env["MARATHON_APP_RESOURCE_DISK"] = str(instance_config.get_disk())
        env["MARATHON_APP_LABELS"] = ""
        env["MARATHON_APP_ID"] = "/simulated_marathon_app_id"
        env["MARATHON_HOST"] = hostname
        env["PAASTA_HOST"] = hostname

    return env
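
A minimal usage sketch for the function above, assuming the snippet's module-level imports (socket, os, uuid) and the paasta helper get_possible_launched_by_user_variable_from_env are in scope; MockInstanceConfig and every value it returns are illustrative stand-ins, not real paasta objects:

class MockInstanceConfig:
    # Stand-in exposing only the accessors get_local_run_environment_vars calls.
    def get_docker_image(self):
        return "services-example:paasta-abc123"

    def get_container_port(self):
        return 8888

    def get_cpus(self):
        return 0.25

    def get_mem(self):
        return 512

    def get_disk(self):
        return 1024


env = get_local_run_environment_vars(
    MockInstanceConfig(), port0=31337, framework="marathon"
)
assert env["MARATHON_PORT0"] == "31337"
assert env["MARATHON_PORT_8888"] == "31337"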
Example #2
def get_spark_env(
    args: argparse.Namespace,
    spark_conf_str: str,
    aws_creds: Tuple[Optional[str], Optional[str], Optional[str]],
    ui_port: str,
) -> Dict[str, str]:
    """Create the env config dict to configure on the docker container"""

    spark_env = {}
    if not args.disable_aws_credential_env_variables:
        access_key, secret_key, session_token = aws_creds
        if access_key:
            spark_env["AWS_ACCESS_KEY_ID"] = access_key
            spark_env["AWS_SECRET_ACCESS_KEY"] = secret_key
            if session_token is not None:
                spark_env["AWS_SESSION_TOKEN"] = session_token

    spark_env["AWS_DEFAULT_REGION"] = args.aws_region
    spark_env["PAASTA_LAUNCHED_BY"] = get_possible_launched_by_user_variable_from_env()
    spark_env["PAASTA_INSTANCE_TYPE"] = "spark"

    # Run spark (and mesos framework) as root.
    spark_env["SPARK_USER"] = "******"
    spark_env["SPARK_OPTS"] = spark_conf_str

    # Default configs to start the jupyter notebook server
    if args.cmd == "jupyter-lab":
        spark_env["JUPYTER_RUNTIME_DIR"] = "/source/.jupyter"
        spark_env["JUPYTER_DATA_DIR"] = "/source/.jupyter"
        spark_env["JUPYTER_CONFIG_DIR"] = "/source/.jupyter"
    elif args.cmd == "history-server":
        dirs = args.work_dir.split(":")
        spark_env["SPARK_LOG_DIR"] = dirs[1]
        if not args.spark_args or not args.spark_args.startswith(
            "spark.history.fs.logDirectory"
        ):
            print(
                "history-server requires spark.history.fs.logDirectory in spark-args",
                file=sys.stderr,
            )
            sys.exit(1)
        spark_env["SPARK_HISTORY_OPTS"] = (
            f"-D{args.spark_args} " f"-Dspark.history.ui.port={ui_port}"
        )
        spark_env["SPARK_DAEMON_CLASSPATH"] = "/opt/spark/extra_jars/*"
        spark_env["SPARK_NO_DAEMONIZE"] = "true"

    return spark_env
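
A quick sketch of a call to this version, assuming it executes inside the paasta module that defines the helper it uses; the Namespace fields, credentials, and port are made-up placeholders, not real defaults:

import argparse

args = argparse.Namespace(
    disable_aws_credential_env_variables=False,
    aws_region="us-west-2",
    cmd="jupyter-lab",
)
env = get_spark_env(
    args=args,
    spark_conf_str="--conf spark.app.name=example",
    aws_creds=("AKIAEXAMPLE", "example-secret-key", None),  # no session token
    ui_port="4040",
)
assert env["AWS_DEFAULT_REGION"] == "us-west-2"
assert env["JUPYTER_CONFIG_DIR"] == "/source/.jupyter"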
Example #3
def get_local_run_environment_vars(instance_config, port0, framework):
    """Returns a dictionary of environment variables to simulate what would be available to
    a paasta service running in a container"""
    hostname = socket.getfqdn()
    docker_image = instance_config.get_docker_image()
    if docker_image == '':
        # In a local_run environment, the docker_image may not be available
        # so we can fall back to the injected DOCKER_TAG per the paasta contract
        docker_image = os.environ['DOCKER_TAG']
    fake_taskid = uuid.uuid4()
    env = {
        'HOST': hostname,
        'MESOS_SANDBOX': '/mnt/mesos/sandbox',
        'MESOS_CONTAINER_NAME': 'localrun-%s' % fake_taskid,
        'MESOS_TASK_ID': str(fake_taskid),
        'PAASTA_DOCKER_IMAGE': docker_image,
        'PAASTA_LAUNCHED_BY': get_possible_launched_by_user_variable_from_env(),
    }
    if framework == 'marathon':
        env['MARATHON_PORT'] = str(port0)
        env['MARATHON_PORT0'] = str(port0)
        env['MARATHON_PORTS'] = str(port0)
        env['MARATHON_PORT_%d' %
            instance_config.get_container_port()] = str(port0)
        env['MARATHON_APP_VERSION'] = 'simulated_marathon_app_version'
        env['MARATHON_APP_RESOURCE_CPUS'] = str(instance_config.get_cpus())
        env['MARATHON_APP_DOCKER_IMAGE'] = docker_image
        env['MARATHON_APP_RESOURCE_MEM'] = str(instance_config.get_mem())
        env['MARATHON_APP_RESOURCE_DISK'] = str(instance_config.get_disk())
        env['MARATHON_APP_LABELS'] = ""
        env['MARATHON_APP_ID'] = '/simulated_marathon_app_id'
        env['MARATHON_HOST'] = hostname
    elif framework == 'chronos':
        env['CHRONOS_RESOURCE_DISK'] = str(instance_config.get_disk())
        env['CHRONOS_RESOURCE_CPU'] = str(instance_config.get_cpus())
        env['CHRONOS_RESOURCE_MEM'] = str(instance_config.get_mem())
        env['CHRONOS_JOB_OWNER'] = 'simulated-owner'
        env['CHRONOS_JOB_RUN_TIME'] = str(int(time.time()))
        env['CHRONOS_JOB_NAME'] = "{} {}".format(
            instance_config.get_service(), instance_config.get_instance())
        env['CHRONOS_JOB_RUN_ATTEMPT'] = str(0)
        env['mesos_task_id'] = 'ct:simulated-task-id'
    return env
Example #4
def get_spark_env(
    args,
    spark_conf,
    spark_ui_port,
):
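    """Create the env config dict to configure on the docker container."""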
    spark_env = {}

    access_key, secret_key = get_aws_credentials(args)
    spark_env['AWS_ACCESS_KEY_ID'] = access_key
    spark_env['AWS_SECRET_ACCESS_KEY'] = secret_key
    spark_env['PAASTA_LAUNCHED_BY'] = get_possible_launched_by_user_variable_from_env()

    # Run spark (and mesos framework) as root.
    spark_env['SPARK_USER'] = 'root'
    spark_env['SPARK_OPTS'] = spark_conf

    # Default configs to start the jupyter notebook server
    if args.cmd == 'jupyter':
        dirs = args.work_dir.split(':')
        spark_env['JUPYTER_RUNTIME_DIR'] = dirs[1] + '/.jupyter'
        spark_env['JUPYTER_DATA_DIR'] = dirs[1] + '/.jupyter'
    elif args.cmd == 'history-server':
        dirs = args.work_dir.split(':')
        spark_env['SPARK_LOG_DIR'] = dirs[1]
        if not args.spark_args or not args.spark_args.startswith(
                'spark.history.fs.logDirectory'):
            paasta_print(
                "history-server requires spark.history.fs.logDirectory in spark-args",
                file=sys.stderr,
            )
            sys.exit(1)
        spark_env['SPARK_HISTORY_OPTS'] = '-D%s -Dspark.history.ui.port=%d' % (
            args.spark_args,
            spark_ui_port,
        )
        spark_env['SPARK_NO_DAEMONIZE'] = 'true'

    return spark_env
Example #5
def get_spark_env(args, spark_conf, spark_ui_port, access_key, secret_key):
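    """Create the env config dict to configure on the docker container."""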
    spark_env = {}

    if access_key is not None:
        spark_env["AWS_ACCESS_KEY_ID"] = access_key
        spark_env["AWS_SECRET_ACCESS_KEY"] = secret_key
        spark_env["AWS_DEFAULT_REGION"] = args.aws_region
    spark_env["PAASTA_LAUNCHED_BY"] = get_possible_launched_by_user_variable_from_env()
    spark_env["PAASTA_INSTANCE_TYPE"] = "spark"

    # Run spark (and mesos framework) as root.
    spark_env["SPARK_USER"] = "******"
    spark_env["SPARK_OPTS"] = spark_conf

    # Default configs to start the jupyter notebook server
    if args.cmd == "jupyter-lab":
        spark_env["JUPYTER_RUNTIME_DIR"] = "/source/.jupyter"
        spark_env["JUPYTER_DATA_DIR"] = "/source/.jupyter"
        spark_env["JUPYTER_CONFIG_DIR"] = "/source/.jupyter"
    elif args.cmd == "history-server":
        dirs = args.work_dir.split(":")
        spark_env["SPARK_LOG_DIR"] = dirs[1]
        if not args.spark_args or not args.spark_args.startswith(
                "spark.history.fs.logDirectory"):
            paasta_print(
                "history-server requires spark.history.fs.logDirectory in spark-args",
                file=sys.stderr,
            )
            sys.exit(1)
        spark_env["SPARK_HISTORY_OPTS"] = "-D%s -Dspark.history.ui.port=%d" % (
            args.spark_args,
            spark_ui_port,
        )
        spark_env["SPARK_DAEMON_CLASSPATH"] = "/opt/spark/extra_jars/*"
        spark_env["SPARK_NO_DAEMONIZE"] = "true"

    return spark_env
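
For comparison, a sketch that exercises the history-server branch of this older five-argument variant, again assuming the surrounding paasta helpers are in scope; every value below is an illustrative placeholder:

import argparse

args = argparse.Namespace(
    cmd="history-server",
    spark_args="spark.history.fs.logDirectory=s3a://example-bucket/spark-logs",
    work_dir="/home/user:/source",
)
env = get_spark_env(
    args,
    spark_conf="--conf spark.app.name=example",
    spark_ui_port=4040,
    access_key=None,  # with no access key, the AWS variables are simply omitted
    secret_key=None,
)
assert env["SPARK_LOG_DIR"] == "/source"
assert "spark.history.ui.port=4040" in env["SPARK_HISTORY_OPTS"]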