Example #1
def test_s3_env():
    creds = s3.get_credentials()
    # pass the AWS credentials to the driver as plain environment variables
    # (test_s3_secrets below covers the DC/OS-secrets variant)
    args = [
        "--conf spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
            creds.access_key),
        "--conf spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
            creds.secret_key),
        "--class S3Job"
    ]

    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    # download/read linecount.txt only
    utils.run_tests(app_url=utils.dcos_test_jar_url(),
                    app_args="--readUrl {} --countOnly".format(
                        s3.s3n_url('linecount.txt')),
                    expected_output="Read 3 lines",
                    args=args)

    # download/read linecount.txt, reupload as linecount-env.txt
    utils.run_tests(app_url=utils.dcos_test_jar_url(),
                    app_args="--readUrl {} --writeUrl {}".format(
                        s3.s3n_url('linecount.txt'),
                        s3.s3n_url('linecount-env.txt')),
                    expected_output="Read 3 lines",
                    args=args)

    assert len(list(s3.list("linecount-env.txt"))) > 0
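
The s3.s3n_url(...) calls above hand the job s3n:// URLs for the Hadoop S3 connector. A minimal sketch of how such a helper could build them, assuming the same S3_BUCKET/S3_PREFIX environment variables that the upload helper in Example #2 reads (the real s3 module in the test harness may differ):

import os

def s3n_url(filename):
    # hypothetical reconstruction: bucket and prefix come from the environment,
    # mirroring the logging shown in Example #2
    return "s3n://{}/{}/{}".format(
        os.environ['S3_BUCKET'], os.environ['S3_PREFIX'], filename)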
Example #2
def upload_file(file_path):
    LOGGER.info("Uploading {} to s3://{}/{}".format(
        file_path,
        os.environ['S3_BUCKET'],
        os.environ['S3_PREFIX']))

    spark_s3.upload_file(file_path)

    basename = os.path.basename(file_path)
    return spark_s3.http_url(basename)
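
For context, a rough boto3-based stand-in for the spark_s3 helper used above; the key layout and URL format are assumptions, and the real helper may handle ACLs, regions, and retries differently:

import os
import boto3

def upload_file(file_path):
    # upload under <S3_PREFIX>/<basename> in the configured bucket
    key = "{}/{}".format(os.environ['S3_PREFIX'], os.path.basename(file_path))
    boto3.client('s3').upload_file(file_path, os.environ['S3_BUCKET'], key)

def http_url(basename):
    # plain HTTPS URL for the uploaded object (assumed virtual-hosted style)
    return "https://{}.s3.amazonaws.com/{}/{}".format(
        os.environ['S3_BUCKET'], os.environ['S3_PREFIX'], basename)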
Example #3
def test_s3_secrets():
    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    creds = s3.get_credentials()

    def make_credential_secret(path, val):
        sdk_security.delete_secret(path)
        rc, stdout, stderr = sdk_cmd.run_raw_cli(
            "security secrets create /{} -v {}".format(path, val))
        assert rc == 0, "Failed to create secret {}, stderr: {}, stdout: {}".format(
            path, stderr, stdout)

    aws_access_key_path = "aws_access_key_id"
    make_credential_secret(aws_access_key_path, creds.access_key)
    aws_secret_key_path = "aws_secret_access_key"
    make_credential_secret(aws_secret_key_path, creds.secret_key)

    args = [
        "--conf spark.mesos.containerizer=mesos",
        "--conf spark.mesos.driver.secret.names=/{key},/{secret}".format(
            key=aws_access_key_path, secret=aws_secret_key_path),
        "--conf spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY",
        "--class S3Job"
    ]

    try:
        # download/read linecount.txt only
        utils.run_tests(app_url=utils.dcos_test_jar_url(),
                        app_args="--readUrl {} --countOnly".format(
                            s3.s3n_url('linecount.txt')),
                        expected_output="Read 3 lines",
                        args=args)
        # download/read linecount.txt, reupload as linecount-secret.txt:
        utils.run_tests(app_url=utils.dcos_test_jar_url(),
                        app_args="--readUrl {} --writeUrl {}".format(
                            s3.s3n_url('linecount.txt'),
                            s3.s3n_url('linecount-secret.txt')),
                        expected_output="Read 3 lines",
                        args=args)
        assert len(list(s3.list("linecount-secret.txt"))) > 0
    finally:
        sdk_security.delete_secret(aws_access_key_path)
        sdk_security.delete_secret(aws_secret_key_path)
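
sdk_cmd.run_raw_cli is a test-harness helper around the DC/OS CLI; a rough mental model (not the actual implementation) is a subprocess wrapper like the following sketch:

import subprocess

def run_raw_cli(cmd):
    # run "dcos <cmd>" and return (returncode, stdout, stderr)
    result = subprocess.run("dcos " + cmd, shell=True,
                            capture_output=True, text=True)
    return result.returncode, result.stdout.strip(), result.stderr.strip()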
Example #4
def test_pipeline(kerberos_flag,
                  stop_count,
                  jar_uri,
                  keytab_secret,
                  spark_app_name,
                  jaas_uri=None):
    stop_count = str(stop_count)
    kerberized = kerberos_flag == "true"
    broker_dns = _kafka_broker_dns()
    topic = "top1"

    # big.txt is fetched into the sandbox via spark.mesos.uris and then read
    # locally by the producer
    big_file = "file:///mnt/mesos/sandbox/big.txt"
    big_file_url = "http://norvig.com/big.txt"

    # arguments passed to the producer application
    producer_args = " ".join([broker_dns, big_file, topic, kerberos_flag])

    uris = "spark.mesos.uris={}".format(big_file_url)

    # for a Kerberized run with no JAAS config supplied, upload the bundled
    # one; otherwise only append a caller-supplied URI when it actually exists
    if kerberized and jaas_uri is None:
        jaas_path = os.path.join(THIS_DIR, "resources",
                                 "spark-kafka-client-jaas.conf")
        s3.upload_file(jaas_path)
        _uri = s3.s3_http_url("spark-kafka-client-jaas.conf")
        uris += ",{}".format(_uri)
    elif jaas_uri is not None:
        uris += ",{}".format(jaas_uri)

    # submit flags shared by the producer and the consumer jobs
    common_args = [
        "--conf", "spark.mesos.containerizer=mesos", "--conf",
        "spark.scheduler.maxRegisteredResourcesWaitingTime=2400s", "--conf",
        "spark.scheduler.minRegisteredResourcesRatio=1.0", "--conf", uris
    ]

    # secret, keytab, krb5, and JAAS settings applied only to Kerberized runs
    kerberos_args = [
        "--conf",
        "spark.mesos.driver.secret.names={}".format(keytab_secret),
        "--conf",
        "spark.mesos.driver.secret.filenames=kafka-client.keytab",
        "--conf",
        "spark.mesos.executor.secret.names={}".format(keytab_secret),
        "--conf",
        "spark.mesos.executor.secret.filenames=kafka-client.keytab",
        "--conf",
        "spark.mesos.task.labels=DCOS_SPACE:{}".format(utils.SPARK_APP_NAME),
        "--conf",
        "spark.executorEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf",
        "spark.mesos.driverEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf",
        "spark.driver.extraJavaOptions=-Djava.security.auth.login.config="
        "/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
        "--conf",
        "spark.executor.extraJavaOptions="
        "-Djava.security.auth.login.config=/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
    ]

    producer_config = [
        "--conf", "spark.cores.max=2", "--conf", "spark.executor.cores=2",
        "--class", "KafkaFeeder"
    ] + common_args

    if kerberized:
        producer_config += kerberos_args

    producer_id = utils.submit_job(app_url=jar_uri,
                                   app_args=producer_args,
                                   app_name=spark_app_name,
                                   args=producer_config)

    shakedown.wait_for(_producer_launched,
                       ignore_exceptions=False,
                       timeout_seconds=600)
    shakedown.wait_for(lambda: utils.is_service_ready(KAFKA_SERVICE_NAME, 1),
                       ignore_exceptions=False,
                       timeout_seconds=600)

    consumer_config = [
        "--conf", "spark.cores.max=4", "--class", "KafkaConsumer"
    ] + common_args

    if kerberized:
        consumer_config += kerberos_args

    consumer_args = " ".join([broker_dns, topic, stop_count, kerberos_flag])

    utils.run_tests(app_url=jar_uri,
                    app_args=consumer_args,
                    expected_output="Read {} words".format(stop_count),
                    app_name=spark_app_name,
                    args=consumer_config)

    utils.kill_driver(producer_id, spark_app_name)
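
A hypothetical call site, just to show how the parameters fit together; the jar URL, secret name, and stop count below are placeholders rather than values from the original suite:

test_pipeline(kerberos_flag="true",
              stop_count=1000,  # placeholder message count
              jar_uri="https://example.com/dcos-spark-scala-tests.jar",  # placeholder jar URL
              keytab_secret="__dcos_base64__kafka_keytab",  # placeholder secret name
              spark_app_name=utils.SPARK_APP_NAME)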
Example #5
def upload_file(file_path):
    spark_s3.upload_file(file_path)
    return spark_s3.http_url(os.path.basename(file_path))
Example #6
def upload_jaas():
    jaas_path = os.path.join(THIS_DIR, "..", "resources",
                             "spark-kafka-client-jaas.conf")
    s3.upload_file(jaas_path)
    return s3.http_url("spark-kafka-client-jaas.conf")
Example #7
def test_s3():
    def make_credential_secret(envvar, secret_path):
        rc, stdout, stderr = sdk_cmd.run_raw_cli(
            "security secrets create {p} -v {e}".format(p=secret_path,
                                                        e=os.environ[envvar]))
        assert rc == 0, "Failed to create secret {secret} from envvar {envvar}, stderr: {err}, stdout: {out}".format(
            secret=secret_path, envvar=envvar, err=stderr, out=stdout)

    LOGGER.info("Creating AWS secrets")

    aws_access_key_secret_path = "aws_access_key_id"
    aws_secret_access_key_path = "aws_secret_access_key"

    make_credential_secret(
        envvar="AWS_ACCESS_KEY_ID",
        secret_path="/{}".format(aws_access_key_secret_path))
    make_credential_secret(
        envvar="AWS_SECRET_ACCESS_KEY",
        secret_path="/{}".format(aws_secret_access_key_path))

    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    app_args = "--readUrl {} --writeUrl {}".format(s3.s3n_url('linecount.txt'),
                                                   s3.s3n_url("linecount-out"))

    args = [
        "--conf", "spark.mesos.containerizer=mesos", "--conf",
        "spark.mesos.driver.secret.names=/{key},/{secret}".format(
            key=aws_access_key_secret_path,
            secret=aws_secret_access_key_path), "--conf",
        "spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY",
        "--class", "S3Job"
    ]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)

    assert len(list(s3.list("linecount-out"))) > 0

    app_args = "--readUrl {} --countOnly".format(s3.s3n_url('linecount.txt'))

    args = [
        "--conf", "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
            os.environ["AWS_ACCESS_KEY_ID"]), "--conf",
        "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
            os.environ["AWS_SECRET_ACCESS_KEY"]), "--class", "S3Job"
    ]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)

    app_args = "--countOnly --readUrl {}".format(s3.s3n_url('linecount.txt'))

    args = [
        "--conf", "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
            os.environ["AWS_ACCESS_KEY_ID"]), "--conf",
        "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
            os.environ["AWS_SECRET_ACCESS_KEY"]), "--class", "S3Job"
    ]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)
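
Unlike Example #3, this test creates its credential secrets without a matching teardown; assuming the same sdk_security helper is available, a cleanup along these lines could be appended (a sketch, not part of the original test):

def cleanup_aws_secrets():
    # remove the credential secrets created at the start of test_s3
    for path in ("aws_access_key_id", "aws_secret_access_key"):
        sdk_security.delete_secret(path)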