def get_env(self):
    env = super().get_env()
    if self.get_executor() == "spark":
        env["EXECUTOR_CLUSTER"] = self.get_spark_paasta_cluster()
        env["EXECUTOR_POOL"] = self.get_spark_paasta_pool()
        # Run spark (and mesos framework) as root.
        env["SPARK_USER"] = "******"
        env["SPARK_OPTS"] = stringify_spark_env(self.get_spark_config_dict())
        env.update(get_mesos_spark_auth_env())
        env["CLUSTERMAN_RESOURCES"] = json.dumps(
            dict(
                get_spark_resource_requirements(
                    spark_config_dict=self.get_spark_config_dict(),
                    webui_url=get_webui_url(self.spark_ui_port),
                ).values()
            )
        )
        if "AWS_ACCESS_KEY_ID" not in env or "AWS_SECRET_ACCESS_KEY" not in env:
            try:
                access_key, secret_key = get_aws_credentials(
                    service=self.get_service(),
                    aws_credentials_yaml=self.config_dict.get("aws_credentials_yaml"),
                )
                env["AWS_ACCESS_KEY_ID"] = access_key
                env["AWS_SECRET_ACCESS_KEY"] = secret_key
            except Exception:
                log.warning(
                    f"Cannot set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment "
                    f"variables for tron action {self.get_instance()} of service "
                    f"{self.get_service()} via credential file. Traceback:\n"
                    f"{traceback.format_exc()}"
                )
        if "AWS_DEFAULT_REGION" not in env:
            env["AWS_DEFAULT_REGION"] = DEFAULT_AWS_REGION
    return env
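# Context for the SPARK_OPTS line above: stringify_spark_env is not defined in
# this excerpt. A minimal sketch, assuming it only flattens the spark config
# dict into space-separated key=value pairs; the real helper may quote, order,
# or prefix the entries differently.
def stringify_spark_env_sketch(spark_env: dict) -> str:
    # e.g. {"spark.cores.max": "4", "spark.executor.memory": "4g"}
    # -> "spark.cores.max=4 spark.executor.memory=4g"
    return " ".join(f"{key}={value}" for key, value in spark_env.items())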
def test_service_provided_no_yaml(
    mock_load_aws_credentials_from_yaml,
    mock_os,
):
    mock_os.return_value = True
    credentials = get_aws_credentials(service="service_name")
    mock_load_aws_credentials_from_yaml.assert_called_once_with(
        "/etc/boto_cfg/service_name.yaml"
    )
    assert credentials == mock_load_aws_credentials_from_yaml.return_value
def test_service_provided_fallback_to_default(mock_get_credentials, mock_os):
    args = mock.Mock(
        no_aws_credentials=False, aws_credentials_yaml=None, service="service_name"
    )
    mock_os.path.exists.return_value = False
    mock_get_credentials.return_value = mock.MagicMock(
        access_key="id", secret_key="secret"
    )
    credentials = get_aws_credentials(args)
    assert credentials == ("id", "secret")
def test_use_default_creds(mock_load_aws_credentials_from_yaml, mock_get_credentials):
    args = mock.Mock(
        no_aws_credentials=False,
        aws_credentials_yaml=None,
        service=DEFAULT_SPARK_SERVICE,
    )
    mock_get_credentials.return_value = mock.MagicMock(
        access_key="id", secret_key="secret"
    )
    credentials = get_aws_credentials(args)
    assert credentials == ("id", "secret")
def configure_and_run_docker_container(
    args: argparse.Namespace,
    docker_img: str,
    instance_config: InstanceConfig,
    system_paasta_config: SystemPaastaConfig,
) -> int:
    volumes = list()
    for volume in instance_config.get_volumes(system_paasta_config.get_volumes()):
        if os.path.exists(volume["hostPath"]):
            volumes.append(
                "{}:{}:{}".format(
                    volume["hostPath"], volume["containerPath"], volume["mode"].lower()
                )
            )
        else:
            print(
                PaastaColors.yellow(
                    "Warning: Path %s does not exist on this host. Skipping this binding."
                    % volume["hostPath"]
                ),
                file=sys.stderr,
            )

    original_docker_cmd = args.cmd or instance_config.get_cmd()
    spark_ui_port = pick_random_port(args.service + str(os.getpid()))
    spark_app_name = get_spark_app_name(original_docker_cmd, spark_ui_port)

    access_key, secret_key = get_aws_credentials(
        service=args.service,
        no_aws_credentials=args.no_aws_credentials,
        aws_credentials_yaml=args.aws_credentials_yaml,
        profile_name=args.aws_profile,
    )
    spark_config_dict = get_spark_config(
        args=args,
        spark_app_name=spark_app_name,
        spark_ui_port=spark_ui_port,
        docker_img=docker_img,
        system_paasta_config=system_paasta_config,
        volumes=volumes,
        access_key=access_key,
        secret_key=secret_key,
    )
    spark_conf_str = create_spark_config_str(spark_config_dict, is_mrjob=args.mrjob)

    # Spark client specific volumes
    volumes.append("%s:rw" % args.work_dir)
    volumes.append("/etc/passwd:/etc/passwd:ro")
    volumes.append("/etc/group:/etc/group:ro")
    volumes.append("/nail/home:/nail/home:rw")

    environment = instance_config.get_env_dictionary()
    environment.update(
        get_spark_env(args, spark_conf_str, spark_ui_port, access_key, secret_key)
    )

    webui_url = get_webui_url(spark_ui_port)

    docker_cmd = get_docker_cmd(args, instance_config, spark_conf_str)
    if "history-server" in docker_cmd:
        print(f"\nSpark history server URL {webui_url}\n")
    elif any(c in docker_cmd for c in ["pyspark", "spark-shell", "spark-submit"]):
        print(f"\nSpark monitoring URL {webui_url}\n")

    if clusterman_metrics and _should_emit_resource_requirements(
        docker_cmd, args.mrjob
    ):
        try:
            emit_resource_requirements(spark_config_dict, args.cluster, webui_url)
        except Boto3Error as e:
            print(
                PaastaColors.red(
                    f"Encountered {e} while attempting to send resource requirements to Clusterman."
                )
            )
            if args.suppress_clusterman_metrics_errors:
                print(
                    "Continuing anyway since --suppress-clusterman-metrics-errors was passed"
                )
            else:
                raise

    return run_docker_container(
        container_name=spark_app_name,
        volumes=volumes,
        environment=environment,
        docker_img=docker_img,
        docker_cmd=docker_cmd,
        dry_run=args.dry_run,
        nvidia=args.nvidia,
    )
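# _should_emit_resource_requirements is referenced above but not shown. A
# plausible sketch, assuming requirements are only reported to Clusterman for
# commands that actually launch Spark work (mirroring the docker_cmd check
# used for the monitoring URL above); the real helper may differ.
def _should_emit_resource_requirements_sketch(docker_cmd: str, is_mrjob: bool) -> bool:
    return is_mrjob or any(
        c in docker_cmd for c in ["pyspark", "spark-shell", "spark-submit"]
    )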
def test_yaml_provided(mock_load_aws_credentials_from_yaml):
    credentials = get_aws_credentials(aws_credentials_yaml="credentials.yaml")
    mock_load_aws_credentials_from_yaml.assert_called_once_with("credentials.yaml")
    assert credentials == mock_load_aws_credentials_from_yaml.return_value
def test_creds_disabled():
    credentials = get_aws_credentials(no_aws_credentials=True)
    assert credentials == (None, None)
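# The tests above exercise get_aws_credentials without showing it. A minimal
# sketch of the branching they imply, assuming a load_aws_credentials_from_yaml
# helper and a boto3 Session fallback; the helper names, the /etc/boto_cfg
# path, and the error handling are assumptions, not the actual implementation.
import os

import boto3


def get_aws_credentials_sketch(
    service: str = DEFAULT_SPARK_SERVICE,
    no_aws_credentials: bool = False,
    aws_credentials_yaml: str = None,
    profile_name: str = None,
):
    if no_aws_credentials:
        # Credentials explicitly disabled: return an empty pair.
        return None, None
    if aws_credentials_yaml:
        # An explicit yaml path wins over everything else.
        return load_aws_credentials_from_yaml(aws_credentials_yaml)
    if service != DEFAULT_SPARK_SERVICE:
        # Look for a per-service credentials file, e.g. /etc/boto_cfg/<service>.yaml.
        service_credentials_path = f"/etc/boto_cfg/{service}.yaml"
        if os.path.exists(service_credentials_path):
            return load_aws_credentials_from_yaml(service_credentials_path)
    # Otherwise fall back to the default boto3 credential chain.
    creds = boto3.Session(profile_name=profile_name).get_credentials()
    return creds.access_key, creds.secret_key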