Example #1
    def get_env(self):
        env = super().get_env()
        if self.get_executor() == "spark":
            env["EXECUTOR_CLUSTER"] = self.get_spark_paasta_cluster()
            env["EXECUTOR_POOL"] = self.get_spark_paasta_pool()
            # Run spark (and mesos framework) as root.
            env["SPARK_USER"] = "******"
            env["SPARK_OPTS"] = stringify_spark_env(
                self.get_spark_config_dict())
            env.update(get_mesos_spark_auth_env())
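            # Serialize the requested resources for Clusterman as JSON:
            # {clusterman metric key: requested quantity}.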
            env["CLUSTERMAN_RESOURCES"] = json.dumps(
                dict(
                    get_spark_resource_requirements(
                        spark_config_dict=self.get_spark_config_dict(),
                        webui_url=get_webui_url(self.spark_ui_port),
                    ).values()))
            if "AWS_ACCESS_KEY_ID" not in env or "AWS_SECRET_ACCESS_KEY" not in env:
                try:
                    access_key, secret_key = get_aws_credentials(
                        service=self.get_service(),
                        aws_credentials_yaml=self.config_dict.get(
                            "aws_credentials_yaml"),
                    )
                    env["AWS_ACCESS_KEY_ID"] = access_key
                    env["AWS_SECRET_ACCESS_KEY"] = secret_key
                except Exception:
                    log.warning(
                        f"Cannot set AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment "
                        f"variables for tron action {self.get_instance()} of service "
                        f"{self.get_service()} via credentail file. Traceback:\n"
                        f"{traceback.format_exc()}")
            if "AWS_DEFAULT_REGION" not in env:
                env["AWS_DEFAULT_REGION"] = DEFAULT_AWS_REGION

        return env
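
SPARK_OPTS is built by stringify_spark_env, which is not part of the snippet above. A minimal sketch, assuming it only flattens the Spark config dict into "--conf key=value" pairs for spark-submit:

def stringify_spark_env(spark_env):
    # Assumed helper: join each Spark config entry into a single
    # "--conf key=value ..." string that spark-submit understands.
    return " ".join(f"--conf {key}={value}" for key, value in spark_env.items())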
Example #2
def emit_resource_requirements(spark_config_dict, paasta_cluster, webui_url):
    print("Sending resource request metrics to Clusterman")

    desired_resources = get_spark_resource_requirements(spark_config_dict, webui_url)
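    # desired_resources maps a resource name (e.g. "cpus", "mem") to a
    # (clusterman metric key, requested quantity) pair.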
    constraints = parse_constraints_string(spark_config_dict["spark.mesos.constraints"])
    pool = constraints["pool"]

    aws_region = get_aws_region_for_paasta_cluster(paasta_cluster)
    metrics_client = clusterman_metrics.ClustermanMetricsBotoClient(
        region_name=aws_region, app_identifier=pool
    )

    cpus = desired_resources["cpus"][1]
    mem = desired_resources["mem"][1]
    est_cost = clusterman_metrics.util.costs.estimate_cost_per_hour(
        cluster=paasta_cluster, pool=pool, cpus=cpus, mem=mem,
    )
    message = f"Resource request ({cpus} cpus and {mem} MB memory total) is estimated to cost ${est_cost} per hour"
    if clusterman_metrics.util.costs.should_warn(est_cost):
        message = "WARNING: " + message
        print(PaastaColors.red(message))
    else:
        print(message)

    with metrics_client.get_writer(
        clusterman_metrics.APP_METRICS, aggregate_meteorite_dims=True
    ) as writer:
        for _, (metric_key, desired_quantity) in desired_resources.items():
            writer.send((metric_key, int(time.time()), desired_quantity))
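
A hedged usage sketch for Example #2; the config values, cluster name, and web UI URL below are illustrative placeholders, and the exact format of the "spark.mesos.constraints" string depends on parse_constraints_string, which is not shown here:

# Hypothetical call; every value below is an illustrative placeholder.
example_spark_config = {
    "spark.cores.max": "32",
    "spark.executor.memory": "4g",
    # Assumed constraint syntax carrying the Clusterman pool.
    "spark.mesos.constraints": "pool:batch",
}
emit_resource_requirements(
    spark_config_dict=example_spark_config,
    paasta_cluster="example-cluster",
    webui_url="http://localhost:4040",
)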