예제 #1
0
    def test_reads_s3_config(self):
        """from_s3 parses the YAML body returned by a stubbed get_object."""
        bucket, key = "config_bucket", "config_key"
        s3 = boto3.client("s3")

        with open(TEST_PATH_CONFIG_CLUSTER, "r") as config_file:
            raw_yaml = config_file.read()
        expected = yaml.safe_load(raw_yaml)

        # The stubbed response streams the fixture back as UTF-8 bytes;
        # StreamingBody needs the exact byte length of that payload.
        payload = raw_yaml.encode("utf-8")
        body = StreamingBody(BytesIO(payload), len(payload))

        with Stubber(s3) as stubber:
            stubber.add_response(
                "get_object",
                {"Body": body},
                {"Bucket": bucket, "Key": key},
            )
            actual = ClusterConfig.from_s3(
                bucket=bucket,
                key=key,
                s3_client=s3,
            )

        assert expected == actual
예제 #2
0
    def test_raises_correct_exception_s3_not_found(self):
        """A stubbed NoSuchKey error surfaces as ConfigNotFoundError."""
        s3 = boto3.client("s3")
        location = {"Bucket": "config_bucket", "Key": "config_key"}

        with Stubber(s3) as stubber:
            # Queue the error the stubbed get_object call should produce.
            stubber.add_client_error(
                "get_object",
                service_error_code="NoSuchKey",
                expected_params=location,
            )
            with pytest.raises(ConfigNotFoundError):
                ClusterConfig.from_s3(
                    bucket=location["Bucket"],
                    key=location["Key"],
                    s3_client=s3,
                )
예제 #3
0
def read_config(config_type: str,
                s3_overrides: dict = None,
                required: bool = True) -> ClusterConfig:
    """Reads an EMR cluster configuration file.

    Reads configuration details of an EMR cluster from either a local file or
    from an S3 object. If the `EMR_LAUNCHER_CONFIG_DIR` environment variable
    is set to a non-empty value, `<config_type>.yaml` is read from that
    directory. Otherwise the bucket and folder returned by
    `get_s3_location(s3_overrides)` are used and the object
    `<folder>/<config_type>.yaml` is read from that bucket.

    Parameters:
    config_type (str): The type of config file to read. Must be one of
                       `cluster`, `instances`, or `steps`.

    s3_overrides (dict): The optional s3 location overrides for the EMR config
                         files. Only used when no local config directory is
                         set.

    required (bool): Whether or not the configuration file should be required
                     to be present. If set to True and the configuration file
                     can't be read, then this function will raise an exception

    Returns:
    ClusterConfig: The cluster configuration parsed from the provided input,
                   or None when the configuration is not found and `required`
                   is False.

    Raises:
    ConfigNotFoundError: If the configuration is not found and `required` is
                         True.
    """
    logger = logging.getLogger("emr_launcher")

    local_config_dir = os.getenv("EMR_LAUNCHER_CONFIG_DIR")
    try:
        if local_config_dir:
            # Pass the directory string itself; wrapping it in braces would
            # build a one-element set rather than the intended value.
            logger.info("Locating configs",
                        extra={"local_config_dir": local_config_dir})
            config = ClusterConfig.from_local(file_path=os.path.join(
                local_config_dir, f"{config_type}.yaml"))
        else:
            s3_bucket_location = get_s3_location(s3_overrides)
            s3_bucket = s3_bucket_location[0]
            s3_folder = s3_bucket_location[1]
            logger.info(
                "Locating configs",
                extra={
                    "s3_bucket": s3_bucket,
                    "s3_folder": s3_folder
                },
            )
            s3_key = f"{s3_folder}/{config_type}.yaml"
            config = ClusterConfig.from_s3(bucket=s3_bucket, key=s3_key)
    except ConfigNotFoundError:
        if required:
            raise
        logger.debug("Config type %s not found", config_type)
        return None

    # Use %-style placeholders so the config is actually rendered in the
    # message; a positional arg without a placeholder is never formatted in.
    logger.debug("%s config: %s", config_type, config)

    return config