Esempio n. 1
0
    def test_find_replace_nonexistent_path(self):
        """find_replace on the path ``Nonexistent.Path`` leaves the config unchanged.

        A second ``ClusterConfig`` is built from the instances fixture and
        passed through ``find_replace``; afterwards it must still be a
        distinct object that compares equal to the untouched original.
        """

        def rename_to_test(node):
            # Replacement callback: same node, with "Name" forced to "TEST".
            return {**node, "Name": "TEST"}

        original = ClusterConfig.from_local(
            file_path=TEST_PATH_CONFIG_INSTANCES)
        duplicate = ClusterConfig(original)

        duplicate.find_replace("Nonexistent.Path", "Name", "MASTER",
                               rename_to_test)

        assert original is not duplicate
        assert duplicate == original
Esempio n. 2
0
def read_config(config_type: str,
                s3_overrides: dict = None,
                required: bool = True) -> ClusterConfig:
    """Reads an EMR cluster configuration file.

    The file ``<config_type>.yaml`` is read from the local directory named by
    the ``EMR_LAUNCHER_CONFIG_DIR`` environment variable when that variable is
    set to a non-empty value; otherwise it is read from the S3 location
    returned by ``get_s3_location(s3_overrides)``.

    Parameters:
    config_type (str): The type of config file to read, e.g. `cluster`,
                       `instances`, or `steps`. The value is not validated
                       here; it is only used to build the file name.

    s3_overrides (dict): The optional S3 location overrides for the EMR
                         config files, passed through to `get_s3_location`.

    required (bool): Whether or not the configuration file should be required
                     to be present. If set to True and the configuration file
                     can't be found, the ConfigNotFoundError is re-raised.

    Returns:
    ClusterConfig: The cluster configuration parsed from the located file, or
                   None when the file is not found and `required` is False.

    Raises:
    ConfigNotFoundError: If the file is not found and `required` is True.
    """
    logger = logging.getLogger("emr_launcher")

    local_config_dir = os.getenv("EMR_LAUNCHER_CONFIG_DIR")
    try:
        if local_config_dir:
            # Log the directory as a plain string. Wrapping it in braces
            # would build a one-element set, which structured log
            # formatters cannot serialise.
            logger.info("Locating configs",
                        extra={"local_config_dir": local_config_dir})
            config = ClusterConfig.from_local(file_path=os.path.join(
                local_config_dir, f"{config_type}.yaml"))
        else:
            s3_bucket_location = get_s3_location(s3_overrides)
            s3_bucket = s3_bucket_location[0]
            s3_folder = s3_bucket_location[1]
            logger.info(
                "Locating configs",
                extra={
                    "s3_bucket": s3_bucket,
                    "s3_folder": s3_folder
                },
            )
            s3_key = f"{s3_folder}/{config_type}.yaml"
            config = ClusterConfig.from_s3(bucket=s3_bucket, key=s3_key)

        # Logging arguments are %-interpolated into the message, so the
        # message needs a placeholder for each one; without it the config
        # would never appear in the log output.
        logger.debug("%s config: %s", config_type, config)

        return config
    except ConfigNotFoundError:
        if required:
            raise
        logger.debug("Config type %s not found", config_type)
        # Optional config that is absent: signal "nothing to load".
        return None
Esempio n. 3
0
    def test_reads_s3_config(self):
        """from_s3 returns the parsed YAML served by a stubbed S3 client.

        The cluster fixture file is served as the body of a stubbed
        ``get_object`` call, and the result of ``ClusterConfig.from_s3`` is
        compared with the same text parsed directly by ``yaml.safe_load``.
        """
        s3_client = boto3.client("s3")
        bucket_name = "config_bucket"
        object_key = "config_key"

        with open(TEST_PATH_CONFIG_CLUSTER, "r") as fixture:
            raw_yaml = fixture.read()
        expected = yaml.safe_load(raw_yaml)

        # StreamingBody is given the payload length up front; the encoded
        # byte count is that length.
        payload = raw_yaml.encode("utf-8")
        response_body = StreamingBody(BytesIO(payload), len(payload))

        with Stubber(s3_client) as stubber:
            stubber.add_response(
                "get_object",
                {"Body": response_body},
                {"Bucket": bucket_name, "Key": object_key},
            )
            actual = ClusterConfig.from_s3(bucket=bucket_name,
                                           key=object_key,
                                           s3_client=s3_client)

        assert expected == actual
Esempio n. 4
0
 def test_override(self):
     """override replaces the given key and keeps the other checked key.

     After overriding ``Instances.Ec2SubnetId``, the new subnet id must be
     visible and ``EmrManagedMasterSecurityGroup`` must still hold the
     value ``$MASTER_SG``.
     """
     subnet_override = {"Instances": {"Ec2SubnetId": "Test_Subnet_Id"}}
     config = ClusterConfig.from_local(file_path=TEST_PATH_CONFIG_INSTANCES)

     config.override(subnet_override)

     instances = config["Instances"]
     assert instances["Ec2SubnetId"] == "Test_Subnet_Id"
     assert instances["EmrManagedMasterSecurityGroup"] == "$MASTER_SG"
Esempio n. 5
0
    def test_get_nested_node(self):
        """get_nested_node resolves a dotted path to the raw YAML value.

        The value returned for ``Instances.Ec2SubnetId`` must equal the one
        found by indexing the fixture loaded with ``load_local_yaml``.
        """
        raw_fixture = load_local_yaml(TEST_PATH_CONFIG_INSTANCES)
        expected = raw_fixture["Instances"]["Ec2SubnetId"]

        config = ClusterConfig.from_local(file_path=TEST_PATH_CONFIG_INSTANCES)
        actual = config.get_nested_node("Instances.Ec2SubnetId")

        assert actual == expected
Esempio n. 6
0
def get_default_config() -> ClusterConfig:
    """Build a ClusterConfig by merging the default YAML config files.

    Reads ``cluster.yaml``, ``configurations.yaml``, ``instances.yaml`` and
    ``steps.yaml`` (in that order) from ``EMR_LAUNCHER_CONFIG_DIR`` and merges
    their top-level keys into a single dict. A key present in more than one
    file takes the value from the file read last.

    Returns:
    ClusterConfig: A config wrapping the merged dictionary.

    Raises:
    OSError: If any of the four files cannot be opened.
    """
    cluster_config = {}
    for config_type in ("cluster", "configurations", "instances", "steps"):
        config_path = os.path.join(EMR_LAUNCHER_CONFIG_DIR,
                                   f"{config_type}.yaml")
        with open(config_path, "r", encoding="utf-8") as f:
            # safe_load returns None for an empty document; treat that as
            # "no keys" instead of letting dict.update(None) raise TypeError.
            cluster_config.update(yaml.safe_load(f) or {})
    return ClusterConfig(cluster_config)
Esempio n. 7
0
    def test_raises_correct_exception_s3_not_found(self):
        """from_s3 raises ConfigNotFoundError on a stubbed NoSuchKey error.

        The S3 client is stubbed so that ``get_object`` for the expected
        bucket/key fails with the service error code ``NoSuchKey``.
        """
        s3_client = boto3.client("s3")
        location = {"Bucket": "config_bucket", "Key": "config_key"}

        with Stubber(s3_client) as stubber:
            stubber.add_client_error(
                "get_object",
                service_error_code="NoSuchKey",
                expected_params=location,
            )
            with pytest.raises(ConfigNotFoundError):
                ClusterConfig.from_s3(bucket=location["Bucket"],
                                      key=location["Key"],
                                      s3_client=s3_client)
Esempio n. 8
0
    def test_insert_nested_node(self):
        """insert_nested_node stores a value under a new dotted path.

        After inserting at ``Instances.TestNode``, the same value must be
        readable through ``config["Instances"]["TestNode"]``.
        """
        new_node = {"TestKey": "TestValue"}
        config = ClusterConfig.from_local(file_path=TEST_PATH_CONFIG_INSTANCES)

        config.insert_nested_node("Instances.TestNode", new_node)

        stored_node = config["Instances"]["TestNode"]
        assert stored_node == new_node
Esempio n. 9
0
    def test_find_replace_raises_not_list(self):
        """find_replace raises TypeError for the path ``Instances.Ec2SubnetId``.

        Per the test name, the node at that path is presumably not a list in
        the instances fixture, which is what should trigger the error.
        """

        def rename_to_test(node):
            # Replacement callback: same node, with "Name" forced to "TEST".
            return {**node, "Name": "TEST"}

        config = ClusterConfig.from_local(file_path=TEST_PATH_CONFIG_INSTANCES)

        with pytest.raises(TypeError):
            config.find_replace("Instances.Ec2SubnetId", "Name", "MASTER",
                                rename_to_test)
Esempio n. 10
0
    def test_find_replace_single_path(self):
        """find_replace rewrites the matching entry under a one-segment path.

        The ``Configurations`` entry whose ``Classification`` is ``yarn-site``
        is replaced by a copy carrying a new ``Properties`` mapping, which
        must then be readable from the config.
        """

        def set_test_property(entry):
            # Replacement callback: same entry with "Properties" overwritten.
            return {**entry, "Properties": {"test-property": "test-value"}}

        config = ClusterConfig.from_local(
            file_path=TEST_PATH_CONFIG_CONFIGURATIONS)
        config.find_replace("Configurations", "Classification", "yarn-site",
                            set_test_property)

        yarn_site_entries = (entry for entry in config["Configurations"]
                             if entry["Classification"] == "yarn-site")
        # No default is given to next(): a missing yarn-site entry surfaces
        # as StopIteration.
        updated_entry = next(yarn_site_entries)
        assert updated_entry["Properties"]["test-property"] == "test-value"
Esempio n. 11
0
    def test_find_replace_nested_path(self):
        """find_replace rewrites the matching entry under a dotted path.

        The ``Instances.InstanceFleets`` entry named ``MASTER`` is replaced
        by a copy named ``TEST``; afterwards no fleet may be named ``MASTER``
        and at least one must be named ``TEST``.
        """

        def rename_to_test(fleet):
            # Replacement callback: same fleet, with "Name" forced to "TEST".
            return {**fleet, "Name": "TEST"}

        config = ClusterConfig.from_local(file_path=TEST_PATH_CONFIG_INSTANCES)
        config.find_replace("Instances.InstanceFleets", "Name", "MASTER",
                            rename_to_test)

        # The old name must be gone from every fleet...
        assert not any(fleet["Name"] == "MASTER"
                       for fleet in config["Instances"]["InstanceFleets"])

        # ...and the new name must be present on some fleet.
        assert any(fleet["Name"] == "TEST"
                   for fleet in config["Instances"]["InstanceFleets"])
Esempio n. 12
0
 def test_raises_correct_exception_local_file_not_found(self):
     """from_local raises ConfigNotFoundError for the path ``nonexistent_path``."""
     missing_path = "nonexistent_path"

     with pytest.raises(ConfigNotFoundError):
         ClusterConfig.from_local(file_path=missing_path)
Esempio n. 13
0
    def test_reads_local_config(self):
        """from_local yields the same content as loading the YAML directly.

        The cluster fixture is read once through ``load_local_yaml`` and once
        through ``ClusterConfig.from_local``; both results must compare equal.
        """
        expected = load_local_yaml(TEST_PATH_CONFIG_CLUSTER)

        actual = ClusterConfig.from_local(file_path=TEST_PATH_CONFIG_CLUSTER)

        assert actual == expected
Esempio n. 14
0
    def test_extend_nested_list(self):
        """extend_nested_list adds the given items to the list at a dotted path.

        After extending ``Instances.InstanceFleets`` with one new fleet, that
        fleet must be a member of the list.
        """
        new_fleet = {"InstanceFleetType": "CORE", "Name": "TEST"}
        config = ClusterConfig.from_local(file_path=TEST_PATH_CONFIG_INSTANCES)

        config.extend_nested_list("Instances.InstanceFleets", [new_fleet])

        fleets = config["Instances"]["InstanceFleets"]
        assert new_fleet in fleets
Esempio n. 15
0
 def test_insert_raises_existing_path(self):
     """insert_nested_node raises TypeError for ``Instances.Ec2SubnetId``.

     Per the test name, that path presumably already exists in the
     instances fixture, which is what should trigger the error.
     """
     occupied_path = "Instances.Ec2SubnetId"
     config = ClusterConfig.from_local(file_path=TEST_PATH_CONFIG_INSTANCES)

     with pytest.raises(TypeError):
         config.insert_nested_node(occupied_path, "testvalue")