def test_find_replace_nonexistent_path(self):
    """find_replace on a path missing from the config must not alter it."""
    original = ClusterConfig.from_local(
        file_path=TEST_PATH_CONFIG_INSTANCES)
    candidate = ClusterConfig(original)

    def rename_to_test(item):
        # Replacement callback: same item with its "Name" forced to "TEST".
        return {**item, "Name": "TEST"}

    candidate.find_replace("Nonexistent.Path", "Name", "MASTER",
                           rename_to_test)

    # The copy is a separate object, yet still compares equal to the source.
    assert original is not candidate
    assert candidate == original
def read_config(config_type: str,
                s3_overrides: dict = None,
                required: bool = True) -> ClusterConfig:
    """Reads an EMR cluster configuration file.

    Reads configuration details of an EMR cluster from either a local file
    or from an S3 object. When the `EMR_LAUNCHER_CONFIG_DIR` environment
    variable is set (and non-empty) the file `<config_type>.yaml` is read
    from that directory; otherwise the bucket and folder returned by
    `get_s3_location(s3_overrides)` are used.

    Parameters:
        config_type (str): The type of config file to read. Must be one of
            `cluster`, `instances`, or `steps`.
        s3_overrides (dict): The optional s3 location overrides for the
            EMR config files
        required (bool): Whether or not the configuration file should be
            required to be present. If set to True and the configuration
            file can't be read, then this function will raise an exception

    Returns:
        ClusterConfig: The cluster configuration parsed from the provided
        input, or None when the file was not found and `required` is
        False.

    Raises:
        ConfigNotFoundError: If the configuration could not be found and
            `required` is True.
    """
    logger = logging.getLogger("emr_launcher")
    local_config_dir = os.getenv("EMR_LAUNCHER_CONFIG_DIR")
    try:
        if local_config_dir:
            # Log the directory itself; it was previously wrapped in a set
            # literal, which looks like an f-string leftover.
            logger.info("Locating configs",
                        extra={"local_config_dir": local_config_dir})
            config = ClusterConfig.from_local(file_path=os.path.join(
                local_config_dir, f"{config_type}.yaml"))
        else:
            s3_bucket_location = get_s3_location(s3_overrides)
            s3_bucket = s3_bucket_location[0]
            s3_folder = s3_bucket_location[1]
            logger.info(
                "Locating configs",
                extra={
                    "s3_bucket": s3_bucket,
                    "s3_folder": s3_folder
                },
            )
            s3_key = f"{s3_folder}/{config_type}.yaml"
            config = ClusterConfig.from_s3(bucket=s3_bucket, key=s3_key)

        # Use explicit placeholders so the config is actually rendered; an
        # argument without a matching placeholder never reaches the output.
        logger.debug("%s config: %s", config_type, config)
        return config
    except ConfigNotFoundError:
        if required:
            raise
        logger.debug("Config type %s not found", config_type)
        return None
def test_reads_s3_config(self):
    """from_s3 should return the parsed YAML served by a stubbed S3 client."""
    s3_client = boto3.client("s3")
    bucket_name = "config_bucket"
    object_key = "config_key"

    with open(TEST_PATH_CONFIG_CLUSTER, "r") as config_file:
        raw_yaml = config_file.read()
    expected = yaml.safe_load(raw_yaml)

    # The stubbed response body needs the payload length up front.
    payload = raw_yaml.encode("utf-8")
    request_params = {"Bucket": bucket_name, "Key": object_key}

    with Stubber(s3_client) as stubber:
        stubber.add_response(
            "get_object",
            {"Body": StreamingBody(BytesIO(payload), len(payload))},
            request_params,
        )
        actual = ClusterConfig.from_s3(bucket=bucket_name,
                                       key=object_key,
                                       s3_client=s3_client)
        assert expected == actual
def test_override(self):
    """override should apply the supplied value and keep sibling keys."""
    cluster_config = ClusterConfig.from_local(
        file_path=TEST_PATH_CONFIG_INSTANCES)
    cluster_config.override({"Instances": {"Ec2SubnetId": "Test_Subnet_Id"}})

    instances = cluster_config["Instances"]
    # The overridden key carries the new value ...
    assert instances["Ec2SubnetId"] == "Test_Subnet_Id"
    # ... while a key that was not part of the override is unchanged.
    assert instances["EmrManagedMasterSecurityGroup"] == "$MASTER_SG"
def test_get_nested_node(self):
    """get_nested_node should resolve a dotted path to the nested value."""
    cluster_config = ClusterConfig.from_local(
        file_path=TEST_PATH_CONFIG_INSTANCES)
    actual = cluster_config.get_nested_node("Instances.Ec2SubnetId")

    # Reference value taken straight from the same YAML file.
    raw_yaml = load_local_yaml(TEST_PATH_CONFIG_INSTANCES)
    expected = raw_yaml["Instances"]["Ec2SubnetId"]

    assert actual == expected
def get_default_config() -> ClusterConfig:
    """Build a ClusterConfig by merging the YAML files in EMR_LAUNCHER_CONFIG_DIR.

    The `cluster`, `configurations`, `instances` and `steps` files are loaded
    in that order; top-level keys from later files overwrite earlier ones.

    Returns:
        ClusterConfig: The merged configuration.
    """
    merged = {}
    for name in ("cluster", "configurations", "instances", "steps"):
        yaml_path = os.path.join(EMR_LAUNCHER_CONFIG_DIR, f"{name}.yaml")
        with open(yaml_path, "r") as handle:
            merged.update(yaml.safe_load(handle.read()))
    return ClusterConfig(merged)
def test_raises_correct_exception_s3_not_found(self):
    """A NoSuchKey error from S3 should surface as ConfigNotFoundError."""
    s3_client = boto3.client("s3")
    bucket_name = "config_bucket"
    object_key = "config_key"
    request_params = {"Bucket": bucket_name, "Key": object_key}

    with Stubber(s3_client) as stubber:
        # Make the stubbed get_object call fail with NoSuchKey.
        stubber.add_client_error(
            "get_object",
            service_error_code="NoSuchKey",
            expected_params=request_params,
        )
        with pytest.raises(ConfigNotFoundError):
            ClusterConfig.from_s3(bucket=bucket_name,
                                  key=object_key,
                                  s3_client=s3_client)
def test_insert_nested_node(self):
    """insert_nested_node should place the value at the dotted path."""
    cluster_config = ClusterConfig.from_local(
        file_path=TEST_PATH_CONFIG_INSTANCES)
    new_node = {"TestKey": "TestValue"}

    cluster_config.insert_nested_node("Instances.TestNode", new_node)

    inserted = cluster_config["Instances"]["TestNode"]
    assert inserted == new_node
def test_find_replace_raises_not_list(self):
    """find_replace on the Instances.Ec2SubnetId path should raise TypeError."""
    cluster_config = ClusterConfig.from_local(
        file_path=TEST_PATH_CONFIG_INSTANCES)

    def rename_to_test(item):
        # Replacement callback: same item with its "Name" forced to "TEST".
        return {**item, "Name": "TEST"}

    with pytest.raises(TypeError):
        cluster_config.find_replace("Instances.Ec2SubnetId", "Name",
                                    "MASTER", rename_to_test)
def test_find_replace_single_path(self):
    """find_replace on a top-level list should update the matching item."""
    config = ClusterConfig.from_local(
        file_path=TEST_PATH_CONFIG_CONFIGURATIONS)
    config.find_replace(
        "Configurations",
        "Classification",
        "yarn-site",
        lambda x: {
            **x, "Properties": {
                "test-property": "test-value"
            }
        },
    )

    # Pass a default to next() so that a missing entry produces a clear
    # assertion failure instead of a bare StopIteration.
    updated_item = next(
        (item for item in config["Configurations"]
         if item["Classification"] == "yarn-site"),
        None,
    )
    assert updated_item is not None
    assert updated_item["Properties"]["test-property"] == "test-value"
def test_find_replace_nested_path(self):
    """find_replace on a nested list should swap the matching fleet's name."""
    cluster_config = ClusterConfig.from_local(
        file_path=TEST_PATH_CONFIG_INSTANCES)

    def rename_to_test(item):
        # Replacement callback: same item with its "Name" forced to "TEST".
        return {**item, "Name": "TEST"}

    cluster_config.find_replace("Instances.InstanceFleets", "Name", "MASTER",
                                rename_to_test)

    fleets = cluster_config["Instances"]["InstanceFleets"]
    # No fleet is still called MASTER ...
    assert not any(fleet["Name"] == "MASTER" for fleet in fleets)
    # ... and one is now called TEST.
    assert any(fleet["Name"] == "TEST" for fleet in fleets)
def test_raises_correct_exception_local_file_not_found(self):
    """from_local with the path "nonexistent_path" raises ConfigNotFoundError."""
    missing_path = "nonexistent_path"
    with pytest.raises(ConfigNotFoundError):
        ClusterConfig.from_local(file_path=missing_path)
def test_reads_local_config(self):
    """from_local should yield the same content as loading the YAML directly."""
    loaded_config = ClusterConfig.from_local(
        file_path=TEST_PATH_CONFIG_CLUSTER)
    reference = load_local_yaml(TEST_PATH_CONFIG_CLUSTER)

    assert loaded_config == reference
def test_extend_nested_list(self):
    """extend_nested_list should add the given items to the nested list."""
    cluster_config = ClusterConfig.from_local(
        file_path=TEST_PATH_CONFIG_INSTANCES)
    new_fleet = {"InstanceFleetType": "CORE", "Name": "TEST"}

    cluster_config.extend_nested_list("Instances.InstanceFleets", [new_fleet])

    fleets = cluster_config["Instances"]["InstanceFleets"]
    assert new_fleet in fleets
def test_insert_raises_existing_path(self):
    """insert_nested_node at the Instances.Ec2SubnetId path raises TypeError."""
    cluster_config = ClusterConfig.from_local(
        file_path=TEST_PATH_CONFIG_INSTANCES)
    target_path = "Instances.Ec2SubnetId"

    with pytest.raises(TypeError):
        cluster_config.insert_nested_node(target_path, "testvalue")