def test_reads_s3_config(self):
    """Check that ``ClusterConfig.from_s3`` returns the parsed fixture YAML.

    A stubbed S3 client serves the text of ``TEST_PATH_CONFIG_CLUSTER`` as
    the ``get_object`` body; the loaded config must equal that same text
    parsed with ``yaml.safe_load``.
    """
    client = boto3.client("s3")
    bucket, key = "config_bucket", "config_key"

    with open(TEST_PATH_CONFIG_CLUSTER, "r") as fixture:
        raw_yaml = fixture.read()
    expected = yaml.safe_load(raw_yaml)

    with Stubber(client) as stubber:
        payload = raw_yaml.encode("utf-8")
        # StreamingBody is given the content length explicitly; the encoded
        # payload's length is exactly the number of bytes in the stream.
        body = StreamingBody(BytesIO(payload), len(payload))
        stubber.add_response(
            "get_object",
            {"Body": body},
            {"Bucket": bucket, "Key": key},
        )
        actual = ClusterConfig.from_s3(bucket=bucket, key=key, s3_client=client)

    assert expected == actual
def test_raises_correct_exception_s3_not_found(self):
    """Check that a missing S3 object surfaces as ``ConfigNotFoundError``.

    The stubbed client answers ``get_object`` with a ``NoSuchKey`` client
    error, and ``ClusterConfig.from_s3`` is expected to raise
    ``ConfigNotFoundError`` in response.
    """
    client = boto3.client("s3")
    # Request parameters the stub expects to see on the get_object call.
    location = {"Bucket": "config_bucket", "Key": "config_key"}

    with Stubber(client) as stubber:
        stubber.add_client_error(
            "get_object",
            service_error_code="NoSuchKey",
            expected_params=location,
        )
        with pytest.raises(ConfigNotFoundError):
            ClusterConfig.from_s3(
                bucket=location["Bucket"],
                key=location["Key"],
                s3_client=client,
            )
def read_config(config_type: str, s3_overrides: dict = None, required: bool = True) -> ClusterConfig:
    """Reads an EMR cluster configuration file.

    Reads configuration details of an EMR cluster from either a local file or
    from an S3 object. When the ``EMR_LAUNCHER_CONFIG_DIR`` environment
    variable is set (and non-empty), ``<config_type>.yaml`` is read from that
    directory; otherwise the bucket and folder returned by
    ``get_s3_location(s3_overrides)`` are used and the object
    ``<folder>/<config_type>.yaml`` is read from S3.

    Parameters:
        config_type (str): The type of config file to read. Must be one of
            `cluster`, `instances`, or `steps`.
        s3_overrides (dict): The optional s3 location overrides for the EMR
            config files. Passed through to ``get_s3_location``.
        required (bool): Whether or not the configuration file should be
            required to be present. If set to True and the configuration
            file can't be found, then this function will raise an exception.

    Returns:
        ClusterConfig: The configuration parsed from the located file, or
        ``None`` when the file was not found and ``required`` is False.

    Raises:
        ConfigNotFoundError: If the configuration could not be found and
            ``required`` is True.
    """
    logger = logging.getLogger("emr_launcher")
    local_config_dir = os.getenv("EMR_LAUNCHER_CONFIG_DIR")

    try:
        if local_config_dir:
            # Pass the path itself; the previous ``{local_config_dir}`` was a
            # set literal, which structured formatters cannot serialise.
            logger.info(
                "Locating configs",
                extra={"local_config_dir": local_config_dir},
            )
            config = ClusterConfig.from_local(
                file_path=os.path.join(local_config_dir, f"{config_type}.yaml")
            )
        else:
            s3_bucket_location = get_s3_location(s3_overrides)
            s3_bucket = s3_bucket_location[0]
            s3_folder = s3_bucket_location[1]
            logger.info(
                "Locating configs",
                extra={"s3_bucket": s3_bucket, "s3_folder": s3_folder},
            )
            s3_key = f"{s3_folder}/{config_type}.yaml"
            config = ClusterConfig.from_s3(bucket=s3_bucket, key=s3_key)

        # Use explicit placeholders so the config is actually rendered; the
        # old call supplied it as a format argument with nothing to fill.
        logger.debug("%s config: %s", config_type, config)
        return config
    except ConfigNotFoundError:
        if required:
            raise
        logger.debug("Config type %s not found", config_type)
        # Optional configs that are absent are reported to the caller as None.
        return None