def create_with_emr_cluster_id(config_path, cluster_mode, destination_database, destination_environment,
                               algorithm_instance, emr_cluster_id):
    """
    Build an algorithm configuration from an acon file and attach an EMR cluster id to it.

    The acon file path is obtained from ConfigService.get_acon_path using the
    cluster mode, destination database, destination environment and algorithm
    instance. The dictionary loaded from that path has its environment section
    updated with the given EMR cluster id before the configuration object is
    constructed.

    :param config_path: value passed to the ConfigService constructor
    :param cluster_mode: first argument forwarded to get_acon_path
    :param destination_database: second argument forwarded to get_acon_path
    :param destination_environment: third argument forwarded to get_acon_path
    :param algorithm_instance: last argument forwarded to get_acon_path; also passed to the
        AlgorithmConfigurationHadoop constructor
    :param emr_cluster_id: value stored under Keys.EMR_CLUSTER_ID in the environment section
    :return: AlgorithmConfigurationHadoop created from algorithm_instance and the updated acon dictionary
    :raises KeyError: if the loaded acon dictionary has no environment section
    """
    # Resolve the acon file location through the config service.
    acon_path = ConfigService(config_path).get_acon_path(
        cluster_mode,
        destination_database,
        destination_environment,
        algorithm_instance
    )
    acon_dict = Util.load_dict(acon_path)

    # The environment section is modified in place, so acon_dict itself carries the cluster id.
    env_section = acon_dict[AlgorithmConfigurationHadoop.Sections.ENVIRONMENT]
    env_section[AlgorithmConfigurationHadoop.Keys.EMR_CLUSTER_ID] = emr_cluster_id

    return AlgorithmConfigurationHadoop(algorithm_instance, acon_dict)
def read_acon_params(execution_system, table_name):
    """
    Return the parameters section of the acon file resolved for a table.

    :param execution_system: object whose ``config``, ``database`` and ``environment``
        attributes are read to locate the acon file
    :param table_name: last argument forwarded to ConfigService.get_acon_path
    :return: the value stored under LoadHadoop.PARAMETERS_KEY in the loaded acon dictionary,
        or an empty dict when that key is absent
    """
    acon_path = ConfigService(execution_system.config).get_acon_path(
        execution_system.database,
        execution_system.environment,
        table_name
    )
    return Util.load_dict(acon_path).get(LoadHadoop.PARAMETERS_KEY, {})
def _create_s3_table(self, s3_resource, hql_validation_function, hql_validation_error=None):
    """
    Seed the lake and landing buckets and return a hand-assembled S3Table.

    For the lake spec and then the landing spec, the bucket is created and an
    empty object is put for every key listed in the spec. The S3Table is
    allocated with ``__new__`` (so ``__init__`` is not run) and its attributes
    are assigned directly.

    :param s3_resource: object providing ``create_bucket`` and ``Bucket(...).put_object``
    :param hql_validation_function: first argument passed to FakeStorageSystem
    :param hql_validation_error: second argument passed to FakeStorageSystem (default None)
    :return: the populated S3Table instance
    """
    # Lake first, then landing: same order in which the buckets are created and filled.
    for spec in (self.LAKE_SPEC, self.LANDING_SPEC):
        s3_resource.create_bucket(Bucket=spec.bucket)
        for object_key in spec.keys:
            logging.info("Creating object s3://{}/{}".format(spec.bucket, object_key))
            s3_resource.Bucket(spec.bucket).put_object(Key=object_key, Body="")

    table = S3Table.__new__(S3Table)

    # Landing-side attributes.
    table.db_table_landing = self.LANDING_SPEC.table
    landing_data_uri = "s3://{}/{}".format(self.LANDING_SPEC.bucket, self.LANDING_SPEC.data_dir)
    table.dir_landing_data = landing_data_uri
    # str.replace substitutes every occurrence of "data" in the URI, not only the last path segment.
    table.dir_landing_work = landing_data_uri.replace("data", "work")
    table.dir_landing_archive = landing_data_uri.replace("data", "archive")
    table.dir_landing_final = table.dir_landing_data

    # Lake-side attributes.
    table.db_table_lake = self.LAKE_SPEC.table
    table.dir_lake_final = "s3://{}/{}".format(self.LAKE_SPEC.bucket, self.LAKE_SPEC.data_dir)

    table.emr_system = FakeStorageSystem(hql_validation_function, hql_validation_error)
    table.s3_resource = s3_resource

    landing_bucket = self.LANDING_SPEC.bucket
    lake_bucket = self.LAKE_SPEC.bucket
    table.dir_landing_table = "s3://" + landing_bucket + "/" + self.LANDING_SPEC.data_dir
    table.dir_lake_table = "s3://" + lake_bucket + "/" + self.LAKE_SPEC.data_dir

    table.config_service = ConfigService(TestS3Table.DEFAULT_CONFIG_PATH)

    # Fixed table layout assigned to every table built by this helper.
    table.partitioned_by = "month"
    table.header_lines = 0
    table.delimiter = "|"
    table.columns_lake = [
        ("name1", "varchar(21)"),
        ("name2", "varchar(6)"),
        ("name3", "varchar(4)"),
    ]

    return table
def env_setup(self, local_run_dir, destination_system, destination_database, destination_environment):
    """
    Run the parent environment setup and copy the test acon file into place.

    Note that the four arguments are not read: the values forwarded to the
    parent ``env_setup`` and to ConfigService.get_acon_path come from the
    attributes of the same names on ``self``.

    :param local_run_dir: unused; ``self.local_run_dir`` is forwarded instead
    :param destination_system: unused; ``self.destination_system`` is forwarded instead
    :param destination_database: unused; ``self.destination_database`` is forwarded instead
    :param destination_environment: unused; ``self.destination_environment`` is forwarded instead
    :return: tuple ``(m3d_config_file, scon_emr_file, acon_path, m3d_config_dict, scon_emr_dict)``
    """
    parent_env_setup = super(TestAlgorithmNestedFlattenerEMR, self).env_setup
    m3d_config_file, scon_emr_file, m3d_config_dict, scon_emr_dict = parent_env_setup(
        self.local_run_dir,
        self.destination_system,
        self.destination_database,
        self.destination_environment
    )

    acon_path = ConfigService(m3d_config_file).get_acon_path(
        self.destination_database,
        self.destination_environment,
        self.algorithm_instance
    )

    # makedirs is called without exist_ok, so it raises if the directory is already present.
    os.makedirs(os.path.dirname(acon_path))

    # Copy the contents of the test acon file to the resolved acon location.
    acon_contents = py.path.local(self.test_acon).read()
    py.path.local(acon_path).write(acon_contents)

    return m3d_config_file, scon_emr_file, acon_path, m3d_config_dict, scon_emr_dict
def create_with_ext_params(config_path, cluster_mode, destination_database, destination_environment,
                           algorithm_instance, ext_params_str):
    """
    Build an algorithm configuration from an acon file, optionally merged with external parameters.

    The acon file path is obtained from ConfigService.get_acon_path using the
    cluster mode, destination database, destination environment and algorithm
    instance. When ``ext_params_str`` is truthy it is parsed as JSON and merged
    into the loaded dictionary with Util.merge_nested_dicts.

    :param config_path: value passed to the ConfigService constructor
    :param cluster_mode: first argument forwarded to get_acon_path
    :param destination_database: second argument forwarded to get_acon_path
    :param destination_environment: third argument forwarded to get_acon_path
    :param algorithm_instance: last argument forwarded to get_acon_path; also passed to the
        AlgorithmConfigurationHadoop constructor
    :param ext_params_str: JSON text with external parameters; skipped when empty or None
    :return: AlgorithmConfigurationHadoop created from algorithm_instance and the resulting dictionary
    """
    # Resolve the acon file location through the config service.
    acon_path = ConfigService(config_path).get_acon_path(
        cluster_mode,
        destination_database,
        destination_environment,
        algorithm_instance
    )
    acon_dict = Util.load_dict(acon_path)

    # Nothing to merge: use the acon file contents unchanged.
    if not ext_params_str:
        return AlgorithmConfigurationHadoop(algorithm_instance, acon_dict)

    overrides = json.loads(ext_params_str)
    merged_dict = Util.merge_nested_dicts(acon_dict, overrides)
    return AlgorithmConfigurationHadoop(algorithm_instance, merged_dict)
def test_parse_config_file(self):
    """
    Check the attributes ConfigService exposes after reading a JSON config.

    ``builtins.open`` is patched with ``mock_open`` so that the JSON text below
    is what gets read, and the resulting ConfigService is checked attribute by
    attribute against the expected table.
    """
    # Config text handed to ConfigService through the patched open().
    test_config_json = (
        ' { "emails": [ "*****@*****.**", "*****@*****.**", "*****@*****.**" ],'
        ' "dir_exec": "/tmp/",'
        ' "python": { "main": "m3d_main.pyc", "base_package": "m3d" },'
        ' "subdir_projects": {'
        ' "m3d_engine": "m3d-engine/target/scala-2.10/",'
        ' "m3d_api": "m3d-api/" },'
        ' "tags": {'
        ' "table_suffix_stage": "_stg1",'
        ' "table_suffix_swap": "_swap",'
        ' "full_load": "full_load",'
        ' "delta_load": "delta_load",'
        ' "append_load": "append_load",'
        ' "oozie": "oozie",'
        ' "decom_gzip": "gzip_decompressor",'
        ' "false": "false",'
        ' "config": "config",'
        ' "system": "system",'
        ' "algorithm": "algorithm",'
        ' "table": "table",'
        ' "view": "view",'
        ' "upload": "upload",'
        ' "pushdown": "pushdown",'
        ' "aws": "aws",'
        ' "hdfs": "hdfs",'
        ' "file": "file" },'
        ' "data_dict_delimiter": "|" } '
    )

    # Attribute name -> value expected on the ConfigService instance.
    expected_params = {
        "emails": [
            "*****@*****.**",
            "*****@*****.**",
            "*****@*****.**"
        ],
        "python_main": "m3d_main.pyc",
        "python_base_package": "m3d",
        "dir_exec": "/tmp/",
        "subdir_projects_m3d_engine": "m3d-engine/target/scala-2.10/",
        "subdir_projects_m3d_api": "m3d-api/",
        "data_dict_delimiter": "|",
        "tag_table_suffix_stage": "_stg1",
        "tag_table_suffix_swap": "_swap",
        "tag_full_load": "full_load",
        "tag_delta_load": "delta_load",
        "tag_append_load": "append_load",
        "tag_system": "system",
        "tag_table": "table",
        "tag_algorithm": "algorithm",
        "tag_config": "config",
        "tag_pushdown": "pushdown",
        "tag_upload": "upload",
        "tag_aws": "aws",
        "tag_file": "file"
    }

    with patch("builtins.open", new_callable=mock_open, read_data=test_config_json):
        config_service = ConfigService("_")

        for attribute_name, expected_value in expected_params.items():
            assert getattr(config_service, attribute_name) == expected_value