def create(config_path, destination_system, destination_database, destination_environment,
           algorithm_instance, emr_cluster_id, ext_params_str):
    """Build an AlgorithmExecutorHadoop for the given destination.

    Resolves the destination via DataSystem, then wires together the Hadoop
    algorithm configuration and the EMR execution system.

    :param config_path: path to the m3d configuration file
    :param destination_system: target system name
    :param destination_database: target database name
    :param destination_environment: target environment name
    :param algorithm_instance: name of the algorithm instance to run
    :param emr_cluster_id: id of the EMR cluster to execute on
    :param ext_params_str: extra parameters as a JSON string
    :return: an AlgorithmExecutorHadoop bound to the resolved EMR system
    :raises M3DUnsupportedDatabaseTypeException: if the destination's
        database type is anything other than EMR
    """
    data_system = DataSystem(
        config_path, destination_system, destination_database, destination_environment)

    # Only EMR-backed destinations are supported by this executor.
    if data_system.database_type != DataSystem.DatabaseType.EMR:
        raise M3DUnsupportedDatabaseTypeException(data_system.database_type)

    config = AlgorithmConfigurationHadoop.create(
        config_path,
        destination_database,
        destination_environment,
        algorithm_instance,
        ext_params_str)
    execution_system = EMRSystem.from_data_system(data_system, emr_cluster_id)
    return AlgorithmExecutorHadoop(execution_system, config)
def test_from_acon_file(self):
    """Verify AlgorithmConfigurationHadoop.create reads the python class,
    spark settings and algorithm parameters from the merged acon dict.

    Util.load_dict and Util.merge_nested_dicts are patched so the test
    controls the merged configuration without touching the filesystem.
    """
    expected_python_class = "python_test_class"
    expected_cluster_id = "test_id"
    config = self.env_setup()

    expected_spark = {
        "spark.executor.instances": "5",
        "spark.executor.memory": "25G"
    }
    expected_algorithm = {"python_class": expected_python_class, "parameters": {}}
    # The dict that the (patched) merge step will hand back to create().
    merged_acon = {
        "environment": {
            "emr_cluster_id": expected_cluster_id,
            "spark": {
                "spark.executor.instances": "5",
                "spark.executor.memory": "25G"
            }
        },
        "algorithm": {
            "python_class": expected_python_class,
            "parameters": {}
        }
    }

    with patch('m3d.util.util.Util.load_dict', return_value={}):
        with patch('m3d.util.util.Util.merge_nested_dicts', return_value=merged_acon):
            configuration = AlgorithmConfigurationHadoop.create(
                config,
                "bdp_test",
                "test",
                "gzip_decompressor_bytes",
                """ { "environment": { "emr_cluster_id": "test_id" } } """)

            assert configuration.get_python_class() == expected_python_class
            assert configuration.get_spark_params() == expected_spark
            assert configuration.get_algorithm_params() == expected_algorithm