def test_constructor(self):
    """Constructor exposes the instance name, python class and algorithm section of the acon dict."""
    python_class = "test_python_class"
    instance_name = "test_instance_cd"

    acon = {
        "environment": {
            "emr_cluster_id": "j-D1LSS423N",
            "spark": {
                "spark.executor.instances": "5",
                "spark.executor.memory": "25G"
            }
        },
        "algorithm": {
            "python_class": python_class,
            "parameters": {}
        }
    }
    expected_algorithm_section = {
        "python_class": "test_python_class",
        "parameters": {}
    }

    configuration = AlgorithmConfigurationHadoop(instance_name, acon)

    # Getters must reflect exactly what was passed in through the acon dict.
    assert configuration.get_python_class() == python_class
    assert configuration.get_algorithm_instance() == instance_name
    assert configuration.get_algorithm_params() == expected_algorithm_section
def create(
        config_path,
        cluster_mode,
        destination_system,
        destination_database,
        destination_environment,
        algorithm_instance,
        ext_params_str
):
    """Build an AlgorithmExecutorHadoop for the given destination.

    Resolves the destination via DataSystem; only the EMR database type is
    supported. Raises M3DUnsupportedDatabaseTypeException for anything else.
    """
    data_system = DataSystem(
        config_path,
        cluster_mode,
        destination_system,
        destination_database,
        destination_environment
    )

    # Guard clause: bail out early for unsupported database types.
    if data_system.database_type != DataSystem.DatabaseType.EMR:
        raise M3DUnsupportedDatabaseTypeException(data_system.database_type)

    configuration = AlgorithmConfigurationHadoop.create_with_ext_params(
        config_path,
        cluster_mode,
        destination_database,
        destination_environment,
        algorithm_instance,
        ext_params_str
    )
    emr_system = EMRSystem.from_data_system(data_system, configuration.get_emr_cluster_id())
    return AlgorithmExecutorHadoop(emr_system, configuration)
def test_from_acon_file(self):
    """create_with_ext_params yields python class, spark and algorithm params from the merged acon dict."""
    python_class = "python_test_class"
    emr_cluster_id = "test_id"
    config = self.env_setup()

    # Dict returned by the patched merge_nested_dicts call below.
    merged_acon = {
        "environment": {
            "emr_cluster_id": emr_cluster_id,
            "spark": {
                "spark.executor.instances": "5",
                "spark.executor.memory": "25G"
            }
        },
        "algorithm": {
            "python_class": python_class,
            "parameters": {}
        }
    }
    expected_algorithm_section = {
        "python_class": python_class,
        "parameters": {}
    }
    expected_spark_params = {
        "spark.executor.instances": "5",
        "spark.executor.memory": "25G"
    }

    # Stub out file loading and dict merging so only the merged acon matters.
    with patch('m3d.util.util.Util.load_dict', return_value={}), \
            patch('m3d.util.util.Util.merge_nested_dicts', return_value=merged_acon):
        configuration = AlgorithmConfigurationHadoop.create_with_ext_params(
            config,
            False,
            "bdp_test",
            "test",
            "gzip_decompressor_bytes",
            """ { "environment": { "emr_cluster_id": "test_id" } } """
        )

        assert configuration.get_python_class() == python_class
        assert configuration.get_spark_params() == expected_spark_params
        assert configuration.get_algorithm_params() == expected_algorithm_section
def _create_algorithm_configuration(acon_dict):
    """Wrap acon_dict in an AlgorithmConfigurationHadoop for the materialization algorithm."""
    algorithm_instance = "materialization"
    return AlgorithmConfigurationHadoop(algorithm_instance, acon_dict)
def _create_algorithm_configuration(acon_dict):
    """Wrap acon_dict in an AlgorithmConfigurationHadoop for the fixed-length string extractor."""
    algorithm_instance = "fixed_length_string_extractor"
    return AlgorithmConfigurationHadoop(algorithm_instance, acon_dict)