コード例 #1
0
    def create(config_path, destination_system, destination_database,
               destination_environment, algorithm_instance, emr_cluster_id,
               ext_params_str):
        """
        Build the algorithm executor for the requested destination.

        A DataSystem is constructed from the config path and the destination
        system/database/environment. Only EMR-backed data systems are
        supported: for those, an AlgorithmConfigurationHadoop and an EMRSystem
        bound to ``emr_cluster_id`` are created and wrapped in an
        AlgorithmExecutorHadoop.

        :param config_path: forwarded to DataSystem and to the algorithm
            configuration factory
        :param destination_system: forwarded to DataSystem
        :param destination_database: forwarded to DataSystem and to the
            algorithm configuration factory
        :param destination_environment: forwarded to DataSystem and to the
            algorithm configuration factory
        :param algorithm_instance: forwarded to the algorithm configuration
            factory
        :param emr_cluster_id: forwarded to EMRSystem.from_data_system
        :param ext_params_str: forwarded to the algorithm configuration
            factory
        :return: AlgorithmExecutorHadoop built from the EMR system and the
            algorithm configuration
        :raises M3DUnsupportedDatabaseTypeException: if the data system's
            database type is anything other than EMR
        """
        target = DataSystem(
            config_path,
            destination_system,
            destination_database,
            destination_environment
        )

        # Reject unsupported backends up front so the happy path stays flat.
        is_emr = target.database_type == DataSystem.DatabaseType.EMR
        if not is_emr:
            raise M3DUnsupportedDatabaseTypeException(target.database_type)

        # The configuration is created before the EMR system is resolved.
        algorithm_config = AlgorithmConfigurationHadoop.create(
            config_path,
            destination_database,
            destination_environment,
            algorithm_instance,
            ext_params_str
        )
        emr_system = EMRSystem.from_data_system(target, emr_cluster_id)

        return AlgorithmExecutorHadoop(emr_system, algorithm_config)
コード例 #2
0
    def test_from_acon_file(self):
        """
        Check the getters of a configuration built by
        AlgorithmConfigurationHadoop.create.

        Util.load_dict and Util.merge_nested_dicts are patched, so the
        configuration is built from the canned merged dictionary below; the
        test then checks that the python class, the spark parameters and the
        algorithm section are exposed unchanged.
        """
        expected_class = "python_test_class"
        cluster_id = "test_id"

        config = self.env_setup()

        # Canned result of the (patched) dictionary merge.
        merged_acon = {
            "environment": {
                "emr_cluster_id": cluster_id,
                "spark": {
                    "spark.executor.instances": "5",
                    "spark.executor.memory": "25G"
                }
            },
            "algorithm": {
                "python_class": expected_class,
                "parameters": {}
            }
        }

        # Expected values are kept as independent literals, not views into
        # merged_acon, so mutation by the code under test cannot mask a
        # failure.
        expected_spark_params = {
            "spark.executor.instances": "5",
            "spark.executor.memory": "25G"
        }
        expected_algorithm = {
            "python_class": expected_class,
            "parameters": {}
        }

        # External parameters passed as the last argument of create();
        # the whitespace inside the literal is intentionally left as-is.
        ext_params = """
                    {
                        "environment": {
                            "emr_cluster_id": "test_id"
                        }
                    }
                    """

        with patch('m3d.util.util.Util.load_dict', return_value={}), \
                patch('m3d.util.util.Util.merge_nested_dicts',
                      return_value=merged_acon):
            acon = AlgorithmConfigurationHadoop.create(
                config,
                "bdp_test",
                "test",
                "gzip_decompressor_bytes",
                ext_params
            )

        assert acon.get_python_class() == expected_class
        assert acon.get_spark_params() == expected_spark_params
        assert acon.get_algorithm_params() == expected_algorithm