Example 1
0
 def _get_mapreduce_mapper(self, job_properties, config):
     """Create a MapReduceMapper for the fixture node with the given properties."""
     property_set = PropertySet(job_properties=job_properties, config=config)
     mapper = mapreduce_mapper.MapReduceMapper(
         oozie_node=self.mapreduce_node,
         name="test_id",
         dag_name="DAG_NAME_B",
         props=property_set,
     )
     return mapper
 def _get_mapreduce_mapper(self, job_properties, config):
     """Create a MapReduceMapper for the fixture node, pinning the input path."""
     property_set = PropertySet(job_properties=job_properties, config=config)
     mapper = mapreduce_mapper.MapReduceMapper(
         oozie_node=self.mapreduce_node,
         name="test_id",
         dag_name="DAG_NAME_B",
         props=property_set,
         input_directory_path="/tmp/input-directory-path/",
     )
     return mapper
Example 3
0
    def test_to_tasks_and_relations(self):
        """A parsed map-reduce node should yield exactly one templated task
        and no relations."""
        mapper = mapreduce_mapper.MapReduceMapper(
            oozie_node=self.mapreduce_node,
            name="test_id",
            dag_name="DAG_NAME_B",
            props=PropertySet(
                job_properties={"nameNode": "hdfs://"},
                config={
                    "dataproc_cluster": "my-cluster",
                    "gcp_region": "europe-west3",
                    "hadoop_jars": "hdfs:///user/mapred/examples/mapreduce/lib/wordcount.jar",
                    "hadoop_main_class": "WordCount",
                },
            ),
        )
        mapper.on_parse_node()

        tasks, relations = mapper.to_tasks_and_relations()

        # Properties expected to be extracted from the Oozie XML node; the
        # same mapping should appear both inside the rendered PropertySet and
        # as the standalone ``action_node_properties`` template parameter.
        expected_node_properties = {
            "mapred.mapper.new-api": "true",
            "mapred.reducer.new-api": "true",
            "mapred.job.queue.name": "${queueName}",
            "mapreduce.job.map.class": "WordCount$Map",
            "mapreduce.job.reduce.class": "WordCount$Reduce",
            "mapreduce.job.output.key.class": "org.apache.hadoop.io.Text",
            "mapreduce.job.output.value.class": "org.apache.hadoop.io.IntWritable",
            "mapreduce.input.fileinputformat.inputdir": "/user/mapred/${examplesRoot}/mapreduce/input",
            "mapreduce.output.fileoutputformat.outputdir": "/user/mapred/${examplesRoot}/mapreduce/output",
        }
        expected_task = Task(
            task_id="test_id",
            template_name="mapreduce.tpl",
            template_params={
                "props": PropertySet(
                    config={
                        "dataproc_cluster": "my-cluster",
                        "gcp_region": "europe-west3",
                        "hadoop_jars": "hdfs:///user/mapred/examples/mapreduce/lib/wordcount.jar",
                        "hadoop_main_class": "WordCount",
                    },
                    job_properties={"nameNode": "hdfs://"},
                    action_node_properties=expected_node_properties,
                ),
                "params_dict": {},
                "hdfs_files": [],
                "hdfs_archives": [],
                "action_node_properties": expected_node_properties,
            },
        )
        self.assertEqual([expected_task], tasks)
        self.assertEqual(relations, [])