def test_mr_job_command_generation(self):
    _job_name = "test_mr_job_%s" % uuid.uuid4()
    _expected_command = "hadoop jar {0}/resources/mapreduce/hadoop-mapreduce-examples.jar " \
                        "demo.mr.Driver -D mapreduce.job.name={1}" \
        .format(os.path.dirname(os.path.realpath(__file__)), _job_name)
    MapReduce.prepare_mapreduce_job(
        jar="{0}/resources/mapreduce/hadoop-mapreduce-examples.jar".format(
            os.path.dirname(os.path.realpath(__file__))),
        main_class="demo.mr.Driver",
        name=_job_name,
        executor=self.assert_generated_command(_expected_command)
    ).run()

def test_mr_job_command_generation(self):
    _expected_command = 'hadoop jar {0}/resources/mapreduce/hadoop-mapreduce-examples.jar ' \
                        'test.mr.Driver ' \
                        '-D mapreduce.job.name=simple_mr_job ' \
                        '-D value.delimiter.char=, ' \
                        '-D partition.to.process=20142010 ' \
                        '/input/dir ' \
                        '/output/dir'.format(os.path.dirname(os.path.realpath(__file__)))
    MapReduce.prepare_mapreduce_job(
        jar="{0}/resources/mapreduce/hadoop-mapreduce-examples.jar".format(
            os.path.dirname(os.path.realpath(__file__))),
        main_class="test.mr.Driver",
        config=self._config,
        name='simple_mr_job',
        executor=self.assert_generated_command(_expected_command)
    ).run()

def test_mr_job_command_generation_with_configurations(self):
    _job_name = "test_mr_job_%s" % uuid.uuid4()
    _expected_command = "hadoop jar " \
                        "{0}/resources/mapreduce/hadoop-mapreduce-examples.jar " \
                        "demo.mr.Driver " \
                        "-D mapreduce.job.name={1} " \
                        "-D job.input=/data/raw/24102014 " \
                        "-D mapreduce.job.reduces=10" \
        .format(os.path.dirname(os.path.realpath(__file__)), _job_name)
    MapReduce.prepare_mapreduce_job(
        jar="{0}/resources/mapreduce/hadoop-mapreduce-examples.jar".format(
            os.path.dirname(os.path.realpath(__file__))),
        main_class="demo.mr.Driver",
        name=_job_name,
        executor=self.assert_generated_command(_expected_command)
    ).with_config_option("job.input", "/data/raw/24102014") \
        .with_number_of_reducers(10) \
        .run()

def test_mr_job_command_generation_with_arguments(self):
    _job_name = "test_mr_job_%s" % uuid.uuid4()
    _expected_command = "hadoop jar " \
                        "{0}/resources/mapreduce/hadoop-mapreduce-examples.jar " \
                        "wordcount " \
                        "-D mapreduce.job.name={1} " \
                        "-D split.by='\\t' " \
                        "-D mapreduce.job.reduces=3 " \
                        "/user/vagrant/dmode.txt " \
                        "/tmp/test".format(os.path.dirname(os.path.realpath(__file__)), _job_name)
    MapReduce.prepare_mapreduce_job(
        jar="{0}/resources/mapreduce/hadoop-mapreduce-examples.jar".format(
            os.path.dirname(os.path.realpath(__file__))),
        main_class="wordcount",
        name=_job_name,
        executor=self.assert_generated_command(_expected_command)
    ).with_config_option("split.by", "'\\t'") \
        .with_number_of_reducers(3) \
        .with_arguments() \
        .run("/user/vagrant/dmode.txt", "/tmp/test")

def process_data(context):
    # Configure and run MapReduce job
    _mapreduce_job_ = MapReduce.prepare_mapreduce_job(
        jar=os.path.join(os.path.dirname(__file__), 'resources/WordsCount-1.0-SNAPSHOT.jar'),
        main_class="WordsCountJob",
        name="MAPREDUCE_Counting"
    ).with_config_option("input", "{0}/data_from_import".format(BASE_DIR)) \
        .with_config_option("output", "{0}/data_to_export".format(BASE_DIR))
    _mapreduce_job_.run()
    status = _mapreduce_job_.status()
    if not status.is_succeeded():
        raise MapReduceJobException("MapReduce job failed: {}".format(
            status.failure_reason() if status.is_failed() else 'NONE'))

def test_mr_job_command_generation_with_arguments(self):
    _job_name = "test_mr_job_%s" % uuid.uuid4()
    _base_dir = HDFS(os.path.join("/tmp", _job_name))
    _base_dir.create_directory()
    try:
        jar = os.path.join(os.path.dirname(__file__), 'resources', 'mapreduce',
                           'hadoop-mapreduce-examples.jar')
        if not os.path.exists(jar):
            self.skipTest("'%s' not found" % jar)

        # configure job inputs
        _job_input = HDFS(os.path.join(_base_dir.path, "input"))
        _job_input.create_directory()
        LocalFS(
            os.path.join(os.path.dirname(__file__), 'resources', 'mapreduce', 'raw-data.txt')
        ).copy_to_hdfs(_job_input.path)

        # configure job output
        _job_output = HDFS(os.path.join(_base_dir.path, "output"))

        job = MapReduce.prepare_mapreduce_job(jar=jar,
                                              main_class="wordcount",
                                              name=_job_name) \
            .with_config_option("split.by", "'\\t'") \
            .with_number_of_reducers(3) \
            .with_arguments(_job_input.path, _job_output.path)
        _command_submission_result = job.run()
        _command_submission_result.if_failed_raise(AssertionError("Cannot run MR job"))
        _job_status = job.status()
        self.assertTrue(_job_status is not None and _job_status.is_succeeded(), "MR job Failed")
        self.assertTrue(_job_output.exists(), "Error: empty job output")

        # check counters
        self.assertEqual(6, _job_status.counter(group='File System Counters',
                                                counter='HDFS: Number of write operations'))
        self.assertEqual(1, _job_status.counter(group='Job Counters',
                                                counter='Launched map tasks'))
        self.assertEqual(3, _job_status.counter(group='Job Counters',
                                                counter='Launched reduce tasks'))
        self.assertEqual(2168, _job_status.counter(group='File Input Format Counters',
                                                   counter='Bytes Read'))
    finally:
        _base_dir.delete_directory()