Ejemplo n.º 1
0
 def test_configure_logging(self):
     """Build a file-based Pig job with logging options and run it.

     The mock executor is handed the command line expected for a job
     configured with a log file plus the brief and debug flags.
     """
     expected_command = ('pig '
                         '-logfile pig.log -brief -debug '
                         '-f "wordcount.pig"')
     job = Pig.load_commands_from_file(
         path='wordcount.pig',
         command_executor=mock_executor(expected_command))
     job = job.log_config(logfile="pig.log", debug=True, brief=True)
     job.run()
Ejemplo n.º 2
0
 def test_configure_logging(self):
     """Run a job loaded from 'wordcount.pig' with logfile/brief/debug set.

     The expected pig command line is passed to the mock executor.
     """
     executor = mock_executor(
         'pig -logfile pig.log -brief -debug -f "wordcount.pig"')
     (
         Pig.load_commands_from_file(path='wordcount.pig',
                                     command_executor=executor)
         .log_config(logfile="pig.log", debug=True, brief=True)
         .run()
     )
Ejemplo n.º 3
0
 def test_with_param_query(self):
     """Run a file-based job whose parameters come from a properties file.

     The mock executor is given the command line expected to contain
     '-param_file params.properties'.
     """
     expected_command = ('pig '
                         '-param_file params.properties '
                         '-f "wordcount.pig"')
     job = Pig.load_commands_from_file(
         path='wordcount.pig',
         command_executor=mock_executor(expected_command))
     job.load_parameters_from_file("params.properties").run()
Ejemplo n.º 4
0
 def test_with_property_file(self):
     """Run a file-based job with a property file and the default mode.

     The expected command handed to the mock executor carries
     '-propertyFile pig.properties' followed by '-x mapreduce'.
     """
     expected_command = ('pig '
                         '-propertyFile pig.properties '
                         '-x mapreduce '
                         '-f "wordcount.pig"')
     job = Pig.load_commands_from_file(
         path='wordcount.pig',
         command_executor=mock_executor(expected_command))
     job = job.with_property_file("pig.properties")
     job = job.using_mode()
     job.run()
Ejemplo n.º 5
0
 def test_with_property_file(self):
     """Attach 'pig.properties', call using_mode() with no arguments, run.

     The mock executor receives the command line this configuration is
     expected to produce.
     """
     executor = mock_executor(
         'pig -propertyFile pig.properties -x mapreduce -f "wordcount.pig"')
     (
         Pig.load_commands_from_file(path='wordcount.pig',
                                     command_executor=executor)
         .with_property_file("pig.properties")
         .using_mode()
         .run()
     )
Ejemplo n.º 6
0
 def test_log4j_configs_injections(self):
     """Run a file-based job configured with a log4j properties file.

     The expected command given to the mock executor contains
     '-log4jconf ~/log4j.properties'.
     """
     expected_command = ('pig '
                         '-log4jconf ~/log4j.properties '
                         '-f "wordcount.pig"')
     job = Pig.load_commands_from_file(
         path='wordcount.pig',
         command_executor=mock_executor(expected_command))
     job.log4j_config("~/log4j.properties").run()
Ejemplo n.º 7
0
 def test_with_param_query(self):
     """Load parameters from 'params.properties' and run the job.

     The mock executor is created with the command line that is expected
     for this configuration.
     """
     executor = mock_executor(
         'pig -param_file params.properties -f "wordcount.pig"')
     (
         Pig.load_commands_from_file(path='wordcount.pig',
                                     command_executor=executor)
         .load_parameters_from_file("params.properties")
         .run()
     )
Ejemplo n.º 8
0
 def test_log4j_configs_injections(self):
     """Point the job at '~/log4j.properties' via log4j_config() and run it.

     The mock executor is created with the expected pig command line.
     """
     executor = mock_executor(
         'pig -log4jconf ~/log4j.properties -f "wordcount.pig"')
     (
         Pig.load_commands_from_file(path='wordcount.pig',
                                     command_executor=executor)
         .log4j_config("~/log4j.properties")
         .run()
     )
Ejemplo n.º 9
0
 def test_with_param_file(self):
     """Run a file-based job with two inline parameters and the default mode.

     The expected command handed to the mock executor lists both
     '-param' options before '-x mapreduce'.
     """
     expected_command = ('pig '
                         '-param param001=value001 '
                         '-param param002=value002 '
                         '-x mapreduce '
                         '-f "wordcount.pig"')
     job = Pig.load_commands_from_file(
         path='wordcount.pig',
         command_executor=mock_executor(expected_command))
     # Same call order as the chained form: first parameter, mode, second
     # parameter, then run.
     job = job.with_parameter("param001", "value001")
     job = job.using_mode()
     job = job.with_parameter("param002", "value002")
     job.run()
Ejemplo n.º 10
0
 def test_with_param_file(self):
     """Set param001 and param002 around a using_mode() call, then run.

     The mock executor is created with the command line expected for this
     configuration.
     """
     executor = mock_executor('pig '
                              '-param param001=value001 '
                              '-param param002=value002 '
                              '-x mapreduce '
                              '-f "wordcount.pig"')
     (
         Pig.load_commands_from_file(path='wordcount.pig',
                                     command_executor=executor)
         .with_parameter("param001", "value001")
         .using_mode()
         .with_parameter("param002", "value002")
         .run()
     )
Ejemplo n.º 11
0
    def test_run_preconfigured_job_without_parameters_substitution(self):
        """Run a preconfigured Pig job whose script has its paths inlined.

        A small comma-separated file is copied via ``copy_file_from_local``,
        a three-statement script (load, project first column, store) is
        stored in a fresh ``Configuration`` under a unique job name, and the
        job is loaded with ``Pig.load_preconfigured_job`` and run. The test
        fails if the run reports failure or the output path is missing.
        Input and output paths are removed in ``finally``.
        """
        _test_id = str(uuid.uuid4())
        _job_name = "TEST_PIG_{}".format(_test_id)
        _input_dir = self.copy_file_from_local(
            self.temp_file("hello,world,world", ".txt"))
        _output_dir = "/tmp/data_{}".format(_test_id)

        # "\\$0" spells the backslash explicitly. The script text is the same
        # as with the former "\$0" literal, but "\$" is an invalid escape
        # sequence that newer Python versions warn about.
        _commands = "".join([
            "A = load '{}' using PigStorage(',');".format(_input_dir),
            "B = foreach A generate \\$0 as id;",
            "STORE B into '{}';".format(_output_dir),
        ])
        # create job configuration. can also be loaded from .ini file
        _config = Configuration.create()
        _config.set(_job_name, TaskOptions.CONFIG_KEY_COMMANDS_STRING,
                    _commands)
        _config.set(_job_name, TaskOptions.CONFIG_KEY_LOG_BRIEF, 'enabled')
        _config.set(
            _job_name, TaskOptions.CONFIG_KEY_PARAMETER_VALUE,
            'input_dir={}\noutput_dir={}'.format(_input_dir, _output_dir))
        try:
            _pig = Pig.load_preconfigured_job(config=_config,
                                              job_name=_job_name)
            _result = _pig.run()
            _result.if_failed_raise(
                AssertionError("test_run_preconfigured_job failed"))
            self.assertTrue(
                HDFS(_output_dir).exists(), "Cannot find job output")
        finally:
            self.delete_file_in_hdfs(_input_dir)
            self.delete_file_in_hdfs(_output_dir)
Ejemplo n.º 12
0
    def test_run_preconfigured_job_without_parameters_substitution(self):
        """Run a Pig job loaded from an in-memory configuration.

        The script (load, project first column, store) has the input and
        output paths formatted directly into it. The job definition is
        written into a ``Configuration`` created on the spot, loaded with
        ``Pig.load_preconfigured_job`` and executed. The test requires a
        successful result and an existing output path, and always removes
        the input and output paths afterwards.
        """
        _test_id = str(uuid.uuid4())
        _job_name = "TEST_PIG_{}".format(_test_id)
        _input_dir = self.copy_file_from_local(self.temp_file("hello,world,world", ".txt"))
        _output_dir = "/tmp/data_{}".format(_test_id)

        _commands = "A = load '{}' using PigStorage(',');".format(_input_dir)
        # The backslash is written as "\\" so the literal no longer relies on
        # the invalid "\$" escape; the resulting script text is unchanged.
        _commands += "B = foreach A generate \\$0 as id;"
        _commands += "STORE B into '{}';".format(_output_dir)
        # create job configuration. can also be loaded from .ini file
        _config = Configuration.create()
        _config.set(_job_name, TaskOptions.CONFIG_KEY_COMMANDS_STRING, _commands)
        _config.set(_job_name, TaskOptions.CONFIG_KEY_LOG_BRIEF, "enabled")
        _config.set(
            _job_name,
            TaskOptions.CONFIG_KEY_PARAMETER_VALUE,
            "input_dir={}\noutput_dir={}".format(_input_dir, _output_dir),
        )
        try:
            _pig = Pig.load_preconfigured_job(config=_config, job_name=_job_name)
            _result = _pig.run()
            _result.if_failed_raise(AssertionError("test_run_preconfigured_job failed"))
            self.assertTrue(HDFS(_output_dir).exists(), "Cannot find job output")
        finally:
            self.delete_file_in_hdfs(_input_dir)
            self.delete_file_in_hdfs(_output_dir)
Ejemplo n.º 13
0
    def test_logging_configuration(self):
        """Run a parameterized Pig script with a log file in a scratch directory.

        A local directory ``/tmp/pig_log`` is created, the job's log file is
        placed inside it, and the script is run with ``input_dir`` and
        ``output_dir`` parameters. The assertion compares the directory's
        existence with the run's ``is_ok()`` result. The directory, the
        copied input and the local script file are removed in ``finally``.
        """
        import os
        import shutil

        files = self.copy_file_from_local(self.temp_file("hello,world,world", ".txt"))
        path = "/tmp/pig_log"
        commands = "A = load '$input_dir' using PigStorage(',');"
        # "\\$0" keeps the same script text as the former "\$0" literal
        # without using an invalid escape sequence.
        commands += "B = foreach A generate \\$0 as id;"
        commands += "STORE B into '$output_dir';"
        files_s = self.temp_file(commands)
        try:
            os.makedirs(path)
            pig = (
                Pig.load_commands_from_file(files_s)
                .with_parameter("input_dir", files)
                .with_parameter("output_dir", "/tmp/data")
            )
            # Join with a separator so the log file lands inside `path`;
            # plain concatenation produced "/tmp/pig_logpig", which the
            # rmtree below would never clean up.
            pig.log_config(logfile=os.path.join(path, "pig"))
            self.assertEqual(os.path.exists(path), pig.run().is_ok())
        finally:
            shutil.rmtree(path)
            # NOTE(review): called without a path as in the original; which
            # path it removes by default is not visible here.
            self.delete_file_in_hdfs()
            self.delete_file_in_hdfs(files)
            self.delete_local(files_s)
Ejemplo n.º 14
0
def merge_snapshot_with_updates(context):
    """Run the Pig script that merges the active snapshot with the updates.

    Stores today's date (``%Y%m%d``) in ``context["partition"]`` and passes
    it, together with the snapshot path, the updates path under the HDFS
    temp directory and the output path, as parameters to the Pig job.
    """
    context["partition"] = datetime.now().strftime('%Y%m%d')
    pig_job = (
        Pig.load_commands_from_file(_pig_script)
        .with_parameter("active_snapshot", _scd_active_snapshot)
        .with_parameter(
            "data_updates",
            os.path.join(_hdfs_tmpdir.path, os.path.basename(_scd_updates)))
        .with_parameter('output', _hdfs_job_output)
        .with_parameter("date", context["partition"])
    )
    pig_job.run()
Ejemplo n.º 15
0
Archivo: flow.py Proyecto: epam/Merlin
def merge_snapshot_with_updates(context):
    """Merge the active snapshot with the data updates through a Pig job.

    The current date formatted as ``%Y%m%d`` is written to
    ``context["partition"]`` and also handed to the script as the ``date``
    parameter, after the snapshot, updates and output parameters.
    """
    context["partition"] = datetime.now().strftime('%Y%m%d')

    # Parameters are applied one at a time, in the same order as before.
    job = Pig.load_commands_from_file(_pig_script)
    job = job.with_parameter("active_snapshot", _scd_active_snapshot)
    updates_path = os.path.join(_hdfs_tmpdir.path,
                                os.path.basename(_scd_updates))
    job = job.with_parameter("data_updates", updates_path)
    job = job.with_parameter('output', _hdfs_job_output)
    job = job.with_parameter("date", context["partition"])
    job.run()
Ejemplo n.º 16
0
 def test_load_preconfigured_job(self):
     """Load the 'pig test' job from resources/pig/pig.ini and run it.

     The configuration is read through an ``IniFileMetaStore`` located
     next to this file, split-filter optimization is additionally turned
     off, and the mock executor is created with the expected command.
     """
     _command = 'pig -brief -optimizer_off SplitFilter -optimizer_off ColumnMapKeyPrune -e "ls /"'
     ini_path = os.path.join(os.path.dirname(__file__), 'resources/pig/pig.ini')
     metastore = IniFileMetaStore(file=ini_path)
     config = Configuration.load(metastore=metastore,
                                 readonly=False,
                                 accepts_nulls=True)
     executor = mock_executor(expected_command=_command)
     pig = Pig.load_preconfigured_job(job_name='pig test',
                                      config=config,
                                      command_executor=executor)
     pig.without_split_filter().run()
Ejemplo n.º 17
0
 def test_load_preconfigured_job(self):
     """Run the preconfigured 'pig test' job with split filter disabled.

     The job settings come from 'resources/pig/pig.ini' relative to this
     file; the mock executor is given the expected pig command line.
     """
     _command = 'pig -brief -optimizer_off SplitFilter -optimizer_off ColumnMapKeyPrune -e "ls /"'
     resources_dir = os.path.dirname(__file__)
     metastore = IniFileMetaStore(
         file=os.path.join(resources_dir, 'resources/pig/pig.ini'))
     load_kwargs = dict(
         job_name='pig test',
         config=Configuration.load(metastore=metastore,
                                   readonly=False,
                                   accepts_nulls=True),
         command_executor=mock_executor(expected_command=_command),
     )
     Pig.load_preconfigured_job(**load_kwargs).without_split_filter().run()
Ejemplo n.º 18
0
 def test_wrap_with_quotes(self):
     """Check ``_wrap_with_quotes_`` against a table of inputs.

     Expected results: empty string and None come back unchanged, plain
     text gets double quotes, already single-quoted text is untouched,
     text with a double quote gets single quotes, and text with a single
     quote gets double quotes.
     """
     wrapper = Pig(config=Configuration.create(),
                   job_name=None,
                   command_executor=None)
     # (expected, raw input) pairs, checked in this order.
     cases = (
         ("", ""),
         (None, None),
         ('"test"', "test"),
         ("'test'", "'test'"),
         ("'te\"st'", 'te"st'),
         ('"te\'st"', "te'st"),
     )
     for expected, raw in cases:
         self.assertEqual(expected, wrapper._wrap_with_quotes_(raw))
Ejemplo n.º 19
0
 def test_wrap_with_quotes(self):
     """Exercise the quoting helper on empty, None, plain and quoted text."""
     wrap = Pig(
         config=Configuration.create(),
         job_name=None,
         command_executor=None,
     )._wrap_with_quotes_

     # Empty and missing values are expected back unchanged.
     self.assertEqual("", wrap(""))
     self.assertEqual(None, wrap(None))
     # Plain text is expected in double quotes; pre-quoted text stays as is.
     self.assertEqual('"test"', wrap("test"))
     self.assertEqual("'test'", wrap("'test'"))
     # The quote character expected is the one not present in the text.
     self.assertEqual("'te\"st'", wrap('te"st'))
     self.assertEqual('"te\'st"', wrap("te'st"))
Ejemplo n.º 20
0
    def test_run_commands_from_string_without_param_substitution(self):
        """Run a Pig script given as a string with paths already inlined.

        The script loads the copied input, projects the first column and
        stores it under a unique output path. The test fails if the run
        reports failure or the output path is missing; input and output
        paths are removed in ``finally``.
        """
        _test_id = str(uuid.uuid4())
        _output_dir = "/tmp/data_{}".format(_test_id)
        _input_dir = self.copy_file_from_local(self.temp_file("hello,world,world", ".txt"))

        commands = "A = load '{}' using PigStorage(',');".format(_input_dir)
        # "\\$0" yields the same text as the former "\$0" literal, without
        # the invalid escape sequence that newer Python versions warn about.
        commands += "B = foreach A generate \\$0 as id;"
        commands += "STORE B into '{}';".format(_output_dir)
        try:
            _pig = Pig.load_commands_from_string(commands)
            _result = _pig.run()
            _result.if_failed_raise(AssertionError("test_run_commands_from_string failed"))
            self.assertTrue(HDFS(_output_dir).exists(), "Cannot find job output")
        finally:
            self.delete_file_in_hdfs(_input_dir)
            self.delete_file_in_hdfs(_output_dir)
Ejemplo n.º 21
0
    def test_run_commands_from_string_without_param_substitution(self):
        """Execute an inline Pig script that needs no parameter substitution.

        Input and output locations are formatted into the script text
        before it is handed to ``Pig.load_commands_from_string``. A failed
        run raises ``AssertionError``, and the output path must exist
        afterwards. Both paths are cleaned up regardless of the outcome.
        """
        _test_id = str(uuid.uuid4())
        _output_dir = "/tmp/data_{}".format(_test_id)
        _input_dir = self.copy_file_from_local(
            self.temp_file("hello,world,world", ".txt"))

        # The backslash before $0 is written as "\\": same script text as
        # before, but no invalid "\$" escape in the source.
        commands = "".join([
            "A = load '{}' using PigStorage(',');".format(_input_dir),
            "B = foreach A generate \\$0 as id;",
            "STORE B into '{}';".format(_output_dir),
        ])
        try:
            _pig = Pig.load_commands_from_string(commands)
            _result = _pig.run()
            _result.if_failed_raise(
                AssertionError("test_run_commands_from_string failed"))
            self.assertTrue(
                HDFS(_output_dir).exists(), "Cannot find job output")
        finally:
            self.delete_file_in_hdfs(_input_dir)
            self.delete_file_in_hdfs(_output_dir)
Ejemplo n.º 22
0
 def test_run_commands_from_file(self):
     """Run a parameterized Pig script from a local file.

     The script refers to ``$input_dir`` and ``$output_dir``; both are
     supplied with ``with_parameter``. The run must succeed and the unique
     output path must exist. The local script, the copied input and the
     output path are removed in ``finally``.
     """
     _test_id = str(uuid.uuid4())
     # Defined before the try block so the cleanup below can always see it.
     _output_dir = "/tmp/data_{}".format(_test_id)
     _inputs = self.copy_file_from_local(
         self.temp_file("hello,world,world", ".txt"))
     commands = "A = load '$input_dir' using PigStorage(',');"
     # "\\$0" keeps the script text of the former "\$0" literal without
     # using an invalid escape sequence.
     commands += "B = foreach A generate \\$0 as id;"
     commands += "STORE B into '$output_dir';"
     files_s = self.temp_file(commands)
     try:
         pig = Pig.load_commands_from_file(files_s) \
             .with_parameter("input_dir", _inputs) \
             .with_parameter("output_dir", _output_dir)
         self.assertTrue(pig.run().is_ok())
         self.assertTrue(HDFS(_output_dir).exists())
     finally:
         self.delete_local(files_s)
         # NOTE(review): kept from the original; which path the no-argument
         # call removes is not visible here.
         self.delete_file_in_hdfs()
         self.delete_file_in_hdfs(_inputs)
         # The uuid-named output was previously never removed; clean it up
         # the same way the string-based run tests do.
         self.delete_file_in_hdfs(_output_dir)
Ejemplo n.º 23
0
 def test_run_commands_from_file(self):
     """Execute a Pig script file with input/output passed as parameters.

     A temp script using ``$input_dir`` and ``$output_dir`` is written,
     loaded with ``Pig.load_commands_from_file`` and run. The test asserts
     a successful result and an existing output path, then removes the
     script, the copied input and the output path.
     """
     _test_id = str(uuid.uuid4())
     # Computed up front so it is always available to the finally block.
     _output_dir = "/tmp/data_{}".format(_test_id)
     _inputs = self.copy_file_from_local(self.temp_file("hello,world,world", ".txt"))
     # "\\$0" writes the backslash explicitly; the script text matches the
     # former "\$0" literal, minus the invalid escape sequence.
     commands = "".join([
         "A = load '$input_dir' using PigStorage(',');",
         "B = foreach A generate \\$0 as id;",
         "STORE B into '$output_dir';",
     ])
     files_s = self.temp_file(commands)
     try:
         pig = (
             Pig.load_commands_from_file(files_s)
             .with_parameter("input_dir", _inputs)
             .with_parameter("output_dir", _output_dir)
         )
         self.assertTrue(pig.run().is_ok())
         self.assertTrue(HDFS(_output_dir).exists())
     finally:
         self.delete_local(files_s)
         # NOTE(review): no-argument call kept as in the original; its
         # default target is not visible here.
         self.delete_file_in_hdfs()
         self.delete_file_in_hdfs(_inputs)
         # Remove the per-test output too, as the sibling run tests do.
         self.delete_file_in_hdfs(_output_dir)
Ejemplo n.º 24
0
    def test_logging_configuration(self):
        """Run a Pig script file with its log file kept in ``/tmp/pig_log``.

        The directory is created locally, the job is configured with
        ``input_dir``/``output_dir`` parameters and a log file inside that
        directory, and the assertion compares the directory's existence
        with the run's ``is_ok()`` result. Cleanup removes the directory,
        the copied input and the local script.
        """
        import os
        import shutil

        files = self.copy_file_from_local(
            self.temp_file("hello,world,world", ".txt"))
        path = "/tmp/pig_log"
        # "\\$0" produces the same text as the former "\$0" literal but is
        # a valid escape sequence.
        commands = "".join([
            "A = load '$input_dir' using PigStorage(',');",
            "B = foreach A generate \\$0 as id;",
            "STORE B into '$output_dir';",
        ])
        files_s = self.temp_file(commands)
        try:
            os.makedirs(path)
            pig = Pig.load_commands_from_file(files_s).with_parameter("input_dir", files) \
                .with_parameter("output_dir", "/tmp/data")
            # os.path.join puts the log file inside `path`. The previous
            # `path + "pig"` pointed at "/tmp/pig_logpig", outside the
            # directory that is removed below.
            pig.log_config(logfile=os.path.join(path, "pig"))
            self.assertEqual(os.path.exists(path), pig.run().is_ok())
        finally:
            shutil.rmtree(path)
            # NOTE(review): no-argument call kept from the original; its
            # default target is not visible here.
            self.delete_file_in_hdfs()
            self.delete_file_in_hdfs(files)
            self.delete_local(files_s)
Ejemplo n.º 25
0
 def test_run_script_from_string(self):
     """Run the inline command "ls /" through a mock executor.

     The mock executor is created with the expected command line
     'pig -e "ls /"'.
     """
     executor = mock_executor('pig -e "ls /"')
     job = Pig.load_commands_from_string(commands="ls /",
                                         command_executor=executor)
     job.run()
Ejemplo n.º 26
0
    def test_optimization_disabling(self):
        """Run one job per optimizer switch, plus two combined cases.

        Every job is loaded from 'wordcount.pig' with a mock executor that
        is given the command line expected for that switch combination.
        """
        def job_expecting(command):
            # Fresh file-based job whose mock executor is created with `command`.
            return Pig.load_commands_from_file(
                path='wordcount.pig',
                command_executor=mock_executor(command))

        # Individual optimizer switches.
        job_expecting('pig -optimizer_off SplitFilter -f "wordcount.pig"') \
            .without_split_filter().run()
        job_expecting('pig -optimizer_off PushUpFilter -f "wordcount.pig"') \
            .without_pushup_filter().run()
        job_expecting('pig -optimizer_off MergeFilter -f "wordcount.pig"') \
            .without_merge_filter().run()
        job_expecting('pig -optimizer_off PushDownForeachFlatten -f "wordcount.pig"') \
            .without_push_down_foreach_flatten().run()
        job_expecting('pig -optimizer_off LimitOptimizer -f "wordcount.pig"') \
            .without_limit_optimizer().run()
        job_expecting('pig -optimizer_off ColumnMapKeyPrune -f "wordcount.pig"') \
            .without_column_map_key_prune().run()
        job_expecting('pig -optimizer_off AddForEach -f "wordcount.pig"') \
            .without_add_foreach().run()
        job_expecting('pig -optimizer_off MergeForEach -f "wordcount.pig"') \
            .without_merge_foreach().run()
        job_expecting('pig -optimizer_off GroupByConstParallelSetter -f "wordcount.pig"') \
            .without_groupby_const_parallel_setter().run()

        # Everything off at once.
        job_expecting('pig -optimizer_off All -f "wordcount.pig"') \
            .disable_all_optimizations().run()

        # Two switches on the same job.
        two_switches = job_expecting('pig '
                                     '-optimizer_off LimitOptimizer '
                                     '-optimizer_off AddForEach '
                                     '-f "wordcount.pig"')
        two_switches.without_add_foreach().without_limit_optimizer().run()

        # Switches combined with tez mode and multiquery disabled.
        tez_job = job_expecting('pig '
                                '-x tez '
                                '-optimizer_off LimitOptimizer '
                                '-optimizer_off AddForEach '
                                '-no_multiquery '
                                '-f "wordcount.pig"')
        tez_job = tez_job.without_add_foreach().using_mode(type="tez")
        tez_job = tez_job.without_limit_optimizer()
        tez_job.without_multiquery().run()
Ejemplo n.º 27
0
 def test_run_script_from_string(self):
     """Load "ls /" as an inline script and run it with a mock executor."""
     load_kwargs = {
         'commands': "ls /",
         # Expected command line for an inline script.
         'command_executor': mock_executor('pig -e "ls /"'),
     }
     Pig.load_commands_from_string(**load_kwargs).run()
Ejemplo n.º 28
0
 def test_run_script_from_file_verbose(self):
     """Call ``debug()`` on a job loaded from 'wordcount.pig'.

     The mock executor is created with the expected command line
     'pig -verbose -f "wordcount.pig"'.
     """
     # NOTE(review): only debug() is invoked, never run(); confirm that
     # debug() itself executes the command.
     executor = mock_executor('pig -verbose -f "wordcount.pig"')
     job = Pig.load_commands_from_file(path='wordcount.pig',
                                       command_executor=executor)
     job.debug()
Ejemplo n.º 29
0
    def test_optimization_disabling(self):
        """Table-driven check of the optimizer-disabling options.

        Each entry pairs the command line given to the mock executor with
        the configuration calls applied to a job loaded from
        'wordcount.pig'. Entries run in the listed order.
        """
        cases = [
            ('pig -optimizer_off SplitFilter -f "wordcount.pig"',
             lambda job: job.without_split_filter()),
            ('pig -optimizer_off PushUpFilter -f "wordcount.pig"',
             lambda job: job.without_pushup_filter()),
            ('pig -optimizer_off MergeFilter -f "wordcount.pig"',
             lambda job: job.without_merge_filter()),
            ('pig -optimizer_off PushDownForeachFlatten -f "wordcount.pig"',
             lambda job: job.without_push_down_foreach_flatten()),
            ('pig -optimizer_off LimitOptimizer -f "wordcount.pig"',
             lambda job: job.without_limit_optimizer()),
            ('pig -optimizer_off ColumnMapKeyPrune -f "wordcount.pig"',
             lambda job: job.without_column_map_key_prune()),
            ('pig -optimizer_off AddForEach -f "wordcount.pig"',
             lambda job: job.without_add_foreach()),
            ('pig -optimizer_off MergeForEach -f "wordcount.pig"',
             lambda job: job.without_merge_foreach()),
            ('pig -optimizer_off GroupByConstParallelSetter -f "wordcount.pig"',
             lambda job: job.without_groupby_const_parallel_setter()),
            ('pig -optimizer_off All -f "wordcount.pig"',
             lambda job: job.disable_all_optimizations()),
            # Two switches on one job.
            ('pig '
             '-optimizer_off LimitOptimizer '
             '-optimizer_off AddForEach '
             '-f "wordcount.pig"',
             lambda job: job.without_add_foreach().without_limit_optimizer()),
            # Switches together with tez mode and multiquery disabled.
            ('pig '
             '-x tez '
             '-optimizer_off LimitOptimizer '
             '-optimizer_off AddForEach '
             '-no_multiquery '
             '-f "wordcount.pig"',
             lambda job: job.without_add_foreach().using_mode(type="tez")
                            .without_limit_optimizer()
                            .without_multiquery()),
        ]
        for expected_command, configure in cases:
            job = Pig.load_commands_from_file(
                path='wordcount.pig',
                command_executor=mock_executor(expected_command))
            configure(job).run()
Ejemplo n.º 30
0
 def test_run_script_from_file_verbose(self):
     """Load 'wordcount.pig' and invoke ``debug()`` on the job.

     The expected command line passed to the mock executor includes
     '-verbose'.
     """
     # NOTE(review): run() is not called here; verify that debug() is what
     # triggers the executor.
     load_kwargs = {
         'path': 'wordcount.pig',
         'command_executor': mock_executor('pig -verbose -f "wordcount.pig"'),
     }
     Pig.load_commands_from_file(**load_kwargs).debug()
Ejemplo n.º 31
0
 def test_try_execute_empty_command(self):
     """Running a Pig job built without any commands must raise PigCommandError."""
     # Job created from an empty configuration, with no job name or executor.
     empty_job = Pig(config=Configuration.create(),
                     job_name=None,
                     command_executor=None)
     self.assertRaises(PigCommandError, empty_job.run)