Esempio n. 1
0
 def test_configure_logging(self):
     """Run a Pig job configured with logfile, brief and debug logging options.

     The command line expected for these options is handed to
     ``mock_executor``; presumably that helper checks the generated command
     against it (it is defined outside this snippet -- confirm).
     """
     expected_command = 'pig -logfile pig.log -brief -debug -f "wordcount.pig"'
     job = Pig.load_commands_from_file(
         path='wordcount.pig',
         command_executor=mock_executor(expected_command))
     job = job.log_config(logfile="pig.log", debug=True, brief=True)
     job.run()
Esempio n. 2
0
 def test_with_property_file(self):
     """Run a Pig job with a property file and an execution mode set.

     ``using_mode()`` is called without arguments while the expected command
     contains ``-x mapreduce``. The expected command is handed to
     ``mock_executor``; presumably that helper verifies it (confirm).
     """
     expected_command = ('pig -propertyFile pig.properties -x mapreduce '
                         '-f "wordcount.pig"')
     job = Pig.load_commands_from_file(
         path='wordcount.pig',
         command_executor=mock_executor(expected_command))
     job = job.with_property_file("pig.properties")
     job = job.using_mode()
     job.run()
Esempio n. 3
0
 def test_log4j_configs_injections(self):
     """Run a Pig job with a log4j configuration file attached.

     The expected ``-log4jconf`` command line is handed to ``mock_executor``;
     presumably that helper verifies the generated command (confirm).
     """
     expected_command = 'pig -log4jconf ~/log4j.properties -f "wordcount.pig"'
     job = Pig.load_commands_from_file(
         path='wordcount.pig',
         command_executor=mock_executor(expected_command))
     job = job.log4j_config("~/log4j.properties")
     job.run()
Esempio n. 4
0
 def test_with_property_file(self):
     """Exercise ``with_property_file`` followed by a no-argument ``using_mode``.

     The mock executor receives the command line containing
     ``-propertyFile pig.properties`` and ``-x mapreduce``; presumably it
     compares that with what the job generates (confirm against the helper).
     """
     executor = mock_executor(
         'pig -propertyFile pig.properties -x mapreduce -f "wordcount.pig"')
     (Pig.load_commands_from_file(path='wordcount.pig',
                                  command_executor=executor)
         .with_property_file("pig.properties")
         .using_mode()
         .run())
Esempio n. 5
0
 def test_log4j_configs_injections(self):
     """Exercise ``log4j_config`` on a job loaded from ``wordcount.pig``.

     The mock executor receives the ``-log4jconf ~/log4j.properties`` command
     line; presumably it compares that with the generated one (confirm).
     """
     executor = mock_executor(
         'pig -log4jconf ~/log4j.properties -f "wordcount.pig"')
     (Pig.load_commands_from_file(path='wordcount.pig',
                                  command_executor=executor)
         .log4j_config("~/log4j.properties")
         .run())
Esempio n. 6
0
 def test_with_param_query(self):
     """Run a Pig job whose parameters are loaded from a parameter file.

     Note that, despite the test name, this exercises
     ``load_parameters_from_file``. The expected ``-param_file`` command line
     is handed to ``mock_executor``; presumably that helper verifies it
     (confirm).
     """
     expected_command = 'pig -param_file params.properties -f "wordcount.pig"'
     job = Pig.load_commands_from_file(
         path='wordcount.pig',
         command_executor=mock_executor(expected_command))
     job = job.load_parameters_from_file("params.properties")
     job.run()
Esempio n. 7
0
 def test_with_param_query(self):
     """Exercise ``load_parameters_from_file`` on a job loaded from a script.

     The mock executor receives the ``-param_file params.properties`` command
     line; presumably it compares that with the generated one (confirm).
     """
     executor = mock_executor(
         'pig -param_file params.properties -f "wordcount.pig"')
     (Pig.load_commands_from_file(path='wordcount.pig',
                                  command_executor=executor)
         .load_parameters_from_file("params.properties")
         .run())
Esempio n. 8
0
 def test_configure_logging(self):
     """Exercise ``log_config`` with logfile, debug and brief all set.

     The mock executor receives the ``-logfile pig.log -brief -debug`` command
     line; presumably it compares that with the generated one (confirm).
     """
     executor = mock_executor(
         'pig -logfile pig.log -brief -debug -f "wordcount.pig"')
     (Pig.load_commands_from_file(path='wordcount.pig',
                                  command_executor=executor)
         .log_config(logfile="pig.log", debug=True, brief=True)
         .run())
Esempio n. 9
0
 def test_with_param_file(self):
     """Run a Pig job with two inline parameters and an execution mode.

     Note that, despite the test name, parameters are supplied through
     ``with_parameter`` rather than a file. ``using_mode()`` is called between
     the two ``with_parameter`` calls, while the expected command lists both
     ``-param`` options before ``-x mapreduce``. The expected command is
     handed to ``mock_executor``; presumably that helper verifies it
     (confirm).
     """
     expected_command = ('pig -param param001=value001 '
                         '-param param002=value002 '
                         '-x mapreduce -f "wordcount.pig"')
     job = Pig.load_commands_from_file(
         path='wordcount.pig',
         command_executor=mock_executor(expected_command))
     job = job.with_parameter("param001", "value001")
     job = job.using_mode()
     job = job.with_parameter("param002", "value002")
     job.run()
Esempio n. 10
0
 def test_with_param_file(self):
     """Exercise two ``with_parameter`` calls with ``using_mode`` in between.

     The mock executor receives a command line with both ``-param`` options
     followed by ``-x mapreduce``; presumably it compares that with the
     generated one (confirm against the helper's definition).
     """
     executor = mock_executor(
         'pig -param param001=value001 -param param002=value002 '
         '-x mapreduce -f "wordcount.pig"')
     (Pig.load_commands_from_file(path='wordcount.pig',
                                  command_executor=executor)
         .with_parameter("param001", "value001")
         .using_mode()
         .with_parameter("param002", "value002")
         .run())
Esempio n. 11
0
    def test_logging_configuration(self):
        """Run a small Pig script with a log file configured and check it succeeds.

        A one-line CSV input is uploaded via ``copy_file_from_local``, a
        three-statement Pig script is written to a temporary file, and the
        job is run with a log file located inside a freshly created local
        directory. The directory, the HDFS files and the local script are
        removed afterwards regardless of the outcome.
        """
        import os
        import shutil

        files = self.copy_file_from_local(self.temp_file("hello,world,world", ".txt"))
        path = "/tmp/pig_log"
        commands = "A = load '$input_dir' using PigStorage(',');"
        # Raw string: "\$" is not a valid Python escape sequence, so a plain
        # literal triggers an invalid-escape warning. The backslash itself
        # must reach the Pig script unchanged.
        commands += r"B = foreach A generate \$0 as id;"
        commands += "STORE B into '$output_dir';"
        files_s = self.temp_file(commands)
        try:
            os.makedirs(path)
            pig = (
                Pig.load_commands_from_file(files_s)
                .with_parameter("input_dir", files)
                .with_parameter("output_dir", "/tmp/data")
            )
            # Join with a path separator so the log file lives inside `path`
            # and is removed by the rmtree below; plain concatenation pointed
            # at "/tmp/pig_logpig", next to the directory.
            pig.log_config(logfile=os.path.join(path, "pig"))
            # os.path.exists(path) is evaluated before the job runs and the
            # directory was just created, so this effectively asserts that
            # the run reports success.
            self.assertEqual(os.path.exists(path), pig.run().is_ok())
        finally:
            shutil.rmtree(path)
            self.delete_file_in_hdfs()
            self.delete_file_in_hdfs(files)
            self.delete_local(files_s)
Esempio n. 12
0
File: flow.py Progetto: epam/Merlin
def merge_snapshot_with_updates(context):
    """Run the Pig script that merges the active snapshot with new updates.

    Stores the current local date, formatted as ``%Y%m%d``, under
    ``context["partition"]`` and passes it to the script as the ``date``
    parameter, together with the snapshot, updates and output locations
    taken from module-level settings. The result of the run is not inspected.

    :param context: mutable mapping shared by the flow; gains a
        ``"partition"`` entry.
    """
    context["partition"] = datetime.now().strftime('%Y%m%d')

    job = Pig.load_commands_from_file(_pig_script)
    job = job.with_parameter("active_snapshot", _scd_active_snapshot)

    # The updates file is addressed by its base name inside the temp dir.
    updates_location = os.path.join(_hdfs_tmpdir.path,
                                    os.path.basename(_scd_updates))
    job = job.with_parameter("data_updates", updates_location)
    job = job.with_parameter('output', _hdfs_job_output)
    job = job.with_parameter("date", context["partition"])

    job.run()
Esempio n. 13
0
def merge_snapshot_with_updates(context):
    """Merge the active snapshot with the staged updates via a Pig script.

    Records the current local date (``%Y%m%d``) in ``context["partition"]``,
    then runs the script with the ``active_snapshot``, ``data_updates``,
    ``output`` and ``date`` parameters. The run's result is discarded.

    :param context: mutable mapping; its ``"partition"`` key is overwritten.
    """
    context["partition"] = datetime.now().strftime('%Y%m%d')

    pig_job = Pig.load_commands_from_file(_pig_script)
    pig_job = pig_job.with_parameter("active_snapshot", _scd_active_snapshot)

    updates_name = os.path.basename(_scd_updates)
    pig_job = pig_job.with_parameter(
        "data_updates", os.path.join(_hdfs_tmpdir.path, updates_name))

    pig_job = pig_job.with_parameter('output', _hdfs_job_output)
    pig_job = pig_job.with_parameter("date", context["partition"])
    pig_job.run()
Esempio n. 14
0
 def test_run_commands_from_file(self):
     """Run a Pig script loaded from a file and check its output directory exists.

     A one-line CSV input is uploaded via ``copy_file_from_local`` and a
     three-statement Pig script is written to a temporary file. The job
     writes to a per-run ``/tmp/data_<uuid>`` directory; the test asserts the
     run reports success and that ``HDFS(_output_dir).exists()`` is true.
     """
     _test_id = str(uuid.uuid4())
     _inputs = self.copy_file_from_local(
         self.temp_file("hello,world,world", ".txt"))
     commands = "A = load '$input_dir' using PigStorage(',');"
     # Raw string: "\$" is not a valid Python escape sequence, so a plain
     # literal triggers an invalid-escape warning. The backslash itself must
     # reach the Pig script unchanged.
     commands += r"B = foreach A generate \$0 as id;"
     commands += "STORE B into '$output_dir';"
     files_s = self.temp_file(commands)
     try:
         _output_dir = "/tmp/data_{}".format(_test_id)
         pig = Pig.load_commands_from_file(files_s) \
             .with_parameter("input_dir", _inputs) \
             .with_parameter("output_dir", _output_dir)
         self.assertTrue(pig.run().is_ok())
         self.assertTrue(HDFS(_output_dir).exists())
     finally:
         self.delete_local(files_s)
         # NOTE(review): the no-argument call presumably removes a default
         # path; confirm that _output_dir is cleaned up as well.
         self.delete_file_in_hdfs()
         self.delete_file_in_hdfs(_inputs)
Esempio n. 15
0
 def test_run_commands_from_file(self):
     """Run a Pig script loaded from a file and check its output directory exists.

     Uploads a one-line CSV input, writes a three-statement Pig script to a
     temporary file, runs it with a per-run ``/tmp/data_<uuid>`` output
     directory, and asserts both that the run reports success and that
     ``HDFS(_output_dir).exists()`` is true.
     """
     _test_id = str(uuid.uuid4())
     _inputs = self.copy_file_from_local(self.temp_file("hello,world,world", ".txt"))
     commands = "A = load '$input_dir' using PigStorage(',');"
     # Raw string: "\$" is not a valid Python escape sequence, so a plain
     # literal triggers an invalid-escape warning. The backslash itself must
     # reach the Pig script unchanged.
     commands += r"B = foreach A generate \$0 as id;"
     commands += "STORE B into '$output_dir';"
     files_s = self.temp_file(commands)
     try:
         _output_dir = "/tmp/data_{}".format(_test_id)
         pig = (
             Pig.load_commands_from_file(files_s)
             .with_parameter("input_dir", _inputs)
             .with_parameter("output_dir", _output_dir)
         )
         self.assertTrue(pig.run().is_ok())
         self.assertTrue(HDFS(_output_dir).exists())
     finally:
         self.delete_local(files_s)
         # NOTE(review): the no-argument call presumably removes a default
         # path; confirm that _output_dir is cleaned up as well.
         self.delete_file_in_hdfs()
         self.delete_file_in_hdfs(_inputs)
Esempio n. 16
0
    def test_logging_configuration(self):
        """Run a small Pig script with a log file configured and check it succeeds.

        Uploads a one-line CSV input, writes a three-statement Pig script to
        a temporary file and runs it with a log file located inside a freshly
        created local directory. The directory, the HDFS files and the local
        script are removed afterwards regardless of the outcome.
        """
        import os
        import shutil

        files = self.copy_file_from_local(
            self.temp_file("hello,world,world", ".txt"))
        path = "/tmp/pig_log"
        commands = "A = load '$input_dir' using PigStorage(',');"
        # Raw string: "\$" is not a valid Python escape sequence, so a plain
        # literal triggers an invalid-escape warning. The backslash itself
        # must reach the Pig script unchanged.
        commands += r"B = foreach A generate \$0 as id;"
        commands += "STORE B into '$output_dir';"
        files_s = self.temp_file(commands)
        try:
            os.makedirs(path)
            pig = Pig.load_commands_from_file(files_s).with_parameter("input_dir", files) \
                .with_parameter("output_dir", "/tmp/data")
            # Join with a path separator so the log file lives inside `path`
            # and is removed by the rmtree below; plain concatenation pointed
            # at "/tmp/pig_logpig", next to the directory.
            pig.log_config(logfile=os.path.join(path, "pig"))
            # os.path.exists(path) is evaluated before the job runs and the
            # directory was just created, so this effectively asserts that
            # the run reports success.
            self.assertEqual(os.path.exists(path), pig.run().is_ok())
        finally:
            shutil.rmtree(path)
            self.delete_file_in_hdfs()
            self.delete_file_in_hdfs(files)
            self.delete_local(files_s)
Esempio n. 17
0
    def test_optimization_disabling(self):
        """Run Pig jobs with individual and combined optimizer rules disabled.

        Each job is loaded from ``wordcount.pig`` with a mock executor that is
        given the command line expected for the builder calls that follow;
        presumably ``mock_executor`` checks the generated command against it
        (the helper is defined outside this snippet -- confirm).
        """
        def job_expecting(command):
            # Fresh job whose mock executor is given the expected command line.
            return Pig.load_commands_from_file(
                path='wordcount.pig',
                command_executor=mock_executor(command))

        # (expected command line, builder method that should produce it)
        single_rule_cases = [
            ('pig -optimizer_off SplitFilter -f "wordcount.pig"',
             'without_split_filter'),
            ('pig -optimizer_off PushUpFilter -f "wordcount.pig"',
             'without_pushup_filter'),
            ('pig -optimizer_off MergeFilter -f "wordcount.pig"',
             'without_merge_filter'),
            ('pig -optimizer_off PushDownForeachFlatten -f "wordcount.pig"',
             'without_push_down_foreach_flatten'),
            ('pig -optimizer_off LimitOptimizer -f "wordcount.pig"',
             'without_limit_optimizer'),
            ('pig -optimizer_off ColumnMapKeyPrune -f "wordcount.pig"',
             'without_column_map_key_prune'),
            ('pig -optimizer_off AddForEach -f "wordcount.pig"',
             'without_add_foreach'),
            ('pig -optimizer_off MergeForEach -f "wordcount.pig"',
             'without_merge_foreach'),
            ('pig -optimizer_off GroupByConstParallelSetter -f "wordcount.pig"',
             'without_groupby_const_parallel_setter'),
            ('pig -optimizer_off All -f "wordcount.pig"',
             'disable_all_optimizations'),
        ]
        for expected_command, builder_name in single_rule_cases:
            job = job_expecting(expected_command)
            getattr(job, builder_name)().run()

        # Two rules disabled on the same job.
        two_rules = job_expecting(
            'pig -optimizer_off LimitOptimizer -optimizer_off AddForEach '
            '-f "wordcount.pig"')
        two_rules = two_rules.without_add_foreach()
        two_rules = two_rules.without_limit_optimizer()
        two_rules.run()

        # Two rules plus the tez execution mode and multiquery switched off.
        tez_job = job_expecting(
            'pig -x tez -optimizer_off LimitOptimizer '
            '-optimizer_off AddForEach -no_multiquery -f "wordcount.pig"')
        tez_job = tez_job.without_add_foreach()
        tez_job = tez_job.using_mode(type="tez")
        tez_job = tez_job.without_limit_optimizer()
        tez_job = tez_job.without_multiquery()
        tez_job.run()
Esempio n. 18
0
 def test_run_script_from_file_verbose(self):
     """Call ``debug()`` on a job whose mock executor is given a ``-verbose`` command.

     NOTE(review): there is no explicit ``run()`` here; presumably ``debug()``
     executes the job itself -- confirm against the ``Pig`` class. The return
     value of ``debug()`` is not inspected.
     """
     expected_command = 'pig -verbose -f "wordcount.pig"'
     job = Pig.load_commands_from_file(
         path='wordcount.pig',
         command_executor=mock_executor(expected_command))
     job.debug()
Esempio n. 19
0
    def test_optimization_disabling(self):
        """Exercise every optimizer-disabling builder method, alone and combined.

        Every job is loaded from ``wordcount.pig`` and paired with a mock
        executor that is given the expected command line; presumably
        ``mock_executor`` compares it with the generated command (the helper
        is defined outside this snippet -- confirm).
        """
        script = 'wordcount.pig'

        def load(expected_command):
            # New job bound to a mock executor for the expected command line.
            executor = mock_executor(expected_command)
            return Pig.load_commands_from_file(path=script,
                                               command_executor=executor)

        # One rule at a time.
        load('pig -optimizer_off SplitFilter -f "wordcount.pig"') \
            .without_split_filter().run()
        load('pig -optimizer_off PushUpFilter -f "wordcount.pig"') \
            .without_pushup_filter().run()
        load('pig -optimizer_off MergeFilter -f "wordcount.pig"') \
            .without_merge_filter().run()
        load('pig -optimizer_off PushDownForeachFlatten -f "wordcount.pig"') \
            .without_push_down_foreach_flatten().run()
        load('pig -optimizer_off LimitOptimizer -f "wordcount.pig"') \
            .without_limit_optimizer().run()
        load('pig -optimizer_off ColumnMapKeyPrune -f "wordcount.pig"') \
            .without_column_map_key_prune().run()
        load('pig -optimizer_off AddForEach -f "wordcount.pig"') \
            .without_add_foreach().run()
        load('pig -optimizer_off MergeForEach -f "wordcount.pig"') \
            .without_merge_foreach().run()
        load('pig -optimizer_off GroupByConstParallelSetter -f "wordcount.pig"') \
            .without_groupby_const_parallel_setter().run()

        # Everything off at once.
        load('pig -optimizer_off All -f "wordcount.pig"') \
            .disable_all_optimizations().run()

        # Two rules on one job.
        load('pig -optimizer_off LimitOptimizer -optimizer_off AddForEach '
             '-f "wordcount.pig"') \
            .without_add_foreach().without_limit_optimizer().run()

        # Two rules combined with tez mode and multiquery disabled.
        combined = load('pig -x tez -optimizer_off LimitOptimizer '
                        '-optimizer_off AddForEach -no_multiquery '
                        '-f "wordcount.pig"')
        combined = combined.without_add_foreach().using_mode(type="tez")
        combined = combined.without_limit_optimizer()
        combined.without_multiquery().run()
Esempio n. 20
0
 def test_run_script_from_file_verbose(self):
     """Exercise ``debug()`` with a mock executor given a ``-verbose`` command line.

     NOTE(review): ``run()`` is never called explicitly; presumably
     ``debug()`` triggers execution -- confirm against the ``Pig`` class.
     """
     executor = mock_executor('pig -verbose -f "wordcount.pig"')
     verbose_job = Pig.load_commands_from_file(path='wordcount.pig',
                                               command_executor=executor)
     verbose_job.debug()