Example #1
    def test_apply_hdfs_snapshot(self):
        _config_file = os.path.join(os.path.dirname(__file__),
                                    'resources',
                                    'bootsrap',
                                    'bootstrap.ini')
        _raw_sales_dir = HDFS('/tmp/raw/sales')
        _raw_users_dir = HDFS('/tmp/raw/users')
        _raw_tmp_dir = HDFS('/tmp/raw/tmp')
        try:
            # run bootstrap script
            metastore = IniFileMetaStore(file=_config_file)
            _config = Configuration.load(metastore)
            apply_hdfs_snapshot(_config)

            # asserts
            # assert directories were created
            self.assertTrue(_raw_sales_dir.exists(), "Directory '/tmp/raw/sales' was not created")
            self.assertTrue(_raw_users_dir.exists(), "Directory '/tmp/raw/users' was not created")
            self.assertTrue(_raw_tmp_dir.exists(), "Directory '/tmp/raw/tmp' was not created")
            # assert acls were applied
            sales_dir_acls = _raw_sales_dir.get_acls()
            users_dir_acls = _raw_users_dir.get_acls()

            self.assertIsNotNone(sales_dir_acls, '/tmp/raw/sales : ACLs were not applied')
            self.assertTrue('group:sys-pii:r-x' in sales_dir_acls, '/tmp/raw/sales : pii acl was not applied')
            self.assertTrue('group:sales:r--' in sales_dir_acls, '/tmp/raw/sales : sales acl was not applied')

            self.assertIsNotNone(users_dir_acls, '/tmp/raw/users : ACLs were not applied')
            self.assertTrue('group:sys-pii:r-x' in users_dir_acls, '/tmp/raw/users : pii acl was not applied')
        finally:
            _test_basedir = HDFS('/tmp/raw')
            _test_basedir.delete_directory()
            self.assertFalse(_test_basedir.exists(), "ERROR: clean up failed")
Example #2
 def test_should_be_able_to_add_nones(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
     _config.set(section='section_a', key='new_key', value=None)
     self.assertEqual('value', _config.get('section_a', 'key'), "Can't find old item")
     self.assertTrue(_config.has('section_a', 'new_key'), "New Item was not added")
Example #3
 def test_should_raise_exception_if_required_option_was_not_found(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore)
     self.assertRaises(ConfigurationError, _config.require, 'section_a',
                       'item_a')
Example #4
    def test_run_preconfigured_job_without_parameters_substitution(self):
        _test_id = str(uuid.uuid4())
        _job_name = "TEST_PIG_{}".format(_test_id)
        _input_dir = self.copy_file_from_local(self.temp_file("hello,world,world", ".txt"))
        _output_dir = "/tmp/data_{}".format(_test_id)

        _commands = "A = load '{}' using PigStorage(',');".format(_input_dir)
        _commands += "B = foreach A generate \$0 as id;"
        _commands += "STORE B into '{}';".format(_output_dir)
        # create the job configuration; it can also be loaded from an .ini file (see the sketch after this example)
        _config = Configuration.create()
        _config.set(_job_name, TaskOptions.CONFIG_KEY_COMMANDS_STRING, _commands)
        _config.set(_job_name, TaskOptions.CONFIG_KEY_LOG_BRIEF, "enabled")
        _config.set(
            _job_name,
            TaskOptions.CONFIG_KEY_PARAMETER_VALUE,
            "input_dir={}\noutput_dir={}".format(_input_dir, _output_dir),
        )
        try:
            _pig = Pig.load_preconfigured_job(config=_config, job_name=_job_name)
            _result = _pig.run()
            _result.if_failed_raise(AssertionError("test_run_preconfigured_job failed"))
            self.assertTrue(HDFS(_output_dir).exists(), "Cannot find job output")
        finally:
            self.delete_file_in_hdfs(_input_dir)
            self.delete_file_in_hdfs(_output_dir)
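As the comment in the example above notes, the same job section can come from an .ini file instead of being built in code. A minimal sketch of that variant, assuming a hypothetical resources/pig_job.ini whose section name matches the job name and holds the same CONFIG_KEY_* options (imports omitted, as in the other snippets):

    # 'resources/pig_job.ini' is a made-up resource; its section name must equal job_name
    metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__), 'resources', 'pig_job.ini'))
    _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
    _pig = Pig.load_preconfigured_job(config=_config, job_name='TEST_PIG_FROM_INI')
    _pig.run().if_failed_raise(AssertionError("pig job from ini failed"))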
Example #5
 def test_streaming_map_only_job_generation(self):
     _config_file = os.path.join(
         os.path.dirname(os.path.realpath(__file__)),
         'resources',
         'mapreduce',
         'mapreduce_streaming_job.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore=metastore)
     _job_name = 'streaming_test_job_map_only'
     _expected_command = 'hadoop jar ' \
                         '{0}/resources/mapreduce/hadoop-streaming.jar ' \
                         '-D mapreduce.job.name={1} ' \
                         '-D value.delimiter.char=, ' \
                         '-D partition.to.process=20142010 ' \
                         '-mapper smapper.py ' \
                         '-reducer NONE ' \
                         '-numReduceTasks 0 ' \
                         '-input /raw/20102014 ' \
                         '-output /core/20102014'\
         .format(os.path.dirname(os.path.realpath(__file__)),
                         _job_name)
     MapReduce.prepare_streaming_job(
         jar='{0}/resources/mapreduce/hadoop-streaming.jar'
         .format(os.path.dirname(os.path.realpath(__file__))),
         config=_config,
         name=_job_name,
         executor=self.assert_generated_command(_expected_command)
     ).run()
Example #6
    def test_run_preconfigured_job_without_parameters_substitution(self):
        _test_id = str(uuid.uuid4())
        _job_name = "TEST_PIG_{}".format(_test_id)
        _input_dir = self.copy_file_from_local(
            self.temp_file("hello,world,world", ".txt"))
        _output_dir = "/tmp/data_{}".format(_test_id)

        _commands = "A = load '{}' using PigStorage(',');".format(_input_dir)
        _commands += "B = foreach A generate \$0 as id;"
        _commands += "STORE B into '{}';".format(_output_dir)
        # create the job configuration; it can also be loaded from an .ini file
        _config = Configuration.create()
        _config.set(_job_name, TaskOptions.CONFIG_KEY_COMMANDS_STRING,
                    _commands)
        _config.set(_job_name, TaskOptions.CONFIG_KEY_LOG_BRIEF, 'enabled')
        _config.set(
            _job_name, TaskOptions.CONFIG_KEY_PARAMETER_VALUE,
            'input_dir={}\noutput_dir={}'.format(_input_dir, _output_dir))
        try:
            _pig = Pig.load_preconfigured_job(config=_config,
                                              job_name=_job_name)
            _result = _pig.run()
            _result.if_failed_raise(
                AssertionError("test_run_preconfigured_job failed"))
            self.assertTrue(
                HDFS(_output_dir).exists(), "Cannot find job output")
        finally:
            self.delete_file_in_hdfs(_input_dir)
            self.delete_file_in_hdfs(_output_dir)
Example #7
 def spark_app_config_template(self, master, name=None):
     # generate a fresh section name per call; a str(uuid.uuid4()) default would be evaluated only once
     name = name if name else str(uuid.uuid4())
     _config = Configuration.create()
     _config.set(section=name, key=TaskOptions.SPARK_APP_CONFIG_MASTER, value=master)
     _config.set(section=name, key=TaskOptions.SPARK_APP_CONFIG_APPLICATION_JAR,
                 value=os.path.join(os.path.dirname(__file__), "resources", "spark", "SparkExample.jar"))
     _config.set(section=name, key=TaskOptions.SPARK_APP_CONFIG_MAIN_CLASS, value="example.spark.WordCounter")
     return _config
Example #8
 def __init__(self, methodName='runTest'):
     super(TestMapReduceCommandGenerationFromIni, self).__init__(methodName)
     _config_file = os.path.join(
         os.path.dirname(os.path.realpath(__file__)), 'resources',
         'mapreduce', 'mapreduce_streaming_job.ini')
     metastore = IniFileMetaStore(file=_config_file)
     self._config = Configuration.load(metastore=metastore)
Example #9
    def test_export_table_with_staging(self):
        try:
            metastore = IniFileMetaStore(file=os.path.join(
                os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
            cmd = SqoopExport.load_preconfigured_job(config=Configuration.load(
                metastore=metastore, readonly=False, accepts_nulls=True
            )).to_rdbms().table(table="table_name_second").from_hdfs(
                export_dir="{0}/data_custom_directory".format(
                    BASE_DIR)).with_staging_table(staging_table="stag").run()

            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.
                format(USER, PASSWORD,
                       MYSQL_SERVER), "'SELECT * FROM table_name_second'")
            self.assertNotEqual(
                result.stdout.split(' ')[0], 'Empty', result.stdout)
        finally:
            shell.execute_shell_command(
                'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.
                format(USER, PASSWORD,
                       MYSQL_SERVER), "'DELETE FROM table_name_second'")
            shell.execute_shell_command(
                'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.
                format(USER, PASSWORD, MYSQL_SERVER), "'DELETE FROM stag'")
Example #10
 def test_streaming_map_only_job_generation(self):
     _config_file = os.path.join(
         os.path.dirname(os.path.realpath(__file__)), 'resources',
         'mapreduce', 'mapreduce_streaming_job.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore=metastore)
     _job_name = 'streaming_test_job_map_only'
     _expected_command = 'hadoop jar ' \
                         '{0}/resources/mapreduce/hadoop-streaming.jar ' \
                         '-D mapreduce.job.name={1} ' \
                         '-D value.delimiter.char=, ' \
                         '-D partition.to.process=20142010 ' \
                         '-mapper smapper.py ' \
                         '-reducer NONE ' \
                         '-numReduceTasks 0 ' \
                         '-input /raw/20102014 ' \
                         '-output /core/20102014'\
         .format(os.path.dirname(os.path.realpath(__file__)),
                         _job_name)
     MapReduce.prepare_streaming_job(
         jar='{0}/resources/mapreduce/hadoop-streaming.jar'.format(
             os.path.dirname(os.path.realpath(__file__))),
         config=_config,
         name=_job_name,
         executor=self.assert_generated_command(_expected_command)).run()
Example #11
 def test_should_be_able_to_split_string_to_multiple_values(self):
     _values = ['one', 'two', 'three']
     _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
     _config.set("section_b", 'list', ",".join(_values))
     self.assertListEqual(_values, _config.get_list("section_b", 'list', delimiter=','))
Example #12
 def test_create_new_config(self):
     _config = Configuration.create()
     _section = 'new_section'
     _key = 'new_key'
     _value = 'new_value'
     _config.set(section=_section, key=_key, value=_value)
     self.assertTrue(_config.has(_section, _key), "Config option was not added")
     self.assertEqual(_value, _config.get(_section, _key))
Example #13
 def test_wrap_with_quotes(self):
     _pc = Pig(config=Configuration.create(), job_name=None, command_executor=None)
     self.assertEqual("", _pc._wrap_with_quotes_(""))
     self.assertEqual(None, _pc._wrap_with_quotes_(None))
     self.assertEqual('"test"', _pc._wrap_with_quotes_("test"))
     self.assertEqual("'test'", _pc._wrap_with_quotes_("'test'"))
     self.assertEqual("'te\"st'", _pc._wrap_with_quotes_('te"st'))
     self.assertEqual('"te\'st"', _pc._wrap_with_quotes_("te'st"))
Example #14
 def test_load_config_from_file(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
     _section = 'section_a'
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore)
     self.assertTrue(_config.has(section=_section, key='key'),
                     'Cannot find "key" option in test config')
     self.assertEqual('value', _config.require(_section, 'key'))
Example #15
 def __init__(self, name, config, executable, executor, main_class=None, shell_command="hadoop jar"):
     self.executor = executor
     self.executable = executable
     self._config = config if config else Configuration.create(readonly=False, accepts_nulls=True)
     self.name = name if name else "MR_TASK_{0}".format(uuid.uuid4())
     self.main_class = main_class
     self._shell_command = shell_command
     self._process = None
Example #16
 def test_load_config_from_file(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     _section = 'section_a'
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore)
     self.assertTrue(_config.has(section=_section, key='key'),
                     'Cannot find "key" option in test config')
     self.assertEqual('value', _config.require(_section, 'key'))
Example #17
 def test_create_new_config(self):
     _config = Configuration.create()
     _section = 'new_section'
     _key = 'new_key'
     _value = 'new_value'
     _config.set(section=_section, key=_key, value=_value)
     self.assertTrue(_config.has(_section, _key),
                     "Config option was not added")
     self.assertEqual(_value, _config.get(_section, _key))
Example #18
 def __init__(self, methodName='runTest'):
     super(TestMapReduceCommandGenerationFromIni, self).__init__(methodName)
     _config_file = os.path.join(
         os.path.dirname(os.path.realpath(__file__)),
         'resources',
         'mapreduce',
         'mapreduce_streaming_job.ini')
     metastore = IniFileMetaStore(file=_config_file)
     self._config = Configuration.load(metastore=metastore)
Example #19
 def test_load_preconfigured_job(self):
     _command = 'pig -brief -optimizer_off SplitFilter -optimizer_off ColumnMapKeyPrune -e "ls /"'
     metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__), 'resources/pig/pig.ini'))
     pig = Pig.load_preconfigured_job(job_name='pig test',
                                      config=Configuration.load(
                                          metastore=metastore,
                                          readonly=False, accepts_nulls=True),
                                      command_executor=mock_executor(expected_command=_command))
     pig.without_split_filter().run()
Example #20
 def test_should_be_able_to_add_multiple_values_for_a_single_key(self):
     _values = [1, 2, 3, 4]
     _increment = ['one', 'two', 'three']
     _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
     _config.update_list("section_b", 'list', *_values)
     self.assertListEqual(_values, _config.get_list("section_b", 'list'))
     _config.update_list("section_b", 'list', *_increment)
     self.assertListEqual(_values + _increment, _config.get_list("section_b", 'list'))
Example #21
 def test_should_not_be_able_to_add_new_items(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore, readonly=True, accepts_nulls=False)
     self.assertRaises(
          ConfigurationError,
          _config.set,
         section='test',
         key='key',
         value='value')
Example #22
 def test_wrap_with_quotes(self):
     _pc = Pig(config=Configuration.create(),
               job_name=None,
               command_executor=None)
     self.assertEqual("", _pc._wrap_with_quotes_(""))
     self.assertEqual(None, _pc._wrap_with_quotes_(None))
     self.assertEqual('"test"', _pc._wrap_with_quotes_("test"))
     self.assertEqual("'test'", _pc._wrap_with_quotes_("'test'"))
     self.assertEqual("'te\"st'", _pc._wrap_with_quotes_('te"st'))
     self.assertEqual('"te\'st"', _pc._wrap_with_quotes_("te'st"))
Example #23
 def test_load_preconfigured_job(self):
     _command = 'pig -brief -optimizer_off SplitFilter -optimizer_off ColumnMapKeyPrune -e "ls /"'
     metastore = IniFileMetaStore(file=os.path.join(
         os.path.dirname(__file__), 'resources/pig/pig.ini'))
     pig = Pig.load_preconfigured_job(
         job_name='pig test',
         config=Configuration.load(metastore=metastore,
                                   readonly=False,
                                   accepts_nulls=True),
         command_executor=mock_executor(expected_command=_command))
     pig.without_split_filter().run()
Example #24
 def test_should_be_able_to_split_string_to_multiple_values(self):
     _values = ['one', 'two', 'three']
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore,
                                  readonly=False,
                                  accepts_nulls=True)
     _config.set("section_b", 'list', ",".join(_values))
     self.assertListEqual(
         _values, _config.get_list("section_b", 'list', delimiter=','))
Example #25
 def test_load_config(self):
     _command = (
         'hive -e "test" --define A=B --define C=D --hiveconf hello=world '
         "--hivevar A=B --hivevar C=D --database hive"
     )
     metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__), "resources/hive/hive.ini"))
     hive = Hive.load_preconfigured_job(
         name="hive test",
         config=Configuration.load(metastore=metastore, readonly=False, accepts_nulls=True),
         executor=mock_executor(expected_command=_command),
     ).with_hive_conf("hello", "world")
     hive.run()
Example #26
 def test_should_be_able_to_add_nones(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore,
                                  readonly=False,
                                  accepts_nulls=True)
     _config.set(section='section_a', key='new_key', value=None)
     self.assertEqual('value', _config.get('section_a', 'key'),
                      "Can't find old item")
     self.assertTrue(_config.has('section_a', 'new_key'),
                     "New Item was not added")
Example #27
 def test_should_not_be_able_to_add_new_items(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore,
                                  readonly=True,
                                  accepts_nulls=False)
     self.assertRaises(ConfigurationError,
                       _config.set,
                       section='test',
                       key='key',
                       value='value')
Example #28
 def test_load_config(self):
     _command = "hive -e \"test\" --define A=B --define C=D --hiveconf hello=world " \
                "--hivevar A=B --hivevar C=D --database hive"
     metastore = IniFileMetaStore(file=os.path.join(
         os.path.dirname(__file__), 'resources/hive/hive.ini'))
     hive = Hive.load_preconfigured_job(name='hive test',
                                        config=Configuration.load(
                                            metastore=metastore,
                                            readonly=False, accepts_nulls=True),
                                        executor=mock_executor(expected_command=_command)) \
         .with_hive_conf("hello", "world")
     hive.run()
Example #29
 def test_should_not_be_able_to_add_nones(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore,
                                  readonly=False,
                                  accepts_nulls=False)
     self.assertRaises(ConfigurationError,
                       _config.set,
                       section='section_a',
                       key='new_key',
                       value=None)
Example #30
    def test_fsimage_from_config_without_acls(self):
        config = Configuration.create(readonly=False, accepts_nulls=True)
        config.set(section=CONFIG_HDFS_DIRS_KEY, key='/raw/sales', value=None)
        snapshot = FsSnapshot.load_from_config(config=config,
                                               fs_section=CONFIG_HDFS_DIRS_KEY)
        files = snapshot.files

        self.assertTrue('/raw/sales' in files,
                        'File was not added to fs snapshot')
        self.assertTrue(
            len(files['/raw/sales']) == 0,
            'ACL should be ignored for current configuration')
Example #31
 def test_import_direct(self):
     metastore = IniFileMetaStore(file=os.path.join(
         os.path.dirname(os.path.realpath(__file__)),
         'resources',
         'sqoop',
         'sqoop.ini'))
     config = Configuration.load(metastore=metastore, readonly=False)
     self.assertEqual(
         SqoopImport.load_preconfigured_job(name="test", config=config).from_rdbms(rdbms="mysql", username="root", password_file="/user/cloudera/password",
                                            host="localhost", database="sqoop_tests").with_direct_mode(direct_split_size="1", name_2="12", names_3="1").table(
                                            table="table_name").to_hdfs().build(),
         '-DA=12 -DB=13 --connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/password --table table_name --direct -- --name-2=12 --names-3=1')
Example #32
 def test_import_with_hadoop_properties_from_ini_file(self):
     metastore = IniFileMetaStore(file=os.path.join(
         os.path.dirname(os.path.realpath(__file__)),
         'resources',
         'sqoop',
         'sqoop.ini'))
     config = Configuration.load(metastore=metastore, readonly=False)
     self.assertEqual(
         SqoopImport.load_preconfigured_job(name="sqoo", config=config).from_rdbms(rdbms="mysql", username="root", password_file="/user/cloudera/password",
                                        host="localhost", database="sqoop_tests").
         to_hdfs().table(table="table_name").with_hadoop_properties(some_properties="10").build(),
         "-DA=12 -DB=13 -Dsome.properties=10 --connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/password --table table_name")
Example #33
    def __init__(self, name=None, config=None, executor=execute_shell_command):
        """
        Creates wrapper for Hive command line utility
        :param executor: custom executor
        :type executor:
        """

        super(Hive, self).__init__()
        self.name = name if name else "HIVE_TASK_{0}".format(uuid.uuid4())
        self.__executor = executor
        self._config = config if config else Configuration.create(
            readonly=False, accepts_nulls=True)
Example #34
    def __init__(self, config=None, name=None, executor=execute_shell_command):
        """

        :param config: configurations
        :param name: name of the config section containing specific application configurations
        :param executor: the interface used by the client to launch the Spark application.
        """
        super(SparkApplication, self).__init__()
        self.executor = executor
        self._configs = config if config else Configuration.create()
        self.name = name if name \
            else "SPARK_JOB_{0}".format(uuid.uuid4())
Example #35
    def __init__(self, agent=None, conf_file=None, config=None, executor=execute_shell_command):
        """
        Creates wrapper for Flume command line utility
        :param executor: custom executor
        :type executor:
        """

        self.name = agent if agent else "FLUME_AGENT_{0}".format(uuid.uuid4())
        self._executor = executor
        self._config = config if config else Configuration.create(readonly=False, accepts_nulls=True)
        self.__set_attr__(TaskOptions.CONFIG_KEY_AGENT_NAME, agent)
        self.__set_attr__(TaskOptions.CONFIG_KEY_CONF_FILE, conf_file)
Example #36
    def test_fsimage_from_config_without_acls(self):
        config = Configuration.create(readonly=False, accepts_nulls=True)
        config.set(section=CONFIG_HDFS_DIRS_KEY,
                   key='/raw/sales',
                   value=None)
        snapshot = FsSnapshot.load_from_config(config=config,
                                               fs_section=CONFIG_HDFS_DIRS_KEY)
        files = snapshot.files

        self.assertTrue('/raw/sales' in files,
                        'File was not added to fs snapshot')
        self.assertTrue(len(files['/raw/sales']) == 0,
                        'ACL should be ignored for current configuration')
Example #37
 def spark_app_config_template(self, master, name=None):
     # generate a fresh section name per call; a str(uuid.uuid4()) default would be evaluated only once
     name = name if name else str(uuid.uuid4())
     _config = Configuration.create()
     _config.set(section=name,
                 key=TaskOptions.SPARK_APP_CONFIG_MASTER,
                 value=master)
     _config.set(section=name,
                 key=TaskOptions.SPARK_APP_CONFIG_APPLICATION_JAR,
                 value=os.path.join(os.path.dirname(__file__), "resources",
                                    "spark", "SparkExample.jar"))
     _config.set(section=name,
                 key=TaskOptions.SPARK_APP_CONFIG_MAIN_CLASS,
                 value="example.spark.WordCounter")
     return _config
Example #38
    def __init__(self, name=None, config=None, executor=execute_shell_command):
        """
        Creates wrapper for Hive command line utility
        :param executor: custom executor
        :type executor:
        """

        super(Hive, self).__init__()
        self.name = name if name else "HIVE_TASK_{0}".format(uuid.uuid4())
        self.__executor = executor
        self._config = config if config else Configuration.create(
            readonly=False,
            accepts_nulls=True
        )
Example #39
 def test_should_be_able_to_add_multiple_values_for_a_single_key(self):
     _values = [1, 2, 3, 4]
     _increment = ['one', 'two', 'three']
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore,
                                  readonly=False,
                                  accepts_nulls=True)
     _config.update_list("section_b", 'list', *_values)
     self.assertListEqual(_values, _config.get_list("section_b", 'list'))
     _config.update_list("section_b", 'list', *_increment)
     self.assertListEqual(_values + _increment,
                          _config.get_list("section_b", 'list'))
Example #40
 def __init__(self,
              name,
              config,
              executable,
              executor,
              main_class=None,
              shell_command="hadoop jar"):
     self.executor = executor
     self.executable = executable
     self._config = config if config else Configuration.create(
         readonly=False, accepts_nulls=True)
     self.name = name if name else "MR_TASK_{0}".format(uuid.uuid4())
     self.main_class = main_class
     self._shell_command = shell_command
     self._process = None
Example #41
 def test_apply_local_fs_snapshot(self):
     _config_file = os.path.join(os.path.dirname(__file__),
                                 'resources',
                                 'bootsrap',
                                 'bootstrap.ini')
     test_dir = LocalFS('/tmp/data_tmp')
     if test_dir.exists():
         test_dir.delete_directory()
     try:
         metastore = IniFileMetaStore(file=_config_file)
         _config = Configuration.load(metastore)
         apply_localfs_snapshot(_config)
         self.assertTrue(test_dir.exists(), "Folder was not created")
     finally:
         test_dir.delete_directory()
Example #42
    def __init__(self, agent=None, conf_file=None, config=None,
                 executor=execute_shell_command):
        """
        Creates wrapper for Flume command line utility
        :param executor: custom executor
        :type executor:
        """

        self.name = agent if agent else "FLUME_AGENT_{0}".format(uuid.uuid4())
        self._executor = executor
        self._config = config if config else Configuration.create(
            readonly=False,
            accepts_nulls=True
        )
        self.__set_attr__(TaskOptions.CONFIG_KEY_AGENT_NAME, agent)
        self.__set_attr__(TaskOptions.CONFIG_KEY_CONF_FILE, conf_file)
Example #43
    def test_import_table(self):
        try:
            metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__),
                                                                   'resources/sqoop/custom.ini'))
            cmd = SqoopImport.load_preconfigured_job(
                config=Configuration.load(metastore=metastore,
                                           readonly=False,
                                           accepts_nulls=True)).from_rdbms().table(
                table="table_name", where="id>2",
                columns="id,last_name").to_hdfs(
                target_dir="{0}/custom_directory".format(BASE_DIR)).run()

            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command('hadoop fs', '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
            self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Example #44
    def load_commands_from_string(commands,
                                  command_executor=execute_shell_command):
        """
         Creates an instance of Pig client.
         Configures Pig client to parse and run commands from string.
         :param commands: Commands to execute (within quotes)
         :param command_executor:  The interface used by the client to run command.

         :type commands: str
         :rtype: Pig
         """
        _config = Configuration.create(readonly=False, accepts_nulls=True)
        _job_name = "PIG_TASK_{0}".format(uuid.uuid4())
        _pig = Pig(config=_config,
                   job_name=_job_name,
                   command_executor=command_executor)
        _pig.execute_commands(commands=commands)
        return _pig
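A usage sketch for the factory above; the inline Pig Latin is illustrative, and run()/if_failed_raise() are used exactly as in the preconfigured-job tests earlier in this listing:

    _pig = Pig.load_commands_from_string("A = load '/tmp/in' using PigStorage(','); STORE A into '/tmp/out';")
    _pig.run().if_failed_raise(AssertionError("inline pig commands failed"))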
Example #45
 def prepare_streaming_job(config=None, name=None, jar="hadoop-streaming.jar", executor=execute_shell_command):
     """
     Creates instance of StreamingJob
     :param name: name of job
     :param jar: executing jar
     :param executor: interface used by the client to run command.
     :return: StreamingJob template
     :rtype : StreamingJob
     """
     MapReduce.LOG.info("MapReduce streaming job")
     config = config if config else Configuration.create(readonly=False, accepts_nulls=True)
     MapReduce.__validate_configs(config, name, "StreamingJob", TaskOptions.KEYS_FOR_MAPREDUCE)
     return StreamingJob(
         config=config,
         name=name if name else "MR_STREAMING_JOB_{0}".format(uuid.uuid4()),
         jar=jar,
         executor=executor,
     )
Example #46
    def load_commands_from_file(path,
                                command_executor=execute_shell_command):
        """
        Creates an instance of Pig client.
        Configures Pig client to run commands from specified script file.
        :param path: path to the script to execute
        :param command_executor:  The interface used by the client to run command.

        :type path: str
        :rtype: Pig
        """
        Pig.LOG.info("Loading Pig script from file : {0}".format(path))
        _config = Configuration.create(readonly=False, accepts_nulls=True)
        _job_name = "PIG_TASK_{0}".format(uuid.uuid4())
        _pig = Pig(config=_config,
                   job_name=_job_name,
                   command_executor=command_executor)
        _pig.execute_script(path=path)
        return _pig
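A usage sketch for the factory above; the script path is illustrative:

    _pig = Pig.load_commands_from_file('/tmp/scripts/wordcount.pig')
    _pig.run().if_failed_raise(AssertionError("pig script failed"))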
Example #47
 def test_spark_submit_from_ini(self):
     _command = "spark-submit " \
                "--master local[10] " \
                "--class test.SparkApp " \
                "--name test_app " \
                "--jars lib001.jar,lib002.jar,lib003.jar " \
                "--files dim001.cache.txt,dim002.cache.txt " \
                "--properties-file spark.app.configs " \
                "--conf \"spark.app.name=test_app spark.executor.memory=512m " \
                "spark.serializer=org.apache.spark.serializer.KryoSerializer\" " \
                "application.jar " \
                "10 test"
     metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__), "resources", "spark", "spark.app.ini"))
     spark = SparkApplication.load_preconfigured_job(
         config=Configuration.load(metastore,
                                    readonly=False),
         name="test_spark_app",
         executor=mock_executor(expected_command=_command)).application_jar("application.jar")
     spark.run(10, "test")
Example #48
    def test_export_table_with_call(self):
        try:
            metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__),
                                                                   'resources/sqoop/custom.ini'))
            cmd = SqoopExport.load_preconfigured_job(
                config=Configuration.load(metastore=metastore,
                                           readonly=False,
                                           accepts_nulls=True)).to_rdbms().from_hdfs(
                export_dir="{0}/data_custom_directory".format(BASE_DIR)).call(stored_procedure="p").run()

            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(USER, PASSWORD, MYSQL_SERVER),
                "'SELECT * FROM table_name_second'")
            self.assertNotEqual(result.stdout.split(' ')[0], 'Empty', result.stdout)
        finally:
            shell.execute_shell_command(
                'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(USER, PASSWORD, MYSQL_SERVER),
                "'DELETE FROM table_name_second'")
Example #49
 def prepare_mapreduce_job(jar, main_class=None, config=None, name=None, executor=execute_shell_command):
     """
     Creates instance of MapReduceJob
     :param jar: executing jar
     :param main_class: main class inside the jar to run (optional)
     :param config: job configuration; a new writable configuration is created if omitted
     :param name: name of job
     :param executor: interface used by the client to run command.
     :return: MapReduceJob template
     :rtype : MapReduceJob
     """
     MapReduce.LOG.info("MapReduce job")
     config = config if config else Configuration.create(readonly=False, accepts_nulls=True)
     MapReduce.__validate_configs(config, name, "MapReduceJob", TaskOptions.KEYS_FOR_STREAMING_JOB)
     return MapReduceJob(
         name=name if name else "MR_JOB_{0}".format(uuid.uuid4()),
         config=config,
         jar=jar,
         main_class=main_class,
         executor=executor,
     )
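A usage sketch for the factory above, relying on the documented defaults for config and name. The jar path and main class are illustrative, and it assumes MapReduceJob exposes the same run() method as the StreamingJob used elsewhere in this listing:

    job = MapReduce.prepare_mapreduce_job(jar='/tmp/jobs/wordcount.jar',       # illustrative jar path
                                          main_class='example.mr.WordCount')   # illustrative main class
    job.run()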
Example #50
    def test_import_table(self):
        try:
            metastore = IniFileMetaStore(file=os.path.join(
                os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
            cmd = SqoopImport.load_preconfigured_job(config=Configuration.load(
                metastore=metastore, readonly=False, accepts_nulls=True
            )).from_rdbms().table(
                table="table_name", where="id>2",
                columns="id,last_name").to_hdfs(
                    target_dir="{0}/custom_directory".format(BASE_DIR)).run()

            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs',
                '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
            self.assertNotEqual(
                result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command(
                'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Example #51
 def prepare_streaming_job(config=None,
                           name=None,
                           jar="hadoop-streaming.jar",
                           executor=execute_shell_command):
     """
     Creates instance of StreamingJob
     :param name: name of job
     :param jar: executing jar
     :param executor: interface used by the client to run command.
     :return: StreamingJob template
     :rtype : StreamingJob
     """
     MapReduce.LOG.info("MapReduce streaming job")
     config = config if config else Configuration.create(readonly=False,
                                                         accepts_nulls=True)
     MapReduce.__validate_configs(config, name, "StreamingJob",
                                  TaskOptions.KEYS_FOR_MAPREDUCE)
     return StreamingJob(
         config=config,
         name=name if name else "MR_STREAMING_JOB_{0}".format(uuid.uuid4()),
         jar=jar,
         executor=executor)
Example #52
    def test_fsimage_from_config(self):
        config = Configuration.create(readonly=False, accepts_nulls=True)
        config.set(section=CONFIG_ACLS_KEY,
                   key='confidential',
                   value='user:su:rwx')
        config.set(section=CONFIG_ACLS_KEY,
                   key='sales',
                   value='group:sales:r-x')
        config.set(section=CONFIG_HDFS_DIRS_KEY,
                   key='/raw/sales',
                   value='confidential,sales')
        snapshot = FsSnapshot.load_from_config(config=config,
                                               fs_section=CONFIG_HDFS_DIRS_KEY,
                                               acl_section=CONFIG_ACLS_KEY)
        files = snapshot.files

        self.assertTrue('/raw/sales' in files,
                        'File was not added to fs snapshot')
        self.assertTrue('user:su:rwx' in files['/raw/sales'],
                        '\'confidential\' access lvl was not mapped to file')
        self.assertTrue('group:sales:r-x' in files['/raw/sales'],
                        '\'sales\' access lvl was not mapped to file')
        self.assertFalse('default:fake:r-x' in files['/raw/sales'],
                         'Error in access lvl mapping')
Example #53
 def test_try_execute_empty_command(self):
     self.assertRaises(PigCommandError, Pig(
         config=Configuration.create(),
         job_name=None,
         command_executor=None).run)