def test_apply_hdfs_snapshot(self):
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'bootsrap', 'bootstrap.ini')
    _raw_sales_dir = HDFS('/tmp/raw/sales')
    _raw_users_dir = HDFS('/tmp/raw/users')
    _raw_tmp_dir = HDFS('/tmp/raw/tmp')
    try:
        # run bootstrap script
        metastore = IniFileMetaStore(file=_config_file)
        _config = Configuration.load(metastore)
        apply_hdfs_snapshot(_config)

        # assert directories were created
        self.assertTrue(_raw_sales_dir.exists(), "Directory '/tmp/raw/sales' was not created")
        self.assertTrue(_raw_users_dir.exists(), "Directory '/tmp/raw/users' was not created")
        self.assertTrue(_raw_tmp_dir.exists(), "Directory '/tmp/raw/tmp' was not created")

        # assert ACLs were applied
        sales_dir_acls = _raw_sales_dir.get_acls()
        users_dir_acls = _raw_users_dir.get_acls()

        self.assertIsNotNone(sales_dir_acls, '/tmp/raw/sales : ACLs were not applied')
        self.assertTrue('group:sys-pii:r-x' in sales_dir_acls, '/tmp/raw/sales : pii acl was not applied')
        self.assertTrue('group:sales:r--' in sales_dir_acls, '/tmp/raw/sales : sales acl was not applied')

        self.assertIsNotNone(users_dir_acls, '/tmp/raw/users : ACLs were not applied')
        self.assertTrue('group:sys-pii:r-x' in users_dir_acls, '/tmp/raw/users : pii acl was not applied')
    finally:
        _test_basedir = HDFS('/tmp/raw')
        _test_basedir.delete_directory()
        self.assertFalse(_test_basedir.exists(), "ERROR: clean up failed")
def test_should_be_able_to_add_nones(self):
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
    _config.set(section='section_a', key='new_key', value=None)
    self.assertEqual('value', _config.get('section_a', 'key'), "Can't find old item")
    self.assertTrue(_config.has('section_a', 'new_key'), "New item was not added")
def test_should_raise_exception_if_required_option_was_not_found(self):
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore)
    self.assertRaises(ConfigurationError, _config.require, 'section_a', 'item_a')
def test_run_preconfigured_job_without_parameters_substitution(self):
    _test_id = str(uuid.uuid4())
    _job_name = "TEST_PIG_{}".format(_test_id)
    _input_dir = self.copy_file_from_local(self.temp_file("hello,world,world", ".txt"))
    _output_dir = "/tmp/data_{}".format(_test_id)
    _commands = "A = load '{}' using PigStorage(',');".format(_input_dir)
    # escape $0 so the shell does not expand it when the command string is wrapped in quotes
    _commands += "B = foreach A generate \\$0 as id;"
    _commands += "STORE B into '{}';".format(_output_dir)
    # create job configuration; it can also be loaded from an .ini file
    _config = Configuration.create()
    _config.set(_job_name, TaskOptions.CONFIG_KEY_COMMANDS_STRING, _commands)
    _config.set(_job_name, TaskOptions.CONFIG_KEY_LOG_BRIEF, "enabled")
    _config.set(
        _job_name,
        TaskOptions.CONFIG_KEY_PARAMETER_VALUE,
        "input_dir={}\noutput_dir={}".format(_input_dir, _output_dir),
    )
    try:
        _pig = Pig.load_preconfigured_job(config=_config, job_name=_job_name)
        _result = _pig.run()
        _result.if_failed_raise(AssertionError("test_run_preconfigured_job failed"))
        self.assertTrue(HDFS(_output_dir).exists(), "Cannot find job output")
    finally:
        self.delete_file_in_hdfs(_input_dir)
        self.delete_file_in_hdfs(_output_dir)
def test_streaming_map_only_job_generation(self):
    _config_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'resources', 'mapreduce', 'mapreduce_streaming_job.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore=metastore)
    _job_name = 'streaming_test_job_map_only'
    _expected_command = 'hadoop jar ' \
                        '{0}/resources/mapreduce/hadoop-streaming.jar ' \
                        '-D mapreduce.job.name={1} ' \
                        '-D value.delimiter.char=, ' \
                        '-D partition.to.process=20142010 ' \
                        '-mapper smapper.py ' \
                        '-reducer NONE ' \
                        '-numReduceTasks 0 ' \
                        '-input /raw/20102014 ' \
                        '-output /core/20102014' \
        .format(os.path.dirname(os.path.realpath(__file__)), _job_name)
    MapReduce.prepare_streaming_job(
        jar='{0}/resources/mapreduce/hadoop-streaming.jar'.format(
            os.path.dirname(os.path.realpath(__file__))),
        config=_config,
        name=_job_name,
        executor=self.assert_generated_command(_expected_command)
    ).run()
def spark_app_config_template(self, master, name=None):
    # generate a unique config section per call when no name is given
    name = name if name else str(uuid.uuid4())
    _config = Configuration.create()
    _config.set(section=name, key=TaskOptions.SPARK_APP_CONFIG_MASTER, value=master)
    _config.set(section=name,
                key=TaskOptions.SPARK_APP_CONFIG_APPLICATION_JAR,
                value=os.path.join(os.path.dirname(__file__), "resources", "spark", "SparkExample.jar"))
    _config.set(section=name,
                key=TaskOptions.SPARK_APP_CONFIG_MAIN_CLASS,
                value="example.spark.WordCounter")
    return _config
def __init__(self, methodName='runTest'):
    super(TestMapReduceCommandGenerationFromIni, self).__init__(methodName)
    _config_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'resources', 'mapreduce', 'mapreduce_streaming_job.ini')
    metastore = IniFileMetaStore(file=_config_file)
    self._config = Configuration.load(metastore=metastore)
def test_export_table_with_staging(self):
    try:
        metastore = IniFileMetaStore(file=os.path.join(
            os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
        cmd = SqoopExport.load_preconfigured_job(
            config=Configuration.load(metastore=metastore, readonly=False, accepts_nulls=True)
        ).to_rdbms().table(table="table_name_second").from_hdfs(
            export_dir="{0}/data_custom_directory".format(BASE_DIR)
        ).with_staging_table(staging_table="stag").run()
        self.assertEquals(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(USER, PASSWORD, MYSQL_SERVER),
            "'SELECT * FROM table_name_second'")
        self.assertNotEqual(result.stdout.split(' ')[0], 'Empty', result.stdout)
    finally:
        shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(USER, PASSWORD, MYSQL_SERVER),
            "'DELETE FROM table_name_second'")
        shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(USER, PASSWORD, MYSQL_SERVER),
            "'DELETE FROM stag'")
def test_should_be_able_to_split_string_to_multiple_values(self):
    _values = ['one', 'two', 'three']
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
    _config.set("section_b", 'list', ",".join(_values))
    self.assertListEqual(_values, _config.get_list("section_b", 'list', delimiter=','))
def test_create_new_config(self):
    _config = Configuration.create()
    _section = 'new_section'
    _key = 'new_key'
    _value = 'new_value'
    _config.set(section=_section, key=_key, value=_value)
    self.assertTrue(_config.has(_section, _key), "Config option was not added")
    self.assertEqual(_value, _config.get(_section, _key))
def test_wrap_with_quotes(self):
    _pc = Pig(config=Configuration.create(), job_name=None, command_executor=None)
    self.assertEqual("", _pc._wrap_with_quotes_(""))
    self.assertEqual(None, _pc._wrap_with_quotes_(None))
    self.assertEqual('"test"', _pc._wrap_with_quotes_("test"))
    self.assertEqual("'test'", _pc._wrap_with_quotes_("'test'"))
    self.assertEqual("'te\"st'", _pc._wrap_with_quotes_('te"st'))
    self.assertEqual('"te\'st"', _pc._wrap_with_quotes_("te'st"))
def test_load_config_from_file(self):
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    _section = 'section_a'
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore)
    self.assertTrue(_config.has(section=_section, key='key'),
                    'Cannot find "key" option in test config')
    self.assertEqual('value', _config.require(_section, 'key'))
def __init__(self, name, config, executable, executor, main_class=None, shell_command="hadoop jar"):
    self.executor = executor
    self.executable = executable
    self._config = config if config else Configuration.create(readonly=False, accepts_nulls=True)
    self.name = name if name else "MR_TASK_{0}".format(uuid.uuid4())
    self.main_class = main_class
    self._shell_command = shell_command
    self._process = None
def test_load_preconfigured_job(self):
    _command = 'pig -brief -optimizer_off SplitFilter -optimizer_off ColumnMapKeyPrune -e "ls /"'
    metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__), 'resources/pig/pig.ini'))
    pig = Pig.load_preconfigured_job(
        job_name='pig test',
        config=Configuration.load(metastore=metastore, readonly=False, accepts_nulls=True),
        command_executor=mock_executor(expected_command=_command))
    pig.without_split_filter().run()
def test_should_be_able_to_add_multiple_values_for_a_single_key(self):
    _values = [1, 2, 3, 4]
    _increment = ['one', 'two', 'three']
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
    _config.update_list("section_b", 'list', *_values)
    self.assertListEqual(_values, _config.get_list("section_b", 'list'))
    _config.update_list("section_b", 'list', *_increment)
    self.assertListEqual(_values + _increment, _config.get_list("section_b", 'list'))
def test_should_not_be_able_to_add_new_items(self):
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore, readonly=True, accepts_nulls=False)
    self.assertRaises(excClass=ConfigurationError,
                      callableObj=_config.set,
                      section='test', key='key', value='value')
def test_load_config(self):
    _command = 'hive -e "test" --define A=B --define C=D --hiveconf hello=world ' \
               '--hivevar A=B --hivevar C=D --database hive'
    metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__), 'resources/hive/hive.ini'))
    hive = Hive.load_preconfigured_job(
        name='hive test',
        config=Configuration.load(metastore=metastore, readonly=False, accepts_nulls=True),
        executor=mock_executor(expected_command=_command)
    ).with_hive_conf("hello", "world")
    hive.run()
def test_should_not_be_able_to_add_nones(self):
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore, readonly=False, accepts_nulls=False)
    self.assertRaises(ConfigurationError, _config.set, section='section_a', key='new_key', value=None)
def test_fsimage_from_config_without_acls(self):
    config = Configuration.create(readonly=False, accepts_nulls=True)
    config.set(section=CONFIG_HDFS_DIRS_KEY, key='/raw/sales', value=None)
    snapshot = FsSnapshot.load_from_config(config=config, fs_section=CONFIG_HDFS_DIRS_KEY)
    files = snapshot.files
    self.assertTrue('/raw/sales' in files, 'File was not added to fs snapshot')
    self.assertTrue(len(files['/raw/sales']) == 0,
                    'ACLs should be ignored for current configuration')
def test_import_direct(self):
    metastore = IniFileMetaStore(file=os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'resources', 'sqoop', 'sqoop.ini'))
    config = Configuration.load(metastore=metastore, readonly=False)
    self.assertEquals(
        SqoopImport.load_preconfigured_job(name="test", config=config).from_rdbms(
            rdbms="mysql", username="******", password_file="/user/cloudera/password",
            host="localhost", database="sqoop_tests"
        ).with_direct_mode(direct_split_size="1", name_2="12", names_3="1").table(
            table="table_name").to_hdfs().build(),
        '-DA=12 -DB=13 --connect jdbc:mysql://localhost/sqoop_tests --username root '
        '--password-file /user/cloudera/password --table table_name --direct -- --name-2=12 --names-3=1')
def test_import_with_hadoop_properties_from_ini_file(self):
    metastore = IniFileMetaStore(file=os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'resources', 'sqoop', 'sqoop.ini'))
    config = Configuration.load(metastore=metastore, readonly=False)
    self.assertEquals(
        SqoopImport.load_preconfigured_job(name="sqoo", config=config).from_rdbms(
            rdbms="mysql", username="******", password_file="/user/cloudera/password",
            host="localhost", database="sqoop_tests"
        ).to_hdfs().table(table="table_name").with_hadoop_properties(some_properties="10").build(),
        "-DA=12 -DB=13 -Dsome.properties=10 --connect jdbc:mysql://localhost/sqoop_tests "
        "--username root --password-file /user/cloudera/password --table table_name")
def __init__(self, name=None, config=None, executor=execute_shell_command):
    """
    Creates wrapper for Hive command line utility
    :param executor: custom executor
    :type executor:
    """
    super(Hive, self).__init__()
    self.name = name if name else "HIVE_TASK_{0}".format(uuid.uuid4())
    self.__executor = executor
    self._config = config if config else Configuration.create(readonly=False, accepts_nulls=True)
def __init__(self, config=None, name=None, executor=execute_shell_command):
    """
    :param config: configurations
    :param name: name of the config section containing specific application configurations
    :param executor: the interface used by the client to launch a Spark application
    """
    super(SparkApplication, self).__init__()
    self.executor = executor
    self._configs = config if config else Configuration.create()
    self.name = name if name else "SPARK_JOB_{0}".format(uuid.uuid4())
def __init__(self, agent=None, conf_file=None, config=None, executor=execute_shell_command):
    """
    Creates wrapper for Flume command line utility
    :param executor: custom executor
    :type executor:
    """
    self.name = agent if agent else "FLUME_AGENT_{0}".format(uuid.uuid4())
    self._executor = executor
    self._config = config if config else Configuration.create(readonly=False, accepts_nulls=True)
    self.__set_attr__(TaskOptions.CONFIG_KEY_AGENT_NAME, agent)
    self.__set_attr__(TaskOptions.CONFIG_KEY_CONF_FILE, conf_file)
def test_apply_local_fs_snapshot(self):
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'bootsrap', 'bootstrap.ini')
    test_dir = LocalFS('/tmp/data_tmp')
    if test_dir.exists():
        test_dir.delete_directory()
    try:
        metastore = IniFileMetaStore(file=_config_file)
        _config = Configuration.load(metastore)
        apply_localfs_snapshot(_config)
        self.assertTrue(test_dir.exists(), "Folder was not created")
    finally:
        test_dir.delete_directory()
def test_import_table(self):
    try:
        metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
        cmd = SqoopImport.load_preconfigured_job(
            config=Configuration.load(metastore=metastore, readonly=False, accepts_nulls=True)
        ).from_rdbms().table(
            table="table_name", where="id>2", columns="id,last_name"
        ).to_hdfs(target_dir="{0}/custom_directory".format(BASE_DIR)).run()
        self.assertEquals(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command('hadoop fs', '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        shell.execute_shell_command('hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def load_commands_from_string(commands, command_executor=execute_shell_command):
    """
    Creates an instance of Pig client.
    Configures Pig client to parse and run commands from string.
    :param commands: Commands to execute (within quotes)
    :param command_executor: The interface used by the client to run command.
    :type commands: str
    :rtype: Pig
    """
    _config = Configuration.create(readonly=False, accepts_nulls=True)
    _job_name = "PIG_TASK_{0}".format(uuid.uuid4())
    _pig = Pig(config=_config, job_name=_job_name, command_executor=command_executor)
    _pig.execute_commands(commands=commands)
    return _pig
def prepare_streaming_job(config=None, name=None, jar="hadoop-streaming.jar", executor=execute_shell_command):
    """
    Creates instance of StreamingJob
    :param name: name of job
    :param jar: executing jar
    :param executor: interface used by the client to run command.
    :return: StreamingJob template
    :rtype: StreamingJob
    """
    MapReduce.LOG.info("MapReduce streaming job")
    config = config if config else Configuration.create(readonly=False, accepts_nulls=True)
    MapReduce.__validate_configs(config, name, "StreamingJob", TaskOptions.KEYS_FOR_MAPREDUCE)
    return StreamingJob(
        config=config,
        name=name if name else "MR_STREAMING_JOB_{0}".format(uuid.uuid4()),
        jar=jar,
        executor=executor)
def load_commands_from_file(path, command_executor=execute_shell_command):
    """
    Creates an instance of Pig client.
    Configures Pig client to run commands from specified script file.
    :param path: path to the script to execute
    :param command_executor: The interface used by the client to run command.
    :type path: str
    :rtype: Pig
    """
    Pig.LOG.info("Loading Pig script from file : {0}".format(path))
    _config = Configuration.create(readonly=False, accepts_nulls=True)
    _job_name = "PIG_TASK_{0}".format(uuid.uuid4())
    _pig = Pig(config=_config, job_name=_job_name, command_executor=command_executor)
    _pig.execute_script(path=path)
    return _pig
def test_spark_submit_from_ini(self):
    _command = "spark-submit " \
               "--master local[10] " \
               "--class test.SparkApp " \
               "--name test_app " \
               "--jars lib001.jar,lib002.jar,lib003.jar " \
               "--files dim001.cache.txt,dim002.cache.txt " \
               "--properties-file spark.app.configs " \
               "--conf \"spark.app.name=test_app spark.executor.memory=512m " \
               "spark.serializer=org.apache.spark.serializer.KryoSerializer\" " \
               "application.jar " \
               "10 test"
    metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__), "resources", "spark", "spark.app.ini"))
    spark = SparkApplication.load_preconfigured_job(
        config=Configuration.load(metastore, readonly=False),
        name="test_spark_app",
        executor=mock_executor(expected_command=_command)
    ).application_jar("application.jar")
    spark.run(10, "test")
def test_export_table_with_call(self):
    try:
        metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
        cmd = SqoopExport.load_preconfigured_job(
            config=Configuration.load(metastore=metastore, readonly=False, accepts_nulls=True)
        ).to_rdbms().from_hdfs(
            export_dir="{0}/data_custom_directory".format(BASE_DIR)
        ).call(stored_procedure="p").run()
        self.assertEquals(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(USER, PASSWORD, MYSQL_SERVER),
            "'SELECT * FROM table_name_second'")
        self.assertNotEqual(result.stdout.split(' ')[0], 'Empty', result.stdout)
    finally:
        shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(USER, PASSWORD, MYSQL_SERVER),
            "'DELETE FROM table_name_second'")
def prepare_mapreduce_job(jar, main_class=None, config=None, name=None, executor=execute_shell_command):
    """
    Creates instance of MapReduceJob
    :param name: name of job
    :param jar: executing jar
    :param executor: interface used by the client to run command.
    :return: MapReduceJob template
    :rtype: MapReduceJob
    """
    MapReduce.LOG.info("MapReduce job")
    config = config if config else Configuration.create(readonly=False, accepts_nulls=True)
    MapReduce.__validate_configs(config, name, "MapReduceJob", TaskOptions.KEYS_FOR_STREAMING_JOB)
    return MapReduceJob(
        name=name if name else "MR_JOB_{0}".format(uuid.uuid4()),
        config=config,
        jar=jar,
        main_class=main_class,
        executor=executor)
def test_fsimage_from_config(self):
    config = Configuration.create(readonly=False, accepts_nulls=True)
    config.set(section=CONFIG_ACLS_KEY, key='confidential', value='user:su:rwx')
    config.set(section=CONFIG_ACLS_KEY, key='sales', value='group:sales:r-x')
    config.set(section=CONFIG_HDFS_DIRS_KEY, key='/raw/sales', value='confidential,sales')
    snapshot = FsSnapshot.load_from_config(config=config,
                                           fs_section=CONFIG_HDFS_DIRS_KEY,
                                           acl_section=CONFIG_ACLS_KEY)
    files = snapshot.files
    self.assertTrue('/raw/sales' in files, 'File was not added to fs snapshot')
    self.assertTrue('user:su:rwx' in files['/raw/sales'],
                    "'confidential' access lvl was not mapped to file")
    self.assertTrue('group:sales:r-x' in files['/raw/sales'],
                    "'sales' access lvl was not mapped to file")
    self.assertFalse('default:fake:r-x' in files['/raw/sales'],
                     'Error in access lvl mapping')
def test_try_execute_empty_command(self):
    self.assertRaises(PigCommandError,
                      Pig(config=Configuration.create(), job_name=None, command_executor=None).run)