def __init__(self, methodName='runTest'):
    super(TestMapReduceCommandGenerationFromIni, self).__init__(methodName)
    _config_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'resources', 'mapreduce', 'mapreduce_streaming_job.ini')
    metastore = IniFileMetaStore(file=_config_file)
    self._config = Configuration.load(metastore=metastore)

def test_should_raise_exception_if_required_option_was_not_found(self):
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore)
    # 'item_a' is deliberately absent from section_a, so require() must fail
    self.assertRaises(ConfigurationError, _config.require, 'section_a', 'item_a')

def test_apply_hdfs_snapshot(self):
    _config_file = os.path.join(os.path.dirname(__file__),
                                'resources', 'bootsrap', 'bootstrap.ini')
    _raw_sales_dir = HDFS('/tmp/raw/sales')
    _raw_users_dir = HDFS('/tmp/raw/users')
    _raw_tmp_dir = HDFS('/tmp/raw/tmp')
    try:
        # run bootstrap script
        metastore = IniFileMetaStore(file=_config_file)
        _config = Configuration.load(metastore)
        apply_hdfs_snapshot(_config)
        # assert directories were created
        self.assertTrue(_raw_sales_dir.exists(),
                        "Directory '/tmp/raw/sales' was not created")
        self.assertTrue(_raw_users_dir.exists(),
                        "Directory '/tmp/raw/users' was not created")
        self.assertTrue(_raw_tmp_dir.exists(),
                        "Directory '/tmp/raw/tmp' was not created")
        # assert ACLs were applied
        sales_dir_acls = _raw_sales_dir.get_acls()
        users_dir_acls = _raw_users_dir.get_acls()
        self.assertIsNotNone(sales_dir_acls,
                             '/tmp/raw/sales : ACLs were not applied')
        self.assertTrue('group:sys-pii:r-x' in sales_dir_acls,
                        '/tmp/raw/sales : pii acl was not applied')
        self.assertTrue('group:sales:r--' in sales_dir_acls,
                        '/tmp/raw/sales : sales acl was not applied')
        self.assertIsNotNone(users_dir_acls,
                             '/tmp/raw/users : ACLs were not applied')
        self.assertTrue('group:sys-pii:r-x' in users_dir_acls,
                        '/tmp/raw/users : pii acl was not applied')
    finally:
        _test_basedir = HDFS('/tmp/raw')
        _test_basedir.delete_directory()
        self.assertFalse(_test_basedir.exists(), "ERROR: clean up failed")

def test_streaming_map_only_job_generation(self):
    _config_file = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'resources', 'mapreduce', 'mapreduce_streaming_job.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore=metastore)
    _job_name = 'streaming_test_job_map_only'
    _expected_command = 'hadoop jar ' \
                        '{0}/resources/mapreduce/hadoop-streaming.jar ' \
                        '-D mapreduce.job.name={1} ' \
                        '-D value.delimiter.char=, ' \
                        '-D partition.to.process=20142010 ' \
                        '-mapper smapper.py ' \
                        '-reducer NONE ' \
                        '-numReduceTasks 0 ' \
                        '-input /raw/20102014 ' \
                        '-output /core/20102014' \
        .format(os.path.dirname(os.path.realpath(__file__)), _job_name)
    MapReduce.prepare_streaming_job(
        jar='{0}/resources/mapreduce/hadoop-streaming.jar'.format(
            os.path.dirname(os.path.realpath(__file__))),
        config=_config,
        name=_job_name,
        executor=self.assert_generated_command(_expected_command)).run()

def test_export_table_with_staging(self):
    try:
        metastore = IniFileMetaStore(file=os.path.join(
            os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
        cmd = SqoopExport.load_preconfigured_job(
            config=Configuration.load(
                metastore=metastore, readonly=False, accepts_nulls=True)) \
            .to_rdbms() \
            .table(table="table_name_second") \
            .from_hdfs(export_dir="{0}/data_custom_directory".format(BASE_DIR)) \
            .with_staging_table(staging_table="stag") \
            .run()
        self.assertEqual(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
                USER, PASSWORD, MYSQL_SERVER),
            "'SELECT * FROM table_name_second'")
        self.assertNotEqual(result.stdout.split(' ')[0], 'Empty', result.stdout)
    finally:
        shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
                USER, PASSWORD, MYSQL_SERVER),
            "'DELETE FROM table_name_second'")
        shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
                USER, PASSWORD, MYSQL_SERVER),
            "'DELETE FROM stag'")

def test_load_config_from_file(self):
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    _section = 'section_a'
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore)
    self.assertTrue(_config.has(section=_section, key='key'),
                    'Cannot find "key" option in test config')
    self.assertEqual('value', _config.require(_section, 'key'))

def create(metastore=IniFileMetaStore(), readonly=False, accepts_nulls=True):
    """
    Creates a new empty configuration backed by a metastore that should
    implement the MetaStore interface.
    :param metastore: backing MetaStore implementation
        (note: the default instance is created once, at function definition time)
    :param readonly: Boolean flag. If True, the configuration is read-only
        and set() raises ConfigurationError.
    :param accepts_nulls: Boolean flag. If True, None is accepted as a value.
    :return: a new Configuration instance
    :rtype: Configuration
    """
    return Configuration(metastore, readonly, accepts_nulls)

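# A hedged usage sketch, not part of the original suite: it assumes create()
# is reachable as Configuration.create (mirroring Configuration.load used in
# the tests above), and the 'section_x'/'key_y'/'value_z' names are invented
# for illustration. It shows the intended flow: build an empty, mutable,
# in-memory configuration, set a value, and read it back.
def _example_create_usage():
    _config = Configuration.create(readonly=False, accepts_nulls=True)
    # readonly=False keeps the configuration mutable, so set() succeeds;
    # with readonly=True the same call would raise ConfigurationError
    _config.set(section='section_x', key='key_y', value='value_z')
    assert _config.get('section_x', 'key_y') == 'value_z'
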
def test_load_preconfigured_job(self):
    _command = 'pig -brief -optimizer_off SplitFilter -optimizer_off ColumnMapKeyPrune -e "ls /"'
    metastore = IniFileMetaStore(file=os.path.join(
        os.path.dirname(__file__), 'resources/pig/pig.ini'))
    pig = Pig.load_preconfigured_job(
        job_name='pig test',
        config=Configuration.load(metastore=metastore,
                                  readonly=False,
                                  accepts_nulls=True),
        command_executor=mock_executor(expected_command=_command))
    pig.without_split_filter().run()

def test_should_be_able_to_split_string_to_multiple_values(self):
    _values = ['one', 'two', 'three']
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
    _config.set("section_b", 'list', ",".join(_values))
    self.assertListEqual(
        _values,
        _config.get_list("section_b", 'list', delimiter=','))

def test_load_config(self):
    _command = "hive -e \"test\" --define A=B --define C=D --hiveconf hello=world " \
               "--hivevar A=B --hivevar C=D --database hive"
    metastore = IniFileMetaStore(file=os.path.join(
        os.path.dirname(__file__), 'resources/hive/hive.ini'))
    hive = Hive.load_preconfigured_job(
        name='hive test',
        config=Configuration.load(metastore=metastore,
                                  readonly=False,
                                  accepts_nulls=True),
        executor=mock_executor(expected_command=_command)) \
        .with_hive_conf("hello", "world")
    hive.run()

def test_should_be_able_to_add_nones(self):
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
    _config.set(section='section_a', key='new_key', value=None)
    self.assertEqual('value', _config.get('section_a', 'key'),
                     "Can't find old item")
    self.assertTrue(_config.has('section_a', 'new_key'),
                    "New item was not added")

def test_should_not_be_able_to_add_new_items(self):
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore, readonly=True, accepts_nulls=False)
    # a read-only configuration must reject writes
    self.assertRaises(excClass=ConfigurationError,
                      callableObj=_config.set,
                      section='test',
                      key='key',
                      value='value')

def test_should_not_be_able_to_add_nones(self):
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore, readonly=False, accepts_nulls=False)
    # with accepts_nulls=False, None values must be rejected
    self.assertRaises(ConfigurationError,
                      _config.set,
                      section='section_a',
                      key='new_key',
                      value=None)

def test_import_with_hadoop_properties_from_ini_file(self):
    metastore = IniFileMetaStore(file=os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'resources', 'sqoop', 'sqoop.ini'))
    config = Configuration.load(metastore=metastore, readonly=False)
    self.assertEqual(
        SqoopImport.load_preconfigured_job(name="sqoo", config=config)
        .from_rdbms(rdbms="mysql",
                    username="******",
                    password_file="/user/cloudera/password",
                    host="localhost",
                    database="sqoop_tests")
        .to_hdfs()
        .table(table="table_name")
        .with_hadoop_properties(some_properties="10")
        .build(),
        "-DA=12 -DB=13 -Dsome.properties=10 "
        "--connect jdbc:mysql://localhost/sqoop_tests "
        "--username root --password-file /user/cloudera/password "
        "--table table_name")

def test_import_direct(self):
    metastore = IniFileMetaStore(file=os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        'resources', 'sqoop', 'sqoop.ini'))
    config = Configuration.load(metastore=metastore, readonly=False)
    self.assertEqual(
        SqoopImport.load_preconfigured_job(name="test", config=config)
        .from_rdbms(rdbms="mysql",
                    username="******",
                    password_file="/user/cloudera/password",
                    host="localhost",
                    database="sqoop_tests")
        .with_direct_mode(direct_split_size="1", name_2="12", names_3="1")
        .table(table="table_name")
        .to_hdfs()
        .build(),
        '-DA=12 -DB=13 --connect jdbc:mysql://localhost/sqoop_tests '
        '--username root --password-file /user/cloudera/password '
        '--table table_name --direct -- --name-2=12 --names-3=1')

def test_should_be_able_to_add_multiple_values_for_a_single_key(self):
    _values = [1, 2, 3, 4]
    _increment = ['one', 'two', 'three']
    _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
    metastore = IniFileMetaStore(file=_config_file)
    _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
    _config.update_list("section_b", 'list', *_values)
    self.assertListEqual(_values, _config.get_list("section_b", 'list'))
    _config.update_list("section_b", 'list', *_increment)
    self.assertListEqual(_values + _increment,
                         _config.get_list("section_b", 'list'))

def test_apply_local_fs_snapshot(self):
    _config_file = os.path.join(os.path.dirname(__file__),
                                'resources', 'bootsrap', 'bootstrap.ini')
    test_dir = LocalFS('/tmp/data_tmp')
    if test_dir.exists():
        test_dir.delete_directory()
    try:
        metastore = IniFileMetaStore(file=_config_file)
        _config = Configuration.load(metastore)
        apply_localfs_snapshot(_config)
        self.assertTrue(test_dir.exists(), "Folder was not created")
    finally:
        test_dir.delete_directory()

def test_spark_submit_from_ini(self):
    _command = "spark-submit " \
               "--master local[10] " \
               "--class test.SparkApp " \
               "--name test_app " \
               "--jars lib001.jar,lib002.jar,lib003.jar " \
               "--files dim001.cache.txt,dim002.cache.txt " \
               "--properties-file spark.app.configs " \
               "--conf \"spark.app.name=test_app spark.executor.memory=512m " \
               "spark.serializer=org.apache.spark.serializer.KryoSerializer\" " \
               "application.jar " \
               "10 test"
    metastore = IniFileMetaStore(file=os.path.join(
        os.path.dirname(__file__), "resources", "spark", "spark.app.ini"))
    spark = SparkApplication.load_preconfigured_job(
        config=Configuration.load(metastore, readonly=False),
        name="test_spark_app",
        executor=mock_executor(expected_command=_command)) \
        .application_jar("application.jar")
    spark.run(10, "test")

def test_import_table(self):
    try:
        metastore = IniFileMetaStore(file=os.path.join(
            os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
        cmd = SqoopImport.load_preconfigured_job(
            config=Configuration.load(
                metastore=metastore, readonly=False, accepts_nulls=True)) \
            .from_rdbms() \
            .table(table="table_name", where="id>2", columns="id,last_name") \
            .to_hdfs(target_dir="{0}/custom_directory".format(BASE_DIR)) \
            .run()
        self.assertEqual(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))