def test_import_with_incremental(self):
    try:
        # Full import of the source table into HDFS.
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER,
            rdbms="mysql",
            username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
                target_dir="{0}/custom_directory".format(BASE_DIR)).run()
        self.assertEquals(cmd.status, 0, cmd.stderr)
        # Incremental append import: only rows with id greater than
        # last_value should be fetched on the second run.
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER,
            rdbms="mysql",
            username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
                target_dir="{0}/custom_directory".format(BASE_DIR)
            ).with_incremental(
                incremental="append",
                last_value="5",
                check_column="id").run()
        self.assertEquals(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_download_dir_with_predicate(self):
    try:
        shell.execute_shell_command(
            'mkdir',
            os.path.join(os.path.dirname(__file__), 'resources/download'))
        ftp = sftp_client(HOST, "{0}".format(BASE_DIR), USER, PASSWORD,
                          HKEY_PATH)
        self.assertTrue(ftp.exists())
        # Only entries accepted by the predicate should be downloaded.
        ftp.download_dir(
            local_path=os.path.join(os.path.dirname(__file__),
                                    'resources/download'),
            predicate=self.predicate_get_description,
            recursive=True)
        self.assertTrue(
            os.path.exists(
                os.path.join(os.path.dirname(__file__),
                             'resources/download/{0}'.format(BASE_DIR))))
        self.assertTrue(
            os.path.exists(
                os.path.join(
                    os.path.dirname(__file__),
                    'resources/download/{0}/file'.format(BASE_DIR))))
        self.assertFalse(
            os.path.exists(
                os.path.join(
                    os.path.dirname(__file__),
                    'resources/download/{0}/folder'.format(BASE_DIR))))
    finally:
        shutil.rmtree(
            os.path.join(os.path.dirname(__file__), 'resources/download'))
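# A hedged sketch (not from the original suite) of a download predicate in
# the spirit of self.predicate_get_description above. The callback signature
# is an assumption: the predicate is taken here to receive the remote entry
# name and return True for entries that should be downloaded.
def predicate_skip_folders(name):
    # Hypothetical filter: accept everything except the 'folder' entry.
    return name != 'folder'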
def test_export_table_with_staging(self):
    try:
        metastore = IniFileMetaStore(file=os.path.join(
            os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
        cmd = SqoopExport.load_preconfigured_job(
            config=Configuration.load(
                metastore=metastore,
                readonly=False,
                accepts_nulls=True)).to_rdbms().table(
                    table="table_name_second").from_hdfs(
                        export_dir="{0}/data_custom_directory".format(
                            BASE_DIR)).with_staging_table(
                                staging_table="stag").run()
        self.assertEquals(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
                USER, PASSWORD, MYSQL_SERVER),
            "'SELECT * FROM table_name_second'")
        self.assertNotEqual(result.stdout.split(' ')[0], 'Empty', result.stdout)
    finally:
        shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
                USER, PASSWORD, MYSQL_SERVER),
            "'DELETE FROM table_name_second'")
        shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
                USER, PASSWORD, MYSQL_SERVER),
            "'DELETE FROM stag'")
def test_download_dir(self):
    try:
        shell.execute_shell_command(
            'mkdir',
            os.path.join(os.path.dirname(__file__), 'resources/download'))
        ftp = sftp_client(HOST, "{0}".format(BASE_DIR), USER, PASSWORD,
                          HKEY_PATH)
        self.assertTrue(ftp.exists())
        # Recursive download (the default): nested folders are fetched too.
        ftp.download_dir(local_path=os.path.join(os.path.dirname(__file__),
                                                 'resources/download'))
        self.assertTrue(
            os.path.exists(
                os.path.join(os.path.dirname(__file__),
                             'resources/download/{0}'.format(BASE_DIR))))
        self.assertTrue(
            os.path.exists(
                os.path.join(
                    os.path.dirname(__file__),
                    'resources/download/{0}/file'.format(BASE_DIR))))
        self.assertTrue(
            os.path.exists(
                os.path.join(
                    os.path.dirname(__file__),
                    'resources/download/{0}/folder'.format(BASE_DIR))))
        self.assertTrue(
            os.path.exists(
                os.path.join(
                    os.path.dirname(__file__),
                    'resources/download/{0}/folder/file'.format(BASE_DIR))))
        shutil.rmtree(
            os.path.join(os.path.dirname(__file__),
                         'resources/download/{0}'.format(BASE_DIR)))
        # Non-recursive download: only top-level entries are fetched.
        ftp.download_dir(local_path=os.path.join(os.path.dirname(__file__),
                                                 'resources/download'),
                         recursive=False)
        self.assertTrue(
            os.path.exists(
                os.path.join(os.path.dirname(__file__),
                             'resources/download/{0}'.format(BASE_DIR))))
        self.assertTrue(
            os.path.exists(
                os.path.join(
                    os.path.dirname(__file__),
                    'resources/download/{0}/file'.format(BASE_DIR))))
        self.assertFalse(
            os.path.exists(
                os.path.join(
                    os.path.dirname(__file__),
                    'resources/download/{0}/folder'.format(BASE_DIR))))
        self.assertFalse(
            os.path.exists(
                os.path.join(
                    os.path.dirname(__file__),
                    'resources/download/{0}/folder/file'.format(BASE_DIR))))
        shutil.rmtree(
            os.path.join(os.path.dirname(__file__),
                         'resources/download/{0}'.format(BASE_DIR)))
    finally:
        shutil.rmtree(
            os.path.join(os.path.dirname(__file__), 'resources/download'))
def has_command(command):
    """
    Wrapper for the Unix 'which' command, used to identify the location of
    executables. Can be used to skip integration tests.
    :param command: program name
    :return: True if the executable is found on the PATH
    """
    result = execute_shell_command("which", command).is_ok()
    print "CONFIGURE TEST CASES.....", str(result)
    return result
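# A minimal usage sketch (not part of the original suite): has_command() can
# gate integration tests via unittest.skipUnless. The test class and the
# checked executable below are hypothetical, assuming only the standard
# unittest module.
import unittest

@unittest.skipUnless(has_command("sqoop"), "sqoop executable not found on PATH")
class SqoopIntegrationSketch(unittest.TestCase):
    def test_binary_available(self):
        # Runs only when the 'sqoop' binary is on the PATH.
        self.assertTrue(has_command("sqoop"))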
def test_run_producer(self):
    thread = KafkaThreadProducer()
    thread.daemon = True
    thread.start()
    sleep(TIME)
    expected = ("kafka.producer.ConsoleProducer --broker-list {0}:{1} "
                "--topic test123").format(CLUSTER_NAME, PORT)
    cmd = shell.execute_shell_command('ps aux | grep -i kafka')
    self.assertTrue(expected in cmd.stdout, cmd.stdout)
    # Kill the producer process that the daemon thread left running.
    for stroke in cmd.stdout.split("\n"):
        if expected in stroke:
            shell.execute_shell_command(
                'kill -9 {0}'.format(stroke.split()[1]))
def test_download_dir_invalid_path_sftp(self):
    try:
        shell.execute_shell_command(
            'mkdir',
            os.path.join(os.path.dirname(__file__), 'resources/download'))
        ftp = sftp_client(HOST, "{0}/file".format(BASE_DIR), USER, PASSWORD,
                          HKEY_PATH)
        self.assertTrue(ftp.exists())
        ftp.download_dir(local_path=os.path.join(os.path.dirname(__file__),
                                                 'resources/download'))
    finally:
        shutil.rmtree(
            os.path.join(os.path.dirname(__file__), 'resources/download'))
def test_run_consumer(self):
    thread = KafkaThreadConsumer()
    thread.daemon = True
    thread.start()
    sleep(TIME)
    expected = ("kafka.consumer.ConsoleConsumer --zookeeper "
                "sandbox.hortonworks.com:2181 --from-beginning --topic test123")
    cmd = shell.execute_shell_command('ps aux | grep -i kafka')
    self.assertTrue(expected in cmd.stdout, cmd.stdout)
    # Kill the consumer process that the daemon thread left running.
    for stroke in cmd.stdout.split("\n"):
        if expected in stroke:
            shell.execute_shell_command(
                'kill -9 {0}'.format(stroke.split()[1]))
def test_get_replicas(self):
    self.assertEqual("0", HDFS("/").replicas(),
                     "Root dir replicas should be 0")
    self.assertNotEqual("0", HDFS("/tmp").replicas(),
                        "/tmp dir replicas should not be 0")
    name = uuid.uuid4()
    hdfs_file = HDFS("/tmp/{0}".format(name))
    hdfs_file.create_file()
    shell.execute_shell_command("hadoop dfs",
                                "-setrep -w 1 /tmp/{0}".format(name))
    if hdfs_file.exists():
        self.assertEqual("1", hdfs_file.replicas(),
                         "Number of replicas of the file must be 1")
        hdfs_file.delete()
        self.assertFalse(hdfs_file.exists())
def test_import_query(self):
    try:
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER,
            rdbms="mysql",
            username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").query(
                query="'SELECT * FROM table_name WHERE $CONDITIONS AND id>$id'",
                split_by="id",
                id="2").to_hdfs(
                    target_dir="{0}/custom_directory".format(BASE_DIR)).run()
        self.assertEquals(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_import_with_connection_manager(self):
    try:
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER,
            rdbms="mysql",
            username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
                target_dir="{0}/custom_directory".format(BASE_DIR)).with_attr(
                    connection_manager="org.apache.sqoop.manager.MySQLManager"
                ).run()
        self.assertEquals(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_import_with_enclosing(self):
    try:
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER,
            rdbms="mysql",
            username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
                target_dir="{0}/custom_directory".format(BASE_DIR)
            ).with_input_parsing(escaped_by="\\").with_output_parsing(
                escaped_by="\\", mysql_delimiters=True).run()
        self.assertEquals(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_broker(self):
    shell.execute_shell_command('fuser -k -n tcp {0}'.format(PORT))
    local = LocalFS("/tmp/kafka-test")
    if not local.exists():
        local.create_directory()
    thread = KafkaThreadBroker()
    thread.daemon = True
    thread.start()
    sleep(TIME)
    # The broker should be listening on PORT once it has started.
    cmd = shell.execute_shell_command('netstat -lntu')
    self.assertTrue(str(PORT) in cmd.stdout, cmd.stdout)
    local.delete_directory()
    shell.execute_shell_command('fuser -k -n tcp {0}'.format(PORT))
def test_import_to_avrodatafile(self):
    try:
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER,
            rdbms="mysql",
            username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
                target_dir="{0}/custom_directory".format(BASE_DIR)
            ).use_file_format(file_format="--as-avrodatafile").run()
        self.assertEquals(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*.avro'.format(BASE_DIR))
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_agent(self):
    thread = AgentThread()
    thread.daemon = True
    thread.start()
    time.sleep(TIME_TO_OPEN_PORT)
    # The agent should be listening on port 41414 once it has started.
    cmd = shell.execute_shell_command('netstat -lntu')
    self.assertTrue("41414" in cmd.stdout, cmd.stdout)
def test_import_table(self):
    try:
        metastore = IniFileMetaStore(file=os.path.join(
            os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
        cmd = SqoopImport.load_preconfigured_job(
            config=Configuration.load(
                metastore=metastore,
                readonly=False,
                accepts_nulls=True)).from_rdbms().table(
                    table="table_name",
                    where="id>2",
                    columns="id,last_name").to_hdfs(
                        target_dir="{0}/custom_directory".format(BASE_DIR)).run()
        self.assertEquals(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_export_table_with_call(self):
    try:
        metastore = IniFileMetaStore(file=os.path.join(
            os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
        cmd = SqoopExport.load_preconfigured_job(
            config=Configuration.load(
                metastore=metastore,
                readonly=False,
                accepts_nulls=True)).to_rdbms().from_hdfs(
                    export_dir="{0}/data_custom_directory".format(
                        BASE_DIR)).call(stored_procedure="p").run()
        self.assertEquals(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
                USER, PASSWORD, MYSQL_SERVER),
            "'SELECT * FROM table_name_second'")
        self.assertNotEqual(result.stdout.split(' ')[0], 'Empty', result.stdout)
    finally:
        shell.execute_shell_command(
            'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
                USER, PASSWORD, MYSQL_SERVER),
            "'DELETE FROM table_name_second'")
def test_command(self):
    _host = "sandbox.hortonworks.com"
    # Build the DistCp job; the duplicated preserve_checksum_type() call in
    # the original chain has been dropped.
    cmd = DistCp().take(
        path="hdfs://{host}:8020/tmp/foo".format(host=_host)
    ).copy_to(
        path="hdfs://{host}:8020/tmp/bar".format(host=_host)
    ).use(
        mappers=12
    ).update_destination(
        synchronize=True
    ).preserve_replication_number()\
        .preserve_block_size()\
        .preserve_checksum_type()\
        .preserve_group()\
        .preserve_user()\
        .run()
    self.assertEquals(cmd.status, 0, cmd.stderr)
    self.assertEquals(
        shell.execute_shell_command('hadoop', 'fs', '-test', '-e',
                                    '/tmp/bar/test.txt').status, 0)
    self.assertEquals(
        shell.execute_shell_command('hadoop', 'fs', '-test', '-e',
                                    '/tmp/bar/test2.txt').status, 0)
def test_import_to_hive(self):
    _path = HDFS(os.path.join('/user', getpass.getuser(), 'table_name'))
    try:
        if _path.exists():
            _path.delete(recursive=_path.is_directory())
            # shell.execute_shell_command('hadoop fs', '-rm -r /user/', getpass.getuser(), '/table_name')
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER,
            rdbms="mysql",
            username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hive().run()
        # self.assertEquals(cmd.status, 0, cmd.stderr)
        # result = shell.execute_shell_command('hadoop fs', '-du -s /user/hive/warehouse/table_name/part-m-*')
        # self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        shell.execute_shell_command(
            'hive', "-e 'DROP TABLE IF EXISTS table_name'")
def setUpClass(cls): shell.execute_shell_command('hive -e \"drop database if EXISTS testdb CASCADE\"') shell.execute_shell_command('hive -e \"create database testdb\"') c = 'hive -e \"create table testdb.some_table(strings STRING) ' \ 'ROW FORMAT DELIMITED ' \ 'FIELDS TERMINATED BY \\",\\" ' \ 'STORED AS TEXTFILE\"' shell.execute_shell_command(c)
def tearDownClass(cls):
    shell.execute_shell_command(
        'hadoop fs', '-rm -r {0}/rdbms.password'.format(BASE_DIR))
    shell.execute_shell_command(
        'hadoop fs', '-rm -r {0}/data_custom_directory'.format(BASE_DIR))
    shell.execute_shell_command(
        'mysql --user {0} --password={1} --host={2} -e'.format(
            USER, PASSWORD, MYSQL_SERVER),
        "'DROP DATABASE IF EXISTS sqoop_tests'")
def copy_file_from_local(self, path):
    import os
    # Copy the local file into HDFS /tmp/, drop the local copy, and return
    # the resulting HDFS path.
    execute_shell_command("hadoop", "fs", "-copyFromLocal", path, "/tmp/")
    os.remove(path)
    return "/tmp/" + os.path.split(path)[1]
def delete_file_in_hdfs(self, path="/tmp/data"):
    # The default directory is removed recursively; any other path is
    # treated as a single file.
    execute_shell_command("hadoop", "fs",
                          "-rm -R" if path == "/tmp/data" else "-rm", path)
def tearDownClass(cls):
    # CASCADE so the database is dropped even when it still contains tables.
    shell.execute_shell_command(
        'hive -e \"drop database if EXISTS testdb CASCADE\"')
def setUpClass(cls): shell.execute_shell_command('hive -e "drop database if EXISTS testdb CASCADE;"')
def delete_file_in_hdfs(self, path="/tmp/hive_table"):
    # The default directory is removed recursively; any other path is
    # treated as a single file.
    execute_shell_command("hadoop", "fs",
                          "-rm -R" if path == "/tmp/hive_table" else "-rm",
                          path)
def setUpClass(cls):
    # Stage the RDBMS password file and the export data in HDFS.
    shell.execute_shell_command(
        'hadoop fs', '-rm -r {0}/rdbms.password'.format(BASE_DIR))
    shell.execute_shell_command(
        'hadoop fs', '-copyFromLocal',
        os.path.join(os.path.dirname(__file__),
                     'resources/sqoop/rdbms.password'), BASE_DIR)
    shell.execute_shell_command(
        'hadoop fs', '-mkdir',
        os.path.join(BASE_DIR, "data_custom_directory"))
    shell.execute_shell_command(
        'hadoop fs', '-copyFromLocal',
        os.path.join(os.path.dirname(__file__),
                     'resources/sqoop/data_to_export.txt'),
        os.path.join(BASE_DIR, "data_custom_directory"))
    # Recreate the test database and its tables from scratch.
    shell.execute_shell_command(
        'mysql --user {0} --password={1} --host={2} -e'.format(
            USER, PASSWORD, MYSQL_SERVER),
        "'DROP DATABASE IF EXISTS sqoop_tests'")
    shell.execute_shell_command(
        'mysql --user {0} --password={1} --host={2} -e'.format(
            USER, PASSWORD, MYSQL_SERVER),
        "'CREATE DATABASE sqoop_tests'")
    shell.execute_shell_command(
        'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
            USER, PASSWORD, MYSQL_SERVER),
        "'CREATE TABLE IF NOT EXISTS table_name(id INT(11) NOT NULL AUTO_INCREMENT,"
        "last_name varchar(255) NOT NULL, first_name varchar(255), city varchar(255),"
        "PRIMARY KEY (id))'")
    shell.execute_shell_command(
        'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
            USER, PASSWORD, MYSQL_SERVER),
        "'CREATE TABLE IF NOT EXISTS table_name_second(id INT(11) NOT NULL AUTO_INCREMENT,"
        "last_name varchar(255) NOT NULL, first_name varchar(255), city varchar(255),"
        "PRIMARY KEY (id))'")
    shell.execute_shell_command(
        'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
            USER, PASSWORD, MYSQL_SERVER),
        "'CREATE TABLE IF NOT EXISTS stag(id INT(11) NOT NULL AUTO_INCREMENT,"
        "last_name varchar(255) NOT NULL, first_name varchar(255), city varchar(255),"
        "PRIMARY KEY (id))'")
    # Seed the source table with sample rows.
    shell.execute_shell_command(
        'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
            USER, PASSWORD, MYSQL_SERVER),
        "\"INSERT INTO table_name (last_name) VALUES ('Bob')\"")
    shell.execute_shell_command(
        'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
            USER, PASSWORD, MYSQL_SERVER),
        "\"INSERT INTO table_name (last_name, first_name, city) VALUES ('Alex','Log','New York')\"")
    shell.execute_shell_command(
        'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
            USER, PASSWORD, MYSQL_SERVER),
        "\"INSERT INTO table_name (last_name, first_name, city) VALUES ('Merry','Log','New York')\"")
    shell.execute_shell_command(
        'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
            USER, PASSWORD, MYSQL_SERVER),
        "\"INSERT INTO table_name (last_name, first_name, city) VALUES ('Bob','Log','New York')\"")
    # Stored procedure used by the export-with-call test.
    shell.execute_shell_command(
        'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
            USER, PASSWORD, MYSQL_SERVER),
        "\"delimiter //\ncreate procedure p(in p_id INT, in p_last_name varchar(255), "
        "in p_first_name varchar(255), in p_city varchar(255)) begin insert into table_name_second("
        "id, last_name, first_name, city) values(p_id,p_last_name,p_first_name,p_city);\nend//\"")
def tearDown(self):
    shell.execute_shell_command('fuser -k -n tcp {0}'.format(PORT))
def setUp(self):
    super(TestFlume, self).setUp()
    shell.execute_shell_command('fuser -k -n tcp {0}'.format(PORT))
def setUp(self):
    super(TestDistCp, self).setUp()
    shell.execute_shell_command('hadoop fs', '-mkdir /tmp/foo')
    shell.execute_shell_command('hadoop fs', '-mkdir /tmp/bar')
    shell.execute_shell_command('hadoop fs', '-touchz /tmp/foo/test.txt')
    shell.execute_shell_command('hadoop fs', '-touchz /tmp/foo/test2.txt')
def tearDown(self):
    shell.execute_shell_command('hadoop fs', '-rm -r /tmp/foo')
    shell.execute_shell_command('hadoop fs', '-rm -r /tmp/bar')