def test_import_with_incremental(self):
    """Run a full import, then an incremental append import into the same
    HDFS directory, and verify the output files are non-empty.

    The target directory is removed in ``finally`` regardless of outcome.
    """
    try:
        # Initial full import of the table.
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
            target_dir="{0}/custom_directory".format(BASE_DIR)).run()
        # assertEquals is a deprecated alias (removed in Python 3.12);
        # assertEqual is the supported name.
        self.assertEqual(cmd.status, 0, cmd.stderr)
        # Incremental append import picking up rows with id > 5.
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
            target_dir="{0}/custom_directory".format(BASE_DIR)
        ).with_incremental(
            incremental="append", last_value="5", check_column="id").run()
        self.assertEqual(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        # A zero byte count would mean the import produced no data.
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        # Clean up the HDFS target directory created by the imports.
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_import_to_hbase(self):
    """Import the MySQL table into a new HBase table and check the exit status."""
    cmd = Sqoop.import_data().from_rdbms(
        host=MYSQL_SERVER, rdbms="mysql", username="******",
        password_file="{0}/rdbms.password".format(BASE_DIR),
        database="sqoop_tests").table(table="table_name").to_hbase(
        hbase_table="custom_table", hbase_create_table=True,
        hbase_row_key="id", column_family="f1").run()
    # assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(cmd.status, 0, cmd.stderr)
def test_export_call(self):
    """Verify an export job with a stored-procedure call builds the expected CLI string."""
    command = Sqoop.export_data().to_rdbms(
        rdbms="mysql", username="******",
        password_file="/user/cloudera/password", host="localhost",
        database="sqoop_tests").call(
        stored_procedure="procedure").from_hdfs(export_dir="some").build()
    # assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(
        command,
        '--connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/password --export-dir some --call procedure')
def test_import_with_incremental_attributes(self):
    """Verify incremental-append attributes are rendered into the built command line."""
    command = Sqoop.import_data().from_rdbms(
        rdbms="mysql", username="******",
        password_file="/user/cloudera/password", host="localhost",
        database="sqoop_tests").to_hdfs().table(
        table="table_name").with_incremental(
        incremental="append", last_value="12", check_column="id").build()
    # assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(
        command,
        "--connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/password --table table_name --as-textfile --incremental append --check-column id --last-value '12'")
def test_import_query(self):
    """Verify a free-form query import builds the expected command line.

    The ``$id`` placeholder in the query is substituted with the ``id``
    keyword value; ``$CONDITIONS`` is left for Sqoop itself.
    """
    command = Sqoop.import_data().from_rdbms(
        rdbms="mysql", username="******",
        password_file="/user/cloudera/sqoop.password", host="localhost",
        database="sqoop_tests").query(
        query="'SELECT * FROM table_name WHERE $CONDITIONS AND id>$id'",
        split_by="id", id="2").to_hdfs(target_dir="/custom_directory").build()
    # assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(
        command,
        "--connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/sqoop.password --query \"SELECT * FROM table_name WHERE \$CONDITIONS AND id>2\" --split-by id --target-dir /custom_directory --as-textfile")
def test_import_to_hbase(self):
    """Verify an HBase import builds the expected command line."""
    # NOTE(review): hbase_create_table is passed the truthy string "family"
    # rather than True; the builder apparently only checks truthiness when
    # emitting --hbase-create-table -- confirm intent.
    command = Sqoop.import_data().from_rdbms(
        rdbms="mysql", username="******",
        password_file="/user/cloudera/rdbms.password", host="localhost",
        database="sqoop_tests").table(table="table_name").to_hbase(
        hbase_table="custom_table", hbase_create_table="family",
        hbase_row_key="id", column_family="f1").build()
    # assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(
        command,
        "--connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/rdbms.password --table table_name --as-textfile --hbase-table custom_table --hbase-create-table --hbase-row-key id --column-family f1")
def test_import_with_incremental(self):
    """Run a full import, then an incremental append import into the same
    HDFS directory, and verify the output files are non-empty.
    """
    try:
        # Initial full import of the table.
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
            target_dir="{0}/custom_directory".format(BASE_DIR)).run()
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(cmd.status, 0, cmd.stderr)
        # Incremental append import picking up rows with id > 5.
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
            target_dir="{0}/custom_directory".format(BASE_DIR)
        ).with_incremental(
            incremental="append", last_value="5", check_column="id").run()
        self.assertEqual(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        # A zero byte count would mean the import produced no data.
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        # Clean up the HDFS target directory created by the imports.
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def load_data_from_rdbms_to_hdfs(context):
    """Import 'first_table_name' from the local MySQL instance into HDFS.

    :param context: workflow context object (not used by this step)
    """
    # Build the Sqoop import job, then execute it.
    import_job = Sqoop.import_data().from_rdbms(
        host="127.0.0.1",
        rdbms="mysql",
        database="test_example",
        username="******",
        password_file="{0}/rdbms.password".format(BASE_DIR),
    ).table(
        table="first_table_name"
    ).to_hdfs(
        target_dir="{0}/data_from_import".format(BASE_DIR)
    )
    import_job.run()
def test_import_query(self):
    """Run a free-form query import to HDFS and verify non-empty output."""
    try:
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").query(
            query="'SELECT * FROM table_name WHERE $CONDITIONS AND id>$id'",
            split_by="id", id="2").to_hdfs(
            target_dir="{0}/custom_directory".format(BASE_DIR)).run()
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        # A zero byte count would mean the import produced no data.
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        # Clean up the HDFS target directory created by the import.
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_import_with_connection_manager(self):
    """Import using an explicit connection-manager class and verify non-empty output."""
    try:
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
            target_dir="{0}/custom_directory".format(BASE_DIR)).with_attr(
            connection_manager="org.apache.sqoop.manager.MySQLManager").run()
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        # A zero byte count would mean the import produced no data.
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        # Clean up the HDFS target directory created by the import.
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_import_with_enclosing(self):
    """Import with input/output parsing options (escaping, MySQL delimiters)
    and verify non-empty output.
    """
    try:
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
            target_dir="{0}/custom_directory".format(BASE_DIR)
        ).with_input_parsing(escaped_by="\\").with_output_parsing(
            escaped_by="\\", mysql_delimiters=True).run()
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        # A zero byte count would mean the import produced no data.
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        # Clean up the HDFS target directory created by the import.
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_import_to_hbase(self):
    """Import the MySQL table into a new HBase table and check the exit status."""
    cmd = Sqoop.import_data().from_rdbms(
        host=MYSQL_SERVER, rdbms="mysql", username="******",
        password_file="{0}/rdbms.password".format(BASE_DIR),
        database="sqoop_tests").table(table="table_name").to_hbase(
        hbase_table="custom_table", hbase_create_table=True,
        hbase_row_key="id", column_family="f1").run()
    # assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(cmd.status, 0, cmd.stderr)
def test_import_to_avrodatafile(self):
    """Import to HDFS as Avro data files and verify non-empty .avro output."""
    try:
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
            target_dir="{0}/custom_directory".format(BASE_DIR)
        ).use_file_format(file_format="--as-avrodatafile").run()
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*.avro'.format(BASE_DIR))
        # A zero byte count would mean the import produced no data.
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        # Clean up the HDFS target directory created by the import.
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def load_data_from_hdfs_to_rdbms(context):
    """Export HDFS data into the MySQL table 'second_table_name'.

    Only the ``name`` and ``count`` columns are populated.

    :param context: workflow context object (not used by this step)
    """
    # Build the Sqoop export job, then execute it.
    export_job = Sqoop.export_data().to_rdbms(
        host="127.0.0.1",
        rdbms="mysql",
        database="test_example",
        username="******",
        password_file="{0}/rdbms.password".format(BASE_DIR),
    ).table(
        table="second_table_name",
        columns=["name", "count"],
    ).from_hdfs(
        export_dir="{0}/data_to_export".format(BASE_DIR)
    )
    export_job.run()
def test_export_batch_with_hadoop_properties(self):
    """Verify Hadoop -D properties and --batch are rendered into the export command."""
    command = Sqoop.export_data().to_rdbms(
        rdbms="mysql", username="******",
        password_file="/user/cloudera/password", host="localhost",
        database="sqoop_tests"
    ).table(
        table="table_name"
    ).with_hadoop_properties(
        # Underscores in the keyword become dots in the -D property name.
        some_properties="10"
    ).with_batch().from_hdfs(
        export_dir="some"
    ).build()
    # assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(
        command,
        '-Dsome.properties=10 --connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/password --export-dir some --table table_name --batch')
def test_import_to_avrodatafile(self):
    """Import to HDFS as Avro data files and verify non-empty .avro output."""
    try:
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
            target_dir="{0}/custom_directory".format(BASE_DIR)
        ).use_file_format(file_format="--as-avrodatafile").run()
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*.avro'.format(BASE_DIR))
        # A zero byte count would mean the import produced no data.
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        # Clean up the HDFS target directory created by the import.
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_import_to_hive(self):
    """Import the MySQL table into Hive, clearing any stale HDFS staging dir first."""
    staging_path = HDFS(os.path.join('/user', getpass.getuser(), 'table_name'))
    try:
        # A leftover staging directory would make the Sqoop job fail; remove it.
        if staging_path.exists():
            staging_path.delete(recursive=staging_path.is_directory())
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hive().run()
        # NOTE(review): the result assertions are disabled in the original test:
        # self.assertEquals(cmd.status, 0, cmd.stderr)
        # result = shell.execute_shell_command('hadoop fs', '-du -s /user/hive/warehouse/table_name/part-m-*')
        # self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        # Always drop the Hive table created by the import.
        shell.execute_shell_command(
            'hive', "-e 'DROP TABLE IF EXISTS table_name'")
def test_import_with_enclosing(self):
    """Import with input/output parsing options (escaping, MySQL delimiters)
    and verify non-empty output.
    """
    try:
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
            target_dir="{0}/custom_directory".format(BASE_DIR)
        ).with_input_parsing(escaped_by="\\").with_output_parsing(
            escaped_by="\\", mysql_delimiters=True).run()
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        # A zero byte count would mean the import produced no data.
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        # Clean up the HDFS target directory created by the import.
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_import_with_connection_manager(self):
    """Import using an explicit connection-manager class and verify non-empty output."""
    try:
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hdfs(
            target_dir="{0}/custom_directory".format(BASE_DIR)
        ).with_attr(
            connection_manager="org.apache.sqoop.manager.MySQLManager"
        ).run()
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        # A zero byte count would mean the import produced no data.
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        # Clean up the HDFS target directory created by the import.
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_import_to_hive(self):
    """Import the MySQL table into Hive, clearing any stale HDFS staging dir first."""
    staging_path = HDFS(os.path.join('/user', getpass.getuser(), 'table_name'))
    try:
        # A leftover staging directory would make the Sqoop job fail; remove it.
        if staging_path.exists():
            staging_path.delete(recursive=staging_path.is_directory())
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER,
            rdbms="mysql",
            username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests"
        ).table(
            table="table_name"
        ).to_hive().run()
        # NOTE(review): the result assertions are disabled in the original test:
        # self.assertEquals(cmd.status, 0, cmd.stderr)
        # result = shell.execute_shell_command('hadoop fs', '-du -s /user/hive/warehouse/table_name/part-m-*')
        # self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        # Always drop the Hive table created by the import.
        shell.execute_shell_command('hive', "-e 'DROP TABLE IF EXISTS table_name'")
def test_import_query(self):
    """Run a free-form query import to HDFS and verify non-empty output."""
    try:
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER, rdbms="mysql", username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests"
        ).query(
            query="'SELECT * FROM table_name WHERE $CONDITIONS AND id>$id'",
            split_by="id", id="2").to_hdfs(
            target_dir="{0}/custom_directory".format(BASE_DIR)).run()
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(cmd.status, 0, cmd.stderr)
        result = shell.execute_shell_command(
            'hadoop fs',
            '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
        # A zero byte count would mean the import produced no data.
        self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        # Clean up the HDFS target directory created by the import.
        shell.execute_shell_command(
            'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
def test_import_with_encoding(self):
    """Verify null-value encoding options are rendered into the built command."""
    command = Sqoop.import_data().from_rdbms(
        rdbms="mysql", username="******",
        password_file="/user/cloudera/password", host="localhost",
        database="sqoop_tests").to_hdfs().table(
        table="table_name").with_encoding(
        null_string="null", null_non_string="false").build()
    # assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(
        command,
        "--connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/password --table table_name --as-textfile --null-string 'null' --null-non-string 'false'")
def test_export_without_export_dir(self):
    """Building an export job without an export directory must raise."""
    with self.assertRaises(Exception):
        Sqoop.export_data().to_rdbms(
            rdbms="mysql", username="******",
            password_file="/user/cloudera/password", host="localhost",
            database="sqoop_tests").table(
            table="table_name").from_hdfs().build()
def test_import_without_database(self):
    """Building an import job with incomplete connection settings must raise."""
    # NOTE(review): despite the test name, database= IS supplied here; the
    # missing username appears to be what triggers the failure -- confirm intent.
    with self.assertRaises(Exception):
        Sqoop.import_data().from_rdbms(
            rdbms="mysql", password_file="/user/cloudera/password",
            host="localhost", database="db").to_hdfs().table(
            table="table_name").build()
def test_import_without_credential(self):
    """Building an import job with no credentials must raise."""
    with self.assertRaises(Exception):
        Sqoop.import_data().from_rdbms(
            rdbms="mysql", host="localhost", database="db").to_hdfs().table(
            table="table_name").build()
def test_import_without_incremental_attributes(self):
    """Incremental import configured without a check column must fail at build time."""
    with self.assertRaises(Exception):
        Sqoop.import_data().from_rdbms(
            rdbms="mysql", username="******",
            password_file="/user/cloudera/password", host="localhost",
            database="db").to_hdfs().table(
            table="table_name").with_incremental(
            incremental="append", last_value="12").build()
def test_import_table(self):
    """Verify a plain table import builds the expected command line."""
    command = Sqoop.import_data().from_rdbms(
        rdbms="mysql", username="******",
        password_file="/user/cloudera/password", host="localhost",
        database="sqoop_tests").table(
        table="table_name").to_hdfs().build()
    # assertEquals is a deprecated alias (removed in Python 3.12).
    self.assertEqual(
        command,
        '--connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/password --table table_name --as-textfile')