Beispiel #1
0
    def test_import_with_incremental(self):
        # Run a plain import first, then an incremental "append" import on
        # top of the same target directory, and verify data was written.
        target = "{0}/custom_directory".format(BASE_DIR)
        password = "{0}/rdbms.password".format(BASE_DIR)
        try:
            first = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER, rdbms="mysql", username="******",
                password_file=password, database="sqoop_tests")
            cmd = first.table(table="table_name").to_hdfs(
                target_dir=target).run()
            self.assertEquals(cmd.status, 0, cmd.stderr)

            # Second pass: only rows with id > 5 should be appended.
            second = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER, rdbms="mysql", username="******",
                password_file=password, database="sqoop_tests")
            cmd = second.table(table="table_name").to_hdfs(
                target_dir=target).with_incremental(
                    incremental="append", last_value="5",
                    check_column="id").run()
            self.assertEquals(cmd.status, 0, cmd.stderr)

            result = shell.execute_shell_command(
                'hadoop fs', '-du -s {0}/part-m-*'.format(target))
            self.assertNotEqual(result.stdout.split(' ')[0], '0',
                                result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs',
                                        '-rm -r {0}'.format(target))
Beispiel #2
0
    def test_download_dir_with_predicate(self):
        # Recursive download filtered by a predicate: the remote 'file'
        # entry must arrive locally, the 'folder' entry must be skipped.
        here = os.path.dirname(__file__)
        download_root = os.path.join(here, 'resources/download')
        try:
            shell.execute_shell_command('mkdir', download_root)
            ftp = sftp_client(HOST, "{0}".format(BASE_DIR), USER, PASSWORD,
                              HKEY_PATH)
            self.assertTrue(ftp.exists())

            ftp.download_dir(local_path=download_root,
                             predicate=self.predicate_get_description,
                             recursive=True)
            self.assertTrue(os.path.exists(os.path.join(
                here, 'resources/download/{0}'.format(BASE_DIR))))
            self.assertTrue(os.path.exists(os.path.join(
                here, 'resources/download/{0}/file'.format(BASE_DIR))))
            self.assertFalse(os.path.exists(os.path.join(
                here, 'resources/download/{0}/folder'.format(BASE_DIR))))

        finally:
            shutil.rmtree(download_root)
Beispiel #3
0
    def test_export_table_with_staging(self):
        # Export HDFS data into MySQL via an intermediate staging table,
        # then verify the destination table is not empty.
        mysql = 'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
            USER, PASSWORD, MYSQL_SERVER)
        try:
            metastore = IniFileMetaStore(file=os.path.join(
                os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
            config = Configuration.load(metastore=metastore,
                                        readonly=False,
                                        accepts_nulls=True)
            cmd = SqoopExport.load_preconfigured_job(
                config=config).to_rdbms().table(
                    table="table_name_second").from_hdfs(
                        export_dir="{0}/data_custom_directory".format(
                            BASE_DIR)).with_staging_table(
                                staging_table="stag").run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                mysql, "'SELECT * FROM table_name_second'")
            self.assertNotEqual(result.stdout.split(' ')[0], 'Empty',
                                result.stdout)
        finally:
            # Clean both the destination and the staging table.
            shell.execute_shell_command(mysql,
                                        "'DELETE FROM table_name_second'")
            shell.execute_shell_command(mysql, "'DELETE FROM stag'")
Beispiel #4
0
    def test_download_dir(self):
        # Download the remote dir twice: recursively (nested 'folder' comes
        # along) and non-recursively (only the top-level 'file' is fetched).
        here = os.path.dirname(__file__)
        download_root = os.path.join(here, 'resources/download')

        def local(suffix=''):
            # Local path mirroring the remote BASE_DIR layout.
            return os.path.join(
                here, 'resources/download/{0}{1}'.format(BASE_DIR, suffix))

        try:
            shell.execute_shell_command('mkdir', download_root)
            ftp = sftp_client(HOST, "{0}".format(BASE_DIR), USER, PASSWORD,
                              HKEY_PATH)
            self.assertTrue(ftp.exists())

            ftp.download_dir(local_path=download_root)
            self.assertTrue(os.path.exists(local()))
            self.assertTrue(os.path.exists(local('/file')))
            self.assertTrue(os.path.exists(local('/folder')))
            self.assertTrue(os.path.exists(local('/folder/file')))
            shutil.rmtree(local())

            ftp.download_dir(local_path=download_root, recursive=False)
            self.assertTrue(os.path.exists(local()))
            self.assertTrue(os.path.exists(local('/file')))
            self.assertFalse(os.path.exists(local('/folder')))
            self.assertFalse(os.path.exists(local('/folder/file')))
            shutil.rmtree(local())

        finally:
            shutil.rmtree(download_root)
Beispiel #5
0
def has_command(command):
    """
    Check whether *command* is resolvable via the Unix ``which`` tool.

    Can be used to skip integration tests when a required executable is
    missing.

    :param command: program name to look up on PATH
    :return: True if ``which`` found the executable, False otherwise
    """
    # Run `which` once and reuse the result; the original invoked the
    # shell lookup twice (once for the log line, once for the return).
    available = execute_shell_command("which", command).is_ok()
    # Single-argument print() form works identically on Python 2 and 3.
    print("CONFIGURE TEST CASES..... " + str(available))
    return available
Beispiel #6
0
def has_command(command):
    """
    Check whether *command* is resolvable via the Unix ``which`` tool.

    Can be used to skip integration tests when a required executable is
    missing.

    :param command: program name to look up on PATH
    :return: True if ``which`` found the executable, False otherwise
    """
    # Run `which` once and reuse the result; the original invoked the
    # shell lookup twice (once for the log line, once for the return).
    available = execute_shell_command("which", command).is_ok()
    # Single-argument print() form works identically on Python 2 and 3.
    print("CONFIGURE TEST CASES..... " + str(available))
    return available
Beispiel #7
0
 def test_run_producer(self):
     # Start the console producer in a daemon thread, confirm its process
     # shows up in `ps`, then kill it by PID (second ps column).
     marker = "kafka.producer.ConsoleProducer --broker-list {0}:{1} --topic test123".format(
         CLUSTER_NAME, PORT)
     producer = KafkaThreadProducer()
     producer.daemon = True
     producer.start()
     sleep(TIME)
     cmd = shell.execute_shell_command('ps aux | grep -i kafka')
     self.assertTrue(marker in cmd.stdout, cmd.stdout)
     for line in cmd.stdout.split("\n"):
         if marker in line:
             shell.execute_shell_command(
                 'kill -9 {0}'.format(line.split()[1]))
Beispiel #8
0
    def test_download_dir_invalid_path_sftp(self):
        # Point the client at a remote *file* and request a directory
        # download; the cleanup in finally must still succeed.
        download_root = os.path.join(os.path.dirname(__file__),
                                     'resources/download')
        try:
            shell.execute_shell_command('mkdir', download_root)
            ftp = sftp_client(HOST, "{0}/file".format(BASE_DIR), USER,
                              PASSWORD, HKEY_PATH)
            self.assertTrue(ftp.exists())

            ftp.download_dir(local_path=download_root)

        finally:
            shutil.rmtree(download_root)
Beispiel #9
0
 def test_run_consumer(self):
     # Start the console consumer in a daemon thread, confirm its process
     # shows up in `ps`, then kill it by PID (second ps column).
     marker = ("kafka.consumer.ConsoleConsumer --zookeeper "
               "sandbox.hortonworks.com:2181 --from-beginning --topic test123")
     consumer = KafkaThreadConsumer()
     consumer.daemon = True
     consumer.start()
     sleep(TIME)
     cmd = shell.execute_shell_command('ps aux | grep -i kafka')
     self.assertTrue(marker in cmd.stdout, cmd.stdout)
     for line in cmd.stdout.split("\n"):
         if marker in line:
             shell.execute_shell_command(
                 'kill -9 {0}'.format(line.split()[1]))
Beispiel #10
0
 def test_get_replicas(self):
     """Replica counts: root reports "0", /tmp does not, and a fresh file
     forced to replication factor 1 reports "1"."""
     self.assertEqual("0", HDFS("/").replicas(),
                      "Root dir replicas should be 0")
     # Fixed misleading failure message: this asserts the replica count
     # of /tmp is NOT 0 (the original message said "should be 0").
     self.assertNotEqual("0", HDFS("/tmp").replicas(),
                         "dir replicas should not be 0")
     name = uuid.uuid4()
     hdfs_file = HDFS("/tmp/{0}".format(name))
     hdfs_file.create_file()
     # -setrep -w waits until the new replication factor takes effect.
     shell.execute_shell_command("hadoop dfs",
                                 "-setrep -w 1 /tmp/{0}".format(name))
     if hdfs_file.exists():
         self.assertEqual("1", hdfs_file.replicas(),
                          "Number replicas of file must be 1")
         hdfs_file.delete()
         self.assertFalse(hdfs_file.exists())
Beispiel #11
0
 def test_run_consumer(self):
     # Launch the console consumer in a background daemon thread, verify
     # it is running via `ps`, then kill its process by PID.
     marker = ("kafka.consumer.ConsoleConsumer --zookeeper "
               "sandbox.hortonworks.com:2181 --from-beginning --topic test123")
     consumer = KafkaThreadConsumer()
     consumer.daemon = True
     consumer.start()
     sleep(TIME)
     cmd = shell.execute_shell_command('ps aux | grep -i kafka')
     self.assertTrue(marker in cmd.stdout, cmd.stdout)
     for line in cmd.stdout.split("\n"):
         if marker in line:
             shell.execute_shell_command(
                 'kill -9 {0}'.format(line.split()[1]))
Beispiel #12
0
    def test_import_query(self):
        # Free-form query import using the $CONDITIONS placeholder plus a
        # custom $id substitution, split across mappers by id.
        target = "{0}/custom_directory".format(BASE_DIR)
        try:
            source = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER, rdbms="mysql", username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests")
            cmd = source.query(
                query="'SELECT * FROM table_name WHERE $CONDITIONS AND id>$id'",
                split_by="id", id="2").to_hdfs(target_dir=target).run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs', '-du -s {0}/part-m-*'.format(target))
            self.assertNotEqual(result.stdout.split(' ')[0], '0',
                                result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs',
                                        '-rm -r {0}'.format(target))
Beispiel #13
0
    def test_import_with_connection_manager(self):
        # Import with an explicitly selected JDBC connection manager class.
        target = "{0}/custom_directory".format(BASE_DIR)
        try:
            source = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER, rdbms="mysql", username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests")
            cmd = source.table(table="table_name").to_hdfs(
                target_dir=target).with_attr(
                    connection_manager="org.apache.sqoop.manager.MySQLManager"
                ).run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs', '-du -s {0}/part-m-*'.format(target))
            self.assertNotEqual(result.stdout.split(' ')[0], '0',
                                result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs',
                                        '-rm -r {0}'.format(target))
Beispiel #14
0
    def test_import_with_enclosing(self):
        # Import with escape-character input parsing and MySQL-style output
        # delimiters configured on the job.
        target = "{0}/custom_directory".format(BASE_DIR)
        try:
            source = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER, rdbms="mysql", username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests")
            cmd = source.table(table="table_name").to_hdfs(
                target_dir=target).with_input_parsing(
                    escaped_by="\\").with_output_parsing(
                        escaped_by="\\", mysql_delimiters=True).run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs', '-du -s {0}/part-m-*'.format(target))
            self.assertNotEqual(result.stdout.split(' ')[0], '0',
                                result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs',
                                        '-rm -r {0}'.format(target))
Beispiel #15
0
 def test_broker(self):
     # Free the broker port, start the broker thread in the background,
     # check that port 9010 is listening, then tear everything down.
     shell.execute_shell_command('fuser -k -n tcp {0}'.format(PORT))
     work_dir = LocalFS("/tmp/kafka-test")
     if not work_dir.exists():
         work_dir.create_directory()
     broker = KafkaThreadBroker()
     broker.daemon = True
     broker.start()
     sleep(TIME)
     cmd = shell.execute_shell_command('netstat -lntu')
     self.assertTrue("9010" in cmd.stdout, cmd.stdout)
     work_dir.delete_directory()
     shell.execute_shell_command('fuser -k -n tcp {0}'.format(PORT))
Beispiel #16
0
 def test_get_replicas(self):
     """Replica counts: root reports '0', /tmp does not, and a fresh file
     forced to replication factor 1 reports '1'."""
     self.assertEqual('0', HDFS("/").replicas(),
                      "Root dir replicas should be 0")
     # Fixed misleading failure message: this asserts the replica count
     # of /tmp is NOT 0 (the original message said "should be 0").
     self.assertNotEqual('0', HDFS("/tmp").replicas(),
                         "dir replicas should not be 0")
     name = uuid.uuid4()
     hdfs_file = HDFS("/tmp/{0}".format(name))
     hdfs_file.create_file()
     # -setrep -w waits until the new replication factor takes effect.
     shell.execute_shell_command('hadoop dfs',
                                 '-setrep -w 1 /tmp/{0}'.format(name))
     if hdfs_file.exists():
         self.assertEqual('1',
                          hdfs_file.replicas(),
                          "Number replicas of file must be 1")
         hdfs_file.delete()
         self.assertFalse(hdfs_file.exists())
Beispiel #17
0
 def test_broker(self):
     # Kill anything bound to the broker port, run the broker in a daemon
     # thread, assert port 9010 is listed by netstat, then clean up.
     shell.execute_shell_command('fuser -k -n tcp {0}'.format(PORT))
     scratch = LocalFS("/tmp/kafka-test")
     if not scratch.exists():
         scratch.create_directory()
     broker = KafkaThreadBroker()
     broker.daemon = True
     broker.start()
     sleep(TIME)
     listening = shell.execute_shell_command('netstat -lntu')
     self.assertTrue("9010" in listening.stdout, listening.stdout)
     scratch.delete_directory()
     shell.execute_shell_command('fuser -k -n tcp {0}'.format(PORT))
Beispiel #18
0
    def test_import_to_avrodatafile(self):
        # Import into Avro data files and verify non-empty .avro part files.
        target = "{0}/custom_directory".format(BASE_DIR)
        try:
            source = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER, rdbms="mysql", username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests")
            cmd = source.table(table="table_name").to_hdfs(
                target_dir=target).use_file_format(
                    file_format="--as-avrodatafile").run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs', '-du -s {0}/part-m-*.avro'.format(target))
            self.assertNotEqual(result.stdout.split(' ')[0], '0',
                                result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs',
                                        '-rm -r {0}'.format(target))
Beispiel #19
0
 def test_run_producer(self):
     # Launch the console producer in a background daemon thread, verify
     # it is running via `ps`, then kill its process by PID.
     marker = "kafka.producer.ConsoleProducer --broker-list {0}:{1} --topic test123".format(
         CLUSTER_NAME, PORT)
     producer = KafkaThreadProducer()
     producer.daemon = True
     producer.start()
     sleep(TIME)
     cmd = shell.execute_shell_command('ps aux | grep -i kafka')
     self.assertTrue(marker in cmd.stdout, cmd.stdout)
     for line in cmd.stdout.split("\n"):
         if marker in line:
             shell.execute_shell_command(
                 'kill -9 {0}'.format(line.split()[1]))
Beispiel #20
0
    def test_download_dir_invalid_path_sftp(self):
        # The remote path points at a file, not a directory; the directory
        # download call must not leave the local scratch dir behind.
        download_root = os.path.join(os.path.dirname(__file__),
                                     'resources/download')
        try:
            shell.execute_shell_command('mkdir', download_root)
            ftp = sftp_client(HOST, "{0}/file".format(BASE_DIR), USER,
                              PASSWORD, HKEY_PATH)
            self.assertTrue(ftp.exists())

            ftp.download_dir(local_path=download_root)

        finally:
            shutil.rmtree(download_root)
Beispiel #21
0
 def test_agent(self):
     # Spin up the agent in a daemon thread, wait for the listen port to
     # open, then check netstat reports port 41414.
     agent = AgentThread()
     agent.daemon = True
     agent.start()
     time.sleep(TIME_TO_OPEN_PORT)
     listening = shell.execute_shell_command('netstat -lntu')
     self.assertTrue("41414" in listening.stdout, listening.stdout)
Beispiel #22
0
    def test_import_table(self):
        # Preconfigured import of two columns filtered by a WHERE clause.
        target = "{0}/custom_directory".format(BASE_DIR)
        try:
            metastore = IniFileMetaStore(file=os.path.join(
                os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
            config = Configuration.load(metastore=metastore,
                                        readonly=False,
                                        accepts_nulls=True)
            cmd = SqoopImport.load_preconfigured_job(
                config=config).from_rdbms().table(
                    table="table_name", where="id>2",
                    columns="id,last_name").to_hdfs(target_dir=target).run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs', '-du -s {0}/part-m-*'.format(target))
            self.assertNotEqual(result.stdout.split(' ')[0], '0',
                                result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs',
                                        '-rm -r {0}'.format(target))
Beispiel #23
0
    def test_export_table_with_call(self):
        # Export via stored-procedure call "p" instead of direct inserts,
        # then check the destination table received rows.
        mysql = 'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
            USER, PASSWORD, MYSQL_SERVER)
        try:
            metastore = IniFileMetaStore(file=os.path.join(
                os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
            config = Configuration.load(metastore=metastore,
                                        readonly=False,
                                        accepts_nulls=True)
            cmd = SqoopExport.load_preconfigured_job(
                config=config).to_rdbms().from_hdfs(
                    export_dir="{0}/data_custom_directory".format(BASE_DIR)
                ).call(stored_procedure="p").run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                mysql, "'SELECT * FROM table_name_second'")
            self.assertNotEqual(result.stdout.split(' ')[0], 'Empty',
                                result.stdout)
        finally:
            shell.execute_shell_command(mysql,
                                        "'DELETE FROM table_name_second'")
Beispiel #24
0
    def test_import_with_incremental(self):
        # Full import followed by an incremental "append" import (rows with
        # id > 5) into the same directory; then confirm data landed.
        target = "{0}/custom_directory".format(BASE_DIR)
        password = "{0}/rdbms.password".format(BASE_DIR)
        try:
            first = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER, rdbms="mysql", username="******",
                password_file=password, database="sqoop_tests")
            cmd = first.table(table="table_name").to_hdfs(
                target_dir=target).run()
            self.assertEquals(cmd.status, 0, cmd.stderr)

            second = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER, rdbms="mysql", username="******",
                password_file=password, database="sqoop_tests")
            cmd = second.table(table="table_name").to_hdfs(
                target_dir=target).with_incremental(
                    incremental="append", last_value="5",
                    check_column="id").run()
            self.assertEquals(cmd.status, 0, cmd.stderr)

            result = shell.execute_shell_command(
                'hadoop fs', '-du -s {0}/part-m-*'.format(target))
            self.assertNotEqual(result.stdout.split(' ')[0], '0',
                                result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs',
                                        '-rm -r {0}'.format(target))
Beispiel #25
0
    def test_command(self):
        """DistCp /tmp/foo -> /tmp/bar with 12 mappers, update semantics and
        the preserve flags, then verify both expected files arrived."""
        _host = "sandbox.hortonworks.com"
        # NOTE: the original chained .preserve_checksum_type() twice;
        # the redundant duplicate call has been removed.
        cmd = DistCp().take(path="hdfs://{host}:8020/tmp/foo".format(host=_host)).copy_to(
            path="hdfs://{host}:8020/tmp/bar".format(host=_host)
        ).use(
            mappers=12
        ).update_destination(
            synchronize=True
        ).preserve_replication_number()\
            .preserve_block_size()\
            .preserve_checksum_type()\
            .preserve_group()\
            .preserve_user()\
            .run()

        self.assertEquals(cmd.status, 0, cmd.stderr)

        # Both source files must exist at the destination after the copy.
        self.assertEquals(shell.execute_shell_command('hadoop', 'fs', '-test', '-e', '/tmp/bar/test.txt').status, 0)
        self.assertEquals(shell.execute_shell_command('hadoop', 'fs', '-test', '-e', '/tmp/bar/test2.txt').status, 0)
Beispiel #26
0
    def test_import_to_avrodatafile(self):
        # Import into Avro data files; the .avro part files must be
        # non-empty afterwards.
        target = "{0}/custom_directory".format(BASE_DIR)
        try:
            source = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER, rdbms="mysql", username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests")
            cmd = source.table(table="table_name").to_hdfs(
                target_dir=target).use_file_format(
                    file_format="--as-avrodatafile").run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs', '-du -s {0}/part-m-*.avro'.format(target))
            self.assertNotEqual(result.stdout.split(' ')[0], '0',
                                result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs',
                                        '-rm -r {0}'.format(target))
Beispiel #27
0
    def test_import_table(self):
        # Preconfigured job importing id,last_name where id>2, then verify
        # non-empty part files in the target directory.
        target = "{0}/custom_directory".format(BASE_DIR)
        try:
            metastore = IniFileMetaStore(file=os.path.join(
                os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
            config = Configuration.load(metastore=metastore,
                                        readonly=False,
                                        accepts_nulls=True)
            cmd = SqoopImport.load_preconfigured_job(
                config=config).from_rdbms().table(
                    table="table_name", where="id>2",
                    columns="id,last_name").to_hdfs(target_dir=target).run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs', '-du -s {0}/part-m-*'.format(target))
            self.assertNotEqual(result.stdout.split(' ')[0], '0',
                                result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs',
                                        '-rm -r {0}'.format(target))
Beispiel #28
0
    def test_import_with_enclosing(self):
        # Import configured with escape characters on both input and output
        # parsing plus MySQL-style output delimiters.
        target = "{0}/custom_directory".format(BASE_DIR)
        try:
            source = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER, rdbms="mysql", username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests")
            cmd = source.table(table="table_name").to_hdfs(
                target_dir=target).with_input_parsing(
                    escaped_by="\\").with_output_parsing(
                        escaped_by="\\", mysql_delimiters=True).run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs', '-du -s {0}/part-m-*'.format(target))
            self.assertNotEqual(result.stdout.split(' ')[0], '0',
                                result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs',
                                        '-rm -r {0}'.format(target))
Beispiel #29
0
    def test_import_to_hive(self):
        """Import table_name from MySQL directly into a Hive table.

        The result assertions below are commented out, so this currently
        only checks that the import job runs and the Hive table is dropped
        afterwards — presumably disabled pending a stable warehouse layout;
        TODO confirm before re-enabling.
        """
        # Sqoop's Hive import stages data under the user's HDFS home dir;
        # remove any leftover staging path from a previous run.
        _path = HDFS(os.path.join('/user', getpass.getuser(), 'table_name'))
        try:
            if _path.exists():
                _path.delete(recursive=_path.is_directory())
                # shell.execute_shell_command('hadoop fs', '-rm -r /user/', getpass.getuser(), '/table_name')
            cmd = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER,
                rdbms="mysql",
                username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests").table(
                    table="table_name").to_hive().run()

            # self.assertEquals(cmd.status, 0, cmd.stderr)
            # result = shell.execute_shell_command('hadoop fs', '-du -s /user/hive/warehouse/table_name/part-m-*')
            # self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
        finally:

            # Always drop the Hive table created by the import.
            shell.execute_shell_command(
                'hive', "-e 'DROP TABLE IF EXISTS table_name'")
Beispiel #30
0
    def test_import_to_hive(self):
        """Import table_name from MySQL directly into a Hive table.

        The result assertions below are commented out, so this currently
        only checks that the import job runs and the Hive table is dropped
        afterwards — presumably disabled pending a stable warehouse layout;
        TODO confirm before re-enabling.
        """
        # Sqoop's Hive import stages data under the user's HDFS home dir;
        # remove any leftover staging path from a previous run.
        _path = HDFS(os.path.join('/user', getpass.getuser(), 'table_name'))
        try:
            if _path.exists():
                _path.delete(recursive=_path.is_directory())
                # shell.execute_shell_command('hadoop fs', '-rm -r /user/', getpass.getuser(), '/table_name')
            cmd = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER,
                rdbms="mysql",
                username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests"
            ).table(
                table="table_name"
            ).to_hive().run()

            # self.assertEquals(cmd.status, 0, cmd.stderr)
            # result = shell.execute_shell_command('hadoop fs', '-du -s /user/hive/warehouse/table_name/part-m-*')
            # self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
        finally:

            # Always drop the Hive table created by the import.
            shell.execute_shell_command('hive', "-e 'DROP TABLE IF EXISTS table_name'")
Beispiel #31
0
    def test_import_with_connection_manager(self):
        # Force the MySQLManager connection manager class on the import job
        # and verify non-empty output in the target directory.
        target = "{0}/custom_directory".format(BASE_DIR)
        try:
            source = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER, rdbms="mysql", username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests")
            cmd = source.table(table="table_name").to_hdfs(
                target_dir=target).with_attr(
                    connection_manager="org.apache.sqoop.manager.MySQLManager"
                ).run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs', '-du -s {0}/part-m-*'.format(target))
            self.assertNotEqual(result.stdout.split(' ')[0], '0',
                                result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs',
                                        '-rm -r {0}'.format(target))
Beispiel #32
0
 def setUpClass(cls):
     # Recreate the testdb database from scratch with one comma-delimited
     # text table used by the tests.
     shell.execute_shell_command('hive -e \"drop database if EXISTS testdb CASCADE\"')
     shell.execute_shell_command('hive -e \"create database testdb\"')
     create_table = ('hive -e \"create table testdb.some_table(strings STRING) '
                     'ROW FORMAT DELIMITED '
                     'FIELDS TERMINATED BY \\",\\" '
                     'STORED AS TEXTFILE\"')
     shell.execute_shell_command(create_table)
Beispiel #33
0
    def test_import_query(self):
        # Query-based import with the mandatory $CONDITIONS token and a $id
        # substitution parameter, split across mappers by id.
        target = "{0}/custom_directory".format(BASE_DIR)
        try:
            source = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER, rdbms="mysql", username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests")
            cmd = source.query(
                query="'SELECT * FROM table_name WHERE $CONDITIONS AND id>$id'",
                split_by="id", id="2").to_hdfs(target_dir=target).run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs', '-du -s {0}/part-m-*'.format(target))
            self.assertNotEqual(result.stdout.split(' ')[0], '0',
                                result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs',
                                        '-rm -r {0}'.format(target))
Beispiel #34
0
 def tearDownClass(cls):
     # Remove the HDFS fixtures, then drop the MySQL test database.
     for hdfs_path in ('{0}/rdbms.password'.format(BASE_DIR),
                       '{0}/data_custom_directory'.format(BASE_DIR)):
         shell.execute_shell_command('hadoop fs',
                                     '-rm -r {0}'.format(hdfs_path))
     shell.execute_shell_command(
         'mysql --user {0} --password={1} --host={2} -e'.format(
             USER, PASSWORD, MYSQL_SERVER),
         "'DROP DATABASE IF EXISTS sqoop_tests'")
Beispiel #35
0
    def test_command(self):
        """DistCp /tmp/foo -> /tmp/bar with 12 mappers, update semantics and
        the preserve flags, then verify both expected files arrived."""
        _host = "sandbox.hortonworks.com"
        # NOTE: the original chained .preserve_checksum_type() twice;
        # the redundant duplicate call has been removed.
        cmd = DistCp().take(path="hdfs://{host}:8020/tmp/foo".format(host=_host)).copy_to(
            path="hdfs://{host}:8020/tmp/bar".format(host=_host)
        ).use(
            mappers=12
        ).update_destination(
            synchronize=True
        ).preserve_replication_number()\
            .preserve_block_size()\
            .preserve_checksum_type()\
            .preserve_group()\
            .preserve_user()\
            .run()

        self.assertEquals(cmd.status, 0, cmd.stderr)

        # Both source files must exist at the destination after the copy.
        self.assertEquals(
            shell.execute_shell_command('hadoop', 'fs', '-test', '-e',
                                        '/tmp/bar/test.txt').status, 0)
        self.assertEquals(
            shell.execute_shell_command('hadoop', 'fs', '-test', '-e',
                                        '/tmp/bar/test2.txt').status, 0)
Beispiel #36
0
    def copy_file_from_local(self, path):
        """Upload *path* into HDFS under /tmp/, remove the local copy and
        return the resulting HDFS path."""
        import os

        execute_shell_command("hadoop", "fs", "-copyFromLocal", path, "/tmp/")
        filename = os.path.split(path)[1]
        os.remove(path)
        return "/tmp/" + filename
Beispiel #37
0
 def delete_file_in_hdfs(self, path="/tmp/data"):
     # The default fixture path is removed recursively; any other path is
     # treated as a single file.
     remove_flag = "-rm -R" if path == "/tmp/data" else "-rm"
     execute_shell_command("hadoop", "fs", remove_flag, path)
Beispiel #38
0
 def tearDownClass(cls):
     # Drop the hive test database created for this suite.
     drop_db = 'hive -e \"drop database if EXISTS testdb\"'
     shell.execute_shell_command(drop_db)
Beispiel #39
0
 def setUpClass(cls):
     # Start from a clean slate: drop the test database and its tables.
     drop_db = 'hive -e "drop database if EXISTS testdb CASCADE;"'
     shell.execute_shell_command(drop_db)
Beispiel #40
0
 def delete_file_in_hdfs(self, path="/tmp/hive_table"):
     # Recursive removal for the default fixture path, plain -rm otherwise.
     remove_flag = "-rm -R" if path == "/tmp/hive_table" else "-rm"
     execute_shell_command("hadoop", "fs", remove_flag, path)
Beispiel #41
0
 def setUpClass(cls):
     """Provision HDFS fixtures and a populated MySQL 'sqoop_tests' database.

     HDFS receives the rdbms password file plus data_custom_directory holding
     data_to_export.txt. MySQL is rebuilt with three identical tables, four
     rows in table_name and a stored procedure 'p' that inserts into
     table_name_second.
     """
     def run_sql(statement, use_db=False):
         # One mysql CLI invocation; when use_db is set the statement runs
         # against the sqoop_tests database.
         target = 'sqoop_tests ' if use_db else ''
         shell.execute_shell_command(
             'mysql --user {0} --password={1} --host={2} {3}-e'.format(
                 USER, PASSWORD, MYSQL_SERVER, target),
             statement)

     here = os.path.dirname(__file__)
     data_dir = os.path.join(BASE_DIR, "data_custom_directory")

     # Stage the password file and the export fixture in HDFS.
     shell.execute_shell_command(
         'hadoop fs', '-rm -r {0}/rdbms.password'.format(BASE_DIR))
     shell.execute_shell_command(
         'hadoop fs', '-copyFromLocal',
         os.path.join(here, 'resources/sqoop/rdbms.password'), BASE_DIR)
     shell.execute_shell_command('hadoop fs', '-mkdir', data_dir)
     shell.execute_shell_command(
         'hadoop fs', '-copyFromLocal',
         os.path.join(here, 'resources/sqoop/data_to_export.txt'), data_dir)

     # Recreate the database and its three identical tables.
     run_sql("'DROP DATABASE IF EXISTS sqoop_tests'")
     run_sql("'CREATE DATABASE sqoop_tests'")
     schema = ("(id INT(11) NOT NULL AUTO_INCREMENT,"
               "last_name varchar(255) NOT NULL, first_name varchar(255), city varchar(255),"
               "PRIMARY KEY (id))")
     for table in ('table_name', 'table_name_second', 'stag'):
         run_sql("'CREATE TABLE IF NOT EXISTS {0}{1}'".format(table, schema),
                 use_db=True)

     # Seed table_name with one minimal row and three full rows.
     run_sql("\"INSERT INTO table_name (last_name) VALUES ('Bob')\"",
             use_db=True)
     for last_name in ('Alex', 'Merry', 'Bob'):
         run_sql("\"INSERT INTO table_name (last_name, first_name, city) "
                 "VALUES ('{0}','Log','New York')\"".format(last_name),
                 use_db=True)

     # Stored procedure used by the export-via-procedure test.
     run_sql("\"delimiter //\ncreate procedure p(in p_id INT, in p_last_name varchar(255), "
             "in p_first_name varchar(255), in p_city varchar(255)) begin insert into table_name_second("
             "id, last_name, first_name, city) values(p_id,p_last_name,p_first_name,p_city);\nend//\"",
             use_db=True)
Beispiel #42
0
 def tearDownClass(cls):
     """Undo setUpClass: remove the HDFS fixtures and drop the MySQL db."""
     for hdfs_path in ('{0}/rdbms.password'.format(BASE_DIR),
                       '{0}/data_custom_directory'.format(BASE_DIR)):
         shell.execute_shell_command('hadoop fs', '-rm -r ' + hdfs_path)
     shell.execute_shell_command(
         'mysql --user {0} --password={1} --host={2} -e'.format(
             USER, PASSWORD, MYSQL_SERVER),
         "'DROP DATABASE IF EXISTS sqoop_tests'")
Beispiel #43
0
    def copy_file_from_local(self, path):
        """Copy a local file into HDFS /tmp/ and remove the local copy.

        :param path: local filesystem path of the file to upload
        :return: destination path inside HDFS ("/tmp/<basename>")
        """
        import os  # moved ahead of first use; left function-local per the snippet's style

        execute_shell_command("hadoop", "fs", "-copyFromLocal", path, "/tmp/")
        # NOTE(review): deletion happens regardless of the copy's outcome;
        # verify execute_shell_command surfaces failures.
        os.remove(path)
        return "/tmp/" + os.path.basename(path)
Beispiel #44
0
 def tearDown(self):
     """Kill whatever process is still bound to the flume TCP port."""
     kill_port_owner = 'fuser -k -n tcp {0}'.format(PORT)
     shell.execute_shell_command(kill_port_owner)
Beispiel #45
0
 def setUp(self):
     """Free the flume TCP port before each test by killing its owner."""
     super(TestFlume, self).setUp()
     free_port = 'fuser -k -n tcp {0}'.format(PORT)
     shell.execute_shell_command(free_port)
Beispiel #46
0
 def setUpClass(cls):
     """Prepare shared fixtures for the sqoop tests.

     Stages the rdbms password file and an export data file in HDFS, then
     rebuilds a MySQL ``sqoop_tests`` database with three tables, four rows
     in ``table_name`` and a stored procedure ``p`` inserting into
     ``table_name_second``.
     """
     # Refresh the password file that sqoop reads via --password-file.
     shell.execute_shell_command(
         'hadoop fs', '-rm -r {0}/rdbms.password'.format(BASE_DIR))
     shell.execute_shell_command(
         'hadoop fs', '-copyFromLocal',
         os.path.join(os.path.dirname(__file__),
                      'resources/sqoop/rdbms.password'), BASE_DIR)
     # Stage the data file that the export tests push into MySQL.
     shell.execute_shell_command(
         'hadoop fs', '-mkdir',
         os.path.join(BASE_DIR, "data_custom_directory"))
     shell.execute_shell_command(
         'hadoop fs', '-copyFromLocal',
         os.path.join(os.path.dirname(__file__),
                      'resources/sqoop/data_to_export.txt'),
         os.path.join(BASE_DIR, "data_custom_directory"))
     # Rebuild the MySQL database from scratch.
     shell.execute_shell_command(
         'mysql --user {0} --password={1} --host={2} -e'.format(
             USER, PASSWORD, MYSQL_SERVER),
         "'DROP DATABASE IF EXISTS sqoop_tests'")
     shell.execute_shell_command(
         'mysql --user {0} --password={1} --host={2} -e'.format(
             USER, PASSWORD, MYSQL_SERVER), "'CREATE DATABASE sqoop_tests'")
     # Three structurally identical tables: source, target, and staging.
     shell.execute_shell_command(
         'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
             USER, PASSWORD, MYSQL_SERVER),
         "'CREATE TABLE IF NOT EXISTS table_name(id INT(11) NOT NULL AUTO_INCREMENT,"
         "last_name varchar(255) NOT NULL, first_name varchar(255), city varchar(255),"
         "PRIMARY KEY (id))'")
     shell.execute_shell_command(
         'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
             USER, PASSWORD, MYSQL_SERVER),
         "'CREATE TABLE IF NOT EXISTS table_name_second(id INT(11) NOT NULL AUTO_INCREMENT,"
         "last_name varchar(255) NOT NULL, first_name varchar(255), city varchar(255),"
         "PRIMARY KEY (id))'")
     shell.execute_shell_command(
         'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
             USER, PASSWORD, MYSQL_SERVER),
         "'CREATE TABLE IF NOT EXISTS stag(id INT(11) NOT NULL AUTO_INCREMENT,"
         "last_name varchar(255) NOT NULL, first_name varchar(255), city varchar(255),"
         "PRIMARY KEY (id))'")
     # Seed table_name: one row with only last_name, then three full rows.
     shell.execute_shell_command(
         'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
             USER, PASSWORD, MYSQL_SERVER),
         "\"INSERT INTO table_name (last_name) VALUES ('Bob')\"")
     shell.execute_shell_command(
         'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
             USER, PASSWORD, MYSQL_SERVER),
         "\"INSERT INTO table_name (last_name, first_name, city) VALUES ('Alex','Log','New York')\""
     )
     shell.execute_shell_command(
         'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
             USER, PASSWORD, MYSQL_SERVER),
         "\"INSERT INTO table_name (last_name, first_name, city) VALUES ('Merry','Log','New York')\""
     )
     shell.execute_shell_command(
         'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
             USER, PASSWORD, MYSQL_SERVER),
         "\"INSERT INTO table_name (last_name, first_name, city) VALUES ('Bob','Log','New York')\""
     )
     # Stored procedure 'p' inserts a full row into table_name_second; the
     # delimiter switch lets the CLI accept the multi-statement body.
     shell.execute_shell_command(
         'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(
             USER, PASSWORD, MYSQL_SERVER),
         "\"delimiter //\ncreate procedure p(in p_id INT, in p_last_name varchar(255), "
         "in p_first_name varchar(255), in p_city varchar(255)) begin insert into table_name_second("
         "id, last_name, first_name, city) values(p_id,p_last_name,p_first_name,p_city);\nend//\""
     )
Beispiel #47
0
 def setUp(self):
     """Create /tmp/foo with two empty files plus an empty /tmp/bar in HDFS."""
     super(TestDistCp, self).setUp()
     for fs_args in ('-mkdir /tmp/foo',
                     '-mkdir /tmp/bar',
                     '-touchz /tmp/foo/test.txt',
                     '-touchz /tmp/foo/test2.txt'):
         shell.execute_shell_command('hadoop fs', fs_args)
Beispiel #48
0
 def tearDown(self):
     """Remove the distcp source and destination trees from HDFS."""
     for doomed in ('/tmp/foo', '/tmp/bar'):
         shell.execute_shell_command('hadoop fs', '-rm -r ' + doomed)
Beispiel #49
0
 def tearDown(self):
     """Clean up the HDFS scratch directories used by the distcp tests."""
     hdfs = shell.execute_shell_command
     hdfs('hadoop fs', '-rm -r /tmp/foo')
     hdfs('hadoop fs', '-rm -r /tmp/bar')
Beispiel #50
0
 def setUp(self):
     """Populate HDFS with the distcp source (/tmp/foo) and target (/tmp/bar)."""
     super(TestDistCp, self).setUp()
     hdfs = shell.execute_shell_command
     hdfs('hadoop fs', '-mkdir /tmp/foo')
     hdfs('hadoop fs', '-mkdir /tmp/bar')
     hdfs('hadoop fs', '-touchz /tmp/foo/test.txt')
     hdfs('hadoop fs', '-touchz /tmp/foo/test2.txt')