def test_delete_dir(self):
    """Upload the directory holding this test file to HDFS, then delete it recursively."""
    source_dir = os.path.dirname(os.path.realpath(__file__))
    uploader = LocalFS(source_dir)
    remote_dir = HDFS(os.path.join("/tmp", str(uuid.uuid4())))

    uploader.copy_to_hdfs(remote_dir.path)
    self.assertTrue(remote_dir.exists(), "Target HDFS dir does not exists")

    remote_dir.delete(recursive=True)
    self.assertFalse(remote_dir.exists(), "Target HDFS dir was not deleted")
def test_file_size(self):
    """Copy this test file to HDFS and compare the HDFS size with the local size."""
    source = LocalFS(os.path.realpath(__file__))
    remote_path = os.path.join("/tmp", str(uuid.uuid4()))
    remote = HDFS(remote_path)
    try:
        source.copy_to_hdfs(remote.path)
        self.assertTrue(remote.exists(), "Local file was not copied to HDFS")
        self.assertEqual(remote.size(), source.size())
    finally:
        # Always drop the uploaded copy, even when an assertion fails.
        remote.delete()
def test_create_directory(self):
    """Create a uniquely named directory under /tmp on HDFS and remove it again."""
    target_path = os.path.join("/tmp", str(uuid.uuid4()))
    folder = HDFS(target_path)
    self.assertFalse(folder.exists(), "Directory is already exists")
    try:
        folder.create_directory()
        self.assertTrue(folder.exists(), "Directory was not created")
        self.assertTrue(folder.is_directory())
    finally:
        # Clean up and verify the cleanup itself worked.
        folder.delete(recursive=True)
        self.assertFalse(folder.exists(), "Directory was not removed")
def test_create_file(self):
    """Create a uniquely named file under /tmp on HDFS and remove it again."""
    target_path = os.path.join("/tmp", str(uuid.uuid4()))
    created = HDFS(target_path)
    self.assertFalse(created.exists(), "File is already exists")
    try:
        created.create_file()
        self.assertTrue(created.exists(), "File was not created")
        self.assertFalse(created.is_directory(), "New file should not be a folder")
    finally:
        # Clean up and verify the cleanup itself worked.
        created.delete()
        self.assertFalse(created.exists(), "File was not removed")
def test_list_files(self):
    """Create /tmp/test.txt on HDFS and check it shows up in the listing of /tmp."""
    basedir = HDFS("/tmp")
    new_file = HDFS("/tmp/test.txt")
    try:
        new_file.create(directory=False)
        self.assertTrue(new_file.exists(), "File was not created")
        files = basedir.list_files()
        # assertIn reports both the member and the container on failure,
        # whereas assertTrue(x in y) only says "False is not true".
        self.assertIn(new_file, files)
    finally:
        new_file.delete()
        self.assertFalse(new_file.exists(), "File was not deleted")
def test_dir_size(self):
    """Copy the resources/test_dir_size directory to HDFS and compare sizes."""
    here = os.path.dirname(os.path.realpath(__file__))
    fixture_path = os.path.join(here, "resources", "test_dir_size")
    source = LocalFS(fixture_path)
    remote = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    try:
        source.copy_to_hdfs(remote.path)
        self.assertTrue(remote.exists(), "Local file was not copied to HDFS")
        local_size = source.size()
        self.assertEqual(remote.size(), local_size)
    finally:
        # Always drop the uploaded tree, even when an assertion fails.
        remote.delete(recursive=True)
def test_get_replicas(self):
    """Check the replica count reported for "/", for "/tmp" and for a new file.

    A uniquely named file is created under /tmp, its replication is set to 1
    through the shell, and the value returned by ``replicas()`` is compared
    against "1". The file is removed in ``finally`` so a failing assertion
    does not leave it behind.
    """
    self.assertEqual("0", HDFS("/").replicas(), "Root dir replicas should be 0")
    # The message now matches the check: the value is expected NOT to be "0".
    self.assertNotEqual("0", HDFS("/tmp").replicas(), "/tmp dir replicas should not be 0")
    name = uuid.uuid4()
    hdfs_file = HDFS("/tmp/{0}".format(name))
    try:
        hdfs_file.create_file()
        shell.execute_shell_command("hadoop dfs", "-setrep -w 1 /tmp/{0}".format(name))
        # Fail loudly if the file is missing instead of silently skipping
        # the replica assertion.
        self.assertTrue(hdfs_file.exists(), "File was not created")
        self.assertEqual("1", hdfs_file.replicas(), "Number replicas of file must be 1")
    finally:
        hdfs_file.delete()
    self.assertFalse(hdfs_file.exists())
def test_get_description(self):
    """Create /tmp/bar on HDFS and verify the fields of its description.

    Checks ``name``, ``size``, ``owner`` and ``create_date`` of the object
    returned by ``get_description()``, then removes the directory.
    """
    directory = HDFS("/tmp/bar")
    try:
        directory.create()
        # Fetch the description once instead of once per assertion.
        description = directory.get_description()
        self.assertEqual(description.name, "/tmp/bar")
        self.assertEqual(description.size, 0)
        self.assertEqual(description.owner, getpass.getuser())
        self.assertIsNone(description.create_date)
    finally:
        directory.delete(recursive=True)
        # Verify removal via exists(); the original asserted on the result
        # of a second delete() call, which does not check the stated condition.
        self.assertFalse(directory.exists(), "File was not deleted")
def test_get_replicas(self):
    """Check the replica count reported for '/', for '/tmp' and for a new file.

    A uniquely named file is created under /tmp, its replication is set to 1
    through the shell, and the value returned by ``replicas()`` is compared
    against '1'. The file is removed in ``finally`` so a failing assertion
    does not leave it behind.
    """
    self.assertEqual('0', HDFS("/").replicas(), "Root dir replicas should be 0")
    # The message now matches the check: the value is expected NOT to be '0'.
    self.assertNotEqual('0', HDFS("/tmp").replicas(), "/tmp dir replicas should not be 0")
    name = uuid.uuid4()
    hdfs_file = HDFS("/tmp/{0}".format(name))
    try:
        hdfs_file.create_file()
        shell.execute_shell_command('hadoop dfs', '-setrep -w 1 /tmp/{0}'.format(name))
        # Fail loudly if the file is missing instead of silently skipping
        # the replica assertion.
        self.assertTrue(hdfs_file.exists(), "File was not created")
        self.assertEqual('1', hdfs_file.replicas(), "Number replicas of file must be 1")
    finally:
        hdfs_file.delete()
    self.assertFalse(hdfs_file.exists())
def test_copy_to_local(self):
    """Create a file on HDFS, copy it to /tmp/copied_from_hdfs and clean up both."""
    new_file = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    local_path = os.path.join("/tmp", "copied_from_hdfs")
    self.assertFalse(os.path.exists(local_path))
    try:
        new_file.create_file()
        self.assertTrue(new_file.exists(), "File was not created")
        new_file.copy_to_local(local_path)
        self.assertTrue(os.path.exists(local_path), "File was not copied from HDFS")
    finally:
        # Run both cleanups before asserting on either, so one failed
        # check cannot leave the other resource behind.
        try:
            new_file.delete()
        finally:
            # Guarded: if the copy never happened, an unconditional
            # os.remove would raise OSError and hide the real failure.
            if os.path.exists(local_path):
                os.remove(local_path)
        self.assertFalse(new_file.exists(), "File was not removed")
        self.assertFalse(os.path.exists(local_path))
def test_copy_empty_dir(self):
    """Copy an empty HDFS directory to a new destination and remove both."""
    source = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    target = HDFS("/tmp/dst_" + str(uuid.uuid4()))
    try:
        source.create(directory=True)
        self.assertTrue(source.exists(), "directory not found")
        self.assertFalse(target.exists(), "dst directory is already exists")

        source.copy(target)
        self.assertTrue(target.exists(), "directory was not copied")
    finally:
        # Remove both directories first, then confirm they are gone.
        source.delete(True)
        target.delete(True)
        self.assertFalse(source.exists(), "File was not deleted")
        self.assertFalse(target.exists(), "File was not deleted")
def test_get_modification_time(self):
    """Check that a new HDFS directory and file report today's date as modification date.

    Only the calendar date (``%Y-%m-%d``) is compared, and the reference date
    is taken before the paths are created.
    """
    day_format = "%Y-%m-%d"
    today = datetime.now().strftime(day_format)
    folder = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    plain_file = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    try:
        folder.create_directory()
        plain_file.create_file()
        self.assertTrue(folder.exists(), "Dir was not created")
        self.assertTrue(plain_file.exists(), "File was not created")
        self.assertEqual(today, folder.modification_time().strftime(day_format),
                         "Error: dir modification time")
        self.assertEqual(today, plain_file.modification_time().strftime(day_format),
                         "Error: File modification time")
    finally:
        folder.delete_directory()
        plain_file.delete()
def test_move_file(self):
    """Move an HDFS file to a new path; the source must vanish and the target appear."""
    source = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    target = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    try:
        source.create_file()
        self.assertTrue(source.exists(), "File was not created")
        self.assertFalse(target.exists(), "Destination file should not exist")

        source.move(target.path)
        self.assertFalse(source.exists(), "Original file should be deleted")
        self.assertTrue(target.exists(), "Destination file should be created")
    finally:
        # Delete both paths first, then confirm they are gone.
        source.delete()
        target.delete()
        self.assertFalse(source.exists(), "File was not deleted")
        self.assertFalse(target.exists(), "destination file was not deleted")
def test_recursive_list_files(self):
    """Create /tmp/test123/test.txt and check the recursive listing of /tmp.

    Both the folder and the nested file are expected in the result of
    ``recursive_list_files()``.
    """
    basedir = HDFS("/tmp")
    new_folder = HDFS("/tmp/test123")
    new_file = HDFS("/tmp/test123/test.txt")
    try:
        new_folder.create(directory=True)
        self.assertTrue(new_folder.exists(), "Folder was not created")
        new_file.create(directory=False)
        self.assertTrue(new_file.exists(), "File was not created")
        files = basedir.recursive_list_files()
        # assertIn shows the missing member and the listing on failure.
        self.assertIn(new_file, files)
        self.assertIn(new_folder, files)
    finally:
        new_folder.delete(recursive=True)
        # Check the folder that was actually deleted; the original checked
        # the nested file while reporting "Folder was not deleted".
        self.assertFalse(new_folder.exists(), "Folder was not deleted")
def test_move_empty_dir(self):
    """Move an empty HDFS directory; the source must vanish and the target appear."""
    source = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    target = HDFS("/tmp/dst_" + str(uuid.uuid4()))
    try:
        source.create(directory=True)
        self.assertTrue(source.exists(), "directory not found")
        self.assertFalse(target.exists(), "destination directory is already exists")

        source.move(target.path)
        self.assertFalse(source.exists(), "Original directory was not removed")
        self.assertTrue(target.exists(), "destination directory was not created")
    finally:
        # Remove both directories first, then confirm they are gone.
        source.delete(True)
        target.delete(True)
        self.assertFalse(source.exists(), "File was not deleted")
        self.assertFalse(target.exists(), "File was not deleted")
def test_copy_file(self):
    """Copy an HDFS file to a new path; both source and target must exist afterwards."""
    source = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    target = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    try:
        source.create_file()
        self.assertTrue(source.exists(), "original file not found")
        self.assertFalse(target.exists(), "destination file already exists")

        # NOTE(review): the file was already created by create_file() above;
        # this second create() looks redundant - confirm before removing.
        source.create()
        source.copy(target)
        self.assertTrue(target.exists(), "file was not copied")
        self.assertTrue(source.exists(), "original file should not be deleted")
    finally:
        # Delete both paths first, then confirm they are gone.
        source.delete()
        target.delete()
        self.assertFalse(source.exists(), "File was not deleted")
        self.assertFalse(target.exists(), "destination file was not deleted")
def test_distcp(self):
    """Copy /tmp/test_dist.txt into /tmp/bar via ``distcp`` and verify the copy exists.

    Source and destination are addressed with fully qualified ``hdfs://``
    URIs on port 8020 of the hard-coded sandbox host.
    """
    directory = HDFS("/tmp/bar")
    new_file = HDFS("/tmp/test_dist.txt")
    _host = "sandbox.hortonworks.com"
    try:
        # Fixtures are created inside the try so that a failure while
        # creating the second one still cleans up the first.
        directory.create()
        new_file.create(directory=False)
        self.assertTrue(new_file.exists(), "File was not created")
        _file = HDFS("hdfs://{host}:8020/tmp/test_dist.txt".format(host=_host))
        _file.distcp(dest="hdfs://{host}:8020/tmp/bar/test_dist.txt".format(host=_host))
        file_after_copy = HDFS("/tmp/bar/test_dist.txt")
        self.assertTrue(file_after_copy.exists(), "File was not copied")
    finally:
        new_file.delete()
        directory.delete(recursive=True)
        self.assertFalse(new_file.exists(), "File was not deleted")
        # Verify removal via exists(); the original asserted on the result
        # of a second delete() call, which does not check the stated condition.
        self.assertFalse(directory.exists(), "File was not deleted")
def test_create(self):
    """Exercise ``create`` for both a plain file and a directory on HDFS."""
    plain_file = HDFS(os.path.join("/tmp", str(uuid.uuid4())))
    folder = HDFS(os.path.join("/tmp", str(uuid.uuid4())))

    # Part 1: create(directory=False) must produce a regular file.
    try:
        plain_file.create(directory=False)
        self.assertTrue(plain_file.exists(), "File was not created")
        self.assertFalse(plain_file.is_directory(), "New file should not be a directory")
    finally:
        plain_file.delete()
        self.assertFalse(plain_file.exists(), "File was not removed")

    # Part 2: create(directory=True) must produce a directory.
    try:
        folder.create(directory=True)
        self.assertTrue(folder.exists(), "Directory was not created")
        self.assertTrue(folder.is_directory(), "New file should be a directory")
    finally:
        folder.delete(recursive=True)
        self.assertFalse(folder.exists(), "Directory was not removed")
def test_import_to_hive(self):
    """Run a Sqoop import of MySQL table ``table_name`` into Hive.

    Any existing /user/<current user>/table_name path on HDFS is removed
    first. The Hive table is dropped afterwards regardless of the outcome.
    The result checks are currently commented out, so the test only
    verifies that the import runs without raising.
    """
    user_home_table = os.path.join('/user', getpass.getuser(), 'table_name')
    _path = HDFS(user_home_table)
    try:
        if _path.exists():
            _path.delete(recursive=_path.is_directory())
            # shell.execute_shell_command('hadoop fs', '-rm -r /user/', getpass.getuser(), '/table_name')
        source = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER,
            rdbms="mysql",
            username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests")
        cmd = source.table(table="table_name").to_hive().run()
        # self.assertEquals(cmd.status, 0, cmd.stderr)
        # result = shell.execute_shell_command('hadoop fs', '-du -s /user/hive/warehouse/table_name/part-m-*')
        # self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        shell.execute_shell_command(
            'hive', "-e 'DROP TABLE IF EXISTS table_name'")
def test_import_to_hive(self):
    """Import MySQL table ``table_name`` into Hive through Sqoop.

    A pre-existing /user/<current user>/table_name path on HDFS is deleted
    before the import, and the Hive table is dropped in ``finally``. The
    status and size checks are commented out, so only "does not raise" is
    verified.
    """
    target_path = os.path.join('/user', getpass.getuser(), 'table_name')
    _path = HDFS(target_path)
    try:
        if _path.exists():
            _path.delete(recursive=_path.is_directory())
            # shell.execute_shell_command('hadoop fs', '-rm -r /user/', getpass.getuser(), '/table_name')
        rdbms_source = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER,
            rdbms="mysql",
            username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests",
        )
        hive_import = rdbms_source.table(table="table_name").to_hive()
        cmd = hive_import.run()
        # self.assertEquals(cmd.status, 0, cmd.stderr)
        # result = shell.execute_shell_command('hadoop fs', '-du -s /user/hive/warehouse/table_name/part-m-*')
        # self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
    finally:
        shell.execute_shell_command('hive', "-e 'DROP TABLE IF EXISTS table_name'")
def clean_resources():
    """
    Removes HDFS paths under BASE_DIR left over from a previous flow.
    """
    # (path template, keyword arguments passed to delete()).
    # The password file is deleted without the recursive flag.
    leftovers = (
        ("{0}/data_to_export", {"recursive": True}),
        ("{0}/data_from_import", {"recursive": True}),
        ("{0}/rdbms.password", {}),
    )
    for template, delete_kwargs in leftovers:
        hdfs_file = HDFS(template.format(BASE_DIR))
        if hdfs_file.exists():
            hdfs_file.delete(**delete_kwargs)
# from merlin.tools.hive import Hive
import os
from ConfigParser import RawConfigParser

from merlin.fs.localfs import LocalFS
from merlin.fs.hdfs import HDFS
from merlin.fs.ftp import ftp_client

BASE_DIR = "/tmp"

if __name__ == "__main__":
    # Step 1: remove "<BASE_DIR>/raw" from HDFS when it is present.
    hdfs_file = HDFS("{0}/raw".format(BASE_DIR))
    if hdfs_file.exists():
        hdfs_file.delete(recursive=True)

    # Step 2: load the FTP connection settings stored next to this script.
    config_path = os.path.join(os.path.dirname(__file__), "resources/ftp_config.ini")
    config = RawConfigParser()
    config.read(config_path)
    host_download = config.get("ftp", "host.download")
    user_name = config.get("ftp", "user.name")
    password = config.get("ftp", "password")
    # NOTE(review): `path` is read from the config but never used; the
    # client below is pointed at the literal "/tmp" - confirm this is intended.
    path = config.get("ftp", "path")

    # Step 3: remove the FTP-side path when it is present.
    ftp = ftp_client(host=host_download,
                     login=user_name,
                     password=password,
                     path="/tmp")
    if ftp.exists():
        ftp.delete(recursive=True)
def on_flow_failed(context):
    """Delete "<BASE_DIR>/raw" from HDFS if it exists.

    :param context: not used by this handler
    """
    raw_data = HDFS("{0}/raw".format(BASE_DIR))
    if not raw_data.exists():
        return
    raw_data.delete(recursive=True)
def test_delete_file(self):
    """Create a uniquely named file under /tmp on HDFS and verify ``delete`` removes it."""
    target_path = os.path.join("/tmp", str(uuid.uuid4()))
    victim = HDFS(target_path)

    victim.create_file()
    self.assertTrue(victim.exists(), "Target file can not be found")

    victim.delete()
    self.assertFalse(victim.exists(), "Target file was not deleted")