def test_target_path_exists_rename_fails_snakebite(self, remove, rename):
    rename.side_effect = hdfs.get_autoconfig_client(threading.local()).rename
    # Create the target path first so the subsequent atomic write finds it
    # already present and fails on rename.
    with hdfs.HdfsAtomicWritePipe(self.path) as fobj:
        fobj.write(b'test1')
    fobj = hdfs.HdfsAtomicWriteDirPipe(self.path)
    self.assertRaises(HdfsAtomicWriteError, fobj.close)
def write_wrapper(self, *args, **kwargs):
    # type: (Any, *Any, **Any) -> None
    """
    Writer wrapper.

    :param self: Instance of the caller.
    :param args: Positional arguments.
    :param kwargs: Keyword arguments.
    :return: None
    """
    write_function(self, *args, **kwargs)
    if self._is_atomic_output():
        spark_context = args[1]  # type: ignore
        # We use Any annotations rather than the concrete pyspark types because
        # pyspark is only shipped to the cluster for luigi PySparkTask tasks.
        # For non-PySparkTask tasks we would have to ship the pyspark package
        # ourselves (by adding it to py-packages), which would be redundant and
        # might introduce conflicts. So we don't send pyspark to the cluster
        # just for this type annotation, and ignore the type here instead.
        # TODO: move this Spark-specific part to something like base_pyspark.py
        # and keep all the Spark-related code there.
        if spark_context.master.startswith('local'):
            old_path = normalize_local_filename(self._get_temp_output_path())
            new_path = normalize_local_filename(self._get_output_path())
            rename(old_path, new_path)
        else:
            hdfs_client = hdfs.get_autoconfig_client()  # type: ignore
            hdfs_client.rename_dont_move(self._get_temp_output_path(),
                                         self._get_output_path())
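# For context, a wrapper like the one above is normally produced by a decorator
# factory that closes over write_function. A minimal, runnable sketch of that
# pattern, covering only the local-filesystem branch; the decorator name, the
# toy LocalWriter class, and the paths are illustrative assumptions, not part
# of the source.
import functools
import os


def atomic_output(write_function):
    @functools.wraps(write_function)
    def write_wrapper(self, *args, **kwargs):
        write_function(self, *args, **kwargs)
        if self._is_atomic_output():
            # The HDFS branch would instead call
            # hdfs.get_autoconfig_client().rename_dont_move(...), as above.
            os.rename(self._get_temp_output_path(), self._get_output_path())
    return write_wrapper


class LocalWriter(object):
    """Toy writer used to exercise the decorator."""

    def __init__(self, temp_path, final_path):
        self._temp_path = temp_path
        self._final_path = final_path

    def _is_atomic_output(self):
        return True

    def _get_temp_output_path(self):
        return self._temp_path

    def _get_output_path(self):
        return self._final_path

    @atomic_output
    def write(self, data):
        with open(self._temp_path, 'w') as handle:
            handle.write(data)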
def test_target_path_exists_rename_fails_snakebite(self, remove, rename):
    rename.side_effect = hdfs.get_autoconfig_client(threading.local()).rename
    testpath = self._test_file()
    # Clean up leftovers from previous runs; fall back to removing the whole
    # test directory if the file itself cannot be removed.
    try:
        if self.fs.exists(testpath):
            self.fs.remove(testpath, skip_trash=True)
    except Exception:
        if self.fs.exists(self._test_dir()):
            self.fs.remove(self._test_dir(), skip_trash=True)
    with hdfs.HdfsAtomicWritePipe(testpath) as fobj:
        fobj.write(b'test1')
    fobj = hdfs.HdfsAtomicWritePipe(testpath)
    self.assertRaises(HdfsAtomicWriteError, fobj.close)
def finish(self, job):
    hdfs_client = hdfs.get_autoconfig_client()
    outpath = unicode(job.output().path)
    temppath = unicode(job.working_output_path())
    if not hdfs_client.exists(outpath) and hdfs_client.exists(temppath):
        hdfs_client.rename_dont_move(temppath, outpath)
    else:
        if hdfs_client.exists(outpath):
            message = 'Destination directory already exists {}.'.format(outpath)
        else:
            message = 'Temporary destination directory does not exist {}.'.format(temppath)
        raise Exception(message)
def test_get_autoconfig_client_cached(self):
    original_client = get_autoconfig_client()
    for _ in range(100):
        self.assertIs(original_client, get_autoconfig_client())
def add_client():
    clients.append(get_autoconfig_client())
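# A helper like add_client is typically driven from several threads to check
# that get_autoconfig_client() behaves sanely under concurrent use. A minimal
# sketch of such a harness; the harness body and thread count are assumptions,
# only the add_client helper itself comes from the source.
import threading

from luigi.contrib.hdfs import get_autoconfig_client


def run_threaded_client_check():
    clients = []

    def add_client():
        clients.append(get_autoconfig_client())

    threads = [threading.Thread(target=add_client) for _ in range(5)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    # Every thread should have obtained a usable client.
    assert len(clients) == 5
    assert all(client is not None for client in clients)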
def test_snakebite_with_hadoopcli_fallback(self):
    client = hdfs.get_autoconfig_client()
    # The tezt_ spelling keeps the shared helper from being collected as a
    # test in its own right.
    self.tezt_rename_dont_move(client)
def test_snakebite(self):
    client = hdfs.get_autoconfig_client()
    self.tezt_rename_dont_move(client)
def test_hadoopcli(self):
    client = hdfs.get_autoconfig_client()
    self.tezt_rename_dont_move(client)
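# The three tests above presumably run against different values of luigi's
# [hdfs] client option, which is what get_autoconfig_client() inspects to pick
# an implementation. A sketch of selecting one programmatically; setting it in
# luigi.cfg is equivalent.
import luigi.configuration

config = luigi.configuration.get_config()
config.set('hdfs', 'client', 'snakebite')  # or: hadoopcli, snakebite_with_hadoopcli_fallback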
def test_snakebite_with_hadoopcli_fallback(self):
    client = hdfs.get_autoconfig_client(threading.local())
    self.assertIsInstance(client, CascadingClient)
    self.tezt_rename_dont_move(client)
def test_snakebite(self):
    client = hdfs.get_autoconfig_client(threading.local())
    self.assertIsInstance(client, SnakebiteHdfsClient)
    self.tezt_rename_dont_move(client)
def __init__(self, hdfs_client=None, warehouse_location=None):
    self.hdfs_client = hdfs_client or get_autoconfig_client()
    self.warehouse_location = warehouse_location or get_hive_warehouse_location()
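# A minimal construction sketch for the __init__ above. The host class name
# HiveTableTarget and the stubbed get_hive_warehouse_location are assumptions;
# only the __init__ body comes from the source.
from luigi.contrib.hdfs import get_autoconfig_client


def get_hive_warehouse_location():
    return '/user/hive/warehouse'  # stand-in for the real lookup


class HiveTableTarget(object):
    def __init__(self, hdfs_client=None, warehouse_location=None):
        self.hdfs_client = hdfs_client or get_autoconfig_client()
        self.warehouse_location = warehouse_location or get_hive_warehouse_location()


target = HiveTableTarget()  # both dependencies resolved from the defaults
custom = HiveTableTarget(warehouse_location='/data/warehouse')  # override one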