def test_tmp_move(self):
    """Move a temporary HDFS target onto a named path and check both ends."""
    source = hdfs.HdfsTarget(is_tmp=True)
    destination = hdfs.HdfsTarget("luigi_hdfs_testdir")

    # Start from a clean slate: drop any leftover destination.
    if destination.exists():
        destination.remove()

    # Nothing is written; the test expects the source to exist once the
    # write block has been left.
    with source.open('w'):
        pass
    self.assertTrue(source.exists())

    source.move(destination.path)

    # After the move only the destination should be present.
    self.assertFalse(source.exists())
    self.assertTrue(destination.exists())
def test_tmp_move(self):
    """Move a temporary HDFS target onto the per-test file path."""
    source = hdfs.HdfsTarget(is_tmp=True)
    destination = hdfs.HdfsTarget(self._test_file())

    # Remove any leftover destination from a previous run.
    if destination.exists():
        destination.remove(skip_trash=True)

    # Nothing is written; the test expects the source to exist once the
    # write block has been left.
    with source.open('w'):
        pass
    self.assertTrue(source.exists())

    source.move(destination.path)

    # The source must be gone and the destination must have appeared.
    self.assertFalse(source.exists())
    self.assertTrue(destination.exists())
def test_rename_no_parent(self):
    """Move a temporary target to ``foo/bar`` after making sure ``foo`` is absent."""
    # Make sure the would-be parent of the destination is not there.
    if hdfs.exists("foo"):
        hdfs.remove("foo")

    source = hdfs.HdfsTarget(is_tmp=True)
    destination = hdfs.HdfsTarget("foo/bar")

    with source.open('w'):
        pass
    self.assertTrue(source.exists())

    source.move(destination.path)

    # The move is expected to succeed even though "foo" was removed above.
    self.assertFalse(source.exists())
    self.assertTrue(destination.exists())
def put_file(self, local_target, local_filename, target_path):
    """Recreate a local file, upload it into a fresh HDFS directory, return its target.

    :param local_target: local target; removed if it exists, then recreated
        through ``self.create_file``.
    :param local_filename: name appended to ``target_path`` to form the path
        of the returned target.
    :param target_path: HDFS path that is removed if present, recreated with
        ``hdfs.mkdir`` and used as the upload destination.
    :return: an ``hdfs.HdfsTarget`` for ``target_path + "/" + local_filename``.
    """
    # Always start from a freshly created local file.
    if local_target.exists():
        local_target.remove()
    self.create_file(local_target)

    # Likewise start from an empty remote directory.
    remote_dir = hdfs.HdfsTarget(target_path)
    if remote_dir.exists():
        remote_dir.remove(skip_trash=True)
    hdfs.mkdir(remote_dir.path)

    client.put(local_target.path, target_path)

    uploaded_path = target_path + "/" + local_filename
    return hdfs.HdfsTarget(uploaded_path)
def test_rename_no_grandparent(self):
    """Move a temporary target two levels below a directory that does not exist."""
    missing_root = self._test_dir() + '/foo'

    # Guarantee that the grandparent of the destination is absent.
    if self.fs.exists(missing_root):
        self.fs.remove(missing_root, skip_trash=True)

    source = hdfs.HdfsTarget(is_tmp=True)
    destination = hdfs.HdfsTarget(missing_root + '/bar/baz')

    with source.open('w'):
        pass
    self.assertTrue(source.exists())

    source.move(destination.path)

    # The move is expected to succeed although neither ancestor existed.
    self.assertFalse(source.exists())
    self.assertTrue(destination.exists())
def test_multifile(self):
    """Write several files under ``self.target`` and read them back as one stream."""
    with self.target.open('w') as out:
        out.write(b'foo\n')

    # A second data file below the target's path.
    extra = hdfs.HdfsTarget(self.target.path + '/data2', format=hdfs.Plain)
    with extra.open('w') as out:
        out.write(b'bar\n')

    # A ``_SUCCESS`` marker; the final assertion expects its content not to
    # show up when the target is read.
    marker = hdfs.HdfsTarget(self.target.path + '/_SUCCESS', format=hdfs.Plain)
    with marker.open('w') as out:
        out.write(b'b0rk\n')

    self.assertTrue(extra.exists())
    self.assertTrue(marker.exists())
    self.assertTrue(self.target.exists())

    with self.target.open('r') as src:
        raw = src.read()

    # Sort the lines so the comparison does not depend on read order.
    parts = sorted(raw.strip(b'\n').split(b'\n'))
    self.assertEqual(tuple(parts), (b'bar', b'foo'))
def test_atomicity(self):
    """The target must only become visible once the write handle is closed."""
    target = hdfs.HdfsTarget("luigi_hdfs_testfile")
    if target.exists():
        target.remove()

    handle = target.open("w")
    # While the handle is still open the target must not exist yet.
    self.assertFalse(target.exists())

    handle.close()
    # Closing the handle is what makes the target appear.
    self.assertTrue(target.exists())
def test_with_close(self):
    """Leaving a ``with`` write block normally must leave the target in place."""
    target = hdfs.HdfsTarget("luigi_hdfs_testfile")
    if target.exists():
        target.remove()

    with target.open('w') as writer:
        writer.write('hej\n')

    # The target is expected to exist once the block has been exited.
    self.assertTrue(target.exists())
def test_atomicity(self):
    """The target must only become visible once the write handle is closed."""
    target = hdfs.HdfsTarget(self._test_file())
    if target.exists():
        target.remove(skip_trash=True)

    handle = target.open("w")
    # While the handle is still open the target must not exist yet.
    self.assertFalse(target.exists())

    handle.close()
    # Closing the handle is what makes the target appear.
    self.assertTrue(target.exists())
def test_with_close(self):
    """Leaving a ``with`` write block normally must leave the target in place."""
    target = hdfs.HdfsTarget(self._test_file())
    if target.exists():
        target.remove(skip_trash=True)

    with target.open('w') as writer:
        writer.write('hej\n')

    # The target is expected to exist once the block has been exited.
    self.assertTrue(target.exists())
def test_with_exception(self):
    """An exception raised inside the write block must not leave a target behind."""
    target = hdfs.HdfsTarget(self._test_file())
    if target.exists():
        target.remove(skip_trash=True)

    def write_then_fail():
        # Fail while the write handle is still open.
        with target.open('w') as writer:
            writer.write('hej\n')
            raise TestException('Test triggered exception')

    self.assertRaises(TestException, write_then_fail)

    # The interrupted write is expected to leave nothing at the target path.
    self.assertFalse(target.exists())
def test_create_parents(self):
    """Writing a target below a missing directory must bring that directory into existence."""
    parent = "luigi_hdfs_testdir"
    target = hdfs.HdfsTarget("%s/testfile" % parent)

    # Make sure the parent directory is really gone before writing.
    if hdfs.exists(parent):
        hdfs.remove(parent)
    self.assertFalse(hdfs.exists(parent))

    writer = target.open('w')
    writer.write('lol\n')
    writer.close()

    # Both the parent and the file itself should now be present.
    self.assertTrue(hdfs.exists(parent))
    self.assertTrue(target.exists())
def test_create_ancestors(self):
    """Writing a deeply nested target must bring the missing test directory into existence."""
    root = self._test_dir()
    target = hdfs.HdfsTarget("%s/foo/bar/baz" % root)

    # Make sure the whole directory tree is gone before writing.
    if self.fs.exists(root):
        self.fs.remove(root, skip_trash=True)
    self.assertFalse(self.fs.exists(root))

    writer = target.open('w')
    writer.write('lol\n')
    writer.close()

    # Both the top-level directory and the nested file should now be present.
    self.assertTrue(self.fs.exists(root))
    self.assertTrue(target.exists())
def test_tmp_cleanup(self):
    """A target created with ``is_tmp=True`` must vanish once the object is collected."""
    import gc

    path = self._test_file()
    tmp_target = hdfs.HdfsTarget(path, is_tmp=True)
    if tmp_target.exists():
        tmp_target.remove(skip_trash=True)

    with tmp_target.open('w') as writer:
        writer.write('lol\n')
    self.assertTrue(tmp_target.exists())

    # Drop the only reference and force a collection; the test expects the
    # file at ``path`` to be gone afterwards.
    del tmp_target
    gc.collect()
    self.assertFalse(self.fs.exists(path))
def test_glob_exists(self):
    """``glob_exists(n)`` must hold only when exactly ``n`` paths match the glob."""
    directory = hdfs.HdfsTarget("luigi_hdfs_testdir")
    if directory.exists():
        directory.remove()
    hdfs.mkdir(directory.path)

    # Two files match the "part-0000*" pattern, the third one does not.
    members = [
        hdfs.HdfsTarget(directory.path + "/part-00001"),
        hdfs.HdfsTarget(directory.path + "/part-00002"),
        hdfs.HdfsTarget(directory.path + "/another"),
    ]
    for member, payload in zip(members, ('foo\n', 'bar\n', 'biz\n')):
        with member.open('w') as f:
            f.write(payload)

    files = hdfs.HdfsTarget("luigi_hdfs_testdir/part-0000*")

    # Only the exact match count (2) is expected to be accepted.
    self.assertEqual(files.glob_exists(2), True)
    self.assertEqual(files.glob_exists(3), False)
    self.assertEqual(files.glob_exists(1), False)
def test_glob_exists(self):
    """``glob_exists(n)`` must hold only when exactly ``n`` paths match the glob."""
    target_dir = hdfs.HdfsTarget(self._test_dir())
    if target_dir.exists():
        target_dir.remove(skip_trash=True)
    self.fs.mkdir(target_dir.path)

    # Two files match the "part-0000*" pattern, the third one does not.
    members = [
        hdfs.HdfsTarget(target_dir.path + "/part-00001"),
        hdfs.HdfsTarget(target_dir.path + "/part-00002"),
        hdfs.HdfsTarget(target_dir.path + "/another"),
    ]
    for member, payload in zip(members, ('foo\n', 'bar\n', 'biz\n')):
        with member.open('w') as f:
            f.write(payload)

    files = hdfs.HdfsTarget("%s/part-0000*" % target_dir.path)

    # Only the exact match count (2) is expected to be accepted.
    self.assertTrue(files.glob_exists(2))
    self.assertFalse(files.glob_exists(3))
    self.assertFalse(files.glob_exists(1))
def test_readback(self):
    """Write a short payload to an HDFS target and check it reads back unchanged."""
    target = hdfs.HdfsTarget(self._test_file())
    if target.exists():
        target.remove(skip_trash=True)

    origdata = 'lol\n'

    # Use ``with`` so the write handle is not left dangling if ``write`` fails.
    with target.open("w") as fobj:
        fobj.write(origdata)

    # The read handle used to be opened and never closed; the ``with`` block
    # releases it as soon as the data has been read.
    with target.open('r') as fobj:
        data = fobj.read()

    self.assertEqual(origdata, data)
def tezt_rename_dont_move(self, client):
    """
    Exercise ``move_dir`` against an existing and a non-existing destination.

    Kept as its own check because the code paths are expected to be quite
    different for the three kinds of clients.
    """
    # Rebuild the fixture directory "d" with two sub-directories.
    if client.exists('d'):
        client.remove('d')
    for sub_dir in ('d/a', 'd/b'):
        client.mkdir(sub_dir)

    def entry_count():
        # Number of entries currently listed directly under "d".
        return len(list(client.listdir('d')))

    self.assertEqual(2, entry_count())

    target = hdfs.HdfsTarget('d/a', fs=client)

    # "d/b" already exists: move_dir must report failure and change nothing.
    self.assertFalse(target.move_dir('d/b'))
    self.assertEqual(2, entry_count())

    # "d/c" does not exist: move_dir must succeed, still leaving two entries.
    self.assertTrue(target.move_dir('d/c'))
    self.assertEqual(2, entry_count())
def test_readback(self):
    """Write a short payload to an HDFS target and check it reads back unchanged."""
    target = hdfs.HdfsTarget("luigi_hdfs_testfile")
    if target.exists():
        target.remove()

    origdata = 'lol\n'

    # Use ``with`` so the write handle is not left dangling if ``write`` fails.
    with target.open("w") as fobj:
        fobj.write(origdata)

    # The read handle used to be opened and never closed; the ``with`` block
    # releases it as soon as the data has been read.
    with target.open('r') as fobj:
        data = fobj.read()

    self.assertEqual(origdata, data)
def test_slow_exists(self):
    """Check ``self.fs.exists`` before and after creating a file, and on bad hosts."""
    target = hdfs.HdfsTarget(self._test_file())

    # Best-effort cleanup: the target may simply not be there yet. Catch
    # ``Exception`` rather than everything, so that KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        target.remove(skip_trash=True)
    except Exception:
        pass

    self.assertFalse(self.fs.exists(target.path))
    target.open("w").close()
    self.assertTrue(self.fs.exists(target.path))

    # Asking about paths on hosts that cannot be reached must raise instead
    # of quietly reporting "does not exist".
    self.assertRaises(hdfs.HDFSCliError, self.fs.exists, "hdfs://doesnotexist/foo")
    self.assertRaises(hdfs.HDFSCliError, self.fs.exists, "hdfs://_doesnotexist_/foo")
def test_slow_exists(self):
    """Check ``hdfs.exists`` before and after creating a file, and on bad hosts."""
    target = hdfs.HdfsTarget("luigi_hdfs_testfile")

    # Best-effort cleanup: the target may simply not be there yet. Catch
    # ``Exception`` rather than everything, so that KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        target.remove()
    except Exception:
        pass

    self.assertFalse(hdfs.exists(target.path))
    target.open("w").close()
    self.assertTrue(hdfs.exists(target.path))

    # Asking about paths on hosts that cannot be reached must raise instead
    # of quietly reporting "does not exist".
    self.assertRaises(RuntimeError, hdfs.exists, "hdfs://doesnotexist/foo")
    self.assertRaises(RuntimeError, hdfs.exists, "hdfs://_doesnotexist_/foo")
def setUp(self):
    """Bind ``self.target`` to the shared test file and remove any stale copy."""
    fresh = hdfs.HdfsTarget("luigi_hdfs_testfile", format=self.format)
    self.target = fresh

    # Each test starts without a pre-existing file at the target path.
    if fresh.exists():
        fresh.remove(skip_trash=True)
def create_target(self, format=None):
    """Return an ``hdfs.HdfsTarget`` for the test file after removing any existing copy.

    :param format: passed through unchanged as the target's ``format`` argument.
    :return: the newly constructed target.
    """
    fresh = hdfs.HdfsTarget(self._test_file(), format=format)

    # Callers get a target whose path was cleared if something was there.
    if fresh.exists():
        fresh.remove(skip_trash=True)

    return fresh
def test_luigi_tmp(self):
    """A target created with ``is_tmp=True`` must not exist until it has been written."""
    tmp_target = hdfs.HdfsTarget(is_tmp=True)

    # Nothing has been written yet, so nothing should exist.
    self.assertFalse(tmp_target.exists())

    # Nothing is written; the test expects the target to exist once the
    # write block has been left.
    with tmp_target.open('w'):
        pass

    self.assertTrue(tmp_target.exists())
def setUp(self):
    """Run the parent ``setUp``, then bind ``self.target`` to a cleared test file."""
    super(_HdfsFormatTest, self).setUp()

    fresh = hdfs.HdfsTarget(self._test_file(), format=self.format)
    self.target = fresh

    # Each test starts without a pre-existing file at the target path.
    if fresh.exists():
        fresh.remove(skip_trash=True)