def test_tmp_move(self):
    """Check that moving a temporary target leaves only the destination."""
    source = hdfs.HdfsTarget(is_tmp=True)
    destination = hdfs.HdfsTarget(self._test_file())

    # Clear any leftover destination so the final assertion is meaningful.
    if destination.exists():
        destination.remove(skip_trash=True)

    # Open for writing and close straight away; nothing is written.
    with source.open('w'):
        pass
    self.assertTrue(source.exists())

    source.move(destination.path)

    self.assertFalse(source.exists())
    self.assertTrue(destination.exists())
def test_flag_target(self):
    """Check that a flag target exists once its directory holds the written files."""
    # NOTE(review): `format` is not bound inside this function; it resolves to
    # a module-level name if one exists, otherwise to the builtin -- confirm.
    flag_target = hdfs.HdfsFlagTarget("/some/dir/", format=format)
    if flag_target.exists():
        flag_target.remove(skip_trash=True)
    self.assertFalse(flag_target.exists())

    # Create a data part first and the "_SUCCESS" marker second, both empty.
    for file_name in ("part-00000", "_SUCCESS"):
        member = hdfs.HdfsTarget(flag_target.path + file_name, format=format)
        with member.open('w'):
            pass

    self.assertTrue(flag_target.exists())
def test_rename_no_grandparent(self):
    """Check that a move succeeds when the destination's grandparent is absent."""
    missing_root = self._test_dir() + '/foo'
    # Remove the would-be grandparent so the move has to cope without it.
    if self.fs.exists(missing_root):
        self.fs.remove(missing_root, skip_trash=True)

    source = hdfs.HdfsTarget(is_tmp=True)
    destination = hdfs.HdfsTarget(missing_root + '/bar/baz')

    with source.open('w'):
        pass
    self.assertTrue(source.exists())

    source.move(destination.path)

    self.assertFalse(source.exists())
    self.assertTrue(destination.exists())
def put_file(self, local_target, local_filename, target_path, delpath=True):
    """Recreate a local file, upload it, and return a target for the uploaded path.

    :param local_target: local target that is removed (if present) and
        recreated through ``self.create_file``.
    :param local_filename: name appended to ``target_path`` to build the
        path of the returned target.
    :param target_path: remote path handed to ``self.fs.put``.
    :param delpath: when true, remove any existing ``target_path`` and
        create it again with ``self.fs.mkdir`` before uploading.
    :return: ``hdfs.HdfsTarget`` for ``target_path + "/" + local_filename``.
    """
    # Always start from a freshly created local file.
    if local_target.exists():
        local_target.remove()
    self.create_file(local_target)

    if delpath:
        remote_dir = hdfs.HdfsTarget(target_path)
        if remote_dir.exists():
            remote_dir.remove(skip_trash=True)
        self.fs.mkdir(remote_dir.path)

    self.fs.put(local_target.path, target_path)
    return hdfs.HdfsTarget(target_path + "/" + local_filename)
def test_multifile(self):
    """Check that reading the target yields 'foo' and 'bar' but not the '_SUCCESS' content."""
    with self.target.open('w') as writer:
        writer.write(b'foo\n')

    sibling = hdfs.HdfsTarget(self.target.path + '/data2', format=hdfs.Plain)
    with sibling.open('w') as writer:
        writer.write(b'bar\n')

    marker = hdfs.HdfsTarget(self.target.path + '/_SUCCESS', format=hdfs.Plain)
    with marker.open('w') as writer:
        writer.write(b'b0rk\n')

    for written in (sibling, marker, self.target):
        self.assertTrue(written.exists())

    with self.target.open('r') as reader:
        raw = reader.read()

    # Sort the lines so the comparison does not depend on read order.
    lines = raw.strip(b'\n').split(b'\n')
    lines.sort()
    self.assertEqual(tuple(lines), (b'bar', b'foo'))
class TestTask(luigi.Task):
    """Task whose inputs are the entries found one level below ``/in`` on HDFS."""

    # NOTE(review): this default is computed once, when the class body is
    # executed, not each time the task is instantiated -- confirm intended.
    rundate = luigi.DateParameter(default=datetime.now().date())
    table = "test"
    host = "localhost:3306"
    db = "testdb"
    user = "******"
    pw = "password"

    @staticmethod
    def _list_hdfs_paths(path):
        """Return the path column (8th field) of ``hdfs dfs -ls <path>``.

        The command is started with an argument list rather than a shell
        string, so ``path`` is never interpreted by a shell. Lines with
        fewer than eight whitespace-separated fields (for example the
        "Found N items" header) are skipped, which matches what the former
        ``awk '{print $8}'`` pipeline followed by ``split()`` produced.
        Stderr and the exit status are ignored, as before, so a failing
        listing simply yields no paths.
        """
        proc = subprocess.Popen(
            ["hdfs", "dfs", "-ls", path],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            # Text mode: paths come back as native strings, not bytes.
            universal_newlines=True,
        )
        stdout, _ = proc.communicate()
        paths = []
        for line in stdout.splitlines():
            fields = line.split()
            if len(fields) >= 8:
                paths.append(fields[7])
        return paths

    def input(self):
        """
        Provide the input targets.

        Paths are expected to have the form /in/'directory'/'filename'.
        Every entry listed under /in is listed in turn, and each path found
        that way is returned as an input.

        :return: list of ``hdfs.HdfsTarget``, one per path found.
        """
        dir_in = '/in'
        found = []
        for directory in self._list_hdfs_paths(dir_in):
            found.extend(self._list_hdfs_paths(directory))
        return [hdfs.HdfsTarget(path) for path in found]
def test_create_ancestors(self):
    """Check that writing to a deeply nested target creates the missing ancestors."""
    parent = self._test_dir()
    target = hdfs.HdfsTarget("%s/foo/bar/baz" % parent)

    # Make sure the whole tree is absent before writing.
    if self.fs.exists(parent):
        self.fs.remove(parent, skip_trash=True)
    self.assertFalse(self.fs.exists(parent))

    # Use a context manager, like the sibling tests, so the handle is
    # released even if the write raises.
    with target.open('w') as fobj:
        fobj.write('lol\n')

    self.assertTrue(self.fs.exists(parent))
    self.assertTrue(target.exists())
def test_glob_exists(self):
    """Check that ``glob_exists`` is true only for the exact number of matches."""
    base = hdfs.HdfsTarget(self._test_dir())
    if base.exists():
        base.remove(skip_trash=True)
    self.fs.mkdir(base.path)

    # Two files match the "part-0000*" pattern used below; "another" does not.
    fixtures = [
        (hdfs.HdfsTarget(base.path + suffix), payload)
        for suffix, payload in (
            ("/part-00001", 'foo\n'),
            ("/part-00002", 'bar\n'),
            ("/another", 'biz\n'),
        )
    ]
    for fixture, payload in fixtures:
        with fixture.open('w') as handle:
            handle.write(payload)

    pattern = hdfs.HdfsTarget("%s/part-0000*" % base.path)
    self.assertTrue(pattern.glob_exists(2))
    for wrong_count in (3, 1):
        self.assertFalse(pattern.glob_exists(wrong_count))
def test_tmp_cleanup(self):
    """Check that the file of an ``is_tmp`` target is gone once the target is collected."""
    import gc

    tmp_path = self._test_file()
    tmp_target = hdfs.HdfsTarget(tmp_path, is_tmp=True)
    if tmp_target.exists():
        tmp_target.remove(skip_trash=True)

    with tmp_target.open('w') as fobj:
        fobj.write('lol\n')
    self.assertTrue(tmp_target.exists())

    # Drop the only local name for the target and force a collection pass;
    # the assertion below expects the file to have been removed by then.
    del tmp_target
    gc.collect()

    self.assertFalse(self.fs.exists(tmp_path))
def tezt_rename_dont_move(self, client):
    """
    Shared check for ``move_dir`` against a given client.

    Kept as a dedicated helper because the code paths are expected to
    differ considerably between the three kinds of clients.

    ``move_dir`` onto the existing ``d/b`` must return a false value, while
    ``move_dir`` onto the new ``d/c`` must return a true value; ``d`` holds
    two entries throughout.
    """
    def entry_count():
        # Materialise the listing so iterators are counted as well.
        return len(list(client.listdir('d')))

    if client.exists('d'):
        client.remove('d')
    for directory in ('d/a', 'd/b'):
        client.mkdir(directory)
    self.assertEqual(2, entry_count())

    target = hdfs.HdfsTarget('d/a', fs=client)

    self.assertFalse(target.move_dir('d/b'))
    self.assertEqual(2, entry_count())

    self.assertTrue(target.move_dir('d/c'))
    self.assertEqual(2, entry_count())
def test_slow_exists(self):
    """Check ``fs.exists`` for absent and present paths and for unreachable hosts."""
    target = hdfs.HdfsTarget(self._test_file())
    try:
        target.remove(skip_trash=True)
    except Exception:
        # Best-effort cleanup: the file may simply not exist yet. Catch
        # Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        pass

    self.assertFalse(self.fs.exists(target.path))
    target.open("w").close()
    self.assertTrue(self.fs.exists(target.path))

    # Each of these URIs must make ``exists`` raise instead of returning False.
    for bad_uri in ("hdfs://doesnotexist/foo", "hdfs://_doesnotexist_/foo"):
        self.assertRaises(hdfs.HDFSCliError, self.fs.exists, bad_uri)
def test_pickle(self):
    """Smoke test: pickling an ``HdfsTarget`` must not raise."""
    pickle.dumps(hdfs.HdfsTarget("/tmp/dir"))
def test_luigi_tmp(self):
    """Check that a temporary target does not exist until it has been opened for writing."""
    tmp_target = hdfs.HdfsTarget(is_tmp=True)
    self.assertFalse(tmp_target.exists())

    # Open and close without writing anything.
    with tmp_target.open('w'):
        pass

    self.assertTrue(tmp_target.exists())
def create_target(self, format=None):
    """Return a target for the test file, removing any existing copy first.

    :param format: passed through unchanged to ``hdfs.HdfsTarget``.
    :return: the new ``hdfs.HdfsTarget``.
    """
    fresh = hdfs.HdfsTarget(self._test_file(), format=format)
    if fresh.exists():
        fresh.remove(skip_trash=True)
    return fresh
def setUp(self):
    """Run the parent setup, then bind ``self.target`` to a test file that does not exist yet."""
    super(ComplexOldFormatTest, self).setUp()

    target = hdfs.HdfsTarget(self._test_file(), format=self.format)
    self.target = target
    # Remove leftovers from earlier runs.
    if target.exists():
        target.remove(skip_trash=True)