def test_du(self):
    """Check that du() reports correct byte sizes for files, dirs, and globs.

    Layout created under the mock HDFS root:
        data1        (4 bytes)
        more/data2   (4 bytes)
        more/data3   (4 bytes)
    """
    root = os.environ['MOCK_HDFS_ROOT']

    data_path_1 = os.path.join(root, 'data1')
    with open(data_path_1, 'w') as f:
        f.write("abcd")
    remote_data_1 = 'hdfs:///data1'

    data_dir = os.path.join(root, 'more')
    os.mkdir(data_dir)
    remote_dir = 'hdfs:///more'

    data_path_2 = os.path.join(data_dir, 'data2')
    with open(data_path_2, 'w') as f:
        f.write("defg")
    remote_data_2 = 'hdfs:///more/data2'

    data_path_3 = os.path.join(data_dir, 'data3')
    with open(data_path_3, 'w') as f:
        f.write("hijk")
    # BUG FIX: this used to re-assign remote_data_2, shadowing the data2
    # path so it was never tested under its own name (masked because both
    # files are 4 bytes).
    remote_data_3 = 'hdfs:///more/data3'

    runner = HadoopJobRunner(conf_path=False)

    # whole tree: 4 + 4 + 4 bytes
    self.assertEqual(runner.du(root), 12)
    # directory and glob over its contents: 4 + 4 bytes
    self.assertEqual(runner.du(remote_dir), 8)
    self.assertEqual(runner.du(remote_dir + '/*'), 8)
    # individual files
    self.assertEqual(runner.du(remote_data_1), 4)
    self.assertEqual(runner.du(remote_data_2), 4)
    self.assertEqual(runner.du(remote_data_3), 4)