# Imports used below; `resource`, the test suite's fixture-path helper,
# is assumed to be provided by the surrounding test module.
from hail.utils import hadoop_open, hadoop_copy
from hail.fs.hadoop_fs import HadoopFS


def test_hadoop_methods(self, bucket=None):
    if bucket is None:
        bucket = self.remote_bucket
    data = ['foo', 'bar', 'baz']
    data.extend(map(str, range(100)))

    # Round-trip a plain text file.
    with hadoop_open(f'{bucket}/test_out.txt', 'w') as f:
        for d in data:
            f.write(d)
            f.write('\n')

    with hadoop_open(f'{bucket}/test_out.txt') as f:
        data2 = [line.strip() for line in f]

    self.assertEqual(data, data2)

    # Round-trip the same data gzip-compressed; compression is selected
    # by the '.gz' extension.
    with hadoop_open(f'{bucket}/test_out.txt.gz', 'w') as f:
        for d in data:
            f.write(d)
            f.write('\n')

    with hadoop_open(f'{bucket}/test_out.txt.gz') as f:
        data3 = [line.strip() for line in f]

    self.assertEqual(data, data3)

    # Copy the compressed file and verify the copy reads back identically.
    hadoop_copy(f'{bucket}/test_out.txt.gz', f'{bucket}/test_out.copy.txt.gz')

    with hadoop_open(f'{bucket}/test_out.copy.txt.gz') as f:
        data4 = [line.strip() for line in f]

    self.assertEqual(data, data4)

    # Round-trip raw bytes with explicit buffer sizes, reading a local
    # fixture through HadoopFS and writing it back out via hadoop_open.
    local_fs = HadoopFS()
    with local_fs.open(resource('randomBytes'), buffer_size=100) as f:
        with hadoop_open(f'{bucket}/randomBytesOut', 'w', buffer_size=2**18) as out:
            b = f.read()
            out.write(b)

    with hadoop_open(f'{bucket}/randomBytesOut', buffer_size=2**18) as f:
        b2 = f.read()

    self.assertEqual(b, b2)
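A minimal harness sketch for exercising the test above, assuming the test body is defined at module scope and attached to a standard unittest.TestCase. The class name, bucket path, and wrapper method are illustrative assumptions, and `resource` must resolve to a real local fixture file:

import unittest

class HadoopFSTests(unittest.TestCase):  # hypothetical test class
    remote_bucket = '/tmp/hail-fs-test'  # assumed: any Hadoop-visible path

    def test_round_trip(self):
        # Delegate to the module-level test body defined above.
        test_hadoop_methods(self, bucket=self.remote_bucket)

if __name__ == '__main__':
    unittest.main()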
def fs(self):
    # Lazily construct the FS on first access; this variant builds a
    # HadoopFS with no arguments (direct local-filesystem access, as in
    # the `local_fs` of the test above).
    if self._fs is None:
        from hail.fs.hadoop_fs import HadoopFS
        self._fs = HadoopFS()
    return self._fs
def fs(self):
    # Same lazy construction, but here the HadoopFS wraps the filesystem
    # object exposed by the JVM backend (self._jbackend).
    if self._fs is None:
        from hail.fs.hadoop_fs import HadoopFS
        self._fs = HadoopFS(self._utils_package_object, self._jbackend.fs())
    return self._fs
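Both accessors implement the same cache-on-first-use pattern: `_fs` starts out as None and is constructed exactly once, on demand. A minimal sketch of how such an accessor is typically wired up, assuming it is exposed as a property on a backend class (the class name, the `__init__`, and the `@property` decorator are assumptions not present in the snippets above):

from hail.fs.hadoop_fs import HadoopFS

class LocalBackend:  # assumed name; any object holding a cached FS
    def __init__(self):
        self._fs = None  # built lazily on first access to `fs`

    @property
    def fs(self):
        # Cache-on-first-use: construct the FS once, then reuse it.
        if self._fs is None:
            self._fs = HadoopFS()
        return self._fs

backend = LocalBackend()
assert backend.fs is backend.fs  # the same instance is returned each time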