Example #1
0
    def test_hadoop_methods(self, bucket=None):
        """Round-trip data through the hadoop filesystem helpers.

        Exercises plain-text write/read, gzip write/read, file copy, and a
        binary round trip via `HadoopFS.open`, asserting that every read
        reproduces exactly what was written.

        :param bucket: destination URL prefix; defaults to `self.remote_bucket`.
        """
        bucket = self.remote_bucket if bucket is None else bucket

        # 'foo', 'bar', 'baz' followed by '0'..'99', one token per line.
        data = ['foo', 'bar', 'baz'] + [str(i) for i in range(100)]

        # Plain-text round trip: each element written on its own line.
        with hadoop_open(f'{bucket}/test_out.txt', 'w') as sink:
            sink.write('\n'.join(data) + '\n')

        with hadoop_open(f'{bucket}/test_out.txt') as src:
            self.assertEqual(data, [line.strip() for line in src])

        # Gzip round trip: the .gz suffix selects compression transparently.
        with hadoop_open(f'{bucket}/test_out.txt.gz', 'w') as sink:
            sink.write('\n'.join(data) + '\n')

        with hadoop_open(f'{bucket}/test_out.txt.gz') as src:
            self.assertEqual(data, [line.strip() for line in src])

        # Copying the compressed file must preserve its contents.
        hadoop_copy(f'{bucket}/test_out.txt.gz',
                    f'{bucket}/test_out.copy.txt.gz')

        with hadoop_open(f'{bucket}/test_out.copy.txt.gz') as src:
            self.assertEqual(data, [line.strip() for line in src])

        # Binary round trip through an explicit local HadoopFS, using a
        # small read buffer and a large (256 KiB) write buffer.
        local_fs = HadoopFS()
        with local_fs.open(resource('randomBytes'), buffer_size=100) as src:
            with hadoop_open(f'{bucket}/randomBytesOut',
                             'w',
                             buffer_size=2**18) as sink:
                contents = src.read()
                sink.write(contents)

        with hadoop_open(f'{bucket}/randomBytesOut', buffer_size=2**18) as src:
            self.assertEqual(contents, src.read())
Example #2
0
 def fs(self):
     """Return the filesystem handle, constructing a HadoopFS on first access.

     The import is deferred to avoid paying for it until a filesystem is
     actually requested; the instance is cached on `self._fs`.
     """
     cached = self._fs
     if cached is None:
         from hail.fs.hadoop_fs import HadoopFS
         cached = self._fs = HadoopFS()
     return cached
Example #3
0
 def fs(self):
     """Return the filesystem handle, lazily built from the JVM backend.

     On first access, constructs a HadoopFS wired to this object's utils
     package and the backend's filesystem, then caches it on `self._fs`.
     """
     if self._fs is not None:
         return self._fs
     from hail.fs.hadoop_fs import HadoopFS
     self._fs = HadoopFS(self._utils_package_object, self._jbackend.fs())
     return self._fs