def append(self):
    """Write a file, append more data to it, then verify that a full read
    returns the original content followed by the appended data.

    Replication is pinned to 1 as a workaround for HDFS-3091
    (https://issues.apache.org/jira/browse/HDFS-3091).
    """
    replication = 1
    content, update = make_random_data(), make_random_data()
    path = self._make_random_path()
    with self.fs.open_file(path, "w", replication=replication) as writer:
        writer.write(content)
    try:
        with silent_call(self.fs.open_file, path, "a") as appender:
            appender.write(update)
    except IOError:
        # append is not supported by every HDFS version/configuration
        sys.stderr.write("NOT SUPPORTED ... ")
        return
    with self.fs.open_file(path) as reader:
        self.assertEqual(reader.read(), content + update)
def append(self):
    """Check append mode: reopening a file with "a" and writing must leave
    the file containing the original data plus the appended data.

    Uses replication=1 to work around HDFS-3091
    (https://issues.apache.org/jira/browse/HDFS-3091).
    """
    replication = 1
    content, update = make_random_data(), make_random_data()
    path = self._make_random_path()
    with self.fs.open_file(path, "w", replication=replication) as stream:
        stream.write(content)
    try:
        with silent_call(self.fs.open_file, path, "a") as stream:
            stream.write(update)
    except IOError:
        # append unsupported on this deployment: skip silently
        sys.stderr.write("NOT SUPPORTED ... ")
        return
    else:
        with self.fs.open_file(path) as stream:
            self.assertEqual(stream.read(), content + update)
def pread(self):
    """Positional read must return the requested slice and leave the
    file pointer untouched."""
    content = make_random_data()
    offset, length = 2, 3
    path = self._make_random_file(content=content)
    with self.fs.open_file(path) as fh:
        expected = content[offset:offset + length]
        self.assertEqual(fh.pread(offset, length), expected)
        # pread must not move the current position
        self.assertEqual(fh.tell(), 0)
def block_boundary(self):
    # Verify that reads starting at, before and after an HDFS block
    # boundary each return the expected number of bytes.
    path = self._make_random_path()
    CHUNK_SIZE = 10  # bytes requested per probe read
    N = 2  # block size = N checksum chunks (when we can set it)
    kwargs = {}
    if pydoop.hadoop_version_info().has_deprecated_bs():
        # per-file block size can no longer be set: use the FS default
        bs = hdfs.fs.hdfs().default_block_size()
    else:
        bs = N * get_bytes_per_checksum()
        kwargs['blocksize'] = bs
    total_data_size = 2 * bs  # span exactly two blocks
    with self.fs.open_file(path, "w", **kwargs) as f:
        data = make_random_data(total_data_size)
        i = 0
        bufsize = hdfs.common.BUFSIZE
        # write in BUFSIZE-sized slices rather than one big write
        while i < len(data):
            f.write(data[i:i + bufsize])
            i += bufsize
    with self.fs.open_file(path) as f:
        # p is the last position from which a full CHUNK_SIZE read fits
        p = total_data_size - CHUNK_SIZE
        for pos in 0, 1, bs - 1, bs, bs + 1, p - 1, p, p + 1, total_data_size - 1:
            expected_len = CHUNK_SIZE if pos <= p else total_data_size - pos
            f.seek(pos)
            chunk = f.read(CHUNK_SIZE)
            self.assertEqual(len(chunk), expected_len)
def copy_on_self(self):
    """Copying a file within the same filesystem must preserve its contents."""
    content = make_random_data()
    path = self._make_random_file(content=content)
    path1 = self._make_random_path()
    self.fs.copy(path, self.fs, path1)
    with self.fs.open_file(path1) as copied:
        self.assertEqual(copied.read(), content)
def block_boundary(self):
    # Probe reads around an HDFS block boundary and check the number of
    # bytes returned at each position.
    path = self._make_random_path()
    CHUNK_SIZE = 10  # bytes requested per probe read
    N = 2  # block size = N checksum chunks (when settable)
    kwargs = {}
    if pydoop.hadoop_version_info().has_deprecated_bs():
        # cannot set a per-file block size: fall back to the FS default
        bs = hdfs.fs.hdfs().default_block_size()
    else:
        bs = N * get_bytes_per_checksum()
        kwargs['blocksize'] = bs
    total_data_size = 2 * bs  # exactly two blocks of data
    with self.fs.open_file(path, "w", **kwargs) as f:
        data = make_random_data(total_data_size)
        i = 0
        bufsize = hdfs.common.BUFSIZE
        # write in BUFSIZE-sized slices
        while i < len(data):
            f.write(data[i:i+bufsize])
            i += bufsize
    with self.fs.open_file(path) as f:
        # p: last offset from which a full CHUNK_SIZE read is available
        p = total_data_size - CHUNK_SIZE
        for pos in 0, 1, bs-1, bs, bs+1, p-1, p, p+1, total_data_size-1:
            expected_len = CHUNK_SIZE if pos <= p else total_data_size - pos
            f.seek(pos)
            chunk = f.read(CHUNK_SIZE)
            self.assertEqual(len(chunk), expected_len)
def pread(self):
    """pread returns data from the given offset; tell() must stay at 0."""
    content = make_random_data()
    offset, length = 2, 3
    path = self._make_random_file(content=content)
    with self.fs.open_file(path) as f:
        got = f.pread(offset, length)
        self.assertEqual(got, content[offset:offset + length])
        self.assertEqual(f.tell(), 0)
def read(self):
    """Whole-file and partial reads; writing via a read handle must fail."""
    content = make_random_data()
    path = self._make_random_file(content=content)
    with self.fs.open_file(path) as fh:
        self.assertEqual(fh.read(), content)
    with self.fs.open_file(path) as fh:
        self.assertEqual(fh.read(3), content[:3])
        # handle is open for reading only, so a write must raise IOError
        self.assertRaisesExternal(IOError, fh.write, content)
def _make_random_file(self, where=None, content=None, **kwargs):
    """Create a file holding the given (or freshly generated random)
    content with a single write; return its path."""
    kwargs["flags"] = "w"
    payload = content or make_random_data()
    path = self._make_random_path(where=where)
    with self.fs.open_file(path, **kwargs) as out:
        written = out.write(payload)
        # the whole payload must have been written
        self.assertEqual(written, len(payload))
    return path
def write_chunk(self):
    """write_chunk must write the whole ctypes buffer and report its length;
    returns the path of the file created."""
    content = make_random_data()
    buf = create_string_buffer(len(content))
    buf[:] = content
    path = self._make_random_path()
    with self.fs.open_file(path, "w") as out:
        self.assertEqual(out.write_chunk(buf), len(content))
    return path
def pread_chunk(self):
    """Positional chunk read: fills the buffer from the given offset,
    reports the byte count, and leaves tell() at 0."""
    content = make_random_data()
    offset, length = 2, 3
    chunk = create_string_buffer(length)
    path = self._make_random_file(content=content)
    with self.fs.open_file(path) as fh:
        n_read = fh.pread_chunk(offset, chunk)
        self.assertEqual(n_read, length)
        self.assertEqual(chunk.value, content[offset:offset + length])
        # pread_chunk must not move the file pointer
        self.assertEqual(fh.tell(), 0)
def read_chunk(self):
    """read_chunk must fill at most len(chunk) bytes and report the count,
    for buffers smaller than, equal to and larger than the file."""
    content = make_random_data()
    path = self._make_random_file(content=content)
    size = len(content)
    for chunk_size in (size - 1, size, size + 1):
        with self.fs.open_file(path) as fh:
            buf = create_string_buffer(chunk_size)
            n_read = fh.read_chunk(buf)
            self.assertEqual(n_read, min(size, chunk_size))
            self.assertEqual(buf.value, content[:n_read])
def pread_chunk(self):
    """pread_chunk fills the buffer from an offset without moving tell()."""
    content = make_random_data()
    offset, length = 2, 3
    chunk = create_string_buffer(length)
    path = self._make_random_file(content=content)
    with self.fs.open_file(path) as reader:
        bytes_read = reader.pread_chunk(offset, chunk)
        self.assertEqual(bytes_read, length)
        expected = content[offset:offset + length]
        self.assertEqual(chunk.value, expected)
        self.assertEqual(reader.tell(), 0)
def read_chunk(self):
    """Check read_chunk against under-, exactly- and over-sized buffers:
    it must report min(file size, buffer size) bytes read."""
    content = make_random_data()
    path = self._make_random_file(content=content)
    size = len(content)
    for chunk_size in (size - 1, size, size + 1):
        chunk = create_string_buffer(chunk_size)
        with self.fs.open_file(path) as f:
            bytes_read = f.read_chunk(chunk)
        self.assertEqual(bytes_read, min(size, chunk_size))
        self.assertEqual(chunk.value, content[:bytes_read])
def get_path_info(self):
    """get_path_info must report kind/size/name for files and directories
    and raise IOError for a nonexistent path."""
    content = make_random_data()
    fpath = self._make_random_file(content=content)
    info = self.fs.get_path_info(fpath)
    self.__check_path_info(info, kind="file", size=len(content))
    self.assertTrue(info['name'].endswith(fpath))
    dpath = self._make_random_dir()
    info = self.fs.get_path_info(dpath)
    self.__check_path_info(info, kind="directory")
    self.assertTrue(info['name'].endswith(dpath))
    # a path that was never created must trigger IOError
    self.assertRaises(IOError, self.fs.get_path_info, self._make_random_path())
def file_attrs(self):
    # Check that an open file's name, size and mode attributes track its
    # state in both write and read mode.
    path = self._make_random_path()
    with self.fs.open_file(path, os.O_WRONLY) as f:
        self.assertTrue(f.name.endswith(path))
        self.assertEqual(f.size, 0)  # nothing written yet
        self.assertEqual(f.mode, "w")
        content = make_random_data()
        f.write(content)
        # size must reflect the bytes just written
        self.assertEqual(f.size, len(content))
    with self.fs.open_file(path) as f:
        self.assertTrue(f.name.endswith(path))
        self.assertEqual(f.size, len(content))
        self.assertEqual(f.mode, "r")
def _make_random_file(self, where=None, content=None, **kwargs):
    """Create a file with the given (or random) content, writing it in
    BUFSIZE-sized slices; return the file's path."""
    kwargs["flags"] = "w"
    payload = content or make_random_data()
    path = self._make_random_path(where=where)
    with self.fs.open_file(path, **kwargs) as out:
        bufsize = hdfs.common.BUFSIZE
        written = 0
        for start in xrange(0, len(payload), bufsize):
            written += out.write(payload[start:start + bufsize])
        # every byte of the payload must have been written
        self.assertEqual(written, len(payload))
    return path
def _make_random_file(self, where=None, content=None, **kwargs):
    """Write the given (or freshly generated) content to a new file in
    BUFSIZE chunks and return its path."""
    kwargs["flags"] = "w"
    data = content or make_random_data()
    path = self._make_random_path(where=where)
    with self.fs.open_file(path, **kwargs) as fo:
        step = hdfs.common.BUFSIZE
        offset, total = 0, 0
        while offset < len(data):
            total += fo.write(data[offset:offset + step])
            offset += step
        self.assertEqual(total, len(data))
    return path
def block_boundary(self):
    """Reads probed around a block boundary must each return the expected
    number of bytes (a full chunk, or whatever remains near EOF)."""
    path = self._make_random_path()
    CHUNK_SIZE = 10
    N = 2
    bs = N * get_bytes_per_checksum()
    total_data_size = 2 * bs  # exactly two blocks
    with self.fs.open_file(path, "w", blocksize=bs) as out:
        out.write(make_random_data(total_data_size))
    with self.fs.open_file(path) as fh:
        # p: last offset from which a full CHUNK_SIZE read fits
        p = total_data_size - CHUNK_SIZE
        positions = (0, 1, bs - 1, bs, bs + 1, p - 1, p, p + 1,
                     total_data_size - 1)
        for pos in positions:
            expected_len = CHUNK_SIZE if pos <= p else total_data_size - pos
            fh.seek(pos)
            self.assertEqual(len(fh.read(CHUNK_SIZE)), expected_len)
def setUp(self):
    """Create matching local and HDFS scratch directories plus a pair of
    test paths in each, and generate the shared test payload."""
    wd = tempfile.mkdtemp()
    wd_bn = os.path.basename(wd)
    self.local_wd = "file:%s" % wd
    fs = hdfs.hdfs("default", 0)
    fs.create_directory(wd_bn)
    self.hdfs_wd = fs.get_path_info(wd_bn)["name"]
    fs.close()
    basenames = ["test_path_%d" % i for i in xrange(2)]
    self.local_paths = ["%s/%s" % (self.local_wd, bn) for bn in basenames]
    self.hdfs_paths = ["%s/%s" % (self.hdfs_wd, bn) for bn in basenames]
    # payload spans several buffers plus a partial one
    self.data = make_random_data(4 * BUFSIZE + BUFSIZE / 2)
    for p in self.local_paths:
        self.assertTrue(p.startswith("file:"))
    for p in self.hdfs_paths:
        # when the default FS is local, "hdfs" paths are local too
        if not hdfs.default_is_local():
            self.assertTrue(p.startswith("hdfs:"))
def setUp(self):
    """Prepare a local scratch dir and an HDFS mirror of it, build two
    test paths under each, and generate the random data payload."""
    scratch = tempfile.mkdtemp()
    scratch_bn = os.path.basename(scratch)
    self.local_wd = "file:%s" % scratch
    fs = hdfs.hdfs("default", 0)
    fs.create_directory(scratch_bn)
    self.hdfs_wd = fs.get_path_info(scratch_bn)["name"]
    fs.close()
    basenames = ["test_path_%d" % i for i in xrange(2)]
    self.local_paths = ["%s/%s" % (self.local_wd, bn) for bn in basenames]
    self.hdfs_paths = ["%s/%s" % (self.hdfs_wd, bn) for bn in basenames]
    # four and a half buffers' worth of random data
    self.data = make_random_data(4*BUFSIZE + BUFSIZE/2)
    for path in self.local_paths:
        self.assertTrue(path.startswith("file:"))
    for path in self.hdfs_paths:
        if not hdfs.default_is_local():
            self.assertTrue(path.startswith("hdfs:"))
def flush(self):
    """Flushing a writable file after a write must not raise."""
    path = self._make_random_path()
    with self.fs.open_file(path, "w") as out:
        out.write(make_random_data())
        out.flush()
def available(self):
    """On a freshly opened file, available() must equal the file size."""
    content = make_random_data()
    path = self._make_random_file(content=content)
    with self.fs.open_file(path) as fh:
        self.assertEqual(len(content), fh.available())