Code example #1
File: test_hdfs_fs.py Project: kmatzen/pydoop
    def readline_block_boundary(self):
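        # Write whole lines until just past one block, then read the file
        # back with readline() and check that every line comes back intact
        # across the block boundary.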
        kwargs = {}
        if pydoop.hadoop_version_info().has_deprecated_bs():
            bs = hdfs.fs.hdfs().default_block_size()
        else:
            bs = u.get_bytes_per_checksum()
            kwargs["blocksize"] = bs

        line = "012345678\n"
        path = self._make_random_path()
        with self.fs.open_file(path, flags="w", **kwargs) as f:
            bytes_written = lines_written = 0
            while bytes_written < bs + 1:
                f.write(line)
                lines_written += 1
                bytes_written += len(line)
        with self.fs.open_file(path) as f:
            lines = []
            while 1:
                l = f.readline()
                if l == "":
                    break
                lines.append(l)
        self.assertEqual(len(lines), lines_written)
        for i, l in enumerate(lines):
            self.assertEqual(l, line, "line %d: %r != %r" % (i, l, line))
Code example #2
File: common_hdfs_tests.py Project: kmatzen/pydoop
  def block_boundary(self):
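    # Write two blocks' worth of random data in BUFSIZE chunks, then seek
    # to positions around the block boundary and near EOF and check how
    # many bytes each read() returns.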
    path = self._make_random_path()
    CHUNK_SIZE = 10
    N = 2
    kwargs = {}
    if pydoop.hadoop_version_info().has_deprecated_bs():
      bs = hdfs.fs.hdfs().default_block_size()
    else:
      bs = N * get_bytes_per_checksum()
      kwargs['blocksize'] = bs
    total_data_size = 2 * bs
    with self.fs.open_file(path, "w", **kwargs) as f:
      data = make_random_data(total_data_size)
      i = 0
      bufsize = hdfs.common.BUFSIZE
      while i < len(data):
        f.write(data[i:i+bufsize])
        i += bufsize

    with self.fs.open_file(path) as f:
      p = total_data_size - CHUNK_SIZE
      for pos in 0, 1, bs-1, bs, bs+1, p-1, p, p+1, total_data_size-1:
        expected_len = CHUNK_SIZE if pos <= p else total_data_size - pos
        f.seek(pos)
        chunk = f.read(CHUNK_SIZE)
        self.assertEqual(len(chunk), expected_len)
Code example #3
    def block_boundary(self):
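        # Write two blocks' worth of random data in BUFSIZE chunks, then
        # seek to positions around the block boundary and near EOF and
        # check how many bytes each read() returns.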
        path = self._make_random_path()
        CHUNK_SIZE = 10
        N = 2
        kwargs = {}
        if pydoop.hadoop_version_info().has_deprecated_bs():
            bs = hdfs.fs.hdfs().default_block_size()
        else:
            bs = N * get_bytes_per_checksum()
            kwargs['blocksize'] = bs
        total_data_size = 2 * bs
        with self.fs.open_file(path, "w", **kwargs) as f:
            data = make_random_data(total_data_size)
            i = 0
            bufsize = hdfs.common.BUFSIZE
            while i < len(data):
                f.write(data[i:i + bufsize])
                i += bufsize

        with self.fs.open_file(path) as f:
            p = total_data_size - CHUNK_SIZE
            for pos in 0, 1, bs - 1, bs, bs + 1, p - 1, p, p + 1, total_data_size - 1:
                expected_len = CHUNK_SIZE if pos <= p else total_data_size - pos
                f.seek(pos)
                chunk = f.read(CHUNK_SIZE)
                self.assertEqual(len(chunk), expected_len)
Code example #4
File: test_hdfs_fs.py Project: onlynone/pydoop
  def readline_block_boundary(self):
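    # Write whole lines until just past one block, then read the file back
    # with readline() and check that every line comes back intact across
    # the block boundary.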
    kwargs = {}
    if pydoop.hadoop_version_info().has_deprecated_bs():
      bs = hdfs.fs.hdfs().default_block_size()
    else:
      bs = u.get_bytes_per_checksum()
      kwargs['blocksize'] = bs

    line = "012345678\n"
    path = self._make_random_path()
    with self.fs.open_file(path, flags="w", **kwargs) as f:
      bytes_written = lines_written = 0
      while bytes_written < bs + 1:
        f.write(line)
        lines_written += 1
        bytes_written += len(line)
    with self.fs.open_file(path) as f:
      lines = []
      while 1:
        l = f.readline()
        if l == "":
          break
        lines.append(l)
    self.assertEqual(len(lines), lines_written)
    for i, l in enumerate(lines):
      self.assertEqual(l, line, "line %d: %r != %r" % (i, l, line))
Code example #5
  def block_boundary(self):
    path = self._make_random_path()
    CHUNK_SIZE = 10
    N = 2
    bs = N * get_bytes_per_checksum()
    total_data_size = 2 * bs
    with self.fs.open_file(path, "w", blocksize=bs) as f:
      f.write(make_random_data(total_data_size))
    with self.fs.open_file(path) as f:
      p = total_data_size - CHUNK_SIZE
      for pos in 0, 1, bs-1, bs, bs+1, p-1, p, p+1, total_data_size-1:
        expected_len = CHUNK_SIZE if pos <= p else total_data_size - pos
        f.seek(pos)
        chunk = f.read(CHUNK_SIZE)
        self.assertEqual(len(chunk), expected_len)
Code example #6
File: test_hdfs_fs.py Project: jkahn/pydoop-code
    def readline_block_boundary(self):
        bs = u.get_bytes_per_checksum()
        line = "012345678\n"
        path = self._make_random_path()
        with self.fs.open_file(path, flags="w", blocksize=bs) as f:
            bytes_written = lines_written = 0
            while bytes_written < bs + 1:
                f.write(line)
                lines_written += 1
                bytes_written += len(line)
        with self.fs.open_file(path) as f:
            lines = []
            while 1:
                l = f.readline()
                if l == "":
                    break
                lines.append(l)
        self.assertEqual(len(lines), lines_written)
        for i, l in enumerate(lines):
            self.assertEqual(l, line, "line %d: %r != %r" % (i, l, line))
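
All of the examples above follow the same pattern: pick a block size (either the filesystem default or a small multiple of the checksum chunk), write slightly more than one block, and then read back across the boundary. The sketch below is a minimal standalone version of that pattern; it is not taken from any of the projects listed, and it assumes pydoop is installed, an HDFS instance is reachable, and that the hard-coded path and block size are acceptable on that cluster.

import pydoop.hdfs as hdfs

fs = hdfs.hdfs()                    # connect to the default HDFS instance
path = "/tmp/block_boundary_demo"   # hypothetical path, adjust as needed
bs = 1048576                        # 1 MiB: illustrative only; must be a multiple of the
                                    # bytes-per-checksum and meet the cluster's minimum block size
line = b"012345678\n"

# Write whole lines until just past the first block.
with fs.open_file(path, "w", blocksize=bs) as f:
    written = 0
    while written < bs + len(line):
        f.write(line)
        written += len(line)

# Read a chunk that straddles the block boundary and check its length.
with fs.open_file(path) as f:
    f.seek(bs - 5)
    chunk = f.read(10)
    assert len(chunk) == 10

fs.delete(path)
fs.close()

The tests above extend this idea by also probing positions 0, 1, bs-1, bs, bs+1 and the region just before EOF, and, in the readline variants, by checking line contents rather than just read lengths.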