def test_round_trip(self):
    '''Can round-trip content.'''
    content = 'This is some test content.'
    with scratch_file('example.gz') as path:
        with gzippy.open(path, 'wb') as fout:
            fout.write(content)

        with gzippy.open(path, 'rb') as fin:
            self.assertEqual(fin.read(), content)
def test_incremental_reads(self):
    '''Incremental reads.'''
    content = b'This is some test content.'
    with scratch_file('example.gz') as path:
        with gzippy.open(path, 'wb') as fout:
            fout.write(content)

        with gzippy.open(path, 'rb') as fin:
            self.assertEqual(fin.read(10), content[:10])
def test_lines(self):
    '''Can read the file line by line.'''
    parts = ['some\n', 'lines\n', 'in\n', 'a\n', 'file']
    content = ''.join(parts)
    with scratch_file('example.gz') as path:
        with gzippy.open(path, 'wb') as fout:
            fout.write(content)

        with gzippy.open(path) as fin:
            self.assertEqual(list(fin), parts)
def test_unsupported_incremental_reads(self):
    '''Incremental reads are not yet supported.'''
    content = 'This is some test content.'
    with scratch_file('example.gz') as path:
        with gzippy.open(path, 'wb') as fout:
            fout.write(content)

        with self.assertRaises(IOError):
            with gzippy.open(path, 'rb') as fin:
                fin.read(10)
def test_lines_consolidation(self):
    '''Consolidates lines across multiple chunks.'''
    parts = ['some\n', 'lines\n', 'in\n', 'a\n', 'file']
    chunks = ['so', 'm', 'e\nlines\n', 'i', 'n', '\n', 'a\nfile']
    content = ''.join(chunks)
    with scratch_file('example.gz') as path:
        with gzippy.open(path, 'wb') as fout:
            fout.write(content)

        with gzippy.open(path) as fin:
            with mock.patch.object(fin, 'chunks', iter(chunks)):
                self.assertEqual(list(fin), parts)
def test_reader_size_mismatch(self):
    '''Raises an exception when the size doesn't match.'''
    with scratch_file('example.gz') as path:
        with gzippy.open(path, 'wb') as fout:
            fout.write('This is some test content.')

        # Rewrite the last four bytes
        with open(path, 'r+b') as fout:
            fout.seek(-4, 2)
            fout.write(b'\xFF' * 4)

        with self.assertRaises(IOError):
            with gzippy.open(path) as fin:
                fin.read()
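# Background for the test above, not part of the suite: per RFC 1952 a gzip
# member ends with an eight-byte trailer -- the CRC-32 of the uncompressed data
# followed by its length modulo 2**32 (ISIZE), both little-endian. Overwriting
# the last four bytes corrupts ISIZE, so a reader that compares it against the
# number of bytes it actually inflated can raise IOError, which is presumably
# what this test provokes. A minimal standard-library sketch of reading that
# trailer; the helper name is illustrative and not part of gzippy:
import struct

def read_gzip_trailer(path):
    '''Return (crc32, isize) from the final eight bytes of a gzip file.'''
    with open(path, 'rb') as fin:
        fin.seek(-8, 2)  # the trailer occupies the last eight bytes
        crc32, isize = struct.unpack('<II', fin.read(8))
    return crc32, isize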
def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images. This is a
    numpy ndarray with 50,000 entries. Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries. Those entries are just the digit values
    (0...9) for the corresponding images contained in the first entry
    of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except each
    contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    f = gzip.open('../data/mnist.pkl.gz', 'rb')
    training_data, validation_data, test_data = cPickle.load(f)
    f.close()
    return (training_data, validation_data, test_data)
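# A usage sketch (illustrative, not part of the original module): unpack the
# return value and sanity-check the shapes described in the docstring above.
# Assumes numpy arrays of the stated sizes and that ../data/mnist.pkl.gz is
# present at the documented path.
training_data, validation_data, test_data = load_data()
train_images, train_labels = training_data
assert train_images.shape == (50000, 784)  # 50,000 flattened 28x28 images
assert train_labels.shape == (50000,)      # digit labels 0..9
assert validation_data[0].shape == (10000, 784)
assert test_data[0].shape == (10000, 784)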
def test_open_with_append(self):
    '''Opening in append mode is not allowed.'''
    with scratch_file('example.gz') as path:
        with open(path, 'w+') as fout:
            pass

        with self.assertRaises(ValueError):
            with gzippy.open(path, 'ab') as fout:
                pass
def test_open_with_plus(self):
    '''Opening with r+ is not allowed.'''
    with scratch_file('example.gz') as path:
        with open(path, 'w+') as fout:
            pass

        with self.assertRaises(ValueError):
            with gzippy.open(path, 'r+') as fin:
                pass
import gzip
import time

import gzippy


def timer(name, runs):
    '''Yield `runs` times, timing each iteration and printing the average.'''
    print(name)
    times = []
    for run in range(runs):
        start = -time.time()
        yield run
        start += time.time()
        times.append(start)
    print('Average of %s runs: %s\n' % (runs, sum(times) / len(times)))


split = [b'This is some example content'] * 100000
joined = b'\n'.join(split)

for _ in timer('Gzip single write', 5):
    with gzip.open('example.gz', 'wb') as fout:
        fout.write(joined)

for _ in timer('Gzippy single write', 5):
    with gzippy.open('example.gz', 'wb') as fout:
        fout.write(joined)

for _ in timer('Gzip with repeated writes', 5):
    with gzip.open('example.gz', 'wb') as fout:
        for element in split:
            fout.write(element)

for _ in timer('Gzippy with repeated writes', 5):
    with gzippy.open('example.gz', 'wb') as fout:
        for element in split:
            fout.write(element)

# Write out an example file to read back
with gzip.open('example.gz', 'wb') as fout:
    fout.write(joined)