def test_returns_dict_with_description(self):
    temp_fs = fsopendir('temp://')
    reader = MagicMock()
    hdf_partition = HDFPartition(temp_fs, path='temp.h5')
    ret = hdf_partition._info(reader)
    self.assertIn('version', ret)
    self.assertIn('data_start_pos', ret)
def test_removes_files(self):
    temp_fs = fsopendir('temp://')
    temp_fs.createfile('temp.h5')
    hdf_partition = HDFPartition(temp_fs, path='temp.h5')
    self.assertTrue(temp_fs.exists('temp.h5'))
    hdf_partition.remove()
    self.assertFalse(temp_fs.exists('temp.h5'))
def test_returns_stat_from_meta(self):
    temp_fs = fsopendir('temp://')
    hdf_partition = HDFPartition(temp_fs, path='temp.h5')
    with patch.object(HDFPartition, 'meta', new_callable=PropertyMock) as fake_meta:
        fake_meta.return_value = {'stats': 22}
        self.assertEqual(hdf_partition.stats, 22)
def test_contains_meta_from_reader(self):
    temp_fs = fsopendir('temp://')
    filename = temp_fs.getsyspath('temp.h5')
    _create_h5(filename)
    hdf_partition = HDFPartition(temp_fs, path='temp.h5')
    with patch.object(HDFReader, 'meta', {'a': ''}):
        self.assertEqual(hdf_partition.meta, {'a': ''})
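
# A minimal stand-in for the _create_h5 fixture referenced above (an
# assumption: the real helper may write more structure than this). The test
# only needs a valid HDF5 file for HDFPartition/HDFReader to open; PyTables
# is one way to produce such a file.
def _create_h5(filename):
    import tables
    h5 = tables.open_file(filename, mode='w')
    try:
        h5.create_group('/', 'partition', 'partition tables')
    finally:
        h5.close()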
def test_creates_stat_from_reader(self, fake_init, fake_run):
    # fake_init and fake_run are injected by @patch decorators (not shown in this excerpt).
    fake_init.return_value = None
    fake_run.return_value = {'a': 1}
    temp_fs = fsopendir('temp://')
    hdf_partition = HDFPartition(temp_fs, path='temp.h5')
    with patch.object(hdf_partition, '_reader', MagicMock()):
        with patch.object(hdf_partition, '_writer', MagicMock()):
            ret = hdf_partition.run_stats()
            self.assertEqual(ret, {'a': 1})
def test_writes_stat_to_writer(self, fake_init, fake_run):
    # fake_init and fake_run are injected by @patch decorators (not shown in this excerpt).
    fake_run.return_value = {'stat': 1}
    fake_init.return_value = None
    temp_fs = fsopendir('temp://')
    hdf_partition = HDFPartition(temp_fs, path='temp.h5')
    fake_reader = MagicMock()
    fake_writer = MagicMock(spec=HDFWriter)
    fake_set_stats = MagicMock()
    fake_writer.__enter__ = lambda x: fake_set_stats
    # FIXME: So complicated. Refactor.
    with patch.object(hdf_partition, '_reader', fake_reader):
        with patch.object(hdf_partition, '_writer', fake_writer):
            hdf_partition.run_stats()
    self.assertEqual(fake_set_stats.mock_calls, [call.set_stats({'stat': 1})])
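
# One possible answer to the FIXME above (a sketch, not part of the original
# suite). It assumes HDFWriter implements the context-manager protocol, which
# the benchmark below relies on (`with ... df.writer as w`); under that
# assumption MagicMock(spec=HDFWriter) preconfigures __enter__, so the
# hand-rolled lambda is unnecessary.
def test_writes_stat_to_writer_simpler(self, fake_init, fake_run):
    fake_run.return_value = {'stat': 1}
    fake_init.return_value = None
    temp_fs = fsopendir('temp://')
    hdf_partition = HDFPartition(temp_fs, path='temp.h5')
    fake_writer = MagicMock(spec=HDFWriter)
    # The object bound by `with df.writer as w:` is __enter__'s return value.
    fake_set_stats = fake_writer.__enter__.return_value
    with patch.object(hdf_partition, '_reader', MagicMock()):
        with patch.object(hdf_partition, '_writer', fake_writer):
            hdf_partition.run_stats()
    fake_set_stats.set_stats.assert_called_once_with({'stat': 1})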
def test_returns_true_if_file_exists(self):
    temp_fs = fsopendir('temp://')
    temp_fs.createfile('temp.h5')
    hdf_partition = HDFPartition(temp_fs, path='temp.h5')
    self.assertTrue(hdf_partition.exists)
def test_datafile_read_write(self):
    import datetime
    from random import randint, random
    from contexttimer import Timer
    from uuid import uuid4

    fs = fsopendir('temp://')
    N = 50000

    # Basic read/write tests.

    def rand_date_a():
        return datetime.date(randint(2000, 2015), randint(1, 12), 10)

    epoch = datetime.date(1970, 1, 1)

    def rand_date_b():
        return (datetime.date(randint(2000, 2015), randint(1, 12), 10) - epoch).total_seconds()

    row = lambda: (0, 1, random(), str(uuid4()), rand_date_b(), rand_date_b())
    headers = list('abcdefghi')[:len(row())]

    rows = [row() for i in range(N)]

    def write_large_blocks():
        df = HDFPartition(fs, path='foobar')
        if df.exists:
            df.remove()
        with Timer() as t, df.writer as w:
            w.headers = headers
            type_index = w.meta['schema'][0].index('type')
            pos_index = w.meta['schema'][0].index('pos')
            columns = w.meta['schema'][1:]
            for column in columns:
                column[type_index] = type(rows[0][column[pos_index] - 1]).__name__
            w.insert_rows(rows)
        print('HDF write large', float(N) / t.elapsed, w.n_rows)

    def write_small_blocks():
        df = HDFPartition(fs, path='foobar')
        if df.exists:
            df.remove()
        with Timer() as t, df.writer as w:
            w.headers = headers
            type_index = w.meta['schema'][0].index('type')
            pos_index = w.meta['schema'][0].index('pos')
            columns = w.meta['schema'][1:]
            for column in columns:
                column[type_index] = type(rows[0][column[pos_index] - 1]).__name__
            for i in range(N):
                w.insert_row(rows[i])
        print('HDF write small', float(N) / t.elapsed, w.n_rows)

    write_large_blocks()
    write_small_blocks()

    # Timing the reader.
    df = HDFPartition(fs, 'foobar')

    with Timer() as t:
        count = 0
        i = 0
        s = 0
        r = df.reader
        for i, row in enumerate(r):
            count += 1
        r.close()
    print('HDFPartition iter ', float(N) / t.elapsed, i, count, s)

    with Timer() as t:
        count = 0
        r = df.reader
        for row in r.rows:
            count += 1
        r.close()
    print('HDFPartition rows ', float(N) / t.elapsed)

    with Timer() as t:
        count = 0
        r = df.reader
        for row in r.raw:
            count += 1
        r.close()
    print('HDFPartition raw ', float(N) / t.elapsed)
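
# A small variation on the timing loops above (a sketch, not part of the
# original suite): wrapping the reader in try/finally guarantees close()
# runs even if iteration raises. It uses only the reader API shown above
# (df.reader, the .rows iterator, and close()).
def count_rows(df):
    r = df.reader
    try:
        return sum(1 for _ in r.rows)
    finally:
        r.close()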