コード例 #1
0
 def test_returns_dict_with_description(self):
     # _info() is called with a mocked reader; only the presence of the
     # 'version' and 'data_start_pos' keys is checked, not their values.
     partition = HDFPartition(fsopendir('temp://'), path='temp.h5')
     info = partition._info(MagicMock())
     for expected_key in ('version', 'data_start_pos'):
         self.assertIn(expected_key, info)
コード例 #2
0
    def test_removes_files(self):
        # Put an empty file in a temporary filesystem so that remove() has
        # something to delete.
        fs = fsopendir('temp://')
        fs.createfile('temp.h5')
        partition = HDFPartition(fs, path='temp.h5')

        # Precondition: the file is present before remove() is called.
        file_present_before = fs.exists('temp.h5')
        self.assertTrue(file_present_before)

        partition.remove()

        file_present_after = fs.exists('temp.h5')
        self.assertFalse(file_present_after)
コード例 #3
0
    def test_returns_stat_from_meta(self):
        partition = HDFPartition(fsopendir('temp://'), path='temp.h5')

        # Swap HDFPartition.meta for a PropertyMock so the value that `stats`
        # reads can be controlled without a real file behind the partition.
        meta_patcher = patch.object(
            HDFPartition, 'meta', new_callable=PropertyMock)
        with meta_patcher as fake_meta:
            fake_meta.return_value = {'stats': 22}
            self.assertEqual(partition.stats, 22)
コード例 #4
0
    def test_contains_meta_from_reader(self):
        fs = fsopendir('temp://')
        # _create_h5 is given the OS-level path of the file inside the
        # temporary filesystem.
        _create_h5(fs.getsyspath('temp.h5'))

        partition = HDFPartition(fs, path='temp.h5')

        # With HDFReader.meta replaced by a known dict, the partition's meta
        # must compare equal to that dict.
        reader_meta_patcher = patch.object(HDFReader, 'meta', {'a': ''})
        with reader_meta_patcher:
            self.assertEqual(partition.meta, {'a': ''})
コード例 #5
0
    def test_creates_stat_from_reader(self, fake_init, fake_run):
        # fake_init and fake_run are presumably injected by patch decorators
        # above this method (not visible in this excerpt) -- confirm there
        # which callables they replace.
        fake_run.return_value = {'a': 1}
        fake_init.return_value = None

        partition = HDFPartition(fsopendir('temp://'), path='temp.h5')

        # Both the reader and the writer are mocked out, so run_stats() should
        # hand back exactly what the patched run returns.
        reader_patcher = patch.object(partition, '_reader', MagicMock())
        writer_patcher = patch.object(partition, '_writer', MagicMock())
        with reader_patcher, writer_patcher:
            stats = partition.run_stats()
            self.assertEqual(stats, {'a': 1})
コード例 #6
0
ファイル: test_hdf.py プロジェクト: CivicSpleen/ambry_sources
        def write_small_blocks():
            # Rewrite the 'foobar' partition with one insert_row() call per
            # row, then print N divided by the elapsed time together with the
            # writer's row count.
            partition = HDFPartition(fs, path='foobar')

            # Start from a clean slate if an earlier run left a file behind.
            if partition.exists:
                partition.remove()

            with Timer() as timer, partition.writer as writer:
                writer.headers = headers

                # Row 0 of the schema holds the field names; the remaining
                # rows describe one column each.
                schema = writer.meta['schema']
                field_names = schema[0]
                type_index = field_names.index('type')
                pos_index = field_names.index('pos')

                # Record each column's type name, taken from the matching
                # value of the first data row ('pos' is offset by one).
                for column in schema[1:]:
                    sample_value = rows[0][column[pos_index] - 1]
                    column[type_index] = type(sample_value).__name__

                for i in range(N):
                    writer.insert_row(rows[i])
            print('HDF write small', float(N) / timer.elapsed, writer.n_rows)
コード例 #7
0
    def test_writes_stat_to_writer(self, fake_init, fake_run):
        # fake_init and fake_run are presumably injected by patch decorators
        # above this method (not visible in this excerpt) -- confirm there
        # which callables they replace.
        fake_init.return_value = None
        fake_run.return_value = {'stat': 1}

        partition = HDFPartition(fsopendir('temp://'), path='temp.h5')

        # The object returned by the writer's __enter__ is the one whose
        # calls are inspected below.
        entered_writer = MagicMock()
        fake_writer = MagicMock(spec=HDFWriter)
        fake_writer.__enter__ = lambda _writer: entered_writer
        # FIXME: So complicated. Refactor.

        reader_patcher = patch.object(partition, '_reader', MagicMock())
        writer_patcher = patch.object(partition, '_writer', fake_writer)
        with reader_patcher, writer_patcher:
            partition.run_stats()
            # Exactly one call is expected: set_stats() with the stats dict
            # returned by the patched run.
            expected_calls = [call.set_stats({'stat': 1})]
            self.assertEqual(entered_writer.mock_calls, expected_calls)
コード例 #8
0
 def test_returns_true_if_file_exists(self):
     # An empty file created through the filesystem API is enough for the
     # `exists` check exercised here.
     fs = fsopendir('temp://')
     fs.createfile('temp.h5')
     partition = HDFPartition(fs, path='temp.h5')
     file_exists = partition.exists
     self.assertTrue(file_exists)
コード例 #9
0
ファイル: test_hdf.py プロジェクト: CivicSpleen/ambry_sources
    def test_datafile_read_write(self):
        # Timing smoke test: writes N random rows to an HDF partition twice
        # (one bulk insert, then row-by-row), then reads them back through
        # three reader interfaces, printing N / elapsed for each step.
        # NOTE(review): the test makes no assertions; it only fails if one of
        # the read/write calls raises.
        import datetime
        from random import randint, random
        from contexttimer import Timer
        from uuid import uuid4

        fs = fsopendir('temp://')

        N = 50000

        # Basic read/write tests.

        epoch = datetime.date(1970, 1, 1)

        def rand_date_b():
            # The 10th of a random month in 2000-2015, expressed as seconds
            # elapsed since `epoch`.
            return (datetime.date(randint(2000, 2015), randint(1, 12), 10) -
                    epoch).total_seconds()

        def make_row():
            # One synthetic record: two ints, a float, a uuid string and two
            # date offsets.
            return (0, 1, random(), str(uuid4()), rand_date_b(),
                    rand_date_b())

        # One single-letter header per field of a generated row.
        headers = list('abcdefghi')[:len(make_row())]

        rows = [make_row() for _ in range(N)]

        def fresh_partition():
            # Return the 'foobar' partition, removing any existing file first.
            df = HDFPartition(fs, path='foobar')
            if df.exists:
                df.remove()
            return df

        def prepare_writer(w):
            # Set the headers on the writer and record each column's type
            # name in its schema, taken from the first data row.
            w.headers = headers
            # Row 0 of the schema holds the field names; the remaining rows
            # describe one column each.
            schema = w.meta['schema']
            type_index = schema[0].index('type')
            pos_index = schema[0].index('pos')
            for column in schema[1:]:
                # 'pos' is offset by one relative to the row tuple index.
                sample_value = rows[0][column[pos_index] - 1]
                column[type_index] = type(sample_value).__name__

        def write_large_blocks():
            # Write every row with a single insert_rows() call.
            df = fresh_partition()
            with Timer() as t, df.writer as w:
                prepare_writer(w)
                w.insert_rows(rows)

            print('HDF write large', float(N) / t.elapsed, w.n_rows)

        def write_small_blocks():
            # Write the same rows with one insert_row() call per row.
            df = fresh_partition()
            with Timer() as t, df.writer as w:
                prepare_writer(w)
                for record in rows:
                    w.insert_row(record)
            print('HDF write small', float(N) / t.elapsed, w.n_rows)

        write_large_blocks()

        write_small_blocks()

        # timing reader.
        df = HDFPartition(fs, 'foobar')

        with Timer() as t:
            count = 0
            # `i` is pre-set so the print below still works for an empty
            # reader; `s` is a constant kept only so the printed line keeps
            # its original shape.
            i = 0
            s = 0
            r = df.reader
            try:
                for i, _ in enumerate(r):
                    count += 1
            finally:
                # Close the reader even if iteration fails.
                r.close()

        print('HDFPartition iter  ', float(N) / t.elapsed, i, count, s)

        with Timer() as t:
            count = 0
            r = df.reader
            try:
                for _ in r.rows:
                    count += 1
            finally:
                r.close()

        print('HDFPartition rows  ', float(N) / t.elapsed)

        with Timer() as t:
            count = 0
            r = df.reader
            try:
                for _ in r.raw:
                    count += 1
            finally:
                r.close()
        print('HDFPartition raw   ', float(N) / t.elapsed)