Example #1
0
    def test_time_box_date_io(self):
        """Round-trip explicit dates through ``write()`` and ``read()``.

        The fixture's ``_date_differentials`` are cleared and four irregularly
        spaced dates are assigned to ``_dates``. After ``write()`` the box is
        expected to hold three uint8 differentials expressed in ``HOURS``
        (36, 17 and 43 hours between consecutive dates). A fresh ``TimeBox``
        reading the same file must report the same header flags and the same
        differentials.

        The scratch file is removed in a ``finally`` clause so a failing
        assertion does not leave ``date_io.npb`` behind for later runs.
        """
        file_name = 'date_io.npb'
        # Gaps between consecutive dates below, in hours.
        expected_hours = [24 + 12, 12 + 5, 19 + 24]

        tb = example_time_box(file_name)
        tb._date_differentials = None
        tb._dates = np.array([
            np.datetime64('2018-01-01T00:00', 's'),
            np.datetime64('2018-01-02T12:00', 's'),
            np.datetime64('2018-01-03T05:00', 's'),
            np.datetime64('2018-01-05T00:00', 's')
        ])
        try:
            tb.write()
            self.assertEqual(3, tb._date_differentials.size)
            self.assertEqual(np.uint8, tb._date_differentials.dtype)
            self.assertEqual(HOURS, tb._date_differential_units)
            for position, hours in enumerate(expected_hours):
                self.assertEqual(hours, tb._date_differentials[position])

            tb_new = TimeBox(file_name)
            tb_new.read()
            self.assertEqual(1, tb_new._timebox_version)
            self.assertFalse(tb_new._tag_names_are_strings)
            self.assertTrue(tb_new._date_differentials_stored)
            self.assertEqual(1, tb_new._bytes_per_date_differential)
            self.assertEqual(HOURS, tb_new._date_differential_units)
            self.assertEqual(np.uint8, tb_new._date_differentials.dtype)

            self.assertEqual(3, tb_new._date_differentials.size)
            for position, hours in enumerate(expected_hours):
                self.assertEqual(hours, tb_new._date_differentials[position])
        finally:
            # Clean up even when an assertion above fails; the existence check
            # keeps a failed write() from being masked by FileNotFoundError.
            if os.path.exists(file_name):
                os.remove(file_name)
Example #2
0
    def test_unpack_options(self):
        """Decode every combination of the two option bits.

        Individual bits are inspected; with only two options so far the
        integer values 0 through 3 cover all cases:

            0 -> 0000 0000
            1 -> 0000 0001
            2 -> 0000 0010
            3 -> 0000 0011
        """
        box = TimeBox('')

        # (packed value, tag names are strings?, date differentials stored?)
        expectations = (
            (0, False, False),
            (1, True, False),
            (2, False, True),
            (3, True, True),
        )
        for packed, names_flag, differentials_flag in expectations:
            box._unpack_options(packed)

            check_names = self.assertTrue if names_flag else self.assertFalse
            check_names(box._tag_names_are_strings)

            check_differentials = (
                self.assertTrue if differentials_flag else self.assertFalse)
            check_differentials(box._date_differentials_stored)
Example #3
0
    def test_io_pandas(self):
        """Save a CSV-backed DataFrame with ``save_pandas`` and load it back.

        Checks, in order, that the reloaded frame has the same column labels,
        that every index entry of the date-sorted original matches the
        reloaded index when both are parsed as datetimes, and that each
        column's sum is unchanged.

        The scratch file is removed in a ``finally`` clause so a failing
        assertion does not leave ``save_pandas.npb`` behind.
        """
        file_name = 'save_pandas.npb'
        df = pd.read_csv('timebox/tests/data/test1.csv').set_index('date')
        logging.debug('Starting test_io_pandas with df\n{}'.format(df))
        try:
            TimeBox.save_pandas(df, file_name)
            df2 = TimeBox(file_name).to_pandas()
            self.assertListEqual(list(df.columns.sort_values()),
                                 list(df2.columns.sort_values()))

            df = df.sort_index()
            # ensure index is same; df2 is indexed by position so a shorter
            # reloaded frame still fails loudly with IndexError
            for i, original_date in enumerate(df.index):
                self.assertEqual(pd.to_datetime(original_date),
                                 pd.to_datetime(df2.index[i]))

            # ensure each value is the same (compared through column sums)
            for c in df.columns:
                logging.debug('Testing column: {}'.format(c))
                logging.debug('Original frame:{}'.format(df[c]))
                logging.debug('TB frame:{}'.format(df2[c]))
                self.assertEqual(df[c].sum(), df2[c].sum())
        finally:
            # Clean up even when an assertion above fails.
            if os.path.exists(file_name):
                os.remove(file_name)
Example #4
0
 def test_read_save_read_save(self):
     """Smoke test: repeated read/write cycles on one file must not raise.

     A DataFrame is saved with ``save_pandas``; a second ``TimeBox`` opened
     on the same path reads, rewrites and re-reads the file, and finally the
     original object reads it again. There are no value assertions; the test
     passes as long as no call raises.

     The scratch file is removed in a ``finally`` clause so an exception in
     any of the calls does not leave it in the test data directory.
     """
     tb_file_name = 'timebox/tests/data/test_tb_io.npb'
     df = pd.read_csv('timebox/tests/data/ETH-USD_integers.csv',
                      index_col=0)
     try:
         tb = TimeBox.save_pandas(df, tb_file_name)
         tb2 = TimeBox(tb.file_path)
         tb2.read()
         tb2.write()
         tb2.read()
         # The first object must still be able to read the rewritten file.
         tb.read()
     finally:
         if os.path.exists(tb_file_name):
             os.remove(tb_file_name)
    def test_read_write_data_with_tag_name_as_string(self):
        """Write a box with string tag names and read the names back.

        Before writing, the identifier width is recomputed and expected to be
        ``15 * 4`` (presumably 4 bytes for each of the 15 characters of the
        longest name, 'tag_2_long_name' -- confirm against
        ``_update_required_bytes_for_tag_identifier``). After reading the file
        with a fresh ``TimeBox`` the string-name flag must be set and all
        three tag names must be present.

        The scratch file is removed in a ``finally`` clause so a failing
        assertion does not leave ``test_io.npb`` behind.
        """
        file_name = 'test_io.npb'
        tb = example_time_box(file_name)
        tb._update_required_bytes_for_tag_identifier()
        self.assertEqual(15 * 4, tb._num_bytes_for_tag_identifier)
        try:
            tb.write()

            tb_read = TimeBox(file_name)
            tb_read.read()
            self.assertTrue(tb_read._tag_names_are_strings)
            self.assertEqual(3, len(tb_read._tags))
            # assertIn reports the missing name and the container on failure.
            for tag_name in ('tag_0', 'tag_1', 'tag_2_long_name'):
                self.assertIn(tag_name, tb_read._tags)
        finally:
            # Clean up even when an assertion above fails.
            if os.path.exists(file_name):
                os.remove(file_name)
Example #6
0
    def test_save_pandas(self):
        """``save_pandas`` creates the file and preserves the column labels.

        The reference CSV is loaded with its first column as the index, saved
        through ``TimeBox.save_pandas`` and loaded back with ``to_pandas``.
        Only the existence of the file and the (sorted) column labels are
        compared; values are not checked here.

        The scratch file is removed in a ``finally`` clause so a failing
        assertion does not leave ``save_pandas.npb`` behind.
        """
        file_name = 'save_pandas.npb'
        df = pd.read_csv('timebox/tests/data/ETH-USD_combined_utc.csv',
                         index_col=0)
        try:
            TimeBox.save_pandas(df, file_name)
            self.assertTrue(os.path.exists(file_name))

            df2 = TimeBox(file_name).to_pandas()

            # Iterating a DataFrame yields its column labels.
            self.assertListEqual(sorted(df), sorted(df2))
        finally:
            if os.path.exists(file_name):
                os.remove(file_name)
def example_time_box(file_name: str):
    """Build a four-point TimeBox fixture whose tags are named by strings.

    The box is marked as version 1 with string tag names and no stored date
    differentials; points start at 2018-01-01 and are 3600 seconds apart.
    Three tags are attached, holding uint8, int16 and float32 sample data.

    :param file_name: path handed to the ``TimeBox`` constructor
    :return: the populated ``TimeBox``; nothing is written by this function
    """
    # (name, 2nd and 3rd TimeBoxTag arguments, sample values, numpy dtype)
    # NOTE(review): the 2nd/3rd arguments look like bytes-per-value and a
    # type character, matching the dtypes below -- confirm in TimeBoxTag.
    tag_specs = (
        ('tag_0', 1, 'u', [1, 2, 3, 4], np.uint8),
        ('tag_1', 2, 'i', [-4, -2, 0, 2000], np.int16),
        ('tag_2_long_name', 4, 'f', [5.2, 0.8, 3.1415, 8], np.float32),
    )

    box = TimeBox(file_name)
    box._timebox_version = 1
    box._tag_names_are_strings = True
    box._date_differentials_stored = False
    box._num_points = 4
    box._tags = {
        name: TimeBoxTag(name, width, kind)
        for name, width, kind, _values, _dtype in tag_specs
    }
    box._start_date = np.datetime64('2018-01-01', 's')
    box._seconds_between_points = 3600
    for name, _width, _kind, values, dtype in tag_specs:
        box._tags[name].data = np.array(values, dtype=dtype)
    return box
Example #8
0
    def test_tag_definitions_to_from_bytes_integer(self):
        """Round-trip integer-keyed tag definitions through their byte form.

        The example definitions are serialised, checked for size and byte
        sum, parsed back and serialised again; both byte strings must match.
        Re-serialising with a different identifier width, and then with the
        string-name flag switched on, must each give different bytes.
        """
        def encode(definitions, identifier_bytes, names_are_strings):
            # Serialise every definition in the mapping's iteration order.
            return TimeBoxTag.tag_list_to_bytes(
                [definitions[key] for key in definitions],
                identifier_bytes, names_are_strings)

        first = TimeBox('')
        first._tag_names_are_strings = False
        first._tags = example_tag_definitions()
        first._update_required_bytes_for_tag_identifier()
        tags_bytes_results = encode(first._tags,
                                    first._num_bytes_for_tag_identifier,
                                    first._tag_names_are_strings)
        expected_size = 5 * (2 + NUM_BYTES_PER_DEFINITION_WITHOUT_IDENTIFIER)
        self.assertEqual(expected_size, tags_bytes_results.num_bytes)
        byte_total = np.frombuffer(tags_bytes_results.byte_code,
                                   dtype=np.uint8).sum()
        self.assertEqual(813, byte_total)

        # Parse the bytes back and make sure re-encoding reproduces them.
        second = TimeBox('')
        second._num_bytes_for_tag_identifier = 2
        second._tag_names_are_strings = False
        second._tag_definitions = TimeBoxTag.tag_definitions_from_bytes(
            tags_bytes_results.byte_code,
            second._num_bytes_for_tag_identifier,
            second._tag_names_are_strings)
        second_tag_bytes_results = encode(second._tag_definitions,
                                          second._num_bytes_for_tag_identifier,
                                          second._tag_names_are_strings)
        self.assertEqual(tags_bytes_results.num_bytes,
                         second_tag_bytes_results.num_bytes)
        self.assertEqual(tags_bytes_results.byte_code,
                         second_tag_bytes_results.byte_code)

        # A different identifier width must change the encoding.
        second._num_bytes_for_tag_identifier = 4
        wider_identifier = encode(second._tag_definitions,
                                  second._num_bytes_for_tag_identifier,
                                  second._tag_names_are_strings)
        self.assertNotEqual(second_tag_bytes_results.byte_code,
                            wider_identifier.byte_code)

        # Switching to string identifiers must change it as well.
        second._tag_names_are_strings = True
        string_identifier = encode(second._tag_definitions,
                                   second._num_bytes_for_tag_identifier,
                                   second._tag_names_are_strings)
        self.assertNotEqual(second_tag_bytes_results.byte_code,
                            string_identifier.byte_code)
Example #9
0
    def test_update_required_bytes(self):
        """Identifier width follows the tags currently held by the box.

        With integer identifiers, adding tag 0 is expected to need 1 byte and
        adding tag 256 to need 2. With string identifiers 'a', 'ab' and
        'abc', 12 bytes are expected.
        """
        box = TimeBox('')

        box._tag_names_are_strings = False
        for identifier, expected_width in ((0, 1), (256, 2)):
            box._tags[identifier] = TimeBoxTag(identifier, 1, 'u')
            box._update_required_bytes_for_tag_identifier()
            self.assertEqual(expected_width, box._num_bytes_for_tag_identifier)

        box._tag_names_are_strings = True
        box._tags = {
            name: TimeBoxTag(name, 1, 'u') for name in ('a', 'ab', 'abc')
        }
        box._update_required_bytes_for_tag_identifier()
        self.assertEqual(12, box._num_bytes_for_tag_identifier)
Example #10
0
 def test_init(self):
     """The constructor exposes the path it was given as ``file_path``."""
     path = 'test_file_path.txt'
     box = TimeBox(path)
     self.assertEqual(path, box.file_path)
Example #11
0
    def test_read_write_file_info_date_deltas(self):
        """Round-trip the file-info header when date differentials are stored.

        A box configured with integer tag identifiers, stored date
        differentials (4 bytes each, in ``SECONDS``) and the example tag
        definitions writes its header with ``_write_file_info``. A fresh box
        reads it with ``_read_file_info``. Both calls must return 230
        (presumably the header size in bytes -- confirm against the
        implementation), and every header field and tag definition must match.

        The scratch file is removed in a ``finally`` clause so a failing
        assertion does not leave ``test_delta.npb`` behind.
        """
        tb = TimeBox('')
        tb._timebox_version = 1
        tb._tag_names_are_strings = False
        tb._date_differentials_stored = True
        tb._num_points = 10
        tb._tags = example_tag_definitions()
        tb._start_date = np.datetime64('2018-01-01', 's')
        tb._bytes_per_date_differential = 4
        tb._date_differential_units = SECONDS

        # Value both the writer and the reader are expected to return.
        expected_info_size = 230
        header_fields = (
            '_timebox_version',
            '_tag_names_are_strings',
            '_date_differentials_stored',
            '_num_points',
            '_start_date',
            '_bytes_per_date_differential',
            '_date_differential_units',
        )

        file_name = 'test_delta.npb'
        try:
            with open(file_name, 'wb') as f:
                self.assertEqual(expected_info_size, tb._write_file_info(f))

            tb_read = TimeBox('')
            with open(file_name, 'rb') as f:
                self.assertEqual(expected_info_size,
                                 tb_read._read_file_info(f))

            for field in header_fields:
                # msg names the field so a mismatch is easy to locate.
                self.assertEqual(getattr(tb, field), getattr(tb_read, field),
                                 msg=field)

            for t in tb._tags:
                self.assertIn(t, tb_read._tags)
                self.assertEqual(tb._tags[t].identifier,
                                 tb_read._tags[t].identifier)
                self.assertEqual(tb._tags[t].type_char,
                                 tb_read._tags[t].type_char)
                self.assertEqual(tb._tags[t].dtype, tb_read._tags[t].dtype)
                self.assertEqual(tb._tags[t].bytes_per_value,
                                 tb_read._tags[t].bytes_per_value)
        finally:
            # Clean up even when an assertion above fails.
            if os.path.exists(file_name):
                os.remove(file_name)
Example #12
0
    def test_encode_options(self):
        """Encode every combination of the two option flags.

        Individual bits are inspected; with only two options so far the
        integer values 0 through 3 cover all cases:

            0 -> 0000 0000
            1 -> 0000 0001
            2 -> 0000 0010
            3 -> 0000 0011
        """
        box = TimeBox('')

        # (tag names are strings?, date differentials stored?, encoded value)
        expectations = (
            (False, False, 0),
            (True, False, 1),
            (False, True, 2),
            (True, True, 3),
        )
        for names_flag, differentials_flag, encoded in expectations:
            box._tag_names_are_strings = names_flag
            box._date_differentials_stored = differentials_flag
            self.assertEqual(encoded, box._encode_options())
Example #13
0
 def test_pandas_errors(self):
     """``save_pandas`` must raise ``InvalidPandasIndexError`` for this frame.

     The frame is built from a single uint8 column with no explicit index,
     so it carries pandas' default index.
     """
     column = np.array([0, 1, 2], dtype=np.uint8)
     frame = pd.DataFrame.from_dict({'value_1': column}, orient='columns')
     self.assertRaises(InvalidPandasIndexError,
                       TimeBox.save_pandas, frame, 'not_going_to_save.npb')
Example #14
0
def example_time_box(file_name: str):
    """Build a four-point TimeBox fixture with integer tags and date deltas.

    The box is marked as version 1 with integer tag identifiers and stored
    date differentials. Three tags (0, 1, 2) hold uint8, int16 and float32
    sample data, and three one-unit differentials in ``DAYS`` are attached,
    each declared as one byte wide.

    :param file_name: path handed to the ``TimeBox`` constructor
    :return: the populated ``TimeBox``; nothing is written by this function
    """
    # (identifier, 2nd and 3rd TimeBoxTag arguments, sample values, dtype)
    # NOTE(review): the 2nd/3rd arguments look like bytes-per-value and a
    # type character, matching the dtypes below -- confirm in TimeBoxTag.
    tag_specs = (
        (0, 1, 'u', [1, 2, 3, 4], np.uint8),
        (1, 2, 'i', [-4, -2, 0, 2000], np.int16),
        (2, 4, 'f', [5.2, 0.8, 3.1415, 8], np.float32),
    )

    box = TimeBox(file_name)
    box._timebox_version = 1
    box._tag_names_are_strings = False
    box._date_differentials_stored = True
    box._num_points = 4
    box._tags = {
        identifier: TimeBoxTag(identifier, width, kind)
        for identifier, width, kind, _values, _dtype in tag_specs
    }
    box._start_date = np.datetime64('2018-01-01', 's')

    for identifier, _width, _kind, values, dtype in tag_specs:
        box._tags[identifier].data = np.array(values, dtype=dtype)

    box._date_differentials = np.array([1, 1, 1], dtype=np.uint8)
    box._date_differential_units = DAYS
    box._bytes_per_date_differential = 1
    return box
Example #15
0
# Benchmark script: time a pandas CSV round trip and a TimeBox round trip of
# the same reference data, reporting each through write_result().
# `reference_file`, `write_result` and `time` are defined outside this section.

# --- pandas CSV baseline: time reading the reference CSV ---
start = time()
df = pd.read_csv(reference_file, index_col=0)
time_to_read_csv = time() - start

# Time writing the same frame to a scratch copy.
copy_of_reference_file = 'timebox/tests/data/ETH-USD_combined_copy.csv'
start = time()
df.to_csv(copy_of_reference_file)
time_to_write_csv = time() - start

# NOTE(review): the size reported here is that of the original reference
# file, not of the copy that was just written -- confirm this is intended.
write_result('pandas csv', time_to_write_csv, time_to_read_csv,
             os.path.getsize(reference_file))
os.remove(copy_of_reference_file)

# --- TimeBox: time converting the DataFrame and saving it ---
timebox_file_name = 'timebox/tests/data/test_timebox_io.npb'
start = time()
TimeBox.save_pandas(df, timebox_file_name)
time_to_process_df_and_save_timebox = time() - start

# Time loading the saved file back into a DataFrame; constructing the TimeBox
# object is deliberately outside the timed region.
new_tb = TimeBox(timebox_file_name)
start = time()
typed_df = new_tb.to_pandas()
time_to_read_and_convert_to_pandas = time() - start

write_result('file <-> timebox <-> pandas',
             time_to_process_df_and_save_timebox,
             time_to_read_and_convert_to_pandas,
             os.path.getsize(timebox_file_name))

tb = TimeBox(timebox_file_name)
start = time()
tb.read()