def test_unpack_options(self):
    """Check that ``_unpack_options`` decodes each option bit on its own.

    Only two option flags exist so far, so the integers 0 through 3 cover
    every combination of the two lowest bits:

        0 -> 0000 0000
        1 -> 0000 0001
        2 -> 0000 0010
        3 -> 0000 0011
    """
    # Each row: (packed options value,
    #            expected _tag_names_are_strings,
    #            expected _date_differentials_stored)
    cases = (
        (0, False, False),
        (1, True, False),
        (2, False, True),
        (3, True, True),
    )

    box = TimeBox('')
    for packed, names_are_strings, differentials_stored in cases:
        box._unpack_options(packed)

        # Pick the truthiness assertion that matches the expectation.
        check_names = self.assertTrue if names_are_strings else self.assertFalse
        check_dates = self.assertTrue if differentials_stored else self.assertFalse
        check_names(box._tag_names_are_strings)
        check_dates(box._date_differentials_stored)
def test_io_pandas(self):
    """Round-trip a DataFrame through ``TimeBox.save_pandas`` / ``to_pandas``.

    Loads ``test1.csv`` indexed by its ``date`` column, saves it to a
    TimeBox file, reads it back, and checks that the column names, the
    (sorted) index values and the per-column sums all match.

    The temporary file is removed in a ``finally`` clause so that a failing
    assertion does not leave ``save_pandas.npb`` behind for later tests.
    """
    file_name = 'save_pandas.npb'
    df = pd.read_csv('timebox/tests/data/test1.csv').set_index('date')
    logging.debug('Starting test_io_pandas with df\n{}'.format(df))

    try:
        TimeBox.save_pandas(df, file_name)
        tb_read = TimeBox(file_name)
        df2 = tb_read.to_pandas()

        # Column order is not compared, only the set of names.
        self.assertListEqual(list(df.columns.sort_values()),
                             list(df2.columns.sort_values()))

        # The original frame is sorted before comparing positions with df2.
        df = df.sort_index()

        # ensure index is same
        for i, original_date in enumerate(df.index):
            self.assertEqual(pd.to_datetime(original_date),
                             pd.to_datetime(df2.index[i]))

        # ensure each value is the same (compared through the column sums)
        for c in df.columns:
            logging.debug('Testing column: {}'.format(c))
            logging.debug('Original frame:{}'.format(df[c]))
            logging.debug('TB frame:{}'.format(df2[c]))
            self.assertEqual(df[c].sum(), df2[c].sum())
    finally:
        # Clean up even when an assertion above fails.
        if os.path.exists(file_name):
            os.remove(file_name)
def test_encode_options(self):
    """Check that ``_encode_options`` packs the two flags into the low bits.

    Only two option flags exist so far, so the expected encodings are the
    integers 0 through 3:

        0 -> 0000 0000
        1 -> 0000 0001
        2 -> 0000 0010
        3 -> 0000 0011
    """
    # Each row: (_tag_names_are_strings,
    #            _date_differentials_stored,
    #            expected encoded value)
    cases = (
        (False, False, 0),
        (True, False, 1),
        (False, True, 2),
        (True, True, 3),
    )

    box = TimeBox('')
    for names_are_strings, differentials_stored, expected in cases:
        box._tag_names_are_strings = names_are_strings
        box._date_differentials_stored = differentials_stored
        self.assertEqual(expected, box._encode_options())
def test_time_box_date_io(self):
    """Check date differentials are derived on write and survive a read.

    The example box has its differentials cleared and four explicit,
    irregularly spaced dates assigned.  After ``write()`` the test expects
    three ``uint8`` differentials expressed in ``HOURS``: 36, 17 and 43.
    A fresh ``TimeBox`` reading the same file must report the same values
    together with the expected header flags.

    The file is removed in a ``finally`` clause so a failing assertion does
    not leave ``date_io.npb`` on disk.
    """
    file_name = 'date_io.npb'
    # Gaps between consecutive dates below, in hours.
    expected_hours = (24 + 12, 12 + 5, 19 + 24)

    tb = example_time_box(file_name)
    tb._date_differentials = None  # force them to be rebuilt from _dates
    tb._dates = np.array([
        np.datetime64('2018-01-01T00:00', 's'),
        np.datetime64('2018-01-02T12:00', 's'),
        np.datetime64('2018-01-03T05:00', 's'),
        np.datetime64('2018-01-05T00:00', 's')
    ])

    try:
        tb.write()
        self.assertEqual(3, tb._date_differentials.size)
        self.assertEqual(np.uint8, tb._date_differentials.dtype)
        self.assertEqual(HOURS, tb._date_differential_units)
        for position, hours in enumerate(expected_hours):
            self.assertEqual(hours, tb._date_differentials[position])

        tb_new = TimeBox(file_name)
        tb_new.read()
        self.assertEqual(1, tb_new._timebox_version)
        self.assertFalse(tb_new._tag_names_are_strings)
        self.assertTrue(tb_new._date_differentials_stored)
        self.assertEqual(1, tb_new._bytes_per_date_differential)
        self.assertEqual(HOURS, tb_new._date_differential_units)
        self.assertEqual(np.uint8, tb_new._date_differentials.dtype)
        self.assertEqual(3, tb_new._date_differentials.size)
        for position, hours in enumerate(expected_hours):
            self.assertEqual(hours, tb_new._date_differentials[position])
    finally:
        # Clean up even when an assertion above fails.
        if os.path.exists(file_name):
            os.remove(file_name)
def test_tag_definitions_to_from_bytes_integer(self):
    """Round-trip integer-identified tag definitions through their byte form.

    Encodes the example tag definitions, checks the encoded size and a byte
    checksum, decodes them into a second box and verifies that re-encoding
    yields identical bytes.  Finally confirms that re-encoding with a
    different identifier width, or with string identifiers, produces
    different bytes.
    """
    source_box = TimeBox('')
    source_box._tag_names_are_strings = False
    source_box._tags = example_tag_definitions()
    source_box._update_required_bytes_for_tag_identifier()

    encoded = TimeBoxTag.tag_list_to_bytes(
        [source_box._tags[key] for key in source_box._tags],
        source_box._num_bytes_for_tag_identifier,
        source_box._tag_names_are_strings
    )
    # NOTE(review): presumably 5 example tags with 2-byte identifiers -- confirm
    # against example_tag_definitions().
    expected_num_bytes = 5 * (2 + NUM_BYTES_PER_DEFINITION_WITHOUT_IDENTIFIER)
    self.assertEqual(expected_num_bytes, encoded.num_bytes)
    # Sum of all encoded bytes, used as a cheap checksum of the byte code.
    byte_sum = np.frombuffer(encoded.byte_code, dtype=np.uint8).sum()
    self.assertEqual(813, byte_sum)

    decoded_box = TimeBox('')
    decoded_box._num_bytes_for_tag_identifier = 2
    decoded_box._tag_names_are_strings = False
    decoded_box._tag_definitions = TimeBoxTag.tag_definitions_from_bytes(
        encoded.byte_code,
        decoded_box._num_bytes_for_tag_identifier,
        decoded_box._tag_names_are_strings
    )

    def encode_decoded():
        # Re-encode the decoded definitions with decoded_box's current settings.
        return TimeBoxTag.tag_list_to_bytes(
            [decoded_box._tag_definitions[key]
             for key in decoded_box._tag_definitions],
            decoded_box._num_bytes_for_tag_identifier,
            decoded_box._tag_names_are_strings
        )

    round_tripped = encode_decoded()
    self.assertEqual(encoded.num_bytes, round_tripped.num_bytes)
    self.assertEqual(encoded.byte_code, round_tripped.byte_code)

    # A different identifier width must change the encoded bytes.
    decoded_box._num_bytes_for_tag_identifier = 4
    wider_identifier = encode_decoded()
    self.assertNotEqual(round_tripped.byte_code, wider_identifier.byte_code)

    # So must switching to string identifiers.
    decoded_box._tag_names_are_strings = True
    string_identifier = encode_decoded()
    self.assertNotEqual(round_tripped.byte_code, string_identifier.byte_code)
def test_read_write_file_info_date_deltas(self):
    """Round-trip the file-info header when date differentials are stored.

    Writes the header of a hand-configured ``TimeBox`` with
    ``_write_file_info``, reads it back into an empty box with
    ``_read_file_info``, and checks that both calls report 230 bytes and
    that every header field and tag definition matches.

    The file is removed in a ``finally`` clause so a failing assertion does
    not leave ``test_delta.npb`` on disk.
    """
    tb = TimeBox('')
    tb._timebox_version = 1
    tb._tag_names_are_strings = False
    tb._date_differentials_stored = True
    tb._num_points = 10
    tb._tags = example_tag_definitions()
    tb._start_date = np.datetime64('2018-01-01', 's')
    tb._bytes_per_date_differential = 4
    tb._date_differential_units = SECONDS

    file_name = 'test_delta.npb'
    try:
        with open(file_name, 'wb') as f:
            self.assertEqual(230, tb._write_file_info(f))

        tb_read = TimeBox('')
        with open(file_name, 'rb') as f:
            self.assertEqual(230, tb_read._read_file_info(f))

        self.assertEqual(tb._timebox_version, tb_read._timebox_version)
        self.assertEqual(tb._tag_names_are_strings,
                         tb_read._tag_names_are_strings)
        self.assertEqual(tb._date_differentials_stored,
                         tb_read._date_differentials_stored)
        self.assertEqual(tb._num_points, tb_read._num_points)
        self.assertEqual(tb._start_date, tb_read._start_date)
        self.assertEqual(tb._bytes_per_date_differential,
                         tb_read._bytes_per_date_differential)
        self.assertEqual(tb._date_differential_units,
                         tb_read._date_differential_units)

        # Every written tag must come back with the same definition.
        for t in tb._tags:
            self.assertIn(t, tb_read._tags)
            self.assertEqual(tb._tags[t].identifier,
                             tb_read._tags[t].identifier)
            self.assertEqual(tb._tags[t].type_char,
                             tb_read._tags[t].type_char)
            self.assertEqual(tb._tags[t].dtype, tb_read._tags[t].dtype)
            self.assertEqual(tb._tags[t].bytes_per_value,
                             tb_read._tags[t].bytes_per_value)
    finally:
        # Clean up even when an assertion above fails.
        if os.path.exists(file_name):
            os.remove(file_name)
def test_read_save_read_save(self):
    """Smoke-test repeated read/write cycles on one TimeBox file.

    Saves a DataFrame, then reads, rewrites and re-reads the file through a
    second ``TimeBox``, and finally reads it through the original one.
    There are no assertions: the test passes if none of the calls raise.

    The file is removed in a ``finally`` clause so an exception in any step
    does not leave ``test_tb_io.npb`` in the test data directory.
    """
    tb_file_name = 'timebox/tests/data/test_tb_io.npb'
    df = pd.read_csv('timebox/tests/data/ETH-USD_integers.csv', index_col=0)

    try:
        tb = TimeBox.save_pandas(df, tb_file_name)
        tb2 = TimeBox(tb.file_path)
        tb2.read()
        tb2.write()
        tb2.read()
        # The original box must still be able to read the rewritten file.
        tb.read()
    finally:
        # Clean up even when one of the calls above raises.
        if os.path.exists(tb_file_name):
            os.remove(tb_file_name)
def test_read_write_data_with_tag_name_as_string(self):
    """Write a box with string tag names and check they are read back.

    Expects the identifier width to be ``15 * 4`` bytes before writing, and
    after reading expects the string-name flag to be set and the three tag
    names ``tag_0``, ``tag_1`` and ``tag_2_long_name`` to be present.

    The file is removed in a ``finally`` clause so a failing assertion does
    not leave ``test_io.npb`` on disk.
    """
    file_name = 'test_io.npb'
    tb = example_time_box(file_name)
    tb._update_required_bytes_for_tag_identifier()
    # NOTE(review): 15 is the length of 'tag_2_long_name'; presumably each
    # character takes 4 bytes -- confirm against the encoder.
    self.assertEqual(15 * 4, tb._num_bytes_for_tag_identifier)

    try:
        tb.write()

        tb_read = TimeBox(file_name)
        tb_read.read()
        self.assertTrue(tb_read._tag_names_are_strings)
        self.assertEqual(3, len(tb_read._tags))
        for tag_name in ('tag_0', 'tag_1', 'tag_2_long_name'):
            self.assertIn(tag_name, tb_read._tags)
    finally:
        # Clean up even when an assertion above fails.
        if os.path.exists(file_name):
            os.remove(file_name)
def example_time_box(file_name: str):
    """Build a TimeBox fixture with three string-named tags of four points.

    Date differentials are disabled; the box instead gets a start date of
    2018-01-01 and ``_seconds_between_points`` set to 3600.  Nothing is
    written to disk by this helper itself.

    :param file_name: path handed to the ``TimeBox`` constructor
    :return: the configured ``TimeBox``
    """
    # Each row: (tag name, second TimeBoxTag argument, third TimeBoxTag
    # argument, numpy dtype of the sample data, sample values).
    # NOTE(review): the second/third arguments look like bytes-per-value and
    # a type character -- confirm against TimeBoxTag.
    tag_specs = (
        ('tag_0', 1, 'u', np.uint8, [1, 2, 3, 4]),
        ('tag_1', 2, 'i', np.int16, [-4, -2, 0, 2000]),
        ('tag_2_long_name', 4, 'f', np.float32, [5.2, 0.8, 3.1415, 8]),
    )

    box = TimeBox(file_name)
    box._timebox_version = 1
    box._tag_names_are_strings = True
    box._date_differentials_stored = False
    box._num_points = 4
    box._tags = {
        name: TimeBoxTag(name, width, type_char)
        for name, width, type_char, _, _ in tag_specs
    }
    box._start_date = np.datetime64('2018-01-01', 's')
    box._seconds_between_points = 3600

    # Attach the sample data once all tags are registered on the box.
    for name, _, _, dtype, values in tag_specs:
        box._tags[name].data = np.array(values, dtype=dtype)
    return box
def test_save_pandas(self):
    """Check that ``save_pandas`` creates a file with the same columns.

    Saves the ETH-USD reference CSV as a TimeBox file, asserts the file
    exists, reads it back with ``to_pandas`` and compares the sorted column
    names of both frames.

    The file is removed in a ``finally`` clause so a failing assertion does
    not leave ``save_pandas.npb`` on disk.
    """
    file_name = 'save_pandas.npb'
    df = pd.read_csv('timebox/tests/data/ETH-USD_combined_utc.csv',
                     index_col=0)

    try:
        TimeBox.save_pandas(df, file_name)
        self.assertTrue(os.path.exists(file_name))

        tb_read = TimeBox(file_name)
        df2 = tb_read.to_pandas()

        # Iterating a DataFrame yields its column labels.
        self.assertListEqual(sorted(df), sorted(df2))
    finally:
        # Clean up even when an assertion above fails.
        if os.path.exists(file_name):
            os.remove(file_name)
def test_update_required_bytes(self):
    """Check the identifier width computed for integer and string tags.

    With integer identifiers the test expects 1 byte while the only
    identifier is 0, and 2 bytes once identifier 256 is added.  With string
    identifiers 'a', 'ab' and 'abc' it expects 12 bytes.
    """
    box = TimeBox('')

    # Integer identifiers: add one tag at a time and re-check the width.
    box._tag_names_are_strings = False
    for identifier, expected_width in ((0, 1), (256, 2)):
        box._tags[identifier] = TimeBoxTag(identifier, 1, 'u')
        box._update_required_bytes_for_tag_identifier()
        self.assertEqual(expected_width, box._num_bytes_for_tag_identifier)

    # String identifiers: replace the tags wholesale.
    box._tag_names_are_strings = True
    box._tags = {
        name: TimeBoxTag(name, 1, 'u') for name in ('a', 'ab', 'abc')
    }
    box._update_required_bytes_for_tag_identifier()
    self.assertEqual(12, box._num_bytes_for_tag_identifier)
def example_time_box(file_name: str):
    """Build a TimeBox fixture with three integer-identified tags.

    Each tag carries four points.  Date differentials are enabled: three
    ``uint8`` differentials of 1 in units of ``DAYS``, one byte each, with
    a start date of 2018-01-01.  Nothing is written to disk by this helper
    itself.

    :param file_name: path handed to the ``TimeBox`` constructor
    :return: the configured ``TimeBox``
    """
    # Each row: (tag identifier, second TimeBoxTag argument, third TimeBoxTag
    # argument, numpy dtype of the sample data, sample values).
    # NOTE(review): the second/third arguments look like bytes-per-value and
    # a type character -- confirm against TimeBoxTag.
    tag_specs = (
        (0, 1, 'u', np.uint8, [1, 2, 3, 4]),
        (1, 2, 'i', np.int16, [-4, -2, 0, 2000]),
        (2, 4, 'f', np.float32, [5.2, 0.8, 3.1415, 8]),
    )

    box = TimeBox(file_name)
    box._timebox_version = 1
    box._tag_names_are_strings = False
    box._date_differentials_stored = True
    box._num_points = 4
    box._tags = {
        identifier: TimeBoxTag(identifier, width, type_char)
        for identifier, width, type_char, _, _ in tag_specs
    }
    box._start_date = np.datetime64('2018-01-01', 's')

    # Attach the sample data once all tags are registered on the box.
    for identifier, _, _, dtype, values in tag_specs:
        box._tags[identifier].data = np.array(values, dtype=dtype)

    box._date_differentials = np.array([1, 1, 1], dtype=np.uint8)
    box._date_differential_units = DAYS
    box._bytes_per_date_differential = 1
    return box
def test_init(self):
    """The constructor should expose the given path as ``file_path``."""
    box = TimeBox('test_file_path.txt')
    stored_path = box.file_path
    self.assertEqual('test_file_path.txt', stored_path)
copy_of_reference_file = 'timebox/tests/data/ETH-USD_combined_copy.csv' start = time() df.to_csv(copy_of_reference_file) time_to_write_csv = time() - start write_result('pandas csv', time_to_write_csv, time_to_read_csv, os.path.getsize(reference_file)) os.remove(copy_of_reference_file) timebox_file_name = 'timebox/tests/data/test_timebox_io.npb' start = time() TimeBox.save_pandas(df, timebox_file_name) time_to_process_df_and_save_timebox = time() - start new_tb = TimeBox(timebox_file_name) start = time() typed_df = new_tb.to_pandas() time_to_read_and_convert_to_pandas = time() - start write_result('file <-> timebox <-> pandas', time_to_process_df_and_save_timebox, time_to_read_and_convert_to_pandas, os.path.getsize(timebox_file_name)) tb = TimeBox(timebox_file_name) start = time() tb.read() time_to_read_timebox = time() - start start = time()