def test_validation_errors(self):
    """Check that _validate_data_for_write rejects inconsistent tag data."""
    # Case 1: replace the tags with freshly built ones that carry no data.
    box = example_time_box('')
    box._tags = {
        ident: TimeBoxTag(ident, width, kind)
        for ident, width, kind in ((0, 1, 'u'), (1, 2, 'i'), (2, 4, 'f'))
    }
    self.assertRaises(DataDoesNotMatchTagDefinitionError,
                      box._validate_data_for_write)

    # Case 2: a single tag has its data removed.
    box = example_time_box('')
    box._tags[0].data = None
    self.assertRaises(DataDoesNotMatchTagDefinitionError,
                      box._validate_data_for_write)

    # Case 3: a single tag has its dtype removed.
    box = example_time_box('')
    box._tags[0].dtype = None
    self.assertRaises(DataDoesNotMatchTagDefinitionError,
                      box._validate_data_for_write)

    # Case 4 (same box as case 3): the dtype is restored but the tag now
    # holds a one-element array, which must be reported as a shape error.
    first_tag = box._tags[0]
    first_tag.dtype = np.uint8
    first_tag.data = np.array([1], dtype=np.uint8)
    self.assertRaises(DataShapeError, box._validate_data_for_write)
Example #2
0
    def test_timebox_floating_point_rounding(self):
        """Round-trip a compressed float tag that stores two decimals."""
        tag = TimeBoxTag(0, 8, 'f')
        tag.use_compression = True
        tag.floating_point_rounded = True
        tag.num_decimals_to_store = 2
        tag.data = np.array([0.5, -0.5, 10.2345, 0], np.float64)
        tag.encode_data()

        # Compression metadata expected after encoding.
        self.assertEqual('m', tag._compression_mode)
        self.assertEqual('u', tag._compressed_type_char)
        self.assertEqual(2, tag._compressed_bytes_per_value)
        self.assertEqual(-50, tag._compression_reference_value)
        self.assertEqual(np.int64, tag._compression_reference_value_dtype)

        # Expected encoded values: hundredths, offset from the -50 reference.
        for position, expected in enumerate((100, 0, 1023 + 50, 50)):
            self.assertEqual(expected, tag._encoded_data[position])

        tag._decode_data()
        self.assertEqual(np.float64, tag.data.dtype)
        # 10.2345 is expected to come back as 10.23.
        for position, expected in enumerate((0.5, -0.5, 10.23, 0)):
            self.assertEqual(expected, tag.data[position])
Example #3
0
def example_tag_definitions():
    """
    Build five sample tag definitions keyed by integer identifier.
    :return: dict mapping identifier -> TimeBoxTag
    """
    # (identifier, bytes per value, type char)
    specs = (
        (0, 1, 'u'),
        (1, 2, 'i'),
        (2, 4, 'f'),
        (255, 8, 'i'),
        (256, 8, 'f'),
    )
    return {
        ident: TimeBoxTag(ident, width, kind)
        for ident, width, kind in specs
    }
Example #4
0
 def test_tag_to_bytes(self):
     """Check the serialised header bytes of a tag with a 1-byte identifier."""
     tag = TimeBoxTag(1, 8, 'u', options=0)
     result = tag.info_to_bytes(1, False)
     self.assertEqual(41, result[0])

     raw = result.byte_code
     expected_leading_bytes = (
         1,    # identifier
         0,    # options, byte 1
         0,    # options, byte 2
         8,    # bytes per value
         117,  # type char, ord('u')
     )
     for offset, expected in enumerate(expected_leading_bytes):
         self.assertEqual(expected, raw[offset])
     # num bytes extra info
     self.assertEqual(b'\x00\x00\x00\x00', raw[5:9])
Example #5
0
    def test_encode_decode_def_bytes_floating_point_rounding(self):
        """Definition bytes must round-trip the floating point rounding setup."""
        tag = TimeBoxTag(0, 8, 'f', options=0)
        # With no options set, the definition bytes are 32 zero bytes.
        blank = tag._encode_def_bytes()
        self.assertEqual(blank, b'\x00' * 32)

        tag.floating_point_rounded = True
        tag.num_decimals_to_store = 2
        packed = tag._encode_def_bytes()
        # The number of decimals is expected in the first definition byte.
        self.assertEqual(2, packed[0])

        restored = TimeBoxTag(0, 8, 'f', options=4)
        restored._decode_def_bytes(packed)
        self.assertTrue(restored.floating_point_rounded)
        self.assertEqual(2, restored.num_decimals_to_store)
Example #6
0
    def from_pandas(cls, df: pd.DataFrame):
        """
        Builds a TimeBox from a pandas DataFrame.

        The frame is sorted on its index, the index is converted to a numpy
        array of datetime64 values, and one TimeBoxTag is created per column
        (keyed by the column name) holding that column's values.

        Expects that the passed df has an index of type Timestamp, or of
        strings which can be converted to Timestamp. All dtypes in the pandas
        data frame must be in the float/int/u-int family.
        :param df: pandas DataFrame
        :return: TimeBox object
        """
        # make sure the pandas data frame is sorted on date
        # Log arguments are passed lazily so that the message is only
        # rendered when DEBUG logging is actually enabled.
        logging.debug('Before sorting: %s', df.head())
        df = df.sort_index()
        logging.debug('After sorting: %s', df.head())

        tb = TimeBox()
        tb._tag_names_are_strings = True

        # ensure index is there and can be converted to numpy array of datetime64s
        logging.debug('Datetime index dtype before and after:\n%s',
                      df.index.dtype)
        tb._dates = df.index.values.astype(np.datetime64)
        logging.debug('after: %s', tb._dates.dtype)
        # the start date is the earliest date, at second resolution
        tb._start_date = np.amin(tb._dates.astype(np.dtype('datetime64[s]')))
        logging.debug('Min date: %s', tb._start_date)
        tb._date_differentials_stored = True
        tb._num_points = tb._dates.size

        # get column names and info
        for c in df.columns:
            # type_info[0] / type_info[1] are passed as the second and third
            # TimeBoxTag arguments (bytes per value and type char elsewhere
            # in this code base)
            type_info = parse_pandas_dtype(df[c].dtype)
            tb._tags[c] = TimeBoxTag(c, type_info[0], type_info[1])
            tb._tags[c].data = df[c].values

        return tb
Example #7
0
    def test_timebox_tag_decompression(self):
        """Decode 1-byte offsets back into 8-byte values around a reference."""
        tag = TimeBoxTag(0, 8, 'u')
        tag.use_compression = True
        tag._encoded_data = np.array([0, 1, 2, 5], np.uint8)
        tag._compression_mode = 'm'
        tag._compressed_bytes_per_value = 1
        tag._compressed_type_char = 'u'
        tag._compression_reference_value = 1000000

        tag._decode_data()
        decoded = tag.data
        self.assertEqual(4, decoded.size)
        self.assertEqual(8, decoded.itemsize)
        # Each decoded value is expected to be reference + stored offset.
        for position, offset in enumerate((0, 1, 2, 5)):
            self.assertEqual(1000000 + offset, decoded[position])
Example #8
0
    def test_tag_definitions_to_from_bytes_integer(self):
        """Integer-keyed tag definitions must survive a bytes round trip."""

        def to_bytes(definitions, box):
            # Serialise every definition using the identifier settings of box.
            return TimeBoxTag.tag_list_to_bytes(
                [definitions[key] for key in definitions],
                box._num_bytes_for_tag_identifier,
                box._tag_names_are_strings)

        first = TimeBox('')
        first._tag_names_are_strings = False
        first._tags = example_tag_definitions()
        first._update_required_bytes_for_tag_identifier()
        original = to_bytes(first._tags, first)
        # Five definitions, each with a 2-byte identifier.
        self.assertEqual(
            5 * (2 + NUM_BYTES_PER_DEFINITION_WITHOUT_IDENTIFIER),
            original.num_bytes)
        self.assertEqual(
            813,
            np.frombuffer(original.byte_code, dtype=np.uint8).sum())

        # Parse the bytes back and re-serialise: the result must be identical.
        second = TimeBox('')
        second._num_bytes_for_tag_identifier = 2
        second._tag_names_are_strings = False
        second._tag_definitions = TimeBoxTag.tag_definitions_from_bytes(
            original.byte_code,
            second._num_bytes_for_tag_identifier,
            second._tag_names_are_strings)
        round_tripped = to_bytes(second._tag_definitions, second)
        self.assertEqual(original.num_bytes, round_tripped.num_bytes)
        self.assertEqual(original.byte_code, round_tripped.byte_code)

        # A different identifier width must change the serialised bytes.
        second._num_bytes_for_tag_identifier = 4
        widened = to_bytes(second._tag_definitions, second)
        self.assertNotEqual(round_tripped.byte_code, widened.byte_code)

        # So must switching to string identifiers.
        second._tag_names_are_strings = True
        as_strings = to_bytes(second._tag_definitions, second)
        self.assertNotEqual(round_tripped.byte_code, as_strings.byte_code)
def example_time_box(file_name: str):
    """
    Build a sample TimeBox with three string-named tags of four points each
    and a fixed spacing of 3600 seconds between points.
    :param file_name: passed straight to the TimeBox constructor
    :return: populated TimeBox object
    """
    # (tag name, bytes per value, type char, values, numpy dtype)
    samples = (
        ('tag_0', 1, 'u', [1, 2, 3, 4], np.uint8),
        ('tag_1', 2, 'i', [-4, -2, 0, 2000], np.int16),
        ('tag_2_long_name', 4, 'f', [5.2, 0.8, 3.1415, 8], np.float32),
    )

    box = TimeBox(file_name)
    box._timebox_version = 1
    box._tag_names_are_strings = True
    box._date_differentials_stored = False
    box._num_points = 4
    box._tags = {
        name: TimeBoxTag(name, width, kind)
        for name, width, kind, _values, _dtype in samples
    }
    box._start_date = np.datetime64('2018-01-01', 's')
    box._seconds_between_points = 3600
    for name, _width, _kind, values, dtype in samples:
        box._tags[name].data = np.array(values, dtype=dtype)
    return box
Example #10
0
    def test_update_required_bytes(self):
        """Identifier width must follow the largest tag identifier in use."""
        box = TimeBox('')
        box._tag_names_are_strings = False
        # Integer identifiers: 0 is expected to need 1 byte, 256 needs 2.
        for ident, expected_width in ((0, 1), (256, 2)):
            box._tags[ident] = TimeBoxTag(ident, 1, 'u')
            box._update_required_bytes_for_tag_identifier()
            self.assertEqual(expected_width,
                             box._num_bytes_for_tag_identifier)

        # String identifiers: the longest name ('abc') is expected to need
        # 12 bytes.
        box._tag_names_are_strings = True
        box._tags = {
            name: TimeBoxTag(name, 1, 'u') for name in ('a', 'ab', 'abc')
        }
        box._update_required_bytes_for_tag_identifier()
        self.assertEqual(12, box._num_bytes_for_tag_identifier)
Example #11
0
def example_time_box(file_name: str):
    """
    Build a sample TimeBox with three integer-keyed tags of four points each,
    whose dates are described by stored date differentials (in DAYS).
    :param file_name: passed straight to the TimeBox constructor
    :return: populated TimeBox object
    """
    # (identifier, bytes per value, type char, values, numpy dtype)
    samples = (
        (0, 1, 'u', [1, 2, 3, 4], np.uint8),
        (1, 2, 'i', [-4, -2, 0, 2000], np.int16),
        (2, 4, 'f', [5.2, 0.8, 3.1415, 8], np.float32),
    )

    box = TimeBox(file_name)
    box._timebox_version = 1
    box._tag_names_are_strings = False
    box._date_differentials_stored = True
    box._num_points = 4
    box._tags = {
        ident: TimeBoxTag(ident, width, kind)
        for ident, width, kind, _values, _dtype in samples
    }
    box._start_date = np.datetime64('2018-01-01', 's')

    for ident, _width, _kind, values, dtype in samples:
        box._tags[ident].data = np.array(values, dtype=dtype)

    # Three differentials describe the gaps between the four points.
    box._date_differentials = np.array([1, 1, 1], dtype=np.uint8)
    box._date_differential_units = DAYS
    box._bytes_per_date_differential = 1
    return box
Example #12
0
    def _write_file_info(self, file_handle) -> int:
        """
        Writes out the file info to the file handle.

        Written in this order: version (uint8), options (uint16), number of
        tags (uint8), number of points (uint32), bytes per tag identifier
        (uint8), the tag definitions sorted by identifier, the start date
        (datetime64[s], 8 bytes), and then either the bytes per date
        differential (uint8) plus the date differential units code (uint16)
        when date differentials are stored, or the seconds between points
        (uint32) otherwise.
        :param file_handle: file handle object in 'wb' mode. pre-seeked to correct position (0)
        :return: int, seek bytes advanced in this method
        """

        def write_scalar(value, dtype) -> int:
            # Cast value to dtype, write that single value and report how
            # many bytes it occupied.
            np.array([dtype(value)], dtype=dtype).tofile(file_handle)
            return np.dtype(dtype).itemsize

        bytes_seek = write_scalar(self._timebox_version, np.uint8)
        bytes_seek += write_scalar(self._encode_options(), np.uint16)
        bytes_seek += write_scalar(len(self._tags), np.uint8)
        bytes_seek += write_scalar(self._num_points, np.uint32)

        # The identifier width must be refreshed before it is written, and
        # before the tag definitions are serialised with it.
        self._update_required_bytes_for_tag_identifier()
        bytes_seek += write_scalar(self._num_bytes_for_tag_identifier,
                                   np.uint8)

        # Tag definitions are written in sorted identifier order.
        sorted_tags = sorted(self._tags)
        tags_to_bytes_result = TimeBoxTag.tag_list_to_bytes(
            [self._tags[t] for t in sorted_tags],
            self._num_bytes_for_tag_identifier, self._tag_names_are_strings)
        file_handle.write(tags_to_bytes_result.byte_code)
        bytes_seek += tags_to_bytes_result.num_bytes

        # Coerce explicitly to second resolution so the 8 bytes written are
        # always a 'datetime64[s]' value (which is what _read_file_info
        # decodes), whatever unit self._start_date happens to carry.
        np.array([self._start_date],
                 dtype='datetime64[s]').tofile(file_handle)
        bytes_seek += 8

        if self._date_differentials_stored:
            bytes_seek += write_scalar(self._bytes_per_date_differential,
                                       np.uint8)
            int_to_store_date_diff_units = get_int_for_date_units_from_date_utils_constant(
                self._date_differential_units)
            bytes_seek += write_scalar(int_to_store_date_diff_units,
                                       np.uint16)
        else:
            bytes_seek += write_scalar(self._seconds_between_points,
                                       np.uint32)

        return bytes_seek
Example #13
0
    def _read_file_info(self, file_handle) -> int:
        """
        Reads the file info from a file_handle. Populates file internals.

        Read in this order: version (1 byte), options (2 bytes), number of
        tags (1 byte), number of points (4 bytes), bytes per tag identifier
        (1 byte), the tag definitions, the start date (datetime64[s],
        8 bytes), and then either the bytes per date differential (1 byte)
        plus the date differential units code (2 bytes) when date
        differentials are stored, or the seconds between points (4 bytes).
        :param file_handle: file handle object in 'rb' mode that is seeked to the correct position (0)
        :return: int, seek bytes increased since file_handle was received
        """
        self._timebox_version = read_unsigned_int(file_handle.read(1))
        # unpacking the options sets the flags used further down
        # (e.g. self._date_differentials_stored) -- presumably; the helper
        # is not visible here
        self._unpack_options(int(read_unsigned_int(file_handle.read(2))))
        num_tags = read_unsigned_int(file_handle.read(1))
        self._num_points = read_unsigned_int(file_handle.read(4))
        self._num_bytes_for_tag_identifier = read_unsigned_int(
            file_handle.read(1))
        bytes_seek = 1 + 2 + 1 + 4 + 1

        # Each tag definition occupies the identifier bytes plus a fixed
        # number of definition bytes. The sizes are converted to plain ints
        # first: read_unsigned_int may return fixed-width NumPy scalars
        # (TODO confirm), and multiplying two such small unsigned scalars can
        # wrap around instead of producing the real byte count.
        bytes_for_tag_def = int(num_tags) * (
            int(self._num_bytes_for_tag_identifier) +
            NUM_BYTES_PER_DEFINITION_WITHOUT_IDENTIFIER)
        self._tags = TimeBoxTag.tag_definitions_from_bytes(
            file_handle.read(bytes_for_tag_def),
            self._num_bytes_for_tag_identifier, self._tag_names_are_strings)
        bytes_seek += bytes_for_tag_def

        self._start_date = np.fromfile(file_handle,
                                       dtype='datetime64[s]',
                                       count=1)[0]
        bytes_seek += 8

        if self._date_differentials_stored:
            # irregular spacing: no fixed interval, differentials follow
            self._seconds_between_points = 0
            self._bytes_per_date_differential = read_unsigned_int(
                file_handle.read(1))
            stored_value_for_date_diff_units = read_unsigned_int(
                file_handle.read(2))
            self._date_differential_units = get_date_utils_constant_from_stored_units_int(
                stored_value_for_date_diff_units)
            bytes_seek += 3
        else:
            # regular spacing: only the interval in seconds is stored
            self._seconds_between_points = read_unsigned_int(
                file_handle.read(4))
            self._bytes_per_date_differential = 0
            self._date_differential_units = 0
            bytes_seek += 4
        return bytes_seek
Example #14
0
 def test_timebox_tag_compression(self):
     """Encode 8-byte values lying close together into 1-byte offsets."""
     tag = TimeBoxTag(0, 8, 'u')
     tag.use_compression = True
     tag.data = np.array([1000000, 1000001, 1000002, 1000005], np.uint64)
     tag.encode_data()

     # Compression metadata expected after encoding.
     self.assertEqual('m', tag._compression_mode)
     self.assertEqual('u', tag._compressed_type_char)
     self.assertEqual(1, tag._compressed_bytes_per_value)
     self.assertEqual(1000000, tag._compression_reference_value)
     self.assertEqual(np.uint64, tag._compression_reference_value_dtype)

     # Each value is expected to be stored as its offset from the reference.
     for position, expected in enumerate((0, 1, 2, 5)):
         self.assertEqual(expected, tag._encoded_data[position])
Example #15
0
    def test_tag_info_init(self):
        """Check the attribute defaults and option decoding of a new tag."""
        info = TimeBoxTag('my_id', 4, 'f')
        self.assertEqual('my_id', info.identifier)
        self.assertEqual(4, info.bytes_per_value)
        self.assertEqual('f', info.type_char)
        self.assertEqual(np.float32, info.dtype)

        # The type char may also be passed as its integer character code.
        info = TimeBoxTag('my_id', 4, ord('f'))
        self.assertEqual(np.float32, info.dtype)

        # No data is attached to a fresh tag.
        for attr in ('data', '_encoded_data', 'num_points'):
            self.assertEqual(None, getattr(info, attr))

        # All option flags default to off.
        for flag in ('use_compression', 'use_hash_table',
                     'floating_point_rounded'):
            self.assertFalse(getattr(info, flag))

        # Compression details are unset, except for the reference dtype.
        for attr in ('_compressed_type_char', '_compressed_bytes_per_value',
                     '_compression_mode', '_compression_reference_value'):
            self.assertEqual(None, getattr(info, attr))
        self.assertEqual(info.dtype, info._compression_reference_value_dtype)

        self.assertEqual(None, info.num_decimals_to_store)
        self.assertEqual(0, info.num_bytes_extra_information)

        # Option values 1, 3 and 2 switch on the expected flags.
        self.assertTrue(TimeBoxTag('my_id', 4, 'f', options=1).use_compression)
        self.assertTrue(TimeBoxTag('my_id', 4, 'f', options=3).use_hash_table)
        info = TimeBoxTag('my_id', 4, 'f', options=2)
        self.assertTrue(info.use_hash_table)
        self.assertFalse(info.use_compression)

        # Construction with 32 zero definition bytes only has to succeed.
        info = TimeBoxTag('my_id',
                          4,
                          'f',
                          options=0,
                          untyped_bytes=b'\x00' * 32)
Example #16
0
    def test_encode_decode_def_bytes_compression(self):
        """Definition bytes must round-trip the compression settings."""
        tag = TimeBoxTag(0, 8, 'u', options=0)
        # With no options set, the definition bytes are 32 zero bytes.
        blank = tag._encode_def_bytes()
        self.assertEqual(blank, b'\x00' * 32)

        tag.use_compression = True
        tag._compression_mode = 'e'
        tag._compressed_bytes_per_value = 2
        tag._compressed_type_char = 'u'
        tag._compression_reference_value = 5
        tag._compression_reference_value_dtype = np.dtype(np.uint64)
        packed = tag._encode_def_bytes()

        expected_leading_bytes = (
            101,  # ord('e')
            2,
            117,  # ord('u')
            8,
            117,  # ord('u')
            5,
        )
        for offset, expected in enumerate(expected_leading_bytes):
            self.assertEqual(expected, packed[offset])

        restored = TimeBoxTag(0, 8, 'u', options=1)
        restored._decode_def_bytes(packed)
        self.assertTrue(restored.use_compression)
        self.assertEqual('e', restored._compression_mode)
        self.assertEqual(2, restored._compressed_bytes_per_value)
        self.assertEqual('u', restored._compressed_type_char)
        self.assertEqual(5, restored._compression_reference_value)
Example #17
0
    def test_tag_options(self):
        """Check encoding and decoding of the three tag option flags."""
        tag = TimeBoxTag(1, 8, 'u', options=0)

        # (use_compression, use_hash_table, floating_point_rounded, encoded)
        encode_cases = (
            (False, False, False, 0),
            (True, False, False, 1),
            (False, True, False, 2),
            (True, True, False, 3),
            (False, False, True, 4),
            (True, False, True, 5),
            (False, True, True, 6),
            (True, True, True, 7),
            (True, True, False, 3),
        )
        for compression, hash_table, rounded, expected in encode_cases:
            tag.use_compression = compression
            tag.use_hash_table = hash_table
            tag.floating_point_rounded = rounded
            self.assertEqual(expected, tag._encode_options())

        def check_flag(expected_on, actual):
            # Mirror assertTrue / assertFalse depending on the expectation.
            if expected_on:
                self.assertTrue(actual)
            else:
                self.assertFalse(actual)

        # Every value 0..7 must decode back into the matching flags:
        # 1 -> compression, 2 -> hash table, 4 -> floating point rounding.
        for encoded in range(8):
            tag._decode_options(encoded)
            check_flag(encoded & 1, tag.use_compression)
            check_flag(encoded & 2, tag.use_hash_table)
            check_flag(encoded & 4, tag.floating_point_rounded)
Example #18
0
    def test_get_tag_info_dtype(self):
        """Check the structured dtype describing one tag definition."""
        layout = TimeBoxTag.tag_info_dtype(4, True)

        # Leading fields, in order, for a 4-byte string identifier.
        fixed_fields = (
            ('tag_identifier', '<U1'),
            ('options', '<u2'),
            ('bytes_per_point', '|u1'),
            ('type_char', '|u1'),
            ('bytes_extra_information', '<u4'),
        )
        for position, (field, code) in enumerate(fixed_fields):
            self.assertEqual(field, layout.descr[position][0])
            self.assertEqual(code, layout.descr[position][1])

        # They are followed by 32 single-byte definition fields.
        for number in range(1, 33):
            entry = layout.descr[4 + number]
            self.assertEqual('def_byte_{}'.format(number), entry[0])
            self.assertEqual('|u1', entry[1])

        # (identifier bytes, names are strings, expected identifier dtype)
        identifier_codes = (
            (16, True, '<U4'),
            (32, True, '<U8'),
            (128, True, '<U32'),
            (1, False, '|u1'),
            (2, False, '<u2'),
            (4, False, '<u4'),
            (8, False, '<u8'),
        )
        for num_bytes, as_string, code in identifier_codes:
            actual = TimeBoxTag.tag_info_dtype(num_bytes, as_string)
            self.assertEqual(code, actual.descr[0][1])

        # test errors
        for bad_num_bytes in (2, 0, -1):
            with self.assertRaises(TagIdentifierByteRepresentationError):
                TimeBoxTag.tag_info_dtype(bad_num_bytes, True)
        with self.assertRaises(ValueError):
            TimeBoxTag.tag_info_dtype(0.5, False)