def test__sanitize_data_emptystrings(self):
     """Both sanitizers must raise TypeError on unsized empty-string fields.

     Each fixture is a one-row structured array whose two fields are
     declared with a bare ``str`` / ``bytes`` dtype (no length given) and
     filled with empty strings.
     """
     empty_row = [('', '')]
     text_fields = [('empty', str), ('string', str)]
     bytes_fields = [('empty', bytes), ('string', bytes)]
     empty_text = array(empty_row, dtype=text_fields)
     empty_bytes = array(empty_row, dtype=bytes_fields)

     # The bytes fixture goes through the reading sanitizer ...
     with self.assertRaises(TypeError):
         _sanitize_data_for_reading(empty_bytes)
     # ... and the text fixture through the writing sanitizer.
     with self.assertRaises(TypeError):
         _sanitize_data_for_writing(empty_text)
 def scan(self, name, obj):
     """Record a summary of ``obj`` in ``self.contents`` under ``name``.

     ``self.contents[name]`` is set to ``'group'`` when ``obj`` is an
     ``h5py.Group`` and to ``'dataset'`` otherwise.  Non-group objects
     additionally get ``<name>.data`` (the stringified, sanitized object)
     and ``<name>.cols`` (``obj.dtype.names``).  Every object gets
     ``<name>.attrs``: a dict mapping each attribute name to its
     stringified, sanitized value.

     Always returns None.
     """
     is_group = isinstance(obj, h5py.Group)
     self.contents[name] = 'group' if is_group else 'dataset'
     if not is_group:
         readable = f5f._sanitize_data_for_reading(obj)
         self.contents['{}.data'.format(name)] = str(readable)
         self.contents['{}.cols'.format(name)] = obj.dtype.names

     self.contents['{}.attrs'.format(name)] = {
         key: str(f5f._sanitize_data_for_reading(obj.attrs[key]))
         for key in obj.attrs
     }
     # NOTE(review): presumably used as an h5py visit callback, where a
     # None return keeps the traversal going -- confirm against the caller.
     return None
    def test__sanitize_data_py2(self):
        """Under Python 2 both sanitizers must hand their input back as-is.

        Checked on a plain string, a string array built from a scalar, and
        a one-row structured array with a string field and an int field.
        """
        samples = (
            'Avast',
            array('Arr', dtype=str),
            array([('Narr', 0)],
                  dtype=[('string', (str, 4)), ('int', int)]),
        )
        # We expect nothing to get sanitized in python 2
        for sample in samples:
            self.assertEqual(sample, _sanitize_data_for_reading(sample))
            self.assertEqual(sample, _sanitize_data_for_writing(sample))
    def test__sanitize_data_py3(self):
        """Under Python 3 the sanitizers must convert between str and bytes.

        Reading is expected to turn bytes into text and writing to turn
        text into bytes.  Checked on a plain string, a string array and a
        one-row structured array with a string field and an int field.
        """
        # Plain string
        text = 'Avast'
        decoded = _sanitize_data_for_reading(text.encode())
        self.assertEqual(text, decoded)
        self.assertEqual(text.encode(), _sanitize_data_for_writing(text))

        # String array
        text_array = array('Arr', dtype=str)
        decoded_array = _sanitize_data_for_reading(np.char.encode(text_array))
        self.assertEqual(text_array, decoded_array)
        expected_bytes = np.char.encode(text_array)
        self.assertEqual(expected_bytes,
                         _sanitize_data_for_writing(text_array))

        # Structured array: only the string field differs between the two
        text_dtype = [('string', (str, 4)), ('int', int)]
        bytes_dtype = [('string', (bytes, 4)), ('int', int)]
        text_records = array([('Narr', 0)], dtype=text_dtype)
        bytes_records = array([(b'Narr', 0)], dtype=bytes_dtype)
        self.assertEqual(text_records,
                         _sanitize_data_for_reading(bytes_records))
        self.assertEqual(bytes_records,
                         _sanitize_data_for_writing(text_records))