def test_serialize_1d_array(self):
    """...Check that a 1d dense array survives a serialize / load round trip
    """
    target_file = self.array_file
    original = np.random.rand(100).astype(self.dtype)

    # Write the array to disk, then read it back with the same dtype
    serialize_array(original, target_file)
    reloaded = load_array(target_file, dtype=self.dtype)

    np.testing.assert_array_almost_equal(original, reloaded)
def test_serialize_2d_array(self):
    """...Check that a 2d dense array survives a serialize / load round trip
    """
    target_file = self.array_file
    original = np.random.rand(10, 10)

    # Write the array to disk, then read it back as a 2 dimensional array
    serialize_array(original, target_file)
    reloaded = load_array(target_file, array_dim=2)

    np.testing.assert_array_almost_equal(original, reloaded)
def test_serialize_sparse_2d_array(self):
    """...Check that a 2d sparse (CSR) array survives a serialize / load
    round trip
    """
    target_file = self.array_file
    original = sparse.rand(10, 10, density=0.3, format='csr')

    serialize_array(original, target_file)
    reloaded = load_array(target_file, array_dim=2, array_type='sparse')

    # Both sides are densified so that they can be compared with numpy
    expected_dense = original.toarray()
    actual_dense = reloaded.toarray()
    np.testing.assert_array_almost_equal(expected_dense, actual_dense)
def save_adult_dataset_for_cpp_benchmarks():
    """Fetch the adult dataset and store it as C++ cereal serialized files

    Labels are written to ``adult.labels.cereal`` and features to
    ``adult.features.cereal``, both inside the ``tools/benchmark/data``
    directory resolved relatively to this file. The directory is created if
    it does not exist yet.
    """
    this_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(this_dir, '../../tools/benchmark/data')
    os.makedirs(data_dir, exist_ok=True)

    labels_file = os.path.join(data_dir, 'adult.labels.cereal')
    features_file = os.path.join(data_dir, 'adult.features.cereal')

    features, labels = fetch_tick_dataset('binary/adult/adult.trn.bz2')

    # Labels are written first, then features
    serialize_array(labels, labels_file)
    serialize_array(features, features_file)
def test_serialize_sparse_2d_array(self):
    """...Check that a 2d sparse (CSR) array of the tested dtype survives a
    serialize / load round trip
    """
    target_file = self.array_file
    random_csr = sparse.rand(10, 10, density=0.3, format='csr')
    original = random_csr.astype(self.dtype)

    serialize_array(original, target_file)
    reloaded = load_array(target_file, array_dim=2, array_type='sparse',
                          dtype=self.dtype)

    # Both sides are densified so that they can be compared with numpy
    expected_dense = original.toarray()
    actual_dense = reloaded.toarray()
    np.testing.assert_array_almost_equal(expected_dense, actual_dense)

    # An explicit garbage collection has been shown to be required with
    # python 3.5 - the typemappers still need to be investigated
    gc.collect()
def test_serialize_column_major_2d_array(self):
    """...Check that a column major 2d dense array survives a serialize /
    load round trip
    """
    target_file = self.array_file

    # Same values stored twice: once as built by numpy, once converted to
    # Fortran (column major) memory order
    values = np.arange(80).reshape(10, 8)
    row_array = values.astype(self.dtype)
    col_array = np.asfortranarray(row_array)

    serialize_array(col_array, target_file)
    serialized_col_array = load_array(target_file, array_dim=2,
                                      dtype=self.dtype, major="col")

    # Sanity checks: changing the memory order must not change the values
    np.testing.assert_array_almost_equal(col_array, row_array)
    np.testing.assert_array_almost_equal(col_array,
                                         np.asfortranarray(row_array))

    # flatten('K') walks the elements in the order they occur in memory, so
    # this compares the memory layout and not only the logical content
    flat_expected = col_array.flatten('K')
    flat_actual = serialized_col_array.flatten('K')
    np.testing.assert_array_almost_equal(flat_expected, flat_actual)

    np.testing.assert_array_almost_equal(col_array, serialized_col_array)
def save_url_dataset_for_cpp_benchmarks(n_days):
    """Fetch the URL dataset and store it as C++ cereal serialized files

    Labels are written to ``url.<n_days>.labels.cereal`` and features to
    ``url.<n_days>.features.cereal``, both inside the
    ``tools/benchmark/data`` directory resolved relatively to this file. The
    directory is created if it does not exist yet.

    Parameters
    ----------
    n_days : `int`
        Number of days kept from the original dataset. As this dataset is
        quite big, you might not want to use it in totality.
    """
    this_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(this_dir, '../../tools/benchmark/data')
    os.makedirs(data_dir, exist_ok=True)

    # The number of days is part of the file names
    labels_name = 'url.{}.labels.cereal'.format(n_days)
    features_name = 'url.{}.features.cereal'.format(n_days)
    labels_file = os.path.join(data_dir, labels_name)
    features_file = os.path.join(data_dir, features_name)

    features, labels = fetch_url_dataset(n_days=n_days)

    # Labels are written first, then features
    serialize_array(labels, labels_file)
    serialize_array(features, features_file)