Esempio n. 1
0
    def test_serialize_1d_array(self):
        """...Test that a 1d dense array is unchanged by a serialize / load
        round trip
        """
        # 100 random values cast to the dtype under test
        original = np.random.rand(100)
        original = original.astype(self.dtype)

        serialize_array(original, self.array_file)
        reloaded = load_array(self.array_file, dtype=self.dtype)

        np.testing.assert_array_almost_equal(original, reloaded)
Esempio n. 2
0
    def test_serialize_2d_array(self):
        """...Test that a 2d dense array is unchanged by a serialize / load
        round trip
        """
        original = np.random.rand(10, 10)

        serialize_array(original, self.array_file)
        # The file is read back as a 2d array
        reloaded = load_array(self.array_file, array_dim=2)

        np.testing.assert_array_almost_equal(original, reloaded)
Esempio n. 3
0
    def test_serialize_sparse_2d_array(self):
        """...Test that a 2d sparse (CSR) array is unchanged by a serialize /
        load round trip
        """
        original = sparse.rand(10, 10, density=0.3, format='csr')

        serialize_array(original, self.array_file)
        reloaded = load_array(self.array_file, array_dim=2,
                              array_type='sparse')

        # Both matrices are densified before being compared
        expected = original.toarray()
        actual = reloaded.toarray()
        np.testing.assert_array_almost_equal(expected, actual)
Esempio n. 4
0
def save_adult_dataset_for_cpp_benchmarks():
    """Fetch the adult dataset and store it as C++ cereal serialized files

    Labels go to ``adult.labels.cereal`` and features to
    ``adult.features.cereal``, both in ``../../tools/benchmark/data``
    relative to this file. That directory is created if it does not exist
    yet.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(module_dir, '../../tools/benchmark/data')
    os.makedirs(output_dir, exist_ok=True)

    labels_file = os.path.join(output_dir, 'adult.labels.cereal')
    features_file = os.path.join(output_dir, 'adult.features.cereal')

    features, labels = fetch_tick_dataset('binary/adult/adult.trn.bz2')

    # Labels are written first, then features
    serialize_array(labels, labels_file)
    serialize_array(features, features_file)
Esempio n. 5
0
    def test_serialize_sparse_2d_array(self):
        """...Test that a 2d sparse (CSR) array of the dtype under test is
        unchanged by a serialize / load round trip
        """
        original = sparse.rand(10, 10, density=0.3, format='csr')
        original = original.astype(self.dtype)

        serialize_array(original, self.array_file)
        reloaded = load_array(self.array_file, array_dim=2,
                              array_type='sparse', dtype=self.dtype)

        # Both matrices are densified before being compared
        expected = original.toarray()
        actual = reloaded.toarray()
        np.testing.assert_array_almost_equal(expected, actual)

        # An explicit collection has been shown to be required with
        # python 3.5 - typemappers still need to be investigated
        gc.collect()
Esempio n. 6
0
    def test_serialize_column_major_2d_array(self):
        """...Test that a 2d dense array stored in column major (Fortran)
        order is unchanged by a serialize / load round trip
        """
        c_ordered = np.arange(80).reshape(10, 8).astype(self.dtype)
        f_ordered = np.asfortranarray(c_ordered)

        serialize_array(f_ordered, self.array_file)
        reloaded = load_array(self.array_file, array_dim=2,
                              dtype=self.dtype, major="col")

        # Sanity checks: changing the memory layout must not change values
        np.testing.assert_array_almost_equal(f_ordered, c_ordered)
        np.testing.assert_array_almost_equal(
            f_ordered, np.asfortranarray(c_ordered))

        # Compare elements in the order they are laid out in memory
        expected_flat = f_ordered.flatten('K')
        reloaded_flat = reloaded.flatten('K')
        np.testing.assert_array_almost_equal(expected_flat, reloaded_flat)

        np.testing.assert_array_almost_equal(f_ordered, reloaded)
Esempio n. 7
0
def save_url_dataset_for_cpp_benchmarks(n_days):
    """Fetch the URL dataset and store it as C++ cereal serialized files

    Labels and features are written to two separate files, whose names
    embed ``n_days``, in ``../../tools/benchmark/data`` relative to this
    file. That directory is created if it does not exist yet.

    Parameters
    ----------
    n_days : `int`
        Number of days kept from the original dataset.
        As this dataset is quite big, you might not want to use it in
        totality.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(module_dir, '../../tools/benchmark/data')
    os.makedirs(output_dir, exist_ok=True)

    labels_name = 'url.{}.labels.cereal'.format(n_days)
    features_name = 'url.{}.features.cereal'.format(n_days)
    labels_file = os.path.join(output_dir, labels_name)
    features_file = os.path.join(output_dir, features_name)

    features, labels = fetch_url_dataset(n_days=n_days)

    # Labels are written first, then features
    serialize_array(labels, labels_file)
    serialize_array(features, features_file)