Example #1
0
 def bundle(self, key, path, new_key):
     """Copy the data stored under *key* into the HDF5 file at *path*.

     The copy is written under *new_key*. Sparse data (a scipy sparse
     matrix or an ``h5sparse.Dataset``) is written through an
     ``h5sparse.Group`` so its sparse layout is preserved; everything
     else is written with plain h5py.
     """
     data = self.get(key)
     with h5py.File(path) as destination:
         is_sparse = ss.isspmatrix(data) or isinstance(data, h5sparse.Dataset)
         target = h5sparse.Group(destination) if is_sparse else destination
         target.create_dataset(new_key, data=data)
Example #2
0
 def get_function_kwargs(self, will_generate_keys, data,
                         manually_create_dataset=False):
     """Assemble the keyword arguments for a data-generation function.

     ``data`` is forwarded only when non-empty. When
     ``manually_create_dataset`` is ``True``, per-key dataset creators
     bound to ``self.h5f`` are supplied; when it names a sparse format
     (a member of ``SPARSE_FORMAT_SET``), the creators go through
     ``h5sparse.Group`` instead so sparse data is stored correctly.
     """
     kwargs = {}
     if len(data) > 0:
         kwargs['data'] = data
     if manually_create_dataset is True:
         creators = {
             key: partial(self.h5f.create_dataset, key)
             for key in will_generate_keys
         }
         kwargs['create_dataset_functions'] = creators
     elif manually_create_dataset in SPARSE_FORMAT_SET:
         creators = {
             key: partial(h5sparse.Group(self.h5f).create_dataset, key)
             for key in will_generate_keys
         }
         kwargs['create_dataset_functions'] = creators
     return kwargs
Example #3
0
    def write_data(self, result_dict):
        """Validate and persist every generated result into the HDF5 file.

        Each result is first checked for NaN values (for sparse matrices
        the check runs on the underlying ``.data`` array) and a
        ``ValueError`` is raised if any are found. Results are then
        written under ``data_definition.json()``; overwriting an
        existing key is not supported and raises ``NotImplementedError``.
        """
        for data_definition, result in six.viewitems(result_dict):
            # check nan: sparse matrices expose their values via .data
            nan_carrier = result.data if ss.isspmatrix(result) else result
            if np.isnan(nan_carrier).any():
                raise ValueError("data {} have nan".format(data_definition))

            # write data
            key = data_definition.json()
            with SimpleTimer("Writing generated data {} to hdf5 file".format(data_definition),
                             end_in_new_line=False):
                if key in self.h5f:
                    # self.h5f[key][...] = result
                    raise NotImplementedError("Overwriting not supported. Please report an issue.")
                else:
                    h5sparse.Group(self.h5f).create_dataset(key, data=result)
        self.h5f.flush()
Example #4
0
 def write_data(self, result_dict):
     """Check each result for NaN values and store it under its key.

     CSC/CSR sparse matrices are written through ``h5sparse``; all
     other results go through plain h5py. Existing keys cannot be
     overwritten. Raises ``ValueError`` if a result contains NaN.
     """
     for key, result in six.iteritems(result_dict):
         # NaN check: sparse matrices carry their values in .data
         nan_carrier = result.data if ss.isspmatrix(result) else result
         if np.isnan(nan_carrier).any():
             raise ValueError("data {} have nan".format(key))
         timer_message = "Writing generated data {} to hdf5 file".format(key)
         with SimpleTimer(timer_message, end_in_new_line=False):
             if key in self.h5f:
                 # self.h5f[key][...] = result
                 raise NotImplementedError("Overwriting not supported.")
             elif isinstance(result, (ss.csc_matrix, ss.csr_matrix)):
                 # sparse matrix
                 h5sparse.Group(self.h5f).create_dataset(key, data=result)
             else:
                 self.h5f.create_dataset(key, data=result)
     self.h5f.flush()
Example #5
0
 def get(self, key):
     """Fetch one dataset (string key) or a dict of datasets (iterable).

     NOTE(review): ``basestring`` exists only on Python 2; on Python 3
     this raises NameError — confirm the intended runtime.
     """
     if isinstance(key, basestring):
         return h5sparse.Group(self.h5f)[key]
     return {name: h5sparse.Group(self.h5f)[name] for name in key}

# read data
# Demo/scratch script: open an h5sparse file and slice a stored sparse matrix.

h5f = h5sparse.File("C:\\Users\\22560\\Desktop\\dis.h5")

# Slicing an h5sparse dataset returns a sparse matrix without loading it all.
h5f['sparse/matrix'][1:3]
# Nested indexing works the same way; .toarray() densifies the slice.
h5f['sparse']['matrix'][1:3].toarray()


import h5py

# allow us to use h5py to get data
h5f = h5py.File("test.h5")

# Wrap a plain h5py handle so sparse-aware indexing is available.
h5sparse.Group(h5f)['sparse/matrix']

# Wrap a raw h5py dataset as an h5sparse.Dataset view.
h5sparse.Dataset(h5f['sparse/matrix'])




# test append method in h5sparse

# NOTE(review): h5sparse is imported twice below — the second import is
# redundant (harmless, but could be removed).
import h5sparse
import h5py
import scipy.sparse as ss
import h5sparse
Example #7
0
 def get(self, data_definition):
     """Fetch the dataset for one DataDefinition, or a dict for many.

     A single ``DataDefinition`` returns its dataset directly; any
     other iterable of definitions returns a mapping from each
     definition object to its dataset, keyed on ``.json()`` in the file.
     """
     if isinstance(data_definition, DataDefinition):
         return h5sparse.Group(self.h5f)[data_definition.json()]
     return {definition: h5sparse.Group(self.h5f)[definition.json()]
             for definition in data_definition}