def check_numpy_scalar_argument_return_void(self):
    f = PyCFunction('foo')
    f += Variable('a1', numpy.void, 'in, out')
    f += Variable('a2', numpy.void, 'in, out')
    foo = f.build()
    args = ('he', 4)
    results = (numpy.void('he'), numpy.void(4))
    assert_equal(foo(*args), results)
def test_meta_nonempty():
    df1 = pd.DataFrame({'A': pd.Categorical(['Alice', 'Bob', 'Carol']),
                        'B': list('abc'),
                        'C': 'bar',
                        'D': np.float32(1),
                        'E': np.int32(1),
                        'F': pd.Timestamp('2016-01-01'),
                        'G': pd.date_range('2016-01-01', periods=3,
                                           tz='America/New_York'),
                        'H': pd.Timedelta('1 hours', 'ms'),
                        'I': np.void(b' '),
                        'J': pd.Categorical([UNKNOWN_CATEGORIES] * 3)},
                       columns=list('DCBAHGFEIJ'))
    df2 = df1.iloc[0:0]
    df3 = meta_nonempty(df2)
    assert (df3.dtypes == df2.dtypes).all()
    assert df3['A'][0] == 'Alice'
    assert df3['B'][0] == 'foo'
    assert df3['C'][0] == 'foo'
    assert df3['D'][0] == np.float32(1)
    assert df3['D'][0].dtype == 'f4'
    assert df3['E'][0] == np.int32(1)
    assert df3['E'][0].dtype == 'i4'
    assert df3['F'][0] == pd.Timestamp('1970-01-01 00:00:00')
    assert df3['G'][0] == pd.Timestamp('1970-01-01 00:00:00',
                                       tz='America/New_York')
    assert df3['H'][0] == pd.Timedelta('1', 'ms')
    assert df3['I'][0] == 'foo'
    assert df3['J'][0] == UNKNOWN_CATEGORIES

    s = meta_nonempty(df2['A'])
    assert s.dtype == df2['A'].dtype
    assert (df3['A'] == s).all()
def test_meta_nonempty():
    df1 = pd.DataFrame({'A': pd.Categorical(['Alice', 'Bob', 'Carol']),
                        'B': list('abc'),
                        'C': 'bar',
                        'D': 3.0,
                        'E': pd.Timestamp('2016-01-01'),
                        'F': pd.date_range('2016-01-01', periods=3,
                                           tz='America/New_York'),
                        'G': pd.Timedelta('1 hours'),
                        'H': np.void(b' ')},
                       columns=list('DCBAHGFE'))
    df2 = df1.iloc[0:0]
    df3 = meta_nonempty(df2)
    assert (df3.dtypes == df2.dtypes).all()
    assert df3['A'][0] == 'Alice'
    assert df3['B'][0] == 'foo'
    assert df3['C'][0] == 'foo'
    assert df3['D'][0] == 1.0
    assert df3['E'][0] == pd.Timestamp('1970-01-01 00:00:00')
    assert df3['F'][0] == pd.Timestamp('1970-01-01 00:00:00',
                                       tz='America/New_York')
    assert df3['G'][0] == pd.Timedelta('1 days')
    assert df3['H'][0] == 'foo'

    s = meta_nonempty(df2['A'])
    assert s.dtype == df2['A'].dtype
    assert (df3['A'] == s).all()
def _convert_value(self, value):
    """Convert a string into a numpy object (scalar or array).

    The value is most of the time a string, but it can already be a
    Python object, e.g. when it comes from the TIFF decoder.
    """
    if isinstance(value, list):
        # convert to a numpy array
        return numpy.array(value)
    if isinstance(value, dict):
        # convert to a numpy associative array
        key_dtype = numpy.min_scalar_type(list(value.keys()))
        value_dtype = numpy.min_scalar_type(list(value.values()))
        associative_type = [('key', key_dtype), ('value', value_dtype)]
        assert key_dtype.kind != "O" and value_dtype.kind != "O"
        return numpy.array(list(value.items()), dtype=associative_type)
    if isinstance(value, numbers.Number):
        dtype = numpy.min_scalar_type(value)
        assert dtype.kind != "O"
        return dtype.type(value)
    if isinstance(value, six.binary_type):
        try:
            value = value.decode('utf-8')
        except UnicodeDecodeError:
            return numpy.void(value)
    if " " in value:
        result = self._convert_list(value)
    else:
        result = self._convert_scalar_value(value)
    return result
def _saveValue(self, group, name, value):
    # we pickle to a string and convert to numpy.void,
    # because HDF5 has some limitations as to which strings it can serialize
    # (see http://docs.h5py.org/en/latest/strings.html)
    pickled = numpy.void(pickle.dumps(value, 0))
    dset = group.create_dataset(name, data=pickled)
    dset.attrs['version'] = self._version
    self._failed_to_deserialize = False
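# The matching read path has to unwrap the opaque scalar before unpickling.
# A minimal sketch under the assumption that the dataset above is read back
# with h5py (the `_loadValue` name is hypothetical, not part of the snippet):
import pickle

def _loadValue(self, group, name):
    # h5py returns an opaque (np.void) dataset as a numpy.void scalar;
    # .tobytes() recovers the raw pickled byte string.
    dset = group[name]
    raw = dset[()].tobytes()
    return pickle.loads(raw)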
def store(self, k, v):
    logging.info("{} storing {}".format(self.TAG, k))
    v_ = np.void(zlib.compress(cPickle.dumps(v, protocol=cPickle.HIGHEST_PROTOCOL)))
    if k in self.db:
        logging.error("{} Overwriting group {}!".format(self.TAG, k))
        del self.db[k]
    self.db[k] = [v_]
def _save_attr_value(self, value):
    # TODO: Fix for old python 2.x
    # Remove after 3.x transition
    # Store all the values as pickled strings because hdf can
    # only store strings or ndarrays as attributes.
    bpv = pickle.dumps(value)
    if not isinstance(bpv, bytes):
        bpv = six.b(bpv)
    npvbpv = np.void(bpv)
    return npvbpv
def VideoToStringArray(video_array):
    """Converts a NCHW video array to a N length string array with JPEG
    encoded strings, to be able to store as h5 files.
    """
    nframes = video_array.shape[0]
    frames = np.split(np.transpose(video_array, (0, 2, 3, 1)), nframes, axis=0)
    # np.void from http://docs.h5py.org/en/latest/strings.html
    frames = np.array([np.void(cv2.imencode(
        '.jpg', frame[0])[1].tostring()) for frame in frames])
    return frames
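# Reading such an array back requires decoding each JPEG byte string and
# re-stacking the frames. A hedged sketch of the inverse (the function name
# and the assumption that the writer used NCHW layout are mine):
import cv2
import numpy as np

def StringArrayToVideo(string_array):
    """Decode an array of JPEG byte strings back to an NCHW video array."""
    frames = []
    for item in string_array:
        # Each element wraps the raw JPEG bytes (np.void scalar or bytes).
        buf = item.tobytes() if hasattr(item, "tobytes") else bytes(item)
        frames.append(cv2.imdecode(np.frombuffer(buf, np.uint8),
                                   cv2.IMREAD_COLOR))
    # Stack to NHWC, then restore the NCHW layout used by the writer.
    return np.transpose(np.stack(frames), (0, 3, 1, 2))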
def serialize_hdf5(self, h5py_group):
    logger.debug("Serializing")
    h5py_group[self.HDF5_GROUP_FILENAME] = self._filename
    h5py_group["pickled_type"] = pickle.dumps(type(self), 0)

    # HACK: can this be done more elegantly?
    with tempfile.TemporaryFile() as f:
        self._tiktorch_net.serialize(f)
        f.seek(0)
        h5py_group["classifier"] = numpy.void(f.read())
def save_dataset_as_hdf5(dataset, filename=None, variant=None):
    """
    Method to write simple datasets to an HDF5 file.

    :param dataset: The dataset to be stored as a dictionary of tuples.
        Each entry is one usage and contains (input_data, targets)
    :type dataset: dict[unicode, (numpy.ndarray, pylstm.targets.Targets)]
    :param filename: Filename/path of the file that should be written.
        Will overwrite if it already exists. Can be None if variant is given.
    :type filename: unicode
    :param variant: hdf5 group object the dataset will be saved to instead
        of writing it to a new file. Either this or filename has to be set.
    :rtype: None
    """
    hdffile = None
    if variant is None:
        assert filename is not None
        import h5py
        hdffile = h5py.File(filename, "w")
        variant = hdffile

    if 'description' in dataset:
        variant.attrs['description'] = dataset['description']

    for usage in ['training', 'validation', 'test']:
        if usage not in dataset:
            continue
        input_data, targets = dataset[usage]
        grp = variant.create_group(usage)
        grp.create_dataset('input_data', data=input_data,
                           chunks=get_chunksize(input_data),
                           compression="gzip")
        if targets.is_labeling():
            targets_encoded = np.void(cPickle.dumps(targets.data))
            targets_ds = grp.create_dataset('targets', data=targets_encoded,
                                            dtype=targets_encoded.dtype)
        else:
            targets_ds = grp.create_dataset(
                'targets', data=targets.data,
                chunks=get_chunksize(targets.data),
                compression="gzip"
            )
        targets_ds.attrs.create('targets_type', str(targets.targets_type[0]))
        targets_ds.attrs.create('binarize_to', targets.binarize_to or 0)

        if targets.mask is not None:
            grp.create_dataset('mask', data=targets.mask, dtype='u1')

    if hdffile is not None:
        hdffile.close()
def write_hdf5(self, filename, dataset_name=None, info=None):
    r"""Writes ImageArray to hdf5 file.

    Parameters
    ----------
    filename: string
        The filename to create and write a dataset to
    dataset_name: string
        The name of the dataset to create in the file.
    info: dictionary
        A dictionary of supplementary info to write to append as attributes
        to the dataset.

    Examples
    --------
    >>> a = YTArray([1,2,3], 'cm')
    >>> myinfo = {'field':'dinosaurs', 'type':'field_data'}
    >>> a.write_hdf5('test_array_data.h5', dataset_name='dinosaurs',
    ...              info=myinfo)
    """
    import h5py
    from yt.extern.six.moves import cPickle as pickle
    if info is None:
        info = {}

    info["units"] = str(self.units)
    info["unit_registry"] = np.void(pickle.dumps(self.units.registry.lut))

    if dataset_name is None:
        dataset_name = "array_data"

    f = h5py.File(filename)
    if dataset_name in f.keys():
        d = f[dataset_name]
        # Overwrite without deleting if we can get away with it.
        if d.shape == self.shape and d.dtype == self.dtype:
            d[:] = self
            for k in d.attrs.keys():
                del d.attrs[k]
        else:
            del f[dataset_name]
            d = f.create_dataset(dataset_name, data=self)
    else:
        d = f.create_dataset(dataset_name, data=self)

    for k, v in info.items():
        d.attrs[k] = v
    f.close()
def metadata(self, value):
    try:
        del self.metadata
    except KeyError:
        pass
    dump = pickle.dumps(value)
    for i, start in enumerate(range(0, len(dump), MAX_ATTRIBUTE_SIZE)):
        self._group.attrs['_metadata{}'.format(i)] = np.void(
            dump[start:start + MAX_ATTRIBUTE_SIZE])
    self._group.attrs['_metadata_num'] = i + 1
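# The corresponding getter has to reassemble the chunks in order before
# unpickling. A minimal sketch assuming the same attribute names; the
# @property getter form is my assumption, not shown in the snippet above:
import pickle

@property
def metadata(self):
    # Each chunk comes back from h5py as a numpy.void scalar; tobytes()
    # restores the raw pickle fragment, and the fragments concatenate in order.
    num = self._group.attrs['_metadata_num']
    dump = b''.join(self._group.attrs['_metadata{}'.format(i)].tobytes()
                    for i in range(num))
    return pickle.loads(dump)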
def write_to(self, group, append=False):
    """Writes the properties to a `group`, or append it"""
    data = self.data
    if append is True:
        try:
            # concatenate original and new properties in a single list
            original = read_properties(group)
            data = original + data
        except EOFError:
            pass  # no former data to append on
    # h5py does not support embedded NULLs in strings ('\x00')
    data = pickle.dumps(data).replace(b'\x00', b'__NULL__')
    group['properties'][...] = np.void(data)
def save(self, hdf5_handle):
    g = hdf5_handle
    # Class settings
    g.attrs.update(self.settings)
    # Class attributes
    h = g.create_group("class")
    h.attrs["label"] = self.label
    if self.settings["store_cxx_serial"]:
        if self.verbose:
            self.log << "[h5] Writing cxx serial" << self.log.endl
        # Prune pid data if not required to compute gradients
        prune_pid_data = False if self.options['spectrum.gradients'] else True
        cxx_serial = self.spectrum.saves(prune_pid_data)
        h = g.create_dataset("cxx_serial", data=np.void(cxx_serial))
    if self.settings["store_cmap"]:
        if self.verbose:
            self.log << "[h5] Writing coefficient map" << self.log.endl
        h = g.create_group("cmap")
        for idx, cmap in enumerate(self.cmap):
            hh = h.create_group('%d' % idx)
            for key in cmap:
                hh.create_dataset(key, data=cmap[key], compression='gzip')
    if self.settings["store_gcmap"]:
        if self.verbose:
            self.log << "[h5] Writing global coefficient map" << self.log.endl
        h = g.create_group("gcmap")
        for idx, gcmap in enumerate(self.gcmap):
            hh = h.create_group('%d' % idx)
            for key in gcmap:
                hh.create_dataset(key, data=gcmap[key], compression='gzip')
    if self.settings["store_sdmap"]:
        if self.verbose:
            self.log << "[h5] Writing descriptor map" << self.log.endl
        h = g.create_group('sdmap')
        for idx, sdmap in enumerate(self.sdmap):
            hh = h.create_group('%d' % idx)
            for key in sdmap:
                hh.create_dataset(key, data=sdmap[key], compression='gzip')
    if self.settings["store_gsdmap"]:
        if self.verbose:
            self.log << "[h5] Writing global descriptor map" << self.log.endl
        h = g.create_group('gsdmap')
        for idx, gsdmap in enumerate(self.gsdmap):
            hh = h.create_group('%d' % idx)
            for key in gsdmap:
                hh.create_dataset(key, data=gsdmap[key], compression='gzip')
    if self.settings["store_sd"]:
        if self.verbose:
            self.log << "[h5] Writing descriptor matrix" << self.log.endl
        g.create_dataset('sd', data=self.sd, compression='gzip')
    if self.settings["store_gsd"]:
        if self.verbose:
            self.log << "[h5] Writing global descriptor matrix" << self.log.endl
        g.create_dataset('gsd', data=self.gsd, compression='gzip')
    return self
def write_img(self, file_name, timestamp, img):
    if self.file is None:
        self.file = h5py.File(file_name, 'w')
    grp = None
    if self.hdf5_key in self.file:
        grp = self.file[self.hdf5_key]
    else:
        grp = self.file.create_group(self.hdf5_key)
    print('create timestamp sub group ', str(timestamp))
    try:
        subgrp = grp.create_group(str(timestamp))
        dt = h5py.special_dtype(vlen=bytes)
        dsetimg = subgrp.create_dataset('png_img', (len(img),), dtype=dt)
        # dsetimg[:] = np.void(img)
        dsetimg[0] = np.void(img)  # store the encoded PNG as one opaque element
        print('image persisted!', len(img))
    except Exception:
        print('exception occurred')
        pass
def _create_data(self):
    """Initialize the held data by merging the headers of all frames."""
    headers = []
    types = set([])
    for fabio_frame in self.__fabio_reader.iter_frames():
        header = fabio_frame.header

        data = []
        for key, value in header.items():
            data.append("%s: %s" % (str(key), str(value)))
        data = "\n".join(data)
        try:
            line = data.encode("ascii")
            types.add(numpy.string_)
        except UnicodeEncodeError:
            try:
                line = data.encode("utf-8")
                types.add(numpy.unicode_)
            except UnicodeEncodeError:
                # Fallback in void
                line = numpy.void(data)
                types.add(numpy.void)

        headers.append(line)

    if numpy.void in types:
        dtype = numpy.void
    elif numpy.unicode_ in types:
        dtype = numpy.unicode_
    else:
        dtype = numpy.string_

    if dtype == numpy.unicode_ and h5py is not None:
        # h5py only supports vlen unicode
        dtype = h5py.special_dtype(vlen=six.text_type)

    return numpy.array(headers, dtype=dtype)
        else:
            data = _nonempty_index(idx.categories)
            cats = None
        return pd.CategoricalIndex(data, categories=cats,
                                   ordered=idx.ordered, name=idx.name)
    elif typ is pd.MultiIndex:
        levels = [_nonempty_index(i) for i in idx.levels]
        labels = [[0, 0] for i in idx.levels]
        return pd.MultiIndex(levels=levels, labels=labels, names=idx.names)

    raise TypeError("Don't know how to handle index of "
                    "type {0}".format(type(idx).__name__))


_simple_fake_mapping = {
    'b': np.bool_(True),
    'V': np.void(b' '),
    'M': np.datetime64('1970-01-01'),
    'm': np.timedelta64(1),
    'S': np.str_('foo'),
    'a': np.str_('foo'),
    'U': np.unicode_('foo'),
    'O': 'foo'
}


def _scalar_from_dtype(dtype):
    if dtype.kind in ('i', 'f', 'u'):
        return dtype.type(1)
    elif dtype.kind == 'c':
        return dtype.type(complex(1, 0))
    elif dtype.kind in _simple_fake_mapping:
files = []
for f_name in os.listdir(v_dir):
    fid = int(f_name.split('.')[0])
    if sample:
        if fid not in sample_ids:
            continue
    elif args.skip > 1:
        if (fid - 1) % args.skip != 0:
            continue
    files.append((fid, f_name))

if is_lmdb:
    with frame_db.begin(write=True, buffers=True) as txn:
        for fid, f_name in files:
            s = read_img(os.path.join(v_dir, f_name))
            # by padding zeros, frames in db are stored in order
            key = "%s/%08d" % (vvid, fid)
            txn.put(key, s)
else:
    for fid, f_name in files:
        s = read_img(os.path.join(v_dir, f_name))
        # by padding zeros, frames in db are stored in order
        key = "%s/%08d" % (vvid, fid)
        frame_db[key] = np.void(s)

call(["rm", "-rf", v_dir])
done_videos.add(vvid)
for filename in (
    fnmatch.filter(filenames, "*.F90")
    + fnmatch.filter(filenames, "*.f")
    + fnmatch.filter(filenames, "Makefile")
    + fnmatch.filter(filenames, "NuLib_README")
    + fnmatch.filter(filenames, "*.inc")
):
    matches.append(os.path.join(root, filename))
filelist = filelist + matches

tarfile = "nulib_src.tar.gz"
tarstring = "tar -czvf " + tarfile + " "
for xfile in filelist:
    tarstring = tarstring + xfile + " "
print(tarstring)
os.system(tarstring)

infile = open(tarfile, "rb")
data = infile.read()
infile.close()

wrapdata = np.void(data)

h5file = h5py.File(nulib_table_name, "r+")
dset = h5file.create_dataset("NuLib Source", data=wrapdata)
h5file.close()
def writeHDF5(self, cachePath, targetname):
    f = h5py.File(cachePath)
    dataset = f.create_dataset(targetname, shape=(1,), dtype=np.dtype('V13'))
    dataset[0] = np.void(pickle.dumps(self))
    f.close()
rand_clusts = np.zeros(all_ids_zf.shape[1])
nw_units = rand_clusts.size // (len(paths_512_zf) * 2)  # assume 2 layers in t branch
if file_key in dlist_file:
    dlist = pickle.loads(np.array(dlist_file[file_key]))
else:
    # loop through each individual layer removing desired number of units
    # (since we never shuffle between layers)
    for j in range(len(paths_512_zf) * 2):
        rand_clusts[j * nw_units:j * nw_units + int(nw_units * ptr / 100)] = 1
    dlist = a.create_det_drop_list(i, rand_clusts, all_ids_zf, [1], True)
    dlist_file.create_dataset(
        file_key, data=np.void(pickle.dumps(dlist, pickle.HIGHEST_PROTOCOL)))
pos = ana_zf.run_simulation(mp, "r", "bfevolve", drop_list=dlist)
rem_d["values"].append(a.preferred_fraction(pos, "r", 1.0))
dlist_file.close()
rem_d = DataFrame(rem_d)
fig, ax = pl.subplots()
sns.pointplot("state", "values", "species", rem_d, ci=68, ax=ax)
ax.set_ylabel("Fraction within +/- 1C")
ax.set_xlabel("")
sns.despine(fig, ax)
fig.savefig(save_folder + "zf_network_stability.pdf", type="pdf")

# panel 2: Full distribution of example type removals in zebrafish
# for fish-like clusters - their indices
fast_on_like = 4
slow_on_like = 5
def analyze(self):
    plt.plot([1, 2, 0, 3, 4])
    f = io.BytesIO()
    plt.savefig(f, format="PNG")
    f.seek(0)
    self.set_dataset("thumbnail", np.void(f.read()))
np.datetime64(None)
np.datetime64(None, "D")

np.timedelta64()
np.timedelta64(0)
np.timedelta64(0, "D")
np.timedelta64(0, ('ms', 3))
np.timedelta64(0, b"D")
np.timedelta64("3")
np.timedelta64(b"5")
np.timedelta64(np.timedelta64(2))
np.timedelta64(dt.timedelta(2))
np.timedelta64(None)
np.timedelta64(None, "D")

np.void(1)
np.void(np.int64(1))
np.void(True)
np.void(np.bool_(True))
np.void(b"test")
np.void(np.bytes_("test"))

# Protocols
i8 = np.int64()
u8 = np.uint64()
f8 = np.float64()
c16 = np.complex128()
b_ = np.bool_()
td = np.timedelta64()
U = np.str_("1")
S = np.bytes_("1")
def testArrayVoid(self):
    d = self.create_dataset(data=numpy.void([b"abc\xF0"]))
    result = self.formatter.toString(d[()], dtype=d.dtype)
    self.assertEqual(result, '[b"\\x61\\x62\\x63\\xF0"]')
class A:
    def __float__(self):
        return 1.0


np.int8(A())  # E: incompatible type
np.int16(A())  # E: incompatible type
np.int32(A())  # E: incompatible type
np.int64(A())  # E: incompatible type
np.uint8(A())  # E: incompatible type
np.uint16(A())  # E: incompatible type
np.uint32(A())  # E: incompatible type
np.uint64(A())  # E: incompatible type

np.void("test")  # E: incompatible type

np.generic(1)  # E: Cannot instantiate abstract class
np.number(1)  # E: Cannot instantiate abstract class
np.integer(1)  # E: Cannot instantiate abstract class
np.inexact(1)  # E: Cannot instantiate abstract class
np.character("test")  # E: Cannot instantiate abstract class
np.flexible(b"test")  # E: Cannot instantiate abstract class

np.float64(value=0.0)  # E: Unexpected keyword argument
np.int64(value=0)  # E: Unexpected keyword argument
np.uint64(value=0)  # E: Unexpected keyword argument
np.complex128(value=0.0j)  # E: Unexpected keyword argument
np.str_(value='bob')  # E: No overload variant
np.bytes_(value=b'test')  # E: No overload variant
np.void(value=b'test')  # E: Unexpected keyword argument
skip_thought_txt = sanitize_string(t)
txt_ids = get_ids(skip_thought_txt, vocab)
max_len = len(txt_ids) if max_len < len(txt_ids) else max_len
batch_txt_ids.append(txt_ids)
id_lens.append(len(txt_ids))

# padding with eos
for arr in batch_txt_ids:
    n = len(arr)
    rem = max_len - n
    concat_arr = [vocab('<end>')] * rem
    arr += concat_arr

input = Variable(torch.LongTensor(batch_txt_ids))
output_seq2vec = uniskip(input, lengths=id_lens).data.numpy()

# for c, e in enumerate(embeddings):
for c, e in enumerate(output_seq2vec):
    ex = split.create_group(example_name + '_' + str(c))
    ex.create_dataset('name', data=example_name)
    ex.create_dataset('img', data=np.void(img))
    ex.create_dataset('embeddings', data=e)
    ex.create_dataset('class', data=_class)
    ex.create_dataset('txt', data=txt[c].astype(object), dtype=dt)
print(example_name)
def dict2group(self, dictionary, group, **kwds):
    "Recursive writer of dicts and signals"
    from hyperspy.misc.utils import DictionaryTreeBrowser
    from hyperspy.signal import BaseSignal

    for key, value in dictionary.items():
        if isinstance(value, dict):
            self.dict2group(value, group.require_group(key), **kwds)
        elif isinstance(value, DictionaryTreeBrowser):
            self.dict2group(value.as_dictionary(),
                            group.require_group(key), **kwds)
        elif isinstance(value, BaseSignal):
            kn = key if key.startswith('_sig_') else '_sig_' + key
            self.write_signal(value, group.require_group(kn))
        elif isinstance(value, (np.ndarray, self.Dataset, da.Array)):
            self.overwrite_dataset(group, value, key, **kwds)
        elif value is None:
            group.attrs[key] = '_None_'
        elif isinstance(value, bytes):
            try:
                # binary string if has any null characters (otherwise not
                # supported by hdf5)
                value.index(b'\x00')
                group.attrs['_bs_' + key] = np.void(value)
            except ValueError:
                group.attrs[key] = value.decode()
        elif isinstance(value, str):
            group.attrs[key] = value
        elif isinstance(value, AxesManager):
            self.dict2group(
                value.as_dictionary(),
                group.require_group('_hspy_AxesManager_' + key),
                **kwds)
        elif isinstance(value, list):
            if len(value):
                self.parse_structure(key, group, value, '_list_', **kwds)
            else:
                group.attrs['_list_empty_' + key] = '_None_'
        elif isinstance(value, tuple):
            if len(value):
                self.parse_structure(key, group, value, '_tuple_', **kwds)
            else:
                group.attrs['_tuple_empty_' + key] = '_None_'
        elif value is Undefined:
            continue
        else:
            try:
                group.attrs[key] = value
            except BaseException:
                _logger.exception(
                    "The writer could not write the following "
                    f"information in the file: {key} : {value}")
        cats = idx.categories
        return pd.CategoricalIndex(data, categories=cats,
                                   ordered=idx.ordered, name=idx.name)
    elif typ is pd.MultiIndex:
        levels = [_nonempty_index(l) for l in idx.levels]
        labels = [[0, 0] for i in idx.levels]
        return pd.MultiIndex(levels=levels, labels=labels, names=idx.names)

    raise TypeError("Don't know how to handle index of "
                    "type {0}".format(type(idx).__name__))


_simple_fake_mapping = {
    'b': np.bool_(True),
    'V': np.void(b' '),
    'M': np.datetime64('1970-01-01'),
    'm': np.timedelta64(1),
    'S': np.str_('foo'),
    'a': np.str_('foo'),
    'U': np.unicode_('foo'),
    'O': 'foo'
}


def _scalar_from_dtype(dtype):
    if dtype.kind in ('i', 'f', 'u'):
        return dtype.type(1)
    elif dtype.kind == 'c':
        return dtype.type(complex(1, 0))
    elif dtype.kind in _simple_fake_mapping:
def test_void_scalar_recursion(self):
    # gh-9345
    repr(np.void(b'test'))  # RecursionError ?
def write(self, key: str, value: np.ndarray) -> str:
    serialized_feats = lilcom.compress(value, tick_power=self.tick_power)
    self.hdf.create_dataset(key, data=np.void(serialized_feats))
    return key
def testVoid(self):
    d = self.create_dataset(data=numpy.void(b"abc\xF0"))
    result = self.formatter.toString(d[()], dtype=d.dtype)
    self.assertEqual(result, 'b"\\x61\\x62\\x63\\xF0"')
def save_bytes(s, parent, name, _):
    parent.create_dataset(name, data=np.void(s))
def dict2hdfgroup(dictionary, group, **kwds):
    from hyperspy.misc.utils import DictionaryTreeBrowser
    from hyperspy.signal import BaseSignal

    def parse_structure(key, group, value, _type, **kwds):
        try:
            # Here we check if there are any signals in the container, as
            # casting a long list of signals to a numpy array takes a very
            # long time. So we check if there are any, and save numpy the
            # trouble
            if np.any([isinstance(t, BaseSignal) for t in value]):
                tmp = np.array([[0]])
            else:
                tmp = np.array(value)
        except ValueError:
            tmp = np.array([[0]])
        if tmp.dtype == np.dtype('O') or tmp.ndim != 1:
            dict2hdfgroup(dict(zip(
                [str(i) for i in range(len(value))], value)),
                group.create_group(_type + str(len(value)) + '_' + key),
                **kwds)
        elif tmp.dtype.type is np.unicode_:
            group.create_dataset(_type + key,
                                 tmp.shape,
                                 dtype=h5py.special_dtype(vlen=str),
                                 **kwds)
            group[_type + key][:] = tmp[:]
        else:
            group.create_dataset(
                _type + key,
                data=tmp,
                **kwds)

    for key, value in dictionary.items():
        if isinstance(value, dict):
            dict2hdfgroup(value, group.create_group(key), **kwds)
        elif isinstance(value, DictionaryTreeBrowser):
            dict2hdfgroup(value.as_dictionary(), group.create_group(key),
                          **kwds)
        elif isinstance(value, BaseSignal):
            if key.startswith('_sig_'):
                try:
                    write_signal(value, group[key])
                except:
                    write_signal(value, group.create_group(key))
            else:
                write_signal(value, group.create_group('_sig_' + key))
        elif isinstance(value, np.ndarray):
            group.create_dataset(key, data=value, **kwds)
        elif value is None:
            group.attrs[key] = '_None_'
        elif isinstance(value, bytes):
            try:
                # binary string if has any null characters (otherwise not
                # supported by hdf5)
                value.index(b'\x00')
                group.attrs['_bs_' + key] = np.void(value)
            except ValueError:
                group.attrs[key] = value.decode()
        elif isinstance(value, str):
            group.attrs[key] = value
        elif isinstance(value, AxesManager):
            dict2hdfgroup(value.as_dictionary(),
                          group.create_group('_hspy_AxesManager_' + key),
                          **kwds)
        elif isinstance(value, list):
            if len(value):
                parse_structure(key, group, value, '_list_', **kwds)
            else:
                group.attrs['_list_empty_' + key] = '_None_'
        elif isinstance(value, tuple):
            if len(value):
                parse_structure(key, group, value, '_tuple_', **kwds)
            else:
                group.attrs['_tuple_empty_' + key] = '_None_'
        elif value is Undefined:
            continue
        else:
            try:
                group.attrs[key] = value
            except:
                _logger.exception(
                    "The hdf5 writer could not write the following "
                    "information in the file: %s : %s", key, value)
def pick(obj):
    """create a serialized object that can go into hdf5 in py2 and py3,
    and can be read by both
    """
    return np.void(pickle.dumps(obj, 0))
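# The reverse helper is not shown above. A minimal sketch, assuming the
# stored value is handed back as a numpy.void scalar (the name `unpick`
# is hypothetical):
import pickle

def unpick(raw):
    # tobytes() recovers the protocol-0 pickle stream wrapped by np.void.
    return pickle.loads(raw.tobytes())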
def make_od(freq, od, args, outbuf): print(freq, od) horns = {30:[27, 28], 44:[24, 25, 26], 70:[18, 19, 20, 21, 22, 23]} #psi_uv from https://www.aanda.org/articles/aa/full_html/2016/10/aa25818-15/T5.html mbangs = {27:-22.46, 28:22.45, 24:0.01, 25:-113.23, 26:113.23, 18:22.15, 19:22.4, 20:22.38, 21:-22.38, 22:-22.34, 23:-22.08} nsides = {30:512, 44:512, 70:1024} nside = nsides[freq] npsi = 4096 outName = os.path.join(args.out_dir, 'LFI_0' + str(freq) + '_' + str(od).zfill(6) + '.h5') try: exFile = h5py.File(os.path.join(args.planck_dir, 'LFI_0' + str(freq) + '_' + str(horns[freq][0]) + '_L2_002_OD' + str(od).zfill(4) +'.h5'), 'r') except (OSError): return if(args.restart and os.path.exists(outName)): for pid, index in zip(exFile['AHF_info/PID'], range(len(exFile['AHF_info/PID']))): startIndex = np.where(exFile['Time/OBT'] > exFile['AHF_info/PID_start'][index]) endIndex = np.where(exFile['Time/OBT'] > exFile['AHF_info/PID_end'][index]) if len(startIndex[0]) > 0: pid_start = startIndex[0][0] else:#catch days with no pids continue if len(endIndex[0]) is not 0: pid_end = endIndex[0][0] else:#catch final pid per od pid_end = len(exFile['Time/OBT']) if pid_start == pid_end:#catch chunks with no data like od 1007 continue outbuf['id' + str(pid)] = str(pid) + ' "' + outName + '" ' + '1\n' return outFile = h5py.File(outName, 'w') rimo = fits.open(args.rimo) if args.velocity_file is not None: velFile = fits.open(args.velocity_file) if args.position_file is not None: posArray = np.loadtxt(args.position_file, comments='*').transpose() #Julian Date to Modified Julian Date posArray[0] -= 2400000.5 #make common group for things we only read once #polang, mbeamang, nside, fsamp, npsi prefix = '/common' rimo_i = np.where(rimo[1].data.field('detector').flatten() == 'LFI' + str(horns[freq][0]) + 'M') #sampling frequency fsamp = rimo[1].data.field('f_samp')[rimo_i] outFile.create_dataset(prefix + '/fsamp', data=fsamp) #nside outFile.create_dataset(prefix + '/nside', data=[nside]) #psi angle resolution outFile.create_dataset(prefix + '/npsi', data=[npsi]) #number of sigma resolution in tod compression ntodsigma = 100 outFile.create_dataset(prefix + '/ntodsigma', data=[ntodsigma]) detNames = '' polangs = [] mainbeamangs = [] for horn in horns[freq]: for hornType in ['M', 'S']: rimo_i = np.where(rimo[1].data.field('detector').flatten() == 'LFI' + str(horn) + hornType) detNames += str(horn) + hornType + ', ' polangs.append(math.radians(rimo[1].data.field('psi_pol')[rimo_i])) mainbeamangs.append(math.radians(mbangs[horn])) compstring='huffman' if args.no_compress: compstring = 'uncompressed' #experiment name outFile.create_dataset(prefix + '/datatype', data=np.string_('LFI_' + compstring)) #make detector names lookup outFile.create_dataset(prefix + '/det', data=np.string_(detNames[0:-2])) #make polarization angle outFile.create_dataset(prefix + '/polang', data=polangs) outFile[prefix + '/polang'].attrs['legend'] = detNames[0:-2] #make main beam angle outFile.create_dataset(prefix + '/mbang', data=mainbeamangs) outFile[prefix + '/mbang'].attrs['legend'] = detNames[0:-2] #huffman coded bits for pid, index in zip(exFile['AHF_info/PID'], range(len(exFile['AHF_info/PID']))): startIndex = np.where(exFile['Time/OBT'] > exFile['AHF_info/PID_start'][index]) endIndex = np.where(exFile['Time/OBT'] > exFile['AHF_info/PID_end'][index]) if len(startIndex[0]) > 0: pid_start = startIndex[0][0] else:#catch days with no pids continue if len(endIndex[0]) is not 0: pid_end = endIndex[0][0] else:#catch final pid per od pid_end = 
len(exFile['Time/OBT']) if pid_start == pid_end:#catch chunks with no data like od 1007 continue obt = exFile['Time/OBT'][pid_start] cut1 = exFile['Time/OBT'][exFile['Time/OBT'] > exFile['AHF_info/PID_start'][index]] #common fields prefix = str(pid).zfill(6) + '/common' #time field outFile.create_dataset(prefix + '/time', data=[exFile['Time/MJD'][pid_start], exFile['Time/OBT'][pid_start], exFile['Time/SCET'][pid_start]]) outFile[prefix + '/time'].attrs['type'] = 'MJD, OBT, SCET' #length of the tod outFile.create_dataset(prefix + '/ntod', data=[pid_end - pid_start]) #velocity field velIndex = np.where(velFile[1].data.scet > exFile['Time/SCET'][pid_start])[0][0] #rotate from ecliptic to galactic r = hp.Rotator(coord=['E', 'G']) outFile.create_dataset(prefix + '/vsun', data=r([velFile[1].data.xvel[velIndex], velFile[1].data.yvel[velIndex], velFile[1].data.zvel[velIndex]])) #add some metadata so someone might be able to figure out what is going on outFile[prefix + '/vsun'].attrs['info'] = '[x, y, z]' outFile[prefix + '/vsun'].attrs['coords'] = 'galactic' #satelite position posIndex = np.where(posArray[0] > exFile['Time/MJD'][pid_start])[0][0] outFile.create_dataset(prefix + '/satpos', data=[posArray[1][posIndex], posArray[2][posIndex]]) #add metadata outFile[prefix + '/satpos'].attrs['info'] = '[Lon, Lat]' outFile[prefix + '/satpos'].attrs['coords'] = 'horizon ecliptic' #open per freq npipe gains file if required if args.gains_dir is not None and "npipe" in args.gains_dir:#this is a shitty test gainsFile = fits.open(os.path.join(args.gains_dir, 'gains_0' + str(freq) + '_iter01.fits')) #make huffman code table pixArray = [[], [], []] todArray = [] for horn in horns[freq]: for hornType in ['M', 'S']: fileName = h5py.File(os.path.join(args.planck_dir, 'LFI_0' + str(freq) + '_' + str(horn) + '_L2_002_OD' + str(od).zfill(4) +'.h5'), 'r') rimo_i = np.where(rimo[1].data.field('detector').flatten() == 'LFI' + str(horn) + hornType) #get all pointing data newTheta, newPhi = r(fileName[str(horn) + hornType + '/THETA'][pid_start:pid_end], fileName[str(horn) + hornType + '/PHI'][pid_start:pid_end]) pixels = hp.pixelfunc.ang2pix(nside, newTheta, newPhi) if len(pixels > 0): delta = np.diff(pixels) delta = np.insert(delta, 0, pixels[0]) pixArray[0].append(delta) #get all pol angle data psiArray = fileName[str(horn) + hornType + '/PSI'][pid_start:pid_end] + r.angle_ref(fileName[str(horn) + hornType + '/THETA'][pid_start:pid_end], fileName[str(horn) + hornType + '/PHI'][pid_start:pid_end]) + math.radians(rimo[1].data.field('psi_pol')[rimo_i]) psiArray = np.where(psiArray < 0, 2*np.pi + psiArray, psiArray) psiArray = np.where(psiArray >= 2*np.pi, psiArray - 2*np.pi, psiArray) psiBins = np.linspace(0, 2*np.pi, num=4096) psiIndexes = np.digitize(psiArray, psiBins) if(len(psiIndexes) > 0): delta = np.diff(psiIndexes) delta = np.insert(delta, 0, psiIndexes[0]) pixArray[1].append(delta) #get all flag data flagArray = fileName[str(horn) + hornType + '/FLAG'][pid_start:pid_end] if (len(flagArray) > 0): delta = np.diff(flagArray) delta = np.insert(delta, 0, flagArray[0]) pixArray[2].append(delta) #make tod data tod = fileName[str(horn) + hornType +'/SIGNAL'][pid_start:pid_end] sigma0 = rimo[1].data.field('net')[rimo_i] * math.sqrt(fsamp) gain = 1 #make gain if args.gains_dir is not None and "npipe" in args.gains_dir:#this is a shitty test baseGain = fits.getdata(os.path.join(args.gains_dir, 'C0' + str(freq) + '-0000-DX11D-20150209_uniform.fits'),extname='LFI' + str(horn) + hornType)[0][0] gainArr = gainsFile['LFI' + 
str(horn) + hornType].data.cumulative obtArr = (1e-9 * pow(2,16)) * gainsFile[1].data.OBT gainI = np.where(obtArr <= obt)[0][-1] gain = np.array([1.0/(baseGain * gainArr[gainI])]) elif args.gains_dir is not None: gainFile = fits.open(os.path.join(args.gains_dir, 'LFI_0' + str(freq) + '_LFI' + str(horn) + hornType + '_001.fits')) gain=1.0/gainFile[1].data.GAIN[np.where(gainFile[1].data.PID == pid)] gainFile.close() #TODO: fix this if(type(gain) is int or gain.size == 0): gain = [0.06] todInd = np.int32(ntodsigma * tod/(sigma0*gain[0])) delta = np.diff(todInd) delta = np.insert(delta, 0, todInd[0]) todArray.append(delta) h = huffman.Huffman("", nside) h.GenerateCode(pixArray) if(not args.no_compress_tod): hTod = huffman.Huffman("", nside) hTod.GenerateCode(todArray) huffarray = np.append(np.append(np.array(h.node_max), h.left_nodes), h.right_nodes) outFile.create_dataset(prefix + '/hufftree', data=huffarray) outFile.create_dataset(prefix + '/huffsymb', data=h.symbols) if(not args.no_compress_tod): huffarrayTod = np.append(np.append(np.array(hTod.node_max), hTod.left_nodes), hTod.right_nodes) outFile.create_dataset(prefix + '/todtree', data=huffarrayTod) outFile.create_dataset(prefix + '/todsymb', data=hTod.symbols) #open per freq npipe gains file if required if args.gains_dir is not None and "npipe" in args.gains_dir:#this is a shitty test gainsFile = fits.open(os.path.join(args.gains_dir, 'gains_0' + str(freq) + '_iter01.fits')) for horn in horns[freq]: fileName = h5py.File(os.path.join(args.planck_dir, 'LFI_0' + str(freq) + '_' + str(horn) + '_L2_002_OD' + str(od).zfill(4) +'.h5'), 'r') for hornType in ['S', 'M']: prefix = str(pid).zfill(6) + '/' + str(horn) + hornType #get RIMO index #print(rimo[1].data.field('detector').flatten().shape, rimo[1].data.field('detector').flatten(), 'LFI' +str(horn) + hornType) rimo_i = np.where(rimo[1].data.field('detector').flatten() == 'LFI' + str(horn) + hornType) #make flag data flagArray = fileName[str(horn) + hornType + '/FLAG'][pid_start:pid_end] if (len(flagArray) > 0): delta = np.diff(flagArray) delta = np.insert(delta, 0, flagArray[0]) if(args.no_compress): outFile.create_dataset(prefix+'/flag', data=flagArray) else: outFile.create_dataset(prefix + '/flag', data=np.void(bytes(h.byteCode(delta)))) #outFile.create_dataset(prefix + '/flag', data=flagArray, compression='gzip', shuffle=True) #make pixel number newTheta, newPhi = r(fileName[str(horn) + hornType + '/THETA'][pid_start:pid_end], fileName[str(horn) + hornType + '/PHI'][pid_start:pid_end]) pixels = hp.pixelfunc.ang2pix(nside, newTheta, newPhi) outP = [0,0,0] nsamps = min(100, len(newTheta)) mapPix = np.zeros(hp.nside2npix(512)) mapCross = np.zeros(hp.nside2npix(512)) outAng = [0,0] if len(pixels > 0): #compute average outer product pair1 = random.sample(range(len(newTheta)), nsamps) pair2 = random.sample(range(len(newTheta)), nsamps) #pair1 = range(nsamps) #pair2 = np.int32(np.ones(nsamps)) vecs = hp.pixelfunc.ang2vec(newTheta, newPhi) for a1, a2 in zip(pair1, pair2): crossP = np.cross(vecs[a1], vecs[a2]) if(crossP[0] < 0): crossP *= -1 theta, phi = hp.vec2ang(crossP) if(not math.isnan(theta) and not math.isnan(phi)): #print(theta, phi, crossP, vecs[a1], vecs[a2]) outAng[0] += theta/nsamps outAng[1] += phi/nsamps else: outAng[0] += outAng[0]/nsamps outAng[1] += outAng[1]/nsamps delta = np.diff(pixels) delta = np.insert(delta, 0, pixels[0]) if(args.no_compress): outFile.create_dataset(prefix+'/pix', data=pixels) outFile.create_dataset(prefix+'/theta', data=newTheta) 
outFile.create_dataset(prefix+'/phi', data=newPhi) else: outFile.create_dataset(prefix + '/pix', data=np.void(bytes(h.byteCode(delta)))) outFile.create_dataset(prefix + '/outP', data=outAng) #make pol angle psiArray = fileName[str(horn) + hornType + '/PSI'][pid_start:pid_end] + r.angle_ref(fileName[str(horn) + hornType + '/THETA'][pid_start:pid_end], fileName[str(horn) + hornType + '/PHI'][pid_start:pid_end]) + math.radians(rimo[1].data.field('psi_pol')[rimo_i]) psiArray = np.where(psiArray < 0, 2*np.pi + psiArray, psiArray) psiArray = np.where(psiArray >= 2*np.pi, psiArray - 2*np.pi, psiArray) psiBins = np.linspace(0, 2*np.pi, num=4096) psiIndexes = np.digitize(psiArray, psiBins) #if(pid == 3798 and horn == 28 and hornType == 'M'): # print(len(psiIndexes)) # np.set_printoptions(threshold=sys.maxsize) # for i in range(4000): # print(i, psiArray[i], psiIndexes[i]) if(len(psiIndexes) > 0): delta = np.diff(psiIndexes) delta = np.insert(delta, 0, psiIndexes[0]) if(args.no_compress): outFile.create_dataset(prefix + '/psi', data=psiArray) else: outFile.create_dataset(prefix + '/psi', data=np.void(bytes(h.byteCode(delta)))) #outFile.create_dataset(prefix + '/psi', data=psiIndexes, compression='gzip', shuffle=True) #scalars gain = 1 #make gain if args.gains_dir is not None and "npipe" in args.gains_dir:#this is a shitty test baseGain = fits.getdata(os.path.join(args.gains_dir, 'C0' + str(freq) + '-0000-DX11D-20150209_uniform.fits'),extname='LFI' + str(horn) + hornType)[0][0] gainArr = gainsFile['LFI' + str(horn) + hornType].data.cumulative obtArr = (1e-9 * pow(2,16)) * gainsFile[1].data.OBT gainI = np.where(obtArr <= obt)[0][-1] gain = np.array([1.0/(baseGain * gainArr[gainI])]) elif args.gains_dir is not None: gainFile = fits.open(os.path.join(args.gains_dir, 'LFI_0' + str(freq) + '_LFI' + str(horn) + hornType + '_001.fits')) gain=1.0/gainFile[1].data.GAIN[np.where(gainFile[1].data.PID == pid)] gainFile.close() #TODO: fix this if(type(gain) is int or gain.size == 0): gain = [0.06] #make white noise sigma0 = rimo[1].data.field('net')[rimo_i] * math.sqrt(fsamp) #make f_knee fknee = rimo[1].data.field('f_knee')[rimo_i] #make 1/f noise exponent alpha = rimo[1].data.field('alpha')[rimo_i] #print(gain, sigma0, fknee, alpha) outFile.create_dataset(prefix + '/scalars', data=np.array([gain, sigma0, fknee, alpha]).flatten()) outFile[prefix + '/scalars'].attrs['legend'] = 'gain, sigma0, fknee, alpha' #make psd noise #make tod data tod = fileName[str(horn) + hornType +'/SIGNAL'][pid_start:pid_end] if(args.no_compress or args.no_compress_tod): outFile.create_dataset(prefix + '/tod', data=tod, dtype='f4') else: todInd = np.int32(ntodsigma * tod/(sigma0*gain[0])) delta = np.diff(todInd) delta = np.insert(delta, 0, todInd[0]) outFile.create_dataset(prefix + '/tod', data=np.void(bytes(hTod.byteCode(delta)))) #undifferenced data? TODO #outFile.create_dataset(prefix + '/') #make other #write to output file outbuf['id' + str(pid)] = str(pid) + ' "' + os.path.abspath(outName) + '" ' + '1 ' + str(outAng[0][0]) + ' ' + str(outAng[1][0]) +'\n'
def create_dataset(self, name, shape=None, data=None, **kwds):
    f = io.BytesIO()
    array, rate = data
    soundfile.write(f, array, rate, format=self.format)
    self.file.create_dataset(name, shape=shape,
                             data=np.void(f.getvalue()), **kwds)
buffer = dict()
for packet in ch10.packet_headers():
    pcntr += 1
    lggr.info(f'Packet #{pcntr} type: '
              f'{Py106.Packet.DataType.TypeName(packet.DataType)}')
    if packet.DataType == Py106.Packet.DataType.TMATS:
        lggr.debug(f'Require {paragrp.name}/TMATS HDF5 group and store TMATS '
                   f'attributes')
        ch10.read_data()
        rawgrp.attrs['rcc_version'] = ch10_tmats.ch10ver
        derive_tmats_attrs(paragrp, ch10.Buffer.raw[4:ch10.Header.DataLen])
        tmats_grp = rawgrp.create_group('TMATS')
        dset = tmats_grp.create_dataset(
            'data', shape=(),
            data=np.void(ch10.Buffer.raw[4:ch10.Header.DataLen]))
        dset.attrs['name'] = 'TMATS buffer'
        lggr.info('Finished with TMATS information')
    elif packet.DataType == Py106.Packet.DataType.MIL1553_FMT_1:
        ch10.read_data()
        # Loop over each message in the 1553 packet...
        for msg in ch10_1553.msgs():
            ch = ch10.Header.ChID
            if msg.p1553Hdr.contents.Field.BlockStatus.RT2RT:
                # RT-to-RT message
                rx_cmd = msg.pCmdWord1.contents.Field
                tx_cmd = msg.pCmdWord2.contents.Field
                grp1553 = (
                    f'1553/Ch_{ch}/RT_{tx_cmd.RTAddr}/SA_{tx_cmd.SubAddr}/T/'
                    f'RT_{rx_cmd.RTAddr}/SA_{rx_cmd.SubAddr}')
def _saveValue(group, name, value):
    if value:
        group.create_dataset(name, data=np.void(value))
@group_split_dispatch.register((pd.DataFrame, pd.Series, pd.Index))
def group_split_pandas(df, c, k, ignore_index=False):
    indexer, locations = pd._libs.algos.groupsort_indexer(
        c.astype(np.int64, copy=False), k
    )
    df2 = df.take(indexer)
    locations = locations.cumsum()
    parts = [df2.iloc[a:b] for a, b in zip(locations[:-1], locations[1:])]
    return dict(zip(range(k), parts))


_simple_fake_mapping = {
    "b": np.bool_(True),
    "V": np.void(b" "),
    "M": np.datetime64("1970-01-01"),
    "m": np.timedelta64(1),
    "S": np.str_("foo"),
    "a": np.str_("foo"),
    "U": np.unicode_("foo"),
    "O": "foo",
}


def _scalar_from_dtype(dtype):
    if dtype.kind in ("i", "f", "u"):
        return dtype.type(1)
    elif dtype.kind == "c":
        return dtype.type(complex(1, 0))
    elif dtype.kind in _simple_fake_mapping:
def dict2hdfgroup(dictionary, group, compression=None): from hyperspy.misc.utils import DictionaryTreeBrowser from hyperspy.signal import Signal def parse_structure(key, group, value, _type, compression): try: # Here we check if there are any signals in the container, as casting a long list of signals to a # numpy array takes a very long time. So we check if there are any, # and save numpy the trouble if np.any([isinstance(t, Signal) for t in value]): tmp = np.array([[0]]) else: tmp = np.array(value) except ValueError: tmp = np.array([[0]]) if tmp.dtype is np.dtype("O") or tmp.ndim is not 1: dict2hdfgroup( dict(zip([unicode(i) for i in xrange(len(value))], value)), group.create_group(_type + str(len(value)) + "_" + key), compression=compression, ) elif tmp.dtype.type is np.unicode_: group.create_dataset( _type + key, tmp.shape, dtype=h5py.special_dtype(vlen=unicode), compression=compression ) group[_type + key][:] = tmp[:] else: group.create_dataset(_type + key, data=tmp, compression=compression) for key, value in dictionary.iteritems(): if isinstance(value, dict): dict2hdfgroup(value, group.create_group(key), compression=compression) elif isinstance(value, DictionaryTreeBrowser): dict2hdfgroup(value.as_dictionary(), group.create_group(key), compression=compression) elif isinstance(value, Signal): if key.startswith("_sig_"): try: write_signal(value, group[key]) except: write_signal(value, group.create_group(key)) else: write_signal(value, group.create_group("_sig_" + key)) elif isinstance(value, np.ndarray): group.create_dataset(key, data=value, compression=compression) elif value is None: group.attrs[key] = "_None_" elif isinstance(value, str): try: # binary string if has any null characters (otherwise not # supported by hdf5) _ = value.index("\x00") group.attrs["_bs_" + key] = np.void(value) except ValueError: try: # Store strings as unicode using the default encoding group.attrs[key] = unicode(value) except UnicodeEncodeError: pass except UnicodeDecodeError: group.attrs["_bs_" + key] = np.void(value) # binary string elif isinstance(value, AxesManager): dict2hdfgroup( value.as_dictionary(), group.create_group("_hspy_AxesManager_" + key), compression=compression ) elif isinstance(value, (datetime.date, datetime.time)): group.attrs["_datetime_" + key] = repr(value) elif isinstance(value, list): if len(value): parse_structure(key, group, value, "_list_", compression) else: group.attrs["_list_empty_" + key] = "_None_" elif isinstance(value, tuple): if len(value): parse_structure(key, group, value, "_tuple_", compression) else: group.attrs["_tuple_empty_" + key] = "_None_" elif value is Undefined: continue else: try: group.attrs[key] = value except: print("The hdf5 writer could not write the following " "information in the file") print("%s : %s" % (key, value))
def toHdf5(self,fileName,group='component1/part1'): """ Dump field to HDF5, in a simple format suitable for interoperability (TODO: document). :param str fileName: HDF5 file :param str group: HDF5 group the data will be saved under. The HDF hierarchy is like this:: group | +--- mesh_01 {hash=25aa0aa04457} | +--- [vertex_coords] | +--- [cell_types] | \--- [cell_vertices] +--- mesh_02 {hash=17809e2b86ea} | +--- [vertex_coords] | +--- [cell_types] | \--- [cell_vertices] +--- ... +--- field_01 | +--- -> mesh_01 | \--- [vertex_values] +--- field_02 | +--- -> mesh_01 | \--- [vertex_values] +--- field_03 | +--- -> mesh_02 | \--- [cell_values] \--- ... where ``plain`` names are HDF (sub)groups, ``[bracketed]`` names are datasets, ``{name=value}`` are HDF attributes, ``->`` prefix indicated HDF5 hardlink (transparent to the user); numerical suffixes (``_01``, ...) are auto-allocated. Mesh objects are hardlinked using HDF5 hardlinks if an identical mesh is already stored in the group, based on hexdigest of its full data. .. note:: This method has not been tested yet. The format is subject to future changes. """ import h5py, hashlib hdf=h5py.File(fileName,'a',libver='latest') if group not in hdf: gg=hdf.create_group(group) else: gg=hdf[group] # raise IOError('Path "%s" is already used in "%s".'%(path,fileName)) def lowestUnused(trsf,predicate,start=1): 'Find the lowest unused index, where *predicate* is used to test for existence, and *trsf* transforms integer (starting at *start* and incremented until unused value is found) to whatever predicate accepts as argument. Lowest transformed value is returned.' import itertools,sys for i in itertools.count(start=start): t=trsf(i) if not predicate(t): return t # save mesh (not saved if there already) newgrp=lowestUnused(trsf=lambda i:'mesh_%02d'%i,predicate=lambda t:t in gg) mh5=self.getMesh().asHdf5Object(parentgroup=gg,newgroup=newgrp) if self.value: fieldGrp=hdf.create_group(lowestUnused(trsf=lambda i,group=group: group+'/field_%02d'%i,predicate=lambda t: t in hdf)) fieldGrp['mesh']=mh5 fieldGrp.attrs['fieldID']=self.fieldID fieldGrp.attrs['valueType']=self.valueType # string/bytes may not contain NULL when stored as string in HDF5 # see http://docs.h5py.org/en/2.3/strings.html # that's why we cast to opaque type "void" and uncast using tostring before unpickling fieldGrp.attrs['units']=numpy.void(pickle.dumps(self.unit)) fieldGrp.attrs['time']=numpy.void(pickle.dumps(self.time)) #fieldGrp.attrs['time']=self.time.getValue() if self.fieldType==FieldType.FT_vertexBased: val=numpy.empty(shape=(self.getMesh().getNumberOfVertices(),self.getRecordSize()),dtype=numpy.float) for vert in range(self.getMesh().getNumberOfVertices()): val[vert]=self.getVertexValue(vert).getValue() fieldGrp['vertex_values']=val elif self.fieldType==FieldType.FT_cellBased: # raise NotImplementedError("Saving cell-based fields to HDF5 is not yet implemented.") val=numpy.empty(shape=(self.getMesh().getNumberOfCells(),self.getRecordSize()),dtype=numpy.float) for cell in range(self.getMesh().getNumberOfCells()): val[cell]=self.getCellValue(cell) fieldGrp['cell_values']=val else: raise RuntimeError("Unknown fieldType %d."%(self.fieldType))
def huffman_compress(): for data_block in sd.data_block_list_local: prompt(f"Rank {rank:^6} starting data block {data_block}", nature='info') for channel_name in config['channel_detector_dict'].keys(): io_obj.open_tod_file(channel_name, data_block, 'r') io_obj_huff.open_tod_file(channel_name, data_block, 'w') segment_list = sd.get_segment_list( data_block, config['num_segments_per_data_block']) channel_common, channel_common_attributes = io_obj.read_channel_common( ['fsamp', 'det', 'polang', 'mbang', 'coords']) coords = channel_common.pop('coords') channel_common.update({ 'nside': config['huffman']['nside'], 'npsi': config['huffman']['npsi'] }) io_obj_huff.write_channel_common(channel_common, channel_common_attributes) for segment in segment_list: segment_common, segment_common_attributes = io_obj.read_segment_common( segment, ['time', 'vsun', 'psi_hwp']) pix_array = [[], [], []] for detector_name in config['channel_detector_dict'][ channel_name]: tod = io_obj.read_tod(segment, detector_name, ['theta', 'phi', 'psi']) # pixels pixels = hp.ang2pix(config['huffman']['nside'], tod['theta'], tod['phi']) delta = np.diff(pixels) delta = np.insert(delta, 0, pixels[0]) pix_array[0].append(delta) # psi psi_bins = np.linspace(0, 2 * np.pi, num=config['huffman']['npsi']) psi_index = np.digitize(tod['psi'], psi_bins) delta = np.diff(psi_index) delta = np.insert(delta, 0, psi_index[0]) pix_array[1].append(delta) # flag flag = np.ones(tod['theta'].size) delta = np.diff(flag) delta = np.insert(delta, 0, flag[0]) pix_array[2].append(delta) h = huffman.Huffman("", config['huffman']['nside']) h.GenerateCode(pix_array) huffarray = np.append( np.append(np.array(h.node_max), h.left_nodes), h.right_nodes) segment_common.update({ 'hufftree': huffarray, 'huffsymb': h.symbols }) io_obj_huff.write_segment_common(segment, segment_common, segment_common_attributes) for detector_name in config['channel_detector_dict'][ channel_name]: tod = io_obj.read_tod(segment, detector_name, ['signal', 'theta', 'phi', 'psi']) tod['tod'] = tod.pop('signal') scalars = io_obj.read_tod_scalars(segment, detector_name) # signal, noise and scalars io_obj_huff.write_tod(segment, detector_name, tod, tod_write_field=['tod']) io_obj_huff.write_tod_scalars(segment, detector_name, scalars) # flag flag = np.ones(tod['tod'].size) delta = np.diff(flag) delta = np.insert(delta, 0, flag[0]) io_obj_huff.write_tod( segment, detector_name, {'flag': np.void(bytes(h.byteCode(delta)))}, tod_write_field=['flag']) # pixels pixels = hp.ang2pix(config['huffman']['nside'], tod['theta'], tod['phi']) delta = np.diff(pixels) delta = np.insert(delta, 0, pixels[0]) io_obj_huff.write_tod( segment, detector_name, {'pix': np.void(bytes(h.byteCode(delta)))}, tod_write_field=['pix']) # psi psi_bins = np.linspace(0, 2 * np.pi, num=config['huffman']['npsi']) psi_index = np.digitize(tod['psi'], psi_bins) delta = np.diff(psi_index) delta = np.insert(delta, 0, psi_index[0]) io_obj_huff.write_tod( segment, detector_name, {'psi': np.void(bytes(h.byteCode(delta)))}, tod_write_field=['psi']) io_obj.close_tod_file() io_obj_huff.close_tod_file()
def testArrayVoid(self):
    d = self.create_dataset(data=numpy.void([b"abc\xF0"]))
    result = self.formatter.toString(d[()], dtype=d.dtype)
    self.assertEqual(result, '[b"\\x61\\x62\\x63\\xF0"]')
def _compress(b: bytes) -> ndarray:
    """Compress bytes."""
    return void(compress(b, level=9))
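# A matching decompression helper would reverse both steps. A minimal sketch
# assuming zlib's compress/decompress pair and that the stored value is the
# void scalar returned above (the `_decompress` name is hypothetical):
from zlib import decompress

import numpy as np

def _decompress(v: np.void) -> bytes:
    """Recover the original bytes from the opaque scalar."""
    return decompress(v.tobytes())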
def test_numpy_void(self):
    formatter = TextFormatter()
    result = formatter.toString(numpy.void(b"\xFF"))
    self.assertEqual(result, 'b"\\xFF"')
def writeHDF5(self, cachePath, targetname):
    data = (np.void(pickle.dumps(self)), )
    with h5py.File(cachePath) as f:
        f.create_dataset(targetname, data=data)
def pickle_to_string(value):
    return numpy.void(pickle.dumps(value))
def dict2hdfgroup(dictionary, group, **kwds):
    from hyperspy.misc.utils import DictionaryTreeBrowser
    from hyperspy.signal import BaseSignal

    def parse_structure(key, group, value, _type, **kwds):
        try:
            # Here we check if there are any signals in the container, as
            # casting a long list of signals to a numpy array takes a very
            # long time. So we check if there are any, and save numpy the
            # trouble
            if np.any([isinstance(t, BaseSignal) for t in value]):
                tmp = np.array([[0]])
            else:
                tmp = np.array(value)
        except ValueError:
            tmp = np.array([[0]])
        if tmp.dtype == np.dtype('O') or tmp.ndim != 1:
            dict2hdfgroup(dict(zip(
                [str(i) for i in range(len(value))], value)),
                group.create_group(_type + str(len(value)) + '_' + key),
                **kwds)
        elif tmp.dtype.type is np.unicode_:
            if _type + key in group:
                del group[_type + key]
            group.create_dataset(_type + key,
                                 tmp.shape,
                                 dtype=h5py.special_dtype(vlen=str),
                                 **kwds)
            group[_type + key][:] = tmp[:]
        else:
            if _type + key in group:
                del group[_type + key]
            group.create_dataset(
                _type + key,
                data=tmp,
                **kwds)

    for key, value in dictionary.items():
        if isinstance(value, dict):
            dict2hdfgroup(value, group.create_group(key), **kwds)
        elif isinstance(value, DictionaryTreeBrowser):
            dict2hdfgroup(value.as_dictionary(), group.create_group(key),
                          **kwds)
        elif isinstance(value, BaseSignal):
            kn = key if key.startswith('_sig_') else '_sig_' + key
            write_signal(value, group.require_group(kn))
        elif isinstance(value, (np.ndarray, h5py.Dataset, da.Array)):
            overwrite_dataset(group, value, key, **kwds)
        elif value is None:
            group.attrs[key] = '_None_'
        elif isinstance(value, bytes):
            try:
                # binary string if has any null characters (otherwise not
                # supported by hdf5)
                value.index(b'\x00')
                group.attrs['_bs_' + key] = np.void(value)
            except ValueError:
                group.attrs[key] = value.decode()
        elif isinstance(value, str):
            group.attrs[key] = value
        elif isinstance(value, AxesManager):
            dict2hdfgroup(value.as_dictionary(),
                          group.create_group('_hspy_AxesManager_' + key),
                          **kwds)
        elif isinstance(value, list):
            if len(value):
                parse_structure(key, group, value, '_list_', **kwds)
            else:
                group.attrs['_list_empty_' + key] = '_None_'
        elif isinstance(value, tuple):
            if len(value):
                parse_structure(key, group, value, '_tuple_', **kwds)
            else:
                group.attrs['_tuple_empty_' + key] = '_None_'
        elif value is Undefined:
            continue
        else:
            try:
                group.attrs[key] = value
            except BaseException:
                _logger.exception(
                    "The hdf5 writer could not write the following "
                    "information in the file: %s : %s", key, value)
def create_hdf5_types(group): print("- Creating HDF types...") main_group = group.create_group("HDF5") # H5T_INTEGER int_data = numpy.random.randint(-100, 100, size=10 * 4 * 4 * 4) uint_data = numpy.random.randint(0, 100, size=10 * 4 * 4 * 4) group = main_group.create_group("integer_little_endian") for size in (1, 2, 4, 8): store_subdimensions(group, int_data, '<i' + str(size), prefix='int' + str(size * 8)) store_subdimensions(group, uint_data, '<u' + str(size), prefix='uint' + str(size * 8)) group = main_group.create_group("integer_big_endian") for size in (1, 2, 4, 8): store_subdimensions(group, int_data, '>i' + str(size), prefix='int' + str(size * 8)) store_subdimensions(group, uint_data, '>u' + str(size), prefix='uint' + str(size * 8)) # H5T_FLOAT float_data = numpy.random.rand(10 * 4 * 4 * 4) group = main_group.create_group("float_little_endian") for size in (2, 4, 8): store_subdimensions(group, float_data, '<f' + str(size), prefix='float' + str(size * 8)) group = main_group.create_group("float_big_endian") for size in (2, 4, 8): store_subdimensions(group, float_data, '>f' + str(size), prefix='float' + str(size * 8)) # H5T_TIME main_group.create_group("time") # H5T_STRING main_group["text/ascii"] = b"abcd" main_group["text/bad_ascii"] = b"ab\xEFcd\xFF" main_group["text/utf8"] = u"me \u2661 tu" # H5T_BITFIELD main_group.create_group("bitfield") # H5T_OPAQUE group = main_group.create_group("opaque") main_group["opaque/ascii"] = numpy.void(b"abcd") main_group["opaque/utf8"] = numpy.void(u"i \u2661 my mother".encode("utf-8")) main_group["opaque/thing"] = numpy.void(b"\x10\x20\x30\x40\xF0") main_group["opaque/big_thing"] = numpy.void(b"\x10\x20\x30\x40\xF0" * 100000) data = numpy.void(b"\x10\x20\x30\x40\xFF" * 20) data = numpy.array([data] * 10 * 4 * 4 * 4, numpy.void) store_subdimensions(group, data, "void") # H5T_COMPOUND a = numpy.array([(1, 2., 'Hello'), (2, 3., "World")], dtype=[('foo', 'i4'), ('bar', 'f4'), ('baz', 'S10')]) b = numpy.zeros(3, dtype='3int8, float32, (2,3)float64') c = numpy.zeros(3, dtype=('i4', [('r', 'u1'), ('g', 'u1'), ('b', 'u1'), ('a', 'u1')])) d = numpy.zeros(3, dtype=[('x', 'f4'), ('y', numpy.float32), ('value', 'f4', (2, 2))]) e = numpy.zeros(3, dtype={'names': ['col1', 'col2'], 'formats': ['i4', 'f4']}) f = numpy.array([(1.5, 2.5, (1.0, 2.0)), (3., 4., (4., 5.)), (1., 3., (2., 6.))], dtype=[('x', 'f4'), ('y', numpy.float32), ('value', 'f4', (2, 2))]) main_group["compound/numpy_example_a"] = a main_group["compound/numpy_example_b"] = b main_group["compound/numpy_example_c"] = c main_group["compound/numpy_example_d"] = d main_group["compound/numpy_example_e"] = e main_group["compound/numpy_example_f"] = f dt = numpy.dtype([('start', numpy.uint32), ('stop', numpy.uint32)]) vlen_dt = h5py.special_dtype(vlen=dt) data = numpy.array([[(1, 2), (2, 3)], [(3, 5), (5, 8), (8, 9)]], vlen_dt) dataset = main_group.create_dataset("compound/vlen", data.shape, data.dtype) for i, row in enumerate(data): dataset[i] = row # numpy complex is a H5T_COMPOUND real_data = numpy.random.rand(10 * 4 * 4 * 4) imaginary_data = numpy.random.rand(10 * 4 * 4 * 4) complex_data = real_data + imaginary_data * 1j group = main_group.create_group("compound/numpy_complex_little_endian") for size in (8, 16, 32): store_subdimensions(group, complex_data, '<c' + str(size), prefix='complex' + str(size * 8)) group = main_group.create_group("compound/numpy_complex_big_endian") for size in (8, 16, 32): store_subdimensions(group, complex_data, '>c' + str(size), prefix='complex' + str(size * 8)) # 
H5T_REFERENCE ref_dt = h5py.special_dtype(ref=h5py.Reference) group = main_group.create_group("reference") data = group.create_dataset("data", data=numpy.random.rand(10, 10)) group.create_dataset("ref_0d", data=data.ref, dtype=ref_dt) group.create_dataset("ref_1d", data=[data.ref, None], dtype=ref_dt) group.create_dataset("regionref_0d", data=data.regionref[0:10, 0:5], dtype=ref_dt) group.create_dataset("regionref_1d", data=[data.regionref[0:10, 0:5]], dtype=ref_dt) # H5T_ENUM enum_dt = h5py.special_dtype(enum=('i', {"RED": 0, "GREEN": 1, "BLUE": 42})) group = main_group.create_group("enum") uint_data = numpy.random.randint(0, 100, size=10 * 4 * 4 * 4) uint_data.shape = 10, 4, 4, 4 group.create_dataset("color_0d", data=numpy.array(42, dtype=enum_dt)) group.create_dataset("color_1d", data=numpy.array([0, 1, 100, 42], dtype=enum_dt)) group.create_dataset("color_4d", data=numpy.array(uint_data, dtype=enum_dt)) # numpy bool is a H5T_ENUM bool_data = uint_data < 50 bool_group = main_group.create_group("enum/numpy_boolean") store_subdimensions(bool_group, bool_data, "bool") # H5T_VLEN group = main_group.create_group("vlen") text = u"i \u2661 my dad" unicode_vlen_dt = h5py.special_dtype(vlen=str) group.create_dataset("unicode", data=numpy.array(text, dtype=unicode_vlen_dt)) group.create_dataset("unicode_1d", data=numpy.array([text], dtype=unicode_vlen_dt))
def pickle_to_string(value):
    return numpy.void(pickle.dumps(value, protocol=0))
def test_void(self):
    with pytest.raises(TypeError):
        np.void(b'test', garbage=True)