def PrintHierarchy(gid, level, text):
    global nodeNameX, nodeNameY, datasetX, datasetY, nodeCoordinate, datasetCoord
    for hidStr in h5py.h5g.GroupIter(gid):
        lid = gid.links.get_info(hidStr)
        try:
            hid = h5py.h5o.open(gid, hidStr)
        except KeyError as e:
            hid = None
        t = type(hid)
        if t == h5py.h5g.GroupID:
            PrintHierarchy(hid, level + 1, text + " > " + hidStr)
        else:
            if hidStr == nodeNameX:
                if t == h5py.h5d.DatasetID:
                    datasetX = h5py.Dataset(hid)
                elif t == np.ndarray:
                    datasetX = hid
            if hidStr == nodeNameY:
                if t == h5py.h5d.DatasetID:
                    datasetY = h5py.Dataset(hid)
                elif t == np.ndarray:
                    datasetY = hid
            if hidStr == nodeCoordinate:
                if t == h5py.h5d.DatasetID:
                    datasetCoord = h5py.Dataset(hid)
                elif t == np.ndarray:
                    datasetCoord = hid
        print(text + " > " + hidStr)
def PrintHierarchy(self, gid, level, text):
    for hidStr in h5py.h5g.GroupIter(gid):
        lid = gid.links.get_info(hidStr)
        try:
            hid = h5py.h5o.open(gid, hidStr)
        except KeyError as e:
            hid = None
        t = type(hid)
        if t == h5py.h5g.GroupID:
            self.PrintHierarchy(hid, level + 1, text + " > " + hidStr)
        elif t == h5py.h5d.DatasetID:
            if hidStr == self.nodeNameX:
                self.datasetX = h5py.Dataset(hid)
            elif hidStr == self.nodeNameY:
                self.datasetY = h5py.Dataset(hid)
            elif hidStr == self.nodeCoordinate:
                self.datasetCoord = h5py.Dataset(hid)
        elif t == np.ndarray:
            if hidStr == self.nodeNameX:
                self.datasetX = hid
            elif hidStr == self.nodeNameY:
                self.datasetY = hid
            elif hidStr == self.nodeCoordinate:
                self.datasetCoord = hid
def setUp(self):
    TestCase.setUp(self)
    sid = h5py.h5s.create(h5py.h5s.NULL)
    tid = h5py.h5t.C_S1.copy()
    tid.set_size(10)
    dsid = h5py.h5d.create(self.f.id, b'x', tid, sid)
    self.dset = h5py.Dataset(dsid)
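# A hedged aside (not from the original test): a dataset created on a NULL
# dataspace like the one above is "empty" in h5py terms. Its .shape is None,
# and reading it with the [()] syntax yields an h5py.Empty instance rather
# than an array.
def demo_empty_dataset(dset):
    assert dset.shape is None          # empty datasets report shape None
    value = dset[()]                   # reads back as h5py.Empty, not ndarray
    assert isinstance(value, h5py.Empty)
    return value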
def createDataset(fout, procInfo, varName):
    spaceid = h5py.h5s.create_simple((procInfo.numRows, procInfo.numCols))
    plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    plist.set_fill_time(h5py.h5d.FILL_TIME_NEVER)
    datasetid = h5py.h5d.create(fout.id, varName, h5py.h5t.NATIVE_DOUBLE, spaceid, plist)
    return h5py.Dataset(datasetid)
def setUp(self):
    TestCase.setUp(self)
    sid = h5py.h5s.create(h5py.h5s.NULL)
    tid = h5py.h5t.C_S1.copy()
    tid.set_size(10)
    dsid = h5py.h5d.create(self.f.id, b'x', tid, sid)
    self.dset = h5py.Dataset(dsid)
    self.empty_obj = h5py.Empty(np.dtype("S10"))
def setUp(self):
    TestCase.setUp(self)
    filename = "dataset_testempty." + self.base_domain
    self.f = h5py.File(filename, 'w')
    sid = h5py.h5s.create(h5py.h5s.NULL)
    tid = h5py.h5t.C_S1.copy()
    tid.set_size(10)
    dsid = h5py.h5d.create(self.f.id, b'x', tid, sid)
    self.dset = h5py.Dataset(dsid)
def get_dataset_as_type(d, dtype='float32'):
    # creates a new Dataset instance that points to the same HDF5 identifier
    d_new = h5py.Dataset(d.id)
    # set the ._local.astype attribute to the desired output type
    d_new._local.astype = np.dtype(dtype)
    return d_new
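# Hypothetical usage of the helper above; 'data.h5' and 'x' are assumed names.
# Note that ._local.astype is a private attribute of older h5py releases; on
# current h5py the supported way to get the same effect is the public
# Dataset.astype() wrapper, shown for comparison.
with h5py.File('data.h5', 'r') as f:
    d32 = get_dataset_as_type(f['x'], dtype='float32')
    arr = d32[:]                          # slices now come back as float32
    arr2 = f['x'].astype('float32')[:]    # public-API route on recent h5py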
def setUp(self):
    TestCase.setUp(self)
    filename = self.getFileName("dataset_testempty")
    print("filename:", filename)
    self.f = h5py.File(filename, 'w')
    sid = h5py.h5s.create(h5py.h5s.NULL)
    tid = h5py.h5t.C_S1.copy()
    tid.set_size(10)
    dsid = h5py.h5d.create(self.f.id, b'x', tid, sid)
    self.dset = h5py.Dataset(dsid)
def __create_dataset_nofill(self, group, name, shape, dtype, chunks=None):
    spaceid = h5py.h5s.create_simple(shape)
    plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    plist.set_fill_time(h5py.h5d.FILL_TIME_NEVER)
    if chunks not in [None, []] and isinstance(chunks, tuple):
        plist.set_chunk(chunks)
    typeid = h5py.h5t.py_create(dtype)
    datasetid = h5py.h5d.create(group.file.id, group.name + '/' + name,
                                typeid, spaceid, plist)
    data = h5py.Dataset(datasetid)
    return data
def test_read_no_fill_value(writable_file):
    # With FILL_TIME_NEVER, HDF5 doesn't write zeros in the output array for
    # unallocated chunks. If we read into uninitialized memory, it can appear
    # to read random values. https://github.com/h5py/h5py/issues/2069
    dcpl = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    dcpl.set_chunk((1,))
    dcpl.set_fill_time(h5py.h5d.FILL_TIME_NEVER)
    ds = h5py.Dataset(h5py.h5d.create(
        writable_file.id, b'a', h5py.h5t.IEEE_F64LE,
        h5py.h5s.create_simple((5,)), dcpl
    ))
    np.testing.assert_array_equal(ds[:3], np.zeros(3, np.float64))
def test_b16_uint16(self):
    arr1 = np.arange(10, dtype=np.uint16)
    path = self.mktemp()
    with h5py.File(path, 'w') as f:
        space = h5py.h5s.create_simple(arr1.shape)
        dset_id = h5py.h5d.create(f.id, b'test', h5py.h5t.STD_B16LE, space)
        dset = h5py.Dataset(dset_id)
        dset[:] = arr1
    with h5py.File(path, 'r') as f:
        dset = f['test']
        self.assertArrayEqual(dset[:], arr1)
def __init__(self, parent, title, hid):
    # Forcing a specific style on the window.
    # Should this include styles passed?
    style = wx.DEFAULT_FRAME_STYLE | wx.NO_FULL_REPAINT_ON_RESIZE
    wx.Frame.__init__(self, parent, title=title, size=wx.Size(850, 650), style=style)
    imgDir = ut.Path.GetImage()
    icon = wx.Icon(os.path.join(imgDir, 'h5pyViewer.ico'), wx.BITMAP_TYPE_ICO)
    self.SetIcon(icon)
    canvas = GLCanvasImg(self, self.SetStatusCB)
    t = type(hid)
    if t == h5py.h5d.DatasetID:
        ds = h5py.Dataset(hid)
    self.dataSet = ds
    sizer = wx.BoxSizer(wx.VERTICAL)
    sizer.Add(canvas, 1, wx.LEFT | wx.TOP | wx.GROW)
    self.SetSizer(sizer)
    wxAxCtrlLst = []
    l = len(ds.shape)
    idxXY = (l - 2, l - 1)
    for idx, l in enumerate(ds.shape):
        if idx in idxXY:
            continue
        wxAxCtrl = ut.SliderGroup(self, label='Axis:%d' % idx, range=(0, l - 1))
        wxAxCtrl.idx = idx
        wxAxCtrlLst.append(wxAxCtrl)
        sizer.Add(wxAxCtrl.sizer, 0, wx.EXPAND | wx.ALIGN_CENTER | wx.ALL, border=5)
        wxAxCtrl.SetCallback(self.OnSetView, wxAxCtrl)
    sl = ut.GetSlice(idxXY, ds.shape, wxAxCtrlLst)
    canvas.data = ds[sl]
    #self.Fit()
    self.Centre()
    self.canvas = canvas
    self.sizer = sizer
    self.idxXY = idxXY
    self.wxAxCtrlLst = wxAxCtrlLst
    self.BuildMenu()
def _parseData(self, x=None, y=None, index=None):
    if y is None:
        raise RuntimeError("y keyword argument is mandatory!")
    if hasattr(y, "info") and hasattr(y, "data"):
        data = y.data
        mcaIndex = y.info.get("McaIndex", -1)
    else:
        data = y
        mcaIndex = -1
    if index is None:
        index = mcaIndex
    if index < 0:
        index = len(data.shape) - 1
    # workaround a problem with h5py
    try:
        if index in [0]:
            testException = data[0:1]
        else:
            if len(data.shape) == 2:
                testException = data[0:1, -1]
            elif len(data.shape) == 3:
                testException = data[0:1, 0:1, -1]
    except AttributeError:
        txt = "%s" % type(data)
        if 'h5py' in txt:
            _logger.info("Implementing h5py workaround")
            import h5py
            data = h5py.Dataset(data.id)
        else:
            raise
    # only usual spectra case supported
    if index != (len(data.shape) - 1):
        raise IndexError("Only stacks of spectra supported")
    if len(data.shape) != 3:
        txt = "For the time being only "
        txt += "three dimensional arrays supported"
        raise NotImplementedError(txt)
    # make sure to get x data
    if x is None:
        x = numpy.arange(data.shape[index]).astype(numpy.float32)
    elif x.size != data.shape[index]:
        raise NotImplementedError("All the spectra should share same X axis")
    #data = numpy.transpose(data, (1,0,2))
    return data, x, index
def createDataset(fnameOut, procInfo):
    propfaid = h5py.h5p.create(h5py.h5p.FILE_ACCESS)
    propfaid.set_fapl_mpio(comm, mpiInfo)
    fid = h5py.h5f.create(fnameOut, flags=h5py.h5f.ACC_TRUNC, fapl=propfaid)
    fout = h5py.File(fid)
    spaceid = h5py.h5s.create_simple((procInfo.numRows, procInfo.numCols))
    plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    plist.set_fill_time(h5py.h5d.FILL_TIME_NEVER)
    datasetid = h5py.h5d.create(fout.id, "rows", h5py.h5t.NATIVE_DOUBLE, spaceid, plist)
    rows = h5py.Dataset(datasetid)
    return (fout, rows)
def __init__(self, parent, lbl, hid):
    wx.Frame.__init__(self, parent, title=lbl, size=wx.Size(850, 650))
    imgDir = ut.Path.GetImage()
    icon = wx.Icon(os.path.join(imgDir, "h5pyViewer.ico"), wx.BITMAP_TYPE_ICO)
    self.SetIcon(icon)
    t = type(hid)
    if t == h5py.h5d.DatasetID:
        data = h5py.Dataset(hid)
    canvas = MPLCanvasImg(self, self.SetStatusCB)
    sizer = wx.BoxSizer(wx.VERTICAL)
    sizer.Add(canvas, 1, wx.LEFT | wx.TOP | wx.GROW)
    self.SetSizer(sizer)
    toolbar = ut.AddToolbar(canvas, sizer)
    wxAxCtrlLst = []
    l = len(data.shape)
    idxXY = (l - 2, l - 1)
    for idx, l in enumerate(data.shape):
        if idx in idxXY:
            continue
        wxAxCtrl = ut.SliderGroup(self, label="Axis:%d" % idx, range=(0, l - 1))
        wxAxCtrl.idx = idx
        wxAxCtrlLst.append(wxAxCtrl)
        sizer.Add(wxAxCtrl.sizer, 0, wx.EXPAND | wx.ALIGN_CENTER | wx.ALL, border=5)
        wxAxCtrl.SetCallback(HdfImageFrame.OnSetView, wxAxCtrl)
    sl = ut.GetSlice(idxXY, data.shape, wxAxCtrlLst)
    canvas.InitChild(data[sl])
    # self.Fit()
    self.Centre()
    self.BuildMenu(data.dtype)
    self.canvas = canvas
    self.sizer = sizer
    self.toolbar = toolbar
    self.data = data
    self.idxXY = idxXY
    self.wxAxCtrlLst = wxAxCtrlLst
def _create_big_dataset(self, where, name, shape, dtype):
    """ Create and return a dataset that doesn't get filled right when created """
    spaceid = h5py.h5s.create_simple(shape)
    plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    plist.set_fill_time(h5py.h5d.FILL_TIME_NEVER)
    if shape[0] < 500 or shape[1] < 512:
        chunkshape = shape
    else:
        # integer division: set_chunk() requires integer chunk dimensions
        chunkshape = (shape[0] // 500, shape[1] // 512)  # TODO: don't use fixed values?
    plist.set_chunk(chunkshape)
    datasetid = h5py.h5d.create(where.id, name, h5py.h5t.NATIVE_FLOAT, spaceid, plist)
    return h5py.Dataset(datasetid)
def create_dataset_early_allocated(group, name, size, dtype):
    """
    Create an HDF5 dataset, allocating the full space for it at the start
    of the process. This can make it faster to write data incrementally
    from multiple processes. The dataset is also not pre-filled, saving
    more time.

    Parameters
    ----------
    group: h5py.Group
        the parent for the dataset
    name: str
        name for the new dataset
    size: int
        The size of the new data set (which must be 1D)
    dtype: str
        Data type, one of f4, f8, i4, i8
    """
    # create a data-space object, which describes the dimensions of the dataset
    space_id = h5py.h5s.create_simple((size,))
    # Create and fill a property list describing options
    # which apply to the data set.
    plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    plist.set_fill_time(h5py.h5d.FILL_TIME_NEVER)
    plist.set_alloc_time(h5py.h5d.ALLOC_TIME_EARLY)
    dtype = {
        'f8': h5py.h5t.NATIVE_DOUBLE,
        'f4': h5py.h5t.NATIVE_FLOAT,
        'i4': h5py.h5t.NATIVE_INT32,
        'i8': h5py.h5t.NATIVE_INT64,
    }[dtype]
    datasetid = h5py.h5d.create(group.id, name.encode('ascii'), dtype, space_id, plist)
    data_set = h5py.Dataset(datasetid)
    return data_set
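# A minimal usage sketch for the function above, under assumed names
# ('early.h5', 'counts'); a File is also a Group, so it can serve as the parent.
import numpy as np
with h5py.File('early.h5', 'w') as f:
    dset = create_dataset_early_allocated(f, 'counts', size=1000, dtype='i8')
    dset[0:10] = np.arange(10)  # space was allocated up front, so writes land directly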
def test_virtual_prefix(tmp_path):
    (tmp_path / 'a').mkdir()
    (tmp_path / 'b').mkdir()
    src_file = h5.File(tmp_path / 'a' / 'src.h5', 'w')
    src_file['data'] = np.arange(10)

    vds_file = h5.File(tmp_path / 'b' / 'vds.h5', 'w')
    layout = h5.VirtualLayout(shape=(10,))
    layout[:] = h5.VirtualSource('src.h5', 'data', shape=(10,))
    vds_file.create_virtual_dataset('data', layout, fillvalue=-1)

    # Path doesn't resolve
    np.testing.assert_array_equal(vds_file['data'], np.full(10, fill_value=-1))

    path_a = bytes(tmp_path / 'a')
    dapl = h5.h5p.create(h5.h5p.DATASET_ACCESS)
    dapl.set_virtual_prefix(path_a)
    vds_id = h5.h5d.open(vds_file.id, b'data', dapl=dapl)
    vds = h5.Dataset(vds_id)

    # Now it should find the source file and read the data correctly
    np.testing.assert_array_equal(vds[:], np.arange(10))
    # Check that get_virtual_prefix gives back what we put in
    assert vds.id.get_access_plist().get_virtual_prefix() == path_a
def test1(hid):
    print(hid)
    ds = h5py.Dataset(hid)
    plt.plot(ds[:, 1])
    plt.show()
def numpyPCA(stack, index=-1, ncomponents=10, binning=None,
             center=True, scale=True, mask=None, spectral_mask=None, **kw):
    if DEBUG:
        print("PCATools.numpyPCA")
        print("index = %d" % index)
        print("center = %s" % center)
        print("scale = %s" % scale)
    #recover the actual data to work with
    if hasattr(stack, "info") and hasattr(stack, "data"):
        #we are dealing with a PyMca data object
        data = stack.data
    else:
        data = stack
    force = kw.get("force", True)
    oldShape = data.shape
    if index not in [0, -1, len(oldShape) - 1]:
        data = None
        raise IndexError("1D index must be one of 0, -1 or %d, got %d" %
                         (len(oldShape) - 1, index))
    if index < 0:
        actualIndex = len(oldShape) + index
    else:
        actualIndex = index
    #workaround a problem with h5py
    try:
        if actualIndex in [0]:
            testException = data[0:1]
        else:
            if len(data.shape) == 2:
                testException = data[0:1, -1]
            elif len(data.shape) == 3:
                testException = data[0:1, 0:1, -1]
    except AttributeError:
        txt = "%s" % type(data)
        if 'h5py' in txt:
            print("Implementing h5py workaround")
            import h5py
            data = h5py.Dataset(data.id)
        else:
            raise
    #the number of spatial pixels
    nPixels = 1
    for i in range(len(oldShape)):
        if i != actualIndex:
            nPixels *= oldShape[i]
    #the number of channels
    nChannels = oldShape[actualIndex]
    if binning is None:
        binning = 1
    N = int(nChannels / binning)
    if ncomponents > N:
        msg = "Requested %d components for a maximum of %d" % (ncomponents, N)
        raise ValueError(msg)
    # avgSpectrum is unused, but it makes the code readable
    cov, avgSpectrum, calculatedPixels = getCovarianceMatrix(
        stack, index=index, binning=binning, force=force,
        center=center, spatial_mask=mask, weights=spectral_mask)
    #the total variance is the sum of the elements of the diagonal
    totalVariance = numpy.diag(cov)
    standardDeviation = numpy.sqrt(totalVariance)
    standardDeviation = standardDeviation + (standardDeviation == 0)
    print("Total Variance = ", totalVariance.sum())
    normalizeToUnitStandardDeviation = scale
    if 0:
        #option to normalize to unit standard deviation
        if normalizeToUnitStandardDeviation:
            for i in range(cov.shape[0]):
                if totalVariance[i] > 0:
                    cov[i, :] /= numpy.sqrt(totalVariance[i])
                    cov[:, i] /= numpy.sqrt(totalVariance[i])
    if DEBUG:
        import time
        t0 = time.time()
    evalues, evectors = numpy.linalg.eigh(cov)
    # The total variance should also be the sum of all the eigenvalues
    calculatedTotalVariance = evalues.sum()
    if abs(totalVariance.sum() - calculatedTotalVariance) > 0.0001:
        print("WARNING: Discrepancy on total variance")
        print("Variance from covariance matrix = ", totalVariance.sum())
        print("Variance from sum of eigenvalues = ", calculatedTotalVariance)
    if DEBUG:
        print("Eig elapsed = ", time.time() - t0)
    cov = None
    dtype = numpy.float32
    images = numpy.zeros((ncomponents, nPixels), dtype)
    eigenvectors = numpy.zeros((ncomponents, N), dtype)
    eigenvalues = numpy.zeros((ncomponents,), dtype)
    #sort eigenvalues
    if 1:
        a = [(evalues[i], i) for i in range(len(evalues))]
        a.sort()
        a.reverse()
        totalExplainedVariance = 0.0
        for i0 in range(ncomponents):
            i = a[i0][1]
            eigenvalues[i0] = evalues[i]
            partialExplainedVariance = 100. * evalues[i] / \
                                       calculatedTotalVariance
            print("PC%02d Explained variance %.5f %% " %
                  (i0 + 1, partialExplainedVariance))
            totalExplainedVariance += partialExplainedVariance
            eigenvectors[i0, :] = evectors[:, i]
            #print("NORMA = ", numpy.dot(evectors[:, i].T, evectors[:, i]))
        print("Total explained variance = %.2f %% " % totalExplainedVariance)
    else:
        idx = numpy.argsort(evalues)
        eigenvalues[:] = evalues[idx]
        eigenvectors[:, :] = evectors[:, idx].T
    #calculate the projections
    # Subtracting the average and normalizing to standard deviation gives worse results.
    # Versions 5.0.0 to 5.1.0 implemented that behavior as default.
    # When dealing with the CH1777 test dataset the Sb signal was less contrasted against
    # the Ca signal.
    # Clearly the user should have control about subtracting the average or not and
    # normalizing to the standard deviation or not.
    subtractAndNormalize = False
    if actualIndex in [0]:
        for i in range(oldShape[actualIndex]):
            if subtractAndNormalize:
                tmpData = (data[i].reshape(1, -1) - avgSpectrum[i]) / standardDeviation[i]
            else:
                tmpData = data[i].reshape(1, -1)
            for j in range(ncomponents):
                images[j:j + 1, :] += tmpData * eigenvectors[j, i]
        if len(oldShape) == 3:
            #reshape the images
            images.shape = ncomponents, oldShape[1], oldShape[2]
    else:
        #array of spectra
        if len(oldShape) == 2:
            for i in range(nPixels):
                #print i
                tmpData = data[i, :]
                tmpData.shape = 1, nChannels
                if subtractAndNormalize:
                    tmpData = (tmpData[:, ::binning] - avgSpectrum) / standardDeviation
                else:
                    tmpData = tmpData[:, ::binning]
                for j in range(ncomponents):
                    images[j, i] = numpy.dot(tmpData, eigenvectors[j])
            #reshape the images
            images.shape = ncomponents, nPixels
        elif len(oldShape) == 3:
            i = 0
            for r in range(oldShape[0]):
                for c in range(oldShape[1]):
                    #print i
                    tmpData = data[r, c, :]
                    tmpData.shape = 1, nChannels
                    if subtractAndNormalize:
                        tmpData = (tmpData[:, ::binning] - avgSpectrum) / standardDeviation
                    else:
                        tmpData = tmpData[:, ::binning]
                    for j in range(ncomponents):
                        images[j, i] = numpy.dot(tmpData, eigenvectors[j])
                    i += 1
            #reshape the images
            images.shape = ncomponents, oldShape[0], oldShape[1]
    return images, eigenvalues, eigenvectors
numRows = len(keepIndices)
numCols = rowsIn.shape[1]

# create the output file and dataset efficiently
propfaid = h5py.h5p.create(h5py.h5p.FILE_ACCESS)
propfaid.set_fapl_mpio(comm, mpiInfo)
fid = h5py.h5f.create(fnameOut, flags=h5py.h5f.ACC_TRUNC, fapl=propfaid)
fout = h5py.File(fid)
spaceid = h5py.h5s.create_simple((numRows, numCols))
plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
plist.set_fill_time(h5py.h5d.FILL_TIME_NEVER)
datasetid = h5py.h5d.create(fout.id, "rows", h5py.h5t.NATIVE_DOUBLE, spaceid, plist)
rowsOut = h5py.Dataset(datasetid)

# write out the metadata (note we only retain some of the metadata)
# this will write out the observedLocations in the original (not subsetted!) dataset,
# so keep this in mind when using this field to unfold from the matrix to the 3d grid
if rank == 0:
    observedLonCoords = metadataIn["observedLonCoords"][keepIndices]
    observedLatCoords = metadataIn["observedLatCoords"][keepIndices]
    observedLevelNumbers = metadataIn["observedLevelNumbers"][keepIndices]
    observedLocations = metadataIn["observedLocations"][keepIndices]
    np.savez(metadataFnameOut,
             observedLonCoords=observedLonCoords,
             observedLatCoords=observedLatCoords,
             observedLevelNumbers=observedLevelNumbers,
             observedLocations=observedLocations)
import h5py
import time

fx = h5py.File('haha_nofill.h5', 'w')
spaceid = h5py.h5s.create_simple((30000, 8000000))
plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
plist.set_fill_time(h5py.h5d.FILL_TIME_NEVER)
plist.set_chunk((60, 15625))
start = time.time()
datasetid = h5py.h5d.create(fx.id, "data", h5py.h5t.NATIVE_FLOAT, spaceid, plist)
dset = h5py.Dataset(datasetid)
# then you can use normal h5py api to deal with this dset object
fx.close()
end = time.time()
print('%f seconds' % (end - start))
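# Hedged follow-up to the snippet above (not part of the original): once
# wrapped in h5py.Dataset, the identifier behaves like any high-level dataset,
# so ordinary slicing assignments work; with FILL_TIME_NEVER, chunks that are
# never written are simply never initialized on disk.
fx = h5py.File('haha_nofill.h5', 'a')
dset = fx['data']
dset[0, :100] = 1.0   # touches a single partial chunk; no fill pass occurs
fx.close()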
def getCovarianceMatrix(stack,
                        index=None,
                        binning=None,
                        dtype=numpy.float64,
                        force=True,
                        center=True,
                        weights=None,
                        spatial_mask=None):
    """
    Calculate the covariance matrix of input data (stack) array. The input
    array is to be understood as a set of observables (spectra) taken at
    different instances (for instance spatial coordinates).

    :param stack: Array of data. Dimension greater than one.
    :type stack: Numpy ndarray.
    :param index: Integer specifying the array dimension containing the
        "observables". Only the first (index = 0) or the last dimension
        (index = -1 or index = (ndimensions - 1)) supported.
    :type index: Integer (default is -1 to indicate it is the last dimension
        of input array)
    :param binning: Current implementation corresponds to a sampling of the
        spectral data and not to an actual binning. This may change in future
        versions.
    :type binning: Positive integer (default 1)
    :param dtype: Keyword indicating the data type of the returned covariance
        matrix.
    :type dtype: A valid numpy data type (default numpy.float64)
    :param force: Indicate how to calculate the covariance matrix:

        - False : Perform the product data.T * data in one call
        - True : Perform the product data.T * data progressively (smaller
          memory footprint)

    :type force: Boolean (default True)
    :param center: Indicate if the mean is to be subtracted from the
        observables.
    :type center: Boolean (default True)
    :param weights: Weight to be applied to each observable. It can therefore
        be used as a spectral mask setting the weight to 0 on the values to
        ignore.
    :type weights: Numpy ndarray of same size as the observables or None
        (default).
    :param spatial_mask: Array of size n where n is the number of measurement
        instances. In mapping experiments, n would be equal to the number of
        pixels.
    :type spatial_mask: Numpy array of unsigned bytes (numpy.uint8) or None
        (default).
    :returns: The covMatrix, the average spectrum and the number of used
        pixels.
    """
    #the 1D mask = weights should correspond to the values, before or after
    #sampling? it could be handled as weights to be applied to the
    #spectra. That would allow two uses, as mask and as weights, at
    #the cost of a multiplication.
    #the spatial_mask accounts for pixels to be considered. It allows
    #to calculate the covariance matrix of a subset or to deal with
    #non finite data (NaN, +inf, -inf, ...). The calling program
    #should set the mask.
    #recover the actual data to work with
    if hasattr(stack, "info") and hasattr(stack, "data"):
        #we are dealing with a PyMca data object
        data = stack.data
        if index is None:
            index = stack.info.get("McaWindex", -1)
    else:
        data = stack
    if index is None:
        index = -1
    oldShape = data.shape
    if index not in [0, -1, len(oldShape) - 1]:
        data = None
        raise IndexError("1D index must be one of 0, -1 or %d" %
                         (len(oldShape) - 1))
    if index < 0:
        actualIndex = len(oldShape) + index
    else:
        actualIndex = index
    #the number of spatial pixels
    nPixels = 1
    for i in range(len(oldShape)):
        if i != actualIndex:
            nPixels *= oldShape[i]
    #remove inf or nan
    #image_data = data.sum(axis=actualIndex)
    #spatial_mask = numpy.isfinite(image_data)
    #
    #the starting number of channels or of images
    N = oldShape[actualIndex]
    # our binning (better said sampling) is spectral, in order not to
    # affect the spatial resolution
    if binning is None:
        binning = 1
    if spatial_mask is not None:
        cleanMask = spatial_mask[:].reshape(nPixels)
        usedPixels = cleanMask.sum()
        badMask = numpy.array(spatial_mask < 1, dtype=cleanMask.dtype)
        badMask.shape = nPixels
    else:
        cleanMask = None
        usedPixels = nPixels
    nChannels = int(N / binning)
    if weights is None:
        weights = numpy.ones(N, numpy.float64)
    if weights.size == nChannels:
        # binning was taken into account
        cleanWeights = weights[:]
    else:
        cleanWeights = weights[::binning]
    #end of checking part
    eigenvectorLength = nChannels
    if (not force) and isinstance(data, numpy.ndarray):
        if DEBUG:
            print("Memory consuming calculation")
        #make a direct calculation (memory consuming)
        #take a view to the data
        dataView = data[:]
        if index in [0]:
            #reshape the view to allow the matrix multiplication
            dataView.shape = -1, nPixels
            cleanWeights.shape = -1, 1
            dataView = dataView[::binning] * cleanWeights
            if cleanMask is not None:
                dataView[:, badMask] = 0
            sumSpectrum = dataView.sum(axis=1, dtype=numpy.float64)
            #and return the standard covariance matrix as a matrix product
            covMatrix = dotblas.dot(dataView, dataView.T) \
                        / float(usedPixels - 1)
        else:
            #the last index
            dataView.shape = nPixels, -1
            cleanWeights.shape = 1, -1
            dataView = dataView[:, ::binning] * cleanWeights
            if cleanMask is not None:
                cleanMask.shape = -1
                if 0:
                    for i in range(dataView.shape[-1]):
                        dataView[badMask, i] = 0
                else:
                    dataView[badMask] = 0
            sumSpectrum = dataView.sum(axis=0, dtype=numpy.float64)
            #and return the standard covariance matrix as a matrix product
            covMatrix = dotblas.dot(dataView.T, dataView) \
                        / float(usedPixels - 1)
        if center:
            averageMatrix = numpy.outer(sumSpectrum, sumSpectrum) \
                            / (usedPixels * (usedPixels - 1))
            covMatrix -= averageMatrix
            averageMatrix = None
        return covMatrix, sumSpectrum / usedPixels, usedPixels
    #we are dealing with dynamically loaded data
    if DEBUG:
        print("DYNAMICALLY LOADED DATA")
    #create the needed storage space for the covariance matrix
    try:
        covMatrix = numpy.zeros((eigenvectorLength, eigenvectorLength),
                                dtype=dtype)
        sumSpectrum = numpy.zeros((eigenvectorLength,), numpy.float64)
    except:
        #make sure no reference to the original input data is kept
        cleanWeights = None
        covMatrix = None
        averageMatrix = None
        data = None
        raise
    #workaround a problem with h5py
    try:
        if actualIndex in [0]:
            testException = data[0:1]
        else:
            if len(data.shape) == 2:
                testException = data[0:1, -1]
            elif len(data.shape) == 3:
                testException = data[0:1, 0:1, -1]
    except AttributeError:
        txt = "%s" % type(data)
        if 'h5py' in txt:
            print("Implementing h5py workaround")
            import h5py
            data = h5py.Dataset(data.id)
        else:
            raise
    if actualIndex in [0]:
        #divider is used to decide the fraction of images to keep in memory
        #in order to limit file access on dynamically loaded data.
        #Since two chunks of the same size are used, the amount of memory
        #needed is twice the data size divided by the divider.
        #For instance, divider = 10 implies the data to be read 5.5 times
        #from disk while having a memory footprint of about one fifth of
        #the dataset size.
        step = 0
        divider = 10
        while step < 1:
            step = int(oldShape[index] / divider)
            divider -= 2
            if divider <= 0:
                step = oldShape[index]
                break
        if DEBUG:
            print("Reading chunks of %d images" % step)
        nImagesRead = 0
        if (binning == 1) and oldShape[index] >= step:
            chunk1 = numpy.zeros((step, nPixels), numpy.float64)
            chunk2 = numpy.zeros((nPixels, step), numpy.float64)
            if spatial_mask is not None:
                badMask.shape = -1
                cleanMask.shape = -1
            i = 0
            while i < N:
                iToRead = min(step, N - i)
                #get step images for the first chunk
                chunk1[0:iToRead] = data[i:i + iToRead].reshape(iToRead, -1)
                if spatial_mask is not None:
                    chunk1[0:iToRead, badMask] = 0
                sumSpectrum[i:i + iToRead] = chunk1[0:iToRead].sum(axis=1)
                if center:
                    average = sumSpectrum[i:i + iToRead] / usedPixels
                    average.shape = iToRead, 1
                    chunk1[0:iToRead] -= average
                if spatial_mask is not None:
                    chunk1[0:iToRead, badMask] = 0
                nImagesRead += iToRead
                j = 0
                while j <= i:
                    #get step images for the second chunk
                    if j == i:
                        jToRead = iToRead
                        if 0:
                            for k in range(0, jToRead):
                                chunk2[:, k] = chunk1[k]
                        else:
                            chunk2[:, 0:jToRead] = chunk1[0:jToRead, :].T
                    else:
                        #get step images for the second chunk
                        jToRead = min(step, nChannels - j)
                        #with loop:
                        #for k in range(0, jToRead):
                        #    chunk2[:,k] = data[(j+k):(j+k+1)].reshape(1,-1)
                        #    if spatial_mask is not None:
                        #        chunk2[badMask[(j+k):(j+k+1),k]] = 0
                        #equivalent without loop:
                        chunk2[:, 0:jToRead] = \
                            data[j:(j + jToRead)].reshape(jToRead, -1).T
                        if spatial_mask is not None:
                            chunk2[badMask, 0:jToRead] = 0
                        nImagesRead += jToRead
                        if center:
                            average = \
                                chunk2[:, 0:jToRead].sum(axis=0) / usedPixels
                            average.shape = 1, jToRead
                            chunk2[:, 0:jToRead] -= average
                        if spatial_mask is not None:
                            chunk2[badMask, 0:jToRead] = 0
                    #dot product
                    if (iToRead != step) or (jToRead != step):
                        covMatrix[i: (i + iToRead), j: (j + jToRead)] = \
                            dotblas.dot(chunk1[:iToRead, :nPixels],
                                        chunk2[:nPixels, :jToRead])
                    else:
                        covMatrix[i: (i + iToRead), j: (j + jToRead)] = \
                            dotblas.dot(chunk1, chunk2)
                    if i != j:
                        covMatrix[j: (j + jToRead), i: (i + iToRead)] = \
                            covMatrix[i: (i + iToRead), j: (j + jToRead)].T
                    #increment j
                    j += jToRead
                i += iToRead
            chunk1 = None
            chunk2 = None
            if DEBUG:
                print("totalImages Read = ", nImagesRead)
        elif (binning > 1) and (oldShape[index] >= step):
            chunk1 = numpy.zeros((step, nPixels), numpy.float64)
            chunk2 = numpy.zeros((nPixels, step), numpy.float64)
            #one by one reading till we fill the chunks
            imagesToRead = numpy.arange(0, oldShape[index], binning)
            i = int(imagesToRead[weights > 0][0])
            spectrumIndex = 0
            nImagesRead = 0
            while i < N:
                #fill chunk1
                jj = 0
                for iToRead in range(0, int(min(step * binning, N - i)), binning):
                    chunk1[jj] = data[i + iToRead].reshape(1, -1) * \
                                 weights[i + iToRead]
                    jj += 1
                sumSpectrum[spectrumIndex:(spectrumIndex + jj)] = \
                    chunk1[0:jj].sum(axis=1)
                if center:
                    average = \
                        sumSpectrum[spectrumIndex:(spectrumIndex + jj)] / nPixels
                    average.shape = jj, 1
                    chunk1[0:jj] -= average
                nImagesRead += jj
                iToRead = jj
                j = 0
                while j <= i:
                    #get step images for the second chunk
                    if j == i:
                        jToRead = iToRead
                        chunk2[:, 0:jToRead] = chunk1[0:jToRead, :].T
                    else:
                        #get step images for the second chunk
                        jj = 0
                        for jToRead in range(0, int(min(step * binning, N - j)), binning):
                            chunk2[:, jj] = \
                                data[j + jToRead].reshape(1, -1) \
                                * weights[j + jToRead]
                            jj += 1
                        nImagesRead += jj
                        if center:
                            average = chunk2[:, 0:jj].sum(axis=0) / nPixels
                            average.shape = 1, jj
                            chunk2 -= average
                        jToRead = jj
                    #dot product
                    if (iToRead != step) or (jToRead != step):
                        covMatrix[i:(i + iToRead), j:(j + jToRead)] = \
                            dotblas.dot(chunk1[:iToRead, :nPixels],
                                        chunk2[:nPixels, :jToRead])
                    else:
                        covMatrix[i:(i + iToRead), j:(j + jToRead)] = \
                            dotblas.dot(chunk1, chunk2)
                    if i != j:
                        covMatrix[j:(j + jToRead), i:(i + iToRead)] = \
                            covMatrix[i:(i + iToRead), j:(j + jToRead)].T
                    #increment j
                    j += jToRead * step
                i += iToRead * step
            chunk1 = None
            chunk2 = None
        else:
            raise ValueError("PCATools.getCovarianceMatrix: Unhandled case")
        #should one divide by N or by N-1 ?? if we use images, we
        #assume the observables are the images, not the spectra!!!
        #so, covMatrix /= nChannels is wrong and one has to use:
        covMatrix /= usedPixels
    else:
        #the data are already arranged as (nPixels, nChannels) and we
        #basically have to return data.T * data to get back the covariance
        #matrix as (nChannels, nChannels)
        #if someone had the bad idea to store the data in HDF5 with a chunk
        #size based on the pixels and not on the spectra a loop based on
        #reading spectrum per spectrum can be very slow
        step = 0
        divider = 10
        while step < 1:
            step = int(nPixels / divider)
            divider -= 1
            if divider <= 0:
                step = nPixels
                break
        step = nPixels
        if DEBUG:
            print("Reading chunks of %d spectra" % step)
        cleanWeights.shape = 1, -1
        if len(data.shape) == 2:
            if cleanMask is not None:
                badMask.shape = -1
            tmpData = numpy.zeros((step, nChannels), numpy.float64)
            k = 0
            while k < nPixels:
                kToRead = min(step, nPixels - k)
                tmpData[0:kToRead] = data[k:k + kToRead, ::binning]
                if cleanMask is not None:
                    tmpData[badMask[k:k + kToRead]] = 0
                a = tmpData[0:kToRead] * cleanWeights
                sumSpectrum += a.sum(axis=0)
                covMatrix += dotblas.dot(a.T, a)
                a = None
                k += kToRead
            tmpData = None
        elif len(data.shape) == 3:
            if oldShape[0] == 1:
                #close to the previous case
                tmpData = numpy.zeros((step, nChannels), numpy.float64)
                if cleanMask is not None:
                    badMask.shape = data.shape[0], data.shape[1]
                for i in range(oldShape[0]):
                    k = 0
                    while k < oldShape[1]:
                        kToRead = min(step, oldShape[1] - k)
                        tmpData[0:kToRead] = data[i, k:k + kToRead, ::binning] \
                                             * cleanWeights
                        if cleanMask is not None:
                            tmpData[0:kToRead][badMask[i, k:k + kToRead]] = 0
                        a = tmpData[0:kToRead]
                        sumSpectrum += a.sum(axis=0)
                        covMatrix += dotblas.dot(a.T, a)
                        a = None
                        k += kToRead
                tmpData = None
            elif oldShape[1] == 1:
                #almost identical to the previous case
                tmpData = numpy.zeros((step, nChannels), numpy.float64)
                if cleanMask is not None:
                    badMask.shape = data.shape[0], data.shape[1]
                for i in range(oldShape[1]):
                    k = 0
                    while k < oldShape[0]:
                        kToRead = min(step, oldShape[0] - k)
                        tmpData[0:kToRead] = data[k: k + kToRead, i, ::binning] \
                                             * cleanWeights
                        if cleanMask is not None:
                            tmpData[0:kToRead][badMask[k:k + kToRead, i]] = 0
                        a = tmpData[0:kToRead]
                        sumSpectrum += a.sum(axis=0)
                        covMatrix += dotblas.dot(a.T, a)
                        a = None
                        k += kToRead
                tmpData = None
            elif oldShape[0] < 21:
                if step > oldShape[1]:
                    step = oldShape[1]
                tmpData = numpy.zeros((step, nChannels), numpy.float64)
                if cleanMask is not None:
                    badMask.shape = data.shape[0], data.shape[1]
                for i in range(oldShape[0]):
                    k = 0
                    while k < oldShape[1]:
                        kToRead = min(step, oldShape[1] - k)
                        tmpData[0:kToRead] = data[i, k: k + kToRead, ::binning] \
                                             * cleanWeights
                        if cleanMask is not None:
                            tmpData[0:kToRead][badMask[i, k:k + kToRead]] = 0
                        a = tmpData[0:kToRead]
                        sumSpectrum += a.sum(axis=0)
                        covMatrix += dotblas.dot(a.T, a)
                        a = None
                        k += kToRead
                tmpData = None
            else:
                #I should choose the sizes in terms of the size
                #of the dataset
                if oldShape[0] < 41:
                    #divide by 10
                    deltaRow = 4
                elif oldShape[0] < 101:
                    #divide by 10
                    deltaRow = 10
                else:
                    #take pieces of one tenth
                    deltaRow = int(oldShape[0] / 10)
                deltaCol = oldShape[1]
                tmpData = numpy.zeros((deltaRow, deltaCol, nChannels),
                                      numpy.float64)
                if cleanMask is not None:
                    badMask.shape = data.shape[0], data.shape[1]
                i = 0
                while i < oldShape[0]:
                    iToRead = min(deltaRow, oldShape[0] - i)
                    kToRead = iToRead * oldShape[1]
                    tmpData[:iToRead] = data[i:(i + iToRead), :, ::binning]
                    if cleanMask is not None:
                        tmpData[0:kToRead][badMask[i:(i + iToRead), :]] = 0
                    a = tmpData[:iToRead]
                    a.shape = kToRead, nChannels
                    a *= cleanWeights
                    if 0:
                        #weight each spectrum
                        a /= (a.sum(axis=1).reshape(-1, 1))
                    sumSpectrum += a.sum(axis=0)
                    covMatrix += dotblas.dot(a.T, a)
                    a = None
                    i += iToRead
        #should one divide by N or by N-1 ??
        covMatrix /= usedPixels - 1
        if center:
            #the n-1 appears again here
            averageMatrix = numpy.outer(sumSpectrum, sumSpectrum) \
                            / (usedPixels * (usedPixels - 1))
            covMatrix -= averageMatrix
            averageMatrix = None
    return covMatrix, sumSpectrum / usedPixels, usedPixels
def waveforms(hid):
    'plots the waveform 10 positions from the end'
    ds = h5py.Dataset(hid)
    plt.plot(ds[-10, :, :])
    plt.show()
def plot1d(hid):
    print(hid)
    ds = h5py.Dataset(hid)
    plt.plot(ds[0, :])
    plt.show()
def test2(hid):
    ds = h5py.Dataset(hid)
    plt.plot(ds[:, 0].T)
    plt.show()
def batchROIMultipleSpectra(self, x=None, y=None, configuration=None, net=True,
                            xAtMinMax=False, index=None, xLabel=None):
    """
    This method performs the actual fit. The y keyword is the only
    mandatory input argument.

    :param x: 1D array containing the x axis (usually the channels) of the spectra.
    :param y: 3D array containing the data, usually [nrows, ncolumns, nchannels]
    :param weight: 0 Means no weight, 1 Use an average weight, 2 Individual weights (slow)
    :param net: 0 Means no subtraction, 1 Calculate
    :param xAtMinMax: if True, calculate X at maximum and minimum Y. Default is False.
    :param index: Index of dimension where to apply the ROIs.
    :param xLabel: Type of ROI to be used.
    :return: A dictionary with the images and the image names as keys.
    """
    if y is None:
        raise RuntimeError("y keyword argument is mandatory!")
    if hasattr(y, "info") and hasattr(y, "data"):
        data = y.data
        mcaIndex = y.info.get("McaIndex", -1)
    else:
        data = y
        mcaIndex = -1
    if index is None:
        index = mcaIndex
    if index < 0:
        index = len(data.shape) - 1
    # workaround a problem with h5py
    try:
        if index in [0]:
            testException = data[0:1]
        else:
            if len(data.shape) == 2:
                testException = data[0:1, -1]
            elif len(data.shape) == 3:
                testException = data[0:1, 0:1, -1]
    except AttributeError:
        txt = "%s" % type(data)
        if 'h5py' in txt:
            _logger.info("Implementing h5py workaround")
            import h5py
            data = h5py.Dataset(data.id)
        else:
            raise
    # make sure to get x data
    if x is None:
        x = numpy.arange(data.shape[index]).astype(numpy.float32)
    if configuration is not None:
        self.setConfiguration(configuration)
    # read the current configuration
    config = self.getConfiguration()
    # start the work
    roiList0 = config["ROI"]["roilist"]
    if type(roiList0) not in [type([]), type((1,))]:
        roiList0 = [roiList0]
    # operate only on compatible ROIs
    roiList = []
    for roi in roiList0:
        if roi.upper() == "ICR":
            roiList.append(roi)
        roiType = config["ROI"]["roidict"][roi]["type"]
        if xLabel is None:
            roiList.append(roi)
        elif xLabel.lower() == roiType.lower():
            roiList.append(roi)
    # only usual spectra case supported
    if index != (len(data.shape) - 1):
        raise IndexError("Only stacks of spectra supported")
    if len(data.shape) != 3:
        txt = "For the time being only "
        txt += "three dimensional arrays supported"
        raise NotImplementedError(txt)
    totalSpectra = 1
    for i in range(len(data.shape)):
        if i != index:
            totalSpectra *= data.shape[i]
    if x.size != data.shape[index]:
        raise NotImplementedError("All the spectra should share same X axis")
    jStep = min(1000, data.shape[1])
    nRois = len(roiList)
    iXMinList = [None] * nRois
    iXMaxList = [None] * nRois
    nRows = data.shape[0]
    nColumns = data.shape[1]
    if xAtMinMax:
        results = numpy.zeros((nRois * 4, nRows, nColumns), numpy.float64)
        names = [None] * 4 * nRois
    else:
        results = numpy.zeros((nRois * 2, nRows, nColumns), numpy.float64)
        names = [None] * 2 * nRois
    for i in range(0, data.shape[0]):
        #print(i)
        #chunks of nColumns spectra
        if i == 0:
            chunk = numpy.zeros((jStep, data.shape[index]), numpy.float64)
            xData = x
        jStart = 0
        while jStart < data.shape[1]:
            jEnd = min(jStart + jStep, data.shape[1])
            chunk[:(jEnd - jStart)] = data[i, jStart:jEnd]
            for j, roi in enumerate(roiList):
                if i == 0:
                    roiType = config["ROI"]["roidict"][roi]["type"]
                    roiLine = roi
                    roiFrom = config["ROI"]["roidict"][roi]["from"]
                    roiTo = config["ROI"]["roidict"][roi]["to"]
                    if roiLine == "ICR":
                        iXMinList[j] = 0
                        iXMaxList[j] = data.shape[index]
                    else:
                        iXMinList[j] = numpy.nonzero(x <= roiFrom)[0][-1]
                        iXMaxList[j] = numpy.nonzero(x >= roiTo)[0][0] + 1
                    names[j] = "ROI " + roiLine
                    names[j + nRois] = "ROI " + roiLine + " Net"
                    if xAtMinMax:
                        names[j + 2 * nRois] = "ROI " + roiLine + (" %s at Max." % roiType)
                        names[j + 3 * nRois] = "ROI " + roiLine + (" %s at Min." % roiType)
                iXMin = iXMinList[j]
                iXMax = iXMaxList[j]
                #if i == 0:
                #    print roi, " iXMin = ", iXMin, "iXMax = ", iXMax
                tmpArray = chunk[:(jEnd - jStart), iXMin:iXMax]
                left = tmpArray[:, 0]
                right = tmpArray[:, -1]
                rawSum = tmpArray.sum(axis=-1, dtype=numpy.float64)
                netSum = rawSum - (0.5 * (left + right) * (iXMax - iXMin + 1))
                results[j][i, :(jEnd - jStart)] = rawSum
                results[j + nRois][i, :(jEnd - jStart)] = netSum
                if xAtMinMax:
                    # maxImage
                    results[j + 2 * nRois][i, :(jEnd - jStart)] = \
                        numpy.argmax(tmpArray, axis=1) + iXMin
                    # minImage
                    results[j + 3 * nRois][i, :(jEnd - jStart)] = \
                        numpy.argmin(tmpArray, axis=1) + iXMin
            jStart = jEnd
    outputDict = {'images': results, 'names': names}
    return outputDict
def GetPropertyStr(wxTree, wxNode):
    path = HdfViewerFrame.GetPath(wxTree, wxNode)
    hidStr = wxTree.GetItemText(wxNode)
    hid = wxTree.GetPyData(wxNode)
    #o=wxTree.GetItemData(wxNode)
    #print o.Data,wxTree.GetPyData(wxNode)
    #if type(gid)==h5py.h5g.GroupID:
    txt = path + '\n'
    t = type(hid)
    if t == tuple:
        if hid[0] == None:
            txt += 'missing external link:\n ' + hid[1] + '\n ' + hid[2]
            return txt
        else:
            txt += 'external link:\n ' + hid[1] + '\n ' + hid[2]
            hid = hid[0]
            t = type(hid)
    if t == h5py.h5f.FileID:
        txt += type(hid).__name__ + ':%d\n' % hid.id
        hid = h5py.h5o.open(hid, '/')
        t = type(hid)
    objInf = h5py.h5o.get_info(hid)
    #print t,hid.id,objInf.fileno, objInf.rc, objInf.type, objInf.addr, objInf.hdr
    txt += type(hid).__name__ + ':%d\n' % hid.id
    txt += 'addr:%d fileno:%d refCnt:%d\n' % (objInf.addr, objInf.fileno, objInf.rc)
    try:
        wxNodeParent = wxTree.GetItemParent(wxNode)
        txtParent = wxTree.GetItemText(wxNode)
        dataParent = wxTree.GetPyData(wxNode)
        gid = wxTree.GetPyData(wxNodeParent)
        softLnk = gid.get_linkval(hidStr)
    except BaseException as e:
        pass
    else:
        txt += 'Soft Link:' + softLnk + '\n'
    try:
        numAttr = h5py.h5a.get_num_attrs(hid)
    except ValueError as e:
        pass
    else:
        if numAttr > 20:
            txt += 'Attributes:%d (too many to show)\n' % numAttr
        else:
            txt += 'Attributes:%d\n' % numAttr
            for idxAttr in range(numAttr):
                aid = h5py.h5a.open(hid, index=idxAttr)
                txt += '\t' + aid.name + '\t' + str(GetAttrVal(aid)) + '\n'
    val = None
    if t == h5py.h5g.GroupID:
        pass
    elif t == h5py.h5d.DatasetID:
        txt += '\nshape: ' + str(hid.shape) + '\n'
        tt = hid.get_type()
        ttt = type(tt)
        if ttt == h5py.h5t.TypeCompoundID:
            txt += 'type: Compound\n'
        elif ttt == h5py.h5t.TypeStringID:
            sz = tt.get_size()
            txt += 'type: String (length %d)\n' % sz
        else:
            txt += 'type: ' + str(tt.dtype) + '\n'
        pl = hid.get_create_plist()
        txFcn = (
            ('chunk', h5py.h5p.PropDCID.get_chunk),
            ('fill time', h5py.h5p.PropDCID.get_fill_time),
            ('alloc_time', h5py.h5p.PropDCID.get_alloc_time),
            #('class', h5py.h5p.PropDCID.get_class),
            #('fill_value', h5py.h5p.PropDCID.get_fill_value),
            #('filter', h5py.h5p.PropDCID.get_filter),
            #('filter_by_id', h5py.h5p.PropDCID.get_filter_by_id),
            ('layout', h5py.h5p.PropDCID.get_layout),
            ('nfilters', h5py.h5p.PropDCID.get_nfilters),
            #('obj_track_times', h5py.h5p.PropDCID.get_obj_track_times),
        )
        for tx, func in txFcn:
            try:
                v = func(pl)
            except ValueError as e:
                pass
            else:
                txt += tx + ':' + str(v) + '\n'
        if hid.shape == () or np.prod(hid.shape) < 10:  #show up to max. 10 element arrays
            #if ttt==h5py.h5t.TypeStringID or hid.shape==() or hid.shape==(1,):
            ds = h5py.Dataset(hid)
            txt += 'Value:\n\t' + str(ds.value) + '\n'
    return txt
def __init__(self, parent, lbl, hid):
    wx.Frame.__init__(self, parent, title='HDFGridView: ' + lbl, size=wx.Size(750, 650))
    imgDir = ut.Path.GetImage()
    icon = wx.Icon(os.path.join(imgDir, 'h5pyViewer.ico'), wx.BITMAP_TYPE_ICO)
    self.SetIcon(icon)
    pan = wx.Panel(self, -1)
    t = type(hid)
    if t == h5py.h5d.DatasetID:
        data = h5py.Dataset(hid)
    elif t == np.ndarray:
        data = hid
    else:
        raise TypeError('unhandled type')
    grid = Grid(pan, data)
    tbl = grid.GetTable()
    sizer = wx.BoxSizer(wx.VERTICAL)
    sizer.Add(grid, 1, wx.EXPAND)
    wxAxCtrlLst = []
    l = len(data.shape)
    if l == 1:
        if type(hid.get_type()) == h5py.h5t.TypeCompoundID:
            tbl = Table1DCompound(data)
        else:
            tbl = Table1DArray(data)
    else:
        idxXY = (l - 2, l - 1)
        #idxXY=(l-1,l-2)
        for idx, l in enumerate(data.shape):
            if idx in idxXY:
                continue
            wxAxCtrl = ut.SliderGroup(pan, label='Axis:%d' % idx, range=(0, l - 1))
            wxAxCtrl.idx = idx
            wxAxCtrlLst.append(wxAxCtrl)
            sizer.Add(wxAxCtrl.sizer, 0, wx.EXPAND | wx.ALIGN_CENTER | wx.ALL, border=5)
            wxAxCtrl.SetCallback(Grid.OnSetView, wxAxCtrl)
        sl = ut.GetSlice(idxXY, data.shape, wxAxCtrlLst)
        if type(hid.get_type()) == h5py.h5t.TypeCompoundID:
            tbl = Table2DArray(data)
        else:
            tbl = Table2DArray(data)
        tbl.idxXY = idxXY
        if idxXY[0] < idxXY[1]:
            tbl.view = tbl.data[sl]
        else:
            tbl.view = tbl.data[sl].T
    self.wxAxCtrlLst = wxAxCtrlLst
    #print type(tbl)
    grid.SetTable(tbl, True)
    #AutoSize must be called after SetTable, but takes lot of time on big tables!
    if tbl.GetNumberCols() * tbl.GetNumberRows() < 50 * 50:
        grid.AutoSizeColumns(True)
        grid.AutoSizeRows(True)
    #grid.SetDefaultColSize(200, True)
    self.grid = grid
    pan.SetSizer(sizer)
    pan.Layout()
    self.Centre()
    self.BuildMenu()
    grid.Bind(wx.grid.EVT_GRID_CMD_COL_SIZE, self.OnColSize)
reportbarrier("Creating output file and dataset") #Ask for alignment with the stripe size (use lfs getstripe on target directory to determine) propfaid = h5py.h5p.create(h5py.h5p.FILE_ACCESS) propfaid.set_fapl_mpio(MPI.COMM_WORLD, mpi_info) #propfaid.set_alignment(1024, 1024*1024) #driver='mpio', comm=MPI.COMM_WORLD fid = h5py.h5f.create(join(datapath, foutname + ".h5"), flags=h5py.h5f.ACC_TRUNC, fapl=propfaid) fout = h5py.File(fid) # Don't use filling spaceid = h5py.h5s.create_simple((numRows, numCols)) plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE) plist.set_fill_time(h5py.h5d.FILL_TIME_NEVER) datasetid = h5py.h5d.create(fout.id, "rows", h5py.h5t.NATIVE_FLOAT, spaceid, plist) rows = h5py.Dataset(datasetid) reportbarrier("Finished creating output file and dataset") localcolumncount = np.sum(numLocalTimeSlices) curlevdata = np.empty((numlats*numlongs, localcolumncount), dtype=np.float32) chunktotransfer = np.empty((rowChunkSize*localcolumncount,), dtype=np.float32) listwriter = map(chunkidxToWriter, np.arange(numWriters)) if rank in listwriter: collectedchunk = np.ascontiguousarray(np.empty((numCols*rowChunkSize,), \ dtype=np.float32)) chunktowrite = np.ascontiguousarray(np.empty((rowChunkSize, numCols), \ dtype=np.float32)) else: collectedchunk = None curlevdatatemp=np.ascontiguousarray(np.zeros((numlats*numlongs*numtimeslices), \