def thinPick(src, dest, ax, step=None, delsrc=False, verbose=False):
    """
    Thin a dataset by picking every nth point and discarding the rest

    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    ax -> Axis to apply op to (name)
    step -> The points kept will be indices i*step
    delsrc -> Boolean, if true src file will be deleted after operation
    verbose -> Boolean, if true activates printouts
    """
    if step is None:
        step = 10
    else:
        step = int(step)

    #Load some file parameters to calculate the shape of the new dataset
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        oldshape = srcgrp['data'].shape
        dimlabels = hdftools.arrToStrList(srcgrp['data'].attrs['dimensions'][:])
        #Get ax index
        axind = getAxInd(ax, dimlabels)
        newshape = np.copy(oldshape)
        newshape[axind] = int(np.ceil(oldshape[axind] / step))

    chunked_array_op(src, dest, ax, thinPickOp, newshape,
                     delsrc=delsrc, verbose=verbose, step=step)
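#NOTE: The op functions referenced by the wrappers in this section
#(thinPickOp, avgDimOp, thinBinOp, trimDimOp) are defined elsewhere in this
#file and not shown here. The sketch below is a minimal, illustrative
#implementation assuming the op(source, dest, sl, axind, args) signature
#that chunked_array_op (below) uses; it is not necessarily the repository's
#actual code.
def thinPickOp(source, dest, sl, axind, args):
    #Read one chunk; sl is a full slice on every axis except the chunking axis
    chunk = source[tuple(sl)]
    #Keep indices 0, step, 2*step, ... along the thinned axis
    pick = [slice(None)] * chunk.ndim
    pick[axind] = slice(None, None, args['step'])
    #The same sl works on dest: it is slice(None) along axind, and dest was
    #created with the thinned length there
    dest[tuple(sl)] = chunk[tuple(pick)]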
def avgDim(src, dest, ax, delsrc=False, verbose=False):
    """
    Average over one dimension of a dataset (collapsing it to len=1)

    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    ax -> Axis to apply op to (name)
    delsrc -> Boolean, if true src file will be deleted after operation
    verbose -> Boolean, if true activates printouts
    """
    #Load some file parameters to calculate the shape of the new dataset
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        oldshape = srcgrp['data'].shape
        dimlabels = hdftools.arrToStrList(srcgrp['data'].attrs['dimensions'][:])
        #Get ax index
        axind = getAxInd(ax, dimlabels)
        newshape = np.copy(oldshape)
        newshape[axind] = 1

    #Call the avgDimOp function, wrapped in the chunked_array_op framework
    chunked_array_op(src, dest, ax, avgDimOp, newshape,
                     delsrc=delsrc, verbose=verbose)
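#As above, an illustrative sketch of avgDimOp assuming the shared op
#signature (not necessarily the repository's actual code):
def avgDimOp(source, dest, sl, axind, args):
    #Average the chunk over the chosen axis; keepdims preserves the
    #length-1 axis so the result still matches newshape
    dest[tuple(sl)] = np.mean(source[tuple(sl)], axis=axind, keepdims=True)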
def thinBin(src, dest, ax, bin=None, delsrc=False, verbose=False):
    """
    Thin a dataset by averaging it over non-overlapping bins.

    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    ax -> Axis to apply op to (name)
    bin -> The width of each bin
    delsrc -> Boolean, if true src file will be deleted after operation
    verbose -> Boolean, if true activates printouts
    """
    if bin is None:
        bin = 10
    else:
        bin = int(bin)

    #Load some file parameters to calculate the shape of the new dataset
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        oldshape = srcgrp['data'].shape
        dimlabels = hdftools.arrToStrList(srcgrp['data'].attrs['dimensions'][:])
        #Get ax index
        axind = getAxInd(ax, dimlabels)
        newshape = np.copy(oldshape)
        newshape[axind] = int(np.ceil(oldshape[axind] / bin))

    chunked_array_op(src, dest, ax, thinBinOp, newshape,
                     delsrc=delsrc, verbose=verbose, bin=bin)
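#An illustrative sketch of thinBinOp under the same assumed op signature.
#How the actual implementation handles a final partial bin is a guess here:
#this version just averages whatever points remain.
def thinBinOp(source, dest, sl, axind, args):
    bin = args['bin']
    chunk = source[tuple(sl)]
    n = chunk.shape[axind]
    nbins = int(np.ceil(n / bin))
    #Average each non-overlapping bin (the last bin may be partial),
    #then stack the bin means back along the thinned axis
    means = [np.mean(np.take(chunk, range(j * bin, min((j + 1) * bin, n)),
                             axis=axind), axis=axind)
             for j in range(nbins)]
    dest[tuple(sl)] = np.stack(means, axis=axind)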
def chunked_array_op(src, dest, ax, op, newshape, delsrc=False, verbose=False,
                     **args):
    """
    Apply one of the array op functions to an entire dataset, breaking the
    dataset up into chunks to keep memory load low.

    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    ax -> Axis to apply op to (name)
    op -> Function to be applied. This function must be one of the op
    functions defined in this file, with the signature
    op(source, dest, sl, axind, args)
    newshape -> Shape the new dataset will be after op has been applied
    delsrc -> Boolean, if true src file will be deleted after operation
    verbose -> Boolean, if true activates printouts
    """
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        #Check source is valid dataset
        validDataset(srcgrp)

        #Load information about source dataset
        oldshape = list(srcgrp['data'].shape)
        ndim = len(oldshape)
        dimlabels = hdftools.arrToStrList(srcgrp['data'].attrs['dimensions'][:])
        #Get ax index
        axind = getAxInd(ax, dimlabels)

        #Decide on a chunking axis
        #Get a list of the axis indices ordered by chunk size, largest to smallest
        chunks = np.flip(np.argsort(srcgrp['data'].chunks))
        #Choose the largest one that ISN'T the chosen axis
        chunkax = chunks[0]
        if chunkax == axind:
            chunkax = chunks[1]
        print("Chunking axis: " + str(dimlabels[chunkax]))

        if srcgrp['data'].chunks[chunkax] < 2:
            print("WARNING: POSSIBLE INEFFICIENT CHUNKING DETECTED!")

        #Determine optimal chunksize (along chunkax)
        ideal_chunk_elms = 1e7  #1e7 * 4 bytes (per float32) ~ 40 MB, which is good
        #Number of values per chunk-axis value
        nper = np.prod(oldshape) / oldshape[chunkax]
        chunksize = int(np.round(ideal_chunk_elms / nper))
        if chunksize < 1:
            chunksize = 1
        #Determine nchunks
        nchunks = int(np.ceil(oldshape[chunkax] / chunksize))

        #Create the destination dataset
        with h5py.File(dest.file, 'w') as df:
            destgrp = df[dest.group]

            #Copy all the dataset attributes
            hdftools.copyAttrs(srcgrp, destgrp)

            #Create new data array
            destgrp.require_dataset('data', newshape, np.float32,
                                    chunks=True, compression='gzip')
            destgrp['data'].attrs['unit'] = srcgrp['data'].attrs['unit']

            if verbose:
                print(srcgrp['data'].shape)
                print(destgrp['data'].shape)

            #Copy the axes over, except the one being operated on
            #That axis will be copied over later, with changes
            for axis in dimlabels:
                if axis != ax:
                    srcgrp.copy(axis, destgrp)

            #Create the axis being operated on...unless it is now trivial
            #Newshape was determined above, and is specific to the op
            if newshape[axind] > 1:
                destgrp.require_dataset(ax, (newshape[axind],), np.float32,
                                        chunks=True)
                destgrp[ax].attrs['unit'] = srcgrp[ax].attrs['unit']
                new_dimlabels = dimlabels  #No changes
            else:
                #Trivial: remove this dim (list.pop returns the removed
                #element, so mutate a copy and keep the list itself)
                new_dimlabels = list(dimlabels)
                new_dimlabels.pop(axind)

            destgrp['data'].attrs['dimensions'] = hdftools.strListToArr(
                new_dimlabels)

            #Initialize time-remaining printout
            #Chunks are big, so report more often than usual
            tr = util.timeRemaining(nchunks, reportevery=1)

            for i in range(nchunks):
                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)
                sl = [slice(None)] * ndim
                #Assemble the chunk slices
                if i != nchunks - 1:
                    sl[chunkax] = slice(i * chunksize, (i + 1) * chunksize, None)
                else:
                    sl[chunkax] = slice(i * chunksize, None, None)
                #Apply op to the chunk
                op(srcgrp['data'], destgrp['data'], sl, axind, args)

            #Make the new axis by applying op to the old axis
            #(skip if the axis became trivial and was removed above)
            if newshape[axind] > 1:
                op(srcgrp[ax], destgrp[ax], [slice(None)], 0, args)

    #If requested, delete the source file
    if delsrc:
        os.remove(src.file)
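#Standalone illustration (pure numpy, illustrative shape) of the chunk
#slicing assembled in the loop above: each slice list spans every axis in
#full except the chunking axis, so one op call sees the whole extent of the
#operated axis.
def _demo_chunk_slicing():
    data = np.arange(24).reshape(4, 6)
    chunkax, chunksize = 0, 2
    nchunks = int(np.ceil(data.shape[chunkax] / chunksize))
    for i in range(nchunks):
        sl = [slice(None)] * data.ndim
        if i != nchunks - 1:
            sl[chunkax] = slice(i * chunksize, (i + 1) * chunksize)
        else:
            sl[chunkax] = slice(i * chunksize, None)  #last chunk runs to the end
        print(i, data[tuple(sl)].shape)  #-> (2, 6) for each of the 2 chunks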
def trimDim(src, dest, ax, ind_bounds=None, val_bounds=None,
            delsrc=False, verbose=False):
    """
    Trim a dimension of a dataset, discarding some data

    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    ax -> Axis to apply op to (name)
    ind_bounds -> Start and stop indices for the trim
    val_bounds -> Start and stop axis values for the trim (converted to
    indices here). At most one of ind_bounds and val_bounds may be given.
    delsrc -> Boolean, if true src file will be deleted after operation
    verbose -> Boolean, if true activates printouts
    """
    if ind_bounds is None and val_bounds is None:
        print("Using ind bounds (as default)")
        bounds = (None, None)
    elif ind_bounds is not None and val_bounds is None:
        print("Using ind bounds")
        bounds = ind_bounds
    elif val_bounds is not None and ind_bounds is None:
        print("Using val bounds")
        #If values are being passed, figure out the indices here
        with h5py.File(src.file, 'r') as sf:
            srcgrp = sf[src.group]
            oldshape = srcgrp['data'].shape
            dimlabels = hdftools.arrToStrList(srcgrp['data'].attrs['dimensions'][:])
            #Get ax index
            axind = getAxInd(ax, dimlabels)

            if val_bounds[0] < srcgrp[ax][:].min():
                a = 0
            else:
                a = np.abs(srcgrp[ax][:] - val_bounds[0]).argmin()
            if val_bounds[1] > srcgrp[ax][:].max():
                b = oldshape[axind] - 1
            else:
                b = np.abs(srcgrp[ax][:] - val_bounds[1]).argmin()

            bounds = (a, b)
            bounds = np.clip(bounds, 0, oldshape[axind] - 1)
            print(bounds)
    else:
        raise ValueError("Cannot specify ind_bounds AND val_bounds!")

    #Load some file parameters to calculate the shape of the new dataset
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        oldshape = srcgrp['data'].shape
        dimlabels = hdftools.arrToStrList(srcgrp['data'].attrs['dimensions'][:])
        #Get ax index
        axind = getAxInd(ax, dimlabels)
        #Default to the full axis range if no bounds were given
        a = bounds[0] if bounds[0] is not None else 0
        b = bounds[1] if bounds[1] is not None else oldshape[axind]
        bounds = (a, b)
        newshape = np.copy(oldshape)
        newshape[axind] = np.abs(b - a)

    chunked_array_op(src, dest, ax, trimDimOp, newshape,
                     delsrc=delsrc, verbose=verbose, bounds=bounds)
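#An illustrative sketch of trimDimOp under the same assumed op signature
#(not necessarily the repository's actual code):
def trimDimOp(source, dest, sl, axind, args):
    a, b = args['bounds']
    #Read only the [a, b) index range along the trimmed axis; every other
    #axis keeps the chunk slice assembled by chunked_array_op
    trim = list(sl)
    trim[axind] = slice(a, b)
    dest[tuple(sl)] = source[tuple(trim)]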
def fullToBmag(src, dest, verbose=False):
    """
    Compute |B| = sqrt(Bx^2 + By^2 + Bz^2) from a three-channel B-field
    dataset, writing the result (in Gauss) to a new dataset.

    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    verbose -> Boolean, if true activates printouts
    """
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        try:
            dimlabels = hdftools.arrToStrList(srcgrp['data'].attrs['dimensions'][:])
            shape = np.array(srcgrp['data'].shape)
            #Same as the old shape, but with the channel dimension collapsed to length 1
            shape[-1] = 1
        except KeyError:
            raise KeyError("bdot.fullToBmag requires the data array to have a 'dimensions' attribute")

        #We will duplicate the chunking on the new array
        chunks = srcgrp['data'].chunks

        try:
            xax = dimlabels.index("xaxis")
            yax = dimlabels.index("yaxis")
            zax = dimlabels.index("zaxis")
            xaxis = srcgrp['xaxis']
            yaxis = srcgrp['yaxis']
            zaxis = srcgrp['zaxis']
            nti = shape[dimlabels.index("time")]
            nx = shape[xax]
            ny = shape[yax]
            nz = shape[zax]
        except KeyError:
            raise KeyError("bdot.fullToBmag requires dimensions 'time', 'xaxis', 'yaxis', 'zaxis'")

        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)
        #Delete destination file if it already exists
        if os.path.exists(dest.file):
            os.remove(dest.file)

        with h5py.File(dest.file, 'w') as df:
            destgrp = df[dest.group]

            destgrp.require_dataset('data', shape, np.float32,
                                    chunks=chunks, compression='gzip')
            destgrp['data'].attrs['unit'] = 'G'
            destgrp['data'].attrs['dimensions'] = hdftools.strListToArr(dimlabels)

            #Copy the axes over
            for ax in dimlabels:
                if ax != 'chan':
                    srcgrp.copy(ax, destgrp)
                else:
                    destgrp.require_dataset('chan', (1,), np.int32, chunks=True)[:] = [0]
                    destgrp['chan'].attrs['unit'] = ''

            chunksize = 100
            nchunks = int(np.ceil(nti / chunksize))

            #Initialize time-remaining printout
            tr = util.timeRemaining(nchunks, reportevery=10)

            for i in range(nchunks):
                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)
                a = i * chunksize
                if i == nchunks - 1:
                    b = None
                else:
                    b = (i + 1) * chunksize

                bx = srcgrp['data'][a:b, ..., 0]
                by = srcgrp['data'][a:b, ..., 1]
                bz = srcgrp['data'][a:b, ..., 2]

                destgrp['data'][a:b, ..., 0] = np.sqrt(np.power(bx, 2) +
                                                       np.power(by, 2) +
                                                       np.power(bz, 2))
    return dest
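#A quick standalone check (pure numpy, illustrative shape) that the
#channel-wise sqrt-of-squares above matches the last-axis vector norm:
def _demo_bmag_norm():
    b = np.random.rand(5, 2, 3, 4, 3).astype(np.float32)  #(time, x, y, z, chan)
    bmag = np.sqrt(b[..., 0]**2 + b[..., 1]**2 + b[..., 2]**2)
    assert np.allclose(bmag, np.linalg.norm(b, axis=-1))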
def fullToCurrent(src, dest, verbose=False):
    """
    Compute the current density from a three-channel B-field dataset via
    Ampere's law (J ~ curl(B)), writing the result in A/cm^2.

    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    verbose -> Boolean, if true activates printouts
    """
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        try:
            dimlabels = hdftools.arrToStrList(srcgrp['data'].attrs['dimensions'][:])
            shape = srcgrp['data'].shape
        except KeyError:
            raise KeyError("bdot.fullToCurrent requires the data array to have a 'dimensions' attribute")

        #We will duplicate the chunking on the new array
        chunks = srcgrp['data'].chunks

        try:
            xax = dimlabels.index("xaxis")
            yax = dimlabels.index("yaxis")
            zax = dimlabels.index("zaxis")
            xaxis = srcgrp['xaxis']
            yaxis = srcgrp['yaxis']
            zaxis = srcgrp['zaxis']
            nti = shape[dimlabels.index("time")]
            nx = shape[xax]
            ny = shape[yax]
            nz = shape[zax]
        except KeyError:
            raise KeyError("bdot.fullToCurrent requires dimensions 'time', 'xaxis', 'yaxis', 'zaxis'")

        if nti > 10000:
            print("WARNING: NTI IS LARGE! CURRENT CALCULATION WILL TAKE A VERY LONG TIME!")
            print("If you have better things to do with your CPU hours, try thinning the data first.")

        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)
        #Delete destination file if it already exists
        if os.path.exists(dest.file):
            os.remove(dest.file)

        with h5py.File(dest.file, 'w') as df:
            destgrp = df[dest.group]

            destgrp.require_dataset('data', shape, np.float32,
                                    chunks=chunks, compression='gzip')
            destgrp['data'].attrs['unit'] = 'A/cm^2'
            destgrp['data'].attrs['dimensions'] = hdftools.strListToArr(dimlabels)

            #Copy the axes over
            for ax in dimlabels:
                srcgrp.copy(ax, destgrp)

            chunksize = 100
            nchunks = int(np.ceil(nti / chunksize))

            #Initialize time-remaining printout
            tr = util.timeRemaining(nchunks, reportevery=10)

            for i in range(nchunks):
                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)
                a = i * chunksize
                if i == nchunks - 1:
                    b = None
                else:
                    b = (i + 1) * chunksize

                #Constant is (c/4pi) * (conversion CGS -> A/m^2) * (conversion A/m^2 -> A/cm^2)
                #= (2.99e10/4pi) * (3.0e-5) * (1e-4) ~ 7.138
                #The 3.0e-5 is from the NRL formulary
                destgrp['data'][a:b, ...] = 7.138 * math.curl(srcgrp['data'][a:b, ...],
                                                              xax, yax, zax,
                                                              xaxis, yaxis, zaxis)
    return dest
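#Numeric check of the 7.138 prefactor above: Ampere's law in CGS gives
#J = (c/4pi) curl(B), and the two factors convert the CGS current density
#to A/m^2 (NRL formulary) and then to A/cm^2:
def _check_current_prefactor():
    c = 2.99e10           #speed of light, cm/s
    cgs_to_A_m2 = 3.0e-5  #CGS current density -> A/m^2
    A_m2_to_A_cm2 = 1e-4  #A/m^2 -> A/cm^2
    return c / (4 * np.pi) * cgs_to_A_m2 * A_m2_to_A_cm2  #~7.138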