Example #1
def thinPick(src, dest, ax, step=None, delsrc=False, verbose=False):
    """
    Thin a dataset by picking every nth point and discarding the rest
    
    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    ax -> Axis to apply op to (name)
    step -> The points kept will be indices i*step
    delsrc -> Boolean, if true src file will be deleted after operation
    verbose -> Boolean, if true activates printouts
    """
    if step is None:
        step = 10
    else:
        step = int(step)

    #Load some file parameters to calculate the shape of the new dataset
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        oldshape = srcgrp['data'].shape
        dimlabels = hdftools.arrToStrList(
            srcgrp['data'].attrs['dimensions'][:])
        #Get ax index
        axind = getAxInd(ax, dimlabels)
        newshape = np.copy(oldshape)
        newshape[axind] = int(np.ceil(oldshape[axind] / step))

    chunked_array_op(src,
                     dest,
                     ax,
                     thinPickOp,
                     newshape,
                     delsrc=delsrc,
                     verbose=verbose,
                     step=step)
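The matching op function is defined elsewhere in this file; below is a minimal sketch of what it must do, assuming the shared op signature used by chunked_array_op (the _sketch name is hypothetical):

def thinPickOp_sketch(srcdata, destdata, sl, axind, args):
    #Keep only indices 0, step, 2*step, ... along the thinned axis
    #and write the result into the matching destination chunk
    step = args['step']
    src_sl = list(sl)
    src_sl[axind] = slice(None, None, step)
    destdata[tuple(sl)] = srcdata[tuple(src_sl)]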
Example #2
def avgDim(src, dest, ax, delsrc=False, verbose=False):
    """
    Average over one dimension of a dataset (collapsing it to len=1)
    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    ax -> Axis to apply op to (name)
    delsrc -> Boolean, if true src file will be deleted after operation
    verbose -> Boolean, if true activates printouts
    """

    #Load some file parameters to calculate the shape of the new dataset
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        oldshape = srcgrp['data'].shape
        dimlabels = hdftools.arrToStrList(
            srcgrp['data'].attrs['dimensions'][:])
        #Get ax index
        axind = getAxInd(ax, dimlabels)
        newshape = np.copy(oldshape)
        newshape[axind] = 1

    #Call the avgDim function, wrapped in the chunked_array_op framework
    chunked_array_op(src,
                     dest,
                     ax,
                     avgDimOp,
                     newshape,
                     delsrc=delsrc,
                     verbose=verbose)
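The real avgDimOp is defined elsewhere in this file; a minimal sketch of its job under the same op signature (the _sketch name is hypothetical):

def avgDimOp_sketch(srcdata, destdata, sl, axind, args):
    #Average the source chunk over axind, keeping that dimension at len=1
    destdata[tuple(sl)] = np.mean(srcdata[tuple(sl)], axis=axind, keepdims=True)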
Example #3
def thinBin(src, dest, ax, bin=None, delsrc=False, verbose=False):
    """
    Thin a dataset by averaging it over non-overlapping bins.
    
    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    ax -> Axis to apply op to (name)
    bin -> The width of each bin
    delsrc -> Boolean, if true src file will be deleted after operation
    verbose -> Boolean, if true activates printouts
    """

    if bin is None:
        bin = 10
    else:
        bin = int(bin)

    #Load some file parameters to calculate the shape of the new dataset
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        oldshape = srcgrp['data'].shape
        dimlabels = hdftools.arrToStrList(
            srcgrp['data'].attrs['dimensions'][:])
        #Get ax index
        axind = getAxInd(ax, dimlabels)
        newshape = np.copy(oldshape)
        newshape[axind] = int(np.ceil(oldshape[axind] / bin))

    chunked_array_op(src,
                     dest,
                     ax,
                     thinBinOp,
                     newshape,
                     delsrc=delsrc,
                     verbose=verbose,
                     bin=bin)
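Again, the real thinBinOp lives elsewhere in this file; a minimal sketch of the binning it must perform (the _sketch name is hypothetical):

def thinBinOp_sketch(srcdata, destdata, sl, axind, args):
    #Average the source chunk over consecutive, non-overlapping bins
    #of width 'bin' along axind (the last bin may be narrower)
    bin = args['bin']
    arr = srcdata[tuple(sl)]
    n = arr.shape[axind]
    nbins = int(np.ceil(n / bin))
    out = [np.mean(np.take(arr, np.arange(j*bin, min((j+1)*bin, n)), axis=axind),
                   axis=axind, keepdims=True) for j in range(nbins)]
    destdata[tuple(sl)] = np.concatenate(out, axis=axind)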
Example #4
def chunked_array_op(src,
                     dest,
                     ax,
                     op,
                     newshape,
                     delsrc=False,
                     verbose=False,
                     **args):
    """
    Apply one of the array functions to an entire dataset, breaking the
    dataset up into chunks to keep memory load low.
    
    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    ax -> Axis to apply op to (name)
    op -> Function to be applied. This must be one of the op functions
    defined in this file, all of which share the same call signature
    newshape -> Shape the new dataset will be after op has been applied
    delsrc -> Boolean, if true src file will be deleted after operation
    verbose -> Boolean, if true activates printouts
    """

    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]

        #Check source is valid dataset
        validDataset(srcgrp)

        #Load information about source dataset
        oldshape = list(srcgrp['data'].shape)
        ndim = len(oldshape)
        dimlabels = hdftools.arrToStrList(
            srcgrp['data'].attrs['dimensions'][:])

        #Get ax index
        axind = getAxInd(ax, dimlabels)

        #Decide on a chunking axis
        #Get a list of the axes indices ordered by chunk size, largest to smallest
        chunks = np.flip(np.argsort(srcgrp['data'].chunks))

        #Choose the largest one that ISN'T the chosen axis
        chunkax = chunks[0]
        if chunkax == axind:
            chunkax = chunks[1]
        print("Chunking axis: " + str(dimlabels[chunkax]))

        if srcgrp['data'].chunks[chunkax] < 2:
            print("WARNING: POSSIBLE INEFFICENT CHUNKING DETECTED!")

        #Determine optimal chunksize (along chunkax)
        ideal_chunk_elms = 1e7  #1e7 * 4 bytes (per float32) ~ 40 MB, which is reasonable
        #Number of values per step along the chunking axis
        nper = np.prod(oldshape) / oldshape[chunkax]

        chunksize = int(np.round(ideal_chunk_elms / nper))
        if chunksize < 1:
            chunksize = 1

        #Determine nchunks
        nchunks = int(np.ceil(oldshape[chunkax] / chunksize))

        #Create the destination dataset
        with h5py.File(dest.file, 'w') as df:
            #Mode 'w' creates a fresh file, so make sure the group exists
            destgrp = df.require_group(dest.group)

            #Copy all the dataset attributes
            hdftools.copyAttrs(srcgrp, destgrp)

            #Create new data array
            destgrp.require_dataset('data',
                                    newshape,
                                    np.float32,
                                    chunks=True,
                                    compression='gzip')
            destgrp['data'].attrs['unit'] = srcgrp['data'].attrs['unit']

            if verbose:
                print(srcgrp['data'].shape)
                print(destgrp['data'].shape)

            #Copy the axes over, except the one being operated on
            #That axis will be copied over later, with changes
            for axis in dimlabels:
                if axis != ax:
                    srcgrp.copy(axis, destgrp)

            #Create the axis being operated on...unless it is now trivial
            #Newshape was determined above, and is specific to the op
            if newshape[axind] > 1:
                destgrp.require_dataset(ax, (newshape[axind], ),
                                        np.float32,
                                        chunks=True)
                destgrp[ax].attrs['unit'] = srcgrp[ax].attrs['unit']
                new_dimlabels = dimlabels  #No changes
            else:
                #Trivial axis: remove this dim (note that list.pop returns
                #the popped element, so pop first and keep the mutated list)
                dimlabels.pop(axind)
                new_dimlabels = dimlabels

            destgrp['data'].attrs['dimensions'] = hdftools.strListToArr(
                new_dimlabels)

            #Initialize time-remaining printout
            #Chunks are big, so report more often than usual
            tr = util.timeRemaining(nchunks, reportevery=1)

            for i in range(nchunks):
                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)
                sl = [slice(None)] * ndim

                #Assemble the chunk slices
                if i != nchunks - 1:
                    sl[chunkax] = slice(i * chunksize, (i + 1) * chunksize,
                                        None)
                else:
                    sl[chunkax] = slice(i * chunksize, None, None)

                #Apply op to the chunk
                op(srcgrp['data'], destgrp['data'], sl, axind, args)

            #Make the new axis by applying op to the old axis
            #(skipped when the axis was collapsed to a trivial dimension,
            #since no destination axis dataset exists in that case)
            if newshape[axind] > 1:
                op(srcgrp[ax], destgrp[ax], [slice(None)], 0, args)

    #If requested, delete the source file
    if delsrc:
        os.remove(src.file)
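For context, a hedged usage sketch: the hdfpath objects these functions take only need .file and .group attributes, so a namedtuple stands in below (the file names are made up):

from collections import namedtuple

#Hypothetical stand-in for the hdfpath objects used throughout
hdfPath = namedtuple('hdfPath', ['file', 'group'])

src = hdfPath('run_full.hdf5', '/')
dest = hdfPath('run_thinned.hdf5', '/')

#Keep every 5th point along the 'time' axis, with progress printouts
thinPick(src, dest, 'time', step=5, verbose=True)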
Example #5
def trimDim(src,
            dest,
            ax,
            ind_bounds=None,
            val_bounds=None,
            delsrc=False,
            verbose=False):
    """
    Trim a dimension of a dataset, discarding some data
    
    src -> Source dataset (hdfpath object)
    dest -> Destination dataset path (hdfpath object)
    ax -> Axis to apply op to (name)
    ind_bounds -> Tuple of start and stop indices for the trim.
    val_bounds -> Tuple of start and stop axis values for the trim,
    converted to indices internally. Specify at most one of the two.
    delsrc -> Boolean, if true src file will be deleted after operation
    verbose -> Boolean, if true activates printouts
    """

    if not ind_bounds and not val_bounds:
        print("Using ind bounds (as default)")
        bounds = (None, None)
    elif ind_bounds and not val_bounds:
        print("Using ind bounds")
        bounds = ind_bounds
    elif val_bounds and not ind_bounds:
        print("Using val bounds")
        #If values are being passed, figure out the indices here
        with h5py.File(src.file, 'r') as sf:
            srcgrp = sf[src.group]
            oldshape = srcgrp['data'].shape
            dimlabels = hdftools.arrToStrList(
                srcgrp['data'].attrs['dimensions'][:])
            #Get ax index
            axind = getAxInd(ax, dimlabels)

            if val_bounds[0] < srcgrp[ax][:].min():
                a = 0
            else:
                a = np.abs(srcgrp[ax][:] - val_bounds[0]).argmin()

            if val_bounds[1] > srcgrp[ax][:].max():
                b = oldshape[axind] - 1
            else:
                b = np.abs(srcgrp[ax][:] - val_bounds[1]).argmin()
            bounds = (a, b)
            bounds = np.clip(bounds, 0, oldshape[axind] - 1)
            print(bounds)
    else:
        raise ValueError("Cannot specify ind_bounds AND val_bounds!")

    #Load some file parameters to calculate the shape of the new dataset
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        oldshape = srcgrp['data'].shape
        dimlabels = hdftools.arrToStrList(
            srcgrp['data'].attrs['dimensions'][:])
        #Get ax index
        axind = getAxInd(ax, dimlabels)
        newshape = np.copy(oldshape)
        #Treat a None bound as the start or end of the axis
        a = bounds[0] if bounds[0] is not None else 0
        b = bounds[1] if bounds[1] is not None else oldshape[axind]
        newshape[axind] = np.abs(b - a)

    chunked_array_op(src,
                     dest,
                     ax,
                     trimDimOp,
                     newshape,
                     delsrc=delsrc,
                     verbose=verbose,
                     bounds=bounds)
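A minimal sketch of the corresponding op, which simply copies the kept index range (the _sketch name is hypothetical; the real trimDimOp is defined elsewhere in this file):

def trimDimOp_sketch(srcdata, destdata, sl, axind, args):
    #Copy only the [a, b) index range of the trimmed axis
    a, b = args['bounds']
    src_sl = list(sl)
    src_sl[axind] = slice(a, b)
    destdata[tuple(sl)] = srcdata[tuple(src_sl)]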
Example #6
def fullToBmag(src, dest, verbose=False):
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        try:
            dimlabels = hdftools.arrToStrList(srcgrp['data'].attrs['dimensions'][:])
            shape = np.array(srcgrp['data'].shape)
            #Same as the old shape, but now without the channels dimension...
            shape[-1] = 1

        except KeyError:
            raise KeyError("bdot.fullToBmag requires the data array to have a 'dimensions' attribute")
            
        #We will duplicate the chunking on the new array
        chunks = srcgrp['data'].chunks
        

        try:
            xax = dimlabels.index("xaxis")
            yax = dimlabels.index("yaxis")
            zax = dimlabels.index("zaxis")

            xaxis = srcgrp['xaxis']
            yaxis = srcgrp['yaxis']
            zaxis = srcgrp['zaxis']

            nti = shape[dimlabels.index("time")]
            nx = shape[xax]
            ny = shape[yax]
            nz = shape[zax]

        except (KeyError, ValueError):
            #list.index raises ValueError; missing HDF5 datasets raise KeyError
            raise KeyError("bdot.fullToBmag requires dimensions 'time', 'xaxis', 'yaxis', 'zaxis'")

        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)
        
        #Delete destination file if it already exists
        if os.path.exists(dest.file):
            os.remove(dest.file)
        
        with h5py.File(dest.file, 'w') as df:
            #Mode 'w' creates a fresh file, so make sure the group exists
            destgrp = df.require_group(dest.group)
            
            destgrp.require_dataset('data', shape, np.float32, chunks=chunks, compression='gzip')
            destgrp['data'].attrs['unit'] = 'G'
            destgrp['data'].attrs['dimensions'] = hdftools.strListToArr(dimlabels)
            
            #Copy the axes over, except the channel axis, which is
            #replaced with a trivial one-element axis
            for ax in dimlabels:
                if ax != 'chan':
                    srcgrp.copy(ax, destgrp)
                else:
                    destgrp.require_dataset('chan', (1,), np.int32, chunks=True)[:] = [0]
                    destgrp['chan'].attrs['unit'] = ''

            chunksize = 100
            nchunks = int(np.ceil(nti/chunksize))
            
            #Initialize time-remaining printout
            tr = util.timeRemaining(nchunks, reportevery=10)
            
            for i in range(nchunks):
                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)

                a = i*chunksize
                if i == nchunks-1:
                    b = None
                else:
                    b = (i+1)*chunksize
                
                bx = srcgrp['data'][a:b, ..., 0]
                by = srcgrp['data'][a:b, ..., 1]
                bz = srcgrp['data'][a:b, ..., 2]
                
                destgrp['data'][a:b, ...,0] = np.sqrt(np.power(bx,2) +
                       np.power(by,2) + np.power(bz,2))

           
        return dest
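The per-chunk computation above is just the vector magnitude |B| = sqrt(Bx^2 + By^2 + Bz^2) over the channel axis; a quick standalone check on a toy array:

import numpy as np

#One sample with channels last, ordered (Bx, By, Bz), as fullToBmag assumes
b = np.array([[3.0, 4.0, 0.0]])
bmag = np.sqrt(np.sum(b**2, axis=-1))
print(bmag)  #-> [5.]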
Example #7
def fullToCurrent(src, dest, verbose=False):
    with h5py.File(src.file, 'r') as sf:
        srcgrp = sf[src.group]
        try:
            dimlabels = hdftools.arrToStrList(srcgrp['data'].attrs['dimensions'][:])
            shape = srcgrp['data'].shape
        except KeyError:
            raise KeyError("bdot.fullToCurrent requires the data array to have a 'dimensions' attribute")
            
        #We will duplicate the chunking on the new array
        chunks = srcgrp['data'].chunks
        

        try:
            xax = dimlabels.index("xaxis")
            yax = dimlabels.index("yaxis")
            zax = dimlabels.index("zaxis")

            xaxis = srcgrp['xaxis']
            yaxis = srcgrp['yaxis']
            zaxis = srcgrp['zaxis']

            nti = shape[dimlabels.index("time")]
            nx = shape[xax]
            ny = shape[yax]
            nz = shape[zax]

        except (KeyError, ValueError):
            #list.index raises ValueError; missing HDF5 datasets raise KeyError
            raise KeyError("bdot.fullToCurrent requires dimensions 'time', 'xaxis', 'yaxis', 'zaxis'")

        if nti > 10000:
            print("WARNING: NTI IS LARGE! CURRENT CALCULATION WILL TAKE A VERY LONG TIME!")
            print("If you have better things to do with your CPU hours, try thinning the data first.")
        
        #Create the destination file directory if necessary
        hdftools.requireDirs(dest.file)
        
        #Delete destination file if it already exists
        if os.path.exists(dest.file):
            os.remove(dest.file)
        
        with h5py.File(dest.file, 'w') as df:
            #Mode 'w' creates a fresh file, so make sure the group exists
            destgrp = df.require_group(dest.group)
            
            destgrp.require_dataset('data', shape, np.float32, chunks=chunks, compression='gzip')
            destgrp['data'].attrs['unit'] = 'A/cm^2'
            destgrp['data'].attrs['dimensions'] = hdftools.strListToArr(dimlabels)
            
            #Copy the axes over
            for ax in dimlabels:
                srcgrp.copy(ax, destgrp)
                
                
            chunksize = 100
            nchunks = int(np.ceil(nti/chunksize))
            
            #Initialize time-remaining printout
            tr = util.timeRemaining(nchunks, reportevery=10)
            
            for i in range(nchunks):
                #Update time remaining
                if verbose:
                    tr.updateTimeRemaining(i)

                a = i*chunksize
                if i == nchunks-1:
                    b = None
                else:
                    b = (i+1)*chunksize
                
                #Constant is (c/4pi) * (conversion CGS -> A/m^2) * (conversion A/m^2 -> A/cm^2)
                #= (2.99e10/(4*pi)) * (3.0e-5) * (1e-4) ~ 7.138
                #The 3.0e-5 factor is from the NRL formulary
                destgrp['data'][a:b, ...] = 7.138*math.curl(srcgrp['data'][a:b, ...],
                    xax, yax, zax, xaxis, yaxis, zaxis)
                
        return dest
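The 7.138 prefactor follows from Ampere's law in CGS, J = (c/4*pi)*curl(B), combined with the unit conversions named in the comment; a quick check of the arithmetic:

import numpy as np

c = 2.99e10                #speed of light in cm/s
cgs_to_A_per_m2 = 3.0e-5   #CGS -> A/m^2 (per the NRL formulary, as noted above)
A_per_m2_to_A_per_cm2 = 1e-4

prefactor = (c / (4*np.pi)) * cgs_to_A_per_m2 * A_per_m2_to_A_per_cm2
print(prefactor)  #-> ~7.138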