def fromtextfile(fname, delimitor=None, commentchar='#', missingchar='',
                 varnames=None, vartypes=None):
    """
    Creates a mrecarray from data stored in the file `fname`.

    Parameters
    ----------
    fname : {file name/handle}
        File name or handle; it is handed to `openfile` to obtain the
        handle that is actually read.
    delimitor : {None, string}, optional
        Alphanumeric character used to separate columns in the file.
        If None, any (group of) white spacestring(s) will be used.
    commentchar : {'#', string}, optional
        Alphanumeric character used to mark the start of a comment.
    missingchar : {'', string}, optional
        String indicating missing data, and used to create the masks.
    varnames : {None, sequence}, optional
        Sequence of the variable names. If None, a list will be created from
        the first non empty line of the file.
    vartypes : {None, sequence}, optional
        Sequence of the variables dtypes. If None, it will be estimated from
        the first non-commented line.

    Returns
    -------
    The object built by `fromarrays` from one masked array per column.

    Raises
    ------
    ValueError
        If the end of the file is reached before a header line holding at
        least two fields has been found.

    Notes
    -----
    Ultra simple: the varnames are in the header, one line.

    A handle created by `openfile` is closed before returning.  When
    `openfile` hands back `fname` itself, the handle belongs to the caller
    and is left open.
    """
    # Try to open the file ......................
    fhandle = openfile(fname)
    try:
        # Get the first non-empty line as the varnames
        while True:
            line = fhandle.readline()
            if not line:
                # An empty read means end of file; without this guard the
                # loop would spin forever on a file lacking a header.
                raise ValueError("Reached the end of the file without "
                                 "finding a header line with at least "
                                 "two fields.")
            # NOTE: when `commentchar` is absent, find() returns -1 and the
            # slice merely drops the last character (normally the newline).
            firstline = line[:line.find(commentchar)].strip()
            _varnames = firstline.split(delimitor)
            if len(_varnames) > 1:
                break
        if varnames is None:
            varnames = _varnames
        # Get the data ..............................
        _variables = masked_array(
            [line.strip().split(delimitor) for line in fhandle
             if line[0] != commentchar and len(line) > 1])
    finally:
        # Only release a handle that was opened on behalf of the caller.
        if fhandle is not fname:
            fhandle.close()
    (_, nfields) = _variables.shape
    # Try to guess the dtype ....................
    if vartypes is None:
        vartypes = _guessvartypes(_variables[0])
    else:
        vartypes = [np.dtype(v) for v in vartypes]
        if len(vartypes) != nfields:
            msg = "Attempting to %i dtypes for %i fields!"
            msg += " Reverting to default."
            warnings.warn(msg % (len(vartypes), nfields))
            vartypes = _guessvartypes(_variables[0])
    # Construct the descriptor ..................
    mdescr = list(zip(varnames, vartypes))
    mfillv = [ma.default_fill_value(t) for t in vartypes]
    # Get the data and the mask .................
    # We just need a list of masked_arrays. It's easier to create it like that:
    _mask = (_variables.T == missingchar)
    _datalist = [masked_array(a, mask=m, dtype=t, fill_value=fv)
                 for (a, m, t, fv) in zip(_variables.T, _mask,
                                          vartypes, mfillv)]
    return fromarrays(_datalist, dtype=mdescr)
def fromtextfile(fname, delimitor=None, commentchar='#', missingchar='',
                 varnames=None, vartypes=None):
    """
    Creates a mrecarray from data stored in the file `fname`.

    Parameters
    ----------
    fname : {file name/handle}
        File name or handle, passed to `openfile` to get a readable handle.
    delimitor : {None, string}, optional
        Alphanumeric character used to separate columns in the file.
        If None, any (group of) white spacestring(s) will be used.
    commentchar : {'#', string}, optional
        Alphanumeric character used to mark the start of a comment.
    missingchar : {'', string}, optional
        String indicating missing data, and used to create the masks.
    varnames : {None, sequence}, optional
        Sequence of the variable names. If None, a list will be created from
        the first non empty line of the file.
    vartypes : {None, sequence}, optional
        Sequence of the variables dtypes. If None, it will be estimated from
        the first non-commented line.

    Returns
    -------
    The result of `fromarrays` applied to the per-column masked arrays.

    Raises
    ------
    ValueError
        If no header line with at least two fields exists before the end
        of the file.

    Notes
    -----
    Ultra simple: the varnames are in the header, one line.

    If `openfile` returns a new handle (rather than `fname` itself), that
    handle is closed before this function returns or raises.
    """
    # Try to open the file ......................
    source = openfile(fname)
    opened_here = source is not fname
    try:
        # Get the first non-empty line as the varnames
        _varnames = []
        while len(_varnames) <= 1:
            header = source.readline()
            if not header:
                # readline() yields an empty string only at end of file;
                # stop instead of looping forever on a header-less file.
                raise ValueError("Reached the end of the file without "
                                 "finding a header line with at least "
                                 "two fields.")
            # NOTE: find() returns -1 when there is no comment marker, so
            # the slice then only removes the final character of the line.
            firstline = header[:header.find(commentchar)].strip()
            _varnames = firstline.split(delimitor)
        if varnames is None:
            varnames = _varnames
        # Get the data ..............................
        rows = [row.strip().split(delimitor) for row in source
                if row[0] != commentchar and len(row) > 1]
    finally:
        if opened_here:
            source.close()
    _variables = masked_array(rows)
    (_, nfields) = _variables.shape
    # Try to guess the dtype ....................
    if vartypes is None:
        vartypes = _guessvartypes(_variables[0])
    else:
        vartypes = [np.dtype(v) for v in vartypes]
        if len(vartypes) != nfields:
            msg = "Attempting to %i dtypes for %i fields!"
            msg += " Reverting to default."
            warnings.warn(msg % (len(vartypes), nfields))
            vartypes = _guessvartypes(_variables[0])
    # Construct the descriptor ..................
    mdescr = list(zip(varnames, vartypes))
    mfillv = [ma.default_fill_value(vtype) for vtype in vartypes]
    # Get the data and the mask .................
    # We just need a list of masked_arrays. It's easier to create it like that:
    _mask = (_variables.T == missingchar)
    _datalist = []
    for (column, colmask, vtype, fillv) in zip(_variables.T, _mask,
                                               vartypes, mfillv):
        _datalist.append(masked_array(column, mask=colmask, dtype=vtype,
                                      fill_value=fillv))
    return fromarrays(_datalist, dtype=mdescr)
def add_var(dst, name, dims, data=None, shape=None, atts=None, dtype=None,
            zlib=True, smart_chunks=True, fillValue=None, **kwargs):
    ''' Function to add a Variable to a NetCDF Dataset; returns the Variable
        reference; all remaining kwargs are passed on to dst.createVariable().

        Parameters:
          dst          : dataset object; its `dimensions` mapping,
                         `createDimension` and `createVariable` are used
          name         : name of the new variable
          dims         : sequence of dimension names; dimensions missing from
                         `dst` are created with the matching `shape` entry
          data         : optional numpy array written into the variable; also
                         supplies shape and dtype when those are not given
          shape        : optional sequence of dimension sizes (list or tuple)
          atts         : optional dict of attributes, set via `coerceAtts`
          dtype        : optional data type; `data` is cast to it if it differs
          zlib         : dict of createVariable arguments, or a truthy value to
                         use `zlib_default`
          smart_chunks : if True, variables with more than one dimension get
                         'chunksizes' from `autoChunk` unless already given
          fillValue    : optional fill value; if None it is looked up in
                         `atts` or derived for masked arrays

        Raises TypeError if `data` is not a numpy array, NCDataError if
        `data`, `shape` and `dtype` are inconsistent or insufficient, and
        NCAxisError if `shape` does not fit `dims` or existing dimensions. '''
    # use data array to infer dimensions and data type
    if data is not None:
        if not isinstance(data, np.ndarray):
            raise TypeError(data)
        if len(dims) != data.ndim:
            raise NCDataError(
                "Number of dimensions in '{:s}' does not match data array.".
                format(name, ))
        if shape:
            # normalise to a tuple first: a list would never compare equal
            # to data.shape and trigger a spurious mismatch error
            if tuple(shape) != data.shape:
                raise NCDataError(
                    "Shape of '{:s}' does not match data array.".format(
                        name, ))
        else:
            shape = data.shape
        # get dtype
        if dtype:
            if dtype != data.dtype:
                data = data.astype(dtype)
        else:
            dtype = data.dtype
    if dtype is None:
        raise NCDataError(
            "Cannot construct a NetCDF Variable without a data array or an abstract data type."
        )
    dtype = np.dtype(dtype)  # use numpy types
    if dtype == np.dtype('bool_'):
        dtype = np.dtype('i1')  # cast numpy bools as 8-bit integers
    if dtype.kind == 'S':
        # N.B.: the dtype 'S' causes a TypeError, because NetCDF4 only allows
        #       'S1', but Python str can be used instead
        dtype = np.dtype(str)
    # check/create dimensions
    shape = [None] * len(dims) if shape is None else list(shape)
    if len(shape) != len(dims):
        raise NCAxisError(shape)
    for i, dim in enumerate(dims):
        if dim in dst.dimensions:
            dimlen = len(dst.dimensions[dim])
            if shape[i] is None:
                shape[i] = dimlen
            elif shape[i] != dimlen and dimlen > 0:
                # a dimension of current length 0 is accepted with any size
                raise NCAxisError(
                    'Size of dimension {:s} does not match records! {:d} != {:d}'
                    .format(dim, shape[i], dimlen))
        else:
            dst.createDimension(dim, size=shape[i])
    dims = tuple(dims)
    shape = tuple(shape)
    # figure out parameters for variable
    varargs = dict()  # arguments to be passed to createVariable
    if isinstance(zlib, dict):
        varargs.update(zlib)
    elif zlib:
        varargs.update(zlib_default)
    varargs.update(kwargs)
    if fillValue is None:
        if atts and '_FillValue' in atts:
            fillValue = atts['_FillValue']  # will be removed later
        elif atts and 'missing_value' in atts:
            fillValue = atts['missing_value']
        elif data is not None and isinstance(data, ma.MaskedArray):
            # defaults values for numpy masked arrays
            fillValue = ma.default_fill_value(dtype)
        # otherwise: not a masked array and no missing value information
        # was passed, so no fillValue is assigned
    elif data is not None and isinstance(data, ma.MaskedArray):
        data._fill_value = fillValue
    # make sure fillValue is OK (there have been problems...)
    fillValue = checkFillValue(fillValue, dtype)
    # set chunks based on reasonable division of domain
    if smart_chunks and len(dims) > 1 and 'chunksizes' not in varargs:
        varargs['chunksizes'] = autoChunk(shape)
    # create netcdf variable
    var = dst.createVariable(name, dtype, dims, fill_value=fillValue,
                             **varargs)
    # add attributes
    if atts:
        var.setncatts(coerceAtts(atts))
    if fillValue is not None:
        # coerceAtts removes this one... but I prefer it to _FillValue
        var.setncattr('missing_value', fillValue)
    # assign coordinate data if given
    if data is not None:
        var[:] = data
    # return var reference
    return var
def add_var(dst, name, dims, data=None, shape=None, atts=None, dtype=None,
            zlib=True, fillValue=None, lusestr=True, **kwargs):
    ''' Function to add a Variable to a NetCDF Dataset; returns the Variable
        reference. All remaining kwargs are passed on to dst.createVariable().

        Parameters:
          dst       : dataset object; its `dimensions` mapping,
                      `createDimension` and `createVariable` are used
          name      : name of the new variable
          dims      : sequence of dimension names; dimensions missing from
                      `dst` are created from the matching `shape` entry
          data      : optional numpy array written into the variable; also
                      supplies shape and dtype when those are not given
          shape     : optional sequence of dimension sizes (list or tuple)
          atts      : optional dict of attributes; if a fill value is in
                      effect, 'missing_value' is stored in this dict
          dtype     : optional data type; `data` is cast to it if it differs
          zlib      : dict of createVariable arguments, or a truthy value to
                      use `zlib_default`
          fillValue : optional fill value; if None it is looked up in `atts`
                      or derived for masked arrays
          lusestr   : if False, byte-string ('S') data wider than one
                      character are stored as 'S1' with an extra dimension

        Raises TypeError if `data` is not a numpy array, NCDataError if
        `data`, `shape` and `dtype` are inconsistent or insufficient, and
        NCAxisError if dimension sizes are missing or do not match. '''
    # use data array to infer dimensions and data type
    if data is not None:
        if not isinstance(data, np.ndarray):
            raise TypeError
        if len(dims) != data.ndim:
            raise NCDataError(
                "Number of dimensions in '%s' does not match data array."
                % (name,))
        if shape:
            # compare as tuples, so that a list-valued shape is accepted
            if tuple(shape) != data.shape:
                raise NCDataError(
                    "Shape of '%s' does not match data array." % (name,))
        else:
            shape = data.shape
        # get dtype
        if dtype:
            if dtype != data.dtype:
                data = data.astype(dtype)
        else:
            dtype = data.dtype
    if dtype is None:
        raise NCDataError("Cannot construct a NetCDF Variable without a data array or an abstract data type.")
    dtype = np.dtype(dtype)  # use numpy types
    if dtype == np.dtype('bool_'):
        dtype = np.dtype('i1')  # cast numpy bools as 8-bit integers
    lstrvar = (dtype.kind == 'S' and not lusestr)
    # check/create dimensions
    if shape is None:
        shape = [None] * len(dims)
    else:
        shape = list(shape)
    if len(shape) != len(dims):
        raise NCAxisError
    for i, dim in enumerate(dims):
        if dim in dst.dimensions:
            dimlen = len(dst.dimensions[dim])
            if shape[i] is None:
                shape[i] = dimlen
            elif shape[i] != dimlen:
                raise NCAxisError(
                    'Size of dimension %s does not match records! %i != %i'
                    % (dim, shape[i], dimlen))
        elif shape[i] is not None:
            dst.createDimension(dim, size=shape[i])
        else:
            # report the offending dimension, not the whole `dims` sequence
            raise NCAxisError(
                "Cannot construct dimension '%s' without size information."
                % (dim,))
    dims = tuple(dims)
    shape = tuple(shape)
    # figure out parameters for variable
    varargs = dict()  # arguments to be passed to createVariable
    if isinstance(zlib, dict):
        varargs.update(zlib)
    elif zlib:
        varargs.update(zlib_default)
    varargs.update(kwargs)
    if fillValue is None:
        if atts and '_FillValue' in atts:
            fillValue = atts['_FillValue']  # will be removed later
        elif atts and 'missing_value' in atts:
            fillValue = atts['missing_value']
        elif data is not None and isinstance(data, ma.MaskedArray):
            # defaults values for numpy masked arrays
            fillValue = ma.default_fill_value(dtype)
        # otherwise: not a masked array and no missing value information
        # was passed, so no fillValue is assigned
    elif data is not None and isinstance(data, ma.MaskedArray):
        data.set_fill_value(fillValue)
    # make sure fillValue is OK (there have been problems...)
    fillValue = checkFillValue(fillValue, dtype)
    if fillValue is not None:
        # I use fillValue and missing_value the same way
        if atts is None:
            atts = dict()  # no attributes were passed: start a fresh dict
        atts['missing_value'] = fillValue
    # add extra dimension for strings
    if lstrvar and dtype.itemsize > 1:
        # add extra dimension
        shape = shape + (dtype.itemsize,)
        dims = dims + ('str_dim_' + name,)  # naming pattern for string dimensions
        dst.createDimension(dims[-1], size=shape[-1])
        # change dtype to single char string
        dtype = np.dtype('|S1')
        # convert string arrays to char arrays
        if data is not None:
            data = nc.stringtochar(data)
            assert data.dtype == dtype, str(data.dtype) + ', ' + str(dtype)
    # create netcdf variable
    var = dst.createVariable(name, dtype, dims, fill_value=fillValue,
                             **varargs)
    # add attributes
    if atts:
        var.setncatts(coerceAtts(atts))
    # assign coordinate data if given
    if data is not None:
        var[:] = data
    # return var reference
    return var
def fromtextfile(fname, delimiter=None, commentchar='#', missingchar='',
                 varnames=None, vartypes=None,
                 *, delimitor=np._NoValue):  # backwards compatibility
    """
    Creates a mrecarray from data stored in the file `fname`.

    Parameters
    ----------
    fname : {file name/handle}
        File name or handle, passed to `openfile`.  The handle returned by
        `openfile` is always closed before this function returns or raises.
    delimiter : {None, string}, optional
        Alphanumeric character used to separate columns in the file.
        If None, any (group of) white spacestring(s) will be used.
    commentchar : {'#', string}, optional
        Alphanumeric character used to mark the start of a comment.
    missingchar : {'', string}, optional
        String indicating missing data, and used to create the masks.
    varnames : {None, sequence}, optional
        Sequence of the variable names. If None, a list will be created from
        the first non empty line of the file.
    vartypes : {None, sequence}, optional
        Sequence of the variables dtypes. If None, it will be estimated from
        the first non-commented line.
    delimitor : string, optional
        Deprecated spelling of `delimiter`; using it emits a
        DeprecationWarning.

    Returns
    -------
    The result of `fromarrays` applied to the per-column masked arrays.

    Raises
    ------
    TypeError
        If both `delimiter` and `delimitor` are given.
    ValueError
        If the end of the file is reached before a header line holding at
        least two fields has been found.

    Notes
    -----
    Ultra simple: the varnames are in the header, one line.
    """
    if delimitor is not np._NoValue:
        if delimiter is not None:
            raise TypeError("fromtextfile() got multiple values for argument "
                            "'delimiter'")
        # NumPy 1.22.0, 2021-09-23
        warnings.warn("The 'delimitor' keyword argument of "
                      "numpy.ma.mrecords.fromtextfile() is deprecated "
                      "since NumPy 1.22.0, use 'delimiter' instead.",
                      DeprecationWarning, stacklevel=2)
        delimiter = delimitor

    # Try to open the file.
    ftext = openfile(fname)
    try:
        # Get the first non-empty line as the varnames
        while True:
            line = ftext.readline()
            if not line:
                # An empty read means end of file; without this guard the
                # loop would never terminate on a file lacking a header.
                raise ValueError("Reached the end of the file without "
                                 "finding a header line with at least "
                                 "two fields.")
            # NOTE: when `commentchar` is absent, find() returns -1 and the
            # slice merely drops the last character (normally the newline).
            firstline = line[:line.find(commentchar)].strip()
            _varnames = firstline.split(delimiter)
            if len(_varnames) > 1:
                break
        if varnames is None:
            varnames = _varnames

        # Get the data.
        _variables = masked_array(
            [line.strip().split(delimiter) for line in ftext
             if line[0] != commentchar and len(line) > 1])
    finally:
        # Release the handle even when reading or parsing fails.
        ftext.close()
    (_, nfields) = _variables.shape

    # Try to guess the dtype.
    if vartypes is None:
        vartypes = _guessvartypes(_variables[0])
    else:
        vartypes = [np.dtype(v) for v in vartypes]
        if len(vartypes) != nfields:
            msg = "Attempting to %i dtypes for %i fields!"
            msg += " Reverting to default."
            warnings.warn(msg % (len(vartypes), nfields), stacklevel=2)
            vartypes = _guessvartypes(_variables[0])

    # Construct the descriptor.
    mdescr = list(zip(varnames, vartypes))
    mfillv = [ma.default_fill_value(t) for t in vartypes]

    # Get the data and the mask.
    # We just need a list of masked_arrays. It's easier to create it like that:
    _mask = (_variables.T == missingchar)
    _datalist = [masked_array(a, mask=m, dtype=t, fill_value=fv)
                 for (a, m, t, fv) in zip(_variables.T, _mask,
                                          vartypes, mfillv)]

    return fromarrays(_datalist, dtype=mdescr)
def add_var(dst, name, dims, data=None, shape=None, atts=None, dtype=None,
            zlib=True, fillValue=None, lusestr=True, **kwargs):
    ''' Function to add a Variable to a NetCDF Dataset; returns the Variable
        reference. All remaining kwargs are passed on to dst.createVariable().

        Parameters:
          dst       : dataset object; its `dimensions` mapping,
                      `createDimension` and `createVariable` are used
          name      : name of the new variable
          dims      : sequence of dimension names; dimensions missing from
                      `dst` are created from the matching `shape` entry
          data      : optional numpy array written into the variable; also
                      supplies shape and dtype when those are not given
          shape     : optional sequence of dimension sizes (list or tuple)
          atts      : optional dict of attributes; if a fill value is in
                      effect, 'missing_value' is stored in this dict
          dtype     : optional data type; `data` is cast to it if it differs
          zlib      : dict of createVariable arguments, or a truthy value to
                      use `zlib_default`
          fillValue : optional fill value; if None it is looked up in `atts`
                      or derived for masked arrays
          lusestr   : if False, byte-string ('S') data wider than one
                      character are stored as 'S1' with an extra dimension

        Raises TypeError if `data` is not a numpy array, NCDataError if
        `data`, `shape` and `dtype` are inconsistent or insufficient, and
        NCAxisError if dimension sizes are missing or do not match. '''
    # --- infer shape and data type from the data array, if there is one ---
    if data is not None:
        if not isinstance(data, np.ndarray):
            raise TypeError
        if len(dims) != data.ndim:
            msg = "Number of dimensions in '%s' does not match data array."
            raise NCDataError(msg % (name,))
        if not shape:
            shape = data.shape
        elif tuple(shape) != data.shape:
            # tuple() makes a list-valued shape comparable to data.shape
            msg = "Shape of '%s' does not match data array."
            raise NCDataError(msg % (name,))
        if not dtype:
            dtype = data.dtype
        elif dtype != data.dtype:
            data = data.astype(dtype)
    if dtype is None:
        raise NCDataError("Cannot construct a NetCDF Variable without a data array or an abstract data type.")
    dtype = np.dtype(dtype)  # use numpy types
    if dtype == np.dtype('bool_'):
        dtype = np.dtype('i1')  # cast numpy bools as 8-bit integers
    lstrvar = (dtype.kind == 'S' and not lusestr)

    # --- check/create dimensions ---
    shape = [None] * len(dims) if shape is None else list(shape)
    if len(shape) != len(dims):
        raise NCAxisError
    for idx, dim in enumerate(dims):
        size = shape[idx]
        if dim in dst.dimensions:
            existing = len(dst.dimensions[dim])
            if size is None:
                shape[idx] = existing
            elif size != existing:
                msg = 'Size of dimension %s does not match records! %i != %i'
                raise NCAxisError(msg % (dim, size, existing))
        elif size is None:
            # name the offending dimension, not the whole `dims` sequence
            msg = "Cannot construct dimension '%s' without size information."
            raise NCAxisError(msg % (dim,))
        else:
            dst.createDimension(dim, size=size)
    dims = tuple(dims)
    shape = tuple(shape)

    # --- figure out parameters for variable ---
    varargs = dict()  # arguments to be passed to createVariable
    if isinstance(zlib, dict):
        varargs.update(zlib)
    elif zlib:
        varargs.update(zlib_default)
    varargs.update(kwargs)

    # --- determine the fill value ---
    is_masked = data is not None and isinstance(data, ma.MaskedArray)
    if fillValue is not None:
        if is_masked:
            data.set_fill_value(fillValue)
    elif atts and '_FillValue' in atts:
        fillValue = atts['_FillValue']  # will be removed later
    elif atts and 'missing_value' in atts:
        fillValue = atts['missing_value']
    elif is_masked:
        # defaults values for numpy masked arrays
        fillValue = ma.default_fill_value(dtype)
    # otherwise: not a masked array and no missing value information was
    # passed, so no fillValue is assigned
    # make sure fillValue is OK (there have been problems...)
    fillValue = checkFillValue(fillValue, dtype)
    if fillValue is not None:
        # I use fillValue and missing_value the same way
        if atts is None:
            atts = dict()  # no attributes were passed: start a fresh dict
        atts['missing_value'] = fillValue

    # --- add extra dimension for strings ---
    if lstrvar and dtype.itemsize > 1:
        shape = shape + (dtype.itemsize,)
        dims = dims + ('str_dim_' + name,)  # naming pattern for string dimensions
        dst.createDimension(dims[-1], size=shape[-1])
        # change dtype to single char string
        dtype = np.dtype('|S1')
        # convert string arrays to char arrays
        if data is not None:
            data = nc.stringtochar(data)
            assert data.dtype == dtype, str(data.dtype) + ', ' + str(dtype)

    # --- create netcdf variable, add attributes and data ---
    var = dst.createVariable(name, dtype, dims, fill_value=fillValue,
                             **varargs)
    if atts:
        var.setncatts(coerceAtts(atts))
    # assign coordinate data if given
    if data is not None:
        var[:] = data
    # return var reference
    return var
def apply_along_axis(func1d, axis, arr, *args, **kwargs):
    """
    Apply `func1d` to the 1-D slices of `arr` taken along `axis`.

    (This docstring should be overwritten)

    Parameters
    ----------
    func1d : callable
        Called as ``func1d(slice_1d, *args, **kwargs)`` for each 1-D slice.
    axis : int
        Axis along which the slices are taken; negative values count from
        the last axis.
    arr : array_like
        Input data, converted with ``array(arr, copy=False, subok=True)``.
    *args, **kwargs
        Extra arguments forwarded to `func1d`.

    Returns
    -------
    result
        Array collecting the outputs of `func1d`.  Its dtype is the largest
        of the dtypes of the individual results.

    Raises
    ------
    ValueError
        If `axis` is out of range for `arr`.

    Notes
    -----
    When `func1d` returns non-scalar values, slices that are entirely masked
    are not passed to `func1d` (apart from the first slice, which is always
    evaluated); the matching output entries are set to masked instead.
    """
    arr = array(arr, copy=False, subok=True)
    nd = arr.ndim
    if axis < 0:
        axis += nd
    if (axis >= nd):
        raise ValueError("axis must be less than arr.ndim; axis=%d, rank=%d."
                         % (axis, nd))
    if axis < 0:
        # Still negative after wrapping: fail here with a clear message
        # instead of an obscure error from indlist.remove() below.
        raise ValueError("axis must not be less than -arr.ndim; "
                         "axis=%d, rank=%d." % (axis - nd, nd))
    # `ind` indexes every axis except `axis`; `i` is the full index into
    # `arr`, holding a slice at position `axis`.
    ind = [0] * (nd - 1)
    i = np.zeros(nd, 'O')
    indlist = list(range(nd))
    indlist.remove(axis)
    i[axis] = slice(None, None)
    outshape = np.asarray(arr.shape).take(indlist)
    i.put(indlist, ind)
    res = func1d(arr[tuple(i.tolist())], *args, **kwargs)
    # if res is a number, then we have a smaller output array
    asscalar = np.isscalar(res)
    if not asscalar:
        try:
            len(res)
        except TypeError:
            asscalar = True
    # Note: we shouldn't set the dtype of the output from the first result
    # so we force the type to object, and build a list of dtypes.  We'll
    # just take the largest, to avoid some downcasting
    dtypes = []
    if asscalar:
        dtypes.append(np.asarray(res).dtype)
        outarr = zeros(outshape, object)
        outarr[tuple(ind)] = res
        # np.prod rather than the np.product alias removed in NumPy 2.0
        Ntot = np.prod(outshape)
        k = 1
        while k < Ntot:
            # increment the index
            ind[-1] += 1
            n = -1
            while (ind[n] >= outshape[n]) and (n > (1 - nd)):
                ind[n - 1] += 1
                ind[n] = 0
                n -= 1
            i.put(indlist, ind)
            res = func1d(arr[tuple(i.tolist())], *args, **kwargs)
            outarr[tuple(ind)] = res
            dtypes.append(asarray(res).dtype)
            k += 1
    else:
        ismasked = np.ma.getmaskarray(arr)[tuple(i.tolist())].all()
        res = array(res, copy=False, subok=True)
        # `j` is the index into the output: position `axis` holds one full
        # slice per dimension of the result of `func1d`.
        j = i.copy()
        j[axis] = ([slice(None, None)] * res.ndim)
        j.put(indlist, ind)
        Ntot = np.prod(outshape)
        holdshape = outshape
        outshape = list(arr.shape)
        outshape[axis] = res.shape
        dtypes.append(asarray(res).dtype)
        # presumably flatten_inplace expands the nested shape/index entries
        # into a flat sequence -- TODO confirm against its definition
        outshape = flatten_inplace(outshape)
        outarr = zeros(outshape, object)
        u = tuple(flatten_inplace(j.tolist()))
        outarr[u] = np.ma.masked if ismasked else res
        k = 1
        while k < Ntot:
            # increment the index
            ind[-1] += 1
            n = -1
            while (ind[n] >= holdshape[n]) and (n > (1 - nd)):
                ind[n - 1] += 1
                ind[n] = 0
                n -= 1
            i.put(indlist, ind)
            j.put(indlist, ind)
            s = arr[tuple(i.tolist())]
            u = tuple(flatten_inplace(j.tolist()))
            if not s.mask.all():
                res = func1d(s, *args, **kwargs)
                outarr[u] = res
                dtypes.append(asarray(res).dtype)
            else:
                # fully masked slice: skip func1d and mask the output
                outarr[u] = np.ma.masked
            k += 1
    max_dtypes = np.dtype(np.asarray(dtypes).max())
    if not hasattr(arr, '_mask'):
        result = np.asarray(outarr, dtype=max_dtypes)
    else:
        result = asarray(outarr, dtype=max_dtypes)
        result.fill_value = ma.default_fill_value(result)
    return result