예제 #1
0
파일: mrecords.py 프로젝트: 1950/sawbuck
def fromtextfile(fname, delimitor=None, commentchar='#', missingchar='',
                 varnames=None, vartypes=None):
    """Creates a mrecarray from data stored in the file `fname`.

    Parameters
    ----------
    fname : {file name/handle}
        File name or handle of an opened file, as accepted by `openfile`.
    delimitor : {None, string}, optional
        Alphanumeric character used to separate columns in the file.
        If None, any (group of) white spacestring(s) will be used.
    commentchar : {'#', string}, optional
        Alphanumeric character used to mark the start of a comment.
    missingchar : {'', string}, optional
        String indicating missing data, and used to create the masks.
    varnames : {None, sequence}, optional
        Sequence of the variable names. If None, a list will be created from
        the first non empty line of the file.
    vartypes : {None, sequence}, optional
        Sequence of the variables dtypes. If None, it will be estimated from
        the first non-commented line.

    Returns
    -------
    The object built by `fromarrays` from one masked array per column.

    Raises
    ------
    ValueError
        If the end of the file is reached before a header line holding at
        least two fields is found.

    Notes
    -----
    Ultra simple: the varnames are in the header, one line.
    """
    # Try to open the file ......................
    f = openfile(fname)
    # Only close the handle when `openfile` returned a different object than
    # the one supplied; a handle passed in by the caller is left open.
    close_when_done = f is not fname
    try:
        # Get the first non-empty line as the varnames
        while True:
            line = f.readline()
            if not line:
                # readline() yields an empty string at end of file; without
                # this check the search for a header would never terminate.
                raise ValueError("No header line with at least two fields "
                                 "was found before the end of the file.")
            # find() returns -1 when no comment marker is present; slicing
            # with -1 would silently drop the last character of the line.
            comment_start = line.find(commentchar)
            if comment_start >= 0:
                line = line[:comment_start]
            _varnames = line.strip().split(delimitor)
            if len(_varnames) > 1:
                break
        if varnames is None:
            varnames = _varnames
        # Get the data ..............................
        # Rows starting with the comment character and one-character rows
        # (e.g. a bare newline) are skipped.
        _variables = masked_array([row.strip().split(delimitor) for row in f
                                   if row[0] != commentchar and len(row) > 1])
    finally:
        if close_when_done:
            f.close()
    (_, nfields) = _variables.shape
    # Try to guess the dtype ....................
    if vartypes is None:
        vartypes = _guessvartypes(_variables[0])
    else:
        vartypes = [np.dtype(v) for v in vartypes]
        if len(vartypes) != nfields:
            # Wrong number of dtypes: warn and fall back to guessing them
            # from the first data row.
            msg = "Attempting to %i dtypes for %i fields!"
            msg += " Reverting to default."
            warnings.warn(msg % (len(vartypes), nfields))
            vartypes = _guessvartypes(_variables[0])
    # Construct the descriptor ..................
    mdescr = list(zip(varnames, vartypes))
    mfillv = [ma.default_fill_value(t) for t in vartypes]
    # Get the data and the mask .................
    # We just need a list of masked_arrays. It's easier to create it like that:
    # an entry is masked wherever its text equals `missingchar`.
    _mask = (_variables.T == missingchar)
    _datalist = [masked_array(a, mask=m, dtype=t, fill_value=fv)
                 for (a, m, t, fv) in zip(_variables.T, _mask, vartypes, mfillv)]
    return fromarrays(_datalist, dtype=mdescr)
예제 #2
0
def fromtextfile(fname, delimitor=None, commentchar='#', missingchar='',
                 varnames=None, vartypes=None):
    """Creates a mrecarray from data stored in the file `fname`.

    Parameters
    ----------
    fname : {file name/handle}
        File name or handle of an opened file, as accepted by `openfile`.
    delimitor : {None, string}, optional
        Alphanumeric character used to separate columns in the file.
        If None, any (group of) white spacestring(s) will be used.
    commentchar : {'#', string}, optional
        Alphanumeric character used to mark the start of a comment.
    missingchar : {'', string}, optional
        String indicating missing data, and used to create the masks.
    varnames : {None, sequence}, optional
        Sequence of the variable names. If None, a list will be created from
        the first non empty line of the file.
    vartypes : {None, sequence}, optional
        Sequence of the variables dtypes. If None, it will be estimated from
        the first non-commented line.

    Returns
    -------
    The object built by `fromarrays` from one masked array per column.

    Raises
    ------
    ValueError
        If no header line with at least two fields exists in the file.

    Notes
    -----
    Ultra simple: the varnames are in the header, one line.
    """
    # Try to open the file ......................
    f = openfile(fname)
    # A handle supplied by the caller stays open; one created by `openfile`
    # (i.e. a different object than `fname`) is released once parsing is done.
    opened_here = f is not fname
    try:
        # Get the first non-empty line as the varnames
        while True:
            line = f.readline()
            if not line:
                # End of file: readline() keeps returning '' from here on, so
                # stop instead of looping forever.
                raise ValueError("No header line with at least two fields "
                                 "was found before the end of the file.")
            marker = line.find(commentchar)
            # marker is -1 when the line has no comment; keep the whole line
            # in that case rather than slicing off its last character.
            firstline = (line if marker < 0 else line[:marker]).strip()
            _varnames = firstline.split(delimitor)
            if len(_varnames) > 1:
                break
        if varnames is None:
            varnames = _varnames
        # Get the data ..............................
        # Skip rows that start with the comment character and rows of a
        # single character (such as a lone newline).
        _rows = [row.strip().split(delimitor) for row in f
                 if row[0] != commentchar and len(row) > 1]
    finally:
        if opened_here:
            f.close()
    _variables = masked_array(_rows)
    (_, nfields) = _variables.shape
    # Try to guess the dtype ....................
    if vartypes is None:
        vartypes = _guessvartypes(_variables[0])
    else:
        vartypes = [np.dtype(v) for v in vartypes]
        if len(vartypes) != nfields:
            # Mismatching number of dtypes: warn, then guess from first row.
            msg = "Attempting to %i dtypes for %i fields!"
            msg += " Reverting to default."
            warnings.warn(msg % (len(vartypes), nfields))
            vartypes = _guessvartypes(_variables[0])
    # Construct the descriptor ..................
    mdescr = list(zip(varnames, vartypes))
    mfillv = [ma.default_fill_value(t) for t in vartypes]
    # Get the data and the mask .................
    # We just need a list of masked_arrays. It's easier to create it like that:
    # entries equal to `missingchar` end up masked.
    _mask = (_variables.T == missingchar)
    _datalist = [masked_array(a, mask=m, dtype=t, fill_value=fv)
                 for (a, m, t, fv) in zip(_variables.T, _mask, vartypes, mfillv)]
    return fromarrays(_datalist, dtype=mdescr)
예제 #3
0
def add_var(dst,
            name,
            dims,
            data=None,
            shape=None,
            atts=None,
            dtype=None,
            zlib=True,
            smart_chunks=True,
            fillValue=None,
            **kwargs):
    ''' function to add a Variable to a NetCDF Dataset; returns the Variable reference; 
        all remaining kwargs are passed on to dst.createVariable()

        Parameters:
          dst          : target dataset; must provide .dimensions, .createDimension() and .createVariable()
          name         : name of the new variable
          dims         : sequence of dimension names, one per axis
          data         : optional numpy array; used to infer shape and dtype and written to the variable
          shape        : optional sequence of dimension sizes; has to agree with data.shape if both are given
          atts         : optional dict of attributes; passed through coerceAtts() before being set
          dtype        : optional data type; data are cast to this type if it differs from data.dtype
          zlib         : a dict is used directly as createVariable arguments; any other true value selects zlib_default
          smart_chunks : if true, 'chunksizes' is obtained from autoChunk(shape) for variables with more
                         than one dimension, unless 'chunksizes' was passed explicitly
          fillValue    : optional fill value; if None, it is taken from atts ('_FillValue', then
                         'missing_value') or from the masked-array default for the dtype

        Raises:
          TypeError   : if data is not a numpy array
          NCDataError : if dims/shape do not match data, or if neither data nor dtype is given
          NCAxisError : if shape and dims differ in length, or a size conflicts with an existing dimension
    '''

    # use data array to infer dimensions and data type
    if data is not None:
        if not isinstance(data, np.ndarray): raise TypeError(data)
        if len(dims) != data.ndim:
            raise NCDataError(
                "Number of dimensions in '{:s}' does not match data array.".
                format(name, ))
        if shape:
            # compare as tuples: a shape given as a list never equals the tuple data.shape
            if tuple(shape) != data.shape:
                raise NCDataError(
                    "Shape of '{:s}' does not match data array.".format(
                        name, ))
        else:
            shape = data.shape
        # get dtype
        # N.B.: a numpy dtype without fields has length zero and evaluates as false, so dtype
        #       instances have to be recognized explicitly, otherwise the requested cast is skipped
        if isinstance(dtype, np.dtype) or dtype:
            if dtype != data.dtype: data = data.astype(dtype)
        else: dtype = data.dtype
    if dtype is None:
        raise NCDataError(
            "Cannot construct a NetCDF Variable without a data array or an abstract data type."
        )
    dtype = np.dtype(dtype)  # use numpy types
    if dtype == np.dtype('bool_'):  # compare by value, not by object identity
        dtype = np.dtype('i1')  # cast numpy bools as 8-bit integers
    if dtype.kind == 'S': dtype = np.dtype(str)
    # N.B.: the dtype 'S' causes a TypeError, because NetCDF4 only allows 'S1', but Python str can be used instead

    # check/create dimensions
    if shape is None: shape = [None] * len(dims)
    else: shape = list(shape)
    if len(shape) != len(dims):
        raise NCAxisError(shape)
    for i, dim in enumerate(dims):
        if dim in dst.dimensions:
            dimlen = len(dst.dimensions[dim])
            if shape[i] is None:
                shape[i] = dimlen  # adopt the size of the existing dimension
            else:
                # an existing dimension of length zero is not treated as a conflict
                if shape[i] != dimlen and dimlen > 0:
                    raise NCAxisError(
                        'Size of dimension {:s} does not match records! {:d} != {:d}'
                        .format(dim, shape[i], dimlen))
        else:
            # NOTE(review): shape[i] may still be None here; presumably this requests an
            #               unlimited dimension - confirm against the NetCDF library in use
            dst.createDimension(dim, size=shape[i])
    dims = tuple(dims)
    shape = tuple(shape)

    # figure out parameters for variable
    varargs = dict()  # arguments to be passed to createVariable
    if isinstance(zlib, dict): varargs.update(zlib)
    elif zlib: varargs.update(zlib_default)
    varargs.update(kwargs)  # explicit keyword arguments override the compression settings
    if fillValue is None:
        if atts and '_FillValue' in atts:
            fillValue = atts['_FillValue']  # will be removed later
        elif atts and 'missing_value' in atts:
            fillValue = atts['missing_value']
        elif data is not None and isinstance(
                data,
                ma.MaskedArray):  # defaults values for numpy masked arrays
            fillValue = ma.default_fill_value(dtype)
        else:
            pass  # if it is not a masked array and no missing value information was passed, don't assign fillValue
    else:
        if data is not None and isinstance(data, ma.MaskedArray):
            # N.B.: this writes the private attribute directly, so no validation is applied
            data._fill_value = fillValue
    # make sure fillValue is OK (there have been problems...)
    fillValue = checkFillValue(fillValue, dtype)

    # set chunks based on reasonable division of domain
    if smart_chunks and len(dims) > 1 and 'chunksizes' not in varargs:
        varargs['chunksizes'] = autoChunk(shape)

    # create netcdf variable
    var = dst.createVariable(name,
                             dtype,
                             dims,
                             fill_value=fillValue,
                             **varargs)
    # add attributes
    if atts:
        var.setncatts(coerceAtts(atts))
        if fillValue is not None:
            var.setncattr(
                'missing_value', fillValue
            )  # coerceAtts removes this one... but I prefer it to _FillValue
    # assign coordinate data if given
    if data is not None: var[:] = data

    # return var reference
    return var
예제 #4
0
파일: nctools.py 프로젝트: xiefengy/GeoPy
def add_var(dst, name, dims, data=None, shape=None, atts=None, dtype=None, zlib=True, fillValue=None,
            lusestr=True, **kwargs):
  ''' Function to add a Variable to a NetCDF Dataset; returns the Variable reference.

      Parameters:
        dst       : target dataset; must provide .dimensions, .createDimension() and .createVariable()
        name      : name of the new variable
        dims      : sequence of dimension names, one per axis
        data      : optional numpy array; used to infer shape and dtype and written to the variable
        shape     : optional sequence of dimension sizes; has to agree with data.shape if both are given
        atts      : optional dict of attributes; if a fill value is in effect, 'missing_value' is
                    added to this dict in place before it is passed through coerceAtts()
        dtype     : optional data type; data are cast to this type if it differs from data.dtype
        zlib      : a dict is used directly as createVariable arguments; any other true value selects zlib_default
        fillValue : optional fill value; if None, it is taken from atts ('_FillValue', then
                    'missing_value') or from the masked-array default for the dtype
        lusestr   : if false, byte-string data ('S' kind) wider than one character are stored as
                    character arrays with an extra dimension named 'str_dim_<name>'

      Raises:
        TypeError   : if data is not a numpy array
        NCDataError : if dims/shape do not match data, or if neither data nor dtype is given
        NCAxisError : if shape and dims differ in length, a size conflicts with an existing
                      dimension, or a new dimension has no size information
  '''
  # all remaining kwargs are passed on to dst.createVariable()
  # N.B.: exceptions are raised in call form, which is valid in both Python 2 and Python 3
  # use data array to infer dimensions and data type
  if data is not None:
    if not isinstance(data,np.ndarray): raise TypeError
    if len(dims) != data.ndim:
      raise NCDataError("Number of dimensions in '%s' does not match data array."%(name,))
    if shape:
      # compare as tuples: a shape given as a list never equals the tuple data.shape
      if tuple(shape) != data.shape:
        raise NCDataError("Shape of '%s' does not match data array."%(name,))
    else: shape = data.shape
    # get dtype
    # N.B.: a numpy dtype without fields has length zero and evaluates as false, so dtype
    #       instances have to be recognized explicitly, otherwise the requested cast is skipped
    if isinstance(dtype,np.dtype) or dtype:
      if dtype != data.dtype: data = data.astype(dtype)
    else: dtype = data.dtype
  if dtype is None:
    raise NCDataError("Cannot construct a NetCDF Variable without a data array or an abstract data type.")
  dtype = np.dtype(dtype) # use numpy types
  if dtype == np.dtype('bool_'): dtype = np.dtype('i1') # cast numpy bools as 8-bit integers
  lstrvar = ( dtype.kind == 'S' and not lusestr )
  # check/create dimensions
  if shape is None: shape = [None,]*len(dims)
  else: shape = list(shape)
  if len(shape) != len(dims): raise NCAxisError
  for i,dim in enumerate(dims):
    if dim in dst.dimensions:
      if shape[i] is None:
        shape[i] = len(dst.dimensions[dim]) # adopt the size of the existing dimension
      else:
        if shape[i] != len(dst.dimensions[dim]):
          raise NCAxisError('Size of dimension %s does not match records! %i != %i'%(dim,shape[i],len(dst.dimensions[dim])))
    else:
      if shape[i] is not None: dst.createDimension(dim, size=shape[i])
      else: # report the offending dimension, not the whole list of dimensions
        raise NCAxisError("Cannot construct dimension '%s' without size information."%(dim,))
  dims = tuple(dims); shape = tuple(shape)
  # figure out parameters for variable
  varargs = dict() # arguments to be passed to createVariable
  if isinstance(zlib,dict): varargs.update(zlib)
  elif zlib: varargs.update(zlib_default)
  varargs.update(kwargs) # explicit keyword arguments override the compression settings
  if fillValue is None:
    if atts and '_FillValue' in atts: fillValue = atts['_FillValue'] # will be removed later
    elif atts and 'missing_value' in atts: fillValue = atts['missing_value']
    elif data is not None and isinstance(data,ma.MaskedArray): # defaults values for numpy masked arrays
      fillValue = ma.default_fill_value(dtype)
    else: pass # if it is not a masked array and no missing value information was passed, don't assign fillValue
  else:
    if data is not None and isinstance(data,ma.MaskedArray): data.set_fill_value(fillValue)
  # make sure fillValue is OK (there have been problems...)
  fillValue = checkFillValue(fillValue, dtype)
  if fillValue is not None:
    # atts defaults to None, which cannot take an item assignment; start a fresh dict in that case
    if atts is None: atts = dict()
    atts['missing_value'] = fillValue # I use fillValue and missing_value the same way
  # add extra dimension for strings
  if lstrvar and dtype.itemsize > 1:
    # add extra dimension
    shape = shape + (dtype.itemsize,)
    dims = dims + ('str_dim_'+name,) # naming pattern for string dimensions
    dst.createDimension(dims[-1], size=shape[-1])
    # change dtype to single char string
    dtype = np.dtype('|S1')
    # convert string arrays to char arrays
    if data is not None:
      data = nc.stringtochar(data)
      assert data.dtype == dtype, str(data.dtype)+', '+str(dtype)
  # create netcdf variable
  var = dst.createVariable(name, dtype, dims, fill_value=fillValue, **varargs)
  # add attributes
  if atts: var.setncatts(coerceAtts(atts))
  # assign coordinate data if given
  if data is not None: var[:] = data
  # return var reference
  return var
예제 #5
0
def fromtextfile(fname, delimiter=None, commentchar='#', missingchar='',
                 varnames=None, vartypes=None,
                 *, delimitor=np._NoValue):  # backwards compatibility
    """
    Creates a mrecarray from data stored in the file `fname`.

    Parameters
    ----------
    fname : {file name/handle}
        File name or handle of an opened file, as accepted by `openfile`.
        The handle is closed before this function returns or raises.
    delimiter : {None, string}, optional
        Alphanumeric character used to separate columns in the file.
        If None, any (group of) white spacestring(s) will be used.
    commentchar : {'#', string}, optional
        Alphanumeric character used to mark the start of a comment.
    missingchar : {'', string}, optional
        String indicating missing data, and used to create the masks.
    varnames : {None, sequence}, optional
        Sequence of the variable names. If None, a list will be created from
        the first non empty line of the file.
    vartypes : {None, sequence}, optional
        Sequence of the variables dtypes. If None, it will be estimated from
        the first non-commented line.
    delimitor : string, optional
        Deprecated spelling of `delimiter`; emits a DeprecationWarning.

    Returns
    -------
    The object built by `fromarrays` from one masked array per column.

    Raises
    ------
    TypeError
        If both `delimiter` and the deprecated `delimitor` are given.
    ValueError
        If the end of the file is reached before a header line holding at
        least two fields is found.

    Notes
    -----
    Ultra simple: the varnames are in the header, one line.
    """
    if delimitor is not np._NoValue:
        if delimiter is not None:
            raise TypeError("fromtextfile() got multiple values for argument "
                            "'delimiter'")
        # NumPy 1.22.0, 2021-09-23
        warnings.warn("The 'delimitor' keyword argument of "
                      "numpy.ma.mrecords.fromtextfile() is deprecated "
                      "since NumPy 1.22.0, use 'delimiter' instead.",
                      DeprecationWarning, stacklevel=2)
        delimiter = delimitor

    # Try to open the file.
    ftext = openfile(fname)

    # The handle is released in `finally`, so it no longer stays open when
    # parsing fails part-way through.
    try:
        # Get the first non-empty line as the varnames
        while True:
            line = ftext.readline()
            if not line:
                # readline() yields an empty string at end of file; without
                # this check the search for a header would never terminate.
                raise ValueError("No header line with at least two fields "
                                 "was found before the end of the file.")
            # find() returns -1 when no comment marker is present; slicing
            # with -1 would silently drop the last character of the line.
            comment_start = line.find(commentchar)
            if comment_start >= 0:
                line = line[:comment_start]
            _varnames = line.strip().split(delimiter)
            if len(_varnames) > 1:
                break
        if varnames is None:
            varnames = _varnames

        # Get the data.
        # Rows starting with the comment character and one-character rows
        # (e.g. a bare newline) are skipped.
        _variables = masked_array(
            [row.strip().split(delimiter) for row in ftext
             if row[0] != commentchar and len(row) > 1])
        (_, nfields) = _variables.shape
    finally:
        ftext.close()

    # Try to guess the dtype.
    if vartypes is None:
        vartypes = _guessvartypes(_variables[0])
    else:
        vartypes = [np.dtype(v) for v in vartypes]
        if len(vartypes) != nfields:
            # Wrong number of dtypes: warn and fall back to guessing them
            # from the first data row.
            msg = "Attempting to %i dtypes for %i fields!"
            msg += " Reverting to default."
            warnings.warn(msg % (len(vartypes), nfields), stacklevel=2)
            vartypes = _guessvartypes(_variables[0])

    # Construct the descriptor.
    mdescr = list(zip(varnames, vartypes))
    mfillv = [ma.default_fill_value(t) for t in vartypes]

    # Get the data and the mask.
    # We just need a list of masked_arrays. It's easier to create it like that:
    # an entry is masked wherever its text equals `missingchar`.
    _mask = (_variables.T == missingchar)
    _datalist = [masked_array(a, mask=m, dtype=t, fill_value=fv)
                 for (a, m, t, fv) in zip(_variables.T, _mask, vartypes, mfillv)]

    return fromarrays(_datalist, dtype=mdescr)
예제 #6
0
def add_var(dst,
            name,
            dims,
            data=None,
            shape=None,
            atts=None,
            dtype=None,
            zlib=True,
            fillValue=None,
            lusestr=True,
            **kwargs):
    ''' Function to add a Variable to a NetCDF Dataset; returns the Variable reference.

        Parameters:
          dst       : target dataset; must provide .dimensions, .createDimension() and .createVariable()
          name      : name of the new variable
          dims      : sequence of dimension names, one per axis
          data      : optional numpy array; used to infer shape and dtype and written to the variable
          shape     : optional sequence of dimension sizes; has to agree with data.shape if both are given
          atts      : optional dict of attributes; if a fill value is in effect, 'missing_value' is
                      added to this dict in place before it is passed through coerceAtts()
          dtype     : optional data type; data are cast to this type if it differs from data.dtype
          zlib      : a dict is used directly as createVariable arguments; any other true value
                      selects zlib_default
          fillValue : optional fill value; if None, it is taken from atts ('_FillValue', then
                      'missing_value') or from the masked-array default for the dtype
          lusestr   : if false, byte-string data ('S' kind) wider than one character are stored as
                      character arrays with an extra dimension named 'str_dim_<name>'

        Raises:
          TypeError   : if data is not a numpy array
          NCDataError : if dims/shape do not match data, or if neither data nor dtype is given
          NCAxisError : if shape and dims differ in length, a size conflicts with an existing
                        dimension, or a new dimension has no size information
    '''
    # all remaining kwargs are passed on to dst.createVariable()
    # N.B.: exceptions are raised in call form, which is valid in both Python 2 and Python 3
    # use data array to infer dimensions and data type
    if data is not None:
        if not isinstance(data, np.ndarray): raise TypeError
        if len(dims) != data.ndim:
            raise NCDataError(
                "Number of dimensions in '%s' does not match data array." %
                (name, ))
        if shape:
            # compare as tuples: a shape given as a list never equals the tuple data.shape
            if tuple(shape) != data.shape:
                raise NCDataError(
                    "Shape of '%s' does not match data array." % (name, ))
        else:
            shape = data.shape
        # get dtype
        # N.B.: a numpy dtype without fields has length zero and evaluates as false, so dtype
        #       instances have to be recognized explicitly, otherwise the requested cast is skipped
        if isinstance(dtype, np.dtype) or dtype:
            if dtype != data.dtype: data = data.astype(dtype)
        else:
            dtype = data.dtype
    if dtype is None:
        raise NCDataError(
            "Cannot construct a NetCDF Variable without a data array or an abstract data type."
        )
    dtype = np.dtype(dtype)  # use numpy types
    if dtype == np.dtype('bool_'):  # compare by value, not by object identity
        dtype = np.dtype('i1')  # cast numpy bools as 8-bit integers
    lstrvar = (dtype.kind == 'S' and not lusestr)
    # check/create dimensions
    if shape is None: shape = [None] * len(dims)
    else: shape = list(shape)
    if len(shape) != len(dims): raise NCAxisError
    for i, dim in enumerate(dims):
        if dim in dst.dimensions:
            if shape[i] is None:
                # adopt the size of the existing dimension
                shape[i] = len(dst.dimensions[dim])
            else:
                if shape[i] != len(dst.dimensions[dim]):
                    raise NCAxisError(
                        'Size of dimension %s does not match records! %i != %i'
                        % (dim, shape[i], len(dst.dimensions[dim])))
        else:
            if shape[i] is not None: dst.createDimension(dim, size=shape[i])
            else:
                # report the offending dimension, not the whole list of dimensions
                raise NCAxisError(
                    "Cannot construct dimension '%s' without size information."
                    % (dim, ))
    dims = tuple(dims)
    shape = tuple(shape)
    # figure out parameters for variable
    varargs = dict()  # arguments to be passed to createVariable
    if isinstance(zlib, dict): varargs.update(zlib)
    elif zlib: varargs.update(zlib_default)
    varargs.update(kwargs)  # explicit keyword arguments override the compression settings
    if fillValue is None:
        if atts and '_FillValue' in atts:
            fillValue = atts['_FillValue']  # will be removed later
        elif atts and 'missing_value' in atts:
            fillValue = atts['missing_value']
        elif data is not None and isinstance(
                data,
                ma.MaskedArray):  # defaults values for numpy masked arrays
            fillValue = ma.default_fill_value(dtype)
        else:
            pass  # if it is not a masked array and no missing value information was passed, don't assign fillValue
    else:
        if data is not None and isinstance(data, ma.MaskedArray):
            data.set_fill_value(fillValue)
    # make sure fillValue is OK (there have been problems...)
    fillValue = checkFillValue(fillValue, dtype)
    if fillValue is not None:
        # atts defaults to None, which cannot take an item assignment; start a fresh dict in that case
        if atts is None: atts = dict()
        atts['missing_value'] = fillValue  # I use fillValue and missing_value the same way
    # add extra dimension for strings
    if lstrvar and dtype.itemsize > 1:
        # add extra dimension
        shape = shape + (dtype.itemsize, )
        dims = dims + ('str_dim_' + name, )  # naming pattern for string dimensions
        dst.createDimension(dims[-1], size=shape[-1])
        # change dtype to single char string
        dtype = np.dtype('|S1')
        # convert string arrays to char arrays
        if data is not None:
            data = nc.stringtochar(data)
            assert data.dtype == dtype, str(data.dtype) + ', ' + str(dtype)
    # create netcdf variable
    var = dst.createVariable(name,
                             dtype,
                             dims,
                             fill_value=fillValue,
                             **varargs)
    # add attributes
    if atts: var.setncatts(coerceAtts(atts))
    # assign coordinate data if given
    if data is not None: var[:] = data
    # return var reference
    return var
예제 #7
0
파일: np.py 프로젝트: jthacker/jtmri
def apply_along_axis(func1d, axis, arr, *args, **kwargs):
    """
    Apply `func1d` to every 1-D slice of `arr` taken along `axis`.

    NOTE(review): the original placeholder read "(This docstring should be
    overwritten)", so this text may be replaced elsewhere in the module.

    Parameters
    ----------
    func1d : callable
        Called as ``func1d(slice_1d, *args, **kwargs)``.
    axis : int
        Axis along which the slices are taken; negative values count from
        the last axis.
    arr : array_like
        Input, converted with ``array(arr, copy=False, subok=True)``.
        NOTE(review): `array`, `asarray` and `zeros` are module-level names,
        presumably the `numpy.ma` versions -- confirm against the imports.
    *args, **kwargs
        Forwarded to `func1d`.

    Returns
    -------
    Array of the collected results, cast to the largest dtype among the
    individual results.  When `func1d` returns scalars the `axis` dimension
    is dropped; otherwise it is replaced by the shape of the first result,
    and slices that are entirely masked are filled with ``np.ma.masked``.

    Raises
    ------
    ValueError
        If `axis` is not smaller than ``arr.ndim``.
    """
    arr = array(arr, copy=False, subok=True)
    nd = arr.ndim
    if axis < 0:
        axis += nd
    if (axis >= nd):
        raise ValueError("axis must be less than arr.ndim; axis=%d, rank=%d."
            % (axis, nd))
    # `ind` is the running multi-index over all axes except `axis`;
    # `i` is the full index into `arr`, with a slice at position `axis`.
    ind = [0] * (nd - 1)
    i = np.zeros(nd, 'O')
    indlist = list(range(nd))
    indlist.remove(axis)
    i[axis] = slice(None, None)
    outshape = np.asarray(arr.shape).take(indlist)
    i.put(indlist, ind)
    # The first call decides whether results are scalars or sequences.
    res = func1d(arr[tuple(i.tolist())], *args, **kwargs)
    #  if res is a number, then we have a smaller output array
    asscalar = np.isscalar(res)
    if not asscalar:
        try:
            len(res)
        except TypeError:
            asscalar = True
    # Note: we shouldn't set the dtype of the output from the first result...
    #...so we force the type to object, and build a list of dtypes
    #...we'll just take the largest, to avoid some downcasting
    dtypes = []
    if asscalar:
        dtypes.append(np.asarray(res).dtype)
        outarr = zeros(outshape, object)
        outarr[tuple(ind)] = res
        # np.prod instead of np.product: the latter alias was removed in
        # NumPy 2.0.
        Ntot = np.prod(outshape)
        k = 1
        while k < Ntot:
            # increment the index (odometer style, last position fastest)
            ind[-1] += 1
            n = -1
            while (ind[n] >= outshape[n]) and (n > (1 - nd)):
                ind[n - 1] += 1
                ind[n] = 0
                n -= 1
            i.put(indlist, ind)
            res = func1d(arr[tuple(i.tolist())], *args, **kwargs)
            outarr[tuple(ind)] = res
            dtypes.append(asarray(res).dtype)
            k += 1
    else:
        # Is the first slice masked everywhere?  Its result was already
        # computed above, but is replaced by `masked` in that case.
        ismasked = np.ma.getmaskarray(arr)[tuple(i.tolist())].all()
        res = array(res, copy=False, subok=True)
        # `j` indexes the output: like `i`, but with one slice per dimension
        # of the result at position `axis`.
        j = i.copy()
        j[axis] = ([slice(None, None)] * res.ndim)
        j.put(indlist, ind)
        Ntot = np.prod(outshape)
        holdshape = outshape
        outshape = list(arr.shape)
        outshape[axis] = res.shape
        dtypes.append(asarray(res).dtype)
        outshape = flatten_inplace(outshape)
        outarr = zeros(outshape, object)
        u = tuple(flatten_inplace(j.tolist()))
        outarr[u] = np.ma.masked if ismasked else res
        k = 1
        while k < Ntot:
            # increment the index (odometer style, last position fastest)
            ind[-1] += 1
            n = -1
            while (ind[n] >= holdshape[n]) and (n > (1 - nd)):
                ind[n - 1] += 1
                ind[n] = 0
                n -= 1
            i.put(indlist, ind)
            j.put(indlist, ind)
            s = arr[tuple(i.tolist())]
            u = tuple(flatten_inplace(j.tolist()))
            # Only call func1d on slices holding at least one unmasked value.
            if not s.mask.all():
                res = func1d(s, *args, **kwargs)
                outarr[u] = res
                dtypes.append(asarray(res).dtype)
            else:
                outarr[u] = np.ma.masked
            k += 1
    max_dtypes = np.dtype(np.asarray(dtypes).max())
    if not hasattr(arr, '_mask'):
        result = np.asarray(outarr, dtype=max_dtypes)
    else:
        result = asarray(outarr, dtype=max_dtypes)
        result.fill_value = ma.default_fill_value(result)
    return result