def make_gene_map_1(self):
    count = 0
    self.iterator = itertools.product(range(self.time_len),
                                      range(self.lat_len),
                                      range(self.lon_len))
    for x_valid in self.iterator:
        binary_string = bin(count)[2:]
        while len(binary_string) < self.string_length:
            binary_string = "0" + binary_string
        if ma.getdata(self.array[x_valid]) < 100:  # chooses unmasked points?
            self.gene_map[binary_string] = {}
            self.gene_map[binary_string]["coordinate"] = tuple(x_valid)
            self.gene_map[binary_string]["value"] = self.array[x_valid]
            print(count, binary_string,
                  self.gene_map[binary_string]["value"])  # debug
        else:
            pass
        count += 1
    self.last_valid_binary_string = binary_string
    # int(s, 2) replaces eval("0b" + s), which is a SyntaxError in
    # Python 3 when the string has leading zeros.
    not_valid_first = int(binary_string, 2) + 1
    not_valid_last = int("1" * self.string_length, 2)
    print(not_valid_last)
    print(x_valid)
    print(ma.isMA(self.array))
    for x_not_valid in range(not_valid_first, not_valid_last + 1):
        binary_string = bin(x_not_valid)[2:]
        self.gene_map[binary_string] = {}
        self.gene_map[binary_string]["coordinate"] = (999, 999, 999)
        self.gene_map[binary_string]["value"] = 1e06
        print(x_not_valid, binary_string,
              self.gene_map[binary_string]["value"])
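# A minimal standalone sketch (not part of the original class) of the
# binary-key encoding used above, assuming a hypothetical 3x3x3 grid, so
# 27 cells need (27 - 1).bit_length() == 5 bits. The "0"-prepend while
# loop in make_gene_map_1 is equivalent to str.zfill, and the flat index
# enumerates the itertools.product order.
n_cells = 3 * 3 * 3                              # hypothetical grid size
string_length = max(1, (n_cells - 1).bit_length())

for count in range(n_cells):
    key = bin(count)[2:].zfill(string_length)    # same result as the loop
    assert int(key, 2) == count                  # keys decode back to index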
def shiftgrid(lon0, datain, lonsin, start=False, cyclic=360.0):
    if np.fabs(lonsin[-1] - lonsin[0] - cyclic) > 1.e-4:
        # Use all data instead of raise ValueError, 'cyclic point not included'
        start_idx = 0
    else:
        # If cyclic, remove the duplicate point
        start_idx = 1
    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
        raise ValueError('lon0 outside of range of lonsin')
    i0 = np.argmin(np.fabs(lonsin - lon0))
    i0_shift = len(lonsin) - i0
    if ma.isMA(datain):
        dataout = ma.zeros(datain.shape, datain.dtype)
    else:
        dataout = np.zeros(datain.shape, datain.dtype)
    if ma.isMA(lonsin):
        lonsout = ma.zeros(lonsin.shape, lonsin.dtype)
    else:
        lonsout = np.zeros(lonsin.shape, lonsin.dtype)
    if start:
        lonsout[0:i0_shift] = lonsin[i0:]
    else:
        lonsout[0:i0_shift] = lonsin[i0:] - cyclic
    dataout[..., 0:i0_shift] = datain[..., i0:]
    if start:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx] + cyclic
    else:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx]
    dataout[..., i0_shift:] = datain[..., start_idx:i0 + start_idx]
    return dataout, lonsout
def shiftgrid(lon0, datain, lonsin, start=True, cyclic=360.0):
    """
    Shift global lat/lon grid east or west.

    .. tabularcolumns:: |l|L|

    ==============   ====================================================
    Arguments        Description
    ==============   ====================================================
    lon0             starting longitude for shifted grid
                     (ending longitude if start=False). lon0 must be on
                     input grid (within the range of lonsin).
    datain           original data with longitude the right-most
                     dimension.
    lonsin           original longitudes.
    ==============   ====================================================

    .. tabularcolumns:: |l|L|

    ==============   ====================================================
    Keywords         Description
    ==============   ====================================================
    start            if True, lon0 represents the starting longitude
                     of the new grid. if False, lon0 is the ending
                     longitude. Default True.
    cyclic           width of periodic domain (default 360)
    ==============   ====================================================

    returns ``dataout,lonsout`` (data and longitudes on shifted grid).
    """
    if np.fabs(lonsin[-1] - lonsin[0] - cyclic) > 1.e-4:
        # Use all data instead of raise ValueError, 'cyclic point not included'
        start_idx = 0
    else:
        # If cyclic, remove the duplicate point
        start_idx = 1
    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
        raise ValueError('lon0 outside of range of lonsin')
    i0 = np.argmin(np.fabs(lonsin - lon0))
    i0_shift = len(lonsin) - i0
    if ma.isMA(datain):
        dataout = ma.zeros(datain.shape, datain.dtype)
    else:
        dataout = np.zeros(datain.shape, datain.dtype)
    if ma.isMA(lonsin):
        lonsout = ma.zeros(lonsin.shape, lonsin.dtype)
    else:
        lonsout = np.zeros(lonsin.shape, lonsin.dtype)
    if start:
        lonsout[0:i0_shift] = lonsin[i0:]
    else:
        lonsout[0:i0_shift] = lonsin[i0:] - cyclic
    dataout[..., 0:i0_shift] = datain[..., i0:]
    if start:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx] + cyclic
    else:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx]
    dataout[..., i0_shift:] = datain[..., start_idx:i0 + start_idx]
    return dataout, lonsout
def shiftgrid(lon0, datain, lonsin, start=True, cyclic=360.0):
    """
    Purpose::
        Shift global lat/lon grid east or west. This function is taken
        directly from the (unreleased) basemap 1.0.7 source code as version
        1.0.6 does not currently support arrays with more than two
        dimensions. https://github.com/matplotlib/basemap

    Input::
        lon0 - starting longitude for shifted grid (ending longitude if
               start=False). lon0 must be on input grid (within the range
               of lonsin).
        datain - original data with longitude the right-most dimension.
        lonsin - original longitudes.
        start - if True, lon0 represents the starting longitude of the new
                grid. if False, lon0 is the ending longitude. Default True.
        cyclic - width of periodic domain (default 360)

    Output::
        dataout - data on shifted grid
        lonsout - lons on shifted grid
    """
    if np.fabs(lonsin[-1] - lonsin[0] - cyclic) > 1.e-4:
        # Use all data instead of raise ValueError, 'cyclic point not included'
        start_idx = 0
    else:
        # If cyclic, remove the duplicate point
        start_idx = 1
    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
        raise ValueError('lon0 outside of range of lonsin')
    i0 = np.argmin(np.fabs(lonsin - lon0))
    i0_shift = len(lonsin) - i0
    if ma.isMA(datain):
        dataout = ma.zeros(datain.shape, datain.dtype)
    else:
        dataout = np.zeros(datain.shape, datain.dtype)
    if ma.isMA(lonsin):
        lonsout = ma.zeros(lonsin.shape, lonsin.dtype)
    else:
        lonsout = np.zeros(lonsin.shape, lonsin.dtype)
    if start:
        lonsout[0:i0_shift] = lonsin[i0:]
    else:
        lonsout[0:i0_shift] = lonsin[i0:] - cyclic
    dataout[..., 0:i0_shift] = datain[..., i0:]
    if start:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx] + cyclic
    else:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx]
    dataout[..., i0_shift:] = datain[..., start_idx:i0 + start_idx]
    return dataout, lonsout
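# Hedged usage sketch for the shiftgrid variants above (they share the same
# core logic; this assumes one of them is in scope as shiftgrid). A toy
# 0-330 grid with no cyclic point is shifted so the longitudes run from
# -180; the mask survives because ma.zeros is allocated when
# ma.isMA(datain) is true.
import numpy as np
import numpy.ma as ma

lons = np.arange(0., 360., 30.)            # 12 longitudes, no cyclic point
data = ma.masked_less(np.arange(12.), 2.)  # first two values masked
dataout, lonsout = shiftgrid(180., data, lons, start=False)
assert lonsout[0] == -180.0
assert ma.isMA(dataout) and dataout.mask.any()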
def set_data(self, x, y, A):
    if not ma.isMA(A):
        A = np.asarray(A)
    if x is None:
        x = np.arange(0, A.shape[1] + 1, dtype=np.float64)
    else:
        x = np.asarray(x, np.float64).ravel()
    if y is None:
        y = np.arange(0, A.shape[0] + 1, dtype=np.float64)
    else:
        y = np.asarray(y, np.float64).ravel()

    if A.shape[:2] != (y.size - 1, x.size - 1):
        print(A.shape)
        print(y.size)
        print(x.size)
        raise ValueError("Axes don't match array shape")
    if A.ndim not in [2, 3]:
        raise ValueError("A must be 2D or 3D")
    if A.ndim == 3 and A.shape[2] == 1:
        A.shape = A.shape[:2]
    self.is_grayscale = False
    if A.ndim == 3:
        if A.shape[2] in [3, 4]:
            if ((A[:, :, 0] == A[:, :, 1]).all() and
                    (A[:, :, 0] == A[:, :, 2]).all()):
                self.is_grayscale = True
        else:
            raise ValueError("3D arrays must have RGB or RGBA as last dim")
    self._A = A
    self._Ax = x
    self._Ay = y
    self.update_dict['array'] = True
def make_gene_map_2(self):
    """
    Take the attributes from the array and use them to create a gene map
    for the array. The gene map is a dictionary keyed by binary strings;
    each key is a bit string of a fixed, appropriate length, calculated
    from the total number of grid points.
    """
    count = 0
    self.iterator_one = itertools.product(range(self.time_len),
                                          range(self.lat_len),
                                          range(self.lon_len))
    # Assign a binary string a location and a value from the data
    print("\n")
    print("Creating gene-map dictionary... \n")
    print("Assigning valid locations to binary strings! \n")
    for x_valid in self.iterator_one:
        binary_string = bin(count)[2:]
        while len(binary_string) < self.string_length:
            binary_string = "0" + binary_string
        self.gene_map[binary_string] = {}
        if ma.is_masked(self.array[x_valid]):
            pass
        else:
            self.gene_map[binary_string]["coordinate"] = tuple(x_valid)
            self.gene_map[binary_string]["value"] = self.array[x_valid]
            self.location_dict[x_valid[1:3]] = []
            self.location_dict_stdevs[x_valid[1:3]] = 0
            count += 1
    self.last_valid_binary_string = binary_string
    binary_string_old = binary_string
    not_valid_first = int(binary_string, 2) + 1
    not_valid_last = int("1" * self.string_length, 2)
    self.count = count
    if self.count == self.count_non_masked:
        print("The counter corresponds with the non-masked count! \n")

    # Pad the dictionary to give the left-over binary strings some value
    print("Assigning left over binary strings to non-existent locations! \n")
    self.iterator_two = itertools.product(range(self.time_len),
                                          range(self.lat_len),
                                          range(self.lon_len))
    count_2 = not_valid_first
    for x_not_valid in self.iterator_two:
        binary_string = bin(count_2)[2:]
        while len(binary_string) < self.string_length:
            binary_string = "0" + binary_string
        self.gene_map[binary_string] = {}
        self.gene_map[binary_string]["coordinate"] = (999, 999, 999)
        self.gene_map[binary_string]["value"] = 1e09
        if count_2 == not_valid_last:
            break
        else:
            count_2 += 1

    print("There are %d valid locations. \n" % count)
    print("The last binary string is: ", binary_string)
    print("The last binary string assigned to a valid location is: ",
          binary_string_old)
    print("The length of binary string is: ", self.string_length)
    print("The non-valid locations fall between %d and %d. \n"
          % (not_valid_first, not_valid_last))
    print("Is the array masked?: \n", ma.isMA(self.array))
    print("The gene-map has been created! \n")
def test_topo(self):
    file = self.f
    # basic case
    cstr_list = ('time|i0 zc|ZP|2500 yc|i5 xc|:',
                 'time|i0 zc|ZP|2500m yc|i5 xc|:',
                 'time|i0 zc|ZP|1500m yc|i5 xc|:',
                 'time|i0 zc|ZP|1000,1500m yc|i5.5 xc|:')
    results = ((21,), (21,), (21,), (2, 21))
    for (cstr, res) in zip(cstr_list, results):
        if verbose:
            print(cstr)
            print("in test_topo")
        xsel = Nio.inp2xsel(file, 'PT', cstr)
        pt = file.variables['PT'][cstr]
        #pt = file.variables['ZP'][:]
        if verbose:
            print(pt.shape)
        if verbose:
            if ma.isMA(pt):
                print(N.asarray(pt.filled()))
            else:
                print(pt)
        assert_equal(pt.shape, res)
    # ERROR:
    #cstr = 'xc|10k yc|i5.5:8:0.5i zc|ZP|2.5,3.5 time|i0:6:3'
    #if verbose: print(cstr)
    #pt = file.variables['PT'][cstr]
    #if verbose: print(pt.shape)
    file.close()
def set_data(self, x, y, A):
    x = np.asarray(x, np.float32)
    y = np.asarray(y, np.float32)
    if not ma.isMA(A):
        A = np.asarray(A)
    if (len(x.shape) != 1 or len(y.shape) != 1 or
            A.shape[0:2] != (y.shape[0], x.shape[0])):
        raise TypeError("Axes don't match array shape")
    if len(A.shape) not in [2, 3]:
        raise TypeError("Can only plot 2D or 3D data")
    if len(A.shape) == 3 and A.shape[2] not in [1, 3, 4]:
        raise TypeError(
            "3D arrays must have three (RGB) or four (RGBA) color components")
    if len(A.shape) == 3 and A.shape[2] == 1:
        A.shape = A.shape[0:2]
    if len(A.shape) == 2:
        if A.dtype != np.uint8:
            A = (self.cmap(self.norm(A)) * 255).astype(np.uint8)
        else:
            A = np.repeat(A[:, :, np.newaxis], 4, 2)
            A[:, :, 3] = 255
    else:
        if A.dtype != np.uint8:
            A = (255 * A).astype(np.uint8)
        if A.shape[2] == 3:
            # np.zeros, not the bare zeros of the original
            B = np.zeros(tuple(list(A.shape[0:2]) + [4]), np.uint8)
            B[:, :, 0:3] = A
            B[:, :, 3] = 255
            A = B
    self._A = A
    self._Ax = x
    self._Ay = y
    self._imcache = None
def set_data(self, A, shape=None):
    """
    Set the image array

    ACCEPTS: numpy/PIL Image A
    """
    # check if data is PIL Image without importing Image
    if hasattr(A, 'getpixel'):
        self._A = pil_to_array(A)
    elif ma.isMA(A):
        self._A = A
    else:
        self._A = np.asarray(A)  # assume array

    # float, not the removed np.float alias
    if (self._A.dtype != np.uint8 and
            not np.can_cast(self._A.dtype, float)):
        raise TypeError("Image data can not convert to float")

    if (self._A.ndim not in (2, 3) or
            (self._A.ndim == 3 and self._A.shape[-1] not in (3, 4))):
        raise TypeError("Invalid dimensions for image data")

    self._imcache = None
    self._rgbacache = None
    self._oldxslice = None
    self._oldyslice = None
def test_masked_unweighted():
    data_in = ma.ones((10,), dtype=[('x', float), ('y', float)])
    data_out = downsample(data_in, 2)
    assert ma.isMA(data_out)
    assert np.array_equal(data_out, data_in[:5])
    data_in['x'].mask[2] = True
    data_in.mask[7] = (True, True)
    data_out = downsample(data_in, 2)
    assert np.array_equal(data_out, data_in[:5])
def _abs(x):
    """
    Works around a numpy bug where abs() of a masked array produces a
    ComplexWarning (casting complex values to real discards the
    imaginary part).
    """
    if ma.isMA(x):
        return ma.masked_array(np.abs(x.data), x.mask)
    else:
        return ma.masked_array(np.abs(x))
def test_one_masked():
    data1 = ma.ones((10,), dtype=[('f', float), ('w', float)])
    data2 = np.ones((10,), dtype=[('f', float), ('w', float)])
    data1.mask = False
    data1['f'].mask[2] = True
    result = accumulate(data1_in=data1, data2_in=data2, add='f', weight='w')
    assert not ma.isMA(result), 'Result should not be masked.'
    assert np.all(result['f'] == 1), 'Incorrect addition result.'
    assert np.array_equal(result['w'][1:4], (2, 1, 2)), \
        'Mask not used correctly.'
def test_extrapolate():
    data = np.empty((10,), dtype=[('x', float), ('y', float)])
    data['x'] = np.arange(10.)
    data['y'] = np.arange(10.)
    x2 = np.arange(-1, 11)
    result = resample(data, 'x', x2, 'y')
    assert ma.isMA(result)
    assert result['y'].mask[0]
    assert result['y'].mask[-1]
    assert np.array_equal(result['y'][1:-1], x2[1:-1])
def test_masked_weighted():
    data_in = ma.ones((10,), dtype=[('x', float), ('y', float)])
    data_out = downsample(data_in, 2, weight='y')
    assert ma.isMA(data_out)
    assert np.all(data_out['x'] == 1.)
    assert np.all(data_out['y'] == 2.)
    data_in['x'].mask[2] = True
    data_in.mask[7] = (True, True)
    data_out = downsample(data_in, 2, weight='y')
    assert np.all(data_out['x'] == 1.)
    assert np.all(data_out['y'] == (2., 1., 2., 1., 2.))
def addcyclic(arrin, lonsin):
    """
    ``arrout, lonsout = addcyclic(arrin, lonsin)`` adds cyclic
    (wraparound) point in longitude to ``arrin`` and ``lonsin``.
    """
    nlats = arrin.shape[0]
    nlons = arrin.shape[1]
    if ma.isMA(arrin):
        arrout = ma.zeros((nlats, nlons + 1), arrin.dtype)
    else:
        arrout = numpy.zeros((nlats, nlons + 1), arrin.dtype)
    arrout[:, 0:nlons] = arrin[:, :]
    arrout[:, nlons] = arrin[:, 0]
    if ma.isMA(lonsin):
        lonsout = ma.zeros(nlons + 1, lonsin.dtype)
    else:
        lonsout = numpy.zeros(nlons + 1, lonsin.dtype)
    lonsout[0:nlons] = lonsin[:]
    lonsout[nlons] = lonsin[-1] + lonsin[1] - lonsin[0]
    return arrout, lonsout
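# Hedged usage sketch for addcyclic above (assuming it is in scope):
# append the wraparound longitude column to a 2-lat x 3-lon masked field.
import numpy as np
import numpy.ma as ma

lons = np.array([0., 120., 240.])
field = ma.masked_invalid(np.array([[1., 2., np.nan],
                                    [4., 5., 6.]]))
arrout, lonsout = addcyclic(field, lons)
assert arrout.shape == (2, 4)
assert np.all(arrout[:, 3] == arrout[:, 0])   # first column duplicated
assert lonsout[-1] == 360.                    # 240 + (120 - 0)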
def shiftgrid(lon0, datain, lonsin, start=False, cyclic=360.0):
    """
    lon0: new starting longitude, to be in lonsin
    datain: input data to be shifted
    lonsin: longitude axis of data (supposed to be the last axis of datain)

    optional arguments:
    start: (default: False)
    cyclic: (default: 360.0)

    shift data to start at longitude lon0

    return dataout, lonsout
    """
    if np.fabs(lonsin[-1] - lonsin[0] - cyclic) > 1.e-4:
        # Use all data instead of raise ValueError, 'cyclic point not included'
        start_idx = 0
    else:
        # If cyclic, remove the duplicate point
        start_idx = 1
    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
        raise ValueError('lon0 outside of range of lonsin')
    i0 = np.argmin(np.fabs(lonsin - lon0))
    i0_shift = len(lonsin) - i0
    if ma.isMA(datain):
        dataout = ma.zeros(datain.shape, datain.dtype)
    else:
        dataout = np.zeros(datain.shape, datain.dtype)
    if ma.isMA(lonsin):
        lonsout = ma.zeros(lonsin.shape, lonsin.dtype)
    else:
        lonsout = np.zeros(lonsin.shape, lonsin.dtype)
    if start:
        lonsout[0:i0_shift] = lonsin[i0:]
    else:
        lonsout[0:i0_shift] = lonsin[i0:] - cyclic
    dataout[..., 0:i0_shift] = datain[..., i0:]
    if start:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx] + cyclic
    else:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx]
    dataout[..., i0_shift:] = datain[..., start_idx:i0 + start_idx]
    return dataout, lonsout
def test_propagated_array_mask():
    wlen = np.arange(10)
    flux = ma.ones((10,))
    flux.mask = False
    flux[2] = ma.masked
    result = redshift(z_in=0, z_out=1, rules=[
        {'name': 'wlen', 'exponent': +1, 'array_in': wlen},
        {'name': 'flux', 'exponent': -1, 'array_in': flux}])
    assert ma.isMA(result)
    assert not result['wlen'].mask[2], 'Input mask not propagated.'
    assert result['flux'].mask[2], 'Input mask not propagated.'
    assert not result['flux'].mask[3], 'Input mask not propagated.'
def __init__(self, var, name=None):
    """Create a VarInfo object.

    Arguments:
    var: numpy or numpy.ma array
    """
    # Compute all necessary statistics in initialization, so that we don't
    # have to hold onto the variable in memory for later use (in case the
    # variable consumes a lot of memory).
    if not ma.isMA(var):
        var = ma.array(var)
    self._compute_stats(var)
    self.name = name
def test_propagated_data_mask():
    data_in = ma.ones((10,), dtype=[
        ('wlen', float), ('flux', float), ('extra', int)])
    data_in['wlen'][1] = ma.masked
    data_in['extra'][2] = ma.masked
    result = redshift(z_in=0, z_out=1, data_in=data_in, rules=[
        {'name': 'wlen', 'exponent': +1},
        {'name': 'flux', 'exponent': -1}])
    assert ma.isMA(result)
    assert not result['wlen'].mask[0], 'Input mask not propagated.'
    assert not result['flux'].mask[0], 'Input mask not propagated.'
    assert result['wlen'].mask[1], 'Input mask not propagated.'
    assert result['extra'].mask[2], 'Input mask not propagated.'
def test_both_masked():
    data1 = ma.ones((10,), dtype=[('f', float), ('w', float), ('i', int)])
    data2 = ma.ones((10,), dtype=[('f', float), ('w', float), ('i', int)])
    data1.mask = False
    data1['f'].mask[2:4] = True
    data2.mask = False
    data2['f'].mask[3:5] = True
    result = accumulate(data1_in=data1, data2_in=data2,
                        add='f', weight='w', join='i')
    assert not ma.isMA(result), 'Result should not be masked.'
    valid = result['w'] != 0
    assert np.all(result['f'][valid] == 1), 'Incorrect addition result.'
    assert np.array_equal(result['w'][1:6], (2, 1, 0, 1, 2)), \
        'Mask not used correctly.'
def addcyclic(data):
    """
    Adds cyclic points to an array in rightmost dimension.

    data = input 2D array.
    """
    if data.ndim != 2:
        print('ERROR: Input array is not two-dimensional')
        return
    if MA.isMA(data):
        newdata = MA.concatenate((data, data[:, 0, N.newaxis]), axis=-1)
    else:
        newdata = N.concatenate((data, data[:, 0, N.newaxis]), axis=-1)
    return newdata
def fill_gaps_in_2d_transect_once(Z):
    was_masked = ma.isMA(Z)
    # Ensure array has NaNs, not mask
    Z = ma.filled(Z, np.nan).copy()
    fill_values = np.nanmean(
        np.dstack((np.roll(Z, 1, axis=1), np.roll(Z, -1, axis=1))), axis=2)
    inds_to_fill = np.logical_and(np.isnan(Z), ~np.isnan(fill_values))
    Z[inds_to_fill] = fill_values[inds_to_fill]
    if was_masked:
        Z = ma.masked_invalid(Z)
    return Z
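# Hedged usage sketch for fill_gaps_in_2d_transect_once above (assuming it
# is in scope): a single interior gap is filled with the mean of its
# left/right neighbours, and a masked input comes back masked.
import numpy as np
import numpy.ma as ma

Z = ma.masked_invalid(np.array([[1., np.nan, 3.]]))
Z_filled = fill_gaps_in_2d_transect_once(Z)
assert ma.isMA(Z_filled)
assert Z_filled[0, 1] == 2.0     # mean of horizontal neighbours 1 and 3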
def set_data(self, A, shape=None):
    """
    Set the image array

    ACCEPTS: numpy/PIL Image A
    """
    # check if data is PIL Image without importing Image
    if hasattr(A, 'getpixel'):
        self._A = pil_to_array(A)
    elif ma.isMA(A):
        self._A = A
    else:
        self._A = np.asarray(A)  # assume array

    self._imcache = None
    self._rgbacache = None
    self._oldxslice = None
    self._oldyslice = None
def _quantize(data, least_significant_digit):
    """
    quantize data to improve compression. data is quantized using
    around(scale*data)/scale, where scale is 2**bits, and bits is
    determined from the least_significant_digit. For example, if
    least_significant_digit=1, bits will be 4.
    """
    precision = pow(10., -least_significant_digit)
    exp = np.log10(precision)
    if exp < 0:
        exp = int(np.floor(exp))
    else:
        exp = int(np.ceil(exp))
    bits = np.ceil(np.log2(pow(10., -exp)))
    scale = pow(2., bits)
    datout = np.around(scale * data) / scale
    if ma.isMA(datout):
        datout.set_fill_value(data.fill_value)
        return datout
    else:
        return datout
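# Worked check of the bit computation in _quantize above, using the value
# from its docstring: least_significant_digit=1 implies bits=4, scale=16.
import numpy as np

lsd = 1
exp = int(np.floor(np.log10(10.0 ** -lsd)))   # log10(0.1) -> -1
bits = np.ceil(np.log2(10.0 ** -exp))         # ceil(log2(10)) == 4
scale = 2.0 ** bits                           # 16
assert bits == 4 and scale == 16.0
# scale 16 gives ~0.06 resolution, enough to preserve 0.1 precision
assert abs(np.around(scale * 0.123) / scale - 0.123) < 0.1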
def test_topo(self):
    # basic case
    cstr_list = ('time|i0 zc|ZP|2500 yc|i5 xc|:',
                 'time|i0 zc|ZP|2500m yc|i5 xc|:',
                 'time|i0 zc|ZP|1500m yc|i5 xc|:',
                 'time|i0 zc|ZP|1000,1500m yc|i5.5 xc|:')
    results = ((21,), (21,), (21,), (2, 21))
    for (cstr, res) in zip(cstr_list, results):
        if verbose:
            print(cstr)
            print("in test_topo")
        xsel = Nio.inp2xsel(self.f, 'PT', cstr)
        pt = self.f.variables['PT'][cstr]
        #pt = self.f.variables['ZP'][:]
        if verbose:
            print(pt.shape)
        if verbose:
            if ma.isMA(pt):
                print(N.asarray(pt.filled()))
            else:
                print(pt)
        assert_equal(pt.shape, res)
def bin_2d_transect(x, y, Z, x_out, y_out):
    """Bin transect Z(x, y), where x can be irregular

    Inputs
    ------
    x, y : 1D arrays
        x can be irregular, y cannot
    Z : 2D array
        Data at each point x, y. May be masked array
    x_out, y_out : 1D arrays
        Edges of grid on which to bin Z

    Returns
    -------
    Z_out : 2D array
        Shape (len(x_out) - 1, len(y_out) - 1)
    """
    if Z.ndim == 1:
        Z = Z[np.newaxis, :]

    # Preallocate result
    Nx, Ny = x_out.size - 1, y_out.size - 1
    Z_out = np.full((Nx, Ny), np.nan)

    filterwarnings('ignore', '.*Mean of empty slice*.')

    # Using loop for simplicity
    for i, j in np.ndindex(Nx, Ny):
        in_x_bin = np.logical_and(x > x_out[i], x < x_out[i + 1])
        in_y_bin = np.logical_and(y > y_out[j], y < y_out[j + 1])
        # np.ix_ selects the rectangular block of Z covered by this bin;
        # plain Z[in_y_bin, in_x_bin] would pair the boolean indices
        # element-wise and fail when the bins select different counts.
        Z_in_bin = Z[np.ix_(in_y_bin, in_x_bin)]
        Z_out[i, j] = np.nanmean(ma.filled(Z_in_bin, np.nan))

    if ma.isMA(Z):
        Z_out = ma.masked_invalid(Z_out)

    return Z_out
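# Hedged usage sketch for bin_2d_transect above (assuming the np.ix_
# reading of the bin selection, and that filterwarnings is imported at
# module scope): 2 depth levels (y) and 4 stations (x), binned into
# 2 x-bins and 1 y-bin.
import numpy as np
import numpy.ma as ma

x = np.array([0.5, 1.5, 2.5, 3.5])
y = np.array([5.0, 15.0])
Z = ma.masked_invalid(np.array([[1.0, 2.0, 3.0, np.nan],
                                [5.0, 6.0, 7.0, 8.0]]))
Z_out = bin_2d_transect(x, y, Z,
                        x_out=np.array([0., 2., 4.]),
                        y_out=np.array([0., 20.]))
assert Z_out.shape == (2, 1)
assert Z_out[0, 0] == 3.5   # mean of 1, 2, 5, 6
assert Z_out[1, 0] == 6.0   # mean of 3, 7, 8 (NaN ignored)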
def broadcast(*args):
    def _mask_or(a, b):
        return ma.mask_or(a, b, shrink=True)

    args = [_safe_masked_invalid(arg) for arg in args]

    if any([ma.isMA(arg) for arg in args]):
        vars = [ma.getdata(var) for var in args]
        mvars = [ma.getmaskarray(var) for var in args]
        outargs = list(map(np.array, np.broadcast_arrays(*vars)))
        masks = list(map(np.array, np.broadcast_arrays(*mvars)))
        mask = reduce(_mask_or, masks)
    else:
        mask = ma.nomask
        # Using map(np.array, ...) to get contiguous copies.
        outargs = list(map(np.array, np.broadcast_arrays(*args)))

    if outargs[0].ndim == 0:
        scalar = True
        for arg in outargs:
            arg.shape = (1,)
        if mask is not ma.nomask:
            mask.shape = (1,)
    else:
        scalar = False
    return scalar, mask, outargs
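# Standalone sketch of the mask-merging step inside broadcast above: data
# and masks are broadcast separately, then the masks are OR-ed together so
# a point masked in any input is masked in the result.
import numpy as np
import numpy.ma as ma
from functools import reduce

a = ma.masked_invalid(np.array([1.0, np.nan, 3.0]))       # shape (3,)
b = ma.masked_array(np.zeros((2, 3)),
                    mask=[[False, False, False],
                          [True, False, False]])
masks = np.broadcast_arrays(ma.getmaskarray(a), ma.getmaskarray(b))
mask = reduce(lambda m1, m2: ma.mask_or(m1, m2, shrink=True), masks)
assert mask.shape == (2, 3)
assert mask[0, 1] and mask[1, 0] and not mask[0, 0]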
def make_gene_map_2(self):
    """
    Take the attributes from the array and use them to create a gene map
    for the array. The gene map is a dictionary keyed by binary strings;
    each key is a bit string of a fixed, appropriate length, calculated
    from the total number of grid points.
    """
    count = 0
    self.iterator = itertools.product(range(self.time_len),
                                      range(self.lat_len),
                                      range(self.lon_len))
    for x_valid in self.iterator:
        binary_string = bin(count)[2:]
        while len(binary_string) < self.string_length:
            binary_string = "0" + binary_string
        self.gene_map[binary_string] = {}
        print(self.array[x_valid])
        if ma.is_masked(self.array[x_valid]):
            print("masked")
        else:
            self.gene_map[binary_string]["coordinate"] = tuple(x_valid)
            self.gene_map[binary_string]["value"] = self.array[x_valid]
            count += 1
    self.last_valid_binary_string = binary_string
    binary_string_old = binary_string
    not_valid_first = int(binary_string, 2) + 1
    not_valid_last = int("1" * self.string_length, 2)
    self.count = count
    print(count)
    print(binary_string)
    print(len(binary_string))
    print(not_valid_first)
    print(not_valid_last)
    print(x_valid)
    print(ma.isMA(self.array))
    for x_not_valid in range(not_valid_first, not_valid_last + 1):
        binary_string = bin(x_not_valid)[2:]
        # pad the left-over keys to the same fixed length
        while len(binary_string) < self.string_length:
            binary_string = "0" + binary_string
        self.gene_map[binary_string] = {}
        self.gene_map[binary_string]["coordinate"] = (999, 999, 999)
        self.gene_map[binary_string]["value"] = 1e06
        print(x_not_valid, binary_string,
              self.gene_map[binary_string]["value"])
    print(count)
    print(binary_string)
    print(binary_string_old)
    print(len(binary_string))
    print(self.string_length)
    print(not_valid_first)
    print(not_valid_last)
    print(x_valid)
    print(x_not_valid)
    print(ma.isMA(self.array))
def delete_masked_points(*args):
    """
    Find all masked and/or non-finite points in a set of arguments,
    and return the arguments with only the unmasked points remaining.

    Arguments can be in any of 5 categories:

    1) 1-D masked arrays
    2) 1-D ndarrays
    3) ndarrays with more than one dimension
    4) other non-string iterables
    5) anything else

    The first argument must be in one of the first four categories;
    any argument with a length differing from that of the first argument
    (and hence anything in category 5) then will be passed through
    unchanged.

    Masks are obtained from all arguments of the correct length in
    categories 1, 2, and 4; a point is bad if masked in a masked array
    or if it is a nan or inf. No attempt is made to extract a mask from
    categories 2, 3, and 4 if :meth:`np.isfinite` does not yield a
    Boolean array.

    All input arguments that are not passed unchanged are returned as
    ndarrays after removing the points or rows corresponding to masks in
    any of the arguments.

    A vastly simpler version of this function was originally written as a
    helper for Axes.scatter().
    """
    if not len(args):
        return ()
    if is_string_like(args[0]) or not iterable(args[0]):
        raise ValueError("First argument must be a sequence")
    nrecs = len(args[0])
    margs = []
    seqlist = [False] * len(args)
    for i, x in enumerate(args):
        if (not is_string_like(x)) and iterable(x) and len(x) == nrecs:
            seqlist[i] = True
            if ma.isMA(x):
                if x.ndim > 1:
                    raise ValueError("Masked arrays must be 1-D")
            else:
                x = np.asarray(x)
        margs.append(x)
    masks = []  # list of masks that are True where good
    for i, x in enumerate(margs):
        if seqlist[i]:
            if x.ndim > 1:
                continue  # Don't try to get nan locations unless 1-D.
            if ma.isMA(x):
                masks.append(~ma.getmaskarray(x))  # invert the mask
                xd = x.data
            else:
                xd = x
            try:
                mask = np.isfinite(xd)
                if isinstance(mask, np.ndarray):
                    masks.append(mask)
            except:  # Fixme: put in tuple of possible exceptions?
                pass
    if len(masks):
        mask = reduce(np.logical_and, masks)
        igood = mask.nonzero()[0]
        if len(igood) < nrecs:
            for i, x in enumerate(margs):
                if seqlist[i]:
                    margs[i] = x.take(igood, axis=0)
    for i, x in enumerate(margs):
        if seqlist[i] and ma.isMA(x):
            margs[i] = x.filled()
    return margs
def shiftgrid(lon0, datain, lonsin, start=True, cyclic=360.0):
    """
    Shift global lat/lon grid east or west.
    copied directly from mpl_toolkits v1.0.2 by mjh

    .. tabularcolumns:: |l|L|

    ==============   ====================================================
    Arguments        Description
    ==============   ====================================================
    lon0             starting longitude for shifted grid
                     (ending longitude if start=False). lon0 must be on
                     input grid (within the range of lonsin).
    datain           original data.
    lonsin           original longitudes.
    ==============   ====================================================

    .. tabularcolumns:: |l|L|

    ==============   ====================================================
    Keywords         Description
    ==============   ====================================================
    start            if True, lon0 represents the starting longitude
                     of the new grid. if False, lon0 is the ending
                     longitude. Default True.
    cyclic           width of periodic domain (default 360)
    ==============   ====================================================

    returns ``dataout,lonsout`` (data and longitudes on shifted grid).
    """
    # Import moved below the docstring: in the original, the string
    # followed the import and so was not actually a docstring.
    import numpy.ma as ma
    if numpy.fabs(lonsin[-1] - lonsin[0] - cyclic) > 1.e-4:
        # Use all data instead of raise ValueError, 'cyclic point not included'
        start_idx = 0
    else:
        # If cyclic, remove the duplicate point
        start_idx = 1
    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
        msg = ('lon0 outside of range of lonsin '
               '%(l0)4.1f %(st)4.1f %(ed)4.1f'
               % {'l0': lon0, 'st': lonsin[0], 'ed': lonsin[-1]})
        raise ValueError(msg)
    i0 = numpy.argmin(numpy.fabs(lonsin - lon0))
    i0_shift = len(lonsin) - i0
    if ma.isMA(datain):
        dataout = ma.zeros(datain.shape, datain.dtype)
    else:
        dataout = numpy.zeros(datain.shape, datain.dtype)
    if ma.isMA(lonsin):
        lonsout = ma.zeros(lonsin.shape, lonsin.dtype)
    else:
        lonsout = numpy.zeros(lonsin.shape, lonsin.dtype)
    if start:
        lonsout[0:i0_shift] = lonsin[i0:]
    else:
        lonsout[0:i0_shift] = lonsin[i0:] - cyclic
    dataout[:, 0:i0_shift] = datain[:, i0:]
    if start:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx] + cyclic
    else:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx]
    dataout[:, i0_shift:] = datain[:, start_idx:i0 + start_idx]
    return dataout, lonsout
def redshift(z_in, z_out, data_in=None, data_out=None, rules=[]):
    """Transform spectral data from redshift z_in to z_out.

    Each quantity X is transformed according to a power law::

        X_out = X_in * ((1 + z_out) / (1 + z_in))**exponent

    where exponents are specified with the ``rules`` argument. Exponents for
    some common cases are listed in the table below.

    ======== ================================================================
    Exponent Quantities
    ======== ================================================================
    0        flux density in photons/(s*cm^2*Ang)
    +1       wavelength, wavelength error, flux density in ergs/(s*cm^2*Hz)
    -1       frequency, frequency error, flux density in ergs/(s*cm^2*Ang)
    +2       inverse variance of flux density in ergs/(s*cm^2*Ang)
    -2       inverse variance of flux density in ergs/(s*cm^2*Hz)
    ======== ================================================================

    For example, to transform separate wavelength and flux arrays using the
    SDSS standard units of Ang and 1e-17 erg/(s*cm^2*Ang):

    >>> wlen = np.arange(4000., 10000.)
    >>> flux = np.ones(wlen.shape)
    >>> result = redshift(z_in=0, z_out=1, rules=[
    ...     dict(name='wlen', exponent=+1, array_in=wlen),
    ...     dict(name='flux', exponent=-1, array_in=flux)])
    >>> result.dtype
    dtype([('wlen', '<f8'), ('flux', '<f8')])
    >>> result['flux'][:5]
    array([ 0.5, 0.5, 0.5, 0.5, 0.5])

    The same calculation could be performed with the input data stored in
    a numpy structured array, in which case any additional fields are copied
    to the output array:

    >>> data = np.empty(6000, dtype=[
    ...     ('wlen', float), ('flux', float), ('maskbits', int)])
    >>> data['wlen'] = np.arange(4000., 10000.)
    >>> data['flux'] = np.ones_like(data['wlen'])
    >>> result = redshift(z_in=0, z_out=1, data_in=data, rules=[
    ...     dict(name='wlen', exponent=+1),
    ...     dict(name='flux', exponent=-1)])
    >>> result.dtype
    dtype([('wlen', '<f8'), ('flux', '<f8'), ('maskbits', '<i8')])
    >>> result['flux'][:5]
    array([ 0.5, 0.5, 0.5, 0.5, 0.5])

    The transformed result is always a `numpy structured array
    <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`__, with field
    (column) names determined by the rules you provide.

    The usual `numpy broadcasting rules
    <http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`__ apply
    in the transformation expression above so, for example, the same redshift
    can be applied to multiple spectra, or different redshifts can be applied
    to the same spectrum with appropriate input shapes.

    Input arrays can have associated `masks
    <http://docs.scipy.org/doc/numpy/reference/maskedarray.html>`__ and these
    will be propagated to the output. Input arrays can also have `units
    <http://astropy.readthedocs.io/en/latest/units/index.html>`__ but these
    will not be used or propagated to the output since numpy structured
    arrays do not support per-column units.

    Parameters
    ----------
    z_in : float or numpy.ndarray
        Redshift(s) of the input spectral data, which must all be > -1.
    z_out : float or numpy.ndarray
        Redshift(s) of the output spectral data, which must all be > -1.
    data_in : numpy.ndarray
        Structured numpy array containing input spectrum data to transform.
        If none is specified, then all quantities must be provided as numpy
        arrays in the rules.
    data_out : numpy.ndarray
        Structured numpy array where output spectrum data should be written.
        If none is specified, then an appropriately sized array will be
        allocated and returned. Use this method to take control of the
        memory allocation and, for example, re-use the same output array for
        a sequence of transforms.
    rules : iterable
        An iterable object whose elements are dictionaries. Each dictionary
        specifies how one quantity will be transformed and must contain
        'name' and 'exponent' values. If an 'array_in' value is also
        specified, it should refer to a numpy array containing the input
        values to transform. Otherwise, ``data_in[<name>]`` is assumed to
        contain the input values to transform. If no ``rules`` are specified
        and ``data_in`` is provided, then ``data_out`` is just a copy of
        ``data_in``.

    Returns
    -------
    numpy.ndarray
        Array of spectrum data with the redshift transform applied. Equal to
        data_out when set, otherwise a new array is allocated. If ``data_in``
        is specified, then any fields not listed in ``rules`` are copied to
        ``data_out``, so effectively have an implicit exponent of zero.
    """
    if not isinstance(z_in, np.ndarray):
        z_in = float(z_in)
    if np.any(z_in <= -1):
        raise ValueError('Found invalid z_in <= -1.')
    if not isinstance(z_out, np.ndarray):
        z_out = float(z_out)
    if np.any(z_out <= -1):
        raise ValueError('Found invalid z_out <= -1.')
    z_factor = (1.0 + z_out) / (1.0 + z_in)

    if data_in is not None and not isinstance(data_in, np.ndarray):
        raise ValueError('Invalid data_in type: {0}.'.format(type(data_in)))
    if data_out is not None and not isinstance(data_out, np.ndarray):
        raise ValueError('Invalid data_out type: {0}.'.format(type(data_out)))

    if data_in is not None:
        shape_in = data_in.shape
        dtype_in = data_in.dtype
        masked_in = ma.isMA(data_in)
    else:
        shape_in = None
        dtype_in = []
        masked_in = False

    for i, rule in enumerate(rules):
        name = rule.get('name')
        if not isinstance(name, str):
            raise ValueError('Invalid name in rule: {0}'.format(name))
        try:
            exponent = float(rule.get('exponent'))
        except TypeError:
            raise ValueError(
                'Invalid exponent for {0}: {1}.'
                .format(name, rule.get('exponent')))
        if data_in is not None and name not in dtype_in.names:
            raise ValueError('No such data_in field named {0}.'.format(name))
        if data_out is not None and name not in data_out.dtype.names:
            raise ValueError('No such data_out field named {0}.'.format(name))
        array_in = rule.get('array_in')
        if array_in is not None:
            if data_in is not None:
                raise ValueError(
                    'Cannot specify data_in and array_in for {0}.'
                    .format(name))
            if not isinstance(array_in, np.ndarray):
                raise ValueError(
                    'Invalid array_in type for {0}: {1}.'
                    .format(name, type(array_in)))
            if shape_in is None:
                shape_in = array_in.shape
            elif shape_in != array_in.shape:
                raise ValueError(
                    'Incompatible array_in shape for {0}: {1}. Expected {2}.'
                    .format(name, array_in.shape, shape_in))
            dtype_in.append((name, array_in.dtype))
            if ma.isMA(array_in):
                masked_in = True
        else:
            if data_in is None:
                raise ValueError(
                    'Missing array_in for {0} (with no data_in).'
                    .format(name))
            # Save a view of the input data column associated with this rule.
            rules[i]['array_in'] = data_in[name]

    shape_out = np.broadcast(np.empty(shape_in), z_factor).shape
    if data_out is None:
        if masked_in:
            data_out = ma.empty(shape_out, dtype=dtype_in)
            data_out.mask = False
        else:
            data_out = np.empty(shape_out, dtype=dtype_in)
    else:
        if masked_in and not ma.isMA(data_out):
            raise ValueError('data_out discards data_in mask.')
        if data_out.shape != shape_out:
            raise ValueError(
                'Invalid data_out shape: {0}. Expected {1}.'
                .format(data_out.shape, shape_out))
        if data_out.dtype != dtype_in:
            raise ValueError(
                'Invalid data_out dtype: {0}. Expected {1}.'
                .format(data_out.dtype, dtype_in))

    if data_in is not None:
        # Copy data_in to data_out so that any columns not listed in the
        # rules are propagated to the output.
        data_out[...] = data_in

    for rule in rules:
        name = rule.get('name')
        exponent = float(rule.get('exponent'))
        array_in = rule.get('array_in')
        data_out[name][:] = array_in * z_factor**exponent
        if data_in is None and ma.isMA(array_in):
            data_out[name].mask[...] = array_in.mask

    return data_out
def resample(data_in, x_in, x_out, y, data_out=None, kind='linear'):
    """Resample the data of one spectrum using interpolation.

    Dependent variables y1, y2, ... in the input data are resampled in the
    independent variable x using interpolation models y1(x), y2(x), ...
    evaluated on a new grid of x values. The independent variable will
    typically be a wavelength or frequency and the dependent variables can
    be fluxes, inverse variances, etc.

    Interpolation is intended for cases where the input and output grids
    have comparable densities. When neighboring samples are correlated,
    the resampling process should be essentially lossless. When the output
    grid is sparser than the input grid, it may be more appropriate to
    "downsample", i.e., average dependent variables over consecutive ranges
    of input samples.

    The basic usage of this function is:

    >>> data = np.ones((5,),
    ...     [('wlen', float), ('flux', float), ('ivar', float)])
    >>> data['wlen'] = np.arange(4000, 5000, 200)
    >>> wlen_out = np.arange(4100, 4700, 200)
    >>> resample(data, 'wlen', wlen_out, ('flux', 'ivar'))
    array([(4100, 1.0, 1.0), (4300, 1.0, 1.0), (4500, 1.0, 1.0)],
          dtype=[('wlen', '<i8'), ('flux', '<f8'), ('ivar', '<f8')])

    The input grid can also be external to the structured array of spectral
    data, for example:

    >>> data = np.ones((5,), [('flux', float), ('ivar', float)])
    >>> wlen_in = np.arange(4000, 5000, 200)
    >>> wlen_out = np.arange(4100, 4900, 200)
    >>> resample(data, wlen_in, wlen_out, ('flux', 'ivar'))
    array([(1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0)],
          dtype=[('flux', '<f8'), ('ivar', '<f8')])

    If the output grid extends beyond the input grid, a `masked array
    <http://docs.scipy.org/doc/numpy/reference/maskedarray.html>`__ will be
    returned with any values requiring extrapolation masked:

    >>> wlen_out = np.arange(3500, 5500, 500)
    >>> resample(data, wlen_in, wlen_out, 'flux')
    masked_array(data = [(--,) (1.0,) (1.0,) (--,)],
                 mask = [(True,) (False,) (False,) (True,)],
           fill_value = (1e+20,),
                dtype = [('flux', '<f8')])

    If the input data is masked, any output interpolated values that depend
    on an input masked value will be masked in the output:

    >>> data = ma.ones((5,), [('flux', float), ('ivar', float)])
    >>> data['flux'][2] = ma.masked
    >>> wlen_out = np.arange(4100, 4900, 200)
    >>> resample(data, wlen_in, wlen_out, 'flux')
    masked_array(data = [(1.0,) (--,) (--,) (1.0,)],
                 mask = [(False,) (True,) (True,) (False,)],
           fill_value = (1e+20,),
                dtype = [('flux', '<f8')])

    Interpolation is performed using :class:`scipy.interpolate.interp1d`.

    Parameters
    ----------
    data_in : numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array of input spectral data to resample. The
        input array must be one-dimensional.
    x_in : string or numpy.ndarray
        A field name in data_in containing the independent variable to use
        for interpolation, or else an array of values with the same shape
        as the input data.
    x_out : numpy.ndarray
        An array of values for the independent variable where interpolation
        models should be evaluated to calculate the output values.
    y : string or iterable of strings
        A field name or a list of field names present in the input data
        that should be resampled by interpolation and included in the
        output.
    data_out : numpy.ndarray or None
        Structured numpy array where the output result should be written.
        If None is specified, then an appropriately sized array will be
        allocated and returned. Use this method to take control of the
        memory allocation and, for example, re-use the same array when
        resampling many spectra.
    kind : string or integer
        Specify the kind of interpolation models to build using any of the
        forms allowed by :class:`scipy.interpolate.interp1d`. If any input
        dependent values are masked, only the ``nearest`` and ``linear``
        values are allowed.

    Returns
    -------
    numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array of the resampled result containing all ``y``
        fields and (if ``x_in`` is specified as a string) the output ``x``
        field. The output will be a :class:`numpy.ma.MaskedArray` if
        ``x_out`` extends beyond ``x_in`` or if ``data_in`` is masked.
    """
    if not isinstance(data_in, np.ndarray):
        raise ValueError('Invalid data_in type: {0}.'.format(type(data_in)))
    if data_in.dtype.fields is None:
        raise ValueError('Input data_in is not a structured array.')
    if len(data_in.shape) > 1:
        raise ValueError('Input data_in is multidimensional.')

    if isinstance(x_in, str):
        if x_in not in data_in.dtype.names:
            raise ValueError('No such x_in field: {0}.'.format(x_in))
        x_out_name = x_in
        x_in = data_in[x_in]
    else:
        if not isinstance(x_in, np.ndarray):
            raise ValueError('Invalid x_in type: {0}.'.format(type(x_in)))
        if x_in.shape != data_in.shape:
            raise ValueError('Incompatible shapes for x_in and data_in.')
        x_out_name = None

    if not isinstance(x_out, np.ndarray):
        raise ValueError('Invalid x_out type: {0}.'.format(type(data_out)))

    if ma.isMA(x_in) and np.any(x_in.mask):
        raise ValueError('Cannot resample masked x_in.')
    x_type = np.promote_types(x_in.dtype, x_out.dtype)

    dtype_out = []
    if x_out_name is not None:
        dtype_out.append((x_out_name, x_out.dtype))

    if isinstance(y, str):
        # Use a list instead of a tuple here so y_names can be used
        # to index data_in below.
        y_names = [y, ]
    else:
        try:
            y_names = [name for name in y]
        except TypeError:
            raise ValueError('Invalid y type: {0}.'.format(type(y)))
    for not_first, y in enumerate(y_names):
        if y not in data_in.dtype.names:
            raise ValueError('No such y field: {0}.'.format(y))
        if not_first:
            if data_in[y].dtype != y_type:
                raise ValueError('All y fields must have the same type.')
        else:
            y_type = data_in[y].dtype
        dtype_out.append((y, y_type))

    y_shape = (len(y_names),)
    if ma.isMA(data_in):
        # Copy the structured 1D array into a 2D unstructured array
        # and set masked values to NaN.
        y_in = np.zeros(data_in.shape + y_shape, y_type)
        for i, y in enumerate(y_names):
            y_in[:, i] = data_in[y].filled(np.nan)
    else:
        y_in = data_in[y_names]
        # View the structured 1D array as a 2D unstructured array (without
        # copying any memory).
        y_in = y_in.view(y_type).reshape(data_in.shape + y_shape)

    # interp1d will only propagate NaNs correctly for certain values of
    # `kind`. With numpy = 1.6 or 1.7, only 'nearest' and 'linear' work.
    # With numpy = 1.8 or 1.9, 'slinear' and kind = 0 or 1 also work.
    if np.any(np.isnan(y_in)):
        if kind not in ('nearest', 'linear'):
            raise ValueError(
                'Interpolation kind not supported for masked data: {0}.'
                .format(kind))
    try:
        interpolator = scipy.interpolate.interp1d(
            x_in, y_in, kind=kind, axis=0, copy=False,
            bounds_error=False, fill_value=np.nan)
    except NotImplementedError:
        raise ValueError(
            'Interpolation kind not supported: {0}.'.format(kind))

    shape_out = (len(x_out),)
    if data_out is None:
        data_out = np.empty(shape_out, dtype_out)
    else:
        if data_out.shape != shape_out:
            raise ValueError(
                'data_out has wrong shape: {0}. Expected: {1}.'
                .format(data_out.shape, shape_out))
        if data_out.dtype != dtype_out:
            raise ValueError(
                'data_out has wrong dtype: {0}. Expected: {1}.'
                .format(data_out.dtype, dtype_out))

    if x_out_name is not None:
        data_out[x_out_name][:] = x_out
    y_out = interpolator(x_out)
    for i, y in enumerate(y_names):
        data_out[y][:] = y_out[:, i]

    if ma.isMA(data_in) or np.any(np.isnan(y_out)):
        data_out = ma.MaskedArray(data_out)
        data_out.mask = False
        for y in y_names:
            data_out[y].mask = np.isnan(data_out[y].data)

    return data_out
def extract_loc(ref_lon, ref_lat, tlon, tlat, var):
    """
    Extract CCSM/POP model output for a given location (lat, lon).
    It finds the 4 model grid points around the location and computes
    their weighted average (weights = inverse of the distance). If a
    location is next to land, the function returns the weighted average
    of the closest grid points that are not on land.

    Input:
        ref_lon = longitude of position to be extracted (scalar)
        ref_lat = latitude of position to be extracted (scalar)
        tlon    = model longitude grid (numpy array)
        tlat    = model latitude grid (numpy array)
        var     = variable to be extracted (Masked 2-D or 3-D array)

    Output:
        wavg = weighted average (scalar or 1-D array)
    """
    if var.ndim == 3:  # 3D variable
        zmax, imax, jmax = var.shape
        threeD = True
    elif var.ndim == 2:  # 2D variable
        imax, jmax = var.shape
        threeD = False
    else:
        print('extract_loc: check variable dimensions')
        return

    # find the indices of the 4 model grid points around the location
    Ilist, Jlist = find_stn_idx(ref_lon, ref_lat, tlon, tlat)

    # compute great circle distance from location to model grid points
    dist = gc_dist(ref_lon, ref_lat, tlon, tlat)
    dist[dist == 0] = 1.e-15  # avoid division by zero

    # arrays to store weights and data to be averaged
    if threeD:  # 3D variable
        wghts = MA.zeros((zmax, len(Ilist) * len(Jlist)), float)
        data = MA.zeros((zmax, len(Ilist) * len(Jlist)), float)
        if MA.isMA(var):
            # mask weights
            dist_m = MA.array(N.resize(dist, var.shape), mask=var.mask)
        else:
            dist_m = N.array(N.resize(dist, var.shape))
    else:  # 2D variable
        wghts = MA.zeros((len(Ilist) * len(Jlist)), float)
        data = MA.zeros((len(Ilist) * len(Jlist)), float)
        if MA.isMA(var):
            dist_m = MA.array(dist, mask=var.mask)  # mask weights
        else:
            dist_m = N.array(dist)

    # get the 4 model grid points and compute weights
    n = 0
    for i in Ilist:
        for j in Jlist:
            wghts[..., n] = 1. / dist_m[..., i, j]
            data[..., n] = var[..., i, j]
            n += 1

    # compute weighted average
    wavg = MA.average(data, axis=-1, weights=wghts)
    return wavg
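# Standalone sketch of the inverse-distance weighting used by extract_loc
# above, with hypothetical distances and values for two grid points.
import numpy as np
import numpy.ma as ma

dist = np.array([1.0, 3.0])
vals = ma.array([10.0, 20.0])
wavg = ma.average(vals, weights=1.0 / dist)
assert abs(wavg - 12.5) < 1e-12   # (10*1 + 20/3) / (1 + 1/3) == 12.5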
def downsample(data_in, downsampling, weight=None, axis=-1, start_index=0, auto_trim=True, data_out=None): """Downsample spectral data by a constant factor. Downsampling consists of dividing the input data into fixed-size groups of consecutive bins, then calculated downsampled values as weighted averages within each group. The basic usage is: >>> data = np.ones((6,), dtype=[('flux', float), ('ivar', float)]) >>> out = downsample(data, downsampling=2, weight='ivar') >>> np.all(out == ... np.array([(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)], ... dtype=[('flux', '<f8'), ('ivar', '<f8')])) True Any partial group at the end of the input data will be silently ignored unless `auto_trim=False`: >>> out = downsample(data, downsampling=4, weight='ivar') >>> np.all(out == ... np.array([(1.0, 4.0)], dtype=[('flux', '<f8'), ('ivar', '<f8')])) True >>> out = downsample(data, downsampling=4, weight='ivar', auto_trim=False) Traceback (most recent call last): ... ValueError: Input data does not evenly divide with downsampling = 4. A multi-dimensional array of spectra with the same binning can be downsampled in a single operation, for example: >>> data = np.ones((2,16,3,), dtype=[('flux', float), ('ivar', float)]) >>> results = downsample(data, 4, axis=1) >>> results.shape (2, 4, 3) If no axis is specified, the last axis of the input array is assumed. If the input data is masked, only unmasked entries will be used to calculate the weighted averages for each downsampled group and the output will also be masked: >>> data = ma.ones((6,), dtype=[('flux', float), ('ivar', float)]) >>> data.mask[3:] = True >>> out = downsample(data, 2, weight='ivar') >>> type(out) == ma.core.MaskedArray True If the input fields have different masks, their logical OR will be used for all output fields since, otherwise, each output field would require its own output weight field. As a consequence, masking a single input field is equivalent to masking all input fields. Parameters ---------- data_in : numpy.ndarray or numpy.ma.MaskedArray Structured numpy array containing input spectrum data to downsample. downsampling : int Number of consecutive bins to combine into each downsampled bin. Must be at least one and not larger than the input data size. weight : string or None. The name of a field whose values provide the weights to use for downsampling. When None, a weight value of one will be used. The output array will contain a field with this name, unless it is None, containing values of the downsampled weights. All weights must be non-negative. start_index : int Index of the first bin to use for downsampling. Any bins preceeding the start bin will not be included in the downsampled results. Negative indices are not allowed. axis : int Index of the axis to perform downsampling in. The default is to use the last index of the input data array. auto_trim : bool When True, any bins at the end of the input data that do not fill a complete downsampled bin will be automatically (and silently) trimmed. When False, a ValueError will be raised. data_out : numpy.ndarray or None Structured numpy array where output spectrum data should be written. If none is specified, then an appropriately sized array will be allocated and returned. Use this method to take control of the memory allocation and, for example, re-use the same output array for a sequence of downsampling operations. 
Returns ------- numpy.ndarray or numpy.ma.MaskedArray Structured numpy array of downsampled result, containing the same fields as the input data and the same shape except along the specified downsampling axis. If the input data is masked, the output data will also be masked, with each output field's mask determined by the combination of the optional weight field mask and the corresponding input field mask. """ if not isinstance(data_in, np.ndarray): raise ValueError('Invalid data_in type: {0}.'.format(type(data_in))) if data_out is not None and not isinstance(data_out, np.ndarray): raise ValueError('Invalid data_out type: {0}.'.format(type(data_out))) shape_in = data_in.shape try: num_bins = shape_in[axis] except IndexError: raise ValueError('Invalid axis = {0}.'.format(axis)) if downsampling < 1 or downsampling > num_bins: raise ValueError('Invalid downsampling = {0}.'.format(downsampling)) if start_index < 0 or start_index >= num_bins: raise ValueError('Invalid start_index = {0}.'.format(start_index)) num_downsampled = (num_bins - start_index) // downsampling if num_downsampled <= 0: raise ValueError( 'Incompatible downsampling = {0} and start_index = {1}.'.format( downsampling, start_index)) stop_index = start_index + num_downsampled * downsampling assert stop_index <= num_bins if stop_index < num_bins and not auto_trim: raise ValueError( 'Input data does not evenly divide with downsampling = {0}.'. format(downsampling)) if weight is not None: if not isinstance(weight, basestring): raise ValueError('Invalid weight type: {0}.'.format(type(weight))) if weight in data_in.dtype.fields: # If data_in is a MaskedArray, weights_in will also be masked. weights_in = data_in[weight] if np.any(weights_in < 0): raise ValueError('Some input weights < 0.') else: raise ValueError('No such weight field: {0}.'.format(weight)) else: if ma.isMA(data_in): weights_in = ma.ones(shape_in) else: weights_in = np.ones(shape_in) shape_out = list(shape_in) shape_out[axis] = num_downsampled shape_out = tuple(shape_out) expanded_shape = list(shape_in) expanded_shape[axis] = downsampling expanded_shape.insert(axis, num_downsampled) sum_axis = axis + 1 if axis >= 0 else len(shape_in) + axis + 1 dtype_out = data_in.dtype if data_out is None: if ma.isMA(data_in): data_out = ma.empty(shape_out, dtype=data_in.dtype) data_out.mask = False else: data_out = np.empty(shape_out, dtype=data_in.dtype) else: if data_out.shape != shape_out: raise ValueError( 'data_out has wrong shape: {0}. Expected: {1}.'.format( data_out.shape, shape_out)) if data_out.dtype != dtype_out: raise ValueError( 'data_out has wrong dtype: {0}. Expected: {1}.'.format( data_out.dtype, dtype_out)) if ma.isMA(data_in): # Each field has an independent mask in the input, but we want to # use the same output weights for all fields. Use the logical OR # of the individual input field masks to achieve this. or_mask = np.zeros(shape_in, dtype=bool) for field in data_in.dtype.fields: or_mask = or_mask | data_in[field].mask weights_in.mask = or_mask # Loop over fields in the input data. weights_out = np.sum( weights_in[start_index:stop_index].reshape(expanded_shape), axis=sum_axis) for field in data_in.dtype.fields: if field == weight: continue weighted = (weights_in[start_index:stop_index] * data_in[field][start_index:stop_index]) if ma.isMA(data_in): weighted.mask = or_mask data_out[field] = np.sum(weighted.reshape(expanded_shape), axis=sum_axis) / weights_out if weight is not None: data_out[weight] = weights_out return data_out
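# --- Illustrative sketch (not from the original source): the reshape-and-sum
# trick downsample relies on, shown standalone for a 1-D spectrum. Each group
# of `downsampling` bins becomes one row of expanded_shape, and summing along
# the new axis yields per-group weighted averages.
import numpy as np

flux = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
ivar = np.array([1.0, 1.0, 2.0, 2.0, 4.0, 4.0])  # weights
downsampling = 2
expanded_shape = (len(flux) // downsampling, downsampling)

weights_out = ivar.reshape(expanded_shape).sum(axis=1)
flux_out = (ivar * flux).reshape(expanded_shape).sum(axis=1) / weights_out
print(flux_out)     # [1.5 3.5 5.5]
print(weights_out)  # [2. 4. 8.]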
def accumulate(data1_in, data2_in, data_out=None, join=None, add=None,
               weight=None):
    """Combine the data from two spectra.

    Values x1 and x2 with corresponding weights w1 and w2 are combined as::

        x12 = (w1*x1 + w2*x2)/(w1 + w2)

    If no weight field is present for either input, a weight of one is used.
    If either input array is `masked
    <http://docs.scipy.org/doc/numpy/reference/maskedarray.html>`__, weights
    for masked entries will be set to zero. The output contains values for
    x12 and the accumulated weight::

        w12 = w1 + w2

    For example:

    >>> data1 = np.ones((10,), dtype=[('flux', float), ('ivar', float)])
    >>> data2 = np.ones((10,), dtype=[('flux', float), ('ivar', float)])
    >>> result = accumulate(data1, data2, add='flux', weight='ivar')
    >>> np.all(result[:3] ==
    ... np.array([(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)],
    ... dtype=[('flux', '<f8'), ('ivar', '<f8')]))
    True

    Any fields common to both inputs can also be copied to the output:

    >>> data1 = np.ones((10,), dtype=[('wlen', float), ('flux', float)])
    >>> data2 = np.ones((10,), dtype=[('wlen', float), ('flux', float)])
    >>> result = accumulate(data1, data2, join='wlen', add='flux')
    >>> np.all(result[:3] ==
    ... np.array([(1.0, 1.0), (1.0, 1.0), (1.0, 1.0)],
    ... dtype=[('wlen', '<f8'), ('flux', '<f8')]))
    True

    The actual calculation of x12 uses the expression::

        x12 = x1 + (x2 - x1)*w2/(w1 + w2)

    which has `better numerical properties
    <https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    #Weighted_incremental_algorithm>`__ when many spectra are iteratively
    accumulated using the following pattern:

    >>> result = None
    >>> data = np.ones((10,100),
    ... dtype=[('wlen', float), ('flux', float), ('ivar', float)])
    >>> for row in data:
    ...     result = accumulate(data1_in=result, data2_in=row,
    ...                         data_out=result, join='wlen', add='flux',
    ...                         weight='ivar')
    >>> np.all(result[:3] ==
    ... np.array([(1.0, 1.0, 10.0), (1.0, 1.0, 10.0), (1.0, 1.0, 10.0)],
    ... dtype=[('wlen', '<f8'), ('flux', '<f8'), ('ivar', '<f8')]))
    True

    With this pattern, the result array is allocated on the first iteration
    and then re-used for all subsequent iterations.

    Parameters
    ----------
    data1_in : numpy.ndarray or numpy.ma.MaskedArray or None
        First structured numpy array of input spectral data.
    data2_in : numpy.ndarray or numpy.ma.MaskedArray
        Second structured numpy array of input spectral data.
    data_out : numpy.ndarray or None
        Structured numpy array where output spectrum data should be written.
        If None is specified, then an appropriately sized array will be
        allocated and returned. Use this method to take control of the
        memory allocation and, for example, re-use the same output array for
        iterative accumulation.
    join : string or iterable of strings or None.
        A field name or a list of field names that are present in both
        inputs with identical values, and should be included in the output.
    add : string or iterable or None.
        A field name or a list of field names that are present in both
        inputs and whose values, x1 and x2, should be accumulated as x12 in
        the output.
    weight : string or None.
        The name of a field whose values provide the weights w1 and w2 used
        to calculate the accumulated x12 = (w1*x1 + w2*x2)/(w1 + w2). If the
        named field is not present in either input, a weight value of one
        will be used. The output array will contain a field with this name,
        if it is not None, containing values for w12.

    Returns
    -------
    numpy.ndarray
        Structured numpy array of accumulated result, containing all fields
        listed in the ``join``, ``add``, and ``weight`` arguments.
Any values associated with a zero weight sample should be considered invalid. """ if data1_in is not None and not isinstance(data1_in, np.ndarray): raise ValueError('data1_in is not a numpy array.') if not isinstance(data2_in, np.ndarray): raise ValueError('data2_in is not a numpy array.') if data_out is not None and not isinstance(data_out, np.ndarray): raise ValueError('data_out is not a numpy array.') if data1_in is not None: if data1_in.shape != data2_in.shape: raise ValueError( 'Inputs have different shapes: {0} != {1}.'.format( data1_in.shape, data2_in.shape)) data1_fields = data1_in.dtype.fields if data1_fields is None: raise ValueError('Input data1_in is not a structured array.') data2_fields = data2_in.dtype.fields if data2_fields is None: raise ValueError('Input data2_in is not a structured array.') shape_out = data2_in.shape dtype_out = [] # Find the intersection of field names in both input datasets. if data1_in is not None: shared_fields = set(data1_fields.keys()) & set(data2_fields.keys()) if len(shared_fields) == 0: raise ValueError('Inputs have no fields in common.') else: shared_fields = set(data2_fields.keys()) def prepare_names(arg, label): if arg is None: names = () elif isinstance(arg, basestring): names = (arg, ) else: try: names = [name for name in arg] except TypeError: raise ValueError('Invalid {0} type: {1}.'.format( label, type(arg))) for name in names: if name not in shared_fields: raise ValueError('Invalid {0} field name: {1}.'.format( label, name)) if data1_in is not None: dtype1 = data1_fields[name][0] dtype2 = data2_fields[name][0] dtype_out.append((name, np.promote_types(dtype1, dtype2))) else: dtype_out.append((name, data2_fields[name][0])) return names join_names = prepare_names(join, 'join') add_names = prepare_names(add, 'add') if data1_in is not None: for name in join_names: if not np.array_equal(data1_in[name], data2_in[name]): raise ValueError( 'Cannot join on unmatched field: {0}.'.format(name)) if weight is not None: if not isinstance(weight, basestring): raise ValueError('Invalid weight type: {0}.'.format(type(weight))) if data1_in is not None: if weight in data1_fields: weight1 = data1_in[weight] else: weight1 = np.ones(shape_out) if weight in data2_fields: weight2 = data2_in[weight] else: weight2 = np.ones(shape_out) if data1_in is not None: dtype_out.append( (weight, np.promote_types(weight1.dtype, weight2.dtype))) else: dtype_out.append((weight, weight2.dtype)) else: if data1_in is not None: weight1 = np.ones(shape_out) weight2 = np.ones(shape_out) # Set weights to zero for any masked elements. Since each field has its # own mask, use the logical OR of all named join/add/weight fields. 
if data1_in is not None and ma.isMA(data1_in): mask = np.zeros(shape_out, dtype=bool) for name in join_names: mask = mask | data1_in[name].mask for name in add_names: mask = mask | data1_in[name].mask if weight is not None: mask = mask | data1_in[weight].mask weight1[mask] = 0 if np.any(mask) and weight is None: raise ValueError('Output weight required for masked input data.') if ma.isMA(data2_in): mask = np.zeros(shape_out, dtype=bool) for name in join_names: mask = mask | data2_in[name].mask for name in add_names: mask = mask | data2_in[name].mask if weight is not None: mask = mask | data2_in[weight].mask weight2[mask] = 0 if np.any(mask) and weight is None: raise ValueError('Output weight required for masked input data.') if len(dtype_out) == 0: raise ValueError('No result fields specified.') if data_out is None: data_out = np.zeros(shape_out, dtype_out) else: if data_out.shape != shape_out: raise ValueError( 'data_out has wrong shape: {0}. Expected: {1}.'.format( data_out.shape, shape_out)) if data_out.dtype != dtype_out: raise ValueError( 'data_out has wrong dtype: {0}. Expected: {1}.'.format( data_out.dtype, dtype_out)) # We do not need to copy join fields if data_out uses the same memory # as one of our input arrays. if data_out.base is None or data_out.base not in (data1_in, data2_in): for name in join_names: data_out[name][:] = data2_in[name] mask2 = weight2 != 0 if data1_in is None: for name in add_names: data_out[name][mask2] = data2_in[name][mask2] if weight is not None: data_out[weight][:] = weight2 else: # Accumulate add fields. mask1 = weight1 != 0 weight_sum = weight1 + weight2 for name in add_names: if data_out is not data1_in: data_out[name][mask1] = data1_in[name][mask1] data_out[name][mask2] += ( weight2[mask2] / weight_sum[mask2] * (data2_in[name][mask2] - data1_in[name][mask2])) if weight is not None: data_out[weight][:] = weight_sum return data_out
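# --- Illustrative sketch (not from the original source): the two algebraically
# equivalent forms of the weighted mean used by accumulate. The incremental
# form x1 + (x2 - x1)*w2/(w1 + w2) behaves better numerically when many
# spectra are folded in one at a time.
import numpy as np

x1, w1 = np.array([1.0, 2.0]), np.array([1.0, 3.0])
x2, w2 = np.array([2.0, 4.0]), np.array([1.0, 1.0])

direct = (w1 * x1 + w2 * x2) / (w1 + w2)
incremental = x1 + (x2 - x1) * w2 / (w1 + w2)
print(np.allclose(direct, incremental))  # True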
def accumulate(data1_in, data2_in, data_out=None, join=None, add=None,
               weight=None):
    """Combine the data from two spectra.

    Values x1 and x2 with corresponding weights w1 and w2 are combined as::

        x12 = (w1*x1 + w2*x2)/(w1 + w2)

    If no weight field is present for either input, a weight of one is used.
    If either input array is `masked
    <http://docs.scipy.org/doc/numpy/reference/maskedarray.html>`__, weights
    for masked entries will be set to zero. The output contains values for
    x12 and the accumulated weight::

        w12 = w1 + w2

    For example:

    >>> data1 = np.ones((10,), dtype=[('flux', float), ('ivar', float)])
    >>> data2 = np.ones((10,), dtype=[('flux', float), ('ivar', float)])
    >>> result = accumulate(data1, data2, add='flux', weight='ivar')
    >>> result[:3]
    array([(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)],
          dtype=[('flux', '<f8'), ('ivar', '<f8')])

    Any fields common to both inputs can also be copied to the output:

    >>> data1 = np.ones((10,), dtype=[('wlen', float), ('flux', float)])
    >>> data2 = np.ones((10,), dtype=[('wlen', float), ('flux', float)])
    >>> result = accumulate(data1, data2, join='wlen', add='flux')
    >>> result[:3]
    array([(1.0, 1.0), (1.0, 1.0), (1.0, 1.0)],
          dtype=[('wlen', '<f8'), ('flux', '<f8')])

    The actual calculation of x12 uses the expression::

        x12 = x1 + (x2 - x1)*w2/(w1 + w2)

    which has `better numerical properties
    <https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    #Weighted_incremental_algorithm>`__ when many spectra are iteratively
    accumulated using the following pattern:

    >>> result = None
    >>> data = np.ones((10,100),
    ... dtype=[('wlen', float), ('flux', float), ('ivar', float)])
    >>> for row in data:
    ...     result = accumulate(data1_in=result, data2_in=row,
    ...                         data_out=result, join='wlen', add='flux',
    ...                         weight='ivar')
    >>> result[:3]
    array([(1.0, 1.0, 10.0), (1.0, 1.0, 10.0), (1.0, 1.0, 10.0)],
          dtype=[('wlen', '<f8'), ('flux', '<f8'), ('ivar', '<f8')])

    With this pattern, the result array is allocated on the first iteration
    and then re-used for all subsequent iterations.

    Parameters
    ----------
    data1_in : numpy.ndarray or numpy.ma.MaskedArray or None
        First structured numpy array of input spectral data.
    data2_in : numpy.ndarray or numpy.ma.MaskedArray
        Second structured numpy array of input spectral data.
    data_out : numpy.ndarray or None
        Structured numpy array where output spectrum data should be written.
        If None is specified, then an appropriately sized array will be
        allocated and returned. Use this method to take control of the
        memory allocation and, for example, re-use the same output array for
        iterative accumulation.
    join : string or iterable of strings or None.
        A field name or a list of field names that are present in both
        inputs with identical values, and should be included in the output.
    add : string or iterable or None.
        A field name or a list of field names that are present in both
        inputs and whose values, x1 and x2, should be accumulated as x12 in
        the output.
    weight : string or None.
        The name of a field whose values provide the weights w1 and w2 used
        to calculate the accumulated x12 = (w1*x1 + w2*x2)/(w1 + w2). If the
        named field is not present in either input, a weight value of one
        will be used. The output array will contain a field with this name,
        if it is not None, containing values for w12.

    Returns
    -------
    numpy.ndarray
        Structured numpy array of accumulated result, containing all fields
        listed in the ``join``, ``add``, and ``weight`` arguments.
        Any values associated with a zero weight sample should be considered
        invalid.
""" if data1_in is not None and not isinstance(data1_in, np.ndarray): raise ValueError('data1_in is not a numpy array.') if not isinstance(data2_in, np.ndarray): raise ValueError('data2_in is not a numpy array.') if data_out is not None and not isinstance(data_out, np.ndarray): raise ValueError('data_out is not a numpy array.') if data1_in is not None: if data1_in.shape != data2_in.shape: raise ValueError( 'Inputs have different shapes: {0} != {1}.' .format(data1_in.shape, data2_in.shape)) data1_fields = data1_in.dtype.fields if data1_fields is None: raise ValueError('Input data1_in is not a structured array.') data2_fields = data2_in.dtype.fields if data2_fields is None: raise ValueError('Input data2_in is not a structured array.') shape_out = data2_in.shape dtype_out = [] # Find the intersection of field names in both input datasets. if data1_in is not None: shared_fields = set(data1_fields.keys()) & set(data2_fields.keys()) if len(shared_fields) == 0: raise ValueError('Inputs have no fields in common.') else: shared_fields = set(data2_fields.keys()) def prepare_names(arg, label): if arg is None: names = () elif isinstance(arg, basestring): names = (arg,) else: try: names = [name for name in arg] except TypeError: raise ValueError( 'Invalid {0} type: {1}.'.format(label, type(arg))) for name in names: if name not in shared_fields: raise ValueError( 'Invalid {0} field name: {1}.'.format(label, name)) if data1_in is not None: dtype1 = data1_fields[name][0] dtype2 = data2_fields[name][0] dtype_out.append((name, np.promote_types(dtype1, dtype2))) else: dtype_out.append((name, data2_fields[name][0])) return names join_names = prepare_names(join, 'join') add_names = prepare_names(add, 'add') if data1_in is not None: for name in join_names: if not np.array_equal(data1_in[name], data2_in[name]): raise ValueError( 'Cannot join on unmatched field: {0}.'.format(name)) if weight is not None: if not isinstance(weight, basestring): raise ValueError('Invalid weight type: {0}.'.format(type(weight))) if data1_in is not None: if weight in data1_fields: weight1 = data1_in[weight] else: weight1 = np.ones(shape_out) if weight in data2_fields: weight2 = data2_in[weight] else: weight2 = np.ones(shape_out) if data1_in is not None: dtype_out.append( (weight, np.promote_types(weight1.dtype, weight2.dtype))) else: dtype_out.append((weight, weight2.dtype)) else: if data1_in is not None: weight1 = np.ones(shape_out) weight2 = np.ones(shape_out) # Set weights to zero for any masked elements. Since each field has its # own mask, use the logical OR of all named join/add/weight fields. 
if data1_in is not None and ma.isMA(data1_in): mask = np.zeros(shape_out, dtype=bool) for name in join_names: mask = mask | data1_in[name].mask for name in add_names: mask = mask | data1_in[name].mask if weight is not None: mask = mask | data1_in[weight].mask weight1[mask] = 0 if np.any(mask) and weight is None: raise ValueError('Output weight required for masked input data.') if ma.isMA(data2_in): mask = np.zeros(shape_out, dtype=bool) for name in join_names: mask = mask | data2_in[name].mask for name in add_names: mask = mask | data2_in[name].mask if weight is not None: mask = mask | data2_in[weight].mask weight2[mask] = 0 if np.any(mask) and weight is None: raise ValueError('Output weight required for masked input data.') if len(dtype_out) == 0: raise ValueError('No result fields specified.') if data_out is None: data_out = np.zeros(shape_out, dtype_out) else: if data_out.shape != shape_out: raise ValueError( 'data_out has wrong shape: {0}. Expected: {1}.' .format(data_out.shape, shape_out)) if data_out.dtype != dtype_out: raise ValueError( 'data_out has wrong dtype: {0}. Expected: {1}.' .format(data_out.dtype, dtype_out)) # We do not need to copy join fields if data_out uses the same memory # as one of our input arrays. if data_out.base is None or data_out.base not in (data1_in, data2_in): for name in join_names: data_out[name][:] = data2_in[name] mask2 = weight2 != 0 if data1_in is None: for name in add_names: data_out[name][mask2] = data2_in[name][mask2] if weight is not None: data_out[weight][:] = weight2 else: # Accumulate add fields. mask1 = weight1 != 0 weight_sum = weight1 + weight2 for name in add_names: if data_out is not data1_in: data_out[name][mask1] = data1_in[name][mask1] data_out[name][mask2] += ( weight2[mask2] / weight_sum[mask2] * (data2_in[name][mask2] - data1_in[name][mask2])) if weight is not None: data_out[weight][:] = weight_sum return data_out
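# --- Illustrative sketch (not from the original source): the weight-zeroing
# step above in isolation. The logical OR of the masks of all named fields
# marks the samples whose weight must be forced to zero.
import numpy as np
import numpy.ma as ma

data = ma.ones((4,), dtype=[('flux', float), ('ivar', float)])
data['flux'][1] = ma.masked
data['ivar'][3] = ma.masked

mask = np.zeros(data.shape, dtype=bool)
for name in ('flux', 'ivar'):
    mask = mask | data[name].mask

weight = np.ones(data.shape)
weight[mask] = 0
print(weight)  # [1. 0. 1. 0.]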
def resample(data_in, x_in, x_out, y, data_out=None, kind='linear'):
    """Resample the data of one spectrum using interpolation.

    Dependent variables y1, y2, ... in the input data are resampled in the
    independent variable x using interpolation models y1(x), y2(x), ...
    evaluated on a new grid of x values. The independent variable will
    typically be a wavelength or frequency and the dependent variables can
    be fluxes, inverse variances, etc.

    Interpolation is intended for cases where the input and output grids
    have comparable densities. When neighboring samples are correlated,
    the resampling process should be essentially lossless. When the output
    grid is sparser than the input grid, it may be more appropriate to
    "downsample", i.e., average dependent variables over consecutive ranges
    of input samples.

    The basic usage of this function is:

    >>> data = np.ones((5,),
    ... [('wlen', float), ('flux', float), ('ivar', float)])
    >>> data['wlen'] = np.arange(4000, 5000, 200)
    >>> wlen_out = np.arange(4100, 4700, 200)
    >>> out = resample(data, 'wlen', wlen_out, ('flux', 'ivar'))
    >>> np.all(out ==
    ... np.array([(4100, 1.0, 1.0), (4300, 1.0, 1.0), (4500, 1.0, 1.0)],
    ... dtype=[('wlen', '<i8'), ('flux', '<f8'), ('ivar', '<f8')]))
    True

    The input grid can also be external to the structured array of spectral
    data, for example:

    >>> data = np.ones((5,), [('flux', float), ('ivar', float)])
    >>> wlen_in = np.arange(4000, 5000, 200)
    >>> wlen_out = np.arange(4100, 4900, 200)
    >>> out = resample(data, wlen_in, wlen_out, ('flux', 'ivar'))
    >>> np.all(out ==
    ... np.array([(1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0)],
    ... dtype=[('flux', '<f8'), ('ivar', '<f8')]))
    True

    If the output grid extends beyond the input grid, a `masked array
    <http://docs.scipy.org/doc/numpy/reference/maskedarray.html>`__ will be
    returned with any values requiring extrapolation masked:

    >>> wlen_out = np.arange(3500, 5500, 500)
    >>> out = resample(data, wlen_in, wlen_out, 'flux')
    >>> np.all(out.mask ==
    ... np.array([(True,), (False,), (False,), (True,)],
    ... dtype=[('flux', 'bool')]))
    True

    If the input data is masked, any output interpolated values that depend
    on an input masked value will be masked in the output:

    >>> data = ma.ones((5,), [('flux', float), ('ivar', float)])
    >>> data['flux'][2] = ma.masked
    >>> wlen_out = np.arange(4100, 4900, 200)
    >>> out = resample(data, wlen_in, wlen_out, 'flux')
    >>> np.all(out.mask ==
    ... np.array([(False,), (True,), (True,), (False,)],
    ... dtype=[('flux', 'bool')]))
    True

    Interpolation is performed using :class:`scipy.interpolate.interp1d`.

    Parameters
    ----------
    data_in : numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array of input spectral data to resample. The
        input array must be one-dimensional.
    x_in : string or numpy.ndarray
        A field name in data_in containing the independent variable to use
        for interpolation, or else an array of values with the same shape
        as the input data.
    x_out : numpy.ndarray
        An array of values for the independent variable where interpolation
        models should be evaluated to calculate the output values.
    y : string or iterable of strings.
        A field name or a list of field names present in the input data
        that should be resampled by interpolation and included in the
        output.
    data_out : numpy.ndarray or None
        Structured numpy array where the output result should be written.
        If None is specified, then an appropriately sized array will be
        allocated and returned. Use this method to take control of the
        memory allocation and, for example, re-use the same array when
        resampling many spectra.
    kind : string or integer
        Specify the kind of interpolation models to build using any of the
        forms allowed by :class:`scipy.interpolate.interp1d`. If any input
        dependent values are masked, only the ``nearest`` and ``linear``
        values are allowed.

    Returns
    -------
    numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array of the resampled result containing all ``y``
        fields and (if ``x_in`` is specified as a string) the output ``x``
        field. The output will be a :class:`numpy.ma.MaskedArray` if
        ``x_out`` extends beyond ``x_in`` or if ``data_in`` is masked.
    """
    if not isinstance(data_in, np.ndarray):
        raise ValueError('Invalid data_in type: {0}.'.format(type(data_in)))
    if data_in.dtype.fields is None:
        raise ValueError('Input data_in is not a structured array.')
    if len(data_in.shape) > 1:
        raise ValueError('Input data_in is multidimensional.')

    if isinstance(x_in, str):
        if x_in not in data_in.dtype.names:
            raise ValueError('No such x_in field: {0}.'.format(x_in))
        x_out_name = x_in
        x_in = data_in[x_in]
    else:
        if not isinstance(x_in, np.ndarray):
            raise ValueError('Invalid x_in type: {0}.'.format(type(x_in)))
        if x_in.shape != data_in.shape:
            raise ValueError('Incompatible shapes for x_in and data_in.')
        x_out_name = None

    if not isinstance(x_out, np.ndarray):
        raise ValueError('Invalid x_out type: {0}.'.format(type(x_out)))

    if ma.isMA(x_in) and np.any(x_in.mask):
        raise ValueError('Cannot resample masked x_in.')

    x_type = np.promote_types(x_in.dtype, x_out.dtype)

    dtype_out = []
    if x_out_name is not None:
        dtype_out.append((x_out_name, x_out.dtype))

    if isinstance(y, str):
        # Use a list instead of a tuple here so y_names can be used
        # to index data_in below.
        y_names = [y,]
    else:
        try:
            y_names = [name for name in y]
        except TypeError:
            raise ValueError('Invalid y type: {0}.'.format(type(y)))
    for not_first, y in enumerate(y_names):
        if y not in data_in.dtype.names:
            raise ValueError('No such y field: {0}.'.format(y))
        if not_first:
            if data_in[y].dtype != y_type:
                raise ValueError('All y fields must have the same type.')
        else:
            y_type = data_in[y].dtype
        dtype_out.append((y, y_type))

    y_shape = (len(y_names),)
    if ma.isMA(data_in):
        # Copy the structured 1D array into a 2D unstructured array
        # and set masked values to NaN.
        y_in = np.zeros(data_in.shape + y_shape, y_type)
        for i, y in enumerate(y_names):
            y_in[:, i] = data_in[y].filled(np.nan)
    else:
        if pkgr.parse_version(np.__version__) >= pkgr.parse_version('1.16'):
            # The slicing does not work in numpy 1.16 and above;
            # use structured_to_unstructured to get the slice we care about.
            y_in = rfn.structured_to_unstructured(
                data_in[y_names]).reshape(data_in.shape + y_shape)
        else:
            y_in = data_in[y_names]
            # View the structured 1D array as a 2D unstructured array
            # (without copying any memory).
            y_in = y_in.view(y_type).reshape(data_in.shape + y_shape)

    # interp1d will only propagate NaNs correctly for certain values of
    # `kind`. With numpy = 1.6 or 1.7, only 'nearest' and 'linear' work.
    # With numpy = 1.8 or 1.9, 'slinear' and kind = 0 or 1 also work.
    if np.any(np.isnan(y_in)):
        if kind not in ('nearest', 'linear'):
            raise ValueError(
                'Interpolation kind not supported for masked data: {0}.'
.format(kind)) try: interpolator = scipy.interpolate.interp1d( x_in, y_in, kind=kind, axis=0, copy=False, bounds_error=False, fill_value=np.nan) except NotImplementedError: raise ValueError('Interpolation kind not supported: {0}.'.format(kind)) shape_out = (len(x_out),) if data_out is None: data_out = np.empty(shape_out, dtype_out) else: if data_out.shape != shape_out: raise ValueError( 'data_out has wrong shape: {0}. Expected: {1}.' .format(data_out.shape, shape_out)) if data_out.dtype != dtype_out: raise ValueError( 'data_out has wrong dtype: {0}. Expected: {1}.' .format(data_out.dtype, dtype_out)) if x_out_name is not None: data_out[x_out_name][:] = x_out y_out = interpolator(x_out) for i,y in enumerate(y_names): data_out[y][:] = y_out[:,i] if ma.isMA(data_in) or np.any(np.isnan(y_out)): data_out = ma.MaskedArray(data_out) data_out.mask = False for y in y_names: data_out[y].mask = np.isnan(data_out[y].data) return data_out
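# --- Illustrative sketch (not from the original source): the masking step
# resample relies on, standalone. Points outside x_in come back as NaN from
# interp1d (fill_value=np.nan) and are then converted into mask entries.
import numpy as np
import numpy.ma as ma
import scipy.interpolate

x_in = np.arange(4000.0, 5000.0, 200.0)
y_in = np.ones_like(x_in)
interpolator = scipy.interpolate.interp1d(
    x_in, y_in, kind='linear', bounds_error=False, fill_value=np.nan)

x_out = np.arange(3500.0, 5500.0, 500.0)  # extends beyond x_in
y_out = ma.MaskedArray(interpolator(x_out))
y_out.mask = np.isnan(y_out.data)
print(y_out)  # [-- 1.0 1.0 --]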
def downsample(data_in, downsampling, weight=None, axis=-1, start_index=0,
               auto_trim=True, data_out=None):
    """Downsample spectral data by a constant factor.

    Downsampling consists of dividing the input data into fixed-size groups
    of consecutive bins, then calculating downsampled values as weighted
    averages within each group. The basic usage is:

    >>> data = np.ones((6,), dtype=[('flux', float), ('ivar', float)])
    >>> out = downsample(data, downsampling=2, weight='ivar')
    >>> np.all(out ==
    ... np.array([(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)],
    ... dtype=[('flux', '<f8'), ('ivar', '<f8')]))
    True

    Any partial group at the end of the input data will be silently ignored
    unless `auto_trim=False`:

    >>> out = downsample(data, downsampling=4, weight='ivar')
    >>> np.all(out ==
    ... np.array([(1.0, 4.0)], dtype=[('flux', '<f8'), ('ivar', '<f8')]))
    True
    >>> out = downsample(data, downsampling=4, weight='ivar', auto_trim=False)
    Traceback (most recent call last):
        ...
    ValueError: Input data does not evenly divide with downsampling = 4.

    A multi-dimensional array of spectra with the same binning can be
    downsampled in a single operation, for example:

    >>> data = np.ones((2,16,3,), dtype=[('flux', float), ('ivar', float)])
    >>> results = downsample(data, 4, axis=1)
    >>> results.shape
    (2, 4, 3)

    If no axis is specified, the last axis of the input array is assumed.

    If the input data is masked, only unmasked entries will be used to
    calculate the weighted averages for each downsampled group and the
    output will also be masked:

    >>> data = ma.ones((6,), dtype=[('flux', float), ('ivar', float)])
    >>> data.mask[3:] = True
    >>> out = downsample(data, 2, weight='ivar')
    >>> type(out) == ma.core.MaskedArray
    True

    If the input fields have different masks, their logical OR will be used
    for all output fields since, otherwise, each output field would require
    its own output weight field. As a consequence, masking a single input
    field is equivalent to masking all input fields.

    Parameters
    ----------
    data_in : numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array containing input spectrum data to downsample.
    downsampling : int
        Number of consecutive bins to combine into each downsampled bin.
        Must be at least one and not larger than the input data size.
    weight : string or None.
        The name of a field whose values provide the weights to use for
        downsampling. When None, a weight value of one will be used.
        The output array will contain a field with this name, unless it is
        None, containing values of the downsampled weights. All weights must
        be non-negative.
    axis : int
        Index of the axis to perform downsampling in. The default is to use
        the last index of the input data array.
    start_index : int
        Index of the first bin to use for downsampling. Any bins preceding
        the start bin will not be included in the downsampled results.
        Negative indices are not allowed.
    auto_trim : bool
        When True, any bins at the end of the input data that do not fill a
        complete downsampled bin will be automatically (and silently)
        trimmed. When False, a ValueError will be raised.
    data_out : numpy.ndarray or None
        Structured numpy array where output spectrum data should be written.
        If None is specified, then an appropriately sized array will be
        allocated and returned. Use this method to take control of the
        memory allocation and, for example, re-use the same output array for
        a sequence of downsampling operations.
Returns ------- numpy.ndarray or numpy.ma.MaskedArray Structured numpy array of downsampled result, containing the same fields as the input data and the same shape except along the specified downsampling axis. If the input data is masked, the output data will also be masked, with each output field's mask determined by the combination of the optional weight field mask and the corresponding input field mask. """ if not isinstance(data_in, np.ndarray): raise ValueError('Invalid data_in type: {0}.'.format(type(data_in))) if data_out is not None and not isinstance(data_out, np.ndarray): raise ValueError('Invalid data_out type: {0}.'.format(type(data_out))) shape_in = data_in.shape try: num_bins = shape_in[axis] except IndexError: raise ValueError('Invalid axis = {0}.'.format(axis)) if downsampling < 1 or downsampling > num_bins: raise ValueError('Invalid downsampling = {0}.'.format(downsampling)) if start_index < 0 or start_index >= num_bins: raise ValueError('Invalid start_index = {0}.'.format(start_index)) num_downsampled = (num_bins - start_index) // downsampling if num_downsampled <= 0: raise ValueError( 'Incompatible downsampling = {0} and start_index = {1}.' .format(downsampling, start_index)) stop_index = start_index + num_downsampled * downsampling assert stop_index <= num_bins if stop_index < num_bins and not auto_trim: raise ValueError( 'Input data does not evenly divide with downsampling = {0}.' .format(downsampling)) if weight is not None: if not isinstance(weight, basestring): raise ValueError('Invalid weight type: {0}.'.format(type(weight))) if weight in data_in.dtype.fields: # If data_in is a MaskedArray, weights_in will also be masked. weights_in = data_in[weight] if np.any(weights_in < 0): raise ValueError('Some input weights < 0.') else: raise ValueError('No such weight field: {0}.'.format(weight)) else: if ma.isMA(data_in): weights_in = ma.ones(shape_in) else: weights_in = np.ones(shape_in) shape_out = list(shape_in) shape_out[axis] = num_downsampled shape_out = tuple(shape_out) expanded_shape = list(shape_in) expanded_shape[axis] = downsampling expanded_shape.insert(axis, num_downsampled) sum_axis = axis + 1 if axis >= 0 else len(shape_in) + axis + 1 dtype_out = data_in.dtype if data_out is None: if ma.isMA(data_in): data_out = ma.empty(shape_out, dtype=data_in.dtype) data_out.mask = False else: data_out = np.empty(shape_out, dtype=data_in.dtype) else: if data_out.shape != shape_out: raise ValueError( 'data_out has wrong shape: {0}. Expected: {1}.' .format(data_out.shape, shape_out)) if data_out.dtype != dtype_out: raise ValueError( 'data_out has wrong dtype: {0}. Expected: {1}.' .format(data_out.dtype, dtype_out)) if ma.isMA(data_in): # Each field has an independent mask in the input, but we want to # use the same output weights for all fields. Use the logical OR # of the individual input field masks to achieve this. or_mask = np.zeros(shape_in, dtype=bool) for field in data_in.dtype.fields: or_mask = or_mask | data_in[field].mask weights_in.mask = or_mask # Loop over fields in the input data. weights_out = np.sum( weights_in[start_index:stop_index].reshape(expanded_shape), axis=sum_axis) for field in data_in.dtype.fields: if field == weight: continue weighted = ( weights_in[start_index:stop_index] * data_in[field][start_index:stop_index]) if ma.isMA(data_in): weighted.mask = or_mask data_out[field] = np.sum( weighted.reshape(expanded_shape), axis=sum_axis) / weights_out if weight is not None: data_out[weight] = weights_out return data_out
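# --- Illustrative sketch (not from the original source): how masked bins drop
# out of a downsampled group. Masking the weights with the field's mask means
# ma.sum skips those bins, so each group averages only its unmasked entries.
import numpy as np
import numpy.ma as ma

flux = ma.array([1.0, 5.0, 3.0, 4.0], mask=[False, True, False, False])
weights = ma.array(np.ones(4), mask=flux.mask)

expanded_shape = (2, 2)  # two groups of two bins each
weights_out = weights.reshape(expanded_shape).sum(axis=1)
flux_out = (weights * flux).reshape(expanded_shape).sum(axis=1) / weights_out
print(flux_out)  # [1.0 3.5] -- the masked 5.0 never enters the average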