def make_gene_map_1(self):
        count = 0
        self.iterator = itertools.product(range(self.time_len), range(self.lat_len), range(self.lon_len))
        for x_valid in self.iterator:
            # Zero-pad the binary counter to a fixed-width key.
            binary_string = bin(count)[2:].zfill(self.string_length)
            if ma.getdata(self.array[x_valid]) < 100:  # proxy test for unmasked points
                self.gene_map[binary_string] = {}
                self.gene_map[binary_string]["coordinate"] = tuple(x_valid)
                self.gene_map[binary_string]["value"] = self.array[x_valid]
                print(count, binary_string, self.gene_map[binary_string]["value"])  # debug
            count += 1
        self.last_valid_binary_string = binary_string
        not_valid_first = int(binary_string, 2) + 1
        not_valid_last = int("1" * self.string_length, 2)
        print(not_valid_last)
        print(x_valid)
        print(ma.isMA(self.array))

        # Pad the remaining binary strings so every key maps to something.
        for x_not_valid in range(not_valid_first, not_valid_last + 1):
            binary_string = bin(x_not_valid)[2:].zfill(self.string_length)
            self.gene_map[binary_string] = {}
            self.gene_map[binary_string]["coordinate"] = (999, 999, 999)
            self.gene_map[binary_string]["value"] = 1e06
            print(x_not_valid, binary_string, self.gene_map[binary_string]["value"])
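A note on the "< 100" test above: ma.getdata exposes the raw underlying values and ignores the mask, so it is only a proxy for "unmasked". A minimal sketch of the more direct check, assuming numpy.ma is imported as ma (the array here is illustrative):

import numpy.ma as ma

arr = ma.masked_array([1.0, 5.0], mask=[False, True])
print(ma.is_masked(arr[1]))      # True -- the direct test for a masked point
print(ma.getdata(arr[1]) < 100)  # also True: getdata ignores the mask entirely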
Example #2
def shiftgrid(lon0, datain, lonsin, start=False, cyclic=360.0):
    if np.fabs(lonsin[-1] - lonsin[0] - cyclic) > 1.e-4:
        # Use all data instead of raise ValueError, 'cyclic point not included'
        start_idx = 0
    else:
        # If cyclic, remove the duplicate point
        start_idx = 1
    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
        raise ValueError('lon0 outside of range of lonsin')
    i0 = np.argmin(np.fabs(lonsin - lon0))
    i0_shift = len(lonsin) - i0
    if ma.isMA(datain):
        dataout = ma.zeros(datain.shape, datain.dtype)
    else:
        dataout = np.zeros(datain.shape, datain.dtype)
    if ma.isMA(lonsin):
        lonsout = ma.zeros(lonsin.shape, lonsin.dtype)
    else:
        lonsout = np.zeros(lonsin.shape, lonsin.dtype)
    if start:
        lonsout[0:i0_shift] = lonsin[i0:]
    else:
        lonsout[0:i0_shift] = lonsin[i0:] - cyclic
    dataout[..., 0:i0_shift] = datain[..., i0:]
    if start:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx] + cyclic
    else:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx]
    dataout[..., i0_shift:] = datain[..., start_idx:i0 + start_idx]
    return dataout, lonsout
Example #3
def shiftgrid(lon0, datain, lonsin, start=True, cyclic=360.0):
    """
    Shift global lat/lon grid east or west.
    .. tabularcolumns:: |l|L|
    ==============   ====================================================
    Arguments        Description
    ==============   ====================================================
    lon0             starting longitude for shifted grid
                     (ending longitude if start=False). lon0 must be on
                     input grid (within the range of lonsin).
    datain           original data with longitude the right-most
                     dimension.
    lonsin           original longitudes.
    ==============   ====================================================
    .. tabularcolumns:: |l|L|
    ==============   ====================================================
    Keywords         Description
    ==============   ====================================================
    start            if True, lon0 represents the starting longitude
                     of the new grid. if False, lon0 is the ending
                     longitude. Default True.
    cyclic           width of periodic domain (default 360)
    ==============   ====================================================
    returns ``dataout,lonsout`` (data and longitudes on shifted grid).
    """
    if np.fabs(lonsin[-1] - lonsin[0] - cyclic) > 1.e-4:
        # Use all data instead of raise ValueError, 'cyclic point not included'
        start_idx = 0
    else:
        # If cyclic, remove the duplicate point
        start_idx = 1
    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
        raise ValueError('lon0 outside of range of lonsin')
    i0 = np.argmin(np.fabs(lonsin - lon0))
    i0_shift = len(lonsin) - i0
    if ma.isMA(datain):
        dataout = ma.zeros(datain.shape, datain.dtype)
    else:
        dataout = np.zeros(datain.shape, datain.dtype)
    if ma.isMA(lonsin):
        lonsout = ma.zeros(lonsin.shape, lonsin.dtype)
    else:
        lonsout = np.zeros(lonsin.shape, lonsin.dtype)
    if start:
        lonsout[0:i0_shift] = lonsin[i0:]
    else:
        lonsout[0:i0_shift] = lonsin[i0:] - cyclic
    dataout[..., 0:i0_shift] = datain[..., i0:]
    if start:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx] + cyclic
    else:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx]
    dataout[..., i0_shift:] = datain[..., start_idx:i0 + start_idx]
    return dataout, lonsout
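A hedged usage sketch for shiftgrid, assuming the definition above is in scope along with numpy as np and numpy.ma as ma (the toy grid is illustrative): shifting a cyclic 0-360 grid so the longitudes run from -180 to 180.

import numpy as np

lons = np.arange(0., 361., 10.)    # toy grid with the cyclic point included
data = np.sin(np.radians(lons))    # one value per longitude

# With start=False, lon0 is the ending longitude of the new grid.
dataout, lonsout = shiftgrid(180., data, lons, start=False)
print(lonsout[0], lonsout[-1])     # -180.0 180.0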
Example #5
def shiftgrid(lon0, datain, lonsin, start=True, cyclic=360.0):
    """
    Purpose::
        Shift global lat/lon grid east or west. This function is taken directly
        from the (unreleased) basemap 1.0.7 source code as version 1.0.6 does not
        currently support arrays with more than two dimensions.
        https://github.com/matplotlib/basemap
        
    Input::
        lon0 - starting longitude for shifted grid (ending longitude if start=False). 
               lon0 must be on input grid (within the range of lonsin).
        datain - original data with longitude the right-most dimension.
        lonsin - original longitudes.
        start  - if True, lon0 represents the starting longitude of the new grid. 
                 if False, lon0 is the ending longitude. Default True.
        cyclic - width of periodic domain (default 360)

    Output:: 
        dataout - data on shifted grid
        lonsout - lons on shifted grid
    """
    if np.fabs(lonsin[-1] - lonsin[0] - cyclic) > 1.e-4:
        # Use all data instead of raise ValueError, 'cyclic point not included'
        start_idx = 0
    else:
        # If cyclic, remove the duplicate point
        start_idx = 1
    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
        raise ValueError('lon0 outside of range of lonsin')
    i0 = np.argmin(np.fabs(lonsin - lon0))
    i0_shift = len(lonsin) - i0
    if ma.isMA(datain):
        dataout = ma.zeros(datain.shape, datain.dtype)
    else:
        dataout = np.zeros(datain.shape, datain.dtype)
    if ma.isMA(lonsin):
        lonsout = ma.zeros(lonsin.shape, lonsin.dtype)
    else:
        lonsout = np.zeros(lonsin.shape, lonsin.dtype)
    if start:
        lonsout[0:i0_shift] = lonsin[i0:]
    else:
        lonsout[0:i0_shift] = lonsin[i0:] - cyclic
    dataout[..., 0:i0_shift] = datain[..., i0:]
    if start:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx] + cyclic
    else:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx]
    dataout[..., i0_shift:] = datain[..., start_idx:i0 + start_idx]
    return dataout, lonsout
Example #6
    def set_data(self, x, y, A):
        if not ma.isMA(A):
            A = np.asarray(A)
        if x is None:
            x = np.arange(0, A.shape[1] + 1, dtype=np.float64)
        else:
            x = np.asarray(x, np.float64).ravel()
        if y is None:
            y = np.arange(0, A.shape[0] + 1, dtype=np.float64)
        else:
            y = np.asarray(y, np.float64).ravel()

        if A.shape[:2] != (y.size - 1, x.size - 1):
            print(A.shape)
            print(y.size)
            print(x.size)
            raise ValueError("Axes don't match array shape")
        if A.ndim not in [2, 3]:
            raise ValueError("A must be 2D or 3D")
        if A.ndim == 3 and A.shape[2] == 1:
            A.shape = A.shape[:2]
        self.is_grayscale = False
        if A.ndim == 3:
            if A.shape[2] in [3, 4]:
                if (A[:, :, 0] == A[:, :, 1]).all() and (A[:, :, 0]
                                                         == A[:, :, 2]).all():
                    self.is_grayscale = True
            else:
                raise ValueError("3D arrays must have RGB or RGBA as last dim")
        self._A = A
        self._Ax = x
        self._Ay = y
        self.update_dict['array'] = True
Example #7
    def make_gene_map_2(self):
        """
        The method that takes the attributes from the array and uses
        them to create a gene map for the array.
        The gene map is a dictionary which has a binary string as a key.
        The binary string is created by creating a binary bit string of 
        an appropriate length.
        The length is calculated 
        """
        count = 0
        self.iterator_one = itertools.product(range(self.time_len), range(self.lat_len), range(self.lon_len))
        ### Assign each binary string a location and a value from the data
        print("\nCreating gene-map dictionary...\n")
        print("Assigning valid locations to binary strings!\n")
        for x_valid in self.iterator_one:
            binary_string = bin(count)[2:].zfill(self.string_length)
            self.gene_map[binary_string] = {}
            if not ma.is_masked(self.array[x_valid]):
                self.gene_map[binary_string]["coordinate"] = tuple(x_valid)
                self.gene_map[binary_string]["value"] = self.array[x_valid]
                self.location_dict[x_valid[1:3]] = []
                self.location_dict_stdevs[x_valid[1:3]] = 0
                count += 1
        self.last_valid_binary_string = binary_string
        binary_string_old = binary_string
        not_valid_first = int(binary_string, 2) + 1
        not_valid_last = int("1" * (self.string_length), 2)  # added minus one just for nonmasked version NB
        self.count = count

        if self.count == self.count_non_masked:
            print("The counter corresponds with the non-masked count!\n")
        ### Pad the dictionary so every binary string has some value
        print("Assigning left-over binary strings to non-existent locations!\n")
        self.iterator_two = itertools.product(range(self.time_len), range(self.lat_len), range(self.lon_len))
        count_2 = not_valid_first
        for x_not_valid in self.iterator_two:
            binary_string = bin(count_2)[2:].zfill(self.string_length)
            self.gene_map[binary_string] = {}
            self.gene_map[binary_string]["coordinate"] = (999, 999, 999)  # x_not_valid
            self.gene_map[binary_string]["value"] = 1e09  # self.array[x_valid]
            if count_2 == not_valid_last:
                break
            else:
                count_2 += 1
        print "There are %d valid locations. \n" % count
        print "The last binary string is: ", binary_string
        print "The last binary string assigned to a valid locations is :", binary_string_old
        print "The length of binary string is: ", self.string_length
        print "The non-valid locations fall between %d and %d. \n" % (not_valid_first, not_valid_last)
        print "Is the array masked?: \n", ma.isMA(self.array)
        print "The gene-map has been created! \n"
Example #9
    def test_topo(self):
        file = self.f

        # basic case
        cstr_list = ('time|i0 zc|ZP|2500 yc|i5 xc|:', \
                'time|i0 zc|ZP|2500m yc|i5 xc|:', \
                'time|i0 zc|ZP|1500m yc|i5 xc|:', \
                'time|i0 zc|ZP|1000,1500m yc|i5.5 xc|:')
        results = ((21, ), (21, ), (21, ), (2, 21))

        for (cstr, res) in zip(cstr_list, results):
            if verbose: print(cstr)
            print("in test_topo")
            xsel = Nio.inp2xsel(file, 'PT', cstr)
            pt = file.variables['PT'][cstr]
            #pt = file.variables['ZP'][:]
            if verbose: print(pt.shape)
            if verbose:
                if ma.isMA(pt):
                    print(N.asarray(pt.filled()))
                else:
                    print(pt)
            assert_equal(pt.shape, res)

        # ERROR:
        #cstr = 'xc|10k yc|i5.5:8:0.5i zc|ZP|2.5,3.5 time|i0:6:3'
        #if verbose: print cstr
        #pt = file.variables['PT'][cstr]
        #if verbose: print pt.shape

        file.close()
Example #11
 def set_data(self, x, y, A):
     x = np.asarray(x, np.float32)
     y = np.asarray(y, np.float32)
     if not ma.isMA(A):
         A = np.asarray(A)
     if len(x.shape) != 1 or len(y.shape) != 1\
        or A.shape[0:2] != (y.shape[0], x.shape[0]):
         raise TypeError("Axes don't match array shape")
     if len(A.shape) not in [2, 3]:
         raise TypeError("Can only plot 2D or 3D data")
     if len(A.shape) == 3 and A.shape[2] not in [1, 3, 4]:
         raise TypeError(
             "3D arrays must have three (RGB) or four (RGBA) color components"
         )
     if len(A.shape) == 3 and A.shape[2] == 1:
         A.shape = A.shape[0:2]
     if len(A.shape) == 2:
         if A.dtype != np.uint8:
             A = (self.cmap(self.norm(A)) * 255).astype(np.uint8)
         else:
             A = np.repeat(A[:, :, np.newaxis], 4, 2)
             A[:, :, 3] = 255
     else:
         if A.dtype != np.uint8:
             A = (255 * A).astype(np.uint8)
         if A.shape[2] == 3:
              B = np.zeros(tuple(list(A.shape[0:2]) + [4]), np.uint8)
             B[:, :, 0:3] = A
             B[:, :, 3] = 255
             A = B
     self._A = A
     self._Ax = x
     self._Ay = y
     self._imcache = None
Example #12
    def set_data(self, A, shape=None):
        """
        Set the image array

        ACCEPTS: numpy/PIL Image A"""
        # check if data is PIL Image without importing Image
        if hasattr(A, 'getpixel'):
            self._A = pil_to_array(A)
        elif ma.isMA(A):
            self._A = A
        else:
            self._A = np.asarray(A)  # assume array

        if self._A.dtype != np.uint8 and not np.can_cast(
                self._A.dtype, float):
            raise TypeError("Image data can not convert to float")

        if (self._A.ndim not in (2, 3)
                or (self._A.ndim == 3 and self._A.shape[-1] not in (3, 4))):
            raise TypeError("Invalid dimensions for image data")

        self._imcache = None
        self._rgbacache = None
        self._oldxslice = None
        self._oldyslice = None
Example #14
def test_masked_unweighted():
    data_in = ma.ones((10,), dtype=[('x', float), ('y', float)])
    data_out = downsample(data_in, 2)
    assert ma.isMA(data_out)
    assert np.array_equal(data_out, data_in[:5])
    data_in['x'].mask[2] = True
    data_in.mask[7] = (True, True)
    data_out = downsample(data_in, 2)
    assert np.array_equal(data_out, data_in[:5])
Example #15
def _abs(x):
    """
    Works around numpy bug with abs() of masked arrays producing a 
    ComplexWarning(Casting complex values to real discards the imaginary part)
    """
    if ma.isMA(x):
        return ma.masked_array(np.abs(x.data), x.mask)
    else:
        return ma.masked_array(np.abs(x))
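A quick check of the workaround above, assuming _abs is in scope together with numpy as np and numpy.ma as ma: abs() of a masked complex array without triggering the ComplexWarning.

import numpy as np
import numpy.ma as ma

x = ma.masked_array([3 + 4j, 1 + 1j], mask=[False, True])
print(_abs(x))   # [5.0 --]: magnitude taken on the raw data, mask preserved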
Example #16
def test_one_masked():
    data1 = ma.ones((10,), dtype=[('f', float), ('w', float)])
    data2 = np.ones((10,), dtype=[('f', float), ('w', float)])
    data1.mask = False
    data1['f'].mask[2] = True
    result = accumulate(data1_in=data1, data2_in=data2, add='f', weight='w')
    assert not ma.isMA(result), 'Result should not be masked.'
    assert np.all(result['f'] == 1), 'Incorrect addition result.'
    assert np.array_equal(result['w'][1:4], (2, 1, 2)),\
        'Mask not used correctly.'
Example #17
def test_extrapolate():
    data = np.empty((10,), dtype=[('x', float), ('y', float)])
    data['x'] = np.arange(10.)
    data['y'] = np.arange(10.)
    x2 = np.arange(-1,11)
    result = resample(data, 'x', x2, 'y')
    assert ma.isMA(result)
    assert result['y'].mask[0]
    assert result['y'].mask[-1]
    assert np.array_equal(result['y'][1:-1], x2[1:-1])
Example #19
def test_masked_weighted():
    data_in = ma.ones((10, ), dtype=[('x', float), ('y', float)])
    data_out = downsample(data_in, 2, weight='y')
    assert ma.isMA(data_out)
    assert np.all(data_out['x'] == 1.)
    assert np.all(data_out['y'] == 2.)
    data_in['x'].mask[2] = True
    data_in.mask[7] = (True, True)
    data_out = downsample(data_in, 2, weight='y')
    assert np.all(data_out['x'] == 1.)
    assert np.all(data_out['y'] == (2., 1., 2., 1., 2.))
Example #21
def addcyclic(arrin, lonsin):
    """
    ``arrout, lonsout = addcyclic(arrin, lonsin)``
    adds cyclic (wraparound) point in longitude to ``arrin`` and ``lonsin``.
    """
    nlats = arrin.shape[0]
    nlons = arrin.shape[1]
    if ma.isMA(arrin):
        arrout = ma.zeros((nlats, nlons + 1), arrin.dtype)
    else:
        arrout = numpy.zeros((nlats, nlons + 1), arrin.dtype)
    arrout[:, 0:nlons] = arrin[:, :]
    arrout[:, nlons] = arrin[:, 0]
    if ma.isMA(lonsin):
        lonsout = ma.zeros(nlons + 1, lonsin.dtype)
    else:
        lonsout = numpy.zeros(nlons + 1, lonsin.dtype)
    lonsout[0:nlons] = lonsin[:]
    lonsout[nlons] = lonsin[-1] + lonsin[1] - lonsin[0]
    return arrout, lonsout
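A usage sketch for addcyclic, assuming the definition above with numpy imported: the appended column duplicates the first longitude, and the new longitude value extrapolates the grid spacing.

import numpy

arr = numpy.arange(12, dtype=float).reshape(3, 4)   # (nlats, nlons)
lons = numpy.array([0., 90., 180., 270.])

arrout, lonsout = addcyclic(arr, lons)
print(arrout.shape)   # (3, 5); arrout[:, 4] equals arrout[:, 0]
print(lonsout)        # [  0.  90. 180. 270. 360.]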
Example #23
def shiftgrid(lon0, datain, lonsin, start=False, cyclic=360.0):
    """
    lon0:   new starting longitude, to be in lonsin
    datain: input data to be shifted
    lonsin: longitude axis of data (supposed to be the last axis of datain)
    optional argument:
      start: (default: False)
      cyclic: (default: 360.0)
    shift data to start at longitude lon0
    return dataout, lonsout
    """

    if np.fabs(lonsin[-1] - lonsin[0] - cyclic) > 1.e-4:
        # Use all data instead of raise ValueError, 'cyclic point not included'
        start_idx = 0
    else:
        # If cyclic, remove the duplicate point
        start_idx = 1
    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
        raise ValueError('lon0 outside of range of lonsin')
    i0 = np.argmin(np.fabs(lonsin - lon0))
    i0_shift = len(lonsin) - i0
    if ma.isMA(datain):
        dataout = ma.zeros(datain.shape, datain.dtype)
    else:
        dataout = np.zeros(datain.shape, datain.dtype)
    if ma.isMA(lonsin):
        lonsout = ma.zeros(lonsin.shape, lonsin.dtype)
    else:
        lonsout = np.zeros(lonsin.shape, lonsin.dtype)
    if start:
        lonsout[0:i0_shift] = lonsin[i0:]
    else:
        lonsout[0:i0_shift] = lonsin[i0:] - cyclic
    dataout[..., 0:i0_shift] = datain[..., i0:]
    if start:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx] + cyclic
    else:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx]
    dataout[..., i0_shift:] = datain[..., start_idx:i0 + start_idx]
    return dataout, lonsout
Example #24
def test_propagated_array_mask():
    wlen = np.arange(10)
    flux = ma.ones((10,))
    flux.mask = False
    flux[2] = ma.masked
    result = redshift(z_in=0, z_out=1, rules=[
        {'name': 'wlen', 'exponent': +1, 'array_in': wlen},
        {'name': 'flux', 'exponent': -1, 'array_in': flux}])
    assert ma.isMA(result)
    assert not result['wlen'].mask[2], 'Input mask not propagated.'
    assert result['flux'].mask[2], 'Input mask not propagated.'
    assert not result['flux'].mask[3], 'Input mask not propagated.'
Example #25
    def __init__(self, var, name=None):
        """Create a VarInfo object.

        Arguments:
        var: numpy or numpy.ma array"""

        # Compute all necessary statistics in initialization, so that we don't
        # have to hold onto the variable in memory for later use (in case the
        # variable consumes a lot of memory).
        if not ma.isMA(var):
            var = ma.array(var)
        self._compute_stats(var)
        self.name = name
Example #27
def test_propagated_data_mask():
    data_in = ma.ones((10,), dtype=[
        ('wlen', float), ('flux', float), ('extra', int)])
    data_in['wlen'][1] = ma.masked
    data_in['extra'][2] = ma.masked
    result = redshift(z_in=0, z_out=1, data_in=data_in, rules=[
        {'name': 'wlen', 'exponent': +1},
        {'name': 'flux', 'exponent': -1}])
    assert ma.isMA(result)
    assert not result['wlen'].mask[0], 'Input mask not propagated.'
    assert not result['flux'].mask[0], 'Input mask not propagated.'
    assert result['wlen'].mask[1], 'Input mask not propagated.'
    assert result['extra'].mask[2], 'Input mask not propagated.'
Example #28
def test_both_masked():
    data1 = ma.ones((10,), dtype=[('f', float), ('w', float), ('i', int)])
    data2 = ma.ones((10,), dtype=[('f', float), ('w', float), ('i', int)])
    data1.mask = False
    data1['f'].mask[2:4] = True
    data2.mask = False
    data2['f'].mask[3:5] = True
    result = accumulate(data1_in=data1, data2_in=data2,
                        add='f', weight='w', join='i')
    assert not ma.isMA(result), 'Result should not be masked.'
    valid = result['w'] != 0
    assert np.all(result['f'][valid] == 1), 'Incorrect addition result.'
    assert np.array_equal(result['w'][1:6], (2, 1, 0, 1, 2)),\
        'Mask not used correctly.'
Example #29
def addcyclic(data):
    """
    Adds cyclic points to an array in rightmost dimension.
    data = input 2D array.
    """
    if data.ndim != 2:
        print('ERROR: Input array is not two-dimensional')
        return

    if MA.isMA(data):
        newdata = MA.concatenate((data, data[:, 0, N.newaxis]), axis=-1)
    else:
        newdata = N.concatenate((data, data[:, 0, N.newaxis]), axis=-1)

    return newdata
Example #30
def fill_gaps_in_2d_transect_once(Z):
    was_masked = ma.isMA(Z)

    # Ensure array has NaNs, not mask
    Z = ma.filled(Z, np.nan).copy()

    fill_values = np.nanmean(
        np.dstack((np.roll(Z, 1, axis=1), np.roll(Z, -1, axis=1))), axis=2)

    inds_to_fill = np.logical_and(np.isnan(Z), ~np.isnan(fill_values))

    Z[inds_to_fill] = fill_values[inds_to_fill]

    if was_masked:
        Z = ma.masked_invalid(Z)

    return Z
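A small demonstration, assuming the function above plus numpy as np and numpy.ma as ma (the transect values are illustrative): a fully masked middle column is filled with the mean of its horizontal neighbours, and the mask is restored afterwards.

import numpy as np
import numpy.ma as ma

Z = ma.masked_invalid(np.array([[1., np.nan, 3.],
                                [4., np.nan, 6.]]))
print(fill_gaps_in_2d_transect_once(Z))
# [[1.0 2.0 3.0]
#  [4.0 5.0 6.0]]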
Example #31
    def set_data(self, A, shape=None):
        """
        Set the image array

        ACCEPTS: numpy/PIL Image A"""
        # check if data is PIL Image without importing Image
        if hasattr(A, 'getpixel'):
            self._A = pil_to_array(A)
        elif ma.isMA(A):
            self._A = A
        else:
            self._A = np.asarray(A)  # assume array

        self._imcache = None
        self._rgbacache = None
        self._oldxslice = None
        self._oldyslice = None
Example #35
def _quantize(data, least_significant_digit):
    """
quantize data to improve compression. data is quantized using 
around(scale*data)/scale, where scale is 2**bits, and bits is determined 
from the least_significant_digit. For example, if 
least_significant_digit=1, bits will be 4.
    """
    precision = pow(10., -least_significant_digit)
    exp = np.log10(precision)
    if exp < 0:
        exp = int(np.floor(exp))
    else:
        exp = int(np.ceil(exp))
    bits = np.ceil(np.log2(pow(10., -exp)))
    scale = pow(2., bits)
    datout = np.around(scale * data) / scale
    if ma.isMA(datout):
        datout.set_fill_value(data.fill_value)
        return datout
    else:
        return datout
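A worked check of the docstring's example, assuming _quantize above with numpy imported: least_significant_digit=1 gives bits=4, so scale=16 and values are rounded to the nearest 1/16.

import numpy as np

x = np.array([0.12345, 2.71828])
print(_quantize(x, 1))   # [0.125  2.6875], i.e. around(16 * x) / 16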
Example #37
    def test_topo(self):
        # basic case
        cstr_list = ('time|i0 zc|ZP|2500 yc|i5 xc|:', \
                'time|i0 zc|ZP|2500m yc|i5 xc|:', \
                'time|i0 zc|ZP|1500m yc|i5 xc|:', \
                'time|i0 zc|ZP|1000,1500m yc|i5.5 xc|:')
        results = ((21, ), (21, ), (21, ), (2, 21))

        for (cstr, res) in zip(cstr_list, results):
            if verbose:
                print(cstr)
                print("in test_topo")
            xsel = Nio.inp2xsel(self.f, 'PT', cstr)
            pt = self.f.variables['PT'][cstr]
            #pt = self.f.variables['ZP'][:]
            if verbose: print(pt.shape)
            if verbose:
                if ma.isMA(pt):
                    print(N.asarray(pt.filled()))
                else:
                    print(pt)
            assert_equal(pt.shape, res)
Example #38
def bin_2d_transect(x, y, Z, x_out, y_out):
    """Bin transect Z(x, y), where x can be irregular

    Inputs
    ------
    x, y : 1D arrays
        x can be irregular, y cannot
    Z : 2D array
        Data at each point x, y. May be masked array
    x_out, y_out : 1D arrays
        Edges of grid on which to bin Z

    Returns
    -------
    Z_out : 2D array
        Shape (len(x_out) - 1, len(y_out) - 1)
    """
    if Z.ndim == 1:
        Z = Z[np.newaxis, :]

    # Preallocate result
    Nx, Ny = x_out.size - 1, y_out.size - 1
    Z_out = np.full((Nx, Ny), np.nan)

    filterwarnings('ignore', '.*Mean of empty slice*.')

    # Using loop for simplicity
    for i, j in np.ndindex(Nx, Ny):
        in_x_bin = np.logical_and(x > x_out[i], x < x_out[i + 1])
        in_y_bin = np.logical_and(y > y_out[j], y < y_out[j + 1])

        Z_in_bin = Z[in_y_bin, in_x_bin]
        Z_out[i, j] = np.nanmean(ma.filled(Z_in_bin, np.nan))

    if ma.isMA(Z):
        Z_out = ma.masked_invalid(Z_out)

    return Z_out
Example #39
    def set_data(self, A, shape=None):
        """
        Set the image array

        ACCEPTS: numpy/PIL Image A"""
        # check if data is PIL Image without importing Image
        if hasattr(A,'getpixel'):
            self._A = pil_to_array(A)
        elif ma.isMA(A):
            self._A = A
        else:
            self._A = np.asarray(A) # assume array

        if self._A.dtype != np.uint8 and not np.can_cast(self._A.dtype, float):
            raise TypeError("Image data can not convert to float")

        if (self._A.ndim not in (2, 3) or
            (self._A.ndim == 3 and self._A.shape[-1] not in (3, 4))):
            raise TypeError("Invalid dimensions for image data")

        self._imcache = None
        self._rgbacache = None
        self._oldxslice = None
        self._oldyslice = None
Example #40
def broadcast(*args):
    def _mask_or(a, b):
        return ma.mask_or(a, b, shrink=True)

    args = [_safe_masked_invalid(arg) for arg in args]
    if any([ma.isMA(arg) for arg in args]):
        vars = [ma.getdata(var) for var in args]
        mvars = [ma.getmaskarray(var) for var in args]
        outargs = list(map(np.array, np.broadcast_arrays(*vars)))
        masks = list(map(np.array, np.broadcast_arrays(*mvars)))
        mask = reduce(_mask_or, masks)
    else:
        mask = ma.nomask
        # Using map(np.array, ...) to get contiguous copies.
        outargs = list(map(np.array, np.broadcast_arrays(*args)))
    if outargs[0].ndim == 0:
        scalar = True
        for arg in outargs:
            arg.shape = (1, )
        if mask is not ma.nomask:
            mask.shape = (1, )
    else:
        scalar = False
    return scalar, mask, outargs
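A hedged usage sketch for broadcast (illustrative only: it assumes the definition above plus its helper _safe_masked_invalid and reduce, which are not shown here): the inputs are broadcast to a common shape and their masks are OR-ed together.

import numpy as np
import numpy.ma as ma

a = ma.masked_array([1., 2., 3.], mask=[False, True, False])
b = np.array([[10.], [20.]])

scalar, mask, (aa, bb) = broadcast(a, b)
print(scalar)      # False
print(aa.shape)    # (2, 3) -- both arguments broadcast to a common shape
print(mask[:, 1])  # [ True  True] -- a's mask propagated to every row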
Example #41
    def make_gene_map_2(self):
        """
		The method that takes the attributes from the array and uses
		them to create a gene map for the array.
		The gene map is a dictionary which has a binary string as a key.
		The binary string is created by creating a binary bit string of 
		an appropriate length.
		The length is calculated 
		"""
        # ~ self.array = ma.getdata(self.array)
        count = 0
        self.iterator = itertools.product(range(self.time_len), range(self.lat_len), range(self.lon_len))
        for x_valid in self.iterator:
            binary_string = bin(count)[2:].zfill(self.string_length)
            self.gene_map[binary_string] = {}
            print(self.array[x_valid])
            if ma.is_masked(self.array[x_valid]):
                print("masked")
            else:
                self.gene_map[binary_string]["coordinate"] = tuple(x_valid)
                self.gene_map[binary_string]["value"] = self.array[x_valid]
                count += 1
        self.last_valid_binary_string = binary_string
        binary_string_old = binary_string
        not_valid_first = int(binary_string, 2) + 1
        not_valid_last = int("1" * (self.string_length), 2)  # added minus one just for nonmasked version NB
        self.count = count
        print(count)
        print(binary_string)
        print(len(binary_string))
        print(not_valid_first)
        print(not_valid_last)
        print(x_valid)
        print(ma.isMA(self.array))

        for x_not_valid in range(not_valid_first, not_valid_last + 1):
            binary_string = bin(x_not_valid)[2:].zfill(self.string_length)
            self.gene_map[binary_string] = {}
            self.gene_map[binary_string]["coordinate"] = (999, 999, 999)
            self.gene_map[binary_string]["value"] = 1e06
            print(x_not_valid, binary_string, self.gene_map[binary_string]["value"])
        print(count)
        print(binary_string)
        print(binary_string_old)
        print(len(binary_string))
        print(self.string_length)
        print(not_valid_first)
        print(not_valid_last)
        print(x_valid)
        print(x_not_valid)
        print(ma.isMA(self.array))
Example #43
def delete_masked_points(*args):
    """
    Find all masked and/or non-finite points in a set of arguments,
    and return the arguments with only the unmasked points remaining.

    Arguments can be in any of 5 categories:

    1) 1-D masked arrays
    2) 1-D ndarrays
    3) ndarrays with more than one dimension
    4) other non-string iterables
    5) anything else

    The first argument must be in one of the first four categories;
    any argument with a length differing from that of the first
    argument (and hence anything in category 5) then will be
    passed through unchanged.

    Masks are obtained from all arguments of the correct length
    in categories 1, 2, and 4; a point is bad if masked in a masked
    array or if it is a nan or inf.  No attempt is made to
    extract a mask from categories 2, 3, and 4 if :meth:`np.isfinite`
    does not yield a Boolean array.

    All input arguments that are not passed unchanged are returned
    as ndarrays after removing the points or rows corresponding to
    masks in any of the arguments.

    A vastly simpler version of this function was originally
    written as a helper for Axes.scatter().

    """
    if not len(args):
        return ()
    if is_string_like(args[0]) or not iterable(args[0]):
        raise ValueError("First argument must be a sequence")
    nrecs = len(args[0])
    margs = []
    seqlist = [False] * len(args)
    for i, x in enumerate(args):
        if (not is_string_like(x)) and iterable(x) and len(x) == nrecs:
            seqlist[i] = True
            if ma.isMA(x):
                if x.ndim > 1:
                    raise ValueError("Masked arrays must be 1-D")
            else:
                x = np.asarray(x)
        margs.append(x)
    masks = []  # list of masks that are True where good
    for i, x in enumerate(margs):
        if seqlist[i]:
            if x.ndim > 1:
                continue  # Don't try to get nan locations unless 1-D.
            if ma.isMA(x):
                masks.append(~ma.getmaskarray(x))  # invert the mask
                xd = x.data
            else:
                xd = x
            try:
                mask = np.isfinite(xd)
                if isinstance(mask, np.ndarray):
                    masks.append(mask)
            except:  # Fixme: put in tuple of possible exceptions?
                pass
    if len(masks):
        mask = reduce(np.logical_and, masks)
        igood = mask.nonzero()[0]
        if len(igood) < nrecs:
            for i, x in enumerate(margs):
                if seqlist[i]:
                    margs[i] = x.take(igood, axis=0)
    for i, x in enumerate(margs):
        if seqlist[i] and ma.isMA(x):
            margs[i] = x.filled()
    return margs
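A usage sketch for delete_masked_points, assuming the definition above (it also relies on matplotlib's is_string_like/iterable helpers and on reduce, so treat this as illustrative): masked entries and NaNs are dropped from every same-length argument.

import numpy as np
import numpy.ma as ma

x = ma.masked_array([1., 2., 3., 4.], mask=[False, True, False, False])
y = np.array([10., 20., np.nan, 40.])

xg, yg = delete_masked_points(x, y)
print(xg)   # [1. 4.]
print(yg)   # [10. 40.]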
Example #44
def shiftgrid(lon0, datain, lonsin, start=True, cyclic=360.0):
    """
    Shift global lat/lon grid east or west.
    copied directly from mpl_toolkits v1.0.2 by mjh

    .. tabularcolumns:: |l|L|

    ==============   ====================================================
    Arguments        Description
    ==============   ====================================================
    lon0             starting longitude for shifted grid
                     (ending longitude if start=False). lon0 must be on
                     input grid (within the range of lonsin).
    datain           original data.
    lonsin           original longitudes.
    ==============   ====================================================

    .. tabularcolumns:: |l|L|

    ==============   ====================================================
    Keywords         Description
    ==============   ====================================================
    start            if True, lon0 represents the starting longitude
                     of the new grid. if False, lon0 is the ending
                     longitude. Default True.
    cyclic           width of periodic domain (default 360)
    ==============   ====================================================

    returns ``dataout,lonsout`` (data and longitudes on shifted grid).
    """
    if numpy.fabs(lonsin[-1] - lonsin[0] - cyclic) > 1.e-4:
        # Use all data instead of raise ValueError, 'cyclic point not included'
        start_idx = 0
    else:
        # If cyclic, remove the duplicate point
        start_idx = 1
    if lon0 < lonsin[0] or lon0 > lonsin[-1]:
        msg = 'lon0 outside of range of lonsin %(l0)4.1f %(st)4.1f %(ed)4.1f' % {
            'l0': lon0,
            'st': lonsin[0],
            'ed': lonsin[-1]
        }
        raise ValueError(msg)
    i0 = numpy.argmin(numpy.fabs(lonsin - lon0))
    i0_shift = len(lonsin) - i0
    if ma.isMA(datain):
        dataout = ma.zeros(datain.shape, datain.dtype)
    else:
        dataout = numpy.zeros(datain.shape, datain.dtype)
    if ma.isMA(lonsin):
        lonsout = ma.zeros(lonsin.shape, lonsin.dtype)
    else:
        lonsout = numpy.zeros(lonsin.shape, lonsin.dtype)
    if start:
        lonsout[0:i0_shift] = lonsin[i0:]
    else:
        lonsout[0:i0_shift] = lonsin[i0:] - cyclic
    dataout[:, 0:i0_shift] = datain[:, i0:]
    if start:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx] + cyclic
    else:
        lonsout[i0_shift:] = lonsin[start_idx:i0 + start_idx]
    dataout[:, i0_shift:] = datain[:, start_idx:i0 + start_idx]
    return dataout, lonsout
Example #46
def redshift(z_in, z_out, data_in=None, data_out=None, rules=[]):
    """Transform spectral data from redshift z_in to z_out.

    Each quantity X is transformed according to a power law::

        X_out = X_in * ((1 + z_out) / (1 + z_in))**exponent

    where exponents are specified with the ``rules`` argument. Exponents for
    some common cases are listed in the table below.

    ======== ================================================================
    Exponent Quantities
    ======== ================================================================
    0        flux density in photons/(s*cm^2*Ang)
    +1       wavelength, wavelength error, flux density in ergs/(s*cm^2*Hz)
    -1       frequency, frequency error, flux density in ergs/(s*cm^2*Ang)
    +2       inverse variance of flux density in ergs/(s*cm^2*Ang)
    -2       inverse variance of flux density in ergs/(s*cm^2*Hz)
    ======== ================================================================

    For example, to transform separate wavelength and flux arrays using the
    SDSS standard units of Ang and 1e-17 erg/(s*cm^2*Ang):

    >>> wlen = np.arange(4000., 10000.)
    >>> flux = np.ones(wlen.shape)
    >>> result = redshift(z_in=0, z_out=1, rules=[
    ... dict(name='wlen', exponent=+1, array_in=wlen),
    ... dict(name='flux', exponent=-1, array_in=flux)])
    >>> result.dtype
    dtype([('wlen', '<f8'), ('flux', '<f8')])
    >>> result['flux'][:5]
    array([ 0.5,  0.5,  0.5,  0.5,  0.5])

    The same calculation could be performed with the input data stored in
    a numpy structured array, in which case any additional fields are
    copied to the output array:

    >>> data = np.empty(6000, dtype=[
    ... ('wlen', float), ('flux', float), ('maskbits', int)])
    >>> data['wlen'] = np.arange(4000., 10000.)
    >>> data['flux'] = np.ones_like(data['wlen'])
    >>> result = redshift(z_in=0, z_out=1, data_in=data, rules=[
    ... dict(name='wlen', exponent=+1),
    ... dict(name='flux', exponent=-1)])
    >>> result.dtype
    dtype([('wlen', '<f8'), ('flux', '<f8'), ('maskbits', '<i8')])
    >>> result['flux'][:5]
    array([ 0.5,  0.5,  0.5,  0.5,  0.5])

    The transformed result is always a `numpy structured array
    <http://docs.scipy.org/doc/numpy/user/basics.rec.html>`__, with field
    (column) names determined by the rules you provide.

    The usual `numpy broadcasting rules
    <http://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`__ apply
    in the transformation expression above so, for example, the same redshift
    can be applied to multiple spectra, or different redshifts can be applied
    to the same spectrum with appropriate input shapes.

    Input arrays can have associated `masks
    <http://docs.scipy.org/doc/numpy/reference/maskedarray.html>`__ and these
    will be propagated to the output. Input arrays can also have `units
    <http://astropy.readthedocs.io/en/latest/units/index.html>`__ but these
    will not be used or propagated to the output since numpy structured arrays
    do not support per-column units.

    Parameters
    ----------
    z_in : float or numpy.ndarray
        Redshift(s) of the input spectral data, which must all be > -1.
    z_out : float or numpy.ndarray
        Redshift(s) of the output spectral data, which must all be > -1.
    data_in : numpy.ndarray
        Structured numpy array containing input spectrum data to transform. If
        none is specified, then all quantities must be provided as numpy arrays
        in the rules.
    data_out : numpy.ndarray
        Structured numpy array where output spectrum data should be written. If
        none is specified, then an appropriately sized array will be allocated
        and returned. Use this method to take control of the memory allocation
        and, for example, re-use the same output array for a sequence of
        transforms.
    rules : iterable
        An iterable object whose elements are dictionaries. Each dictionary
        specifies how one quantity will be transformed and must contain 'name'
        and 'exponent' values. If an 'array_in' value is also specified, it
        should refer to a numpy array containing the input values to transform.
        Otherwise, ``data_in[<name>]`` is assumed to contain the input values
        to transform.  If no ``rules`` are specified and ``data_in`` is
        provided, then ``data_out`` is just a copy of ``data_in``.

    Returns
    -------
    numpy.ndarray
        Array of spectrum data with the redshift transform applied. Equal to
        data_out when set, otherwise a new array is allocated. If ``data_in``
        is specified, then any fields not listed in ``rules`` are copied to
        ``data_out``, so effectively have an implicit exponent of zero.
    """

    if not isinstance(z_in, np.ndarray):
        z_in = float(z_in)
    if np.any(z_in <= -1):
        raise ValueError('Found invalid z_in <= -1.')
    if not isinstance(z_out, np.ndarray):
        z_out = float(z_out)
    if np.any(z_out <= -1):
        raise ValueError('Found invalid z_out <= -1.')
    z_factor = (1.0 + z_out) / (1.0 + z_in)

    if data_in is not None and not isinstance(data_in, np.ndarray):
        raise ValueError('Invalid data_in type: {0}.'.format(type(data_in)))
    if data_out is not None and not isinstance(data_out, np.ndarray):
        raise ValueError('Invalid data_out type: {0}.'.format(type(data_out)))

    if data_in is not None:
        shape_in = data_in.shape
        dtype_in = data_in.dtype
        masked_in = ma.isMA(data_in)
    else:
        shape_in = None
        dtype_in = []
        masked_in = False

    for i, rule in enumerate(rules):
        name = rule.get('name')
        if not isinstance(name, str):
            raise ValueError('Invalid name in rule: {0}'.format(name))
        try:
            exponent = float(rule.get('exponent'))
        except TypeError:
            raise ValueError(
                'Invalid exponent for {0}: {1}.'
                .format(name, rule.get('exponent')))
        if data_in is not None and name not in dtype_in.names:
            raise ValueError('No such data_in field named {0}.'.format(name))
        if data_out is not None and name not in data_out.dtype.names:
            raise ValueError('No such data_out field named {0}.'.format(name))
        array_in = rule.get('array_in')
        if array_in is not None:
            if data_in is not None:
                raise ValueError(
                    'Cannot specify data_in and array_in for {0}.'
                    .format(name))
            if not isinstance(array_in, np.ndarray):
                raise ValueError(
                    'Invalid array_in type for {0}: {1}.'
                    .format(name, type(array_in)))
            if shape_in is None:
                shape_in = array_in.shape
            elif shape_in != array_in.shape:
                raise ValueError(
                    'Incompatible array_in shape for {0}: {1}. Expected {2}.'
                    .format(name, array_in.shape, shape_in))
            dtype_in.append((name, array_in.dtype))
            if ma.isMA(array_in):
                masked_in = True
        else:
            if data_in is None:
                raise ValueError(
                    'Missing array_in for {0} (with no data_in).'.format(name))
            # Save a view of the input data column associated with this rule.
            rules[i]['array_in'] = data_in[name]

    shape_out = np.broadcast(np.empty(shape_in), z_factor).shape
    if data_out is None:
        if masked_in:
            data_out = ma.empty(shape_out, dtype=dtype_in)
            data_out.mask = False
        else:
            data_out = np.empty(shape_out, dtype=dtype_in)
    else:
        if masked_in and not ma.isMA(data_out):
            raise ValueError('data_out discards data_in mask.')
        if data_out.shape != shape_out:
            raise ValueError(
                'Invalid data_out shape: {0}. Expected {1}.'
                .format(data_out.shape, shape_out))
        if data_out.dtype != dtype_in:
            raise ValueError(
                'Invalid data_out dtype: {0}. Expected {1}.'
                .format(data_out.dtype, dtype_in))

    if data_in is not None:
        # Copy data_in to data_out so that any columns not listed in the
        # rules are propagated to the output.
        data_out[...] = data_in

    for rule in rules:
        name = rule.get('name')
        exponent = float(rule.get('exponent'))
        array_in = rule.get('array_in')
        data_out[name][:] = array_in * z_factor**exponent
        if data_in is None and ma.isMA(array_in):
            data_out[name].mask[...] = array_in.mask

    return data_out
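
# A standalone sketch of the copy-then-overwrite pattern used above: every
# field is first copied to the output, then each rule's field is rescaled in
# place by z_factor**exponent (the field names and values are illustrative).
import numpy as np

z_factor = 1.5
data_in = np.ones(3, dtype=[('wlen', float), ('flux', float)])
data_out = np.empty_like(data_in)
data_out[...] = data_in                      # unlisted fields pass through
for name, exponent in [('wlen', +1), ('flux', -1)]:
    data_out[name][:] = data_in[name] * z_factor ** exponent
print(data_out['wlen'])                      # [1.5 1.5 1.5]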
Example #47
def resample(data_in, x_in, x_out, y, data_out=None, kind='linear'):
    """Resample the data of one spectrum using interpolation.

    Dependent variables y1, y2, ... in the input data are resampled in the
    independent variable x using interpolation models y1(x), y2(x), ...
    evaluated on a new grid of x values. The independent variable will
    typically be a wavelength or frequency and the dependent variables can
    be fluxes, inverse variances, etc.

    Interpolation is intended for cases where the input and output grids have
    comparable densities. When neighboring samples are correlated, the
    resampling process should be essentially lossless.  When the output
    grid is sparser than the input grid, it may be more appropriate to
    "downsample", i.e., average dependent variables over consecutive ranges
    of input samples.

    The basic usage of this function is:

    >>> data = np.ones((5,),
    ... [('wlen', float), ('flux', float), ('ivar', float)])
    >>> data['wlen'] = np.arange(4000, 5000, 200)
    >>> wlen_out = np.arange(4100, 4700, 200)
    >>> resample(data, 'wlen', wlen_out, ('flux', 'ivar'))
    array([(4100, 1.0, 1.0), (4300, 1.0, 1.0), (4500, 1.0, 1.0)],
          dtype=[('wlen', '<i8'), ('flux', '<f8'), ('ivar', '<f8')])

    The input grid can also be external to the structured array of spectral
    data, for example:

    >>> data = np.ones((5,), [('flux', float), ('ivar', float)])
    >>> wlen_in = np.arange(4000, 5000, 200)
    >>> wlen_out = np.arange(4100, 4900, 200)
    >>> resample(data, wlen_in, wlen_out, ('flux', 'ivar'))
    array([(1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0)],
          dtype=[('flux', '<f8'), ('ivar', '<f8')])

    If the output grid extends beyond the input grid, a `masked array
    <http://docs.scipy.org/doc/numpy/reference/maskedarray.html>`__ will be
    returned with any values requiring extrapolation masked:

    >>> wlen_out = np.arange(3500, 5500, 500)
    >>> resample(data, wlen_in, wlen_out, 'flux')
    masked_array(data = [(--,) (1.0,) (1.0,) (--,)],
                 mask = [(True,) (False,) (False,) (True,)],
           fill_value = (1e+20,),
                dtype = [('flux', '<f8')])

    If the input data is masked, any output interpolated values that depend on
    an input masked value will be masked in the output:

    >>> data = ma.ones((5,), [('flux', float), ('ivar', float)])
    >>> data['flux'][2] = ma.masked
    >>> wlen_out = np.arange(4100, 4900, 200)
    >>> resample(data, wlen_in, wlen_out, 'flux')
    masked_array(data = [(1.0,) (--,) (--,) (1.0,)],
                 mask = [(False,) (True,) (True,) (False,)],
           fill_value = (1e+20,),
                dtype = [('flux', '<f8')])

    Interpolation is performed using :class:`scipy.interpolate.interp1d`.

    Parameters
    ----------
    data_in : numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array of input spectral data to resample. The input
        array must be one-dimensional.
    x_in : string or numpy.ndarray
        A field name in data_in containing the independent variable to use
        for interpolation, or else an array of values with the same shape
        as the input data.
    x_out : numpy.ndarray
        An array of values for the independent variable where interpolation
        models should be evaluated to calculate the output values.
    y : string or iterable of strings.
        A field name or a list of field names present in the input data that
        should be resampled by interpolation and included in the output.
    data_out : numpy.ndarray or None
        Structured numpy array where the output result should be written. If
        None is specified, then an appropriately sized array will be allocated
        and returned. Use this method to take control of the memory allocation
        and, for example, re-use the same array when resampling many spectra.
    kind : string or integer
        Specify the kind of interpolation models to build using any of the
        forms allowed by :class:`scipy.interpolate.interp1d`.  If any input
        dependent values are masked, only the ``nearest`` and ``linear``
        values are allowed.

    Returns
    -------
    numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array of the resampled result containing all ``y``
        fields and (if ``x_in`` is specified as a string) the output ``x``
        field.  The output will be a :class:`numpy.ma.MaskedArray` if ``x_out``
        extends beyond ``x_in`` or if ``data_in`` is masked.
    """
    if not isinstance(data_in, np.ndarray):
        raise ValueError('Invalid data_in type: {0}.'.format(type(data_in)))
    if data_in.dtype.fields is None:
        raise ValueError('Input data_in is not a structured array.')
    if len(data_in.shape) > 1:
        raise ValueError('Input data_in is multidimensional.')

    if isinstance(x_in, str):
        if x_in not in data_in.dtype.names:
            raise ValueError('No such x_in field: {0}.'.format(x_in))
        x_out_name = x_in
        x_in = data_in[x_in]
    else:
        if not isinstance(x_in, np.ndarray):
            raise ValueError('Invalid x_in type: {0}.'.format(type(x_in)))
        if x_in.shape != data_in.shape:
            raise ValueError('Incompatible shapes for x_in and data_in.')
        x_out_name = None

    if not isinstance(x_out, np.ndarray):
        raise ValueError('Invalid x_out type: {0}.'.format(type(x_out)))

    if ma.isMA(x_in) and np.any(x_in.mask):
        raise ValueError('Cannot resample masked x_in.')

    x_type = np.promote_types(x_in.dtype, x_out.dtype)

    dtype_out = []
    if x_out_name is not None:
        dtype_out.append((x_out_name, x_out.dtype))

    if isinstance(y, str):
        # Use a list instead of a tuple here so y_names can be used
        # to index data_in below.
        y_names = [y,]
    else:
        try:
            y_names = [name for name in y]
        except TypeError:
            raise ValueError('Invalid y type: {0}.'.format(type(y)))
    for not_first, y in enumerate(y_names):
        if y not in data_in.dtype.names:
            raise ValueError('No such y field: {0}.'.format(y))
        if not_first:
            if data_in[y].dtype != y_type:
                raise ValueError('All y fields must have the same type.')
        else:
            y_type = data_in[y].dtype
        dtype_out.append((y, y_type))

    y_shape = (len(y_names),)
    if ma.isMA(data_in):
        # Copy the structured 1D array into a 2D unstructured array
        # and set masked values to NaN.
        y_in = np.zeros(data_in.shape + y_shape, y_type)
        for i,y in enumerate(y_names):
            y_in[:,i] = data_in[y].filled(np.nan)
    else:
        y_in = data_in[y_names]
        # View the structured 1D array as a 2D unstructured array (without
        # copying any memory).
        y_in = y_in.view(y_type).reshape(data_in.shape + y_shape)
    # interp1d will only propagate NaNs correctly for certain values of `kind`.
    # With numpy = 1.6 or 1.7, only 'nearest' and 'linear' work.
    # With numpy = 1.8 or 1.9, 'slinear' and kind = 0 or 1 also work.
    if np.any(np.isnan(y_in)):
        if kind not in ('nearest', 'linear'):
            raise ValueError(
                'Interpolation kind not supported for masked data: {0}.'
                .format(kind))
    try:
        interpolator = scipy.interpolate.interp1d(
            x_in, y_in, kind=kind, axis=0, copy=False,
            bounds_error=False, fill_value=np.nan)
    except NotImplementedError:
        raise ValueError('Interpolation kind not supported: {0}.'.format(kind))

    shape_out = (len(x_out),)
    if data_out is None:
        data_out = np.empty(shape_out, dtype_out)
    else:
        if data_out.shape != shape_out:
            raise ValueError(
                'data_out has wrong shape: {0}. Expected: {1}.'
                .format(data_out.shape, shape_out))
        if data_out.dtype != dtype_out:
            raise ValueError(
                'data_out has wrong dtype: {0}. Expected: {1}.'
                .format(data_out.dtype, dtype_out))

    if x_out_name is not None:
        data_out[x_out_name][:] = x_out
    y_out = interpolator(x_out)
    for i,y in enumerate(y_names):
        data_out[y][:] = y_out[:,i]

    if ma.isMA(data_in) or np.any(np.isnan(y_out)):
        data_out = ma.MaskedArray(data_out)
        data_out.mask = False
        for y in y_names:
            data_out[y].mask = np.isnan(data_out[y].data)

    return data_out
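
# A standalone sketch of the mask -> NaN round trip used above: masked input
# values become NaN so interp1d can propagate them, and NaN results are
# turned back into masked values.
import numpy as np
import numpy.ma as ma

y = ma.array([1.0, 2.0, 3.0], mask=[False, True, False])
filled = y.filled(np.nan)                          # [ 1., nan,  3.]
remasked = ma.MaskedArray(filled, mask=np.isnan(filled))
print(remasked.mask.tolist())                      # [False, True, False]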
Example #48
def extract_loc(ref_lon, ref_lat, tlon, tlat, var):
    """
    Extract CCSM/POP model output for a given location (lat, lon).
    It finds the 4 model grid points around the location and computes
    their weighted average (weights = inverse of the distance). If a
    location is next to land, the function returns the weighted
    average of the closest grid points that are not on land.

    Input:
        ref_lon = longitude of position to be extracted (scalar)
        ref_lat = latitude of position to be extracted  (scalar)
        tlon    = model longitude grid (numpy array)
        tlat    = model latitude grid  (numpy array)
        var     = variable to be extracted (Masked 2-D or 3-D array)

    Output:
        wavg    = weighted average (scalar or 1-D array)
    """

    if var.ndim == 3: # 3D variable
        zmax, imax, jmax = var.shape
        threeD = True
    elif var.ndim == 2: # 2D variable
        imax, jmax = var.shape
        threeD = False
    else:
        print('extract_loc: check variable dimensions')
        return

    # find the indices of the 4 model grid points around the location
    Ilist, Jlist = find_stn_idx(ref_lon, ref_lat, tlon, tlat)

    # compute great circle distance from location to model grid points
    dist =  gc_dist(ref_lon, ref_lat, tlon, tlat)
    dist[dist==0] = 1.e-15 # avoid division by zero

    # arrays to store weights and data to be averaged
    if threeD: # 3D variable
        wghts  = MA.zeros((zmax,len(Ilist)*len(Jlist)),float)
        data   = MA.zeros((zmax,len(Ilist)*len(Jlist)),float)
        if MA.isMA(var): # mask weights
            dist_m = MA.array(N.resize(dist,var.shape),mask=var.mask)
        else:
            dist_m = N.array(N.resize(dist,var.shape))
    else:      # 2D variable
        wghts  = MA.zeros((len(Ilist)*len(Jlist)),float)
        data   = MA.zeros((len(Ilist)*len(Jlist)),float)
        if MA.isMA(var):
            dist_m = MA.array(dist,mask=var.mask) # mask weights
        else:
            dist_m = N.array(dist)

    # get the 4 model grid points and compute weights
    n = 0
    for i in Ilist:
        for j in Jlist:
            wghts[...,n] = 1./dist_m[...,i,j]
            data[...,n]  = var[...,i,j]
            n += 1

    # compute weighted average
    wavg = MA.average(data,axis=-1,weights=wghts)
    return wavg
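
# A minimal sketch of the inverse-distance weighting performed above, using
# plain numpy.ma (the distances and values here are made up):
import numpy as np
import numpy.ma as ma

dist = np.array([1.0, 2.0, 4.0, 8.0])              # distances to 4 points
vals = ma.array([10.0, 20.0, 30.0, 40.0],
                mask=[False, False, False, True])  # last point is on land
wghts = 1.0 / dist                                 # weights = inverse distance
wavg = ma.average(vals, weights=wghts)             # masked point is ignored
print(wavg)                                        # ~15.7143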
Example #49
def downsample(data_in,
               downsampling,
               weight=None,
               axis=-1,
               start_index=0,
               auto_trim=True,
               data_out=None):
    """Downsample spectral data by a constant factor.

    Downsampling consists of dividing the input data into fixed-size groups of
    consecutive bins, then calculating downsampled values as weighted averages
    within each group.  The basic usage is:

    >>> data = np.ones((6,), dtype=[('flux', float), ('ivar', float)])
    >>> out = downsample(data, downsampling=2, weight='ivar')
    >>> np.all(out ==
    ... np.array([(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)],
    ... dtype=[('flux', '<f8'), ('ivar', '<f8')]))
    True

    Any partial group at the end of the input data will be silently ignored
    unless ``auto_trim=False``:

    >>> out = downsample(data, downsampling=4, weight='ivar')
    >>> np.all(out ==
    ... np.array([(1.0, 4.0)], dtype=[('flux', '<f8'), ('ivar', '<f8')]))
    True
    >>> out = downsample(data, downsampling=4, weight='ivar', auto_trim=False)
    Traceback (most recent call last):
        ...
    ValueError: Input data does not evenly divide with downsampling = 4.

    A multi-dimensional array of spectra with the same binning can be
    downsampled in a single operation, for example:

    >>> data = np.ones((2,16,3,), dtype=[('flux', float), ('ivar', float)])
    >>> results = downsample(data, 4, axis=1)
    >>> results.shape
    (2, 4, 3)

    If no axis is specified, the last axis of the input array is assumed.

    If the input data is masked, only unmasked entries will be used to calculate
    the weighted averages for each downsampled group and the output will also be
    masked:

    >>> data = ma.ones((6,), dtype=[('flux', float), ('ivar', float)])
    >>> data.mask[3:] = True
    >>> out = downsample(data, 2, weight='ivar')
    >>> type(out) == ma.core.MaskedArray
    True

    If the input fields have different masks, their logical OR will be used for
    all output fields since, otherwise, each output field would require its
    own output weight field.  As a consequence, masking a single input field
    is equivalent to masking all input fields.

    Parameters
    ----------
    data_in : numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array containing input spectrum data to downsample.
    downsampling : int
        Number of consecutive bins to combine into each downsampled bin.
        Must be at least one and not larger than the input data size.
    weight : string or None.
        The name of a field whose values provide the weights to use for
        downsampling.  When None, a weight value of one will be used.
        The output array will contain a field with this name, unless it is
        None, containing values of the downsampled weights.  All weights must
        be non-negative.
    start_index : int
        Index of the first bin to use for downsampling. Any bins preceding
        the start bin will not be included in the downsampled results. Negative
        indices are not allowed.
    axis : int
        Index of the axis to perform downsampling in. The default is to use
        the last index of the input data array.
    auto_trim : bool
        When True, any bins at the end of the input data that do not fill a
        complete downsampled bin will be automatically (and silently) trimmed.
        When False, a ValueError will be raised.
    data_out : numpy.ndarray or None
        Structured numpy array where output spectrum data should be written. If
        none is specified, then an appropriately sized array will be allocated
        and returned. Use this method to take control of the memory allocation
        and, for example, re-use the same output array for a sequence of
        downsampling operations.

    Returns
    -------
    numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array of downsampled result, containing the same
        fields as the input data and the same shape except along the specified
        downsampling axis. If the input data is masked, the output data will
        also be masked, with each output field's mask determined by the
        combination of the optional weight field mask and the corresponding
        input field mask.
    """
    if not isinstance(data_in, np.ndarray):
        raise ValueError('Invalid data_in type: {0}.'.format(type(data_in)))
    if data_out is not None and not isinstance(data_out, np.ndarray):
        raise ValueError('Invalid data_out type: {0}.'.format(type(data_out)))

    shape_in = data_in.shape
    try:
        num_bins = shape_in[axis]
    except IndexError:
        raise ValueError('Invalid axis = {0}.'.format(axis))

    if downsampling < 1 or downsampling > num_bins:
        raise ValueError('Invalid downsampling = {0}.'.format(downsampling))
    if start_index < 0 or start_index >= num_bins:
        raise ValueError('Invalid start_index = {0}.'.format(start_index))

    num_downsampled = (num_bins - start_index) // downsampling
    if num_downsampled <= 0:
        raise ValueError(
            'Incompatible downsampling = {0} and start_index = {1}.'.format(
                downsampling, start_index))
    stop_index = start_index + num_downsampled * downsampling
    assert stop_index <= num_bins
    if stop_index < num_bins and not auto_trim:
        raise ValueError(
            'Input data does not evenly divide with downsampling = {0}.'.
            format(downsampling))

    if weight is not None:
        if not isinstance(weight, str):
            raise ValueError('Invalid weight type: {0}.'.format(type(weight)))
        if weight in data_in.dtype.fields:
            # If data_in is a MaskedArray, weights_in will also be masked.
            weights_in = data_in[weight]
            if np.any(weights_in < 0):
                raise ValueError('Some input weights < 0.')
        else:
            raise ValueError('No such weight field: {0}.'.format(weight))
    else:
        if ma.isMA(data_in):
            weights_in = ma.ones(shape_in)
        else:
            weights_in = np.ones(shape_in)

    shape_out = list(shape_in)
    shape_out[axis] = num_downsampled
    shape_out = tuple(shape_out)
    expanded_shape = list(shape_in)
    expanded_shape[axis] = downsampling
    expanded_shape.insert(axis, num_downsampled)
    sum_axis = axis + 1 if axis >= 0 else len(shape_in) + axis + 1

    dtype_out = data_in.dtype
    if data_out is None:
        if ma.isMA(data_in):
            data_out = ma.empty(shape_out, dtype=data_in.dtype)
            data_out.mask = False
        else:
            data_out = np.empty(shape_out, dtype=data_in.dtype)
    else:
        if data_out.shape != shape_out:
            raise ValueError(
                'data_out has wrong shape: {0}. Expected: {1}.'.format(
                    data_out.shape, shape_out))
        if data_out.dtype != dtype_out:
            raise ValueError(
                'data_out has wrong dtype: {0}. Expected: {1}.'.format(
                    data_out.dtype, dtype_out))

    if ma.isMA(data_in):
        # Each field has an independent mask in the input, but we want to
        # use the same output weights for all fields.  Use the logical OR
        # of the individual input field masks to achieve this.
        or_mask = np.zeros(shape_in, dtype=bool)
        for field in data_in.dtype.fields:
            or_mask = or_mask | data_in[field].mask
        weights_in.mask = or_mask

    # Loop over fields in the input data.
    weights_out = np.sum(
        weights_in[start_index:stop_index].reshape(expanded_shape),
        axis=sum_axis)
    for field in data_in.dtype.fields:
        if field == weight:
            continue
        weighted = (weights_in[start_index:stop_index] *
                    data_in[field][start_index:stop_index])
        if ma.isMA(data_in):
            weighted.mask = or_mask
        data_out[field] = np.sum(weighted.reshape(expanded_shape),
                                 axis=sum_axis) / weights_out
    if weight is not None:
        data_out[weight] = weights_out

    return data_out
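
# A standalone sketch of the reshape trick used above: a length-6 axis is
# grouped into 3 groups of 2 by inserting a new axis, then summed within
# each group.
import numpy as np

x = np.arange(6.0)               # [0. 1. 2. 3. 4. 5.]
grouped = x.reshape(3, 2)        # the inserted axis holds each group
print(grouped.sum(axis=1))       # [1. 5. 9.]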
Example #50
def accumulate(data1_in,
               data2_in,
               data_out=None,
               join=None,
               add=None,
               weight=None):
    """Combine the data from two spectra.

    Values x1 and x2 with corresponding weights w1 and w2 are combined as::

        x12 = (w1*x1 + w2*x2)/(w1 + w2)

    If no weight field is present for either input, a weight of one is used.
    If either input array is `masked
    <http://docs.scipy.org/doc/numpy/reference/maskedarray.html>`__, weights
    for masked entries will be set to zero. The output contains values for
    x12 and the accumulated weight::

        w12 = w1 + w2

    For example:

    >>> data1 = np.ones((10,), dtype=[('flux', float), ('ivar', float)])
    >>> data2 = np.ones((10,), dtype=[('flux', float), ('ivar', float)])
    >>> result = accumulate(data1, data2, add='flux', weight='ivar')
    >>> np.all(result[:3] ==
    ... np.array([(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)],
    ... dtype=[('flux', '<f8'), ('ivar', '<f8')]))
    True

    Any fields common to both inputs can also be copied to the output:

    >>> data1 = np.ones((10,), dtype=[('wlen', float), ('flux', float)])
    >>> data2 = np.ones((10,), dtype=[('wlen', float), ('flux', float)])
    >>> result = accumulate(data1, data2, join='wlen', add='flux')
    >>> np.all(result[:3] ==
    ... np.array([(1.0, 1.0), (1.0, 1.0), (1.0, 1.0)],
    ... dtype=[('wlen', '<f8'), ('flux', '<f8')]))
    True

    The actual calculation of x12 uses the expression::

        x12 = x1 + (x2 - x1)*w2/(w1 + w2)

    which has `better numerical properties
    <https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    #Weighted_incremental_algorithm>`__ when many spectra are
    iteratively accumulated using the following pattern:

    >>> result = None
    >>> data = np.ones((10,100),
    ... dtype=[('wlen', float), ('flux', float), ('ivar', float)])
    >>> for row in data:
    ...     result = accumulate(data1_in=result, data2_in=row, data_out=result,
    ...                         join='wlen', add='flux', weight='ivar')
    >>> np.all(result[:3] ==
    ... np.array([(1.0, 1.0, 10.0), (1.0, 1.0, 10.0), (1.0, 1.0, 10.0)],
    ... dtype=[('wlen', '<f8'), ('flux', '<f8'), ('ivar', '<f8')]))
    True

    With this pattern, the result array is allocated on the first iteration
    and then re-used for all subsequent iterations.

    Parameters
    ----------
    data1_in : numpy.ndarray or numpy.ma.MaskedArray or None
        First structured numpy array of input spectral data.
    data2_in : numpy.ndarray or numpy.ma.MaskedArray
        Second structured numpy array of input spectral data.
    data_out : numpy.ndarray or None
        Structured numpy array where output spectrum data should be written. If
        None is specified, then an appropriately sized array will be allocated
        and returned. Use this method to take control of the memory allocation
        and, for example, re-use the same output array for iterative
        accumulation.
    join : string or iterable of strings or None.
        A field name or a list of field names that are present in both inputs
        with identical values, and should be included in the output.
    add : string or iterable or None.
        A field name or a list of field names that are present in both inputs
        and whose values, x1 and x2, should be accumulated as x12 in the
        output.
    weight : string or None.
        The name of a field whose values provide the weights w1 and w2 used
        to calculate the accumulated x12 = (w1*x1 + w2*x2)/(w1 + w2).  If the
        named field is not present in either input a weight value of one
        will be used.  The output array will contain a field with this name,
        if it is not None, containing values for w12.

    Returns
    -------
    numpy.ndarray
        Structured numpy array of accumulated result, containing
        all fields listed in the ``join``, ``add``, and ``weight`` arguments.
        Any values associated with a zero weight sample should be considered
        invalid.
    """
    if data1_in is not None and not isinstance(data1_in, np.ndarray):
        raise ValueError('data1_in is not a numpy array.')
    if not isinstance(data2_in, np.ndarray):
        raise ValueError('data2_in is not a numpy array.')
    if data_out is not None and not isinstance(data_out, np.ndarray):
        raise ValueError('data_out is not a numpy array.')

    if data1_in is not None:
        if data1_in.shape != data2_in.shape:
            raise ValueError(
                'Inputs have different shapes: {0} != {1}.'.format(
                    data1_in.shape, data2_in.shape))
        data1_fields = data1_in.dtype.fields
        if data1_fields is None:
            raise ValueError('Input data1_in is not a structured array.')

    data2_fields = data2_in.dtype.fields
    if data2_fields is None:
        raise ValueError('Input data2_in is not a structured array.')
    shape_out = data2_in.shape
    dtype_out = []

    # Find the intersection of field names in both input datasets.
    if data1_in is not None:
        shared_fields = set(data1_fields.keys()) & set(data2_fields.keys())
        if len(shared_fields) == 0:
            raise ValueError('Inputs have no fields in common.')
    else:
        shared_fields = set(data2_fields.keys())

    def prepare_names(arg, label):
        if arg is None:
            names = ()
        elif isinstance(arg, str):
            names = (arg, )
        else:
            try:
                names = [name for name in arg]
            except TypeError:
                raise ValueError('Invalid {0} type: {1}.'.format(
                    label, type(arg)))
        for name in names:
            if name not in shared_fields:
                raise ValueError('Invalid {0} field name: {1}.'.format(
                    label, name))
            if data1_in is not None:
                dtype1 = data1_fields[name][0]
                dtype2 = data2_fields[name][0]
                dtype_out.append((name, np.promote_types(dtype1, dtype2)))
            else:
                dtype_out.append((name, data2_fields[name][0]))
        return names

    join_names = prepare_names(join, 'join')
    add_names = prepare_names(add, 'add')

    if data1_in is not None:
        for name in join_names:
            if not np.array_equal(data1_in[name], data2_in[name]):
                raise ValueError(
                    'Cannot join on unmatched field: {0}.'.format(name))

    if weight is not None:
        if not isinstance(weight, str):
            raise ValueError('Invalid weight type: {0}.'.format(type(weight)))
        if data1_in is not None:
            if weight in data1_fields:
                weight1 = data1_in[weight]
            else:
                weight1 = np.ones(shape_out)
        if weight in data2_fields:
            weight2 = data2_in[weight]
        else:
            weight2 = np.ones(shape_out)
        if data1_in is not None:
            dtype_out.append(
                (weight, np.promote_types(weight1.dtype, weight2.dtype)))
        else:
            dtype_out.append((weight, weight2.dtype))
    else:
        if data1_in is not None:
            weight1 = np.ones(shape_out)
        weight2 = np.ones(shape_out)

    # Set weights to zero for any masked elements. Since each field has its
    # own mask, use the logical OR of all named join/add/weight fields.
    if data1_in is not None and ma.isMA(data1_in):
        mask = np.zeros(shape_out, dtype=bool)
        for name in join_names:
            mask = mask | data1_in[name].mask
        for name in add_names:
            mask = mask | data1_in[name].mask
        if weight is not None:
            mask = mask | data1_in[weight].mask
        weight1[mask] = 0
        if np.any(mask) and weight is None:
            raise ValueError('Output weight required for masked input data.')
    if ma.isMA(data2_in):
        mask = np.zeros(shape_out, dtype=bool)
        for name in join_names:
            mask = mask | data2_in[name].mask
        for name in add_names:
            mask = mask | data2_in[name].mask
        if weight is not None:
            mask = mask | data2_in[weight].mask
        weight2[mask] = 0
        if np.any(mask) and weight is None:
            raise ValueError('Output weight required for masked input data.')

    if len(dtype_out) == 0:
        raise ValueError('No result fields specified.')

    if data_out is None:
        data_out = np.zeros(shape_out, dtype_out)
    else:
        if data_out.shape != shape_out:
            raise ValueError(
                'data_out has wrong shape: {0}. Expected: {1}.'.format(
                    data_out.shape, shape_out))
        if data_out.dtype != dtype_out:
            raise ValueError(
                'data_out has wrong dtype: {0}. Expected: {1}.'.format(
                    data_out.dtype, dtype_out))

    # We do not need to copy join fields if data_out uses the same memory
    # as one of our input arrays.
    if data_out.base is None or data_out.base not in (data1_in, data2_in):
        for name in join_names:
            data_out[name][:] = data2_in[name]

    mask2 = weight2 != 0
    if data1_in is None:
        for name in add_names:
            data_out[name][mask2] = data2_in[name][mask2]
        if weight is not None:
            data_out[weight][:] = weight2
    else:
        # Accumulate add fields.
        mask1 = weight1 != 0
        weight_sum = weight1 + weight2
        for name in add_names:
            if data_out is not data1_in:
                data_out[name][mask1] = data1_in[name][mask1]
            data_out[name][mask2] += (
                weight2[mask2] / weight_sum[mask2] *
                (data2_in[name][mask2] - data1_in[name][mask2]))

        if weight is not None:
            data_out[weight][:] = weight_sum

    return data_out
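
# A quick numerical check that the incremental update above matches the
# direct weighted mean (the values are arbitrary):
import numpy as np

x1, w1 = np.array([1.0, 2.0]), np.array([1.0, 3.0])
x2, w2 = np.array([3.0, 4.0]), np.array([2.0, 1.0])
direct = (w1 * x1 + w2 * x2) / (w1 + w2)
incremental = x1 + (x2 - x1) * w2 / (w1 + w2)
print(np.allclose(direct, incremental))  # True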
Example #51
def accumulate(data1_in, data2_in, data_out=None,
               join=None, add=None, weight=None):
    """Combine the data from two spectra.

    Values x1 and x2 with corresponding weights w1 and w2 are combined as::

        x12 = (w1*x1 + w2*x2)/(w1 + w2)

    If no weight field is present for either input, a weight of one is used.
    If either input array is `masked
    <http://docs.scipy.org/doc/numpy/reference/maskedarray.html>`__, weights
    for masked entries will be set to zero. The output contains values for
    x12 and the accumulated weight::

        w12 = w1 + w2

    For example:

    >>> data1 = np.ones((10,), dtype=[('flux', float), ('ivar', float)])
    >>> data2 = np.ones((10,), dtype=[('flux', float), ('ivar', float)])
    >>> result = accumulate(data1, data2, add='flux', weight='ivar')
    >>> result[:3]
    array([(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)],
          dtype=[('flux', '<f8'), ('ivar', '<f8')])

    Any fields common to both inputs can also be copied to the output:

    >>> data1 = np.ones((10,), dtype=[('wlen', float), ('flux', float)])
    >>> data2 = np.ones((10,), dtype=[('wlen', float), ('flux', float)])
    >>> result = accumulate(data1, data2, join='wlen', add='flux')
    >>> result[:3]
    array([(1.0, 1.0), (1.0, 1.0), (1.0, 1.0)],
          dtype=[('wlen', '<f8'), ('flux', '<f8')])

    The actual calculation of x12 uses the expression::

        x12 = x1 + (x2 - x1)*w2/(w1 + w2)

    which has `better numerical properties
    <https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    #Weighted_incremental_algorithm>`__ when many spectra are
    iteratively accumulated using the following pattern:

    >>> result = None
    >>> data = np.ones((10,100),
    ... dtype=[('wlen', float), ('flux', float), ('ivar', float)])
    >>> for row in data:
    ...     result = accumulate(data1_in=result, data2_in=row, data_out=result,
    ...                         join='wlen', add='flux', weight='ivar')
    >>> result[:3]
    array([(1.0, 1.0, 10.0), (1.0, 1.0, 10.0), (1.0, 1.0, 10.0)],
          dtype=[('wlen', '<f8'), ('flux', '<f8'), ('ivar', '<f8')])

    With this pattern, the result array is allocated on the first iteration
    and then re-used for all subsequent iterations.

    Parameters
    ----------
    data1_in : numpy.ndarray or numpy.ma.MaskedArray or None
        First structured numpy array of input spectral data.
    data2_in : numpy.ndarray or numpy.ma.MaskedArray
        Second structured numpy array of input spectral data.
    data_out : numpy.ndarray or None
        Structured numpy array where output spectrum data should be written. If
        None is specified, then an appropriately sized array will be allocated
        and returned. Use this method to take control of the memory allocation
        and, for example, re-use the same output array for iterative
        accumulation.
    join : string or iterable of strings or None.
        A field name or a list of field names that are present in both inputs
        with identical values, and should be included in the output.
    add : string or iterable or None.
        A field name or a list of field names that are present in both inputs
        and whose values, x1 and x2, should be accumulated as x12 in the
        output.
    weight : string or None.
        The name of a field whose values provide the weights w1 and w2 used
        to calculate the accumulated x12 = (w1*x1 + w2*x2)/(w1 + w2).  If the
        named field is not present in either input a weight value of one
        will be used.  The output array will contain a field with this name,
        if it is not None, containing values for w12.

    Returns
    -------
    numpy.ndarray
        Structured numpy array of accumulated result, containing
        all fields listed in the ``join``, ``add``, and ``weight`` arguments.
        Any values associated with a zero weight sample should be considered
        invalid.
    """
    if data1_in is not None and not isinstance(data1_in, np.ndarray):
        raise ValueError('data1_in is not a numpy array.')
    if not isinstance(data2_in, np.ndarray):
        raise ValueError('data2_in is not a numpy array.')
    if data_out is not None and not isinstance(data_out, np.ndarray):
        raise ValueError('data_out is not a numpy array.')

    if data1_in is not None:
        if data1_in.shape != data2_in.shape:
            raise ValueError(
                'Inputs have different shapes: {0} != {1}.'
                .format(data1_in.shape, data2_in.shape))
        data1_fields = data1_in.dtype.fields
        if data1_fields is None:
            raise ValueError('Input data1_in is not a structured array.')

    data2_fields = data2_in.dtype.fields
    if data2_fields is None:
        raise ValueError('Input data2_in is not a structured array.')
    shape_out = data2_in.shape
    dtype_out = []

    # Find the intersection of field names in both input datasets.
    if data1_in is not None:
        shared_fields = set(data1_fields.keys()) & set(data2_fields.keys())
        if len(shared_fields) == 0:
            raise ValueError('Inputs have no fields in common.')
    else:
        shared_fields = set(data2_fields.keys())

    def prepare_names(arg, label):
        if arg is None:
            names = ()
        elif isinstance(arg, str):
            names = (arg,)
        else:
            try:
                names = [name for name in arg]
            except TypeError:
                raise ValueError(
                    'Invalid {0} type: {1}.'.format(label, type(arg)))
        for name in names:
            if name not in shared_fields:
                raise ValueError(
                    'Invalid {0} field name: {1}.'.format(label, name))
            if data1_in is not None:
                dtype1 = data1_fields[name][0]
                dtype2 = data2_fields[name][0]
                dtype_out.append((name, np.promote_types(dtype1, dtype2)))
            else:
                dtype_out.append((name, data2_fields[name][0]))
        return names

    join_names = prepare_names(join, 'join')
    add_names = prepare_names(add, 'add')

    if data1_in is not None:
        for name in join_names:
            if not np.array_equal(data1_in[name], data2_in[name]):
                raise ValueError(
                    'Cannot join on unmatched field: {0}.'.format(name))

    if weight is not None:
        if not isinstance(weight, str):
            raise ValueError('Invalid weight type: {0}.'.format(type(weight)))
        if data1_in is not None:
            if weight in data1_fields:
                weight1 = data1_in[weight]
            else:
                weight1 = np.ones(shape_out)
        if weight in data2_fields:
            weight2 = data2_in[weight]
        else:
            weight2 = np.ones(shape_out)
        if data1_in is not None:
            dtype_out.append(
                (weight, np.promote_types(weight1.dtype, weight2.dtype)))
        else:
            dtype_out.append((weight, weight2.dtype))
    else:
        if data1_in is not None:
            weight1 = np.ones(shape_out)
        weight2 = np.ones(shape_out)

    # Set weights to zero for any masked elements. Since each field has its
    # own mask, use the logical OR of all named join/add/weight fields.
    if data1_in is not None and ma.isMA(data1_in):
        mask = np.zeros(shape_out, dtype=bool)
        for name in join_names:
            mask = mask | data1_in[name].mask
        for name in add_names:
            mask = mask | data1_in[name].mask
        if weight is not None:
            mask = mask | data1_in[weight].mask
        weight1[mask] = 0
        if np.any(mask) and weight is None:
            raise ValueError('Output weight required for masked input data.')
    if ma.isMA(data2_in):
        mask = np.zeros(shape_out, dtype=bool)
        for name in join_names:
            mask = mask | data2_in[name].mask
        for name in add_names:
            mask = mask | data2_in[name].mask
        if weight is not None:
            mask = mask | data2_in[weight].mask
        weight2[mask] = 0
        if np.any(mask) and weight is None:
            raise ValueError('Output weight required for masked input data.')

    if len(dtype_out) == 0:
        raise ValueError('No result fields specified.')

    if data_out is None:
        data_out = np.zeros(shape_out, dtype_out)
    else:
        if data_out.shape != shape_out:
            raise ValueError(
                'data_out has wrong shape: {0}. Expected: {1}.'
                .format(data_out.shape, shape_out))
        if data_out.dtype != dtype_out:
            raise ValueError(
                'data_out has wrong dtype: {0}. Expected: {1}.'
                .format(data_out.dtype, dtype_out))

    # We do not need to copy join fields if data_out uses the same memory
    # as one of our input arrays.
    if data_out.base is None or data_out.base not in (data1_in, data2_in):
        for name in join_names:
            data_out[name][:] = data2_in[name]

    mask2 = weight2 != 0
    if data1_in is None:
        for name in add_names:
            data_out[name][mask2] = data2_in[name][mask2]
        if weight is not None:
            data_out[weight][:] = weight2
    else:
        # Accumulate add fields.
        mask1 = weight1 != 0
        weight_sum = weight1 + weight2
        for name in add_names:
            if data_out is not data1_in:
                data_out[name][mask1] = data1_in[name][mask1]
            data_out[name][mask2] += (
                weight2[mask2] / weight_sum[mask2] *
                (data2_in[name][mask2] - data1_in[name][mask2]))

        if weight is not None:
            data_out[weight][:] = weight_sum

    return data_out
Example #52
def resample(data_in, x_in, x_out, y, data_out=None, kind='linear'):
    """Resample the data of one spectrum using interpolation.

    Dependent variables y1, y2, ... in the input data are resampled in the
    independent variable x using interpolation models y1(x), y2(x), ...
    evaluated on a new grid of x values. The independent variable will
    typically be a wavelength or frequency and the dependent variables can
    be fluxes, inverse variances, etc.

    Interpolation is intended for cases where the input and output grids have
    comparable densities. When neighboring samples are correlated, the
    resampling process should be essentially lossless.  When the output
    grid is sparser than the input grid, it may be more appropriate to
    "downsample", i.e., average dependent variables over consecutive ranges
    of input samples.

    The basic usage of this function is:

    >>> data = np.ones((5,),
    ... [('wlen', float), ('flux', float), ('ivar', float)])
    >>> data['wlen'] = np.arange(4000, 5000, 200)
    >>> wlen_out = np.arange(4100, 4700, 200)
    >>> out = resample(data, 'wlen', wlen_out, ('flux', 'ivar'))
    >>> np.all(out ==
    ... np.array([(4100, 1.0, 1.0), (4300, 1.0, 1.0), (4500, 1.0, 1.0)],
    ... dtype=[('wlen', '<i8'), ('flux', '<f8'), ('ivar', '<f8')]))
    True

    The input grid can also be external to the structured array of spectral
    data, for example:

    >>> data = np.ones((5,), [('flux', float), ('ivar', float)])
    >>> wlen_in = np.arange(4000, 5000, 200)
    >>> wlen_out = np.arange(4100, 4900, 200)
    >>> out = resample(data, wlen_in, wlen_out, ('flux', 'ivar'))
    >>> np.all(out ==
    ... np.array([(1.0, 1.0), (1.0, 1.0), (1.0, 1.0), (1.0, 1.0)],
    ... dtype=[('flux', '<f8'), ('ivar', '<f8')]))
    True

    If the output grid extends beyond the input grid, a `masked array
    <http://docs.scipy.org/doc/numpy/reference/maskedarray.html>`__ will be
    returned with any values requiring extrapolation masked:

    >>> wlen_out = np.arange(3500, 5500, 500)
    >>> out = resample(data, wlen_in, wlen_out, 'flux')
    >>> np.all(out.mask ==
    ... np.array([(True,), (False,), (False,), (True,)],
    ... dtype=[('flux', 'bool')]))
    True

    If the input data is masked, any output interpolated values that depend on
    an input masked value will be masked in the output:

    >>> data = ma.ones((5,), [('flux', float), ('ivar', float)])
    >>> data['flux'][2] = ma.masked
    >>> wlen_out = np.arange(4100, 4900, 200)
    >>> out = resample(data, wlen_in, wlen_out, 'flux')
    >>> np.all(out.mask ==
    ... np.array([(False,), (True,), (True,), (False,)],
    ... dtype=[('flux', 'bool')]))
    True

    Interpolation is performed using :class:`scipy.interpolate.interp1d`.

    Parameters
    ----------
    data_in : numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array of input spectral data to resample. The input
        array must be one-dimensional.
    x_in : string or numpy.ndarray
        A field name in data_in containing the independent variable to use
        for interpolation, or else an array of values with the same shape
        as the input data.
    x_out : numpy.ndarray
        An array of values for the independent variable where interpolation
        models should be evaluated to calculate the output values.
    y : string or iterable of strings.
        A field name or a list of field names present in the input data that
        should be resampled by interpolation and included in the output.
    data_out : numpy.ndarray or None
        Structured numpy array where the output result should be written. If
        None is specified, then an appropriately sized array will be allocated
        and returned. Use this method to take control of the memory allocation
        and, for example, re-use the same array when resampling many spectra.
    kind : string or integer
        Specify the kind of interpolation models to build using any of the
        forms allowed by :class:`scipy.interpolate.interp1d`.  If any input
        dependent values are masked, only the ``nearest`` and ``linear``
        values are allowed.

    Returns
    -------
    numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array of the resampled result containing all ``y``
        fields and (if ``x_in`` is specified as a string) the output ``x``
        field.  The output will be a :class:`numpy.ma.MaskedArray` if ``x_out``
        extends beyond ``x_in`` or if ``data_in`` is masked.
    """
    if not isinstance(data_in, np.ndarray):
        raise ValueError('Invalid data_in type: {0}.'.format(type(data_in)))
    if data_in.dtype.fields is None:
        raise ValueError('Input data_in is not a structured array.')
    if len(data_in.shape) > 1:
        raise ValueError('Input data_in is multidimensional.')

    if isinstance(x_in, str):
        if x_in not in data_in.dtype.names:
            raise ValueError('No such x_in field: {0}.'.format(x_in))
        x_out_name = x_in
        x_in = data_in[x_in]
    else:
        if not isinstance(x_in, np.ndarray):
            raise ValueError('Invalid x_in type: {0}.'.format(type(x_in)))
        if x_in.shape != data_in.shape:
            raise ValueError('Incompatible shapes for x_in and data_in.')
        x_out_name = None

    if not isinstance(x_out, np.ndarray):
        raise ValueError('Invalid x_out type: {0}.'.format(type(x_out)))

    if ma.isMA(x_in) and np.any(x_in.mask):
        raise ValueError('Cannot resample masked x_in.')

    x_type = np.promote_types(x_in.dtype, x_out.dtype)

    dtype_out = []
    if x_out_name is not None:
        dtype_out.append((x_out_name, x_out.dtype))

    if isinstance(y, str):
        # Use a list instead of a tuple here so y_names can be used
        # to index data_in below.
        y_names = [y,]
    else:
        try:
            y_names = [name for name in y]
        except TypeError:
            raise ValueError('Invalid y type: {0}.'.format(type(y)))
    for not_first, y in enumerate(y_names):
        if y not in data_in.dtype.names:
            raise ValueError('No such y field: {0}.'.format(y))
        if not_first:
            if data_in[y].dtype != y_type:
                raise ValueError('All y fields must have the same type.')
        else:
            y_type = data_in[y].dtype
        dtype_out.append((y, y_type))

    y_shape = (len(y_names),)
    if ma.isMA(data_in):
        # Copy the structured 1D array into a 2D unstructured array
        # and set masked values to NaN.
        y_in = np.zeros(data_in.shape + y_shape, y_type)
        for i,y in enumerate(y_names):
            y_in[:,i] = data_in[y].filled(np.nan)
    else:
        if pkgr.parse_version(np.__version__) >= pkgr.parse_version('1.16'):
            # Multi-field slices can no longer be viewed as plain arrays in
            # numpy >= 1.16, so use structured_to_unstructured instead.
            y_in = rfn.structured_to_unstructured(
                data_in[y_names]).reshape(data_in.shape + y_shape)
        else:
            y_in = data_in[y_names]
            # View the structured 1D array as a 2D unstructured array (without
            # copying any memory).
            y_in = y_in.view(y_type).reshape(data_in.shape + y_shape)
       
    # interp1d will only propagate NaNs correctly for certain values of `kind`.
    # With numpy = 1.6 or 1.7, only 'nearest' and 'linear' work.
    # With numpy = 1.8 or 1.9, 'slinear' and kind = 0 or 1 also work.
    if np.any(np.isnan(y_in)):
        if kind not in ('nearest', 'linear'):
            raise ValueError(
                'Interpolation kind not supported for masked data: {0}.'
                .format(kind))
    try:
        interpolator = scipy.interpolate.interp1d(
            x_in, y_in, kind=kind, axis=0, copy=False,
            bounds_error=False, fill_value=np.nan)
    except NotImplementedError:
        raise ValueError('Interpolation kind not supported: {0}.'.format(kind))

    shape_out = (len(x_out),)
    if data_out is None:
        data_out = np.empty(shape_out, dtype_out)
    else:
        if data_out.shape != shape_out:
            raise ValueError(
                'data_out has wrong shape: {0}. Expected: {1}.'
                .format(data_out.shape, shape_out))
        if data_out.dtype != dtype_out:
            raise ValueError(
                'data_out has wrong dtype: {0}. Expected: {1}.'
                .format(data_out.dtype, dtype_out))

    if x_out_name is not None:
        data_out[x_out_name][:] = x_out
    y_out = interpolator(x_out)
    for i,y in enumerate(y_names):
        data_out[y][:] = y_out[:,i]

    if ma.isMA(data_in) or np.any(np.isnan(y_out)):
        data_out = ma.MaskedArray(data_out)
        data_out.mask = False
        for y in y_names:
            data_out[y].mask = np.isnan(data_out[y].data)

    return data_out
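
# A standalone sketch of the numpy >= 1.16 branch above: a multi-field
# selection is converted to a plain 2D array with structured_to_unstructured
# (available in numpy.lib.recfunctions since 1.16).
import numpy as np
import numpy.lib.recfunctions as rfn

d = np.ones((3,), dtype=[('flux', float), ('ivar', float)])
u = rfn.structured_to_unstructured(d[['flux', 'ivar']])
print(u.shape)                   # (3, 2)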
Example #53
def downsample(data_in, downsampling, weight=None, axis=-1, start_index=0,
               auto_trim=True, data_out=None):
    """Downsample spectral data by a constant factor.

    Downsampling consists of dividing the input data into fixed-size groups of
    consecutive bins, then calculating downsampled values as weighted averages
    within each group.  The basic usage is:

    >>> data = np.ones((6,), dtype=[('flux', float), ('ivar', float)])
    >>> out = downsample(data, downsampling=2, weight='ivar')
    >>> np.all(out ==
    ... np.array([(1.0, 2.0), (1.0, 2.0), (1.0, 2.0)],
    ... dtype=[('flux', '<f8'), ('ivar', '<f8')]))
    True

    Any partial group at the end of the input data will be silently ignored
    unless ``auto_trim=False``:

    >>> out = downsample(data, downsampling=4, weight='ivar')
    >>> np.all(out ==
    ... np.array([(1.0, 4.0)], dtype=[('flux', '<f8'), ('ivar', '<f8')]))
    True
    >>> out = downsample(data, downsampling=4, weight='ivar', auto_trim=False)
    Traceback (most recent call last):
        ...
    ValueError: Input data does not evenly divide with downsampling = 4.

    A multi-dimensional array of spectra with the same binning can be
    downsampled in a single operation, for example:

    >>> data = np.ones((2,16,3,), dtype=[('flux', float), ('ivar', float)])
    >>> results = downsample(data, 4, axis=1)
    >>> results.shape
    (2, 4, 3)

    If no axis is specified, the last axis of the input array is assumed.

    If the input data is masked, only unmasked entries will be used to calculate
    the weighted averages for each downsampled group and the output will also be
    masked:

    >>> data = ma.ones((6,), dtype=[('flux', float), ('ivar', float)])
    >>> data.mask[3:] = True
    >>> out = downsample(data, 2, weight='ivar')
    >>> type(out) == ma.core.MaskedArray
    True

    If the input fields have different masks, their logical OR will be used for
    all output fields since, otherwise, each output field would require its
    own output weight field.  As a consequence, masking a single input field
    is equivalent to masking all input fields.

    Parameters
    ----------
    data_in : numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array containing input spectrum data to downsample.
    downsampling : int
        Number of consecutive bins to combine into each downsampled bin.
        Must be at least one and not larger than the input data size.
    weight : string or None.
        The name of a field whose values provide the weights to use for
        downsampling.  When None, a weight value of one will be used.
        The output array will contain a field with this name, unless it is
        None, containing values of the downsampled weights.  All weights must
        be non-negative.
    start_index : int
        Index of the first bin to use for downsampling. Any bins preceding
        the start bin will not be included in the downsampled results. Negative
        indices are not allowed.
    axis : int
        Index of the axis to perform downsampling in. The default is to use
        the last index of the input data array.
    auto_trim : bool
        When True, any bins at the end of the input data that do not fill a
        complete downsampled bin will be automatically (and silently) trimmed.
        When False, a ValueError will be raised.
    data_out : numpy.ndarray or None
        Structured numpy array where output spectrum data should be written. If
        none is specified, then an appropriately sized array will be allocated
        and returned. Use this method to take control of the memory allocation
        and, for example, re-use the same output array for a sequence of
        downsampling operations.

    Returns
    -------
    numpy.ndarray or numpy.ma.MaskedArray
        Structured numpy array of downsampled result, containing the same
        fields as the input data and the same shape except along the specified
        downsampling axis. If the input data is masked, the output data will
        also be masked, with each output field's mask determined by the
        combination of the optional weight field mask and the corresponding
        input field mask.
    """
    if not isinstance(data_in, np.ndarray):
        raise ValueError('Invalid data_in type: {0}.'.format(type(data_in)))
    if data_out is not None and not isinstance(data_out, np.ndarray):
        raise ValueError('Invalid data_out type: {0}.'.format(type(data_out)))

    shape_in = data_in.shape
    try:
        num_bins = shape_in[axis]
    except IndexError:
        raise ValueError('Invalid axis = {0}.'.format(axis))

    if downsampling < 1 or downsampling > num_bins:
        raise ValueError('Invalid downsampling = {0}.'.format(downsampling))
    if start_index < 0 or start_index >= num_bins:
        raise ValueError('Invalid start_index = {0}.'.format(start_index))

    num_downsampled = (num_bins - start_index) // downsampling
    if num_downsampled <= 0:
        raise ValueError(
            'Incompatible downsampling = {0} and start_index = {1}.'
            .format(downsampling, start_index))
    stop_index = start_index + num_downsampled * downsampling
    assert stop_index <= num_bins
    if stop_index < num_bins and not auto_trim:
        raise ValueError(
            'Input data does not evenly divide with downsampling = {0}.'
            .format(downsampling))

    if weight is not None:
        if not isinstance(weight, str):
            raise ValueError('Invalid weight type: {0}.'.format(type(weight)))
        if weight in data_in.dtype.fields:
            # If data_in is a MaskedArray, weights_in will also be masked.
            weights_in = data_in[weight]
            if np.any(weights_in < 0):
                raise ValueError('Some input weights < 0.')
        else:
            raise ValueError('No such weight field: {0}.'.format(weight))
    else:
        if ma.isMA(data_in):
            weights_in = ma.ones(shape_in)
        else:
            weights_in = np.ones(shape_in)

    shape_out = list(shape_in)
    shape_out[axis] = num_downsampled
    shape_out = tuple(shape_out)
    expanded_shape = list(shape_in)
    expanded_shape[axis] = downsampling
    expanded_shape.insert(axis, num_downsampled)
    sum_axis = axis + 1 if axis >= 0 else len(shape_in) + axis + 1

    dtype_out = data_in.dtype
    if data_out is None:
        if ma.isMA(data_in):
            data_out = ma.empty(shape_out, dtype=data_in.dtype)
            data_out.mask = False
        else:
            data_out = np.empty(shape_out, dtype=data_in.dtype)
    else:
        if data_out.shape != shape_out:
            raise ValueError(
                'data_out has wrong shape: {0}. Expected: {1}.'
                .format(data_out.shape, shape_out))
        if data_out.dtype != dtype_out:
            raise ValueError(
                'data_out has wrong dtype: {0}. Expected: {1}.'
                .format(data_out.dtype, dtype_out))

    if ma.isMA(data_in):
        # Each field has an independent mask in the input, but we want to
        # use the same output weights for all fields.  Use the logical OR
        # of the individual input field masks to achieve this.
        or_mask = np.zeros(shape_in, dtype=bool)
        for field in data_in.dtype.fields:
            or_mask = or_mask | data_in[field].mask
        weights_in.mask = or_mask

    # Loop over fields in the input data.
    weights_out = np.sum(
        weights_in[start_index:stop_index].reshape(expanded_shape),
        axis=sum_axis)
    for field in data_in.dtype.fields:
        if field == weight:
            continue
        weighted = (
            weights_in[start_index:stop_index] *
            data_in[field][start_index:stop_index])
        if ma.isMA(data_in):
            weighted.mask = or_mask
        data_out[field] = np.sum(
            weighted.reshape(expanded_shape), axis=sum_axis) / weights_out
    if weight is not None:
        data_out[weight] = weights_out

    return data_out