def select_zone_by_mask(var, mask):
    '''Takes in a variable and a 2d zone mask, then averages all the points
    inside the zone.

    Parameters
    ----------
    var: opened netCDF variable
        variable opened from .nc file
    mask: np.array
        2d array of True/False points (True for points inside the given climate zone)

    Returns
    -------
    var: np.array
        zone-averaged variable
    '''
    ndim = len(var.shape)  # number of dimensions
    if ndim == 3:
        mask = np.repeat(mask[np.newaxis, :, :], var.shape[0], axis=0)
        var = ma.masked_where(mask, var)
        var = ma.average(var, axis=2)
        var = ma.average(var, axis=1)
    elif ndim == 4:
        mask = np.repeat(mask[np.newaxis, :, :], var.shape[1], axis=0)
        mask = np.repeat(mask[np.newaxis, :, :], var.shape[0], axis=0)
        var = ma.masked_where(mask, var)
        var = ma.average(var, axis=3)
        var = ma.average(var, axis=2)
    return var
def mean_wind(prof, pbot=850, ptop=250, dp=-1, stu=0, stv=0):
    '''Calculates a pressure-weighted mean wind through a layer. The default
    layer is 850 to 250 hPa.

    Parameters
    ----------
    prof: profile object
        Profile object
    pbot : number (optional; default 850 hPa)
        Pressure of the bottom level (hPa)
    ptop : number (optional; default 250 hPa)
        Pressure of the top level (hPa)
    dp : negative integer (optional; default -1)
        The pressure increment for the interpolated sounding
    stu : number (optional; default 0)
        U-component of storm-motion vector
    stv : number (optional; default 0)
        V-component of storm-motion vector

    Returns
    -------
    mnu : number
        U-component
    mnv : number
        V-component
    '''
    if dp > 0:
        dp = -dp
    ps = np.arange(pbot, ptop + dp, dp)
    u, v = interp.components(prof, ps)
    # u -= stu; v -= stv
    return ma.average(u, weights=ps) - stu, ma.average(v, weights=ps) - stv
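A minimal, self-contained check of the pressure-weighted averaging used in mean_wind above. interp.components is external to this snippet, so synthetic wind profiles stand in for it; the layer pressures themselves serve as the weights.

import numpy as np
import numpy.ma as ma

ps = np.arange(850, 250 - 1, -1)    # 850..250 hPa in 1-hPa steps
u = np.linspace(10., 40., ps.size)  # made-up u-profile
v = np.linspace(-5., 15., ps.size)  # made-up v-profile

# higher pressures (lower levels) contribute more to the layer mean
print(ma.average(u, weights=ps), ma.average(v, weights=ps))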
def zonal_avg(data, Log=False):
    """
    Compute the zonal average of field on POP gx3v5 grid.
    Shape of input data is expected to be either [nfoo,nlat,nlon]
    or [nlat,nlon].
    Log=True computes the geometric average.

    Output: arrays zavg and lat
    """
    print('computing zonal average')
    # get lat and lon for new regular grid
    # fpin = Nio.open_file('/home/ivan/Python/data/lat_t.nc','r')
    fpin = Nio.open_file('/home/emunoz/Python/mapping/model_grid/lat_t.nc', 'r')
    lat_t = fpin.variables['lat_t'][:]
    lat_t_edges = fpin.variables['lat_t_edges'][:]
    fpin.close()
    # fpin = Nio.open_file('/home/ivan/Python/data/gx3v5.nc','r')
    fpin = Nio.open_file('/home/emunoz/Python/mapping/model_grid/gx3v5.nc', 'r')
    lon_t = N.sort(fpin.variables['TLONG'][0, :])
    ulon = N.sort(fpin.variables['ULONG'][0, :])
    lon_t_edges = N.concatenate((ulon, ulon[0, N.newaxis] + 360.), 0)
    # get gx3v5 lat and lon
    tlon = fpin.variables['TLONG'][:]
    tlat = fpin.variables['TLAT'][:]
    fpin.close()

    # compute area of cells in new regular grid
    area = grid_area(lon_t_edges, lat_t_edges)

    nlat = lat_t.shape[0]
    nlon = lon_t.shape[0]

    if data.ndim == 3:
        new_data = MA.zeros((data.shape[0], nlat, nlon), dtype=float)
    elif data.ndim == 2:
        new_data = MA.zeros((nlat, nlon), dtype=float)
    else:
        print('Check field dimensions')
        sys.exit()

    # geometric mean?
    if Log:
        work = MA.log(data)
    else:
        work = data

    # remap data to new regular grid
    for i in range(nlat):
        # print('lat = %.2f' % lat_t[i])
        for j in range(nlon):
            new_data[:, i, j] = extract_loc(lon_t[j], lat_t[i], tlon, tlat, work)

    # compute zonal average
    if Log:
        za_data = (MA.exp(MA.average(new_data, axis=-1,
                                     weights=N.resize(area, new_data.shape))))
    else:
        za_data = (MA.average(new_data, axis=-1,
                              weights=N.resize(area, new_data.shape)))

    return za_data, lat_t
def calc_area_weighted_spatial_average(data, lon, lat, masking='mask_off',
                                       area_weight=True):
    '''Calculate area weighted average of the values in data

    :param data: two-dimensional masked array
    :type data: :class:`numpy.ma.core.MaskedArray`

    :returns: an area weighted mean value
    '''
    if lat.ndim == 1:
        lons, lats = np.meshgrid(lon, lat)
    else:
        lons = lon  # lon is already 2d; needed below when masking oceans/land
        lats = lat
    weights = np.cos(lats * np.pi / 180.)

    if masking == 'mask_ocean':
        masked_data = maskoceans(lons, lats, data)  # masking oceans
        data = masked_data
    if masking == 'mask_land':
        masked_data = maskoceans(lons, lats, data)  # masking oceans
        masked_data.mask = ~masked_data.mask  # 'inverting' the mask to instead mask land
        data = masked_data

    if area_weight:
        spatial_average = ma.average(data[:], weights=weights)
    else:
        spatial_average = ma.average(data)

    return spatial_average
def calc_area_weighted_spatial_average(dataset, area_weight=False):
    '''Calculate area weighted average of the values in OCW dataset

    :param dataset: Dataset object
    :type dataset: :class:`dataset.Dataset`

    :returns: time series for the dataset of shape (nT)
    '''
    if dataset.lats.ndim == 1:
        lons, lats = np.meshgrid(dataset.lons, dataset.lats)
    else:
        lats = dataset.lats
    weights = np.cos(lats * np.pi / 180.)

    nt, ny, nx = dataset.values.shape
    spatial_average = ma.zeros(nt)
    for it in np.arange(nt):
        if area_weight:
            spatial_average[it] = ma.average(dataset.values[it, :],
                                             weights=weights)
        else:
            spatial_average[it] = ma.average(dataset.values[it, :])
    return spatial_average
def calc_area_weighted_standard_deviation(data, lon, lat, masking='mask_off',
                                          area_weight=True):
    if lat.ndim == 1:
        lons, lats = np.meshgrid(lon, lat)
    else:
        lons = lon  # lon is already 2d; needed below when masking oceans/land
        lats = lat
    weights = np.cos(lats * np.pi / 180.)

    if masking == 'mask_ocean':
        masked_data = maskoceans(lons, lats, data)  # masking oceans
        data = masked_data
    if masking == 'mask_land':
        masked_data = maskoceans(lons, lats, data)  # masking oceans
        masked_data.mask = ~masked_data.mask  # 'inverting' the mask to instead mask land
        data = masked_data

    squared_data = data[:] * data[:]
    if area_weight:
        spatial_average = ma.average(data[:], weights=weights)
        squared_data_spatial_average = ma.average(squared_data, weights=weights)
    else:
        spatial_average = ma.average(data)
        squared_data_spatial_average = ma.average(squared_data)
    standard_deviation = np.sqrt(
        squared_data_spatial_average - (spatial_average * spatial_average))

    return standard_deviation
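A quick numeric check of the E[x^2] - E[x]^2 identity the function above relies on, using cosine-latitude weights on a small masked grid; the values here are made up.

import numpy as np
import numpy.ma as ma

lats = np.array([-60., 0., 60.])
weights = np.cos(lats * np.pi / 180.)[:, np.newaxis] * np.ones((3, 4))
data = ma.masked_invalid(np.array([[1., 2., 3., np.nan],
                                   [2., 2., 2., 2.],
                                   [0., 1., 0., 1.]]))

mean = ma.average(data, weights=weights)
std = np.sqrt(ma.average(data**2, weights=weights) - mean**2)
print(mean, std)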
def testAttrs(self):
    # Same export as testAnaNetwork, but check that the
    # attributes are synchronised
    query = dballe.Record()
    query["var"] = "B10004"
    query["datetime"] = datetime.datetime(2007, 1, 1, 0, 0, 0)
    vars = read(self.db.query_data(query), (AnaIndex(), NetworkIndex()),
                attributes=True)
    self.assertEqual(len(vars), 1)
    self.assertCountEqual(vars.keys(), ["B10004"])
    data = vars["B10004"]
    self.assertEqual(len(data.attrs), 2)
    self.assertCountEqual(sorted(data.attrs.keys()), ['B33007', 'B33040'])
    for net, a in ('synop', 'B33007'), ('temp', 'B33040'):
        self.assertEqual(data.dims, data.attrs[a].dims)
        self.assertEqual(data.vals.size, data.attrs[a].vals.size)
        self.assertEqual(data.vals.shape, data.attrs[a].vals.shape)
        # Find what is the network dimension where we have the attributes
        netidx = -1
        for idx, n in enumerate(data.dims[1]):
            if n == net:
                netidx = idx
                break
        self.assertNotEqual(netidx, -1)
        # No attrs in the other network
        self.assertEqual([x for x in data.attrs[a].vals.mask[:, 1 - netidx].flat],
                         [True] * len(data.attrs[a].vals.mask[:, 1 - netidx].flat))
        # Same attrs as values in this network
        self.assertEqual([x for x in data.vals.mask[:, netidx].flat],
                         [x for x in data.attrs[a].vals.mask[:, netidx].flat])
    self.assertEqual(round(ma.average(data.attrs['B33007'].vals)), 32)
    self.assertEqual(round(ma.average(data.attrs['B33040'].vals)), 54)
def testAttrs(self):
    with self.db.transaction() as tr:
        # Same export as testAnaNetwork, but check that the
        # attributes are synchronised
        query = {}
        query["var"] = "B10004"
        query["datetime"] = datetime.datetime(2007, 1, 1, 0, 0, 0)
        vars = read(tr.query_data(query), (AnaIndex(), NetworkIndex()),
                    attributes=True)
        self.assertEqual(len(vars), 1)
        self.assertCountEqual(vars.keys(), ["B10004"])
        data = vars["B10004"]
        self.assertEqual(len(data.attrs), 2)
        self.assertCountEqual(sorted(data.attrs.keys()), ['B33007', 'B33040'])
        for net, a in ('synop', 'B33007'), ('temp', 'B33040'):
            self.assertEqual(data.dims, data.attrs[a].dims)
            self.assertEqual(data.vals.size, data.attrs[a].vals.size)
            self.assertEqual(data.vals.shape, data.attrs[a].vals.shape)
            # Find what is the network dimension where we have the attributes
            netidx = -1
            for idx, n in enumerate(data.dims[1]):
                if n == net:
                    netidx = idx
                    break
            self.assertNotEqual(netidx, -1)
            # No attrs in the other network
            self.assertEqual([x for x in data.attrs[a].vals.mask[:, 1 - netidx].flat],
                             [True] * len(data.attrs[a].vals.mask[:, 1 - netidx].flat))
            # Same attrs as values in this network
            self.assertEqual([x for x in data.vals.mask[:, netidx].flat],
                             [x for x in data.attrs[a].vals.mask[:, netidx].flat])
        self.assertEqual(round(ma.average(data.attrs['B33007'].vals)), 32)
        self.assertEqual(round(ma.average(data.attrs['B33040'].vals)), 54)
def imprimir_resultados(nome_estimador, erros_amostrais_estimadores,
                        estimadores, teta):
    # MSE is the mean of the squared sampling errors; the original squared the
    # mean error, which is the squared bias instead
    erro_quadratico_medio = average(np.square(erros_amostrais_estimadores))
    vies = average(estimadores) - teta
    variancia = var(estimadores)
    print(
        f"Estimator: {nome_estimador} \tMSE: {erro_quadratico_medio:.5f} "
        f"\tBias: {vies:.5f} \tVariance: {variancia:.5f}"
    )
def servTime(acqSeqL, relSeqL, dim=0):
    servTimeList = map(partial(avgWaitTime, dim=dim), acqSeqL, relSeqL)
    servTimes, servTimesSq, counts = zip(*servTimeList)
    servTimesMtx = listOfArrToMtx(servTimes)
    servTimesSqMtx = listOfArrToMtx(servTimesSq)
    countMtx = listOfArrToMtx(counts)
    # norm of columns
    norms = normalizeRowWise(countMtx.T).T

    ma_servTimesMtx = ma.array(servTimesMtx, mask=servTimesMtx == 0)
    ma_servTimesSqMtx = ma.array(servTimesSqMtx, mask=servTimesSqMtx == 0)

    return (ma.average(ma_servTimesMtx, axis=0, weights=norms),
            ma.average(ma_servTimesSqMtx, axis=0, weights=norms))
def avgprices(self, stockweighted=False):
    """Return a masked array of the average price by element"""
    p = ma.array(self.prices, mask=self.prices <= 0)
    if stockweighted:
        s = ma.array(self.stock, mask=self.stock <= 0)
        avgprices = ma.average(p, weights=s, axis=1)
    else:
        # avgprices = p.sum(axis=1)/(p > 0).sum(axis=1)  # denominator sums the non-zero values
        avgprices = ma.average(p, axis=1)
    return avgprices
def testAnaTrangeNetwork(self):
    with self.db.transaction() as tr:
        # 3 dimensions: ana, timerange, network
        # 2 variables
        query = dict(datetime=datetime.datetime(2007, 1, 1, 0, 0, 0))
        vars = read(tr.query_data(query),
                    (AnaIndex(), TimeRangeIndex(shared=False), NetworkIndex()))
        self.assertEqual(len(vars), 2)
        self.assertEqual(sorted(vars.keys()), ["B10004", "B13011"])

        data = vars["B10004"]
        self.assertEqual(data.name, "B10004")
        self.assertEqual(len(data.attrs), 0)
        self.assertEqual(len(data.dims), 3)
        self.assertEqual(len(data.dims[0]), 12)
        self.assertEqual(len(data.dims[1]), 1)
        self.assertEqual(len(data.dims[2]), 2)
        self.assertEqual(data.vals.size, 24)
        self.assertEqual(data.vals.shape, (12, 1, 2))
        self.assertEqual(sum(data.vals.mask.flat), 13)
        self.assertEqual(round(ma.average(data.vals)), 83185)
        self.assertEqual(data.dims[0][0], (1, 10., 15., None))
        self.assertEqual(data.dims[0][1], (2, 10., 25., None))
        self.assertEqual(data.dims[0][2], (3, 20., 15., None))
        self.assertEqual(data.dims[0][3], (4, 20., 25., None))
        self.assertEqual(data.dims[0][4], (5, 30., 15., None))
        self.assertEqual(data.dims[0][5], (6, 30., 25., None))
        self.assertEqual(data.dims[1][0], (0, None, None))
        self.assertEqual(set(data.dims[2]), set(("temp", "synop")))

        data = vars["B13011"]
        self.assertEqual(data.name, "B13011")
        self.assertEqual(len(data.attrs), 0)
        self.assertEqual(len(data.dims), 3)
        self.assertEqual(len(data.dims[0]), 12)
        self.assertEqual(len(data.dims[1]), 2)
        self.assertEqual(len(data.dims[2]), 2)
        self.assertEqual(data.vals.size, 48)
        self.assertEqual(data.vals.shape, (12, 2, 2))
        self.assertEqual(sum(data.vals.mask.flat), 24)
        self.assertAlmostEqual(ma.average(data.vals), 5.325, 6)
        self.assertEqual(data.dims[0][0], (1, 10., 15., None))
        self.assertEqual(data.dims[0][1], (2, 10., 25., None))
        self.assertEqual(data.dims[0][2], (3, 20., 15., None))
        self.assertEqual(data.dims[0][3], (4, 20., 25., None))
        self.assertEqual(data.dims[0][4], (5, 30., 15., None))
        self.assertEqual(data.dims[0][5], (6, 30., 25., None))
        self.assertEqual(data.dims[1][0], (4, -43200, 0))
        self.assertEqual(data.dims[1][1], (4, -21600, 0))
        self.assertEqual(set(data.dims[2]), set(("temp", "synop")))

        self.assertEqual(vars["B10004"].dims[0], vars["B13011"].dims[0])
        self.assertNotEqual(vars["B10004"].dims[1], vars["B13011"].dims[1])
        self.assertEqual(vars["B10004"].dims[2], vars["B13011"].dims[2])
def testAnaTrangeNetwork(self):
    # 3 dimensions: ana, timerange, network
    # 2 variables
    query = dballe.Record(datetime=datetime.datetime(2007, 1, 1, 0, 0, 0))
    vars = read(self.db.query_data(query),
                (AnaIndex(), TimeRangeIndex(shared=False), NetworkIndex()))
    self.assertEqual(len(vars), 2)
    self.assertEqual(sorted(vars.keys()), ["B10004", "B13011"])

    data = vars["B10004"]
    self.assertEqual(data.name, "B10004")
    self.assertEqual(len(data.attrs), 0)
    self.assertEqual(len(data.dims), 3)
    self.assertEqual(len(data.dims[0]), 6)
    self.assertEqual(len(data.dims[1]), 1)
    self.assertEqual(len(data.dims[2]), 2)
    self.assertEqual(data.vals.size, 12)
    self.assertEqual(data.vals.shape, (6, 1, 2))
    self.assertEqual(sum(data.vals.mask.flat), 1)
    self.assertEqual(round(ma.average(data.vals)), 83185)
    self.assertEqual(data.dims[0][0], (1, 10., 15., None))
    self.assertEqual(data.dims[0][1], (2, 10., 25., None))
    self.assertEqual(data.dims[0][2], (3, 20., 15., None))
    self.assertEqual(data.dims[0][3], (4, 20., 25., None))
    self.assertEqual(data.dims[0][4], (5, 30., 15., None))
    self.assertEqual(data.dims[0][5], (6, 30., 25., None))
    self.assertEqual(data.dims[1][0], (0, None, None))
    self.assertEqual(set(data.dims[2]), set(("temp", "synop")))

    data = vars["B13011"]
    self.assertEqual(data.name, "B13011")
    self.assertEqual(len(data.attrs), 0)
    self.assertEqual(len(data.dims), 3)
    self.assertEqual(len(data.dims[0]), 6)
    self.assertEqual(len(data.dims[1]), 2)
    self.assertEqual(len(data.dims[2]), 2)
    self.assertEqual(data.vals.size, 24)
    self.assertEqual(data.vals.shape, (6, 2, 2))
    self.assertEqual(sum(data.vals.mask.flat), 0)
    self.assertAlmostEqual(ma.average(data.vals), 5.325, 6)
    self.assertEqual(data.dims[0][0], (1, 10., 15., None))
    self.assertEqual(data.dims[0][1], (2, 10., 25., None))
    self.assertEqual(data.dims[0][2], (3, 20., 15., None))
    self.assertEqual(data.dims[0][3], (4, 20., 25., None))
    self.assertEqual(data.dims[0][4], (5, 30., 15., None))
    self.assertEqual(data.dims[0][5], (6, 30., 25., None))
    self.assertEqual(data.dims[1][0], (4, -43200, 0))
    self.assertEqual(data.dims[1][1], (4, -21600, 0))
    self.assertEqual(set(data.dims[2]), set(("temp", "synop")))

    self.assertEqual(vars["B10004"].dims[0], vars["B13011"].dims[0])
    self.assertNotEqual(vars["B10004"].dims[1], vars["B13011"].dims[1])
    self.assertEqual(vars["B10004"].dims[2], vars["B13011"].dims[2])
def averaged_tract_mean_std(mean_map_data, tract_data, threshold):
    if np.all(tract_data == 0):
        warnings.warn('Tract data is empty')
        return 0, 0
    if np.all(tract_data <= threshold):
        warnings.warn('Threshold has excluded all tract data')
        return 0, 0
    masked_map = ma.masked_where(tract_data <= threshold, mean_map_data)
    average = ma.average(masked_map, weights=tract_data)
    std = np.sqrt(ma.average((masked_map - average)**2, weights=tract_data))  # weighted std
    return average, std
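A hypothetical call of averaged_tract_mean_std on toy volumes: voxels whose tract probability falls at or below the threshold are masked out, and the survivors are weighted by that probability.

import numpy as np

mean_map = np.arange(16, dtype=float).reshape(4, 4)
tract = np.zeros((4, 4))
tract[1:3, 1:3] = [[0.2, 0.8], [0.9, 0.4]]

avg, std = averaged_tract_mean_std(mean_map, tract, threshold=0.3)
print(avg, std)  # only the 0.8/0.9/0.4 voxels contribute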
def testSomeAttrs(self):
    # Same export as testAnaNetwork, but check that the
    # attributes are synchronised
    query = dballe.Record()
    query["var"] = "B10004"
    query["datetime"] = datetime.datetime(2007, 1, 1, 0, 0, 0)
    vars = read(self.db.query_data(query), (AnaIndex(), NetworkIndex()),
                attributes=('B33040',))
    self.assertEqual(len(vars), 1)
    self.assertCountEqual(vars.keys(), ["B10004"])
    data = vars["B10004"]
    self.assertEqual(len(data.attrs), 1)
    self.assertCountEqual(data.attrs.keys(), ['B33040'])

    a = data.attrs['B33040']
    self.assertEqual(data.dims, a.dims)
    self.assertEqual(data.vals.size, a.vals.size)
    self.assertEqual(data.vals.shape, a.vals.shape)

    # Find the temp index
    netidx = -1
    for idx, n in enumerate(data.dims[1]):
        if n == "temp":
            netidx = idx
            break
    self.assertNotEqual(netidx, -1)

    # Only compare the values on the temp index
    self.assertEqual([x for x in a.vals.mask[:, 1 - netidx].flat],
                     [True] * len(a.vals.mask[:, 1 - netidx].flat))
    self.assertEqual([x for x in data.vals.mask[:, netidx].flat],
                     [x for x in a.vals.mask[:, netidx].flat])
    self.assertEqual(round(ma.average(a.vals)), 54)
def testSomeAttrs(self):
    with self.db.transaction() as tr:
        # Same export as testAnaNetwork, but check that the
        # attributes are synchronised
        query = {}
        query["var"] = "B10004"
        query["datetime"] = datetime.datetime(2007, 1, 1, 0, 0, 0)
        vars = read(tr.query_data(query), (AnaIndex(), NetworkIndex()),
                    attributes=('B33040',))
        self.assertEqual(len(vars), 1)
        self.assertCountEqual(vars.keys(), ["B10004"])
        data = vars["B10004"]
        self.assertEqual(len(data.attrs), 1)
        self.assertCountEqual(data.attrs.keys(), ['B33040'])

        a = data.attrs['B33040']
        self.assertEqual(data.dims, a.dims)
        self.assertEqual(data.vals.size, a.vals.size)
        self.assertEqual(data.vals.shape, a.vals.shape)

        # Find the temp index
        netidx = -1
        for idx, n in enumerate(data.dims[1]):
            if n == "temp":
                netidx = idx
                break
        self.assertNotEqual(netidx, -1)

        # Only compare the values on the temp index
        self.assertEqual([x for x in a.vals.mask[:, 1 - netidx].flat],
                         [True] * len(a.vals.mask[:, 1 - netidx].flat))
        self.assertEqual([x for x in data.vals.mask[:, netidx].flat],
                         [x for x in a.vals.mask[:, netidx].flat])
        self.assertEqual(round(ma.average(a.vals)), 54)
def testAnaNetwork(self):
    # Ana in one dimension, network in the other
    query = dballe.Record()
    query["var"] = "B10004"
    query["datetime"] = datetime.datetime(2007, 1, 1, 0, 0, 0)
    vars = read(self.db.query_data(query), (AnaIndex(), NetworkIndex()))
    self.assertEqual(len(vars), 1)
    self.assertCountEqual(vars.keys(), ["B10004"])
    data = vars["B10004"]
    self.assertEqual(data.name, "B10004")
    self.assertEqual(len(data.attrs), 0)
    self.assertEqual(len(data.dims), 2)
    self.assertEqual(len(data.dims[0]), 6)
    self.assertEqual(len(data.dims[1]), 2)
    self.assertEqual(data.vals.size, 12)
    self.assertEqual(data.vals.shape, (6, 2))
    self.assertEqual(sum(data.vals.mask.flat), 1)
    self.assertEqual(round(ma.average(data.vals)), 83185)
    self.assertEqual(data.dims[0][0], (1, 10., 15., None))
    self.assertEqual(data.dims[0][1], (2, 10., 25., None))
    self.assertEqual(data.dims[0][2], (3, 20., 15., None))
    self.assertEqual(data.dims[0][3], (4, 20., 25., None))
    self.assertEqual(data.dims[0][4], (5, 30., 15., None))
    self.assertEqual(data.dims[0][5], (6, 30., 25., None))
    self.assertEqual(set(data.dims[1]), set(("temp", "synop")))
def simulacao(tamanho_amostras):
    print(f"\n*Simulation with {tamanho_amostras} samples")
    numero_amostras = 100
    valor_minimo = 0
    valor_maximo = 10
    estimadores_momentos_valor_maximo = []
    erros_amostrais_estimadores_momentos = []
    estimadores_maxima_verossimilhanca_valor_maximo = []
    erros_amostrais_estimadores_maxima_verossimilhanca = []
    for i in range(numero_amostras):
        distribuicao_uniforme = np.random.uniform(low=valor_minimo,
                                                  high=valor_maximo,
                                                  size=tamanho_amostras)

        estimador_momentos = 2 * average(distribuicao_uniforme)
        estimadores_momentos_valor_maximo.append(estimador_momentos)
        erro_amostral_estimador_momentos = estimador_momentos - valor_maximo
        erros_amostrais_estimadores_momentos.append(
            erro_amostral_estimador_momentos)

        estimador_maxima_verossimilhanca = max(distribuicao_uniforme)
        estimadores_maxima_verossimilhanca_valor_maximo.append(
            estimador_maxima_verossimilhanca)
        erro_amostral_estimador_maxima_verossimilhanca = estimador_maxima_verossimilhanca - valor_maximo
        erros_amostrais_estimadores_maxima_verossimilhanca.append(
            erro_amostral_estimador_maxima_verossimilhanca)

    imprimir_resultados("Moments", erros_amostrais_estimadores_momentos,
                        estimadores_momentos_valor_maximo, valor_maximo)
    imprimir_resultados("Max. Likelihood",
                        erros_amostrais_estimadores_maxima_verossimilhanca,
                        estimadores_maxima_verossimilhanca_valor_maximo,
                        valor_maximo)
def temporal_rebin_with_time_index(target_dataset, nt_average):
    """ Rebin a Dataset to a new temporal resolution

    :param target_dataset: Dataset object that needs temporal rebinned
    :type target_dataset: :class:`dataset.Dataset`

    :param nt_average: Time resolution for the output datasets. It is the
        same as the number of time indices to be averaged:
        (length of time dimension in the rebinned dataset) =
        (original time dimension length / nt_average)
    :type nt_average: integer

    :returns: A new temporally rebinned Dataset
    :rtype: :class:`dataset.Dataset`
    """
    nt = target_dataset.times.size
    if nt % nt_average != 0:
        print('Warning: length of time dimension must be a multiple of nt_average')
    # nt2 is the length of time dimension in the rebinned dataset
    nt2 = nt // nt_average
    binned_dates = target_dataset.times[np.arange(nt2) * nt_average]
    binned_values = ma.zeros(np.insert(target_dataset.values.shape[1:], 0, nt2))
    for it in np.arange(nt2):
        binned_values[it, :] = ma.average(
            target_dataset.values[nt_average * it:nt_average * it + nt_average, :],
            axis=0)
    new_dataset = ds.Dataset(target_dataset.lats,
                             target_dataset.lons,
                             binned_dates,
                             binned_values,
                             variable=target_dataset.variable,
                             units=target_dataset.units,
                             name=target_dataset.name,
                             origin=target_dataset.origin)
    return new_dataset
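A shape-level illustration of the rebinning step above: 12 monthly 2x2 fields averaged in blocks of three become four fields. The Dataset plumbing is external, so this sketch applies the same ma.average call to a bare masked array.

import numpy as np
import numpy.ma as ma

values = ma.asarray(np.arange(12 * 2 * 2, dtype=float).reshape(12, 2, 2))
nt_average = 3
nt2 = values.shape[0] // nt_average

binned = ma.zeros((nt2,) + values.shape[1:])
for it in range(nt2):
    binned[it] = ma.average(values[it * nt_average:(it + 1) * nt_average], axis=0)
print(binned.shape)  # (4, 2, 2)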
def average_combine(self):
    """Average combine together a set of arrays.

    A CCDData object is returned with the data property set to the
    average of the arrays. If the data was masked or any data have been
    rejected, those pixels will not be included in the average. A mask
    will be returned, and if a pixel has been rejected in all images, it
    will be masked. The uncertainty of the combined image is set by the
    standard deviation of the input images.

    Returns
    -------
    combined_image: CCDData object
        CCDData object based on the combined input of CCDData objects.
    """
    # set up the data
    data, wei = ma.average(self.data_arr, axis=0, weights=self.weights,
                           returned=True)

    # set up the mask
    mask = self.data_arr.mask.sum(axis=0)
    mask = (mask == len(self.data_arr))

    # set up the variance
    uncertainty = ma.std(self.data_arr, axis=0)

    # create the combined image
    combined_image = CCDData(data.data, mask=mask, unit=self.unit,
                             uncertainty=StdDevUncertainty(uncertainty))

    # update the meta data
    combined_image.meta['NCOMBINE'] = len(self.data_arr)

    # return the combined image
    return combined_image
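The combiner above leans on ma.average's returned=True, which also hands back the per-pixel sum of weights; a minimal demo on a masked "stack" of frames.

import numpy as np
import numpy.ma as ma

stack = ma.masked_invalid(np.array([[1., 2.],
                                    [3., np.nan],
                                    [5., 6.]]))
data, wsum = ma.average(stack, axis=0, returned=True)
print(data)  # column means, ignoring the masked NaN
print(wsum)  # effective number of contributing frames per pixel: [3. 2.]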
def testAnaNetwork(self):
    with self.db.transaction() as tr:
        # Ana in one dimension, network in the other
        query = {}
        query["var"] = "B10004"
        query["datetime"] = datetime.datetime(2007, 1, 1, 0, 0, 0)
        vars = read(tr.query_data(query), (AnaIndex(), NetworkIndex()))
        self.assertEqual(len(vars), 1)
        self.assertCountEqual(vars.keys(), ["B10004"])
        data = vars["B10004"]
        self.assertEqual(data.name, "B10004")
        self.assertEqual(len(data.attrs), 0)
        self.assertEqual(len(data.dims), 2)
        self.assertEqual(len(data.dims[0]), 11)
        self.assertEqual(len(data.dims[1]), 2)
        self.assertEqual(data.vals.size, 22)
        self.assertEqual(data.vals.shape, (11, 2))
        self.assertEqual(sum(data.vals.mask.flat), 11)
        self.assertEqual(round(ma.average(data.vals)), 83185)
        self.assertEqual(data.dims[0][0], (1, 10., 15., None))
        self.assertEqual(data.dims[0][1], (2, 10., 25., None))
        self.assertEqual(data.dims[0][2], (3, 20., 15., None))
        self.assertEqual(data.dims[0][3], (4, 20., 25., None))
        self.assertEqual(data.dims[0][4], (5, 30., 15., None))
        self.assertEqual(data.dims[0][5], (6, 30., 25., None))
        self.assertEqual(set(data.dims[1]), set(("temp", "synop")))
def extract_loc_vec(ref_lon, ref_lat, tlon, tlat, indata):
    """
    Vectorized version of extract_loc. Extracts full time series
    simultaneously. Much faster than the original version above.

    Inputs:
        ref_lon : longitude of point to be extracted
        ref_lat : latitude of point to be extracted
        tlon    : grid longitudes
        tlat    : grid latitudes
        indata  : array/field to extract point from

    Output:
        wavg : weighted average of the 4 model grid points around position
    """
    # find the indices of the 4 model grid points around the location
    Ilist, Jlist = find_stn_idx(ref_lon, ref_lat, tlon, tlat)
    # compute great circle distance from location to model grid points
    dist = gc_dist(ref_lon, ref_lat, tlon, tlat)
    dist[dist == 0] = 1.e-15  # avoid division by zero

    ibeg, iend = Ilist.min(), Ilist.max()
    jbeg, jend = Jlist.min(), Jlist.max()
    work = indata[..., ibeg:iend + 1, jbeg:jend + 1]
    dist = dist[..., ibeg:iend + 1, jbeg:jend + 1]
    wghts = 1. / N.resize(dist, work.shape)
    wavg = MA.average(work.reshape(work.shape[:-2] + (-1,)),
                      weights=wghts.reshape(work.shape[:-2] + (-1,)), axis=-1)
    return wavg
def executeOperations(self, task, inputs):
    available_inputIds = [inputId.split('-')[0] for inputId in inputs]
    data_inputIds = [inputId.split('-')[0] for inputId in task.inputs]
    wids = [inputId + "_WEIGHTS_" for inputId in data_inputIds]
    weight_inputIds = [(wid if (wid in available_inputIds) else None) for wid in wids]
    inputs_with_weights = zip(data_inputIds, weight_inputIds)
    self.logger.info("@@@@ data_inputIds = " + str(data_inputIds) +
                     ", weight_inputIds = " + str(weight_inputIds) +
                     ", inputs = " + str(inputs))
    results = []
    for input_pair in inputs_with_weights:
        input = inputs.get(input_pair[0])  # npArray
        if input is None:
            raise Exception("Can't find input " + input_pair[0] +
                            " in numpyModule.WeightedAverageKernel")
        else:
            weights = inputs.get(input_pair[1]).array if input_pair[1] is not None else None
            axes = self.getOrderedAxes(task, input)
            self.logger.info("\n Executing average, input: " + str(input_pair[0]) +
                             ", shape = " + str(input.array.shape) +
                             ", task metadata = " + str(task.metadata) +
                             " Input metadata: " + str(input.metadata))
            t0 = time.time()
            result = input.array
            for axis in axes:
                current_shape = list(result.shape)
                self.logger.info(" --> Exec: axis: " + str(axis) +
                                 ", shape: " + str(current_shape))
                # (result, weights) = ma.average(result, axis, weights, True)
                (result, weights) = ma.average(result, axis,
                                               np.broadcast_to(weights, current_shape),
                                               True)
                current_shape[axis] = 1
                result = result.reshape(current_shape)
                weights = weights.reshape(current_shape)
            results.append(npArray.createResult(task, input,
                                                result.filled(input.array.fill_value)))
            t1 = time.time()
            self.logger.info(
                " ------------------------------- AVEW KERNEL: Operating on input "
                "'{0}', shape = {1}, origin = {2}, time = {3}".format(
                    input.name, input.shape, input.origin, t1 - t0))
    return results
def simulacao(tamanho_amostras):
    print(f"\n*Simulation with {tamanho_amostras} samples")
    epsilon = 0.00001
    teta = 10
    numero_amostras = 100
    valor_minimo = teta + epsilon
    valor_maximo = teta + 1 - epsilon
    estimadores_momentos = []
    erros_amostrais_estimadores_momentos = []
    estimadores_maxima_verossimilhanca = []
    erros_amostrais_estimadores_maxima_verossimilhanca = []
    for i in range(numero_amostras):
        distribuicao_uniforme = np.random.uniform(low=valor_minimo,
                                                  high=valor_maximo,
                                                  size=tamanho_amostras)

        estimador_momentos = average(distribuicao_uniforme) - 0.5
        estimadores_momentos.append(estimador_momentos)
        erro_amostral_estimador_momentos = estimador_momentos - teta
        erros_amostrais_estimadores_momentos.append(
            erro_amostral_estimador_momentos)

        estimador_maxima_verossimilhanca = min(distribuicao_uniforme) - epsilon
        estimadores_maxima_verossimilhanca.append(
            estimador_maxima_verossimilhanca)
        erro_amostral_estimador_maxima_verossimilhanca = estimador_maxima_verossimilhanca - teta
        erros_amostrais_estimadores_maxima_verossimilhanca.append(
            erro_amostral_estimador_maxima_verossimilhanca)

    imprimir_resultados("Moments", erros_amostrais_estimadores_momentos,
                        estimadores_momentos, teta)
    imprimir_resultados("Max. Likelihood",
                        erros_amostrais_estimadores_maxima_verossimilhanca,
                        estimadores_maxima_verossimilhanca, teta)
def binner(x, y, w_sta, nbins, rang=None, ebar=False, per=None):
    from numpy import array, digitize, lexsort, linspace
    from numpy.ma import average, median

    ind = lexsort((y, x))
    xs, ys = x[ind], y[ind]

    if rang is None:
        mn, mx = min(xs), max(xs)
    else:
        mn, mx = rang

    bins = linspace(mn, mx, nbins + 1)
    x_cen = (bins[:-1] + bins[1:]) * 0.5
    bins = linspace(mn, mx, nbins)
    ibins = digitize(xs, bins)

    if w_sta == "median":
        y_sta = array([median(ys[ibins == i]) for i in range(1, bins.size + 1)])
    elif w_sta == "mean":
        y_sta = array([average(ys[ibins == i]) for i in range(1, bins.size + 1)])
    elif w_sta == "mode":
        y_sta = array([mode(ys[ibins == i])[0] for i in range(1, bins.size + 1)])

    if not ebar:
        return x_cen, y_sta
    elif ebar and per is None:
        myer = abs(array([scoreatpercentile(ys[ibins == i], 15.8)
                          for i in range(1, bins.size + 1)]) - y_sta)
        pyer = abs(array([scoreatpercentile(ys[ibins == i], 84.0)
                          for i in range(1, bins.size + 1)]) - y_sta)
        yer = array([myer, pyer])
        return x_cen, y_sta, yer
    elif ebar and per is not None:
        myer = abs(array([scoreatpercentile(ys[ibins == i], per[0])
                          for i in range(1, bins.size + 1)]) - y_sta)
        pyer = abs(array([scoreatpercentile(ys[ibins == i], per[1])
                          for i in range(1, bins.size + 1)]) - y_sta)
        yer = array([myer, pyer])
        return x_cen, y_sta, yer
def compute(self):
    if self.data is None:
        return
    if type(self.eigVectors) == MA.MaskedArray and type(self.eigValues) == MA.MaskedArray:
        return

    if type(self.data) == orange.ExampleTable:
        data, classes = self.data.toNumpyMA("a/c")
    elif type(self.data) == tuple:
        data, classes = self.data

    data = self.center(data)
    data = self.normalize(data)
    self.normalizedData = data

    exampleCount, attrCount = data.shape
    classCount = len(set(classes))
    # special case when we have two classes
    if classCount == 2:
        data1 = MA.take(data, numpy.argwhere(classes == 0).flatten(), axis=0)
        data2 = MA.take(data, numpy.argwhere(classes != 0).flatten(), axis=0)
        miDiff = MA.average(data1, axis=1) - MA.average(data2, axis=1)
        covMatrix = (MA.dot(data1.T, data1) + MA.dot(data2.T, data2)) / exampleCount
        self.eigVectors = linalg.inv(covMatrix) * miDiff
        self.eigValues = numpy.array([1])
    else:
        # compute means and average covariances of examples in each class group
        Sw = MA.zeros([attrCount, attrCount])
        for v in set(classes):
            d = MA.take(data, numpy.argwhere(classes == v).flatten(), axis=0)
            d = self.center(d)
            Sw += MA.dot(d.T, d)
        Sw /= exampleCount
        total = MA.dot(data.T, data) / float(exampleCount)
        Sb = total - Sw

        matrix = linalg.inv(Sw) * Sb
        eigVals, eigVectors = linalg.eigh(matrix)
        self.eigValues, self.eigVectors = self.getSorted(eigVals, eigVectors)
def mean(phi, lower=0., upper=120.):
    """wrapping the numpy.ma.average function for weighted average of masked
    arrays. Here weight = the image intensity, and the coordinates
    X, Y = np.meshgrid(range(921), range(881)) are to be averaged out.
    """
    phi1 = phi.view(ma.MaskedArray).copy()  # to be safe (slow?)
    try:
        phi1.mask += (phi1 < lower) + (phi1 > upper)  # masking the out-of-range regions
    except:
        phi1.mask = (phi1 < lower) + (phi1 > upper)
    height, width = phi1.shape
    X, Y = np.meshgrid(range(width), range(height))
    I, J = Y, X  # always work with I, J internally
    Ibar = ma.average(I, weights=phi1)
    Jbar = ma.average(J, weights=phi1)
    return {'i': Ibar, 'j': Jbar}
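A toy check of the intensity-weighted centroid computed above: one very bright pixel at (i, j) = (2, 3) should pull the weighted coordinates toward itself.

import numpy as np

phi = np.ones((5, 5))
phi[2, 3] = 100.
print(mean(phi))  # both coordinates drawn toward i=2, j=3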
def v_average_test():
    import numpy.ma as ma
    M = [[1, 1, 0], [0, 0, 1], [0, 1, 0]]
    Coins = [100000, 200000, 300000]
    Mat = numpy.matrix(M)
    Mean = ma.average(Mat, axis=0, weights=numpy.hstack(Coins))
    print(Mean)
    print(v_average(M, ReWeight(Coins)))
def executeOperations(self, task, inputs):
    available_inputIds = [inputId.split('-')[0] for inputId in inputs]
    data_inputIds = [inputId.split('-')[0] for inputId in task.inputs]
    wids = [inputId + "_WEIGHTS_" for inputId in data_inputIds]
    weight_inputIds = [(wid if (wid in available_inputIds) else None) for wid in wids]
    inputs_with_weights = zip(data_inputIds, weight_inputIds)
    self.logger.info("@@@@ data inputIds = " + str(data_inputIds) +
                     ", weight_inputIds = " + str(weight_inputIds) +
                     ", inputs = " + str(inputs))
    results = []
    try:
        for input_pair in inputs_with_weights:
            input = inputs.get(input_pair[0])  # npArray
            if input is None:
                raise Exception("Can't find input " + input_pair[0] +
                                " in numpyModule.WeightedAverageKernel")
            else:
                weights = inputs.get(input_pair[1]).array if input_pair[1] is not None else None
                axes = self.getOrderedAxes(task, input)
                self.logger.info("\n Executing average, input: " + str(input_pair[0]) +
                                 ", shape = " + str(input.array.shape) +
                                 ", task metadata = " + str(task.metadata) +
                                 " Input metadata: " + str(input.metadata))
                t0 = time.time()
                result = input.array
                for axis in axes:
                    current_shape = list(result.shape)
                    if current_shape[axis] > 1:
                        self.logger.info(" %ZP% Exec: axis: " + str(axis) +
                                         ", shape: " + str(current_shape))
                        wts = None if weights is None else np.broadcast_to(weights, current_shape)
                        (result, weights) = ma.average(result, axis, wts, True)
                        current_shape[axis] = 1
                        result = result.reshape(current_shape)
                        weights = weights.reshape(current_shape)
                results.append(
                    npArray.createResult(task, input,
                                         result.filled(input.array.fill_value)))
                t1 = time.time()
                self.logger.info(
                    " ------------------------------- AVEW KERNEL: Operating on input "
                    "'{0}', shape = {1}, origin = {2}, time = {3}".format(
                        input.name, input.shape, input.origin, t1 - t0))
    except Exception as err:
        self.logger.error("Error in WeightedAverageKernel: " + str(err) + "\n" +
                          traceback.format_exc())
        results.append(npArray.empty(task.rId))
    return results
def calculate_metrics(subject, method_code, tract):
    subject_id = subject[0]
    dataset_file_path = subject[1]
    tract_code = tract[0]
    tract_file_path = tract[1]
    print(f'Calculating metrics for subject {subject_id}, method {method_code} '
          f'and tract {tract_code}')
    try:
        MD = nib.load(
            f'data/{dataset_file_path}/full_brain_maps/native/{subject_id}_Native_MD.nii.gz'
        ).get_data()
        FA = nib.load(
            f'data/{dataset_file_path}/full_brain_maps/native/{subject_id}_Native_FA.nii.gz'
        ).get_data()
    except FileNotFoundError:
        # bail out early: the original fell through here (and referenced an
        # undefined subject_file_path), which would raise a NameError below
        print(f'Couldn\'t find maps for dataset {dataset_file_path} '
              f'and subject {subject_id}')
        return None

    fp = (f'data/{dataset_file_path}/{tract_file_path}/{method_code.lower()}'
          f'/native/{subject_id}_Native_{tract_code}.nii.gz')
    tract_data = nib.load(fp).get_data()

    if np.any(tract_data.nonzero()):
        masked_MD = ma.masked_where(tract_data == 0, MD)
        av_MD = ma.average(masked_MD, weights=tract_data)
        av_MD = 0 if np.isnan(av_MD) else av_MD
        std_MD = np.sqrt(ma.average((masked_MD - av_MD)**2, weights=tract_data))  # weighted std
        std_MD = 0 if np.isnan(std_MD) else std_MD

        masked_FA = ma.masked_where(tract_data == 0, FA)
        av_FA = ma.average(masked_FA, weights=tract_data)
        av_FA = 0 if np.isnan(av_FA) else av_FA
        std_FA = np.sqrt(ma.average((masked_FA - av_FA)**2, weights=tract_data))  # weighted std
        std_FA = 0 if np.isnan(std_FA) else std_FA

        volume = np.count_nonzero(tract_data) * 8.e-3
    else:
        av_MD = std_MD = av_FA = std_FA = volume = 0

    return SubjectTractMetrics(subject_id, method_code, tract_code,
                               float(av_MD), float(std_MD), float(av_FA),
                               float(std_FA), float(volume))
def run_with_kernel(kernel):
    classifier = svm.SVC(kernel=kernel)
    classifier.fit(trainSet.data, trainSet.labels)

    # Three fold cross validation
    cross_validation = cross_validate(classifier, testSet.data, testSet.labels,
                                      return_estimator=True)
    estimators = cross_validation['estimator']
    score = cross_validation['test_score']

    print("Kernel: " + kernel)
    print("Average Cross Validation Score: " + str(average(score)))
    print("C: " + str(average([estimator.C for estimator in estimators])))
    print("Gamma: " + str(average([estimator._gamma for estimator in estimators])))
    print()
def correlation(u, v, w=None, centered=True):
    """
    """
    u = _validate_and_mask(u)
    v = _validate_and_mask(v)
    if w is not None:
        w = _validate_weights(w)
    if centered:
        umu = ma.average(u, weights=w)
        vmu = ma.average(v, weights=w)
        u = u - umu
        v = v - vmu
    uv = ma.average(u * v, weights=w)
    uu = ma.average(np.square(u), weights=w)
    vv = ma.average(np.square(v), weights=w)
    dist = 1.0 - uv / ma.sqrt(uu * vv)
    return dist
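A sanity sketch for the distance above: with no mask and no weights it reduces to 1 - Pearson correlation. _validate_and_mask and _validate_weights are helpers from the surrounding module, so plain arrays are inlined here.

import numpy as np
import numpy.ma as ma

u = np.array([1., 2., 3., 5.])
v = np.array([2., 2., 4., 6.])

uc = u - ma.average(u)
vc = v - ma.average(v)
dist = 1.0 - ma.average(uc * vc) / ma.sqrt(ma.average(uc**2) * ma.average(vc**2))
print(dist, 1.0 - np.corrcoef(u, v)[0, 1])  # should agree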
def annual_cycle(self, input_variable):
    t0 = time.time()
    time_vals = input_variable.getTime().asComponentTime()
    month_index_array = np.array([tv.month for tv in time_vals])
    squeezed_input = input_variable.squeeze()
    acycle = [ma.average(get_subset(squeezed_input, month_index, month_index_array))
              for month_index in range(1, 13)]
    t1 = time.time()
    wpsLog.debug("Computed annual cycle, time = %.4f, result:\n %s"
                 % ((t1 - t0), str(acycle)))
    return ma.array(acycle)
def applyOperation(self, input_variable, operation):
    result = None
    try:
        self.setTimeBounds(input_variable)
        operator = None
        # pydevd.settrace('localhost', port=8030, stdoutToServer=False, stderrToServer=True)
        wpsLog.debug(" $$$ ApplyOperation: %s " % str(operation))
        if operation is not None:
            type = operation.get('type', '').lower()
            bounds = operation.get('bounds', '').lower()
            op_start_time = time.clock()  # time.time()
            if not bounds:
                if type == 'departures':
                    ave = cdutil.averager(input_variable, axis='t', weights='equal')
                    result = input_variable - ave
                elif type == 'climatology':
                    result = cdutil.averager(input_variable, axis='t', weights='equal')
                else:
                    result = input_variable
                time_axis = input_variable.getTime()
            elif bounds == 'np':
                if type == 'departures':
                    result = ma.anomalies(input_variable).squeeze()
                elif type == 'climatology':
                    result = ma.average(input_variable).squeeze()
                else:
                    result = input_variable
                time_axis = input_variable.getTime()
            else:
                if bounds == 'djf':
                    operator = cdutil.DJF
                elif bounds == 'mam':
                    operator = cdutil.MAM
                elif bounds == 'jja':
                    operator = cdutil.JJA
                elif bounds == 'son':
                    operator = cdutil.SON
                elif bounds == 'year':
                    operator = cdutil.YEAR
                elif bounds == 'annualcycle':
                    operator = cdutil.ANNUALCYCLE
                elif bounds == 'seasonalcycle':
                    operator = cdutil.SEASONALCYCLE
                if operator is not None:
                    if type == 'departures':
                        result = operator.departures(input_variable).squeeze()
                    elif type == 'climatology':
                        result = operator.climatology(input_variable).squeeze()
                    else:
                        result = operator(input_variable).squeeze()
                    time_axis = result.getTime()
            op_end_time = time.clock()  # time.time()
            wpsLog.debug(" ---> Base Operation Time: %.5f" % (op_end_time - op_start_time))
        else:
            result = input_variable
            time_axis = input_variable.getTime()

        if isinstance(result, float):
            result_data = [result]
        elif result is not None:
            if result.__class__.__name__ == 'TransientVariable':
                result = ma.masked_equal(result.squeeze().getValue(),
                                         input_variable.getMissing())
            result_data = result.tolist(numpy.nan)
        else:
            result_data = None
    except Exception as err:
        wpsLog.debug("Exception applying Operation '%s':\n %s"
                     % (str(operation), traceback.format_exc()))
        return (None, None)
def seasonal_cycle(self, input_variable):
    t0 = time.time()
    time_vals = input_variable.getTime().asComponentTime()
    season_index_array = np.array([self.season_def_array[tv.month] for tv in time_vals])
    squeezed_input = input_variable.squeeze()
    acycle = [ma.average(get_subset(squeezed_input, season_index, season_index_array))
              for season_index in range(0, 4)]
    t1 = time.time()
    wpsLog.debug("Computed seasonal cycle, time = %.4f, result:\n %s"
                 % ((t1 - t0), str(acycle)))
    return ma.array(acycle)
def weighted_mean_(self):
    """
    Calculates the weighted mean of the image given the probabilistic
    segmentation. If binary, mean and weighted mean will give the same result.

    :return:
    """
    masked_seg = np.tile(self.masked_seg, [self.img_channels, 1]).T
    return ma.average(self.masked_img, axis=0, weights=masked_seg).flatten()
def combine(self, other):
    # Punctuality
    for st in [PR_DEP, CS_DEP]:
        self.punctuality[st]['count'] += other.punctuality[st]['count']
        self.punctuality[st]['sum'] += other.punctuality[st]['sum']
        self.punctuality[st]['max'] = max(self.punctuality[st]['max'],
                                          other.punctuality[st]['max'])
        self.punctuality[st]['big'] = average(
            [self.punctuality[st]['big'], other.punctuality[st]['big']])

    # Waiting times
    self.waiting['count'] += other.waiting['count']
    self.waiting['sum'] += other.waiting['sum']
    self.waiting['max'] = max(self.waiting['max'], other.waiting['max'])
    self.waiting['big'] = average([self.waiting['big'], other.waiting['big']])

    # Stop congestion
    self.stops['count'] += other.stops['count']
    self.stops['sum'] += other.stops['sum']

    return self
def weighted_average(self, axis=0, expaxis=None):
    """ Calculate weighted average of data along axis
        after optionally inserting a new dimension into the
        shape array at position expaxis
    """
    if expaxis is not None:
        vals = ma.expand_dims(self.vals, expaxis)
        dmin = ma.expand_dims(self.dmin, expaxis)
        dmax = ma.expand_dims(self.dmax, expaxis)
        wt = ma.expand_dims(self.wt, expaxis)
    else:
        vals = self.vals
        wt = self.wt
        dmin = self.dmin
        dmax = self.dmax

    # Get average value
    avg, norm = ma.average(vals, axis=axis, weights=wt, returned=True)
    avg_ex = ma.expand_dims(avg, 0)

    # Calculate weighted uncertainty
    wtmax = ma.max(wt, axis=axis)
    neff = norm / wtmax  # Effective number of samples based on uncertainties

    # Seeking max deviation from the average; if above avg use max, if below use min
    term = np.empty_like(vals)

    indices = np.where(vals > avg_ex)
    i0 = indices[0]
    irest = indices[1:]
    ii = tuple(x for x in itertools.chain([i0], irest))
    jj = tuple(x for x in itertools.chain([np.zeros_like(i0)], irest))
    term[ii] = (dmax[ii] - avg_ex[jj])**2

    indices = np.where(vals <= avg_ex)
    i0 = indices[0]
    irest = indices[1:]
    ii = tuple(x for x in itertools.chain([i0], irest))
    jj = tuple(x for x in itertools.chain([np.zeros_like(i0)], irest))
    term[ii] = (avg_ex[jj] - dmin[ii])**2

    dsum = ma.sum(term * wt, axis=0)  # Sum for weighted average of deviations
    dev = 0.5 * np.sqrt(dsum / (norm * neff))

    if isinstance(avg, (float, np.floating)):  # np.float was removed from newer numpy
        avg = avg_ex

    tmp_min = avg - dev
    ii = np.where(tmp_min < 0)
    tmp_min[ii] = TOL * avg[ii]

    return UncertContainer(avg, tmp_min, avg + dev)
def _var(A, axis=0, keepdims=True, weights=None):
    if weights is None:
        return npm.var(A, axis=axis, keepdims=keepdims)
    else:
        mu = npm.average(A, axis=axis, keepdims=keepdims, weights=weights)
        w = npm.sum(weights, axis=axis, keepdims=keepdims)
        # weighted variance: sum(w * (A - mu)**2) / sum(w); the original passed
        # an unsupported weights= keyword to sum() and divided by w**2
        var = npm.sum(weights * (A - mu)**2, axis=axis, keepdims=keepdims) / w
        return var
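A consistency sketch for the corrected _var above (npm is assumed to be numpy.ma; keepdims support in ma.average needs a reasonably recent numpy): with uniform full-shape weights the weighted branch should reproduce the unweighted masked variance.

import numpy as np
import numpy.ma as npm

A = npm.asarray(np.arange(6, dtype=float).reshape(2, 3))
w = np.ones((2, 3))
print(_var(A, axis=0))             # unweighted masked variance
print(_var(A, axis=0, weights=w))  # should match for uniform weights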
def calculate_mean(data, lats):
    """
    data - a 2d lat-lon array with latitude axis first
    lats - a 1d array containing the corresponding latitude values
    returns - a latitude-weighted mean of the entire data array
    """
    # Create a 2d-array containing the weights of each cell,
    # i.e. the cosine of the latitude of that cell
    lat_weights = np.repeat(np.cos([lats * np.pi / 180.0]).T,
                            np.shape(data)[1], axis=1)
    return ma.average(data, weights=lat_weights)
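A minimal check of the cosine-latitude weighting: rows at high latitude contribute less, so the outlier row at 80 degrees moves the mean only slightly compared with an unweighted average.

import numpy as np
import numpy.ma as ma

lats = np.array([0., 45., 80.])
data = ma.masked_invalid(np.array([[1., 1.],
                                   [1., np.nan],
                                   [4., 4.]]))
print(calculate_mean(data, lats))  # well below the unweighted mean of 2.2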
def _get_ux_t(self):
    ux_arr = self.data_t[:, :, :, 3]
    # use an individual grid_mask for each time step
    ux_masked = ma.masked_array(ux_arr, mask=self.grid_mask_t)
    ux_avg = ma.average(ux_masked, axis=2)
    t_idx_zeros, x_idx_zeros, y_idx_zeros = np.where(self.grid_mask_t)
    ux_arr[t_idx_zeros, x_idx_zeros, y_idx_zeros] = ux_avg[t_idx_zeros, x_idx_zeros]
    return ux_arr
def calc_area_mean(data, lats, lons, mymask='not set'):
    '''
    Calculate Area Average of data in a masked array

    INPUT::
        data: a masked array of data (NB. only data from one time expected
              to be passed at once)
        lats: 2d array of regularly gridded latitudes
        lons: 2d array of regularly gridded longitudes
        mymask: (optional) defines spatial region to do averaging over

    OUTPUT::
        area_mean: a value for the mean inside the area
    '''
    # If mask not passed in, then set mask to cover whole data domain
    if mymask == 'not set':
        mymask = np.empty(data.shape)
        mymask[:] = False  # NB. mask means (don't show), so False everywhere means use everything.

    # Dimension check on lats, lons
    # Sometimes these arrays are 3d, sometimes 2d, sometimes 1d
    # This bit of code just converts to the required 2d array shape
    if len(lats.shape) == 3:
        lats = lats[0, :, :]
    if len(lons.shape) == 3:
        lons = lons[0, :, :]
    if np.logical_and(len(lats.shape) == 1, len(lons.shape) == 1):
        lons, lats = np.meshgrid(lons, lats)

    # Calculate grid length (assuming regular lat/lon grid)
    dlat = lats[1, 0] - lats[0, 0]
    dlon = lons[0, 1] - lons[0, 0]

    # Calculates weights for each grid box
    myweights = calc_area_in_grid_box(lats, dlon, dlat)

    # Create a new masked array covering just user selected area (defined in mymask)
    # NB. this preserves missing data points in the observations data
    subdata = ma.masked_array(data, mask=mymask)

    if myweights.shape != subdata.shape:
        myweights.resize(subdata.shape)
        myweights[1:, :] = myweights[0, :]

    # Calculate weighted mean using ma.average (which takes weights)
    area_mean = ma.average(subdata, weights=myweights)

    return area_mean
def _weighted_mean_with_mdtol(data, weights, axis=None, mdtol=0):
    """
    Return the weighted mean of an array over the specified axis
    using the provided weights (if any) and a permitted fraction of
    masked data.

    Args:

    * data (array-like):
        Data to be averaged.

    * weights (array-like):
        An array of the same shape as the data that specifies the contribution
        of each corresponding data element to the calculated mean.

    Kwargs:

    * axis (int or tuple of ints):
        Axis along which the mean is computed. The default is to compute
        the mean of the flattened array.

    * mdtol (float):
        Tolerance of missing data. The value returned in each element of the
        returned array will be masked if the fraction of masked data exceeds
        mdtol. This fraction is weighted by the `weights` array if one is
        provided. mdtol=0 means no missing data is tolerated
        while mdtol=1 will mean the resulting element will be masked if and
        only if all the contributing elements of data are masked.
        Defaults to 0.

    Returns:
        Numpy array (possibly masked) or scalar.

    """
    if ma.is_masked(data):
        res, unmasked_weights_sum = ma.average(data, weights=weights,
                                               axis=axis, returned=True)
        if mdtol < 1:
            weights_sum = weights.sum(axis=axis)
            frac_masked = 1 - np.true_divide(unmasked_weights_sum, weights_sum)
            mask_pt = frac_masked > mdtol
            if np.any(mask_pt) and not isinstance(res, ma.core.MaskedConstant):
                if np.isscalar(res):
                    res = ma.masked
                elif ma.isMaskedArray(res):
                    res.mask |= mask_pt
                else:
                    res = ma.masked_array(res, mask=mask_pt)
    else:
        res = np.average(data, weights=weights, axis=axis)
    return res
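A behavioral sketch of mdtol: with half of the first column masked, mdtol=0 masks that entry of the result, while mdtol=0.5 keeps it. Uniform weights keep the arithmetic obvious.

import numpy as np
import numpy.ma as ma

data = ma.array([[1., 2.], [3., 4.]], mask=[[True, False], [False, False]])
weights = np.ones((2, 2))

print(_weighted_mean_with_mdtol(data, weights, axis=0, mdtol=0))    # [--, 3.0]
print(_weighted_mean_with_mdtol(data, weights, axis=0, mdtol=0.5))  # [3.0, 3.0]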
def combine_pixels(loglam, flux, ivar, num_combine, trim_front=True):
    '''
    Combines neighboring pixels of inner most axis using ivar weighted average
    '''
    shape = flux.shape
    num_pixels = flux.shape[-1]
    assert len(loglam) == num_pixels

    ndim = flux.ndim
    new_shape = shape[:ndim - 1] + (-1, num_combine)

    num_leftover = num_pixels % num_combine
    # guard the num_leftover == 0 case: slice(0, -0) would select nothing
    if num_leftover == 0:
        s = slice(None)
    else:
        s = slice(num_leftover, None) if trim_front else slice(0, -num_leftover)

    flux = flux[..., s].reshape(new_shape)
    ivar = ivar[..., s].reshape(new_shape)
    loglam = loglam[s].reshape(-1, num_combine)

    flux, ivar = ma.average(flux, weights=ivar, axis=ndim, returned=True)
    loglam = ma.average(loglam, axis=1)

    return loglam, flux, ivar
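A toy run of combine_pixels: ten pixels combined in threes leaves one leftover, which trim_front drops from the front before the ivar-weighted bin averages are formed.

import numpy as np

loglam = np.linspace(3.55, 3.56, 10)
flux = np.linspace(1., 2., 10)
ivar = np.full(10, 4.)

ll, f, iv = combine_pixels(loglam, flux, ivar, num_combine=3)
print(ll.shape, f.shape, iv.shape)  # (3,) (3,) (3,)
print(iv)                           # summed inverse variances: [12. 12. 12.]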
def mean_annual_cycle(data):
    """
    Compute the mean annual cycle of variable.
    Assumes data is masked array with shape [nmonth,nlat,nlon].

    Output: array
    """
    ntime, nlat, nlon = data.shape
    # reshape from [nmonth,nlat,nlon] to [nyear,12,nlat,nlon]
    work = MA.reshape(data, (-1, 12, nlat, nlon))
    # compute mean annual cycle
    mean_data = MA.average(work, 0)
    return mean_data
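A shape check for the climatology above: 24 months of 2x2 fields (two years) reduce to a 12-month mean annual cycle.

import numpy as np
import numpy.ma as MA

data = MA.asarray(np.arange(24 * 2 * 2, dtype=float).reshape(24, 2, 2))
clim = mean_annual_cycle(data)
print(clim.shape)     # (12, 2, 2)
print(clim[0, 0, 0])  # mean of months 0 and 12 at that grid point: 24.0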
def _weighted_mean_with_mdtol(data, weights, axis=None, mdtol=0):
    """
    Return the weighted mean of an array over the specified axis
    using the provided weights (if any) and a permitted fraction of
    masked data.

    Args:

    * data (array-like):
        Data to be averaged.

    * weights (array-like):
        An array of the same shape as the data that specifies the contribution
        of each corresponding data element to the calculated mean.

    Kwargs:

    * axis (int or tuple of ints):
        Axis along which the mean is computed. The default is to compute
        the mean of the flattened array.

    * mdtol (float):
        Tolerance of missing data. The value returned in each element of the
        returned array will be masked if the fraction of masked data exceeds
        mdtol. This fraction is weighted by the `weights` array if one is
        provided. mdtol=0 means no missing data is tolerated
        while mdtol=1 will mean the resulting element will be masked if and
        only if all the contributing elements of data are masked.
        Defaults to 0.

    Returns:
        Numpy array (possibly masked) or scalar.

    """
    res = ma.average(data, weights=weights, axis=axis)
    if ma.isMaskedArray(data) and mdtol < 1:
        weights_total = weights.sum(axis=axis)
        masked_weights = weights.copy()
        masked_weights[~ma.getmaskarray(data)] = 0
        masked_weights_total = masked_weights.sum(axis=axis)
        frac_masked = np.true_divide(masked_weights_total, weights_total)
        mask_pt = frac_masked > mdtol
        if np.any(mask_pt):
            if np.isscalar(res):
                res = ma.masked
            elif ma.isMaskedArray(res):
                res.mask |= mask_pt
            else:
                res = ma.masked_array(res, mask=mask_pt)
    return res
def make_P(ps, A, B, P0):
    '''
    # Author Charles Doutriaux
    # Version 1.0
    # email: [email protected]
    # Step 1 of conversion of a field from sigma levels to pressure levels
    # Create the Pressure field on sigma levels, from the surface pressure
    # Input
    # Ps : Surface pressure
    # A,B,Po: Coefficients, such as: p=B.ps+A.Po
    # Ps is 2D (lonxlat)
    # B,A are 1D (vertical sigma levels)
    # Output
    # Pressure field from TOP (level 0) to BOTTOM (last level)
    # 3D field (lon/lat/sigma)
    # External : Numeric
    # Compute the pressure for the sigma levels
    '''
    import numpy.ma as MA
    p = MA.outerproduct(B, ps)
    dim = B.shape[0], ps.shape[0], ps.shape[1]
    p = MA.reshape(p, dim)
    ## p = ps.filled()[Numeric.NewAxis,...]*B.filled()[:,Numeric.NewAxis,Numeric.NewAxis]
    ## Po = P0*MA.ones(p.shape,Numeric.Float)
    A = MA.outerproduct(A, P0 * MA.ones(p.shape[1:]))
    A = MA.reshape(A, p.shape)
    p = p + A
    # Now checking to make sure we return P[0] as the top
    a = MA.average(MA.average(p[0] - p[-1], axis=0))
    if a > 0:
        # We got the wrong order!
        p = p[::-1]
    return p
def test_testAverage1(self):
    # Test of average.
    ott = array([0., 1., 2., 3.], mask=[1, 0, 0, 0])
    assert_(eq(2.0, average(ott, axis=0)))
    assert_(eq(2.0, average(ott, weights=[1., 1., 2., 1.])))
    result, wts = average(ott, weights=[1., 1., 2., 1.], returned=1)
    assert_(eq(2.0, result))
    assert_(wts == 4.0)
    ott[:] = masked
    assert_(average(ott, axis=0) is masked)
    ott = array([0., 1., 2., 3.], mask=[1, 0, 0, 0])
    ott = ott.reshape(2, 2)
    ott[:, 1] = masked
    assert_(eq(average(ott, axis=0), [2.0, 0.0]))
    assert_(average(ott, axis=1)[0] is masked)
    assert_(eq([2., 0.], average(ott, axis=0)))
    result, wts = average(ott, axis=0, returned=1)
    assert_(eq(wts, [1., 0.]))
def test_testAverage1(self):
    # Test of average.
    ott = array([0.0, 1.0, 2.0, 3.0], mask=[1, 0, 0, 0])
    self.assertTrue(eq(2.0, average(ott, axis=0)))
    self.assertTrue(eq(2.0, average(ott, weights=[1.0, 1.0, 2.0, 1.0])))
    result, wts = average(ott, weights=[1.0, 1.0, 2.0, 1.0], returned=1)
    self.assertTrue(eq(2.0, result))
    self.assertTrue(wts == 4.0)
    ott[:] = masked
    self.assertTrue(average(ott, axis=0) is masked)
    ott = array([0.0, 1.0, 2.0, 3.0], mask=[1, 0, 0, 0])
    ott = ott.reshape(2, 2)
    ott[:, 1] = masked
    self.assertTrue(eq(average(ott, axis=0), [2.0, 0.0]))
    self.assertTrue(average(ott, axis=1)[0] is masked)
    self.assertTrue(eq([2.0, 0.0], average(ott, axis=0)))
    result, wts = average(ott, axis=0, returned=1)
    self.assertTrue(eq(wts, [1.0, 0.0]))
def moment(phi, p, q, Iorigin=0, Jorigin=0, lower=0., upper=120.):
    """
    Compute the (p, q) raw moment of phi, centred at the origin.
    Will define a method in armor.pattern.DBZ calling this function,
    in which (Iorigin, Jorigin) = coordinateOrigin:
        a = pattern.a
        a.moment(p, q)
    """
    phi1 = phi.view(ma.MaskedArray)
    phi1.mask += (phi1 < lower) + (phi1 > upper)  # masking the out-of-range regions
    height, width = phi1.shape
    X, Y = np.meshgrid(range(width), range(height))
    I, J = Y, X  # always work with I, J internally
    I -= Iorigin
    J -= Jorigin
    Mpq = ma.average(I**p * J**q, weights=phi1)
    return Mpq
def _collapseStack(self, stack=None, ustack=None, method='SigClip', sig=50.):
    '''
    If called without the stack keyword set, this will collapse
    the entire stack. However, the internal stack is overridden
    if a different stack is passed. For instance, this could be
    a stack of nod pairs.
    '''
    if stack is None:
        stack, ustack = self.stack, self.ustack

    # stack_median = np.median(stack, 2)
    # stack_stddev = np.std(stack, 2)
    # shape = stack.shape
    # masked_stack = ma.zeros(shape)
    masked_stack = ma.masked_invalid(stack)
    masked_ustack = ma.masked_invalid(ustack)

    image = ma.average(masked_stack, 2, weights=1. / masked_ustack**2)
    uimage = np.sqrt(ma.mean(masked_ustack**2, 2) / ma.count(masked_ustack, 2))

    return image, uimage