def calc_time_regres(var, num_years_req):
    # Find the linear regression for each grid point as a function of year
    nx = var.shape[1]
    ny = var.shape[2]
    #print nx, ny
    trend = ma.masked_all((nx, ny))
    sig = ma.masked_all((nx, ny))
    r = ma.masked_all((nx, ny))
    years = np.arange(var.shape[0])
    for i in xrange(nx):
        for j in xrange(ny):
            #print i, j
            var_ma = var[:, i, j][~var[:, i, j].mask]
            years_ma = years[~var[:, i, j].mask]
            if len(var_ma) > num_years_req:
                trend[i, j], intercept, r[i, j], prob, stderr = stats.linregress(
                    years_ma, var_ma)
                sig[i, j] = 100 * (1 - prob)
    trend = ma.array(trend, mask=np.isnan(trend))
    r = ma.array(r, mask=np.isnan(r))
    sig = ma.array(sig, mask=np.isnan(sig))
    return trend, sig, r

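# A minimal, self-contained sketch of the same per-grid-cell regression
# pattern, handy for sanity checking.  The shapes, the 0.5/yr trend, the
# noise level and the num_years_req threshold of 10 are illustrative
# assumptions, not values from the original code.
import numpy as np
import numpy.ma as ma
from scipy import stats

np.random.seed(0)
years = np.arange(20)
field = ma.masked_all((20, 4, 5))
field[:] = 0.5 * years[:, None, None] + 0.2 * np.random.randn(20, 4, 5)
field[::7, 1, 2] = ma.masked          # knock out a few years in one cell

trend = ma.masked_all((4, 5))
for i in range(4):
    for j in range(5):
        valid = ~ma.getmaskarray(field[:, i, j])
        if valid.sum() > 10:          # num_years_req
            trend[i, j], _, _, _, _ = stats.linregress(
                years[valid], field[:, i, j][valid])
print(trend.round(2))                 # every cell comes out close to 0.5
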
def get_detrended_yr(yearsTr, yearT, var_yearsT, var_yrT, num_years_req):
    """Detrend a 2D array using linear regression

    Mask based on valid number of years in each grid cell.
    """
    var_yearsDT = ma.masked_all((var_yearsT.shape))
    var_yrDT = ma.masked_all((var_yrT.shape))

    # Loop over each dimension
    for i in range(var_yearsT.shape[1]):
        for j in range(var_yearsT.shape[2]):
            mask = ~var_yearsT[:, i, j].mask
            var_yearsT_ma = var_yearsT[:, i, j][mask]
            if (len(var_yearsT_ma) > num_years_req):
                trendT, interceptT, r_valsT, probT, stderrT = stats.linregress(
                    yearsTr[mask], var_yearsT_ma)
                lineT = (trendT * yearsTr) + interceptT
                #print var_yearsT[:, i, j].shape, lineT.shape, yearsTr
                var_yearsDT[:, i, j] = var_yearsT[:, i, j] - lineT

                # Calculate the detrended var (linear trend persistence) for
                # the given forecast year
                lineT_yr = interceptT + (trendT * (yearT))
                var_yrDT[i, j] = var_yrT[i, j] - lineT_yr

    return var_yearsDT, var_yrDT

def make_same_size(day_list, tseries_list):
    """
    Make all time-series the same size as longest time-series and
    combine them into one [ncases,ntime,...] array for plotting.
    """
    # find beg and end of period that encompasses all time-series
    day_beg = min([d.min() for d in day_list])
    day_end = max([d.max() for d in day_list])

    # find indices in 1000 year time array
    thousand_years = N.resize(dpm, 12 * 1000).cumsum()
    ibeg = thousand_years.searchsorted(day_beg)
    iend = thousand_years.searchsorted(day_end)

    # set new array dimensions and create array
    day = thousand_years[ibeg:iend + 1]
    ncases = len(tseries_list)
    ntime = iend - ibeg + 1
    if tseries_list[0].ndim == 2:  # regional time-series [ntime,nreg]
        data = MA.masked_all((ncases, ntime, tseries_list[0].shape[-1]), float)
    else:
        data = MA.masked_all((ncases, ntime), float)

    # fill new array according to time index
    for n in range(ncases):
        i1 = day.searchsorted(day_list[n].min())
        i2 = day.searchsorted(day_list[n].max()) + 1
        data[n, i1:i2, ...] = tseries_list[n]

    return day, data

def get_meltonset_gridded(dataoutpath, yearsT, freezemelt_str, hemStr):
    """ Get gridded melt onset data

    Data gridded using linear interpolation of NASA's GSFC melt onset data
    onto a 100 km grid.
    """
    if (hemStr == 'N'):
        poleStr = 'A'
    elif (hemStr == 'S'):
        poleStr = 'AA'

    xpts = load(dataoutpath + 'xpts100km' + poleStr)
    ypts = load(dataoutpath + 'ypts100km' + poleStr)

    if (size(yearsT) > 1):
        Melt_onset_years = ma.masked_all(
            (size(yearsT), xpts.shape[0], xpts.shape[1]))
        x = 0
        for year in yearsT:
            Melt_onset_years[x] = load(dataoutpath + freezemelt_str + '100km'
                                       + str(year) + poleStr)
            x += 1
    else:
        Melt_onset_years = load(dataoutpath + freezemelt_str + '100km'
                                + str(yearsT) + poleStr)

    return xpts, ypts, Melt_onset_years

def drifter_sample(cfg):
    N = cfg['montecarlo']['Nsamples']
    # Initial position
    t = np.arange(N) * cfg['montecarlo']['dt']
    x, y = ma.masked_all(N), ma.masked_all(N)
    u, v = ma.masked_all(N), ma.masked_all(N)
    x[0], y[0] = (2 * random(2) - 1) * cfg['montecarlo']['Rlimit']
    u[0], v[0] = synthetic_CLring(x[0], y[0], t[0], cfg['ring'])
    u_nonoise, v_nonoise = u.copy(), v.copy()
    for n in range(1, N):
        dt = t[n] - t[n - 1]
        x[n], y[n] = x[n - 1] + u[n - 1] * dt, y[n - 1] + v[n - 1] * dt
        u_nonoise[n], v_nonoise[n] = synthetic_CLring(x[n], y[n], t[n],
                                                      cfg['ring'])
        u[n] = u_nonoise[n] + cfg['montecarlo']['Vnoise_sigma'] * randn()
        v[n] = v_nonoise[n] + cfg['montecarlo']['Vnoise_sigma'] * randn()
        #print x[n], y[n], u[n], v[n]
    #t = t - np.median(t)
    data = {'t': t, 'x': x, 'y': y, 'u': u, 'v': v}
    stats = {}
    return data, stats

def woa_profile_from_dap(var, d, lat, lon, depth, cfg):
    """
    Monthly Climatologic Mean and Standard Deviation from WOA,
    used either for temperature or salinity.

    INPUTS
        time: [day of the year]
        lat: [-90<lat<90]
        lon: [-180<lon<180]
        depth: [meters]

    Reads the WOA Monthly Climatology NetCDF file and
    returns the corresponding WOA values of salinity or temperature mean and
    standard deviation for the given time, lat, lon, depth.
    """
    if lon < 0:
        lon = lon + 360

    url = cfg['url']

    doy = int(d.strftime('%j'))
    dataset = open_url(url)

    dn = (np.abs(doy - dataset['time'][:])).argmin()
    xn = (np.abs(lon - dataset['lon'][:])).argmin()
    yn = (np.abs(lat - dataset['lat'][:])).argmin()

    if re.match(r"temperature\d?$", var):
        mn = ma.masked_values(dataset.t_mn.t_mn[dn, :, yn, xn].reshape(
            dataset['depth'].shape[0]), dataset.t_mn.attributes['_FillValue'])
        sd = ma.masked_values(dataset.t_sd.t_sd[dn, :, yn, xn].reshape(
            dataset['depth'].shape[0]), dataset.t_sd.attributes['_FillValue'])
        # se = ma.masked_values(dataset.t_se.t_se[dn, :, yn, xn].reshape(
        #     dataset['depth'].shape[0]), dataset.t_se.attributes['_FillValue'])
        # Use this in the future. A minimum # of samples
        # dd = ma.masked_values(dataset.t_dd.t_dd[dn, :, yn, xn].reshape(
        #     dataset['depth'].shape[0]), dataset.t_dd.attributes['_FillValue'])
    elif re.match(r"salinity\d?$", var):
        mn = ma.masked_values(dataset.s_mn.s_mn[dn, :, yn, xn].reshape(
            dataset['depth'].shape[0]), dataset.s_mn.attributes['_FillValue'])
        sd = ma.masked_values(dataset.s_sd.s_sd[dn, :, yn, xn].reshape(
            dataset['depth'].shape[0]), dataset.s_sd.attributes['_FillValue'])
        # dd = ma.masked_values(dataset.s_dd.s_dd[dn, :, yn, xn].reshape(
        #     dataset['depth'].shape[0]), dataset.s_dd.attributes['_FillValue'])

    zwoa = ma.array(dataset.depth[:])

    ind = (depth <= zwoa.max()) & (depth >= zwoa.min())
    # Mean value profile
    f = interp1d(zwoa[~ma.getmaskarray(mn)].compressed(), mn.compressed())
    mn_interp = ma.masked_all(depth.shape)
    mn_interp[ind] = f(depth[ind])
    # The stdev profile
    f = interp1d(zwoa[~ma.getmaskarray(sd)].compressed(), sd.compressed())
    sd_interp = ma.masked_all(depth.shape)
    sd_interp[ind] = f(depth[ind])

    output = {'woa_an': mn_interp, 'woa_sd': sd_interp}

    return output

def ba_ratio_histograms(ba_files, ind_files, indices_names, minmax):
    """computes histogram of ratio of MODIS BA to 0.5x0.5 deg occurrence

    Considering each day an independent measurement of the entire study area,
    the ratio of total MODIS BA counts to total 0.5x0.5 deg cells is computed
    for each parameter bin. Each parameter bin gets at most one observation
    per day, and this observation embodies all 0.5x0.5deg cells in that bin
    for that day.
    """
    num_years = len(ind_files)
    max_days = 365
    histo_shape = zip(*minmax)[2]
    ratio_shape = histo_shape + (max_days, num_years)
    ratios = ma.masked_all(ratio_shape)
    halfdeg_counts = ma.masked_all(ratio_shape)

    ca = gca.GeoCompressedAxes(ind_files[0], 'land')
    ca.set_clip_box(42.5, 66.5, 22, 130)

    for i_year in range(len(ind_files)):
        indfile = ind_files[i_year]
        bafile = ba_files[i_year]
        count = bafile.variables['count']
        timelim = len(indfile.dimensions['days']) - 1
        filevars = [indfile.variables[iname] for iname in indices_names]
        for i_day in range(10, timelim):
            print i_day
            day_data = [f[i_day, :] for f in filevars]
            i_conditions = zip(*day_data)
            ba_day = count[..., i_day]
            ba_total = np.sum(ba_day, axis=2)
            ba_total_cmp = ca.compress(ba_total)

            # per bin ba totals (units of modis pixels)
            burned_total = ah.AccumulatingHistogramdd(minmax=minmax)
            for i_tot, ba_tot in enumerate(ba_total_cmp):
                if ba_tot is ma.masked:
                    continue
                if ba_tot > 0:
                    burned_total.put_record(i_conditions[i_tot], weight=ba_tot)

            # per bin occurrence totals (units of 0.5 deg cells)
            occurrence = ah.AccumulatingHistogramdd(minmax=minmax)
            for i_window, mask in enumerate(ca.get_vec_mask()):
                if not mask:
                    occurrence.put_record(i_conditions[i_window])

            # calculate ratio
            i_occurrence = np.where(occurrence.H > 0)
            num_occurrence = len(i_occurrence[0])
            i_occ_oneday = i_occurrence + (np.array([i_day] * num_occurrence),
                                           np.array([i_year] * num_occurrence))
            ratios[i_occ_oneday] = (burned_total.H[i_occurrence] /
                                    occurrence.H[i_occurrence])
            halfdeg_counts[..., i_day, i_year] = occurrence.H

    ratio_histogram = compute_ratio_histo(ratios, minmax)

    return (ratios, halfdeg_counts, ratio_histogram)

def getGriddedFowlerCurlFromDaily(m, files, lon, xptsM, yptsM, xptsG, yptsG,
                                  lonsG, latsG, dxRes):
    xvelG = ma.masked_all((size(files), lonsG.shape[0], lonsG.shape[1]))
    yvelG = ma.masked_all((size(files), lonsG.shape[0], lonsG.shape[1]))
    curlG = ma.masked_all((size(files), lonsG.shape[0], lonsG.shape[1]))
    #print 'uvel', uvelD.shape
    x = 0
    for file in files:
        fd = open(file, 'rb')
        motionDat = fromfile(file=fd, dtype='<i2')
        motionDat = reshape(motionDat, [361, 361, 3])

        xt = motionDat[:, :, 0] / 1000.
        yt = motionDat[:, :, 1] / 1000.
        q = motionDat[:, :, 2] / 1000.

        mask = where((q <= 0) | (q > 1), 0, 1)

        xt = ma.masked_where(mask < 0.5, xt)
        yt = ma.masked_where(mask < 0.5, yt)

        alpha = lon * pi / 180.
        uvelT = yt * sin(alpha) + xt * cos(alpha)
        vvelT = yt * cos(alpha) - xt * sin(alpha)

        # Set masked values back to nan for gridding purposes
        uvelT[where(ma.getmask(uvelT))] = np.nan
        vvelT[where(ma.getmask(vvelT))] = np.nan
        #print uvel
        # Re-grid data
        #print uvel.flatten().shape, xptsM.flatten().shape, xptsG.shape
        uvelG = griddata((xptsM.flatten(), yptsM.flatten()), uvelT.flatten(),
                         (xptsG, yptsG), method='linear')
        vvelG = griddata((xptsM.flatten(), yptsM.flatten()), vvelT.flatten(),
                         (xptsG, yptsG), method='linear')

        # Rotate data onto new grid
        xvelGT, yvelGT = m.rotate_vector(uvelG, vvelG, lonsG, latsG)
        xvelGT = ma.masked_invalid(xvelGT)
        yvelGT = ma.masked_invalid(yvelGT)
        xvelG[x] = xvelGT
        yvelG[x] = yvelGT
        curlG[x] = calcCurlSq2dXYGradient(xvelGT, yvelGT, dxRes)
        # print x, curlG[x]
        x += 1
    # COULD ROTATE HERE AND DO CURL OF DAILY VARIABLES.
    xvelMean = ma.mean(xvelG, axis=0)
    yvelMean = ma.mean(yvelG, axis=0)
    curlMean = ma.mean(curlG, axis=0)
    #vvelD=vstack([vvelD, vvelT])
    return xvelMean, yvelMean, curlMean

def getGriddedFowlerFromDaily(m, files, lon, xptsM, yptsM, xptsG, yptsG,
                              lonsG, latsG):
    uvelD = ma.masked_all((size(files), lon.shape[0], lon.shape[1]))
    vvelD = ma.masked_all((size(files), lon.shape[0], lon.shape[1]))
    # print 'uvel', uvelD.shape
    x = 0
    for file in files:
        fd = open(file, 'rb')
        motionDat = fromfile(file=fd, dtype='<i2')
        motionDat = reshape(motionDat, [361, 361, 3])

        xt = motionDat[:, :, 0] / 1000.
        yt = motionDat[:, :, 1] / 1000.
        q = motionDat[:, :, 2] / 1000.

        mask = where((q <= 0) | (q > 1), 0, 1)

        xt = ma.masked_where(mask < 0.5, xt)
        yt = ma.masked_where(mask < 0.5, yt)
        # Comes in xy coordinates so need to rotate to UV
        #xvel = ma.masked_where(np.isnan(xt), xt)
        #yvel = ma.masked_where(np.isnan(yt), yt)
        #xvel=f[0]
        #yvel=f[1]
        alpha = lon * pi / 180.
        uvelT = yt * sin(alpha) + xt * cos(alpha)
        vvelT = yt * cos(alpha) - xt * sin(alpha)
        uvelD[x] = uvelT
        vvelD[x] = vvelT
        x += 1
    # COULD ROTATE HERE AND DO CURL OF DAILY VARIABLES.
    #vvelD=vstack([vvelD, vvelT])
    uvel = ma.mean(uvelD, axis=0)
    vvel = ma.mean(vvelD, axis=0)
    #print uvel
    # if we want to set masked values back to nan for gridding purposes
    uvel[where(ma.getmask(uvel))] = np.nan
    vvel[where(ma.getmask(vvel))] = np.nan
    #print uvel
    # Re-grid data
    # print uvel.flatten().shape, xptsM.flatten().shape, xptsG.shape
    uvelG = griddata((xptsM.flatten(), yptsM.flatten()), uvel.flatten(),
                     (xptsG, yptsG), method='linear')
    vvelG = griddata((xptsM.flatten(), yptsM.flatten()), vvel.flatten(),
                     (xptsG, yptsG), method='linear')

    # Rotate data onto new grid
    xvelG, yvelG = m.rotate_vector(uvelG, vvelG, lonsG, latsG)
    xvelG = ma.masked_invalid(xvelG)
    yvelG = ma.masked_invalid(yvelG)
    return xvelG, yvelG

def load_point_timeseries_from_multiple_files(nc_files, var_name, k=None,
                                              j=None, i=None, nt=None):
    # if i is provided but not j, it's a list of 2d points...
    calendar = None
    start_units = None
    if nt is None:
        nt, calendar = nt_from_multiple_files_with_calendar_check(nc_files)
    tvs_ts = ma.masked_all([nt, 6])
    data_ts = ma.masked_all([nt])
    t = 0
    for nc_file in nc_files:
        nc_dataset = netCDF4.Dataset(nc_file, 'r')
        nc_time = nc_dataset.variables['time']
        if start_units is None:
            start_units = nc_time.units
        if calendar is None:
            calendar = _calendar_from_time_variable(nc_time)
        if 'time_vectors' in nc_dataset.variables.keys():
            tvs = _time_vectors_int(nc_dataset.variables['time_vectors'][:, :])
        else:
            nc_datetimes = netCDF4.num2date(nc_time[:], nc_time.units,
                                            nc_time.calendar)
            tvs = _datetimes_to_time_vectors(nc_datetimes)
        # Issue with 2nd dimension here, might not be always 6.
        if tvs.shape[1] == 6:
            tvs_ts[t:t + tvs.shape[0], :] = tvs[:, :]
        elif tvs.shape[1] == 3:
            tvs_ts[t:t + tvs.shape[0], 0:3] = tvs[:, :]
        else:
            raise NotImplementedError("Unexpected time vectors shape.")
        nc_var = nc_dataset.variables[var_name]
        if k is not None:
            if j is not None:
                data_ts[t:t + tvs.shape[0]] = nc_var[:, k, j, i]
            elif i is not None:
                data_ts[t:t + tvs.shape[0]] = nc_var[:, k, i]
            else:
                data_ts[t:t + tvs.shape[0]] = nc_var[:, k]
        else:
            if j is not None:
                data_ts[t:t + tvs.shape[0]] = nc_var[:, j, i]
            elif i is not None:
                data_ts[t:t + tvs.shape[0]] = nc_var[:, i]
            else:
                data_ts[t:t + tvs.shape[0]] = nc_var[:]
        t += tvs.shape[0]
        nc_dataset.close()
    tvs_ts = _time_vectors_type(tvs_ts, tvs)
    # There is no check for a uniform increase in the time steps
    # The data type of returned time vectors can be float even when it should
    # be integers.
    return tvs_ts, data_ts, start_units, calendar

def __init__(self, inputdir, inputpattern=r".*\.cnv", cfg=None,
             saveauxiliary=False, timeout=60):
    """
    """
    self.name = "ProfilesQCCollection"

    self.inputfiles = make_file_list(inputdir, inputpattern)

    self.profiles = process_profiles(self.inputfiles, cfg, saveauxiliary,
                                     timeout=timeout)
    # self.profiles = process_profiles_serial(self.inputfiles, cfg,
    #                                         saveauxiliary)

    self.data = {'id': [], 'profileid': [], 'profilename': []}
    self.flags = {}
    if saveauxiliary is True:
        self.auxiliary = {}

    offset = 0
    for p in self.profiles:
        N = p['timeS'].size

        # Be sure that all have the same length.
        for v in p.keys():
            assert p[v].size == N

        ids = offset + np.arange(N)
        self.data['id'] = np.append(self.data['id'], ids).astype('i')
        profileid = [p.attributes['md5']] * N
        self.data['profileid'] = np.append(self.data['profileid'], profileid)
        profilename = [p.attributes['filename']] * N
        self.data['profilename'] = np.append(self.data['profilename'],
                                             profilename)
        for v in p.keys():
            if v not in self.data:
                self.data[v] = ma.masked_all(offset)
            self.data[v] = ma.append(self.data[v], p[v])

        # ---- Dealing with the flags --------------------------------
        for v in p.flags.keys():
            if v not in self.flags:
                self.flags[v] = {'id': [], 'profileid': []}
            self.flags[v]['id'] = np.append(self.flags[v]['id'],
                                            ids).astype('i')
            self.flags[v]['profileid'] = np.append(
                self.flags[v]['profileid'], profileid)
            for t in p.flags[v]:
                if t not in self.flags[v]:
                    self.flags[v][t] = ma.masked_all(offset)
                self.flags[v][t] = ma.append(self.flags[v][t], p.flags[v][t])
        offset += N

    return

def _get_cloud_base_and_top_heights(
    classification: np.ndarray, product_container: DataSource
) -> Tuple[np.ndarray, np.ndarray]:
    height = product_container.getvar("height")
    cloud_mask = _find_cloud_mask(classification)
    if not cloud_mask.any():
        return ma.masked_all(cloud_mask.shape[0]), ma.masked_all(cloud_mask.shape[0])
    lowest_bases = atmos.find_lowest_cloud_bases(cloud_mask, height)
    highest_tops = atmos.find_highest_cloud_tops(cloud_mask, height)
    assert (highest_tops - lowest_bases >= 0).all()
    return lowest_bases, highest_tops

def spike(x):
    y = ma.masked_all(x.shape, dtype=x.dtype)
    y[1:-1] = np.abs(x[1:-1] - (x[:-2] + x[2:]) / 2.0) - \
        np.abs((x[2:] - x[:-2]) / 2.0)
    # ATTENTION, temporary solution
    # y[0] = 0; y[-1] = 0
    return y

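# A quick check of the spike expression above on a toy series (values are
# arbitrary): the curvature term minus the local slope term singles out the
# spike, and the end points stay masked because both neighbours are needed.
import numpy as np
import numpy.ma as ma

x = ma.array([1.0, 1.1, 1.2, 5.0, 1.3, 1.4])
y = ma.masked_all(x.shape, dtype=x.dtype)
y[1:-1] = np.abs(x[1:-1] - (x[:-2] + x[2:]) / 2.0) - \
    np.abs((x[2:] - x[:-2]) / 2.0)
print(y)  # approximately [-- -0.1 -0.1 3.7 0.1 --]: the spike at index 3 stands out
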
def make_mosaic(imgs, nrows, ncols, border=1):
    """
    Given a set of images with all the same shape, makes a
    mosaic with nrows and ncols
    """
    nimgs = imgs.shape[0]
    imshape = imgs.shape[1:]

    mosaic = ma.masked_all(
        (nrows * imshape[0] + (nrows - 1) * border,
         ncols * imshape[1] + (ncols - 1) * border),
        dtype=np.float32)

    paddedh = imshape[0] + border
    paddedw = imshape[1] + border
    for i in range(nimgs):
        row = int(np.floor(i / ncols))
        col = i % ncols

        mosaic[row * paddedh:row * paddedh + imshape[0],
               col * paddedw:col * paddedw + imshape[1]] = imgs[i]
    return mosaic

# model = load_model('/home/nickos/servers/storage/py_projects/nk47-assignment2/saved_model.h5')
# model.summary()
# weights = model.get_weights()
#
# w = np.squeeze(weights[0])
# w = np.transpose(w, (2, 0, 1))
# pl.figure(figsize=(15, 15))
# pl.title('conv1 weights')
# nice_imshow(pl.gca(), make_mosaic(w, 6, 6), cmap=cm.binary)
# plt.show()

def observations(nobs, ndim):
    # Build a symmetric ndim x ndim matrix with nobs randomly placed entries;
    # everything else stays masked.
    a = ma.masked_all((ndim, ndim), dtype=float)
    for _ in range(nobs):
        (i, j) = np.random.randint(0, ndim, 2)
        a[(i, j)] = np.random.random_sample()
        a[(j, i)] = a[(i, j)]
    return a

def set_window(self, data):
    """returns an array the size of the dataset, with data correctly located
    in the window, and all other pixels masked. Data must be a 2D array
    exactly the size of the window."""
    ds = ma.masked_all(self._dataset_shape, dtype=data.dtype)
    ds[self._window] = data
    return ds

def output(t, x, std, downsample, seed=0):
    np.random.seed(seed)
    y = ma.masked_all((t.size, 1))
    nmeas = y[::downsample].shape[0]
    y[::downsample, 0] = x[::downsample, 0] + std * np.random.randn(nmeas)
    return y

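# A tiny check of how the downsampling mask plays out (sizes, state and
# noise level are all arbitrary): only every `downsample`-th sample is
# observed, the rest stay masked.
import numpy as np
import numpy.ma as ma

t = np.arange(10.0)
x = np.column_stack([np.sin(t)])   # single-state trajectory, shape (10, 1)
std, downsample = 0.1, 3

np.random.seed(0)
y = ma.masked_all((t.size, 1))
nmeas = y[::downsample].shape[0]
y[::downsample, 0] = x[::downsample, 0] + std * np.random.randn(nmeas)
print(y[:, 0])  # observed at t = 0, 3, 6, 9; masked in between
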
def make_mosaic(im, nrows, ncols, border=1):
    """From http://nbviewer.jupyter.org/github/julienr/ipynb_playground/blob/master/keras/convmnist/keras_cnn_mnist.ipynb
    """
    import numpy.ma as ma

    nimgs = len(im)
    imshape = im[0].shape

    mosaic = ma.masked_all(
        (nrows * imshape[0] + (nrows - 1) * border,
         ncols * imshape[1] + (ncols - 1) * border),
        dtype=np.float32)

    paddedh = imshape[0] + border
    paddedw = imshape[1] + border
    for i in range(nimgs):
        row = int(np.floor(i / ncols))
        col = i % ncols

        mosaic[row * paddedh:row * paddedh + imshape[0],
               col * paddedw:col * paddedw + imshape[1]] = im[i]
    return mosaic

def make_mosaic(imgs, nrows, ncols, border=2):
    """
    Given a set of images with all the same shape, makes a
    mosaic with nrows and ncols
    """
    nimgs = imgs.shape[0]
    imshape = imgs.shape[1:]

    mosaic = ma.masked_all(
        (nrows * imshape[0] + (nrows - 1) * border,
         ncols * imshape[1] + (ncols - 1) * border),
        dtype=np.float32)

    paddedh = imshape[0] + border
    # print(paddedh)
    paddedw = imshape[1] + border
    # print(paddedw)
    for i in xrange(nimgs):
        # chan=3
        row = int(np.floor(i / ncols))
        col = i % ncols
        a = row * paddedh
        b = row * paddedh + imshape[0]
        c = col * paddedw
        d = col * paddedw + imshape[1]
        mosaic[a:b, c:d] = imgs[i]
        # print(mosaic)
    return mosaic

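# Quick sanity check of the mosaic geometry used by the make_mosaic variants
# above (image count and sizes are arbitrary): N same-shaped images land on a
# canvas of (nrows*h + (nrows-1)*border) x (ncols*w + (ncols-1)*border), and
# any unused slot plus the border strips stay masked.
import numpy as np
import numpy.ma as ma

imgs = np.random.rand(5, 8, 8).astype(np.float32)   # 5 images of 8x8
nrows, ncols, border = 2, 3, 1
mosaic = ma.masked_all((nrows * 8 + (nrows - 1) * border,
                        ncols * 8 + (ncols - 1) * border), dtype=np.float32)
for i in range(len(imgs)):
    row, col = i // ncols, i % ncols
    mosaic[row * 9:row * 9 + 8, col * 9:col * 9 + 8] = imgs[i]
print(mosaic.shape)       # (17, 26)
print(mosaic.mask.sum())  # 122: the empty slot (64 px) plus border strips (58 px)
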
def insert(self, key, record):
    if key in self.index_table:
        raise KeyError("key %s already exists in table" % str(key))
    added_row_index = self.index_table[key]
    num_current_rows = len(self.index_table)
    if num_current_rows >= self.data_table.shape[0]:
        num_new_rows = int(self._data_table_growth_factor * num_current_rows)
        new_rows = ma.masked_all((num_new_rows, len(self.column_table)),
                                 dtype=self._data_type)
        self.data_table = ma.vstack((self.data_table, new_rows))
        print "Table enlarged to %d rows" % self.data_table.shape[0]
    for key, in_value in record.items():
        if key not in self.column_table:
            raise KeyError(
                'Variable "%s" is not registered as a table column'
                % str(key))
        if key in self.hash_table:
            value = self.hash_table[key][in_value]
        else:
            value = in_value
        index = self.column_table[key]
        self.data_table[added_row_index, index] = value

def _align_segments_with_labels(segments, partitioned_skeletons,
                                labelled_skeletons, min_labelled=5):
    """
    Match the head/tail alignment with the results of the classical tracking
    in each of the segments, if there is enough labelled data in the segment
    """
    segments_alignment = ma.masked_all((len(segments), ), dtype=np.uint8)
    for segment_index, segment in enumerate(segments):
        segment_skeletons = labelled_skeletons[segment]
        non_nan_labelled = np.any(~np.isnan(segment_skeletons), axis=(1, 2))
        labels_count = np.sum(non_nan_labelled)
        non_masked = ~np.any(partitioned_skeletons[segment].mask,
                             axis=(1, 2, 3))
        to_compare = np.logical_and(non_nan_labelled, non_masked)

        similarity_scores = []
        for label_skel, partitioned_skeleton in zip(
                segment_skeletons[to_compare],
                partitioned_skeletons[segment][to_compare]):
            dists = [
                skeleton_distance(label_skel, x)
                for x in partitioned_skeleton
            ]
            similarity_scores.append(dists)

        if len(similarity_scores) > 0:
            mean_similarity_scores = np.mean(similarity_scores, axis=0)
            if mean_similarity_scores[0] * mean_similarity_scores[1] < 0 \
                    and labels_count > min_labelled:
                segments_alignment[segment_index] = np.argmax(
                    mean_similarity_scores)
    return segments_alignment

def __init__(self, nwalkers, ndim, lnpostfn, transd=False,
             processes=None, pool=None):
    self.nwalkers = nwalkers
    self.dim = ndim

    self._kde = None
    self._kde_size = self.nwalkers
    self.updates = np.array([])

    self._get_lnpost = lnpostfn

    self.iterations = 0
    self.stored_iterations = 0

    self.pool = pool
    self.processes = processes
    if self.processes != 1 and self.pool is None:
        self.pool = Pool(self.processes)

    self._transd = transd
    if self._transd:
        self._chain = ma.masked_all((0, self.nwalkers, self.dim))
    else:
        self._chain = np.zeros((0, self.nwalkers, self.dim))
    self._lnpost = np.empty((0, self.nwalkers))
    self._lnprop = np.empty((0, self.nwalkers))
    self._acceptance = np.zeros((0, self.nwalkers))
    self._blobs = []

    self._last_run_mcmc_result = None
    self._failed_p = None

def draw(self, size=1, spaces=None):
    """
    Draw samples from the transdimensional distribution.
    """
    if spaces is not None:
        if len(spaces) != size:
            raise ValueError('Sample size inconsistent with number of spaces saved')
        space_inds = np.empty(size)
        for space_id, space in enumerate(self.spaces):
            subspace = np.all(spaces == space, axis=1)
            space_inds[subspace] = space_id
    else:
        # Draws spaces randomly with the assigned weights
        cumulative_weights = np.cumsum(np.exp(self._logweights))
        space_inds = np.searchsorted(cumulative_weights, np.random.rand(size))

    draws = ma.masked_all((size, self._max_ndim))
    for space_id in range(len(self.spaces)):
        sel = space_inds == space_id
        n_fixedd = np.count_nonzero(sel)
        if n_fixedd > 0:
            # Populate only the valid entries for this parameter space
            draws[np.ix_(sel, self._spaces[space_id])] = \
                self.kdes[space_id].draw(n_fixedd)
    return draws

def get_all_data(grib_file):
    """Aggregate all messages data of a GRIB file.

    Parameters
    ----------
    grib_file : string

    Returns
    -------
    out : numpy masked array

    Notes
    -----
    All messages in the GRIB file are assumed to have the same shape.
    """
    grb1 = pygrib.open(grib_file)
    t = grb1.messages
    for i, grb_msg in enumerate(grb1):
        data = grb_msg['values']
        if i == 0:
            all_data = ma.masked_all([t, data.shape[0], data.shape[1]])
        all_data[i, :, :] = data
    grb1.close()
    return all_data

def load_data():
    # Read the log file
    module_dir = os.path.dirname(__file__)
    filepath = os.path.join(module_dir, 'data', 'apm.log')
    lines = open(filepath).read().splitlines()

    # Parse the data
    data = dict(MAG=[], IMU=[], ATT=[])
    for line in lines:
        msgid, *fields = re.split(r',\s*', line)
        if msgid in data:
            data[msgid].append([float(f) for f in fields])
    data = {key: np.asarray(val) for key, val in data.items()}
    imu = data['IMU']
    mag = data['MAG']

    # Build the output array
    t = np.sort(np.hstack((imu[:, 0], mag[:, 0])))
    imu_inds = np.array([tk in imu[:, 0] for tk in t])
    mag_inds = np.array([tk in mag[:, 0] for tk in t])
    y = ma.masked_all((t.size, GeneratedDTModel.ny))
    y[imu_inds, :6] = imu[:, [4, 5, 6, 1, 2, 3]]
    y[mag_inds, 6:] = mag[:, [1, 2, 3]]
    t *= 1e-3

    # Select the experiment interval
    range_ = np.s_[905:1800]  # np.s_[900:1800]
    t = t[range_]
    y = y[range_]

    assert np.unique(t).size == t.size
    return t, y, data

def i2b_flags(flags, good_flags=[1, 2], bad_flags=[3, 4]):
    """ Converts int flags (like IOC) into binary (T|F)

    If given a dictionary of flags, it will evaluate each item
    of the dictionary, and return:
        - True if all available values are True
        - False if any of the available values is False
        - Masked if all values are masked
    """
    if (hasattr(flags, 'keys')) and (np.ndim(flags) > 1):
        output = []
        for f in flags:
            output.append(i2b_flags(flags[f], good_flags, bad_flags))
        return ma.array(output).all(axis=0)

    flags = np.asanyarray(flags)
    assert flags.dtype != 'bool', "Input flags should not be binary"

    output = ma.masked_all(np.shape(flags), dtype='bool')
    for f in good_flags:
        output[flags == f] = True
    for f in bad_flags:
        output[flags == f] = False
    return output

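# A small illustration of the flag convention i2b_flags expects (IOC-style:
# 1-2 good, 3-4 bad, anything else left masked).  The mapping below mirrors
# the body of the function; the flag values themselves are arbitrary.
import numpy as np
import numpy.ma as ma

flags = np.array([1, 2, 3, 4, 0, 9])
good_flags, bad_flags = [1, 2], [3, 4]
binary = ma.masked_all(flags.shape, dtype='bool')
for f in good_flags:
    binary[flags == f] = True
for f in bad_flags:
    binary[flags == f] = False
print(binary)  # [True True False False -- --]
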
def test_convert_to_annual(self):
    "Test convert_to_annual"
    base = dict(D=1, H=24, T=24 * 60, S=24 * 3600)
    #for fq in ('D', 'H', 'T', 'S'):
    # Don't test for minuTe and Second frequency, too time consuming.
    for fq in ('D', 'H'):
        dates = date_array(start_date=Date(fq, '2001-01-01 00:00:00'),
                           end_date=Date(fq, '2004-12-31 23:59:59'))
        bq = base[fq]
        series = time_series(range(365 * bq) * 3 + range(366 * bq),
                             dates=dates)
        control = ma.masked_all((4, 366 * bq), dtype=series.dtype)
        control[0, :58 * bq] = range(58 * bq)
        control[0, 59 * bq:] = range(58 * bq, 365 * bq)
        control[[1, 2]] = control[0]
        control[3] = range(366 * bq)
        test = convert_to_annual(series)
        assert_equal(test, control)
    #
    series = time_series(range(59, 365) + range(366) + range(365),
                         start_date=Date('D', '2003-03-01'))
    test = convert_to_annual(series)
    assert_equal(test[:, 59:62],
                 ma.masked_values([[-1, 59, 60], [59, 60, 61], [-1, 59, 60]],
                                  -1))

def get_subset_data(grib_file, msg_ids):
    """Aggregate data from subset of messages of a GRIB file.

    Parameters
    ----------
    grib_file : string
    msg_ids : list of int

    Returns
    -------
    out : numpy masked array

    Notes
    -----
    All selected messages in the GRIB file are assumed to have the same shape.
    """
    t = len(msg_ids)
    grb1 = pygrib.open(grib_file)
    c = 0
    for i, grb_msg in enumerate(grb1):
        if i not in msg_ids:
            continue
        data = grb_msg['values']
        if c == 0:
            all_data = ma.masked_all([t, data.shape[0], data.shape[1]])
        all_data[c, :, :] = data
        c += 1
    grb1.close()
    return all_data

def bin_spike(x, l):
    """
    l is the number of points used for comparison, thus l=2 means that each
    point will be compared only against the previous and following
    measurements. l=2 is probably not a good choice, too small.

    Maybe use pstsd instead?

    Dummy way to avoid warnings when x[ini:fin] are all masked.
    Improve this in the future.
    """
    assert x.ndim == 1, "I'm not ready to deal with multidimensional x"

    assert l % 2 == 0, "l must be an even integer"

    N = len(x)
    bin = ma.masked_all(N)
    # bin_std = ma.masked_all(N)
    half_window = int(l / 2)
    idx = (i for i in range(half_window, N - half_window)
           if np.isfinite(x[i]))
    for i in idx:
        ini = max(0, i - half_window)
        fin = min(N, i + half_window)
        # At least 3 valid points
        if ma.compressed(x[ini:fin]).size >= 3:
            bin[i] = x[i] - ma.median(x[ini:fin])
            # bin_std[i] = (np.append(x[ini:i], x[i+1:fin+1])).std()
            bin[i] /= (np.append(x[ini:i], x[i+1:fin+1])).std()

    return bin

def calc_uv(xvel, yvel, lons):
    # Script to convert vectors from xy to uv
    u_z = ma.masked_all((xvel.shape[0], xvel.shape[1]))
    v_m = ma.masked_all((xvel.shape[0], xvel.shape[1]))
    mask = ma.getmask(xvel)
    #index = np.where(mask==False)
    for i in xrange(xvel.shape[0]):
        for j in xrange(xvel.shape[1]):
            #TO TRANSPOSE OR NOT?..
            alpha = (lons[i, j]) * pi / 180.
            if (mask[i, j] == False):
                u_z[i, j] = yvel[i, j] * sin(alpha) + xvel[i, j] * cos(alpha)
                v_m[i, j] = yvel[i, j] * cos(alpha) - xvel[i, j] * sin(alpha)
    return u_z, v_m

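# A vectorized sketch of the same xy -> uv rotation (calc_uv_vectorized is a
# hypothetical helper, not part of the original code).  With masked-array
# inputs the mask propagates through the arithmetic, so the explicit double
# loop and per-cell mask check are not needed.
import numpy as np
import numpy.ma as ma

def calc_uv_vectorized(xvel, yvel, lons):
    # Rotate grid-relative (x, y) vectors to zonal/meridional (u, v)
    alpha = np.deg2rad(lons)
    u_z = yvel * np.sin(alpha) + xvel * np.cos(alpha)
    v_m = yvel * np.cos(alpha) - xvel * np.sin(alpha)
    return u_z, v_m
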
def make_mosaic(imgs, nrows, ncols):
    """
    Given a set of images with all the same shape, makes a
    mosaic with nrows and ncols
    """
    if len(imgs.shape) == 3:
        numFilter = imgs.shape[2]
        numChannel = 1
    else:
        numFilter = imgs.shape[3]
        numChannel = imgs.shape[2]

    imshape = imgs.shape[:2]
    print(imshape)
    numImages = numFilter * numChannel

    mosaic = ma.masked_all((nrows * imshape[0] + (nrows - 1),
                            ncols * imshape[1] + (ncols - 1)),
                           dtype=np.float32)

    paddedh = imshape[0] + 1
    paddedw = imshape[1] + 1
    imageIndex = 0
    for i in xrange(numImages):
        row = int(np.floor(i / ncols))
        col = i % ncols
        channelnum = int(np.floor(i / numFilter))
        filternum = i % numFilter
        if len(imgs.shape) == 4:
            mosaic[row * paddedh:row * paddedh + imshape[0],
                   col * paddedw:col * paddedw + imshape[1]] = \
                imgs[:, :, channelnum, filternum]
        else:
            mosaic[row * paddedh:row * paddedh + imshape[0],
                   col * paddedw:col * paddedw + imshape[1]] = imgs[:, :, i]
    return mosaic

def get_conc_gridded(dataoutpath, yearsT, month, hemStr, concVersion='v2'):
    """ Get gridded ice concentration data

    Data gridded using linear interpolation of NASA Team concentration data
    onto a 100 km grid. Used monthly data, then monthly means of the daily
    NRT data for 2015 onwards.
    """
    if (hemStr == 'N'):
        poleStr = 'A'
    elif (hemStr == 'S'):
        poleStr = 'AA'

    xpts = load(dataoutpath + concVersion + 'xpts100km' + poleStr)
    ypts = load(dataoutpath + concVersion + 'ypts100km' + poleStr)

    if (size(yearsT) > 1):
        conc_years = ma.masked_all(
            (size(yearsT), xpts.shape[0], xpts.shape[1]))
        x = 0
        for year in yearsT:
            conc_years[x] = load(dataoutpath + concVersion + 'ice_conc100km'
                                 + str(month) + str(year) + poleStr
                                 + concVersion)
            x += 1
    else:
        conc_years = load(dataoutpath + concVersion + 'ice_conc100km'
                          + str(month) + str(yearsT) + poleStr + concVersion)

    return xpts, ypts, conc_years

def convertlonlat_timstep(indir, datan, timestep, dimx, dimy, landref):
    """
    Conversion of lon / lat of ORCHIDEE forcing for a specific timestep.

    indir: str, Input NetCDF directory (ORCHIDEE Forcing).
    datan: str, name of the data in the Input file.
    timestep: int, Timestep index.
    dimx: int, dimension longitude.
    dimy: int, dimension latitude.
    landref: Land index (From netCDF).
    """
    print "Conversion", timestep
    ncvar = GLO.get_var(indir, datan, 0)
    out = ma.masked_all((dimy, dimx))
    var = ncvar[timestep, :]
    ind = 0
    while ind < len(var):
        #if ind % 100000. == 0: print ind
        ref = landref[ind]
        k = int(ref / dimx)
        i = ref - k * dimx - 1
        j = k - 1
        out[j, i] = var[ind]
        ind = ind + 1
    return out

def aod_without_prior(aod_data, unc_data, typ_data, ap_tau_types):
    from numpy import array, log, log10, logical_not
    from numpy.ma import getmaskarray, masked_all

    tau_var_all = unc_data / aod_data / log(10.)
    # Reject points with sufficient uncertainty to be unstable
    # when we take out the prior
    keep = (tau_var_all < 0.75).filled(False)
    # Remove negative AOD as we're taking a logarithm
    keep &= (aod_data > 0.).filled(False)
    keep &= logical_not(getmaskarray(typ_data))

    tau = log10(aod_data[keep])
    tau_var = tau_var_all[keep] * tau_var_all[keep]
    ap_tau = array([ap_tau_types[phs - 1] for phs in typ_data[keep]])
    ap_tau_var = 1.5 * 1.5

    weight = (ap_tau_var - tau_var) / (ap_tau_var * tau_var)
    value = (tau / tau_var - ap_tau / ap_tau_var) / weight

    result = masked_all(aod_data.shape)
    result[keep] = 10.**value
    return result

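# The de-biasing above looks like it inverts a Gaussian prior/posterior
# combination in log10(AOD) space (weight being the precision left after
# subtracting the prior precision).  The numbers below are invented purely to
# check that reading: recombining the recovered value with the prior
# reproduces the original posterior mean and variance.
import numpy as np

tau, tau_var = 0.2, 0.09         # posterior mean and variance in log10(AOD)
ap_tau, ap_tau_var = -1.0, 2.25  # prior mean and variance

weight = (ap_tau_var - tau_var) / (ap_tau_var * tau_var)
value = (tau / tau_var - ap_tau / ap_tau_var) / weight

var_back = 1.0 / (weight + 1.0 / ap_tau_var)
mean_back = var_back * (value * weight + ap_tau / ap_tau_var)
print(np.isclose(mean_back, tau) and np.isclose(var_back, tau_var))  # True
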
def wmean_bandpass_1D_serial(data, lshorterpass, llongerpass, t=None,
                             method='hann', axis=0):
    """ Equivalent to wmean_1D_serial, but it is a bandpass

        Input:
            - data: np.array or ma.masked_array, nD
            - lshorterpass: The size of the highpass filter, i.e. shorter
                wavelengths are preserved. It is in the same unit of t.
            - llongerpass: The size of the lowpass filter, i.e. longer
                wavelengths are preserved. It is in the same unit of t.
            - t: is the scale of the chosen axis, 1D. If not defined, it will
                be considered a sequence.
            - method: ['hann', 'hamming', 'blackman']
                Defines the weight function type
            - axis: Dimension which the filter will be applied
    """
    assert False, "There is a BUG here"

    assert axis <= data.ndim, "Invalid axis!"

    # If necessary, move the axis to be filtered for the first axis
    if axis != 0:
        data_smooth = wmean_bandpass_1D_serial(data.swapaxes(0, axis),
                                               lshorterpass=lshorterpass,
                                               llongerpass=llongerpass,
                                               t=t,
                                               method=method,
                                               axis=0)
        return data_smooth.swapaxes(0, axis)
    # Below here, the filter will be always applied on axis=0

    # If t is not given, creates a regularly spaced t
    if t is None:
        print "The scale along the chosen axis wasn't defined. I'll consider a constant sequence."
        t = np.arange(data.shape[axis])

    assert t.shape == (data.shape[axis],), "Invalid size of t."

    # ----
    winfunc = window_func(method)
    data_smooth = ma.masked_all(data.shape)

    if data.ndim == 1:
        (I,) = np.nonzero(~ma.getmaskarray(data))
        for i in I:
            # First remove the high frequency
            tmp = _convolve_1D(t[i], t, llongerpass, winfunc, data)
            # Then remove the low frequency
            data_smooth[i] = tmp - \
                _convolve_1D(t[i], t, lshorterpass, winfunc, tmp)
    else:
        I = data.shape[1]
        for i in range(I):
            data_smooth[:, i] = wmean_bandpass_1D_serial(data[:, i],
                                                         lshorterpass,
                                                         llongerpass,
                                                         t, method, axis)
    return data_smooth

def get_halfpower_period(data, filtered, dt):
    """ Returns the gain per frequency
    """
    nt, ni, nj = data.shape
    gain = ma.masked_all((nt, ni, nj))
    for i in range(ni):
        for j in range(nj):
            if ~filtered[:, i, j].mask.all():
                gain[:, i, j] = (
                    np.absolute(np.fft.fft(filtered[:, i, j] - filtered[:, i, j].mean())) /
                    np.absolute(np.fft.fft(data[:, i, j] - data[:, i, j].mean())))
    gain_median = ma.masked_all(nt)
    gain_25 = ma.masked_all(nt)
    gain_75 = ma.masked_all(nt)
    # Run for each frequency, which are in the same number of timesteps
    from scipy.stats import scoreatpercentile
    for t in range(nt):
        #gain_median[t] = numpy.median(gain[t,:,:].compressed()[numpy.isfinite(gain[t,:,:].compressed())])
        #tmp = gain[t,:,:].compressed()[numpy.isfinite(gain[t,:,:].compressed())]
        #gain_median[t] = scoreatpercentile(tmp,50)
        gain_median[t] = ma.median(gain[t])
        #gain_25[t] = scoreatpercentile(tmp,25)
        #gain_75[t] = scoreatpercentile(tmp,75)
    freq = np.fft.fftfreq(nt) / dt.days
    halfpower_period = 1. / freq[np.absolute(gain_median - 0.5).argmin()]

    #from scipy.interpolate import UnivariateSpline
    #s = UnivariateSpline(gain_median[numpy.ceil(nt/2.):], -freq[numpy.ceil(nt/2.):], s=1)
    #xs = -freq[numpy.ceil(nt/2.):]
    #ys = s(xs)

    #import rpy2.robjects as robjects
    #smooth = robjects.r['smooth.spline'](robjects.FloatVector(gain_median[numpy.ceil(nt/2.):]),robjects.FloatVector(-freq[numpy.ceil(nt/2.):]),spar=.4)
    ##smooth = robjects.r['smooth.spline'](robjects.FloatVector(-freq[numpy.ceil(nt/2.):]),robjects.FloatVector(gain_median[numpy.ceil(nt/2.):]),spar=.4)
    #s_interp = robjects.r['predict'](smooth, x=0.5)
    ##halfpower_period = 1./s_interp.rx2['y'][0]
    #halfpower_period = 1./s_interp.rx2(2)[0]

    #smooth = robjects.r['smooth.spline'](robjects.FloatVector(-freq[numpy.ceil(nt/2.):]),robjects.FloatVector(gain_median[numpy.ceil(nt/2.):]),spar=.4)
    #s_interp = robjects.r['predict'](smooth, x=robjects.FloatVector(-freq[numpy.ceil(nt/2.):]))

    #print "Filter half window size: %s" % l
    #print "Half Power Period: %s" % halfpower_period
    #self.halfpower_period = halfpower_period

    return halfpower_period

def window_mean(y, x=None, x_out=None, method="rectangular", boxsize=None):
    """Windowed means along 1-D array

        Input:
            - x [0,1,2,3,...] =>
            - x_out [x] =>
            - method [rectangular]:
                + rectangular => All data in window have same weight
            - boxsize [mean space] =>
        Output:

        Apply windowed means on a 1-D data array. Selecting adequate x_out
        and boxsize could define boxmeans or smooth filters. Method defines
        the weight method.

        An important point of this function is the ability to work with
        inhomogeneously spaced samples. Data ([1,2,3]) collected at [1,2,4]
        times would be different if it was collected at [1,2,3].
    """
    if (x == None):
        x = N.arange(N.size(y))

    if (x_out == None):
        x_out = x

    #y_out = N.zeros(N.size(x_out), N.float)
    y_out = ma.masked_all(x_out.shape)

    if (boxsize == None):
        # !!! Improve it! A better way than *1. ?!
        boxsize = (max(x) - min(x)) / (N.size(x_out) * 1.)

    half_boxsize = boxsize / 2.

    #for x_i in x_out:
    for i in range(N.size(x_out)):
        x_i = x_out[i]

        # Higher window limit
        hi_limit = x_i + half_boxsize
        # Lower window limit
        lo_limit = x_i - half_boxsize
        # index of values inside window
        index = N.less_equal(x, hi_limit) * N.greater_equal(x, lo_limit)
        # !!! INSERT some type of check for minimum number of samples to be
        #     considered

        # x values on window around x_i
        x_tmp = N.compress(index, x) - x_i
        # y values on window
        y_tmp = N.compress(index, y)

        # weights in window according to x position
        weight = window_weight(x_tmp, boxsize, method)

        y_out[i] = N.sum(y_tmp * weight)

    return y_out

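# A small, self-contained illustration of the rectangular case on irregularly
# spaced samples (numbers arbitrary): assuming the rectangular weights are
# normalized to sum to one, each output point is simply the average of the
# samples falling inside its box.
import numpy as np
import numpy.ma as ma

x = np.array([0.0, 1.0, 2.0, 4.0, 5.0])   # irregular sample positions
y = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
x_out = np.array([1.0, 4.0])
boxsize = 2.0

y_out = ma.masked_all(x_out.shape)
for i, x_i in enumerate(x_out):
    inside = (x >= x_i - boxsize / 2.) & (x <= x_i + boxsize / 2.)
    y_out[i] = y[inside].mean()            # equal weights in the window
print(y_out)  # [2.0 4.5]
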
def wmean_1D(data, l, t=None, method='hann', axis=0, interp=False):
    """ A moving window mean filter, not necessarily a regular grid.

        It is equivalent to wmean_1D_serial, but run in parallel with
        multiprocessing for higher efficiency.

        Check wmean_1D_serial documentation for the inputs and other details.
    """
    #assert type(data) in [np.ndarray, ma.MaskedArray]
    assert axis <= data.ndim, "Invalid axis!"

    # If necessary, move the axis to be filtered for the first axis
    if axis != 0:
        data_smooth = wmean_1D(data.swapaxes(0, axis),
                               l=l,
                               t=t,
                               method=method,
                               axis=0,
                               interp=interp)
        return data_smooth.swapaxes(0, axis)
    # Below here, the filter will be always applied on axis=0

    # If t is not given, creates a regularly spaced t
    if t is None:
        print "The scale along the chosen axis wasn't defined. I'll consider a constant sequence."
        t = np.arange(data.shape[axis])

    assert t.shape == (data.shape[axis],), "Invalid size of t."

    # ----
    # Only one dimension usually means overhead to run in parallel.
    if data.ndim == 1:
        data_smooth = wmean_1D_serial(data, l, t=t, method=method, axis=axis,
                                      interp=interp)
        return data_smooth
    # ----

    npes = 2 * mp.cpu_count()
    pool = mp.Pool(npes)
    results = []
    I = data.shape[1]
    for i in range(I):
        results.append(pool.apply_async(wmean_1D_serial,
                                        (data[:, i], l, t, method, 0, interp)))
    pool.close()
    # Collecting the results.
    if type(data) is np.ndarray:
        data_smooth = np.empty(data.shape)
    else:
        data_smooth = ma.masked_all(data.shape)
    for i, r in enumerate(results):
        data_smooth[:, i] = r.get()
    pool.terminate()

    return data_smooth

def interpolate(self, lat, lon, var):
    """ Interpolate each var on the coordinates requested
    """
    subset, dims = self.subset(lat, lon, var)

    if np.all([y in dims['lat'] for y in lat]) & \
            np.all([x in dims['lon'] for x in lon]):
        yn = np.nonzero([y in lat for y in dims['lat']])[0]
        xn = np.nonzero([x in lon for x in dims['lon']])[0]
        output = {}
        for v in subset:
            # output[v] = subset[v][dn, zn, yn, xn]
            # Seriously that this is the way to do it?!!??
            output[v] = subset[v][:, xn][yn]
        return output

    # The output coordinates shall be created only once.
    points_out = []
    for latn in lat:
        for lonn in lon:
            points_out.append([latn, lonn])
    points_out = np.array(points_out)

    output = {}
    for v in var:
        output[v] = ma.masked_all(
            (lat.size, lon.size),
            dtype=subset[v].dtype)

        # The valid data
        idx = np.nonzero(~ma.getmaskarray(subset[v]))

        if idx[0].size > 0:
            points = np.array([
                dims['lat'][idx[0]], dims['lon'][idx[1]]]).T
            values = subset[v][idx]

            # Interpolate along the dimensions that have more than one
            #   position, otherwise it means that the output is exactly
            #   on that coordinate.
            ind = np.array(
                [np.unique(points[:, i]).size > 1
                 for i in range(points.shape[1])])
            assert ind.any()

            values_out = griddata(
                np.atleast_1d(np.squeeze(points[:, ind])),
                values,
                np.atleast_1d(np.squeeze(points_out[:, ind]))
            )

            # Remap the interpolated value back into a 4D array
            idx = np.isfinite(values_out)
            for [y, x], out in zip(points_out[idx], values_out[idx]):
                output[v][y == lat, x == lon] = out

    return output

def extend_interp(datafield):
    # add masked values at southernmost end
    southernlimitmask = ma.masked_all(len(self.olon))
    olat_ext = np.append(-82.1, self.olat)
    dfield_ext = ma.concatenate([ma.column_stack(southernlimitmask),
                                 datafield], 0)
    # f = interp2d(self.olon, olat_ext, dfield_ext)
    # return f(self.pismlon, self.pismlat)
    return interp(dfield_ext, self.olon, olat_ext,
                  self.pismlon, self.pismlat)

def window_1Dmean_grid(data, l, method='hann', axis=0, parallel=False):
    """ A moving window mean filter applied to a regular grid.

        1D means that the filter is applied along only one of the
          dimensions, but in the whole array. For example in a 3D array,
          each latXlon point is filtered along the time.

        The other types of filter consider the scales of each dimension.
          On this case it's considered a regular grid, so the filter can
          be based on the number of elements, and so be much optimized.

        l is in number of cells around the point being evaluated.
    """
    assert axis <= data.ndim, "Invalid axis!"

    if axis != 0:
        data_smooth = window_1Dmean_grid(data.swapaxes(0, axis),
                                         l=l,
                                         method=method,
                                         axis=0,
                                         parallel=parallel)
        return data_smooth.swapaxes(0, axis)

    winfunc = window_func(method)
    r = np.arange(-np.floor(l / 2), np.floor(l / 2) + 1)
    w = winfunc(r, l)

    data_smooth = ma.masked_all(data.shape)

    I = data.shape[0]
    norm = np.convolve(np.ones(I), w, mode='same')
    if len(data.shape) == 1:
        norm = numpy.convolve(numpy.ones(I), w, mode='same')
        data_smooth[:] = numpy.convolve(data[:], w, mode='same') / norm

    elif len(data.shape) == 2:
        I, J = data.shape
        for j in range(J):
            data_smooth[:, j] = np.convolve(data[:, j], w, mode='same') / norm

    elif len(data.shape) > 2:
        I, J = data.shape[:2]
        for j in range(J):
            data_smooth[:, j] = window_1Dmean_grid(data[:, j],
                                                   l=l,
                                                   method=method,
                                                   axis=0,
                                                   parallel=parallel)

    try:
        data_smooth.mask = data.mask
    except:
        pass

    return data_smooth

def _weight_triangular(r, l):
    """
    """
    w = ma.masked_all(r.shape)
    ind = np.abs(r) < l / 2.
    ind2 = np.abs(r) > l / 2.
    w[ind] = 1 - np.abs(2 * r[ind] / l)
    w[ind2] = 0
    return w

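# A quick look at the weights this produces for a centred window (the window
# width l and the offsets are arbitrary); note that offsets falling exactly
# on l/2 match neither condition and stay masked.
import numpy as np
import numpy.ma as ma

r = np.linspace(-3, 3, 7)   # offsets from the window centre
l = 4.0                     # window width
w = ma.masked_all(r.shape)
ind = np.abs(r) < l / 2.
ind2 = np.abs(r) > l / 2.
w[ind] = 1 - np.abs(2 * r[ind] / l)
w[ind2] = 0
print(w)  # [0.0 -- 0.5 1.0 0.5 -- 0.0]
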
def test_allmasked(N=10):
    """ If the input is all masked, the output must be all masked
    """
    I = (99 * random(N)).astype('i') + 1
    J = (99 * random(N)).astype('i') + 1
    for i, j in zip(I, J):
        x = ma.masked_all((i, j))
        Lat, Lon = latlon_2D(i, j)
        h_smooth = wmean_2D_latlon(Lat, Lon, x, l=1e10)
        assert h_smooth.mask.all()

def multifile_minmax(datasets, indices, years=None, day_range=None,
                     geog_box=None):
    """calculates minimum and maximum of indices across multiple files

    You may call this function with a list of previously opened
    NetCDF datasets, or you may provide a template for the filename
    and a list of years. If you provide a template, this function will
    open and close the files for you.
    """
    datasets = multifile_open(datasets, years)

    if day_range is not None:
        days = day_range
    else:
        days = slice(1, len(datasets[0].dimensions['days']) - 1)

    geog_mask = slice(None, None, None)
    if geog_box is not None:
        ca = trend.CompressedAxes(datasets[0], 'land')
        geog_mask = calc_geog_mask(ca, datasets[0], geog_box)

    num_ind = len(indices)
    minvals = ma.masked_all((num_ind,), dtype=np.float64)
    maxvals = ma.masked_all((num_ind,), dtype=np.float64)

    for i_year in range(len(datasets)):
        indfile = datasets[i_year]
        for i_indices in range(num_ind):
            for d in range(days.start, days.stop):
                print d
                index_vals = indfile.variables[indices[i_indices]][d, :]
                index_vals = index_vals[geog_mask]
                cur_max = np.max(index_vals)
                cur_min = np.min(index_vals)
                if minvals[i_indices] is ma.masked:
                    minvals[i_indices] = cur_min
                    maxvals[i_indices] = cur_max
                else:
                    minvals[i_indices] = min(cur_min, minvals[i_indices])
                    maxvals[i_indices] = max(cur_max, maxvals[i_indices])

    return (minvals, maxvals)

def test_flags2bin(n=100):
    flag = ma.concatenate([np.random.randint(0, 5, n),
                           ma.masked_all(2, dtype='int8')])
    binflags = flags2bin(flag)

    assert type(binflags) == ma.MaskedArray
    assert binflags.dtype == 'bool'
    assert binflags.shape == (n + 2,)
    assert binflags.mask[flag.mask].all(), \
        "All masked flags records should be also masked at binflags"

def _apply_convolve_1D(data, w):
    if data.ndim > 1:
        output = ma.masked_all(data.shape[1:])
        for i in xrange(data.shape[1]):
            output[i] = _apply_convolve_1D(data[:, i], w)
        return output
    # Weighted mean restricted to the valid (unmasked) points
    ind = (~ma.getmaskarray(data))
    tmp = data[ind] * w[ind]
    wsum = w[ind].sum()
    if wsum != 0:
        return (tmp).sum() / wsum

def uncompress(self, vector):
    """Given a compressed vector, produce an uncompressed
    2d representation. The vector must be the same length as
    the compressed dimension in the NetCDF file."""
    gi = self.get_grid_indices()
    grid = ma.masked_all(self._dimshape, dtype=vector.dtype)
    grid[gi] = vector
    grid = self.mask_grid(grid)
    return grid

def tukey53H(x):
    """Spike test Tukey 53H from Goring & Nikora 2002
    """
    N = len(x)

    u1 = ma.masked_all(N)
    for n in range(N - 4):
        if x[n:n + 5].any():
            u1[n + 2] = ma.median(x[n:n + 5])

    u2 = ma.masked_all(N)
    for n in range(N - 2):
        if u1[n:n + 3].any():
            u2[n + 1] = ma.median(u1[n:n + 3])

    u3 = ma.masked_all(N)
    u3[1:-1] = 0.25 * (u2[:-2] + 2 * u2[1:-1] + u2[2:])

    Delta = ma.absolute(x - u3)

    return Delta

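# Assuming tukey53H above is in scope (with numpy.ma imported as ma), a toy
# series with one obvious spike gives a Delta that is much larger at the
# spike than anywhere else; the values below are arbitrary.
import numpy.ma as ma

x = ma.array([1.0, 1.1, 1.0, 1.2, 6.0, 1.1, 1.0, 1.2, 1.1, 1.0])
delta = tukey53H(x)
# delta[4] is ~4.9 while the other unmasked entries stay near 0.1; the first
# and last two points remain masked because the running medians cannot be
# formed there.
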
def getMeanConcDates(dataPath, alg=0, date1='20151220', date2='20160105',
                     lonlat=0, mean=0):
    if (alg == 0):
        team = 'NASA_TEAM'
        team_s = 'nt'
        header = 300
        datatype = 'uint8'
        scale_factor = 250.
    if (alg == 1):
        team = 'BOOTSTRAP'
        team_s = 'bt'
        header = 0
        datatype = '<i2'
        scale_factor = 1000.

    year = date1[0:4]
    print year
    day = 0

    if (int(year) > 2015):
        files = glob(dataPath + '/ICE_CONC/NRT/' + team_s + '_' + '*.bin')
    else:
        files = glob(dataPath + '/ICE_CONC/' + team + '/ARCTIC/daily/' + year
                     + '/' + team_s + '_' + '*.bin')

    dates = [file.split('/')[-1][3:11] for file in files]
    print dates
    idx1 = where(array(dates) == date1)[0][0]
    idx2 = where(array(dates) == date2)[0][0]
    files = files[idx1:idx2 + 1]

    ice_conc = ma.masked_all((size(files), 448, 304))

    for x in xrange(size(files)):
        fd = open(files[x], 'r')
        data = fromfile(file=fd, dtype=datatype)
        data = data[header:]
        #FIRST 300 FILES ARE HEADER INFO
        ice_conc[x] = reshape(data, [448, 304])

    #divide by 250 to express in concentration
    ice_conc = ice_conc / scale_factor
    #GREATER THAN 250 is mask/land etc
    ice_conc = ma.masked_where(ice_conc > 1., ice_conc)
    #ice_conc = ma.masked_where(ice_conc<0.15, ice_conc)

    if (mean == 1):
        ice_conc = ma.mean(ice_conc, axis=0)

    if (lonlat == 1):
        flat = open(dataPath + '/OTHER/psn25lats_v3.dat', 'rb')
        flon = open(dataPath + '/OTHER/psn25lons_v3.dat', 'rb')
        lats = reshape(fromfile(file=flat, dtype='<i4') / 100000., [448, 304])
        lons = reshape(fromfile(file=flon, dtype='<i4') / 100000., [448, 304])
        return ice_conc, lons, lats
    else:
        return ice_conc

def woa_normbias(data, v, cfg):
    if ('LATITUDE' in data.keys()) and ('LONGITUDE' in data.keys()):
        if 'datetime' in data.keys():
            d = data['datetime']
        elif ('datetime' in data.attributes):
            d0 = data.attributes['datetime']
            if ('timeS' in data.keys()):
                d = [d0 + timedelta(seconds=s) for s in data['timeS']]
            else:
                d = [data.attributes['datetime']] * len(data['LATITUDE'])

        woa = woa_track_from_file(
            d,
            data['LATITUDE'],
            data['LONGITUDE'],
            cfg['file'],
            varnames=cfg['vars'])
    elif ('LATITUDE' in data.attributes.keys()) and \
            ('LONGITUDE' in data.attributes.keys()) and \
            ('PRES' in data.keys()):
        woa = woa_profile(v,
                          data.attributes['datetime'],
                          data.attributes['LATITUDE'],
                          data.attributes['LONGITUDE'],
                          data['PRES'],
                          cfg)

    if woa is None:
        # self.logger.warn("%s - WOA is not available at this site" %
        #                  self.name)
        flag = np.zeros(data[v].shape, dtype='i1')
        woa_normbias = ma.masked_all(data[v].shape)
        return flag, woa_normbias

    woa_bias = ma.absolute(data[v] - woa['woa_an'])
    woa_normbias = woa_bias / woa['woa_sd']

    flag = np.zeros(data[v].shape, dtype='i1')

    ind = np.nonzero(woa_normbias <= cfg['sigma_threshold'])
    flag[ind] = 1   # cfg['flag_good']
    ind = np.nonzero(woa_normbias > cfg['sigma_threshold'])
    flag[ind] = 3   # cfg['flag_bad']

    # Flag as 9 any masked input value
    flag[ma.getmaskarray(data[v])] = 9

    return flag, woa_normbias

def nearest(self, lat, lon, var):
    output = {}
    dims, idx = cropIndices(self.dims, lat, lon)
    for v in var:
        if v == 'height':
            v = 'z'
        subset = self.ncs[0].variables[v][idx['yn'], idx['xn']]
        output[v] = ma.masked_all((lat.size, lon.size), dtype='f')
        for yn_out, y in enumerate(lat):
            yn_in = np.absolute(dims['lat'] - y).argmin()
            for xn_out, x in enumerate(lon):
                xn_in = np.absolute(dims['lon'] - x).argmin()
                output[v][yn_out, xn_out] = subset[yn_in, xn_in]
    return output
