def graphics(cropCalDF, state, year, features):
    """Plot a smoothed feature series against its smoothed climatology.

    Parameters
    ----------
    cropCalDF : pandas.DataFrame
        Source table. The code reads the ``adm1_name``, ``Year`` and
        ``datetime`` columns plus the column named by ``features``.
        It is never modified.
    state
        Value matched against ``adm1_name``.
    year
        Value matched against ``Year``.
    features : str
        Name of the column to plot.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Take an explicit copy of the selected rows: new columns are assigned
    # below, and doing that on a filtered slice is chained assignment
    # (SettingWithCopyWarning / ambiguous write target).
    selection = (cropCalDF.adm1_name == state) & (cropCalDF.Year == year)
    current_time = cropCalDF[selection].copy()

    # One climatology value per date in the selected series; the trailing 5
    # is forwarded to ``climatology`` (presumably the number of past years).
    raw_climatology = [
        climatology(features, state, date, 5)
        for date in current_time.datetime
    ]

    # Scale both series the same way -- marked as a test setting upstream.
    current_time[features + "_Climatology"] = [
        (x - 50) / 200 for x in raw_climatology
    ]
    current_time[features] = [(x - 50) / 200 for x in current_time[features]]

    # 5-sample moving mean; min_count=1 keeps the first samples instead of NaN.
    rm_ndvi = bn.move_mean(current_time[features], window=5, min_count=1)
    rmc_ndvi = bn.move_mean(current_time[features + "_Climatology"],
                            window=5, min_count=1)

    plt.figure(figsize=(14, 7))
    plt.plot(current_time.datetime, rm_ndvi, "b", label=features)
    # Legend label previously read "_Cimatology" (typo).
    plt.plot(current_time.datetime, rmc_ndvi, "g",
             label=features + "_Climatology")
    plt.title("NDVI and 5 year Climatology for {}".format(state))
    plt.xlabel("Date")
    plt.ylabel("NDVI")
    plt.legend()
    plt.show()
def score_on_topk(self, actual, predict, groups=[0], k=50, show_rank=True,
                  show_score=False, plot_move_window=1):
    """Return the average score and rank of the top-k predictions per group.

    Rows are sorted by ``predict`` (descending) inside each group, the first
    ``k`` rows of every group are kept, and their ``actual`` value and
    percentile rank (rank of ``actual`` within the group, 0-1, best = lowest)
    are averaged.

    Parameters
    ----------
    actual, predict : array-like
        Same-length 1-D sequences of true scores and predicted scores.
    groups : array-like
        Group label per row. A single label (including the default ``[0]``)
        is broadcast to every row. The default list is never mutated.
    k : int
        Number of top predictions kept per group.
    show_rank, show_score : bool
        Whether to draw the rank / score figures.
    plot_move_window : int
        Moving-mean window used for the plotted curves.

    Returns
    -------
    pandas.DataFrame
        Columns ``group``, ``actual``, ``rank`` -- one row per group.
    """
    # Coerce to arrays so that plain lists (such as the default ``groups``)
    # support fancy indexing with the shuffled index below.
    actual = np.asarray(actual)
    predict = np.asarray(predict)
    groups = np.asarray(groups)

    size = len(actual)
    if groups.size == 1:
        groups = np.repeat(groups.reshape(-1), size)

    # Shuffle first so that ties in ``predict`` are not resolved by input order.
    indices = np.arange(size)
    np.random.shuffle(indices)

    df = pd.DataFrame(
        collections.OrderedDict([
            ("group", groups[indices]),
            ("actual", actual[indices]),
            ("predict", predict[indices])
        ])).sort_values(["group", "predict"],
                        ascending=[True, False]).reset_index(drop=True)

    df["rank"] = df.groupby("group")["actual"].rank(ascending=False, pct=True)
    rdf = df.groupby("group").head(k).groupby("group")[["actual", "rank"]].mean()
    rdf.reset_index(inplace=True)

    if show_rank:
        plt.figure(figsize=self.size)
        plt.plot(rdf["group"],
                 bn.move_mean(rdf["rank"], plot_move_window, min_count=1),
                 label=f"rank: {rdf['rank'].mean()*100:.2f}%")
        plt.plot(rdf["group"], rdf["rank"], 'o', color="0.4", alpha=0.3,
                 markersize=3)
        plt.legend(loc="upper left", fontsize=self.fontsize)
        self.show()

    if show_score:
        plt.figure(figsize=self.size)
        plt.plot(rdf["group"],
                 bn.move_mean(rdf["actual"], plot_move_window, min_count=1),
                 label=f"score: {rdf['actual'].mean():.4f}")
        plt.plot(rdf["group"], rdf["actual"], 'o', color='0.4', alpha=0.3,
                 markersize=3)
        plt.legend(loc="upper left", fontsize=self.fontsize)
        self.show()

    return rdf
def main(config_path):
    """Convert every raw ``wrfout_*`` file into a reduced NetCDF file.

    The JSON config at ``config_path`` must provide ``output-wrf-raw`` (the
    directory scanned for ``wrfout_*`` files) and ``output-wrf`` (the
    directory the ``<domain>_<YYYYmmddHHMM>.nc`` files are written to).
    Only the first time step of each input file is used.
    """
    with open(config_path) as f_config:
        config = json.load(f_config)

    pattern = os.path.join(config['output-wrf-raw'], 'wrfout_*')
    for f_path in sorted(g.glob(pattern)):
        print(f_path)
        f_basename = os.path.basename(f_path)
        # File names look like wrfout_<domain>_<date>_<time>.
        domain = f_basename.split('_')[1]
        time = dt.datetime.strptime(
            f_basename, 'wrfout_{}_%Y-%m-%d_%H:%M:%S'.format(domain))

        # The context manager closes the input file once it is converted;
        # previously every opened dataset stayed open for the whole run.
        with xr.open_dataset(f_path) as raw:
            d = raw.isel(Time=0)

            h_agl_staggered = (d.PHB + d.PH) / wrf.G0
            # A window-2 moving mean averages vertically adjacent staggered
            # levels; the first entry is NaN (incomplete window) and dropped.
            h = bn.move_mean(h_agl_staggered.values, 2, axis=0)[1:]
            t = d['T'].values
            p = (d.P + d.PB).values
            q = d.QVAPOR.values

            out = xr.Dataset()
            out.coords['time'] = time
            out.coords['lat'] = d.XLAT.values[:, 0]
            out.coords['lon'] = d.XLONG.values[0, :]

            out['terrain'] = (('lat', 'lon'), d.HGT.values)
            out['u10'] = (('lat', 'lon'), d.U10.values)
            out['v10'] = (('lat', 'lon'), d.V10.values)
            out['rain'] = (('lat', 'lon'), (d.RAINC + d.RAINNC).values)
            out['t2'] = (('lat', 'lon'), d.T2.values)

            out['so2_concentration'] = (('h_agl', 'lat', 'lon'), d.so2.values)
            out['o3_concentration'] = (('h_agl', 'lat', 'lon'), d.o3.values)
            out['nox_concentration'] = (('h_agl', 'lat', 'lon'),
                                        (d.no2 + d.no).values)
            out['pm25'] = (('h_agl', 'lat', 'lon'), d.PM2_5_DRY.values)
            out['pm10'] = (('h_agl', 'lat', 'lon'), d.PM10.values)

            out['p_sl'] = (('lat', 'lon'), wrf.slp(h, p, t, q))
            # Only the first entry of the wrf.rh result is stored
            # (presumably the lowest model level -- TODO confirm).
            out['rh'] = (('lat', 'lon'), wrf.rh(p, t, q)[0])

            out.to_netcdf(
                os.path.join(
                    config['output-wrf'],
                    '{domain}_{date}.nc'.format(
                        domain=domain,
                        date=(time.strftime('%Y%m%d%H%M')))))
def plot_loss(data_list, title, obj):
    '''
    Plot a smoothed curve of ``data_list`` on the current figure and save it.

    @param data_list: A list of error values or accuracy values
    @param title: A description of the data list; used as curve label and in
                  the output file name. "Generator loss total" increments the
                  plot counter ``obj.plot``; "Discriminator loss total" closes
                  the figure after saving.
    @param obj: Object providing ``prefix_len``, ``batch`` and ``path``; its
                ``plot`` counter is created on demand.
    @return: None
    '''
    if title == "Generator loss total":
        obj.plot = getattr(obj, 'plot', 0) + 1

    # 100-sample moving mean; min_count=1 keeps the first samples.
    plt.plot(bn.move_mean(data_list, window=100, min_count=1), label=title)
    plt.title(title + ' prefix =' + str(obj.prefix_len) + ',' + "batch = "
              + str(obj.batch))
    plt.legend()

    # The counter may not exist yet when the first call uses another title;
    # fall back to 0 instead of raising AttributeError.
    plot_index = getattr(obj, 'plot', 0)
    strfile = (obj.path + '/' + title + ', prefix =' + str(obj.prefix_len)
               + ',' + "batch = " + str(obj.batch) + str(plot_index))
    plt.savefig(strfile)

    if title == "Discriminator loss total":
        plt.close()
def __init__(self, frame_size, fmax, fps, oct_width, center_note, log_eta,
             sample_rate=44100, fold=None):
    """Store the settings and build a chroma filterbank masked above fmax.

    :param frame_size: FFT size passed to the chroma filter as ``n_fft``
    :param fmax: frequencies at or above this value are masked out
    :param fps: stored as ``self.fps``
    :param oct_width: ``octwidth`` argument of the chroma filter
    :param center_note: note name converted to the filter's ``ctroct``;
                        ``None`` disables it
    :param log_eta: stored as ``self.log_eta``
    :param sample_rate: sampling rate of the audio
    :param fold: not used by this initialiser
    """
    self.fps = fps
    self.fmax = fmax
    self.sample_rate = sample_rate
    self.oct_width = oct_width
    self.center_note = center_note
    self.frame_size = frame_size
    self.log_eta = log_eta

    # parameters are based on Cho and Bello, 2014.
    import librosa
    ctroct = (librosa.hz_to_octs(librosa.note_to_hz(center_note))
              if center_note is not None else None)

    self.filterbank = librosa.filters.chroma(sr=sample_rate,
                                             n_fft=frame_size,
                                             octwidth=oct_width,
                                             ctroct=ctroct).T[:-1]

    # mask out everything above fmax
    from bottleneck import move_mean
    # Integer division: a float slice bound raises TypeError on Python 3.
    m = np.fft.fftfreq(frame_size,
                       1. / sample_rate)[:frame_size // 2] < fmax
    # Smoothing the boolean mask turns the hard cut-off into a short ramp.
    mask_smooth = move_mean(m, window=10, min_count=1)
    self.filterbank *= mask_smooth[:, np.newaxis]
def pressure_rh(d):
    """Return ``(sea-level pressure * 1e-2, relative humidity)`` for ``d``.

    ``d`` must expose ``PHB``, ``PH``, ``T``, ``P``, ``PB`` and ``QVAPOR``.
    The humidity is computed from the first entry of the pressure,
    temperature and vapour arrays only.
    """
    staggered_height = (d.PHB + d.PH) / wrf.G0
    # Window-2 moving mean along axis 0 averages neighbouring staggered
    # levels; the first row is NaN (incomplete window) and is dropped.
    height = bn.move_mean(staggered_height.values, 2, axis=0)[1:]

    temperature = d['T'].values
    pressure = (d.P + d.PB).values
    vapour = d.QVAPOR.values

    # The 1e-2 factor presumably converts Pa to hPa -- TODO confirm.
    sea_level_pressure = wrf.slp(height, pressure, temperature, vapour) * 1e-2
    humidity = wrf.rh(pressure[0], temperature[0], vapour[0])
    return (sea_level_pressure, humidity)
def update_plots(figure, axes, lines, episode, score):
    """Redraw the moving-average score curve and refresh the figure.

    :param figure: figure that is re-laid out and redrawn
    :param axes: iterable of axes that are rescaled to the new data
    :param lines: sequence of line artists; only ``lines[0]`` is updated
    :param episode: number of episodes; the x data is ``1..episode``
    :param score: sequence of scores
    :return: the same ``figure`` object
    """
    # Moving average over at most the last 100 scores.
    window = min(100, len(score))
    smoothed = move_mean(score, window=window, min_count=1)

    episodes = range(1, episode + 1)
    lines[0].set_data(episodes, smoothed)

    # Let every axis recompute its limits from the new data.
    for axis in axes:
        axis.relim()
        axis.autoscale_view()

    figure.tight_layout()
    canvas = figure.canvas
    canvas.draw()
    canvas.flush_events()
    return figure
def rollavg_bottlneck(a, n):
    """Rolling average of ``a`` computed with bottleneck.

    :param a: array
    :param n: window of the rolling average
    :return: result of ``bn.move_mean`` with ``min_count=n``, i.e. a full
             window is required for every output value
    """
    rolling = bn.move_mean(a, window=n, min_count=n)
    return rolling
def init(self):
    """Read the operator parameters and build the window-function table.

    ``self.param`` must contain ``window``, ``col`` and ``type``;
    ``translation`` and ``scale`` are optional.  Every entry of
    ``self.move_window_mapping`` is a callable ``(c, s, t, w)`` taking the
    column data ``c``, a scale ``s``, a translation ``t`` and a window ``w``.
    """
    param = self.param
    self.windows = param['window']
    self.cols = param['col']
    self.types = param['type']
    self.translation_cols = param.get('translation')
    self.scale_cols = param.get('scale')

    def _mean(c, s, t, w):
        return bn.move_mean(c, w) * s + t

    def _std(c, s, t, w):
        return bn.move_std(c, w) * s

    def _var(c, s, t, w):
        return bn.move_var(c, w) * s * s

    def _min(c, s, t, w):
        return bn.move_min(c, w) * s + t

    def _max(c, s, t, w):
        return bn.move_max(c, w) * s + t

    def _rank(c, s, t, w):
        return bn.move_rank(c, w)

    def _sum(c, s, t, w):
        # The translation is added once per element of the window.
        return bn.move_sum(c, w) * s + t * w

    def _ema(c, s, t, w):
        return F.ema(c, 2.0 / (w + 1),
                     start_indices=self.base.start_indices) * s + t

    def _rsi(c, s, t, w):
        return F.rsi(c, w, start_indices=self.base.start_indices)

    def _psy(c, s, t, w):
        return F.psy(c, w, start_indices=self.base.start_indices)

    def _bias(c, s, t, w):
        return F.bias(c, w, start_indices=self.base.start_indices)

    self.move_window_mapping = {
        "mean": _mean,
        "std": _std,
        "var": _var,
        "min": _min,
        "max": _max,
        "rank": _rank,
        "sum": _sum,
        "ema": _ema,
        "rsi": _rsi,
        "psy": _psy,
        "bias": _bias,
    }
def get_power_inverse(signal, neighborhood=0):
    """Inverse of the signal power, optionally averaged over neighbours.

    Assumes single frequency bin with shape (D, T).

    ``neighborhood`` selects the averaging along the last axis: ``0`` for
    none, a positive integer ``n`` for a trailing moving mean over
    ``2 * n + 1`` values, ``np.inf`` for the mean over the whole axis.
    Anything else raises ``ValueError``.

    >>> s = 1 / np.array([np.arange(1, 6)]*3)
    >>> get_power_inverse(s)
    array([ 1.,  4.,  9., 16., 25.])
    >>> get_power_inverse(s * 0 + 1, 1)
    array([1., 1., 1., 1., 1.])
    >>> get_power_inverse(s, 1)
    array([ 1.        ,  1.6       ,  2.20408163,  7.08196721, 14.04421326])
    >>> get_power_inverse(s, np.inf)
    array([3.41620801, 3.41620801, 3.41620801, 3.41620801, 3.41620801])
    """
    power = np.mean(abs_square(signal), axis=-2)

    if np.isposinf(neighborhood):
        overall = np.mean(power, axis=-1, keepdims=True)
        power = np.broadcast_to(overall, power.shape)
    elif neighborhood > 0:
        assert int(neighborhood) == neighborhood, neighborhood
        neighborhood = int(neighborhood)
        import bottleneck as bn
        # Handle the corner case correctly (i.e. sum() / count)
        window = neighborhood * 2 + 1
        power = bn.move_mean(power, window, min_count=1)
    elif neighborhood != 0:
        # Negative values and NaN end up here.
        raise ValueError(neighborhood)

    # Floor the power relative to its maximum to avoid dividing by ~0.
    floor = 1e-10 * np.max(power)
    return 1 / np.maximum(power, floor)
def Rolling_mean(A, n):
    '''Moving average over n rows along axis 0 (column direction).

    Returns A itself when n < 1.  Positions that are NaN in A stay NaN in
    the result.
    '''
    if n < 1:
        # Averaging over fewer than one row is undefined: hand back the input.
        return A
    missing = np.isnan(A)
    averaged = bk.move_mean(A, n, axis=0, min_count=1)
    averaged[missing] = np.nan
    return averaged
def bnsmooth(x, window):
    """Bottleneck implementation of the IDL SMOOTH function.

    ``x`` is zero-padded on both sides and averaged with a moving mean of
    ``window`` samples; the returned array has ``len(x)`` entries.
    """
    half = int((window - 1) / 2)
    count = len(x)

    # Zero-filled buffer of length count + window with x placed after
    # `half` leading zeros.
    padded = np.zeros(count + window)
    padded[half:count + half] = x

    averaged = bn.move_mean(padded, window=window, axis=0)
    # Skip the first window - 1 entries (incomplete windows, NaN).
    first = window - 1
    return averaged[first:first + count]
def _cci(arr_h, arr_l, arr_c, window, start_indices):
    """CCI-style indicator with the first ``window`` rows of each group NaN.

    The typical price is ``(high + low + close) / 3``.  The moving mean is
    taken over the close, and the deviation term is the moving mean of
    ``mean - close``.  Where the deviation term is zero the result is 0.

    ``start_indices`` holds the row index at which each group starts; the
    pointer into it only advances when the current row matches.
    """
    typical_price = (arr_h + arr_l + arr_c) / 3
    close_mean = bn.move_mean(arr_c, window)
    deviation = bn.move_mean((close_mean - arr_c), window)
    res = np.where(deviation != 0,
                   (typical_price - close_mean) / deviation / 0.015, 0)

    n_rows = arr_c.shape[0]
    n_groups = start_indices.shape[0]
    group = 0
    # Rows still to blank in the current group (counts down from `window`).
    warmup_left = window
    for row in range(n_rows):
        if group < n_groups and start_indices[group] == row:
            warmup_left = window
            group += 1
        if warmup_left > 0:
            res[row] = np.nan
        warmup_left -= 1
    return res
def estimate_temp(self):
    """Predict the temperature series and smooth it with a moving mean.

    The smoothing window ``self.x`` grows with the number of recorded
    samples: 10 above 20 samples, 20 above 50 and 100 above 100.  With 20
    samples or fewer ``self.x`` is left as it was, so it must have been set
    beforehand.  Results are stored in ``self.pm`` and ``self.data_list``.
    """
    n_samples = len(self.time)
    # Test the largest threshold first: with the previous ascending order
    # the `> 20` branch shadowed `> 50` and `> 100`, which never ran.
    if n_samples > 100:
        self.x = 100
    elif n_samples > 50:
        self.x = 20
    elif n_samples > 20:
        self.x = 10

    predicted = self.temp_model.predict(
        self.df[self.feature_col_names].values)
    # Positional arguments are window=self.x and min_count=1.
    self.pm = bn.move_mean(predicted, self.x, 1)
    self.data_list = [self.time, self.ambient, self.coolant, self.u_d,
                      self.u_q, self.motor_speed, self.i_d, self.i_q,
                      self.pm]
def simple_moving_average(self) -> List[float]:
    """
    Calculates the simple moving average (SMA).

    Returns one value per entry of ``self.counts``, rounded to one decimal,
    using a window of ``self.sma_window`` (shorter windows are allowed at
    the start because ``min_count=1``).  An empty ``self.counts`` yields an
    empty list.
    """
    # The check used to be inverted: the average was only attempted for an
    # empty series, and real data always produced [].
    if len(self.counts) == 0:
        return []

    rolling = bottleneck.move_mean(self.counts, window=self.sma_window,
                                   min_count=1)
    return [round(value, 1) for value in rolling]
def Delay(A, n):
    '''Values from n rows earlier (A shifted by n along axis 0).

    Returns A itself when n < 1.  The first n rows and every position that
    is NaN in A are NaN in the result.
    '''
    if n < 1:
        return A

    shifted = np.roll(A, n, axis=0)

    # Fill gaps in the shifted data with the moving average.
    rolling_fill = bk.move_mean(A, n + 1, axis=0, min_count=1)
    gaps = np.isnan(shifted)
    shifted[gaps] = rolling_fill[gaps]

    # np.roll wraps around, so the first n rows hold values from the end.
    shifted[:n] = np.nan
    shifted[np.isnan(A)] = np.nan
    return shifted
def _cr(arr_o, arr_c, arr_h, arr_l, window, start_indices):
    """CR-style indicator with the first ``window`` rows of each group NaN.

    The middle price is the mean of open, close, high and low.  The result
    is ``100 * mean(high - previous middle) / mean(previous middle - low)``
    over ``window`` rows, or 50 where the denominator is zero.

    ``start_indices`` holds the row index at which each group starts; the
    pointer into it only advances when the current row matches.
    """
    arr_m = (arr_o + arr_c + arr_h + arr_l) / 4
    # np.roll wraps the last middle price to row 0; that row is always
    # blanked by the warm-up loop below.
    prev_m = np.roll(arr_m, 1)
    arr_p1 = bn.move_mean(arr_h - prev_m, window)
    arr_p2 = bn.move_mean(prev_m - arr_l, window)

    # (A dead `res = np.zeros_like(arr_c)` store used to precede this line.)
    res = np.where(arr_p2 != 0, 100 * arr_p1 / arr_p2, 50)

    pre_cnt = 0.0
    N = arr_c.shape[0]
    N_GROUP = start_indices.shape[0]
    j = 0
    for i in range(N):
        if j < N_GROUP and start_indices[j] == i:
            # A new group starts here: restart the warm-up counter.
            pre_cnt = 0
            j += 1
        if pre_cnt < window:
            res[i] = np.nan
        pre_cnt += 1
    return res
def smooth(x, window):
    """Centred moving average of ``x`` over ``window`` points.

    ``window`` is truncated to an int and bumped to the next odd number.
    A window of 1 or less returns ``x`` itself; a window at least as long as
    ``x`` returns a constant array holding the NaN-aware mean.  Near both
    ends the average uses the largest symmetric window that fits.
    """
    window = int(window)
    if window <= 1:
        return x
    if window % 2 == 0:
        window += 1
    if window >= len(x):
        return zeros_like(x) + nanmean(x)

    half = window // 2
    trailing = move_mean(x, window, min_count=1)
    # Shift the trailing average left by half a window to centre it.
    centred = append(trailing[half:], [NaN] * half)

    # Recompute the `half` values at each end with shrinking windows.
    for k in range(half):
        width = 2 * k + 1
        centred[k] = nanmean(x[:width])
        centred[-(k + 1)] = nanmean(x[-width:])
    return centred
def calculate(df, vol_avg=True, dir=True, body_size=True,
              body_size_break=True, body_mid_point=True, bar_size=True,
              bar_size_break=True, bars_broken_by_body=True,
              bar_mid_point=True, shadow_upper=True, shadow_lower=True,
              filled_by=True, broken_by=True, *args, **kwds):
    """Add the selected derived columns to ``df`` in place.

    ``df`` must contain ``open``, ``high``, ``low`` and ``close``;
    ``volume`` is read when ``vol_avg`` is set.  Each boolean flag switches
    one derived column on or off.  Some columns need an earlier one
    (``body_size``, ``bar_size``, ``dir``) and assert that it exists.
    ``shadow_upper`` and ``shadow_lower`` are accepted but not used here.
    Returns None.
    """
    def _require(column):
        assert column in df, 'DataFrame must have %s column' % column

    for base_column in ('open', 'high', 'low', 'close'):
        _require(base_column)

    if vol_avg:
        df['volume_average'] = bn.move_mean(df['volume'], QUANTUM_AVG_LEN)

    if dir:
        df['dir'] = base.dir(df['open'], df['close'])

    if body_size:
        df['body_size'] = base.body_size(df['open'], df['close'])

    if body_size_break:
        _require('body_size')
        df['body_size_break'] = base.body_size_break(df['body_size'])

    if body_mid_point:
        df['body_mid_point'] = base.body_mid_point(df['open'], df['close'])

    if bar_size:
        df['bar_size'] = base.bar_size(df['high'], df['low'])

    if bar_size_break:
        _require('bar_size')
        df['bar_size_break'] = base.bar_size_break(df['bar_size'])

    if bars_broken_by_body:
        _require('dir')
        df['bars_broken_by_body'] = base.bars_broken_by_body(
            df['high'], df['low'], df['close'], df['dir'])

    if bar_mid_point:
        df['bar_mid_point'] = base.bar_mid_point(df['high'], df['low'])

    if filled_by:
        _require('dir')
        df['filled_by'] = base.filled_by(df['open'], df['high'], df['low'],
                                         df['dir'])

    if broken_by:
        _require('dir')
        df['broken_by'] = base.broken_by(df['high'], df['low'], df['close'],
                                         df['dir'])
def numpy_normxcorr(templates, stream, pads, *args, **kwargs):
    """
    Compute the normalized cross-correlation using numpy and bottleneck.

    :param templates: 2D Array of templates
    :type templates: np.ndarray
    :param stream: 1D array of continuous data
    :type stream: np.ndarray
    :param pads: List of ints of pad lengths in the same order as templates
    :type pads: list

    :return: np.ndarray of cross-correlations
    :return: np.ndarray channels used
    """
    import bottleneck

    # Templates containing any NaN are flagged as unused.
    used_chans = ~np.isnan(templates).any(axis=1)

    # Currently have to use float64 as bottleneck runs into issues with other
    # types: https://github.com/kwgoodman/bottleneck/issues/164
    stream = stream.astype(np.float64)
    templates = templates.astype(np.float64)

    template_length = templates.shape[1]
    stream_length = len(stream)
    assert stream_length > template_length, "Template must be shorter than stream"

    full_length = template_length + stream_length - 1
    valid_length = stream_length - template_length + 1
    fftshape = next_fast_len(full_length)

    # Moving statistics of the stream; the first template_length - 1 values
    # come from incomplete windows and are dropped.
    first_full = template_length - 1
    stream_mean_array = bottleneck.move_mean(
        stream, template_length)[first_full:]
    stream_std_array = bottleneck.move_std(
        stream, template_length)[first_full:]
    # The std is the denominator of the result: turn zeros into NaN so the
    # NaN clean-up below maps those samples to 0.
    stream_std_array[stream_std_array == 0] = np.nan

    # Normalize and flip the templates
    demeaned = templates - templates.mean(axis=-1, keepdims=True)
    scale = templates.std(axis=-1, keepdims=True) * template_length
    norm = demeaned / scale
    norm_sum = norm.sum(axis=-1, keepdims=True)

    stream_fft = np.fft.rfft(stream, fftshape)
    template_fft = np.fft.rfft(np.flip(norm, axis=-1), fftshape, axis=-1)
    res = np.fft.irfft(template_fft * stream_fft, fftshape)[:, 0:full_length]

    valid = _centered(res, (templates.shape[0], valid_length))
    res = (valid - norm_sum * stream_mean_array) / stream_std_array
    res[np.isnan(res)] = 0.0

    # Shift each row left by its pad and zero-fill the end.
    for row, pad in enumerate(pads):
        res[row] = np.append(res[row], np.zeros(pad))[pad:]
    return res.astype(np.float32), used_chans
def effect_return(self):
    """Simulate ``self.n`` investments and plot their smoothed returns.

    Every run takes element 3 of ``meesman_investment().total_return()``,
    stores it in ``self.effective_returns`` and draws its 6-sample moving
    average against ``self.x_range``.
    """
    self.effective_returns = []
    for _ in range(self.n):
        run_returns = meesman_investment().total_return()[3]
        self.effective_returns.append(run_returns)

        smoothed = bn.move_mean(run_returns, window=6, min_count=1)
        plt.plot(self.x_range, smoothed, linewidth=1)

    plt.title("6 month moving average returns on investment")
    plt.ylabel("return")
    plt.xlabel("months")
    plt.show()
def _bias(arr, window, start_indices):
    """Percentage deviation of ``arr`` from its moving mean.

    The value is ``100 * (arr - mean) / mean``, or 50 where the moving mean
    is zero.  The first ``window`` rows of every group are set to NaN.

    ``start_indices`` holds the row index at which each group starts; the
    pointer into it only advances when the current row matches.
    """
    rolling = bn.move_mean(arr, window)
    res = np.where(rolling != 0, 100 * (arr - rolling) / rolling, 50)

    n_rows = arr.shape[0]
    n_groups = start_indices.shape[0]
    group = 0
    # Rows still to blank in the current group (counts down from `window`).
    warmup_left = window
    for row in range(n_rows):
        if group < n_groups and start_indices[group] == row:
            warmup_left = window
            group += 1
        if warmup_left > 0:
            res[row] = np.nan
        warmup_left -= 1
    return res
def plot_from_file(self, file_name, param_name, last_N=100, color='blue',
                   limit_x=None, limit_x_range=None, range_y=None,
                   y_ticks=None):
    """Plot the moving mean of a stored metric with a +/- 0.5 std band.

    The pickle ``<DATA_DIR><file_name>.pkl`` is loaded and the series
    ``metadata[param_name]`` is smoothed over ``last_N`` samples.  When the
    file is missing a message is printed and nothing is drawn.

    ``limit_x`` truncates the curve to that many episodes.  Otherwise
    ``limit_x_range`` names the metadata entry used as x values and x label.
    ``range_y`` sets the y limits; ``y_ticks`` is the tick spacing and
    relies on ``range_y`` being given.
    """
    if not os.path.isfile(self.__DATA_DIR + file_name + ".pkl"):
        print("Data file does not exist")
        return
    metadata = load_pickle(file_name)

    score = metadata[param_name]
    mean = bn.move_mean(score, window=last_N)
    std = moving_std(score, last_N=last_N)

    if limit_x is not None:
        episodes = range(limit_x)
        mean, std = mean[:limit_x], std[:limit_x]
    elif limit_x_range is not None:
        episodes = metadata[limit_x_range]
    else:
        episodes = range(len(score))
        # NOTE(review): this branch replaces the statistics computed above
        # with moving_average(); confirm that is intended.
        mean, std = moving_average(score, last_N=last_N)

    lower_bound = [m - 0.5 * s for m, s in zip(mean, std)]
    upper_bound = [m + 0.5 * s for m, s in zip(mean, std)]

    plt.fill_between(episodes, lower_bound, upper_bound, facecolor=color,
                     alpha=0.5)
    plt.plot(episodes, mean, color=color)

    if range_y is not None:
        plt.ylim(range_y)
    if y_ticks is not None:
        tick_stop = range_y[1] + 2 * y_ticks
        plt.yticks(np.arange(range_y[0], tick_stop, y_ticks))

    plt.xlabel(limit_x_range if limit_x_range is not None else "episodes")
    plt.ylabel(param_name)
def _set_data(self):
    """Fill ``self._data_dict`` with the ratio series and band-crossing flags.

    Reads ``PRCCD``, ``CUM_WEALTH`` and ``IN_US_1`` for ``self.pair`` from
    ``self.dl``, builds the log price ratio of the two legs and compares it
    with its 252-sample moving mean and mean +/- 2 std.  When
    ``self.identified_date`` is not found in ``self.dl`` it is replaced by
    the first later date available.

    Raises
    ------
    KeyError
        If the date is unknown and no later date exists.
    AssertionError
        If fewer than 252 columns precede the identified date.
    """
    try:
        endcol1 = self.dl.dates_to_indices(self.identified_date)
    except KeyError:
        # Unknown date: fall back to the first date strictly after it.
        argwhere = np.argwhere(self.dl.dates > self.identified_date)
        if not len(argwhere):
            raise KeyError('It seems that {} is neither a valid date, nor a date when data is available'.format(
                self.identified_date))
        else:
            endcol1 = argwhere[0][0]
            self.identified_date = self.dl.dates[endcol1]
    # endcol = min(endcol1 + self.MAX_OBS_DAYS + self.MAX_HLD_DAYS + 1, len(self.dl))
    # End 10% beyond observation + holding days, capped at the data length.
    endcol = min(endcol1 + int((self.MAX_OBS_DAYS + self.MAX_HLD_DAYS) * 1.1), len(self.dl))
    startcol = endcol1 - 252
    assert startcol >= 0, '%s is too early to have enough data required for computation' % self.identified_date
    # according to R implementation, should minus 251, but here change it to 252 so the identified day can also be
    # trading trigger/activation day
    self._identified_date_id = endcol1
    pair_prices = self.dl['PRCCD', self.pair][:, startcol:endcol]
    pair_wealth = self.dl['CUM_WEALTH', self.pair][:, startcol:endcol]
    # Rebase cumulative wealth so the first column equals the first price.
    pair_prices = pair_prices[:, :1] * pair_wealth / pair_wealth[:, :1]
    # Column 252 of the slice is the identified date (startcol = endcol1 - 252).
    has_na = np.isnan(pair_prices[:, 252:]).any(axis=0)
    self._data_dict['has_na'] = has_na  # start from identified
    self._data_dict['cum_na'] = has_na.cumsum()  # start from identified
    # Note: actually no missing values were found during my experiment.
    # this block might be redundant
    pair_prices = foward_fillna_2darray(pair_prices)
    ratio = np.log(pair_prices[0] / pair_prices[1])
    self._data_dict['ratio_history'] = ratio
    # Dropping the first 251 entries leaves values starting one sample
    # before the identified date.
    mean_mv = bn.move_mean(ratio, window=252, min_count=200)[251:]
    sd_mv = bn.move_std(ratio, window=252, min_count=200, ddof=1)[251:]
    # min_count is used to address the extreme case where the first 50 days are all missing data.
    # this is likely under the parameter settings of correlation computation
    ub_mv = mean_mv + 2. * sd_mv  # start from identified - 1
    lb_mv = mean_mv - 2. * sd_mv  # start from identified - 1
    ratio = ratio[251:]  # start from identified - 1
    self._data_dict['ratio'] = ratio[1:]  # start from identified
    # ediff1d of a 0/1 indicator: +1 where the condition becomes true,
    # -1 where it stops holding, 0 otherwise.
    self._data_dict['above_upper'] = np.ediff1d(np.where(ratio >= ub_mv, 1, 0))  # start from identified
    self._data_dict['above_mean'] = np.ediff1d(np.where(ratio >= mean_mv, 1, 0))
    self._data_dict['below_mean'] = np.ediff1d(np.where(ratio <= mean_mv, 1, 0))
    self._data_dict['below_lower'] = np.ediff1d(np.where(ratio <= lb_mv, 1, 0))
    # True where the IN_US_1 values of both legs sum to exactly 2.
    self._data_dict['in_flag'] = bn.nansum(self.dl['IN_US_1', self.pair][:, endcol1:endcol], axis=0) == 2
def scipy_normxcorr(templates, stream, pads):
    """
    Compute the normalized cross-correlation of multiple templates with data.

    :param templates: 2D Array of templates
    :type templates: np.ndarray
    :param stream: 1D array of continuous data
    :type stream: np.ndarray
    :param pads: List of ints of pad lengths in the same order as templates
    :type pads: list

    :return: np.ndarray of cross-correlations
    :return: np.ndarray channels used
    """
    import bottleneck
    # `_centered` is private; newer SciPy keeps it in `_signaltools`, while
    # old releases only have the `signaltools` module.
    try:
        from scipy.signal._signaltools import _centered
    except ImportError:
        from scipy.signal.signaltools import _centered

    # Templates containing any NaN are flagged as unused.
    used_chans = ~np.isnan(templates).any(axis=1)

    # Currently have to use float64 as bottleneck runs into issues with other
    # types: https://github.com/kwgoodman/bottleneck/issues/164
    stream = stream.astype(np.float64)
    templates = templates.astype(np.float64)
    template_length = templates.shape[1]
    stream_length = len(stream)
    fftshape = next_fast_len(template_length + stream_length - 1)

    # Moving statistics of the stream; the first template_length - 1 values
    # come from incomplete windows and are dropped.
    stream_mean_array = bottleneck.move_mean(
        stream, template_length)[template_length - 1:]
    stream_std_array = bottleneck.move_std(
        stream, template_length)[template_length - 1:]
    # The std is the denominator of the result: turn zeros into NaN so flat
    # stretches become 0 in the clean-up below instead of +/-inf.
    stream_std_array[stream_std_array == 0] = np.nan

    # Normalize and flip the templates
    norm = ((templates - templates.mean(axis=-1, keepdims=True)) /
            (templates.std(axis=-1, keepdims=True) * template_length))
    norm_sum = norm.sum(axis=-1, keepdims=True)

    stream_fft = np.fft.rfft(stream, fftshape)
    template_fft = np.fft.rfft(np.flip(norm, axis=-1), fftshape, axis=-1)
    res = np.fft.irfft(template_fft * stream_fft,
                       fftshape)[:, 0:template_length + stream_length - 1]

    # Pass the full 2-D target shape: a scalar is broadcast to both axes
    # and can crop the template axis as well.
    valid_shape = (templates.shape[0], stream_length - template_length + 1)
    res = ((_centered(res, valid_shape)) -
           norm_sum * stream_mean_array) / stream_std_array
    res[np.isnan(res)] = 0.0

    # Shift each row left by its pad and zero-fill the end.
    for i, pad in enumerate(pads):
        res[i] = np.append(res[i], np.zeros(pad))[pad:]
    return res.astype(np.float32), used_chans
def get_best_score(self):
    """Return ``(episode, score)`` of the best moving-average score.

    The average covers up to the last 100 entries of ``self.score``.  If it
    ever reaches 200 the first episode where it does is returned together
    with exactly 200.0; otherwise the episode and value of the maximum.

    :return: tuple ``(int, float)``
    """
    window = min(100, len(self.score))
    rolling = move_mean(self.score, window=window, min_count=1)

    best_episode = np.argmax(rolling)
    best_score = rolling[best_episode]

    if best_score >= 200.0:
        # argmax of a boolean array is the first index that is True.
        best_episode = np.argmax(rolling >= 200.0)
        best_score = 200.0  # to ensure equivalence

    return int(best_episode), float(best_score)
def plot(self, window=100, alpha=0.2, save=False, close_plots=False,
         pre_fix=""):
    """Draw moving mean, min and max of ``self.reward_hist`` in three figures.

    :param window: window length of the moving statistics
    :param alpha: opacity of the raw reward curve drawn behind the moving
                  mean; ``0`` (or less) skips the overlay
    :param save: directory for the SVG files, or ``False`` to skip saving
    :param close_plots: close each figure instead of showing it
    :param pre_fix: text prepended to each figure title
    """
    panels = (
        ("Mean", bn.move_mean, "move_mean.svg"),
        ("Min", bn.move_min, "move_min.svg"),
        ("Max", bn.move_max, "move_max.svg"),
    )
    for label, rolling, filename in panels:
        plt.figure()
        plt.title(pre_fix + label)
        line = plt.plot(rolling(self.reward_hist, window=window))[0]
        # Only the mean panel gets the raw rewards in the same colour.
        if label == "Mean" and alpha > 0:
            plt.plot(self.reward_hist, color=line.get_color(), alpha=alpha)
        plt.xlim(xmin=0)
        plt.grid(True)
        if save is not False:
            plt.savefig(os.path.join(save, filename))
        if close_plots:
            plt.close()
        else:
            plt.show()
def window_agg_using_bottleneck(self, progression_dim, window_size=1000,
                                overlap=500, agg_method='median'):
    """Aggregate the rows of ``self.data`` over sliding windows.

    Rows are sorted by the column of ``progression_dim``, then a moving
    median (``'median'``) or moving mean (``'average'``) of ``window_size``
    rows is taken along axis 0.  Every ``window_size - overlap``-th result
    is kept when that step is greater than 1.  Returns a new ``DataTable``.
    """
    import bottleneck as bn

    # first sort by the given dim:
    column = self.dims.index(progression_dim)
    order = self.data[:, column].argsort()
    sorted_data = self.data[order, ]

    with Timer('bottleneck window'):
        if agg_method == 'median':
            rolling = bn.move_median
        elif agg_method == 'average':
            rolling = bn.move_mean
        else:
            raise Exception('Unknown agg method')
        agg_data = rolling(sorted_data, window_size, axis=0)

    # First rows are nan because they don't contain a full window, let's remove them:
    agg_data = agg_data[window_size - 1:]

    stride = window_size - overlap
    if stride > 1:
        agg_data = agg_data[::stride]

    return DataTable(agg_data, self.dims, self.legends, self.tags.copy())
def __init__(self, signal, time, dtS=0.0002):
    """
    Store copies of the signal and time basis and derive normalized signals.

    Parameters
    ----------
    signal : ndarray
        signal to be analyzed
    time : ndarray
        time basis
    dtS : floating
        time window of the rolling normalization; the window length in
        samples is ``int(dtS / dt)``

    Two normalizations of the form (x - <x>) / std(x) are computed:
    ``signorm`` uses the mean and standard deviation of the whole signal,
    ``rmsnorm`` uses a rolling mean and rolling standard deviation.

    Dependences
    -----------
    numpy
    scipy
    pycwt https://github.com/regeirk/pycwt.git
    astropy for better histogram function
    bottleneck (https://pypi.python.org/pypi/Bottleneck) for moving average
    """
    self.sig = copy.deepcopy(signal)
    self.time = copy.deepcopy(time)

    # Mean sampling interval over the whole time basis.
    span = self.time.max() - self.time.min()
    self.dt = span / (self.time.size - 1)
    self.nsamp = self.time.size

    self.signorm = (self.sig - self.sig.mean()) / self.sig.std()

    # The moments of the signal are fundamental quantities, so they are
    # computed right away.
    self.moments()

    n_window = int(dtS / self.dt)
    rolling_mean = bottleneck.move_mean(self.sig, n_window, min_count=1)
    rolling_std = bottleneck.move_std(self.sig, n_window, min_count=1)
    self.rmsnorm = (self.sig - rolling_mean) / rolling_std
def _computeExB(self, data):
    """
    From the output of the conditional average, compute the amplitude of
    the radial and poloidal electric field fluctuations.

    The time window is twice the distance from 0 to the nearest
    half-maximum crossings of the Isat structure (the original note
    describes this as 2.5 sigma of the Isat amplitude).

    Returns a dict with keys ``Er``, ``ErErr``, ``Epol`` and ``EpolErr``.
    """
    signal = data.sel(sig='Is') - data.sel(sig='Is').min()
    # Roots of (signal - max / 2) are the half-maximum crossings.
    spline = UnivariateSpline(data.t,
                              signal.values - signal.max().item() / 2.,
                              s=0)
    # find the roots and the closest roots to 0
    roots = spline.roots()
    tmin = roots[roots < 0][-1] * 2
    try:
        tmax = roots[roots > 0][0] * 2
    except IndexError:
        # No crossing on the positive side: use a fixed window.  Only the
        # empty selection is handled; a bare `except` would also swallow
        # KeyboardInterrupt and real errors.
        tmax = 2.5e-5
    # now the fluctuations of the Epol
    ii = np.where((data.t.values >= tmin) & (data.t.values <= tmax))[0]
    # recompute Epol from CAS floating potential with an appropriate
    # smoothing otherwise we have too noisy signal
    _Epol = (data.sel(sig='VFT_' + str(int(self.plunge))) -
             data.sel(sig='VFM_' + str(int(self.plunge)))) / 4e-3
    _Epol = bottleneck.move_mean(_Epol, window=10)
    Epol = np.abs(_Epol[ii].max() - _Epol[ii].min())
    Erad = np.abs(
        data.sel(sig='Erad')[ii].max().item() -
        data.sel(sig='Erad')[ii].min().item())
    EpolErr = np.mean(data.err[1, ii])
    EradErr = np.mean(data.err[2, ii])
    out = {'Er': Erad, 'ErErr': EradErr,
           'Epol': Epol, 'EpolErr': EpolErr}
    return out
def window_agg_using_bottleneck(self, progression_dim, window_size=1000,
                                overlap=500, agg_method='median'):
    """Windowed aggregation of ``self.data`` computed with bottleneck.

    The table is ordered by the ``progression_dim`` column and reduced with
    a moving median (``agg_method='median'``) or moving mean
    (``agg_method='average'``) over ``window_size`` rows.  Any other method
    raises ``Exception``.  Consecutive kept windows start
    ``window_size - overlap`` rows apart when that step exceeds 1.
    """
    import bottleneck as bn

    # first sort by the given dim:
    xdim_index = self.dims.index(progression_dim)
    row_order = self.data[:, xdim_index].argsort()
    ordered = self.data[row_order, ]

    with Timer('bottleneck window'):
        if agg_method == 'median':
            windowed = bn.move_median(ordered, window_size, axis=0)
        elif agg_method == 'average':
            windowed = bn.move_mean(ordered, window_size, axis=0)
        else:
            raise Exception('Unknown agg method')

    # First rows are nan because they don't contain a full window, let's remove them:
    full_windows = windowed[window_size - 1:]

    step = window_size - overlap
    result = full_windows[::step] if step > 1 else full_windows
    return DataTable(result, self.dims, self.legends, self.tags.copy())
latp = track.variables['latp'][:,:len(tinds_tracks)] tracpy.plotting.tracks(lonp, latp, name, grid) # Plot wind arrows lonv = np.linspace(-95.2, -88.3, len(wx)) latv = np.ones(lonv.shape)*25.5 x0, y0 = grid['basemap'](lonv, latv) # Plot start and end indicators plt.plot(x0[0], y0[0], 'og', markersize=16, alpha=0.5) plt.plot(x0[-1], y0[-1], 'or', markersize=16, alpha=0.5) # Plot a black line every day on the wind plot # pdb.set_trace() ind = (np.mod(trel[tinds_model],dd) == 0.) plt.plot(x0[ind], y0[ind], 'k|', markersize=10, alpha=0.5) # Plot arrows # have rolling average of wind arrows instead of selecting every few so it is smoother plt.quiver(x0[::dd], y0[::dd], bn.move_mean(wx, window=dd)[::dd], bn.move_mean(wy, window=dd)[::dd], scale=5, color='grey', width=.003, alpha=.8) # plt.quiver(x0[::dd], y0[::dd], wx[::dd], wy[::dd], scale=5, color='grey', width=.003, alpha=.8) # Plot date below wind for i in xrange(x0[ind].size): plt.text(x0[ind][i], y0[ind][i]-50000, dates[tinds_model][ind][i].isoformat()[5:10], fontsize=10, alpha=0.5) plt.savefig('figures/' + name + 'tracks.png',bbox_inches='tight') plt.close() track.close() d.close() # if __name__ == "__main__": # run()
def height_plot_across_folders(folder_list, inputsuffix='allz2.dat',
                               label='Mean Light Weighted Age [Gyr]',
                               col=6, errcol=None, lowhigh=False,
                               order=5, ylims=None, bigpoints=False,
                               binz=True, combine_all=False, plot_std=False,
                               exclude=[[],[],[],[],[],[]]):
    """Plot one quantity against |height| for six pointings, per folder.

    For every pointing a figure is created and, for every folder in
    ``folder_list``, the first file matching ``*P<pointing>*<inputsuffix>``
    and the first ``*P<pointing>*locations.dat`` are read.

    folder_list -- folders to compare; each one gets its own colour
    inputsuffix -- suffix of the data files
    label -- y-axis label
    col -- data column to read; a scalar is used for every folder
    errcol -- error column, or None for no errors
    lowhigh -- if True, errcol and errcol + 1 hold low/high errors
    order -- window of the moving statistics
    ylims -- optional (low, high) y limits
    bigpoints -- not used in this function
    binz -- if True, average points whose heights differ by less than 0.05
    combine_all -- also plot mean and spline over all folders
    plot_std -- with combine_all, add a figure of the folder-to-folder std
    exclude -- per pointing, 1-indexed apertures to drop

    Returns the list of axes created.
    """

    axlist = []

    # Order in which the pointings are processed.
    plist = [6,3,4,2,1,5]
    #color_list = ['blue','turquoise','chartreuse','yellow','tomato','red']
    color_list = ['blue','seagreen','darkorange','crimson','dimgray','mediumorchid','lightblue']
    style_list = ['-','-','-','-','-','-','-']

    # A single column number applies to every folder.
    if not isinstance(col,list):
        col = [col] * len(folder_list)

    for i in range(6):
        pointing = plist[i]

        ax = plt.figure().add_subplot(111)
        ax.set_xlabel('|Height [kpc]|')
        ax.set_ylabel(label)
        ax.set_title('{}\nP{}'.format(time.asctime(),pointing))

        for f, folder in enumerate(folder_list):
            color = color_list[f]
            style = style_list[f]

            dat = glob('{}/*P{}*{}'.format(folder, pointing, inputsuffix))[0]
            print dat
            loc = glob('{}/*P{}*locations.dat'.format(folder, pointing))[0]
            print loc
            print 'Excluding: ', exclude[pointing-1]

            if errcol == None:
                td = np.loadtxt(dat, usecols=(col[f],), unpack=True)
            else:
                if lowhigh:
                    td, low, high = np.loadtxt(dat, usecols=(col[f],errcol,errcol+1), unpack=True)
                    te = np.vstack((low,high))
                else:
                    td, te = np.loadtxt(dat, usecols=(col[f],errcol), unpack=True)
            r, tz = np.loadtxt(loc, usecols=(4,5), unpack=True)

            exarr = np.array(exclude[pointing-1])-1 #because aps are 1-indexed
            td = np.delete(td,exarr)
            r = np.delete(r,exarr)
            tz = np.delete(tz,exarr)
            if errcol != None:
                if lowhigh:
                    te = np.delete(te,exarr,axis=1)
                else:
                    te = np.delete(te,exarr)

            alpha=1.0
            if combine_all and f == 0:
                # Placeholder first row; it is dropped again (bigD[1:])
                # before the combined statistics are computed.
                bigD = np.zeros(td.size)
                alpha=0.3

            if binz:
                z = np.array([])
                d = np.array([])
                e = np.array([])
                # Repeatedly take the first remaining height, average every
                # point within 0.05 of it, and remove those points.
                while tz.size > 0:
                    zi = tz[0]
                    idx = np.where(np.abs(tz - zi) < 0.05)
                    d = np.r_[d,np.mean(td[idx])]
                    e = np.r_[e,np.std(td[idx])]
                    z = np.r_[z,np.abs(zi)]
                    tz = np.delete(tz, idx)
                    td = np.delete(td, idx)
            else:
                z = tz
                d = td
                if errcol == None:
                    e = np.zeros(tz.size)
                else:
                    e = te

            if combine_all:
                bigD = np.vstack((bigD,d))
                bigz = z

            # d == d is False only for NaN: keep the finite bins.
            gidx = d == d
            d = d[gidx]
            z = z[gidx]
            if lowhigh:
                e = e[:,gidx]
            else:
                e = e[gidx]

            sidx = np.argsort(z)
            # Prepend the first order + 1 sorted values in reverse order so
            # the moving window is full from the first real point on.
            dp = np.r_[d[sidx][order::-1],d[sidx]]
            zp = np.r_[z[sidx][order::-1],z[sidx]]
            mean = bn.move_mean(dp,order)[order+1:]
            std = bn.move_std(dp,order)[order+1:]
            # The moving mean above is replaced by a spline evaluation.
            spl = spi.UnivariateSpline(z[sidx],d[sidx])
            mean = spl(z[sidx])
            # mean = np.convolve(d[sidx],np.ones(order)/order,'same')
            # std = np.sqrt(np.convolve((d - mean)**2,np.ones(order)/order,'same'))

            # ax.plot(z[sidx],mean,color=color, ls=style, label=folder, alpha=alpha)
            # ax.fill_between(z[sidx],mean-std,mean+std, alpha=0.1, color=color)

            # print d.shape, np.sum(e,axis=0).shape
            # d = d/np.sum(e,axis=0)
            # e = np.diff(e,axis=0)[0]
            # print e.shape

            ax.errorbar(z, d, yerr=e, fmt='.', color=color,alpha=alpha,capsize=0, label=folder)

        ax.set_xlim(-0.1,2.6)

        if ylims is not None:
            ax.set_ylim(*ylims)
        ax.legend(loc=0,numpoints=1)

        if combine_all:
            # bigz holds the heights of the last folder processed.
            sidx = np.argsort(bigz)
            bigD = bigD[1:]
            bigMean = bn.nanmean(bigD,axis=0)
            bigStd = bn.nanstd(bigD,axis=0)
            bigspl = spi.UnivariateSpline(bigz[sidx],bigMean[sidx])
            bigFit = bigspl(bigz[sidx])

            ax.plot(bigz[sidx], bigFit, 'k-', lw=2)
            ax.errorbar(bigz, bigMean, yerr=bigStd, fmt='.', color='k',capsize=0)

        axlist.append(ax)

        if combine_all and plot_std:
            ax2 = plt.figure().add_subplot(111)
            ax2.set_xlabel('|Height [kpc]|')
            ax2.set_ylabel('$\delta$'+label)
            ax2.set_title(ax.get_title())
            ax2.plot(bigz, bigStd, 'k')
            axlist.append(ax2)

    return axlist
def simple_plot(inputsuffix='allz2.dat', label='Mean Light Weighted Age [Gyr]',
                col=62, order=5, ylims=None, labelr=False, bigpoints=False,
                exclude=None):
    """Plot a binned data column against |height| for each of six pointings.

    For each pointing in the fixed order [6, 3, 4, 2, 1, 5] the first file
    in the current directory matching ``*P{pointing}*{inputsuffix}``
    supplies the data (column `col`) and the first file matching
    ``*P{pointing}*locations.dat`` supplies radius (column 4) and height
    (column 5). The file names and the exclusion list are printed.

    Points whose heights lie within 0.05 of each other are averaged. Each
    pointing gets its own figure showing the binned points, a spline through
    them and a band of +/- one moving standard deviation; the spline and
    band are also drawn on a shared figure.

    Parameters
    ----------
    inputsuffix : str
        Trailing part of the data file name used in the glob pattern.
    label : str
        Y-axis label.
    col : int
        Data column to read.
    order : int
        Window length of the moving standard deviation.
    ylims : sequence or None
        Passed to ``set_ylim(*ylims)`` on every axes when given.
    labelr : bool
        If True, title and legend entry show the mean radius of the
        pointing instead of its number.
    bigpoints : bool
        If True, also draw the binned points on the shared figure.
    exclude : list of list of int or None
        ``exclude[p - 1]`` lists the 1-indexed rows to drop for pointing
        ``p``. None means nothing is excluded.

    Returns
    -------
    list
        The shared axes first, followed by the six per-pointing axes.
    """
    if exclude is None:
        exclude = [[] for _ in range(6)]

    axlist = []
    bigax = plt.figure().add_subplot(111)
    bigax.set_xlabel('|Height [kpc]|')
    bigax.set_ylabel(label)

    plist = [6, 3, 4, 2, 1, 5]
    color_list = ['blue', 'seagreen', 'sienna', 'sienna', 'seagreen', 'blue']
    style_list = ['-', '-', '-', '--', '--', '--']

    for pointing, color, style in zip(plist, color_list, style_list):
        dat = glob('*P{}*{}'.format(pointing, inputsuffix))[0]
        print(dat)
        loc = glob('*P{}*locations.dat'.format(pointing))[0]
        print(loc)
        print('Excluding:  {}'.format(exclude[pointing - 1]))

        td = np.loadtxt(dat, usecols=(col,), unpack=True)
        r, tz = np.loadtxt(loc, usecols=(4, 5), unpack=True)
        # The mean radius is taken over all rows, including excluded ones.
        avgr = np.mean(r)

        ax = plt.figure().add_subplot(111)
        ax.set_xlabel('|Height [kpc]|')
        ax.set_ylabel(label)
        if labelr:
            linelabel = '{:4.0f} kpc'.format(avgr)
            ax.set_title(linelabel)
        else:
            ax.set_title('{}\nP{}'.format(time.asctime(), pointing))
            linelabel = 'P{}'.format(pointing)

        # Exclusion lists are 1-indexed. Force an integer dtype: an empty
        # list would otherwise become a float array, which np.delete does
        # not accept as an index.
        exarr = np.asarray(exclude[pointing - 1], dtype=int) - 1
        td = np.delete(td, exarr)
        tz = np.delete(tz, exarr)

        z = np.array([])
        d = np.array([])
        e = np.array([])
        while tz.size > 0:
            zi = tz[0]
            near = np.abs(tz - zi) < 0.05
            # Always consume the leading point so that a NaN height cannot
            # stall the loop.
            near[0] = True
            d = np.r_[d, np.mean(td[near])]
            e = np.r_[e, np.std(td[near])]
            z = np.r_[z, np.abs(zi)]
            tz = tz[~near]
            td = td[~near]

        good = d == d  # False exactly where d is NaN
        d = d[good]
        z = z[good]
        e = e[good]

        sidx = np.argsort(z)
        z_sorted = z[sidx]
        d_sorted = d[sidx]

        # Prepend the reversed first order+1 points so the moving window is
        # already full at the first real point, then cut the padding off.
        # NOTE(review): std only has the same length as z when there are
        # more than `order` bins -- confirm inputs always satisfy this.
        padded = np.r_[d_sorted[order::-1], d_sorted]
        std = bn.move_std(padded, order)[order + 1:]
        mean = spi.UnivariateSpline(z_sorted, d_sorted)(z_sorted)

        bigax.plot(z_sorted, mean, label=linelabel, color=color, ls=style)
        bigax.fill_between(z_sorted, mean - std, mean + std, alpha=0.1,
                           color=color)
        if bigpoints:
            bigax.errorbar(z, d, yerr=e, fmt='.', color=color, alpha=0.6,
                           capsize=0)

        ax.plot(z_sorted, mean, color=color, ls=style)
        ax.fill_between(z_sorted, mean - std, mean + std, alpha=0.1,
                        color=color)
        ax.errorbar(z, d, yerr=e, fmt='.', color=color)
        ax.set_xlim(-0.1, 2.6)
        if ylims is not None:
            ax.set_ylim(*ylims)

        axlist.append(ax)

    bigax.legend(loc=0, numpoints=1, scatterpoints=1)
    bigax.set_title(time.asctime())
    bigax.set_xlim(-0.1, 2.6)
    if ylims is not None:
        bigax.set_ylim(*ylims)

    return [bigax] + axlist
def calculate_season(self):
    """
    Compute the seasonal field for `self.season` and store it on `self.dset`.

    The dataset named by `self.dset_dict['path']` is opened and kept as
    `self.dset`. A running sum (when the units are 'mm') or a running mean
    (otherwise) is taken along the first axis of `self.variable`, the time
    steps falling in the month associated with the season are selected, and
    the result is optionally detrended. The selected dates and the field
    are written to `self.dset['dates']` and `self.dset['seas_var']`.
    """
    # season name -> (running-window length, calendar month that is kept)
    self.seasons_params = {
        'DJF': (3, 2),
        'JFM': (3, 3),
        'FMA': (3, 4),
        'MAM': (3, 5),
        'AMJ': (3, 6),
        'MJJ': (3, 7),
        'JJA': (3, 8),
        'JAS': (3, 9),
        'ASO': (3, 10),
        'SON': (3, 11),
        'OND': (3, 12),
        'NDJ': (3, 1),
        'Warm Season (Dec. - May)': (6, 5),
        'Cold Season (Jun. - Nov.)': (6, 11),
        'Year (Jan. - Dec.)': (12, 12),
        'Hydro. year (Jul. - Jun.)': (12, 6),
    }

    if not hasattr(self, 'dset_dict'):
        self._read_dset_params()

    # open the file; `dset` becomes an attribute of the object
    self.dset = xray.open_dataset(self.dset_dict['path'])

    # raw values of the variable and the matching time index
    values = self.dset[self.variable].data
    time_index = self.dset['time'].to_index()

    # rainfall (units 'mm') is accumulated, everything else is averaged
    accumulate = self.dset_dict['units'] in ['mm']
    window = self.seasons_params[self.season][0]
    running = bn.move_sum if accumulate else bn.move_mean
    seas_field = running(values, window, min_count=window, axis=0)

    # the first window-1 steps are NaN after the running operation: drop them
    seas_field = seas_field[window - 1:, :, :]
    time_index = time_index[window - 1:]

    # keep only the time steps that fall in the month of the season
    month = self.seasons_params[self.season][1]
    iseas = np.where(time_index.month == month)[0]
    dates = time_index[iseas]
    seas_field = np.take(seas_field, iseas, axis=0)

    if self.detrend:
        # detrend_linear from matplotlib.mlab is faster than detrend from
        # scipy.signal
        result = np.full(seas_field.shape, np.nan)
        # with a mask in the dataset, all-NaN grid points are left as NaN
        has_mask = 'mask' in self.dset.data_vars
        for ilat, ilon in np.ndindex(result.shape[1], result.shape[2]):
            series = seas_field[:, ilat, ilon]
            if has_mask and np.all(np.isnan(series)):
                continue
            # remove the linear trend but keep the mean of the series
            result[:, ilat, ilon] = detrend_linear(series) + series.mean()
    else:
        # no detrending: the seasonal values are stored as they are
        result = seas_field

    self.dset['dates'] = (('dates',), dates)
    self.dset['seas_var'] = (('dates', 'latitudes', 'longitudes'), result)