def plot_window_precision_recall_fscore(self, indices, actual_label,
                                        predict_score, threshold=0.5,
                                        window_size=22, beta=1):
    '''
    Plot window-based metrics to show the model's stability over time;
    currently covers precision, recall and the F-score.
    '''
    plt.figure(figsize=self.size)
    df = pd.DataFrame({
        "indices": indices,
        "actual_label": actual_label,
        "predict_score": predict_score
    }).sort_values("indices")
    df['predict_label'] = df['predict_score'] >= threshold
    df['TP'] = (df['actual_label'] == True) & (df['predict_label'] == True)
    df['FP'] = (df['actual_label'] == False) & (df['predict_label'] == True)
    df['TN'] = (df['actual_label'] == False) & (df['predict_label'] == False)
    df['FN'] = (df['actual_label'] == True) & (df['predict_label'] == False)
    groups = df.groupby("indices")
    unit_df = groups[['TP', 'FP', 'TN', 'FN']].sum()
    window_df = pd.DataFrame({
        "TP": bn.move_sum(unit_df['TP'], window_size),
        "FP": bn.move_sum(unit_df['FP'], window_size),
        "TN": bn.move_sum(unit_df['TN'], window_size),
        "FN": bn.move_sum(unit_df['FN'], window_size),
    })
    metric_df = pd.DataFrame({
        "p": window_df["TP"] / (window_df["TP"] + window_df["FP"]),
        "r": window_df["TP"] / (window_df["TP"] + window_df["FN"]),
    })
    beta2 = beta * beta
    metric_df['f'] = np.where(
        metric_df["p"] + metric_df["r"] > 0.0,
        (1 + beta2) * metric_df["p"] * metric_df["r"] /
        (beta2 * metric_df["p"] + metric_df["r"]), 0.0)
    metric_df["date"] = unit_df.index.to_numpy()
    res_df = metric_df[window_size - 1:]
    indices = Metrics.__try_convert_as_mdate(np.array(res_df['date']))
    plt.plot(indices, res_df['p'], label="precision")
    plt.plot(indices, res_df['r'], label="recall")
    plt.plot(indices, res_df['f'], label="$F_{{{}}}$".format(beta))
    plt.ylim([0, 1])
    plt.xticks(self.xticks01)
    plt.yticks(self.yticks01)
    plt.legend(loc='upper left', fontsize=self.fontsize)
    Metrics.__apply_sticks(indices=indices, num_sticks=8, rotate=True)
    plt.tight_layout()
    self.show()
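A minimal, self-contained sketch of the windowed-metric core above, using randomly generated toy labels; the window of 3, the threshold of 0.5, and all data are illustrative, not from the original class:

import bottleneck as bn
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
# Toy data: 6 dates with 4 predictions each.
df = pd.DataFrame({
    "indices": np.repeat(np.arange(6), 4),
    "actual_label": rng.random(24) > 0.5,
    "predict_score": rng.random(24),
})
df["predict_label"] = df["predict_score"] >= 0.5
df["TP"] = df["actual_label"] & df["predict_label"]
df["FP"] = ~df["actual_label"] & df["predict_label"]
df["FN"] = df["actual_label"] & ~df["predict_label"]

# Per-date confusion counts, then rolling sums over a window of dates.
unit = df.groupby("indices")[["TP", "FP", "FN"]].sum()
window = 3
w = {c: bn.move_sum(unit[c].to_numpy(dtype=float), window) for c in unit}
precision = w["TP"] / (w["TP"] + w["FP"])
recall = w["TP"] / (w["TP"] + w["FN"])
print(precision[window - 1:], recall[window - 1:])

Summing the confusion counts before dividing (rather than averaging per-date precision) weights each date by its number of predictions, which is why the method accumulates TP/FP/FN first.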
def init(self):
    self.windows = self.param['window']
    self.cols = self.param['col']
    self.types = self.param['type']
    self.translation_cols = self.param.get('translation')
    self.scale_cols = self.param.get('scale')
    self.move_window_mapping = {
        "mean": lambda c, s, t, w: bn.move_mean(c, w) * s + t,
        "std": lambda c, s, t, w: bn.move_std(c, w) * s,
        "var": lambda c, s, t, w: bn.move_var(c, w) * s * s,
        "min": lambda c, s, t, w: bn.move_min(c, w) * s + t,
        "max": lambda c, s, t, w: bn.move_max(c, w) * s + t,
        "rank": lambda c, s, t, w: bn.move_rank(c, w),
        "sum": lambda c, s, t, w: bn.move_sum(c, w) * s + t * w,
        "ema": lambda c, s, t, w: F.ema(
            c, 2.0 / (w + 1), start_indices=self.base.start_indices) * s + t,
        "rsi": lambda c, s, t, w: F.rsi(
            c, w, start_indices=self.base.start_indices),
        "psy": lambda c, s, t, w: F.psy(
            c, w, start_indices=self.base.start_indices),
        "bias": lambda c, s, t, w: F.bias(
            c, w, start_indices=self.base.start_indices)
    }
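A hypothetical two-entry sketch of the same dispatch pattern, showing how an op is picked by name and applied with (column, scale, translation, window):

import bottleneck as bn
import numpy as np

ops = {
    "mean": lambda c, s, t, w: bn.move_mean(c, w) * s + t,
    "sum":  lambda c, s, t, w: bn.move_sum(c, w) * s + t * w,
}
col = np.arange(10, dtype=float)
print(ops["sum"](col, s=1.0, t=0.0, w=3))  # rolling 3-element sums

Note the `t * w` in the "sum" entry: translating every input element by t shifts a w-element window sum by w*t, so the translation has to be scaled by the window length, while "mean", "min" and "max" only need a plain `+ t`.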
def Ts_sum(A, n):
    '''
    Sum over the past n periods (including the current one); n >= 1.
    '''
    if n < 1:
        # n must be at least 1 for an n-period sum; return the input unchanged
        return A
    result = bn.move_sum(A, window=n, min_count=1, axis=0)
    result[np.isnan(A)] = np.nan
    return result
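A small demo of the semantics (assuming the `bn` = bottleneck fix above): `min_count=1` makes the leading partial windows real sums instead of NaN, NaN inputs are skipped inside each window, and the final re-masking restores NaN exactly where the input had it:

import numpy as np
import bottleneck as bn

A = np.array([1.0, 2.0, np.nan, 4.0, 5.0])
res = bn.move_sum(A, window=2, min_count=1, axis=0)
print(res)                 # [1. 3. 2. 4. 9.] -- partial windows, NaN skipped
res[np.isnan(A)] = np.nan  # Ts_sum then restores NaN where the input was NaN
print(res)                 # [1. 3. nan 4. 9.]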
def _arbr(arr_o, arr_h, arr_l, arr_c, window, start_indices):
    # AR: buying vs. selling pressure measured against the open
    numerator_ar = bn.move_sum((arr_h - arr_o), window)
    denominator_ar = bn.move_sum((arr_o - arr_l), window)
    ar = np.where(denominator_ar != 0, 100 * numerator_ar / denominator_ar, 50)
    # BR: the same idea, measured against the previous close
    arr_y = np.roll(arr_c, 1)
    numerator_br = bn.move_sum((arr_h - arr_y), window)
    denominator_br = bn.move_sum((arr_y - arr_l), window)
    br = np.where(denominator_br != 0, 100 * numerator_br / denominator_br, 50)
    # mask the first `window` values of each group delimited by start_indices
    pre_cnt = 0.0
    N = arr_c.shape[0]
    N_GROUP = start_indices.shape[0]
    j = 0
    for i in range(N):
        if j < N_GROUP and start_indices[j] == i:
            pre_cnt = 0
            j += 1
        if pre_cnt < window:
            ar[i] = np.nan
            br[i] = np.nan
        pre_cnt += 1
    return ar, br
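A quick numeric check of the AR formula, AR = 100 * sum(H - O) / sum(O - L) over the window, on hypothetical toy bars:

import numpy as np
import bottleneck as bn

o = np.array([10.0, 10.5, 10.2, 10.8])
h = np.array([10.6, 10.9, 10.7, 11.2])
l = np.array([ 9.8, 10.1,  9.9, 10.4])
w = 3
ar = 100 * bn.move_sum(h - o, w) / bn.move_sum(o - l, w)
print(ar)  # [nan nan 166.67 118.18]: the first w-1 windows are incomplete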
def eddi_1d(eto, ts):
    """
    Compute the Evaporative Demand Drought Index (EDDI) from daily
    reference evapotranspiration (eto).

    Step 1 is to compute the running sum of eto based on a user-defined
    time scale (ts). Step 2 is to obtain the empirical probabilities from
    plotting positions. Step 3 is to transform the probabilities using an
    inverse normal distribution.

    Arguments:
        eto (:obj:`numpy.ndarray`): time series of daily eto
        ts (int): time scale input as an integer (units in freq. of ``eto``)

    Returns:
        eddi (:obj:`numpy.ndarray`): 1-D of EDDI for period of record
    """
    print('calculating EDDI')
    # Compute running sums based on time scale (ts)
    acc = bn.move_sum(eto, ts)
    # Compute plotting positions to obtain daily CDF
    # First, reshape the array to year x day
    acc = np.reshape(acc, (len(acc) // 365, 365))
    # Tukey plotting positions
    pp = sms.plotting_positions(acc, alpha=1. / 3., beta=1. / 3.,
                                axis=0, masknan=True)
    # Transformation through inverse normal
    eddi = stats.norm.ppf(pp)
    eddi = eddi.ravel()
    return eddi
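Steps 2 and 3 for a single calendar day, written out by hand so the `sms.plotting_positions` call is not needed; the accumulation values are hypothetical, and the Tukey formula p_i = (rank_i - 1/3) / (n + 1/3) matches the alpha = beta = 1/3 arguments above:

import numpy as np
from scipy import stats

acc_one_day = np.array([3.1, 2.4, 5.0, 4.2, 2.9])  # one day, across 5 years
ranks = stats.rankdata(acc_one_day)
pp = (ranks - 1.0 / 3.0) / (len(acc_one_day) + 1.0 / 3.0)
eddi = stats.norm.ppf(pp)  # inverse-normal transform, as in step 3
print(eddi)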
def plot_heights_with_err(inputsuffix, label=r'$\tau_{\mathrm{V,Balm}}$',
                          basedir='.', col=1, errcol=2, lowhigh=False,
                          order=5, bigorder=60, s=None, ylims=None,
                          labelr=False, bigpoints=False, plotfit=True,
                          exclude=exclude, printdate=True, printfit=True):
    zz = np.array([])
    dd = np.array([])
    if lowhigh:
        ee = np.array([[], []])
    else:
        ee = np.array([])
    axlist = []

    bigax = plt.figure().add_subplot(111)
    bigax.set_xlabel(r'$|z| \mathrm{\ [kpc]}$')
    bigax.set_ylabel(label)

    plist = [6, 3, 4, 2, 1, 5]
    color_list = ['blue', 'seagreen', 'sienna', 'orange', 'yellowgreen',
                  'darkturquoise']
    style_list = ['-', '-', '-', '--', '--', '--']

    for i in range(6):
        pointing = plist[i]
        color = color_list[i]
        style = style_list[i]

        dat = glob('{}/*P{}*{}'.format(basedir, pointing, inputsuffix))[0]
        print(dat)
        loc = glob('{}/*P{}*locations.dat'.format(basedir, pointing))[0]
        print(loc)
        print('Excluding: ', exclude[pointing - 1])

        if errcol is not None:
            if lowhigh:
                data, Lerr, Herr = np.loadtxt(
                    dat, usecols=(col, errcol, errcol + 1), unpack=True)
                err = np.vstack((Lerr, Herr))
            else:
                data, err = np.loadtxt(dat, usecols=(col, errcol), unpack=True)
        else:
            data = np.loadtxt(dat, usecols=(col,), unpack=True)
            err = np.ones(data.size) * 0.01

        r, z = np.loadtxt(loc, usecols=(4, 5), unpack=True)
        avgr = np.mean(r)

        ax = plt.figure().add_subplot(111)
        ax.set_xlabel('|Height [kpc]|')
        ax.set_ylabel(label)
        if labelr:
            ax.set_title('{:4.0f} kpc'.format(avgr))
            linelabel = '{:4.0f} kpc'.format(avgr)
        else:
            ax.set_title('{}\nP{}'.format(time.asctime(), pointing))
            linelabel = 'P{}'.format(pointing)

        exarr = np.array(exclude[pointing - 1]) - 1  # because aps are 1-indexed
        data = np.delete(data, exarr)
        r = np.delete(r, exarr)
        z = np.delete(z, exarr)

        gidx = data == data  # keep finite values (NaN != NaN)
        data = data[gidx]
        z = z[gidx]

        if lowhigh:
            err = np.delete(err, exarr, axis=1)
            err = err[:, gidx]
            ee = np.hstack((ee, err))
        else:
            err = np.delete(err, exarr)
            err = err[gidx]
            ee = np.r_[ee, err]

        zz = np.r_[zz, z]
        dd = np.r_[dd, data]

        sidx = np.argsort(z)
        data_pad = np.r_[data[sidx][order::-1], data[sidx]]
        z_pad = np.r_[z[sidx][order::-1], z[sidx]]
        # mean = bn.move_mean(data_pad, order)[order+1:]
        std = bn.move_std(data_pad, order)[order + 1:]
        spl = spi.UnivariateSpline(z[sidx], data[sidx])
        mean = spl(z[sidx])
        # mean = np.convolve(d[sidx], np.ones(order)/order, 'same')
        # std = np.sqrt(np.convolve((d - mean)**2, np.ones(order)/order, 'same'))

        bigax.errorbar(z, data, yerr=err, fmt='.', label=linelabel,
                       color=color, capsize=0)
        # ax.plot(z[sidx], mean, color=color, ls=style)
        # ax.fill_between(z[sidx], mean-std, mean+std, alpha=0.1, color=color)
        ax.errorbar(z, data, yerr=err, fmt='.', color=color, capsize=0)
        ax.set_xlim(-0.1, 2.6)
        if ylims is not None:
            ax.set_ylim(*ylims)
        axlist.append(ax)

    if printdate:
        plot_title = time.asctime()
    else:
        plot_title = ''

    if plotfit:
        sidx = np.argsort(zz)
        big_data_pad = np.r_[dd[sidx][bigorder::-1], dd[sidx]]
        big_z_pad = np.r_[zz[sidx][bigorder::-1], zz[sidx]]
        big_e_pad = np.r_[ee[sidx][bigorder::-1], ee[sidx]]
        # inverse-error-weighted moving mean
        big_sum = bn.move_sum(big_data_pad / big_e_pad, bigorder)[bigorder + 1:]
        big_weight = bn.move_sum(1. / big_e_pad, bigorder)[bigorder + 1:]
        big_mean = big_sum / big_weight
        # std = bn.move_std(data_pad, order)[order+1:]
        # big_spl = spi.UnivariateSpline(zz[sidx], dd[sidx], w=1./ee[sidx]**2, k=k, s=s)
        # big_mean = big_spl(zz[sidx])
        # big_pc = np.polyfit(zz[sidx], dd[sidx], polydeg, w=1./ee[sidx]**2)
        # big_poly = np.poly1d(big_pc)
        # big_mean = big_poly(zz[sidx])
        p = np.poly1d(np.polyfit(zz[sidx], big_mean, 1))
        print(p.coeffs)
        # bigax.plot(zz[sidx], big_mean, '-k', lw=2)
        bigax.plot(zz[sidx], p(zz[sidx]), '--k', lw=2)
        if printdate:
            plot_title += '\n'
        if printfit:
            plot_title += label + '$={:4.2f}z{:+4.2f}$'.format(p.coeffs[0],
                                                               p.coeffs[1])

    bigax.set_title(plot_title)
    bigax.legend(loc=0, numpoints=1, scatterpoints=1)
    bigax.set_xlim(-0.1, 2.6)
    print(zz.size)
    if ylims is not None:
        bigax.set_ylim(*ylims)

    axlist = [bigax] + axlist
    return axlist
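The fit above smooths the combined data with an inverse-error-weighted moving mean, mean_i = sum(d/e) / sum(1/e) over the window. Isolated on toy numbers (a down-weighted noisy middle point), the trick looks like this:

import numpy as np
import bottleneck as bn

d = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
e = np.array([0.1, 0.1, 1.0, 0.1, 0.1])  # the middle point has 10x the error
w = 3
wmean = bn.move_sum(d / e, w) / bn.move_sum(1.0 / e, w)
print(wmean[w - 1:])  # the window containing the noisy point barely moves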
def __chunk_mask(self, mask):
    # One flag per chunk: True if the chunk contains at least one True.
    # The reversed move_sum counts the True values in the chunk_size
    # elements at and after each position; sampling that count at each
    # chunk's start index gives the per-chunk total.
    indices = np.arange((mask.shape[0] - 1) // self.chunk_size + 1,
                        dtype=np.int32) * self.chunk_size
    return bn.move_sum(mask.astype(np.int32)[::-1], self.chunk_size,
                       min_count=1)[::-1][indices] > 0
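A standalone version of the same trick, with `chunk_mask` and the toy mask being illustrative names and data:

import numpy as np
import bottleneck as bn

def chunk_mask(mask, chunk_size):
    # move_sum looks backward, so running it on the reversed array makes
    # each position count the Trues in the next chunk_size elements.
    starts = np.arange((mask.shape[0] - 1) // chunk_size + 1) * chunk_size
    covered = bn.move_sum(mask.astype(np.int32)[::-1], chunk_size,
                          min_count=1)[::-1]
    return covered[starts] > 0

mask = np.array([0, 0, 0, 1, 0, 0, 0, 0, 1], dtype=bool)
print(chunk_mask(mask, 4))  # [ True False  True]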
def calculate_season(self):
    """
    calculates the season
    """
    seasons_params = {}
    seasons_params['DJF'] = (3, 2)
    seasons_params['JFM'] = (3, 3)
    seasons_params['FMA'] = (3, 4)
    seasons_params['MAM'] = (3, 5)
    seasons_params['AMJ'] = (3, 6)
    seasons_params['MJJ'] = (3, 7)
    seasons_params['JJA'] = (3, 8)
    seasons_params['JAS'] = (3, 9)
    seasons_params['ASO'] = (3, 10)
    seasons_params['SON'] = (3, 11)
    seasons_params['OND'] = (3, 12)
    seasons_params['NDJ'] = (3, 1)
    seasons_params['Warm Season (Dec. - May)'] = (6, 5)
    seasons_params['Cold Season (Jun. - Nov.)'] = (6, 11)
    seasons_params['Year (Jan. - Dec.)'] = (12, 12)
    seasons_params['Hydro. year (Jul. - Jun.)'] = (12, 6)
    self.seasons_params = seasons_params

    if not hasattr(self, 'dset_dict'):
        self._read_dset_params()

    # get the name of the file to open
    fname = self.dset_dict['path']

    # `dset` is now an attribute of the ensemble object
    self.dset = xray.open_dataset(fname)

    # get the variable and its index
    m_var = self.dset[self.variable].data
    index = self.dset['time'].to_index()

    # if the variable is rainfall, we calculate the running SUM
    if self.dset_dict['units'] in ['mm']:
        seas_field = bn.move_sum(m_var, self.seasons_params[self.season][0],
                                 min_count=self.seasons_params[self.season][0],
                                 axis=0)
    # if not, then we calculate the running MEAN (average)
    else:
        seas_field = bn.move_mean(m_var, self.seasons_params[self.season][0],
                                  min_count=self.seasons_params[self.season][0],
                                  axis=0)

    # get rid of the first NaNs in the time-series / fields after move_mean or move_sum
    seas_field = seas_field[(self.seasons_params[self.season][0] - 1):, :, :]
    index = index[(self.seasons_params[self.season][0] - 1):]

    # now select the SEASON of interest
    iseas = np.where(index.month == self.seasons_params[self.season][1])[0]
    dates = index[iseas]
    seas_field = np.take(seas_field, iseas, axis=0)

    # if detrend is set to `True`, we detrend;
    # detrend_linear from matplotlib.mlab is faster than detrend from scipy.signal
    if self.detrend:
        dseas_field = np.ones(seas_field.shape) * np.nan
        # if there is a mask, we have to test each variable
        if 'mask' in self.dset.data_vars:
            for ilat in range(dseas_field.shape[1]):
                for ilon in range(dseas_field.shape[2]):
                    if np.logical_not(np.all(np.isnan(seas_field[:, ilat, ilon]))):
                        dseas_field[:, ilat, ilon] = \
                            detrend_linear(seas_field[:, ilat, ilon]) \
                            + seas_field[:, ilat, ilon].mean()
        # if not, we can proceed over the whole dataset
        else:
            for ilat in range(dseas_field.shape[1]):
                for ilon in range(dseas_field.shape[2]):
                    dseas_field[:, ilat, ilon] = \
                        detrend_linear(seas_field[:, ilat, ilon]) \
                        + seas_field[:, ilat, ilon].mean()
        self.dset['dates'] = (('dates',), dates)
        self.dset['seas_var'] = (('dates', 'latitudes', 'longitudes'),
                                 dseas_field)
    # if detrend is False, then just add the seasonal values
    else:
        self.dset['dates'] = (('dates',), dates)
        self.dset['seas_var'] = (('dates', 'latitudes', 'longitudes'),
                                 seas_field)
def calculate_season(self):
    """
    calculates the season
    """
    self.seasons_params = seasons_params()

    if not hasattr(self, 'dset_dict'):
        self._read_dset_params()

    # get the name of the file to open
    fname = self.dset_dict['path']

    # `dset` is now an attribute of the ensemble object
    self.dset = xray.open_dataset(fname)

    # get the variable and its index
    m_var = self.dset[self.variable].data
    index = self.dset['time'].to_index()

    # if the variable is rainfall, we calculate the running SUM
    if self.dset_dict['units'] in ['mm']:
        seas_field = bn.move_sum(m_var, self.seasons_params[self.season][0],
                                 min_count=self.seasons_params[self.season][0],
                                 axis=0)
    # if not, then we calculate the running MEAN (average)
    else:
        seas_field = bn.move_mean(m_var, self.seasons_params[self.season][0],
                                  min_count=self.seasons_params[self.season][0],
                                  axis=0)

    # get rid of the first NaNs in the time-series / fields after move_mean or move_sum
    seas_field = seas_field[(self.seasons_params[self.season][0] - 1):, :, :]
    index = index[(self.seasons_params[self.season][0] - 1):]

    # now select the SEASON of interest
    iseas = np.where(index.month == self.seasons_params[self.season][1])[0]
    dates = index[iseas]
    seas_field = np.take(seas_field, iseas, axis=0)

    # if detrend is set to `True`, we detrend;
    # detrend_linear from matplotlib.mlab is faster than detrend from scipy.signal
    if self.detrend:
        dseas_field = np.ones(seas_field.shape) * np.nan
        # if there is a mask, we have to test each variable
        if 'mask' in self.dset.data_vars:
            for ilat in range(dseas_field.shape[1]):
                for ilon in range(dseas_field.shape[2]):
                    if np.logical_not(
                            np.all(np.isnan(seas_field[:, ilat, ilon]))):
                        dseas_field[:, ilat, ilon] = \
                            detrend_linear(seas_field[:, ilat, ilon]) \
                            + seas_field[:, ilat, ilon].mean()
        # if not, we can proceed over the whole dataset
        else:
            for ilat in range(dseas_field.shape[1]):
                for ilon in range(dseas_field.shape[2]):
                    dseas_field[:, ilat, ilon] = \
                        detrend_linear(seas_field[:, ilat, ilon]) \
                        + seas_field[:, ilat, ilon].mean()
        self.dset['dates'] = (('dates',), dates)
        self.dset['seas_var'] = (('dates', 'latitudes', 'longitudes'),
                                 dseas_field)
    # if detrend is False, then just add the seasonal values
    else:
        self.dset['dates'] = (('dates',), dates)
        self.dset['seas_var'] = (('dates', 'latitudes', 'longitudes'),
                                 seas_field)
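Both variants share the same core: a running sum (or mean) with min_count equal to the window, dropping the warm-up NaNs, then keeping only the entries whose month is the season's end month. A sketch on a hypothetical 3-year monthly series, where (3, 2) corresponds to the 'DJF' entry above:

import numpy as np
import pandas as pd
import bottleneck as bn

index = pd.date_range("2000-01-01", periods=36, freq="MS")
var = np.arange(36, dtype=float)
nmonths, end_month = 3, 2  # a DJF value is the 3-month sum ending in February
seas = bn.move_sum(var, nmonths, min_count=nmonths)
seas, idx = seas[nmonths - 1:], index[nmonths - 1:]  # drop warm-up NaNs
sel = idx.month == end_month
print(idx[sel], seas[sel])  # one DJF total per complete winter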
def eval(self):
    start_flag = self.base.start_flag
    end_flag = self.base.end_flag
    self.flag_mask = bn.move_sum(
        start_flag.astype(np.int32), self.least_time, min_count=1) >= 1
    end_mask = index2flag([
        i - j for i in self.base.end_indices for j in range(self.min_period)
    ], self.base.size)
    trigger_res = [self.eval_subtrigger(p) for p in self.triggers][::-1]
    fallback_price = np.where(end_flag, self.base[self.fallback_col], 0.0)
    sold_price = fallback_price
    price_flag = end_flag
    for sell_flag, sold_flag, target_price in trigger_res:
        sold_price = np.where(
            sell_flag & sold_flag, target_price,
            np.where(sell_flag, fallback_price, sold_price))
        price_flag = (sell_flag & sold_flag) | (
            (~sell_flag) & price_flag) | end_flag
    # number of days the position must still be held
    arr = price_flag[::-1]
    days_keep = np.arange(len(arr)) + 1
    days_keep = days_keep - fill_zeros_with_last(
        np.where(arr, days_keep, 0))
    days_keep = days_keep[::-1]
    # compute the index of the sell position
    sell_indices = np.arange(self.base.size)
    sell_indices = np.where(
        # already held for at least min_period, or the stock is inside its
        # ending window
        end_mask | (days_keep > self.min_period),
        # current index + holding days
        sell_indices + days_keep,
        # holding days remaining after min_period days
        sell_indices + self.min_period +
        days_keep[np.roll(sell_indices, -self.min_period)])
    # assuming an immediate sale is possible, the final price the strategy
    # would realize
    prefer_sold_price = sold_price[np.arange(self.base.size) + days_keep]
    final_price_hold = np.roll(prefer_sold_price, -1) * (1 - self.extra_cost_rate)
    final_price_hold[-1] = prefer_sold_price[-1] * (1 - self.extra_cost_rate)
    period_hold = days_keep + 2.0
    period_hold[-1] = 1.0
    # final sell price
    limit_sold_price = sold_price[sell_indices]
    # days held until the sale, assuming the position is held
    limit_days_keep = sell_indices - np.arange(self.base.size)
    # actual return, accounting for whether the buy fills; if it does not
    # fill, the actual return is 0
    buy_price = self.base[self.base_col] * self.buy_at
    price = self.base[self.base_col]
    buy_flag = (self.base[self.low_col] <= buy_price) & self.buy_cond
    buy_price_real = buy_price * (1 + self.extra_cost_rate)
    sold_price_real = limit_sold_price * (1 - self.extra_cost_rate)
    actual_rate = np.where(
        start_flag, 1.0,
        np.where(
            # the buy filled at the buy_at price
            buy_flag,
            sold_price_real / buy_price_real,
            1.0)) - 1.0
    prefer_rate = prefer_sold_price * (
        1 - self.extra_cost_rate) / self.base[self.base_col] - 1.0
    # actual number of days capital is tied up; a failed buy counts as
    # holding cash at a ratio of 1 for one day
    actual_days_keep = np.where(buy_flag, limit_days_keep + 1, 1)
    # average daily return on capital
    unit_rate = np.power(1.0 + actual_rate, 1.0 / actual_days_keep) - 1.0
    return [
        actual_rate,
        actual_days_keep,
        unit_rate,
        buy_flag,
        price_flag,
        price,
        buy_price_real,
        sold_price_real,
        # prefer_sold_price,
        # prefer_rate,
        # days_keep + 1.0,  # capital is occupied on the current day, i.e. one extra day of capital
        prefer_sold_price,  # assuming already held with no extra constraints, the price the strategy finally trades at
        days_keep + 1.0,  # assuming already held, the number of additional days capital stays occupied
        (buy_flag | True) & self.trade_filter
    ]
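The `days_keep` computation relies on an external helper, `fill_zeros_with_last`. Its exact definition is not shown here; a common implementation consistent with the usage above (forward-filling the most recent nonzero value) would be:

import numpy as np

def fill_zeros_with_last(arr):
    # Assumed behaviour: each zero is replaced by the nearest nonzero
    # value to its left; zeros before any nonzero value stay zero.
    idx = np.where(arr != 0, np.arange(len(arr)), 0)
    return arr[np.maximum.accumulate(idx)]

print(fill_zeros_with_last(np.array([0, 3, 0, 0, 7, 0])))  # [0 3 3 3 7 7]

Subtracting this forward-filled counter from a running counter yields, at each position, the distance to the most recent flagged position, which is how the reversed pass turns `price_flag` into days-until-sale.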
def time_move_sum(self, dtype, shape, order, axis, window):
    bn.move_sum(self.arr, window, axis=axis)
def time_move_sum(self, dtype, shape, window):
    bn.move_sum(self.arr, window)
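These two methods look like asv (airspeed velocity) benchmark hooks: the unused parameters are asv's parameter grid, and `self.arr` is built in a `setup` method not shown here. A minimal self-contained suite of the same shape, with hypothetical sizes and windows:

import numpy as np
import bottleneck as bn

class MoveSumSuite:
    # each inner list holds the values for one parameter
    params = [[10**5, 10**6], [2, 8, 32]]
    param_names = ["size", "window"]

    def setup(self, size, window):
        # asv calls setup() once per parameter combination
        self.arr = np.random.rand(size)

    def time_move_sum(self, size, window):
        bn.move_sum(self.arr, window)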
# zero align
this_unit = this_unit - this_unit[0]

# bin data at 50 ms
bins = np.arange(0, np.max(this_unit), step=0.05)
binned_spikes, edges = np.histogram(this_unit, bins=bins)
mean_fr = np.sum(binned_spikes) / np.max(this_unit)  # mean rate in spikes/s

# sliding 200 ms window (4 bins of 50 ms), scaled by 5 to give spikes/s
slide = bn.move_sum(binned_spikes, window=4) * 5
# deviation from the mean rate, normalized by the mean
z_slide = (slide - mean_fr) / mean_fr

plt.plot(z_slide)
plt.ylabel('z-scored FR')
plt.xlabel('n windows')
plt.title('Steinmetz CA1 unit %i' % unit)
plt.show()
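A quick cross-check of the windowing step: after dropping the window - 1 warm-up NaNs, bn.move_sum matches a 'valid' convolution with a boxcar kernel (toy Poisson spike counts here are illustrative):

import numpy as np
import bottleneck as bn

x = np.random.poisson(2, size=20).astype(float)
w = 4
a = bn.move_sum(x, w)[w - 1:]                # trailing-window sums
b = np.convolve(x, np.ones(w), mode='valid')  # boxcar equivalent
print(np.allclose(a, b))  # True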