Exemple #1
0
    def plot_window_precision_recall_fscore(self,
                                            indices,
                                            actual_label,
                                            predict_score,
                                            threshold=0.5,
                                            window_size=22,
                                            beta=1):
        '''
        Plot moving-window precision, recall and F-beta curves.

        Samples are grouped by their ``indices`` value, the confusion-matrix
        counts (TP/FP/TN/FN) of every group are summed, and those per-group
        counts are accumulated over a moving window of ``window_size``
        consecutive groups. The curves are meant to show how stable the
        model is along ``indices``.

        Args:
            indices: per-sample grouping key, used as the x axis
                (presumably dates -- verify against callers).
            actual_label: per-sample ground truth, compared against
                True / False.
            predict_score: per-sample model score.
            threshold: a score >= threshold counts as a positive prediction.
            window_size: number of consecutive groups in each window.
            beta: recall weight of the F-beta score.
        '''
        plt.figure(figsize=self.size)
        df = pd.DataFrame({
            "indices": indices,
            "actual_label": actual_label,
            "predict_score": predict_score
        }).sort_values("indices")
        df['predict_label'] = df['predict_score'] >= threshold

        # Per-sample confusion-matrix indicators.
        df['TP'] = (df['actual_label'] == True) & (df['predict_label'] == True)
        df['FP'] = (df['actual_label'] == False) & (df['predict_label']
                                                    == True)
        df['TN'] = (df['actual_label'] == False) & (df['predict_label']
                                                    == False)
        df['FN'] = (df['actual_label'] == True) & (df['predict_label']
                                                   == False)

        # Counts per distinct index value, then summed over the moving window.
        groups = df.groupby("indices")
        unit_df = groups[['TP', 'FP', 'TN', 'FN']].sum()
        window_df = pd.DataFrame({
            "TP": bn.move_sum(unit_df['TP'], window_size),
            "FP": bn.move_sum(unit_df['FP'], window_size),
            "TN": bn.move_sum(unit_df['TN'], window_size),
            "FN": bn.move_sum(unit_df['FN'], window_size),
        })
        metric_df = pd.DataFrame({
            "p":
            window_df["TP"] / (window_df["TP"] + window_df["FP"]),
            "r":
            window_df["TP"] / (window_df["TP"] + window_df["FN"]),
        })

        # F-beta; defined as 0 wherever precision + recall is not positive
        # (this also covers NaN precision/recall).
        beta2 = beta * beta
        metric_df['f'] = np.where(
            metric_df["p"] + metric_df["r"] > 0.0,
            (1 + beta2) * metric_df["p"] * metric_df["r"] /
            (beta2 * metric_df["p"] + metric_df["r"]), 0.0)
        # ``Index.get_values()`` was removed in pandas 1.0; ``.values`` yields
        # the same ndarray on old and new pandas alike.
        metric_df["date"] = unit_df.index.values

        # Skip the first window_size - 1 rows, where the window is not full.
        res_df = metric_df[window_size - 1:]
        indices = Metrics.__try_convert_as_mdate(np.array(res_df['date']))
        plt.plot(indices, res_df['p'], label="precision")
        plt.plot(indices, res_df['r'], label="recall")
        plt.plot(indices, res_df['f'], label="$F_{beta}$".format(beta=beta))
        plt.ylim([0, 1])
        plt.xticks(self.xticks01)
        plt.yticks(self.yticks01)
        plt.legend(loc='upper left', fontsize=self.fontsize)
        Metrics.__apply_sticks(indices=indices, num_sticks=8, rotate=True)
        plt.tight_layout()
        self.show()
 def init(self):
     """Load the window settings from ``self.param`` and build the op table.

     ``window``, ``col`` and ``type`` are required keys (a missing one
     raises KeyError); ``translation`` and ``scale`` are optional and
     default to None.

     ``self.move_window_mapping`` maps an operation name to a callable
     taking ``(c, s, t, w)``: ``c`` is the data handed to the moving-window
     routine and ``w`` its window; ``s`` and ``t`` are applied as a
     multiplicative and an additive term (presumably the scale and
     translation of the column -- verify against the caller).
     """
     cfg = self.param
     self.windows = cfg['window']
     self.cols = cfg['col']
     self.types = cfg['type']
     self.translation_cols = cfg.get('translation')
     self.scale_cols = cfg.get('scale')

     ops = {}
     # Statistics that follow both the multiplicative and additive terms.
     ops["mean"] = lambda c, s, t, w: bn.move_mean(c, w) * s + t
     # Spread measures ignore the additive term.
     ops["std"] = lambda c, s, t, w: bn.move_std(c, w) * s
     ops["var"] = lambda c, s, t, w: bn.move_var(c, w) * s * s
     ops["min"] = lambda c, s, t, w: bn.move_min(c, w) * s + t
     ops["max"] = lambda c, s, t, w: bn.move_max(c, w) * s + t
     # Rank is returned as computed, without s or t.
     ops["rank"] = lambda c, s, t, w: bn.move_rank(c, w)
     # A sum over w items picks up the additive term w times.
     ops["sum"] = lambda c, s, t, w: bn.move_sum(c, w) * s + t * w
     # The remaining entries read self.base.start_indices when they are
     # called, not when this table is built.
     ops["ema"] = lambda c, s, t, w: (
         F.ema(c, 2.0 / (w + 1), start_indices=self.base.start_indices) * s
         + t)
     ops["rsi"] = lambda c, s, t, w: F.rsi(
         c, w, start_indices=self.base.start_indices)
     ops["psy"] = lambda c, s, t, w: F.psy(
         c, w, start_indices=self.base.start_indices)
     ops["bias"] = lambda c, s, t, w: F.bias(
         c, w, start_indices=self.base.start_indices)
     self.move_window_mapping = ops
def Ts_sum(A, n):
    '''
    Moving sum along axis 0 over the last n entries (current one included).

    n is expected to be >= 1; when n < 1 the input A is returned as is.
    Wherever A itself is NaN the output is NaN too.
    '''
    if n < 1:
        # Invalid window length: hand the input back untouched.
        return A
    rolled = bk.move_sum(A, window=n, min_count=1, axis=0)
    missing = np.isnan(A)
    # Re-mask the positions whose own input value was NaN.
    rolled[missing] = np.nan
    return rolled
Exemple #4
0
def _arbr(arr_o, arr_h, arr_l, arr_c, window, start_indices):
    """Compute the AR and BR ratios over a moving window.

    AR is 100 * sum(arr_h - arr_o) / sum(arr_o - arr_l) and BR is
    100 * sum(arr_h - prev) / sum(prev - arr_l), where ``prev`` is ``arr_c``
    shifted forward by one position and the sums run over ``window``
    entries. A zero denominator yields 50. The arrays are presumably
    open / high / low / close prices -- verify against callers.

    ``start_indices`` lists the positions at which a new group starts; the
    first ``window`` entries after each start (and after position 0) are
    set to NaN in both outputs.

    Returns:
        (ar, br) as a tuple of arrays.
    """
    ar_num = bn.move_sum((arr_h - arr_o), window)
    ar_den = bn.move_sum((arr_o - arr_l), window)
    ar = np.where(ar_den != 0, 100 * ar_num / ar_den, 50)

    # np.roll wraps around: slot 0 receives the last element of arr_c.
    prev_close = np.roll(arr_c, 1)
    br_num = bn.move_sum((arr_h - prev_close), window)
    br_den = bn.move_sum((prev_close - arr_l), window)
    br = np.where(br_den != 0, 100 * br_num / br_den, 50)

    total = arr_c.shape[0]
    n_groups = start_indices.shape[0]
    group_ptr = 0
    seen_in_group = 0.0
    for pos in range(total):
        starts_group = group_ptr < n_groups and start_indices[group_ptr] == pos
        if starts_group:
            # A new group begins here: restart the warm-up counter.
            seen_in_group = 0
            group_ptr += 1
        if seen_in_group < window:
            ar[pos] = np.nan
            br[pos] = np.nan
        seen_in_group += 1
    return ar, br
Exemple #5
0
def eddi_1d(eto, ts):
    """
    Compute the Evaporative Demand Drought Index (EDDI) from a reference
    evapotranspiration (eto) series.

    Steps:
        1. Running sum of ``eto`` over ``ts`` consecutive samples.
        2. Empirical probabilities from plotting positions
           (alpha = beta = 1/3), taken along the first axis after the
           series is reshaped to rows of 365 values.
        3. Transformation of the probabilities through the inverse normal
           distribution.

    Arguments:
        eto (:obj:`numpy.ndarray`): eto time series. It is reshaped to
            ``(len // 365, 365)``, so its length must be a multiple of 365
            (i.e. daily data with 365-day years).
        ts (int): time scale, in samples of ``eto``.

    Returns:
        eddi (:obj:`numpy.ndarray`): 1-D EDDI for the period of record.

    """
    print('calculating EDDI')

    # Step 1: accumulate over the requested time scale.
    running_total = bn.move_sum(eto, ts)

    # One row per year, one column per day of year.
    n_years = len(running_total) // 365
    by_year = np.reshape(running_total, (n_years, 365))

    # Step 2: plotting positions across the year axis, NaNs masked.
    third = 1. / 3.
    probabilities = sms.plotting_positions(by_year,
                                           alpha=third,
                                           beta=third,
                                           axis=0,
                                           masknan=True)

    # Step 3: inverse normal, flattened back to a single series.
    return stats.norm.ppf(probabilities).ravel()
Exemple #6
0
def plot_heights_with_err(inputsuffix,label=r'$\tau_{\mathrm{V,Balm}}$',basedir='.',
                          col=1, errcol=2, lowhigh=False, order=5, bigorder=60, 
                          s=None, ylims=None, labelr=False, bigpoints=False,
                          plotfit=True, exclude=exclude, printdate=True, printfit=True):
    """Plot a quantity with error bars against height for six pointings.

    For each pointing in a fixed order, the first file matching
    ``{basedir}/*P{pointing}*{inputsuffix}`` supplies the data and the first
    file matching ``{basedir}/*P{pointing}*locations.dat`` supplies r and z
    (columns 4 and 5). Excluded apertures and NaN data points are dropped.
    The points are drawn on one figure per pointing and on one shared
    figure. With ``plotfit`` a straight line is fitted to a moving weighted
    mean of all points and drawn on the shared figure.

    Parameters:
        inputsuffix: trailing part of the data file name used in the glob.
        label: y-axis label (also used as prefix of the fit text).
        basedir: directory searched for the input files.
        col: column of the data file holding the plotted values.
        errcol: column holding the errors; None gives every point a
            constant error of 0.01.
        lowhigh: if True, errcol and errcol + 1 are read as two separate
            error columns and stacked into a 2-row array.
        order: window of the per-pointing moving std (its result is not
            used by any live code).
        bigorder: window of the moving weighted mean behind the line fit.
        s, bigpoints: not used by any live code in this function.
        ylims: optional (low, high) y limits applied to every axis.
        labelr: if True, title and legend show the mean r of the pointing
            instead of the pointing number.
        plotfit: draw the straight-line fit on the shared figure.
        exclude: per-pointing sequences of 1-indexed apertures to drop.
        printdate: put the current time in the shared figure's title.
        printfit: append the fitted line equation to the title.

    Returns:
        List of axes: the shared axis first, then one per pointing.
    """

    # Accumulators over all pointings: heights, data values and errors.
    zz = np.array([])
    dd = np.array([])
    if lowhigh:
        ee = np.array([[],[]])
    else:
        ee = np.array([])
    axlist = []

    bigax = plt.figure().add_subplot(111)
    bigax.set_xlabel(r'$|z| \mathrm{\ [kpc]}$')
    bigax.set_ylabel(label)
    
    # Pointing numbers in plotting order, with their colour and line style.
    plist = [6,3,4,2,1,5]
    color_list = ['blue','seagreen','sienna','orange','yellowgreen','darkturquoise']
    style_list = ['-','-','-','--','--','--']

    for i in range(6):
        pointing = plist[i]
        color = color_list[i]
        style = style_list[i]  # only referenced by commented-out code below

        # glob(...)[0] raises IndexError when no file matches.
        dat = glob('{}/*P{}*{}'.format(basedir, pointing, inputsuffix))[0]
        print dat
        loc = glob('{}/*P{}*locations.dat'.format(basedir, pointing))[0]
        print loc
        print 'Excluding: ', exclude[pointing-1]
    
        if errcol is not None:
            if lowhigh:
                data, Lerr, Herr = np.loadtxt(dat, usecols=(col,errcol,errcol+1), unpack=True)
                err = np.vstack((Lerr,Herr))
            else:
                data, err = np.loadtxt(dat, usecols=(col,errcol), unpack=True)
        else:
            data = np.loadtxt(dat, usecols=(col,), unpack=True)
            # No error column: give every point the same small error.
            err = np.ones(data.size)*0.01

        r, z = np.loadtxt(loc, usecols=(4,5), unpack=True)
        avgr = np.mean(r)

        ax = plt.figure().add_subplot(111)
        ax.set_xlabel('|Height [kpc]|')
        ax.set_ylabel(label)
        if labelr:
            ax.set_title('{:4.0f} kpc'.format(avgr))
            linelabel = '{:4.0f} kpc'.format(avgr)
        else:
            ax.set_title('{}\nP{}'.format(time.asctime(),pointing))
            linelabel = 'P{}'.format(pointing)

        exarr = np.array(exclude[pointing-1])-1 # because apertures are 1-indexed
        data = np.delete(data,exarr)
        r = np.delete(r,exarr)
        z = np.delete(z,exarr)

        # NaN != NaN, so this mask keeps only the non-NaN data points.
        gidx = data == data
        data = data[gidx]
        z = z[gidx]
        if lowhigh:
            err = np.delete(err,exarr,axis=1)
            err = err[:,gidx]
            ee = np.hstack((ee,err))
        else:
            err = np.delete(err,exarr)
            err = err[gidx]
            ee = np.r_[ee,err]
        
        zz = np.r_[zz,z]
        dd = np.r_[dd,data]
        sidx = np.argsort(z)
        # NOTE: data_pad, z_pad, std, spl and mean below feed only the
        # commented-out plotting lines; no live code reads them.
        data_pad = np.r_[data[sidx][order::-1],data[sidx]]
        z_pad = np.r_[z[sidx][order::-1],z[sidx]]
        # mean = bn.move_mean(data_pad,order)[order+1:]
        std = bn.move_std(data_pad,order)[order+1:]
        spl = spi.UnivariateSpline(z[sidx],data[sidx])
        mean = spl(z[sidx])
        # mean = np.convolve(d[sidx],np.ones(order)/order,'same')
        # std = np.sqrt(np.convolve((d - mean)**2,np.ones(order)/order,'same'))

        bigax.errorbar(z, data, yerr=err, fmt='.', label=linelabel, color=color, capsize=0)
        
        # ax.plot(z[sidx],mean,color=color, ls=style)
        # ax.fill_between(z[sidx],mean-std,mean+std, alpha=0.1, color=color)

        ax.errorbar(z, data, yerr=err, fmt='.', color=color, capsize=0)
        ax.set_xlim(-0.1,2.6)
        
        if ylims is not None:
            ax.set_ylim(*ylims)
        
        axlist.append(ax)
        
    if printdate:
        plot_title = time.asctime()
    else:
        plot_title = ''
    if plotfit:
        sidx = np.argsort(zz)
        # Prepend the first bigorder + 1 sorted values in reverse order so the
        # moving window has data at the low-z edge; the [bigorder+1:] slices
        # below strip that padding again.
        big_data_pad = np.r_[dd[sidx][bigorder::-1],dd[sidx]]
        big_z_pad = np.r_[zz[sidx][bigorder::-1],zz[sidx]]  # not used below
        # NOTE(review): with lowhigh=True, ee has two rows, so ee[sidx] indexes
        # rows rather than points -- this path looks broken; confirm.
        big_e_pad = np.r_[ee[sidx][bigorder::-1],ee[sidx]]
        # Moving mean weighted by 1/err (not 1/err**2).
        big_sum = bn.move_sum(big_data_pad/big_e_pad,bigorder)[bigorder+1:]
        big_weight = bn.move_sum(1./big_e_pad,bigorder)[bigorder+1:]
        big_mean = big_sum/big_weight

        # std = bn.move_std(data_pad,order)[order+1:]
        # big_spl = spi.UnivariateSpline(zz[sidx],dd[sidx],w = 1./ee[sidx]**2, k=k, s=s)
        # big_mean = big_spl(zz[sidx])
        # big_pc = np.polyfit(zz[sidx], dd[sidx], polydeg, w=1./ee[sidx]**2)
        # big_poly = np.poly1d(big_pc)
        # big_mean = big_poly(zz[sidx])
        
        # First-degree polynomial fitted to the moving mean as a function of z.
        p = np.poly1d(np.polyfit(zz[sidx],big_mean,1))
        print p.coeffs
        
        # bigax.plot(zz[sidx],big_mean,'-k',lw=2)
        bigax.plot(zz[sidx],p(zz[sidx]),'--k',lw=2)
        if printdate:
            plot_title += '\n'
        if printfit:
            plot_title += label+'$={:4.2f}z{:+4.2f}$'.format(p.coeffs[0],p.coeffs[1])

    bigax.set_title(plot_title)
    bigax.legend(loc=0, numpoints=1, scatterpoints=1)
    bigax.set_xlim(-0.1,2.6)

    print zz.size

    if ylims is not None:
        bigax.set_ylim(*ylims)

    # Shared axis first, then the per-pointing axes in plotting order.
    axlist = [bigax] + axlist
    
    return axlist
Exemple #7
0
 def __chunk_mask(self, mask):
     """Reduce ``mask`` to one boolean per chunk of ``self.chunk_size`` items.

     An output entry is True when at least one entry of ``mask`` inside the
     corresponding chunk is non-zero. The last chunk may be shorter than
     ``self.chunk_size``.
     """
     n_chunks = (mask.shape[0] - 1) // self.chunk_size + 1
     chunk_starts = np.arange(n_chunks, dtype=np.int32) * self.chunk_size
     # move_sum looks backwards; running it on the reversed mask and
     # reversing the result gives, at each position, the count of set
     # entries from that position forwards within one window.
     reversed_mask = mask.astype(np.int32)[::-1]
     forward_counts = bn.move_sum(reversed_mask,
                                  self.chunk_size,
                                  min_count=1)[::-1]
     return forward_counts[chunk_starts] > 0
Exemple #8
0
    def calculate_season(self):
        """
        Build the seasonal field for ``self.season`` and store it on
        ``self.dset`` as ``seas_var`` (with its ``dates`` coordinate).

        The variable is aggregated with a moving window along the time
        axis (a sum when the dataset units are 'mm', a mean otherwise), the
        time steps whose month closes the requested season are kept, and
        the result is optionally linearly detrended per grid point.
        """
        # season name -> (window length in time steps, month closing the season)
        self.seasons_params = {
            'DJF': (3, 2),
            'JFM': (3, 3),
            'FMA': (3, 4),
            'MAM': (3, 5),
            'AMJ': (3, 6),
            'MJJ': (3, 7),
            'JJA': (3, 8),
            'JAS': (3, 9),
            'ASO': (3, 10),
            'SON': (3, 11),
            'OND': (3, 12),
            'NDJ': (3, 1),
            'Warm Season (Dec. - May)': (6, 5),
            'Cold Season (Jun. - Nov.)': (6, 11),
            'Year (Jan. - Dec.)': (12, 12),
            'Hydro. year (Jul. - Jun.)': (12, 6),
        }

        if not hasattr(self, 'dset_dict'):
            self._read_dset_params()

        # open the file; `dset` becomes an attribute of the object
        self.dset = xray.open_dataset(self.dset_dict['path'])

        # raw values of the variable and the matching time index
        m_var = self.dset[self.variable].data
        index = self.dset['time'].to_index()

        # 'mm' units are accumulated (running SUM), anything else is
        # averaged (running MEAN)
        accumulate = self.dset_dict['units'] in ['mm']
        window, closing_month = self.seasons_params[self.season]
        aggregate = bn.move_sum if accumulate else bn.move_mean
        seas_field = aggregate(m_var, window, min_count=window, axis=0)

        # the first window - 1 steps are NaN after the moving aggregate
        first_full = window - 1
        seas_field = seas_field[first_full:, :, :]
        index = index[first_full:]

        # keep only the time steps that close the season of interest
        iseas = np.where(index.month == closing_month)[0]
        dates = index[iseas]
        seas_field = np.take(seas_field, iseas, axis=0)

        if self.detrend:
            # detrend_linear from matplotlib.mlab is faster than detrend
            # from scipy.signal
            detrended = np.full(seas_field.shape, np.nan)
            # with a mask in the dataset, all-NaN grid points are left as NaN
            has_mask = 'mask' in self.dset.data_vars
            for ilat in range(detrended.shape[1]):
                for ilon in range(detrended.shape[2]):
                    series = seas_field[:, ilat, ilon]
                    if has_mask and np.all(np.isnan(series)):
                        continue
                    # remove the linear trend, then put the mean back
                    detrended[:, ilat, ilon] = \
                        detrend_linear(series) + series.mean()
            seas_field = detrended

        self.dset['dates'] = (('dates',), dates)
        self.dset['seas_var'] = (('dates', 'latitudes', 'longitudes'), seas_field)
Exemple #9
0
    def calculate_season(self):
        """
        Compute the seasonal field for ``self.season`` and attach it to
        ``self.dset`` as ``seas_var``, together with its ``dates``.

        A moving window along the time axis aggregates the variable (sum
        for 'mm' units, mean otherwise); only the time steps falling in the
        season's closing month are kept; optionally each grid point is
        linearly detrended with its mean added back.
        """
        self.seasons_params = seasons_params()

        if not hasattr(self, 'dset_dict'):
            self._read_dset_params()

        # open the dataset; it is kept as an attribute of the object
        fname = self.dset_dict['path']
        self.dset = xray.open_dataset(fname)

        # variable values and time index
        m_var = self.dset[self.variable].data
        index = self.dset['time'].to_index()

        # running SUM for 'mm' units, running MEAN for everything else
        is_total = self.dset_dict['units'] in ['mm']
        season_spec = self.seasons_params[self.season]
        n_steps = season_spec[0]
        if is_total:
            seas_field = bn.move_sum(m_var, n_steps, min_count=n_steps, axis=0)
        else:
            seas_field = bn.move_mean(m_var, n_steps, min_count=n_steps, axis=0)

        # drop the leading NaNs left by move_mean / move_sum
        lead = n_steps - 1
        seas_field = seas_field[lead:, :, :]
        index = index[lead:]

        # select the time steps of the season of interest
        iseas = np.where(index.month == season_spec[1])[0]
        dates = index[iseas]
        seas_field = np.take(seas_field, iseas, axis=0)

        if self.detrend:
            # detrend_linear from matplotlib.mlab is faster than detrend
            # from scipy.signal
            result = np.full(seas_field.shape, np.nan)
            # when the dataset has a mask, grid points that are entirely NaN
            # are skipped and stay NaN
            check_empty = 'mask' in self.dset.data_vars
            n_lat, n_lon = result.shape[1], result.shape[2]
            for ilat in range(n_lat):
                for ilon in range(n_lon):
                    column = seas_field[:, ilat, ilon]
                    if check_empty and np.all(np.isnan(column)):
                        continue
                    result[:, ilat, ilon] = \
                        detrend_linear(column) + column.mean()
        else:
            # no detrending: store the seasonal values unchanged
            result = seas_field

        self.dset['dates'] = (('dates', ), dates)
        self.dset['seas_var'] = (('dates', 'latitudes', 'longitudes'),
                                 result)
Exemple #10
0
    def eval(self):
        """Evaluate the sell triggers and derive per-row trade outcomes.

        Reads start/end flags and price columns from ``self.base``, merges
        the results of ``self.eval_subtrigger`` for every trigger into a sell
        price and a "price available" flag, and from those derives holding
        periods and return rates. Also stores ``self.flag_mask``.

        Returns:
            A list of 11 items, in order: actual_rate, actual_days_keep,
            unit_rate, buy_flag, price_flag, price, buy_price_real,
            sold_price_real, prefer_sold_price, days_keep + 1.0, and
            ``self.trade_filter`` (``buy_flag | True`` is always true, so
            the last item reduces to the trade filter).
        """
        start_flag = self.base.start_flag
        end_flag = self.base.end_flag
        # True wherever a start flag occurred within the trailing
        # least_time rows.
        self.flag_mask = bn.move_sum(
            start_flag.astype(np.int32), self.least_time, min_count=1) >= 1
        # presumably a boolean mask covering the last min_period rows before
        # each end index -- verify against index2flag
        end_mask = index2flag([
            i - j for i in self.base.end_indices
            for j in range(self.min_period)
        ], self.base.size)

        # Reversed so that the loop below applies the first trigger last.
        trigger_res = [self.eval_subtrigger(p) for p in self.triggers][::-1]

        fallback_price = np.where(end_flag, self.base[self.fallback_col], 0.0)
        sold_price = fallback_price
        price_flag = end_flag
        for sell_flag, sold_flag, target_price in trigger_res:
            sold_price = np.where(
                sell_flag & sold_flag, target_price,
                np.where(sell_flag, fallback_price, sold_price))
            price_flag = (sell_flag & sold_flag) | (
                (~sell_flag) & price_flag) | end_flag

        # Number of days the position still needs to be held
        # (fill_zeros_with_last is defined elsewhere; presumably forward-fills
        # zeros with the last non-zero value -- TODO confirm)
        arr = price_flag[::-1]
        days_keep = np.arange(len(arr)) + 1
        days_keep = days_keep - fill_zeros_with_last(
            np.where(arr, days_keep, 0))
        days_keep = days_keep[::-1]

        # Index of the row at which the sale happens
        sell_indices = np.arange(self.base.size)
        sell_indices = np.where(
            # already held for at least min_period, or the stock is already
            # inside its ending interval
            end_mask | (days_keep > self.min_period),
            # current index + days to hold
            sell_indices + days_keep,
            # days to hold as seen min_period days later
            sell_indices + self.min_period +
            days_keep[np.roll(sell_indices, -self.min_period)])

        # Price the strategy would finally sell at, assuming a sale is
        # possible right away
        prefer_sold_price = sold_price[np.arange(self.base.size) + days_keep]

        # NOTE: final_price_hold and period_hold are not read again in this
        # method.
        final_price_hold = np.roll(prefer_sold_price,
                                   -1) * (1 - self.extra_cost_rate)
        final_price_hold[-1] = prefer_sold_price[-1] * (1 -
                                                        self.extra_cost_rate)
        period_hold = days_keep + 2.0
        period_hold[-1] = 1.0

        # Final selling price
        limit_sold_price = sold_price[sell_indices]
        # Days held until the sale, assuming the position is held
        limit_days_keep = sell_indices - np.arange(self.base.size)

        # Actual return rate, taking into account whether the buy succeeded;
        # a failed buy yields an actual return of 0
        buy_price = self.base[self.base_col] * self.buy_at
        price = self.base[self.base_col]
        buy_flag = (self.base[self.low_col] <= buy_price) & self.buy_cond
        buy_price_real = buy_price * (1 + self.extra_cost_rate)
        sold_price_real = limit_sold_price * (1 - self.extra_cost_rate)
        actual_rate = np.where(
            start_flag,
            1.0,
            np.where(
                # the buy at the buy_at price succeeded
                buy_flag,
                sold_price_real / buy_price_real,
                1.0)) - 1.0
        # NOTE: prefer_rate is computed but not returned.
        prefer_rate = prefer_sold_price * (
            1 - self.extra_cost_rate) / self.base[self.base_col] - 1.0
        # Days the capital is actually tied up; a failed buy counts as
        # holding the capital for 1 day at a ratio of 1
        actual_days_keep = np.where(buy_flag, limit_days_keep + 1, 1)

        # Average daily return rate of the capital
        unit_rate = np.power(1.0 + actual_rate, 1.0 / actual_days_keep) - 1.0

        return [
            actual_rate,
            actual_days_keep,
            unit_rate,
            buy_flag,
            price_flag,
            price,
            buy_price_real,
            sold_price_real,
            # prefer_sold_price,
            # prefer_rate,
            # days_keep + 1.0, # capital is tied up on the current day, i.e. one day of capital is consumed
            prefer_sold_price,  # price the strategy would finally trade at, assuming the position is already held and no extra constraints apply
            days_keep + 1.0,  # days the capital still stays tied up, assuming the position is already held
            (buy_flag | True) & self.trade_filter
        ]
Exemple #11
0
 def time_move_sum(self, dtype, shape, order, axis, window):
     """Run ``bn.move_sum`` on ``self.arr`` with the given window and axis.

     ``dtype``, ``shape`` and ``order`` are not used in this body;
     presumably they are benchmark parameters consumed when ``self.arr``
     is built elsewhere -- TODO confirm. The result is discarded.
     """
     bn.move_sum(self.arr, window, axis=axis)
Exemple #12
0
 def time_move_sum(self, dtype, shape, window):
     """Run ``bn.move_sum`` on ``self.arr`` with the given window.

     ``dtype`` and ``shape`` are not used in this body; presumably they
     are benchmark parameters consumed when ``self.arr`` is built
     elsewhere -- TODO confirm. The result is discarded.
     """
     bn.move_sum(self.arr, window)
Exemple #13
0
    # zero align: shift so the first entry of this_unit becomes 0
    # (this_unit presumably holds spike times in seconds -- TODO confirm)

    this_unit = this_unit - this_unit[0]

    # bin data with a bin width of 0.05 (50 ms if the times are in seconds)

    bins = np.arange(0, np.max(this_unit), step=0.05)

    binned_spikes, edges = np.histogram(this_unit, bins=bins)

    # overall rate: total binned count divided by the last (largest) time

    mean_fr = np.sum(binned_spikes) / np.max(this_unit)

    # sliding sum over 4 bins (4 * 0.05 = 0.2 time units); the factor 5
    # rescales that count to a rate per 1.0 time unit
    # NOTE(review): this is a 0.2-long window, not a 1-long one -- confirm
    # which was intended

    slide = bn.move_sum(binned_spikes, window=4) * 5

    # NOTE(review): this is the relative deviation from the mean rate
    # (divided by the mean, not by a standard deviation), although the
    # axis label below calls it z-scored

    z_slide = (slide - mean_fr) / mean_fr

    plt.plot(z_slide)
    plt.ylabel('z-scored FR')
    plt.xlabel('n windows')

    plt.title('Steinmetz CA1 unit %i' % unit)

    plt.show()

#%%

#for unit in range(len(amyg_units)):