def get_obs_quant_counts(df, quantiles=([.10, .30, .50, .70, .90])):
    """Return scaled per-bin trial counts and the RT values at `quantiles`.

    Parameters
    ----------
    df : pd.Series or object exposing ``.rt``
        A Series is used directly as the RT sample; anything else must
        provide the sample as ``df.rt`` (e.g. a DataFrame with an ``rt``
        column).
    quantiles : sequence of float
        Increasing cumulative probabilities on the 0-1 scale.

    Returns
    -------
    observed : np.ndarray of int
        ``ceil(bin_width * len(rt) * .94)`` for each of the
        ``len(quantiles) + 1`` bins delimited by 0, `quantiles` and 1.
    obs_quant : array
        RT values at `quantiles`, as computed by ``mq``.
    """
    rt = df.copy() if isinstance(df, pd.Series) else df.rt.copy()

    # np.diff yields an ndarray, so the scaling below is elementwise; a plain
    # Python list would be *repeated* by ``* len(rt)`` and then fail on the
    # float multiplication.
    edges = np.concatenate(([0.0], quantiles, [1.0]))
    inter_quantiles = np.diff(edges)

    obs_quant = mq(rt, prob=quantiles)
    # NOTE(review): the .94 factor is carried over unchanged; its rationale is
    # not visible in this chunk.
    observed = np.ceil(inter_quantiles * len(rt) * .94).astype(int)
    return observed, obs_quant
def get_exp_counts(simdf, obs_quant, n_obs, quantiles=([.10, .30, .50, .70, .90])):
    """Return expected per-bin counts for simulated RTs, plus their quantiles.

    Parameters
    ----------
    simdf : pd.Series or object exposing ``.rt``
        Simulated RT sample (a Series is used directly, otherwise
        ``simdf.rt`` is read).
    obs_quant : iterable of float
        RT cut points; the bins are delimited by these values.
    n_obs : number
        Multiplier applied to the per-bin proportions.
    quantiles : sequence of float
        Probabilities at which the simulated RT quantiles are computed.

    Returns
    -------
    expected : np.ndarray
        ``ceil(proportion * n_obs)`` for each of ``len(obs_quant) + 1`` bins.
    exp_quant : array
        Quantiles of the simulated RTs at `quantiles`, from ``mq``.
    """
    source = simdf if type(simdf) == pd.Series else simdf.rt
    simrt = source.copy()

    exp_quant = mq(simrt, prob=quantiles)

    # Cumulative proportion of simulated RTs at each cut point, padded with 0
    # and 1. pscore is presumably scipy.stats.percentileofscore (a percentage,
    # hence the .01 scaling) -- confirm against the file's imports.
    cumulative = [0]
    for cut in obs_quant:
        cumulative.append(pscore(simrt, cut) * .01)
    cumulative.append(1)

    expected = np.ceil(np.diff(cumulative) * n_obs)
    return expected, exp_quant
def rangl_data(self, data):
    """Build the observed-data vector (called by __make_dataframes__).

    Parameters
    ----------
    data : pd.DataFrame
        Trial-level data. The code reads the columns ``ttype``,
        ``response``, ``acc`` and ``rt``, plus ``ssd`` when stop-trial
        accuracy is included.

    Returns
    -------
    np.ndarray
        ``np.hstack`` of, in order: mean ``acc`` over ``ttype == "go"`` rows;
        stop-trial accuracy (only when ``self.data`` has an ``ssd`` column);
        quantiles of ``rt`` for rows with ``response == 1 & acc == 1``;
        quantiles of ``rt`` for rows with ``response == 1 & acc == 0``.
        Quantiles are taken at ``self.model.quantiles``.

    Raises
    ------
    ValueError
        If an ``ssd`` column is present and ``self.model.ssd_method`` is
        neither ``'all'`` nor ``'central'``.
    """
    quantiles = self.model.quantiles

    go_acc = data.query('ttype=="go"').acc.mean()
    acc1_rt = data.query('response==1 & acc==1').rt.values
    acc0_rt = data.query('response==1 & acc==0').rt.values
    data_vector = [go_acc, mq(acc1_rt, prob=quantiles), mq(acc0_rt, prob=quantiles)]

    # NOTE(review): the column check is made on self.data rather than on the
    # `data` argument, as in the original -- confirm that is intended.
    if 'ssd' in self.data.columns:
        stopdf = data.query('ttype=="stop"')
        ssd_method = self.model.ssd_method
        # Select 'acc' *before* averaging: averaging the whole frame also
        # tries to average non-numeric columns such as 'ttype', which raises
        # on recent pandas releases.
        if ssd_method == 'all':
            sacc = stopdf.groupby('ssd')['acc'].mean().values
        elif ssd_method == 'central':
            sacc = np.array([stopdf['acc'].mean()])
        else:
            raise ValueError(
                "ssd_method must be 'all' or 'central', got %r" % (ssd_method,))
        data_vector.insert(1, sacc)

    return np.hstack(data_vector)
def get_observed_vector(rt, quantiles=array([10, 30, 50, 70, 90])):
    """Return binned trial counts, RT quantiles and the total count.

    Parameters
    ----------
    rt : array-like
        RT values.
    quantiles : array-like of number
        Increasing percentiles on the 0-100 scale. Any sequence is accepted
        (it is converted to an ndarray before the percent -> proportion
        scaling).

    Returns
    -------
    list
        ``[ocounts, rtquant, n_obs]`` where ``ocounts`` holds
        ``ceil(bin_width * .01 * len(rt))`` for each of the
        ``len(quantiles) + 1`` bins delimited by 0, `quantiles` and 100,
        ``rtquant`` holds the RT values at those quantiles (from ``mq``) and
        ``n_obs`` is ``ocounts.sum()``. Because each bin is rounded up,
        ``n_obs`` can exceed ``len(rt)``.
    """
    quantiles = np.asarray(quantiles)

    # Widths of the bins between consecutive percentiles, including the two
    # outer bins that run to 0 and to 100.
    inter_quantiles = np.diff(np.concatenate(([0], quantiles, [100])))

    rtquant = mq(rt, prob=quantiles * .01)
    ocounts = np.ceil(inter_quantiles * .01 * len(rt)).astype(int)
    n_obs = np.sum(ocounts)
    return [ocounts, rtquant, n_obs]
def loglikelihood(N, mug, mub, mu=0.5, niter=1000):
    """Simulate a clipped cumulative-sum statistic and print its quantiles.

    For each of `niter` iterations, draws ``randn(N) + mu`` and runs the
    recursion ``l[i] = max(0, l[i-1] + (mub - mug) * (x[i] - (mug + mub) / 2))``
    starting from ``l[0] = 0.0``, recording the largest value reached.

    The 0.25, 0.5, 0.75, 0.95, 0.975 and 0.99 quantiles of those maxima,
    each divided by ``abs(mug - mub)``, are printed. Nothing is returned.

    Parameters
    ----------
    N : int
        Number of draws per iteration.
    mug, mub : float
        The two means entering the increment.
    mu : float
        Shift added to the standard-normal draws.
    niter : int
        Number of simulated sequences.
    """
    from scipy.stats.mstats import mquantiles as mq

    drift = mub - mug
    midpoint = 0.5 * (mug + mub)

    arr = []
    for _ in range(niter):
        ran = randn(N) + mu
        level = 0.0
        peak = 0.0
        # NOTE(review): ran[0] is never used -- the recursion covers indices
        # 1..N-1, as in the original loop.
        for x in ran[1:]:
            level = max(0, level + drift * (x - midpoint))
            if level > peak:
                peak = level
        arr.append(peak)

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print([q / abs(mug - mub)
           for q in mq(arr, [0.25, 0.5, 0.75, 0.95, 0.975, 0.99])])
def loglikelihood(N, mug, mub, mu=0.5, niter=1000):
    """Simulate a clipped cumulative-sum statistic; print quantiles, return maxima.

    For each of `niter` iterations, draws ``randn(N) + mu`` and runs the
    recursion ``l[i] = max(0, l[i-1] + (mub - mug) * (x[i] - (mug + mub) / 2))``
    starting from ``l[0] = 0.0``, recording the largest value reached.

    The 0.25, 0.5, 0.75, 0.95, 0.975 and 0.99 quantiles of those maxima,
    each divided by ``abs(mug - mub)``, are printed.

    Parameters
    ----------
    N : int
        Number of draws per iteration.
    mug, mub : float
        The two means entering the increment.
    mu : float
        Shift added to the standard-normal draws.
    niter : int
        Number of simulated sequences.

    Returns
    -------
    np.ndarray
        The per-iteration maxima (unscaled), length `niter`.
    """
    from scipy.stats.mstats import mquantiles as mq

    drift = mub - mug
    midpoint = 0.5 * (mug + mub)

    arr = []
    for _ in range(niter):
        ran = randn(N) + mu
        level = 0.0
        peak = 0.0
        # NOTE(review): ran[0] is never used -- the recursion covers indices
        # 1..N-1, as in the original loop.
        for x in ran[1:]:
            level = max(0, level + drift * (x - midpoint))
            if level > peak:
                peak = level
        arr.append(peak)

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print([q / abs(mug - mub)
           for q in mq(arr, [0.25, 0.5, 0.75, 0.95, 0.975, 0.99])])
    return array(arr)
def __init_analyze_functions__(self):
    """Attach the analysis helpers used to build the yhat vector.

    Reads ``self.quantiles`` (captured once, here), ``self.dt`` (looked up
    each time a helper is called) and ``self.kind``. Sets the attributes
    ``go_resp``, ``ss_resp_up``, ``ss_resp_lo``, ``go_RT``, ``ss_RT``,
    ``RTQ`` and ``ss_resp``.
    """
    prob = self.quantiles
    go_axis, ss_axis = 2, 3

    # Index of the first sample at/above `upper` along the given axis, scaled
    # by dt. np.argmax returns 0 when no sample qualifies, so such entries
    # come out as 0.
    self.go_resp = lambda trace, upper: np.argmax((trace.T >= upper).T, axis=go_axis) * self.dt
    self.ss_resp_up = lambda trace, upper: np.argmax((trace.T >= upper).T, axis=ss_axis) * self.dt
    # Same, for the first sample at/below 0; the second argument is unused
    # and only keeps the call signature parallel to ss_resp_up.
    self.ss_resp_lo = lambda trace, x: np.argmax((trace.T <= 0).T, axis=ss_axis) * self.dt

    # Add the onset time to each value; entries equal to 0 become NaN
    # (0 * nan), all other entries are multiplied by 1.
    self.go_RT = lambda ontime, rbool: ontime[:, na] + (rbool*np.where(rbool==0., np.nan, 1))
    self.ss_RT = lambda ontime, rbool: ontime[:, :, na] + (rbool*np.where(rbool==0., np.nan, 1))

    # Quantiles of the values in x[0] that fall below x[1], for each x in
    # zpd. Built as a list: map() is a lazy iterator on Python 3 and cannot
    # be indexed or stacked directly.
    self.RTQ = lambda zpd: [mq(x[0][x[0] < x[1]], prob) for x in zpd]

    self.ss_resp = self.ss_resp_up if 'irace' in self.kind else self.ss_resp_lo
def __init_analyze_functions__(self):
    """Attach the analysis helpers used to build the yhat vector.

    Reads ``self.quantiles`` (captured once, here), ``self.dt`` (looked up
    each time a helper is called) and ``self.kind``. Sets the attributes
    ``go_resp``, ``ss_resp_up``, ``ss_resp_lo``, ``go_RT``, ``ss_RT``,
    ``RTQ``, ``chunk`` and ``ss_resp``.
    """
    prob = self.quantiles
    go_axis, ss_axis = 2, 3

    def go_resp(trace, upper):
        # First index along go_axis at/above `upper`, scaled by dt
        # (np.argmax gives 0 when nothing qualifies).
        reached = (trace.T >= upper).T
        return np.argmax(reached, axis=go_axis) * self.dt

    def ss_resp_up(trace, upper):
        # Same as go_resp but along ss_axis.
        reached = (trace.T >= upper).T
        return np.argmax(reached, axis=ss_axis) * self.dt

    def ss_resp_lo(trace, x):
        # First index along ss_axis at/below 0, scaled by dt; `x` is unused.
        reached = (trace.T <= 0.).T
        return np.argmax(reached, axis=ss_axis) * self.dt

    def go_RT(ontime, gCross):
        # Entries equal to 0 become NaN (0 * nan); others are kept as-is
        # before the onset time is added.
        keep = np.where(gCross == 0., np.nan, 1.)
        return ontime[:, na] + (gCross * keep)

    def ss_RT(ontime, ssCross):
        # Same as go_RT with one extra leading axis on `ontime`.
        keep = np.where(ssCross == 0., np.nan, 1.)
        return ontime[:, :, na] + (ssCross * keep)

    def RTQ(zpd):
        # Quantiles of the values below the paired deadline, per (rt, deadline).
        out = []
        for rt, deadline in zpd:
            out.append(mq(rt[rt < deadline], prob))
        return out

    def chunk(x, nl):
        # Consecutive slices of length `nl` (the last may be shorter), each
        # converted to an array.
        pieces = []
        for start in range(0, len(x), nl):
            pieces.append(array(x[start:start + nl]))
        return pieces

    self.go_resp = go_resp
    self.ss_resp_up = ss_resp_up
    self.ss_resp_lo = ss_resp_lo
    self.go_RT = go_RT
    self.ss_RT = ss_RT
    self.RTQ = RTQ
    self.chunk = chunk

    if 'irace' in self.kind:
        self.ss_resp = self.ss_resp_up
    else:
        self.ss_resp = self.ss_resp_lo
#!/usr/local/bin/env python
from __future__ import division
from copy import deepcopy
import numpy as np
from numpy import array
from numpy.random import sample as rs
from numpy import hstack as hs
from numpy import newaxis as na
from scipy.stats.mstats import mquantiles as mq

# NOTE(review): the helpers below read module-level names `dt` and `prob`
# that are not defined in the visible code -- they must be set elsewhere
# before these helpers are called.

# Index of the first sample at/above `a` along the given axis, scaled by dt
# (np.argmax gives 0 when no sample qualifies).
resp_up = lambda trace, a: np.argmax((trace.T >= a).T, axis=2) * dt
ss_resp_up = lambda trace, a: np.argmax((trace.T >= a).T, axis=3) * dt
# Index of the first sample at/below 0 along axis 3, scaled by dt.
resp_lo = lambda trace: np.argmax((trace.T <= 0).T, axis=3) * dt
# Add onset time; entries of rbool equal to 0 become NaN (0 * nan).
RT = lambda ontime, rbool: ontime[:, na] + (rbool * np.where(rbool == 0, np.nan, 1))
# Quantiles (at `prob`) of the values in x[0] below x[1], for each x in zpd.
# Built as a list: map() is a lazy iterator on Python 3.
RTQ = lambda zpd: [mq(x[0][x[0] < x[1]], prob) for x in zpd]


def vectorize_params(p, pc_map, ncond=1):
    """Expand parameter values in `p` into arrays; `p` is modified in place.

    Parameters
    ----------
    p : dict
        Parameter values; must contain 'a', 'tr', 'vd', 'vi' and 'xb'.
    pc_map : dict
        Maps a parameter name to a sequence of per-condition key names.
    ncond : int
        Number of conditions.

    Returns
    -------
    dict
        The same `p` object. The five base parameters are multiplied by
        ``np.ones(ncond)``. Then, for each ``(pkey, pkc)`` in `pc_map`:
        if ``pkc[0]`` is a key of `p`, ``p[pkey]`` becomes the array of
        ``p[k]`` for ``k`` in ``pkc``; otherwise ``p[pkey]`` is multiplied by
        ``np.ones(len(pkc))``.
    """
    for name in ('a', 'tr', 'vd', 'vi', 'xb'):
        p[name] = p[name] * np.ones(ncond)

    for pkey, pkc in pc_map.items():
        if ncond == 1:
            # NOTE(review): only the first pc_map entry is wrapped before the
            # loop exits (behaviour kept from the original `break`).
            p[pkey] = np.asarray([p[pkey]])
            break
        if pkc[0] not in p:
            p[pkey] = p[pkey] * np.ones(len(pkc))
        else:
            p[pkey] = array([p[pc] for pc in pkc])
    return p
def nonparametric_summary(
    *,
    series: pd.Series,
    alphap: float = 1 / 3,
    betap: float = 1 / 3,
    decimals: int = 3
) -> pd.Series:
    """
    Calculate empirical quantiles, fences and outliers for a series.

    NaN values are masked before the quantiles are computed.

    Parameters
    ----------
    series : pd.Series
        The input series.
    alphap : float = 1/3
        Plotting positions.
    betap : float = 1/3
        Plotting positions.
    decimals : int = 3
        The number of decimal places for rounding every reported value.

    scipy.stats.mstats.mquantiles plotting positions:
        R method 1, SAS method 3:
            not yet implemented in scipy.stats.mstats.mquantiles
        R method 2, SAS method 5:
            not yet implemented in scipy.stats.mstats.mquantiles
        R method 3, SAS method 2:
            not yet implemented in scipy.stats.mstats.mquantiles
        R method 4, SAS method 1:
            alphap=0, betap=1
        R method 5:
            alphap=0.5, betap=0.5
        R method 6, SAS method 4, Minitab, SPSS:
            alphap=0, betap=0
        R method 7, Splus 3.1:
            alphap=1, betap=1
        R method 8:
            alphap=0.33, betap=0.33; is the recommended, default method
        R method 9:
            alphap=0.375, betap=0.375
        Cunnane's method:
            alphap=0.4, betap=0.4
        APL method;
            alphap=0.35, betap=0.35

    Returns
    -------
    pd.Series containing:
        lower outer fence : float
            lower quartile - 3 * interquartile range
        lower inner fence : float
            lower quartile - 1.5 * interquartile range
        lower quartile : float
        median : float
        upper quartile : float
        upper inner fence : float
            upper quartile + 1.5 * interquartile range
        upper outer fence : float
            upper quartile + 3 * interquartile range
        interquartile range : float
        inner outliers : List[float]
            values strictly outside the inner fences
        outer outliers : List[float]
            values strictly outside the outer fences
        minimum value : float
        maximum value : float
        count : int
            number of non-NaN values

    Examples
    --------
    Example 1
    >>> import datasense as ds
    >>> series = ds.random_data()
    >>> series = ds.nonparametric_summary(series=series)
    >>> print(series)

    Example 2
    >>> series = ds.nonparametric_summary(
    ...     series=series,
    ...     alphap=0,
    ...     betap=0
    ... )
    >>> print(series)
    """
    xm = np.ma.masked_array(series, mask=np.isnan(series))
    # One call for all three quantiles; unpacking yields scalars, so the
    # fence comparisons below are plain scalar comparisons.
    q25, q50, q75 = mq(
        xm, prob=[0.25, 0.50, 0.75], alphap=alphap, betap=betap
    )
    iqr = q75 - q25
    lof = q25 - iqr * 3
    lif = q25 - iqr * 1.5
    uif = q75 + iqr * 1.5
    uof = q75 + iqr * 3
    return pd.Series({
        "lower outer fence": round(lof, decimals),
        "lower inner fence": round(lif, decimals),
        "lower quartile": round(q25, decimals),
        "median": round(q50, decimals),
        "upper quartile": round(q75, decimals),
        "upper inner fence": round(uif, decimals),
        "upper outer fence": round(uof, decimals),
        "interquartile range": round(iqr, decimals),
        "inner outliers":
            [round(x, decimals) for x in series if x < lif or x > uif],
        "outer outliers":
            [round(x, decimals) for x in series if x < lof or x > uof],
        # Honour `decimals` here too (previously hard-coded to 3).
        "minimum value": round(series.min(), decimals),
        "maximum value": round(series.max(), decimals),
        "count": series.count()
    })