def evaluate_successful_recall_dist(proto, exp_data, model_data, ax=None):
    """Compare and plot success-count distributions of model and experiment.

    Both data sets are passed through ``convert(..., 'success_count')``.
    When ``exp_data`` is given, its converted data is compared to the model's
    with ``evaluate_dist_overlap`` and plotted with ``plot_dist_stats``.
    The model data is plotted with ``plot_dist_stats`` in either case.

    ``proto`` is accepted but not used by this function.
    ``ax`` is forwarded unchanged to ``plot_dist_stats``.
    """
    # NOTE(review): the source for this function was collapsed onto one line;
    # the model plot is assumed to be unconditional because it does not
    # depend on exp_data -- confirm against the original layout.
    model_counts = convert(model_data, 'success_count')

    has_experiment = exp_data is not None
    if has_experiment:
        exp_counts = convert(exp_data, 'success_count')
        evaluate_dist_overlap(exp_counts.data, model_counts.data)
        plot_dist_stats(exp_counts.data, ax)

    plot_dist_stats(model_counts.data, ax)
def transpositions(data):
    """Return the distribution of transposition distances.

    The input is converted to the ``'melted'`` format and rows containing
    NaN are dropped. The transposition distance of a row is its
    ``recalled_pos`` value minus its ``pos`` index level.

    Returns a ``pandas.DataFrame`` indexed by the integer distance with
    columns:

    * ``p_transpose`` -- fraction of rows with that distance,
    * ``ci_low`` / ``ci_upp`` -- distance from ``p_transpose`` down/up to the
      bounds returned by ``proportion_confint(..., method='beta')``.
    """
    data = convert(data, 'melted').data.dropna()
    y = data['recalled_pos'] - data.index.get_level_values('pos')
    offsets = y.values

    # One unit-wide bin centred on every integer from min to max inclusive.
    # np.arange excludes its stop value, so the stop must be max + 1.5 for
    # the final edge (max + 0.5) to be generated; stopping at max + 0.5
    # silently dropped the largest distance from the histogram.
    edges = np.arange(min(offsets) - 0.5, max(offsets) + 1.5)
    h, edges = np.histogram(offsets, edges)

    # Bin centres are the integer distances themselves.
    x = np.asarray(edges[:-1] + 0.5 * np.diff(edges), dtype=int)
    p = h / float(len(y))
    ci_low, ci_upp = proportion_confint(h, len(y), method='beta')
    return pd.DataFrame(
        {
            'p_transpose': p,
            'ci_low': p - ci_low,
            'ci_upp': ci_upp - p,
        },
        index=x)
def plot_successful_recalls(n_successfull, n_items, ax=None, label=None,
                            **kwargs):
    """Plot a histogram of success counts and mark their mean.

    ``n_successfull`` is converted with ``convert(..., 'success_count')`` and
    the resulting ``.data`` is plotted as a density-normalised histogram with
    ``n_items + 1`` bins over ``(-0.5, n_items + 0.5)`` at ``alpha=0.5``.
    A vertical line is drawn at the mean of the same values.

    Parameters
    ----------
    n_successfull
        Data accepted by ``convert``.
    n_items
        Used to derive the number of bins and the histogram range.
    ax
        Axes to draw on; ``plt.gca()`` is used when ``None``.
    label
        Legend label of the histogram. The mean line is labelled
        ``label + ' (mean)'`` when a label is given.
    **kwargs
        Forwarded to both ``ax.hist`` and ``ax.axvline``.
    """
    n_successfull = convert(n_successfull, 'success_count').data
    if ax is None:
        ax = plt.gca()

    ax.hist(n_successfull, bins=n_items + 1, range=(-0.5, n_items + 0.5),
            density=True, alpha=0.5, label=label, **kwargs)

    if label is not None:
        label = label + ' (mean)'

    # n_successfull already is the converted payload; taking `.data` of it a
    # second time relied on the raw-buffer attribute that pandas removed in
    # 1.0. Average the values directly instead.
    # NOTE(review): assumes the payload is array-like (Series/ndarray) --
    # confirm against convert().
    mean_value = np.mean(np.asarray(n_successfull))
    ax.axvline(x=mean_value, label=label, **kwargs)
def p_first_recall(recalls):
    """Probability of first recall.

    The input is converted to the ``'melted'`` format. Rows at output
    position ``pos == 0`` are grouped by ``recalled_pos`` and counted; the
    counts are divided by the number of such rows.

    Returns a ``pandas.DataFrame`` whose index is ``recalled_pos + 1`` with
    columns ``p_first``, ``ci_low`` and ``ci_upp``. The latter two hold the
    distance from ``p_first`` to the lower/upper bound returned by
    ``proportion_confint(..., method='beta')``.
    """
    recalls = convert(recalls, 'melted').data
    first_recalls = recalls.xs(0, level='pos')
    hist = first_recalls.groupby('recalled_pos').size()

    # Give every position from 0 up to the largest observed one an explicit
    # count. The previous fill started at 1, so a position 0 that was never
    # recalled first got no row, and it relied on Series.append, which was
    # removed in pandas 2.0. The union keeps every observed key.
    all_positions = hist.index.union(
        pd.RangeIndex(int(hist.index.max()) + 1))
    hist = hist.reindex(all_positions, fill_value=0).sort_index()

    n = len(first_recalls)
    ci_low, ci_upp = proportion_confint(hist, n, method='beta')
    hist = hist / n

    hist = hist.to_frame(name='p_first')
    hist['ci_low'] = hist['p_first'] - ci_low
    hist['ci_upp'] = ci_upp - hist['p_first']
    # Shift the index by one (0-based -> 1-based positions).
    hist.index += 1
    hist.name = "Probabilitiy of first recall"
    return hist
def crp(recalls, max_out_pos=None):
    """Conditional response probability.

    The input is converted to the ``'melted'`` format. Within each trial,
    repeated recalls of the same position are replaced by NaN, and the lag
    between consecutive recalls is computed. Per trial and lag, the number
    of observed transitions is divided by the number of transitions that
    were still possible (at least 1), and the ratios are averaged over
    trials.

    Parameters
    ----------
    recalls
        Data accepted by ``convert``.
    max_out_pos
        When truthy, only rows whose ``pos`` index level is smaller than
        this value are kept (applied after the lags are computed).

    Returns
    -------
    pandas.DataFrame
        Indexed by lag, containing the per-lag means (including ``crp``)
        plus ``ci_low`` / ``ci_upp``: the distance from ``crp`` to the two
        values returned by ``bootstrap_ci(x, np.mean)``. ``crp`` at lag 0 is
        set to NaN.
    """
    recalls = convert(recalls, 'melted').data
    n_pos = recalls.index.get_level_values('pos').unique().size

    def exclude_repetitions(x):
        # Work on a float copy: writing NaN into the array backing the group
        # mutates shared data and fails when that array is read-only
        # (e.g. with pandas Copy-on-Write enabled).
        values = x.to_numpy(dtype=float, copy=True)
        for i, y in enumerate(values):
            if y in values[:i]:
                values[i] = np.nan
        return values

    recalls = recalls.sort_index()
    recalls['recalled_pos'] = recalls['recalled_pos'].groupby(
        level='trial').transform(exclude_repetitions)
    # Keep only the 'recalled_pos' column.
    for k in recalls:
        if k != 'recalled_pos':
            recalls.drop([k], axis=1, inplace=True)
    # Lag = next recalled position minus the current one, within a trial.
    recalls['lag'] = -recalls.groupby(level='trial').diff(-1)
    if max_out_pos:
        recalls = recalls.iloc[
            recalls.index.get_level_values('pos') < max_out_pos]

    # Observed transitions per (trial, lag).
    numerator = pd.DataFrame(recalls.reset_index().groupby(
        ['trial', 'lag']).size()).rename(columns={0: 'num'})

    def get_denom(x):
        # Count, per lag, how often that lag was still available given the
        # positions not yet recalled in this trial.
        to_recall = list(range(n_pos))
        possible_lags = pd.DataFrame({
            'lag': np.arange(-n_pos + 1, n_pos),
            'denom': np.zeros(2 * n_pos - 1)
        })
        x = x.dropna().sort_values(by='pos')
        for pos in x['recalled_pos'].astype(int):
            to_recall.remove(pos)
            for y in to_recall:
                # Single .loc assignment instead of chained indexing, which
                # may update a temporary copy and leave the counts at zero.
                possible_lags.loc[
                    possible_lags['lag'] == y - pos, 'denom'] += 1
        return possible_lags

    denominator = recalls.reset_index().groupby('trial').apply(get_denom)
    denominator = denominator.reset_index().set_index(['trial', 'lag'])
    # Drop helper index columns that reset_index may have introduced.
    for key in ('level_1', 'level_2'):
        if key in denominator:
            denominator = denominator.drop([key], axis=1)
    # Avoid division by zero for lags that were never possible.
    denominator = np.maximum(denominator, 1)

    df = pd.merge(numerator, denominator, left_index=True, right_index=True,
                  how='right').fillna(0)
    assert (df['num'] <= df['denom']).all()
    df['crp'] = (df['num'] / df['denom']).fillna(0.)
    crp_data = df.groupby(level='lag').mean()
    crp_data.loc[0, 'crp'] = np.nan

    ci = df['crp'].groupby(
        level='lag').apply(lambda x: pd.Series(bootstrap_ci(x, np.mean)))
    crp_data['ci_low'] = crp_data['crp'] - ci.xs(0, level=1)
    crp_data['ci_upp'] = ci.xs(1, level=1) - crp_data['crp']
    crp_data.name = "Conditional response probability"
    return crp_data
def serial_pos_curve(recalls, strict=True):
    """Serial position curve.

    Converts ``recalls`` to the ``'serial-pos-strict'`` format when
    ``strict`` is truthy and to ``'serial-pos'`` otherwise, and returns the
    ``.data`` attribute of the conversion result.
    """
    if strict:
        target_format = 'serial-pos-strict'
    else:
        target_format = 'serial-pos'
    converted = convert(recalls, target_format)
    return converted.data