# Shared imports for the analysis scripts in this section; `util` is the
# repository's local helper module.
import os
from datetime import timedelta

import numpy as np
import pandas as pd

import util


def run(dest, data_path, seed):
    """Bootstrap the mean mass? accuracy for each experiment version, both
    overall and separately for each value of kappa0."""
    np.random.seed(seed)
    # util.load_human returns a dict of response DataFrames keyed by
    # experiment block ('A', 'B', 'C', 'all')
    human = util.load_human(data_path)['C'].dropna(subset=['mass? response'])

    results = []
    for kappa in [-1.0, 1.0, 'all']:
        if kappa == 'all':
            correct = human
        else:
            correct = human.groupby('kappa0').get_group(kappa)

        accuracy = correct\
            .groupby('version')['mass? correct']\
            .apply(util.bootstrap_mean)\
            .unstack(-1)\
            .reset_index()
        accuracy['kappa0'] = kappa
        results.append(accuracy)

    results = pd.concat(results)\
        .set_index(['version', 'kappa0'])\
        .sortlevel()

    results.to_csv(dest)
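# These scripts lean on a handful of helpers from the util module that are
# not reproduced here. As a reference, the following is a minimal sketch of
# what util.bootstrap_mean is assumed to do: bootstrap the mean of a set of
# responses and return the requested percentiles of the bootstrap
# distribution as a Series keyed by percentile (consistent with how results
# are unstacked and renamed below). The actual implementation may differ.
def bootstrap_mean_sketch(x, percentiles=[2.5, 50, 97.5], n_samples=10000):
    """Bootstrap the mean of `x` and return the given percentiles of the
    bootstrap distribution as a Series indexed by percentile."""
    arr = np.asarray(x, dtype=float)
    # resample with replacement and compute the mean of each resample
    idx = np.random.randint(0, arr.size, size=(n_samples, arr.size))
    means = arr[idx].mean(axis=1)
    return pd.Series(np.percentile(means, percentiles), index=percentiles)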
def run(dest, data_path):
    """Compute, for each experiment version, the median and mean time
    participants took to complete the experiment and the mean hourly pay."""
    human = util.load_human(data_path)
    versions = list(human['all']['version'].unique())

    results = {}
    for version in versions:
        hdata = human['all'].groupby('version').get_group(version)
        starttime = hdata.groupby('pid')['timestamp'].min()
        endtime = hdata.groupby('pid')['timestamp'].max()
        exptime = endtime - starttime
        medtime = timedelta(seconds=exptime.median().total_seconds())
        meantime = timedelta(seconds=exptime.mean().total_seconds())

        # exptime is a timedelta64 series, so .astype(int) gives nanoseconds;
        # dividing by 1e9 * 60 * 60 converts to hours, and dividing each
        # version's payment by that gives an hourly pay rate
        if version == "G":
            payrate = (1.0 / (exptime.astype(int) / (1e9 * 60 * 60))).mean()
        elif version == "H":
            payrate = (1.25 / (exptime.astype(int) / (1e9 * 60 * 60))).mean()
        elif version == "I":
            payrate = (0.70 / (exptime.astype(int) / (1e9 * 60 * 60))).mean()
        else:
            raise ValueError("unexpected version: %s" % version)

        results[version] = {
            "median_time": medtime,
            "mean_time": meantime,
            "mean_pay": payrate
        }

    results = pd.DataFrame.from_dict(results).T
    results.index.name = "version"
    results.to_csv(dest)
def run(dest, data_path):
    human = util.load_human(data_path)
    order = human['all']\
        .set_index(['version', 'kappa0', 'pid', 'mode', 'trial'])[['stimulus']]\
        .sortlevel()
    order.to_csv(dest)
def run(dest, data_path, seed):
    """Bootstrap the mean mass? accuracy over trials for each version and
    number of mass trials, treating version I as between subjects."""
    np.random.seed(seed)
    human = util.load_human(data_path)['C'].dropna(subset=['mass? response'])

    # duplicate the version I (between-subjects) responses, keeping only each
    # participant's first mass trial, and flag the copy with
    # num_mass_trials = -1 so it can be analyzed as a separate group
    between_subjs = human\
        .groupby('version')\
        .get_group('I')\
        .sort_values(by=['pid', 'trial'])\
        .drop_duplicates('pid')
    between_subjs['num_mass_trials'] = -1
    responses = pd.concat([human, between_subjs])

    results = []
    for kappa in [-1.0, 1.0, 'all']:
        if kappa == 'all':
            correct = responses
        else:
            correct = responses.groupby('kappa0').get_group(kappa)

        accuracy = correct\
            .groupby(['version', 'num_mass_trials', 'trial'])['mass? correct']\
            .apply(util.bootstrap_mean)\
            .unstack(-1)\
            .reset_index()
        accuracy['kappa0'] = kappa
        results.append(accuracy)

    results = pd.concat(results)\
        .set_index(['version', 'kappa0', 'num_mass_trials', 'trial'])\
        .sortlevel()

    results.to_csv(dest)
def run(dest, data_path, results_path):
    """Compute the per-trial log-likelihood of each participant's mass?
    responses under each version of the fitted model belief."""
    all_human = util.load_human(data_path)['C']
    human = all_human\
        .set_index(['version', 'pid', 'trial'])['mass? response']\
        .unstack('trial')\
        .sortlevel()

    # convert from -1,1 to 0,1
    human = (human + 1) / 2.0

    # model beliefs in wide form, with one column per trial
    cols = ['likelihood', 'counterfactual', 'model', 'fitted']
    model = pd\
        .read_csv(os.path.join(results_path, "model_belief_by_trial_fit.csv"))\
        .set_index(cols + ['version', 'pid', 'trial'])['p']\
        .unstack('trial')\
        .sortlevel()

    # log-likelihood of the human responses under each model belief
    llh = model\
        .groupby(level=cols)\
        .apply(compute_log_lh, human)

    # convert back to long form and attach num_mass_trials
    results = pd.melt(
        llh.reset_index(),
        id_vars=cols + ['version', 'pid'],
        var_name='trial',
        value_name='llh')
    results = pd.merge(
        results.dropna(),
        all_human[['pid', 'num_mass_trials']].drop_duplicates())
    results = results.set_index(cols).sortlevel()
    results.to_csv(dest)
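# compute_log_lh is defined elsewhere in the repository. A plausible sketch,
# assuming it scores each 0/1 human response against the model's probability
# for the same trial with a Bernoulli log-likelihood; the actual helper may
# handle alignment and missing trials differently.
def compute_log_lh_sketch(model_p, human):
    """Bernoulli log-likelihood of the 0/1 human responses given the model's
    per-trial probabilities. Both arguments are wide-form DataFrames with
    trials as columns; rows are matched on (version, pid)."""
    # drop the model-identifying index levels so the rows align on
    # (version, pid), then align on both index and columns
    p = model_p.reset_index(
        ['likelihood', 'counterfactual', 'model', 'fitted'], drop=True)
    h, p = human.align(p, join='inner')
    p = p.clip(1e-10, 1 - 1e-10)
    return h * np.log(p) + (1 - h) * np.log(1 - p)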
def run(dest, data_path):
    """Rescale the fall? responses to [0, 1] for blocks A and B, broken down
    by version (including the pooled 'GH' and 'all' groupings)."""
    data = util.load_human(data_path)

    results = []
    for block in ['A', 'B']:
        versions = list(data[block]['version'].unique())
        versions.extend(['GH', 'all'])
        for version in versions:
            if version == 'all':
                human_all = data[block]
            elif version == 'GH':
                # pool versions G and H together
                human_all = data[block]\
                    .set_index('version')\
                    .groupby(lambda x: x in ('G', 'H'))\
                    .get_group(True)\
                    .reset_index()
            else:
                human_all = data[block]\
                    .groupby('version')\
                    .get_group(version)

            human = human_all.set_index(
                ['kappa0', 'stimulus', 'pid'])[['fall? response']]

            # rescale fall? responses from the 1-7 scale to 0-1
            human = ((human - 1) / 6.0).reset_index()
            human['block'] = block
            human['version'] = version
            results.append(human)

    results = pd.concat(results)\
        .set_index(['version', 'block', 'kappa0', 'stimulus', 'pid'])\
        .sortlevel()

    results.to_csv(dest)
def run(dest, results_path, data_path, parallel):
    # load in raw human mass responses
    human = util.load_human(data_path)['C'][[
        'version', 'kappa0', 'pid', 'trial', 'stimulus', 'mass? response'
    ]]
    human.loc[:, 'mass? response'] = (human['mass? response'] + 1) / 2.0

    data = pd.read_csv(os.path.join(results_path, 'model_belief_by_trial.csv'))

    # convert model belief to wide form
    cols = [
        'likelihood', 'counterfactual', 'version', 'model', 'kappa0', 'pid'
    ]
    belief = data\
        .set_index(cols + ['trial', 'hypothesis'])['logp']\
        .unstack('hypothesis')\
        .sortlevel()

    # compute posterior log odds between the two hypotheses, and convert back
    # to long form
    log_odds = pd.melt(
        (belief[1.0] - belief[-1.0]).unstack('trial').reset_index(),
        id_vars=cols,
        var_name='trial',
        value_name='log_odds')

    # merge with human responses
    model = pd\
        .merge(log_odds, human)\
        .set_index(cols + ['trial'])\
        .sortlevel()\
        .dropna()

    # use L1 logistic regression to fit parameters individually to
    # each participant
    mapfunc = util.get_mapfunc(parallel)
    result = mapfunc(fit_responses, list(model.groupby(level=cols)))
    result = pd.concat(result).reset_index()

    # separate out the raw belief from the fitted belief
    fitted = result.drop(['p raw', 'p correct raw'], axis=1)
    fitted['fitted'] = True
    raw = result\
        .drop(['p', 'p correct'], axis=1)\
        .rename(columns={'p raw': 'p', 'p correct raw': 'p correct'})
    raw['fitted'] = False
    raw['B'] = np.nan

    new_belief = pd\
        .concat([fitted, raw])\
        .set_index(cols)\
        .sortlevel()

    assert not np.isnan(new_belief['p']).any()
    assert not np.isnan(new_belief['p correct']).any()
    assert not np.isinf(new_belief['p']).any()
    assert not np.isinf(new_belief['p correct']).any()

    new_belief.to_csv(dest)
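# fit_responses is defined elsewhere in the repository; only its use is shown
# above. The sketch below illustrates the kind of fit it is assumed to
# perform: scale each participant's posterior log odds by a single parameter
# B, squash through a sigmoid, and choose B by maximizing the Bernoulli
# likelihood of that participant's 0/1 responses with an L1 penalty on B. The
# output column names ('p raw', 'p correct', etc.) mirror the usage above,
# but the actual function may differ in its details (penalty strength,
# optimizer, bounds, and how 'p correct' is defined).
import scipy.optimize

def fit_responses_sketch(group, penalty=1.0):
    name, df = group
    log_odds = np.asarray(df['log_odds'], dtype=float)
    resp = np.asarray(df['mass? response'], dtype=float)

    def loss(B):
        # negative penalized Bernoulli log-likelihood of the responses
        p = 1.0 / (1 + np.exp(-B * log_odds))
        p = np.clip(p, 1e-10, 1 - 1e-10)
        llh = resp * np.log(p) + (1 - resp) * np.log(1 - p)
        return -llh.sum() + penalty * abs(B)

    B = float(scipy.optimize.minimize_scalar(
        loss, bounds=(0, 100), method='bounded').x)

    out = df.copy()
    out['B'] = B
    out['p raw'] = 1.0 / (1 + np.exp(-log_odds))    # belief with B fixed at 1
    out['p'] = 1.0 / (1 + np.exp(-B * log_odds))    # belief with fitted B

    # kappa0 is one of the group levels, so it is constant within a group;
    # assume 'p correct' is the probability assigned to the true hypothesis
    kappa0 = df.index.get_level_values('kappa0')[0]
    if kappa0 > 0:
        out['p correct'] = out['p']
        out['p correct raw'] = out['p raw']
    else:
        out['p correct'] = 1 - out['p']
        out['p correct raw'] = 1 - out['p raw']
    return out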
def run(dest, results_path, data_path, parallel):
    # load in raw human mass responses
    human = util.load_human(data_path)['C'][[
        'version', 'kappa0', 'pid', 'trial', 'stimulus', 'mass? response'
    ]]
    human.loc[:, 'mass? response'] = (human['mass? response'] + 1) / 2.0

    data = pd.read_csv(os.path.join(results_path, 'model_belief_by_trial.csv'))

    # convert model belief to wide form
    cols = ['likelihood', 'counterfactual', 'version', 'model', 'pid']
    belief = data\
        .set_index(cols + ['trial', 'hypothesis'])['logp']\
        .unstack('hypothesis')\
        .sortlevel()

    # compute posterior log odds between the two hypotheses, and convert back
    # to long form
    log_odds = pd.melt(
        (belief[1.0] - belief[-1.0]).unstack('trial').reset_index(),
        id_vars=cols,
        var_name='trial',
        value_name='log_odds')

    # merge with human responses, and record how many mass trials each
    # participant completed
    model = pd\
        .merge(log_odds, human)\
        .set_index(cols)\
        .sortlevel()\
        .dropna()
    model['num_mass_trials'] = model\
        .groupby(level=cols)\
        .apply(len)

    # treat version I as between subjects: keep only the first mass trial for
    # each participant and flag it with num_mass_trials = -1
    between_subjs = model\
        .reset_index()\
        .groupby('version')\
        .get_group('I')\
        .groupby(cols)\
        .apply(lambda x: x.sort_values(by='trial').head(1))\
        .set_index(cols)
    between_subjs['num_mass_trials'] = -1

    model = pd\
        .concat([model, between_subjs])\
        .reset_index()\
        .set_index(cols + ['num_mass_trials', 'trial'])\
        .sortlevel()

    # compute marginal likelihoods
    mapfunc = util.get_mapfunc(parallel)
    result = mapfunc(
        integrate, list(model.groupby(level=cols + ['num_mass_trials'])))
    result = util.as_df(result, cols + ['num_mass_trials'])

    result.to_csv(dest)
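# integrate is another repository helper that is not shown here. Judging from
# the comment above ("compute marginal likelihoods"), it is assumed to
# marginalize the likelihood of a participant's responses over the scaling
# parameter B rather than fixing it at a single fitted value. The sketch
# below does this by numerical integration over a grid of B values under a
# uniform prior; the real function may use a different prior, grid, or
# integration scheme.
from scipy.special import logsumexp

def integrate_sketch(group, B_grid=np.linspace(0.1, 20.0, 200)):
    name, df = group
    log_odds = np.asarray(df['log_odds'], dtype=float)
    resp = np.asarray(df['mass? response'], dtype=float)

    # log-likelihood of all of this group's responses for every candidate B
    p = 1.0 / (1 + np.exp(-np.outer(B_grid, log_odds)))
    p = np.clip(p, 1e-10, 1 - 1e-10)
    llh = (resp * np.log(p) + (1 - resp) * np.log(1 - p)).sum(axis=1)

    # average the likelihood over the grid (uniform prior), in log space
    logp = logsumexp(llh) - np.log(len(B_grid))
    return pd.Series({'logp': logp})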
def run(dest, data_path, seed):
    np.random.seed(seed)
    human = util.load_human(data_path)['C']\
        .dropna(axis=0, subset=['mass? response'])
    results = human\
        .groupby(['version', 'kappa0', 'num_mass_trials', 'pid'])['mass? correct']\
        .mean()\
        .to_frame()
    results.to_csv(dest)
def run(dest, data_path):
    human = util.load_human(data_path)

    # compute how many participants we have for each condition
    counts = human['all']\
        .groupby(['version', 'condition', 'counterbalance'])['pid']\
        .apply(lambda x: len(x.unique()))\
        .reset_index()
    counts.columns = [
        'version', 'condition', 'counterbalance', 'num_participants'
    ]
    counts = counts.set_index(['version', 'condition', 'counterbalance'])

    counts.to_csv(dest)
def run(dest, results_path, data_path, seed, parallel):
    """Bootstrap the mean of the models' p correct for each version, number
    of mass trials, and trial, overall and separately by kappa0."""
    np.random.seed(seed)

    human = util.load_human(data_path)['C'].dropna(subset=['mass? response'])\
        .set_index(['version', 'kappa0', 'pid', 'trial'])\
        .sortlevel()
    model = pd.read_csv(
        os.path.join(results_path, 'model_belief_by_trial_fit.csv'))\
        .set_index(['version', 'kappa0', 'pid', 'trial'])\
        .sortlevel()
    model['num_mass_trials'] = human['num_mass_trials']
    model = model.reset_index().dropna(subset=['num_mass_trials'])

    # treat version I as between subjects: keep only the first response for
    # each participant (per model variant) and flag it with
    # num_mass_trials = -1
    between_subjs = model\
        .groupby('version')\
        .get_group('I')\
        .sort_values(by=['pid', 'trial'])\
        .drop_duplicates(['likelihood', 'counterfactual', 'model', 'fitted', 'pid'])
    between_subjs['num_mass_trials'] = -1
    responses = pd.concat([model, between_subjs])

    # `require` comes from IPython's parallel machinery (e.g. ipyparallel)
    # and ensures np, pd, and util are importable on the worker engines
    @require('numpy as np', 'pandas as pd', 'util')
    def bootstrap_mean(df, **kwargs):
        name, df = df
        df.name = name
        return util.bootstrap_mean(df, **kwargs)

    results = []
    mapfunc = util.get_mapfunc(parallel)
    for kappa in [-1.0, 1.0, 'all']:
        if kappa == 'all':
            correct = responses
        else:
            correct = responses.groupby('kappa0').get_group(kappa)

        cols = [
            'likelihood', 'counterfactual', 'model', 'fitted', 'version',
            'num_mass_trials', 'trial'
        ]
        accuracy = mapfunc(
            bootstrap_mean, list(correct.groupby(cols)['p correct']))
        accuracy = util.as_df(accuracy, cols).reset_index()
        accuracy['kappa0'] = kappa
        results.append(accuracy)

    results = pd.concat(results)\
        .set_index(['likelihood', 'counterfactual', 'model', 'fitted'])\
        .sortlevel()

    results.to_csv(dest)
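# util.get_mapfunc and util.as_df are small helpers from the util module that
# several of these scripts rely on. The sketches below show what they are
# assumed to do: get_mapfunc returns either a parallel map (for example, an
# ipyparallel load-balanced view's map) or a plain serial map, and as_df
# stacks a list of named Series, one per group, into a single DataFrame whose
# MultiIndex is built from the group keys. The actual helpers may differ.
def get_mapfunc_sketch(parallel):
    if parallel:
        from ipyparallel import Client   # assumes a running ipyparallel cluster
        return Client().load_balanced_view().map_sync
    return lambda func, args: list(map(func, args))

def as_df_sketch(results, index_names):
    # each result is a Series whose .name holds the key of the group it was
    # computed for; stacking them row-wise recovers those keys as the index
    df = pd.DataFrame(list(results))
    df.index = pd.MultiIndex.from_tuples(df.index, names=index_names)
    return df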
def run(dest, data_path):
    """For each version, flag stimuli whose bootstrapped mass? accuracy is at
    or below chance, using a Bonferroni-corrected lower percentile."""
    human = util.load_human(data_path)

    def num_chance(df):
        # bootstrap the mean accuracy for each (kappa0, stimulus) pair and
        # check whether the Bonferroni-corrected lower percentile of the
        # bootstrap distribution is at or below chance (0.5)
        groups = df.groupby(['kappa0', 'stimulus'])['mass? correct']
        alpha = (0.05 / len(groups.groups)) * 100
        results = groups\
            .apply(lambda x: util.bootstrap_mean(x, percentiles=[alpha]))\
            .unstack(-1) <= 0.5
        # rename the percentile column from a percentage to a proportion
        results = results.rename(columns={alpha: alpha / 100})
        return results

    results = human['C']\
        .dropna(axis=0, subset=['mass? response'])\
        .groupby('version')\
        .apply(num_chance)

    results.to_csv(dest)
def run(dest, data_path, results_path, seed):
    """Compare binarized model beliefs to human mass? responses and compute
    precision, recall, F1, and accuracy for each model."""
    np.random.seed(seed)
    cols = ['likelihood', 'counterfactual', 'model', 'fitted', 'version']

    # load human data
    human = util.load_human(data_path)['C']\
        .dropna(axis=0, subset=['mass? response'])
    human.loc[:, 'mass? response'] = (human['mass? response'] + 1) / 2.0
    human = human[['version', 'kappa0', 'stimulus', 'mass? response', 'pid']]\
        .rename(columns={'mass? response': 'h'})

    # load model data, binarizing the model's belief at 0.5
    model = pd.read_csv(
        os.path.join(results_path, 'model_belief_by_trial_fit.csv'))
    model = model[cols + ['kappa0', 'stimulus', 'pid', 'p']]\
        .rename(columns={'p': 'm'})
    model.loc[:, 'm'] = (model['m'] > 0.5).astype(int)

    # merge the human and model responses and count up true positives, false
    # positives, true negatives, and false negatives
    data = pd.merge(human, model).set_index(cols + ['kappa0', 'stimulus', 'pid'])
    TP = ((data['h'] == 1) & (data['m'] == 1)).groupby(level=cols).sum()
    FP = ((data['h'] == 0) & (data['m'] == 1)).groupby(level=cols).sum()
    TN = ((data['h'] == 0) & (data['m'] == 0)).groupby(level=cols).sum()
    FN = ((data['h'] == 1) & (data['m'] == 0)).groupby(level=cols).sum()

    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    F1 = 2 * (precision * recall) / (precision + recall)
    accuracy = (TP + TN) / (TP + FP + FN + TN)

    results = pd.DataFrame({
        'precision': precision,
        'recall': recall,
        'F1': F1,
        'accuracy': accuracy
    })

    results.to_csv(dest)
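# Quick arithmetic check of the formulas above (illustrative numbers only):
# with TP=8, FP=2, TN=7, FN=3 the script would report
# precision = 8/10 = 0.80, recall = 8/11 ≈ 0.73,
# F1 = 2 * (0.80 * 0.73) / (0.80 + 0.73) ≈ 0.76, and accuracy = 15/20 = 0.75.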
def run(dest, data_path):
    human = util.load_human(data_path)['C']\
        .dropna(axis=0, subset=['mass? correct'])
    results = human.set_index(
        ['version', 'kappa0', 'stimulus', 'pid'])[['mass? correct']]
    results.to_csv(dest)