Example 1
import numpy as np
import pandas as pd
import util  # project-local helpers (load_human, bootstrap_mean, ...)

def run(dest, data_path, seed):
    np.random.seed(seed)
    human = util.load_human(data_path)['C'].dropna(subset=['mass? response'])

    results = []
    for kappa in [-1.0, 1.0, 'all']:
        if kappa == 'all':
            correct = human
        else:
            correct = human.groupby('kappa0').get_group(kappa)

        accuracy = correct\
            .groupby('version')['mass? correct']\
            .apply(util.bootstrap_mean)\
            .unstack(-1)\
            .reset_index()

        accuracy['kappa0'] = kappa
        results.append(accuracy)

    results = pd.concat(results)\
        .set_index(['version', 'kappa0'])\
        .sort_index()

    results.to_csv(dest)
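A minimal sketch of what util.bootstrap_mean is assumed to do (the real helper lives in the project's util module, and its exact statistics and labels may differ): resample the responses with replacement and return a Series of summary statistics, which is why the callers above can .unstack(-1) the result into columns.

import numpy as np
import pandas as pd

def bootstrap_mean(x, percentiles=(2.5, 97.5), n_boot=10000):
    # hypothetical stand-in for util.bootstrap_mean
    x = np.asarray(x, dtype=float)
    # means of n_boot resamples drawn with replacement
    boot = np.random.choice(x, size=(n_boot, x.size), replace=True).mean(axis=1)
    stats = {'mean': x.mean()}
    for p in percentiles:
        stats[p] = np.percentile(boot, p)
    return pd.Series(stats)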
Example 2
from datetime import timedelta

import pandas as pd
import util

def run(dest, data_path):
    human = util.load_human(data_path)
    versions = list(human['all']['version'].unique())
    results = {}
    for version in versions:
        hdata = human['all'].groupby('version').get_group(version)
        starttime = hdata.groupby('pid')['timestamp'].min()
        endtime = hdata.groupby('pid')['timestamp'].max()
        exptime = endtime - starttime
        medtime = timedelta(seconds=exptime.median().total_seconds())
        meantime = timedelta(seconds=exptime.mean().total_seconds())
        if version == "G":
            payrate = (1.0 / (exptime.astype(int) / (1e9 * 60 * 60))).mean()
        elif version == "H":
            payrate = (1.25 / (exptime.astype(int) / (1e9 * 60 * 60))).mean()
        elif version == "I":
            payrate = (0.70 / (exptime.astype(int) / (1e9 * 60 * 60))).mean()
        else:
            raise ValueError("unexpected version: %s" % version)

        results[version] = {
            "median_time": medtime,
            "mean_time": meantime,
            "mean_pay": payrate
        }

    results = pd.DataFrame.from_dict(results).T
    results.index.name = "version"

    results.to_csv(dest)
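The pay-rate arithmetic above hinges on converting a series of timedeltas into hours; a quick standalone check of that conversion:

import pandas as pd

# three made-up completion times
exptime = pd.Series(pd.to_timedelta(['30min', '45min', '1h']))
hours = exptime.dt.total_seconds() / (60 * 60)
print((1.25 / hours).mean())  # mean hourly rate for a $1.25 base payment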
Example 3
import util

def run(dest, data_path):
    human = util.load_human(data_path)
    order = human['all']\
        .set_index(['version', 'kappa0', 'pid', 'mode', 'trial'])[['stimulus']]\
        .sort_index()

    order.to_csv(dest)
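These scripts index the loader's result by block, so util.load_human presumably returns a dict of DataFrames keyed by experiment block ('A', 'B', 'C') plus a combined 'all' frame. A hedged stub of that assumed contract (the per-block CSV layout here is invented for illustration):

import pandas as pd

def load_human(data_path):
    # hypothetical reconstruction; the real loader lives in util
    blocks = {b: pd.read_csv('%s/%s.csv' % (data_path, b)) for b in 'ABC'}
    blocks['all'] = pd.concat(list(blocks.values()), ignore_index=True)
    return blocks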
Example 4
import numpy as np
import pandas as pd
import util

def run(dest, data_path, seed):
    np.random.seed(seed)
    human = util.load_human(data_path)['C'].dropna(subset=['mass? response'])

    between_subjs = human\
        .groupby('version')\
        .get_group('I')\
        .sort_values(by=['pid', 'trial'])\
        .drop_duplicates('pid')
    between_subjs['num_mass_trials'] = -1
    responses = pd.concat([human, between_subjs])

    results = []
    for kappa in [-1.0, 1.0, 'all']:
        if kappa == 'all':
            correct = responses
        else:
            correct = responses.groupby('kappa0').get_group(kappa)

        accuracy = correct\
            .groupby(['version', 'num_mass_trials', 'trial'])['mass? correct']\
            .apply(util.bootstrap_mean)\
            .unstack(-1)\
            .reset_index()

        accuracy['kappa0'] = kappa
        results.append(accuracy)

    results = pd.concat(results)\
        .set_index(['version', 'kappa0', 'num_mass_trials', 'trial'])\
        .sort_index()

    results.to_csv(dest)
Example 5
import os

import pandas as pd
import util  # compute_log_lh is a project-local helper, defined elsewhere

def run(dest, data_path, results_path):
    all_human = util.load_human(data_path)['C']
    human = all_human\
        .set_index(['version', 'pid', 'trial'])['mass? response']\
        .unstack('trial')\
        .sort_index()
    # convert from -1,1 to 0,1
    human = (human + 1) / 2.0

    cols = ['likelihood', 'counterfactual', 'model', 'fitted']
    model = pd\
        .read_csv(os.path.join(results_path, "model_belief_by_trial_fit.csv"))\
        .set_index(cols + ['version', 'pid', 'trial'])['p']\
        .unstack('trial')\
        .sort_index()

    llh = model\
        .groupby(level=cols)\
        .apply(compute_log_lh, human)

    results = pd.melt(llh.reset_index(),
                      id_vars=cols + ['version', 'pid'],
                      var_name='trial',
                      value_name='llh')

    results = pd.merge(results.dropna(),
                       all_human[['pid', 'num_mass_trials']].drop_duplicates())

    results = results.set_index(cols).sort_index()
    results.to_csv(dest)
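compute_log_lh is defined elsewhere in the project; given that model holds per-trial response probabilities and human holds the matching 0/1 responses, a plausible (hypothetical) reconstruction is a Bernoulli log-likelihood aligned on the shared (version, pid) x trial layout:

import numpy as np

def compute_log_lh(model_p, human):
    # hypothetical reconstruction: model_p is one group's DataFrame of
    # predicted P(response = 1), indexed by the grouping columns plus
    # (version, pid), with one column per trial
    model_p = model_p.reset_index(
        ['likelihood', 'counterfactual', 'model', 'fitted'], drop=True)
    # log P(human response | model belief) under a Bernoulli model
    return np.log(model_p * human + (1 - model_p) * (1 - human))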
Example 6
import pandas as pd
import util

def run(dest, data_path):
    data = util.load_human(data_path)

    results = []
    for block in ['A', 'B']:
        versions = list(data[block]['version'].unique())
        versions.extend(['GH', 'all'])
        for version in versions:
            if version == 'all':
                human_all = data[block]
            elif version == 'GH':
                human_all = data[block][data[block]['version'].isin(['G', 'H'])]
            else:
                human_all = data[block]\
                    .groupby('version')\
                    .get_group(version)

            human = human_all.set_index(['kappa0', 'stimulus', 'pid'])[['fall? response']]
            # rescale responses from the 1-7 scale to [0, 1]
            human = ((human - 1) / 6.0).reset_index()
            human['block'] = block
            human['version'] = version
            results.append(human)

    results = pd.concat(results)\
                .set_index(['version', 'block', 'kappa0', 'stimulus', 'pid'])\
                .sort_index()

    results.to_csv(dest)
Example 7
import os

import numpy as np
import pandas as pd
import util  # fit_responses is a project-local helper, defined elsewhere

def run(dest, results_path, data_path, parallel):
    # load in raw human mass responses
    human = util.load_human(data_path)['C'][[
        'version', 'kappa0', 'pid', 'trial', 'stimulus', 'mass? response'
    ]]
    human.loc[:, 'mass? response'] = (human['mass? response'] + 1) / 2.0

    data = pd.read_csv(os.path.join(results_path, 'model_belief_by_trial.csv'))

    # convert model belief to wide form
    cols = [
        'likelihood', 'counterfactual', 'version', 'model', 'kappa0', 'pid'
    ]
    belief = data\
        .set_index(cols + ['trial', 'hypothesis'])['logp']\
        .unstack('hypothesis')\
        .sort_index()

    # compute posterior log odds between the two hypotheses, and convert back
    # to long form
    log_odds = pd.melt(
        (belief[1.0] - belief[-1.0]).unstack('trial').reset_index(),
        id_vars=cols,
        var_name='trial',
        value_name='log_odds')

    # merge with human responses
    model = pd\
        .merge(log_odds, human)\
        .set_index(cols + ['trial'])\
        .sort_index()\
        .dropna()

    # use L1 logistic regression to fit parameters individually to
    # each participant
    mapfunc = util.get_mapfunc(parallel)
    result = mapfunc(fit_responses, list(model.groupby(level=cols)))
    result = pd.concat(result).reset_index()

    # separate out the raw belief from the fitted belief
    fitted = result.drop(['p raw', 'p correct raw'], axis=1)
    fitted['fitted'] = True
    raw = result\
        .drop(['p', 'p correct'], axis=1)\
        .rename(columns={'p raw': 'p', 'p correct raw': 'p correct'})
    raw['fitted'] = False
    raw['B'] = np.nan

    new_belief = pd\
        .concat([fitted, raw])\
        .set_index(cols)\
        .sort_index()

    assert not np.isnan(new_belief['p']).any()
    assert not np.isnan(new_belief['p correct']).any()
    assert not np.isinf(new_belief['p']).any()
    assert not np.isinf(new_belief['p correct']).any()

    new_belief.to_csv(dest)
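fit_responses is a project helper not shown here. Based on the comment above, a hypothetical sketch of what it might do: regress each participant's 0/1 responses on the model's posterior log odds with an L1-penalized logistic regression (no intercept), so the fitted belief is sigmoid(B * log_odds) and the raw belief is sigmoid(log_odds):

import numpy as np
from sklearn.linear_model import LogisticRegression

def fit_responses(args):
    # the parallel map passes (group key, group DataFrame) tuples
    name, df = args
    X = df['log_odds'].values[:, None]
    y = df['mass? response'].values
    reg = LogisticRegression(penalty='l1', solver='liblinear',
                             fit_intercept=False)
    reg.fit(X, y)
    out = df.copy()
    out['B'] = reg.coef_[0, 0]
    out['p'] = 1.0 / (1 + np.exp(-out['B'] * df['log_odds']))
    out['p raw'] = 1.0 / (1 + np.exp(-df['log_odds']))
    # the real helper also produces 'p correct' / 'p correct raw'
    # (probability assigned to the true hypothesis), omitted here
    return out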
Example 8
import os

import pandas as pd
import util  # integrate is a project-local helper, defined elsewhere

def run(dest, results_path, data_path, parallel):
    # load in raw human mass responses
    human = util.load_human(data_path)['C'][[
        'version', 'kappa0', 'pid', 'trial', 'stimulus', 'mass? response'
    ]]
    human.loc[:, 'mass? response'] = (human['mass? response'] + 1) / 2.0

    data = pd.read_csv(os.path.join(results_path, 'model_belief_by_trial.csv'))

    # convert model belief to wide form
    cols = ['likelihood', 'counterfactual', 'version', 'model', 'pid']
    belief = data\
        .set_index(cols + ['trial', 'hypothesis'])['logp']\
        .unstack('hypothesis')\
        .sort_index()

    # compute posterior log odds between the two hypotheses, and convert back
    # to long form
    log_odds = pd.melt(
        (belief[1.0] - belief[-1.0]).unstack('trial').reset_index(),
        id_vars=cols,
        var_name='trial',
        value_name='log_odds')

    # merge with human responses
    model = pd\
        .merge(log_odds, human)\
        .set_index(cols)\
        .sort_index()\
        .dropna()
    model['num_mass_trials'] = model\
        .groupby(level=cols)\
        .apply(len)
    between_subjs = model\
        .reset_index()\
        .groupby('version')\
        .get_group('I')\
        .groupby(cols)\
        .apply(lambda x: x.sort_values(by='trial').head(1))\
        .set_index(cols)
    between_subjs['num_mass_trials'] = -1

    model = pd\
        .concat([model, between_subjs])\
        .reset_index()\
        .set_index(cols + ['num_mass_trials', 'trial'])\
        .sort_index()

    # compute marginal likelihoods
    mapfunc = util.get_mapfunc(parallel)
    result = mapfunc(integrate,
                     list(model.groupby(level=cols + ['num_mass_trials'])))
    result = util.as_df(result, cols + ['num_mass_trials'])
    result.to_csv(dest)
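integrate is likewise defined elsewhere; "compute marginal likelihoods" suggests marginalizing the response likelihood over the fitted scaling parameter. A heavily hedged sketch, assuming a Bernoulli likelihood under sigmoid(B * log_odds) and a Gaussian prior on B evaluated on a fixed grid (prior, grid, and output format are all guesses):

import numpy as np
import pandas as pd
from scipy import stats

def integrate(args):
    # hypothetical reconstruction of the marginal-likelihood computation
    name, df = args
    B = np.linspace(-10, 10, 1001)[:, None]  # grid over the scale B
    p = 1.0 / (1 + np.exp(-B * df['log_odds'].values[None]))
    y = df['mass? response'].values[None]
    log_lh = np.log(p * y + (1 - p) * (1 - y)).sum(axis=1)
    log_joint = log_lh + stats.norm.logpdf(B[:, 0], 0, 10)
    # log of the trapezoidal integral of exp(log_joint), computed stably
    m = log_joint.max()
    logp = np.log(np.trapz(np.exp(log_joint - m), B[:, 0])) + m
    return pd.Series({'logp': logp}, name=name)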
Example 9
import numpy as np
import util

def run(dest, data_path, seed):
    np.random.seed(seed)
    human = util.load_human(data_path)['C']\
        .dropna(axis=0, subset=['mass? response'])

    results = human\
        .groupby(['version', 'kappa0', 'num_mass_trials', 'pid'])['mass? correct']\
        .mean()\
        .to_frame()

    results.to_csv(dest)
Example 10
import util

def run(dest, data_path):
    human = util.load_human(data_path)

    # compute how many participants we have for each condition
    counts = human['all']\
        .groupby(['version', 'condition', 'counterbalance'])['pid']\
        .nunique()\
        .rename('num_participants')\
        .to_frame()

    counts.to_csv(dest)
Example 11
import os

import numpy as np
import pandas as pd
import util
from ipyparallel import require  # assumed source of the @require decorator

def run(dest, results_path, data_path, seed, parallel):
    np.random.seed(seed)
    human = util.load_human(data_path)['C'].dropna(subset=['mass? response'])\
        .set_index(['version', 'kappa0', 'pid', 'trial'])\
        .sort_index()

    model = pd.read_csv(os.path.join(results_path, 'model_belief_by_trial_fit.csv'))\
        .set_index(['version', 'kappa0', 'pid', 'trial'])\
        .sort_index()
    model['num_mass_trials'] = human['num_mass_trials']
    model = model.reset_index().dropna(subset=['num_mass_trials'])

    between_subjs = model\
        .groupby('version')\
        .get_group('I')\
        .sort_values(by=['pid', 'trial'])\
        .drop_duplicates(['likelihood', 'counterfactual', 'model', 'fitted', 'pid'])
    between_subjs['num_mass_trials'] = -1
    responses = pd.concat([model, between_subjs])

    # `require` (assumed here to come from ipyparallel) makes the named
    # imports available on the workers that execute the decorated function
    @require('numpy as np', 'pandas as pd', 'util')
    def bootstrap_mean(df, **kwargs):
        # the parallel map passes (group key, group) tuples; re-attach the
        # key as the Series name before delegating to util.bootstrap_mean
        name, df = df
        df.name = name
        return util.bootstrap_mean(df, **kwargs)

    results = []
    mapfunc = util.get_mapfunc(parallel)
    for kappa in [-1.0, 1.0, 'all']:
        if kappa == 'all':
            correct = responses
        else:
            correct = responses.groupby('kappa0').get_group(kappa)

        cols = [
            'likelihood', 'counterfactual', 'model', 'fitted', 'version',
            'num_mass_trials', 'trial'
        ]
        accuracy = mapfunc(bootstrap_mean,
                           list(correct.groupby(cols)['p correct']))
        accuracy = util.as_df(accuracy, cols).reset_index()
        accuracy['kappa0'] = kappa
        results.append(accuracy)

    results = pd.concat(results)\
        .set_index(['likelihood', 'counterfactual', 'model', 'fitted'])\
        .sort_index()

    results.to_csv(dest)
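The bootstrap_mean wrapper above exists because mapping over a groupby hands each worker a (key, group) tuple rather than a bare Series; the wrapper re-attaches the key as the Series name. A minimal illustration of that pairing:

import pandas as pd

s = pd.Series([1.0, 0.0, 1.0, 1.0],
              index=pd.Index(['x', 'x', 'y', 'y'], name='grp'))
for name, group in s.groupby(level='grp'):
    print(name, group.tolist())  # x [1.0, 0.0]  then  y [1.0, 1.0]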
Example 12
import util

def run(dest, data_path):
    human = util.load_human(data_path)
    results = []

    def num_chance(df):
        groups = df.groupby(['kappa0', 'stimulus'])['mass? correct']
        # Bonferroni-corrected lower percentile (in percent, as expected by
        # util.bootstrap_mean) for testing accuracy against chance
        alpha = (0.05 / len(groups.groups)) * 100
        results = groups\
            .apply(lambda x: util.bootstrap_mean(x, percentiles=[alpha]))\
            .unstack(-1) <= 0.5
        results = results.rename(columns={alpha: alpha / 100})
        return results

    results = human['C']\
        .dropna(axis=0, subset=['mass? response'])\
        .groupby('version')\
        .apply(num_chance)

    results.to_csv(dest)
Example 13
import os

import numpy as np
import pandas as pd
import util

def run(dest, data_path, results_path, seed):
    np.random.seed(seed)

    cols = ['likelihood', 'counterfactual', 'model', 'fitted', 'version']

    # load human data
    human = util.load_human(data_path)['C']\
        .dropna(axis=0, subset=['mass? response'])
    human.loc[:, 'mass? response'] = (human['mass? response'] + 1) / 2.0
    human = human[['version', 'kappa0', 'stimulus', 'mass? response', 'pid']]\
        .rename(columns={'mass? response': 'h'})

    # load model data
    model = pd.read_csv(
        os.path.join(results_path, 'model_belief_by_trial_fit.csv'))
    model = model[cols + ['kappa0', 'stimulus', 'pid', 'p']]\
        .rename(columns={'p': 'm'})
    model.loc[:, 'm'] = (model['m'] > 0.5).astype(int)

    data = pd.merge(human, model)\
        .set_index(cols + ['kappa0', 'stimulus', 'pid'])
    TP = ((data['h'] == 1) & (data['m'] == 1)).groupby(level=cols).sum()
    FP = ((data['h'] == 0) & (data['m'] == 1)).groupby(level=cols).sum()
    TN = ((data['h'] == 0) & (data['m'] == 0)).groupby(level=cols).sum()
    FN = ((data['h'] == 1) & (data['m'] == 0)).groupby(level=cols).sum()

    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    F1 = 2 * (precision * recall) / (precision + recall)
    accuracy = (TP + TN) / (TP + FP + FN + TN)

    results = pd.DataFrame({
        'precision': precision,
        'recall': recall,
        'F1': F1,
        'accuracy': accuracy
    })

    results.to_csv(dest)
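A quick sanity check of the confusion-matrix formulas above, on made-up counts (numbers are illustrative only):

# toy confusion-matrix counts
TP, FP, TN, FN = 40, 10, 35, 15
precision = TP / (TP + FP)                           # 0.8
recall = TP / (TP + FN)                              # ~0.727
F1 = 2 * precision * recall / (precision + recall)   # ~0.762
accuracy = (TP + TN) / (TP + FP + FN + TN)           # 0.75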
Example 14
import util

def run(dest, data_path):
    human = util.load_human(data_path)['C']\
        .dropna(axis=0, subset=['mass? correct'])
    results = human.set_index(['version', 'kappa0', 'stimulus', 'pid'])[['mass? correct']]
    results.to_csv(dest)