Example #1
def get_table():
    dbpath = DATA_PATH.joinpath("human", "workers.db")

    if not dbtools.Table.exists(dbpath, "workers"):
        logger.info("Creating new table 'workers'")
        tbl = dbtools.Table.create(dbpath, "workers", [('pid', str),
                                                       ('dataset', str)])

    else:
        logger.info("Loading existing table 'workers'")
        tbl = dbtools.Table(dbpath, "workers")

    return tbl
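
A sketch of how this table might be used once loaded. get_table and the
'workers' schema come from the example above; the insert call is an
assumption about the dbtools API (only select appears in these examples),
and the pid/dataset values are made up for illustration:

tbl = get_table()
# record that a (hypothetical) worker completed a dataset; assumes
# dbtools.Table.insert accepts a dict of column values
tbl.insert({'pid': 'A1B2C3', 'dataset': 'mass_inference-G'})
# the select call mirrors the one in Example #4
rows = tbl.select("dataset", where=("pid=?", 'A1B2C3'))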
Example #2
def get_table():
    dbpath = CPO_PATH.joinpath("metadata.db")

    if not dbtools.Table.exists(dbpath, "stability"):
        logger.info("Creating new table 'stability'")
        tbl = dbtools.Table.create(dbpath, "stability", [('stimulus', str),
                                                         ('kappa', int),
                                                         ('nfell', int),
                                                         ('stable', int),
                                                         ('dataset', str)])

    else:
        logger.info("Loading existing table 'stability'")
        tbl = dbtools.Table(dbpath, "stability")

    return tbl
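
Reading metadata back out might look like the sketch below. The
multi-parameter where clause assumes dbtools passes '?' placeholders
through to sqlite3, and the stimulus name is hypothetical:

tbl = get_table()
# fetch the stability records for one (stimulus, kappa) pair
rows = tbl.select(where=("stimulus=? AND kappa=?", ('tower_0001', 0)))
stable = rows['stable'].astype(bool)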
Example #3
def gen_config(exp, cond, phase, cb):
    fb, ratio = parse_cond(cond, phase)

    # load render script
    script = load_script(exp, cond, phase, cb)
    script['feedback'] = fb
    script['ratio'] = ratio
    script['counterbalance'] = cb

    # load metadata (e.g. stability)
    tbl = dbtools.Table(CPO_PATH.joinpath("metadata.db"), "stability")
    meta = pd.concat([load_meta(tbl, s, k)
                      for s, k in zip(script.stimulus, script.kappa)])

    # merge metadata and render script to get the config
    config = pd.merge(script, meta, on=['stimulus', 'kappa'])
    config = config.set_index('stimulus').sort_index().reset_index()

    # sanity check: each ratio should match its kappa, i.e.
    # ratio == 10 ** kappa (e.g. kappa=1 corresponds to ratio 10.0)
    r2kappa = np.array([float(r) for r in config.ratio])
    assert (10 ** config.kappa == r2kappa).all()

    # dump the dataframe to a list of per-trial dictionaries, sorted
    # by stimulus
    trials = list(config.reset_index(drop=True).T.to_dict().values())
    trials.sort(key=lambda x: x['stimulus'])

    # replace nans with None, because javascript won't load JSON with
    # nans in it (only the color fields should ever be missing)
    for trial in trials:
        for key, val in trial.items():
            if isinstance(val, float) and np.isnan(val):
                if key in ("color0", "color1"):
                    trial[key] = None

    # only return a list if there are multiple trials
    if len(trials) == 1:
        trials = trials[0]

    return trials
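
load_meta itself is not shown in these examples. Below is a minimal
sketch of what it might look like, inferred from the call site (it must
return a frame with 'stimulus' and 'kappa' columns for the merge to
work) and from the stability schema in Example #2; the body is an
assumption, not the original implementation:

def load_meta(tbl, stimulus, kappa):
    # select the metadata rows for one (stimulus, kappa) pair; assumes
    # dbtools passes the '?' placeholders through to sqlite3
    meta = tbl.select(where=("stimulus=? AND kappa=?", (stimulus, kappa)))
    return meta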
Example #4
def find_bad_participants(exp, data):
    """Check participant data to make sure they pass the following
    conditions:

    1. No duplicated trials
    2. They finished the whole experiment
    3. They passed the posttest

    Returns a dictionary of failed participants that includes the
    reasons why they failed.

    """

    participants = []
    for (assignment, pid), df in data.groupby(['assignment', 'pid']):
        info = {
            'pid': pid,
            'assignment': assignment,
            'note': None,
            'timestamp': None,
            'percent': 0.0,
            'num_failed': np.nan
        }

        # go ahead and add this to our list now -- the dictionary is
        # mutable, so when we update stuff later the dictionary in the
        # list will also be updated
        participants.append(info)

        # get the time they started the experiment (psiturk_time is in
        # milliseconds since the epoch)
        times = df['psiturk_time'].sort_values()
        start_time = pd.to_datetime(datetime.fromtimestamp(times.iloc[0] /
                                                           1e3))
        info['timestamp'] = start_time

        # add condition/counterbalance; each participant should have
        # exactly one of each
        cond = int(df['condition'].unique()[0])
        cb = int(df['counterbalance'].unique()[0])
        info['condition'] = cond
        info['counterbalance'] = cb

        # check for duplicated entries
        if exp == 'mass_inference-G':
            dupes = df.sort_values(by='psiturk_time')[['mode', 'trial', 'trial_phase']]\
                      .duplicated().any()
            if dupes:
                logger.warning("%s (%s, %s) has duplicate trials", pid, cond,
                               cb)
                info['note'] = "duplicate_trials"
                continue

        # check to make sure they actually finished
        if exp in ('mass_inference-G', 'mass_inference-H'):
            num_trials = 62
        elif exp == 'mass_inference-I':
            num_trials = 32
        else:
            raise ValueError("unhandled experiment: %s" % exp)

        try:
            prestim = df\
                .set_index(['mode', 'trial', 'trial_phase'])\
                .groupby(level='trial_phase')\
                .get_group('prestim')
        except (IndexError, KeyError):
            if df['trial_phase'].isnull().all():
                # no trial phases were recorded at all
                prestim = df.iloc[0:0]
                incomplete = True
            else:
                raise
        else:
            incomplete = len(prestim) != num_trials
            info['percent'] = 100 * len(prestim) / num_trials

        if incomplete:
            logger.warning(
                "%s (%s, %s) is incomplete (completed %d/%d trials [%.1f%%])",
                pid, cond, cb, len(prestim), num_trials, info['percent'])
            info['note'] = "incomplete"
            continue

        # check to see if they passed the posttest
        posttest = df\
            .set_index(['mode', 'trial', 'trial_phase'])\
            .groupby(level=['mode', 'trial_phase'])\
            .get_group(('posttest', 'fall_response'))

        # a response above 4 means "fell" and should match nfell > 0;
        # responses of exactly 4 become nan, which compares unequal to
        # everything and so also counts as a failure
        truth = (posttest['nfell'] > 0).astype(float)
        resp = (posttest['response'] > 4).astype(float)
        resp[posttest['response'] == 4] = np.nan
        failed = (truth != resp).sum()
        info['num_failed'] = failed

        if failed > 1:
            logger.warning("%s (%s, %s) failed posttest (%d wrong)", pid, cond,
                           cb, failed)
            info['note'] = "failed_posttest"
            continue

        # see if they already did (a version of) the experiment
        dbpath = DATA_PATH.joinpath("human", "workers.db")
        tbl = dbtools.Table(dbpath, "workers")
        datasets = tbl.select("dataset", where=("pid=?", pid))['dataset']
        exps = [path(x).namebase for x in datasets]
        if exp in exps:
            # one record for this experiment itself is expected
            exps.remove(exp)
        if len(exps) > 0:
            logger.warning("%s (%s, %s) is a repeat worker", pid, cond, cb)
            info['note'] = "repeat_worker"
            continue

    return participants
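
A sketch of how the result might be consumed; the summary step is
illustrative and assumes data is the long-format frame the function
expects:

participants = find_bad_participants('mass_inference-G', data)
summary = pd.DataFrame(participants)
# participants with a non-null note failed at least one check
bad = summary[summary['note'].notnull()]
print(bad.groupby('note').size())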
Example #5
            stiminfo_path = os.path.join(dataset, "stimuli-info.csv")
            if os.path.exists(stiminfo_path):
                with open(stiminfo_path, "r") as fh:
                    # skip the header row and pull out the stimulus names
                    stims = [x for x in fh.read().split("\n") if x != '']
                    stims = sorted([x.split(",")[0] for x in stims[1:]])
                    stims = {
                        "stim_%d" % (i + 1): x
                        for i, x in enumerate(stims)
                    }
            else:
                stims = None

            # order = dataset.split("_")[-1]
            dbpath = os.path.join(dataset, "data.db")
            tbl = dbtools.Table(dbpath, "Participants")
            row = tbl[int(name[:-4])]
            condition = row.condition.values[0]
            ccode = row.completion_code.values[0]
            vcode = row.validation_code.values[0]
            extradata.setdefault('completion_codes', {})[name[:-4]] = ccode
            extradata.setdefault('validation_codes', {})[name[:-4]] = vcode

            process = locals()['process_%s' % dataset_type]
            newdata, extradata_ = process(name[:-4], data, trials, stims,
                                          condition)
            all_data.update(newdata)
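
The locals() lookup above only works if the process_* functions are
bound as local names in the enclosing scope; an explicit dispatch table
fails more loudly and survives refactoring. A sketch, with a placeholder
standing in for the real (unshown) process_* functions:

def process_training(pid, data, trials, stims, condition):
    # placeholder: the real process_* functions live in the original
    # module and are not shown in these examples
    return {pid: data}, {}

PROCESSORS = {'training': process_training}

def get_processor(dataset_type):
    # explicit dispatch instead of locals()['process_%s' % dataset_type]
    try:
        return PROCESSORS[dataset_type]
    except KeyError:
        raise ValueError("unhandled dataset type: %s" % dataset_type)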