def save_task_data(data_loc, data):
    """Save each experiment's data as a gzipped CSV under <data_loc>/Individual_Measures.

    :param data_loc: directory under which the 'Individual_Measures' folder
        is created (if needed)
    :param data: results dataframe with an 'experiment_exp_id' column; each
        experiment is pulled out with extract_experiment and written to
        <exp_id>.csv.gz
    """
    path = os.path.join(data_loc, 'Individual_Measures')
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(path, exist_ok=True)
    for exp_id in np.sort(data.experiment_exp_id.unique()):
        print('Saving %s...' % exp_id)
        extract_experiment(data, exp_id).to_csv(
            os.path.join(path, exp_id + '.csv.gz'), compression='gzip')
def save_task_data(data_loc, data):
    """Write one gzipped CSV per experiment into <data_loc>/Individual_Measures."""
    out_dir = os.path.join(data_loc, 'Individual_Measures')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    exp_ids = np.sort(data.experiment_exp_id.unique())
    for exp_id in exp_ids:
        print('Saving %s...' % exp_id)
        target = os.path.join(out_dir, exp_id + '.csv.gz')
        exp_df = extract_experiment(data, exp_id)
        exp_df.to_csv(target, compression='gzip')
def get_items(data):
    """Collect item-level survey responses into one long-format dataframe.

    Iterates over every experiment whose id contains 'survey' (except those in
    excluded_surveys), skips surveys whose responses cannot be coerced to
    float, and returns a dataframe with one row per (worker, item).
    """
    excluded_surveys = ['holt_laury_survey']
    items = []
    responses = []
    responses_text = []
    options = []
    workers = []
    item_nums = []
    exps = []
    for exp in data.experiment_exp_id.unique():
        if 'survey' in exp and exp not in excluded_surveys:
            survey = extract_experiment(data,exp)
            try:
                # skip surveys with non-numeric responses; ValueError is raised
                # (and nothing appended) before any other list is extended
                responses += list(survey.response.map(lambda x: float(x)))
            except ValueError:
                continue
            items += list(survey.text)
            responses_text += [str(i) for i in list(survey.response_text)]
            options += list(survey.options)
            workers += list(survey.worker_id)
            item_nums += list(survey.question_num)
            exps += [exp] * len(survey.text)
    # dtype=float casts columns where possible; non-numeric columns
    # (item_text, options, ...) presumably stay object -- TODO confirm
    items_df = pd.DataFrame({'survey': exps, 'worker': workers,
                             'item_text': items, 'coded_response': responses,
                             'response_text': responses_text,
                             'options': options}, dtype = float)
    # zero-pad item numbers so item_IDs sort lexicographically
    items_df.loc[:,'item_num'] = [str(i).zfill(2) for i in item_nums]
    # unique item identifier: "<survey>.<item_num>"
    items_df.loc[:,'item_ID'] = items_df['survey'] + '.' + items_df['item_num'].astype(str)
    items_df=items_df[['worker','item_ID','coded_response','item_text','response_text','options','survey','item_num']]
    return items_df
def quality_check_correction(data):
    """Correct the stop-signal QC issue noted in quality_check.

    Re-evaluates the three stop signal tasks using go-trial-only RT and
    miss-rate criteria, then rewrites the 'passed_QC' flag in `data`
    in place for every worker on those tasks.

    Bug fixed: the original marked failures False and then looped over
    *all* workers setting True, which clobbered every failure. Each worker
    is now flagged with their actual pass/fail result.
    """
    for exp in ['stop_signal', 'motor_selective_stop_signal',
                'stim_selective_stop_signal']:
        df = extract_experiment(data, exp)
        rt_thresh = 200        # minimum median RT (ms) on responded go trials
        acc_thresh = .6        # minimum accuracy on responded trials
        missed_thresh = .25    # maximum fraction of missed go trials
        response_thresh = .95  # maximum share any single key may make up
        passed_rt = df.query('rt != -1 and SS_trial_type=="go"').groupby(
            'worker_id').rt.median() >= rt_thresh
        passed_miss = df.query('SS_trial_type=="go"').groupby(
            'worker_id').rt.agg(lambda x: np.mean(x == -1)) < missed_thresh
        passed_acc = df.query('rt != -1').groupby(
            'worker_id').correct.mean() >= acc_thresh
        # fail workers who pressed one key on more than response_thresh
        # of their responded trials
        passed_response = np.logical_not(
            df.query('rt != -1').groupby('worker_id').key_press.agg(
                lambda x: np.any(
                    pd.value_counts(x) > pd.value_counts(x).sum() *
                    response_thresh)))
        # workers missing from any criterion (no valid trials) count as failed
        passed_df = pd.concat(
            [passed_rt, passed_acc, passed_miss, passed_response],
            axis=1).fillna(False, inplace=False)
        passed = passed_df.all(axis=1)
        for subj in passed.index:
            data.loc[(data.experiment_exp_id == exp) &
                     (data.worker_id == subj), 'passed_QC'] = bool(passed[subj])
def get_average_variable(results, var):
    """Return a dict mapping each experiment to the mean of column `var`.

    :param results: an expanalysis results object supporting get_experiments()
    :param var: name of the column to average
    :return: dict of {experiment_exp_id: mean}; experiments whose column
        cannot be averaged are reported and skipped
    """
    averages = {}
    for exp in results.get_experiments():
        data = extract_experiment(results, exp)
        try:
            average = data[var].mean()
        except TypeError:
            print("Cannot average %s" % (var))
            # skip: the original fell through and recorded the previous
            # experiment's value (or raised NameError on the first one)
            continue
        averages[exp] = average
    return averages
def get_average_variable(results, var):
    """Compute the mean of column `var` for every experiment in `results`.

    :param results: an expanalysis results object supporting get_experiments()
    :param var: column name to average
    :return: dict of {experiment_exp_id: mean value}; experiments whose
        column is not averageable are omitted
    """
    averages = {}
    for exp in results.get_experiments():
        exp_data = extract_experiment(results, exp)
        try:
            # only record a value when the mean actually succeeds; the
            # original assigned a stale or unbound `average` after TypeError
            averages[exp] = exp_data[var].mean()
        except TypeError:
            print("Cannot average %s" % (var))
    return averages
def get_items(data):
    """Build a long-format dataframe of item-level survey responses.

    Considers every experiment whose id contains 'survey' (except those in
    excluded_surveys); surveys whose responses are not all float-coercible
    are skipped entirely. Returns one row per (worker, item).
    """
    excluded_surveys = ['holt_laury_survey']
    items = []
    responses = []
    responses_text = []
    options = []
    workers = []
    item_nums = []
    exps = []
    for exp in data.experiment_exp_id.unique():
        if 'survey' in exp and exp not in excluded_surveys:
            survey = extract_experiment(data, exp)
            try:
                # ValueError here (non-numeric response) skips the survey
                # before any of the parallel lists below are extended
                responses += list(survey.response.map(lambda x: float(x)))
            except ValueError:
                continue
            items += list(survey.text)
            responses_text += [str(i) for i in list(survey.response_text)]
            options += list(survey.options)
            workers += list(survey.worker_id)
            item_nums += list(survey.question_num)
            exps += [exp] * len(survey.text)
    # dtype=float casts castable columns; text columns presumably remain
    # object dtype -- TODO confirm
    items_df = pd.DataFrame(
        {
            'survey': exps,
            'worker': workers,
            'item_text': items,
            'coded_response': responses,
            'response_text': responses_text,
            'options': options
        },
        dtype=float)
    # zero-pad item numbers so derived item_IDs sort lexicographically
    items_df.loc[:, 'item_num'] = [str(i).zfill(2) for i in item_nums]
    # unique item identifier: "<survey>.<item_num>"
    items_df.loc[:, 'item_ID'] = items_df['survey'] + '.' + items_df[
        'item_num'].astype(str)
    items_df = items_df[[
        'worker', 'item_ID', 'coded_response', 'item_text', 'response_text',
        'options', 'survey', 'item_num'
    ]]
    return items_df
def quality_check_correction(data):
    """Re-run QC for the stop signal tasks and update 'passed_QC' in place.

    This corrects the stop-signal evaluation issue described in
    quality_check: criteria here are computed on go trials only.

    Bug fixed: the original set False for failures first and then set True
    for every worker in `passed.index` (which includes the failures),
    undoing all failures. Passers are now written first and failures last.
    """
    for exp in ['stop_signal', 'motor_selective_stop_signal',
                'stim_selective_stop_signal']:
        df = extract_experiment(data, exp)
        rt_thresh = 200        # minimum median go-trial RT (ms)
        acc_thresh = .6        # minimum accuracy on responded trials
        missed_thresh = .25    # maximum fraction of missed go trials
        response_thresh = .95  # maximum share of a single key press
        passed_rt = df.query(
            'rt != -1 and SS_trial_type=="go"'
        ).groupby('worker_id').rt.median() >= rt_thresh
        passed_miss = df.query('SS_trial_type=="go"').groupby(
            'worker_id').rt.agg(lambda x: np.mean(x == -1)) < missed_thresh
        passed_acc = df.query(
            'rt != -1').groupby('worker_id').correct.mean() >= acc_thresh
        # perseverative responding: any single key above response_thresh fails
        passed_response = np.logical_not(
            df.query('rt != -1').groupby('worker_id').key_press.agg(
                lambda x: np.any(pd.value_counts(x) > pd.value_counts(x).sum()
                                 * response_thresh)))
        # NaN rows (workers absent from a criterion) are treated as failures
        passed_df = pd.concat(
            [passed_rt, passed_acc, passed_miss, passed_response],
            axis=1).fillna(False, inplace=False)
        passed = passed_df.all(axis=1)
        # passers first, failures last, so failures are never overwritten
        for subj in passed[passed == True].index:
            data.loc[(data.experiment_exp_id == exp) &
                     (data.worker_id == subj), 'passed_QC'] = True
        for subj in passed[passed == False].index:
            data.loc[(data.experiment_exp_id == exp) &
                     (data.worker_id == subj), 'passed_QC'] = False
def results_check(data,
                  exp_id=None,
                  worker=None,
                  columns=None,
                  remove_practice=True,
                  use_groups=True,
                  plot=False,
                  silent=False):
    """Outputs info for a basic data check on the results object.

    Uses data_check to group, describe and plot dataframes. Filters the
    results object as specified, then loops through each experiment and
    worker, performs some basic dataframe manipulation and runs data_check.

    :param data: the data from an expanalysis Result object
    :param exp_id: a string or array of strings to select the experiment(s)
        before calculating basic stats
    :param worker: a string or array of strings to select the worker(s)
        before calculating basic stats
    :param columns: columns to subset summary statistics, if they exist.
        Defaults to ['correct', 'rt'].
    :param remove_practice: bool, default True. If True will remove any rows
        labeled "practice" in the "exp_stage" column, if it exists
    :param use_groups: bool, default True. If True will lookup grouping
        variables using get_groupby for the experiment
    :param plot: bool, default False. If True plots data using plot_groups
    :param silent: bool, default False. If True will not print output
    :return: dict of {experiment: {worker: {'summary': ..., 'plot': ...}}}
    """
    assert 'worker_id' in data.columns and 'experiment_exp_id' in data.columns, \
        "Results data must have 'worker_id' and 'experiment_exp_id' in columns"
    # None sentinel avoids sharing one mutable default list across calls
    if columns is None:
        columns = ['correct', 'rt']
    banner = '******************************************************************************'
    stats = {}
    results = result_filter(data, exp_id=exp_id, worker=worker)
    orig_plot = plot
    orig_silent = silent
    display = not silent or plot
    input_text = ''  # defined even if an experiment has no workers to prompt on
    if display:
        print(banner)
        print('Input: Type "exit" to end, "skip" to skip to the next experiment, or hit enter to continue')
        print(banner)
    for experiment in numpy.unique(results['experiment_exp_id']):
        stats[experiment] = {}
        if display:
            print(banner)
            print(' Experiment: ', experiment)
            print(banner)
        if use_groups:
            groupby = get_groupby(experiment)
        else:
            groupby = []
        experiment_df = extract_experiment(results, experiment)
        for worker in pandas.unique(experiment_df['worker_id']):
            if display:
                print(banner)
                print(' Worker: ', worker)
                print(banner)
            df = experiment_df.query('worker_id == "%s"' % worker)
            summary, p = data_check(df, columns, remove_practice, groupby,
                                    silent, plot)
            # add summary and plot to dictionary of summaries, keyed per
            # worker (the original rebound stats[experiment] wholesale each
            # iteration, keeping only the last worker)
            stats[experiment][worker] = {'summary': summary, 'plot': p}
            if not silent or plot:
                input_text = input("Press Enter to continue...")
                plt.close()
                if input_text in ['skip', 'save']:
                    plot = False
                    silent = True
                    display = not silent or plot
                elif input_text == 'exit':
                    break
        if display:
            if input_text not in ['exit', 'save']:
                # restore user-requested verbosity after a 'skip'
                plot = orig_plot
                silent = orig_silent
                display = not silent or plot
            elif input_text == 'exit':
                break
    return stats
def quality_check(data):
    """Check that each experiment passed some "gut check" measures.

    Used to exclude data on individual tasks or whole subjects if they fail
    too many tasks. Sets a boolean 'passed_QC' column on `data` in place.

    NOTE: This function has an issue such that it inappropriately evaluates
    stop signal tasks based on the number of missed responses. Rather than
    changing the function (which would affect our samples which are already
    determined) I am leaving it, and introducing a quality check correction
    that will be performed after subjects are already rejected.
    """
    start_time = time()
    # per-experiment overrides for the default thresholds (defaults are
    # 200ms RT, .6 accuracy, .25 missed fraction, .95 single-response share)
    rt_thresh_lookup = {
        'angling_risk_task_always_sunny': 0,
        'simple_reaction_time': 150
    }
    acc_thresh_lookup = {
        'digit_span': 0,
        'hierarchical_rule': 0,
        'information_sampling_task': 0,
        'probabilistic_selection': 0,
        'ravens': 0,
        'shift_task': 0,
        'spatial_span': 0,
        'tower_of_london': 0
    }
    missed_thresh_lookup = {
        'information_sampling_task': 1,
        'go_nogo': 1,
        'tower_of_london': 2
    }
    # np.nan disables the single-key check for these tasks: any comparison
    # against nan is False, so passed_response comes out True everywhere
    response_thresh_lookup = {
        'angling_risk_task_always_sunny': np.nan,
        'columbia_card_task_cold': np.nan,
        'discount_titrate': np.nan,
        'digit_span': np.nan,
        'go_nogo': .98,
        'kirby': np.nan,
        'simple_reaction_time': np.nan,
        'spatial_span': np.nan,
    }
    templates = data.groupby('experiment_exp_id').experiment_template.unique()
    # everyone starts as passing; failures are flipped to False below
    data.loc[:,'passed_QC'] = True
    for exp in data.experiment_exp_id.unique():
        try:
            # only jspsych tasks get QC'd (surveys etc. are skipped)
            if templates.loc[exp] == 'jspsych':
                print('Running QC on ' + exp)
                df = extract_experiment(data, exp)
                rt_thresh = rt_thresh_lookup.get(exp,200)
                acc_thresh = acc_thresh_lookup.get(exp,.6)
                missed_thresh = missed_thresh_lookup.get(exp,.25)
                response_thresh = response_thresh_lookup.get(exp,.95)
                # special cases...
                if exp == 'information_sampling_task':
                    # NOTE(review): result of this value_counts() is discarded
                    # -- looks like leftover debugging code
                    df.groupby('worker_id').which_click_in_round.value_counts()
                    passed_response = df.groupby('worker_id').which_click_in_round.mean() > 2
                    passed_rt = pd.Series([True] * len(passed_response), index = passed_response.index)
                    passed_miss = pd.Series([True] * len(passed_response), index = passed_response.index)
                    passed_acc = pd.Series([True] * len(passed_response), index = passed_response.index)
                elif exp == 'go_nogo':
                    passed_rt = df.query('rt != -1').groupby('worker_id').rt.median() >= rt_thresh
                    passed_miss = df.groupby('worker_id').rt.agg(lambda x: np.mean(x == -1)) < missed_thresh
                    df.correct = pd.to_numeric(df.correct)
                    passed_acc = df.groupby('worker_id').correct.mean() >= acc_thresh
                    passed_response = np.logical_not(df.groupby('worker_id').key_press.agg(
                        lambda x: np.any(pd.value_counts(x) > pd.value_counts(x).sum()*response_thresh)))
                elif exp == 'psychological_refractory_period_two_choices':
                    # both choice RTs must clear the threshold
                    passed_rt = (df.groupby('worker_id').median()[['choice1_rt','choice2_rt']] >= rt_thresh).all(axis = 1)
                    passed_acc = df.query('choice1_rt != -1').groupby('worker_id').choice1_correct.mean() >= acc_thresh
                    passed_miss = ((df.groupby('worker_id').choice1_rt.agg(lambda x: np.mean(x!=-1) >= missed_thresh)) \
                        + (df.groupby('worker_id').choice2_rt.agg(lambda x: np.mean(x>-1) >= missed_thresh))) == 2
                    passed_response1 = np.logical_not(df.query('choice1_rt != -1').groupby('worker_id').choice1_key_press.agg(
                        lambda x: np.any(pd.value_counts(x) > pd.value_counts(x).sum()*response_thresh)))
                    passed_response2 = np.logical_not(df.query('choice2_rt != -1').groupby('worker_id').choice2_key_press.agg(
                        lambda x: np.any(pd.value_counts(x) > pd.value_counts(x).sum()*response_thresh)))
                    passed_response = np.logical_and(passed_response1,passed_response2)
                elif exp == 'ravens':
                    passed_rt = df.query('rt != -1').groupby('worker_id').rt.median() >= rt_thresh
                    passed_acc = df.query('rt != -1').groupby('worker_id').correct.mean() >= acc_thresh
                    passed_response = np.logical_not(df.groupby('worker_id').stim_response.agg(
                        lambda x: np.any(pd.value_counts(x) > pd.value_counts(x).sum()*response_thresh)))
                    passed_miss = pd.Series([True] * len(passed_rt), index = passed_rt.index)
                elif exp == 'tower_of_london':
                    passed_rt = df.groupby('worker_id').rt.median() >= rt_thresh
                    passed_acc = df.query('trial_id == "feedback"').groupby('worker_id').correct.mean() >= acc_thresh
                    # Labeling someone as "missing" too many problems if they don't make enough moves
                    passed_miss = (df.groupby(['worker_id','problem_id']).num_moves_made.max().reset_index().groupby('worker_id').mean() >= missed_thresh).num_moves_made
                    passed_response = pd.Series([True] * len(passed_rt), index = passed_rt.index)
                elif exp == 'two_stage_decision':
                    passed_rt = (df.groupby('worker_id').median()[['rt_first','rt_second']] >= rt_thresh).all(axis = 1)
                    passed_miss = df.groupby('worker_id').trial_id.agg(lambda x: np.mean(x == 'incomplete_trial')) < missed_thresh
                    passed_acc = pd.Series([True] * len(passed_rt), index = passed_rt.index)
                    # NOTE(review): the all-True passed_response above is dead
                    # code -- it is overwritten by the and-combination below
                    passed_response = pd.Series([True] * len(passed_rt), index = passed_rt.index)
                    passed_response1 = np.logical_not(df.query('rt_first != -1').groupby('worker_id').key_press_first.agg(
                        lambda x: np.any(pd.value_counts(x) > pd.value_counts(x).sum()*response_thresh)))
                    passed_response2 = np.logical_not(df.query('rt_second != -1').groupby('worker_id').key_press_second.agg(
                        lambda x: np.any(pd.value_counts(x) > pd.value_counts(x).sum()*response_thresh)))
                    passed_response = np.logical_and(passed_response1,passed_response2)
                elif exp == 'writing_task':
                    # only criterion: the written text must exceed 100 characters
                    passed_response = df.query('trial_id == "write"').groupby('worker_id').final_text.agg(lambda x: len(x[0]) > 100)
                    passed_acc = pd.Series([True] * len(passed_response), index = passed_response.index)
                    passed_rt = pd.Series([True] * len(passed_response), index = passed_response.index)
                    passed_miss = pd.Series([True] * len(passed_response), index = passed_response.index)
                # everything else
                else:
                    passed_rt = df.query('rt != -1').groupby('worker_id').rt.median() >= rt_thresh
                    passed_miss = df.groupby('worker_id').rt.agg(lambda x: np.mean(x == -1)) < missed_thresh
                    if 'correct' in df.columns:
                        df.correct = pd.to_numeric(df.correct)
                        passed_acc = df.query('rt != -1').groupby('worker_id').correct.mean() >= acc_thresh
                    else:
                        passed_acc = pd.Series([True] * len(passed_rt), index = passed_rt.index)
                    if 'mouse_click' in df.columns:
                        passed_response = np.logical_not(df.query('rt != -1').groupby('worker_id').mouse_click.agg(
                            lambda x: np.any(pd.value_counts(x) > pd.value_counts(x).sum()*response_thresh)))
                    elif 'key_press' in df.columns:
                        passed_response = np.logical_not(df.query('rt != -1').groupby('worker_id').key_press.agg(
                            lambda x: np.any(pd.value_counts(x) > pd.value_counts(x).sum()*response_thresh)))
                # workers missing from any criterion are treated as failing it
                passed_df = pd.concat([passed_rt,passed_acc,passed_miss,passed_response], axis = 1).fillna(False, inplace = False)
                passed = passed_df.all(axis = 1)
                failed = passed[passed == False]
                for subj in failed.index:
                    data.loc[(data.experiment_exp_id == exp) & (data.worker_id == subj),'passed_QC'] = False
        except AttributeError as e:
            # e.g. an expected column is missing for this experiment
            print('QC could not be run on experiment %s' % exp)
            print(e)
    finish_time = (time() - start_time)/60
    print('Finished QC. Time taken: ' + str(finish_time))
# NOTE(review): fragment of a larger processing script. `DVs`, `data`,
# `directory`, and `datasets` are defined above this chunk and the loop body
# continues past it -- formatting reconstructed; confirm against the full file.
DVs_valence = []
datasets.append((data,directory, DVs, DVs_valence))
# calculate DVs
for data,directory, DV_df, valence_df in datasets:
    readme_lines = []
    meta_dir = path.join(directory,'metadata')
    reference_dir = path.join(directory,'references')
    if not path.exists(meta_dir):
        makedirs(meta_dir)
    if not path.exists(reference_dir):
        makedirs(reference_dir)
    # save target datasets
    print('Saving to %s...' % directory)
    print('Saving target measures...')
    demog_data = extract_experiment(data,'demographics_survey')
    demog_data = process_demographics(demog_data, directory, meta_dir)
    alcohol_drug_data = extract_experiment(data,'alcohol_drugs_survey')
    alcohol_drug_data = process_alcohol_drug(alcohol_drug_data, directory, meta_dir)
    health_data = extract_experiment(data,'k6_survey')
    health_data = process_health(health_data, directory, meta_dir)
    # activity level is pulled out of the DV dataframe (and removed from it)
    activity_level = DV_df.pop('leisure_time_activity_survey.activity_level')
    # concatenate targets
    target_data = pd.concat([demog_data, alcohol_drug_data, health_data,
                             activity_level], axis = 1)
    target_data.to_csv(path.join(directory,'demographic_health.csv'))
    # save items
    items_df = get_items(data)
    print('Saving items...')
    subjectsxitems = items_df.pivot('worker','item_ID','coded_response')
    # ensure there are the correct number of items
def quality_check(data):
    """Run "gut check" QC on every jspsych experiment, flagging failures.

    Adds/overwrites a boolean 'passed_QC' column on `data` in place; workers
    failing any criterion (RT, accuracy, missed trials, perseverative
    responding) on a task are marked False for that task.

    NOTE: This function has an issue such that it inappropriately evaluates
    stop signal tasks based on the number of missed responses. Rather than
    changing the function (which would affect our samples which are already
    determined) I am leaving it, and introducing a quality check correction
    that will be performed after subjects are already rejected.
    """
    start_time = time()
    # experiment-specific overrides; .get() defaults below are 200ms RT,
    # .6 accuracy, .25 missed fraction, .95 single-response share
    rt_thresh_lookup = {
        'angling_risk_task_always_sunny': 0,
        'simple_reaction_time': 150
    }
    acc_thresh_lookup = {
        'digit_span': 0,
        'hierarchical_rule': 0,
        'information_sampling_task': 0,
        'probabilistic_selection': 0,
        'ravens': 0,
        'shift_task': 0,
        'spatial_span': 0,
        'tower_of_london': 0
    }
    missed_thresh_lookup = {
        'information_sampling_task': 1,
        'go_nogo': 1,
        'tower_of_london': 2
    }
    # np.nan turns the response check off: comparisons with nan are always
    # False, so every worker passes it for these tasks
    response_thresh_lookup = {
        'angling_risk_task_always_sunny': np.nan,
        'columbia_card_task_cold': np.nan,
        'discount_titrate': np.nan,
        'digit_span': np.nan,
        'go_nogo': .98,
        'kirby': np.nan,
        'simple_reaction_time': np.nan,
        'spatial_span': np.nan,
    }
    templates = data.groupby('experiment_exp_id').experiment_template.unique()
    # default everyone to passing; failures flip to False below
    data.loc[:, 'passed_QC'] = True
    for exp in data.experiment_exp_id.unique():
        try:
            # QC applies to jspsych tasks only (surveys etc. are skipped)
            if templates.loc[exp] == 'jspsych':
                print('Running QC on ' + exp)
                df = extract_experiment(data, exp)
                rt_thresh = rt_thresh_lookup.get(exp, 200)
                acc_thresh = acc_thresh_lookup.get(exp, .6)
                missed_thresh = missed_thresh_lookup.get(exp, .25)
                response_thresh = response_thresh_lookup.get(exp, .95)
                # special cases...
                if exp == 'information_sampling_task':
                    # NOTE(review): this value_counts() result is unused --
                    # apparently leftover debugging code
                    df.groupby('worker_id').which_click_in_round.value_counts()
                    passed_response = df.groupby(
                        'worker_id').which_click_in_round.mean() > 2
                    passed_rt = pd.Series([True] * len(passed_response),
                                          index=passed_response.index)
                    passed_miss = pd.Series([True] * len(passed_response),
                                            index=passed_response.index)
                    passed_acc = pd.Series([True] * len(passed_response),
                                           index=passed_response.index)
                elif exp == 'go_nogo':
                    passed_rt = df.query('rt != -1').groupby(
                        'worker_id').rt.median() >= rt_thresh
                    passed_miss = df.groupby('worker_id').rt.agg(
                        lambda x: np.mean(x == -1)) < missed_thresh
                    df.correct = pd.to_numeric(df.correct)
                    passed_acc = df.groupby(
                        'worker_id').correct.mean() >= acc_thresh
                    passed_response = np.logical_not(
                        df.groupby('worker_id').key_press.agg(lambda x: np.any(
                            pd.value_counts(x) > pd.value_counts(x).sum() *
                            response_thresh)))
                elif exp == 'psychological_refractory_period_two_choices':
                    # both choice RTs must clear the threshold
                    passed_rt = (df.groupby('worker_id').median()[[
                        'choice1_rt', 'choice2_rt'
                    ]] >= rt_thresh).all(axis=1)
                    passed_acc = df.query('choice1_rt != -1').groupby(
                        'worker_id').choice1_correct.mean() >= acc_thresh
                    passed_miss = ((df.groupby('worker_id').choice1_rt.agg(lambda x: np.mean(x!=-1) >= missed_thresh)) \
                        + (df.groupby('worker_id').choice2_rt.agg(lambda x: np.mean(x>-1) >= missed_thresh))) == 2
                    passed_response1 = np.logical_not(
                        df.query('choice1_rt != -1').groupby('worker_id').
                        choice1_key_press.agg(lambda x: np.any(
                            pd.value_counts(x) > pd.value_counts(x).sum() *
                            response_thresh)))
                    passed_response2 = np.logical_not(
                        df.query('choice2_rt != -1').groupby('worker_id').
                        choice2_key_press.agg(lambda x: np.any(
                            pd.value_counts(x) > pd.value_counts(x).sum() *
                            response_thresh)))
                    passed_response = np.logical_and(passed_response1,
                                                     passed_response2)
                elif exp == 'ravens':
                    passed_rt = df.query('rt != -1').groupby(
                        'worker_id').rt.median() >= rt_thresh
                    passed_acc = df.query('rt != -1').groupby(
                        'worker_id').correct.mean() >= acc_thresh
                    passed_response = np.logical_not(
                        df.groupby('worker_id').stim_response.agg(
                            lambda x: np.any(
                                pd.value_counts(x) > pd.value_counts(x).sum() *
                                response_thresh)))
                    passed_miss = pd.Series([True] * len(passed_rt),
                                            index=passed_rt.index)
                elif exp == 'tower_of_london':
                    passed_rt = df.groupby(
                        'worker_id').rt.median() >= rt_thresh
                    passed_acc = df.query('trial_id == "feedback"').groupby(
                        'worker_id').correct.mean() >= acc_thresh
                    # Labeling someone as "missing" too many problems if they don't make enough moves
                    passed_miss = (df.groupby([
                        'worker_id', 'problem_id'
                    ]).num_moves_made.max().reset_index().groupby(
                        'worker_id').mean() >= missed_thresh).num_moves_made
                    passed_response = pd.Series([True] * len(passed_rt),
                                                index=passed_rt.index)
                elif exp == 'two_stage_decision':
                    passed_rt = (df.groupby('worker_id').median()[[
                        'rt_first', 'rt_second'
                    ]] >= rt_thresh).all(axis=1)
                    passed_miss = df.groupby('worker_id').trial_id.agg(
                        lambda x: np.mean(x == 'incomplete_trial'
                                          )) < missed_thresh
                    passed_acc = pd.Series([True] * len(passed_rt),
                                           index=passed_rt.index)
                    # NOTE(review): this all-True series is dead code -- it is
                    # replaced by the and-combination a few lines down
                    passed_response = pd.Series([True] * len(passed_rt),
                                                index=passed_rt.index)
                    passed_response1 = np.logical_not(
                        df.query('rt_first != -1').groupby(
                            'worker_id').key_press_first.agg(lambda x: np.any(
                                pd.value_counts(x) > pd.value_counts(x).sum() *
                                response_thresh)))
                    passed_response2 = np.logical_not(
                        df.query('rt_second != -1').groupby(
                            'worker_id').key_press_second.agg(lambda x: np.any(
                                pd.value_counts(x) > pd.value_counts(x).sum() *
                                response_thresh)))
                    passed_response = np.logical_and(passed_response1,
                                                     passed_response2)
                elif exp == 'writing_task':
                    # only criterion: written text longer than 100 characters
                    passed_response = df.query('trial_id == "write"').groupby(
                        'worker_id').final_text.agg(lambda x: len(x[0]) > 100)
                    passed_acc = pd.Series([True] * len(passed_response),
                                           index=passed_response.index)
                    passed_rt = pd.Series([True] * len(passed_response),
                                          index=passed_response.index)
                    passed_miss = pd.Series([True] * len(passed_response),
                                            index=passed_response.index)
                # everything else
                else:
                    passed_rt = df.query('rt != -1').groupby(
                        'worker_id').rt.median() >= rt_thresh
                    passed_miss = df.groupby('worker_id').rt.agg(
                        lambda x: np.mean(x == -1)) < missed_thresh
                    if 'correct' in df.columns:
                        df.correct = pd.to_numeric(df.correct)
                        passed_acc = df.query('rt != -1').groupby(
                            'worker_id').correct.mean() >= acc_thresh
                    else:
                        passed_acc = pd.Series([True] * len(passed_rt),
                                               index=passed_rt.index)
                    if 'mouse_click' in df.columns:
                        passed_response = np.logical_not(
                            df.query('rt != -1').groupby(
                                'worker_id').mouse_click.agg(lambda x: np.any(
                                    pd.value_counts(x) > pd.value_counts(x).
                                    sum() * response_thresh)))
                    elif 'key_press' in df.columns:
                        passed_response = np.logical_not(
                            df.query('rt != -1').groupby(
                                'worker_id').key_press.agg(lambda x: np.any(
                                    pd.value_counts(x) > pd.value_counts(x).
                                    sum() * response_thresh)))
                # a worker absent from any criterion counts as failing it
                passed_df = pd.concat(
                    [passed_rt, passed_acc, passed_miss, passed_response],
                    axis=1).fillna(False, inplace=False)
                passed = passed_df.all(axis=1)
                failed = passed[passed == False]
                for subj in failed.index:
                    data.loc[(data.experiment_exp_id == exp) &
                             (data.worker_id == subj), 'passed_QC'] = False
        except AttributeError as e:
            # e.g. a column referenced above is missing for this experiment
            print('QC could not be run on experiment %s' % exp)
            print(e)
    finish_time = (time() - start_time) / 60
    print('Finished QC. Time taken: ' + str(finish_time))
def results_check(data,
                  exp_id=None,
                  worker=None,
                  columns=None,
                  remove_practice=True,
                  use_groups=True,
                  plot=False,
                  silent=False):
    """Run a basic, optionally interactive, data check on a results object.

    Uses data_check to group, describe and plot dataframes. Filters the
    results object as specified, loops through each experiment and worker
    contained in the results object, performs some basic dataframe
    manipulation and runs data_check.

    :param data: the data from an expanalysis Result object
    :param exp_id: a string or array of strings to select the experiment(s)
        before calculating basic stats
    :param worker: a string or array of strings to select the worker(s)
        before calculating basic stats
    :param columns: columns to subset summary statistics, if they exist.
        Defaults to ['correct', 'rt'].
    :param remove_practice: bool, default True. If True will remove any rows
        labeled "practice" in the "exp_stage" column, if it exists
    :param use_groups: bool, default True. If True will lookup grouping
        variables using get_groupby for the experiment
    :param plot: bool, default False. If True plots data using plot_groups
    :param silent: bool, default False. If True will not print output
    :return: dict keyed by experiment, then worker, of summary/plot pairs
    """
    assert 'worker_id' in data.columns and 'experiment_exp_id' in data.columns, \
        "Results data must have 'worker_id' and 'experiment_exp_id' in columns"
    if columns is None:
        # materialized here rather than as a default to avoid a shared
        # mutable default argument
        columns = ['correct', 'rt']
    divider = '******************************************************************************'
    stats = {}
    results = result_filter(data, exp_id=exp_id, worker=worker)
    orig_plot = plot
    orig_silent = silent
    display = not silent or plot
    # pre-bind so the post-loop check is safe even with zero workers
    input_text = ''
    if display:
        print(divider)
        print('Input: Type "exit" to end, "skip" to skip to the next experiment, or hit enter to continue')
        print(divider)
    for experiment in numpy.unique(results['experiment_exp_id']):
        stats[experiment] = {}
        if display:
            print(divider)
            print(' Experiment: ', experiment)
            print(divider)
        groupby = get_groupby(experiment) if use_groups else []
        experiment_df = extract_experiment(results, experiment)
        for worker in pandas.unique(experiment_df['worker_id']):
            if display:
                print(divider)
                print(' Worker: ', worker)
                print(divider)
            df = experiment_df.query('worker_id == "%s"' % worker)
            summary, p = data_check(df, columns, remove_practice, groupby,
                                    silent, plot)
            # store per worker -- the original reassigned the whole
            # experiment entry each iteration and lost all but the last worker
            stats[experiment][worker] = {'summary': summary, 'plot': p}
            if not silent or plot:
                input_text = input("Press Enter to continue...")
                plt.close()
                if input_text in ['skip', 'save']:
                    plot = False
                    silent = True
                    display = not silent or plot
                elif input_text == 'exit':
                    break
        if display:
            if input_text not in ['exit', 'save']:
                # undo a per-experiment 'skip' before moving on
                plot = orig_plot
                silent = orig_silent
                display = not silent or plot
            elif input_text == 'exit':
                break
    return stats