# Write a "short" copy of every data csv in data_dir, with variable names
# shortened by convert_var_names, and document the new files in the README.
#
# Files expected in data_dir after this section has run:
#   short_DV_valence.csv
#   short_meaningful_variables.csv
#   short_meaningful_variables_EZ.csv
#   short_meaningful_variables_clean.csv
#   short_meaningful_variables_hddm.csv
#   short_meaningful_variables_imputed.csv
#   short_meaningful_variables_imputed_for_task_selection.csv
#   short_meaningful_variables_noDDM.csv
#   short_subject_x_items.csv
#   short_taskdata.csv
#   short_taskdata_clean.csv
#   short_taskdata_imputed.csv
#   short_taskdata_imputed_for_task_selection.csv
#   short_variables_exhaustive.csv

# Target/outcome files are not shortened.
excluded_keywords = ('demographic', 'health', 'alcohol_drug')

# Filter on the file name only: matching against the full path would drop
# every file if a parent directory happened to contain one of the keywords.
# Files already prefixed with 'short_' are skipped so that re-running this
# section does not produce 'short_short_*.csv' copies.
files = [
    f for f in glob(path.join(data_dir, '*csv'))
    if not path.basename(f).startswith('short_')
    and not any(keyword in path.basename(f) for keyword in excluded_keywords)
]

# Local alternative to the cluster path below:
#cd '/Users/zeynepenkavi/Documents/PoldrackLabLocal/Self_Regulation_Ontology/data_preparation/'
# NOTE(review): presumably convert_var_names resolves variable_name_lookup.csv
# relative to the working directory - confirm. Also note that if data_dir is a
# relative path, the paths globbed above will no longer resolve after chdir.
chdir('/oak/stanford/groups/russpold/users/zenkavi/Self_Regulation_Ontology/data_preparation/')

for f in files:
    name = path.basename(f)
    # DataFrame.from_csv was removed in pandas 1.0; this is its documented
    # equivalent (first column as index, index parsed as dates if possible).
    df = pd.read_csv(f, index_col=0, parse_dates=True)
    convert_var_names(df)  # used for its in-place effect on df; return value ignored
    df.to_csv(path.join(data_dir, 'short_' + name))
    print('short_' + name)

readme_lines += ["short*.csv: short versions are the same as long versions with variable names shortened using variable_name_lookup.csv\n\n"]

# Append (not overwrite) so existing README content is kept; the context
# manager guarantees the handle is closed even if the write fails.
with open(path.join(data_dir, "README.txt"), "a") as readme:
    readme.writelines(readme_lines)
# concatenate targets target_data = pd.concat([demog_data, alcohol_drug_data, health_data, activity_level], axis = 1) target_data.to_csv(path.join(directory,'demographic_health.csv')) # save items items_df = get_items(data) print('Saving items...') subjectsxitems = items_df.pivot('worker','item_ID','coded_response') # ensure there are the correct number of items if subjectsxitems.shape[1] != 593: print('Wrong number of items found for label: %s' % label) continue # save items items_df.to_csv(path.join(directory, 'items.csv.gz'), compression = 'gzip') subjectsxitems.to_csv(path.join(directory, 'subject_x_items.csv')) convert_var_names(subjectsxitems) assert np.max([len(name) for name in subjectsxitems.columns])<=8, \ "Found column names longer than 8 characters in short version" # save Individual Measures save_task_data(directory, data) if 'Complete' in directory: # save demographic targets reference np.savetxt(path.join(reference_dir,'demographic_health_reference.csv'), target_data.columns, fmt = '%s', delimiter=",") gen_reference_item_text(items_df) readme_lines += ["demographics_survey.csv: demographic information from expfactory-surveys\n\n"] readme_lines += ["alcohol_drug_survey.csv: alcohol, smoking, marijuana and other drugs from expfactory-surveys\n\n"] readme_lines += ["ky_survey.csv: mental health and neurological/health conditions from expfactory-surveys\n\n"] readme_lines += ["items.csv.gz: gzipped csv of all item information across surveys\n\n"] readme_lines += ["subject_x_items.csv: reshaped items.csv such that rows are subjects and columns are individual items\n\n"] readme_lines += ["Individual Measures: directory containing gzip compressed files for each individual measures\n\n"]