コード例 #1
0
#short_DV_valence.csv
#short_meaningful_variables.csv
#short_meaningful_variables_EZ.csv
#short_meaningful_variables_clean.csv
#short_meaningful_variables_hddm.csv
#short_meaningful_variables_imputed.csv
#short_meaningful_variables_imputed_for_task_selection.csv
#short_meaningful_variables_noDDM.csv
#short_subject_x_items.csv
#short_taskdata.csv
#short_taskdata_clean.csv
#short_taskdata_imputed.csv
#short_taskdata_imputed_for_task_selection.csv
#short_variables_exhaustive.csv

# Write a "short_" copy of every CSV in data_dir whose variable names have been
# shortened, then record that in the README.
# Demographic, health and alcohol/drug files are left out. The keywords are
# matched against the file name only, so a keyword that happens to occur in the
# data_dir path cannot exclude every file. Outputs of a previous run
# ("short_*.csv") are skipped so that re-running does not create
# "short_short_*.csv" files.
excluded_keywords = ('demographic', 'health', 'alcohol_drug')
files = [
    f for f in glob(path.join(data_dir, '*csv'))
    if not path.basename(f).startswith('short_')
    and not any(keyword in path.basename(f) for keyword in excluded_keywords)
]
# Local alternative to the cluster path below:
#cd '/Users/zeynepenkavi/Documents/PoldrackLabLocal/Self_Regulation_Ontology/data_preparation/'
# NOTE(review): presumably convert_var_names resolves its lookup file relative
# to this directory -- confirm before changing or removing the chdir.
chdir('/oak/stanford/groups/russpold/users/zenkavi/Self_Regulation_Ontology/data_preparation/')
for f in files:
    name = path.basename(f)
    # DataFrame.from_csv was removed in pandas 1.0; read_csv with these two
    # arguments is its documented equivalent.
    df = pd.read_csv(f, index_col=0, parse_dates=True)
    # Return value is unused, so the helper is expected to rename the columns
    # of df in place.
    convert_var_names(df)
    df.to_csv(path.join(data_dir, 'short_' + name))
    print('short_' + name)
readme_lines += ["short*.csv: short versions are the same as long versions with variable names shortened using variable_name_lookup.csv\n\n"]

# Append (mode "a") the collected descriptions; the with-block closes the file
# even if writing fails.
with open(path.join(data_dir, "README.txt"), "a") as readme:
    readme.writelines(readme_lines)
コード例 #2
0
    # concatenate targets
    target_data = pd.concat([demog_data, alcohol_drug_data, 
                             health_data, activity_level], axis = 1)
    target_data.to_csv(path.join(directory,'demographic_health.csv'))
    # save items
    items_df = get_items(data)
    print('Saving items...')
    subjectsxitems = items_df.pivot('worker','item_ID','coded_response')
    # ensure there are the correct number of items
    if subjectsxitems.shape[1] != 593:
        print('Wrong number of items found for label: %s' % label)
        continue
    # save items
    items_df.to_csv(path.join(directory, 'items.csv.gz'), compression = 'gzip')
    subjectsxitems.to_csv(path.join(directory, 'subject_x_items.csv'))
    convert_var_names(subjectsxitems)
    assert np.max([len(name) for name in subjectsxitems.columns])<=8, \
        "Found column names longer than 8 characters in short version"
    # save Individual Measures
    save_task_data(directory, data)
    if 'Complete' in directory:
        # save demographic targets reference
        np.savetxt(path.join(reference_dir,'demographic_health_reference.csv'), target_data.columns, fmt = '%s', delimiter=",")
        gen_reference_item_text(items_df)

    readme_lines += ["demographics_survey.csv: demographic information from expfactory-surveys\n\n"]
    readme_lines += ["alcohol_drug_survey.csv: alcohol, smoking, marijuana and other drugs from expfactory-surveys\n\n"]
    readme_lines += ["ky_survey.csv: mental health and neurological/health conditions from expfactory-surveys\n\n"]
    readme_lines += ["items.csv.gz: gzipped csv of all item information across surveys\n\n"]
    readme_lines += ["subject_x_items.csv: reshaped items.csv such that rows are subjects and columns are individual items\n\n"]
    readme_lines += ["Individual Measures: directory containing gzip compressed files for each individual measures\n\n"]
コード例 #3
0
    # concatenate targets
    target_data = pd.concat([demog_data, alcohol_drug_data, 
                             health_data, activity_level], axis = 1)
    target_data.to_csv(path.join(directory,'demographic_health.csv'))
    # save items
    items_df = get_items(data)
    print('Saving items...')
    subjectsxitems = items_df.pivot('worker','item_ID','coded_response')
    # ensure there are the correct number of items
    if subjectsxitems.shape[1] != 593:
        print('Wrong number of items found for label: %s' % label)
        continue
    # save items
    items_df.to_csv(path.join(directory, 'items.csv.gz'), compression = 'gzip')
    subjectsxitems.to_csv(path.join(directory, 'subject_x_items.csv'))
    convert_var_names(subjectsxitems)
    assert np.max([len(name) for name in subjectsxitems.columns])<=8, \
        "Found column names longer than 8 characters in short version"
    # save Individual Measures
    save_task_data(directory, data)
    if 'Complete' in directory:
        # save demographic targets reference
        np.savetxt(path.join(reference_dir,'demographic_health_reference.csv'), target_data.columns, fmt = '%s', delimiter=",")
        gen_reference_item_text(items_df)

    readme_lines += ["demographics_survey.csv: demographic information from expfactory-surveys\n\n"]
    readme_lines += ["alcohol_drug_survey.csv: alcohol, smoking, marijuana and other drugs from expfactory-surveys\n\n"]
    readme_lines += ["ky_survey.csv: mental health and neurological/health conditions from expfactory-surveys\n\n"]
    readme_lines += ["items.csv.gz: gzipped csv of all item information across surveys\n\n"]
    readme_lines += ["subject_x_items.csv: reshaped items.csv such that rows are subjects and columns are individual items\n\n"]
    readme_lines += ["Individual Measures: directory containing gzip compressed files for each individual measures\n\n"]