import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

from dimensional_structure.prediction_utils import run_prediction
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_recent_dataset, get_behav_data

# Results object for the most recent dataset, plus the exhaustive variable
# table read through the project helper.
results = load_results(get_recent_dataset())
data = get_behav_data(file='variables_exhaustive.csv')

# get demographics
# Scores taken from the loaded results; columns are prefixed with 'full_' so
# they do not collide with the CSV-based scores when the two are joined below.
full_demog = results['task'].DA.get_scores()
full_demog.columns = ['full_' + name for name in full_demog.columns]
demog = pd.read_csv('/home/ian/Downloads/demog_fa_scores_t1.csv', index_col=0)

# get predictors
ddm_factors = pd.read_csv('/home/ian/Downloads/ez_t1_fa_3_scores.csv', index_col=0)
ontology_factors = results['task'].EFA.get_scores()
_ddm_like_columns = ['Speeded IP', 'Caution', 'Perc / Resp']
ontology_ddm_factors = ontology_factors[_ddm_like_columns]

# compare demographics
# Element-wise difference between the CSV scores and the rows of the
# results-based scores selected by the CSV index (positional column match).
_full_demog_matched = full_demog.loc[demog.index]
diff = pd.DataFrame(
    demog.values - _full_demog_matched.values,
    index=demog.index,
    columns=demog.columns,
)
# Cross-correlation block only: rows are the CSV columns, columns are the
# 'full_' columns.
_n_demog = len(demog.columns)
corr = demog.join(full_demog).corr().iloc[:_n_demog, _n_demog:]
import matplotlib.pyplot as plt
import numpy as np
from os import makedirs, path
import pandas as pd
from scipy.spatial.distance import squareform
from sklearn.manifold import MDS
import seaborn as sns

from dimensional_structure.HCA_plots import abs_pdist
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_info, get_recent_dataset

# get dataset of interest
basedir = get_info('base_directory')
dataset = get_recent_dataset()
dataset = path.join(basedir, 'Data', dataset)
# load_results receives only the last path component of the joined path.
datafile = dataset.split(path.sep)[-1]

# load data
results = load_results(datafile)
data = results['task'].data
out = results['task'].EFA.get_loading()
nfactors = out.shape[1]

# Rows of the loading table to work with: every row whose label matches
# 'choice_reaction_time', plus rows 1-4 (positional slice) of the rows whose
# label starts with 'stop_signal.hddm' or 'stop_signal.SSRT'.
task_subset = pd.concat([
    out.filter(regex='choice_reaction_time', axis=0),
    # Raw string: '\.' inside a plain literal is an invalid escape sequence
    # (DeprecationWarning, SyntaxWarning from Python 3.12). The pattern the
    # regex engine sees is unchanged.
    out.filter(regex=r'^stop_signal\.(hddm|SSRT)', axis=0)[1:5],
])
task_subset_data = data.loc[:, task_subset.index]
task_variables = list(task_subset.index)

# Plots go under <results_directory>/ontology_reconstruction/<ID>/Plots;
# the directory is created if it does not exist yet.
plot_dir = output_dir = path.join(
    get_info('results_directory'),
    'ontology_reconstruction',
    results['task'].ID,
    'Plots',
)
makedirs(plot_dir, exist_ok=True)
Created on Thu Jun 28 19:48:18 2018 @author: ian """ import numpy as np from os import makedirs, path import pandas as pd import pickle from sklearn.covariance import GraphLassoCV from sklearn.preprocessing import scale from dimensional_structure.graph_utils import Graph_Analysis from selfregulation.utils.utils import get_behav_data, get_recent_dataset from selfregulation.utils.result_utils import load_results from selfregulation.utils.r_to_py_utils import qgraph_cor dataset = get_recent_dataset() data = get_behav_data(dataset=dataset, file='meaningful_variables_imputed.csv') all_results = load_results(dataset) def get_EFA_HCA(results, EFA): if EFA == False: return results.HCA.results['data'] else: c = results.EFA.results['num_factors'] return results.HCA.results['EFA%s_oblimin' % c] EFA = True survey_HCA = get_EFA_HCA(all_results['survey'], EFA) survey_order = survey_HCA['reorder_vec']
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

from dimensional_structure.prediction_utils import run_prediction
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_recent_dataset, get_behav_data

# Load the results object for the most recent dataset and the exhaustive
# variable table.
results = load_results(get_recent_dataset())
data = get_behav_data(file='variables_exhaustive.csv')

# get demographics
# Scores from the results object get a 'full_' prefix so that they remain
# distinguishable from the CSV scores once both are joined.
full_demog = results['task'].DA.get_scores()
full_demog.columns = ['full_' + col for col in full_demog.columns]
demog = pd.read_csv('/home/ian/Downloads/demog_fa_scores_t1.csv', index_col=0)

# get predictors
ddm_factors = pd.read_csv('/home/ian/Downloads/ez_t1_fa_3_scores.csv', index_col=0)
ontology_factors = results['task'].EFA.get_scores()
_selected_factors = ['Speeded IP', 'Caution', 'Perc / Resp']
ontology_ddm_factors = ontology_factors[_selected_factors]

# compare demographics
# Difference of the raw value arrays; rows of full_demog are aligned to the
# CSV index first, columns are matched by position.
_aligned_full = full_demog.loc[demog.index]
diff = pd.DataFrame(
    demog.values - _aligned_full.values,
    index=demog.index,
    columns=demog.columns,
)
# Keep only the off-diagonal block of the joint correlation matrix
# (CSV columns as rows, 'full_' columns as columns).
_split = len(demog.columns)
corr = demog.join(full_demog).corr().iloc[:_split, _split:]

# EZ vars
# Columns whose names end in one of the three parameter suffixes, selected
# separately for the 'EZ_' and 'hddm_' prefixes.
_EZ_PATTERN = 'EZ_(non_decision|drift|thresh)$'
_HDDM_PATTERN = 'hddm_(non_decision|drift|thresh)$'
EZ_vars = data.filter(regex=_EZ_PATTERN)
hddm_vars = data.filter(regex=_HDDM_PATTERN)
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Created on Sat Apr 13 18:41:59 2019 @author: ian """ import numpy as np import matplotlib.pyplot as plt import seaborn as sns from selfregulation.utils.utils import get_demographics, get_recent_dataset demo=get_demographics(get_recent_dataset(), drop_categorical=False) race_info = np.unique(demo.Race, return_counts=True) race_info = {k.lstrip():v for k,v in zip(race_info[0], race_info[1])} race_percentiles = {k:np.round(v/demo.shape[0]*100,2) for k,v in race_info.items()} age_stats = demo.Age.describe() print('** Race Statistics **') for x,y in race_percentiles.items(): print (x, ':', y) print('Hispanic %', demo.HispanicLatino.mean().round(3)) print('** Age and Sex **') print(age_stats) print('Female %', demo.Sex.mean().round(3)) # plots sns.set_context('paper') size=5 def style_ax(ax):
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 13 18:41:59 2019

@author: ian
"""
import numpy as np

from selfregulation.utils.utils import get_demographics, get_recent_dataset

demo = get_demographics(get_recent_dataset(), drop_categorical=False)

# Tally every distinct Race value; labels have leading whitespace removed
# before being used as keys.
race_labels, race_counts = np.unique(demo.Race, return_counts=True)
race_info = {label.lstrip(): count
             for label, count in zip(race_labels, race_counts)}
# Percentage of all rows per race label, rounded to 2 decimals.
race_percentiles = {k: np.round(v / demo.shape[0] * 100, 2)
                    for k, v in race_info.items()}
age_stats = demo.Age.describe()

print('** Race Statistics **')
for x, y in race_percentiles.items():
    print(x, ':', y)
# The mean of the column is a proportion; multiply by 100 so the value
# agrees with the '%' label and with the race percentages above.
print('Hispanic %', np.round(demo.HispanicLatino.mean() * 100, 1))
print('** Age and Sex **')
print(age_stats)
# Same proportion-to-percent scaling as for the Hispanic line.
# NOTE(review): assumes Sex is coded so that its mean is the female share,
# as the label implies - confirm against get_demographics.
print('Female %', np.round(demo.Sex.mean() * 100, 1))