import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

from dimensional_structure.prediction_utils import run_prediction
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_recent_dataset, get_behav_data

# Load the results for the most recent dataset and the exhaustive variables.
results = load_results(get_recent_dataset())
data = get_behav_data(file='variables_exhaustive.csv')

# Demographic scores come from two places: the `DA` scores stored on the task
# results (columns renamed with a "full_" prefix so they stay distinguishable
# after the join below) and a separately saved score file read from disk.
full_demog = results['task'].DA.get_scores()
full_demog.columns = ['full_' + name for name in full_demog.columns]
demog = pd.read_csv(
    '/home/ian/Downloads/demog_fa_scores_t1.csv',
    index_col=0,
)

# Predictors: factor scores read from disk plus three named columns of the
# task EFA scores.
ddm_factors = pd.read_csv(
    '/home/ian/Downloads/ez_t1_fa_3_scores.csv',
    index_col=0,
)
ontology_factors = results['task'].EFA.get_scores()
ontology_ddm_factors = ontology_factors[
    ['Speeded IP', 'Caution', 'Perc / Resp']
]

# Compare the two sets of demographic scores.
# Element-wise difference: rows of full_demog are looked up by demog's index,
# columns are paired purely by position (raw arrays are subtracted).
diff = pd.DataFrame(
    data=demog.values - full_demog.loc[demog.index].values,
    index=demog.index,
    columns=demog.columns,
)
# Off-diagonal block of the joined correlation matrix: the first
# len(demog.columns) rows against the remaining columns.
corr = demog.join(full_demog).corr().iloc[
    :demog.shape[1],
    demog.shape[1]:,
]
import matplotlib.pyplot as plt
import numpy as np
from os import makedirs, path
import pandas as pd
from scipy.spatial.distance import  squareform
from sklearn.manifold import MDS
import seaborn as sns
from dimensional_structure.HCA_plots import abs_pdist
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_info, get_recent_dataset

# get dataset of interest
# Resolve the dataset of interest. `datafile` is the last path component of
# the joined dataset path.
basedir = get_info('base_directory')
dataset = get_recent_dataset()
dataset = path.join(basedir, 'Data', dataset)
datafile = dataset.split(path.sep)[-1]

# Load the task results and the EFA loading matrix.
results = load_results(datafile)
data = results['task'].data
out = results['task'].EFA.get_loading()
nfactors = out.shape[1]

# Build the subset of loading rows to work with: every row whose label matches
# 'choice_reaction_time', plus positions 1-4 of the rows whose label starts
# with 'stop_signal.hddm' or 'stop_signal.SSRT'.
# The pattern is a raw string so that `\.` reaches the regex engine as an
# escaped dot without triggering Python's invalid-escape-sequence warning.
task_subset = pd.concat([
    out.filter(regex='choice_reaction_time', axis=0),
    out.filter(regex=r'^stop_signal\.(hddm|SSRT)', axis=0)[1:5],
])
task_subset_data = data.loc[:, task_subset.index]
task_variables = list(task_subset.index)

# Output directory for plots; created if it does not exist yet.
plot_dir = output_dir = path.join(
    get_info('results_directory'),
    'ontology_reconstruction',
    results['task'].ID,
    'Plots',
)
makedirs(plot_dir, exist_ok=True)
"""
Created on Thu Jun 28 19:48:18 2018

@author: ian
"""
import numpy as np
from os import makedirs, path
import pandas as pd
import pickle
from sklearn.covariance import GraphLassoCV
from sklearn.preprocessing import scale

from dimensional_structure.graph_utils import Graph_Analysis
from selfregulation.utils.utils import get_behav_data, get_recent_dataset
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.r_to_py_utils import qgraph_cor
# Use the most recent dataset for both the behavioural data and the results.
dataset = get_recent_dataset()
data = get_behav_data(
    dataset=dataset,
    file='meaningful_variables_imputed.csv',
)
all_results = load_results(dataset)


def get_EFA_HCA(results, EFA):
    """Pick one entry out of ``results.HCA.results``.

    Parameters
    ----------
    results : object
        Must expose ``HCA.results`` and ``EFA.results`` as mappings.
    EFA : bool
        When equal to ``False`` the ``'data'`` entry is returned. Otherwise
        the entry keyed ``'EFA<n>_oblimin'`` is returned, where ``<n>`` is
        ``results.EFA.results['num_factors']``.

    Returns
    -------
    The selected value from ``results.HCA.results``.
    """
    # Equality (not truthiness) is kept on purpose: values such as None do
    # not compare equal to False and therefore select the EFA entry.
    if EFA == False:  # noqa: E712
        return results.HCA.results['data']

    num_factors = results.EFA.results['num_factors']
    hca_key = 'EFA%s_oblimin' % num_factors
    return results.HCA.results[hca_key]


# Select the EFA-based HCA entry for the survey results and read its
# 'reorder_vec' entry.
EFA = True
survey_HCA = get_EFA_HCA(
    all_results['survey'],
    EFA,
)
survey_order = survey_HCA['reorder_vec']
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

from dimensional_structure.prediction_utils import run_prediction
from selfregulation.utils.result_utils import load_results
from selfregulation.utils.utils import get_recent_dataset, get_behav_data

# Load the results for the most recent dataset and the exhaustive variables.
results = load_results(get_recent_dataset())
data = get_behav_data(file='variables_exhaustive.csv')

# Demographic scores from the task results (`DA`), with every column renamed
# to carry a "full_" prefix, and a second set of scores read from disk.
full_demog = results['task'].DA.get_scores()
full_demog.columns = ['full_' + name for name in full_demog.columns]
demog = pd.read_csv(
    '/home/ian/Downloads/demog_fa_scores_t1.csv',
    index_col=0,
)

# Predictors: factor scores read from disk plus three named columns of the
# task EFA scores.
ddm_factors = pd.read_csv(
    '/home/ian/Downloads/ez_t1_fa_3_scores.csv',
    index_col=0,
)
ontology_factors = results['task'].EFA.get_scores()
ontology_ddm_factors = ontology_factors[
    ['Speeded IP', 'Caution', 'Perc / Resp']
]

# Compare the two sets of demographic scores.
# Raw arrays are subtracted, so columns are paired by position; rows of
# full_demog are first looked up by demog's index.
diff = pd.DataFrame(
    data=demog.values - full_demog.loc[demog.index].values,
    index=demog.index,
    columns=demog.columns,
)
# Off-diagonal block of the joined correlation matrix: the first
# len(demog.columns) rows against the remaining columns.
corr = demog.join(full_demog).corr().iloc[
    :demog.shape[1],
    demog.shape[1]:,
]

# Pick the EZ and HDDM variables whose labels end in one of the three
# parameter names (non_decision, drift, thresh).
EZ_vars = data.filter(
    regex='EZ_(non_decision|drift|thresh)$',
)
hddm_vars = data.filter(
    regex='hddm_(non_decision|drift|thresh)$',
)
# Example no. 5
# 0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 13 18:41:59 2019

@author: ian
"""
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from selfregulation.utils.utils import get_demographics, get_recent_dataset

# Load demographics for the most recent dataset (drop_categorical=False).
demo = get_demographics(get_recent_dataset(), drop_categorical=False)

# Count rows per race label. Labels are left-stripped, and counts are
# accumulated so that labels differing only by leading whitespace are merged
# rather than the later one silently overwriting the earlier one.
race_info = {}
for race_label, n_rows in zip(*np.unique(demo.Race, return_counts=True)):
    race_label = race_label.lstrip()
    race_info[race_label] = race_info.get(race_label, 0) + n_rows

# Share of all rows per race label, as a percentage rounded to two decimals.
race_percentiles = {
    race: np.round(n / demo.shape[0] * 100, 2)
    for race, n in race_info.items()
}
age_stats = demo.Age.describe()

print('** Race Statistics **')
for race, pct in race_percentiles.items():
    print(race, ':', pct)
# NOTE(review): the values printed under "Hispanic %" and "Female %" are plain
# column means rounded to three decimals (not multiplied by 100) — confirm
# that the label matches the intended unit.
print('Hispanic %', demo.HispanicLatino.mean().round(3))
print('** Age and Sex **')
print(age_stats)
print('Female %', demo.Sex.mean().round(3))


# plots
sns.set_context('paper')
size = 5
def style_ax(ax):
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 13 18:41:59 2019

@author: ian
"""
import numpy as np
from selfregulation.utils.utils import get_demographics, get_recent_dataset

# Load demographics for the most recent dataset (drop_categorical=False).
demo = get_demographics(get_recent_dataset(), drop_categorical=False)

# Count rows per race label. Labels are left-stripped, and counts are
# accumulated so that labels differing only by leading whitespace are merged
# rather than the later one silently overwriting the earlier one.
race_info = {}
for race_label, n_rows in zip(*np.unique(demo.Race, return_counts=True)):
    race_label = race_label.lstrip()
    race_info[race_label] = race_info.get(race_label, 0) + n_rows

# Share of all rows per race label, as a percentage rounded to two decimals.
race_percentiles = {
    race: np.round(n / demo.shape[0] * 100, 2)
    for race, n in race_info.items()
}
age_stats = demo.Age.describe()

print('** Race Statistics **')
for race, pct in race_percentiles.items():
    print(race, ':', pct)
# NOTE(review): the values printed under "Hispanic %" and "Female %" are plain
# column means rounded to three decimals (not multiplied by 100) — confirm
# that the label matches the intended unit.
print('Hispanic %', demo.HispanicLatino.mean().round(3))
print('** Age and Sex **')
print(age_stats)
print('Female %', demo.Sex.mean().round(3))