def get_change(self, retest_dataset):
    demographics = self.data
    retest = get_demographics(retest_dataset)
    retest = residualize_baseline(retest, self.residualize_vars)
    if 'BMI' in retest.columns:
        retest.drop(['WeightPounds', 'HeightInches'], axis=1, inplace=True)
    # get common variables
    common_index = sorted(set(demographics.index) & set(retest.index))
    common_columns = sorted(set(demographics.columns) & set(retest.columns))
    demographics = demographics.loc[common_index, common_columns]
    retest = retest.loc[common_index, common_columns]
    raw_change = retest - demographics
    # convert to scores
    c = self.get_c()
    demographic_factor_weights = get_attr(
        self.results['factor_tree_Rout_oblimin'][c], 'weights')
    demographic_scores = scale(demographics).dot(demographic_factor_weights)
    retest_scores = scale(retest).dot(demographic_factor_weights)
    factor_change = pd.DataFrame(retest_scores - demographic_scores,
                                 index=common_index,
                                 columns=self.get_scores().columns)
    factor_change = self.reorder_factors(factor_change)
    factor_change.columns = [i + ' Change' for i in factor_change.columns]
    return factor_change, raw_change
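# Usage sketch (illustrative, not part of the original file): assumes a fitted
# analysis object `analysis` exposing get_change, and uses the retest dataset
# name referenced elsewhere in this repo:
#
#     factor_change, raw_change = analysis.get_change('Retest_02-03-2018')
#     factor_change.mean()  # average change on each demographic factor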
def __init__(self, datafile=None, loading_thresh=None, dist_metric=distcorr,
             boot_iter=1000, name='', filter_regex='.', ID=None,
             results_dir=None, residualize_vars=['Age', 'Sex'],
             saved_obj_file=None):
    """
    Args:
        datafile: name of a directory in "Data"
        loading_thresh: threshold to apply to factor analytic loadings
        dist_metric: distance metric for hierarchical clustering that is
            passed to pdist
        boot_iter: number of bootstrap iterations
        name: string prefix for the generated ID; defaults to empty string
        filter_regex: regex string passed to data.filter
        ID: specify if a specific ID is desired
        results_dir: where to save results
        residualize_vars: demographic variables to residualize out
        saved_obj_file: saved object to initialize from, if available
    """
    assert datafile is not None or saved_obj_file is not None
    # initialize with the saved object if available
    if saved_obj_file:
        self._load_init(saved_obj_file)
    else:
        # set vars
        self.dataset = datafile
        self.loading_thresh = loading_thresh
        self.dist_metric = dist_metric
        self.boot_iter = boot_iter
        self.residualize_vars = residualize_vars
        if ID is None:
            self.ID = '%s_%s' % (name, str(random.getrandbits(16)))
        else:
            self.ID = '%s_%s' % (name, str(ID))
        # set up output files
        self.results_dir = results_dir
        # load data
        self.data = get_behav_data(dataset=datafile,
                                   file='meaningful_variables_imputed.csv',
                                   filter_regex=filter_regex,
                                   verbose=True)
        self.data_no_impute = get_behav_data(
            dataset=datafile,
            file='meaningful_variables_clean.csv',
            filter_regex=filter_regex,
            verbose=True)
        self.demographics = get_demographics()
        # initialize analysis classes
        self.DA = Demographic_Analysis(self.demographics,
                                       residualize_vars=self.residualize_vars,
                                       boot_iter=self.boot_iter)
        self.EFA = EFA_Analysis(self.data, self.data_no_impute,
                                boot_iter=self.boot_iter)
        self.HCA = HCA_Analysis(dist_metric=self.dist_metric)
    # load the results from the saved object
    if saved_obj_file:
        self._load_results(saved_obj_file)
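# Usage sketch (illustrative, not part of the original file): the enclosing
# class is not shown here, so `Results` is an assumed name, and the datafile
# value is a hypothetical directory under "Data":
#
#     results = Results(datafile='Example_Dataset',
#                       name='demo',
#                       residualize_vars=['Age', 'Sex'])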
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 13 18:41:59 2019

@author: ian
"""
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from selfregulation.utils.utils import get_demographics, get_recent_dataset

demo = get_demographics(get_recent_dataset(), drop_categorical=False)
race_info = np.unique(demo.Race, return_counts=True)
race_info = {k.lstrip(): v for k, v in zip(race_info[0], race_info[1])}
# percentage of participants in each race category
race_percentiles = {k: np.round(v / demo.shape[0] * 100, 2)
                    for k, v in race_info.items()}
age_stats = demo.Age.describe()

print('** Race Statistics **')
for x, y in race_percentiles.items():
    print(x, ':', y)
print('Hispanic %', demo.HispanicLatino.mean().round(3))
print('** Age and Sex **')
print(age_stats)
print('Female %', demo.Sex.mean().round(3))

# plots
sns.set_context('paper')
size = 5

def style_ax(ax):
    # the body was truncated in the original; a minimal assumed completion
    # that declutters axes for publication-style figures
    sns.despine(ax=ax)
    ax.tick_params(labelsize=size * 2)
import matplotlib.pyplot as plt
import numpy as np
from os import path
import pandas as pd
import seaborn as sns

from selfregulation.utils.plot_utils import (beautify_legend, format_num,
                                             format_variable_names)
from selfregulation.utils.utils import (filter_behav_data, get_behav_data,
                                        get_demographics, get_info)

# correlation of ravens and literature
# replication of "Intelligence and socioeconomic success: A meta-analytic
# review of longitudinal research"
base_dir = get_info('base_directory')
ext = 'png'

data = get_behav_data()
demographics = get_demographics()
data = data.loc[demographics.index]
# get dataframe of the intelligence measure (Raven's progressive matrices)
# and demographics
df = pd.concat([data.filter(regex='raven'), demographics], axis=1)

# get raven's reliability
reliability = get_behav_data(dataset='Retest_02-03-2018',
                             file='bootstrap_merged.csv.gz')
raven_reliability = reliability.groupby('dv').icc.mean().filter(
    regex='raven')[0]
# demographic reliabilities (assumed to be perfect)
demo_reliabilities = [1.0] * demographics.shape[1]

# correlations of each demographic variable with raven's score
# (drop the last row, which is raven's correlation with itself)
correlations = df.corr().filter(regex='raven').sort_values(
    by='ravens.score').iloc[:-1]
correlations.insert(0, 'target_reliability', demo_reliabilities)
# disattenuate the correlations using the measures' reliabilities
adjusted = (correlations['ravens.score']
            / (raven_reliability * correlations['target_reliability'])**.5)
correlations.insert(0, 'adjusted_correlation', adjusted)
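# The adjustment above is Spearman's correction for attenuation:
#
#     r_adjusted = r_observed / sqrt(reliability_raven * reliability_target)
#
# Minimal worked example with illustrative numbers (not taken from the data):
#     0.30 / (0.80 * 1.0) ** 0.5  ->  ~0.335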