def test_count_feature_observed(self):
    """Check the observed counts that ``count_feature`` reports for 'sil'.

    One small annotation table is built per group (control and depressed),
    each wrapped in a single-element list, and handed to ``count_feature``
    together with the two durations. Only the observed counts are checked;
    the expected counts returned alongside them are ignored here.
    """
    columns = ['line', 'subject', 'tier2', 'tier3', 'tier1', 'tmin', 'tmax']

    control_rows = [
        columns,
        ['1', 'CF61WOM_47', 'sil', '?', '{noise}', '0', '1.375'],
        ['2', 'CF61WOM_47', 'clause1_exp-s1', 'exp',
         '"allora parlaci un poco di te e della tua famiglia?" ',
         '1.375', '4.046'],
        ['3', 'CF61WOM_47', 'sil', '?', '(.)_exp', '4.046', '4.214'],
        ['4', 'CF61WOM_47', 'clause2_exp-s1', 'exp',
         '"quanti componenti sono" ', '4.214', '5.469'],
        ['5', 'CF61WOM_47', 'clause3_exp-s1', 'exp',
         '"e cosa fa"', '5.469', '5.867'],
        ['6', 'CF61WOM_47', 'sil', '?', '[]', '5.867', '6.137'],
        ['7', 'CF61WOM_47', 'clause1-s1', 'sub', 'quindi', '6.137', '6.633'],
    ]
    depressed_rows = [
        columns,
        ['1', 'PM33ATR_66', 'noise', '?', '{noise}', '0', '1.739'],
        ['2', 'PM33ATR_66', 'clause1_exp-s1', 'exp',
         '"raccontamo un po\' come"', '1.739', '3.059'],
        ['3', 'PM33ATR_66', 'sil', 'f', ':::e', '3.059', '3.375'],
        ['4', 'PM33ATR_66', 'fil', '?', ':m', '3.375', '4.046'],
        ['5', 'PM33ATR_66', 'clause1_exp-s2', '?',
         '"hai passato quest\'ultima settimana"', '4.046', '5.727'],
        ['6', 'PM33ATR_66', 'sil', '?', '(.)v', '5.727', '6.659'],
        ['7', 'PM33ATR_66', 'clause1-s1', 'sub',
         "allora quest ultima settimana l'ho passata", '6.659', '9.187'],
    ]

    control_tables = [np.array(control_rows)]
    depressed_tables = [np.array(depressed_rows)]

    # These equal the tmax of the last row of each table above.
    control_total = 6.633
    depressed_total = 9.187

    # 'sil' is the tier2 value of 3 control rows and 2 depressed rows;
    # presumably that column is what count_feature tallies -- confirm
    # against its implementation.
    wanted_counts = [3, 2]

    outcome = count_feature(
        'sil', control_tables, depressed_tables,
        control_total, depressed_total)
    observed_counts, _expected_counts = outcome

    np.testing.assert_almost_equal(wanted_counts, observed_counts, 2)
from counts.count_analysis import (
    import_data,
    count_feature,
    return_chisquare,
)

# Load both groups' data and durations in one call.
(control_data,
 depressed_data,
 control_duration,
 depressed_duration) = import_data()

# Observed vs. expected counts for the 'ove' feature across the two groups.
observed, expected = count_feature(
    'ove',
    control_data,
    depressed_data,
    control_duration,
    depressed_duration,
)

# Print whatever return_chisquare yields for those counts.
chisquare = return_chisquare(observed, expected)
print(chisquare)
# Feature labels that are tested one by one.
features = ['bac', 'cry', 'fil', 'lau', 'len', 'ove', 'sil']

control_data, depressed_data, control_duration, depressed_duration = import_data()

# Element [1] of return_chisquare's result is used as the feature's p-value.
all_pvalues = {}
for name in features:
    observed_counts, expected_counts = count_feature(
        name, control_data, depressed_data,
        control_duration, depressed_duration)
    all_pvalues[name] = return_chisquare(observed_counts, expected_counts)[1]

# Keep only the features whose p-value is below 0.05, then order them by
# ascending p-value.
pvalues = {name: p for name, p in all_pvalues.items() if p < 0.05}
sorted_pvalues = sorted(pvalues.items(), key=operator.itemgetter(1))

# The denominator is the number of features tested, not the number kept.
test_count = len(features)

# Per-feature threshold: ALPHA * rank / test_count, with rank starting at 1
# for the smallest p-value.
# NOTE(review): this has the form of the Benjamini-Hochberg critical value --
# confirm that is the intended correction.
thresholds = {
    name: ALPHA * rank / test_count
    for rank, (name, _) in enumerate(sorted_pvalues, start=1)
}

for name, p in sorted_pvalues:
    print('{0} {1} -- threshold: {2}'.format(name, p, thresholds[name]))