예제 #1
0
    def test_count_feature_observed(self):
        """Check the observed counts returned by ``count_feature`` for 'sil'.

        Each fixture is a one-element list holding a 2-D array of strings
        whose first row names the columns (line, subject, tier2, tier3,
        tier1, tmin, tmax).  The control table contains three rows whose
        ``tier2`` value is 'sil' and the depressed table contains two, which
        matches the expected ``[3, 2]`` (presumably ordered control first,
        then depressed -- confirm against ``count_feature``).
        """
        control_data = [
            np.array([
                ['line', 'subject', 'tier2', 'tier3', 'tier1', 'tmin', 'tmax'],
                ['1', 'CF61WOM_47', 'sil', '?', '{noise}', '0', '1.375'],
                [
                    '2', 'CF61WOM_47', 'clause1_exp-s1', 'exp',
                    '"allora parlaci un poco di te e della tua famiglia?" ',
                    '1.375', '4.046',
                ],
                ['3', 'CF61WOM_47', 'sil', '?', '(.)_exp', '4.046', '4.214'],
                [
                    '4', 'CF61WOM_47', 'clause2_exp-s1', 'exp',
                    '"quanti componenti sono" ', '4.214', '5.469',
                ],
                [
                    '5', 'CF61WOM_47', 'clause3_exp-s1', 'exp', '"e cosa fa"',
                    '5.469', '5.867',
                ],
                ['6', 'CF61WOM_47', 'sil', '?', '[]', '5.867', '6.137'],
                [
                    '7', 'CF61WOM_47', 'clause1-s1', 'sub', 'quindi', '6.137',
                    '6.633',
                ],
            ])
        ]

        depressed_data = [
            np.array([
                ['line', 'subject', 'tier2', 'tier3', 'tier1', 'tmin', 'tmax'],
                ['1', 'PM33ATR_66', 'noise', '?', '{noise}', '0', '1.739'],
                [
                    '2', 'PM33ATR_66', 'clause1_exp-s1', 'exp',
                    '"raccontamo un po\' come"', '1.739', '3.059',
                ],
                ['3', 'PM33ATR_66', 'sil', 'f', ':::e', '3.059', '3.375'],
                ['4', 'PM33ATR_66', 'fil', '?', ':m', '3.375', '4.046'],
                [
                    '5', 'PM33ATR_66', 'clause1_exp-s2', '?',
                    '"hai passato quest\'ultima settimana"', '4.046', '5.727',
                ],
                ['6', 'PM33ATR_66', 'sil', '?', '(.)v', '5.727', '6.659'],
                [
                    '7', 'PM33ATR_66', 'clause1-s1', 'sub',
                    "allora quest ultima settimana l'ho passata", '6.659',
                    '9.187',
                ],
            ])
        ]

        feature = 'sil'
        real_observed_f_count = [3, 2]

        # The two duration arguments equal the last ``tmax`` of each table.
        # Only the observed counts are checked here, so the second return
        # value is discarded.
        observed_f_count, _ = count_feature(
            feature, control_data, depressed_data, 6.633, 9.187)

        # assert_almost_equal takes (actual, desired); passing them in that
        # order keeps the labels in a failure message correct.
        np.testing.assert_almost_equal(
            observed_f_count, real_observed_f_count, decimal=2)
예제 #2
0
import operator

from counts.count_analysis import import_data
from counts.count_analysis import count_feature
from counts.count_analysis import return_chisquare

# Load both groups together with their durations, then run the chi-square
# helper on the observed/expected counts of the single feature 'ove'.
control_data, depressed_data, control_duration, depressed_duration = import_data()

observed, expected = count_feature(
    'ove',
    control_data,
    depressed_data,
    control_duration,
    depressed_duration,
)
chisquare = return_chisquare(observed, expected)

# Print whatever return_chisquare produced, unmodified.
print(chisquare)
# Run the chi-square helper for every feature code below, keep the features
# whose p-value is under 0.05, and print each of them next to a rank-scaled
# threshold of ALPHA * rank / number_of_features_tested.
features = [
    'bac',
    'cry',
    'fil',
    'lau',
    'len',
    'ove',
    'sil',
]

control_data, depressed_data, control_duration, depressed_duration = import_data()

# Element [1] of return_chisquare's result is used as the feature's p-value.
pvalues = {}
for feature in features:
    obs, exp = count_feature(feature, control_data, depressed_data, control_duration, depressed_duration)
    pvalues[feature] = return_chisquare(obs, exp)[1]

# Keep only the features below the 0.05 cut-off, ordered by ascending p-value.
pvalues = {feature: pvalue for feature, pvalue in pvalues.items() if pvalue < 0.05}
sorted_pvalues = sorted(pvalues.items(), key=operator.itemgetter(1))

# The divisor is the number of features tested, not the number that survived
# the 0.05 filter.
test_count = len(features)

# Rank r (1-based, ascending p-value) gets the threshold ALPHA * r / test_count.
# NOTE(review): this looks like the Benjamini-Hochberg critical values --
# confirm.  ALPHA is not defined in the visible part of this file; it must be
# provided before this point.
thresholds = {
    feature: ALPHA * rank / test_count
    for rank, (feature, _) in enumerate(sorted_pvalues, start=1)
}

for feature, pvalue in sorted_pvalues:
    print('{0} {1} -- threshold: {2}'.format(feature, pvalue, thresholds[feature]))