Beispiel #1
0
def chisquare(matrix):
    """Run Pearson's chi-square test of independence on a contingency table.

    Args:
        matrix: List of rows of observed counts. It must contain at least
            one row; the length of row 0 is taken as the number of columns.

    Returns:
        A ``(chi, p)`` tuple: the chi-square statistic summed over all
        cells, and its upper-tail probability for
        ``(rows - 1) * (columns - 1)`` degrees of freedom.
    """
    dim_x = len(matrix)
    dim_y = len(matrix[0])

    # Marginal totals are computed once rather than once per cell.
    row_totals = [sum(row) for row in matrix]
    col_totals = [sum(row[j] for row in matrix) for j in range(dim_y)]
    sum_all = float(sum(row_totals))

    matrix_chi = []
    for i in range(dim_x):
        terms = []
        for j in range(dim_y):
            if sum_all:
                expected = row_totals[i] * col_totals[j] / sum_all
            else:
                # A zero grand total would divide by zero; treat the
                # expected count as zero, like any other empty cell.
                expected = 0.0
            deviation = matrix[i][j] - expected
            # A zero expected count is divided by 1 so the cell contributes
            # its squared deviation instead of raising ZeroDivisionError.
            divide_by = expected if expected != 0 else 1
            terms.append(deviation * deviation / divide_by)
        matrix_chi.append(terms)
    chi = sum(sum(terms) for terms in matrix_chi)

    # NOTE(review): assumes `stats` is scipy.stats. SciPy 1.0 removed
    # `chisqprob`; `chi2.sf` is the replacement named in its deprecation
    # notice. Any `stats` module that still has `chisqprob` keeps using it.
    upper_tail = getattr(stats, 'chisqprob', None)
    if upper_tail is None:
        upper_tail = stats.chi2.sf
    return chi, upper_tail(chi, (dim_x - 1) * (dim_y - 1))
Beispiel #2
0
def chisquare(matrix):
    """Compute the chi-square independence statistic and its p-value.

    Args:
        matrix: Observed counts as a list of rows. At least one row is
            required; the column count is read from the first row.

    Returns:
        Tuple ``(chi, p)`` where ``chi`` is the sum over all cells of
        ``(observed - expected) ** 2 / expected`` and ``p`` is the
        chi-square upper-tail probability with
        ``(rows - 1) * (columns - 1)`` degrees of freedom.
    """
    n_rows = len(matrix)
    n_cols = len(matrix[0])

    # Compute each marginal total a single time; the expected count of a
    # cell is (row total * column total) / grand total.
    row_totals = [sum(row) for row in matrix]
    col_totals = [sum(row[j] for row in matrix) for j in range(n_cols)]
    grand_total = float(sum(row_totals))

    def expected_count(i, j):
        # With a zero grand total the quotient is undefined; use zero so
        # the cell is handled like any other zero-expectation cell below.
        if not grand_total:
            return 0.0
        return row_totals[i] * col_totals[j] / grand_total

    chi_by_row = []
    for i in range(n_rows):
        row_sum_terms = []
        for j in range(n_cols):
            expected = expected_count(i, j)
            diff = matrix[i][j] - expected
            # Divide by 1 when the expected count is zero to avoid a
            # ZeroDivisionError on empty rows or columns.
            denominator = expected if expected != 0 else 1
            row_sum_terms.append(diff * diff / denominator)
        chi_by_row.append(row_sum_terms)
    chi = sum(sum(terms) for terms in chi_by_row)

    degrees_of_freedom = (n_rows - 1) * (n_cols - 1)
    # NOTE(review): `stats` is presumed to be scipy.stats. `chisqprob` was
    # removed in SciPy 1.0, so fall back to the equivalent `chi2.sf` only
    # when the attribute is missing.
    if hasattr(stats, 'chisqprob'):
        p_value = stats.chisqprob(chi, degrees_of_freedom)
    else:
        p_value = stats.chi2.sf(chi, degrees_of_freedom)
    return chi, p_value
def likelihoodRatioTest(rawString_n, rawString_d, df):
    """Return the chi-square p-value of a likelihood-ratio statistic.

    The statistic is ``-2 * (ln(rawString_n) - ln(rawString_d))``. It is
    printed, rounded to two decimals, before the p-value is computed.

    Args:
        rawString_n: First value handed to the external ``ln`` helper
            (presumably a likelihood in string form; confirm against
            ``ln``).
        rawString_d: Second value handed to ``ln``, subtracted from the
            first.
        df: Degrees of freedom for the chi-square upper-tail probability.

    Returns:
        The chi-square upper-tail probability of the statistic.
    """
    test_stat = -2 * (ln(rawString_n) - ln(rawString_d))

    # A parenthesised single argument prints identically on Python 2 and 3.
    print(round(test_stat, 2))

    # NOTE(review): assumes `stats` is scipy.stats, where `chisqprob` was
    # removed in 1.0; `chi2.sf` is used only when it is missing.
    upper_tail = getattr(stats, 'chisqprob', None)
    if upper_tail is None:
        upper_tail = stats.chi2.sf
    return upper_tail(test_stat, df)
def calcLRT(LRT_FILE, results):
    """Write one LaTeX table row per consecutive pair of model results.

    Each model ``results[i]`` (for ``i >= 1``) is compared with its
    predecessor ``results[i - 1]`` using the statistic
    ``-2 * (ln(likelihood_i) - ln(likelihood_im1))`` and the difference in
    free-parameter counts ``df_im1 - df_i``. ``results[0]`` has no
    predecessor and produces no row.

    Args:
        LRT_FILE: Path of the output file; it is opened in ``'w'`` mode, so
            existing content is replaced.
        results: Sequence of mappings with ``'model_name'`` and
            ``'likelihood'`` entries. Both are concatenated into the row
            text, so they must be strings. Each mapping is also passed to
            ``countFreeParameters``.
    """
    # NOTE(review): assumes `stats` is scipy.stats, where `chisqprob` was
    # removed in 1.0; `chi2.sf` is used only when it is missing.
    upper_tail = getattr(stats, 'chisqprob', None)
    if upper_tail is None:
        upper_tail = stats.chi2.sf

    # The `with` block guarantees the rows are flushed and the handle is
    # closed before the "saved" message is printed, even on error.
    with open(LRT_FILE, 'w') as out:
        for i in range(1, len(results)):
            df_im1 = countFreeParameters(results[i - 1])
            df_i = countFreeParameters(results[i])
            likelihood_im1 = results[i - 1]['likelihood']
            likelihood_i = results[i]['likelihood']

            test_stat = -2 * (ln(likelihood_i) - ln(likelihood_im1))
            delta_df = df_im1 - df_i
            test_prob = upper_tail(test_stat, delta_df)

            # Cells of one LaTeX table row, joined by " & " and terminated
            # by the LaTeX row break.
            cells = [
                results[i]['model_name'],
                "some description",
                str(df_i),
                str(delta_df),
                likelihood_i,
                str(round(test_stat, 2)),
                str(round(test_prob, 4)),
            ]
            out.write(" & ".join(cells) + " " + "\\\\" + "\n")

    # A parenthesised single argument prints identically on Python 2 and 3.
    print("LTR saved to file '" + LRT_FILE + "'")
    # NOTE(review): the `def` line that owns the statements below is not
    # visible here; they compare a single Gaussian against a Gaussian mixture
    # with a log-likelihood-ratio test and print the result.

    # Read one float per line from the file named by argv[1].
    # NOTE(review): the file object opened here is never explicitly closed.
    data = [float(d) for d in open(argv[1], 'r')]

    # Null model: one Gaussian built from mean(data) and std(data); its
    # log-likelihood is the sum of log(pdf) over every data point.
    uni = Gaussian(mean(data), std(data))
    uni_loglike = sum(log(uni.pdf(d)) for d in data)

    print 'Best singleton: {0}'.format(uni)
    print 'Null LL: {0:4.6}'.format(uni_loglike)

    # Alternative model: keep the mixture with the highest log-likelihood
    # seen across all restarts and iterations.
    # Start with no candidate and a score that any real value beats.
    best_gaus = None
    best_loglike = float('-inf')
    stderr.write('Computing best model with random restarts...\n')
    # Both loops below use the name `i`; neither body reads it, so the
    # shadowing has no effect.
    for i in xrange(_rand_restarts):
        mix = GaussianMixture(data, _mu_min, _mu_max, _sigma_min, _sigma_max)
        # ZeroDivisionError / ValueError raised by an iteration (bad starting
        # points) are deliberately ignored; the loop just moves on.
        for i in xrange(_n_iterations):
            try:
                mix.iterate()
                if mix.loglike > best_loglike:
                    best_loglike = mix.loglike
                    # NOTE(review): this stores a reference to `mix`, not a
                    # copy; if iterate() mutates `mix` in place, later
                    # iterations of this restart also change best_gaus.
                    best_gaus = mix
            except (ZeroDivisionError, ValueError):
                pass
    # NOTE(review): if no iteration ever beats -inf, best_gaus is still None
    # and `best_gaus.loglike` below raises AttributeError.
    print 'Best {0}'.format(best_gaus)
    print 'Alternative LL: {0:4.6}'.format(best_gaus.loglike)
    # Likelihood-ratio statistic: twice the log-likelihood gain of the
    # mixture over the single Gaussian.
    test_stat = -2 * uni_loglike + 2 * best_gaus.loglike
    print 'Test statistic for LLR (Chi-sq, df=3): {0:4.6}'.format(test_stat)
    # The p-value uses 3 degrees of freedom, matching the label printed above.
    print 'P = {0:4.6}'.format(chisqprob(test_stat, 3))