Esempio n. 1
0
def RunTest(N, times, p):
    """Count fair-coin rejections over repeated simulated experiments.

    A single ``Coin(p)`` is created and shared by every experiment.  For each
    sample size ``n`` in ``N`` the coin is drawn ``n`` times per run, the
    number of ``"head"`` outcomes is passed to ``binom_test`` against a
    success probability of 0.5, and the run counts as a rejection when the
    resulting p-value is below 0.05.

    Parameters:
      N: iterable of sample sizes; its items become the keys of the result.
      times: number of runs performed for every sample size.
      p: argument forwarded to ``Coin`` (presumably the head probability --
         confirm against the ``Coin`` definition).

    Returns:
      dict mapping each sample size to the number of runs (out of ``times``)
      whose p-value fell below 0.05.
    """
    coin = Coin(p)
    rejections = dict.fromkeys(N)
    for n in N:
        n_rejected = 0
        for _run in range(times):
            # Number of draws, out of n, that came up "head".
            heads = sum(1 for _draw in xrange(n) if coin.next() == "head")
            if binom_test(heads, n, 0.5) < 0.05:
                n_rejected += 1
        rejections[n] = n_rejected

    return rejections
def first_order_analysis(s, significance=0.05):
    """Test a sequence of 'L'/'R' symbols for fairness and independence.

    Steps performed:

    1. Validate ``s``: it must be non-empty and contain only 'L' and 'R'.
    2. Count single symbols and adjacent pairs with ``count_overlapping``.
    3. Fairness: two-sided binomial test of ``n_L`` out of ``N`` against 0.5.
    4. Independence: ``binofit`` supplies a lower/upper bound for the
       probability of 'L'; 50 evenly spaced candidates ``p`` in that range
       are scanned.  For each candidate the smaller of the binomial-test
       p-values of (n_RL out of n_R) and (n_LL out of n_L) is kept, and the
       candidate with the largest such p-value is selected.  Independence is
       rejected only if even that best p-value is below ``significance``.

    Parameters:
      s: sequence of 'L' / 'R' symbols.
      significance: threshold used for both tests and passed to ``binofit``.

    Returns:
      list of ``(name, value)`` tuples holding the counts, ratios, p-values,
      rejection flags, the bounds from ``binofit``, the selected candidate
      (``'best_p_L'``) and ``'why'`` ('+', '-' or '' -- the correlation sign
      recorded for the selected candidate, blank unless independence was
      rejected).

    Raises:
      ValueError: if ``s`` is empty or contains a symbol other than 'L'/'R'.
    """
    alphabet = ['L', 'R']

    N = len(s)
    if N == 0:
        raise ValueError('Empty string')

    for ch in s:
        if ch not in alphabet:
            raise ValueError('Unknown character %r in string.' % ch)

    # Occurrences of each symbol and of each adjacent pair.
    n_L, n_R, n_RL, n_LL, n_RR, n_LR = [
        count_overlapping(s, pattern)
        for pattern in ('L', 'R', 'RL', 'LL', 'RR', 'LR')
    ]

    # Fairness test: is the share of 'L' compatible with 0.5?
    fair_pvalue = scipy.stats.binom_test(n_L, N, 0.5)
    fair_rejected = fair_pvalue < significance

    # Bounds on the probability of 'L' (a confidence interval).
    p_L_lb, p_L_ub = binofit(n_L, N, significance)

    def evaluate(p):
        # p-value and correlation sign for one candidate probability of 'L'.
        rl_pvalue = binom_test(n_RL, n_R, p)
        ll_pvalue = binom_test(n_LL, n_L, p)

        # Somewhat redundant with the tests above: the CDF tails tell whether
        # the deviation is a positive or a negative correlation.  An excess
        # of 'LL' counts as positive, an excess of 'RL' as negative.
        ll_cdf = binom.cdf(n_LL, n_L, p)
        rl_cdf = binom.cdf(n_RL, n_R, p)

        if ll_cdf < significance or rl_cdf > 1 - significance:
            sign = '-'
        elif ll_cdf > 1 - significance or rl_cdf < significance:
            sign = '+'
        else:
            sign = ''
        return min(rl_pvalue, ll_pvalue), sign

    ps = np.linspace(p_L_lb, p_L_ub, 50)
    outcomes = [evaluate(p) for p in ps]
    pvalues = [outcome[0] for outcome in outcomes]

    # The candidate that is most favourable to the independence hypothesis.
    best = np.argmax(pvalues)
    best_p = ps[best]
    indep_pvalue, best_sign = outcomes[best]
    indep_rejected = indep_pvalue < significance
    why = best_sign if indep_rejected else ''

    return [
        ('significance', significance),
        ('N', N),
        ('n_L', n_L),
        ('p_L', zdiv(n_L, N)),
        ('n_R', n_R),
        ('p_R', zdiv(n_R, N)),
        ('n_RL', n_RL),
        ('p_RL', zdiv(n_RL, n_R)),
        ('n_LL', n_LL),
        ('p_LL', zdiv(n_LL, n_L)),
        ('n_RR', n_RR),
        ('p_RR', zdiv(n_RR, n_R)),
        ('n_LR', n_LR),
        ('p_LR', zdiv(n_LR, n_L)),
        ('fair_pvalue', fair_pvalue),
        ('fair_rejected', fair_rejected),
        ('p_L_lb', p_L_lb),
        ('p_L_ub', p_L_ub),
        ('indep_pvalue', indep_pvalue),
        ('indep_rejected', indep_rejected),
        ('why', why),
        ('best_p_L', best_p),
    ]
Esempio n. 3
0
 def binom_p(self):
     """Return ``binom_test`` applied to this object's goal statistics.

     The arguments are, in order, ``self.goals()``, ``self.n()`` and
     ``self.exp_goals_frac()`` -- presumably the observed goal count, the
     number of trials and the expected goal fraction (confirm against the
     class definition).
     """
     observed = self.goals()
     trials = self.n()
     expected_frac = self.exp_goals_frac()
     return binom_test(observed, trials, expected_frac)
def las_vegas_report(outdir, page_id, results):
    """Build and write the HTML report of per-kernel filter responses.

    For every kernel in ``results`` (processed in sorted key order) the
    report node receives:

    * a 'response' plot: histograms of the responses for entries with sign
      +1 ("left") and sign -1 ("right"), restricted to responses whose
      magnitude exceeds the threshold;
    * an 'overlap' plot: log-scale histogram of ``|response|`` with the
      threshold marked;
    * a 'performance' table and a 'performance2' table (share of responses
      given, share of correct ones among those given, a two-sided binomial
      test p-value against 0.5 and the bounds expected under that null);
    * the kernel itself, via ``add_posneg``.

    The "no response" threshold is the 75th percentile of ``|response|``,
    computed separately for each kernel.

    Parameters:
      outdir: directory that receives ``<report id>.html``; resources go to
        its ``images`` subdirectory.
      page_id: suffix appended to ``'lasvegas_'`` to name the report.
      results: mapping kernel name -> mapping with the keys ``'signs'``,
        ``'response'`` and ``'kernel'``.  Assumes 'signs' and 'response' are
        equal-length 1-D numpy arrays, with signs taking the values +1 / -1
        -- TODO confirm against the caller.

    Returns:
      None.  Progress messages are printed to standard output.
    """

    def ratio2perc(i, n):
        # i out of n as a percentage string with one decimal.
        return "%.1f" % (100.0 * i / n)

    def perc(x):
        # Percentage of the entries of x that are nonzero (True).
        pos, = numpy.nonzero(x)
        return ratio2perc(len(pos), len(x))

    r = Report('lasvegas_' + page_id)
    f = r.figure('summary', cols=4, caption='Response to various filters')
    f_overlap = r.figure('summary-overlap', cols=4,
                         caption='Response area (overlap) of various filters')

    for kernel in sorted(results.keys()):
        sign = results[kernel]['signs']
        response = results[kernel]['response']
        # |response| is used as the overlap measure here.
        overlap = numpy.abs(response)

        # Threshold below which a response is treated as "no response".
        eps = percentile(overlap, 75)

        matched_filter = results[kernel]['kernel']

        left = numpy.nonzero(sign == +1)
        right = numpy.nonzero(sign == -1)

        response_right = response[right]
        response_left = response[left]

        n = r.node(kernel)

        with n.data_pylab('response') as pylab:

            # Half-width of the plotted range: 95th percentile of the "left"
            # responses.  Falls back to scipy if numpy.percentile cannot be
            # used (e.g. it is missing in the installed numpy).  Catching
            # Exception rather than a bare except keeps the best-effort
            # fallback without swallowing KeyboardInterrupt / SystemExit.
            try:
                b = numpy.percentile(response_left, 95)  # @UndefinedVariable
            except Exception:
                b = scipy.stats.scoreatpercentile(response_left, 95)

            def plothist(x, nbins, eps, **kwargs):
                # Plot the histogram of the entries of x whose magnitude
                # exceeds eps, over the range [-b, b].
                nz, = numpy.nonzero(numpy.abs(x) > eps)
                # Call form of print: same output on Python 2, and valid
                # syntax on Python 3 (matches the print call further down).
                print("using %d/%d" % (len(nz), len(x)))
                xnz = x[nz]
                hist, bin_edges = numpy.histogram(xnz, range=(-b, b),
                                                  bins=nbins)
                bins = (bin_edges[:-1] + bin_edges[1:]) * 0.5
                pylab.plot(bins, hist, **kwargs)

            nbins = 500
            plothist(response_left, nbins, eps, label='left')
            plothist(response_right, nbins, eps, label='right')

            a = pylab.axis()
            pylab.axis([-b, b, 0, a[-1]])
            pylab.legend()

        f.sub('%s/response' % kernel)

        with n.data_pylab('overlap') as pylab:
            nbins = 200
            pylab.hist(overlap, nbins, log=True, label='hist of abs.response')

            # Vertical line marking the threshold across the current y-range.
            a = pylab.axis()
            pylab.plot([eps, eps], [a[2], a[3]], 'r-', label='threshold')
            pylab.legend()

        f_overlap.sub('%s/overlap' % kernel)

        cols = ['probability', 'no response', 'guessed L', 'guessed R']
        rows = ['left saccade', 'right saccade']
        table = [
            [perc(sign == +1), perc(numpy.abs(response_left) < eps),
             perc(response_left > eps),
             perc(response_left < -eps)],
            [perc(sign == -1), perc(numpy.abs(response_right) < eps),
             perc(response_right > eps),
             perc(response_right < -eps)],
        ]

        n.table('performance', data=table, rows=rows, cols=cols)

        total = len(sign)
        # A response is "given" when its magnitude exceeds the threshold; it
        # is "correct" when, in addition, its sign agrees with the label.
        given = numpy.abs(response) > eps
        num_given = len(numpy.nonzero(given)[0])
        correct = numpy.logical_or(
            numpy.logical_and(response > eps, sign == +1),
            numpy.logical_and(response < -eps, sign == -1)
        )
        num_correct = len(numpy.nonzero(correct)[0])

        perc_given = ratio2perc(num_given, total)
        perc_not_given = ratio2perc(total - num_given, total)
        perc_correct_given = ratio2perc(num_correct, num_given)

        # Range of the correct fraction expected under H0 (probability 0.5),
        # leaving signif / 2 in each tail.
        signif = 0.01
        expected = scipy.stats.binom.ppf(
            [signif / 2, 1 - signif / 2], num_given, 0.5) / num_given
        pvalue = binom_test(num_correct, num_given, 0.5)

        cols = ['no response', 'with response',
                'correct (%given)', 'p-value', 'bounds under H0']
        table = [
            [perc_not_given, perc_given,
             perc_correct_given,
             "%.4f" % pvalue,
             "[%.1f, %.1f]" % (100 * expected[0], 100 * expected[1])],
        ]

        n.table('performance2', data=table, cols=cols)

        add_posneg(n, 'kernel', matched_filter)

    output_file = os.path.join(outdir, '%s.html' % r.id)
    resources_dir = os.path.join(outdir, 'images')
    print("Writing to %s" % output_file)
    r.to_html(output_file, resources_dir=resources_dir)