コード例 #1
0
def levy(beta, n):
    """Draw ``n`` heavy-tailed random steps as a ratio of two Gaussian samples.

    A zero-mean normal sample ``u`` with scale ``sigma_u`` is divided by
    ``|v| ** (1 / beta)``, where ``v`` is a standard normal sample.  The scale
    ``sigma_u`` is derived from ``beta`` through ``G`` (assumed to be the
    gamma function -- confirm against the file's imports).

    Arguments :
        beta : Exponent controlling the tail of the step distribution.

        n : Number of steps to draw.

    Returns :
        Numpy array of shape (n, 1) holding the generated steps.
    """
    numerator = G(1 + beta) * np.sin(np.pi * beta / 2)
    denominator = G((1 + beta) / 2) * beta * 2 ** ((beta - 1) / 2)
    sigma_u = (numerator / denominator) ** (1 / beta)
    sigma_v = 1

    # Draw order matters for reproducibility under a fixed seed: u first, then v.
    u = np.random.normal(0, sigma_u, size=n)
    v = np.random.normal(0, sigma_v, size=n)

    steps = u / (np.abs(v) ** (1 / beta))
    return steps.reshape(n, 1)
コード例 #2
0
    def var_inference(self, num_start=1, display=True, max_iter=100, tol_L=1e-4):
        """Run coordinate-ascent variational inference with optional random restarts.

        Each restart alternates between updating Beta parameters ``(av, bv)`` and
        ``(atheta, btheta)`` from the current responsibilities ``self.gamma`` and
        recomputing ``self.gamma`` from the resulting expectations, tracking a
        lower bound ``L`` after every sweep.  The formulas look like a truncated
        stick-breaking mixture of Bernoulli components with Beta priors
        (NOTE(review): inferred from the update equations -- confirm against the
        class documentation).

        Arguments :
            num_start : Number of runs.  The first run starts from the current
                        ``self.gamma``; later runs start from random row-normalised
                        responsibilities of shape (self.N, self.T).

            display : If True, print the bound and the number of occupied
                      components after every iteration.

            max_iter : Maximum number of sweeps per run.

            tol_L : A run stops once two consecutive bounds differ by less than this.

        Returns :
            List with the bound after each sweep of the best run (the run whose
            final bound is the largest).  ``self.gamma`` is set to that run's
            responsibilities.  The list is empty, and ``self.gamma`` is left at its
            initial value, when no run produced a comparable bound (for example
            ``num_start == 0`` or ``max_iter == 0``).
        """
        X = self.X
        X_neg = 1 - X

        # Prior normalisation constants of the bound; they do not depend on the
        # variational parameters, so compute them once.  The Beta(a, b)
        # log-normaliser is lnG(a + b) - lnG(a) - lnG(b).
        fsz = self.T * self.D
        theta_prior_const = fsz * (gammaln(self.a + self.b)
                                   - gammaln(self.a) - gammaln(self.b))
        v_prior_const = self.T * (gammaln(self.alpha + 1)
                                  - gammaln(self.alpha) - gammaln(1))

        best_L = -np.inf
        best_gamma = self.gamma
        best_L_list = []

        for i in range(num_start):
            if i != 0:
                # Random restart: fresh responsibilities, each row summing to one.
                gamma = np.random.uniform(size=(self.N, self.T))
                self.gamma = gamma / np.sum(gamma, axis=1, keepdims=True)

            l_list = []
            for j in range(max_iter):
                # --- update q(v): bv[t] = alpha + responsibility mass after t ---
                gamma_colsum = np.sum(self.gamma, axis=0)
                av = 1 + gamma_colsum
                bv = self.alpha + np.sum(self.gamma) - np.cumsum(gamma_colsum)

                # --- update q(theta) ---
                atheta = self.a + np.dot(self.gamma.T, X)
                btheta = self.b + np.dot(self.gamma.T, X_neg)

                # --- expectations of the log-parameters under q ---
                log_theta_expct = psi(atheta) - psi(atheta + btheta)
                log_neg_theta_expct = psi(btheta) - psi(atheta + btheta)
                log_v_expct = psi(av) - psi(av + bv)
                log_neg_v_expct = psi(bv) - psi(av + bv)

                s1 = np.dot(log_theta_expct, X.T)
                s2 = np.dot(log_neg_theta_expct, X_neg.T)
                s3 = log_v_expct
                # Exclusive cumulative sum: contribution of the preceding sticks.
                s4 = np.cumsum(log_neg_v_expct) - log_neg_v_expct

                # --- update q(z) ---
                # Subtract the row maximum before exponentiating so a row cannot
                # underflow to all zeros; the shift cancels in the normalisation.
                log_rho = s1.T + s2.T + s3 + s4
                log_rho = log_rho - np.max(log_rho, axis=1, keepdims=True)
                gamma = np.exp(log_rho)
                self.gamma = gamma / np.sum(gamma, axis=1, keepdims=True)

                # --- lower bound: E_q[log p] ---
                lf_s1 = np.sum((self.a - 1) * log_theta_expct
                               + (self.b - 1) * log_neg_theta_expct) + theta_prior_const
                lf_s2 = np.sum((self.alpha - 1) * log_neg_v_expct) + v_prior_const
                lf_s3 = np.sum((s1 + s2) * self.gamma.T)
                lf_s4 = np.sum((s3 + s4) * self.gamma)
                l_full = lf_s1 + lf_s2 + lf_s3 + lf_s4

                # --- lower bound: E_q[log q(v)] ---
                lv_s1 = np.sum((av - 1) * log_v_expct)
                lv_s2 = np.sum((bv - 1) * log_neg_v_expct)
                lv_s3 = np.sum(gammaln(av + bv) - gammaln(av) - gammaln(bv))
                l_v = lv_s1 + lv_s2 + lv_s3

                # --- lower bound: E_q[log q(theta)] ---
                ltheta_s1 = np.sum((atheta - 1) * log_theta_expct)
                ltheta_s2 = np.sum((btheta - 1) * log_neg_theta_expct)
                ltheta_s3 = np.sum(gammaln(atheta + btheta)
                                   - gammaln(atheta) - gammaln(btheta))
                l_theta = ltheta_s1 + ltheta_s2 + ltheta_s3

                # --- lower bound: E_q[log q(z)] ---
                # Zero responsibilities contribute 0 (limit of x*log(x)); skipping
                # them avoids 0 * -inf = NaN poisoning the bound.
                positive = self.gamma > 0
                l_z = np.sum(self.gamma[positive] * np.log(self.gamma[positive]))

                l_final = l_full - l_v - l_theta - l_z

                if display:
                    print("Iteration_number=" + str(j))
                    print("L = " + str(l_final))
                    cl_num = np.argmax(self.gamma, axis=1)
                    clustersNums = np.unique(cl_num)
                    n_cl = clustersNums.shape[0]
                    print("number of components:" + str(n_cl))
                    print('=' * 20)

                l_list.append(l_final)
                if j != 0 and abs(l_list[j] - l_list[j - 1]) < tol_L:
                    break

            # Keep the run with the highest final bound.
            if l_list and l_list[-1] > best_L:
                best_L = l_list[-1]
                best_gamma = self.gamma
                best_L_list = l_list

        self.gamma = best_gamma
        return best_L_list
コード例 #3
0
ファイル: BumpHunter.py プロジェクト: sam-cal/pyBumpHunter
    def PlotBump(self, data, bkg, is_hist=False, filename=None):
        '''
        Plot the data and background histograms with the bump found by BumpHunter highlighted.

        The figure has two panels: the upper one shows the background histogram, the data
        points and the two edges of the bump window; the lower one shows a signed per-bin
        significance of the data with respect to the background.

        The bump window is read from self.min_loc_ar[0] and self.min_width_ar[0], so a scan
        must have filled them before this method is called.

        Arguments :
            data : Numpy array containing the raw unbinned data (or the bin contents if
                   is_hist is not False).

            bkg : Numpy array containing the raw unbinned background (or the bin contents if
                  is_hist is not False).

            is_hist : Boolean specifying if data and bkg are in histogram form or not.
                      Default to False.

            filename : Name of the file in which the plot will be saved. If None, the plot
                       will be just shown but not saved. Default to None.
        '''

        # Only the literal False selects the raw-data path (same test as before)
        raw_input = is_hist is False

        # Get the data in histogram form
        if raw_input:
            H = np.histogram(data, bins=self.bins, range=self.rang)
        else:
            H = [data, self.bins]
        counts, edges = H[0], H[1]

        # Get bump min and max (bin edges delimiting the window)
        first_bin = self.min_loc_ar[0]
        end_bin = first_bin + self.min_width_ar[0]
        Bmin = edges[first_bin]
        Bmax = edges[end_bin]

        # Get the background in histogram form
        if raw_input:
            Hbkg = np.histogram(bkg,
                                bins=self.bins,
                                range=self.rang,
                                weights=self.weights)[0]
        elif self.weights is None:
            Hbkg = bkg
        else:
            Hbkg = bkg * self.weights

        # Calculate significance for each bin.
        # Start from p-value 1 so that any bin matched by neither mask (e.g. a NaN
        # background) ends up with a significance of 0 instead of uninitialised memory.
        excess = counts >= Hbkg
        deficit = counts < Hbkg
        sig = np.ones(Hbkg.size)
        sig[excess] = G(counts[excess], Hbkg[excess])
        sig[deficit] = 1 - G(counts[deficit] + 1, Hbkg[deficit])
        sig = norm.ppf(1 - sig)
        sig[sig < 0] = 0  # If negative, set it to 0
        np.nan_to_num(sig, posinf=0, neginf=0, nan=0,
                      copy=False)  # Avoid errors
        sig[deficit] = -sig[deficit]  # Now we can make it signed

        # Plot the test histograms with the bump found by BumpHunter plus a little significance plot
        F = plt.figure(figsize=(12, 10))
        gs = grd.GridSpec(2, 1, height_ratios=[4, 1])

        pl1 = plt.subplot(gs[0])
        plt.title('Distributions with bump')

        # Raw background is histogrammed by matplotlib; an already-binned background is
        # drawn by weighting one entry per bin with the bin content.
        if raw_input:
            bkg_values, bkg_weights = bkg, self.weights
        else:
            bkg_values, bkg_weights = self.bins[:-1], Hbkg
        plt.hist(bkg_values,
                 bins=self.bins,
                 histtype='step',
                 range=self.rang,
                 weights=bkg_weights,
                 label='background',
                 linewidth=2,
                 color='red')
        plt.errorbar(0.5 * (edges[1:] + edges[:-1]),
                     counts,
                     xerr=(edges[1] - edges[0]) / 2,
                     yerr=np.sqrt(counts),
                     ls='',
                     color='blue',
                     label='data')

        # Vertical markers at both edges of the bump.  The right marker takes its height
        # from the bin just after the window; clamp the index so that a bump ending on
        # the last bin does not index past the end of the histogram.
        last_bin = min(end_bin, len(counts) - 1)
        plt.plot(np.full(2, Bmin),
                 np.array([0, counts[first_bin]]),
                 'r--',
                 label=('BUMP'))
        plt.plot(np.full(2, Bmax),
                 np.array([0, counts[last_bin]]),
                 'r--')
        plt.legend(fontsize='large')
        plt.yscale('log')
        if self.rang is not None:
            plt.xlim(self.rang)
        plt.tight_layout()

        # Significance panel, sharing the x axis with the distributions
        plt.subplot(gs[1], sharex=pl1)
        plt.hist(edges[:-1], bins=edges, range=self.rang, weights=sig)
        sig_span = np.array([sig.min(), sig.max()])
        plt.plot(np.full(2, Bmin), sig_span, 'r--', linewidth=2)
        plt.plot(np.full(2, Bmax), sig_span, 'r--', linewidth=2)
        plt.yticks(
            np.arange(np.round(sig.min()), np.round(sig.max()) + 1, step=1))
        plt.ylabel('significance', size='large')

        # Check if the plot should be saved or just displayed
        if filename is None:
            plt.show()
        else:
            plt.savefig(filename, bbox_inches='tight')
            plt.close(F)

        return
コード例 #4
0
ファイル: BumpHunter.py プロジェクト: sam-cal/pyBumpHunter
def scan_hist(hist, ref, w_ar, self, ih):
    '''
    Scan a distribution and compute the p-value associated to every scan window following
    the BumpHunter algorithm.

    In order to make the function thread friendly, the results are not returned but stored
    in the per-distribution containers of the BumpHunter instance (see below).

    Arguments :
        hist : The data histogram bin contents (Numpy array).

        ref : The reference (background) histogram bin contents (Numpy array).

        w_ar : Numpy array containing all the values of width (in bins) to be tested.

        self : The BumpHunter instance that calls the function. Its scan_step and mode
               attributes are read. scan_step is 'full' (step = width), 'half'
               (step = width // 2, at least 1) or a fixed step; mode is 'excess' or
               'deficit' (any other value leaves every p-value at 1).

        ih : Index of the distribution to be scanned. ih=0 refers to the data distribution
             and ih>0 refers to the ih-th pseudo-data distribution.

    Results stored in the instance :
        self.res_ar[ih] : Numpy object array of dimension (Nwidth), with Nwidth the number of
                          widths tested. Each element is a Numpy array holding the p-values
                          of all windows of that width (its length differs for every width).

        self.min_Pval_ar[ih] : Minimum p-value obtained during the scan (float).

        self.min_loc_ar[ih] : Position of the window corresponding to the minimum p-value
                              (integer).

        self.min_width_ar[ih] : Width of the window corresponding to the minimum p-value
                                (integer).

    A width for which no window fits in the scanned range (this includes the case of a
    completely empty histogram) gets the dummy result p-value=1.0 at position 0.
    '''

    # Remove the first/last hist bins if empty ... just to be consistent with c++
    non0 = np.flatnonzero(hist > 0)
    if non0.size:
        Hinf, Hsup = non0[0], non0[-1] + 1
    else:
        # Nothing to scan: an empty range makes every width fall back to the dummy result
        Hinf, Hsup = 0, 0

    # Create the results arrays, pre-filled with the dummy "no window" values.
    # (dtype=object: the np.object alias no longer exists in recent NumPy releases)
    res = np.empty(w_ar.size, dtype=object)
    min_Pval = np.ones(w_ar.size)
    min_loc = np.zeros(w_ar.size, dtype=int)

    # Loop over all the width of the window
    for i, w in enumerate(w_ar):
        # Auto-adjust scan step if specified
        if self.scan_step == 'full':
            scan_stepp = w
        elif self.scan_step == 'half':
            scan_stepp = max(1, w // 2)
        else:
            scan_stepp = self.scan_step

        # Define position range
        pos = np.arange(Hinf, Hsup - w + 1, scan_stepp)

        # No window of this width fits: keep the dummy values to avoid crashes
        if pos.size == 0:
            res[i] = np.array([1.0])
            continue

        # Count events in all windows of width w
        Nref = np.array([ref[p:p + w].sum() for p in pos])
        Nhist = np.array([hist[p:p + w].sum() for p in pos])

        # Calculate all local p-values for width w (1.0 unless the mode says otherwise)
        pval = np.ones(pos.size)
        if self.mode == 'excess':
            sel = Nhist > Nref
            pval[sel] = G(Nhist[sel], Nref[sel])
        elif self.mode == 'deficit':
            sel = Nhist <= Nref
            pval[sel] = 1.0 - G(Nhist[sel] + 1, Nref[sel])
        # Windows without any reference event are never significant
        # (to be consistent with c++ results)
        pval[(Nref == 0) & (Nhist >= 0)] = 1.0
        res[i] = pval

        # Get the minimum p-value and associated position for width w
        best = pval.argmin()
        min_Pval[i] = pval[best]
        min_loc[i] = pos[best]

    # Get the minimum p-value and associated window among all width
    best_w = min_Pval.argmin()

    # Save the results in inner variables and return
    self.res_ar[ih] = res
    self.min_Pval_ar[ih] = min_Pval[best_w]
    self.min_loc_ar[ih] = int(min_loc[best_w])
    self.min_width_ar[ih] = int(w_ar[best_w])
    return
コード例 #5
0
 def integrate(self, a, b):
     """Return ``G(alpha + 1, a / x0) - G(alpha + 1, b / x0)``.

     Both limits are converted to float and rescaled by ``self.x0`` before
     being passed to ``G`` (presumably a regularized incomplete gamma
     function -- confirm against the file's imports).

     Arguments :
         a : First limit.

         b : Second limit.
     """
     lower, upper = float(a), float(b)
     shape = self.alpha + 1.0
     return G(shape, lower / self.x0) - G(shape, upper / self.x0)
コード例 #6
0
# Synthetic sample: N binary observations, M of them equal to 1 and L equal to 0,
# in random order.
u = 0
N = 160
M = 80
L = N - M
arr = np.array([0] * L + [1] * M)
np.random.shuffle(arr)

# Fraction of ones in the sample (float() keeps this a true division everywhere).
U_ML = M / float(N)

# NOTE(review): `a` and `b` are not defined in this snippet; they must already exist
# in the enclosing module -- confirm against the full file.
# The normalisation does not depend on u, so it is computed once.
Norm = G(a + b + N) / (G(a + M) * G(b + L))

# Evaluate the curve on the grid u = 0.00, 0.01, ..., 1.00.  The grid is built from
# the integer index so that rounding errors do not accumulate and the last point
# is exactly 1.
P = []
U = []
for i in range(101):
    u = i / 100.0
    u1 = pow(u, (a - 1 + M))
    u2 = pow(1 - u, (b - 1 + L))
    P_Val = Norm * u1 * u2
    P.append(P_Val)
    U.append(u)
print('%0.2f' % U_ML)

fig = plt.figure()
plt.plot(U, P)
plt.ion()
plt.xlabel('μ')
plt.ylabel('P')
plt.show()
コード例 #7
0
def get_post_prob(M, a, b, m, l):
    """Evaluate ``G(m+a+l+b) / (G(m+a) * G(l+b)) * M**(m+a-1) * (1-M)**(l+b-1)``.

    With ``G`` the gamma function (assumed -- confirm against the file's
    imports) this is the Beta(m + a, l + b) density evaluated at ``M``.

    Arguments :
        M : Point at which the expression is evaluated.

        a, b : First and second prior parameters.

        m, l : Counts added to ``a`` and ``b`` respectively.
    """
    alpha_post = m + a
    beta_post = l + b
    normalisation = G(alpha_post + beta_post) / (G(alpha_post) * G(beta_post))
    kernel = M ** (alpha_post - 1) * (1 - M) ** (beta_post - 1)
    return normalisation * kernel
コード例 #8
0
def norm_moffat(width, power):
    """Return ``G(power) / (width * sqrt(pi) * G(power - 1/2))``.

    ``G`` is assumed to be the gamma function (confirm against the file's
    imports).

    Arguments :
        width : Width parameter; the result is inversely proportional to it.

        power : Power parameter.
    """
    numerator = G(power)
    denominator = width * np.sqrt(np.pi) * G(power - 0.5)
    return numerator / denominator
コード例 #9
0
ファイル: frechet.py プロジェクト: dtroop/evt-bandits
 def cvar(self, alph):
     """Return ``(G(a) - Ginc(a, x) * G(a)) / (1 - alph)``.

     Here ``a = (gamma - 1) / gamma`` with ``gamma = self.gamma`` and
     ``x = -log(alph)``.  ``G`` and ``Ginc`` are presumably the gamma function
     and a regularized incomplete gamma function -- confirm against the
     file's imports.

     Arguments :
         alph : Level at which the quantity is evaluated.
     """
     shape = (self.gamma - 1) / self.gamma
     threshold = -np.log(alph)
     complete = G(shape)
     return 1/(1-alph) * (complete - Ginc(shape, threshold)*complete)
コード例 #10
0
    def test_weibull_moments(self):
        self.assertEqual(self.X.mean, self.X.beta)
        self.assertAlmostEqual(self.X.var, self.X.beta**2 * (G(3) - 1))

        self.assertEqual(self.Y.mean, self.Y.beta * G(1.5))
        self.assertAlmostEqual(self.Y.var, self.Y.beta**2 * (G(2) - G(1.5)**2))
コード例 #11
0
def norm_moffat(width, power):
    """Return ``gamma(power) / (width * sqrt(pi) * gamma(power - 1/2))``.

    The gamma function is imported locally from scipy.special.

    Arguments :
        width : Width parameter; the result is inversely proportional to it.

        power : Power parameter.
    """
    from scipy.special import gamma as G

    scale = width * np.sqrt(np.pi)
    return G(power) / (scale * G(power - 0.5))
コード例 #12
0
# Running maximum of every P_Val computed below (over all figures).
MaxP = 0

# NOTE(review): M, arr, a, b, l and m are not defined in this fragment; they are
# presumably set earlier in the script -- confirm against the full file.
U_ML = M / 160.00
print('%0.2f' % U_ML)

# One figure is created per element of `arr`.
# NOTE(review): nothing visible here changes l or m between iterations, so every
# figure would show the same curve; the fragment looks truncated -- verify.
for j in range(len(arr)):
    P = []
    U = []
    u = 0

    # Evaluate the curve at 101 points, starting at u = 0 in steps of 0.01.
    for i in range(101):
        # The accumulated step can overshoot 1 by a rounding error; clamp it.
        if u > 1:
            u = 1
        # Normalisation factor and the two power terms of the curve at u.
        Norm = G(a + b + l + m) / (G(a + m) * G(b + l))
        u1 = pow(u, a - 1 + m)
        u2 = pow(1 - u, b - 1 + l)
        P_Val = Norm * u1 * u2
        if MaxP < P_Val:
            MaxP = P_Val
        P.append(P_Val)
        U.append(u)
        u += 0.010000

    # Draw the curve for this iteration and switch matplotlib to interactive mode.
    fig = plt.figure()
    plt.plot(U, P)
    plt.ion()
コード例 #13
0
def Patriarca(x,lam):
    """Evaluate ``(n**n / G(n)) * x**(n-1) * exp(-n*x)`` with ``n = 1 + 3*lam/(1-lam)``.

    ``G`` is assumed to be the gamma function (confirm against the file's
    imports).

    Arguments :
        x : Point(s) at which the expression is evaluated.

        lam : Parameter from which the exponent ``n`` is derived; ``lam = 1``
              divides by zero.
    """
    n = 1. + 3. * lam / (1. - lam)
    prefactor = n ** n / G(n)
    return prefactor * x ** (n - 1.) * np.exp(-n * x)
コード例 #14
0
ファイル: BumpHunter.py プロジェクト: lovaslin/pyBumpHunter
    def __scan_hist(self,hist,ref,w_ar,ih):
        '''
        Scan a distribution and compute the p-value associated to every scan window following
        the BumpHunter algorithm. Also evaluate the number of signal events for the data
        histogram (ih=0).

        In order to make the function thread friendly, the results are not returned but
        stored in the per-distribution containers of the instance (see below).

        Arguments :
            hist : The data histogram bin contents (Numpy array).

            ref : The reference (background) histogram bin contents (Numpy array).

            w_ar : Numpy array containing all the values of width (in bins) to be tested.

            ih : Index of the distribution to be scanned. ih=0 refers to the data
                 distribution and ih>0 refers to the ih-th pseudo-data distribution.

        Instance attributes read : scan_step ('full', 'half' or a fixed step), mode
        ('excess' or 'deficit') and useSideBand (if set, the reference count of each
        window is rescaled by the data/reference ratio outside that window).

        Results stored in the instance :
            self.res_ar[ih] : Numpy object array of dimension (Nwidth), with Nwidth the number
                              of widths tested. Each element is a Numpy array holding the
                              p-values of all windows of that width (its length differs for
                              every width).

            self.min_Pval_ar[ih] : Minimum p-value obtained during the scan (float).

            self.min_loc_ar[ih] : Position of the window corresponding to the minimum p-value
                                  (integer).

            self.min_width_ar[ih] : Width of the window corresponding to the minimum p-value
                                    (integer).

            self.signal_eval : Data minus reference count in the best window (only set when
                               ih=0).

        A width for which no window fits in the scanned range (this includes the case of a
        completely empty histogram) gets the dummy result p-value=1.0 at position 0.
        '''

        # Remove the first/last hist bins if empty ... just to be consistent with c++
        non0 = np.flatnonzero(hist > 0)
        if non0.size:
            Hinf, Hsup = non0[0], non0[-1] + 1
        else:
            # Nothing to scan: an empty range makes every width fall back to the dummy result
            Hinf, Hsup = 0, 0

        # Create the results arrays, pre-filled with the dummy "no window" values.
        # (dtype=object: the np.object alias no longer exists in recent NumPy releases)
        res = np.empty(w_ar.size, dtype=object)
        min_Pval = np.ones(w_ar.size)
        min_loc = np.zeros(w_ar.size, dtype=int)
        signal_eval = np.zeros(w_ar.size)

        use_side_band = bool(self.useSideBand)
        if use_side_band:
            ref_total = ref[Hinf:Hsup].sum()
            hist_total = hist[Hinf:Hsup].sum()

        # Loop over all the width of the window
        for i, w in enumerate(w_ar):
            # Auto-adjust scan step if specified
            if self.scan_step == 'full':
                scan_stepp = w
            elif self.scan_step == 'half':
                scan_stepp = max(1, w // 2)
            else:
                scan_stepp = self.scan_step

            # Define position range
            pos = np.arange(Hinf, Hsup - w + 1, scan_stepp)

            # Check that there is at least one interval to check for width w
            # If not, keep the dummy values in order to avoid crashes
            if pos.size == 0:
                res[i] = np.array([1.0])
                continue

            # Count events in all windows of width w
            Nref = np.array([ref[p:p + w].sum() for p in pos])
            Nhist = np.array([hist[p:p + w].sum() for p in pos])

            if use_side_band:
                # Build a new array instead of multiplying in place: the scale factor is a
                # float and cannot be stored back into an integer count array.
                # NOTE(review): a window holding the whole reference content makes the
                # denominator zero -- confirm how that case should be treated.
                Nref = Nref * ((hist_total - Nhist) / (ref_total - Nref))

            # Calculate all local p-values for width w (1.0 unless the mode says otherwise)
            pval = np.ones(pos.size)
            if self.mode == 'excess':
                sel = (Nhist > Nref) & (Nref > 0)
                pval[sel] = G(Nhist[sel], Nref[sel])
            elif self.mode == 'deficit':
                sel = Nhist < Nref
                pval[sel] = 1.0 - G(Nhist[sel] + 1, Nref[sel])

            if use_side_band:
                # Prevent issue with very low p-value, sometimes induced by normalisation
                # in the tail
                pval[pval < 1e-300] = 1e-300

            res[i] = pval

            # Get the minimum p-value and associated position for width w
            best = pval.argmin()
            min_Pval[i] = pval[best]
            min_loc[i] = pos[best]
            # Nhist/Nref are indexed by window number, not by bin position
            signal_eval[i] = Nhist[best] - Nref[best]

        # Get the minimum p-value and associated window among all width
        best_w = min_Pval.argmin()

        # Evaluate the number of signal event (for data only)
        if ih == 0:
            self.signal_eval = signal_eval[best_w]

        # Save the results in inner variables and return
        self.res_ar[ih] = res
        self.min_Pval_ar[ih] = min_Pval[best_w]
        self.min_loc_ar[ih] = int(min_loc[best_w])
        self.min_width_ar[ih] = int(w_ar[best_w])
        return