def levy(beta, n):
    """Draw ``n`` heavy-tailed random steps from a ratio of normal samples.

    Two independent zero-mean normal samples are generated: ``u`` with a
    scale computed from ``beta`` through ``G`` and ``v`` with unit scale.
    Each step is ``u / |v| ** (1 / beta)``.  (With ``G`` the gamma function
    this has the form of Mantegna's recipe for Levy-stable steps.)

    Arguments :
        beta : Exponent controlling the tail of the step distribution.
        n : Number of steps to draw.

    Returns :
        Numpy array of shape (n, 1) holding the sampled steps.
    """
    inv_beta = 1 / beta
    numerator = G(1 + beta) * np.sin(np.pi * beta / 2)
    denominator = G((1 + beta) / 2) * beta * 2 ** ((beta - 1) / 2)
    scale_u = (numerator / denominator) ** inv_beta
    scale_v = 1
    # Sampling order (u first, then v) is kept so that results under a fixed
    # seed stay reproducible.
    u = np.random.normal(0, scale_u, size=n)
    v = np.random.normal(0, scale_v, size=n)
    steps = u / (np.abs(v) ** inv_beta)
    return steps.reshape(n, 1)
def var_inference(self, num_start=1, display=True, max_iter=100, tol_L=1e-4):
    """Run coordinate-ascent variational inference with optional restarts.

    The updates have the form of a truncated stick-breaking mixture of
    Bernoulli components: ``(av, bv)`` are Beta parameters for the stick
    variables, ``(atheta, btheta)`` are Beta parameters for the per-component
    Bernoulli probabilities and ``self.gamma`` holds the responsibilities
    (one row per observation, one column per component).

    Arguments :
        num_start : Number of runs.  The first run starts from the current
            ``self.gamma``; every later run starts from random
            responsibilities.
        display : If True, print the bound and the number of occupied
            components at every iteration.
        max_iter : Maximum number of iterations per run.
        tol_L : A run stops when the bound changes by less than this value
            between two consecutive iterations.

    Returns :
        List with the value of the variational lower bound at each iteration
        of the best run (empty if no run produced a comparable bound).
        ``self.gamma`` is set to the responsibilities of that run.
    """
    best_L = -np.inf
    # Defaults keep the result defined when num_start == 0 or when no run
    # yields a bound greater than -inf (e.g. NaN), instead of raising
    # UnboundLocalError at the end.
    best_gamma = self.gamma
    best_L_list = []
    for i in range(num_start):
        if i != 0:
            gamma = np.random.uniform(size=(self.N, self.T))
            self.gamma = (gamma.T / np.sum(gamma, axis=1)).T
        l_list = []
        for j in range(max_iter):
            # --- update q(v): stick-breaking Beta parameters -------------
            gamma_colsum = np.sum(self.gamma, axis=0)
            av = 1 + gamma_colsum
            # bv[t] = alpha + total responsibility of components after t
            bv = (np.sum(self.gamma) + self.alpha) - np.cumsum(gamma_colsum)

            # --- update q(theta): Bernoulli-parameter Beta parameters ----
            atheta = self.a + np.dot(self.gamma.T, self.X)
            btheta = self.b + np.dot(self.gamma.T, 1 - self.X)

            # --- expectations of the log parameters ----------------------
            psi_theta_total = psi(atheta + btheta)
            log_theta_expct = psi(atheta) - psi_theta_total
            log_neg_theta_expct = psi(btheta) - psi_theta_total
            psi_v_total = psi(av + bv)
            log_v_expct = psi(av) - psi_v_total
            log_neg_v_expct = psi(bv) - psi_v_total

            # --- update q(z): responsibilities ---------------------------
            s1 = np.dot(log_theta_expct, self.X.T)
            s2 = np.dot(log_neg_theta_expct, (1 - self.X).T)
            s3 = log_v_expct
            s4 = np.cumsum(log_neg_v_expct) - log_neg_v_expct
            log_rho = s1.T + s2.T + s3 + s4
            # Subtract the row maximum before exponentiating: the normalised
            # result is mathematically unchanged but a whole row can no
            # longer underflow to zero (which produced 0/0 = NaN).
            log_rho = log_rho - np.max(log_rho, axis=1, keepdims=True)
            gamma = np.exp(log_rho)
            self.gamma = gamma / np.sum(gamma, axis=1, keepdims=True)

            # --- variational lower bound ---------------------------------
            fsz = self.T * self.D
            # Log-normaliser of the Beta(a, b) prior:
            # ln G(a + b) - ln G(a) - ln G(b).
            lf_s1 = np.sum((self.a - 1) * log_theta_expct +
                           (self.b - 1) * log_neg_theta_expct) + \
                fsz * (gammaln(self.a + self.b) -
                       gammaln(self.a) - gammaln(self.b))
            lf_s2 = np.sum((self.alpha - 1) * log_neg_v_expct) + \
                self.T * (gammaln(self.alpha + 1) -
                          gammaln(self.alpha) - gammaln(1))
            lf_s3 = np.sum((s1 + s2) * self.gamma.T)
            lf_s4 = np.sum((s3 + s4) * self.gamma)
            l_full = lf_s1 + lf_s2 + lf_s3 + lf_s4

            lv_s1 = np.sum((av - 1) * log_v_expct)
            lv_s2 = np.sum((bv - 1) * log_neg_v_expct)
            lv_s3 = np.sum(gammaln(av + bv) - gammaln(av) - gammaln(bv))
            l_v = lv_s1 + lv_s2 + lv_s3

            ltheta_s1 = np.sum((atheta - 1) * log_theta_expct)
            ltheta_s2 = np.sum((btheta - 1) * log_neg_theta_expct)
            ltheta_s3 = np.sum(gammaln(atheta + btheta) -
                               gammaln(atheta) - gammaln(btheta))
            l_theta = ltheta_s1 + ltheta_s2 + ltheta_s3

            # 0 * log(0) is taken as 0; evaluating it directly gives NaN.
            occupied = self.gamma > 0
            l_z = np.sum(self.gamma[occupied] * np.log(self.gamma[occupied]))

            l_final = l_full - l_v - l_theta - l_z

            if display:
                print("Iteration_number=" + str(j))
                print("L = " + str(l_final))
                cl_num = np.argmax(self.gamma, axis=1)
                clustersNums = np.unique(cl_num)
                n_cl = clustersNums.shape[0]
                print("number of components:" + str(n_cl))
                print('=' * 20)

            l_list.append(l_final)
            if j != 0 and abs(l_list[j] - l_list[j - 1]) < tol_L:
                break

        # l_list is empty when max_iter == 0; nothing to compare then.
        if l_list and l_list[-1] > best_L:
            best_L = l_list[-1]
            best_gamma = self.gamma
            best_L_list = l_list

    self.gamma = best_gamma
    return best_L_list
def PlotBump(self, data, bkg, is_hist=False, filename=None):
    '''
    Plot the data and background histograms with the bump found by
    BumpHunter highlighted, together with a per-bin significance panel.

    Arguments :
        data : Numpy array containing the raw unbinned data (or the data
               histogram counts if is_hist is True).

        bkg : Numpy array containing the raw unbinned background (or the
              background histogram counts if is_hist is True).

        is_hist : Boolean specifying if data and bkg are in histogram form
                  or not. Default to False.

        filename : Name of the file in which the plot will be saved. If
                   None, the plot will be just shown but not saved.
                   Default to None.
    '''

    # Get the data in histogram form
    if is_hist is False:
        H = np.histogram(data, bins=self.bins, range=self.rang)
    else:
        H = [data, self.bins]
    counts, edges = H[0], H[1]

    # Get bump min and max (first bin of the bump, first bin after it)
    first = self.min_loc_ar[0]
    last = first + self.min_width_ar[0]
    Bmin = edges[first]
    Bmax = edges[last]

    # Get the background in histogram form
    if is_hist is False:
        Hbkg = np.histogram(bkg, bins=self.bins, range=self.rang,
                            weights=self.weights)[0]
    elif self.weights is None:
        Hbkg = bkg
    else:
        Hbkg = bkg * self.weights

    # Calculate significance for each bin
    excess = counts >= Hbkg
    deficit = counts < Hbkg
    sig = np.empty(Hbkg.size)
    sig[(counts == 0) & (Hbkg == 0)] = 1.0
    sig[excess] = G(counts[excess], Hbkg[excess])
    sig[deficit] = 1 - G(counts[deficit] + 1, Hbkg[deficit])
    sig = norm.ppf(1 - sig)
    sig[sig < 0] = 0  # If negative, set it to 0
    np.nan_to_num(sig, posinf=0, neginf=0, nan=0, copy=False)  # Avoid errors
    sig[deficit] = -sig[deficit]  # Now we can make it signed

    # Plot the test histograms with the bump found by BumpHunter plus a
    # little significance plot
    F = plt.figure(figsize=(12, 10))
    gs = grd.GridSpec(2, 1, height_ratios=[4, 1])

    pl1 = plt.subplot(gs[0])
    plt.title('Distributions with bump')

    if is_hist is False:
        plt.hist(bkg, bins=self.bins, histtype='step', range=self.rang,
                 weights=self.weights, label='background', linewidth=2,
                 color='red')
    else:
        plt.hist(self.bins[:-1], bins=self.bins, histtype='step',
                 range=self.rang, weights=Hbkg, label='background',
                 linewidth=2, color='red')
    # The data points are drawn the same way in both cases
    plt.errorbar(0.5 * (edges[1:] + edges[:-1]), counts,
                 xerr=(edges[1] - edges[0]) / 2, yerr=np.sqrt(counts),
                 ls='', color='blue', label='data')

    plt.plot(np.full(2, Bmin), np.array([0, counts[first]]), 'r--',
             label=('BUMP'))
    # When the bump ends on the last bin there is no bin to the right of
    # it, so use the last bin for the height of the marker instead of
    # indexing past the end of the histogram.
    right = min(last, len(counts) - 1)
    plt.plot(np.full(2, Bmax), np.array([0, counts[right]]), 'r--')
    plt.legend(fontsize='large')
    plt.yscale('log')
    # Identity test: self.rang may be an array, for which '!= None' is
    # evaluated element-wise and cannot be used as a condition.
    if self.rang is not None:
        plt.xlim(self.rang)
    plt.tight_layout()

    plt.subplot(gs[1], sharex=pl1)
    plt.hist(edges[:-1], bins=edges, range=self.rang, weights=sig)
    plt.plot(np.full(2, Bmin), np.array([sig.min(), sig.max()]), 'r--',
             linewidth=2)
    plt.plot(np.full(2, Bmax), np.array([sig.min(), sig.max()]), 'r--',
             linewidth=2)
    plt.yticks(
        np.arange(np.round(sig.min()), np.round(sig.max()) + 1, step=1))
    plt.ylabel('significance', size='large')

    # Check if the plot should be saved or just displayed
    if filename is None:
        plt.show()
    else:
        plt.savefig(filename, bbox_inches='tight')
        plt.close(F)

    return
def scan_hist(hist, ref, w_ar, self, ih):
    '''
    Scan a distribution and compute the p-value associated to every scan
    window following the BumpHunter algorithm.

    In order to make the function thread friendly, the results are stored
    in the arrays of the BumpHunter instance rather than returned.

    Arguments :
        hist : The data histogram counts (as obtained with the
               numpy.histogram function).

        ref : The reference (background) histogram counts (as obtained with
              the numpy.histogram function).

        w_ar : Array containing all the values of width to be tested.

        self : The BumpHunter instance that calls the function.

        ih : Index of the distribution to be scanned. ih=0 refers to the
             data distribution and ih>0 refers to the ih-th pseudo-data
             distribution.

    Results stored in the instance (at index ih) :
        res_ar : Numpy object array of dimension (Nwidth), with Nwidth the
                 number of window widths tested. Each entry is a numpy
                 array of dimension (Nstep) holding the p-values of all
                 windows of that width.

        min_Pval_ar : Minimum p-value obtained during the scan (float).

        min_loc_ar : Position of the window corresponding to the minimum
                     p-value (integer).

        min_width_ar : Width of the window corresponding to the minimum
                       p-value (integer).

    Raises :
        ValueError : If hist has no bin with a positive content.
    '''

    # Remove the first/last hist bins if empty ... just to be consistent
    # with the c++ implementation
    non0 = np.flatnonzero(hist > 0)
    if non0.size == 0:
        raise ValueError('cannot scan a histogram without any non-empty bin')
    Hinf, Hsup = int(non0[0]), int(non0[-1]) + 1

    # Create the results array
    # (the np.object alias was removed in NumPy 1.24, use the builtin)
    res = np.empty(w_ar.size, dtype=object)
    min_Pval = np.empty(w_ar.size)
    min_loc = np.empty(w_ar.size, dtype=int)

    # Loop over all the widths of the window
    for i, w in enumerate(w_ar):
        # Auto-adjust scan step if specified
        if self.scan_step == 'full':
            scan_stepp = w
        elif self.scan_step == 'half':
            scan_stepp = max(1, w // 2)
        else:
            scan_stepp = self.scan_step

        # Define position range
        pos = np.arange(Hinf, Hsup - w + 1, scan_stepp)

        # Check that there is at least one interval to check for width w
        # If not, we must set dummy values in order to avoid crashes
        if pos.size == 0:
            res[i] = np.array([1.0])
            min_Pval[i] = 1.0
            min_loc[i] = 0
            continue

        # Initialize local p-value array for width w (1.0 = no deviation)
        res[i] = np.ones(pos.size)

        # Count events in all windows of width w
        Nref = np.array([ref[p:p + w].sum() for p in pos])
        Nhist = np.array([hist[p:p + w].sum() for p in pos])

        # Calculate all local p-values for width w
        if self.mode == 'excess':
            above = Nhist > Nref
            res[i][above] = G(Nhist[above], Nref[above])
        elif self.mode == 'deficit':
            below = Nhist <= Nref
            res[i][below] = 1.0 - G(Nhist[below] + 1, Nref[below])
        res[i][(Nhist == 0) & (Nref == 0)] = 1.0
        # To be consistent with c++ results
        res[i][(Nref == 0) & (Nhist > 0)] = 1.0

        # Get the minimum p-value and associated position for width w
        min_Pval[i] = res[i].min()
        min_loc[i] = pos[res[i].argmin()]

    # Get the minimum p-value and associated window among all widths
    best = min_Pval.argmin()

    # Save the results in inner variables and return
    self.res_ar[ih] = res
    self.min_Pval_ar[ih] = min_Pval[best]
    self.min_loc_ar[ih] = int(min_loc[best])
    self.min_width_ar[ih] = int(w_ar[best])

    return
def integrate(self, a, b):
    """Return ``G(alpha + 1, a / x0) - G(alpha + 1, b / x0)``.

    Both bounds are converted to ``float`` first; ``alpha`` and ``x0`` are
    read from the instance.

    Arguments :
        a : Lower bound (anything accepted by ``float``).
        b : Upper bound (anything accepted by ``float``).

    Returns :
        Difference of ``G`` evaluated at the two scaled bounds.
    """
    lower, upper = float(a), float(b)
    order = self.alpha + 1.0
    at_lower = G(order, lower / self.x0)
    at_upper = G(order, upper / self.x0)
    return at_lower - at_upper
# Evaluate and plot P(u) = Norm * u**(a - 1 + M) * (1 - u)**(b - 1 + L) on a
# 101-point grid over [0, 1].  With G the gamma function this is the
# Beta(a + M, b + L) density; `a` and `b` must already be defined.
N = 160
M = 80
L = N - M
arr = np.array([0] * L + [1] * M)
np.random.shuffle(arr)

# Fraction of ones in the sample (derived from N instead of repeating 160).
U_ML = M / N

# The normalising factor does not depend on the grid point: compute it once.
Norm = G(a + b + N) / (G(a + M) * G(b + L))

P = []
U = []
for i in range(101):
    # i / 100 gives the grid point directly; accumulating 0.01 drifts and
    # overshoots 1 at the end, which is why a clamp used to be needed.
    u = i / 100
    u1 = pow(u, (a - 1 + M))
    u2 = pow(1 - u, (b - 1 + L))
    P_Val = Norm * u1 * u2
    P.append(P_Val)
    U.append(u)

print('%0.2f' % U_ML)
fig = plt.figure()
plt.plot(U, P)
plt.ion()
plt.xlabel('μ')
plt.ylabel('P')
plt.show()
def get_post_prob(M, a, b, m, l):
    """Evaluate ``G(m+a+l+b) / (G(m+a) G(l+b)) * M**(m+a-1) * (1-M)**(l+b-1)``.

    With ``G`` the gamma function this is the Beta(m + a, l + b) density
    evaluated at ``M``.

    Arguments :
        M : Point at which the expression is evaluated.
        a, b : First pair of exponents (added to m and l respectively).
        m, l : Second pair of exponents.

    Returns :
        Value of the expression at ``M``.
    """
    first_shape = m + a
    second_shape = l + b
    normaliser = G(m + a + l + b) / (G(first_shape) * G(second_shape))
    kernel = M ** (first_shape - 1) * (1 - M) ** (second_shape - 1)
    return normaliser * kernel
def norm_moffat(width, power):
    """Return ``G(power) / (width * sqrt(pi) * G(power - 1/2))``.

    Arguments :
        width : Width parameter; the result scales as 1 / width.
        power : Power parameter passed to ``G``.

    Returns :
        The normalisation factor.
    """
    denominator = width * np.sqrt(np.pi) * G(power - 1 / 2)
    return G(power) / denominator
def cvar(self, alph):
    """Return ``(G(a) - Ginc(a, x) * G(a)) / (1 - alph)``.

    Here ``a = (gamma - 1) / gamma`` with ``gamma`` read from the instance
    and ``x = -log(alph)``.

    Arguments :
        alph : Level at which the quantity is evaluated.

    Returns :
        The value of the expression above.
    """
    shape = (self.gamma - 1) / self.gamma
    threshold = -np.log(alph)
    full = G(shape)
    # Same arithmetic as G(a) - Ginc(a, x) * G(a), with G(a) evaluated once.
    return 1 / (1 - alph) * (full - Ginc(shape, threshold) * full)
def test_weibull_moments(self): self.assertEqual(self.X.mean, self.X.beta) self.assertAlmostEqual(self.X.var, self.X.beta**2 * (G(3) - 1)) self.assertEqual(self.Y.mean, self.Y.beta * G(1.5)) self.assertAlmostEqual(self.Y.var, self.Y.beta**2 * (G(2) - G(1.5)**2))
def norm_moffat(width, power):
    """Return ``G(power) / (width * sqrt(pi) * G(power - 1/2))``.

    ``G`` is the gamma function from ``scipy.special``, imported locally.

    Arguments :
        width : Width parameter; the result scales as 1 / width.
        power : Power parameter passed to the gamma function.

    Returns :
        The normalisation factor.
    """
    from scipy.special import gamma as G

    denominator = width * np.sqrt(np.pi) * G(power - 1 / 2)
    return G(power) / denominator
# Largest value of P_Val seen over every evaluated grid point.
MaxP = 0
U_ML = M / 160.00
print('%0.2f' % U_ML)

# NOTE(review): the indentation of this fragment was lost, so the nesting is
# reconstructed: the curve is assumed to be rebuilt and drawn once per
# element of `arr` -- confirm against the original layout.  `M`, `arr`, `a`,
# `b`, `l` and `m` must already be defined.
for j in range(len(arr)):
    P, U = [], []
    u = 0
    for i in range(101):
        # The accumulated step can overshoot 1 on the last grid point.
        u = 1 if u > 1 else u
        Norm = G(a + b + l + m) / (G(a + m) * G(b + l))
        u1 = pow(u, a - 1 + m)
        u2 = pow(1 - u, b - 1 + l)
        P_Val = Norm * u1 * u2
        MaxP = P_Val if MaxP < P_Val else MaxP
        P.append(P_Val)
        U.append(u)
        u += 0.010000
    fig = plt.figure()
    plt.plot(U, P)
    plt.ion()
def Patriarca(x, lam):
    """Return ``n**n / G(n) * x**(n - 1) * exp(-n * x)``.

    The exponent is ``n = 1 + 3 * lam / (1 - lam)``.

    Arguments :
        x : Point (or numpy array of points) at which to evaluate.
        lam : Parameter setting ``n``; ``lam = 1`` divides by zero.

    Returns :
        Value of the expression at ``x``.
    """
    shape = 1. + 3. * lam / (1. - lam)
    prefactor = shape ** shape / G(shape)
    return prefactor * x ** (shape - 1.) * np.exp(-shape * x)
def __scan_hist(self,hist,ref,w_ar,ih):
    '''
    Scan a distribution and compute the p-value associated to every scan
    window following the BumpHunter algorithm. For the data histogram
    (ih=0), also evaluate the number of signal events in the best window.

    In order to make the function thread friendly, the results are stored
    in the arrays of the instance rather than returned.

    Arguments :
        hist : The data histogram counts (as obtained with the
               numpy.histogram function).

        ref : The reference (background) histogram counts (as obtained with
              the numpy.histogram function).

        w_ar : Array containing all the values of width to be tested.

        ih : Index of the distribution to be scanned. ih=0 refers to the
             data distribution and ih>0 refers to the ih-th pseudo-data
             distribution.

    Results stored in the instance (at index ih) :
        res_ar : Numpy object array of dimension (Nwidth), with Nwidth the
                 number of window widths tested. Each entry is a numpy
                 array of dimension (Nstep) holding the p-values of all
                 windows of that width.

        min_Pval_ar : Minimum p-value obtained during the scan (float).

        min_loc_ar : Position of the window corresponding to the minimum
                     p-value (integer).

        min_width_ar : Width of the window corresponding to the minimum
                       p-value (integer).

        signal_eval : (ih=0 only, not indexed) Data minus reference count
                      in the window with the minimum p-value.

    Raises :
        ValueError : If hist has no bin with a positive content.
    '''

    # Remove the first/last hist bins if empty ... just to be consistent
    # with the c++ implementation
    non0 = np.flatnonzero(hist > 0)
    if non0.size == 0:
        raise ValueError('cannot scan a histogram without any non-empty bin')
    Hinf, Hsup = int(non0[0]), int(non0[-1]) + 1

    # Create the results array
    # (the np.object alias was removed in NumPy 1.24, use the builtin)
    res = np.empty(w_ar.size, dtype=object)
    min_Pval = np.empty(w_ar.size)
    min_loc = np.empty(w_ar.size, dtype=int)
    signal_eval = np.empty(w_ar.size)

    use_sideband = bool(self.useSideBand)
    if use_sideband:
        ref_total = ref[Hinf:Hsup].sum()
        hist_total = hist[Hinf:Hsup].sum()

    # Loop over all the widths of the window
    for i, w in enumerate(w_ar):
        # Auto-adjust scan step if specified
        if self.scan_step == 'full':
            scan_stepp = w
        elif self.scan_step == 'half':
            scan_stepp = max(1, w // 2)
        else:
            scan_stepp = self.scan_step

        # Define position range
        pos = np.arange(Hinf, Hsup - w + 1, scan_stepp)

        # Check that there is at least one interval to check for width w
        # If not, we must set dummy values in order to avoid crashes
        if pos.size == 0:
            res[i] = np.array([1.0])
            min_Pval[i] = 1.0
            min_loc[i] = 0
            signal_eval[i] = 0
            continue

        # Initialize local p-value array for width w (1.0 = no deviation)
        res[i] = np.ones(pos.size)

        # Count events in all windows of width w
        Nref = np.array([ref[p:p + w].sum() for p in pos])
        Nhist = np.array([hist[p:p + w].sum() for p in pos])

        if use_sideband:
            # Rescale the reference to the data outside of the window.
            # Not done in place: Nref holds integers when ref is an
            # unweighted histogram and cannot receive a float result.
            Nref = Nref * ((hist_total - Nhist) / (ref_total - Nref))

        # Calculate all local p-values for width w
        if self.mode == 'excess':
            sel = (Nhist > Nref) & (Nref > 0)
            res[i][sel] = G(Nhist[sel], Nref[sel])
        elif self.mode == 'deficit':
            sel = Nhist < Nref
            res[i][sel] = 1.0 - G(Nhist[sel] + 1, Nref[sel])

        if use_sideband:
            # Prevent issue with very low p-value, sometimes induced by
            # normalisation in the tail
            res[i][res[i] < 1e-300] = 1e-300

        # Get the minimum p-value and associated position for width w
        best_window = res[i].argmin()
        min_Pval[i] = res[i][best_window]
        min_loc[i] = pos[best_window]
        # Nhist/Nref are indexed by window number, not by bin position, so
        # the window index must be used here (not min_loc[i]).
        signal_eval[i] = Nhist[best_window] - Nref[best_window]

    # Get the minimum p-value and associated window among all widths
    best = min_Pval.argmin()

    # Evaluate the number of signal events (for data only)
    if ih == 0:
        self.signal_eval = signal_eval[best]

    # Save the results in inner variables and return
    self.res_ar[ih] = res
    self.min_Pval_ar[ih] = min_Pval[best]
    self.min_loc_ar[ih] = int(min_loc[best])
    self.min_width_ar[ih] = int(w_ar[best])

    return