def posterior(x):
    '''
    POSTERIOR Two Class Posterior Using Bayes Formula
    INPUT:  x, features of different class, C-By-N vector
            C is the number of classes, N is the number of different feature
    OUTPUT: p, posterior of each class given by each feature, C-By-N matrix
    '''
    C, N = x.shape
    l = likelihood(x)
    total = np.sum(x)
    p = np.zeros((C, N))
    #TODO
    # begin answer
    priori = np.zeros((C, 1))
    sum_ = x.sum(axis=1)
    for i in range(C):
        priori[i] = sum_[i] / total
    e = np.zeros(N)
    for i in range(N):
        for j in range(C):
            e[i] = e[i] + priori[j] * l[j, i]
    for i in range(N):
        for j in range(C):
            p[j, i] = l[j, i] * priori[j] / e[i]
    # end answer
    return p
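# A hedged usage sketch for the posterior() implementations collected here,
# assuming likelihood(x) returns the row-normalized counts P(x_j|w_i). The
# stand-in likelihood() and the toy counts below are illustrative assumptions,
# not taken from any of the source files.
import numpy as np

def likelihood(x):
    # hypothetical stand-in: conditional P(x_j|w_i) estimated from counts
    return x / x.sum(axis=1, keepdims=True)

# counts of feature value j observed in class i (2 classes, 3 feature values)
x = np.array([[3., 1., 4.],
              [2., 2., 2.]])
p = posterior(x)
# every column of p is a distribution over the classes, so it sums to 1
assert np.allclose(p.sum(axis=0), np.ones(x.shape[1]))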
def posterior(x):
    '''
    POSTERIOR Two Class Posterior Using Bayes Formula
    INPUT:  x, features of different class, C-By-N vector
            C is the number of classes, N is the number of different feature
    OUTPUT: p, posterior of each class given by each feature, C-By-N matrix
    '''
    C, N = x.shape
    l = likelihood(x)
    total = np.sum(x)
    p = np.zeros((C, N))
    #TODO
    # begin answer
    totalRow = np.sum(x, axis=1)
    # joint probability P(x_j, w_i) = P(x_j|w_i) * P(w_i)
    for i in range(C):
        for j in range(N):
            p[i][j] = l[i][j] * totalRow[i] / total
    # normalize each column by the evidence P(x_j) = sum_i P(x_j, w_i)
    # (named col_sum to avoid shadowing the builtin sum)
    for j in range(N):
        col_sum = np.sum(p[:, j])
        for i in range(C):
            p[i][j] = p[i][j] / col_sum
    # end answer
    return p
def posterior(x):
    '''
    POSTERIOR Two Class Posterior Using Bayes Formula
    INPUT:  x, features of different class, C-By-N vector
            C is the number of classes, N is the number of different feature
    OUTPUT: p, posterior of each class given by each feature, C-By-N matrix
    '''
    C, N = x.shape
    l = likelihood(x)
    # total of occurrences of features
    total = np.sum(x)
    p = np.zeros((C, N))
    # begin answer
    total_per_feature = np.sum(x, axis=0)
    total_per_class = np.sum(x, axis=1)
    prior = np.zeros(C)
    for i in range(C):
        prior[i] = total_per_class[i] / total
    prob_per_feature = np.zeros(N)
    for i in range(N):
        prob_per_feature[i] = total_per_feature[i] / total
    for i in range(C):
        for j in range(N):
            p[i, j] = l[i, j] * prior[i] / prob_per_feature[j]
    # end answer
    return p
def posterior(x):
    '''
    POSTERIOR Two Class Posterior Using Bayes Formula
    INPUT:  x, features of different class, C-By-N vector
            C is the number of classes, N is the number of different feature
    OUTPUT: p, posterior of each class given by each feature, C-By-N matrix
    '''
    C, N = x.shape
    l = likelihood(x)
    total = np.sum(x, axis=1)
    total2 = np.sum(x)
    # multiply each likelihood by the class prior P(w_r) = total[r] / total2
    for r in range(C):
        for m in range(N):
            l[r][m] *= (total[r] / total2)
    # normalize each column by the evidence so columns sum to one
    total = np.sum(l, axis=0)
    for r in range(C):
        for m in range(N):
            l[r][m] /= total[m]
    # l now holds the posterior p
    return l
def posterior(x):
    '''
    POSTERIOR Two Class Posterior Using Bayes Formula
    INPUT:  x, features of different class, C-By-N vector
            C is the number of classes, N is the number of different feature
    OUTPUT: p, posterior of each class given by each feature, C-By-N matrix
    '''
    C, N = x.shape
    l = likelihood(x)
    total = np.sum(x)
    p = np.zeros((C, N))
    #TODO
    # begin answer
    # total is the sum of the original dataset, though the input x is the distribution of dataset
    # total_class is the sum of every class from the original dataset
    total_class = x.sum(axis=1)
    p_w = total_class / total
    for i in range(0, C):
        for j in range(0, N):
            p[i, j] = p_w[i] * l[i, j] / (l[0, j] * p_w[0] + l[1, j] * p_w[1])
    # end answer
    return p
def posterior(x):
    l = likelihood(x)
    total = np.sum(x)
    p_w = np.sum(x, axis=1, keepdims=True) / total   # class priors P(w)
    pos = l * p_w                                    # joint P(x, w)
    px = np.sum(x, axis=0) / total                   # evidence P(x)
    return pos / px
def posterior(x):
    '''
    POSTERIOR Two Class Posterior Using Bayes Formula
    INPUT:  x, features of different class, C-By-N vector
            C is the number of classes, N is the number of different feature
    OUTPUT: p, posterior of each class given by each feature, C-By-N matrix
    '''
    C, N = x.shape
    l = likelihood(x)
    total = np.sum(x)
    p = np.zeros((C, N))
    #TODO
    # begin answer
    prior = np.zeros((C, 1))
    for i in range(0, C):
        prior[i] = x.sum(axis=1)[i] / total
    px = np.zeros((1, N))
    for j in range(0, N):
        for i in range(0, C):
            px[0][j] += prior[i] * l[i][j]
    for j in range(0, N):
        for i in range(0, C):
            p[i][j] = l[i][j] * prior[i] / px[0][j]
    # end answer
    return p
def __call__(self, params, dtype=np.double):
    # log posterior = log prior + log likelihood
    #print params[0], params[1], params[2]
    return (prior(params)
            + likelihood.likelihood(params))
def __call__(self, params, dtype=np.double):
    q, beta, k, c1, c2, c3, deq, deqq, diq, delta, gamma, E0, I0 = params
    #from math import log
    return (prior.prior(q, beta, k, c1, c2, c3, deq, deqq, diq, delta, gamma, E0, I0)
            + likelihood.likelihood(q, beta, k, c1, c2, c3, deq, deqq, diq, delta, gamma, E0, I0))
import numpy as np
from numpy.testing import assert_almost_equal

import likelihood


def test_likelihood():
    '''Test that the likelihood calculation is correct'''
    cases = np.asarray([[3, 1, 0, 1],
                        [1, 0, 2, 1],
                        [0, 0, 0, 1]])
    intensity = np.asarray([[1, 3, 1.5, 6],
                            [4.2, 3.1, 7, 1.4],
                            [2, 5.1, 4.2, 8.9]])
    result = likelihood.likelihood(intensity, cases)
    assert_almost_equal(result, -39.145, decimal=3)
def moveBorders(data, options):
    """
    move parameter-boundaries to save computing power

    function borders=moveBorders(data, options)
    this function evaluates the likelihood on a much sparser, equally spaced
    grid defined by mbStepN and moves the borders in so that marginals
    below tol are taken away from the borders.

    this is meant to save computing power by not evaluating the likelihood
    in areas where it is practically 0 everywhere.
    """
    borders = []

    tol = options['maxBorderValue']
    d = options['borders'].shape[0]

    MBresult = {'X1D': []}

    ''' move borders out
    should our borders be too tight, e.g. the distribution does not go
    to zero at the borders, we move them out until this is the case.

    TODO it was disabled in MATLAB version. What to do with it?
    '''

    ''' move borders inwards '''
    for idx in range(0, d):
        if (len(options['mbStepN']) >= idx and options['mbStepN'][idx] >= 2
                and options['borders'][idx, 0] != options['borders'][idx, 1]):
            MBresult['X1D'].append(np.linspace(options['borders'][idx, 0],
                                               options['borders'][idx, 1],
                                               options['mbStepN'][idx]))
        else:
            if (options['borders'][idx, 0] != options['borders'][idx, 1]
                    and options['expType'] != 'equalAsymptote'):
                warnings.warn('MoveBorders: You set only one evaluation for moving the borders!')
            MBresult['X1D'].append(np.array([0.5 * np.sum(options['borders'][idx])]))

    MBresult['weight'] = getWeights(MBresult['X1D'])
    #kwargs = {'alpha': None, 'beta':None , 'lambda': None,'gamma':None , 'varscale':None }
    #fill_kwargs(kwargs,MBresult['X1D'])
    MBresult['Posterior'] = likelihood(data, options, MBresult['X1D'])[0]
    integral = sum(np.reshape(MBresult['Posterior'], -1) * np.reshape(MBresult['weight'], -1))
    MBresult['Posterior'] /= integral

    borders = np.zeros([d, 2])

    for idx in range(0, d):
        (L1D, x, w) = marginalize(MBresult, np.array([idx]))
        x1 = x[np.max([np.where(L1D * w >= tol)[0][0] - 1, 0])]
        x2 = x[np.min([np.where(L1D * w >= tol)[0][-1] + 1, len(x) - 1])]
        borders[idx, :] = [x1, x2]

    return borders
def posterior(x, pks={}):
    #print tot_branch_length
    prior_value = prior(x, p=p, use_skewed_distr=use_skewed_distr, pks=pks)
    if prior_value == -float('inf'):
        return -float('inf'), prior_value
    likelihood_value = likelihood(x, emp_cov, M=M)
    pks['prior'] = prior_value
    pks['likelihood'] = likelihood_value
    #pks['posterior'] = prior_value + likelihood_value
    return likelihood_value, prior_value
def __init__(self, S, M, U, V, I, T, X, Y, events, checkins,
             pre_compute_map, pre_compute_Aij):
    self.S = S
    self.M = M
    self.U = U
    self.V = V
    self.I = I
    self.T = T
    self.X = X
    self.Y = Y
    self.events = events
    self.checkins = checkins
    self.likelihood = likelihood(S, M, U, V, I, T, X, Y, events, checkins,
                                 pre_compute_map, pre_compute_Aij)
def moveBorders(data, options):
    """
    move parameter-boundaries to save computing power

    function borders=moveBorders(data, options)
    this function evaluates the likelihood on a much sparser, equally spaced
    grid defined by mbStepN and moves the borders in so that marginals
    below tol are taken away from the borders.

    this is meant to save computing power by not evaluating the likelihood
    in areas where it is practically 0 everywhere.
    """
    borders = []

    tol = options.maxBorderValue
    d = options.borders.shape[0]

    # simple attribute container; X1D must be pre-sized before indexed assignment
    MBresult = lambda: 0
    MBresult.X1D = [None] * d

    ''' move borders out
    should our borders be too tight, e.g. the distribution does not go
    to zero at the borders, we move them out until this is the case.

    TODO it was disabled in MATLAB version. What to do with it?
    '''

    ''' move borders inwards '''
    for idx in range(0, d):
        if (len(options.mbStepN) >= idx and options.mbStepN[idx] >= 2
                and options.borders[idx, 0] != options.borders[idx, 1]):
            MBresult.X1D[idx] = np.linspace(options.borders[idx, 0],
                                            options.borders[idx, 1],
                                            options.mbStepN[idx])
        else:
            if (options.borders[idx, 0] != options.borders[idx, 1]
                    and options.expType != 'equalAsymptote'):
                warnings.warn('MoveBorders: You set only one evaluation for moving the borders!')
            MBresult.X1D[idx] = np.array([0.5 * np.sum(options.borders[idx])])

    MBresult.weight = getWeights(MBresult.X1D)
    MBresult.Posterior = likelihood(data, options, MBresult.X1D)  # TODO check!
    integral = sum(np.reshape(MBresult.Posterior, -1) * np.reshape(MBresult.weight, -1))
    MBresult.Posterior /= integral

    borders = np.zeros([d, 2])

    for idx in range(0, d):
        (L1D, x, w) = marginalize(MBresult, idx)
        # clamp the moved borders to the evaluated grid
        x1 = x[np.max([np.where(L1D * w >= tol)[0][0] - 1, 0])]
        x2 = x[np.min([np.where(L1D * w >= tol)[0][-1] + 1, len(x) - 1])]
        borders[idx, :] = [x1, x2]

    return borders
def Metro_Hastings(A_old, b_old, sigma_old):
    import numpy as np
    from fakedata import m
    from likelihood import likelihood

    # Initial guess of the parameters
    A_new = np.zeros(m, np.float64)
    b_new = np.zeros(m, np.float64)

    # Suppose the proposal distribution g(theta_new|theta_old) is gaussian
    # N(theta_old, var_prop).
    # Generate a new candidate theta from the gaussian distribution.
    var_prop = 1.0
    sigma_new = np.random.normal(sigma_old, var_prop)
    for i in range(0, m):
        A_new[i] = np.random.normal(A_old[i], var_prop)
        b_new[i] = np.random.normal(b_old[i], var_prop)

    # Compute the log likelihood ratio
    lik_old = likelihood(A_old, b_old, sigma_old)
    lik_new = likelihood(A_new, b_new, sigma_new)
    log_r = lik_new - lik_old

    accepted = 0.0
    u = 0.0
    # Accept or reject
    if log_r > 0:
        # Accept new parameters if r > 1
        accepted = 1.0  # monitor acceptance
    else:
        u = np.random.uniform(0.0, 1.0)
        if u < np.exp(log_r):
            # Accept new parameters with probability r.
            accepted = 1.0  # monitor acceptance
        else:
            A_new = A_old
            b_new = b_old
            sigma_new = sigma_old
    return A_new, b_new, sigma_new, accepted
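# Sketch of a driver loop for the Metro_Hastings step above: run the chain,
# record samples, and track the acceptance rate. This is an assumption about
# how the function is meant to be used (run_chain, n_iter, A0, b0, sigma0 are
# hypothetical names), not part of the original source.
def run_chain(A0, b0, sigma0, n_iter=10000):
    A, b, sigma = A0, b0, sigma0
    samples = []
    n_accepted = 0.0
    for _ in range(n_iter):
        A, b, sigma, accepted = Metro_Hastings(A, b, sigma)
        samples.append((A, b, sigma))
        n_accepted += accepted
    # the acceptance rate is a standard MCMC diagnostic; ~0.2-0.5 is typical
    return samples, n_accepted / n_iter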
def posterior(x, pks={}):
    #print tot_branch_length
    prior_value = prior(x, p=p, use_skewed_distr=use_skewed_distr, pks=pks)
    if prior_value == -float('inf'):
        return -float('inf'), prior_value
    likelihood_value = likelihood(x, emp_cov, M=M, pks=pks)
    pks['prior'] = prior_value
    pks['likelihood'] = likelihood_value
    prior_values = (pks['branch_prior'], pks['no_admix_prior'],
                    pks['admix_prop_prior'], pks['top_prior'])
    covariance = pks['covariance']
    #pks['posterior'] = prior_value + likelihood_value
    return likelihood_value, prior_value, prior_values, covariance
def posterior(x):
    '''
    POSTERIOR Two Class Posterior Using Bayes Formula
    INPUT:  x, features of different class, C-By-N vector
            C is the number of classes, N is the number of different feature
    OUTPUT: p, posterior of each class given by each feature, C-By-N matrix
    '''
    C, N = x.shape
    l = likelihood(x)
    total = np.sum(x)
    p = np.zeros((C, N))
    #TODO
    # begin answer
    preC = [np.sum(x[j]) / total for j in range(C)]   # class priors P(w)
    preX = [i / total for i in np.sum(x, axis=0)]     # evidence P(x)
    for i in range(C):
        for j in range(N):
            p[i, j] = preC[i] * l[i, j] / preX[j]
    # end answer
    return p
def posterior(x, pks={}):
    #print tot_branch_length
    #print get_number_of_leaves(x[0]), emp_cov.shape[0]
    prior_value = prior(x, p=p, use_skewed_distr=use_skewed_distr, pks=pks,
                        use_uniform_prior=use_uniform_prior)
    if prior_value == -float('inf'):
        return -float('inf'), prior_value
    likelihood_value = likelihood(x, emp_cov, M=M, nodes=nodes)
    pks['prior'] = prior_value
    pks['likelihood'] = likelihood_value
    #pks['posterior'] = prior_value + likelihood_value
    return likelihood_value, prior_value
def moveBorders(data, options):
    """
    move parameter-boundaries to save computing power

    function borders=moveBorders(data, options)
    this function evaluates the likelihood on a much sparser, equally spaced
    grid defined by mbStepN and moves the borders in so that marginals
    below tol are taken away from the borders.

    this is meant to save computing power by not evaluating the likelihood
    in areas where it is practically 0 everywhere.
    """
    borders = []

    tol = options['maxBorderValue']
    d = options['borders'].shape[0]

    MBresult = {'X1D': []}

    ''' move borders inwards '''
    for idx in range(0, d):
        if (len(options['mbStepN']) >= idx and options['mbStepN'][idx] >= 2
                and options['borders'][idx, 0] != options['borders'][idx, 1]):
            MBresult['X1D'].append(np.linspace(options['borders'][idx, 0],
                                               options['borders'][idx, 1],
                                               options['mbStepN'][idx]))
        else:
            if (options['borders'][idx, 0] != options['borders'][idx, 1]
                    and options['expType'] != 'equalAsymptote'):
                warnings.warn('MoveBorders: You set only one evaluation for moving the borders!')
            MBresult['X1D'].append(np.array([0.5 * np.sum(options['borders'][idx])]))

    MBresult['weight'] = getWeights(MBresult['X1D'])
    #kwargs = {'alpha': None, 'beta':None , 'lambda': None,'gamma':None , 'varscale':None }
    #fill_kwargs(kwargs,MBresult['X1D'])
    MBresult['Posterior'] = likelihood(data, options, MBresult['X1D'])[0]
    integral = sum(np.reshape(MBresult['Posterior'], -1) * np.reshape(MBresult['weight'], -1))
    MBresult['Posterior'] /= integral

    borders = np.zeros([d, 2])

    for idx in range(0, d):
        (L1D, x, w) = marginalize(MBresult, np.array([idx]))
        x1 = x[np.max([np.where(L1D * w >= tol)[0][0] - 1, 0])]
        x2 = x[np.min([np.where(L1D * w >= tol)[0][-1] + 1, len(x) - 1])]
        borders[idx, :] = [x1, x2]

    return borders
def perform(self, node, inputs, outputs):
    """
    Perform the Op; get the log-likelihood of the data given the inputs.
    """
    start_index = 0
    if self.fixed_r_c is None:
        r_c = inputs[0][0]
        start_index += 1
    else:
        r_c = self.fixed_r_c

    if self.fixed_r_h is None:
        r_h = inputs[0][start_index]
        start_index += 1
    else:
        r_h = self.fixed_r_h

    fit_params = {
        'baseline_intensities': np.asarray(inputs[0][start_index:]),
        'r_c': r_c,
        'r_h': r_h
    }

    if (r_c == 0) and (r_h == 0):
        intensity = likelihood.carehome_intensity_null(
            covariates=self.covariates,
            cases=self.cases,
            fit_params=fit_params
        )
    else:
        intensity = likelihood.carehome_intensity(
            covariates=self.covariates,
            cases=self.cases,
            discharges=self.discharges,
            fit_params=fit_params,
            dist_params=self.dist_params
        )

    logl = likelihood.likelihood(intensity, self.cases)
    if outputs is not None:
        outputs[0][0] = np.array(logl)
    else:
        return logl
def posterior(x):
    '''
    POSTERIOR Two Class Posterior Using Bayes Formula
    INPUT:  x, features of different class, C-By-N vector
            C is the number of classes, N is the number of different feature
    OUTPUT: p, posterior of each class given by each feature, C-By-N matrix
    '''
    C, N = x.shape
    l = likelihood(x)
    total = np.sum(x)
    p = np.zeros((C, N))
    #TODO
    p_w = np.sum(x, axis=1, keepdims=True) / total
    # begin answer
    p = l * p_w / np.sum(l * p_w, axis=0)
    # end answer
    return p
def ll_table(maindir, year, N_TRIAL, trajectories, feature_matrices,
             discount, Tprob, clobber=True):
    outname = 'results/ll_table.csv'
    if os.path.exists(outname) and not clobber:
        return pd.read_csv(outname)
    df = pd.DataFrame.from_dict(read_multi_data(maindir, year))
    fnames = df.fname.values
    ll_list = []
    for f in fnames:
        weights = np.load(f)
        # last updated weights, averaged over experts and trials
        weights = weights[-1].mean(axis=(0, 1))
        ll_list.append(likelihood(N_TRIAL, trajectories, feature_matrices,
                                  weights, discount, Tprob))
    df['LL'] = pd.Series(ll_list)
    df.sort_values(by="LL", ascending=False, inplace=True)
    df = df.rename(columns={"V": "Year",
                            "E": "Epochs",
                            "N": "Number of experts",
                            "LR": "LR",
                            "LRD": "LR Decay",
                            "S": "Seed",
                            "LL": "LogLikelihood"})
    df.to_csv(outname)
    df.iloc[:, 1:].to_html('results/ll_table.html', index=False)
    return df
def plot_ll(N_TRIAL, trajectories, feature_matrices, weights, discount, Tprob):
    N_EPOCHS, N_EXPERTS, N_TRIALS, N_FEAT = np.shape(weights)
    epochs = [0, 10, 20, 40, 80, 160, 320, 400, 499]
    lldict = {'epoch': [], 'average LL': []}
    for epoch in epochs:
        w = weights[epoch].mean(axis=(0, 1))
        lldict['epoch'].append(epoch)
        lldict['average LL'].append(
            likelihood(N_TRIAL, trajectories, feature_matrices, w, discount, Tprob))
    ll = pd.DataFrame.from_dict(lldict)
    g = sns.relplot(x="epoch", y="average LL", data=ll)
    g.fig.suptitle("Average Log Likelihood")
    plt.savefig(f'results/avgLL{str(datetime.date.today())}.png')
def posterior(x):
    '''
    POSTERIOR Two Class Posterior Using Bayes Formula
    INPUT:  x, features of different class, C-By-N vector
            C is the number of classes, N is the number of different feature
    OUTPUT: p, posterior of each class given by each feature, C-By-N matrix
    '''
    C, N = x.shape
    l = likelihood(x)
    total = np.sum(x)
    prior = np.sum(x, axis=1) / total
    p = np.zeros((C, N))
    #TODO
    # begin answer
    for c in range(C):
        p[c] = l[c] * prior[c] / (l[0] * prior[0] + l[1] * prior[1])
    # end answer
    return p
def posterior(x):
    '''
    POSTERIOR Two Class Posterior Using Bayes Formula
    INPUT:  x, features of different class, C-By-N vector
            C is the number of classes, N is the number of different feature
    OUTPUT: p, posterior of each class given by each feature, C-By-N matrix
    '''
    C, N = x.shape
    l = likelihood(x)
    total = np.sum(x)
    p = np.zeros((C, N))
    #TODO
    # begin answer
    prior = np.sum(x, axis=1) / total
    evidence = np.sum(x, axis=0) / total
    p = l * prior.reshape(C, 1) / evidence.reshape(1, N)
    # end answer
    return p
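# Note on the vectorized form above: with l = x / x.sum(axis=1, keepdims=True)
# (a hypothetical but common likelihood(x)), l * prior equals x / total, so the
# posterior reduces to the column-normalized counts x / x.sum(axis=0). A quick
# check under that assumption:
import numpy as np

x = np.array([[3., 1., 4.],
              [2., 2., 2.]])
total = x.sum()
l = x / x.sum(axis=1, keepdims=True)
prior = x.sum(axis=1) / total
evidence = x.sum(axis=0) / total
p = l * prior.reshape(-1, 1) / evidence.reshape(1, -1)
assert np.allclose(p, x / x.sum(axis=0))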
def posterior(x):
    '''
    POSTERIOR Two Class Posterior Using Bayes Formula
    INPUT:  x, features of different class, C-By-N vector
            C is the number of classes, N is the number of different feature
    OUTPUT: p, posterior of each class given by each feature, C-By-N matrix
    '''
    C, N = x.shape
    l = likelihood(x)
    total = np.sum(x)
    p = np.zeros((C, N))
    #TODO
    # begin answer
    for j in range(N):
        P_x = np.sum(x[:, j]) / total
        for i in range(C):
            P_i = np.sum(x[i, :]) / total
            p[i][j] = l[i][j] * P_i / P_x
    # end answer
    return p
def posterior(x):
    '''
    POSTERIOR Two Class Posterior Using Bayes Formula
    INPUT:  x, features of different class, C-By-N vector
            C is the number of classes, N is the number of different feature
    OUTPUT: p, posterior of each class given by each feature, C-By-N matrix
    '''
    C, N = x.shape
    l = likelihood(x)
    total = np.sum(x)
    p = np.zeros((C, N))
    #TODO
    # use true division rather than in-place division, which fails on integer arrays
    pw = np.sum(x, axis=1) / total
    px = np.sum(x, axis=0) / total
    # begin answer
    for i in range(C):
        for j in range(N):
            p[i, j] = l[i, j] * pw[i] / px[j]
    # end answer
    return p
def parameter_estimation(model, f, param_dict):
    if type(model) == list:
        model_expr = model[0].rstrip()
    else:
        model_expr = model
    chosen_model = Model(mtype=model_expr)

    if model_expr in param_dict:
        param = param_dict[model_expr]
        print('Sol (' + model_expr + ') =', param)
    else:
        L = lambda P: -likelihood(f[1], f[2], f[0], P, model)
        # parameters={}
        Sol = minimize(L, np.array([0.29, 4.5]), method='BFGS')  # minimization and parameter estimation
        # options={'maxfev':1e+08,'maxiter':1e+08}
        param = Sol.x
        param_dict[model_expr] = Sol.x
        print('Sol (' + model_expr + ') =', param)

    """Estimate the Posterior PDF - QUADRATIC APPROXIMATION of Likelihood function"""
    if 'C_m' + model_expr in param_dict:
        C_m = param_dict['C_m' + model_expr]
        Alpha = param_dict['Alpha' + model_expr]
        Mu = param_dict['Mu' + model_expr]
        PDF = param_dict['PDF' + model_expr]
    else:
        mu, alpha = symbols('mu alpha')
        Params = [mu, alpha]
        if model == 'Ogden' or model == 'Exponential' or model == 'Mooney-Rivlin':
            L = likelihood(f[1], f[2], f[0], Params, model)
            diff_mu_2 = lambdify((mu, alpha), sympy.diff(L, mu, 2))
            diff_alpha_2 = lambdify((mu, alpha), sympy.diff(L, alpha, 2))
            diff_mu_alpha = lambdify((mu, alpha), sympy.diff(sympy.diff(L, alpha), mu))
        else:
            cov_list = likelihood(f[1], f[2], f[0], Params, model)
            diff_mu_2 = cov_list[0]
            diff_alpha_2 = cov_list[1]
            diff_mu_alpha = cov_list[2]
        A = diff_mu_2(param[0], param[1])
        B = diff_alpha_2(param[0], param[1])
        C = diff_mu_alpha(param[0], param[1])
        Sigma_Mu = np.sqrt(-B / (A * B - C ** 2))
        Sigma_alpha = np.sqrt(-A / (A * B - C ** 2))
        Sigma_Mu_alpha = np.sqrt(C / (A * B - C ** 2) + 0j)
        print('Sigma_Mu =', Sigma_Mu, '\n', 'Sigma_alpha =', Sigma_alpha, '\n',
              'Sigma_Mu_alpha =', Sigma_Mu_alpha)

        """Covariance Matrix"""
        delL = np.array([[A, C], [C, B]])
        C_m = -np.linalg.inv(delL)
        param_dict['C_m' + model_expr] = C_m

        #%%=====================================================================
        """SAMPLING based estimate of Posterior PDF"""
        # Sampling values from [-2*sigma, 2*sigma]
        Mu = np.r_[param[0] - 2 * Sigma_Mu:param[0] + 2 * Sigma_Mu:0.01]
        Alpha = np.r_[param[1] - 2 * Sigma_alpha:param[1] + 2 * Sigma_alpha:0.01]
        PDF = []
        for i in range(len(Mu)):
            PDFrow = []
            for j in range(len(Alpha)):
                P = [Mu[i], Alpha[j]]
                PDFrow.append(np.exp(likelihood(f[1], f[2], f[0], P, model)))
            PDF.append(PDFrow)
        PDF = np.array(PDF)
        param_dict['Mu' + model_expr] = Mu
        param_dict['Alpha' + model_expr] = Alpha
        param_dict['PDF' + model_expr] = PDF

        """Likelihood at optimal values of Mu and Alpha"""
        Pop = np.exp(likelihood(f[1], f[2], f[0], [param[0], param[1]], model))
        print('Pop =', Pop)

        #%%=====================================================================
        """Evidence - Volume under the likelihood surface integrated over mu and alpha"""
        mu_max = 0
        mu_min = 10
        alpha_max = 0
        alpha_min = 20
        Mu = np.r_[param[0] - 2 * Sigma_Mu:param[0] + 2 * Sigma_Mu:0.01]
        Alpha = np.r_[param[1] - 2 * Sigma_alpha:param[1] + 2 * Sigma_alpha:0.01]
        Vol_PDF = np.trapz(np.trapz(PDF, Alpha, axis=1), Mu, axis=0)
        Evidence = (1.0 / (mu_max - mu_min)) * (1.0 / (alpha_max - alpha_min)) * Vol_PDF
        print('Vol_PDF =', Vol_PDF)
        print('Evidence =', Evidence)

    # Print figures
    fig = plt.figure()
    plt.subplot(221)
    plt.errorbar(f[0], f[1], f[2], ecolor='r', elinewidth=1, capsize=3)
    plt.plot(f[0], chosen_model.T(f[0], param[0], param[1]), linewidth=1)
    plt.grid(True)
    plt.axis('tight')

    plt.subplot(222)
    cov_plot(param[:, np.newaxis], C_m, 2, r'$\mu$', r'$\alpha$')

    Alpha, Mu = np.meshgrid(Alpha, Mu)
    ax_five = fig.add_subplot(223, projection='3d')
    surf_PDF = ax_five.plot_surface(Alpha, Mu, PDF, cmap=cm.coolwarm)
    fig.colorbar(surf_PDF)
    plt.axis('tight')

    #%%=====================================================================
    """Comparison - Quadratic Approximation and Sampling"""
    Mu, Alpha, PDF = (param_dict['Mu' + model_expr],
                      param_dict['Alpha' + model_expr],
                      param_dict['PDF' + model_expr])
    plt.subplot(224)
    plt.contour(Mu, Alpha, PDF.T)
    plt.colorbar()
    cov_plot(param[:, np.newaxis], C_m, 2, r'$\mu$', r'$\alpha$')
    # plt.show(plt.figure())
    return fig, param_dict
def __call__(self, params, dtype=np.double):
    q, C, p = params
    # log posterior = log prior + log likelihood
    return (prior.prior(q, C, p)
            + likelihood.likelihood(q, C, p))
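# A hedged sketch of how log-posterior callables like the ones above can be
# used: minimize the negative log posterior with scipy.optimize to obtain a
# MAP estimate. map_estimate and the example start point x0 are hypothetical
# names, not part of the original source.
import numpy as np
from scipy.optimize import minimize

def map_estimate(model, x0):
    # model(params) returns log prior + log likelihood, so negate for minimize
    res = minimize(lambda params: -model(params), x0, method='Nelder-Mead')
    return res.x

# e.g. map_estimate(model, np.array([0.1, 1.0, 0.5]))  # hypothetical (q, C, p) start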
def train(self, x, sample_nums):
    self.likelihood = likelihood(x)
    self.prior = np.array(sample_nums) / np.sum(sample_nums)
    self.log_likelihood = np.log(self.likelihood)
    self.log_prior = np.log(self.prior)
    self.trained = True
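# A possible predict() counterpart to the train() method above, shown only as
# a hedged sketch: score each class by its log prior plus the summed log
# likelihoods of the observed feature indices, then take the argmax. The
# indexing convention (likelihood rows are classes, columns are feature
# values) is an assumption, not taken from the original class.
def predict(self, feature_indices):
    assert self.trained, 'call train() first'
    # log P(w_c) + sum_j log P(x_j | w_c), evaluated for every class c at once
    scores = self.log_prior + self.log_likelihood[:, feature_indices].sum(axis=1)
    return int(np.argmax(scores))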
# read data
data = sio.loadmat('./data.mat')
x1_train, x1_test, x2_train, x2_test = data['x1_train'], data['x1_test'], \
    data['x2_train'], data['x2_test']
all_x = np.concatenate([x1_train, x1_test, x2_train, x2_test], 1)
data_range = [np.min(all_x), np.max(all_x)]

from get_x_distribution import get_x_distribution

train_x = get_x_distribution(x1_train, x2_train, data_range)
test_x = get_x_distribution(x1_test, x2_test, data_range)

from likelihood import likelihood

l = likelihood(train_x)
width = 0.35
p1 = plt.bar(np.arange(data_range[0], data_range[1] + 1), l.T[:, 0], width)
p2 = plt.bar(np.arange(data_range[0], data_range[1] + 1) + width, l.T[:, 1], width)
plt.xlabel('x')
plt.ylabel(r'$P(x|\omega)$')
plt.legend((p1[0], p2[0]), (r'$\omega_1$', r'$\omega_2$'))
plt.axis([data_range[0] - 1, data_range[1] + 1, 0, 0.5])
plt.show()

err = 0
C = l.shape[1]
i = 0
while i < C:
    if l[0][i] < l[1][i]:
def gridSetting(data, options, Seed):

    # initialization
    d = np.size(options['borders'], 0)
    X1D = []

    # equal steps in cumulative distribution
    if options['gridSetType'] == 'cumDist':
        Like1D = np.zeros([options['GridSetEval'], 1])
        for idx in range(d):
            if options['borders'][idx, 0] < options['borders'][idx, 1]:
                X1D.append(np.zeros([1, options['stepN'][idx]]))
                local_N_eval = options['GridSetEval']
                while any(np.diff(X1D[idx]) == 0):
                    Xtest1D = np.linspace(options['borders'][idx, 0],
                                          options['borders'][idx, 1], local_N_eval)
                    alpha = Seed[0]
                    beta = Seed[1]
                    l = Seed[2]
                    gamma = Seed[3]
                    varscale = Seed[4]
                    if idx == 1:
                        alpha = Xtest1D
                    elif idx == 2:
                        beta = Xtest1D
                    elif idx == 3:
                        l = Xtest1D
                    elif idx == 4:
                        gamma = Xtest1D
                    elif idx == 5:
                        varscale = Xtest1D

                    Like1D = likelihood(data, options, [alpha, beta, l, gamma, varscale])
                    Like1D = Like1D + np.mean(Like1D) * options['UniformWeight']
                    Like1D = np.cumsum(Like1D)
                    Like1D = Like1D / max(Like1D)
                    wanted = np.linspace(0, 1, options['stepN'][idx])
                    for igrid in range(options['stepN'][idx]):
                        X1D[idx].append(copy.deepcopy(Xtest1D[Like1D >= wanted, 0, 'first']))  # TODO check
                    local_N_eval = 10 * local_N_eval
            else:
                X1D.append(copy.deepcopy(options['borders'][idx, 0]))

    # equal steps in cumulative second derivative
    elif options['gridSetType'] in ['2', '2ndDerivative']:
        Like1D = np.zeros([options['GridSetEval'], 1])
        for idx in range(d):
            if options['borders'][idx, 0] < options['borders'][idx, 1]:
                X1D.append(np.zeros([1, options['stepN'][idx]]))
                local_N_eval = options['GridSetEval']
                while any(np.diff(X1D[idx]) == 0):
                    Xtest1D = np.linspace(options['borders'][idx, 0],
                                          options['borders'][idx, 1], local_N_eval)
                    alpha = Seed[0]
                    beta = Seed[1]
                    l = Seed[2]
                    gamma = Seed[3]
                    varscale = Seed[4]
                    if idx == 1:
                        alpha = Xtest1D
                    elif idx == 2:
                        beta = Xtest1D
                    elif idx == 3:
                        l = Xtest1D
                    elif idx == 4:
                        gamma = Xtest1D
                    elif idx == 5:
                        varscale = Xtest1D

                    # calc likelihood on the line
                    Like1D = likelihood(data, options, [alpha, beta, l, gamma, varscale])
                    Like1D = np.abs(np.convolve(np.squeeze(Like1D), np.array([1, -2, 1]), mode='same'))
                    Like1D = Like1D + np.mean(Like1D) * options['UniformWeight']
                    Like1D = np.cumsum(Like1D)
                    Like1D = Like1D / max(Like1D)
                    wanted = np.linspace(0, 1, options['stepN'][idx])
                    for igrid in range(options['stepN'][idx]):
                        X1D[idx].append(copy.deepcopy(Xtest1D[Like1D >= wanted, 0, 'first']))  # TODO check
                    local_N_eval = 10 * local_N_eval
                    if local_N_eval > 10 ** 7:
                        X1D[idx] = np.unique(np.array(X1D))  # TODO check
                        break
            else:
                X1D.append(options['borders'][idx, 0])

    # different choices for the varscale
    # we use STD now directly as parametrisation
    elif options['gridSetType'] in ['priorlike', 'STD', 'exp', '4power']:
        for i in range(4):
            if options['borders'][i, 0] < options['borders'][i, 1]:
                X1D.append(np.linspace(options['borders'][i, 0],
                                       options['borders'][i, 1], options['stepN'][i]))
            else:
                X1D.append(copy.deepcopy(options['borders'][i, 0]))
        if options['gridSetType'] == 'priorlike':
            maximum = b.cdf(options['borders'][4, 1], 1, options['betaPrior'])
            minimum = b.cdf(options['borders'][4, 0], 1, options['betaPrior'])
            X1D.append(b.ppf(np.linspace(minimum, maximum, options['stepN'][4]),
                             1, options['betaPrior']))
        elif options['gridSetType'] == 'STD':
            maximum = np.sqrt(options['borders'][4, 1])
            minimum = np.sqrt(options['borders'][4, 0])
            X1D.append((np.linspace(minimum, maximum, options['stepN'][4])) ** 2)
        elif options['gridSetType'] == 'exp':
            p = np.linspace(1, 1, options['stepN'][4])
            X1D.append(np.log(p) / np.log(.1) * (options['borders'][4, 1]
                       - options['borders'][4, 0]) + options['borders'][4, 0])
        elif options['gridSetType'] == '4power':
            maximum = np.sqrt(options['borders'][4, 1])
            minimum = np.sqrt(options['borders'][4, 0])
            X1D.append((np.linspace(minimum, maximum, options['stepN'][4])) ** 4)

    return X1D
j = j.astype(int)  # np.int was removed in newer numpy releases
spam_test_tight = scipy.sparse.csr_matrix((spam_test, (i - 1, j - 1)))
spam_test = scipy.sparse.csr_matrix(
    (spam_test_tight.shape[0], spam_train.shape[0]))
spam_test[:, 0:spam_test_tight.shape[1]] = spam_test_tight

from likelihood import likelihood

# TODO
# Implement a ham/spam email classifier, and calculate the accuracy of your classifier
# Hint: you can directly do matrix multiply between scipy.sparse.coo_matrix and numpy.array.
# Specifically, you can use sparse_matrix * np_array to do this. Note that when you use "*" operator
# between numpy array, this is typically an elementwise multiply.

# begin answer
l = likelihood(x)
print(l.shape)

# (a) indices of the ten largest likelihood ratios (argsort is ascending)
ratio = l[1] / l[0]
max10_idx = np.argsort(ratio)[-10:][::-1]
import linecache
for i in max10_idx:
    s = linecache.getline('all_word_map.txt', i + 1).strip()
    print(s)
# f = open('all_word_map.txt')

class SpamClassifier:
    def __init__(self):
        self.class_num = 2
def psignifitCore(data, options):
    """
    This is the Core processing of psignifit, call the frontend psignifit!
    function result=psignifitCore(data,options)
    Data nx3 matrix with values [x, percCorrect, NTrials]

    sigmoid should be a handle to a function, which accepts
    X,parameters as inputs and gives back a value in [0,1]. ideally
    parameters(1) should correspond to the threshold and parameters(2) to
    the width (distance containing 95% of the function).
    """
    d = len(options['borders'])
    result = {'X1D': [], 'marginals': [], 'marginalsX': [], 'marginalsW': []}

    '''Choose grid dynamically from data'''
    if options['dynamicGrid']:
        # get seed from linear regression with logit transform
        Seed = getSeed(data, options)

        # further optimize the loglikelihood to obtain a good estimate of the MAP
        if options['expType'] == 'YesNo':
            calcSeed = lambda X: -l.logLikelihood(data, options, X[0], X[1], X[2], X[3], X[4])
            Seed = scipy.optimize.fmin(func=calcSeed, x0=Seed)
        elif options['expType'] == 'nAFC':
            calcSeed = lambda X: -l.logLikelihood(data, options, X[0], X[1], X[2],
                                                  1 / options['expN'], X[3])
            Seed = scipy.optimize.fmin(func=calcSeed, x0=[Seed[0:2], Seed[4]])
            Seed = [Seed[0:2], 1 / options['expN'], Seed[3]]  # ToDo check whether row or column vector
        result['X1D'] = gridSetting(data, options, Seed)

    else:  # for types which do not need a MAP estimate
        if (options['gridSetType'] == 'priorlike' or options['gridSetType'] == 'STD'
                or options['gridSetType'] == 'exp' or options['gridSetType'] == '4power'):
            result['X1D'] = gridSetting(data, options)
        else:  # Use a linear grid
            for idx in range(0, d):
                # If there is an actual interval
                if options['borders'][idx, 0] < options['borders'][idx, 1]:
                    result['X1D'].append(np.linspace(options['borders'][idx, 0],
                                                     options['borders'][idx, 1],
                                                     num=options['stepN'][idx]))
                # if parameter was fixed
                else:
                    result['X1D'].append(np.array([options['borders'][idx, 0]]))

    '''Evaluate likelihood and form it into a posterior'''
    (result['Posterior'], result['logPmax']) = l.likelihood(data, options, result['X1D'])
    result['weight'] = getWeights(result['X1D'])
    integral = np.sum(np.array(result['Posterior'][:]) * np.array(result['weight'][:]))
    result['Posterior'] = result['Posterior'] / integral
    result['integral'] = integral

    '''Compute marginal distributions'''
    for idx in range(0, d):
        m, mX, mW = marginalize(result, np.array([idx]))
        result['marginals'].append(m)
        result['marginalsX'].append(mX)
        result['marginalsW'].append(mW)

    result['marginals'] = np.squeeze(result['marginals'])
    result['marginalsX'] = np.squeeze(result['marginalsX'])
    result['marginalsW'] = np.squeeze(result['marginalsW'])

    '''Find point estimate'''
    if options['estimateType'] in ['MAP', 'MLE']:
        # get MLE estimate
        # start at most likely grid point
        index = np.where(result['Posterior'] == np.max(result['Posterior'].ravel()))
        Fit = np.zeros([d, 1])
        for idx in range(0, d):
            Fit[idx] = result['X1D'][idx][index[idx]]

        if options['expType'] == 'YesNo':
            fun = lambda X, f: -l.logLikelihood(data, options, [X[0], X[1], X[2], X[3], X[4]])
            x0 = deepcopy(Fit)
            a = None
        elif options['expType'] == 'nAFC':
            fun = lambda X, f: -l.logLikelihood(data, options, [X[0], X[1], X[2], f, X[3]])
            x0 = deepcopy(Fit[0:3])  # Fit[3] is excluded
            x0 = np.append(x0, deepcopy(Fit[4]))
            a = np.array([1 / options['expN']])
        elif options['expType'] == 'equalAsymptote':
            fun = lambda X, f: -l.logLikelihood(data, options, [X[0], X[1], X[2], f, X[3]])
            x0 = deepcopy(Fit[0:3])
            x0 = np.append(x0, deepcopy(Fit[4]))
            a = np.array([np.nan])
        else:
            raise ValueError('unknown expType')

        if options['fastOptim']:
            Fit = scipy.optimize.fmin(fun, x0, args=(a,), xtol=0, ftol=0,
                                      maxiter=100, maxfun=100)
            warnings.warn('changed options for optimization')
        else:
            Fit = scipy.optimize.fmin(fun, x0, args=(a,), disp=True)

        if options['expType'] == 'YesNo':
            result['Fit'] = deepcopy(Fit)
        elif options['expType'] == 'nAFC':
            fit = deepcopy(Fit[0:3])
            fit = np.append(fit, np.array([1 / options['expN']]))
            fit = np.append(fit, deepcopy(Fit[3]))
            result['Fit'] = fit
        elif options['expType'] == 'equalAsymptote':
            fit = deepcopy(Fit[0:3])
            fit = np.append(fit, Fit[2])
            fit = np.append(fit, Fit[3])
            result['Fit'] = fit
        else:
            raise ValueError('unknown expType')

        par_idx = np.where(np.isnan(options['fixedPars']) == False)
        for idx in par_idx[0]:
            result['Fit'][idx] = options['fixedPars'][idx]

    elif options['estimateType'] == 'mean':
        # get mean estimate
        Fit = np.zeros([d, 1])
        for idx in range(0, d):
            Fit[idx] = np.sum(result['marginals'][idx] * result['marginalsW'][idx]
                              * result['marginalsX'][idx])
        result['Fit'] = deepcopy(Fit)
        Fit = np.empty(Fit.shape)

    '''Include input into result'''
    result['options'] = options  # no copies here, because they are not changing
    result['data'] = data

    '''Compute confidence intervals'''
    if not options['fastOptim']:
        result['conf_Intervals'] = getConfRegion(result)

    return result
def psignifitCore(data, options):
    """
    This is the Core processing of psignifit, call the frontend psignifit!
    function result=psignifitCore(data,options)
    Data nx3 matrix with values [x, percCorrect, NTrials]

    sigmoid should be a handle to a function, which accepts
    X,parameters as inputs and gives back a value in [0,1]. ideally
    parameters(1) should correspond to the threshold and parameters(2) to
    the width (distance containing 95% of the function).
    """
    d = len(options['borders'])
    result = {'X1D': [], 'marginals': [], 'marginalsX': [], 'marginalsW': []}

    '''Choose grid dynamically from data'''
    if options['dynamicGrid']:
        # get seed from linear regression with logit transform
        Seed = getSeed(data, options)

        # further optimize the loglikelihood to obtain a good estimate of the MAP
        if options['expType'] == 'YesNo':
            calcSeed = lambda X: -_l.logLikelihood(data, options, X[0], X[1], X[2], X[3], X[4])
            Seed = scipy.optimize.fmin(func=calcSeed, x0=Seed)
        elif options['expType'] == 'nAFC':
            calcSeed = lambda X: -_l.logLikelihood(data, options, X[0], X[1], X[2],
                                                   1 / options['expN'], X[3])
            Seed = scipy.optimize.fmin(func=calcSeed, x0=[Seed[0:2], Seed[4]])
            Seed = [Seed[0:2], 1 / options['expN'], Seed[3]]  # ToDo check whether row or column vector
        result['X1D'] = gridSetting(data, options, Seed)

    else:  # for types which do not need a MAP estimate
        if (options['gridSetType'] == 'priorlike' or options['gridSetType'] == 'STD'
                or options['gridSetType'] == 'exp' or options['gridSetType'] == '4power'):
            result['X1D'] = gridSetting(data, options)
        else:  # Use a linear grid
            for idx in range(0, d):
                # If there is an actual interval
                if options['borders'][idx, 0] < options['borders'][idx, 1]:
                    result['X1D'].append(np.linspace(options['borders'][idx, 0],
                                                     options['borders'][idx, 1],
                                                     num=options['stepN'][idx]))
                # if parameter was fixed
                else:
                    result['X1D'].append(np.array([options['borders'][idx, 0]]))

    '''Evaluate likelihood and form it into a posterior'''
    (result['Posterior'], result['logPmax']) = _l.likelihood(data, options, result['X1D'])
    result['weight'] = getWeights(result['X1D'])
    integral = np.sum(np.array(result['Posterior'][:]) * np.array(result['weight'][:]))
    result['Posterior'] = result['Posterior'] / integral
    result['integral'] = integral

    '''Compute marginal distributions'''
    for idx in range(0, d):
        m, mX, mW = marginalize(result, np.array([idx]))
        result['marginals'].append(m)
        result['marginalsX'].append(mX)
        result['marginalsW'].append(mW)

    result['marginals'] = np.squeeze(result['marginals'])
    result['marginalsX'] = np.squeeze(result['marginalsX'])
    result['marginalsW'] = np.squeeze(result['marginalsW'])

    '''Find point estimate'''
    if options['estimateType'] in ['MAP', 'MLE']:
        # get MLE estimate
        # start at most likely grid point
        index = np.where(result['Posterior'] == np.max(result['Posterior'].ravel()))
        Fit = np.zeros([d, 1])
        for idx in range(0, d):
            Fit[idx] = result['X1D'][idx][index[idx]]

        if options['expType'] == 'YesNo':
            fun = lambda X, f: -_l.logLikelihood(data, options, [X[0], X[1], X[2], X[3], X[4]])
            x0 = _deepcopy(Fit)
            a = None
        elif options['expType'] == 'nAFC':
            fun = lambda X, f: -_l.logLikelihood(data, options, [X[0], X[1], X[2], f, X[3]])
            x0 = _deepcopy(Fit[0:3])  # Fit[3] is excluded
            x0 = np.append(x0, _deepcopy(Fit[4]))
            a = np.array([1 / options['expN']])
        elif options['expType'] == 'equalAsymptote':
            fun = lambda X, f: -_l.logLikelihood(data, options, [X[0], X[1], X[2], f, X[3]])
            x0 = _deepcopy(Fit[0:3])
            x0 = np.append(x0, _deepcopy(Fit[4]))
            a = np.array([np.nan])
        else:
            raise ValueError('unknown expType')

        if options['fastOptim']:
            Fit = scipy.optimize.fmin(fun, x0, args=(a,), xtol=0, ftol=0,
                                      maxiter=100, maxfun=100)
            warnings.warn('changed options for optimization')
        else:
            Fit = scipy.optimize.fmin(fun, x0, args=(a,), disp=False)

        if options['expType'] == 'YesNo':
            result['Fit'] = _deepcopy(Fit)
        elif options['expType'] == 'nAFC':
            fit = _deepcopy(Fit[0:3])
            fit = np.append(fit, np.array([1 / options['expN']]))
            fit = np.append(fit, _deepcopy(Fit[3]))
            result['Fit'] = fit
        elif options['expType'] == 'equalAsymptote':
            fit = _deepcopy(Fit[0:3])
            fit = np.append(fit, Fit[2])
            fit = np.append(fit, Fit[3])
            result['Fit'] = fit
        else:
            raise ValueError('unknown expType')

        par_idx = np.where(np.isnan(options['fixedPars']) == False)
        for idx in par_idx[0]:
            result['Fit'][idx] = options['fixedPars'][idx]

    elif options['estimateType'] == 'mean':
        # get mean estimate
        Fit = np.zeros([d, 1])
        for idx in range(0, d):
            Fit[idx] = np.sum(result['marginals'][idx] * result['marginalsW'][idx]
                              * result['marginalsX'][idx])
        result['Fit'] = _deepcopy(Fit)
        Fit = np.empty(Fit.shape)

    '''Include input into result'''
    result['options'] = options  # no copies here, because they are not changing
    result['data'] = data

    '''Compute confidence intervals'''
    if not options['fastOptim']:
        result['conf_Intervals'] = getConfRegion(result)

    return result
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Realization of the likelihood ratio method for object identification.

# imports
from __future__ import division
from loadcfg import loadcfg
from readfile import readfits
from likelihood import likelihood

# main
# load cfg
cf = loadcfg()

# load and get abstract of catalog
filt = (['HATLAS_IAU_ID', 'RA_J2000', 'DEC_J2000', 'F250_BEST', 'LR'],
        ['objID', 'ra', 'dec', 'r'])
(ct, nm) = readfits('low.fits', 'high.fits', filt)

# calculate likelihood ratio
sigma = 2.4  # in arcsec
r_max = 10
lr = likelihood(ct, nm, sigma, r_max)