def em_clustering(data, nclust, maxiter, epsilon):
    # Initialization of mean, covariance, and prior
    n, d = data.shape
    mu = np.zeros((nclust, d), dtype=float)
    sigma = np.zeros((nclust, d, d), dtype=float)
    for t in range(nclust):
        # assign the first nclust data points to the means
        mu[t] = data[t]
        sigma[t] = np.identity(d)
    prior = np.asarray(np.repeat(1.0 / nclust, nclust), dtype=float)  # one prior for each cluster
    for i in range(maxiter):
        mu_old = 1 * mu
        W = EM.e_step(data, mu, sigma, prior, nclust)             # calling E-step function
        mu, sigma, prior = EM.m_step(data, W, mu, sigma, nclust)  # calling M-step function
        # checking stopping criterion: total shift of the means
        temp = 0
        for j in range(nclust):
            temp = temp + np.sqrt(np.power((mu[j] - mu_old[j]), 2).sum())
        temp = round(temp, 4)
        if temp <= epsilon:
            break
        # print "Iteration number = %d, stopping criterion = %.4f" % (i + 1, temp)
    return mu, sigma, prior
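# The helpers called above (EM.e_step / EM.m_step) are not shown in this file.
# Below is a minimal sketch of what a Gaussian-mixture E-step and M-step with
# these signatures might look like; the scipy usage and the ridge term are
# assumptions for illustration, not the original module's implementation.
import numpy as np
from scipy.stats import multivariate_normal

def e_step(data, mu, sigma, prior, nclust):
    # responsibilities W[i, k] proportional to prior_k * N(x_i | mu_k, sigma_k)
    n = data.shape[0]
    W = np.zeros((n, nclust))
    for k in range(nclust):
        W[:, k] = prior[k] * multivariate_normal.pdf(data, mean=mu[k], cov=sigma[k])
    W /= W.sum(axis=1, keepdims=True)
    return W

def m_step(data, W, mu, sigma, nclust):
    # re-estimate means, covariances, and priors from the responsibilities
    n, d = data.shape
    Nk = W.sum(axis=0)
    prior = Nk / n
    for k in range(nclust):
        mu[k] = (W[:, k][:, None] * data).sum(axis=0) / Nk[k]
        diff = data - mu[k]
        sigma[k] = (W[:, k][:, None, None] * np.einsum('ni,nj->nij', diff, diff)).sum(axis=0) / Nk[k]
        sigma[k] += 1e-6 * np.identity(d)  # small ridge to keep the covariance invertible
    return mu, sigma, prior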
def em_sd_rand(pos, x, y, z, rcut, eps, sig, nmax, beta, ss, tol, gamma):
    #-------------------------------------------------------------------------
    # Initialize the arrays necessary for the calculation
    #-------------------------------------------------------------------------
    force = np.zeros((len(pos), 3), order='F')
    Up = 0
    force_new = np.zeros((len(pos), 3), order='F')
    Up_new = 0
    Up, force = EM.potential(pos, x, y, z, rcut, eps, sig, Up, force)
    Up_prev = np.copy(Up)
    force_prev = np.copy(force)
    pos_i = np.copy(pos)
    pos_prev = np.copy(pos)
    for i in range(nmax):
        m = 1
        Up, force = EM.potential(pos, x, y, z, rcut, eps, sig, Up, force)
        n_rd = np.random.randint(1, len(pos))
        force1d = np.reshape(force, (3 * len(pos), 1))
        armijo_criterion = -np.matmul(np.ndarray.transpose(force1d), force1d) / np.linalg.norm(force)
        n_list = []
        for k in range(n_rd):
            rand_select = np.random.randint(len(pos))
            if rand_select not in n_list:
                n_list.append(rand_select)
        print("Step " + str(i) + ", the potential energy is " + str(Up))
        print("You have randomly picked " + str(len(n_list)) + " particles")
        for j in range(100):
            for k in range(len(n_list)):
                pos_i[n_list[k]] = pos[n_list[k]] + ((beta)**m) * ss * force[n_list[k]]
            Up_new, force_new = EM.potential(pos_i, x, y, z, rcut, eps, sig, Up_new, force_new)
            # if (Up_new - Up) <= 0:
            if (Up_new - Up) <= (gamma * ((beta)**m) * ss * armijo_criterion):
                pos = np.copy(pos_i)
                Up = Up_new
                break
            else:
                m += 1
        if np.linalg.norm(force) < tol:
            print("\n\nThe position is not changed")
            print("It is likely that you have found a minimum or saddle point")
            print("The energy minimization stopped after " + str(i) + " iterations")
            print("The final potential energy obtained is " + str(Up) + " in units of kT")
            step = i
            break
        else:
            step = nmax
        pos_prev = np.copy(pos)
        if i == (nmax - 1):
            print("\n\nThe maximum number of iterations has been reached")
            print("You can change the number of iterations using -nmax")
            print("The final potential energy obtained is " + str(Up) + " in units of kT")
    return pos, Up, step
def EmotionAnalysis(epsilon, figure):
    global news, CED
    EM.setEpsilon(epsilon)
    # for each day, compute the emotional value from its news and show it
    output = []  # output is a list of pairs [day, CED of that day]
    for i in range(0, len(dayinterval)):
        EM.computeday(news[i], negativeLex, positiveLex, CED)
        output.append([news[i], CED.copy()])
    Graph.plotallfigure(figure, output, dayinterval)
    # after the execution, clear the CED values so they don't interfere with the next execution
    for word in cedwords:
        CED[word] = 0
def em_sd1(pos, x, y, z, rcut, eps, sig, nmax, beta, ss, tol, gamma):
    #-------------------------------------------------------------------------
    # Initialize the arrays necessary for the calculation
    #-------------------------------------------------------------------------
    force = np.zeros((len(pos), 3), order='F')
    Up = 0
    force_new = np.zeros((len(pos), 3), order='F')
    Up_new = 0
    Up, force = EM.potential(pos, x, y, z, rcut, eps, sig, Up, force)
    Up_prev = np.copy(Up)
    force_prev = np.copy(force)
    pos_i = np.copy(pos)
    pos_prev = np.copy(pos)
    for i in range(nmax):
        m = 1
        Up, force = EM.potential(pos, x, y, z, rcut, eps, sig, Up, force)
        # Up_prev = np.copy(Up)
        force_abssum = np.sum(np.abs(force), axis=1)
        V_maxforce = np.amax(force_abssum)
        index_maxforce = np.where(force_abssum == V_maxforce)
        print("The max force is on atom " + str(index_maxforce))
        print("Step " + str(i) + ", the potential energy is " + str(Up))
        force1d = np.reshape(force, (3 * len(pos), 1))
        armijo_criterion = -np.matmul(np.ndarray.transpose(force1d), force1d) / np.linalg.norm(force)
        for j in range(100):
            for k in range(len(index_maxforce)):
                pos_i[index_maxforce[k]] = pos[index_maxforce[k]] + ((beta)**m) * ss * force[index_maxforce[k]]
            Up_new, force_new = EM.potential(pos_i, x, y, z, rcut, eps, sig, Up_new, force_new)
            # if (Up_new - Up) <= 0:
            if (Up_new - Up) <= (gamma * ((beta)**m) * ss * armijo_criterion):
                pos = np.copy(pos_i)
                Up = Up_new
                break
            else:
                m += 1
        if np.linalg.norm(force) < tol:
            print("\n\nThe potential is not changed")
            print("It is likely that you have found a minimum or saddle point")
            print("The energy minimization stopped after " + str(i) + " iterations")
            print("The final potential energy obtained is " + str(Up) + " in units of kT")
            step = i
            break
        else:
            step = nmax
        pos_prev = np.copy(pos)
        if i == (nmax - 1):
            print("\n\nThe maximum number of iterations has been reached")
            print("You can change the number of iterations using -nmax")
            print("The final potential energy obtained is " + str(Up) + " in units of kT")
    return pos, Up, step
def fit(self, nIter=20, lookahead=True, metric='LL', verbose=True, lNo=5):
    import time
    import EM
    from _gpinf import precompute_gp, GP_timescale_Cost
    import scipy.optimize as op
    import copy as cp
    st = time.time()
    self.hists = {'lapinf': [], 'Cdinf': [], 'tavinf': []}
    self.was_fit = True
    to_break = False
    for iterN in range(nIter):
        if verbose:
            print "Running EM iteration %s" % iterN,
        ####### E-step ###########
        lapinfres = EM.E_step(self.train_data, self.params)
        self._update_params_E(lapinfres)
        self.hists['lapinf'].append(lapinfres)
        ####### M-step ###########
        Cdinf, tavInf = EM.M_step(self.train_data, self.params)
        self.hists['Cdinf'].append(Cdinf)
        self.hists['tavinf'].append(tavInf)
        self._update_params_M(Cdinf, tavInf)
        # inf_rates = _np.exp(self.params['C'].dot(x[trl_idx]) + self.params['d'][:, None])
        # LLi = _np.sum(self.train_data * _np.log(infRates) - infRates)
        if verbose:
            print "|| log(L): %s || total time: %ss" % (_np.round(Cdinf['logL'], decimals=2), _np.round(time.time() - st, decimals=1)),
        self.params['logL_store'].append(Cdinf['logL'])
        if lookahead:
            loo_res = self.leave_N_out_CV(N=lNo)  # one-step look-ahead prediction to monitor convergence
            if iterN >= 1:
                if _np.median(loo_res['LL_poiss']) < pLL:
                    self._update_params_E(self.hists['lapinf'][-2])
                    self._update_params_M(self.hists['Cdinf'][-2], self.hists['tavinf'][-2])
                    print "\n\nLeave one out error on validation set increased, stopping early"
                    to_break = True
            pLL = _np.median(loo_res['LL_poiss'])
            if verbose:
                print " || cc %s ||" % _np.round(_np.median(loo_res['pearsonr']), decimals=2),
                print " || Validation LL %s" % _np.round(pLL / (_np.sum(self.dsets['validation'])), decimals=4)
        if to_break:
            break
def model_eval(dataFile, nclust, maxiter, epsilon):
    # reading data: X - data, y - class attribute
    X, y = readingData.dataPrep(dataFile)
    # training
    start_time = time.time()
    mu, sigma, prior = EM.em_clustering(X, nclust, maxiter, epsilon)
    trainingTime = time.time() - start_time
    # testing
    W = EM.e_step(X, mu, sigma, prior, nclust)
    accuracy = testing.test(y, W, X)
    trainingTime = round(trainingTime, 3)
    accuracy = int(round(accuracy * 100))
    print("Training running time: %s seconds" % trainingTime)
    print("accuracy: %s%%" % accuracy)
def scenario1(D, sFileStub):
    f = open(sFileStub + ".scen1.results", "w")
    EMResults = []
    for iOutertrial in range(numOuterTrials):
        f.write("outertrial: %d\n" % iOutertrial)
        f.write("likelihood,NMI\n")
        bestEM = []
        bestLikelihood = 0
        for iRestart in range(numInnerTrials):
            EMAlg = EM.cEM(D)
            EMAlg.bPPC = False
            EMAlg.EM(len(D.classlist))
            if iRestart == 0 or EMAlg.dEMLikelihood > bestLikelihood:
                bestLikelihood = EMAlg.dEMLikelihood
                bestEM = EMAlg
        EMResults.append(bestEM)
        f.write("%f,%f\n" % (bestLikelihood, utils.evaluateEM_NMI(D, bestEM)))
        f.flush()
    f.close()
    return EMResults
def good_graph(X):
    a = EM.EM_algorithm(components=2, tol=1e-6, max_iter=60)
    a.fit(X)
    y = a.res['gamma'].argmax(axis=1)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=90)
    plt.title('Good clustering', fontsize=30)
    plt.show()
def quadratic_results():
    global EM_par_SM_1, EM_par_SM_2, EM_par_NM_1, EM_par_NM_2
    EM_par_SM_1, EM_par_SM_2, EM_par_NM_1, EM_par_NM_2 = EM.EM_results()
    global d, e
    d, e = read_data()
    global td, te
    td, te = interpolation()
    global e_sym2
    e_sym2 = e_sym2_delta()
    global esym2_eta
    esym2_eta, esym4_eta = e_sym2_eta()
    global e_sym2_av, e_sym2_pot_av, e_sym2_pot_eff_av, s
    global e_sym2_eta_av, e_sym2_eta_pot_av, e_sym2_eta_pot_eff_av, s_eta
    e_sym2_av, e_sym2_pot_av, e_sym2_pot_eff_av, s,\
    e_sym2_eta_av, e_sym2_eta_pot_av, e_sym2_eta_pot_eff_av, s_eta = data_preperation()
    global f_esym2_c, e_sym2_par, e_sym2_eta_par
    f_esym2_c, e_sym2_par, e_sym2_eta_par = Analyse_e_sym2()
    return d, e, td, te, e_sym2, esym2_eta, esym4_eta, e_sym2_av, e_sym2_eta_av, f_esym2_c, e_sym2_par, e_sym2_eta_par
def fit(self, data, iterations):
    prior, muu, sig = kmean_init.EM_init(data, self.nbmixtures_states)
    self.prior, self.muu, self.sig = em.EM(data, prior, muu, sig, iterate=iterations)
def em_sd(pos, x, y, z, rcut, eps, sig, nmax, beta, ss, tol, gamma):
    #-------------------------------------------------------------------------
    # Initialize the arrays necessary for the calculation
    #-------------------------------------------------------------------------
    force = np.zeros((len(pos), 3), order='F')
    Up = 0
    force_new = np.zeros((len(pos), 3), order='F')
    Up_new = 0
    Up, force = EM.potential(pos, x, y, z, rcut, eps, sig, Up, force)
    Up_prev = np.copy(Up)
    force_prev = np.copy(force)
    pos_i = np.copy(pos)
    pos_prev = np.copy(pos)
    for i in range(nmax):
        m = 1
        Up, force = EM.potential(pos, x, y, z, rcut, eps, sig, Up, force)
        print("Step " + str(i) + ", the potential energy is " + str(Up))
        force1d = np.reshape(force, (3 * len(pos), 1))
        armijo_criterion = -(np.matmul(np.ndarray.transpose(force1d), force1d) / np.linalg.norm(force))
        for j in range(100):
            pos_i = pos + ((beta)**m) * ss * force
            Up_new, force_new = EM.potential(pos_i, x, y, z, rcut, eps, sig, Up_new, force_new)
            # This if statement accepts any energy decrease:
            # if (Up_new - Up) <= 0:
            # This if statement implements the Armijo rule
            # print(gamma * ((beta)**m) * ss * armijo_criterion)
            if (Up_new - Up) <= (gamma * ((beta)**m) * ss * armijo_criterion):
                pos = np.copy(pos_i)
                Up = Up_new
                break
            else:
                m += 1
        if np.linalg.norm(force) < tol:
            print("\n\nThe position is not changed")
            print("It is likely that you have found a minimum or saddle point")
            print("The energy minimization stopped after " + str(i) + " iterations")
            print("The final potential energy obtained is " + str(Up) + " in units of kT")
            step = i
            break
        else:
            step = nmax
        pos_prev = np.copy(pos)
        if i == (nmax - 1):
            print("\n\nThe maximum number of iterations has been reached")
            print("You can change the number of iterations using -nmax")
            print("The final potential energy obtained is " + str(Up) + " in units of kT")
    return pos, Up, step
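# The inner loop in em_sd (and in em_sd1 / em_sd_rand above) is a backtracking line
# search: the step ss*beta**m along the force (the negative gradient of Up) is
# shrunk until an Armijo-type condition  Up(pos + t*d) - Up(pos) <= gamma * t * grad(Up)·d
# is satisfied. A minimal standalone sketch of that rule is given below; `f`,
# `grad_f`, and `backtracking_step` are hypothetical names used only for illustration.
import numpy as np

def backtracking_step(f, grad_f, x, ss=1.0, beta=0.5, gamma=1e-4, max_backtracks=100):
    g = grad_f(x)
    d = -g                       # steepest-descent direction
    fx = f(x)
    t = ss
    for _ in range(max_backtracks):
        if f(x + t * d) - fx <= gamma * t * np.dot(g, d):
            return x + t * d     # Armijo condition satisfied: accept the step
        t *= beta                # otherwise shrink the step and retry
    return x                     # no acceptable step found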
def inc_log(X):
    a = EM.EM_algorithm(components=10, tol=1e-10, max_iter=60)
    a.fit(X)
    plt.title('Increase of log-likelihood', fontsize=30)
    plt.xlabel('number of iterations', fontsize=15)
    plt.ylabel('log-likelihood', fontsize=15)
    plt.plot(a.res['likelihood'])
    plt.show()
def bad_graph(X):
    mu_s = np.array([[-5, 0], [0, 5]])
    a = EM.EM_algorithm(components=2, mu_s=mu_s, tol=1e-6, max_iter=60)
    a.fit(X)
    y = a.res['gamma'].argmax(axis=1)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=90)
    plt.title('Bad clustering', fontsize=30)
    plt.show()
def test_cross_val(self, trl_type='validation'):
    import EM
    import copy as cp
    CV_params = cp.deepcopy(self.params)
    CV_params['nTrials'] = len(self.dsets[trl_type])
    CV_params['latent_trajctory'] = [_np.zeros([self.nDims, self.n_timePoints]) for i in range(len(CV_params['latent_traj']))]
    lapinfres = EM.E_step(self.dsets[trl_type], CV_params)
    self.CV_inf[trl_type] = lapinfres
    self.CV_inf[trl_type]['latent_traj'] = lapinfres['post_mean']
def multiprocess_EM_inner(pairs_inner, shared_genoMatrix, shared_resMatrix, Q, EM_iter, EM_tol,
                          start_idx, seeds, EM_accel, EM_stop_haps):
    npops = Q.shape[1]
    w = start_idx  # used to index rows in the results matrix
    for i in range(len(pairs_inner)):
        pair = pairs_inner[i]
        seed = seeds[i]
        # get genotype codes
        codes = shared_genoMatrix[pair[0]] + 3 * shared_genoMatrix[pair[1]]
        H = utils.get_rand_hap_freqs(n=npops, seed=seed)
        # do the EM
        if (EM_accel & EM_stop_haps):
            res_EM = EM.do_accelEM_stopfreqs((H, Q, codes, EM_iter, EM_tol))
        elif EM_accel:
            res_EM = EM.do_accelEM((H, Q, codes, EM_iter, EM_tol))
        else:
            res_EM = EM.do_multiEM((H, Q, codes, EM_iter, EM_tol))
        LL = res_EM[1]
        H = res_EM[0]
        flags = utils.flag_maf(H, 0.05)
        # fill results matrix
        shared_resMatrix[w, 0] = pair[0]            # index of first locus
        shared_resMatrix[w, 1] = pair[1]            # index of second locus
        shared_resMatrix[w, 2] = np.sum(codes < 9)  # count non-missing
        shared_resMatrix[w, 3] = LL                 # loglike
        shared_resMatrix[w, 4] = res_EM[2]          # n_iter
        # fill out the flags
        for ix in range(npops):
            shared_resMatrix[w, 5 + ix] = flags[ix]
        # fill out the haplotype frequencies
        ix = 0
        for pop in range(npops):
            for hap in range(4):
                shared_resMatrix[w, 5 + npops + ix] = H[pop, hap]
                ix += 1
        w += 1
def main():
    # Import results from Effective Mass module
    # EM = Effective Mass; par = Best fit parameter values
    # SM = Symmetric matter; NM = Neutron matter
    global EM_par_SM_1, EM_par_NM_1
    EM_par_SM_1, _, EM_par_NM_1, _ = EM.EM_results()

    # Import results from symmetry energy module
    # Refer to this module for an explanation of the variables
    global te_SM_av, te_NM_av, f_SM, SM3_par, f_NM, NM3_par
    te_SM_av, te_NM_av, f_SM, SM3_par, f_NM, NM3_par = symmetry_energy.e_sym_results()

    # Import results from quadratic symmetry energy module
    # Refer to this module for an explanation of the variables
    global d, e, td, te, e_sym2, esym2_eta, esym4_eta, e_sym2_av, e_sym2_eta_av, f_esym2_c, e_sym2_par, e_sym2_eta_par
    d, e, td, te, e_sym2, esym2_eta, esym4_eta, e_sym2_av, e_sym2_eta_av, f_esym2_c, e_sym2_par, e_sym2_eta_par = quadratic_symmetry_energy.quadratic_results()

    # Calculate and plot non-quadratic symmetry energies
    plot_e_symnq()

    # Calculate and plot final residuals of the fit wrt the data
    plot_residues()

    # Print best fit values of the parameters
    print('----------Delta----------')
    print("E_sym,nq = ", NM3_par['E_sat+E_sym'] - SM3_par['E_sat'] - e_sym2_par['E_sym2'])
    print("L_sym,nq = ", NM3_par['L_sym'] - e_sym2_par['L_sym2'])
    print("K_sym,nq = ", NM3_par['K_sat+K_sym'] - SM3_par['K_sat'] - e_sym2_par['K_sym2'])
    print("Q_sym,nq = ", NM3_par['Q_sat+Q_sym'] - SM3_par['Q_sat'] - e_sym2_par['Q_sym2'])
    print("Z_sym,nq = ", NM3_par['Z_sat+Z_sym'] - SM3_par['Z_sat'] - e_sym2_par['Z_sym2'])
    print('----------Eta----------')
    print("E_sym,nq = ", NM3_par['E_sat+E_sym'] - SM3_par['E_sat'] - e_sym2_eta_par['E_sym2'])
    print("L_sym,nq = ", NM3_par['L_sym'] - e_sym2_eta_par['L_sym2'])
    print("K_sym,nq = ", NM3_par['K_sat+K_sym'] - SM3_par['K_sat'] - e_sym2_eta_par['K_sym2'])
    print("Q_sym,nq = ", NM3_par['Q_sat+Q_sym'] - SM3_par['Q_sat'] - e_sym2_eta_par['Q_sym2'])
    print("Z_sym,nq = ", NM3_par['Z_sat+Z_sym'] - SM3_par['Z_sat'] - e_sym2_eta_par['Z_sym2'])
    print('----------Fit to E_sym4 without meta-model----------')
    e_sym4_eta_par = Fit_e_sym4_eta()
    print(e_sym4_eta_par)
    print('----------Fit to E_symnq without meta-model----------')
    e_symnq_eta_par = Fit_e_symnq_eta()
    print(e_symnq_eta_par)
def main():
    # Import results from Effective Mass module
    # EM = Effective Mass; par = Best fit parameter values
    # SM = Symmetric matter; NM = Neutron matter
    # 1 = linear fit; 2 = quadratic fit
    global EM_par_SM_1, EM_par_SM_2, EM_par_NM_1, EM_par_NM_2
    EM_par_SM_1, EM_par_SM_2, EM_par_NM_1, EM_par_NM_2 = EM.EM_results()

    # Read input data of energy/particle
    # d = density, e = energy
    global d, e
    d, e = read_data()

    # Perform interpolation to go to a uniform grid in density
    # td = target density with uniform grid, te = target energy
    global td, te
    td, te = interpolation()

    # Extract e_sym2 from data using the delta expansion
    global e_sym2
    e_sym2 = e_sym2_delta()

    # Extract e_sym2 from data using the eta expansion
    global esym2_eta
    esym2_eta, _ = e_sym2_eta()

    # Prepare data for fitting and plotting
    # av refers to an average over the 6 Hamiltonians
    # s refers to the SVD cut imposed to regulate zero eigenvalues of the correlation matrix obtained during the averaging
    # eta refers to the expansion around NM. Absence of eta implies expansion around SM.
    global e_sym2_av, e_sym2_pot_av, e_sym2_pot_eff_av, s
    global e_sym2_eta_av, e_sym2_eta_pot_av, e_sym2_eta_pot_eff_av, s_eta
    e_sym2_av, e_sym2_pot_av, e_sym2_pot_eff_av, s,\
    e_sym2_eta_av, e_sym2_eta_pot_av, e_sym2_eta_pot_eff_av, s_eta = data_preperation()

    # Fit to e_sym2 obtained above
    # f_esym2_c = fit function; e_sym2_par = best fit parameters for the delta expansion
    # e_sym2_eta_par = best fit parameters for the eta expansion
    global f_esym2_c, e_sym2_par, e_sym2_eta_par
    f_esym2_c, e_sym2_par, e_sym2_eta_par = Analyse_e_sym2()

    # Plot e_sym2 for the two expansions and also the difference
    plot_e_sym2()

    # Print best fit parameter values
    print('\n', '-----Delta----')
    print(e_sym2_par)
    print('\n', '-----Eta-----')
    print(e_sym2_eta_par)
def EM_s(X, count, max_iter=100, mu_s=None, sigma_s=None, pi_s=None, components=10, tol=1e-3, cov_type='full'):
    like_hood = -np.inf
    for i in range(count):
        a = EM.EM_algorithm(sigma_s=sigma_s, mu_s=mu_s, pi_s=pi_s, cov_type=cov_type,
                            components=components, tol=tol, max_iter=max_iter)
        a.fit(X)
        like_hood = a.res['likelihood'][-1]
        sigma = a.res['sigma']
        mu = a.res['mu']
        pi = a.res['pi']
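# EM_s above overwrites like_hood, sigma, mu, and pi on every restart and returns
# nothing, so only the last run survives. If the intent is a best-of-`count`
# restart scheme, a hedged sketch of that behavior (assuming the same
# EM.EM_algorithm interface and `res` keys used in the snippets above) is:
import numpy as np

def EM_best_of(X, count, **em_kwargs):
    best = {'likelihood': -np.inf}
    for _ in range(count):
        a = EM.EM_algorithm(**em_kwargs)
        a.fit(X)
        ll = a.res['likelihood'][-1]
        if ll > best['likelihood']:
            # keep the parameters of the highest-likelihood run
            best = {'likelihood': ll, 'mu': a.res['mu'],
                    'sigma': a.res['sigma'], 'pi': a.res['pi']}
    return best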
def GMMPVPosteriors(v_j, t, MU, SIGMA, PI, M, outLambda=None):
    # assuming unimodal
    ncomp = t.shape[0]    # number of components
    nprop = t.shape[1]    # number of proportions
    nsamp = v_j.shape[0]  # number of samples
    if outLambda is None:
        T = np.zeros([nsamp, nprop])
    else:
        T = np.zeros([nsamp, nprop + 1])  # outlier class is at the end
    for k in range(nprop):
        SIGMAt = M * (SIGMA * t[:, k][np.newaxis, np.newaxis, :]).sum(axis=2)
        MUt = M * (MU * t[:, k][np.newaxis, :]).sum(axis=1)
        T[:, k] = PI[k] * EM.Gaussian(v_j, MUt, SIGMAt)
    if outLambda is not None:
        # Outlier component
        T[:, -1] = PI[-1] * outLambda
    T = T / T.sum(axis=1)[:, np.newaxis]
    return T
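# EM.Gaussian above is expected to return the multivariate normal density of each
# row of v_j under mean MUt and covariance SIGMAt. A minimal sketch of such a
# helper is given below; the exact interface is an assumption, not the module's
# actual code.
import numpy as np

def Gaussian(X, mu, Sigma):
    # density of each row of X (shape n x d) under N(mu, Sigma)
    d = X.shape[1]
    diff = X - mu
    Sinv = np.linalg.inv(Sigma)
    norm = np.sqrt(((2 * np.pi) ** d) * np.linalg.det(Sigma))
    expo = -0.5 * np.einsum('nd,de,ne->n', diff, Sinv, diff)
    return np.exp(expo) / norm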
def squant():
    # default execution
    if (sys.argv[1].lower() == "default"):
        input = "alignments_small.cs423.gz"
        output = "quants.tsv"
    else:
        # determine if a valid command was entered
        if len(sys.argv) < 6:
            print("please enter a valid tool")
            return
        else:
            squant = sys.argv[1]
            in_ = sys.argv[2]
            input = sys.argv[3]
            out_ = sys.argv[4]
            output = sys.argv[5]
            # check to see if the flags were correctly typed
            if (not squant == "squant") or (not in_ == "--in") or (not out_ == "--out"):
                print('\033[31m' + "one of your flags was mistyped.")
                print('\033[39m')
                return
    import time
    start = time.time()
    # call EM
    EM.EM_Algorithm(input, output)
    # print runtime
    time = (time.time() - start) / 60
    print('runtime ~ ', round(time, 1), 'minutes')
    # nice little print-out
    print('\033[31m' + "You can find results in " + output)
    # reset to default color
    print('\033[39m')
def e_sym_results():
    global EM_par_SM_1, EM_par_SM_2, EM_par_NM_1, EM_par_NM_2
    EM_par_SM_1, EM_par_SM_2, EM_par_NM_1, EM_par_NM_2 = EM.EM_results()
    global e_SM, e_NM, d_SM, d_NM, te_SM, te_NM, td
    e_SM, e_NM, d_SM, d_NM, te_SM, te_NM, td = SM_NM.SM_NM_results()
    global te_SM_av, te_NM_av, ts_SM, ts_NM, te_SM_pot_av
    global te_NM_pot_av, te_SM_pot_eff_av, te_NM_pot_eff_av
    global te_SM_pot_eff_1_av, te_NM_pot_eff_1_av
    te_SM_av, te_NM_av, ts_SM, ts_NM, te_SM_pot_av,\
    te_NM_pot_av, te_SM_pot_eff_av, te_NM_pot_eff_av,\
    te_SM_pot_eff_1_av, te_NM_pot_eff_1_av = data_preparation()
    global f_SM, SM3_par
    f_SM, SM3_par = Analyse_SM()
    global f_NM, NM3_par
    f_NM, NM3_par = Analyse_NM()
    return te_SM_av, te_NM_av, f_SM, SM3_par, f_NM, NM3_par
def main():
    # Import results from Effective Mass module
    # EM = Effective Mass; par = Best fit parameter values
    # SM = Symmetric matter; NM = Neutron matter
    # 1 = linear fit; 2 = quadratic fit
    global EM_par_SM_1, EM_par_SM_2, EM_par_NM_1, EM_par_NM_2
    EM_par_SM_1, EM_par_SM_2, EM_par_NM_1, EM_par_NM_2 = EM.EM_results()

    # Import results from SM_NM module
    # e_SM and e_NM are the energy/particle in symmetric and neutron matter
    # d_SM and d_NM are the corresponding densities
    # td = target density with uniform grid
    # te_SM, te_NM = target energies corresponding to td
    global e_SM, e_NM, d_SM, d_NM, te_SM, te_NM, td
    e_SM, e_NM, d_SM, d_NM, te_SM, te_NM, td = SM_NM.SM_NM_results()

    # Prepare data for fitting and plotting
    # av refers to an average over the 6 Hamiltonians
    # s refers to the SVD cut imposed to regulate zero eigenvalues of the correlation matrix obtained during the averaging
    global te_SM_av, te_NM_av, ts_SM, ts_NM, te_SM_pot_av
    global te_NM_pot_av, te_SM_pot_eff_av, te_NM_pot_eff_av
    global te_SM_pot_eff_1_av, te_NM_pot_eff_1_av
    te_SM_av, te_NM_av, ts_SM, ts_NM, te_SM_pot_av,\
    te_NM_pot_av, te_SM_pot_eff_av, te_NM_pot_eff_av,\
    te_SM_pot_eff_1_av, te_NM_pot_eff_1_av = data_preparation()

    # Fit to the energy per particle in SM.
    # This performs Scaling 3*
    global f_SM, SM3_par
    f_SM, SM3_par = Analyse_SM()

    # Fit to the energy per particle in NM.
    # This performs Scaling 3*
    global f_NM, NM3_par
    f_NM, NM3_par = Analyse_NM()

    # Calculate and plot e_sym
    calculate_and_plot_esym()
def getVHSolver(createIfNotExisting=False):
    ''' Retrieves the VHSolver object.

        'createIfNotExisting' if True forces the creation of a VHSolver object, if not already existing

        Returns the VHSolver object of the current Document.
        If more than one VHSolver object is present, return the first one.
    '''
    # get the document containing this object
    doc = FreeCAD.ActiveDocument
    if doc is None:
        FreeCAD.Console.PrintWarning(translate("EM", "No active document available. Cannot get any VHSolver object."))
        return None
    solver = [obj for obj in doc.Objects if Draft.getType(obj) == "VHSolver"]
    if solver == []:
        if createIfNotExisting == True:
            solver = EM.makeVHSolver()
            if solver is None:
                FreeCAD.Console.PrintError(translate("EM", "Cannot create VHSolver!"))
        else:
            FreeCAD.Console.PrintWarning(translate("EM", "Cannot get VHSolver. Is at least one VHSolver object existing?"))
            return None
    else:
        # take the first in the list
        solver = solver[0]
    return solver
import EM as EM
import numpy as np
from sklearn.cluster import KMeans

data = np.genfromtxt('/Users/wujiamin/Desktop/CORTEX data/gene_expression.txt', delimiter=' ')
label_true = np.genfromtxt('/Users/wujiamin/Desktop/CORTEX data/labels.txt', delimiter=' ')
label_true = label_true.astype(int)

n_clus = 7
N, D = data.shape
K = 50
EZ, params, A, mus, sigmas, decay_coef = EM.fitModel(data, K, singleSigma=False)
mu = np.zeros([D, N])
for i in range(0, N):
    mu[:, i] = list(mus)
fit_data = EZ

clf = KMeans(n_clusters=n_clus)
clf.fit(fit_data)
labels = clf.labels_

accu_count = 0
for i in range(0, len(label_true)):
    if label_true[i] == labels[i]:
        accu_count = accu_count + 1
accuracy = accu_count / len(label_true)
# print(accuracy)
                    help='Verbose mode (Styles Matrices + probZ)')
args = parser.parse_args()

data = init_from_file(args.train_data, 0, not args.r, args.t or args.n)

# Uncomment to confirm correctness of Q and gradQ
# check_gradient(data)

# Run the naive (baseline) algorithm
if args.n:
    print naive(data)
# Train our model
else:
    start = time()
    EM(data)
    elapsed = time() - start
    print "Completed training in %d minutes and %d seconds\n" % (elapsed / 60, elapsed % 60)

    if args.v:
        data.outputResults()

    if args.t:
        if args.p:
            acc, ce = data.permutedAcc()
        else:
            acc = data.std_percent_correct()
            ce = data.std_cross_entropy()
        print "Percent Correct: " + str(acc * 100) + "%"
        print "Cross Entropy: " + str(ce)
# username = mzr3
import EM as em
import pandas as pd

if __name__ == "__main__":
    breast_cancer = pd.read_csv('./breast-cancer-wisconsin.csv')
    li = list(breast_cancer)
    breast_cancer = pd.DataFrame(breast_cancer.values, columns=li)
    # Class = li[-1]
    arr = breast_cancer.values
    y = arr[:, -1]
    X = arr[:, 0:-1]
    tester = em.ExpectationMaximizationTestCluster(X, y, clusters=range(2, 15), plot=True, stats=True)
    tester.run()
# Run multiple trials of a test
if __name__ == '__main__':
    np.random.seed()
    accuracies = []
    cross_entropies = []
    times = []
    steps = []
    for sim in range(50):  # Run 50 simulations
        data = init_from_file("Tests/SinkhornAnalysis/data/%d.txt" % sim, 0, True, True)
        start = time()
        num_steps = EM(data)
        elapsed = time() - start
        times.append(elapsed)
        steps.append(num_steps)
        print("Simulation %d:" % sim)
        print("Number of EM Steps: %d" % num_steps)
        print("Elapsed Time: %d minutes, %d seconds" % (elapsed / 60, elapsed % 60))
        acc, ce = data.permutedAcc()
        print("Accuracy: %.2f %% | %.2f CE\n" % (acc * 100, ce))
        accuracies.append(acc)
        cross_entropies.append(ce)
    average_acc = 100 * sum(accuracies) / len(accuracies)
import numpy
import EM
import cData
import utils

# run EM several times and get the likelihood
for iRestart in range(20):
    D = cData.cData("data/winenorm3_pyre.csv")
    # D = cData.cData("data/normvert.csv")
    M = EM.cEM(D)
    M.bPPC = False
    M.EM(3)
    print M.dEMLikelihood,
    print " nmi: ",
    print utils.evaluateEM_NMI(D, M)
def motif_matrix(fsa, motif, outfile, genome='mm9'):
    if genome == 'hg18':
        markov = "/nfs/genomes/human_gp_mar_06/hg18_promoters_3000_1000.markov"
    else:
        markov = "/nfs/data/cwng/chipseq/hypotheses/Mouse.markov"

    # Load motif and background-adjust the PSSM
    m = MotifTools.load(motif)
    EM.loadMarkovBackground(markov)
    bg = EM.theMarkovBackground.zeroth()
    F = Fasta.load(fsa, key_func=lambda x: x)
    seqs = F.values()
    n_seqs = len(seqs)
    n_motifs = len(m)
    SCORES = np.zeros((n_motifs, n_seqs), dtype='float')
    # SHIFTS = np.zeros((n_motifs, n_seqs))
    # out = open(outfile, 'w')
    for i, M in enumerate(m):
        ll = M.logP
        EM.loadMarkovBackground(markov)
        bg = EM.theMarkovBackground.zeroth()
        for pos in ll:
            for letter in pos.keys():
                pos[letter] = pos[letter] - math.log(bg[letter]) / math.log(2.0)
        AM = MotifTools.Motif_from_ll(ll)
        # adj_model = MotifTools.Motif_from_ll(ll)
        # adj_model.source = M.source
        # pssm = MDsupport.Motif2c_PSSM(adj_model)
        # w = pssm.width
        # shift = []
        # scores = []
        mi, ma = AM.minscore, AM.maxscore
        # F_m = {}
        # Search every seq for the given motif above threshold t and print motif-centered results
        for j, seq in enumerate(seqs):
            seq_fwd = seq.upper()
            # seq_rev = str(MotifTools.revcomplement(seq_fwd))[::-1]
            # scores_fwd = pssm.score_probe(seq_fwd)
            # scores_rev = pssm.score_probe(seq_rev)
            # max_score = mi
            # max_ind = 0
            # for ind, s in enumerate(scores_fwd):
            #     if s > max_score:
            #         max_score = s
            #         max_ind = ind
            #         strand = '+'
            # for ind, s in enumerate(scores_rev):
            #     if s > max_score:
            #         max_score = s
            #         max_ind = ind
            #         strand = '-'
            max_score = AM.bestscore(seq_fwd)
            mscore = (max_score - mi) / (ma - mi)
            # orig = len(seq_fwd) / 2
            # bind = max_ind + w // 2
            # d = abs(orig - bind)
            SCORES[i, j] = mscore
            # SHIFTS[i, j] = d
            # out.write('%1.3f\t' % mscore)
        # out.write('\n')
    # out.close()
    # del F
    np.savetxt(outfile, SCORES, fmt='%1.3f')
li = list(breast_cancer)
breast_cancer = pd.DataFrame(breast_cancer.values, columns=li)
Class = li[-1]
arr = breast_cancer.values
y = arr[:, -1]
X = arr[:, 0:-1]
clusters = range(2, 15)

sp = SparseRandomProjection(n_components=4)
output = sp.fit_transform(X)

tester = em.ExpectationMaximizationTestCluster(output, y, clusters=range(2, 15), plot=False, stats=True)
silhouette_EM, vmeasure_scores = tester.run()

tester = kmtc.KMeansTestCluster(output, y, clusters=range(2, 15), plot=False, stats=True)
silhouette_kmeans, V_measure = tester.run()

"""
Plot Silhouette Score from observations from the cluster centroid
to use the Elbow Method to identify number of clusters to choose
"""
plt.plot(clusters, silhouette_kmeans, 'r^-', label="K Means")
count = -1
while cap.isOpened():
    ret, frame = cap.read()
    count += 1
    # if count < 52:
    #     continue
    print(count)
    out = frame.copy()
    rows, cols, _ = frame.shape
    binary = np.zeros((rows, cols, 3), np.float32)
    for i in range(rows):
        for j in range(cols):
            individualPixel = frame[i, j, 1]
            # print(i, "x", j, ":", individualPixel)
            if (EM.getProbGMM(individualPixel, 'green') >= 3.00e-6):
                binary[i, j] = np.array([255, 255, 255])
            # else:
            #     frame[i, j] = np.array([255, 255, 255])
    new = cv2.medianBlur(binary, 5)
    new = cv2.GaussianBlur(new, (5, 5), 5)
    edges = cv2.Canny(np.uint8(new), 50, 255)
    conts, h = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    try:
        (conts_sorted, boundingBoxes) = contours.sort_contours(conts, method="left-to-right")
        hull = cv2.convexHull(conts_sorted[0])
        (x, y), radius = cv2.minEnclosingCircle(hull)
    except:
        radius = 0
    if radius > 6:
def run():
    D = cData.cData("data/winenorm3_pyre.csv")
    E = EM.cEM(D)
    E.EM(3)
def test_read_file_points(self):
    points = EM.dataset_to_list_points(DATASET)
    self.assertTrue(len(points) > 0)
    self.assertTrue(points[0].dimension == 2)
def test_get_probability_cluster(self):
    self.assertEqual(EM.get_probability_cluster(point, Cluster([point], 1)), 1)
while cap.isOpened():
    ret, frame = cap.read()
    if ret == True:
        out = frame.copy()
        rows, cols, _ = frame.shape
        binary = np.zeros((rows, cols, 3), np.float32)
        for i in range(rows):
            for j in range(cols):
                individualPixel = frame[i, j]
                # swap the first and third channels of the pixel
                temp = individualPixel[0]
                individualPixel[0] = individualPixel[2]
                individualPixel[2] = temp
                # print(i, "x", j, ":", individualPixel)
                if (EM.getProbGMM(individualPixel, 'yellow') >= 3.00e-6):
                    binary[i, j] = np.array([255, 255, 255])
                # else:
                #     frame[i, j] = np.array([255, 255, 255])
        new = cv2.medianBlur(binary, 5)
        # new = cv2.GaussianBlur(new, (5, 5), 5)
        edges = cv2.Canny(np.uint8(new), 50, 255)
        conts, h = cv2.findContours(edges, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        (conts_sorted, boundingBoxes) = contours.sort_contours(conts, method="left-to-right")
        hull = cv2.convexHull(conts_sorted[0])
        (x, y), radius = cv2.minEnclosingCircle(hull)
        if radius > 6:
            cv2.circle(out, (int(x), int(y)), int(radius), (0, 255, 0), 4)
def test_mt_dot(self):
    m = np.matrix([[1, 2, 3], [4, 5, 6]])
    exp_res = [17, 29, 45]
    act_res = em.my_dot(m)
    print act_res
    self.assertEqual(act_res, exp_res)
# random pairwise constraints and watch to see our
# NMI increase
import pickle
import EM
import cData
import sys

if len(sys.argv) < 2:
    print "provide filename and optionally a filename for the pickle of centers"
    exit(1)

# use the same code for getting initial points as baturay did
D = cData.cData(sys.argv[1])
D.setType("2", "random")

EmAlg = EM.cEM(D)
EmAlg.EM(len(D.classlist))
EmAlg.bPPC = True

# Creates clusters depending on what EM guessed.
D.createClusters(EmAlg)
# Finds the outer points and the midpoints and assigns them in emclusters.
D.repPoints(EmAlg)
# This makes the algorithm start with good initial points.
EmAlg = D.goodInitial(EmAlg)

print "pickling starting position to: ",
picklefname = "pickles/" + sys.argv[1].split('/')[-1] + ".pickle"
if len(sys.argv) > 2:
    picklefname = sys.argv[2]
f = open(picklefname, "w")
l = EmAlg.lCenters
import EM as em
import DataLoader as loader
import numpy as np

GAUSS2 = 'data/2gaussian.txt'
GAUSS3 = 'data/3gaussian.txt'

path = GAUSS3
k = 3
x = np.matrix(loader.load_arrays(path))
x = np.transpose(x)
label, model, llh = em.em(x, k)
print model
def main(infile, numOfFeatures, numOfComponents, threshold, repetition):
    ## Data import to memory
    IrisDataFile = infile
    IrisData = readMatrix(IrisDataFile)
    IrisData = np.asarray(IrisData)
    rep = 0
    while rep < repetition:
        try:  ## exception handling
            print "This is repetition " + str(rep) + ":"
            patterns = IrisData[:, 0:numOfFeatures]
            gaussians = EM.factoryMixtureGaussians(numOfComponents, numOfFeatures)
            piList = EM.randomInitialise(patterns, gaussians)
            for tempGaussian in gaussians:
                print "Initialisation of parameters:"
                tempGaussian.showMuInTex()
                tempGaussian.showSigmaInTex()
            print piList
            previousCriteria = None
            currentCriteria = None
            convergenceThreshold = threshold
            loglikelihoodList = []
            expectedLoglikelihoodList = []
            times = 1
            while True:
                ## E-step
                responsibilities = EM.expectation(patterns, gaussians, piList)
                currentCriteria, expectedLoglikelihood = EM.getCriteria(patterns, gaussians, piList, responsibilities)
                loglikelihoodList.append(currentCriteria)
                expectedLoglikelihoodList.append(expectedLoglikelihood)
                ## M-step
                gaussians, piList = EM.maximisation(patterns, responsibilities, gaussians)
                currentCriteria, expectedLoglikelihood = EM.getCriteria(patterns, gaussians, piList, responsibilities)
                loglikelihoodList.append(currentCriteria)
                expectedLoglikelihoodList.append(expectedLoglikelihood)
                ## result demonstration
                if previousCriteria is None:
                    print "Current Criteria: " + str(currentCriteria)
                    previousCriteria = currentCriteria
                    times = times + 1
                    continue
                else:
                    print "Criteria: previous -> current " + str(previousCriteria) + " -> " + str(currentCriteria)
                    if abs((currentCriteria - previousCriteria) / previousCriteria) < convergenceThreshold:
                        ## termination condition satisfied
                        print "Convergence threshold reached.."
                        print "Iteration quantity: " + str(times)
                        print "Parameters of the resulting Gaussians:"
                        for tempGaussian in gaussians:
                            tempGaussian.showMuInTex()
                            tempGaussian.showSigmaInTex()
                            print
                        print "Resulting mixture coefficients (pi):"
                        print piList
                        break
                    else:
                        previousCriteria = currentCriteria
                        print "Continues..."
                        times = times + 1
                        continue
            ## check result
            lengthOfList = len(loglikelihoodList)
            assert lengthOfList == len(expectedLoglikelihoodList)
            iterationTimes = lengthOfList / 2
            iteration = range(0, lengthOfList)
            iteration = np.divide(iteration, 2)  ## one combo of E and M steps counted as one step
            ## draw graph
            plt.figure(rep)
            plt.xlim([-2, iterationTimes + 2])  ## specify the display scope of the figure
            plt.plot(iteration, loglikelihoodList, "b-", linewidth=3, label="Loglikelihood")
            plt.plot(iteration, expectedLoglikelihoodList, "r-", linewidth=3, label="expectedLoglikelihood")
            plt.plot([], [], "b.", label="Threshold = " + str(convergenceThreshold))
            plt.xlabel("Iteration:" + str(iterationTimes))
            plt.ylabel("Loglikelihood and ExpectedLoglikelihood")
            plt.legend(loc=4)
            rep = rep + 1
        except:
            continue
    plt.show()
def fit(self, data):
    self.data = data
    Priors, Mu, Sigma = EM_init(data, self.numbefOfStates)
    self.Priors, self.Mu, self.Sigma, self.Pix = EM(data, Priors, Mu, Sigma)