def createEllipses(exp_mu, exp_C, ks, scale=200):
    """Return plain ellipse parameter tuples for the components in ``ks``.

    For every ``k`` in ``ks`` an ellipse object is obtained from
    ``create_cov_ellipse(exp_C[k], exp_mu[k, :], ...)`` and flattened into

        (center[1], center[0], scale * height, scale * width, -angle)

    The original in-line comment describes this layout as
    ``(lat, lng, width, height, rotation)``; note that the centre
    coordinates and the width/height pair are both swapped relative to the
    ellipse object, and the angle is negated.

    Args:
        exp_mu: indexable as ``exp_mu[k, :]``; passed to
            ``create_cov_ellipse`` as the ellipse position.
        exp_C: indexable as ``exp_C[k]``; passed to ``create_cov_ellipse``
            as the covariance.
        ks: iterable of component indices to convert.
        scale: constant factor applied to both axis lengths so the ellipses
            are visible.  Defaults to 200, the value that used to be
            hard-coded (todo: make 95% confidence range).

    Returns:
        A list with one 5-tuple per entry of ``ks`` (empty if ``ks`` is
        empty).
    """
    ellipses = []  # list of ellipse parameters
    for k in ks:
        # color/alpha are forwarded unchanged; only the geometry attributes
        # of the returned object are read here.
        e = create_cov_ellipse(exp_C[k], exp_mu[k, :], color='r', alpha=0.3)
        # ellipse example: (lat,lng,width,height,rotation)
        ellipses.append((e.center[1],
                         e.center[0],
                         scale * e.height,
                         scale * e.width,
                         -e.angle))
    return ellipses
def createEllipses(exp_mu, exp_C, ks, scale=200):
    """Convert covariance ellipses of selected components to parameter tuples.

    Each ``k`` in ``ks`` is turned into an ellipse object via
    ``create_cov_ellipse(exp_C[k], exp_mu[k, :], ...)`` and then reduced to
    the tuple

        (center[1], center[0], scale * height, scale * width, -angle)

    which the original comment labels ``(lat, lng, width, height,
    rotation)``.  The centre coordinates and the two axis lengths are
    swapped with respect to the ellipse object and the angle sign is
    flipped.

    Args:
        exp_mu: indexed as ``exp_mu[k, :]`` to get the ellipse position.
        exp_C: indexed as ``exp_C[k]`` to get the covariance.
        ks: iterable of component indices.
        scale: multiplier for both axis lengths, making the ellipses
            visible.  The default of 200 is the previously hard-coded value
            (todo: make 95% confidence range).

    Returns:
        List of 5-tuples, one per element of ``ks``, in the same order.
    """
    ellipses = []  # list of ellipse parameters
    for k in ks:
        # Only center/width/height/angle of the returned object are used;
        # color and alpha are passed through exactly as before.
        e = create_cov_ellipse(exp_C[k], exp_mu[k, :], color='r', alpha=0.3)
        lat, lng = e.center[1], e.center[0]
        # ellipse example: (lat,lng,width,height,rotation)
        ellipses.append((lat, lng, scale * e.height, scale * e.width,
                         -e.angle))
    return ellipses
def dynamicObs(X, Zmax, exp_z, mu_grnd=None, exp_mu=None, exp_C=None,
               waitTime=0.02, pathLen=4):
    """Animate 2-D observations one step at a time, grouped by component.

    One scatter plot is created per "active" component (a column of
    ``exp_z`` whose sum is at least 0.5).  At step ``n`` the scatter plot of
    component ``Zmax[n]`` reveals one more point of its partition, a short
    blue path through the most recent observations is redrawn, covariance
    ellipses are refreshed, and the frame is saved (best effort) to
    ``ANIM_ROOT + 'animation/%04d.png' % n``.

    NOTE: the per-component partitions are built from
    ``exp_z.argmax(axis=1)`` while ``Zmax`` decides which partition advances.
    The point revealed at step ``n`` is therefore ``X[n]`` only when ``Zmax``
    agrees with ``exp_z.argmax(axis=1)``.

    Args:
        X: array whose shape unpacks to ``(N, 2)``.
        Zmax: indexable; ``Zmax[n]`` is the component index for step ``n``.
        exp_z: array whose shape unpacks to ``(N, K)``.
        mu_grnd: optional 2-D array of ground-truth means (columns 0 and 1
            are plotted as white diamonds).
        exp_mu: optional array of inferred means; when given, the means of
            the active components are plotted in red and ellipses drawn.
        exp_C: covariances indexed as ``exp_C[k]``; only read when
            ``exp_mu`` is not None.
        waitTime: seconds to sleep after each frame.
        pathLen: number of previous observations included in the path.

    Raises:
        AssertionError: if ``X`` does not have 2 columns or ``exp_z`` does
            not have the same number of rows as ``X``.
        ValueError: if some ``Zmax[n]`` is not an active component
            (raised by ``list.index``).
    """
    ion()
    fig = figure(figsize=(10, 10))
    # http://stackoverflow.com/questions/3584805/in-matplotlib-what-does-111-means-in-fig-add-subplot111
    ax_spatial = fig.add_subplot(1, 1, 1)
    circs = []

    (N, XDim) = shape(X)
    assert XDim == 2
    (N1, _) = shape(exp_z)
    assert N1 == N

    # markers for different inferred component assignments
    markers = ['x', 'o', '^', 'v', 'd']

    # find the components that actually appear in the data
    # (no need to visualise the rest)
    (ks,) = where(exp_z.sum(axis=0) >= 0.5)
    ks = list(ks)

    # one scatter style per active component, created off-screen and
    # filled in point by point during the animation
    sct = [scatter(9999., 9999., marker=markers[j % len(markers)])
           for j in range(len(ks))]

    if mu_grnd is not None:
        # plot ground truth means
        (K_grnd, _) = shape(mu_grnd)
        for k in range(K_grnd):
            scatter(mu_grnd[k, 0], mu_grnd[k, 1], color='w', marker='d', s=50)

    # partition the data:
    Xi, ni = [], []
    for k in ks:
        (ns,) = where(exp_z.argmax(axis=1) == k)
        Xi.append(X[ns, :])
        ni.append(0)  # current position in each partition

    if exp_mu is not None:
        # show only means corresponding to actual probability mass
        scatter(exp_mu[ks, 0], exp_mu[ks, 1], color='r')

    (x_min, y_min), (x_max, y_max) = findLim(X)
    xlim(x_min, x_max)
    ylim(y_min, y_max)
    # Single pre-formatted string: same text under Python 2 and Python 3.
    print('X min max %s %s' % ((X[:, 0].min(), X[:, 1].min()),
                               (X[:, 0].max(), X[:, 1].max())))

    path1 = None  # the stored path of recent points
    for n in range(N):
        # work out which component current data point belongs to:
        i = Zmax[n]
        ik = ks.index(i)  # look up scatter plot that we need to update
        print('x_n, latent loc %s %s' % (X[n, :], i))

        # reveal one more point of this component's partition
        Xi_ = Xi[ik]
        n_ = ni[ik] + 1
        sct[ik].set_offsets(Xi_[:n_, :])
        ni[ik] = n_

        # redraw the trailing path through the most recent observations
        nprev = max(0, n - pathLen)
        if path1 is not None:
            path1.remove()
        path1, = plot(X[nprev:n + 1, 0], X[nprev:n + 1, 1], color='b')

        if exp_mu is not None:
            # ellipses to show covariance of components
            for circ in circs:
                circ.remove()
            circs = []
            for k in ks:
                # calculate params of ellipses (adapted from
                # http://stackoverflow.com/questions/12301071/multidimensional-confidence-intervals)
                circ = create_cov_ellipse(exp_C[k], exp_mu[k, :],
                                          color='r', alpha=0.3)
                circs.append(circ)
                # add to axes:
                ax_spatial.add_artist(circ)

        try:
            savefig(ANIM_ROOT + 'animation/%04d.png' % n)
        except IOError:
            # Best effort: a frame that cannot be written must not stop
            # the animation.
            pass

        time.sleep(waitTime)
        draw()
def infer(N, X_all, K, sensors, thres=1e-4, max_itr=200, min_itr=10,
          stateHyperparams=None, VERBOSE=0, Z_grnd=None, useMix=0, useHMM=1,
          plotSensor=None, plotX=None, mu_grnd=None):
    """Alternate M-step / E-step updates until the log-likelihoods settle.

    Args:
        N: number of rows of the expected-assignment array ``exp_z``
            (and of per-step transition expectations ``exp_s``).
        X_all: list of observations (in same ordering as |sensors|);
            ``X_all[i]`` is handed to ``sensors[i]``.
        K: truncation parameter (for number of components explaining data).
        sensors: sequence of objects exposing ``m(X, exp_z)`` and
            ``loglik(X)``; the per-sensor log-likelihoods are summed.
        thres: convergence threshold on ``diff`` (see below).
        max_itr: iteration cap once ``min_itr`` has been reached.
        min_itr: iterations that are always performed.
        stateHyperparams: dict with keys ``'alpha_pi'`` and ``'alpha_a'``;
            both default to 1.0 when None is given.
        VERBOSE: when truthy, print ``exp_z.argmax(axis=1)`` each iteration.
        Z_grnd: optional 2-D array copied into the first columns of
            ``exp_z`` instead of a random initialisation (debug aid).
        useMix: when truthy, use the mixture updates (``mixMPi``/``mixEZ``)
            instead of the forwards/backwards updates.
        useHMM: when truthy together with ``useMix``, switch to the
            forwards/backwards updates once the mixture phase meets the
            stopping criterion (the iteration counter restarts at 1).
        plotSensor: optional sensor whose ``_m`` and ``_S`` attributes are
            visualised every iteration.
        plotX: array whose first two columns are scattered when
            ``plotSensor`` is given.
        mu_grnd: optional 2-D array of ground-truth means to plot.

    Returns:
        Tuple ``(exp_z, sensors, concMatrix, path)`` where ``concMatrix`` is
        ``expPi(...)`` if ``useMix`` is still set at the end and
        ``expA(...)`` otherwise, and ``path`` is
        ``viterbiLog(ln_obs_lik, exp_a, exp_pi)``.
    """
    # define hyperparameters by default:
    if stateHyperparams is None:
        stateHyperparams = {
            'alpha_pi': 1.0,  # hyperparam for initial state DP
            'alpha_a': 1.0,   # hyperparam for transition DP
        }
    alpha_pi = stateHyperparams['alpha_pi']  # hyperparam for initial state DP
    alpha_a = stateHyperparams['alpha_a']    # hyperparam for transition DP

    # randomly initialise expected values of random variables:
    exp_s = np.array([np.random.uniform(0, 100, (K, K)) for _ in range(N)])
    for n in range(N):
        exp_s[n, :, :] = exp_s[n, :, :] / exp_s[n, :, :].sum()

    # either use ground truth provided for exp_z (for debug purposes) or
    # initialise randomly:
    if Z_grnd is None:
        exp_z = np.array([np.random.dirichlet(np.ones(K)) for _ in range(N)])
    else:
        exp_z = np.zeros((N, K))
        (_, KG) = np.shape(Z_grnd)
        exp_z[:, :KG] = Z_grnd  # make Z_grnd shape match exp_z shape

    # Initial variational parameters.  These values are also what reaches
    # expPi/expA at the end if the corresponding M-step never runs.
    tau_pi0, tau_pi1 = np.ones(K), np.ones(K)
    tau_a0, tau_a1 = np.ones((K, K)), np.ones((K, K))

    if plotSensor is not None:
        # animation:
        plt.ion()
        fig = plt.figure(figsize=(10, 10))
        # http://stackoverflow.com/questions/3584805/in-matplotlib-what-does-111-means-in-fig-add-subplot111
        ax_spatial = fig.add_subplot(1, 1, 1)
        circs = []
        ellipseColor = 'r'

    itr = 0
    diff, prev_ln_obs_lik = 1, np.zeros((N, K))

    # stop when parameters have converged (local optimum)
    while (itr < min_itr) or (itr < max_itr and diff > thres):
        # ---------------
        # M-step:
        # ---------------
        # variational parameters governing latent states:
        if useMix:
            tau_pi0, tau_pi1 = mixMPi(alpha_pi, exp_z, K)
        else:
            tau_pi0, tau_pi1 = mPi(alpha_pi, exp_z, K)
            tau_a0, tau_a1 = mA(alpha_a, exp_s, K)

        # optimise variational parameters governing observation likelihoods:
        for i, s in enumerate(sensors):
            s.m(X_all[i], exp_z)

        # ---------------
        # E-step:
        # ---------------
        # calculate observation likelihood of data for each sensor (combined):
        ln_obs_lik = np.array(
            [s.loglik(X_all[i]) for i, s in enumerate(sensors)]).sum(axis=0)
        exp_ln_pi = ePi(tau_pi0, tau_pi1, K)

        # find expected values of latent variables:
        if useMix:
            # mixture model estimation of Z
            exp_z = mixEZ(ln_obs_lik, exp_ln_pi, N, K)
        else:
            exp_ln_a = eA(tau_a0, tau_a1, K)
            # FORWARDS PASS
            ln_alpha_exp_z = eFowardsZ(exp_ln_pi, exp_ln_a, ln_obs_lik, N, K)
            # BACKWARDS PASS
            ln_beta_exp_z = eBackwardsZ(exp_ln_pi, exp_ln_a, ln_obs_lik, N, K)
            # find expected state for each time step
            exp_z = eZ(ln_alpha_exp_z, ln_beta_exp_z, N)
            # find expected transition for each time step
            exp_s = eS(exp_ln_a, ln_alpha_exp_z, ln_beta_exp_z, ln_obs_lik,
                       N, K)

        # mean absolute change of the combined log observation likelihood
        # since the previous iteration
        diff = np.abs(ln_obs_lik - prev_ln_obs_lik).sum() / float(N * K)
        prev_ln_obs_lik = ln_obs_lik.copy()

        print('itr,diff', itr, diff)
        if VERBOSE:
            print('exp_z:\n', exp_z.argmax(axis=1))

        if plotSensor is not None:
            # only look at active components
            (ks,) = np.where(exp_z.sum(axis=0) > 1.)
            X = plotX
            mvgSensor = plotSensor
            if itr == 0:
                plt.scatter(X[:, 0], X[:, 1], marker='x', color='g')
                sctZ = plt.scatter(mvgSensor._m[:, 0], mvgSensor._m[:, 1],
                                   color='r')
                if mu_grnd is not None:
                    # plot ground truth means
                    (K_grnd, _) = np.shape(mu_grnd)
                    for k in range(K_grnd):
                        plt.scatter(mu_grnd[k, 0], mu_grnd[k, 1], color='k',
                                    marker='d', s=50)
            else:
                # ellipses to show covariance of components
                for circ in circs:
                    circ.remove()
                circs = []
                for k in ks:
                    # calculate params of ellipses (adapted from
                    # http://stackoverflow.com/questions/12301071/multidimensional-confidence-intervals)
                    circ = create_cov_ellipse(mvgSensor._S[k],
                                              mvgSensor._m[k, :],
                                              color=ellipseColor, alpha=0.3)
                    circs.append(circ)
                    # add to axes:
                    ax_spatial.add_artist(circ)

                (_, XDim) = np.shape(X)
                # hide non-significant components by moving them far away
                hiddenOffsets = 99999 * np.ones((K, XDim))
                hiddenOffsets[ks, :] = mvgSensor._m[ks, :]
                sctZ.set_offsets(hiddenOffsets)

            plt.draw()
            if itr == 0:
                # long pause on the very first frame only
                time.sleep(10.)

        # next iteration:
        itr += 1

        # determine if we can switch off mix:
        if useMix and useHMM and (itr >= max_itr or diff <= thres):
            # Restart at 1 (not 0) so the itr == 0 plot set-up is not
            # repeated; diff/prev are reset so the loop continues.
            itr = 1
            useMix = 0
            diff = np.inf
            prev_ln_obs_lik = 0
            ellipseColor = 'y'
            print('Mixture converged. SWTCHING TO HMM INFERENCE')

    print('completed inference.')

    exp_pi = expPi(tau_pi0, tau_pi1, K)
    exp_a = expA(tau_a0, tau_a1, K)

    if useMix:
        concMatrix = exp_pi
    else:
        concMatrix = exp_a

    print('final taupi0', tau_pi0)
    print('final taupi1', tau_pi1)

    return exp_z, sensors, concMatrix, viterbiLog(ln_obs_lik, exp_a, exp_pi)
def infer(N, X_all, K, sensors, thres=1e-4, max_itr=200, min_itr=10,
          stateHyperparams=None, VERBOSE=0, Z_grnd=None, useMix=0, useHMM=1,
          plotSensor=None, plotX=None, mu_grnd=None):
    """Alternate M-step / E-step updates until the log-likelihoods settle.

    Args:
        N: number of rows of the expected-assignment array ``exp_z``
            (and of per-step transition expectations ``exp_s``).
        X_all: list of observations (in same ordering as |sensors|);
            ``X_all[i]`` is handed to ``sensors[i]``.
        K: truncation parameter (for number of components explaining data).
        sensors: sequence of objects exposing ``m(X, exp_z)`` and
            ``loglik(X)``; the per-sensor log-likelihoods are summed.
        thres: convergence threshold on ``diff`` (see below).
        max_itr: iteration cap once ``min_itr`` has been reached.
        min_itr: iterations that are always performed.
        stateHyperparams: dict with keys ``'alpha_pi'`` and ``'alpha_a'``;
            both default to 1.0 when None is given.
        VERBOSE: when truthy, print ``exp_z.argmax(axis=1)`` each iteration.
        Z_grnd: optional 2-D array copied into the first columns of
            ``exp_z`` instead of a random initialisation (debug aid).
        useMix: when truthy, use the mixture updates (``mixMPi``/``mixEZ``)
            instead of the forwards/backwards updates.
        useHMM: when truthy together with ``useMix``, switch to the
            forwards/backwards updates once the mixture phase meets the
            stopping criterion (the iteration counter restarts at 1).
        plotSensor: optional sensor whose ``_m`` and ``_S`` attributes are
            visualised every iteration.
        plotX: array whose first two columns are scattered when
            ``plotSensor`` is given.
        mu_grnd: optional 2-D array of ground-truth means to plot.

    Returns:
        Tuple ``(exp_z, sensors, concMatrix, path)`` where ``concMatrix`` is
        ``expPi(...)`` if ``useMix`` is still set at the end and
        ``expA(...)`` otherwise, and ``path`` is
        ``viterbiLog(ln_obs_lik, exp_a, exp_pi)``.
    """
    # define hyperparameters by default:
    if stateHyperparams is None:
        stateHyperparams = {
            'alpha_pi': 1.0,  # hyperparam for initial state DP
            'alpha_a': 1.0,   # hyperparam for transition DP
        }
    alpha_pi = stateHyperparams['alpha_pi']  # hyperparam for initial state DP
    alpha_a = stateHyperparams['alpha_a']    # hyperparam for transition DP

    # randomly initialise expected values of random variables:
    exp_s = array([random.uniform(0, 100, (K, K)) for _ in range(N)])
    for n in range(N):
        exp_s[n, :, :] = exp_s[n, :, :] / exp_s[n, :, :].sum()

    # either use ground truth provided for exp_z (for debug purposes) or
    # initialise randomly:
    if Z_grnd is None:
        exp_z = array([random.dirichlet(ones(K)) for _ in range(N)])
    else:
        exp_z = zeros((N, K))
        (_, KG) = shape(Z_grnd)
        exp_z[:, :KG] = Z_grnd  # make Z_grnd shape match exp_z shape

    # Initial variational parameters.  These values are also what reaches
    # expPi/expA at the end if the corresponding M-step never runs.
    tau_pi0, tau_pi1 = ones(K), ones(K)
    tau_a0, tau_a1 = ones((K, K)), ones((K, K))

    if plotSensor is not None:
        # animation:
        ion()
        fig = figure(figsize=(10, 10))
        # http://stackoverflow.com/questions/3584805/in-matplotlib-what-does-111-means-in-fig-add-subplot111
        ax_spatial = fig.add_subplot(1, 1, 1)
        circs = []
        ellipseColor = 'r'

    itr = 0
    diff, prev_ln_obs_lik = 1, zeros((N, K))

    # stop when parameters have converged (local optimum)
    while (itr < min_itr) or (itr < max_itr and diff > thres):
        # ---------------
        # M-step:
        # ---------------
        # variational parameters governing latent states:
        if useMix:
            tau_pi0, tau_pi1 = mixMPi(alpha_pi, exp_z, K)
        else:
            tau_pi0, tau_pi1 = mPi(alpha_pi, exp_z, K)
            tau_a0, tau_a1 = mA(alpha_a, exp_s, K)

        # optimise variational parameters governing observation likelihoods:
        for i, s in enumerate(sensors):
            s.m(X_all[i], exp_z)

        # ---------------
        # E-step:
        # ---------------
        # calculate observation likelihood of data for each sensor (combined):
        ln_obs_lik = array(
            [s.loglik(X_all[i]) for i, s in enumerate(sensors)]).sum(axis=0)
        exp_ln_pi = ePi(tau_pi0, tau_pi1, K)

        # find expected values of latent variables:
        if useMix:
            # mixture model estimation of Z
            exp_z = mixEZ(ln_obs_lik, exp_ln_pi, N, K)
        else:
            exp_ln_a = eA(tau_a0, tau_a1, K)
            # FORWARDS PASS
            ln_alpha_exp_z = eFowardsZ(exp_ln_pi, exp_ln_a, ln_obs_lik, N, K)
            # BACKWARDS PASS
            ln_beta_exp_z = eBackwardsZ(exp_ln_pi, exp_ln_a, ln_obs_lik, N, K)
            # find expected state for each time step
            exp_z = eZ(ln_alpha_exp_z, ln_beta_exp_z, N)
            # find expected transition for each time step
            exp_s = eS(exp_ln_a, ln_alpha_exp_z, ln_beta_exp_z, ln_obs_lik,
                       N, K)

        # mean absolute change of the combined log observation likelihood
        # since the previous iteration
        diff = abs(ln_obs_lik - prev_ln_obs_lik).sum() / float(N * K)
        prev_ln_obs_lik = ln_obs_lik.copy()

        # Pre-formatted single strings: identical output under the Python 2
        # print statement and the Python 3 print function.
        print('itr,diff %s %s' % (itr, diff))
        if VERBOSE:
            # No space after the newline, matching what the Python 2
            # print statement emitted for these two items.
            print('exp_z:\n%s' % (exp_z.argmax(axis=1),))

        if plotSensor is not None:
            # only look at active components
            (ks,) = where(exp_z.sum(axis=0) > 1.)
            X = plotX
            mvgSensor = plotSensor
            if itr == 0:
                scatter(X[:, 0], X[:, 1], marker='x', color='g')
                sctZ = scatter(mvgSensor._m[:, 0], mvgSensor._m[:, 1],
                               color='r')
                if mu_grnd is not None:
                    # plot ground truth means
                    (K_grnd, _) = shape(mu_grnd)
                    for k in range(K_grnd):
                        scatter(mu_grnd[k, 0], mu_grnd[k, 1], color='k',
                                marker='d', s=50)
            else:
                # ellipses to show covariance of components
                for circ in circs:
                    circ.remove()
                circs = []
                for k in ks:
                    # calculate params of ellipses (adapted from
                    # http://stackoverflow.com/questions/12301071/multidimensional-confidence-intervals)
                    circ = create_cov_ellipse(mvgSensor._S[k],
                                              mvgSensor._m[k, :],
                                              color=ellipseColor, alpha=0.3)
                    circs.append(circ)
                    # add to axes:
                    ax_spatial.add_artist(circ)

                (_, XDim) = shape(X)
                # hide non-significant components by moving them far away
                hiddenOffsets = 99999 * ones((K, XDim))
                hiddenOffsets[ks, :] = mvgSensor._m[ks, :]
                sctZ.set_offsets(hiddenOffsets)

            draw()
            if itr == 0:
                # long pause on the very first frame only
                time.sleep(10.)

        # next iteration:
        itr += 1

        # determine if we can switch off mix:
        if useMix and useHMM and (itr >= max_itr or diff <= thres):
            # Restart at 1 (not 0) so the itr == 0 plot set-up is not
            # repeated; diff/prev are reset so the loop continues.
            itr = 1
            useMix = 0
            diff = inf
            prev_ln_obs_lik = 0
            ellipseColor = 'y'
            print('Mixture converged. SWTCHING TO HMM INFERENCE')

    print('completed inference.')

    exp_pi = expPi(tau_pi0, tau_pi1, K)
    exp_a = expA(tau_a0, tau_a1, K)

    if useMix:
        concMatrix = exp_pi
    else:
        concMatrix = exp_a

    print('final taupi0 %s' % (tau_pi0,))
    print('final taupi1 %s' % (tau_pi1,))

    return exp_z, sensors, concMatrix, viterbiLog(ln_obs_lik, exp_a, exp_pi)
def dynamicObs(X, Zmax, exp_z, mu_grnd=None, exp_mu=None, exp_C=None,
               waitTime=0.02, pathLen=4):
    """Animate 2-D observations one step at a time, grouped by component.

    One scatter plot is created per "active" component (a column of
    ``exp_z`` whose sum is at least 0.5).  At step ``n`` the scatter plot of
    component ``Zmax[n]`` reveals one more point of its partition, a short
    blue path through the most recent observations is redrawn, covariance
    ellipses are refreshed, and the frame is saved (best effort) to
    ``ANIM_ROOT + 'animation/%04d.png' % n``.

    NOTE: the per-component partitions are built from
    ``exp_z.argmax(axis=1)`` while ``Zmax`` decides which partition advances.
    The point revealed at step ``n`` is therefore ``X[n]`` only when ``Zmax``
    agrees with ``exp_z.argmax(axis=1)``.

    Args:
        X: array whose shape unpacks to ``(N, 2)``.
        Zmax: indexable; ``Zmax[n]`` is the component index for step ``n``.
        exp_z: array whose shape unpacks to ``(N, K)``.
        mu_grnd: optional 2-D array of ground-truth means (columns 0 and 1
            are plotted as white diamonds).
        exp_mu: optional array of inferred means; when given, the means of
            the active components are plotted in red and ellipses drawn.
        exp_C: covariances indexed as ``exp_C[k]``; only read when
            ``exp_mu`` is not None.
        waitTime: seconds to sleep after each frame.
        pathLen: number of previous observations included in the path.

    Raises:
        AssertionError: if ``X`` does not have 2 columns or ``exp_z`` does
            not have the same number of rows as ``X``.
        ValueError: if some ``Zmax[n]`` is not an active component
            (raised by ``list.index``).
    """
    ion()
    fig = figure(figsize=(10, 10))
    # http://stackoverflow.com/questions/3584805/in-matplotlib-what-does-111-means-in-fig-add-subplot111
    ax_spatial = fig.add_subplot(1, 1, 1)
    circs = []

    (N, XDim) = shape(X)
    assert XDim == 2
    (N1, _) = shape(exp_z)
    assert N1 == N

    # markers for different inferred component assignments
    markers = ['x', 'o', '^', 'v', 'd']

    # find the components that actually appear in the data
    # (no need to visualise the rest)
    (ks,) = where(exp_z.sum(axis=0) >= 0.5)
    ks = list(ks)

    # one scatter style per active component, created off-screen and
    # filled in point by point during the animation
    sct = [scatter(9999., 9999., marker=markers[j % len(markers)])
           for j in range(len(ks))]

    if mu_grnd is not None:
        # plot ground truth means
        (K_grnd, _) = shape(mu_grnd)
        for k in range(K_grnd):
            scatter(mu_grnd[k, 0], mu_grnd[k, 1], color='w', marker='d', s=50)

    # partition the data:
    Xi, ni = [], []
    for k in ks:
        (ns,) = where(exp_z.argmax(axis=1) == k)
        Xi.append(X[ns, :])
        ni.append(0)  # current position in each partition

    if exp_mu is not None:
        # show only means corresponding to actual probability mass
        scatter(exp_mu[ks, 0], exp_mu[ks, 1], color='r')

    (x_min, y_min), (x_max, y_max) = findLim(X)
    xlim(x_min, x_max)
    ylim(y_min, y_max)
    # Single pre-formatted string: same text under Python 2 and Python 3.
    print('X min max %s %s' % ((X[:, 0].min(), X[:, 1].min()),
                               (X[:, 0].max(), X[:, 1].max())))

    path1 = None  # the stored path of recent points
    for n in range(N):
        # work out which component current data point belongs to:
        i = Zmax[n]
        ik = ks.index(i)  # look up scatter plot that we need to update
        print('x_n, latent loc %s %s' % (X[n, :], i))

        # reveal one more point of this component's partition
        Xi_ = Xi[ik]
        n_ = ni[ik] + 1
        sct[ik].set_offsets(Xi_[:n_, :])
        ni[ik] = n_

        # redraw the trailing path through the most recent observations
        nprev = max(0, n - pathLen)
        if path1 is not None:
            path1.remove()
        path1, = plot(X[nprev:n + 1, 0], X[nprev:n + 1, 1], color='b')

        if exp_mu is not None:
            # ellipses to show covariance of components
            for circ in circs:
                circ.remove()
            circs = []
            for k in ks:
                # calculate params of ellipses (adapted from
                # http://stackoverflow.com/questions/12301071/multidimensional-confidence-intervals)
                circ = create_cov_ellipse(exp_C[k], exp_mu[k, :],
                                          color='r', alpha=0.3)
                circs.append(circ)
                # add to axes:
                ax_spatial.add_artist(circ)

        try:
            savefig(ANIM_ROOT + 'animation/%04d.png' % n)
        except IOError:
            # Best effort: a frame that cannot be written must not stop
            # the animation.
            pass

        time.sleep(waitTime)
        draw()