예제 #1
0
파일: tests.py 프로젝트: jameshensman/pyvb
def PCA():
	"""Demo: Principal Component Analysis on synthetic data.

	Generates ``N`` observations ``x = W z + mean + noise`` from randomly drawn
	ground-truth parameters (the noise is Gaussian with precision
	``true_prec``), builds the corresponding graph out of ``nodes`` objects,
	runs ``niters`` rounds of node updates and finally plots / prints the
	learned quantities next to the ground truth.

	Takes no arguments and returns nothing.  Side effects: consumes numbers
	from numpy's global random state, prints a countdown and a summary to
	stdout, and creates four pylab figures.
	NOTE(review): ``pylab.show()`` is not called here; presumably the caller
	(or an interactive session) is expected to display the figures.
	"""
	q = 2 #latent dimension
	d = 3 #observation dimension
	N = 200 #number of observations
	niters = 200 #number of update sweeps

	#ground truth used to simulate the data
	true_W = np.random.randn(d,q)*10
	true_Z = np.random.randn(N,q)
	true_mean = np.random.randn(d,1)
	true_prec = 100.
	noise = np.random.randn(N,d)*np.sqrt(1./true_prec)
	X_data = np.dot(true_Z,true_W.T) + true_mean.T + noise

	#set up the problem...
	#one Gaussian node per column of W; hstack joins them into a single node.
	#(the third argument of nodes.Gaussian is presumably a precision - confirm
	#against the nodes module)
	Ws = [nodes.Gaussian(d,np.zeros((d,1)),np.eye(d)*1e-3) for i in range(q)]
	W = nodes.hstack(Ws)
	Mu = nodes.Gaussian(d,np.zeros((d,1)),np.eye(d)*1e-3)
	Beta = nodes.Gamma(d,1e-3,1e-3)
	Zs = [nodes.Gaussian(q,np.zeros((q,1)),np.eye(q)) for i in range(N)]
	Xs = [nodes.Gaussian(d,W*z+Mu,Beta) for z in Zs]
	#each row of X_data is handed to its node as a (d,1) column vector
	for xnode,xval in zip(Xs,X_data):
		xnode.observe(xval.reshape(d,1))

	#infer!
	for i in range(niters):
		for w in Ws:
			w.update()
		Mu.update()
		for z in Zs:
			z.update()
		Beta.update()
		print(niters-i) #countdown of remaining sweeps

	#plot
	import pylab
	#The Q factors of the QR decompositions are shown rather than the raw
	#matrices.  Each title now matches the matrix drawn below it (the two
	#were previously swapped).
	pylab.figure()
	pylab.title('True W')
	pylab.imshow(np.linalg.qr(true_W)[0],interpolation='nearest')
	pylab.figure()
	pylab.title('E[W]')
	pylab.imshow(np.linalg.qr(W.pass_down_Ex())[0],interpolation='nearest')

	#latent variables: both scatters are coloured by the first true latent
	#coordinate so that corresponding points can be matched by eye
	pylab.figure()
	pylab.title('true Z')
	pylab.scatter(true_Z[:,0],true_Z[:,1],50,true_Z[:,0])
	pylab.figure()
	pylab.title('learned Z')
	learned_Z = np.hstack([z.qmu for z in Zs]).T
	pylab.scatter(learned_Z[:,0],learned_Z[:,1],50,true_Z[:,0])

	#true value next to learned value
	#(written so the output is the same under Python 2 and Python 3)
	print('\nBeta')
	print('%s %s' % (true_prec,Beta.pass_down_Ex()[0,0]))
	print('\nMu')
	print(np.hstack((true_mean,Mu.pass_down_Ex())))
예제 #2
0
def PCA_missing_data(plot=True):
	"""Demo: Principal Component Analysis with randomly missing data.

	Simulates ``N`` noisy observations of a ``q``-dimensional latent linear
	model, replaces ``Nmissing`` entries (one entry in each of ``Nmissing``
	distinct rows) with NaN, builds the model from ``nodes`` objects, and fits
	it through a ``Network`` object.

	plot -- when true (the default) the function additionally draws the
	        comparison figures, prints the learned noise precision and mean
	        next to the true values, and calls ``pylab.show()``.  When false
	        only the fit is performed.

	Returns nothing.  Consumes numbers from numpy's global random state.
	Note that the plotting branch zeroes ``qcov`` in place on every observed
	X node (see the hack below).
	"""
	q = 2 #latent dimension
	d = 5 #observation dimension
	N = 200 #number of observations
	#Learning iterations.  A previous ``niters = 200`` was never used while
	#``net.learn(100)`` was hard-coded; the effective value (100) is kept.
	niters = 100
	Nmissing = 100 #number of entries to blank out

	#ground truth used to simulate the data
	true_W = np.random.randn(d,q)
	true_Z = np.random.randn(N,q)
	true_mean = np.random.randn(d,1)
	true_prec = 20.
	Xdata_full = np.dot(true_Z,true_W.T) + true_mean.T #noise-free signal
	Xdata_observed = Xdata_full + np.random.randn(N,d)*np.sqrt(1./true_prec)

	#erase some data
	#rows: the first Nmissing entries of a random permutation of range(N)
	missing_index_i = np.argsort(np.random.randn(N))[:Nmissing]
	#columns: one uniform draw per erased row (position of the 1 in each
	#one-hot multinomial sample)
	missing_index_j = np.random.multinomial(1,np.ones(d)/d,Nmissing).nonzero()[1]
	Xdata = Xdata_observed.copy()
	Xdata[missing_index_i,missing_index_j] = np.nan

	#set up the problem...
	Ws = [nodes.Gaussian(d,np.zeros((d,1)),np.eye(d)*1e-3) for i in range(q)]
	W = nodes.hstack(Ws)
	Mu = nodes.Gaussian(d,np.zeros((d,1)),np.eye(d)*1e-3)
	Beta = nodes.Gamma(d,1e-3,1e-3)
	Zs = [nodes.Gaussian(q,np.zeros((q,1)),np.eye(q)) for i in range(N)]
	Xs = [nodes.Gaussian(d,W*z+Mu,Beta) for z in Zs]
	#each row (NaNs included) is handed to its node as a (d,1) column vector
	for xnode,xval in zip(Xs,Xdata):
		xnode.observe(xval.reshape(d,1))

	#make a network object
	net = Network()
	net.addnode(W)
	net.fetch_network()# automagically fetches all of the other nodes...

	#infer!
	net.learn(niters)

	if not plot:
		return

	#plot
	import pylab
	import hinton
	#compare true and learned W (via the Q factors of their QR decompositions)
	Qtrue,Rtrue = np.linalg.qr(true_W)
	Qlearn,Rlearn = np.linalg.qr(W.pass_down_Ex())
	pylab.figure()
	pylab.title('True W')
	hinton.hinton(Qtrue)
	pylab.figure()
	pylab.title('E[W]')
	hinton.hinton(Qlearn)

	if q==2:#plot the latent variables
		#both scatters are coloured by the first true latent coordinate
		pylab.figure()
		pylab.title('true Z')
		pylab.scatter(true_Z[:,0],true_Z[:,1],50,true_Z[:,0])
		pylab.figure()
		pylab.title('learned Z')
		learned_Z = np.hstack([z.pass_down_Ex() for z in Zs]).T
		pylab.scatter(learned_Z[:,0],learned_Z[:,1],50,true_Z[:,0])

	#recovered X mean
	X_rec = np.hstack([x.pass_down_Ex() for x in Xs]).T

	#Recovered X Variance
	#slight hack here - set q variance of observed nodes to zeros (it should be random...)
	for x in Xs:
		if x.observed:
			x.qcov *=0
	var_rec = np.vstack([np.diag(x.qcov) for x in Xs]) + 1./np.diag(Beta.pass_down_Ex())

	#x coordinates of the shaded band: forwards along the upper edge, then
	#backwards along the lower edge (identical for every dimension)
	sample_index = np.arange(len(Xs))
	volume_x = np.hstack((sample_index,sample_index[::-1]))

	#plot each recovered signal in a separate figure
	for i in range(d):
		pylab.figure()
		pylab.title('recovered_signal '+str(i))

		pylab.plot(Xdata_full[:,i],'g',marker='.',label='True') # 'true' values of missing data (without noise)
		pylab.plot(X_rec[:,i],'b',label='Recovered') # recovered missing data values
		pylab.plot(Xdata[:,i],'k',marker='o',linewidth=2,label='Observed') # with noise, and holes where we took out values
		pylab.legend()

		#shaded band: recovered mean +/- 2*sqrt(recovered variance)
		two_sd = 2*np.sqrt(var_rec[:,i])
		upper = X_rec[:,i]+two_sd
		lower = X_rec[:,i]-two_sd
		volume_y = np.hstack((upper,lower[::-1]))
		pylab.fill(volume_x,volume_y,'b',alpha=0.3)

	#true value next to learned value
	#(written so the output is the same under Python 2 and Python 3)
	print('\nBeta')
	print('%s %s' % (true_prec,Beta.pass_down_Ex()[0,0]))
	print('\nMu')
	print(np.hstack((true_mean,Mu.pass_down_Ex())))
	pylab.show()
예제 #3
0
	#true_Q = np.dot(q_temp.T,q_temp)/10000
	true_Q = np.diag(np.random.rand(q))*0.1
	true_Q_chol = np.linalg.cholesky(true_Q)
	
	#simulate the system. TODO check I've not got cholesky the wrong way around... (I think so...)
	true_X = np.zeros((T,q))
	Y_data = np.zeros((T,d))
	true_X[0] = np.random.randn(q)
	Y_data[0] = np.dot(true_C,true_X[0].reshape(q,1)).flatten() + np.dot(true_R_chol,np.random.randn(d,1)).flatten()
	for xlag,xnow,ynow in zip(true_X[:-1],true_X[1:],Y_data[1:]):
		xnow[:] = np.dot(true_A,xlag).flatten() + np.dot(true_Q_chol,np.random.randn(q,1)).flatten()
		ynow[:] = np.dot(true_C,xnow) + np.dot(true_R_chol,np.random.randn(d,1)).flatten()
	
	#set up the problem...
	As = [nodes.Gaussian(q,np.zeros((q,1)),np.eye(q)*1e-3) for  i in range(q)]
	A = nodes.hstack(As) #node to represent A
	Cs = [nodes.Gaussian(d,np.zeros((d,1)),np.eye(d)*1e-3) for  i in range(q)]
	C = nodes.hstack(Cs) #node to represent 
	#Q = nodes.Gamma(q,1e-3,1e-3)
	#R = nodes.Gamma(d,1e-3,1e-3)
	Q = nodes.DiagonalGamma(q,np.ones(q)*1e-3,np.ones(q)*1e-3)
	R = nodes.DiagonalGamma(d,np.ones(d)*1e-3,np.ones(d)*1e-3)
	#Q = nodes.Wishart(q,1e-3,np.eye(q)*1e-3) 
	#R = nodes.Wishart(d,1e-3,np.eye(d)*1e-3)
	
	X0 = nodes.Gaussian(q,np.zeros((q,1)),np.eye(q))
	Y0 = nodes.Gaussian(d,C*X0,R)
	Y0.observe(Y_data[0].reshape(d,1))
	Xs = [X0]
	Ys = [Y0]
	for t in range(1,T):