def downhill_models(M, P, FE, z, K=20, hh=.001, ep=5000, dp=0, wsp=.001, plt=False):
    from paris.signal import bss_eval

    rng = theano.tensor.shared_randomstreams.RandomStreams(0)

    # Shared variables to use
    x = Th.matrix('x')
    y = theano.shared(M.astype(theano.config.floatX))
    d = theano.shared(float32(dp))

    # Network weights
    W0 = theano.shared(sqrt(2. / (K + M.shape[0])) *
                       random.randn(K, M.shape[0]).astype(theano.config.floatX))
    W1 = theano.shared(sqrt(2. / (K + M.shape[0])) *
                       random.randn(M.shape[0], K).astype(theano.config.floatX))

    # First layer is the transform to a non-negative subspace
    h = psoftplus(W0.dot(x), 3.)

    # Dropout
    if dp > 0:
        h *= (1. / (1. - d) * (rng.uniform(size=h.shape) > d).astype(
            theano.config.floatX)).astype(theano.config.floatX)

    # Second layer reconstructs the input
    r = psoftplus(W1.dot(h), 3.)

    # Approximate input using KL-like distance, plus a sparsity penalty on W1
    cost = Th.mean(y * (Th.log(y + eps) - Th.log(r + eps)) - y + r) \
        + wsp * Th.mean(abs(W1))

    # Make an optimizer and define the training input
    opt = downhill.build('rprop', loss=cost, inputs=[x], params=[W0, W1])
    train = downhill.Dataset(M.astype(theano.config.floatX), batch_size=0)

    # Train it
    downhill_train(opt, train, hh, ep, None)

    # Get approximation; zero out the shared dropout rate so the evaluation
    # pass effectively skips the dropout mask
    d.set_value(float32(0))
    _, _r = theano.function(inputs=[x],
                            outputs=[h, r],
                            updates=[])(M.astype(theano.config.floatX))
    o = FE.ife(_r, P)
    sxr = bss_eval(o, 0, array([z]))

    return W1.get_value(), sxr
def lasagne_models(M, P, FE, z, K=20, hh=.0001, ep=5000, d=0, wsp=0.0001, plt=True):
    from paris.signal import bss_eval

    # Symbolic input (the transposed magnitude spectrogram)
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=M.T.shape, input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I, num_units=K,
                    nonlinearity=lambda x: psoftplus(x, 3.), b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Second layer reconstructs the input from the latent code
    R = DenseLayer(H, num_units=M.T.shape[1],
                   nonlinearity=lambda x: psoftplus(x, 3.), b=None)

    # Cost function: KL-like distance to the input plus a sparsity penalty
    cost = (_M * (Th.log(_M + eps) - Th.log(get_output(R) + eps))
            - _M + get_output(R)).mean() \
        + wsp * Th.mean(abs(R.W))

    # Train it using Lasagne
    opt = downhill.build('rprop', loss=cost, inputs=[_M],
                         params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)[-1]

    # Get approximation
    _r = nget(R, _M, M.T.astype(float32)).T
    _h = nget(H, _M, M.T.astype(float32)).T
    o = FE.ife(_r, P)
    sxr = bss_eval(o, 0, array([z]))

    return R, sxr
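# A hypothetical convenience helper (not part of the original code): the Lasagne
# model above returns its reconstruction layer R, whose weight matrix holds the
# learned dictionary with shape (K, number of frequency bins). The separation
# routines below expect the dictionaries in exactly that orientation, so this is
# one way to pull them out.
def lasagne_dictionary(R):
    # R.W is the Theano shared variable holding the layer's weights
    return R.W.get_value()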
def nmf_sep(Z, FE, K, s=None):
    from paris.signal import bss_eval

    if s is not None:
        random.seed(s)

    # Get features
    M1, P1 = FE.fe(Z[0])
    M2, P2 = FE.fe(Z[1])
    MT, PT = FE.fe(Z[2] + Z[3])

    # Overcomplete or not? (hard-coded toggle: the first branch learns compact,
    # column-normalized bases; the second uses the training frames themselves
    # as an overcomplete dictionary with sparsity)
    t0 = time.time()
    if 1:
        w1, _ = pu(copy(M1), K[0], 300, 0, 0)
        w2, _ = pu(copy(M2), K[1], 300, 0, 0)
        w1 /= sum(w1, axis=0, keepdims=True)
        w2 /= sum(w2, axis=0, keepdims=True)
        w = (w1, w2)
        sp = [0, 0]
    else:
        # Get overcomplete bases
        w = [M1 / sum(M1, axis=0), M2 / sum(M2, axis=0)]
        sp = [.5, 1]

    # Fit 'em on the mixture
    t1 = time.time()
    _, h = pu(copy(MT), w, 300, sp[0], sp[1])
    print 'Done in', time.time() - t0, time.time() - t1, 'sec'

    # Get modulator estimates
    q = cumsum([0, w[0].shape[1], w[1].shape[1]])
    fr = [w[i].dot(h[q[i]:q[i + 1], :]) for i in arange(2)]
    fr0 = hstack(w).dot(h) + eps

    # Resynth with Wiener filtering
    r = [FE.ife(fr[0] * (MT / fr0), PT),
         FE.ife(fr[1] * (MT / fr0), PT)]
    # r = [FE.ife(fr[0], PT),
    #      FE.ife(fr[1], PT)]

    # Get results
    sxr = array(
        [bss_eval(r[i], i, vstack((Z[2], Z[3]))) for i in arange(len(r))])

    return mean(sxr, axis=0), r
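# A hypothetical call sketch (not part of the original experiments): nmf_sep()
# above expects Z to hold four time-domain signals, training excerpts of the two
# sources followed by the test excerpts whose sum forms the mixture, and K to
# hold the two dictionary sizes. The argument names below are placeholders.
def demo_nmf_baseline(train1, train2, test1, test2, FE, K=(20, 20), seed=0):
    # Returns the averaged BSS_EVAL scores and the two resynthesized sources
    return nmf_sep((train1, train2, test1, test2), FE, K, s=seed)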
def downhill_separate(M, P, FE, W1, W2, z1, z2,
                      hh=.001, ep=5000, d=0, wsp=.0001, plt=True):
    from paris.signal import bss_eval

    # Get dictionary sizes
    K = [W1.shape[1], W2.shape[1]]

    # Cache some things
    y = Th.matrix('y')
    w1 = theano.shared(W1.astype(theano.config.floatX), 'w1')
    w2 = theano.shared(W2.astype(theano.config.floatX), 'w2')

    # Activations to learn
    h1 = theano.shared(sqrt(2. / (K[0] + M.shape[1])) *
                       random.randn(K[0], M.shape[1]).astype(theano.config.floatX))
    h2 = theano.shared(sqrt(2. / (K[1] + M.shape[1])) *
                       random.randn(K[1], M.shape[1]).astype(theano.config.floatX))

    # Dropout (the random stream mirrors the one in downhill_models)
    if d > 0:
        rng = theano.tensor.shared_randomstreams.RandomStreams(0)
        dw1 = w1 * 1. / (1. - d) * (rng.uniform(size=w1.shape) > d).astype(
            theano.config.floatX)
        dw2 = w2 * 1. / (1. - d) * (rng.uniform(size=w2.shape) > d).astype(
            theano.config.floatX)
    else:
        dw1 = w1
        dw2 = w2

    # Approximate input
    r1 = psoftplus(dw1.dot(h1), 3.)
    r2 = psoftplus(dw2.dot(h2), 3.)
    r = r1 + r2

    # KL-distance to input plus sparsity on the activations
    cost = Th.mean(y * (Th.log(y + eps) - Th.log(r + eps)) - y + r) \
        + wsp * (Th.mean(abs(h1)) + Th.mean(abs(h2)))

    # Make it callable
    ffwd_f = theano.function(inputs=[], outputs=[r1, r2, h1, h2], updates=[])

    # Make an optimizer and define the inputs
    opt = downhill.build('rprop', loss=cost, inputs=[y], params=[h1, h2])
    train = downhill.Dataset(M.astype(theano.config.floatX), batch_size=0)

    # Train it
    cst = downhill_train(opt, train, hh, ep, None)

    # So what happened?
    d = 0
    _r1, _r2, _h1, _h2 = ffwd_f()
    _r = _r1 + _r2 + eps
    o1 = FE.ife(_r1 * (M / _r), P)
    o2 = FE.ife(_r2 * (M / _r), P)
    sxr = bss_eval(o1, 0, vstack((z1, z2))) + bss_eval(o2, 1, vstack((z1, z2)))

    # Return things of note
    return o1, o2, (array(sxr[:3]) + array(sxr[3:])) / 2.
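# A hypothetical end-to-end sketch (not part of the original code) for the
# Theano/downhill path, mirroring the data layout used by nmf_sep() above:
# Z[0] and Z[1] are training excerpts of the two sources, Z[2] and Z[3] are the
# test excerpts that form the mixture, and FE is assumed to expose the same
# .fe()/.ife() front end used throughout this file.
def demo_downhill_pipeline(Z, FE, K=20):
    # Learn one non-negative dictionary per isolated source
    M1, P1 = FE.fe(Z[0])
    M2, P2 = FE.fe(Z[1])
    W1, _ = downhill_models(M1, P1, FE, Z[0], K=K)
    W2, _ = downhill_models(M2, P2, FE, Z[1], K=K)
    # Separate the mixture by fitting activations over the fixed dictionaries
    MT, PT = FE.fe(Z[2] + Z[3])
    return downhill_separate(MT, PT, FE, W1, W2, Z[2], Z[3])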
def lasagne_separate2(M, P, FE, W1, W2, z1, z2,
                      hh=.0001, ep=5000, d=0, wsp=.0001, plt=True):
    from paris.signal import bss_eval

    # Get dictionary shapes
    K = [W1.shape[0], W2.shape[0]]

    # GPU cached data
    _M = theano.shared(M.T.astype(float32))
    dum = Th.vector('dum')

    # We have weights to discover
    H = theano.shared(random.rand(M.T.shape[0], K[0] + K[1]).astype(float32))
    fI = InputLayer(shape=(M.T.shape[0], K[0] + K[1]), input_var=H)

    # Split in two pathways
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Dropout?
    dfW1 = DropoutLayer(fW1, d)
    dfW2 = DropoutLayer(fW2, d)

    # Compute source modulators using previously learned dictionaries
    R1 = DenseLayer(dfW1, num_units=M.shape[0], W=W1.astype(float32),
                    nonlinearity=lambda x: psoftplus(x, 3.), b=None)
    R2 = DenseLayer(dfW2, num_units=M.shape[0], W=W2.astype(float32),
                    nonlinearity=lambda x: psoftplus(x, 3.), b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function: KL-like distance plus sparsity on the activations; the
    # zero-weighted dum term only ties the dummy input into the graph
    cost = (_M * (Th.log(_M + eps) - Th.log(get_output(R) + eps))
            - _M + get_output(R)).mean() \
        + wsp * Th.mean(H) + 0 * Th.mean(dum)

    # Train it using Lasagne
    opt = downhill.build('rprop', loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([0]).astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)[-1]

    # Get outputs, transposed back to the (frequencies, frames) orientation of M
    _r = nget(R, dum, array([0]).astype(float32)).T + eps
    _r1 = nget(R1, dum, array([0]).astype(float32)).T
    _r2 = nget(R2, dum, array([0]).astype(float32)).T
    o1 = FE.ife(_r1 * (M / _r), P)
    o2 = FE.ife(_r2 * (M / _r), P)
    sxr = bss_eval(o1, 0, vstack((z1, z2))) + bss_eval(o2, 1, vstack((z1, z2)))

    return o1, o2, (array(sxr[:3]) + array(sxr[3:])) / 2.
def lasagne_separate(M, P, FE, W1, W2, z1, z2,
                     hh=.0001, ep=5000, d=0, wsp=.0001, plt=True):
    from paris.signal import bss_eval

    # Get dictionary shapes
    K = [W1.shape[0], W2.shape[0]]

    # GPU cached data
    _M = theano.shared(M.astype(float32))

    # Input is the learned dictionary set
    lW = hstack((W1.T, W2.T)).astype(float32)
    _lW = Th.matrix('_lW')
    fI = InputLayer(shape=lW.shape, input_var=_lW)

    # Split in two paths
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Dropout?
    dfW1 = DropoutLayer(fW1, d)
    dfW2 = DropoutLayer(fW2, d)

    # Compute source modulators (here the dictionaries are the input and the
    # dense weights play the role of the activations)
    R1 = DenseLayer(dfW1, num_units=M.shape[1],
                    nonlinearity=lambda x: psoftplus(x, 3.), b=None)
    R2 = DenseLayer(dfW2, num_units=M.shape[1],
                    nonlinearity=lambda x: psoftplus(x, 3.), b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function: KL-like distance plus sparsity on the activation weights
    cost = (_M * (Th.log(_M + eps) - Th.log(get_output(R) + eps))
            - _M + get_output(R)).mean() \
        + wsp * (Th.mean(abs(R1.W)) + Th.mean(abs(R2.W)))

    # Train it using Lasagne
    opt = downhill.build('rprop', loss=cost, inputs=[_lW],
                         params=get_all_params(R))
    train = downhill.Dataset(lW, batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)[-1]

    # Get outputs
    _r = nget(R, _lW, lW) + eps
    _r1 = nget(R1, _lW, lW)
    _r2 = nget(R2, _lW, lW)
    o1 = FE.ife(_r1 * (M / _r), P)
    o2 = FE.ife(_r2 * (M / _r), P)
    sxr = bss_eval(o1, 0, vstack((z1, z2))) + bss_eval(o2, 1, vstack((z1, z2)))

    return o1, o2, (array(sxr[:3]) + array(sxr[3:])) / 2.
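# A hypothetical counterpart of the downhill sketch above for the Lasagne path
# (not part of the original code): the dictionaries come from lasagne_models(),
# are extracted in the (K, frequencies) orientation that lasagne_separate()
# expects, and the same Z layout as in nmf_sep() is assumed.
def demo_lasagne_pipeline(Z, FE, K=20):
    # Learn one Lasagne model per isolated source and extract its dictionary
    M1, P1 = FE.fe(Z[0])
    M2, P2 = FE.fe(Z[1])
    R1, _ = lasagne_models(M1, P1, FE, Z[0], K=K)
    R2, _ = lasagne_models(M2, P2, FE, Z[1], K=K)
    W1 = R1.W.get_value()
    W2 = R2.W.get_value()
    # Separate the mixture using the fixed dictionaries
    MT, PT = FE.fe(Z[2] + Z[3])
    return lasagne_separate(MT, PT, FE, W1, W2, Z[2], Z[3])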