def dnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):
    # GPU cached data
    _M = theano.shared(M.T.astype(float64))
    dum = Th.vector('dum')

    # Get layer sizes
    K = []
    for i in range(len(W1)):
        K.append([W1[i].shape[0], W2[i].shape[0]])
    K.append([M.T.shape[1], M.T.shape[1]])

    # We have weights to discover, init = 2/(Nin+Nout)
    H = theano.shared(
        sqrt(2. / (K[0][0] + K[0][1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0][0] + K[0][1]).astype(float64))
    fI = InputLayer(shape=(M.T.shape[0], K[0][0] + K[0][1]), input_var=H)

    # Split in two pathways, one for each source's autoencoder
    H1 = (len(W1) + 1) * [None]
    H2 = (len(W1) + 1) * [None]
    H1[0] = SliceLayer(fI, indices=slice(0, K[0][0]), axis=1)
    H2[0] = SliceLayer(fI, indices=slice(K[0][0], K[0][0] + K[0][1]), axis=1)

    # Put the subsequent layers
    for i in range(len(W1)):
        H1[i + 1] = DenseLayer(H1[i], num_units=K[i + 1][0],
                               W=W1[i].astype(float64),
                               nonlinearity=lambda x: psoftplus(x, spb),
                               b=None)
        H2[i + 1] = DenseLayer(H2[i], num_units=K[i + 1][1],
                               W=W2[i].astype(float64),
                               nonlinearity=lambda x: psoftplus(x, spb),
                               b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([H1[-1], H2[-1]])

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro) \
        + 0 * Th.mean(dum)
    for i in range(len(H1) - 1):
        cost += sp * Th.mean(abs(get_output(H1[i]))) + \
            sp * Th.mean(abs(get_output(H2[i])))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float64), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float64)).T + eps
    _r1 = nget(H1[-1], dum, array([0]).astype(float64)).T
    _r2 = nget(H2[-1], dum, array([0]).astype(float64)).T

    return _r, _r1, _r2, er
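
# ---------------------------------------------------------------------------
# The functions in this file lean on names that are not defined in this
# section: wildcard numpy imports (sqrt, random, array, reshape, squeeze,
# float32, float64), theano / theano.tensor as Th, downhill, the Lasagne
# layer classes, and the project helpers psoftplus, eps, nget and
# downhill_train. The stand-ins below are only a guess at what those helpers
# look like, so this section can be read on its own; they are not the
# project's actual definitions and may differ from them.

def psoftplus(x, p=1.):
    # Assumed parametric softplus used as the non-negative activation:
    # smooth for small p, approaching a ReLU as the sharpness p grows.
    return Th.log(1. + Th.exp(p * x)) / p


def nget(layer, invar, data):
    # Assumed helper: deterministic (dropout-off) forward pass through a
    # Lasagne layer, with `data` bound to the symbolic input `invar`.
    f = theano.function([invar], get_output(layer, deterministic=True),
                        on_unused_input='ignore')
    return f(data)


def downhill_train(opt, train, hh, ep, vt):
    # Assumed wrapper around downhill: run up to `ep` updates at learning
    # rate `hh` on `train` and return the per-update training loss.
    lt = []
    for tm, _ in opt.iterate(train, learning_rate=hh, patience=ep):
        lt.append(tm['loss'])
        if len(lt) >= ep:
            break
    return lt

# eps guards the logarithms in the KL-like cost; any tiny constant works, e.g.
# eps = finfo(float64).eps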
def dnn_model(M, K=[20, 20], hh=.0001, ep=5000, d=0, wsp=0.0001, hsp=0,
              spb=3, bt=0, al='rprop'):
    # Sort out the activation
    from inspect import isfunction
    if isfunction(spb):
        act = spb
    else:
        act = lambda x: psoftplus(x, spb)

    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # Setup the layers
    L = K + [M.T.shape[1]]
    H = len(L) * [None]
    Hd = len(L) * [None]

    # First layer
    H[0] = DenseLayer(I, num_units=K[0], nonlinearity=act, b=None)

    # All the rest
    for k in range(1, len(L)):
        # Optional dropout
        Hd[k - 1] = DropoutLayer(H[k - 1], d)

        # Next layer
        H[k] = DenseLayer(Hd[k - 1], num_units=L[k], nonlinearity=act, b=None)

    # Cost function
    Ro = get_output(H[-1]) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro)
    for k in range(len(L) - 1):
        cost += wsp * Th.mean(abs(H[k].W)) + hsp * Th.mean(get_output(H[k]))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[_M],
                         params=get_all_params(H[-1]))
    train = downhill.Dataset(M.T.astype(float64), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    h = [nget(H[k], _M, M.T.astype(float64)).T for k in range(len(L))]
    w = [H[k].W.get_value() for k in range(len(L))]

    return h, w, er
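
# A minimal usage sketch (not part of the original code; the demo function
# name and the random stand-in spectrograms are illustrative only): learn one
# multi-layer non-negative model per isolated source with dnn_model(), then
# explain a mixture with dnn_sep() using the two fixed weight stacks.
def _demo_dnn():
    # In practice M1, M2 and Mx would be magnitude spectrograms
    # (frequencies x frames) of two training sources and of their mixture.
    M1 = abs(random.randn(513, 200)).astype(float64)
    M2 = abs(random.randn(513, 200)).astype(float64)
    Mx = abs(random.randn(513, 150)).astype(float64)

    # One two-layer model per source; w1/w2 are lists of per-layer weights
    _, w1, _ = dnn_model(M1, K=[20, 20], ep=1000)
    _, w2, _ = dnn_model(M2, K=[20, 20], ep=1000)

    # Estimate mixture activations over the fixed decoders; r1 and r2
    # approximate the two sources, r their sum
    r, r1, r2, er = dnn_sep(Mx, w1, w2, ep=1000)
    return r1, r2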
def cnn_model(M, K=20, T=1, hh=.0001, ep=5000, d=0, hsp=0.0001, wsp=0, spb=3,
              bt=0, al='rprop'):
    # Pick reasonable cuDNN convolution algorithms
    theano.config.dnn.conv.algo_fwd = 'fft_tiling'
    theano.config.dnn.conv.algo_bwd_filter = 'none'
    theano.config.dnn.conv.algo_bwd_data = 'none'

    # Reformat input data
    M3 = reshape(M.astype(float32), (1, M.shape[0], M.shape[1]))

    # Copy key variables to GPU
    _M = Th.tensor3('_M')

    # Input and forward transform
    I = InputLayer(shape=M3.shape, input_var=_M)

    # First layer is the transform to a non-negative subspace
    H = Conv1DLayer(I, filter_size=T, num_filters=K, pad='same',
                    nonlinearity=lambda x: psoftplus(x, spb), b=None)

    # Upper layer is the synthesizer
    R = Conv1DLayer(H, filter_size=T, num_filters=M.shape[0], pad='same',
                    nonlinearity=lambda x: psoftplus(x, spb), b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro) \
        + hsp * Th.mean(get_output(H))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M3, batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation and hidden state
    _r = squeeze(nget(R, _M, M3))
    _h = squeeze(nget(H, _M, M3))

    return _r, R.W.get_value(), er, _h
def cnn_model_th(M, K=20, T=1, hh=.0001, ep=5000, d=0, wsp=0.0001, dp=0):
    rng = theano.tensor.shared_randomstreams.RandomStreams(0)

    # Shared variables to use
    x = Th.matrix('x')
    y = theano.shared(M.astype(theano.config.floatX))
    d = theano.shared(float32(dp))

    # Network weights
    W0 = theano.shared(
        sqrt(2. / (K + M.shape[0])) *
        random.randn(K, M.shape[0]).astype(theano.config.floatX))
    W1 = theano.shared(
        sqrt(2. / (K + M.shape[0])) *
        random.randn(M.shape[0], K).astype(theano.config.floatX))

    # First layer is the transform to a non-negative subspace
    h = psoftplus(W0.dot(x), 3.)

    # Dropout
    if dp > 0:
        h *= (1. / (1. - d) * (rng.uniform(size=h.shape) > d).astype(
            theano.config.floatX)).astype(theano.config.floatX)

    # Second layer reconstructs the input
    l1 = W1.dot(h)
    r = psoftplus(l1, 3.)

    # Approximate input using KL-like distance
    cost = Th.mean(y * (Th.log(y + eps) - Th.log(r + eps)) - y + r) \
        + wsp * Th.mean(abs(W1))

    # Make an optimizer and define the training input
    opt = downhill.build('rprop', loss=cost, inputs=[x], params=[W0, W1])
    train = downhill.Dataset(M.astype(theano.config.floatX), batch_size=0)

    # Train it
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation with dropout switched off
    d.set_value(0.)
    _h, _r = theano.function(inputs=[x], outputs=[h, r],
                             updates=[])(M.astype(theano.config.floatX))

    # Leftover evaluation snippet (references names not defined in this
    # module); disabled so the function stays self-contained:
    # o = FE.ife(_r, P)
    # sxr = bss_eval(o, 0, array([z]))

    return _r, W1.get_value(), _h, er
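
# A short usage sketch (illustrative, not from the original code): despite
# its name, cnn_model_th is a plain two-layer Theano autoencoder with a
# hand-rolled dropout mask on the hidden code. The random matrix below is a
# stand-in for a magnitude spectrogram.
def _demo_cnn_model_th():
    M = abs(random.randn(513, 200)).astype(float32)
    # K latent components, 20% dropout on the hidden code during training
    r, W1, h, er = cnn_model_th(M, K=20, ep=1000, dp=0.2)
    return r, W1, h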
def rnn_model(M, K=20, hh=.0001, ep=5000, d=0, wsp=0.0001, hsp=0, spb=3,
              bt=0, al='rmsprop', t=5):
    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I, num_units=K,
                    nonlinearity=lambda x: psoftplus(x, spb), b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Compute output
    R = RecurrentLayer(H, num_units=M.T.shape[1],
                       nonlinearity=lambda x: psoftplus(x, spb),
                       gradient_steps=t, b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro) \
        + hsp * Th.mean(get_output(H0))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float32), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    _r = nget(R, _M, M.T.astype(float32)).T
    _h = nget(H, _M, M.T.astype(float32)).T

    return _r, (R.W_in_to_hid.get_value(), R.W_hid_to_hid.get_value()), er, _h
def nn_model(M, K=20, hh=.0001, ep=5000, d=0, wsp=0.0001, hsp=0, spb=3,
             bt=0, al='rprop'):
    # Sort out the activation
    from inspect import isfunction
    if isfunction(spb):
        act = spb
    else:
        act = lambda x: psoftplus(x, spb)

    # Copy key variables to GPU
    _M = Th.matrix('_M')

    # Input and forward transform
    I = InputLayer(shape=(None, M.shape[0]), input_var=_M)

    # First layer is the transform to a non-negative subspace
    H0 = DenseLayer(I, num_units=K, nonlinearity=act, b=None)

    # Optional dropout
    H = DropoutLayer(H0, d)

    # Compute output
    R = DenseLayer(H, num_units=M.T.shape[1], nonlinearity=act, b=None)

    # Cost function
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro) \
        + wsp * Th.mean(abs(R.W[0])) + hsp * Th.mean(get_output(H0))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[_M], params=get_all_params(R))
    train = downhill.Dataset(M.T.astype(float64), batch_size=bt)
    er = downhill_train(opt, train, hh, ep, None)

    # Get approximation
    _r = nget(R, _M, M.T.astype(float64)).T
    _h = nget(H, _M, M.T.astype(float64)).T

    return _r, R.W.get_value(), er, _h
def cnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):
    # Pick reasonable cuDNN convolution algorithms
    theano.config.dnn.conv.algo_fwd = 'fft_tiling'
    theano.config.dnn.conv.algo_bwd_filter = 'none'
    theano.config.dnn.conv.algo_bwd_data = 'none'

    # Reformat input data
    M3 = reshape(M.astype(float32), (1, M.shape[0], M.shape[1]))

    # Copy key variables to GPU
    _M = theano.shared(M3.astype(float32))

    # Get dictionary shapes
    K = [W1.shape[1], W2.shape[1]]
    T = W1.shape[2]

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(1, K[0] + K[1], M.T.shape[0]).astype(float32))
    fI = InputLayer(shape=(1, K[0] + K[1], M.T.shape[0]), input_var=H)

    # Split in two pathways
    H1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    H2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Compute source modulators using previously learned convolutional
    # dictionaries
    R1 = Conv1DLayer(H1, filter_size=T, W=W1, num_filters=M.shape[0],
                     pad='same', nonlinearity=lambda x: psoftplus(x, spb),
                     b=None)
    R2 = Conv1DLayer(H2, filter_size=T, W=W2, num_filters=M.shape[0],
                     pad='same', nonlinearity=lambda x: psoftplus(x, spb),
                     b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    dum = Th.vector('dum')
    Ro = get_output(R) + eps
    cost = Th.mean(_M * (Th.log(_M + eps) - Th.log(Ro)) - _M + Ro) \
        + 0 * Th.mean(dum) + sp * Th.mean(abs(H))

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([0]).astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = squeeze(nget(R, dum, array([0]).astype(float32))) + eps
    _r1 = squeeze(nget(R1, dum, array([0]).astype(float32)))
    _r2 = squeeze(nget(R2, dum, array([0]).astype(float32)))

    return _r, _r1, _r2, er
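
# A usage sketch for the convolutional pair (illustrative only, and as
# written cnn_model/cnn_sep assume the cuDNN backend is available):
# cnn_model() learns K convolutional bases of length T per source, and
# cnn_sep() keeps those synthesis filters fixed while optimizing only the
# per-source activation maps that explain the mixture. Random matrices stand
# in for spectrograms.
def _demo_cnn():
    M1 = abs(random.randn(513, 200)).astype(float32)
    M2 = abs(random.randn(513, 200)).astype(float32)
    Mx = abs(random.randn(513, 150)).astype(float32)

    # Learned dictionaries have shape (num_freq, K, T), as cnn_sep expects
    _, w1, _, _ = cnn_model(M1, K=20, T=5, ep=1000)
    _, w2, _, _ = cnn_model(M2, K=20, T=5, ep=1000)

    r, r1, r2, er = cnn_sep(Mx, w1, w2, ep=1000)
    return r1, r2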
def rnn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rmsprop',
            t=5):
    # Get dictionary shapes
    K = [W1[0].shape[0], W2[0].shape[0]]

    # GPU cached data
    _M = theano.shared(M.T.astype(float32))
    dum = Th.vector('dum')

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0] + K[1]).astype(float32))
    fI = InputLayer(shape=(M.T.shape[0], K[0] + K[1]), input_var=H)

    # Split in two pathways
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Dropout?
    dfW1 = DropoutLayer(fW1, dum[0])
    dfW2 = DropoutLayer(fW2, dum[0])

    # Compute source modulators using previously learned dictionaries
    R1 = RecurrentLayer(dfW1, num_units=M.T.shape[1], b=None,
                        W_in_to_hid=W1[0].astype(float32),
                        W_hid_to_hid=W1[1].astype(float32),
                        nonlinearity=lambda x: psoftplus(x, spb),
                        gradient_steps=t)
    R2 = RecurrentLayer(dfW2, num_units=M.T.shape[1], b=None,
                        W_in_to_hid=W2[0].astype(float32),
                        W_hid_to_hid=W2[1].astype(float32),
                        nonlinearity=lambda x: psoftplus(x, spb),
                        gradient_steps=t)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    Ro = get_output(R) + eps
    cost = (_M * (Th.log(_M + eps) - Th.log(Ro + eps)) - _M + Ro).mean() \
        + sp * Th.mean(abs(H)) + 0 * Th.mean(dum)

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    train = downhill.Dataset(array([d]).astype(float32), batch_size=0)
    er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float32)).T + eps
    _r1 = nget(R1, dum, array([0]).astype(float32)).T
    _r2 = nget(R2, dum, array([0]).astype(float32)).T

    return _r, _r1, _r2, er
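
# A usage sketch for the recurrent pair (illustrative only): rnn_model()
# returns the recurrent layer's (input-to-hidden, hidden-to-hidden) weight
# pair, which is exactly the per-source dictionary format rnn_sep() expects.
# Random matrices stand in for spectrograms.
def _demo_rnn():
    M1 = abs(random.randn(513, 200)).astype(float32)
    M2 = abs(random.randn(513, 200)).astype(float32)
    Mx = abs(random.randn(513, 150)).astype(float32)

    _, w1, _, _ = rnn_model(M1, K=20, ep=1000, t=5)
    _, w2, _, _ = rnn_model(M2, K=20, ep=1000, t=5)

    r, r1, r2, er = rnn_sep(Mx, w1, w2, ep=1000, t=5)
    return r1, r2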
def nn_sep(M, W1, W2, hh=.0001, ep=5000, d=0, sp=.0001, spb=3, al='rprop'):
    # Sort out the activation
    from inspect import isfunction
    if isfunction(spb):
        act = spb
    else:
        act = lambda x: psoftplus(x, spb)

    # Get dictionary shapes
    K = [W1.shape[0], W2.shape[0]]

    # GPU cached data
    _M = theano.shared(M.T.astype(float64))
    dum = Th.vector('dum')

    # We have weights to discover
    H = theano.shared(
        sqrt(2. / (K[0] + K[1] + M.shape[1])) *
        random.rand(M.T.shape[0], K[0] + K[1]).astype(float64))
    fI = InputLayer(shape=(M.T.shape[0], K[0] + K[1]), input_var=H)

    # Split in two pathways
    fW1 = SliceLayer(fI, indices=slice(0, K[0]), axis=1)
    fW2 = SliceLayer(fI, indices=slice(K[0], K[0] + K[1]), axis=1)

    # Dropout?
    dfW1 = DropoutLayer(fW1, dum[0])
    dfW2 = DropoutLayer(fW2, dum[0])

    # Compute source modulators using previously learned dictionaries
    R1 = DenseLayer(dfW1, num_units=M.T.shape[1], W=W1.astype(float64),
                    nonlinearity=act, b=None)
    R2 = DenseLayer(dfW2, num_units=M.T.shape[1], W=W2.astype(float64),
                    nonlinearity=act, b=None)

    # Add the two approximations
    R = ElemwiseSumLayer([R1, R2])

    # Cost function
    Ro = get_output(R) + eps
    cost = (_M * (Th.log(_M + eps) - Th.log(Ro + eps)) - _M + Ro).mean() \
        + sp * Th.mean(abs(H)) + 0 * Th.mean(dum)

    # Train it using Lasagne
    opt = downhill.build(al, loss=cost, inputs=[dum], params=[H])
    # train = downhill.Dataset(array([0]).astype(float32), batch_size=0)
    if isinstance(d, list):
        train = downhill.Dataset(array([d[0]]).astype(float64), batch_size=0)
        er = downhill_train(opt, train, hh, ep / 2, None)
        train = downhill.Dataset(array([d[1]]).astype(float64), batch_size=0)
        er += downhill_train(opt, train, hh, ep / 2, None)
    else:
        train = downhill.Dataset(array([d]).astype(float64), batch_size=0)
        er = downhill_train(opt, train, hh, ep, None)

    # Get outputs
    _r = nget(R, dum, array([0]).astype(float64)).T + eps
    _r1 = nget(R1, dum, array([0]).astype(float64)).T
    _r2 = nget(R2, dum, array([0]).astype(float64)).T

    return _r, _r1, _r2, er
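
# A usage sketch for the single-hidden-layer pair (illustrative only):
# nn_model() returns the decoder weights that nn_sep() keeps fixed while
# fitting the mixture activations. Passing a two-element list as d trains
# nn_sep in two stages with different dropout rates (here: some dropout
# first, then none). Random matrices stand in for spectrograms.
def _demo_nn():
    M1 = abs(random.randn(513, 200)).astype(float64)
    M2 = abs(random.randn(513, 200)).astype(float64)
    Mx = abs(random.randn(513, 150)).astype(float64)

    _, w1, _, _ = nn_model(M1, K=20, ep=1000)
    _, w2, _, _ = nn_model(M2, K=20, ep=1000)

    r, r1, r2, er = nn_sep(Mx, w1, w2, ep=1000, d=[0.2, 0.0])
    return r1, r2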