def forward(p, h, x_true, y_true, i):
    i *= 0  # zero out the step index so every step uses the step-0 weights
    inp = join2(h, x_true)
    emb = T.dot(inp, p['W0'])
    h0 = lngru_layer(p, emb, {}, prefix='gru1', mask=None, one_step=True,
                     init_state=h[:, :1024], backwards=False)
    h1 = T.nnet.relu(ln(T.dot(h0[0], p['W1'][i])), alpha=0.02)
    h2 = T.nnet.relu(ln(T.dot(h1, p['W2'][i])), alpha=0.02)
    #h2 = h1
    y_est = T.nnet.softmax(T.dot(h2, p['Wy'][i]))
    #h_next = T.dot(h2, p['Wo'][i])
    h_next = h1
    loss = crossent(y_est, y_true)
    acc = accuracy(y_est, y_true)
    # y_est is returned twice: the caller unpacks the last value as probs
    return h_next, y_est, loss, acc, y_est
def network(p, x, true_y):
    x = x.flatten(2)
    h1 = lrelu(T.dot(x, p['W1']))
    h2 = lrelu(T.dot(h1, p['W2']))
    y = T.nnet.softmax(T.dot(h2, p['W3']))
    loss = crossent(y, true_y)
    acc = accuracy(y, true_y)
    return loss, acc
def network(params, x, y, p1, p2):
    #x *= srng.binomial(n=1, p=p1, size=x.shape, dtype='float32').astype('float32') / p1
    h1 = T.nnet.relu(bn(T.dot(bn(x), params['W1']) + params['b1']))
    #h1 *= srng.binomial(n=1, p=p2, size=h1.shape, dtype='float32').astype('float32') / p2
    h2 = T.nnet.relu(bn(T.dot(h1, params['W2']) + params['b2']))
    #h2 *= srng.binomial(n=1, p=p2, size=h2.shape, dtype='float32').astype('float32') / p2
    h3 = bn(T.dot(h2, params['W3']) + params['b3'])
    p = T.nnet.softmax(h3)
    loss = crossent(p, y)
    acc = accuracy(p, y)
    return {'loss': loss, 'p': p, 'acc': acc}
def forward(p, h, x_true, y_true, i):
    inp = join2(h, x_true)
    h1 = T.nnet.relu(ln(T.dot(inp, p['W1'][i])), alpha=0.02)
    #h2 = T.nnet.relu(ln(T.dot(h1, p['W2'])), alpha=0.02)
    h2 = h1
    y_est = T.nnet.softmax(T.dot(h2, p['Wy'][i]))
    #h_next = T.dot(h2, p['Wo'][i])
    h_next = h1
    loss = crossent(y_est, y_true)
    acc = accuracy(y_est, y_true)
    return h_next, y_est, loss, acc
y_true = T.ivector()
h_in = T.matrix()
step = T.iscalar()

print "giving x and y on all steps"
y_true_use = T.switch(T.ge(step, 4), y_true, 10)
x_true_use = x_true  # * T.eq(step, 0)

h_next, y_est, class_loss, acc = forward(params_forward, h_in, x_true_use, y_true_use, step)
h_in_rec, x_rec, y_rec = synthmem(params_synthmem, h_next, step)

print "0.1 mult"
rec_loss = 0.1 * (T.sqr(x_rec - x_true_use).sum()
                  + T.sqr(h_in - h_in_rec).sum()
                  + crossent(y_rec, y_true_use))  # should pull y_rec and y_true together!

print "TURNED OFF CLASS LOSS IN FORWARD"
#TODO: add params_forward.values() back in
updates_forward = lasagne.updates.adam(rec_loss + 0.0 * class_loss,
                                       params_forward.values() + params_synthmem.values())

forward_method = theano.function(inputs=[x_true, y_true, h_in, step],
                                 outputs=[h_next, rec_loss, class_loss, acc, y_est],
                                 updates=updates_forward)

forward_method_noupdate = theano.function(inputs=[x_true, y_true, h_in, step],
                                          outputs=[h_next, rec_loss, class_loss, acc])

'''
Goal: get a method that takes h[i+1] and dL/dh[i+1].  It runs synthmem on h[i+1]
to get estimates of x[i], y[i], and h[i].  It then runs the forward on those
values and gets that loss.
'''
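# A minimal sketch of that method, under assumptions; it is not part of the
# original file.  The names h_next_in, delta_h_next, delta_h_prev, and
# backward_method are introduced here for illustration, and injecting the
# incoming gradient as an inner product with the recomputed h[i+1] is one
# plausible choice, not the confirmed one.
h_next_in = T.matrix()     # h[i+1], produced by the later step
delta_h_next = T.matrix()  # dL/dh[i+1], handed back by the later step

# Reconstruct step i's inputs from h[i+1] with the synthetic memory.
h_rec_b, x_rec_b, y_rec_b = synthmem(params_synthmem, h_next_in, step)
y_rec_idx = T.cast(T.argmax(y_rec_b, axis=1), 'int32')

# Re-run the forward step on the reconstructed inputs.
h_next_b, y_est_b, class_loss_b, acc_b = forward(
    params_forward, h_rec_b, x_rec_b, y_rec_idx, step)

# Local objective: the step's own class loss plus the gradient signal from
# the next step, injected through the recomputed h[i+1].
local_loss = class_loss_b + T.sum(h_next_b * delta_h_next)

# The gradient w.r.t. the reconstructed h[i] is what would be passed one
# step further back as the next dL/dh.
delta_h_prev = T.grad(local_loss, h_rec_b)

updates_backward = lasagne.updates.adam(local_loss, params_forward.values())

backward_method = theano.function(
    inputs=[h_next_in, delta_h_next, step],
    outputs=[delta_h_prev, class_loss_b, acc_b],
    updates=updates_backward)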
if only_y_last_step:
    y_true_use = T.switch(T.eq(step, num_steps - 1), y_true, 10)
else:
    y_true_use = y_true

x_true_use = x_true
#x_true_use = T.switch(T.eq(step, 0), x_true, x_true*0.0)

h_next, y_est, class_loss, acc, probs = forward(params_forward, h_in, x_true_use, y_true_use, step)
h_in_rec, x_rec, y_rec = synthmem(params_synthmem, h_next, step)

print "0.1 mult"
rec_loss = 0.1 * (T.sqr(x_rec - x_true_use).sum()
                  + T.sqr(h_in - h_in_rec).sum()
                  + crossent(y_rec, y_true_use))  # should pull y_rec and y_true together!

updates_forward = lasagne.updates.adam(
    rec_loss + use_class_loss_forward * class_loss,
    params_forward.values() + params_synthmem.values(),
    learning_rate=lr_f, beta1=beta1_f)

forward_method = theano.function(
    inputs=[x_true, y_true, h_in, step],
    outputs=[h_next, rec_loss, class_loss, acc, y_est],
    updates=updates_forward)

forward_method_noupdate = theano.function(
    inputs=[x_true, y_true, h_in, step],