import pickle
import numpy

# Debug harness: load a saved parameter set, a small data batch, and a
# reference curvature product computed with Theano, then compare against
# the MLP's own Gauss-Newton product. `mlp` is assumed to be constructed
# earlier in the script.
with open('debug_nnet.pickle', 'rb') as f:
    init_param = pickle.load(f)
init_param = numpy.concatenate([i.flatten() for i in init_param])
mlp.packParam(init_param)

with open('debug_data.pickle', 'rb') as f:
    data = pickle.load(f)
X = data[0]
Y = data[1]

with open('HJv.pickle', 'rb') as f:
    HJv_theano = pickle.load(f)

num_param = numpy.sum(mlp.sizes)
batch_size = 100

grad, train_nll, train_error = mlp.get_gradient(X, Y, batch_size)

# Curvature-vector product G*d against an all-ones direction.
d = 1.0 * numpy.ones((num_param,))
col = mlp.get_Gv(X, Y, batch_size, d)
#print 'Some col:'
#print col

"""
grad, train_nll, train_error = mlp.get_gradient(X, Y, 2)
v = numpy.zeros(num_param)
mlp.forward(X)
O = mlp.layers[-1].output
S = mlp.layers[-1].linear_output
#nll.append(mlp.Cost(Y))
"""
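# A minimal sketch of the intended check against the Theano reference,
# assuming `HJv_theano` holds the product either as a flat vector or as a
# list of per-parameter arrays (the layout is an assumption; only `col`
# and `HJv_theano` come from the script above):
ref = HJv_theano
if isinstance(ref, (list, tuple)):
    ref = numpy.concatenate([numpy.asarray(a).flatten() for a in ref])
abs_err = numpy.max(numpy.abs(col - ref))
rel_err = abs_err / (numpy.max(numpy.abs(ref)) + 1e-12)
print "Gv check: max abs err %g, rel err %g" % (abs_err, rel_err)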
# One outer iteration of Hessian-free training: reseed before each shuffle
# so train_cg_X and train_cg_Y are permuted identically and stay aligned,
# slice off the next CG chunk, run CG against the damped curvature, and
# report the quadratic-model diagnostics.
numpy.random.seed(18877)
numpy.random.shuffle(train_cg_X)
numpy.random.seed(18877)
numpy.random.shuffle(train_cg_Y)
train_cg_X_cur = train_cg_X[cg_chunk_index*cg_chunk_size:(cg_chunk_index+1)*cg_chunk_size, :]
train_cg_Y_cur = train_cg_Y[cg_chunk_index*cg_chunk_size:(cg_chunk_index+1)*cg_chunk_size]
cg_chunk_index = cg_chunk_index + 1

nll = []
error = []
print "Iter: %d ..." % (i), "Lambda: %f" % (mlp._lambda)

# Gradient over the full gradient batch; CG runs on the smaller curvature chunk.
grad, train_nll, train_error = mlp.get_gradient(train_gradient_X, train_gradient_Y, batch_size)
delta, next_init, after_cost = mlp.cg(-grad, train_cg_X_cur, train_cg_Y_cur, batch_size, next_init, 1)

# Reduction predicted by the quadratic model: delta^T grad + 0.5 * delta^T G delta.
Gv = mlp.get_Gv(train_cg_X_cur, train_cg_Y_cur, batch_size, delta)
delta_cost = numpy.dot(delta, grad + 0.5 * Gv)

before_cost = mlp.quick_cost(numpy.zeros((num_param,)), train_cg_X_cur, train_cg_Y_cur, batch_size)

# Residual of the damped system (G + lambda*I) delta = -grad; a small norm
# indicates CG converged.
l2norm = numpy.linalg.norm(Gv + mlp._lambda * delta + grad)
print "Residual Norm: ", l2norm
print 'Before cost: %f, After cost: %f' % (before_cost, after_cost)

param = mlp.flatParam() + delta
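# The iteration above computes everything needed for the Levenberg-Marquardt
# damping heuristic from Martens' HF method: the reduction ratio rho compares
# the actual cost decrease with the decrease predicted by the quadratic model.
# A minimal sketch, assuming the standard constants (0.25, 0.75, 3/2); whether
# this code adjusts mlp._lambda with the same constants is an assumption.
rho = (after_cost - before_cost) / delta_cost
if rho > 0.75:
    mlp._lambda *= 2.0 / 3.0   # model agrees well with the true cost: damp less
elif rho < 0.25:
    mlp._lambda *= 3.0 / 2.0   # model is a poor fit: damp more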