# numpy, pickle and sys are imported here because no import for them is
# visible in this part of the file; re-importing is harmless if one exists.
import pickle
import sys

import numpy

from LR import Logisticlayer
from mlp import MLP

if __name__ == "__main__":
    # Debug driver: builds an MLP, restores pickled parameters and data, and
    # computes the gradient on the loaded data.

    # Print arrays in full. threshold=numpy.nan is rejected by current NumPy;
    # sys.maxsize is the documented way to disable summarisation.
    numpy.set_printoptions(threshold=sys.maxsize)

    input_dim = 4
    output_dim = 3
    sample_size = 100
    # Disabled synthetic-data generation, kept for reference; the data is
    # loaded from 'debug_data.pickle' below instead.
    #X=numpy.random.normal(0,1,(sample_size,input_dim))
    #temp,Y=numpy.nonzero(numpy.random.multinomial(1,[1.0/output_dim]*output_dim,size=sample_size))

    # Same values as the previous literal MLP(4, 3, [10, 10]).
    # NOTE(review): assumes the first two arguments are the input and output
    # sizes -- confirm against mlp.MLP.
    mlp = MLP(input_dim, output_dim, [10, 10])

    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    # Pickles are opened in binary mode so loading does not depend on the
    # platform's text-mode newline handling.
    with open('debug_nnet.pickle', 'rb') as f:
        init_param = pickle.load(f)
    # Flatten every loaded array and join them into the single vector that
    # is handed to packParam.
    init_param = numpy.concatenate([p.flatten() for p in init_param])
    mlp.packParam(init_param)

    with open('debug_data.pickle', 'rb') as f:
        data = pickle.load(f)
    X = data[0]
    Y = data[1]

    # NOTE(review): presumably a reference result produced with Theano for
    # comparison -- it is not used in the visible part of the script.
    with open('HJv.pickle', 'rb') as f:
        HJv_theano = pickle.load(f)

    num_param = numpy.sum(mlp.sizes)
    batch_size = 100
    grad, train_nll, train_error = mlp.get_gradient(X, Y, batch_size)
    # All-ones vector with one entry per parameter; not used in the visible
    # part of the script.
    d = 1.0 * numpy.ones((num_param,))
# Parameter-update step followed by the start of a validation pass (fragment).
# NOTE(review): this line is whitespace-collapsed -- the original line breaks
#   and indentation are lost, so the statement nesting described below is
#   inferred from syntax; confirm against the original file.
# NOTE(review): train_cg_X_cur, train_cg_Y_cur, next_init, valid_X, valid_Y and
#   n_valid_batches are not assigned anywhere in the visible part of the file.
# Uses Python 2 syntax (print statements, xrange).
# Steps, in order:
#   1. mlp.cg(-grad, ..., next_init, 1) returns (delta, next_init, after_cost)
#      (presumably a conjugate-gradient solve -- confirm in mlp.MLP).
#   2. Gv = mlp.get_Gv(..., delta); delta_cost = dot(delta, grad + 0.5*Gv).
#   3. before_cost = mlp.quick_cost(...) evaluated with an all-zero vector of
#      length num_param; the norm of Gv + mlp._lambda*delta + grad and both
#      costs are printed.
#   4. The parameters become mlp.flatParam() + delta and are packed into mlp.
#   5. tune_lambda = (after_cost - before_cost) / delta_cost; mlp._lambda is
#      multiplied by 1.5 when tune_lambda < 0.25 and divided by 1.5 when
#      tune_lambda > 0.75.
#   6. train_nll / train_error are printed, nll and error are reset to empty
#      lists, and a loop over n_valid_batches slices valid_X / valid_Y into
#      batch_size-sized chunks and calls mlp.forward(X); any further loop body
#      lies outside this view.
delta, next_init, after_cost = mlp.cg(-grad, train_cg_X_cur, train_cg_Y_cur, batch_size, next_init, 1) Gv = mlp.get_Gv(train_cg_X_cur,train_cg_Y_cur,batch_size,delta) delta_cost = numpy.dot(delta,grad+0.5*Gv) before_cost = mlp.quick_cost(numpy.zeros((num_param,)), train_cg_X_cur, train_cg_Y_cur, batch_size) l2norm = numpy.linalg.norm(Gv + mlp._lambda*delta + grad) print "Residual Norm: ",l2norm print 'Before cost: %f, After cost: %f'%(before_cost,after_cost) param = mlp.flatParam() + delta mlp.packParam(param) tune_lambda = (after_cost - before_cost)/delta_cost if tune_lambda < 0.25: mlp._lambda = mlp._lambda*1.5 elif tune_lambda > 0.75: mlp._lambda = mlp._lambda/1.5 print "Training NNL: %f, Error: %f"%(train_nll,train_error) nll=[] error=[] for batch_index in xrange(n_valid_batches): X=valid_X[batch_index*batch_size:(batch_index+1)*batch_size,:] Y=valid_Y[batch_index*batch_size:(batch_index+1)*batch_size] mlp.forward(X)