from numpy import *
import Untils

def bpNet(dataSet, classLabels):
    # Convert the data set to matrices
    SampIn = mat(dataSet).T
    expected = mat(classLabels)
    m, n = shape(dataSet)
    # Network parameters
    eb = 0.01        # error tolerance
    eta = 0.05       # learning rate
    mc = 0.3         # momentum factor
    maxiter = 2000   # maximum number of iterations
    errlist = []     # error list
    # Build and initialize the network
    nSampNum = m     # number of samples
    nSampDim = n - 1 # sample dimensionality
    nHidden = 4      # hidden-layer neurons
    nOut = 1         # output layer
    # Hidden-layer parameters
    hi_w = 2.0 * (random.rand(nHidden, nSampDim) - 0.5)
    hi_b = 2.0 * (random.rand(nHidden, 1) - 0.5)
    hi_wb = mat(Untils.mergMatrix(mat(hi_w), mat(hi_b)))
    # Output-layer parameters
    out_w = 2.0 * (random.rand(nOut, nHidden) - 0.5)
    out_b = 2.0 * (random.rand(nOut, 1) - 0.5)
    out_wb = mat(Untils.mergMatrix(mat(out_w), mat(out_b)))
    # Previous gradients, used by the momentum term
    dout_wbOld = 0.0
    dhi_wbOld = 0.0
    for i in xrange(maxiter):
        # 1. Forward propagation of the working signal
        # 1.1 Input layer to hidden layer
        hi_input = hi_wb * SampIn  # hi_wb: (nHidden, n); SampIn: (n, m)
        hi_output = logistic(hi_input)
        hi2out = Untils.mergMatrix(hi_output.T, ones((nSampNum, 1))).T
        # 1.2 Hidden layer to output layer
        out_input = out_wb * hi2out
        out_output = logistic(out_input)
        # 2. Error computation
        err = expected - out_output
        sse = errorfunc(err)
        errlist.append(sse)
        # 2.1 Check for convergence
        if sse <= eb:
            print "iteration:", i + 1
            break
        # 3. Back-propagation of the error signal
        # 3.1 DELTA: gradient from the output layer back to the hidden layer
        DELTA = multiply(err, dlogit(out_input, out_output))
        wDelta = out_wb[:, :-1].T * DELTA
        # 3.2 delta: gradient from the hidden layer back to the input layer
        delta = multiply(wDelta, dlogit(hi_input, hi_output))
        dout_wb = DELTA * hi2out.T
        # 3.3 Weight gradient for the input-to-hidden layer
        dhi_wb = delta * SampIn.T
        # 3.4 Update the output-layer and hidden-layer weights
        if i == 0:
            out_wb = out_wb + eta * dout_wb
            hi_wb = hi_wb + eta * dhi_wb
        else:
            out_wb = out_wb + (1.0 - mc) * eta * dout_wb + mc * dout_wbOld
            hi_wb = hi_wb + (1.0 - mc) * eta * dhi_wb + mc * dhi_wbOld
        dout_wbOld = dout_wb
        dhi_wbOld = dhi_wb
    return errlist, out_wb, hi_wb
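The listings in this section lean on helpers whose bodies are not shown here: logistic, dlogit, errorfunc / sumsqr, and Untils.mergMatrix. The following is a minimal sketch inferred from the call sites; the two-argument dlogit signature and the 0.5 factor in errorfunc are assumptions, not the original definitions.

from numpy import *

def logistic(z):
    # Element-wise sigmoid activation
    return 1.0 / (1.0 + exp(-z))

def dlogit(net, out):
    # Sigmoid derivative written in terms of the activation's output:
    # f'(z) = f(z) * (1 - f(z)); `net` is unused but kept to match the calls
    return multiply(out, 1.0 - out)

def errorfunc(err):
    # Sum-of-squared-errors cost; the 0.5 factor is a common convention
    # and an assumption here
    return 0.5 * sum(multiply(err, err))

def sumsqr(err):
    # Plain sum of squares, as called by the wex/WEX variant below
    return sum(multiply(err, err))

# Stand-in for Untils.mergMatrix: column-wise concatenation of two matrices
def mergMatrix(matA, matB):
    return hstack((matA, matB))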
A variant of the same setup uses a larger momentum factor and a smaller hidden layer; the fragment breaks off at the start of the forward pass:

    mc = 0.8         # momentum factor
    maxiter = 1000   # maximum number of iterations
    # Build and initialize the network
    nSampNum = m     # number of samples
    nSampDim = 2     # sample dimensionality (hard-coded here)
    nHidden = 3      # hidden-layer neurons
    nOut = 1         # output layer
    # Hidden-layer parameters: nHidden x 3, one row per hidden node
    w = 2 * (random.rand(nHidden, nSampDim) - 0.5)
    b = 2 * (random.rand(nHidden, 1) - 0.5)
    wex = mat(Untils.mergMatrix(mat(w), mat(b)))
    # Output-layer parameters
    W = 2 * (random.rand(nOut, nHidden) - 0.5)
    B = 2 * (random.rand(nOut, 1) - 0.5)
    WEX = mat(Untils.mergMatrix(mat(W), mat(B)))
    dWEXOld = 0.0
    dwexOld = 0.0    # previous gradients for the momentum term
    # Training
    iteration = 0
    errRec = []      # error record
    for i in range(maxiter):
        # Forward propagation of the working signal
        hp = wex * SampIn
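The dWEXOld / dwexOld buffers hold the previous iteration's raw gradients for the momentum term. In every listing here, the update applied from the second iteration on is

    W(k+1) = W(k) + (1 - mc) * eta * g(k) + mc * g(k-1)

where g(k) is the current gradient term (DELTA * tauex.T for the output layer). Note that mc weights the previous gradient directly, not the previous applied step, so a large factor such as mc = 0.8 makes the search direction change only slowly between iterations.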
def bpNet(dataSet, classLabels):
    # Convert the data set to matrices
    SampIn = mat(dataSet).T
    expected = mat(classLabels)
    m, n = shape(dataSet)
    # Network parameters
    eb = 0.01        # error tolerance
    eta = 0.05       # learning rate
    mc = 0.2         # momentum factor
    maxiter = 2000   # maximum number of iterations
    errRec = []      # error record
    # Build and initialize the network
    nSampNum = m     # number of samples
    nSampDim = n - 1 # sample dimensionality
    nHidden = 4      # hidden-layer neurons
    nOut = 1         # output layer
    # Hidden-layer parameters: one row per hidden node
    w = 2.0 * (random.rand(nHidden, nSampDim) - 0.5)
    b = 2.0 * (random.rand(nHidden, 1) - 0.5)
    wex = mat(Untils.mergMatrix(mat(w), mat(b)))
    # Output-layer parameters
    W = 2.0 * (random.rand(nOut, nHidden) - 0.5)
    B = 2.0 * (random.rand(nOut, 1) - 0.5)
    WEX = mat(Untils.mergMatrix(mat(W), mat(B)))
    # Previous gradients for the momentum term
    dWEXOld = 0.0
    dwexOld = 0.0
    # Training
    iteration = 0
    for i in range(maxiter):
        # 1. Forward propagation of the working signal
        hp = wex * SampIn
        tau = logistic(hp)
        tauex = Untils.mergMatrix(tau.T, ones((nSampNum, 1))).T
        HM = WEX * tauex
        out = logistic(HM)
        err = expected - out
        sse = sumsqr(err)
        errRec.append(sse)
        # Check for convergence
        iteration = iteration + 1
        if sse <= eb:
            print "iteration:", i
            break
        # 2. Back-propagation of the error signal
        # DELTA and delta are the local gradients
        DELTA = multiply(err, dlogit(HM, out))
        wDelta = W.T * DELTA
        delta = multiply(wDelta, dlogit(hp, tau))
        dWEX = DELTA * tauex.T
        dwex = delta * SampIn.T
        # 3. Update the weights
        if i == 0:
            WEX = WEX + eta * dWEX
            wex = wex + eta * dwex
        else:
            WEX = WEX + (1.0 - mc) * eta * dWEX + mc * dWEXOld
            wex = wex + (1.0 - mc) * eta * dwex + mc * dwexOld
        dWEXOld = dWEX
        dwexOld = dwex
        # Keep W in sync with the updated WEX (drop the bias column),
        # since wDelta above is computed from W
        W = WEX[:, 0:nHidden]
    return errRec, WEX, wex
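Neither listing shows a driver. A minimal usage sketch, under the assumption that the last column of every sample is the constant-1 bias input (which is why the listings set nSampDim = n - 1) and that labels are coded 0/1; the data values here are made up for illustration:

from numpy import *

# Toy 2-D samples; the trailing 1.0 is the bias input
dataSet = [[0.1, 0.8, 1.0],
           [0.9, 0.2, 1.0],
           [0.2, 0.7, 1.0],
           [0.8, 0.1, 1.0]]
classLabels = [0, 1, 0, 1]

errRec, WEX, wex = bpNet(dataSet, classLabels)

# Classify by repeating the forward pass with the trained weights
SampIn = mat(dataSet).T                                 # (n, m)
tau = logistic(wex * SampIn)                            # hidden-layer output
tauex = mat(vstack((tau, ones((1, SampIn.shape[1])))))  # append bias row
out = logistic(WEX * tauex)                             # network output, (1, m)
print "outputs:", out
print "final SSE:", errRec[-1]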