# Training script fragment: per-sample weight updates for two layer objects
# (L1 feeding L2), followed by train/test error bookkeeping.
#
# NOTE(review): the original line breaks and indentation of this fragment were
# lost, so the statement nesting described below is presumed from statement
# order -- confirm against the original file before reformatting.
#
# Setup:
#   eta          -- step size multiplying every weight update (L*.w += -eta * ...).
#   iters        -- upper bound of the outer `for i in range(iters)` loop.
#   sErr         -- last recorded test error; starts at np.Infinity.
#   TrEnt, TeEnt -- empty lists; not used in the visible portion.
#
# NOTE(review): `np.Infinity` was removed as an alias in NumPy 2.0; `np.inf` is
# the supported spelling -- confirm the NumPy version this script targets.
#
# Per sample (row `obs` of Xtr, target float(Ytr[obs])):
#   o1 = L1.feed(o0); o2 = float(L2.feed(o1))
#   delta2 = L2.costFuncDeriv(y, o2) * L2.deriv_out(o2)
#   L2.w is shifted by -eta * delta2 * hstack((o1, L2.biasVal))
#   delta1 = delta2 * L1.deriv_out(o1)
#   L1.w is shifted by -eta * delta1.reshape(-1, 1) * hstack((o0, L1.biasVal))
#
# NOTE(review): delta1 is not multiplied by any L2.w term here. Whether the
# outgoing weights are accounted for elsewhere (e.g. inside deriv_out) cannot
# be told from this fragment -- verify against the layer class.
#
# Error bookkeeping (presumably once per outer iteration, after the sample
# loop): L2.costFunc is evaluated on L2.score(Xtr, L1) and L2.score(Xte, L1),
# each result is summed into a float, the previous sErr is kept in sErr0 and
# sErr is set to the new test error. The trailing `if` tests whether the
# relative change (sErr - sErr0) / sErr0 exceeds 1e-6 once i > 50; the body of
# that `if` lies outside this fragment.
eta = 0.001 iters, sErr = 200, np.Infinity TrEnt = [] TeEnt = [] for i in range(iters): # np.random.shuffle(X) for obs in range(Xtr.shape[0]): o0, y = Xtr[obs, :], float(Ytr[obs]) o1 = L1.feed(o0) o2 = float(L2.feed(o1)) dCost_do2 = L2.costFuncDeriv(y, o2) delta2 = dCost_do2 * L2.deriv_out(o2) # here eta*delta applies to the last column of # the resulting hstack, i.e. the bias L2.w += -eta * delta2 * np.hstack((o1, L2.biasVal)) delta1 = delta2 * L1.deriv_out(o1) L1.w += -eta * delta1.reshape(-1, 1) * np.hstack((o0, L1.biasVal)) _tr = L2.costFunc(Ytr, L2.score(Xtr, L1)) _te = L2.costFunc(Yte, L2.score(Xte, L1)) train_err = float(sum(_tr)) test_err = float(sum(_te)) sErr0 = sErr sErr = test_err if (sErr - sErr0) / sErr0 > 1e-6 and i > 50: