# Two-layer network trained with per-observation (stochastic) gradient descent.
# Architecture: tanh hidden layer -> linear scalar output, SSE cost.
# NOTE(review): relies on Layer, OutputLayer, Xtr, Ytr, Xte, Yte and in_dim
# being defined elsewhere in the file/project.

n_nodes = 60                                   # hidden-layer width
L1 = Layer('tanh', n_nodes, in_dim)            # hidden layer
L2 = OutputLayer('sse', 'linear', 1, n_nodes)  # scalar output layer, SSE cost

# Fix: eta is used in both weight updates below but its definition had been
# commented out ("# eta = 0.001"), which raises NameError — restored here.
eta = 0.001
# Fix: np.Infinity was removed in NumPy 2.0; np.inf is the canonical spelling.
iters, sErr = 200, np.inf
TrEnt = []  # training-cost history (populated outside this visible span)
TeEnt = []  # test-cost history

for i in range(iters):
    # np.random.shuffle(X)  # shuffling disabled: observations visited in fixed order
    for obs in range(Xtr.shape[0]):
        o0, y = Xtr[obs, :], float(Ytr[obs])  # input vector and scalar target
        o1 = L1.feed(o0)                      # hidden activations
        o2 = float(L2.feed(o1))               # scalar network output

        # Output-layer delta: dCost/d(o2) times the output activation derivative.
        dCost_do2 = L2.costFuncDeriv(y, o2)
        delta2 = dCost_do2 * L2.deriv_out(o2)

        # Gradient step for the output weights; eta*delta2 also applies to the
        # last column of the hstack, i.e. the bias term.
        L2.w += -eta * delta2 * np.hstack((o1, L2.biasVal))

        # Hidden-layer delta.
        # NOTE(review): standard backprop would also propagate through the output
        # weights (roughly delta2 * L2.w[:-1] * L1.deriv_out(o1)); confirm whether
        # omitting L2.w here is intentional — left unchanged pending the Layer API.
        delta1 = delta2 * L1.deriv_out(o1)
        L1.w += -eta * delta1.reshape(-1, 1) * np.hstack((o0, L1.biasVal))

    # End-of-epoch cost on the full training and test sets.
    _tr = L2.costFunc(Ytr, L2.score(Xtr, L1))
    _te = L2.costFunc(Yte, L2.score(Xte, L1))
    train_err = float(sum(_tr))