def train_1hidd(xMat, yMat, eta, nNodes, eps=1e-6, trace=False, shuffle=True):
    def feedForward(xs, ys, wtsOut, wtsHidd):              # ys unused; kept for a uniform signature
        zs = concateBias(sigmoid(xs @ wtsHidd))
        return zs, softMax(zs @ wtsOut)

    def backProp(ys, yfit, xs, zs, wtsOut, wtsHidd):
        d_Out = eta * np.outer(zs, ys - yfit)
        d_hidd = eta * np.outer(xs, wtsOut @ (ys - yfit) * (zs * (1 - zs)))[:, :-1]
        return wtsOut + d_Out, wtsHidd + d_hidd

    xMat = concateBias(xMat)                                # add bias terms
    (nData, nK), nDim = yMat.shape, xMat.shape[1]           # size of data and classes
    wtOut = np.random.rand(nNodes + 1, nK) / 50 - 0.01      # init wts uniform in (-0.01, 0.01)
    wtHidd = np.random.rand(nDim, nNodes) / 50 - 0.01
    lastErr = np.inf                                        # guarantees the first epoch runs
    zs, yHats = feedForward(xMat, yMat, wtOut, wtHidd)      # first feedforward pass
    meanErr = crossEntNK(yHats, yMat)                       # error from random weights
    epch = 0
    while (abs(meanErr - lastErr) > eps) and epch < 1e6:    # while not converged
        if epch % 1000 == 0 and trace:
            print('Iter #%u, error: %f' % (epch, meanErr))
        if shuffle:
            seq = getRandomSeq(nData)                       # random seq for stoch. gradient descent
        else:
            seq = np.arange(nData)
        for n in seq:                                       # loop over data set
            x, y = xMat[n], yMat[n]                         # current data point
            z, yHat = feedForward(x, y, wtOut, wtHidd)                # feedforward
            wtOut, wtHidd = backProp(y, yHat, x, z, wtOut, wtHidd)    # update weights
        lastErr = meanErr
        zs, yHats = feedForward(xMat, yMat, wtOut, wtHidd)  # fitted Y for this epoch
        meanErr = crossEntNK(yHats, yMat)                   # error for this epoch
        if meanErr > lastErr:                               # slow the learning rate if error increased
            eta /= 2
        epch += 1
    if trace:                                               # print final error
        print('Final iteration #%u, error: %f' % (epch - 1, meanErr))
    return (wtOut, wtHidd), epch, meanErr
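# Note (editorial, not from the original listing): the backProp updates above are
# stochastic gradient descent on the softmax cross-entropy error. With output
# activations a = zs @ wtsOut and yfit = softmax(a),
#   dE/dwtsOut[j, k]  = zs[j] * (yfit[k] - ys[k])                       -> d_Out
#   dE/dwtsHidd[i, j] = xs[i] * [wtsOut @ (yfit - ys)][j] * zs[j] * (1 - zs[j])   -> d_hidd
# The [:, :-1] slice drops the column for the appended bias unit, whose
# activation is constant and therefore carries no hidden-layer gradient.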
def train_0hidd(xMat, yMat, eta, eps=1e-6, trace=False, shuffle=True):
    def feedForward(xs, ys, wts):                           # ys unused; kept for a uniform signature
        return softMax(xs @ wts)

    def backProp(ys, yfit, xs, wts):
        return wts + eta * np.outer(xs, ys - yfit)

    xMat = concateBias(xMat)                                # add bias terms
    (nData, nK), nDim = yMat.shape, xMat.shape[1]           # size of data and classes
    wt = np.random.rand(nDim, nK) / 50 - 0.01               # init wts uniform in (-0.01, 0.01)
    lastErr = np.inf                                        # guarantees the first epoch runs
    yHats = feedForward(xMat, yMat, wt)                     # first feedforward pass
    meanErr = crossEntNK(yHats, yMat)                       # error from random weights
    epch = 0
    while (abs(meanErr - lastErr) > eps) and epch < 1e6:    # while not converged
        if epch % 1000 == 0 and trace:
            print('Iter #%u, error: %f' % (epch, meanErr))
        if shuffle:                                         # shuffle the update order
            seq = getRandomSeq(nData)                       # random seq for stoch. gradient descent
        else:
            seq = np.arange(nData)
        for n in seq:                                       # loop over data set
            x, y = xMat[n], yMat[n]                         # current data point
            yHat = feedForward(x, y, wt)                    # feedforward
            wt = backProp(y, yHat, x, wt)                   # update weights
        lastErr = meanErr
        yHats = feedForward(xMat, yMat, wt)                 # fitted Y for this epoch
        meanErr = crossEntNK(yHats, yMat)                   # error for this epoch
        if meanErr > lastErr:                               # slow the learning rate if error increased
            eta /= 5
        epch += 1
    if trace:                                               # print final error
        print('Final iteration #%u, error: %f' % (epch - 1, meanErr))
    return wt, epch, meanErr
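# --- Assumed helper functions (editorial sketch, not from the original) -----
# The trainers above rely on sigmoid, softMax, concateBias, crossEntNK and
# getRandomSeq, which are defined elsewhere in the original module. The
# versions below are plausible implementations inferred from how they are
# called (bias appended as the last column, softmax over the class axis);
# details such as the cross-entropy normalisation may differ from the originals.
import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))                         # element-wise logistic

def softMax(a):
    a = a - a.max(axis=-1, keepdims=True)                   # stabilise the exponentials
    expA = np.exp(a)
    return expA / expA.sum(axis=-1, keepdims=True)          # normalise over classes

def concateBias(x):
    if x.ndim == 1:                                         # single sample: append a 1
        return np.append(x, 1.0)
    return np.hstack([x, np.ones((x.shape[0], 1))])         # batch: append a column of 1s

def crossEntNK(yHats, yMat):
    return -np.mean(np.sum(yMat * np.log(yHats + 1e-12), axis=1))   # mean cross-entropy

def getRandomSeq(n):
    return np.random.permutation(n)                         # shuffled indices 0 .. n-1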
def pred_0hidd(xMat, wts):
    yHat = softMax(concateBias(xMat) @ wts)
    return yHat.argmax(axis=1)
def pred_2hidd(xMat, wtsOut, wtsHidd2, wtsHidd1):
    z1 = sigmoid(concateBias(xMat) @ wtsHidd1)
    z2 = sigmoid(concateBias(z1) @ wtsHidd2)
    yHat = softMax(concateBias(z2) @ wtsOut)
    return yHat.argmax(axis=1)
def feedForward(xs, ys, wtsOut, wtsHidd2, wtsHidd1):
    z1s = concateBias(sigmoid(xs @ wtsHidd1))
    z2s = concateBias(sigmoid(z1s @ wtsHidd2))
    return (z1s, z2s), softMax(z2s @ wtsOut)
def pred_1hidd(xMat, wtsOut, wtsHidd):
    z = sigmoid(concateBias(xMat) @ wtsHidd)
    yHat = softMax(concateBias(z) @ wtsOut)
    return yHat.argmax(axis=1)
def feedForward(xs, ys, wtsOut, wtsHidd):
    zs = concateBias(sigmoid(xs @ wtsHidd))
    return zs, softMax(zs @ wtsOut)
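# --- Example usage (editorial sketch, not from the original) ----------------
# A minimal illustration of how the 1-hidden-layer trainer and predictor might
# be driven: random 2-D toy data with a linearly separable rule, one-hot
# targets, and a small learning rate. The data and hyper-parameters here are
# invented for demonstration only.
if __name__ == '__main__':
    np.random.seed(0)
    nData, nDim, nK = 200, 2, 2
    X = np.random.randn(nData, nDim)
    labels = (X[:, 0] + X[:, 1] > 0).astype(int)            # toy class labels
    Y = np.eye(nK)[labels]                                  # one-hot targets, shape (nData, nK)
    (wOut, wHidd), nEpochs, err = train_1hidd(X, Y, eta=0.1, nNodes=5, trace=True)
    preds = pred_1hidd(X, wOut, wHidd)
    print('training accuracy: %.3f after %u epochs' % ((preds == labels).mean(), nEpochs))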