def train(self, x_all, y_all, epochs=25):
    '''trains the neural net using stochastic gradient descent
    inputs: x_all - 2D list-like object of all x inputs
            y_all - 2D array of correct classifications
            epochs - number of passes through the data
    outputs: none'''
    # convert x and y to 2D np arrays
    x_all = sh.make_2d(x_all)
    y_all = sh.make_2d(y_all)
    # total number of points to train on
    total_train = len(x_all) * epochs
    # number of points trained on so far
    current_train = 0
    for i in range(epochs):
        # perform multiple passes through the data
        order = np.arange(len(x_all))
        # array of indexes in random order - used for SGD
        np.random.shuffle(order)
        # train on the points in a random order
        for n in order:
            # generate the prediction; pass a 2D array and pull off the
            # first element to get a 1D array
            y_pre = self.predict([x_all[n]])[0]
            # calculate the delta of the last layer - layer L
            self.delta_l[self.total_layers] = self.dx_ds(y_pre) \
                * self.de_dx(y_pre, y_all[n])
            # go backwards from L and calculate the deltas
            for l in reversed(range(2, self.total_layers + 1)):
                # back-propagate the deltas
                self.delta_l[l - 1] = self.dx_ds(self.x_l[l - 1]) * \
                    np.dot(self.weights[l], self.delta_l[l])
            # now update the weights
            for l in range(1, self.total_layers + 1):
                # update weights of each layer
                self.weights[l] -= \
                    self.lr * np.dot(self.x_l[l - 1],
                                     np.transpose(self.delta_l[l])) \
                    + self.reg * self.weights[l]  # regularization term
            current_train += 1
            # change the divisor to tune how often the progress prints;
            # integer division (floored at 1) keeps the modulo well defined
            if current_train % max(1, total_train // 20) == 0:
                print("%.1f%% of training complete"
                      % (current_train / total_train * 100))
    return
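# A minimal usage sketch (not part of the class, kept as a comment so the
# module stays importable). It assumes a hypothetical constructor
# NeuralNet(layer_sizes, lr, reg, act, loss, softmax) and the sh helper
# module already used above; the real constructor may differ.
#
#   net = NeuralNet([2, 8, 1], lr=0.01, reg=1e-4, act="tanh",
#                   loss="squared", softmax=False)
#   X = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
#   Y = [[0.0], [1.0], [1.0], [0.0]]
#   net.train(X, Y, epochs=100)   # one SGD update per point, 100 epochs
#   print(net.predict(X))         # 2D array of predictions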
def predict(self, x_all):
    '''generates a prediction based on the current weights of the net.
    the result of each layer for the most recent point is stored in self.x_l
    inputs: x_all - 2D array of each x point to generate a prediction on
    outputs: y_pre - 2D array of predicted y values'''
    # set up array of prediction results
    y_pre = np.zeros((len(x_all), self.n_outputs))
    for n in range(len(x_all)):
        # reshape the input into a 2D n x 1 array
        self.x_l[0][1:] = sh.make_2d(x_all[n])
        for l in range(1, self.total_layers + 1):
            # multiply weights by x of the previous layer to get the
            # intermediate signal s
            s_j = np.transpose(
                np.dot(np.transpose(self.x_l[l - 1]), self.weights[l]))
            # apply the activation to get x_l
            self.x_l[l] = self.activation(s_j)
        # the prediction is the output of the final layer;
        # reshape it to a one-dimensional output array
        y_pre[n] = (self.x_l[self.total_layers]).reshape(self.n_outputs, )
        # apply softmax if set to do so
        if self.softmax:
            y_pre[n] = sh.apply_softmax(y_pre[n])
    # return the array of predictions
    return y_pre
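# Sketch of one forward step in isolation, assuming numpy is imported as np
# and a first-layer weight matrix of shape (n_inputs + 1, n_hidden), which is
# what the loop above expects; shown as a comment since this file only
# defines the class.
#
#   x0 = np.array([[1.0], [0.5], [-0.2]])             # bias term plus two inputs
#   W1 = np.zeros((3, 4))                              # placeholder layer-1 weights
#   s1 = np.transpose(np.dot(np.transpose(x0), W1))    # (4, 1) signal s
#   x1 = np.tanh(s1)                                   # layer-1 activations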
def dx_ds(self, x_l):
    '''calculates the derivative dx over ds, equivalent to the derivative
    of the activation function with respect to the input s
    inputs: x_l - output of the activation for a given layer
    outputs: dx_ds - 2D array of derivatives'''
    # dx over ds for different activation functions
    if self.act == "tanh":
        # derivative of the tanh activation function
        dx_ds = (1 - x_l ** 2)
    return sh.make_2d(dx_ds)
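# Why (1 - x**2) works for tanh: with x = tanh(s), d/ds tanh(s) = 1 - tanh(s)**2
# = 1 - x**2, so the derivative comes straight from the stored activations.
# A quick numerical check (sketch, assumes numpy as np):
#
#   s = 0.3
#   x = np.tanh(s)
#   analytic = 1 - x ** 2
#   numeric = (np.tanh(s + 1e-6) - np.tanh(s - 1e-6)) / 2e-6
#   # analytic and numeric agree to roughly 1e-10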
def de_dx(self, y_pre, y_n):
    '''returns the derivative de over dx, the derivative of the error
    function with respect to x.
    inputs: y_pre - predicted y value
            y_n - true y value
    outputs: de_dx - 2D array of derivatives'''
    # de over dx for different loss functions
    if self.loss == "squared":
        # de/dx of squared loss: e = (y - y_n) ** 2
        de_dx = 2 * (y_pre - y_n)
    elif self.loss == "cce":
        # de/dx of cross-entropy: e = -(y_n*log(y) + (1 - y_n)*log(1 - y))
        de_dx = -1 * y_n / y_pre + (1 - y_n) / (1 - y_pre)
    return sh.make_2d(de_dx)
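# Finite-difference check of the cross-entropy branch (sketch, assumes numpy
# as np; values are illustrative only):
#
#   y_pre, y_n = 0.7, 1.0
#   analytic = -y_n / y_pre + (1 - y_n) / (1 - y_pre)            # ~ -1.4286
#   e = lambda y: -(y_n * np.log(y) + (1 - y_n) * np.log(1 - y))
#   eps = 1e-6
#   numeric = (e(y_pre + eps) - e(y_pre - eps)) / (2 * eps)      # ~ -1.4286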