def sqerror_gradients(network: List[List[Vector]],
                      input_vector: Vector,
                      target_vector: Vector) -> List[List[Vector]]:
    """Given a neural network, an input vector, and a target vector,
    make a prediction and compute the gradient of the squared error
    loss with respect to the neuron weights.

    Assumes a two-layer network (one hidden layer, one output layer);
    the ``output * (1 - output)`` factors are the derivative of the
    sigmoid activation applied by the neurons.

    Returns ``[hidden_grads, output_grads]``, mirroring the network's
    [hidden layer, output layer] structure; each neuron's gradient
    vector has a final entry for its bias weight.
    """
    # forward pass
    hidden_outputs, outputs = feed_forward(network, input_vector)

    # gradients with respect to output neuron pre-activation outputs
    output_deltas = [output * (1 - output) * (output - target)
                     for output, target in zip(outputs, target_vector)]

    # gradients with respect to output neuron weights
    # (the trailing [1] is the bias input)
    output_grads = [[output_deltas[i] * hidden_output
                     for hidden_output in hidden_outputs + [1]]
                    for i in range(len(network[-1]))]

    # gradients with respect to hidden neuron pre-activation outputs,
    # back-propagated through the output layer's weight columns
    hidden_deltas = [hidden_output * (1 - hidden_output) *
                         dot(output_deltas, [n[i] for n in network[-1]])
                     for i, hidden_output in enumerate(hidden_outputs)]

    # gradients with respect to hidden neuron weights
    # (loop variable renamed so it doesn't shadow the builtin `input`)
    hidden_grads = [[hidden_deltas[i] * input_value
                     for input_value in input_vector + [1]]
                    for i in range(len(network[0]))]

    return [hidden_grads, output_grads]
def neuron_output(weights: Vector, inputs: Vector) -> float:
    """Fire a single sigmoid neuron.

    `weights` includes the bias term as its last entry; `inputs` is
    expected to include a corresponding 1 so the bias participates in
    the dot product.
    """
    weighted_sum = dot(weights, inputs)
    return sigmoid(weighted_sum)
def ridge_penalty(beta: Vector, alpha: float) -> float:
    """Ridge (L2) regularization term: `alpha` times the sum of squared
    coefficients, excluding the leading constant term `beta[0]`.
    """
    coefficients = beta[1:]
    return alpha * dot(coefficients, coefficients)
def predict(x: Vector, beta: Vector) -> float:
    """Linear prediction for feature vector `x` under coefficients `beta`.

    Assumes the first element of `x` is 1, so `beta[0]` acts as the
    intercept.
    """
    prediction = dot(x, beta)
    return prediction