def minimize_stochastic(target_fn, gradient_fn, x, y, theta_0, alpha_0=0.01):
    """Minimize ``target_fn`` via stochastic gradient descent.

    target_fn:   f(x_i, y_i, theta) -> per-point loss
    gradient_fn: grad f(x_i, y_i, theta) -> per-point gradient vector
    x, y:        parallel sequences of inputs and targets
    theta_0:     initial parameter vector
    alpha_0:     initial step size

    Returns the theta with the lowest total loss seen. Stops after 100
    consecutive iterations with no improvement.
    """
    # Materialize the pairs: zip() is a lazy one-shot iterator in Python 3,
    # so without list() it would be exhausted after the first pass through
    # the while loop and every later iteration would see no data at all.
    data = list(zip(x, y))

    theta = theta_0                                # current guess
    alpha = alpha_0                                # current step size
    min_theta, min_value = None, float("inf")      # best seen so far
    iterations_with_no_improvement = 0

    # if we ever go 100 iterations with no improvement, stop
    while iterations_with_no_improvement < 100:
        value = sum(target_fn(x_i, y_i, theta) for x_i, y_i in data)

        if value < min_value:
            # found a new minimum: remember it and restore the
            # original step size
            min_theta, min_value = theta, value
            iterations_with_no_improvement = 0
            alpha = alpha_0
        else:
            # not improving, so try shrinking the step size
            iterations_with_no_improvement += 1
            alpha *= 0.9

        # take a gradient step for each data point, in random order
        for x_i, y_i in in_random_order(data):
            gradient_i = gradient_fn(x_i, y_i, theta)
            theta = vector_subtract(theta, scalar_multiply(alpha, gradient_i))

    return min_theta
def remove_projection_from_vector(v, w):
    """Project ``v`` onto ``w`` and subtract the result from ``v``.

    Used to strip an already-found component out of the data before
    searching for further components (e.g. in PCA): the returned vector
    has no component in the direction of ``w``.
    """
    # NOTE: the original had a second bare string after the docstring;
    # that was a no-op statement, so the two have been merged into one
    # proper docstring.
    return vector_subtract(v, project(v, w))