import time

import linalg  # the project's hand-rolled linear algebra module


def main():
    n = 6000
    v = [x for x in range(n)]
    m = [[x for x in range(n)] for _ in range(n)]
    time_start = time.time()
    # Time three matrix-vector products on a 6000x6000 matrix.
    for _ in range(3):
        linalg.mdotv(m, v)
    print(time.time() - time_start)
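The benchmark leans on linalg.mdotv, whose implementation isn't shown in this section. A minimal pure-Python sketch, assuming mdotv(m, v) multiplies a matrix (stored as a list of rows) by a vector, which is how the call sites here use it:

# Hypothetical sketch of linalg.mdotv, inferred from its call sites;
# the actual module may differ.
def mdotv(m, v):
    # One dot product per row: result[i] = sum_j m[i][j] * v[j].
    return [sum(m_ij * v_j for m_ij, v_j in zip(row, v)) for row in m]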
def gradient_single(self, train_x, train_y):
    # Forward pass: record each layer's activation A and the derivative
    # of its activation function at z.
    # For batch, want to compute A and Z for each input all at once
    sigma_Z = []
    A = [train_x]
    for l, layer in enumerate(self.layers):
        z = layer.compute_z(A[l])
        a = layer.compute_a(z)
        A.append(a)
        sigma_Z.append(layer.compute_da(z))

    # Output-layer error: delta_L = dC/dA(L) * sigma'(Z(L)), elementwise.
    delta_L = linalg.vtimesw(self.cost_d_function(train_y, A[-1]), sigma_Z[-1])
    prev_delta = delta_L

    dCdb = [[] for _ in range(len(self.layers))]
    dCdw = [[] for _ in range(len(self.layers))]
    dCdb[-1] = delta_L
    dCdw[-1] = linalg.outer(delta_L, A[-2])

    # TODO: for batch, we should loop through each training example and add/reduce it to the gradient matrices
    for l_plus_1 in range(len(self.layers) - 1, 0, -1):
        layer = self.layers[l_plus_1]
        # Propagate the error backwards:
        # delta(l) = (W(l+1)^T delta(l+1)) * sigma'(Z(l)), elementwise.
        delta = linalg.vtimesw(
            linalg.mdotv(linalg.transpose(layer.w), prev_delta),
            sigma_Z[l_plus_1 - 1],
        )
        # Bias and weight derivatives
        dCdb[l_plus_1 - 1] = delta
        dCdw[l_plus_1 - 1] = linalg.outer(delta, A[l_plus_1 - 1])
        prev_delta = delta

    # TODO: divide by N, were this not to be a "single" gradient
    return dCdb, dCdw
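Besides mdotv, the backward pass relies on three more linalg helpers that this section doesn't show. Hypothetical sketches follow, with behavior inferred purely from how gradient_single calls them (vtimesw as an elementwise product, outer as an outer product, transpose as a row/column swap); the real module may be implemented differently:

# Hypothetical sketches of the remaining linalg helpers used above,
# inferred from their call sites; the actual module may differ.
def vtimesw(v, w):
    # Elementwise (Hadamard) product of two equal-length vectors.
    return [v_i * w_i for v_i, w_i in zip(v, w)]

def outer(v, w):
    # Outer product: result[i][j] = v[i] * w[j], shaped len(v) x len(w).
    return [[v_i * w_j for w_j in w] for v_i in v]

def transpose(m):
    # Swap rows and columns of matrix m (stored as a list of rows).
    return [list(col) for col in zip(*m)]

The shapes line up as backprop requires: outer(delta, A[l]) has the same dimensions as the layer's weight matrix, and transpose(layer.w) maps the error vector from a layer back to the size of the layer before it.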