import numpy as np

# SetupParams, NetworkParams, ResultParams, NoUpdate, create and
# make_rt_params are assumed to be defined elsewhere in this package.


def check_gradient(parameters):
    """Numerically verify the network's backprop gradients.

    Perturbs every weight and bias by +/-epsilon, approximates each partial
    derivative with a centered difference, and returns the relative error
    between the approximation and the analytic gradients the network reports.
    """
    assert isinstance(parameters, SetupParams)
    print("checking gradient")
    network = create(parameters)
    rt_params = make_rt_params(parameters)
    if parameters.iterations > 0:
        # run the configured number of training iterations first
        for i in range(parameters.iterations):
            network.process(parameters.x, rt_params)
    # this is the master copy of the weights
    nw_params = NetworkParams()
    network.get_weights(nw_params)
    print("weights:" + str(nw_params))
    # make the network use the weights provided in params instead of its local weights
    network.set_local_params(False)
    rt_params.set_params(nw_params)
    # no more parameter updates
    rt_params.set_update(NoUpdate())
    res = network.process(parameters.x, rt_params)
    assert isinstance(res, ResultParams)
    print("cost1:" + str(res.cost))
    res_params = res.get_params()
    assert isinstance(res_params, NetworkParams)
    print("grads 0: " + str(res_params))
    # rerun the network with the provided params; the cost must be unchanged
    rt_params.set_params(nw_params)
    res = network.process(parameters.x, rt_params)
    print("after: " + str(res.cost))
    epsilon = 1e-8
    approx = []
    grad = []
    for layer in range(1, len(parameters.topology)):
        print("processing layer:" + str(layer))
        assert nw_params.has_params(layer) and res_params.has_derivatives(layer)
        w, b = nw_params.get_params(layer)
        dw, db = res_params.get_derivatives(layer)
        # perturb the weights one entry at a time
        w_tmp = np.copy(w)
        nw_params.set_params(layer, w_tmp, b)
        rt_params.set_params(nw_params)
        for i in range(w.shape[0]):
            for j in range(w.shape[1]):
                saved_val = w_tmp[i, j]
                w_tmp[i, j] = saved_val + epsilon
                res_plus = network.process(parameters.x, rt_params)
                w_tmp[i, j] = saved_val - epsilon
                res_minus = network.process(parameters.x, rt_params)
                approx.append((res_plus.cost - res_minus.cost) / (2 * epsilon))
                grad.append(dw[i, j])
                w_tmp[i, j] = saved_val
        # perturb the biases one entry at a time
        b_tmp = np.copy(b)
        nw_params.set_params(layer, w, b_tmp)
        rt_params.set_params(nw_params)
        for i in range(b.shape[0]):
            saved_val = b_tmp[i, 0]
            b_tmp[i, 0] = saved_val + epsilon
            res_plus = network.process(parameters.x, rt_params)
            b_tmp[i, 0] = saved_val - epsilon
            res_minus = network.process(parameters.x, rt_params)
            approx.append((res_plus.cost - res_minus.cost) / (2 * epsilon))
            grad.append(db[i, 0])
            b_tmp[i, 0] = saved_val
    print("approx:" + str(len(approx)))
    print("grads: " + str(len(grad)))
    approx = np.array(approx).reshape((1, len(approx)))
    grad = np.array(grad).reshape((1, len(grad)))
    print("approx:" + str(approx))
    print("grad :" + str(grad))
    # relative error between the numerical and analytic gradients
    err = np.linalg.norm(approx - grad) / (np.linalg.norm(approx) + np.linalg.norm(grad))
    print("error:" + str(err))
    return err
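
# A self-contained sketch of the same centered-difference check that
# check_gradient performs, applied to a cost with a known analytic gradient
# (cost(w) = 0.5 * sum(w**2), whose gradient is w itself). Everything here is
# illustrative and independent of the network classes above.
def _gradient_check_demo(eps=1e-8):
    rng = np.random.default_rng(0)
    w = rng.standard_normal((4, 3))
    grad = w.copy()  # analytic gradient of 0.5 * sum(w**2)
    approx = np.zeros_like(w)
    for i in range(w.shape[0]):
        for j in range(w.shape[1]):
            saved = w[i, j]
            w[i, j] = saved + eps
            cost_plus = 0.5 * np.sum(w ** 2)
            w[i, j] = saved - eps
            cost_minus = 0.5 * np.sum(w ** 2)
            w[i, j] = saved
            approx[i, j] = (cost_plus - cost_minus) / (2 * eps)
    # same relative-error measure as check_gradient; values near 1e-7 or
    # below indicate the analytic gradient agrees with the approximation
    return np.linalg.norm(approx - grad) / (np.linalg.norm(approx) + np.linalg.norm(grad))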
class RuntimeParams:
    def __init__(self):
        self.__learn = False
        self.__y = None
        self.__verbose = False
        self.__params = NetworkParams()
        self.__weight = None
        self.__bias = None
        self.__alpha = 0.01
        self.__update = NoUpdate()
        self.__lambda = 0
        self.__keep_prob = 1
        self.__threshold = 0.5
        # self.__max_z = 0
        self.__compute_y = False
        self.__check_overflow = False

    # hand down these weights instead of the weights held in the layers;
    # makes the layers pass back derivatives in the results
    def set_params(self, params):
        self.__params = params

    def get_params(self, layer_idx):
        return self.__params.get_params(layer_idx)

    def inc(self, alpha=0):
        self.__update.next_it(alpha)

    def set_update(self, update):
        assert isinstance(update, Update)
        self.__update = update

    def is_update(self):
        return self.__update.is_update

    def update(self, w, b, vw, vb, layer_idx):
        return self.__update.update(w, b, vw, vb, layer_idx)

    def set_lambda(self, lambd):
        self.__lambda = lambd

    def get_lambda(self):
        return self.__lambda

    def set_keep_prob(self, prob):
        assert 0 < prob <= 1
        self.__keep_prob = prob

    def get_keep_prob(self):
        return self.__keep_prob

    def set_check_overflow(self, co):
        self.__check_overflow = co

    def get_check_overflow(self):
        return self.__check_overflow

    # def set_max_z(self, max_z):
    #     assert max_z >= 0
    #     self.__max_z = max_z

    # def get_max_z(self):
    #     return self.__max_z

    def set_threshold(self, threshold):
        assert 0 < threshold < 1
        self.__threshold = threshold

    def get_threshold(self):
        return self.__threshold

    def set_compute_y(self, compute_y):
        self.__compute_y = compute_y

    def set_eval(self, y=None):
        self.__y = y
        self.__learn = False
        self.__compute_y = True

    def set_train(self, y, compute_y=False):
        self.__y = y
        self.__learn = True
        self.__compute_y = compute_y

    def is_learn(self):
        return self.__learn

    def is_compute_y(self):
        return self.__compute_y

    def get_y(self):
        return self.__y

    def set_verbose(self, verbosity):
        self.__verbose = verbosity

    def is_verbose(self):
        return self.__verbose
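
# Sketch of typical RuntimeParams usage for one training pass followed by an
# evaluation pass. The `network` argument and the `update` strategy are
# assumptions: any object exposing the process(x, rt_params) call used above,
# and any concrete Update subclass, should fit. The hyperparameter values are
# illustrative only.
def run_train_then_eval(network, x_train, y_train, x_test, y_test, update):
    rt = RuntimeParams()
    rt.set_train(y_train)   # enable learning against the labels
    rt.set_lambda(0.7)      # L2 regularization strength
    rt.set_keep_prob(0.8)   # dropout keep probability
    rt.set_update(update)   # parameter update strategy (an Update subclass)
    network.process(x_train, rt)

    rt.set_eval(y_test)     # disable learning and force y computation
    rt.set_threshold(0.5)   # decision threshold for the predictions
    return network.process(x_test, rt)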