def __init__(self):
    """Initialize runtime state to inert defaults (no learning, no labels)."""
    # mode flags
    self.__learn = False
    self.__compute_y = False
    self.__verbose = False
    # expected labels; None until supplied by the caller
    self.__y = None
    # externally managed parameters and per-layer overrides
    self.__params = NetworkParams()
    self.__weight = None
    self.__bias = None
    # optimization / regularization settings
    self.__alpha = 0.01
    self.__update = NoUpdate()
    self.__lambda = 0
    self.__keep_prob = 1
    self.__threshold = 0.5
    # self.__max_z = 0
    self.__check_overflow = False
def from_dict(setup_dict):
    """Reconstruct a SetupParams object from its dict serialization.

    Scalar fields are copied key-by-key (the "lambda" key maps to the
    ``lambd`` attribute), activation classes are re-imported from their
    ``str(cls)`` representation, and data matrices are rebuilt as numpy
    arrays when present.
    """
    def import_class(cname):
        # Resolve "pkg.mod.Class" to the class object via a getattr chain.
        parts = cname.split('.')
        obj = __import__(parts[0])
        for part in parts[1:]:
            obj = getattr(obj, part)
        return obj

    setup = SetupParams()
    field_map = {
        "alpha": "alpha",
        "alpha_min": "alpha_min",
        "beta1": "beta1",
        "beta2": "beta2",
        "epsilon": "epsilon",
        "lambda": "lambd",
        "keep_prob": "keep_prob",
        "check_overflow": "check_overflow",
        "graph": "graph",
        "iterations": "iterations",
        "topology": "topology",
        "verbosity": "verbosity",
        "local_params": "local_params",
        # "max_z": "max_z",
        "threshold": "threshold",
    }
    for key, attr in field_map.items():
        setattr(setup, attr, setup_dict[key])
    # Activations were stored as str(cls), e.g. "<class 'pkg.mod.ReLU'>".
    pattern = re.compile("^<class '([^']+)'>$")
    setup.activations = []
    for act_name in setup_dict["activations"]:
        match = pattern.fullmatch(act_name)
        assert match is not None
        setup.activations.append(import_class(match.group(1)))
    # Optional data sets: training / cross-validation / test.
    for key in ("x", "y", "x_cv", "y_cv", "x_t", "y_t"):
        if key in setup_dict:
            setattr(setup, key, np.array(setup_dict[key]))
    if "params" in setup_dict:
        setup.params = NetworkParams.from_dict(setup_dict["params"])
    return setup
class ResultParams:
    """Outcome of one network pass: predictions, cost, error state and —
    optionally — per-layer parameters / gradients passed back by the layers.
    """

    def __init__(self, y_hat=None, cost=None, error=None):
        # error: failure description; None means success
        self.error = error
        # y_hat: network output / predictions
        self.y_hat = y_hat
        # BUG FIX: the cost argument was previously ignored (self.cost was
        # always reset to 0); honor it now, keeping 0 as the default.
        self.cost = cost if cost is not None else 0
        self.da = None
        # lazily created container for per-layer params / derivatives
        self.__params = None
        # NOTE(review): __dv is never read or written elsewhere in this
        # block — presumably reserved for derivative storage; confirm.
        self.__dv = None

    def is_error(self):
        """Return True when the pass reported a failure."""
        return self.error is not None

    def set_params(self, layer_index, w, b):
        """Record weights/bias for one layer, creating the container lazily."""
        if self.__params is None:
            self.__params = NetworkParams(layer_index, w, b)
        else:
            self.__params.set_params(layer_index, w, b)

    def get_params(self):
        """Return the collected NetworkParams container (or None)."""
        return self.__params

    def has_params(self):
        """Return True once any params/derivatives have been recorded."""
        return self.__params is not None

    def set_derivatives(self, layer_index, dw, db):
        """Record dW/db for one layer, creating the container lazily.

        The original duplicated the identical set_derivatives call in both
        branches; collapsed into create-if-missing plus a single call.
        """
        if self.__params is None:
            self.__params = NetworkParams()
        self.__params.set_derivatives(layer_index, dw, db)
def __init__(self):
    """Fill every setup field with a safe default value."""
    # learning-rate settings
    self.alpha = 0.01
    self.alpha_min = 0.01
    # optimizer coefficients and numerical guard
    self.beta1 = 0
    self.beta2 = 0
    self.epsilon = 1e-8
    # regularization
    self.lambd = 0
    self.keep_prob = 1
    self.check_overflow = False
    self.iterations = 1000
    # default activations: ReLU then Sigmoid
    self.activations = [act.ReLU, act.Sigmoid]
    self.topology = None
    self.verbosity = 0
    self.local_params = True
    self.graph = False
    self.params = NetworkParams()
    # training / cross-validation / test data sets (optional)
    self.x = None
    self.y = None
    self.x_cv = None
    self.y_cv = None
    self.x_t = None
    self.y_t = None
    # self.max_z = 0
    self.threshold = 0.5
class SetupParams:
    """Hyper-parameters, data sets and (optional) initial weights describing
    one training run, with dict round-trip serialization.
    """

    def __init__(self):
        # learning-rate settings
        self.alpha = 0.01
        self.alpha_min = 0.01
        # optimizer coefficients and numerical guard
        self.beta1 = 0
        self.beta2 = 0
        self.epsilon = 1e-8
        # regularization
        self.lambd = 0
        self.keep_prob = 1
        self.check_overflow = False
        self.iterations = 1000
        # default activations: ReLU then Sigmoid
        self.activations = [act.ReLU, act.Sigmoid]
        self.topology = None
        self.verbosity = 0
        self.local_params = True
        self.graph = False
        self.params = NetworkParams()
        # training / cross-validation / test data sets (optional)
        self.x = None
        self.y = None
        self.x_cv = None
        self.y_cv = None
        self.x_t = None
        self.y_t = None
        # self.max_z = 0
        self.threshold = 0.5

    def valid(self):
        """Assert that mandatory settings are present and plausible."""
        assert self.alpha > 0
        # NOTE(review): alpha_min >= alpha looks inverted for a "minimum" —
        # confirm the intended relationship.
        assert self.alpha_min >= self.alpha
        assert self.iterations > 0
        assert self.topology is not None
        assert len(self.topology) > 1

    def to_dict(self):
        """Serialize to a plain dict; numpy arrays become nested lists."""
        result = {
            "alpha": self.alpha,
            "alpha_min": self.alpha_min,
            "beta1": self.beta1,
            "beta2": self.beta2,
            "epsilon": self.epsilon,
            "lambda": self.lambd,
            "keep_prob": self.keep_prob,
            "check_overflow": self.check_overflow,
            "graph": self.graph,
            "iterations": self.iterations,
            "topology": self.topology,
            "verbosity": self.verbosity,
            "local_params": self.local_params,
            # "max_z": self.max_z,
            "threshold": self.threshold
        }
        # activation classes are stored via their str(cls) representation
        result["activations"] = [str(a) for a in self.activations]
        if not self.params.is_empty():
            result["params"] = self.params.to_dict()
        # only include data sets that were actually provided
        for key in ("x", "y", "x_cv", "y_cv", "x_t", "y_t"):
            data = getattr(self, key)
            if data is not None:
                result[key] = data.tolist()
        return result

    @staticmethod
    def from_dict(setup_dict):
        """Reconstruct a SetupParams from a dict produced by to_dict()."""
        def import_class(cname):
            # Resolve "pkg.mod.Class" to the class object via getattr chain.
            parts = cname.split('.')
            obj = __import__(parts[0])
            for part in parts[1:]:
                obj = getattr(obj, part)
            return obj

        setup = SetupParams()
        field_map = {
            "alpha": "alpha",
            "alpha_min": "alpha_min",
            "beta1": "beta1",
            "beta2": "beta2",
            "epsilon": "epsilon",
            "lambda": "lambd",
            "keep_prob": "keep_prob",
            "check_overflow": "check_overflow",
            "graph": "graph",
            "iterations": "iterations",
            "topology": "topology",
            "verbosity": "verbosity",
            "local_params": "local_params",
            # "max_z": "max_z",
            "threshold": "threshold",
        }
        for key, attr in field_map.items():
            setattr(setup, attr, setup_dict[key])
        # activations were stored as str(cls), e.g. "<class 'pkg.mod.ReLU'>"
        pattern = re.compile("^<class '([^']+)'>$")
        setup.activations = []
        for act_name in setup_dict["activations"]:
            match = pattern.fullmatch(act_name)
            assert match is not None
            setup.activations.append(import_class(match.group(1)))
        for key in ("x", "y", "x_cv", "y_cv", "x_t", "y_t"):
            if key in setup_dict:
                setattr(setup, key, np.array(setup_dict[key]))
        if "params" in setup_dict:
            setup.params = NetworkParams.from_dict(setup_dict["params"])
        return setup

    def __str__(self):
        """Compact run identifier: topology, alpha, lambda, iterations."""
        # NOTE(review): topology[0] is emitted twice (once after 'n', once by
        # the loop) — confirm the duplication is intended.
        pieces = ['n' + str(self.topology[0])]
        for layer_size in self.topology:
            pieces.append('-' + str(layer_size))
        pieces.append('a' + str(self.alpha))
        pieces.append('l' + str(self.lambd))
        pieces.append('i' + str(self.iterations))
        return ''.join(pieces)
def set_derivatives(self, layer_index, dw, db):
    """Record dW/db for one layer, creating the container lazily.

    The original if/else issued the identical set_derivatives call in both
    branches; collapsed into create-if-missing plus a single call.
    """
    if self.__params is None:
        self.__params = NetworkParams()
    self.__params.set_derivatives(layer_index, dw, db)
def set_params(self, layer_index, w, b):
    """Record weights/bias for one layer, creating the container on first use."""
    if self.__params is not None:
        self.__params.set_params(layer_index, w, b)
    else:
        self.__params = NetworkParams(layer_index, w, b)
def check_gradient(parameters):
    """Numerically verify backprop gradients for the configured network.

    Trains the network for the configured iterations, freezes the weights,
    then compares the analytic gradients against centred finite differences
    (cost(w+eps) - cost(w-eps)) / (2*eps) for every weight and bias entry.
    Returns the relative error ||approx - grad|| / (||approx|| + ||grad||);
    values near zero indicate matching gradients.
    """
    assert isinstance(parameters, SetupParams)
    print("checking gradient")
    network = create(parameters)
    rt_params = make_rt_params(parameters)
    if parameters.iterations > 0:
        # run defined iterations
        for i in range(0, parameters.iterations):
            network.process(parameters.x, rt_params)
    # this is my master copy of the weights
    nw_params = NetworkParams()
    network.get_weights(nw_params)
    print("weights:" + str(nw_params))
    # make network use weights provided in params instead of local weights
    network.set_local_params(False)
    rt_params.set_params(nw_params)
    # no more parameter updates
    rt_params.set_update(NoUpdate())
    res = network.process(parameters.x, rt_params)
    assert isinstance(res, ResultParams)
    print("cost1:" + str(res.cost))
    res_params = res.get_params()
    assert isinstance(res_params, NetworkParams)
    print("grads 0: " + str(res_params))
    # test network with params provided
    rt_params.set_params(nw_params)
    res = network.process(parameters.x, rt_params)
    print("after: " + str(res.cost))
    # NOTE(review): 1e-8 is unusually small for centred differences (1e-4 to
    # 1e-6 is typical); floating-point cancellation in res_plus - res_minus
    # may dominate — confirm this works for the cost scale used here.
    epsilon = 1e-8
    approx = []  # finite-difference gradient estimates, flattened in order
    grad = []    # analytic backprop gradients, flattened in the same order
    for l in range(1, len(parameters.topology)):
        print("processing layer:" + str(l))
        # '&' acts like 'and' here since both operands are plain bools
        assert nw_params.has_params(l) & res_params.has_derivatives(l)
        w, b = nw_params.get_params(l)
        dw, db = res_params.get_derivatives(l)
        # perturb a copy of w so the master weights stay untouched
        w_tmp = np.copy(w)
        nw_params.set_params(l, w_tmp, b)
        rt_params.set_params(nw_params)
        for i in range(0, w.shape[0]):
            for j in range(0, w.shape[1]):
                saved_val = w_tmp[i, j]
                w_tmp[i, j] = saved_val + epsilon
                res_plus = network.process(parameters.x, rt_params)
                w_tmp[i, j] = saved_val - epsilon
                res_minus = network.process(parameters.x, rt_params)
                approx.append((res_plus.cost - res_minus.cost) / (2*epsilon))
                grad.append(dw[i, j])
                # restore the entry before perturbing the next one
                w_tmp[i, j] = saved_val
        # same procedure for the bias column vector
        b_tmp = np.copy(b)
        nw_params.set_params(l, w, b_tmp)
        rt_params.set_params(nw_params)
        for i in range(0, b.shape[0]):
            saved_val = b_tmp[i, 0]
            b_tmp[i, 0] = saved_val + epsilon
            res_plus = network.process(parameters.x, rt_params)
            b_tmp[i, 0] = saved_val - epsilon
            res_minus = network.process(parameters.x, rt_params)
            approx.append((res_plus.cost - res_minus.cost) / (2 * epsilon))
            grad.append(db[i, 0])
            b_tmp[i, 0] = saved_val
    # print("res." + str(i) + ":" + str(len(layer_params[len(layer_params) - 1])))
    print("approx:" + str(len(approx)))
    print("grads: " + str(len(grad)))
    approx = np.array(approx).reshape((1, len(approx)))
    grad = np.array(grad).reshape((1, len(grad)))
    print("approx:" + str(approx))
    print("grad :" + str(grad))
    # relative error between numeric and analytic gradients
    err = np.linalg.norm(approx - grad) / (np.linalg.norm(approx) + np.linalg.norm(grad))
    print("error:" + str(err))
    return err
class RuntimeParams:
    """Mutable per-run state handed to the network on each process() call.

    Carries label data, learning/prediction flags, regularization settings
    and — when layer-local weights are disabled — the externally managed
    parameters the layers should use.
    """

    def __init__(self):
        # mode flags
        self.__learn = False
        self.__compute_y = False
        self.__verbose = False
        # expected labels (set by set_train / set_eval)
        self.__y = None
        # externally supplied parameters and per-layer overrides
        self.__params = NetworkParams()
        self.__weight = None
        self.__bias = None
        # optimization / regularization settings
        self.__alpha = 0.01
        self.__update = NoUpdate()
        self.__lambda = 0
        self.__keep_prob = 1
        self.__threshold = 0.5
        # self.__max_z = 0
        self.__check_overflow = False

    # hand down weights instead of weights held in layers;
    # makes layers pass back derivatives in results
    def set_params(self, params):
        self.__params = params

    def get_params(self, layer_idx):
        """Return the externally held parameters for the given layer."""
        return self.__params.get_params(layer_idx)

    def inc(self, alpha=0):
        """Advance the update strategy to its next iteration."""
        self.__update.next_it(alpha)

    def set_update(self, update):
        """Install the parameter-update strategy (must be an Update)."""
        assert isinstance(update, Update)
        self.__update = update

    def is_update(self):
        # NOTE(review): returns the attribute itself, not a call — confirm
        # Update.is_update is a plain attribute or property.
        return self.__update.is_update

    def update(self, w, b, vw, vb, layer_idx):
        """Delegate one parameter update to the configured strategy."""
        return self.__update.update(w, b, vw, vb, layer_idx)

    def set_lambda(self, lambd):
        self.__lambda = lambd

    def get_lambda(self):
        return self.__lambda

    def set_keep_prob(self, prob):
        """Set the dropout keep probability, required in (0, 1]."""
        assert 0 < prob <= 1
        self.__keep_prob = prob

    def get_keep_prob(self):
        return self.__keep_prob

    def set_check_overflow(self, co):
        self.__check_overflow = co

    def get_check_overflow(self):
        return self.__check_overflow

    # def set_max_z(self, max_z):
    #     assert max_z >= 0
    #     self.__max_z = max_z

    # def get_max_z(self):
    #     return self.__max_z

    def set_threshold(self, threshold):
        """Set the decision threshold, required strictly inside (0, 1)."""
        assert 0 < threshold < 1
        self.__threshold = threshold

    def get_threshold(self):
        return self.__threshold

    def set_compute_y(self, compute_y):
        self.__compute_y = compute_y

    def set_eval(self, y=None):
        """Switch to evaluation mode: learning off, predictions requested."""
        self.__y = y
        self.__learn = False
        self.__compute_y = True

    def set_train(self, y, compute_y=False):
        """Switch to training mode with the given labels."""
        self.__y = y
        self.__learn = True
        self.__compute_y = compute_y

    def is_learn(self):
        return self.__learn

    def is_compute_y(self):
        return self.__compute_y

    def get_y(self):
        return self.__y

    def set_verbose(self, verbosity):
        self.__verbose = verbosity

    def is_verbose(self):
        return self.__verbose