Code example #1
import numpy as np

# SetupParams, NetworkParams, ResultParams, NoUpdate, create and make_rt_params
# are assumed to come from the rest of the project.
def check_gradient(parameters):
    assert isinstance(parameters, SetupParams)
    print("checking gradient")
    network = create(parameters)
    rt_params = make_rt_params(parameters)
    if parameters.iterations > 0:
        # run the configured number of training iterations
        for _ in range(parameters.iterations):
            network.process(parameters.x, rt_params)

    # this is my master copy of the weights
    nw_params = NetworkParams()
    network.get_weights(nw_params)
    print("weights:" + str(nw_params))
    # make the network use the weights provided via rt_params instead of its local copies
    network.set_local_params(False)
    rt_params.set_params(nw_params)
    # no more parameter updates
    rt_params.set_update(NoUpdate())
    res = network.process(parameters.x, rt_params)
    assert isinstance(res, ResultParams)

    print("cost1:" + str(res.cost))

    res_params = res.get_params()
    assert isinstance(res_params, NetworkParams)
    print("grads 0:  " + str(res_params))

    # test network with params provided
    rt_params.set_params(nw_params)
    res = network.process(parameters.x, rt_params)
    print("after: " + str(res.cost))

    epsilon = 1e-8
    approx = []
    grad = []

    for l in range(1, len(parameters.topology)):
        print("processing layer:" + str(l))
        assert nw_params.has_params(l) and res_params.has_derivatives(l)
        w, b = nw_params.get_params(l)
        dw, db = res_params.get_derivatives(l)
        w_tmp = np.copy(w)
        nw_params.set_params(l, w_tmp, b)
        rt_params.set_params(nw_params)

        for i in range(w.shape[0]):
            for j in range(w.shape[1]):
                saved_val = w_tmp[i, j]
                w_tmp[i, j] = saved_val + epsilon
                res_plus = network.process(parameters.x, rt_params)
                w_tmp[i, j] = saved_val - epsilon
                res_minus = network.process(parameters.x, rt_params)
                approx.append((res_plus.cost - res_minus.cost) / (2*epsilon))
                grad.append(dw[i, j])
                w_tmp[i, j] = saved_val

        b_tmp = np.copy(b)
        nw_params.set_params(l, w, b_tmp)
        rt_params.set_params(nw_params)

        for i in range(b.shape[0]):
            saved_val = b_tmp[i, 0]
            b_tmp[i, 0] = saved_val + epsilon
            res_plus = network.process(parameters.x, rt_params)
            b_tmp[i, 0] = saved_val - epsilon
            res_minus = network.process(parameters.x, rt_params)
            approx.append((res_plus.cost - res_minus.cost) / (2 * epsilon))
            grad.append(db[i, 0])
            b_tmp[i, 0] = saved_val

        # print("res." + str(i) + ":" + str(len(layer_params[len(layer_params) - 1])))

    print("approx:" + str(len(approx)))
    print("grads: " + str(len(grad)))

    approx = np.array(approx).reshape((1, len(approx)))
    grad = np.array(grad).reshape((1, len(grad)))

    print("approx:" + str(approx))
    print("grad  :" + str(grad))
    err = np.linalg.norm(approx - grad) / (np.linalg.norm(approx) + np.linalg.norm(grad))
    print("error:" + str(err))
    return err
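
For reference, the loops above implement the standard two-sided (central-difference) gradient check: each weight and bias entry θ is nudged by ε = 1e-8 in both directions, the cost J is re-evaluated, and the numerical estimate

    approx ≈ (J(θ + ε) - J(θ - ε)) / (2ε)

is collected alongside the analytic derivative returned by backpropagation. The returned score is the relative error

    err = ||approx - grad||₂ / (||approx||₂ + ||grad||₂)

As a rule of thumb, a result on the order of 1e-7 or smaller suggests the analytic gradients match, while values around 1e-3 or larger usually point to a bug in the backward pass.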
Code example #2
class RuntimeParams:
    def __init__(self):
        self.__learn = False
        self.__y = None
        self.__verbose = False
        self.__params = NetworkParams()
        self.__weight = None
        self.__bias = None
        self.__alpha = 0.01
        self.__update = NoUpdate()
        self.__lambda = 0
        self.__keep_prob = 1
        self.__threshold = 0.5
        # self.__max_z = 0
        self.__compute_y = False
        self.__check_overflow = False

    # hand the network explicit parameters instead of the weights held in the layers;
    # this also makes the layers pass their derivatives back in the results
    def set_params(self, params):
        self.__params = params

    def get_params(self, layer_idx):
        return self.__params.get_params(layer_idx)

    def inc(self, alpha=0):
        self.__update.next_it(alpha)

    def set_update(self, update):
        assert isinstance(update, Update)
        self.__update = update

    def is_update(self):
        return self.__update.is_update

    def update(self, w, b, vw, vb, layer_idx):
        return self.__update.update(w, b, vw, vb, layer_idx)

    def set_lambda(self, lambd):
        self.__lambda = lambd

    def get_lambda(self):
        return self.__lambda

    def set_keep_prob(self, prob):
        assert 0 < prob <= 1
        self.__keep_prob = prob

    def get_keep_prob(self):
        return self.__keep_prob

    def set_check_overflow(self, co):
        self.__check_overflow = co

    def get_check_overflow(self):
        return self.__check_overflow

    # def set_max_z(self, max_z):
    #     assert max_z >= 0
    #     self.__max_z = max_z

    # def get_max_z(self):
    #     return self.__max_z

    def set_threshold(self, threshold):
        assert 0 < threshold < 1
        self.__threshold = threshold

    def get_threshold(self):
        return self.__threshold

    def set_compute_y(self, compute_y):
        self.__compute_y = compute_y

    def set_eval(self, y=None):
        self.__y = y
        self.__learn = False
        self.__compute_y = True

    def set_train(self, y, compute_y=False):
        self.__y = y
        self.__learn = True
        self.__compute_y = compute_y

    def is_learn(self):
        return self.__learn

    def is_compute_y(self):
        return self.__compute_y

    def get_y(self):
        return self.__y

    def set_verbose(self, verbosity):
        self.__verbose = verbosity

    def is_verbose(self):
        return self.__verbose
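
For context, here is a minimal usage sketch of RuntimeParams that uses only the methods defined above; network, x and y are placeholders for a network built elsewhere (as in code example #1) and its data, and no concrete update rule is shown because only NoUpdate appears in these listings.

rt_params = RuntimeParams()
rt_params.set_train(y)           # supply labels and switch to learn mode
rt_params.set_lambda(0.01)       # L2 regularisation strength
rt_params.set_keep_prob(0.8)     # dropout keep-probability
rt_params.set_verbose(True)
# an Update subclass would normally be installed via set_update();
# the default NoUpdate() leaves the weights untouched
res = network.process(x, rt_params)

rt_params.set_eval()             # clear labels, disable learning, enable compute_y
rt_params.set_threshold(0.5)     # decision threshold used when computing y
res = network.process(x, rt_params)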