Esempio n. 1
0
    def cal_hidden_state(self, test, layernum):
        """Replay the LSTM at ``self.model.layers[layernum]`` step by step in NumPy.

        The layer's kernel, recurrent kernel and bias are read from the model
        and the recurrence is evaluated for ``self.imagesize`` steps, starting
        from all-zero hidden and cell states.  Gates use ``hard_sigmoid``
        (defined elsewhere) and the candidate/cell output use ``np.tanh``.

        Args:
            test: Input sample.  When ``layernum`` is 0 it is used directly as
                the layer input; otherwise it is wrapped in a batch of one and
                pushed through the model up to layer ``layernum - 1``.
                Row ``i`` of the resulting input is consumed at step ``i``
                (presumably shaped ``(imagesize, features)`` -- confirm
                against callers).
            layernum: Index of the LSTM layer in ``self.model.layers``.

        Returns:
            ``[h_t, c_t, f_t]``: hidden states, cell states and forget-gate
            activations, each of shape ``(self.imagesize, units)``.
        """
        if layernum == 0:
            # asarray is a no-op for ndarrays and makes list input indexable
            acx = np.asarray(test)
        else:
            acx = get_activations_single_layer(self.model, np.array([test]),
                                               self.layerName(layernum - 1))

        lstm_layer = self.model.layers[layernum]

        # Fetch the weights once: each get_weights() call returns the whole
        # weight list, so indexing three separate calls repeats that work.
        layer_weights = lstm_layer.get_weights()
        W, U, b = layer_weights[0], layer_weights[1], layer_weights[2]

        # The kernel stacks the four gates along its columns.
        units = int(lstm_layer.trainable_weights[0].shape[1]) // 4

        # calculate the hidden state value
        h_t = np.zeros((self.imagesize, units))
        c_t = np.zeros((self.imagesize, units))
        f_t = np.zeros((self.imagesize, units))
        h_prev = np.zeros((1, units))
        c_prev = np.zeros((1, units))

        for step in range(self.imagesize):
            # Pre-activations of all four gates in one shot; the columns are
            # ordered input, forget, candidate, output.
            z = np.dot(acx[step, :], W) + np.dot(h_prev, U) + b
            i_gate = hard_sigmoid(z[..., :units])
            f_gate = hard_sigmoid(z[..., units:units * 2])
            candidate = np.tanh(z[..., units * 2:units * 3])
            o_gate = hard_sigmoid(z[..., units * 3:])

            c_prev = f_gate * c_prev + i_gate * candidate
            h_prev = o_gate * np.tanh(c_prev)

            c_t[step, :] = c_prev
            h_t[step, :] = h_prev
            f_t[step, :] = f_gate

        return [h_t, c_t, f_t]
Esempio n. 2
0
    def cal_hidden_state(self, test, layer):
        """Replay the LSTM at ``self.model.layers[1]`` step by step in NumPy.

        The input sample is pushed through the layer named
        ``self.layerName(0)``, squeezed, and then fed row by row into a NumPy
        re-implementation of the LSTM recurrence for
        ``self.max_review_length`` steps, starting from all-zero hidden and
        cell states.  Gates use ``hard_sigmoid`` (defined elsewhere) and the
        candidate/cell output use ``np.tanh``.

        Args:
            test: Input sample; wrapped in a batch of one before being passed
                to ``get_activations_single_layer``.
            layer: Unused.  The method always reads the weights of
                ``self.model.layers[1]``.
                NOTE(review): confirm whether this was meant to select the
                layer; behaviour is left unchanged here.

        Returns:
            Tuple ``(h_t, c_t, f_t)``: hidden states, cell states and
            forget-gate activations, each of shape
            ``(self.max_review_length, units)``.
        """
        acx = get_activations_single_layer(self.model, np.array([test]),
                                           self.layerName(0))
        acx = np.squeeze(acx)

        lstm_layer = self.model.layers[1]

        # Fetch the weights once: each get_weights() call returns the whole
        # weight list, so indexing three separate calls repeats that work.
        layer_weights = lstm_layer.get_weights()
        W, U, b = layer_weights[0], layer_weights[1], layer_weights[2]

        # The kernel stacks the four gates along its columns.
        units = int(lstm_layer.trainable_weights[0].shape[1]) // 4

        # calculate the hidden state value
        h_t = np.zeros((self.max_review_length, units))
        c_t = np.zeros((self.max_review_length, units))
        f_t = np.zeros((self.max_review_length, units))
        h_prev = np.zeros((1, units))
        c_prev = np.zeros((1, units))

        for step in range(self.max_review_length):
            # Pre-activations of all four gates in one shot; the columns are
            # ordered input, forget, candidate, output.
            z = np.dot(acx[step, :], W) + np.dot(h_prev, U) + b
            i_gate = hard_sigmoid(z[..., :units])
            f_gate = hard_sigmoid(z[..., units:units * 2])
            candidate = np.tanh(z[..., units * 2:units * 3])
            o_gate = hard_sigmoid(z[..., units * 3:])

            c_prev = f_gate * c_prev + i_gate * candidate
            h_prev = o_gate * np.tanh(c_prev)

            c_t[step, :] = c_prev
            h_t[step, :] = h_prev
            f_t[step, :] = f_gate

        return h_t, c_t, f_t
Esempio n. 3
0
def get_mlp_model(n_in, n_out, n_layers=2, n_hidden=50):
    """Build Theano functions for an MLP whose weights are binarized.

    Two forward graphs are built over the same parameters: one using
    deterministically binarized weights and one using stochastically
    binarized weights.  Every layer applies ``utils.hard_sigmoid``.  The
    training cost is computed from the stochastic graph, its gradients are
    taken with respect to the stochastically binarized weights (and the
    biases), and the resulting step is applied to the underlying weights and
    biases, clipped to ``[-1, 1]``.

    Args:
        n_in: Input width.
        n_out: Output width.
        n_layers: Number of weight layers; must be at least 2.
        n_hidden: Width of every hidden layer.

    Returns:
        A 5-tuple ``(train_func, test_func, grads_func, params,
        int_output_func)`` where

        * ``train_func(X, y, lr)`` returns ``[cost]`` and applies the updates,
        * ``test_func(X)`` returns ``[output]`` of the deterministic graph,
        * ``grads_func(X, y)`` returns the gradients,
        * ``params`` is the list of weights followed by biases,
        * ``int_output_func(X)`` returns every layer output of the stochastic
          graph followed by every layer output of the deterministic graph.
    """
    assert n_layers >= 2, '`n_layers` should be greater than 1 (otherwise it is just an mlp)'

    # Layer widths: input, (n_layers - 1) hidden layers, output.
    widths = [n_in] + [n_hidden] * (n_layers - 1) + [n_out]

    # Parameters are created weights-first, then biases, numbered from 1.
    weights = [
        utils.get_weights('w_%d' % idx, fan_in, fan_out)
        for idx, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:]), start=1)
    ]
    biases = [
        utils.get_weights('b_%d' % idx, width)
        for idx, width in enumerate(widths[1:], start=1)
    ]

    # Binarized views of the weights (deterministic first, then stochastic).
    deterministic_binary_weights = [utils.binarize(w, mode='deterministic') for w in weights]
    stochastic_binary_weights = [utils.binarize(w, mode='stochastic') for w in weights]

    # Symbolic inputs.
    lr = T.scalar(name='learning_rate')
    X = T.matrix(name='X', dtype=theano.config.floatX)
    y = T.matrix(name='y', dtype=theano.config.floatX)

    def _layer_outputs(binary_weights):
        # Chain the layers and collect the output of each one.
        outputs = []
        activation = X
        for w, b in zip(binary_weights, biases):
            activation = utils.hard_sigmoid(T.dot(activation, w) + b)
            outputs.append(activation)
        return outputs

    d_outs = _layer_outputs(deterministic_binary_weights)
    s_outs = _layer_outputs(stochastic_binary_weights)

    # Cost on the stochastic graph, with prediction and target both mapped
    # through (v + 1) / 2 before being handed to utils.get_cost.
    prediction = (s_outs[-1] + 1.) / 2.
    target = (y + 1.) / 2.
    cost = utils.get_cost(prediction, target, mode='mse')

    # Gradients are taken w.r.t. the binarized weights but the step is applied
    # to the underlying parameters, clipped to [-1, 1].
    params = weights + biases
    grad_targets = stochastic_binary_weights + biases
    grads = [T.grad(cost, target_var) for target_var in grad_targets]
    updates = []
    for param, grad in zip(params, grads):
        updates.append((param, T.clip(param - lr * grad, -1, 1)))

    # Compiled functions.
    train_func = theano.function([X, y, lr], [cost], updates=updates)
    test_func = theano.function([X], [d_outs[-1]])
    grads_func = theano.function([X, y], grads)
    int_output_func = theano.function([X], s_outs + d_outs)

    return train_func, test_func, grads_func, params, int_output_func
    def cal_hidden_state(self, test, layernum):
        """Evaluate one LSTM step of ``self.model.layers[layernum]`` in NumPy.

        The layer's kernel, recurrent kernel and bias are read from the model
        and a single LSTM update is computed from all-zero previous hidden and
        cell states.  The zero states have shape ``(N, 1, units)`` and are
        broadcast along axis 1 of the input, so every position along that axis
        is evaluated independently from the same zero state.
        NOTE(review): no state is carried between positions -- confirm this is
        the intended approximation.

        Args:
            test: Batch of inputs.  When ``layernum`` is 0 it is converted to
                an array and used directly; otherwise it is pushed through the
                model up to layer ``layernum - 1``.  The layer input must be
                3-D, with the feature dimension on axis 2.
            layernum: Index of the LSTM layer in ``self.model.layers``.

        Returns:
            Tuple ``(h_t, c_t, f)``: hidden state, cell state and forget-gate
            activation, each with ``units`` entries along the last axis.
        """
        if layernum == 0:
            acx = np.array(test)
        else:
            acx = get_activations_single_layer(self.model, np.array(test),
                                               self.layerName(layernum - 1))

        lstm_layer = self.model.layers[layernum]

        # Fetch the weights once: each get_weights() call returns the whole
        # weight list, so indexing three separate calls repeats that work.
        layer_weights = lstm_layer.get_weights()
        W, U, b = layer_weights[0], layer_weights[1], layer_weights[2]

        # The kernel stacks the four gates along its columns.
        units = int(lstm_layer.trainable_weights[0].shape[1]) // 4

        # Zero previous state, one per batch entry, broadcast over axis 1.
        h_t0 = np.zeros((acx.shape[0], 1, units))
        c_t0 = np.zeros((acx.shape[0], 1, units))

        # Pre-activations of all four gates, contracted over the feature axis;
        # the columns are ordered input, forget, candidate, output.
        s_t = np.tensordot(acx, W, axes=([2], [0])) + np.tensordot(
            h_t0, U, axes=([2], [0])) + b
        i = hard_sigmoid(s_t[:, :, :units])
        f = hard_sigmoid(s_t[:, :, units:units * 2])
        _c = np.tanh(s_t[:, :, units * 2:units * 3])
        o = hard_sigmoid(s_t[:, :, units * 3:])
        c_t = i * _c + f * c_t0
        h_t = o * np.tanh(c_t)

        return h_t, c_t, f