Example #1
def dbn_forward_pass(ws_vh, ws_v, ws_h, x, y=None):
    """
    Deep belief net forward pass.
    
    x: input data (N x D matrix)
    y: Class label (1-of-K coded, N x K matrix). If not None, it is concatenated
        to the input for top layer RBM when calculating the output of the DBN.
    ws_vh: list of L layer weight matrices (each D x H)
    ws_v: list of L layer input bias vectors (each D x 1)
    ws_h: list of L layer output bias vectors (each H x 1)
    Returns activations (continuous) and outputs (0-1, sigmoid(activations)) of
    top layer
    """
    L = len(ws_vh)
    h = x.T

    # forward (bottom-up) pass
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h) + ws_h[l]
        h = gnp.logistic(ah)

    # if supervised, concatenate class labels to input to top layer RBM
    if y is not None:
        h = gnp.concatenate((y.T, h))

    ah = gnp.dot(ws_vh[-1].T, h) + ws_h[-1]
    h = gnp.logistic(ah)

    return ah.T, h.T
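
Below is a minimal usage sketch (not part of the original example; it assumes gnumpy is importable as gnp and uses small, randomly initialized weights) showing the expected shapes for a two-layer DBN with 4 input, 3 hidden and 2 top-level units:

import gnumpy as gnp

# two-layer DBN: 4 visible -> 3 hidden -> 2 top-level units
ws_vh = [gnp.randn((4, 3)) * 0.1, gnp.randn((3, 2)) * 0.1]
ws_v = [gnp.zeros((4, 1)), gnp.zeros((3, 1))]
ws_h = [gnp.zeros((3, 1)), gnp.zeros((2, 1))]

x = gnp.rand(5, 4)                 # N=5 data points, D=4 dimensions
ah, h = dbn_forward_pass(ws_vh, ws_v, ws_h, x)
print(ah.shape, h.shape)           # both (5, 2): top-layer activations and sigmoid outputs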
Example #2
def check_fisher_information_indep():
    """Fisher information should agree with analytic solution for base rate RBM."""
    with misc.gnumpy_conversion_check('allow'):
        rbm = random_base_rate_rbm()
        E_v = gnp.logistic(rbm.vbias)
        E_h = gnp.logistic(rbm.hbias)

        G = tractable.exact_fisher_information(rbm, batch_units=BATCH_UNITS)
        assert_close(G, G.T, 'G not symmetric')

        G_vis_vishid = G[:NVIS, NVIS + NHID:].reshape((NVIS, NVIS, NHID))
        G_hid_vishid = G[NVIS:NVIS + NHID, NVIS + NHID:].reshape(
            (NHID, NVIS, NHID))
        G_vishid_vishid = G[NVIS + NHID:, NVIS + NHID:].reshape(
            (NVIS, NHID, NVIS, NHID))

        assert_close(G_vis_vishid[0, 0, 1], E_v[0] * (1. - E_v[0]) * E_h[1])
        assert_close(G_vis_vishid[0, 1, 2], 0.)
        assert_close(G_hid_vishid[0, 1, 0], E_h[0] * (1. - E_h[0]) * E_v[1])
        assert_close(G_hid_vishid[0, 1, 2], 0.)
        assert_close(G_vishid_vishid[0, 1, 0, 1],
                     E_v[0] * E_h[1] * (1. - E_v[0] * E_h[1]))
        assert_close(G_vishid_vishid[0, 1, 0, 2],
                     E_v[0] * (1. - E_v[0]) * E_h[1] * E_h[2])
        assert_close(G_vishid_vishid[0, 2, 1, 2],
                     E_h[2] * (1. - E_h[2]) * E_v[0] * E_v[1])
        assert_close(G_vishid_vishid[0, 1, 2, 3], 0.)
Example #3
def rbm_sample(w_vh, w_v, w_h, x, k=1, clamped=None):
    """
    Sample from RBM with k steps of Gibbs sampling
    
    w_vh: Weights between visible and hidden units (matrix of size DxH)
    w_v: Visible unit biases (column vector of size Dx1)
    w_h: Hidden unit biases (column vector of size Hx1)
    x: Input (matrix of size DxN; each column is a data point)
    k: Number of Gibbs steps. Default is 1.
    clamped: If not None, keeps the given elements of x clamped (constant)
        while sampling
        clamped is a two-tuple that gives the start and end indices of clamped elements
    Returns hidden unit and visible unit activations (matrices of size HxN, DxN)
    """
    if clamped is not None:
        cx = x[clamped[0] : clamped[1], :]

    v = x
    for i in range(k):
        # sample hiddens
        ah = gnp.dot(w_vh.T, v) + w_h
        h = gnp.logistic(ah)
        hs = h > gnp.rand(h.shape[0], h.shape[1])

        # sample visibles
        av = gnp.dot(w_vh, hs) + w_v
        v = gnp.logistic(av)

        if clamped is not None:
            v[clamped[0] : clamped[1], :] = cx

    return h, v
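
A small calling sketch for rbm_sample (an illustration only; it assumes gnumpy is importable as gnp and uses a randomly initialized 6x4 RBM):

import gnumpy as gnp

D, H, N = 6, 4, 3
w_vh = gnp.randn((D, H)) * 0.1     # visible-to-hidden weights
w_v = gnp.zeros((D, 1))            # visible biases
w_h = gnp.zeros((H, 1))            # hidden biases

x = gnp.rand(D, N) > 0.5           # three random binary input columns
h, v = rbm_sample(w_vh, w_v, w_h, x, k=5)
print(h.shape, v.shape)            # (4, 3) hidden and (6, 3) visible probabilities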
Example #4
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(
            gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) +
            params[self.m_end:self.m_end + self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(
            self.shape).T) + params[-self.shape[0]:]

        w = params[:self.m_end].reshape(self.shape)
        cae = gpu.sum(
            gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0))
        cae *= self.cae

        _, delta = self.score(Z, inpts, error=True, addon=cae)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        cae_grad = gpu.mean(Dsigmoid(hddn)**2, axis=0) * w
        cae_grad += (gdot(inpts.T, (Dsigmoid(hddn)**2 * (1 - 2 * hddn))) / m *
                     gpu.sum(w**2, axis=0))
        g[:self.m_end] += self.cae * 2 * cae_grad.ravel()

        dsc_dha = Dsigmoid(hddn) * gdot(
            delta, params[:self.m_end].reshape(self.shape))

        g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:-self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
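
This pt_grad method (like the related pt_score/pt_grad snippets in later examples) relies on module-level helpers that are not shown on this page: logistic, Dsigmoid, gdot, gzeros and, for the sparse variants, bKL. A plausible set of definitions, offered only as an assumption about what the missing module provides, would be:

import gnumpy as gpu
from gnumpy import dot as gdot, zeros as gzeros, logistic

def Dsigmoid(y):
    # derivative of the logistic function, expressed in terms of its output y
    return y * (1 - y)

def bKL(rho, rho_hat):
    # elementwise KL divergence between Bernoulli(rho) and Bernoulli(rho_hat)
    return rho * gpu.log(rho / rho_hat) + (1 - rho) * gpu.log((1 - rho) / (1 - rho_hat))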
Example #5
def exact_fisher_information_biases(rbm, batch_units=10, show_progress=False):
    batch_size = 2 ** batch_units

    nvis, nhid = rbm.nvis, rbm.nhid
    num_params = nvis + nhid

    s = gnp.zeros(num_params)
    G = gnp.zeros((num_params, num_params))

    for hid, p in iter_configurations(rbm, batch_units=batch_units, show_progress=show_progress):
        g = gnp.zeros((batch_size, num_params))
        cond_vis = gnp.logistic(rbm.vis_inputs(hid))

        g[:, :nvis] = cond_vis
        g[:, nvis:] = hid

        s += gnp.dot(p, g)
        G += gnp.dot(g.T * p, g)

        diag_term = gnp.dot(p, g * (1. - g))
        G += np.diag(diag_term.as_numpy_array())

    G -= s[:, nax] * s[nax, :]

    return G
Example #6
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(gdot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.size])
        Z = gdot(hddn, params[self.size:-self.shape[0]].reshape(self.Tshape)) + params[-self.shape[0]:]

        if self.rho_hat_grad is None:
            self.rho_hat_grad = hddn.mean(axis=0)
        else:
            self.rho_hat_grad *= 0.9
            self.rho_hat_grad += 0.1*hddn.mean(axis=0)

#        rho_hat = hddn.mean(axis=0)
        rho_hat = self.rho_hat_grad
        rho = self.rho
        sparsity = self.beta * gpu.sum(bKL(rho, rho_hat))
 
        _, delta = self.score(Z, inpts, error=True, addon=sparsity)

        g[self.size:-self.shape[0]] = gdot(hddn.T, delta).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        diff = Dsigmoid(hddn)
        dsparse_dha = -rho/rho_hat + (1-rho)/(1-rho_hat)
        dsc_dha = diff * (gdot(delta, params[:self.m_end].reshape(self.shape)) + self.beta*dsparse_dha/m)

        g[:self.m_end] = gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:self.size] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #7
def exact_samples(rbm, num, batch_units=10, show_progress=False):
    scores = get_scores(rbm, batch_units=batch_units).as_numpy_array()
    scores -= np.logaddexp.reduce(scores.ravel())
    p = np.exp(scores)

    prefix_len = rbm.nhid - batch_units
    prefixes = combinations_array(prefix_len).as_numpy_array()
    postfixes = combinations_array(batch_units).as_numpy_array()

    p_row = p.sum(1)
    p_row /= p_row.sum()
    cond_p_col = p / p_row[:, nax]

    cond_p_col *= (1. - 1e-8)   # keep np.random.multinomial from choking because the sum is greater than 1


    vis = np.zeros((num, rbm.nvis))
    hid = np.zeros((num, rbm.nhid))

    with misc.gnumpy_conversion_check('allow'):
        rows = np.random.multinomial(1, p_row, size=num).argmax(1)
        #cols = np.random.multinomial(1, cond_p_col[rows, :]).argmax(1)
        cols = np.array([np.random.multinomial(1, cond_p_col[row, :]).argmax()
                         for row in rows])
        hid = np.hstack([prefixes[rows, :], postfixes[cols, :]])
        vis = np.random.binomial(1, gnp.logistic(rbm.vis_inputs(hid)))

    return binary_rbms.RBMState(gnp.garray(vis), gnp.garray(hid))
Example #8
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.m_end+self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

        if self.rho_hat_grad is None:
            self.rho_hat_grad = hddn.mean(axis=0)
        else:
            self.rho_hat_grad *= 0.9
            self.rho_hat_grad += 0.1*hddn.mean(axis=0)

#        rho_hat = hddn.mean(axis=0)
        rho_hat = self.rho_hat_grad
        rho = self.rho
        sparsity = self.beta * gpu.sum(bKL(rho, rho_hat))

        _, delta = self.score(Z, inpts, error=True, addon=sparsity)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        diff = Dsigmoid(hddn)
        dsparse_dha = -rho/rho_hat + (1-rho)/(1-rho_hat)
        dsc_dha = diff * (gdot(delta, params[:self.m_end].reshape(self.shape)) + self.beta*dsparse_dha/m)

        g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:-self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #9
def dbn_supervised_predict_sample(ws_vh, ws_v, ws_h, x, k=20):
    """
    Predict the class label of input x from supervised DBN
    WARNING: THIS IS PRETTY SLOW AND LESS RELIABLE THAN THE EXACT METHOD
    Uses the sampling method mentioned in section 6.2 of Hinton, Osindero, Teh 2006
    
    x: Input data. (NxD matrix)
    k: Number of Gibbs steps
    """
    L = len(ws_vh)
    N = x.shape[0]

    # make a forward pass to get from input layer to visible layer of top level
    # RBM
    h_prev = x.T

    # forward (bottom-up) pass; deterministic, i.e. we propagate activations
    # rather than stochastically sampled states
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h_prev) + ws_h[l]
        h_prev = gnp.logistic(ah)

    H = ws_vh[-1].shape[0]  # number of visible units top level RBM
    Hx = h_prev.shape[0]  # number of hidden units in the penultimate layer
    K = H - Hx
    # (H - Hx) is the number of supervised inputs to top level RBM
    # we give random values to the supervised portion of the input
    v = gnp.concatenate((gnp.ones((K, N)) / K, h_prev))
    # we keep the visible units clamped while sampling
    h, v = rbm_sample(ws_vh[-1], ws_v[-1], ws_h[-1], v, k, clamped=(K, H))

    # return the sampled label portion of the top level RBM's visible units
    return v[0:K, :].T
Example #10
def sigmoid(x, computeGrad=False):
    if (not computeGrad):
        f = gp.logistic(x)
        return f

    g = x * (1. - x)
    return g
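
Note that the gradient branch expects x to already be the logistic output, so a typical calling pattern (a sketch, assuming gnumpy is imported as gp) is:

import gnumpy as gp

z = gp.randn((2, 3))
a = sigmoid(z)                         # forward: a = logistic(z)
da_dz = sigmoid(a, computeGrad=True)   # gradient a * (1 - a), evaluated at the activations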
Example #11
def sigmoid(x, computeGrad = False):
    if (not computeGrad): 
        f = gp.logistic(x)
        return f

    g = x * (1.-x)
    return g
Example #12
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(
            gpu.dot(inpts, params[: self.m_end].reshape(self.shape)) + params[self.m_end : self.m_end + self.shape[1]]
        )
        Z = gdot(hddn, params[: self.m_end].reshape(self.shape).T) + params[-self.shape[0] :]

        w = params[: self.m_end].reshape(self.shape)
        cae = gpu.sum(gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * gpu.sum(w ** 2, axis=0))
        cae *= self.cae

        _, delta = self.score(Z, inpts, error=True, addon=cae)

        g[: self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0] :] = delta.sum(axis=0)

        cae_grad = gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * w
        cae_grad += gdot(inpts.T, (Dsigmoid(hddn) ** 2 * (1 - 2 * hddn))) / m * gpu.sum(w ** 2, axis=0)
        g[: self.m_end] += self.cae * 2 * cae_grad.ravel()

        dsc_dha = Dsigmoid(hddn) * gdot(delta, params[: self.m_end].reshape(self.shape))

        g[: self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end : -self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #13
 def pseudo_likelihood_for_bit(self, vis, i):
     """Returns the log-probability of bit i of vis given all other bits
     of vis."""
     fe = self.free_energy(vis)
     vis_flip = vis.copy()  # copy so the caller's array is not modified in place
     vis_flip[:, i] = 1 - vis[:, i]
     fe_flip = self.free_energy(vis_flip)
     pl = gp.log(gp.logistic(fe_flip - fe))
     return pl
Example #14
    def __init__(self, m, n, q=100, name=""):
        # name of layer
        self.name = name
        self.m = m  # input layer size
        self.n = n  # output layer size
        #self.p = p # piece group
        self.q = q  # batch size

        self.dropout = 0.1  # dropout rate
        self.learn = 10**-3
        self.l2reg = (1.0 - 10**-100)

        # activation function
        #self.f = lambda z: 1.0/(gpu.exp(-z)+1.0)
        self.f = lambda z: gpu.logistic(z)
        #self.f = lambda z: z
        # derivative of activation function
        #self.g = lambda z: self.f(z)*(1.0-self.f(z))
        self.g = lambda z: z * (1.0 - z)
        #self.g = lambda z: 1.0

        d = 10**-8
        # weight matrix
        self.w = gpu.garray(
            np.random.uniform(low=-d, high=d, size=(m, n)).astype(np.float32))
        # bias vector
        self.b = gpu.garray(
            np.random.uniform(low=-d, high=d, size=(n)).astype(np.float32))

        # input of forward propagation
        self.x = gpu.garray(
            np.random.uniform(low=-d, high=d, size=(q, m)).astype(np.float32))
        # output of forward propagation
        self.s = gpu.garray(
            np.random.uniform(low=-d, high=d, size=(q, n)).astype(np.float32))
        # input of back propagation
        self.d = gpu.garray(
            np.random.uniform(low=-d, high=d, size=(q, n)).astype(np.float32))
        # output of back propagation
        self.e = gpu.garray(
            np.random.uniform(low=-d, high=d, size=(q, m)).astype(np.float32))
        # temporary array for error
        #self.u = gpu.garray(np.random.uniform(low=-d, high=d, size=(q, n, m)).astype(np.float32))

        # novelty key ****-> set self.t.size to (n, 1, p, 1)  ---> group max
        # mask for dropout
        self.r = gpu.garray(
            np.random.uniform(low=0., high=1., size=(self.m)).astype(
                np.float32) > self.dropout) / (1.0 - self.dropout)
        #print self.r
        # mask for piece group
        #self.t = gpu.garray(np.random.randint(low=0,  high=2,  size=(1, n, q)).astype(np.float32))

        # outward connections
        self.next = []
        # inward connections
        self.prev = []
Example #15
def dbn_sample(ws_vh, ws_v, ws_h, x, y=None, k=1):
    """
    Sample from DBN
    
    ws_vh, ws_v, ws_h: Lists of layer weights for DBN
    x: Initial sample. This is the input to DBN. (1xD vector)
    y: Class label for the sample. This corresponds to sampling from class
        conditionals. (1-of-K coded, row vector) 
    k: Number of Gibbs steps
    Returns a sample from DBN (1xD vector)
    """
    L = len(ws_vh)

    # make a forward pass to get from input layer to visible layer of top level
    # RBM
    h_prev = x.T

    # forward (bottom-up) pass
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h_prev) + ws_h[l]
        h_prev = gnp.logistic(ah)
        h_prev = h_prev > gnp.rand(h_prev.shape[0], h_prev.shape[1])

    # if not supervised, sample from top layer RBM without clamping any of its
    # inputs
    if y is None:
        # sample from top layer RBM
        h, v = rbm_sample(ws_vh[-1], ws_v[-1], ws_h[-1], h_prev, k)
    else:
        K = y.shape[1]  # number of classes
        H = ws_vh[-1].shape[0]
        # generate a random input to top layer RBM with class label units clamped to y
        v = gnp.concatenate((y.T, h_prev))
        # sample from top layer RBM
        h, v = rbm_sample(ws_vh[-1], ws_v[-1], ws_h[-1], v, k, clamped=(0, K))
        v = v[K:H, :]

    # backward (top-down) pass
    # propagate sample from RBM back to input
    for l in range(L - 2, -1, -1):
        av = gnp.dot(ws_vh[l], v) + ws_v[l]
        v = gnp.logistic(av)

    return v.T
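
As an illustration only (random, untrained weights; it also assumes the rbm_sample helper from Example #3 is defined), drawing a class-conditional sample from a toy 3-class DBN could look like this:

import gnumpy as gnp

# 4 visible -> 3 hidden; the top RBM sees 3 label units plus the 3 hidden units
ws_vh = [gnp.randn((4, 3)) * 0.1, gnp.randn((6, 5)) * 0.1]
ws_v = [gnp.zeros((4, 1)), gnp.zeros((6, 1))]
ws_h = [gnp.zeros((3, 1)), gnp.zeros((5, 1))]

y = gnp.zeros((1, 3))
y[:, 1] = 1.0                          # condition on class 1
x0 = gnp.rand(1, 4) > 0.5              # arbitrary starting input (1 x D)
sample = dbn_sample(ws_vh, ws_v, ws_h, x0, y=y, k=50)
print(sample.shape)                    # (1, 4)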
Example #16
def check_fisher_information_biases_indep():
    """Fisher information should agree with analytic solution for base rate RBM."""
    with misc.gnumpy_conversion_check('allow'):
        rbm = random_base_rate_rbm()
        E_v = gnp.logistic(rbm.vbias)
        E_h = gnp.logistic(rbm.hbias)

        G = tractable.exact_fisher_information_biases(rbm, batch_units=BATCH_UNITS)
        assert_close(G, G.T, 'G not symmetric')

        G_vis_vis = G[:NVIS, :NVIS]
        G_vis_hid = G[:NVIS, NVIS:]
        G_hid_hid = G[NVIS:, NVIS:]

        assert_close(G_vis_vis[0, 0], E_v[0] * (1. - E_v[0]))
        assert_close(G_vis_vis[0, 1], 0.)
        assert_close(G_vis_hid[0, 0], 0.)
        assert_close(G_hid_hid[0, 0], E_h[0] * (1. - E_h[0]))
        assert_close(G_hid_hid[0, 1], 0.)
Example #17
def nn_forward_pass(x, w, b, return_all=True):
    """
    Forward pass for multilayer feed-forward sigmoid neural network
    
    Hidden units have sigmoid non-linearity. 
    Output is soft-max.

    x: DxN matrix of input data
    w: Weights. List of weight matrices for each layer.
    b: Biases. List of bias vectors for each layer
    return_all: If True, returns hidden unit activations for each layer. If False
        just returns the output layer activations
    Returns a list h where each element is a matrix containing the activations
    for that layer. h[0] is input data x. 
    """
    # ---- TEMP HACK --------------
    # I should find a more seamless way of running in mixed (some operations
    # with numpy, some with gnumpy) mode.
    # I had to resort to this because I needed the validation classification
    # step in nn_train to run on the CPU with numpy; the GPU ran out of memory.
    if isinstance(x, gnp.garray):
        use_gpu = True
    else:
        use_gpu = False

    layer_count = len(w)
    if return_all:
        hs = [x]  # unit activations for each layer
    h = x

    # all layers except the output layer
    for l in range(layer_count - 1):
        if use_gpu:
            a = gnp.dot(w[l].T, h) + b[l]
            h = gnp.logistic(a)
        else:
            a = np.dot(gnp.as_numpy_array(w[l]).T, h) + gnp.as_numpy_array(b[l])
            h = 1.0 / (1 + np.exp(-a))
        if return_all:
            hs.append(h)

    # output layer
    if use_gpu:
        h = gnp.dot(w[-1].T, h) + b[-1]
        h = gnp.exp(h) / gnp.sum(gnp.exp(h), axis=0)  # soft-max
    else:
        h = np.dot(gnp.as_numpy_array(w[-1]).T, h) + gnp.as_numpy_array(b[-1])
        h = np.exp(h) / np.sum(np.exp(h), axis=0)  # soft-max

    if return_all:
        hs.append(h)
        return hs
    else:
        return h
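
A quick shape check for nn_forward_pass (a sketch exercising the GPU branch; it assumes gnumpy is importable as gnp):

import gnumpy as gnp

D, H1, K, N = 3, 4, 2, 5
w = [gnp.randn((D, H1)) * 0.1, gnp.randn((H1, K)) * 0.1]
b = [gnp.zeros((H1, 1)), gnp.zeros((K, 1))]
x = gnp.rand(D, N)

hs = nn_forward_pass(x, w, b)
print([hi.shape for hi in hs])     # [(3, 5), (4, 5), (2, 5)]
print(hs[-1].sum(axis=0))          # soft-max outputs sum to 1 for each column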
Example #18
def check_fisher_information_biases_indep():
    """Fisher information should agree with analytic solution for base rate RBM."""
    with misc.gnumpy_conversion_check('allow'):
        rbm = random_base_rate_rbm()
        E_v = gnp.logistic(rbm.vbias)
        E_h = gnp.logistic(rbm.hbias)

        G = tractable.exact_fisher_information_biases(rbm,
                                                      batch_units=BATCH_UNITS)
        assert_close(G, G.T, 'G not symmetric')

        G_vis_vis = G[:NVIS, :NVIS]
        G_vis_hid = G[:NVIS, NVIS:]
        G_hid_hid = G[NVIS:, NVIS:]

        assert_close(G_vis_vis[0, 0], E_v[0] * (1. - E_v[0]))
        assert_close(G_vis_vis[0, 1], 0.)
        assert_close(G_vis_hid[0, 0], 0.)
        assert_close(G_hid_hid[0, 0], E_h[0] * (1. - E_h[0]))
        assert_close(G_hid_hid[0, 1], 0.)
Example #19
def dbn_supervised_predict_exact(ws_vh, ws_v, ws_h, x):
    """
    Predict the class label of input x from supervised DBN
    Uses the exact method mentioned in section 6.2 of Hinton, Osindero, Teh 2006
    The free energy formula is taken from http://deeplearning.net/tutorial/rbm.html
    
    x: Input data. (NxD matrix)
    """
    L = len(ws_vh)
    N = x.shape[0]

    # make a forward pass to get from input layer to visible layer of top level
    # RBM
    h_prev = x.T

    # forward (bottom-up) pass; deterministic, i.e. we propagate activations
    # rather than stochastically sampled states
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h_prev) + ws_h[l]
        h_prev = gnp.logistic(ah)

    H = ws_vh[-1].shape[0]  # number of visible units top level RBM
    Hx = h_prev.shape[0]  # number of hidden units in the penultimate layer
    K = H - Hx
    # (H - Hx) is the number of supervised inputs to top level RBM

    # for every class, assume it is the correct label and calculate its free energy
    y = gnp.zeros((K, N))
    free_energy = gnp.zeros((N, K))  # we actually calculate -free_energy
    for k in range(K):
        # set the current assumed class label
        y[k, :] = 1.0

        # visible unit vector
        v = gnp.concatenate((y, h_prev))
        e_v = gnp.dot(ws_v[-1].T, v)  # bias energy term

        ah = gnp.dot(ws_vh[-1].T, v) + ws_h[-1]
        e_h = gnp.sum(gnp.log(gnp.exp(ah) + 1.0), axis=0)

        free_energy[:, k] = e_v + e_h

        # zero the class labels for next iteration
        y[:, :] = 0.0

    # since these numbers may get pretty small, subtract the per-row maximum
    # (log-sum-exp trick) before exponentiating to convert them to probabilities
    pred_y = (
        gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis])
        / gnp.sum(gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis]), axis=1)[:, gnp.newaxis]
    )

    return pred_y
Example #20
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(
            gpu.dot(inpts, params[: self.m_end].reshape(self.shape)) + params[self.m_end : self.m_end + self.shape[1]]
        )
        Z = gdot(hddn, params[: self.m_end].reshape(self.shape).T) + params[-self.shape[0] :]

        w = params[: self.m_end].reshape(self.shape)
        cae = gpu.sum(gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * gpu.sum(w ** 2, axis=0))
        cae *= self.cae

        sc = self.score(Z, inpts, addon=cae)
        return np.array([sc, cae])
Example #21
def exact_moments(rbm, batch_units=10, show_progress=False):
    expect_vis = gnp.zeros(rbm.nvis)
    expect_hid = gnp.zeros(rbm.nhid)
    expect_prod = gnp.zeros((rbm.nvis, rbm.nhid))

    for hid, p in iter_configurations(rbm, batch_units=batch_units, show_progress=show_progress):
        cond_vis = gnp.logistic(rbm.vis_inputs(hid))
        expect_vis += gnp.dot(p, cond_vis)
        expect_hid += gnp.dot(p, hid)
        expect_prod += gnp.dot(cond_vis.T * p, hid)

    return binary_rbms.Moments(expect_vis, expect_hid, expect_prod)
Example #22
def check_fisher_information_indep():
    """Fisher information should agree with analytic solution for base rate RBM."""
    with misc.gnumpy_conversion_check('allow'):
        rbm = random_base_rate_rbm()
        E_v = gnp.logistic(rbm.vbias)
        E_h = gnp.logistic(rbm.hbias)

        G = tractable.exact_fisher_information(rbm, batch_units=BATCH_UNITS)
        assert_close(G, G.T, 'G not symmetric')

        G_vis_vishid = G[:NVIS, NVIS+NHID:].reshape((NVIS, NVIS, NHID))
        G_hid_vishid = G[NVIS:NVIS+NHID, NVIS+NHID:].reshape((NHID, NVIS, NHID))
        G_vishid_vishid = G[NVIS+NHID:, NVIS+NHID:].reshape((NVIS, NHID, NVIS, NHID))

        assert_close(G_vis_vishid[0, 0, 1], E_v[0] * (1. - E_v[0]) * E_h[1])
        assert_close(G_vis_vishid[0, 1, 2], 0.)
        assert_close(G_hid_vishid[0, 1, 0], E_h[0] * (1. - E_h[0]) * E_v[1])
        assert_close(G_hid_vishid[0, 1, 2], 0.)
        assert_close(G_vishid_vishid[0, 1, 0, 1], E_v[0] * E_h[1] * (1. - E_v[0] * E_h[1]))
        assert_close(G_vishid_vishid[0, 1, 0, 2], E_v[0] * (1. - E_v[0]) * E_h[1] * E_h[2])
        assert_close(G_vishid_vishid[0, 2, 1, 2], E_h[2] * (1. - E_h[2]) * E_v[0] * E_v[1])
        assert_close(G_vishid_vishid[0, 1, 2, 3], 0.)
Example #23
File: dnn.py Project: C2Tao/HMM
    def __init__(self, m, n, q=100, name=""):
        # name of layer
        self.name = name
        self.m = m  # input layer size
        self.n = n  # output layer size
        # self.p = p # piece group
        self.q = q  # batch size

        self.dropout = 0.1  # dropout rate
        self.learn = 10 ** -3
        self.l2reg = 1.0 - 10 ** -100

        # activation function
        # self.f = lambda z: 1.0/(gpu.exp(-z)+1.0)
        self.f = lambda z: gpu.logistic(z)
        # self.f = lambda z: z
        # derivative of activation function
        # self.g = lambda z: self.f(z)*(1.0-self.f(z))
        self.g = lambda z: z * (1.0 - z)
        # self.g = lambda z: 1.0

        d = 10 ** -8
        # weight matrix
        self.w = gpu.garray(np.random.uniform(low=-d, high=d, size=(m, n)).astype(np.float32))
        # bias vector
        self.b = gpu.garray(np.random.uniform(low=-d, high=d, size=(n)).astype(np.float32))

        # input of forward propagation
        self.x = gpu.garray(np.random.uniform(low=-d, high=d, size=(q, m)).astype(np.float32))
        # output of forward propagation
        self.s = gpu.garray(np.random.uniform(low=-d, high=d, size=(q, n)).astype(np.float32))
        # input of back propagation
        self.d = gpu.garray(np.random.uniform(low=-d, high=d, size=(q, n)).astype(np.float32))
        # output of back propagation
        self.e = gpu.garray(np.random.uniform(low=-d, high=d, size=(q, m)).astype(np.float32))
        # temporary array for error
        # self.u = gpu.garray(np.random.uniform(low=-d, high=d, size=(q, n, m)).astype(np.float32))

        # novelty key ****-> set self.t.size to (n, 1, p, 1)  ---> group max
        # mask for dropout
        self.r = gpu.garray(np.random.uniform(low=0.0, high=1.0, size=(self.m)).astype(np.float32) > self.dropout) / (
            1.0 - self.dropout
        )
        # print self.r
        # mask for piece group
        # self.t = gpu.garray(np.random.randint(low=0,  high=2,  size=(1, n, q)).astype(np.float32))

        # outward connections
        self.next = []
        # inward connections
        self.prev = []
Example #24
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(gdot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.size])
        Z = gdot(hddn, params[self.size:-self.shape[0]].reshape(self.Tshape)) + params[-self.shape[0]:]

        if self.rho_hat is None:
            self.rho_hat = hddn.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1*hddn.mean(axis=0)

        sparsity = self.beta * gpu.sum(bKL(self.rho, self.rho_hat))
        sc = self.score(Z, inpts, addon=sparsity)
        
        return sc
Example #25
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.m_end+self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

        if self.rho_hat is None:
            self.rho_hat = hddn.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1*hddn.mean(axis=0)

        sparsity = self.beta * gpu.sum(bKL(self.rho, self.rho_hat))
        sc = self.score(Z, inpts, addon=sparsity)

        return np.array([sc, sc-sparsity, sparsity, gpu.mean(self.rho_hat)])
Example #26
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(
            gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) +
            params[self.m_end:self.m_end + self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(
            self.shape).T) + params[-self.shape[0]:]

        w = params[:self.m_end].reshape(self.shape)
        cae = gpu.sum(
            gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0))
        cae *= self.cae

        sc = self.score(Z, inpts, addon=cae)
        return np.array([sc, cae])
Example #27
def sig_ssd(z, targets, weight=0.5, predict=False, error=False, addon=0):
    """
    Sigmoid SSD.
    """
    bern = gpu.logistic(z)
    if predict:
        return bern
    n, m = bern.shape
    err = bern - targets
    if error:
        # rec. error + first deriv
        return weight * gpu.sum(err**2) / n + addon, 2. * weight * err / n
    else:
        # only return reconstruction error
        return weight * gpu.sum(err**2) / n + addon
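
A sketch of the three calling modes of sig_ssd (assuming gnumpy is imported as gpu):

import gnumpy as gpu

z = gpu.randn((4, 2))                       # raw model outputs (pre-sigmoid)
targets = gpu.rand(4, 2) > 0.5

probs = sig_ssd(z, targets, predict=True)   # just logistic(z)
loss = sig_ssd(z, targets)                  # weighted SSD reconstruction error
loss, delta = sig_ssd(z, targets, error=True)  # error plus dSSD/d(logistic(z))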
Example #28
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(
            gdot(inpts, params[:self.m_end].reshape(self.shape)) +
            params[self.m_end:self.size])
        Z = gdot(hddn, params[self.size:-self.shape[0]].reshape(
            self.Tshape)) + params[-self.shape[0]:]

        if self.rho_hat is None:
            self.rho_hat = hddn.mean(axis=0)
        else:
            self.rho_hat *= 0.9
            self.rho_hat += 0.1 * hddn.mean(axis=0)

        sparsity = self.beta * gpu.sum(bKL(self.rho, self.rho_hat))
        sc = self.score(Z, inpts, addon=sparsity)

        return sc
Example #29
def mia(z, targets, predict=False, error=False, addon=0, tiny=1e-10):
    """
    Multiple independent attributes (i.e. independent
    binary cross entropy errors).

    Feed model output _z_ through logistic to get
    bernoulli distributed variables. 
    """
    bern = gpu.logistic(z)
    if predict:
        return bern
    n, _ = bern.shape
    # loss is binary cross entropy
    # for every output variable
    bce = -(targets * (bern + tiny).log() + (1 - targets) *
            (1 - bern + tiny).log()).sum()
    if error:
        return bce + addon, (bern - targets) / n
    else:
        return bce + addon
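
Usage mirrors sig_ssd above; a minimal sketch (again assuming gnumpy as gpu):

import gnumpy as gpu

z = gpu.randn((8, 3))                      # logits for 3 independent binary attributes
targets = gpu.rand(8, 3) > 0.5

bce = mia(z, targets)                      # summed binary cross entropy
bce, delta = mia(z, targets, error=True)   # loss and (bern - targets) / n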
Example #30
 def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
     y = gnp.logistic(gnp.as_garray(pred))
     return -((1 - self.target) * safe_log(1 - y) +
              self.target * safe_log(y)).sum(), y - self.target
Example #31
import numpy as np
import gnumpy as g
from bkputils import *
import scipy.special

g.max_memory_usage = 285 * (1024 * 1024)  # empirical value; unclear where it comes from, but it keeps gnumpy from crashing

w = 10000
h = 10000

write("making random matrices")
m1 = np.random.rand(w, h)
m2 = np.random.rand(h, w)
writeDone()

write("numpy multiply")
n = np.dot(m1, m2)
p = scipy.special.expit(n)
writeDone()

write("gnumpy setup")
a = g.garray(m1)
b = g.garray(m2)
writeDone()

write("gnumpy multiply")
c = g.dot(a, b)
c = g.logistic(c)
writeDone()
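
An optional sanity check, not in the original script, is to confirm that the CPU and GPU paths agree on a small corner of the result (loose tolerance, since gnumpy computes in float32):

write("verifying results")
print(np.allclose(p[:100, :100], c[:100, :100].as_numpy_array(), atol=1e-4))
writeDone()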
Example #32
def rbm_train(dataset, H, batch_size, epoch_count, epsilon, momentum, return_hidden=True, verbose=True):
    """
    Train a (binary) restricted Boltzmann machine.
    
    dataset: Input data. DataSet instance or matrix of size N (number of data points) x D (input dimension)
    H: Number of hidden units
    batch_size: Number of data points in each batch
    epoch_count: Number of training epochs
    epsilon: Learning rate, either a scalar or an array (one value for each epoch)
    momentum: Momentum parameter, either a scalar or an array (one value for each epoch)
    return_hidden: If True, returns hidden unit activations for training data. 
    verbose: If True, prints progress information
    Returns w_vh (weights between visible-hidden units), w_v (visible unit
    biases), w_h (hidden unit biases), h (hidden unit activations for input data),
    error (reconstruction error at each epoch)
    """
    if isinstance(dataset, ds.DataSet):
        train_x = dataset.train.x
        N = dataset.train.N
        D = dataset.train.D
    else:
        train_x = dataset
        N = train_x.shape[0]
        D = train_x.shape[1]

    batch_count = int(np.ceil(N / float(batch_size)))

    # if momentum is a scalar, create a list with the same value for all epochs
    if not isinstance(momentum, list):
        momentum = [momentum] * epoch_count
    if not isinstance(epsilon, list):
        epsilon = [epsilon] * epoch_count

    # initialize weights
    w_vh = gnp.randn((D, H)) * 0.1
    w_v = gnp.zeros((D, 1))
    w_h = gnp.zeros((H, 1))

    # weight updates
    dw_vh = gnp.zeros((D, H))
    dw_v = gnp.zeros((D, 1))
    dw_h = gnp.zeros((H, 1))

    # hidden unit activations
    if return_hidden:
        h = np.zeros((N, H))  # keep this a numpy array to save memory
    else:
        h = []

    start_time = time.time()
    # reconstruction errors over epochs
    error = []
    batch_order = list(range(batch_count))  # shuffled below, though the shuffled order is not used when indexing batches
    for e in range(epoch_count):
        if verbose:
            print("Epoch " + repr(e + 1))

        batch_error = []
        processed_batch = 0
        for b in range(batch_count):
            processed_batch += 1
            if verbose:
                print("\r%d/%d" % (processed_batch, batch_count)),

            start = b * batch_size
            end = (b + 1) * batch_size if (b + 1) * batch_size < N else N
            x = train_x[start:end, :].T

            # apply momentum
            dw_vh *= momentum[e]
            dw_v *= momentum[e]
            dw_h *= momentum[e]

            # positive phase
            ahp = gnp.dot(w_vh.T, x) + w_h
            hp = gnp.logistic(ahp)

            # if it is the last epoch, store hidden unit activations
            if return_hidden and e == epoch_count - 1:
                h[start:end, :] = gnp.as_numpy_array(hp.T)

            # add positive gradient term
            dw_vh += gnp.dot(x, hp.T)
            dw_v += gnp.sum(x, axis=1)[:, gnp.newaxis]
            dw_h += gnp.sum(hp, axis=1)[:, gnp.newaxis]

            # sample hiddens
            hs = hp > gnp.rand(hp.shape[0], hp.shape[1])

            # negative phase
            avn = gnp.dot(w_vh, hs) + w_v
            vn = gnp.logistic(avn)
            ahn = gnp.dot(w_vh.T, vn) + w_h
            hn = gnp.logistic(ahn)

            dw_vh -= gnp.dot(vn, hn.T)
            dw_v -= gnp.sum(vn, axis=1)[:, gnp.newaxis]
            dw_h -= gnp.sum(hn, axis=1)[:, gnp.newaxis]

            # update weights
            w_vh += epsilon[e] / (end - start) * dw_vh
            w_v += epsilon[e] / (end - start) * dw_v
            w_h += epsilon[e] / (end - start) * dw_h

            batch_error.append(gnp.mean((vn - x) ** 2))

        # shuffle batch order
        np.random.shuffle(batch_order)

        error.append(np.mean(batch_error))
        if verbose:
            print("\nReconstruction error: " + repr(error[-1]))
            print("Elapsed time: " + str(time.time() - start_time))

    return w_vh, w_v, w_h, h, error
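
A minimal training sketch for rbm_train (illustration only, on synthetic binary data; it assumes the module-level imports that rbm_train itself relies on — numpy as np, gnumpy as gnp, time, and the ds module — are in place):

import numpy as np
import gnumpy as gnp

# 500 binary data points of dimension 20, 10 hidden units
train_x = gnp.garray((np.random.rand(500, 20) < 0.3).astype(np.float64))
w_vh, w_v, w_h, h, error = rbm_train(
    train_x, H=10, batch_size=50, epoch_count=5,
    epsilon=0.1, momentum=0.5, return_hidden=True, verbose=False)
print(h.shape, error[-1])          # (500, 10) hidden activations, final reconstruction error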
Example #33
 def negative_phase(self):
     self.h = gpu.logistic(gpu.dot(self.v,self.w)+self.bias_h)
     self.w_updt -= gpu.dot(self.v.T,self.h)
     self.bias_h_updt -= gpu.sum(self.h,axis=0)
     self.bias_v_updt -= gpu.sum(self.v,axis=0)
Example #34
 def p_vis_given_hid(self, hid):
     """Returns a vector whose ith component is the probability that the ith
     visible unit is active given the states of the hidden units"""
     return gp.logistic(gp.dot(hid, self.weights.T) + self.bias_vis)
Example #35
 def gibbs_updates(self):
     self.h = (self.h > gpu.rand(100,800))    
     self.v = gpu.logistic(gpu.dot(self.h,self.w.T)+self.bias_v)
Example #36
 def vis_expectations(self, h):
     return gnp.logistic(self.vis_inputs(h))
Example #37
 def forward(self, A):
     return gnp.logistic(A)
Example #38
 def hid_expectations(self, v):
     return gnp.logistic(self.hid_inputs(v))
Example #39
 def forward_prop(self, x):
     return gnp.logistic(x)
Example #40
 def p_hid_given_vis(self, vis):
     """Returns a vector whose ith component is the probability that the ith
     hidden unit is active given the states of the visible units"""
     return gp.logistic(gp.dot(vis, self.weights) + self.bias_hid)
Example #41
 def forward_prop(self, x):
     return gnp.logistic(x)
Example #42
        m2 = (momentum*m2) - ((grad2 + n2*L2)*alpha/(batch_size*1.0)) 
        mb1 = (momentum*mb1) - ((gradb1 + nb1*L2)*alpha/(batch_size*1.0))
        mb2 = (momentum*mb2) - ((gradb2 + nb2*L2)*alpha/(batch_size*1.0))
      
        w1 = w1 + m1
        w2 = w2 + m2    
        b1 = b1 + mb1
        b2 = b2 + mb2

    momentum = momentum + 0.001
    
    if momentum > 0.95: momentum = 0.95    
        
    batch_error_cv = 0
    for i in range(100):
        batch_error_cv += 1.0 - (gpu.sum(np.argmax(gpu.dot(gpu.logistic(gpu.dot(X_val[i],w1))*0.5,w2),axis=1) == y_val[i])/120.)
  
    batch_error = 0   
    for i in xrange(batches):#train error 5.9 sec
        z1 = gpu.dot(X[i],w1).logistic()*0.5
        feedforward = gpu.dot(z1,w2)
        batch_error += 1. - (np.sum(np.equal(np.argmax(feedforward,axis=1),y.as_numpy_array()[i].T)/(batch_size*1.0)))
    '''    
    if gpu.max(w1)**2 > 9:
        print 'halving the weights of w1'
        w1 = w1/2.
        m1 = m1/2.
    
    if gpu.max(w2)**2 > 9:
        print 'halving the weights of w2'
        w2 = w2/2.
Example #43
        mb2 = (momentum * mb2) - ((gradb2 + nb2 * L2) * alpha /
                                  (batch_size * 1.0))

        w1 = w1 + m1
        w2 = w2 + m2
        b1 = b1 + mb1
        b2 = b2 + mb2

    momentum = momentum + 0.001

    if momentum > 0.95: momentum = 0.95

    batch_error_cv = 0
    for i in range(100):
        batch_error_cv += 1.0 - (gpu.sum(
            np.argmax(gpu.dot(gpu.logistic(gpu.dot(X_val[i], w1)) * 0.5, w2),
                      axis=1) == y_val[i]) / 120.)

    batch_error = 0
    for i in xrange(batches):  #train error 5.9 sec
        z1 = gpu.dot(X[i], w1).logistic() * 0.5
        feedforward = gpu.dot(z1, w2)
        batch_error += 1. - (np.sum(
            np.equal(np.argmax(feedforward, axis=1),
                     y.as_numpy_array()[i].T) / (batch_size * 1.0)))
    '''    
    if gpu.max(w1)**2 > 9:
        print 'halving the weights of w1'
        w1 = w1/2.
        m1 = m1/2.
    
Example #44
 def hid_expectations(self, v):
     return gnp.logistic(self.hid_inputs(v))
Example #45
 def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
     y = gnp.logistic(pred)
     return -((1 - self.target) * safe_log(1 - y) + self.target * safe_log(y)).sum(), y - self.target
Example #46
 def forward(self, A):
     return gnp.logistic(A)
Example #47
File: ais.py Project: surban/ml
 def base_sample_vis(self, n_samples):
     "Samples the visible units from the base rate RBM"
     p = gp.logistic(self.base_bias_vis)
     r = gp.rand((n_samples, self.base_bias_vis.shape[0]))
     return r < p
Example #48
 def vis_expectations(self, h):
     return gnp.logistic(self.vis_inputs(h))
Example #49
def exact_fisher_information(rbm, batch_units=10, show_progress=False, vis_shape=None, downsample=1, return_mean=False):
    batch_size = 2 ** batch_units

    if downsample == 1:
        vis_idxs = np.arange(rbm.nvis)
    else:
        temp = np.arange(rbm.nvis).reshape((28, 28))
        mask = np.zeros((28, 28), dtype=bool)
        mask[::downsample, ::downsample] = 1
        vis_idxs = temp[mask]
    nvis = vis_idxs.size
    nhid = rbm.nhid

    num_params = nvis + nhid + nvis * nhid

    E_vis = np.zeros(nvis)
    E_hid = np.zeros(nhid)
    E_vishid = np.zeros((nvis, nhid))

    E_vis_vis = np.zeros((nvis, nvis))
    E_vis_hid = np.zeros((nvis, nhid))
    E_vis_vishid = np.zeros((nvis, nvis, nhid))
    E_hid_hid = np.zeros((nhid, nhid))
    E_hid_vishid = np.zeros((nhid, nvis, nhid))
    E_vishid_vishid = np.zeros((nvis, nhid, nvis, nhid))
    

    for hid, p in iter_configurations(rbm, batch_units=batch_units, show_progress=show_progress):
        with misc.gnumpy_conversion_check('allow'):
            cond_vis = gnp.logistic(rbm.vis_inputs(hid))
            cond_vis = gnp.garray(cond_vis.as_numpy_array()[:, vis_idxs])
            vishid = (cond_vis[:, :, nax] * hid[:, nax, :]).reshape((batch_size, nvis * nhid))
            var_vis = cond_vis * (1. - cond_vis)

            E_vis += gnp.dot(p, cond_vis)
            E_hid += gnp.dot(p, hid)
            E_vishid += gnp.dot(cond_vis.T * p, hid)

            E_vis_vis += gnp.dot(cond_vis.T * p, cond_vis)
            diag_term = gnp.dot(p, cond_vis * (1. - cond_vis))
            E_vis_vis += gnp.garray(np.diag(diag_term.as_numpy_array()))

            E_vis_hid += gnp.dot(cond_vis.T * p, hid)

            E_hid_hid += gnp.dot(hid.T * p, hid)

            E_vis_vishid += gnp.dot(cond_vis.T * p, vishid).reshape((nvis, nvis, nhid))
            diag_term = gnp.dot(var_vis.T * p, hid)
            E_vis_vishid[np.arange(nvis), np.arange(nvis), :] += diag_term

            E_hid_vishid += gnp.dot(hid.T * p, vishid).reshape((nhid, nvis, nhid))

            E_vishid_vishid += gnp.dot(vishid.T * p, vishid).reshape((nvis, nhid, nvis, nhid))
            diag_term = ((cond_vis * (1. - cond_vis))[:, :, nax, nax] * hid[:, nax, :, nax] * hid[:, nax, nax, :] * p[:, nax, nax, nax]).sum(0)
            E_vishid_vishid[np.arange(nvis), :, np.arange(nvis), :] += diag_term

    G = np.zeros((num_params, num_params))
    vis_slc = slice(0, nvis)
    hid_slc = slice(nvis, nvis + nhid)
    vishid_slc = slice(nvis + nhid, None)
    G[vis_slc, vis_slc] = E_vis_vis
    G[vis_slc, hid_slc] = E_vis_hid
    G[vis_slc, vishid_slc] = E_vis_vishid.reshape((nvis, nvis * nhid))
    G[hid_slc, vis_slc] = E_vis_hid.T
    G[hid_slc, hid_slc] = E_hid_hid
    G[hid_slc, vishid_slc] = E_hid_vishid.reshape((nhid, nvis * nhid))
    G[vishid_slc, vis_slc] = E_vis_vishid.reshape((nvis, nvis * nhid)).T
    G[vishid_slc, hid_slc] = E_hid_vishid.reshape((nhid, nvis * nhid)).T
    G[vishid_slc, vishid_slc] = E_vishid_vishid.reshape((nvis * nhid, nvis * nhid))

    s = np.concatenate([E_vis, E_hid, E_vishid.ravel()])
    G -= np.outer(s, s)

    if return_mean:
        return G, s
    else:
        return G