Example #1
 def backward(self, dEdY):
     # TODO: generalize; for now, assume the attention model.
     dEdX = []
     if self.gpu:
         if len(self.X) == 2:
             dEdY = dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1])
             dEdY = gpu.as_garray(dEdY)
             dEdX1 = self.beta * gpu.sum(dEdY * self.X[1], axis=2)
             dEdX2 = self.beta * dEdY * self.X[0]
             dEdX.append(dEdX1.as_numpy_array(dtype='float32'))
             dEdX.append(dEdX2.as_numpy_array(dtype='float32'))
         elif len(self.X) == 3:
             dEdY = gpu.as_garray(dEdY)
             dEdY2 = dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1])
             dEdY2 = gpu.as_garray(dEdY2)
             dEdX1 = self.X[2] * gpu.sum(dEdY2 * self.X[1], axis=2)
             dEdX2 = self.X[2].reshape(self.X[2].shape[0], 1, 1) * dEdY2 * self.X[0]
             dEdX3 = gpu.sum(dEdY * self.Z, axis=-1).reshape(self.X[2].shape[0], 1)
             dEdX.append(dEdX1.as_numpy_array(dtype='float32'))
             dEdX.append(dEdX2.as_numpy_array(dtype='float32'))
             dEdX.append(dEdX3.as_numpy_array(dtype='float32'))
     else:
         if len(self.X) == 2:
             dEdY = dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1])
             dEdX.append(self.beta * np.sum(dEdY * self.X[1], axis=2))
             dEdX.append(self.beta * dEdY * self.X[0])
         elif len(self.X) == 3:
             dEdY2 = dEdY.reshape(dEdY.shape[0], 1, dEdY.shape[1])
             dEdX.append(self.X[2] * np.sum(dEdY2 * self.X[1], axis=2))
             dEdX.append(self.X[2].reshape(self.X[2].shape[0], 1, 1) * dEdY2 * self.X[0])
             dEdX.append(np.sum(dEdY * self.Z, axis=-1).reshape(self.X[2].shape[0], 1))
     return dEdX
Example #2
 def _updateWeights(self, dEdW):
     if self.gpu:
         if self.gradientClip > 0.0:
             self.dEdWnorm = gpu.sqrt(gpu.sum(dEdW ** 2))
             if self.dEdWnorm > self.gradientClip:
                 dEdW *= self.gradientClip / self.dEdWnorm
         if self.learningRate > 0.0:
             self.lastdW = -self.learningRate * dEdW + \
                        self.momentum * self.lastdW
             self.W += self.lastdW
         if self.weightRegConst > 0.0:
             a = self.learningRate * self.weightRegConst
             self.W -= a * self.W
         if self.weightClip > 0.0:
             self.Wnorm = gpu.sqrt(gpu.sum(self.W ** 2))
             if self.Wnorm > self.weightClip:
                 self.W *= self.weightClip / self.Wnorm
     else:
         if self.gradientClip > 0.0:
             self.dEdWnorm = np.sqrt(np.sum(np.power(dEdW, 2)))
             if self.dEdWnorm > self.gradientClip:
                 dEdW *= self.gradientClip / self.dEdWnorm
         if self.learningRate > 0.0:
             self.lastdW = -self.learningRate * dEdW + \
                        self.momentum * self.lastdW
             self.W += self.lastdW
         if self.weightRegConst > 0.0:
             a = self.learningRate * self.weightRegConst
             self.W -= a * self.W
         if self.weightClip > 0.0:
             self.Wnorm = np.sqrt(np.sum(np.power(self.W, 2)))
             if self.Wnorm > self.weightClip:
                 self.W *= self.weightClip / self.Wnorm
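For reference, a minimal NumPy sketch of the same update rule as Example #2 (plain np arrays standing in for the gpu module; the hyperparameter defaults are assumptions for illustration, not values from the original project):

import numpy as np

def sgd_update(W, dEdW, lastdW, learningRate=0.01, momentum=0.9,
               gradientClip=1.0, weightRegConst=0.0, weightClip=0.0):
    # Clip the gradient by its global L2 norm.
    if gradientClip > 0.0:
        norm = np.sqrt(np.sum(dEdW ** 2))
        if norm > gradientClip:
            dEdW = dEdW * (gradientClip / norm)
    # Momentum step.
    lastdW = -learningRate * dEdW + momentum * lastdW
    W = W + lastdW
    # L2 weight decay.
    if weightRegConst > 0.0:
        W = W - learningRate * weightRegConst * W
    # Clip the weights by their global L2 norm.
    if weightClip > 0.0:
        Wnorm = np.sqrt(np.sum(W ** 2))
        if Wnorm > weightClip:
            W = W * (weightClip / Wnorm)
    return W, lastdW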
Example #3
def simulate_step(bodies, dt_min, epsilon, dt_output, alpha):
    current_t = 0
    current_step = 0
    n_bodies = bodies.r.shape[0]
    delta_v = np.zeros_like(bodies.v)
    for i in range(n_bodies):
        coord_diff = bodies.r - bodies.r[i, :]
        r_ik3 = (gpu.sum(coord_diff**2, axis=1) + epsilon**2)**1.5  #+ 1e-16
        delta_v[i, :] = gpu.sum(bodies.m[:, np.newaxis] * coord_diff /
                                r_ik3[:, np.newaxis],
                                axis=0)

    dt = max(calculate_dt(bodies.v, delta_v, n_bodies, alpha), dt_min)
    bodies.v += 0.5 * dt * delta_v

    while True:
        bodies.r += dt * bodies.v
        for i in range(n_bodies):
            coord_diff = bodies.r - bodies.r[i, :]
            r_ik3 = (gpu.sum(coord_diff**2, axis=1) +
                     epsilon**2)**1.5  #+ 1e-16
            delta_v[i, :] = gpu.sum(bodies.m[:, np.newaxis] * coord_diff /
                                    r_ik3[:, np.newaxis],
                                    axis=0)

        dt = max(calculate_dt(bodies.v, delta_v, n_bodies, alpha), dt_min)
        bodies.v += dt * delta_v
        if current_step * dt_output <= current_t:
            current_step += 1
            yield current_t
            gpu.status()

        current_t += dt
Example #4
def l1svm_mia(z, targets, predict=False, error=False, addon=0):
    """
    L1-SVM with the hinge loss, for multiple independent attributes.
    addon, weight
    Note: the targets here are (1, -1)
    """
    if predict:
        # argmax_t(z*t)
        t = 2 * (z > 0) - 1
        return t

    _value = (1 - z * targets)
    indicator = _value > 0
    maximum = indicator * _value
    # use a different C per class for an unbalanced dataset
    # automatically adjust weights inversely proportional to class frequencies
    n, _ = targets.shape
    positive = gpu.sum((targets + 1.) / 2, axis=0)
    negative = n - positive
    inv_ne_freq = float(n) / (negative + 1)
    inv_po_freq = float(n) / (positive + 1)
    class_weight = inv_po_freq * (targets > 0) + inv_ne_freq * (targets < 0)
    bhl = gpu.sum(maximum * class_weight)
    if error:
        err = -targets * indicator * class_weight
        return bhl + addon, err
    else:
        return bhl + addon
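A small NumPy usage sketch for the hinge part of Example #4 (np stands in for gpu; the toy scores, the +1/-1 targets, and the omission of the class-frequency weighting are assumptions for illustration):

import numpy as np

# Toy scores z and +1/-1 targets for two independent attributes.
z = np.array([[ 0.8, -0.3],
              [-1.2,  0.5],
              [ 0.1, -0.9]])
targets = np.array([[ 1, -1],
                    [-1,  1],
                    [ 1, -1]])

hinge = np.maximum(0.0, 1.0 - z * targets)   # per-element hinge loss
preds = 2 * (z > 0) - 1                      # same rule as predict=True
print(hinge.sum())                           # total (unweighted) hinge loss: 2.4
assert (preds == targets).all()              # predictions recover the labels here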
Example #5
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(
            gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) +
            params[self.m_end:self.m_end + self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(
            self.shape).T) + params[-self.shape[0]:]

        w = params[:self.m_end].reshape(self.shape)
        cae = gpu.sum(
            gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0))
        cae *= self.cae

        _, delta = self.score(Z, inpts, error=True, addon=cae)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        cae_grad = gpu.mean(Dsigmoid(hddn)**2, axis=0) * w
        cae_grad += (gdot(inpts.T, (Dsigmoid(hddn)**2 * (1 - 2 * hddn))) / m *
                     gpu.sum(w**2, axis=0))
        g[:self.m_end] += self.cae * 2 * cae_grad.ravel()

        dsc_dha = Dsigmoid(hddn) * gdot(
            delta, params[:self.m_end].reshape(self.shape))

        g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:-self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
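For reference, the cae term accumulated in Examples #5 and #6 is the contractive penalty. Assuming Dsigmoid(h) = h * (1 - h) (the derivative of the logistic activation), the code computes the sample-averaged squared Frobenius norm of the Jacobian of the hidden layer h = sigmoid(x W + b):

\frac{1}{m}\sum_{i=1}^{m}\Big\lVert \frac{\partial h(x_i)}{\partial x_i}\Big\rVert_F^2
  = \sum_{j}\Big(\frac{1}{m}\sum_{i=1}^{m}\big(h_{ij}(1-h_{ij})\big)^2\Big)\sum_{k}w_{kj}^2

which is exactly gpu.sum(gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0)).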
Example #6
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(
            gpu.dot(inpts, params[: self.m_end].reshape(self.shape)) + params[self.m_end : self.m_end + self.shape[1]]
        )
        Z = gdot(hddn, params[: self.m_end].reshape(self.shape).T) + params[-self.shape[0] :]

        w = params[: self.m_end].reshape(self.shape)
        cae = gpu.sum(gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * gpu.sum(w ** 2, axis=0))
        cae *= self.cae

        _, delta = self.score(Z, inpts, error=True, addon=cae)

        g[: self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0] :] = delta.sum(axis=0)

        cae_grad = gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * w
        cae_grad += gdot(inpts.T, (Dsigmoid(hddn) ** 2 * (1 - 2 * hddn))) / m * gpu.sum(w ** 2, axis=0)
        g[: self.m_end] += self.cae * 2 * cae_grad.ravel()

        dsc_dha = Dsigmoid(hddn) * gdot(delta, params[: self.m_end].reshape(self.shape))

        g[: self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end : -self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #7
def mlpSingleOutput1Layer_costfunc(x, *args):
    inputSize, l1Size, lambda_hidden, inputs, targets = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    inputs = gpu.garray(inputs)
    targets = gpu.garray(targets)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    theta_output = gpu.garray(reshape(x[num_weights_L1:shape(x)[0]], (1, l1Size+1)))
    inputs = gpu.concatenate((gpu.ones((1,numCases)), inputs), axis = 0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis = 0)
    #hidden_activation_L1 = hidden_activation_L1 * dropout_prob
    hidden_sum_output = gpu.dot(theta_output, hidden_activation_L1)
    outputs = hidden_sum_output.logistic()
    output_target_diff = (outputs - targets)**2
    regularized_penalty_output = theta_output[:,1:shape(theta_output)[1]]
    regularized_penalty_output = regularized_penalty_output * regularized_penalty_output
    regularized_penalty_L1 = theta_L1[:,1:shape(theta_L1)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    cost = gpu.sum(output_target_diff)/(2*numCases) + 0.5 * lambda_hidden*(gpu.sum(regularized_penalty_L1)+gpu.sum(regularized_penalty_output))
    print 'Multilayer Perceptron Cost:', cost
    del inputs
    del theta_L1
    del hidden_sum_L1
    del hidden_activation_L1
    del regularized_penalty_output
    del regularized_penalty_L1
    gpu.free_reuse_cache()
    return cost
Example #8
    def _cd_update_terms(self, vis, model_vis, model_p_vis):
        """Returns (weights update, visible bias update, hidden bias update) given
        visible states from the data vis, visible states sampled from the 
        model model_vis and the probability of the visible units being active         
        from the model."""
        #print "vis.shape:                ", vis.shape
        #print "p_hid(vis).shape:         ", self.p_hid(vis).shape
        #print "model_p_vis.shape:        ", model_p_vis.shape
        #print "p_hid(model_p_vis).shape: ", self.p_hid(model_p_vis).shape
        
        # my update rule:
        #dweights = (gp.dot(vis.T, self.p_hid(vis)) - 
        #            gp.dot(model_p_vis.T, self.p_hid(model_vis)))
        #dbias_vis = gp.sum(vis, axis=0) - gp.sum(model_p_vis, axis=0)
        #dbias_hid = (gp.sum(self.p_hid(vis), axis=0) - 
        #             gp.sum(self.p_hid(model_vis), axis=0))

        # deep learning update rule:
        dweights = (gp.dot(vis.T, self.p_hid_given_vis(vis)) - 
                    gp.dot(model_vis.T, self.p_hid_given_vis(model_vis)))
        dbias_vis = gp.sum(vis, axis=0) - gp.sum(model_vis, axis=0)
        dbias_hid = (gp.sum(self.p_hid_given_vis(vis), axis=0) - 
                     gp.sum(self.p_hid_given_vis(model_vis), axis=0))

        n_samples = vis.shape[0]
        return (dweights / n_samples, 
                dbias_vis / n_samples, 
                dbias_hid / n_samples)
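A minimal NumPy sketch of the same CD-1 statistics used in Example #8, assuming p_hid_given_vis is a logistic layer with hypothetical weights W and hidden bias b_h:

import numpy as np

def p_hid_given_vis(vis, W, b_h):
    # Probability of each hidden unit being on, given the visible states.
    return 1.0 / (1.0 + np.exp(-(np.dot(vis, W) + b_h)))

def cd_update_terms(vis, model_vis, W, b_h):
    n = vis.shape[0]
    dweights = np.dot(vis.T, p_hid_given_vis(vis, W, b_h)) \
               - np.dot(model_vis.T, p_hid_given_vis(model_vis, W, b_h))
    dbias_vis = vis.sum(axis=0) - model_vis.sum(axis=0)
    dbias_hid = p_hid_given_vis(vis, W, b_h).sum(axis=0) \
                - p_hid_given_vis(model_vis, W, b_h).sum(axis=0)
    return dweights / n, dbias_vis / n, dbias_hid / n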
Example #9
 def _updateWeights(self, dEdW):
     if self.gpu:
         if self.gradientClip > 0.0:
             self.dEdWnorm = gpu.sqrt(gpu.sum(dEdW**2))
             if self.dEdWnorm > self.gradientClip:
                 dEdW *= self.gradientClip / self.dEdWnorm
         if self.learningRate > 0.0:
             self.lastdW = -self.learningRate * dEdW + \
                        self.momentum * self.lastdW
             self.W += self.lastdW
         if self.weightRegConst > 0.0:
             a = self.learningRate * self.weightRegConst
             self.W -= a * self.W
         if self.weightClip > 0.0:
             self.Wnorm = gpu.sqrt(gpu.sum(self.W**2))
             if self.Wnorm > self.weightClip:
                 self.W *= self.weightClip / self.Wnorm
     else:
         if self.gradientClip > 0.0:
             self.dEdWnorm = np.sqrt(np.sum(np.power(dEdW, 2)))
             if self.dEdWnorm > self.gradientClip:
                 dEdW *= self.gradientClip / self.dEdWnorm
         if self.learningRate > 0.0:
             self.lastdW = -self.learningRate * dEdW + \
                        self.momentum * self.lastdW
             self.W += self.lastdW
         if self.weightRegConst > 0.0:
             a = self.learningRate * self.weightRegConst
             self.W -= a * self.W
         if self.weightClip > 0.0:
             self.Wnorm = np.sqrt(np.sum(np.power(self.W, 2)))
             if self.Wnorm > self.weightClip:
                 self.W *= self.weightClip / self.Wnorm
Example #10
def costfunc_gpu_ReLU(x, *args):
    num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta = args
    num_weights1 = (num_input+1)*num_hidden
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden,num_input+1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output,num_hidden+1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = gpu.log(1+hidden_sum.exp())
    p_avg = gpu.sum(hidden_activation,axis=1)/nData
    hidden_activation = gpu.concatenate((gpu.ones((1,nData)), hidden_activation), axis = 0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:,1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:,1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - inputs)*(output - inputs)
    KL = gpu.sum(sparsityParam*gpu.log(sparsityParam/p_avg) + (1-sparsityParam)*gpu.log((1-sparsityParam)/(1-p_avg)))
    cost = gpu.sum(output_target_diff)/(2*nData) + 0.5 * lambda_val * (gpu.sum(regularized_penalty1) + gpu.sum(regularized_penalty2)) + beta*KL
    print 'ReLU Linear Decoder Cost: ', cost
    return cost
Example #11
def simulate_step(bodies, dt_min, G, epsilon, dt_output, alpha):
    current_t = 0
    current_step = 0
    n_bodies = bodies.r.shape[0]
    delta_v = np.zeros_like(bodies.v)
    for i in range(n_bodies):
        coord_diff = bodies.r - bodies.r[i, :]
        r_ik3 = (gpu.sum(coord_diff**2, axis=1) + epsilon**2)**1.5 #+ 1e-16
        delta_v[i,:] = G*gpu.sum(bodies.m[:, np.newaxis] * coord_diff / r_ik3[:, np.newaxis], axis=0)
        
    dt = max(calculate_dt(bodies.v, delta_v, n_bodies, alpha), dt_min)
    bodies.v += 0.5 * dt * delta_v

    while True:
        bodies.r += dt * bodies.v      
        for i in range(n_bodies):        
            coord_diff = bodies.r - bodies.r[i, :]
            r_ik3 = (gpu.sum(coord_diff**2, axis=1) + epsilon**2)**1.5 #+ 1e-16
            delta_v[i,:] = G*gpu.sum(bodies.m[:, np.newaxis] * coord_diff / r_ik3[:, np.newaxis], axis=0)
        
        dt = max(calculate_dt(bodies.v, delta_v, n_bodies, alpha), dt_min)
        bodies.v += dt * delta_v
        if current_step * dt_output <= current_t:
            current_step += 1
            yield current_t
            gpu.status()
            
        current_t += dt   
Example #12
   def forward(self):
      """
      Perform a forward pass to calculate the activation (objective)
      """

      numExamples = self.output_port.getOutput().shape[0]
      self.objective = -gpu.sum(gpu.garray(self.target_port.getOutput()) * gpu.log(gpu.garray(self.output_port.getOutput())))
      self.objective += -gpu.sum((1.0 - self.target_port.getOutput())*(gpu.log(1.000001 - self.output_port.getOutput())))
      self.objective /= numExamples
Example #13
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(
        reshape(x[num_weights_L1:num_weights_L2 + num_weights_L1],
                (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_softmax = gpu.garray(
        reshape(x[num_weights_L2 + num_weights_L1:shape(x)[0]],
                (numClasses, l2Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_activation_L1),
                                           axis=0)
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = hidden_sum_L2.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L2)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    temp = groundTruth * gpu.log(predictions)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:, 1:shape(theta_L2)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    cost = -1 * gpu.sum(temp) / numCases + 0.5 * lambda_hidden * (
        gpu.sum(regularized_penalty_L1) + gpu.sum(regularized_penalty_L2)
    ) + 0.5 * lambda_softmax * gpu.sum(theta_softmax * theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_L2
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost
Example #14
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(
            gpu.dot(inpts, params[: self.m_end].reshape(self.shape)) + params[self.m_end : self.m_end + self.shape[1]]
        )
        Z = gdot(hddn, params[: self.m_end].reshape(self.shape).T) + params[-self.shape[0] :]

        w = params[: self.m_end].reshape(self.shape)
        cae = gpu.sum(gpu.mean(Dsigmoid(hddn) ** 2, axis=0) * gpu.sum(w ** 2, axis=0))
        cae *= self.cae

        sc = self.score(Z, inpts, addon=cae)
        return np.array([sc, cae])
Example #15
def dbn_supervised_predict_exact(ws_vh, ws_v, ws_h, x):
    """
    Predict the class label of input x from supervised DBN
    Uses the exact method mentioned in section 6.2 of Hinton, Osindero, Teh 2006
    The free energy formula is taken from http://deeplearning.net/tutorial/rbm.html
    
    x: Input data. (NxD matrix)
    """
    L = len(ws_vh)
    N = x.shape[0]

    # make a forward pass to get from input layer to visible layer of top level
    # RBM
    h_prev = x.T

    # deterministic forward (bottom-up) pass: we propagate the activations,
    # not stochastically sampled states
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h_prev) + ws_h[l]
        h_prev = gnp.logistic(ah)

    H = ws_vh[-1].shape[0]  # number of visible units top level RBM
    Hx = h_prev.shape[0]  # number of hidden units in the penultimate layer
    K = H - Hx
    # (H - Hx) is the number of supervised inputs to top level RBM

    # for every class, assume it is the correct label and calculate its free energy
    y = gnp.zeros((K, N))
    free_energy = gnp.zeros((N, K))  # we actually calculate -free_energy
    for k in range(K):
        # set the current assumed class label
        y[k, :] = 1.0

        # visible unit vector
        v = gnp.concatenate((y, h_prev))
        e_v = gnp.dot(ws_v[-1].T, v)  # bias energy term

        ah = gnp.dot(ws_vh[-1].T, v) + ws_h[-1]
        e_h = gnp.sum(gnp.log(gnp.exp(ah) + 1.0), axis=0)

        free_energy[:, k] = e_v + e_h

        # zero the class labels for next iteration
        y[:, :] = 0.0

    # since these numbers may get pretty small, use the sum-exp trick for converting
    # these to probabilities
    pred_y = (
        gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis])
        / gnp.sum(gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis]), axis=1)[:, gnp.newaxis]
    )

    return pred_y
Example #16
def calculate_dt(v, delta_v, N_bodies, alpha):
    a_max = 0.
    for i in range(N_bodies):
        delta_v = gpu.garray(delta_v)
        a = gpu.sum(delta_v[i,:]**2)
        if a > a_max:
            a_max = a
            a_max_index = i
            
    v = gpu.garray(v)
    v_mag = gpu.sqrt(gpu.sum(v[a_max_index,:]**2))
    return alpha*v_mag/a_max
Example #17
def calculate_dt(v, delta_v, N_bodies, alpha):
    a_max = 0.
    for i in range(N_bodies):
        delta_v = gpu.garray(delta_v)
        a = gpu.sum(delta_v[i, :]**2)
        if a > a_max:
            a_max = a
            a_max_index = i

    v = gpu.garray(v)
    v_mag = gpu.sqrt(gpu.sum(v[a_max_index, :]**2))
    return alpha * v_mag / a_max
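A NumPy rendition of the selection rule in the calculate_dt examples above (np in place of gpu; the velocities, accelerations, and alpha = 0.01 are assumed toy values). As in the original, the divisor is the squared acceleration magnitude of the fastest-accelerating body:

import numpy as np

rng = np.random.default_rng(0)
v = rng.normal(size=(4, 3))        # velocities of 4 bodies
delta_v = rng.normal(size=(4, 3))  # accelerations (dv/dt) of 4 bodies
alpha = 0.01                       # assumed accuracy parameter

a_sq = np.sum(delta_v ** 2, axis=1)     # squared acceleration magnitudes
i = int(np.argmax(a_sq))                # body with the largest acceleration
v_mag = np.sqrt(np.sum(v[i] ** 2))
dt = alpha * v_mag / a_sq[i]
print(dt)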
Example #18
def ssd(z, targets, weight=0.5, predict=False, error=False, addon=0):
    """
    """
    if predict:
        return z
    n, m = z.shape
    err = z - targets
    if error:
        # rec. error + first deriv
        return weight * gpu.sum(err**2) / n + addon, 2. * weight * err / n
    else:
        # only return reconstruction error
        return weight * gpu.sum(err**2) / n + addon
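A quick NumPy check of the loss and derivative returned by Example #18 with the default weight=0.5 and addon=0 (toy values assumed):

import numpy as np

z = np.array([[1.0, 2.0], [3.0, 4.0]])
targets = np.array([[1.5, 2.0], [2.0, 5.0]])
n = z.shape[0]
err = z - targets

loss = 0.5 * np.sum(err ** 2) / n   # 0.5 * 2.25 / 2 = 0.5625
grad = 2.0 * 0.5 * err / n          # elementwise first derivative
print(loss)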
Example #19
    def forward(self):
        """
      Perform a forward pass to calculate the activation (objective)
      """

        numExamples = self.output_port.getOutput().shape[0]
        self.objective = -gpu.sum(
            gpu.garray(self.target_port.getOutput()) *
            gpu.log(gpu.garray(self.output_port.getOutput())))
        self.objective += -gpu.sum(
            (1.0 - self.target_port.getOutput()) *
            (gpu.log(1.000001 - self.output_port.getOutput())))
        self.objective /= numExamples
Example #20
def fine_tuning_cost_gpu(x, *args):
    inputSize, l1Size, l2Size, l3Size, l4Size, l5Size, lambda_val, inputs = args
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    num_weights_L3 = l3Size * (l2Size + 1)
    num_weights_L4 = l4Size * (l3Size + 1)
    num_weights_L5 = l5Size * (l4Size + 1)
    #num_weights_L6 = inputSize * (l5Size + 1)
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    #weights1 = reshape(x[0:num_weights_L1], (l1Size, inputSize + 1))
    weights1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #weights2 = reshape(x[num_weights_L1:num_weights_L1+num_weights_L2], (l2Size, l1Size + 1))
    weights2 = x[num_weights_L1:num_weights_L1+num_weights_L2].reshape((l2Size, l1Size + 1))
    #weights3 = reshape(x[num_weights_L1+num_weights_L2:num_weights_L1+num_weights_L2+num_weights_L3], (l3Size, l2Size + 1))
    weights3 = x[num_weights_L1+num_weights_L2:num_weights_L1+num_weights_L2+num_weights_L3].reshape((l3Size, l2Size + 1))
    #weights4 = reshape(x[num_weights_L1+num_weights_L2+num_weights_L3:num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4], (l4Size, l3Size + 1))
    weights4 = x[num_weights_L1+num_weights_L2+num_weights_L3:num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4].reshape((l4Size, l3Size + 1))
    #weights5 = reshape(x[num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4:num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4+num_weights_L5], (l5Size, l4Size + 1))
    weights5 = x[num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4:num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4+num_weights_L5].reshape((l5Size, l4Size + 1))
    #weights6 = reshape(x[num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4+num_weights_L5:shape(x)[0]], (inputSize, l5Size+1))
    weights6 = x[num_weights_L1+num_weights_L2+num_weights_L3+num_weights_L4+num_weights_L5:shape(x)[0]].reshape((inputSize, l5Size+1))
    nData = shape(inputs)[1]
    x = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden1_sum = gpu.dot(weights1, x)
    hidden1_activation = hidden1_sum.logistic()
    hidden1_activation = gpu.concatenate((gpu.ones((1,nData)), hidden1_activation), axis = 0)
    hidden2_sum = gpu.dot(weights2, hidden1_activation)
    hidden2_activation = hidden2_sum.logistic()
    hidden2_activation = gpu.concatenate((gpu.ones((1,nData)), hidden2_activation), axis = 0)
    hidden3_sum = gpu.dot(weights3, hidden2_activation)
    hidden3_activation = hidden3_sum.logistic()
    hidden3_activation = gpu.concatenate((gpu.ones((1,nData)), hidden3_activation), axis = 0)
    hidden4_sum = gpu.dot(weights4, hidden3_activation)
    hidden4_activation = hidden4_sum.logistic()
    hidden4_activation = gpu.concatenate((gpu.ones((1,nData)), hidden4_activation), axis = 0)
    hidden5_sum = gpu.dot(weights5, hidden4_activation)
    hidden5_activation = hidden5_sum.logistic()
    hidden5_activation = gpu.concatenate((gpu.ones((1,nData)), hidden5_activation), axis = 0)
    output_sum = gpu.dot(weights6, hidden5_activation)
    outputs = output_sum.logistic()
    regularized_penalty4 = weights4[:,1:shape(weights4)[1]]
    regularized_penalty5 = weights5[:,1:shape(weights5)[1]]
    regularized_penalty6 = weights6[:,1:shape(weights6)[1]]
    regularized_penalty4 = regularized_penalty4 ** 2
    regularized_penalty5 = regularized_penalty5 ** 2
    regularized_penalty6 = regularized_penalty6 ** 2
    output_target_diff = (outputs - inputs)**2
    cost = gpu.sum(output_target_diff)/(2*nData) + 0.5 * lambda_val * (gpu.sum(regularized_penalty4) + gpu.sum(regularized_penalty5) + gpu.sum(regularized_penalty6))
    print 'Fine Tuning Cost: ', cost
    return cost
Example #21
def costfunc_gpu(x, *args):
    num_input, num_hidden, num_output, inputs, noNoiseData, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    x = gpu.garray(x)
    #    randomNoise = random.random_sample(shape(inputs))
    #    criteriaTable = randomNoise > 0.32
    #    inputs = inputs * criteriaTable
    inputs = gpu.garray(inputs)
    noNoiseData = gpu.garray(noNoiseData)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape(
        (num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation, axis=1) / nData
    hidden_activation = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_activation),
                                        axis=0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:, 1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:, 1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - noNoiseData) * (output - noNoiseData)
    KL = gpu.sum(sparsityParam * gpu.log(sparsityParam / p_avg) +
                 (1 - sparsityParam) * gpu.log((1 - sparsityParam) /
                                               (1 - p_avg)))
    cost = gpu.sum(output_target_diff) / (2 * nData) + 0.5 * lambda_val * (
        gpu.sum(regularized_penalty1) +
        gpu.sum(regularized_penalty2)) + beta * KL
    print 'GPU Linear Denoising Decoder Cost: ', cost
    del x
    del inputs
    del noNoiseData
    del data
    del hidden_sum
    del hidden_activation
    del p_avg
    del output
    del regularized_penalty1
    del regularized_penalty2
    del weights1
    del weights2
    del output_target_diff
    gpu.free_reuse_cache()
    return cost
Example #22
    def pt_score(self, params, inpts, **kwargs):
        hddn = logistic(
            gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) +
            params[self.m_end:self.m_end + self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(
            self.shape).T) + params[-self.shape[0]:]

        w = params[:self.m_end].reshape(self.shape)
        cae = gpu.sum(
            gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0))
        cae *= self.cae

        sc = self.score(Z, inpts, addon=cae)
        return np.array([sc, cae])
Example #23
def norm_trans(X, mode='ff'):
    """Compute feedforward and backprop for unit-normalization."""
    EPS = 0.00000001
    if (mode == 'ff'):
        N = gp.sqrt(gp.sum(X**2.0, axis=1) + EPS)
        N = N[:,gp.newaxis]
        F = X / N
    if (mode == 'bp'):
        N = gp.sqrt(gp.sum(X['X']**2.0, axis=1) + EPS)
        N = N[:,gp.newaxis]
        V = X['dLdA'] * X['X']
        V = gp.sum(V, axis=1)
        V = V[:,gp.newaxis]
        F = (X['dLdA'] / N) - (X['A'] * (V / (N**2.0)))
    return F
Example #24
def sig_ssd(z, targets, weight=0.5, predict=False, error=False, addon=0):
    """
    Sigmoid SSD.
    """
    bern = gpu.logistic(z)
    if predict:
        return bern
    n, m = bern.shape
    err = bern - targets
    if error:
        # rec. error + first deriv
        return weight * gpu.sum(err**2) / n + addon, 2. * weight * err / n
    else:
        # only return reconstruction error
        return weight * gpu.sum(err**2) / n + addon
Example #25
def norm_trans(X, mode='ff'):
    """Compute feedforward and backprop for unit-normalization."""
    EPS = 0.00000001
    if (mode == 'ff'):
        N = gp.sqrt(gp.sum(X**2.0, axis=1) + EPS)
        N = N[:, gp.newaxis]
        F = X / N
    if (mode == 'bp'):
        N = gp.sqrt(gp.sum(X['X']**2.0, axis=1) + EPS)
        N = N[:, gp.newaxis]
        V = X['dLdA'] * X['X']
        V = gp.sum(V, axis=1)
        V = V[:, gp.newaxis]
        F = (X['dLdA'] / N) - (X['A'] * (V / (N**2.0)))
    return F
Example #26
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(gdot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.size])
        Z = gdot(hddn, params[self.size:-self.shape[0]].reshape(self.Tshape)) + params[-self.shape[0]:]

        if self.rho_hat_grad is None:
            self.rho_hat_grad = hddn.mean(axis=0)
        else:
            self.rho_hat_grad *= 0.9
            self.rho_hat_grad += 0.1*hddn.mean(axis=0)

#        rho_hat = hddn.mean(axis=0)
        rho_hat = self.rho_hat_grad
        rho = self.rho
        sparsity = self.beta * gpu.sum(bKL(rho, rho_hat))
 
        _, delta = self.score(Z, inpts, error=True, addon=sparsity)

        g[self.size:-self.shape[0]] = gdot(hddn.T, delta).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        diff = Dsigmoid(hddn)
        dsparse_dha = -rho/rho_hat + (1-rho)/(1-rho_hat)
        dsc_dha = diff * (gdot(delta, params[:self.m_end].reshape(self.shape)) + self.beta*dsparse_dha/m)

        g[:self.m_end] = gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:self.size] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #27
def correlation_fraction(g, s, nvis, nhid):
    with misc.gnumpy_conversion_check('allow'):
        expect_vis = s[:nvis]
        expect_hid = s[nvis:nvis+nhid]
        da = g[:nvis]
        db = g[nvis:nvis+nhid]
        dW = g[nvis+nhid:].reshape((nvis, nhid))

        first_order_expl = gnp.outer(da, expect_hid) + gnp.outer(expect_vis, db)
        first_order_norm = gnp.sum(da**2) + gnp.sum(db**2) + gnp.sum(first_order_expl**2)

        dcorr = dW - first_order_expl
        dcorr_norm = gnp.sum(dcorr**2)
        g_norm = gnp.sum(g**2)
        #return first_order_norm, dcorr_norm, g_norm
        return dcorr_norm / (dcorr_norm + first_order_norm)
Example #28
 def softmax(self, x):
     max = gp.max(x, axis=1)
     x = x - max[:, gp.newaxis]
     y = gp.exp(x)
     s = gp.sum(y, 1)
     z = y / s[:, gp.newaxis]
     return z
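The same max-shifted softmax as Example #28, in plain NumPy with a check that each row sums to one (np stands in for gp):

import numpy as np

def softmax(x):
    x = x - np.max(x, axis=1)[:, np.newaxis]   # subtract the row max for stability
    y = np.exp(x)
    return y / np.sum(y, axis=1)[:, np.newaxis]

x = np.array([[1.0, 2.0, 3.0], [0.0, 0.0, 1000.0]])
print(softmax(x).sum(axis=1))   # -> [1. 1.], even with the large entry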
Example #29
    def pt_grad(self, params, inpts, **kwargs):
        g = gzeros(params.shape)
        m, _ = inpts.shape

        hddn = logistic(gpu.dot(inpts, params[:self.m_end].reshape(self.shape)) + params[self.m_end:self.m_end+self.shape[1]])
        Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

        if self.rho_hat_grad is None:
            self.rho_hat_grad = hddn.mean(axis=0)
        else:
            self.rho_hat_grad *= 0.9
            self.rho_hat_grad += 0.1*hddn.mean(axis=0)

#        rho_hat = hddn.mean(axis=0)
        rho_hat = self.rho_hat_grad
        rho = self.rho
        sparsity = self.beta * gpu.sum(bKL(rho, rho_hat))

        _, delta = self.score(Z, inpts, error=True, addon=sparsity)

        g[:self.m_end] = gdot(delta.T, hddn).ravel()
        g[-self.shape[0]:] = delta.sum(axis=0)

        diff = Dsigmoid(hddn)
        dsparse_dha = -rho/rho_hat + (1-rho)/(1-rho_hat)
        dsc_dha = diff * (gdot(delta, params[:self.m_end].reshape(self.shape)) + self.beta*dsparse_dha/m)

        g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()

        g[self.m_end:-self.shape[0]] = dsc_dha.sum(axis=0)
        # clean up
        del delta, hddn, Z
        return g
Example #30
def loss_mclr(Yh, Y):
    """Compute mutinomial logistic regression loss for Yh, w.r.t. Y.

    Values in Yh should probably be network outputs, and each row in Y must
    be a +1/-1 indicator vector for the target class of a row in Yh.
    """
    obs_count = float(Y.shape[0])
    # Get boolean mask for each observation's target class
    cl_mask = (Y > 0.0)
    # Compute softmax distribution transform of Yh
    sm_sum = gp.sum(gp.exp(Yh), axis=1)
    P = gp.exp(Yh) / sm_sum[:, gp.newaxis]
    dL = (P - cl_mask) / obs_count
    logP = gp.log(P) * cl_mask
    L = -gp.sum(logP) / obs_count
    return {'L': L, 'dL': dL}
Example #31
 def softmax(self, x):
     max = gp.max(x, axis=1)
     x = x - max[:, gp.newaxis]
     y = gp.exp(x)
     s = gp.sum(y, 1)
     z = y / s[:, gp.newaxis]
     return z
Example #32
File: rbm.py Project: surban/ml
 def strict_flip_sample(self, vis_start, iterations, beta=1):
     """Flips a randomly chosen bit and accepts the change if the
     resulting free energy is lower. Repeats for given iterations."""
     vis = vis_start.copy()
     fes = self.free_energy(vis) 
     n_total_flips = 0
 
     for i in range(iterations):
         # flip a bit at random
         f = np.random.randint(0, vis.shape[1])
         vis_prop = vis.copy()
         vis_prop[:,f] = 1-vis[:,f]
     
         # calculate new free energy and accept change if it is lower
         fes_prop = self.free_energy(vis_prop, beta=beta)
         acc_prop = fes_prop <= fes
         n_flips = gp.sum(acc_prop)
         n_total_flips += n_flips
     
         # compose new state
         acc_prop_t = gp.tile(acc_prop, (vis.shape[1], 1)).T
         vis = acc_prop_t * vis_prop + (1-acc_prop_t) * vis
         fes = acc_prop * fes_prop + (1-acc_prop) * fes
     
     return vis
Example #33
File: dbn.py Project: evolu8/gdbn
    def CDStep(self, inputBatch, layer, learnRate, momentum, L2Cost = 0):
        """
        layer=0 will train the first RBM directly on the input
        """
        inputBatch = inputBatch if isinstance(inputBatch, gnp.garray) else gnp.garray(inputBatch)
        mbsz = inputBatch.shape[0]
        vis = self.fprop(inputBatch, layer)
        GRBMFlag = layer==0 and self.realValuedVis
        visType = RBMGaussian() if GRBMFlag else self.RBMHidUnitType
        visHidStats, hidBiasStats, visBiasStats, negVis = \
                     CD1(vis, self.weights[layer], self.genBiases[layer], self.biases[layer], visType, self.RBMHidUnitType)
        factor = 1-momentum if not self.nestCompare else 1
        self.dW = momentum*self.dW + factor*visHidStats
        self.dvb = momentum*self.dvb + factor*visBiasStats
        self.dhb = momentum*self.dhb + factor*hidBiasStats

        if L2Cost > 0:
            self.weights[layer] *= 1-L2Cost*learnRate*factor 
        
        self.weights[layer] += (learnRate/mbsz) * self.dW
        self.genBiases[layer] += (learnRate/mbsz) * self.dvb
        self.biases[layer] += (learnRate/mbsz) * self.dhb

        #we compute squared error even for binary visible unit RBMs because who cares
        return gnp.sum((vis-negVis)**2)
Example #34
    def CDStep(self, inputBatch, layer, learnRate, momentum, L2Cost=0):
        """
        layer=0 will train the first RBM directly on the input
        """
        inputBatch = inputBatch if isinstance(
            inputBatch, gnp.garray) else gnp.garray(inputBatch)
        mbsz = inputBatch.shape[0]
        vis = self.fprop(inputBatch, layer)
        GRBMFlag = layer == 0 and self.realValuedVis
        visType = RBMGaussian() if GRBMFlag else self.RBMHidUnitType
        visHidStats, hidBiasStats, visBiasStats, negVis = \
            CD1(vis, self.weights[layer], self.genBiases[layer], self.biases[layer], visType, self.RBMHidUnitType)
        factor = 1 - momentum if not self.nestCompare else 1
        self.dW = momentum * self.dW + factor * visHidStats
        self.dvb = momentum * self.dvb + factor * visBiasStats
        self.dhb = momentum * self.dhb + factor * hidBiasStats

        if L2Cost > 0:
            self.weights[layer] *= 1 - L2Cost * learnRate * factor

        self.weights[layer] += (learnRate / mbsz) * self.dW
        self.genBiases[layer] += (learnRate / mbsz) * self.dvb
        self.biases[layer] += (learnRate / mbsz) * self.dhb

        #we compute squared error even for binary visible unit RBMs because who cares
        return gnp.sum((vis - negVis)**2)
Example #35
    def backprop(self, X, y_target):
        # forward
        activity = []
        result = X
        for i in range(len(self.weights)):
            p = self.dropout_probability[i]
            mask = (g.rand(result.shape) >= p)
            result = result * mask
            del mask
            activity.append(result)
            w,b = self.weights[i]
            result = g.dot(result,w) + b
            result = self.activation[i](result)
            
        # backward
        gradientNodes = []
        lastGradient = self.gradient[-1](result, y_target)
        gradientNodes.append(lastGradient)
        for i in reversed(range(1,len(self.weights))):
            w,b = self.weights[i]
            lastGradient = g.dot(lastGradient, w.T) * self.gradient[i-1](activity[i])
            gradientNodes.append(lastGradient)
                
        # get gradient
        resultGradient = []
        for i in range(len(self.weights)):
            gradW = (g.dot(activity[i].T,gradientNodes[-(i+1)]) / len(X))
            assert(gradW.shape == self.weights[i][0].shape)
            gradB = (g.sum(gradientNodes[-(i+1)],axis=0) / len(X))
            assert(gradB.shape == self.weights[i][1].shape)
            resultGradient.append([gradW,gradB])

        del gradientNodes
        
        return resultGradient
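A tiny NumPy illustration of the dropout masking applied at each layer of Example #35 (np in place of g; the drop probability p = 0.3 and the activation shape are assumed toy values). As in the original, units are kept where a uniform draw is >= p, with no rescaling of the survivors:

import numpy as np

rng = np.random.default_rng(0)
result = rng.normal(size=(4, 5))           # a batch of activations
p = 0.3                                    # assumed drop probability
mask = rng.random(result.shape) >= p       # keep each unit with probability 1 - p
result = result * mask
print(mask.mean())                         # fraction kept, roughly 1 - p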
Example #36
def loss_mclr(Yh, Y):
    """Compute mutinomial logistic regression loss for Yh, w.r.t. Y.

    Values in Yh should probably be network outputs, and each row in Y must
    be a +1/-1 indicator vector for the target class of a row in Yh.
    """
    obs_count = float(Y.shape[0])
    # Get boolean mask for each observation's target class
    cl_mask = (Y > 0.0)
    # Compute softmax distribution transform of Yh
    sm_sum = gp.sum(gp.exp(Yh), axis=1)
    P = gp.exp(Yh) / sm_sum[:,gp.newaxis]
    dL = (P - cl_mask) / obs_count
    logP = gp.log(P) * cl_mask
    L = -gp.sum(logP) / obs_count
    return {'L': L, 'dL': dL}
Example #37
   def forward(self):
      """
      Perform a forward pass to calculate the activation (objective)
      """

      numExamples = self.output_port.getOutput().shape[0]
      self.objective = 0.5 * gpu.sum((self.output_port.getOutput() - self.target_port.getOutput())**2) / numExamples
Example #38
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(reshape(x[num_weights_L1:num_weights_L2+num_weights_L1], (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_softmax = gpu.garray(reshape(x[num_weights_L2+num_weights_L1:shape(x)[0]], (numClasses, l2Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    inputs = gpu.concatenate((gpu.ones((1,numCases)), inputs), axis = 0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis=0)
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = hidden_sum_L2.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L2)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis = 0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions,axis = 0)
    temp = groundTruth*gpu.log(predictions)
    regularized_penalty_L1 = theta_L1[:,1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:,1:shape(theta_L2)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    cost = -1*gpu.sum(temp)/numCases + 0.5 * lambda_hidden*(gpu.sum(regularized_penalty_L1) + gpu.sum(regularized_penalty_L2)) + 0.5 * lambda_softmax * gpu.sum(theta_softmax*theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_L2
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost
Example #39
   def forward(self):
      """
      Perform a forward step - activate the net input using the softmax function
      """

      # Perform the activation
      self.output.setOutput(gpu.exp(self.input.getNetInput()))
      self.output.setOutput(self.output.getOutput() / (gpu.garray([gpu.sum(self.output.getOutput(),1)]).transpose()))
Example #40
 def energy(self, vis, hid):
     assert hid.ndim == 2
     #return (vis * self.vbias[nax, :]).sum(1) + \
     #       (hid * self.hbias[nax, :]).sum(1) + \
     #           (vis[:, :, nax] * self.weights[nax, :, :] * hid[:, nax, :]).sum(2).sum(1)
     return gnp.dot(vis, self.vbias) + \
            gnp.dot(hid, self.hbias) + \
            gnp.sum(vis * gnp.dot(hid, self.weights.T), 1)
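A NumPy check that the vectorized expression in Example #40 matches the per-sample bilinear form v . vbias + h . hbias + v^T W h (all parameters and states here are assumed random toy values):

import numpy as np

rng = np.random.default_rng(1)
nv, nh, n = 5, 3, 4
W = rng.normal(size=(nv, nh))
vbias = rng.normal(size=nv)
hbias = rng.normal(size=nh)
vis = rng.integers(0, 2, size=(n, nv)).astype(float)
hid = rng.integers(0, 2, size=(n, nh)).astype(float)

vectorized = vis.dot(vbias) + hid.dot(hbias) + np.sum(vis * hid.dot(W.T), axis=1)
explicit = np.array([vis[i].dot(vbias) + hid[i].dot(hbias) + vis[i].dot(W).dot(hid[i])
                     for i in range(n)])
print(np.allclose(vectorized, explicit))   # -> True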
Example #42
File: dnn.py Project: C2Tao/HMM
 def update(self):
     self.w *= self.l2reg
     if self.dropout > 0:
         self.w -= gpu.dot((self.x * self.r).T, self.d) * self.learn  # / self.q
     else:
         self.w -= gpu.dot(self.x.T, self.d) * self.learn  # / self.q
     self.b *= self.l2reg
     self.b -= gpu.sum(self.d, 0) * self.learn
Example #43
 def clip_params(self, max_norm=10.0):
     """Bound L2 (row-wise) norm of W by max_norm."""
     M = self.params['W']
     m_scales = max_norm / gp.sqrt(gp.sum(M**2.0,axis=1) + 1e-5)
     mask = (m_scales < 1.0) # with gnumpy, this already comes as float32
     m_scales = (m_scales * mask) + (1.0 - mask)
     self.params['W'] = M * m_scales[:,gp.newaxis]
     return
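A minimal NumPy version of the row-wise norm clipping in Example #43, with the same max_norm semantics and 1e-5 stabilizer (the example matrix is an assumed toy value):

import numpy as np

def clip_rows(W, max_norm=10.0):
    scales = max_norm / np.sqrt(np.sum(W ** 2.0, axis=1) + 1e-5)
    mask = (scales < 1.0).astype(W.dtype)   # rows whose L2 norm exceeds max_norm
    scales = scales * mask + (1.0 - mask)   # leave the other rows untouched
    return W * scales[:, np.newaxis]

W = np.array([[3.0, 4.0], [30.0, 40.0]])
print(np.sqrt(np.sum(clip_rows(W, 10.0) ** 2, axis=1)))   # -> [5., ~10.]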
Example #44
def rmssd(z, targets, predict=False, error=False, addon=0):
    """
    Mean of per-sample root-sum-of-squares (L2 norm) errors.
    """
    if predict:
        return z
    n, m = z.shape
    err = z - targets
    per_sample = gpu.sqrt(gpu.sum(err**2, axis=1) + 1e-8)

    if error:
        # rec. error + first deriv
        return gpu.sum(per_sample) / n + addon, err / (
            n * per_sample[:, gpu.newaxis])
    else:
        # only return reconstruction error
        return gpu.sum(per_sample) / n + addon
Example #45
 def phi(self):
     """
     Compute phi = p(w|z).
     """
     V = self.nzw.shape[1]
     num = self.nzw + self.beta
     num /= gpu.sum(num, axis=1)[:, np.newaxis]
     print num
     return num
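A NumPy sketch of the same row normalization as Example #45, turning per-topic word counts plus the smoothing prior beta into p(w|z) (the count matrix and beta are assumed toy values):

import numpy as np

nzw = np.array([[2.0, 0.0, 1.0],   # word counts: rows are topics, columns are words
                [0.0, 3.0, 1.0]])
beta = 0.1

num = nzw + beta
phi = num / np.sum(num, axis=1)[:, np.newaxis]
print(phi.sum(axis=1))   # each row of p(w|z) sums to 1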
Example #46
    def forward(self):
        """
      Perform a forward pass to calculate the activation (objective)
      """

        numExamples = self.output_port.getOutput().shape[0]
        self.objective = 0.5 * gpu.sum(
            (self.output_port.getOutput() - self.target_port.getOutput())**
            2) / numExamples
Example #47
def mlpSoftmax1Layer_grad(x, *args):
    numClasses, inputSize, l1Size, lambda_softmax, lambda_hidden, inputs, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_softmax = numClasses * l1Size
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    theta_softmax = gpu.garray(
        reshape(x[num_weights_L1:shape(x)[0]], (numClasses, l1Size)))
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    #hidden_derivative_L1 = hidden_sum_L1.logistic()
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    #hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_derivative_L1 = relu_mask_hidden1
    hidden_sum_softmax_imd = gpu.dot(theta_softmax, hidden_activation_L1)
    hidden_sum_softmax = hidden_sum_softmax_imd - hidden_sum_softmax_imd.max(
        axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    softmax_imd = groundTruth - predictions
    theta_softmax_grad = -1 * gpu.dot(
        softmax_imd,
        gpu.garray(transpose(hidden_activation_L1.as_numpy_array()))
    ) / numCases + lambda_softmax * theta_softmax
    deltaOut = -softmax_imd
    delta_L1_imd = gpu.dot(
        gpu.garray(transpose(theta_softmax.as_numpy_array())), deltaOut)
    delta_L1_imd2 = delta_L1_imd * hidden_derivative_L1
    #delta_L1_imd2 = (delta_L1_imd*hidden_activation_L1)*(1-hidden_activation_L1)
    delta_L1 = gpu.dot(delta_L1_imd2,
                       gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta_L1
    theta_L1_grad = theta_L1_grad / numCases
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(
        theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    theta_softmax_grad = reshape(theta_softmax_grad.as_numpy_array(),
                                 num_weights_softmax)
    del inputs
    del theta_L1
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_softmax
    del predictions
    del softmax_imd
    del deltaOut
    del delta_L1_imd
    del delta_L1_imd2
    del delta_L1
    gpu.free_reuse_cache()
    return hstack((theta_L1_grad, theta_softmax_grad))
Example #48
 def getErrorLoss(self,a0,a2,factor=1):
     """
     compute error/reconstruction error
     a2: reconstruction
     a0: input
     one row per case
     """
     loss=factor*0.5*gp.sum((a2-a0)**2)/a0.shape[0]
     return loss 
Example #49
 def update(self):
     self.w *= self.l2reg
     if self.dropout > 0:
         self.w -= gpu.dot(
             (self.x * self.r).T, self.d) * self.learn  # / self.q
     else:
         self.w -= gpu.dot(self.x.T, self.d) * self.learn  # / self.q
     self.b *= self.l2reg
     self.b -= gpu.sum(self.d, 0) * self.learn
Example #50
 def safe_softmax(self, Y):
     """Compute a reasonably (numerically) safe softmax."""
     Y_max = gp.max(Y, axis=1)
     Y_max = Y_max[:,gp.newaxis]
     Y_exp = gp.exp(Y - Y_max)
     Y_sum = gp.sum(Y_exp, axis=1)
     Y_sum = Y_sum[:,gp.newaxis]
     Y_sm = Y_exp / Y_sum
     return Y_sm
Example #51
def correlation_fraction(g, s, nvis, nhid):
    with misc.gnumpy_conversion_check('allow'):
        expect_vis = s[:nvis]
        expect_hid = s[nvis:nvis + nhid]
        da = g[:nvis]
        db = g[nvis:nvis + nhid]
        dW = g[nvis + nhid:].reshape((nvis, nhid))

        first_order_expl = gnp.outer(da, expect_hid) + gnp.outer(
            expect_vis, db)
        first_order_norm = gnp.sum(da**2) + gnp.sum(db**2) + gnp.sum(
            first_order_expl**2)

        dcorr = dW - first_order_expl
        dcorr_norm = gnp.sum(dcorr**2)
        g_norm = gnp.sum(g**2)
        #return first_order_norm, dcorr_norm, g_norm
        return dcorr_norm / (dcorr_norm + first_order_norm)
Example #52
 def getErrorLoss(self, a0, a2, factor=1):
     """
     compute error/reconstruction error
     a2: reconstruction
     a0: input
     one row per case
     """
     loss = factor * 0.5 * gp.sum((a2 - a0)**2) / a0.shape[0]
     return loss
Example #53
    def costAndGrad(self, data, labels):

        # forward prop
        self.hActs[0] = data
        i = 1
        for w, b in self.stack:
            self.hActs[i] = w.dot(self.hActs[i - 1]) + b
            if i <= len(self.layerSizes):
                self.hActs[i] = self.activation(self.hActs[i])
            i += 1

        probs = self.hActs[-1] - gp.max(self.hActs[-1], axis=0)
        probs = gp.exp(probs)
        probs = probs / gp.sum(probs, axis=0)
        probs += (probs < 1e-8) * (1e-8 - probs)

        labelMat = np.zeros(probs.shape)
        labelMat[labels, range(self.mbSize)] = 1
        labelMat = gp.garray(labelMat)
        cost = -(1. / self.mbSize) * gp.sum(labelMat * gp.log(probs))

        if not self.train:
            return cost, None

        # back prop
        self.deltas[-1] = probs - labelMat
        i = len(self.layerSizes) - 1
        for w, b in reversed(self.stack[1:]):
            grad = self.activation(self.hActs[i + 1], True)
            self.deltas[i] = w.T.dot(self.deltas[i + 1]) * grad
            i -= 1

        # compute gradients
        for i in range(len(self.grad)):
            self.grad[i][0] = (1. / self.mbSize) * self.deltas[i].dot(
                self.hActs[i].T)
            self.grad[i][1] = (1. / self.mbSize) * gp.sum(
                self.deltas[i], axis=1).reshape(-1, 1)

            # add gaussian noise
            # self.grad[i][0] += .01 * gp.randn(self.grad[i][0].shape)
            # self.grad[i][1] += .01 * gp.randn(self.grad[i][1].shape)

        return cost, self.grad
Example #54
    def forward(self):
        """
      Perform a forward step - activate the net input using the softmax function
      """

        # Perform the activation
        self.output.setOutput(gpu.exp(self.input.getNetInput()))
        self.output.setOutput(
            self.output.getOutput() /
            (gpu.garray([gpu.sum(self.output.getOutput(), 1)]).transpose()))
Example #55
 def reg_loss(self, Ws=[]):
     """Compute basic L1/L2 loss and gradient on weights in Ws."""
     if (len(Ws) == 0):
         Ws = self.layer_weights()
     L = 0.0
     dLdWs = []
     for i in range(self.layer_count):
         L = L + (self.lam_l2 * gp.sum(Ws[i]**2.0))
         dLdWs.append((2.0 * self.lam_l2) * Ws[i])
     return {'L': L, 'dLdWs': dLdWs}
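A NumPy sketch of the L2 penalty and gradient computed in Example #55 (np in place of gp; lam_l2 and the weight matrices are assumed toy values):

import numpy as np

def reg_loss(Ws, lam_l2=1e-4):
    L = 0.0
    dLdWs = []
    for W in Ws:
        L += lam_l2 * np.sum(W ** 2.0)      # L2 penalty on this layer's weights
        dLdWs.append((2.0 * lam_l2) * W)    # its gradient
    return {'L': L, 'dLdWs': dLdWs}

Ws = [np.ones((2, 3)), 2.0 * np.ones((3, 1))]
print(reg_loss(Ws)['L'])   # 1e-4 * (6 + 12) = 0.0018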
Example #56
def dev_loss(A, dev_type=1, use_shepherd=0):
    """DEV regularizer, cool stuff."""
    b_reps = len(A)
    b_obs = A[0].shape[0]
    At = []
    for i in range(b_reps):
        if (dev_type == 1):
            At.append(norm_trans(A[i], 'ff'))
        elif (dev_type == 2):
            At.append(tanh_trans(A[i], 'ff'))
        elif (dev_type == 3):
            At.append(line_trans(A[i], 'ff'))
        else:
            raise Exception('Unknown DEV types.')
    # Compute the mean activations for this ensemble sample
    N = float(A[0].shape[1])
    n = float(b_reps)
    m = float(b_obs * b_reps * N)
    Am = gp.zeros(At[0].shape)
    if (use_shepherd != 1):
        for i in range(b_reps):
            Am = Am + At[i]
        Am = Am / float(b_reps)
    else:
        Am = At[0]
    # Compute difference from mean of each set of droppy activations
    Ad = [(At[i] - Am) for i in range(b_reps)]
    L = sum([gp.sum(ad**2.0) for ad in Ad]) / m
    dLdA = []
    if (use_shepherd != 1):
        Add = gp.zeros(At[0].shape)
        for i in range(b_reps):
            Add = Add + Ad[i]
        for i in range(b_reps):
            dLdA.append(-(2.0/m) * ((((1.0/n) - 1.0) * Ad[i]) + \
                    ((1.0/n) * (Add - Ad[i]))))
    else:
        for i in range(b_reps):
            if (i == 0):
                dLdA.append(gp.zeros(Ad[0].shape))
            else:
                dLdA.append((2.0 / m) * Ad[i])
        for i in range(1, b_reps):
            dLdA[0] = dLdA[0] - dLdA[i]
    # Backpropagate gradient on variance through the desired transform
    for i in range(b_reps):
        BP = {'X': A[i], 'A': At[i], 'dLdA': dLdA[i]}
        if (dev_type == 1):
            dLdA[i] = norm_trans(BP, 'bp')
        elif (dev_type == 2):
            dLdA[i] = tanh_trans(BP, 'bp')
        elif (dev_type == 3):
            dLdA[i] = line_trans(BP, 'bp')
    return {'L': L, 'dLdA': dLdA}
Example #57
def costfunc_gpu_ReLU(x, *args):
    num_input, num_hidden, num_output, inputs, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape(
        (num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    #hidden_activation = gpu.log(1+hidden_sum.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum)) * (hidden_sum > 0)
    hidden_activation = hidden_sum * relu_mask_hidden1
    hidden_activation = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_activation),
                                        axis=0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:, 1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:, 1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - inputs) * (output - inputs)
    cost = gpu.sum(output_target_diff) / (2 * nData) + 0.5 * lambda_val * (
        gpu.sum(regularized_penalty1) + gpu.sum(regularized_penalty2))
    print 'GPU ReLU Linear Decoder Cost: ', cost
    del x
    del inputs
    del data
    del hidden_sum
    del hidden_activation
    del output
    del regularized_penalty1
    del regularized_penalty2
    del weights1
    del weights2
    del output_target_diff
    gpu.free_reuse_cache()
    return cost
Example #58
 def bprop(self, data, targs):
     cost = 0
     a = data
     acts = [a]
     zs = [a]
     for l in xrange(len(self.weights)):
         z = self.biases[l] + gnp.dot(a, self.weights[l])
         a = sigmoid(z)
         zs.append(z)
         acts.append(a)
     #print acts[-1]
     delta_l = self.loss_fn_grad(acts[-1], targs) * sigmoid_prime(zs[-1])
     for l in reversed(xrange(len(self.weights))):
         self.biasGrads[l] += gnp.sum(delta_l, axis=0)
         self.WGrads[l] += gnp.dot(acts[l].T, delta_l)
         if l > 0:
             delta_l = gnp.dot(delta_l, self.weights[l].T) * sigmoid_prime(
                 zs[l])
     cost = self.loss_fn(acts[-1], targs)
     n_err = gnp.sum(acts[-1].argmax(axis=1) != targs.argmax(axis=1))
     return (cost, n_err)