Example #1
def costfunc_gpu_ReLU(x, *args):
    num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta = args
    num_weights1 = (num_input+1)*num_hidden
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden,num_input+1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output,num_hidden+1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = gpu.log(1+hidden_sum.exp())
    p_avg = gpu.sum(hidden_activation,axis=1)/nData
    hidden_activation = gpu.concatenate((gpu.ones((1,nData)), hidden_activation), axis = 0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:,1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:,1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - inputs)*(output - inputs)
    KL = gpu.sum(sparsityParam*gpu.log(sparsityParam/p_avg) + (1-sparsityParam)*gpu.log((1-sparsityParam)/(1-p_avg)))
    cost = gpu.sum(output_target_diff)/(2*nData) + 0.5 * lambda_val * (gpu.sum(regularized_penalty1) + gpu.sum(regularized_penalty2)) + beta*KL
    print 'ReLU Linear Decoder Cost: ', cost
    return cost
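A note on the activation used above: the softplus gpu.log(1 + hidden_sum.exp()) can overflow once the pre-activations get large. For reference, a numerically stable version in plain NumPy (a sketch, not part of the original example):

import numpy as np

def softplus(x):
    # Stable log(1 + exp(x)): exp never sees a large positive argument.
    return np.maximum(x, 0) + np.log1p(np.exp(-np.abs(x)))

print(softplus(np.array([-50.0, 0.0, 50.0])))  # [~0, 0.6931, ~50.0] with no overflow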
Example #2
def bKL(x, y):
    """
    Kullback-Leibler divergence between two
    Bernoulli random vectors x and y.
    Note: Not symmetric.
    """
    return x * gpu.log(x / y) + (1 - x) * gpu.log((1 - x) / (1 - y))
Example #3
 def KL(rho, rho_target, KL_flat):
     y = rho.copy()
     if KL_flat:
         y[gp.where(y < rho_target)] = rho_target * gp.ones(
             y[gp.where(y < rho_target)].shape)
     return rho_target * gp.log(rho_target / y) + (1 - rho_target) * gp.log(
         (1 - rho_target) / (1 - y))
Example #4
 def reconstruction_cross_entropy(self, vis):
     """Returns the cross entropy between vis and its reconstruction 
     obtained by one step of Gibbs sampling."""
     _, sampled_p_vis = self.gibbs_sample(vis, 1)
     # Cross entropy is -[v*log(p) + (1-v)*log(1-p)], averaged over units.
     cross_entropy = -gp.mean(vis * gp.log(sampled_p_vis) +
                              (1 - vis) * gp.log(1 - sampled_p_vis),
                              axis=1)
     return cross_entropy
Example #5
    def forward(self):
        """
        Perform a forward pass to calculate the activation (objective)
        """

        numExamples = self.output_port.getOutput().shape[0]
        self.objective = -gpu.sum(gpu.garray(self.target_port.getOutput()) * gpu.log(gpu.garray(self.output_port.getOutput())))
        self.objective += -gpu.sum((1.0 - self.target_port.getOutput())*(gpu.log(1.000001 - self.output_port.getOutput())))
        self.objective /= numExamples
Example #6
    def forward(self):
        """
      Perform a forward pass to calculate the activation (objective)
      """

        numExamples = self.output_port.getOutput().shape[0]
        self.objective = -gpu.sum(
            gpu.garray(self.target_port.getOutput()) *
            gpu.log(gpu.garray(self.output_port.getOutput())))
        self.objective += -gpu.sum(
            (1.0 - self.target_port.getOutput()) *
            (gpu.log(1.000001 - self.output_port.getOutput())))
        self.objective /= numExamples
Example #7
def costfunc_gpu(x, *args):
    num_input, num_hidden, num_output, inputs, noNoiseData, lambda_val, sparsityParam, beta = args
    num_weights1 = (num_input + 1) * num_hidden
    x = gpu.garray(x)
    #    randomNoise = random.random_sample(shape(inputs))
    #    criteriaTable = randomNoise > 0.32
    #    inputs = inputs * criteriaTable
    inputs = gpu.garray(inputs)
    noNoiseData = gpu.garray(noNoiseData)
    #weights1 = gpu.garray(reshape(x[0:num_weights1],(num_hidden,num_input+1)))
    weights1 = x[0:num_weights1].reshape((num_hidden, num_input + 1))
    #weights2 = gpu.garray(reshape(x[num_weights1:shape(x)[0]], (num_output,num_hidden+1)))
    weights2 = x[num_weights1:shape(x)[0]].reshape(
        (num_output, num_hidden + 1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1, nData)), inputs), axis=0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = hidden_sum.logistic()
    p_avg = gpu.sum(hidden_activation, axis=1) / nData
    hidden_activation = gpu.concatenate((gpu.ones(
        (1, nData)), hidden_activation),
                                        axis=0)
    output = gpu.dot(weights2, hidden_activation)
    regularized_penalty1 = weights1[:, 1:shape(weights1)[1]]
    regularized_penalty2 = weights2[:, 1:shape(weights2)[1]]
    regularized_penalty1 = regularized_penalty1 * regularized_penalty1
    regularized_penalty2 = regularized_penalty2 * regularized_penalty2
    output_target_diff = (output - noNoiseData) * (output - noNoiseData)
    KL = gpu.sum(sparsityParam * gpu.log(sparsityParam / p_avg) +
                 (1 - sparsityParam) * gpu.log((1 - sparsityParam) /
                                               (1 - p_avg)))
    cost = gpu.sum(output_target_diff) / (2 * nData) + 0.5 * lambda_val * (
        gpu.sum(regularized_penalty1) +
        gpu.sum(regularized_penalty2)) + beta * KL
    print 'GPU Linear Denoising Decoder Cost: ', cost
    del x
    del inputs
    del noNoiseData
    del data
    del hidden_sum
    del hidden_activation
    del p_avg
    del output
    del regularized_penalty1
    del regularized_penalty2
    del weights1
    del weights2
    del output_target_diff
    gpu.free_reuse_cache()
    return cost
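For checking a GPU cost like the one above on small inputs, a plain NumPy sketch of the same objective (logistic hidden layer, linear decoder, L2 penalty on non-bias weights, KL sparsity term); the shapes, random data and clean no-noise target here are illustrative assumptions:

import numpy as np

rng = np.random.default_rng(0)
num_input, num_hidden, n = 8, 4, 10
W1 = rng.normal(scale=0.1, size=(num_hidden, num_input + 1))
W2 = rng.normal(scale=0.1, size=(num_input, num_hidden + 1))
inputs = rng.random((num_input, n))
lambda_val, rho, beta = 1e-4, 0.05, 3.0

data = np.vstack([np.ones((1, n)), inputs])
hidden = 1.0 / (1.0 + np.exp(-np.dot(W1, data)))   # logistic hidden activations
p_avg = hidden.mean(axis=1)                        # average activation per unit
hidden_b = np.vstack([np.ones((1, n)), hidden])
output = np.dot(W2, hidden_b)                      # linear decoder
recon = ((output - inputs) ** 2).sum() / (2 * n)
decay = 0.5 * lambda_val * ((W1[:, 1:] ** 2).sum() + (W2[:, 1:] ** 2).sum())
kl = (rho * np.log(rho / p_avg) + (1 - rho) * np.log((1 - rho) / (1 - p_avg))).sum()
print(recon + decay + beta * kl)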
Example #8
    def from_moments(moments, weights_std=0.):
        """Initialize an RBM so the visible and hidden biases match the given moments and the weights are
        set to small random values."""
        assert isinstance(moments, Moments)
        assert np.allclose(moments.expect_prod.as_numpy_array(),
                           gnp.outer(moments.expect_vis, moments.expect_hid).as_numpy_array())
        vbias = gnp.log(moments.expect_vis) - gnp.log(1. - moments.expect_vis)
        hbias = gnp.log(moments.expect_hid) - gnp.log(1. - moments.expect_hid)
        assert np.all(np.isfinite(vbias.as_numpy_array())) and np.all(np.isfinite(hbias.as_numpy_array()))
        
        if weights_std > 0.:
            weights = gnp.garray(np.random.normal(0., weights_std, size=(vbias.size, hbias.size)))
        else:
            weights = gnp.zeros((vbias.size, hbias.size))

        return RBM(vbias, hbias, weights)
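The bias formula above is the logit of the target mean: with zero weights the RBM's visible units are independent Bernoulli variables whose means reproduce the given moments. A quick NumPy check with illustrative values:

import numpy as np

def logit(p):
    return np.log(p) - np.log(1.0 - p)

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

expect_vis = np.array([0.1, 0.5, 0.9])
print(np.allclose(sigmoid(logit(expect_vis)), expect_vis))  # True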
Example #9
def rect_log(x, computeGrad = False):
	if (not computeGrad):
		f = gp.log(x*(x>0)+1)* (x>0)
		return f

	g = (x>0) / (gp.exp(x))
	return g
Example #10
File: loss.py Project: yujiali/pynn
    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        pred = gnp.as_garray(pred)
        y = gnp.exp(pred - pred.max(axis=1)[:, gnp.newaxis])
        y = y / y.sum(axis=1)[:, gnp.newaxis]

        return -(self.target *
                 gnp.log(y + _SMALL_CONSTANT)).sum(), y - self.target
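The same stabilized softmax followed by cross-entropy, and the y - target gradient, in plain NumPy (a sketch assuming one-hot targets; the small constant mirrors _SMALL_CONSTANT above):

import numpy as np

def softmax_xent(pred, target, eps=1e-20):
    # Row-wise softmax with max subtraction for numerical stability.
    y = np.exp(pred - pred.max(axis=1, keepdims=True))
    y /= y.sum(axis=1, keepdims=True)
    loss = -(target * np.log(y + eps)).sum()
    return loss, y - target  # gradient of the summed loss w.r.t. pred

loss, grad = softmax_xent(np.array([[2.0, 1.0, 0.1]]),
                          np.array([[1.0, 0.0, 0.0]]))
print(loss, grad)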
Example #11
def rect_log(x, computeGrad=False):
    if (not computeGrad):
        f = gp.log(x * (x > 0) + 1) * (x > 0)
        return f

    g = (x > 0) / (gp.exp(x))
    return g
Example #12
def log_exp_sum(x, axis=1):
    x_max = x.max(axis=axis)
    if isinstance(x, gnp.garray):
        return (x_max + gnp.log(
            gnp.exp(x - x_max[:, gnp.newaxis]).sum(axis=axis))).asarray()
    else:
        return x_max + np.log(np.exp(x - x_max[:, np.newaxis]).sum(axis=axis))
Example #13
    def forward_prop(self,
                     X,
                     add_noise=False,
                     compute_loss=False,
                     is_test=True):
        """
        Compute the forward propagation step that maps the input data matrix X
        into the output. The loss and loss gradient are computed when
        compute_loss is set to True. Note that by default the loss is applied
        to the pre-nonlinearity activation rather than the final output,
        unless loss_after_nonlin is set to True.
        """
        if self.params.dropout > 0 and add_noise:
            self.dropout_mask = gnp.rand(X.shape[0],
                                         X.shape[1]) > self.params.dropout
            self.inputs = X * self.dropout_mask
        else:
            self.inputs = X
        self.noise_added = add_noise

        if not self.use_batch_normalization:
            self.activation = self.inputs.dot(self.params.W) + self.params.b
            self.output = self.nonlin.forward_prop(self.activation)

            if self.sparsity_weight > 0:
                self._sparsity_current = self._sparsity_smoothing * self.output.mean(axis=0) \
                        + (1 - self._sparsity_smoothing) * self._sparsity_current
                self._sparsity_objective = (- self.sparsity * gnp.log(self._sparsity_current + 1e-20) \
                        - (1 - self.sparsity) * gnp.log(1 - self._sparsity_current + 1e-20)).sum() * self.sparsity_weight
        else:
            self.activation = self.inputs.dot(self.params.W)
            self.bn_output = self.bn_layer.forward_prop(self.activation,
                                                        is_test=is_test)
            self.output = self.nonlin.forward_prop(self.bn_output)

        if compute_loss and self.loss is not None:
            if self.loss_after_nonlin:
                self.loss_value, self.loss_grad = self.loss.compute_loss_and_grad(
                    self.output, compute_grad=True)
            else:
                self.loss_value, self.loss_grad = self.loss.compute_loss_and_grad(
                    self.activation
                    if not self.use_batch_normalization else self.bn_output,
                    compute_grad=True)
            self.loss_computed = True

        return self.output
Example #14
 def getErrorLoss(self, a0, a2, factor=1.0):
     """
     Error is measured by the negative log-likelihood.
     """
     pow=a2**a0
     p=gp.exp(-a2)*pow/self.factor[a0] 
     l=gp.log(p)
     return -l.sum(axis=1).mean()*factor
Example #15
 def getErrorLoss(self, a0, a2, factor=1.0):
     """
      Error is measured by the negative log-likelihood.
     """
     pow = a2**a0
     p = gp.exp(-a2) * pow / self.factor[a0]
     l = gp.log(p)
     return -l.sum(axis=1).mean() * factor
Example #16
 def pseudo_likelihood_for_bit(self, vis, i):
     """Returns the likelihood of bit i of vis given all other bits
     of vis."""
     fe = self.free_energy(vis)
     vis_flip = vis.copy()  # copy so the caller's vis is not modified in place
     vis_flip[:,i] = 1 - vis[:,i]
     fe_flip = self.free_energy(vis_flip)
     pl = gp.log(gp.logistic(fe_flip - fe))
     return pl
Example #17
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(
        reshape(x[num_weights_L1:num_weights_L2 + num_weights_L1],
                (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_softmax = gpu.garray(
        reshape(x[num_weights_L2 + num_weights_L1:shape(x)[0]],
                (numClasses, l2Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_activation_L1),
                                           axis=0)
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = hidden_sum_L2.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L2)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    temp = groundTruth * gpu.log(predictions)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:, 1:shape(theta_L2)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    cost = -1 * gpu.sum(temp) / numCases + 0.5 * lambda_hidden * (
        gpu.sum(regularized_penalty_L1) + gpu.sum(regularized_penalty_L2)
    ) + 0.5 * lambda_softmax * gpu.sum(theta_softmax * theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_L2
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost
Example #18
def log_exp_sum_1d(x):
    """
    This computes log(exp(x_1) + exp(x_2) + ... + exp(x_n)) as 
    x* + log(exp(x_1-x*) + exp(x_2-x*) + ... + exp(x_n-x*)), where x* is the
    max over all x_i.  This can avoid numerical problems.
    """
    x_max = x.max()
    if isinstance(x, gnp.garray):
        return x_max + gnp.log(gnp.exp(x - x_max).sum())
    else:
        return x_max + np.log(np.exp(x - x_max).sum())
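A quick NumPy illustration of why the max-subtraction matters: on large inputs the naive form overflows while the shifted form does not (values are illustrative):

import numpy as np

x = np.array([1000.0, 1001.0, 999.0])
x_max = x.max()
print(x_max + np.log(np.exp(x - x_max).sum()))   # ~1001.41
# The naive np.log(np.exp(x).sum()) returns inf here: exp(1000) overflows float64.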
Example #20
    def from_moments(moments, weights_std=0.):
        """Initialize an RBM so the visible and hidden biases match the given moments and the weights are
        set to small random values."""
        assert isinstance(moments, Moments)
        assert np.allclose(
            moments.expect_prod.as_numpy_array(),
            gnp.outer(moments.expect_vis, moments.expect_hid).as_numpy_array())
        vbias = gnp.log(moments.expect_vis) - gnp.log(1. - moments.expect_vis)
        hbias = gnp.log(moments.expect_hid) - gnp.log(1. - moments.expect_hid)
        assert np.all(np.isfinite(vbias.as_numpy_array())) and np.all(
            np.isfinite(hbias.as_numpy_array()))

        if weights_std > 0.:
            weights = gnp.garray(
                np.random.normal(0.,
                                 weights_std,
                                 size=(vbias.size, hbias.size)))
        else:
            weights = gnp.zeros((vbias.size, hbias.size))

        return RBM(vbias, hbias, weights)
Example #21
def dbn_supervised_predict_exact(ws_vh, ws_v, ws_h, x):
    """
    Predict the class label of input x from supervised DBN
    Uses the exact method mentioned in section 6.2 of Hinton, Osindero, Teh 2006
    The free energy formula is taken from http://deeplearning.net/tutorial/rbm.html
    
    x: Input data. (NxD matrix)
    """
    L = len(ws_vh)
    N = x.shape[0]

    # make a forward pass to get from input layer to visible layer of top level
    # RBM
    h_prev = x.T

    # forward (bottom-up) pass, (use deterministic (we pass the activations, not
    # the stochastically sampled steps) forward pass)
    for l in range(L - 1):
        ah = gnp.dot(ws_vh[l].T, h_prev) + ws_h[l]
        h_prev = gnp.logistic(ah)

    H = ws_vh[-1].shape[0]  # number of visible units top level RBM
    Hx = h_prev.shape[0]  # number of hidden units in the penultimate layer
    K = H - Hx
    # (H - Hx) is the number of supervised inputs to top level RBM

    # for every class, assume it is the correct label and calculate its free energy
    y = gnp.zeros((K, N))
    free_energy = gnp.zeros((N, K))  # we actually calculate -free_energy
    for k in range(K):
        # set the current assumed class label
        y[k, :] = 1.0

        # visible unit vector
        v = gnp.concatenate((y, h_prev))
        e_v = gnp.dot(ws_v[-1].T, v)  # bias energy term

        ah = gnp.dot(ws_vh[-1].T, v) + ws_h[-1]
        e_h = gnp.sum(gnp.log(gnp.exp(ah) + 1.0), axis=0)

        free_energy[:, k] = e_v + e_h

        # zero the class labels for next iteration
        y[:, :] = 0.0

    # since these numbers may get pretty small, use the sum-exp trick for converting
    # these to probabilities
    pred_y = (
        gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis])
        / gnp.sum(gnp.exp(free_energy - gnp.max(free_energy, axis=1)[:, gnp.newaxis]), axis=1)[:, gnp.newaxis]
    )

    return pred_y
Example #22
 def xent_loss_and_grad(self, Yh, Y_cat):
     """Cross-entropy loss for predictions Yh given targets Y_cat."""
     # Convert from categorical classes to "one-hot" target vectors
     Y_ind = zeros(Yh.shape)
     Y_ind[np.arange(Y_ind.shape[0]), Y_cat] = 1.0
     # Push one-hot targets vectors to the GPU
     Y_ind = gp.garray(Y_ind)
     # Compute softmax and then cross-entropy loss
     Yh_sm = self.safe_softmax(Yh)
     L = -gp.sum((Y_ind * gp.log(Yh_sm)))
     dLdYh = Yh_sm - Y_ind
     return [L, dLdYh]
Example #23
    def forward_prop(self, X, add_noise=False, compute_loss=False):
        """
        Compute the forward propagation step that maps the input data matrix X
        into the output. The loss and loss gradient are computed when
        compute_loss is set to True. Note that by default the loss is applied
        to the pre-nonlinearity activation rather than the final output,
        unless loss_after_nonlin is set to True.
        """
        if self.params.dropout > 0 and add_noise:
            self.dropout_mask = gnp.rand(X.shape[0], X.shape[1]) > self.params.dropout
            self.inputs = X * self.dropout_mask
        else:
            self.inputs = X
        self.noise_added = add_noise

        if not self.use_batch_normalization:
            self.activation = self.inputs.dot(self.params.W) + self.params.b
            self.output = self.nonlin.forward_prop(self.activation)

            if self.sparsity_weight > 0:
                self._sparsity_current = self._sparsity_smoothing * self.output.mean(axis=0) \
                        + (1 - self._sparsity_smoothing) * self._sparsity_current
                self._sparsity_objective = (- self.sparsity * gnp.log(self._sparsity_current + 1e-20) \
                        - (1 - self.sparsity) * gnp.log(1 - self._sparsity_current + 1e-20)).sum() * self.sparsity_weight
        else:
            self.activation = self.inputs.dot(self.params.W)
            self.bn_output = self.bn_layer.forward_prop(self.activation)
            self.output = self.nonlin.forward_prop(self.bn_output)

        if compute_loss and self.loss is not None:
            if self.loss_after_nonlin:
                self.loss_value, self.loss_grad = self.loss.compute_loss_and_grad(
                        self.output, compute_grad=True)
            else:
                self.loss_value, self.loss_grad = self.loss.compute_loss_and_grad(
                        self.activation if not self.use_batch_normalization else self.bn_output, compute_grad=True)
            self.loss_computed = True
        
        return self.output
Example #24
def costFunction(X, y, theta1, theta2, lam=None, reg=False):

    # Get the number of training examples:
    m = np.size(y)
    # Map labels to binary vectors:
    y = gpu.garray(binaryMapper(y)).T
    # Feed it forward:
    a1, a2, a3 = forwardProp(X, theta1, theta2)

    # Get the cost without regularization:
    J = gpu.sum(-(gpu.log(a3) * y) - (gpu.log(1 - a3) * (1 - y))) / m

    # Add regularization penalties to the cost (excluding the bias):
    if reg == True:

        J += ((gpu.sum(theta1[:, 1:]**2) + gpu.sum(theta2[:, 1:]**2)) *
              (lam / (2.0 * m)))

        print "Regularized Cost: " + str(J)
    else:
        print "Unregularized Cost: " + str(J)

    return J, a1, a2, a3
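Example #24 depends on a binaryMapper helper that is not shown. A minimal NumPy sketch of such a mapper (the function name and the assumption of 1-based integer labels are illustrative, not taken from the original project):

import numpy as np

def binary_mapper(y, num_classes):
    # Map integer labels assumed to lie in 1..num_classes to one-hot rows.
    y = np.asarray(y).ravel()
    one_hot = np.zeros((y.size, num_classes))
    one_hot[np.arange(y.size), y - 1] = 1.0
    return one_hot

print(binary_mapper([1, 3, 2], num_classes=3))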
Example #25
def loss_mclr(Yh, Y):
    """Compute mutinomial logistic regression loss for Yh, w.r.t. Y.

    Values in Yh should probably be network outputs, and each row in Y must
    be a +1/-1 indicator vector for the target class of a row in Yh.
    """
    obs_count = float(Y.shape[0])
    # Get boolean mask for each observation's target class
    cl_mask = (Y > 0.0)
    # Compute softmax distribution transform of Yh
    sm_sum = gp.sum(gp.exp(Yh), axis=1)
    P = gp.exp(Yh) / sm_sum[:,gp.newaxis]
    dL = (P - cl_mask) / obs_count
    logP = gp.log(P) * cl_mask
    L = -gp.sum(logP) / obs_count
    return {'L': L, 'dL': dL}
Example #26
def loss_mclr(Yh, Y):
    """Compute mutinomial logistic regression loss for Yh, w.r.t. Y.

    Values in Yh should probably be network outputs, and each row in Y must
    be a +1/-1 indicator vector for the target class of a row in Yh.
    """
    obs_count = float(Y.shape[0])
    # Get boolean mask for each observation's target class
    cl_mask = (Y > 0.0)
    # Compute softmax distribution transform of Yh
    sm_sum = gp.sum(gp.exp(Yh), axis=1)
    P = gp.exp(Yh) / sm_sum[:, gp.newaxis]
    dL = (P - cl_mask) / obs_count
    logP = gp.log(P) * cl_mask
    L = -gp.sum(logP) / obs_count
    return {'L': L, 'dL': dL}
Example #27
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(reshape(x[num_weights_L1:num_weights_L2+num_weights_L1], (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_softmax = gpu.garray(reshape(x[num_weights_L2+num_weights_L1:shape(x)[0]], (numClasses, l2Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    inputs = gpu.concatenate((gpu.ones((1,numCases)), inputs), axis = 0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis=0)
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    hidden_activation_L2 = hidden_sum_L2.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L2)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis = 0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions,axis = 0)
    temp = groundTruth*gpu.log(predictions)
    regularized_penalty_L1 = theta_L1[:,1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:,1:shape(theta_L2)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    cost = -1*gpu.sum(temp)/numCases + 0.5 * lambda_hidden*(gpu.sum(regularized_penalty_L1) + gpu.sum(regularized_penalty_L2)) + 0.5 * lambda_softmax * gpu.sum(theta_softmax*theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_L2
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost
Example #28
    def costAndGrad(self, data, labels):

        # forward prop
        self.hActs[0] = data
        i = 1
        for w, b in self.stack:
            self.hActs[i] = w.dot(self.hActs[i - 1]) + b
            if i <= len(self.layerSizes):
                self.hActs[i] = self.activation(self.hActs[i])
            i += 1

        probs = self.hActs[-1] - gp.max(self.hActs[-1], axis=0)
        probs = gp.exp(probs)
        probs = probs / gp.sum(probs, axis=0)
        probs += (probs < 1e-8) * (1e-8 - probs)

        labelMat = np.zeros(probs.shape)
        labelMat[labels, range(self.mbSize)] = 1
        labelMat = gp.garray(labelMat)
        cost = -(1. / self.mbSize) * gp.sum(labelMat * gp.log(probs))

        if not self.train:
            return cost, None

        # back prop
        self.deltas[-1] = probs - labelMat
        i = len(self.layerSizes) - 1
        for w, b in reversed(self.stack[1:]):
            grad = self.activation(self.hActs[i + 1], True)
            self.deltas[i] = w.T.dot(self.deltas[i + 1]) * grad
            i -= 1

        # compute gradients
        for i in range(len(self.grad)):
            self.grad[i][0] = (1. / self.mbSize) * self.deltas[i].dot(
                self.hActs[i].T)
            self.grad[i][1] = (1. / self.mbSize) * gp.sum(
                self.deltas[i], axis=1).reshape(-1, 1)

            # add gaussian noise
            # self.grad[i][0] += .01 * gp.randn(self.grad[i][0].shape)
            # self.grad[i][1] += .01 * gp.randn(self.grad[i][1].shape)

        return cost, self.grad
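The line probs += (probs < 1e-8) * (1e-8 - probs) above clamps probabilities from below before taking the log; in NumPy terms it is simply a floor at 1e-8:

import numpy as np

probs = np.array([5e-9, 0.3, 0.7])
clamped = probs + (probs < 1e-8) * (1e-8 - probs)
print(np.allclose(clamped, np.maximum(probs, 1e-8)))  # True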
Example #29
    def costAndGrad(self,data,labels):
        
        # forward prop
        self.hActs[0] = data
        i = 1
        for w,b in self.stack:
            self.hActs[i] = w.dot(self.hActs[i-1])+b
            if i <= len(self.layerSizes):
                self.hActs[i] = self.activation(self.hActs[i])
            i += 1

        probs = self.hActs[-1]-gp.max(self.hActs[-1],axis=0)
        probs = gp.exp(probs)
        probs = probs/gp.sum(probs,axis=0)
        probs += (probs < 1e-8)*(1e-8-probs)

        labelMat = np.zeros(probs.shape)
        labelMat[labels,range(self.mbSize)] = 1
        labelMat = gp.garray(labelMat)
        cost = -(1./self.mbSize)*gp.sum(labelMat*gp.log(probs))

        if not self.train:
            return cost,None

        # back prop
        self.deltas[-1] = probs-labelMat
        i = len(self.layerSizes)-1
        for w,b in reversed(self.stack[1:]):
            grad = self.activation(self.hActs[i+1], True)
            self.deltas[i] = w.T.dot(self.deltas[i+1])*grad
            i -= 1

        # compute gradients
        for i in range(len(self.grad)):
            self.grad[i][0] = (1./self.mbSize)*self.deltas[i].dot(self.hActs[i].T)
            self.grad[i][1] = (1./self.mbSize)*gp.sum(self.deltas[i],axis=1).reshape(-1,1)

            # add gaussian noise
            # self.grad[i][0] += .01 * gp.randn(self.grad[i][0].shape)
            # self.grad[i][1] += .01 * gp.randn(self.grad[i][1].shape)

        return cost,self.grad
Example #30
    def costAndGrad(self, data, labels):

        # forward prop
        self.hActs[0] = data
        i = 1
        for w, b in self.stack:
            self.hActs[i] = w.dot(self.hActs[i - 1]) + b
            if i <= len(self.layerSizes):
                self.hActs[i] = (1 / 2.) * (
                    self.hActs[i] + gp.sign(self.hActs[i]) * self.hActs[i])
            i += 1

        probs = self.hActs[-1] + gp.min(self.hActs[-1], axis=0)
        probs = gp.exp(probs)
        probs = probs / gp.sum(probs, axis=0)

        labelMat = np.zeros(probs.shape)
        labelMat[labels, range(self.mbSize)] = 1
        labelMat = gp.garray(labelMat)
        cost = -(1. / self.mbSize) * gp.sum(labelMat * gp.log(probs))

        if not self.train:
            return cost, None

        # back prop
        self.deltas[-1] = probs - labelMat
        i = len(self.layerSizes) - 1
        for w, b in reversed(self.stack[1:]):
            self.deltas[i] = w.T.dot(self.deltas[i + 1]) * gp.sign(
                self.hActs[i + 1])
            i -= 1

        # compute gradients
        for i in range(len(self.grad)):
            self.grad[i][0] = (1. / self.mbSize) * self.deltas[i].dot(
                self.hActs[i].T)
            self.grad[i][1] = (1. / self.mbSize) * gp.sum(
                self.deltas[i], axis=1).reshape(-1, 1)

        return cost, self.grad
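The hidden-layer expression (h + sign(h) * h) / 2 used in this example is a ReLU written without an explicit comparison; a quick NumPy check:

import numpy as np

h = np.array([-2.0, 0.0, 3.0])
print(np.allclose(0.5 * (h + np.sign(h) * h), np.maximum(h, 0.0)))  # True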
Example #31
def mlpSoftmax1Layer_costfunc(x, *args):
    numClasses, inputSize, l1Size, lambda_softmax, lambda_hidden, inputs, groundTruth = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    theta_softmax = gpu.garray(
        reshape(x[num_weights_L1:shape(x)[0]], (numClasses, l1Size)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    #hidden_activation_L1 = hidden_sum_L1.logistic()
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L1)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    temp = groundTruth * gpu.log(predictions)
    temp = temp.as_numpy_array()
    temp[temp == -inf] = -200.0
    temp = nan_to_num(temp)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    cost = -1 * sum(temp) / numCases + 0.5 * lambda_hidden * (
        gpu.sum(regularized_penalty_L1)) + 0.5 * lambda_softmax * gpu.sum(
            theta_softmax * theta_softmax)
    print 'Multilayer Softmax Cost:', cost
    del inputs
    del theta_L1
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_softmax
    del predictions
    del temp
    del regularized_penalty_L1
    gpu.free_reuse_cache()
    return cost
Example #32
def classify_ensemble(data, probs, batch_size, num_steps):
	epoch_size = ((len(data) // batch_size) - 1) // num_steps
	start_time = time.time()
	costs = 0.0
	iters = 0
	# for i in range(len(probs)):
	#   probs[i] = probs[i] / np.sum(probs[i])
	#probs = tf.nn.softmax(probs)
	# print(np.sum(probs[0]))
	# print(np.sum(probs[50]))
	#print(len(probs[0]))
	for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size,
																										num_steps)):
		# print(step)
		# print(x)
		# print(y)
		# print(probs)
		#print(probs[0])
		cost = -1 * gpu.log(probs[step][0,y[0,0]])
		#print(cost)
		'''
		loss = tf.nn.seq2seq.sequence_loss_by_example(
				[logits],
				[tf.reshape(y, [-1])],
				[tf.ones([batch_size * num_steps], dtype=tf.float64)])

		print(loss)
		cost = tf.reduce_sum(loss) / batch_size
		print(cost)
		'''
		costs += cost
		iters += num_steps
		
		if step % (epoch_size // 10) == 10:
			print("%.3f perplexity: %.3f speed: %.0f wps" %
						(step * 1.0 / epoch_size, gpu.exp(costs / iters),
						 iters * batch_size / (time.time() - start_time)))

	return gpu.exp(costs / iters)	
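The per-step cost above is the negative log probability of the observed next word, and gpu.exp(costs / iters) is the corresponding perplexity. A tiny NumPy illustration with made-up probabilities:

import numpy as np

target_probs = np.array([0.10, 0.02, 0.25, 0.05])   # illustrative values
costs = -np.log(target_probs)
print(np.exp(costs.mean()))   # perplexity ~14.1; lower is better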
Example #33
def grad_costfunc_gpu_ReLU(x, *args):
    num_input,num_hidden,num_output,inputs,lambda_val,sparsityParam,beta = args
    num_weights1 = (num_input+1)*num_hidden
    num_weights2 = (num_hidden+1)*num_output
    x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    weights1 = x[0:num_weights1].reshape((num_hidden,num_input+1))
    weights2 = x[num_weights1:shape(x)[0]].reshape((num_output,num_hidden+1))
    nData = shape(inputs)[1]
    data = gpu.concatenate((gpu.ones((1,nData)), inputs), axis = 0)
    hidden_sum = gpu.dot(weights1, data)
    hidden_activation = gpu.log(1+hidden_sum.exp())
    p_avg = gpu.sum(hidden_activation,axis=1)/nData
    grad_sparse = -1*sparsityParam/p_avg.as_numpy_array() + (1-sparsityParam)/(1-p_avg.as_numpy_array())
    grad_sparse = append(0,grad_sparse)
    grad_sparse = tile(grad_sparse, (nData, 1))
    grad_sparse = gpu.garray(transpose(grad_sparse))
    hidden_activation = gpu.concatenate((gpu.ones((1,nData)), hidden_activation), axis = 0)
    outputs = gpu.dot(weights2, hidden_activation)
    weights1_grad = gpu.zeros(shape(weights1))
    weights2_grad = gpu.zeros(shape(weights2))
    p = outputs-inputs
    weights2_grad += gpu.dot(p, gpu.garray(transpose(hidden_activation.as_numpy_array())))
    q_temp = gpu.dot(gpu.garray(transpose(weights2.as_numpy_array())),p) + beta*grad_sparse
    #q = multiply(multiply(q_temp,hidden_activation),(1-hidden_activation))
    q = q_temp*hidden_sum.logistic()
    delta2 = gpu.dot(q, gpu.garray(transpose(data.as_numpy_array())))
    weights1_grad += delta2[1:shape(delta2)[0], :]
    weights1_grad = weights1_grad/nData
    weights2_grad = weights2_grad/nData
    weights1_grad[:,1:shape(weights1_grad)[1]] = weights1_grad[:,1:shape(weights1_grad)[1]] + weights1[:,1:shape(weights1)[1]] * lambda_val
    weights2_grad[:,1:shape(weights2_grad)[1]] = weights2_grad[:,1:shape(weights2_grad)[1]] + weights2[:,1:shape(weights2)[1]] * lambda_val
    #weights1_grad = reshape(weights1_grad, num_weights1)
    weights1_grad = weights1_grad.reshape(num_weights1)
    #weights2_grad = reshape(weights2_grad, num_weights2)
    weights2_grad = weights2_grad.reshape(num_weights2)
    return hstack((weights1_grad.as_numpy_array(),weights2_grad.as_numpy_array()))
Example #34
File: nnet.py Project: awni/awni_ml
    def costAndGrad(self,data,labels):
        
        # forward prop
        self.hActs[0] = data
        i = 1
        for w,b in self.stack:
            self.hActs[i] = w.dot(self.hActs[i-1])+b
            if i <= len(self.layerSizes):
                self.hActs[i] = (1/2.)*(self.hActs[i]+gp.sign(self.hActs[i])*self.hActs[i])
            i += 1

        probs = self.hActs[-1]+gp.min(self.hActs[-1],axis=0)
        probs = gp.exp(probs)
        probs = probs/gp.sum(probs,axis=0)

        labelMat = np.zeros(probs.shape)
        labelMat[labels,range(self.mbSize)] = 1
        labelMat = gp.garray(labelMat)
        cost = -(1./self.mbSize)*gp.sum(labelMat*gp.log(probs))

        if not self.train:
            return cost,None

        # back prop
        self.deltas[-1] = probs-labelMat
        i = len(self.layerSizes)-1
        for w,b in reversed(self.stack[1:]):
            self.deltas[i] = w.T.dot(self.deltas[i+1])*gp.sign(self.hActs[i+1])
            i -= 1
        
        # compute gradients
        for i in range(len(self.grad)):
            self.grad[i][0] = (1./self.mbSize)*self.deltas[i].dot(self.hActs[i].T)
            self.grad[i][1] = (1./self.mbSize)*gp.sum(self.deltas[i],axis=1).reshape(-1,1)

        return cost,self.grad
Example #35
File: ais.py Project: surban/ml
 def init_using_dataset(self, vis_samples):
     "Calculates the biases of the base rate RBM using the given samples"
     epsilon = 1e-2
     vis_mean = gp.mean(vis_samples, axis=0)
     self.base_bias_vis = gp.log((vis_mean + epsilon) / (1 - vis_mean + epsilon))
Example #36
 def invert_output(self, z):
     return gnp.log(z / (1 - z))
Example #37
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, l3Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth, dropout_probability = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    num_weights_L3 = l3Size * (l2Size + 1)
    num_weights_softmax = numClasses * l3Size
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1], (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(reshape(x[num_weights_L1:num_weights_L2+num_weights_L1], (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_L3 = gpu.garray(reshape(x[num_weights_L2+num_weights_L1:num_weights_L2+num_weights_L1+num_weights_L3], (l3Size, l2Size + 1)))
    theta_softmax = gpu.garray(reshape(x[num_weights_L2+num_weights_L1+num_weights_L3:shape(x)[0]], (numClasses, l3Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    theta_L2_grad = gpu.zeros(shape(theta_L2))
    theta_L3_grad = gpu.zeros(shape(theta_L3))
    dropout_l1 = gpu.garray(bernoulli.rvs(dropout_probability, size = (l1Size+1, numCases)))
    dropout_l2 = gpu.garray(bernoulli.rvs(dropout_probability, size = (l2Size+1, numCases)))
    dropout_l3 = gpu.garray(bernoulli.rvs(dropout_probability, size = (l3Size, numCases)))
    inputs = gpu.concatenate((gpu.ones((1,numCases)), inputs), axis = 0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1>0)
    hidden_activation_L1 = hidden_sum_L1*relu_mask_hidden1
    hidden_derivative_L1 = relu_mask_hidden1
    #hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis=0)
    hidden_derivative_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_derivative_L1), axis=0)
    hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis=0) * dropout_l1
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    #hidden_activation_L2 = gpu.log(1+hidden_sum_L2.exp())
    relu_mask_hidden2 = gpu.ones(shape(hidden_sum_L2)) * (hidden_sum_L2>0)
    hidden_activation_L2 = hidden_sum_L2*relu_mask_hidden2
    hidden_derivative_L2 = relu_mask_hidden2
    #hidden_activation_L2 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L2), axis=0)
    hidden_derivative_L2 = gpu.concatenate((gpu.ones((1,numCases)), hidden_derivative_L2), axis=0)
    hidden_activation_L2 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L2), axis=0) * dropout_l2
    hidden_sum_L3 = gpu.dot(theta_L3, hidden_activation_L2)
    #hidden_activation_L3 = gpu.log(1+hidden_sum_L3.exp())
    relu_mask_hidden3 = gpu.ones(shape(hidden_sum_L3)) * (hidden_sum_L3>0)
    #hidden_activation_L3 = hidden_sum_L3*relu_mask_hidden3
    hidden_derivative_L3 = relu_mask_hidden3
    hidden_activation_L3 = hidden_sum_L3*relu_mask_hidden3 * dropout_l3
    #hidden_activation_L3 = hidden_sum_L3.logistic() * dropout_l3
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L3)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis = 0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions,axis = 0)
    pred = predictions.argmax(axis=0) + 1
    accuracy = mean(pred == labels) * 100
    temp = groundTruth*gpu.log(predictions)
    temp = temp.as_numpy_array()
    temp[temp==-inf] = -200.0
    temp = nan_to_num(temp)
    regularized_penalty_L1 = theta_L1[:,1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:,1:shape(theta_L2)[1]]
    regularized_penalty_L3 = theta_L3[:,1:shape(theta_L3)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    regularized_penalty_L3 = regularized_penalty_L3 * regularized_penalty_L3
    pred_cost = -1*sum(temp)/numCases
    l2norm_cost = 0.5 * lambda_hidden*(gpu.sum(regularized_penalty_L3) + gpu.sum(regularized_penalty_L2) + gpu.sum(regularized_penalty_L1)) + 0.5 * lambda_softmax * gpu.sum(theta_softmax*theta_softmax)
    #l2norm_cost = 0
    cost = pred_cost + l2norm_cost
    print 'Prediction Accuracy:                       ', accuracy, '%'
    print 'Multilayer Softmax Prediction Cost:        ', pred_cost
    print 'Multilayer Softmax L2 Normalisation Cost:  ', l2norm_cost
    print 'Multilayer Softmax Cost:                   ', cost    
    print '--------------------------------------------------------------------'
    softmax_imd = groundTruth - predictions
    #theta_softmax_grad = -1*gpu.dot(softmax_imd, gpu.garray(transpose(hidden_activation_L3.as_numpy_array())))/numCases
    theta_softmax_grad = -1*gpu.dot(softmax_imd, gpu.garray(transpose(hidden_activation_L3.as_numpy_array())))/numCases + lambda_softmax * theta_softmax
    deltaOut = -softmax_imd
    delta_L3_imd = gpu.dot(gpu.garray(transpose(theta_softmax.as_numpy_array())), deltaOut)
    delta_L3_imd2 = delta_L3_imd*hidden_derivative_L3
    #delta_L3_imd2 = (delta_L3_imd * hidden_activation_L3) * (1-hidden_activation_L3)
    delta_L3 = gpu.dot(delta_L3_imd2, gpu.garray(transpose(hidden_activation_L2.as_numpy_array())))
    theta_L3_grad += delta_L3
    delta_L2_imd = gpu.dot(gpu.garray(transpose(theta_L3.as_numpy_array())), delta_L3_imd2)
    delta_L2_imd2 = delta_L2_imd*hidden_derivative_L2
    delta_L2_imd2 = delta_L2_imd2[1:shape(delta_L2_imd2)[0]+1, :]
    delta_L2 = gpu.dot(delta_L2_imd2, gpu.garray(transpose(hidden_activation_L1.as_numpy_array())))
    theta_L2_grad += delta_L2
    delta_L1_imd = gpu.dot(gpu.garray(transpose(theta_L2.as_numpy_array())), delta_L2_imd2)
    delta_L1_imd2 = delta_L1_imd*hidden_derivative_L1
    delta_L1_imd2 = delta_L1_imd2[1:shape(delta_L1_imd2)[0]+1, :]
    delta_L1 = gpu.dot(delta_L1_imd2, gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta_L1
    theta_L1_grad = theta_L1_grad/numCases
    theta_L2_grad = theta_L2_grad/numCases
    theta_L3_grad = theta_L3_grad/numCases
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] + theta_L1[:, 1: shape(theta_L1)[1]] * lambda_hidden
    theta_L2_grad[:, 1:shape(theta_L2_grad)[1]] = theta_L2_grad[:, 1:shape(theta_L2_grad)[1]] + theta_L2[:, 1: shape(theta_L2)[1]] * lambda_hidden
    theta_L3_grad[:, 1:shape(theta_L3_grad)[1]] = theta_L3_grad[:, 1:shape(theta_L3_grad)[1]] + theta_L3[:, 1: shape(theta_L3)[1]] * lambda_hidden       
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    theta_L2_grad = reshape(theta_L2_grad.as_numpy_array(), num_weights_L2)
    theta_L3_grad = reshape(theta_L3_grad.as_numpy_array(), num_weights_L3)
    theta_softmax_grad = reshape(theta_softmax_grad.as_numpy_array(), num_weights_softmax)
    del inputs
    del theta_L1
    del theta_L2
    del theta_L3
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_activation_L3
    del hidden_sum_L3
    del hidden_sum_softmax
    del predictions
    del temp
    del softmax_imd
    del deltaOut
    del delta_L3_imd
    del delta_L3_imd2
    del delta_L3
    del delta_L2_imd
    del delta_L2_imd2
    del delta_L2
    del delta_L1_imd
    del delta_L1_imd2
    del delta_L1
    #del regularized_penalty_L1
    #del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost, hstack((theta_L1_grad,theta_L2_grad,theta_L3_grad,theta_softmax_grad))
Example #38
def score_softmax(y_target,y_predicted):
    assert(type(y_target) == type(y_predicted))
    if type(y_target) is g.garray:
        return g.sum(y_target * g.log(y_predicted + 1e-30))
    else:
        return np.sum(y_target * np.log(y_predicted + 1e-300))
Example #39
 def invert_output(self, z):
     return 0.5 * gnp.log((1+z) / (1-z))
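Example #36 inverts the logistic function (a logit) and Example #39 inverts tanh; the latter is the artanh identity, checked here in NumPy:

import numpy as np

z = np.array([-0.9, 0.0, 0.5])
print(np.allclose(0.5 * np.log((1 + z) / (1 - z)), np.arctanh(z)))  # True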
Example #40
def mlpSoftmax_costfunc(x, *args):
    numClasses, inputSize, l1Size, l2Size, l3Size, lambda_softmax, lambda_hidden, inputs, labels, groundTruth, dropout_probability = args
    numCases = shape(inputs)[1]
    num_weights_L1 = l1Size * (inputSize + 1)
    num_weights_L2 = l2Size * (l1Size + 1)
    num_weights_L3 = l3Size * (l2Size + 1)
    num_weights_softmax = numClasses * l3Size
    #x = gpu.garray(x)
    inputs = gpu.garray(inputs)
    theta_L1 = gpu.garray(reshape(x[0:num_weights_L1],
                                  (l1Size, inputSize + 1)))
    #theta_L1 = x[0:num_weights_L1].reshape((l1Size, inputSize + 1))
    #print numClasses, l2Size
    theta_L2 = gpu.garray(
        reshape(x[num_weights_L1:num_weights_L2 + num_weights_L1],
                (l2Size, l1Size + 1)))
    #theta_L2 = x[num_weights_L1:num_weights_L2+num_weights_L1].reshape((l2Size, l1Size + 1))
    theta_L3 = gpu.garray(
        reshape(
            x[num_weights_L2 + num_weights_L1:num_weights_L2 + num_weights_L1 +
              num_weights_L3], (l3Size, l2Size + 1)))
    theta_softmax = gpu.garray(
        reshape(
            x[num_weights_L2 + num_weights_L1 + num_weights_L3:shape(x)[0]],
            (numClasses, l3Size)))
    #theta_softmax = x[num_weights_L2+num_weights_L1:shape(x)[0]].reshape((numClasses, l2Size))
    theta_L1_grad = gpu.zeros(shape(theta_L1))
    theta_L2_grad = gpu.zeros(shape(theta_L2))
    theta_L3_grad = gpu.zeros(shape(theta_L3))
    dropout_l1 = gpu.garray(
        bernoulli.rvs(dropout_probability, size=(l1Size + 1, numCases)))
    dropout_l2 = gpu.garray(
        bernoulli.rvs(dropout_probability, size=(l2Size + 1, numCases)))
    dropout_l3 = gpu.garray(
        bernoulli.rvs(dropout_probability, size=(l3Size, numCases)))
    inputs = gpu.concatenate((gpu.ones((1, numCases)), inputs), axis=0)
    hidden_sum_L1 = gpu.dot(theta_L1, inputs)
    #hidden_activation_L1 = gpu.log(1+hidden_sum_L1.exp())
    relu_mask_hidden1 = gpu.ones(shape(hidden_sum_L1)) * (hidden_sum_L1 > 0)
    hidden_activation_L1 = hidden_sum_L1 * relu_mask_hidden1
    hidden_derivative_L1 = relu_mask_hidden1
    #hidden_activation_L1 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L1), axis=0)
    hidden_derivative_L1 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_derivative_L1),
                                           axis=0)
    hidden_activation_L1 = gpu.concatenate(
        (gpu.ones((1, numCases)), hidden_activation_L1), axis=0) * dropout_l1
    hidden_sum_L2 = gpu.dot(theta_L2, hidden_activation_L1)
    #hidden_activation_L2 = gpu.log(1+hidden_sum_L2.exp())
    relu_mask_hidden2 = gpu.ones(shape(hidden_sum_L2)) * (hidden_sum_L2 > 0)
    hidden_activation_L2 = hidden_sum_L2 * relu_mask_hidden2
    hidden_derivative_L2 = relu_mask_hidden2
    #hidden_activation_L2 = gpu.concatenate((gpu.ones((1,numCases)), hidden_activation_L2), axis=0)
    hidden_derivative_L2 = gpu.concatenate((gpu.ones(
        (1, numCases)), hidden_derivative_L2),
                                           axis=0)
    hidden_activation_L2 = gpu.concatenate(
        (gpu.ones((1, numCases)), hidden_activation_L2), axis=0) * dropout_l2
    hidden_sum_L3 = gpu.dot(theta_L3, hidden_activation_L2)
    #hidden_activation_L3 = gpu.log(1+hidden_sum_L3.exp())
    relu_mask_hidden3 = gpu.ones(shape(hidden_sum_L3)) * (hidden_sum_L3 > 0)
    #hidden_activation_L3 = hidden_sum_L3*relu_mask_hidden3
    hidden_derivative_L3 = relu_mask_hidden3
    hidden_activation_L3 = hidden_sum_L3 * relu_mask_hidden3 * dropout_l3
    #hidden_activation_L3 = hidden_sum_L3.logistic() * dropout_l3
    hidden_sum_softmax = gpu.dot(theta_softmax, hidden_activation_L3)
    hidden_sum_softmax = hidden_sum_softmax - hidden_sum_softmax.max(axis=0)
    predictions = hidden_sum_softmax.exp()
    predictions = predictions / gpu.sum(predictions, axis=0)
    pred = predictions.argmax(axis=0) + 1
    accuracy = mean(pred == labels) * 100
    temp = groundTruth * gpu.log(predictions)
    temp = temp.as_numpy_array()
    temp[temp == -inf] = -200.0
    temp = nan_to_num(temp)
    regularized_penalty_L1 = theta_L1[:, 1:shape(theta_L1)[1]]
    regularized_penalty_L2 = theta_L2[:, 1:shape(theta_L2)[1]]
    regularized_penalty_L3 = theta_L3[:, 1:shape(theta_L3)[1]]
    regularized_penalty_L1 = regularized_penalty_L1 * regularized_penalty_L1
    regularized_penalty_L2 = regularized_penalty_L2 * regularized_penalty_L2
    regularized_penalty_L3 = regularized_penalty_L3 * regularized_penalty_L3
    pred_cost = -1 * sum(temp) / numCases
    l2norm_cost = 0.5 * lambda_hidden * (
        gpu.sum(regularized_penalty_L3) + gpu.sum(regularized_penalty_L2) +
        gpu.sum(regularized_penalty_L1)) + 0.5 * lambda_softmax * gpu.sum(
            theta_softmax * theta_softmax)
    #l2norm_cost = 0
    cost = pred_cost + l2norm_cost
    print 'Prediction Accuracy:                       ', accuracy, '%'
    print 'Multilayer Softmax Prediction Cost:        ', pred_cost
    print 'Multilayer Softmax L2 Normalisation Cost:  ', l2norm_cost
    print 'Multilayer Softmax Cost:                   ', cost
    print '--------------------------------------------------------------------'
    softmax_imd = groundTruth - predictions
    #theta_softmax_grad = -1*gpu.dot(softmax_imd, gpu.garray(transpose(hidden_activation_L3.as_numpy_array())))/numCases
    theta_softmax_grad = -1 * gpu.dot(
        softmax_imd,
        gpu.garray(transpose(hidden_activation_L3.as_numpy_array()))
    ) / numCases + lambda_softmax * theta_softmax
    deltaOut = -softmax_imd
    delta_L3_imd = gpu.dot(
        gpu.garray(transpose(theta_softmax.as_numpy_array())), deltaOut)
    delta_L3_imd2 = delta_L3_imd * hidden_derivative_L3
    #delta_L3_imd2 = (delta_L3_imd * hidden_activation_L3) * (1-hidden_activation_L3)
    delta_L3 = gpu.dot(
        delta_L3_imd2,
        gpu.garray(transpose(hidden_activation_L2.as_numpy_array())))
    theta_L3_grad += delta_L3
    delta_L2_imd = gpu.dot(gpu.garray(transpose(theta_L3.as_numpy_array())),
                           delta_L3_imd2)
    delta_L2_imd2 = delta_L2_imd * hidden_derivative_L2
    delta_L2_imd2 = delta_L2_imd2[1:shape(delta_L2_imd2)[0] + 1, :]
    delta_L2 = gpu.dot(
        delta_L2_imd2,
        gpu.garray(transpose(hidden_activation_L1.as_numpy_array())))
    theta_L2_grad += delta_L2
    delta_L1_imd = gpu.dot(gpu.garray(transpose(theta_L2.as_numpy_array())),
                           delta_L2_imd2)
    delta_L1_imd2 = delta_L1_imd * hidden_derivative_L1
    delta_L1_imd2 = delta_L1_imd2[1:shape(delta_L1_imd2)[0] + 1, :]
    delta_L1 = gpu.dot(delta_L1_imd2,
                       gpu.garray(transpose(inputs.as_numpy_array())))
    theta_L1_grad += delta_L1
    theta_L1_grad = theta_L1_grad / numCases
    theta_L2_grad = theta_L2_grad / numCases
    theta_L3_grad = theta_L3_grad / numCases
    theta_L1_grad[:, 1:shape(theta_L1_grad)[1]] = theta_L1_grad[:, 1:shape(
        theta_L1_grad)[1]] + theta_L1[:, 1:shape(theta_L1)[1]] * lambda_hidden
    theta_L2_grad[:, 1:shape(theta_L2_grad)[1]] = theta_L2_grad[:, 1:shape(
        theta_L2_grad)[1]] + theta_L2[:, 1:shape(theta_L2)[1]] * lambda_hidden
    theta_L3_grad[:, 1:shape(theta_L3_grad)[1]] = theta_L3_grad[:, 1:shape(
        theta_L3_grad)[1]] + theta_L3[:, 1:shape(theta_L3)[1]] * lambda_hidden
    theta_L1_grad = reshape(theta_L1_grad.as_numpy_array(), num_weights_L1)
    theta_L2_grad = reshape(theta_L2_grad.as_numpy_array(), num_weights_L2)
    theta_L3_grad = reshape(theta_L3_grad.as_numpy_array(), num_weights_L3)
    theta_softmax_grad = reshape(theta_softmax_grad.as_numpy_array(),
                                 num_weights_softmax)
    del inputs
    del theta_L1
    del theta_L2
    del theta_L3
    del theta_softmax
    del hidden_sum_L1
    del hidden_activation_L1
    del hidden_sum_L2
    del hidden_activation_L2
    del hidden_activation_L3
    del hidden_sum_L3
    del hidden_sum_softmax
    del predictions
    del temp
    del softmax_imd
    del deltaOut
    del delta_L3_imd
    del delta_L3_imd2
    del delta_L3
    del delta_L2_imd
    del delta_L2_imd2
    del delta_L2
    del delta_L1_imd
    del delta_L1_imd2
    del delta_L1
    #del regularized_penalty_L1
    #del regularized_penalty_L2
    gpu.free_reuse_cache()
    return cost, hstack(
        (theta_L1_grad, theta_L2_grad, theta_L3_grad, theta_softmax_grad))
Example #41
def log_exp_sum(x, axis=1):
    x_max = x.max(axis=axis)
    if isinstance(x, gnp.garray):
        return (x_max + gnp.log(gnp.exp(x - x_max[:,gnp.newaxis]).sum(axis=axis))).asarray()
    else:
        return x_max + np.log(np.exp(x - x_max[:,np.newaxis]).sum(axis=axis))
Example #42
def unigram_partition(data_path, num_ensembles, model_name, method = 'none', train = True, random_training_order = False, reverse_order = False):
		
		algo_name = 'unigram' + '_' + 'partition'

		raw_data = reader.ptb_raw_data(data_path)
		train_data, valid_data, test_data, _, word_to_id = raw_data

		if reverse_order:
				train_data.reverse()
				valid_data.reverse()
				test_data.reverse()
				algo_name = 'reverse ' + algo_name

		eos_id = word_to_id['<eos>']
		case_weight_length = len(train_data)-1
		train_case_weights = np.repeat(1.0/case_weight_length, case_weight_length).tolist()

		train_sentence_list = reader.get_sentence_list(train_data, eos_id, reverse_order)
		if random_training_order:
				#train_sentence_list = reader.get_sentence_list(train_data, eos_id)
				perm = range(len(train_sentence_list))
				np.random.shuffle(perm)
				train_data = []
				for idx in perm:
						train_data += train_sentence_list[idx]
				train_sentence_list = reader.get_sentence_list(train_data, eos_id, reverse_order)
		num_sent = len(train_sentence_list)
		train_sentence_weights = np.repeat(1.0/num_sent, num_sent).tolist()

		new_train_data = train_data
	
		FLAGS.model = model_name

		config = get_config()
		eval_config = get_config()
		eval_config.batch_size = 1
		eval_config.num_steps = 1

		alpha_t_list = []
		full_test_set_logits = []
		for i in range(len(test_data)-1):
				full_test_set_logits.append(np.zeros((1,eval_config.vocab_size)))
		
		sentence_starters = []
		id_to_sentence_num_dict = {}
		for i in range(num_sent):
				if reverse_order:
						desired_id = train_sentence_list[i][-1]
				else:
						desired_id = train_sentence_list[i][0]
				if desired_id in id_to_sentence_num_dict:
						id_to_sentence_num_dict[desired_id].append(i)
				else:
						id_to_sentence_num_dict[desired_id] = [i]
						sentence_starters.append(desired_id)


		id_to_model = {}
		for idx in sentence_starters:
				id_to_model[idx] = [1]

		id_to_weight = {}


		gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = 0.5)

		for iii in range(num_ensembles):
				with tf.Graph().as_default(), tf.Session(config = tf.ConfigProto(gpu_options = gpu_options)) as session:
						initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)

						with tf.variable_scope("model", reuse=None, initializer=initializer):
								sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))
								m = PTBModel(is_training=True, config=config)
								sess.close()


						with tf.variable_scope("model", reuse=True, initializer=initializer):
								mvalid = PTBModel(is_training=False, config=config)
								mtest = PTBModel(is_training=False, config=eval_config)
								
								m2 = PTBModel(is_training=False, config=eval_config)

						tf.initialize_all_variables().run()
						saver = tf.train.Saver()
						if iii > 0:
								np.savetxt(checkpoint_dir + 'test_set_probs_no_alpha.out', np.squeeze(test_set_probs_no_alpha), delimiter = ',')

						if random_training_order:
								new_folder = 'random training order ' + algo_name + '_' + model_name + '/' + 'ensemble' + str(iii + 1)
						else:
								new_folder = algo_name + '_' + model_name + '/' + 'ensemble' + str(iii + 1)
						
						checkpoint_dir = 'simple-examples/ckpt/' + new_folder + '/'
						if not os.path.exists(checkpoint_dir):
								os.makedirs(checkpoint_dir)

						if iii > 0:
								for k,v in id_to_sentence_num_dict.items():
										total_wt = 0
										num_sentences = len(v)
										for sent_idx in v:
												total_wt += train_sentence_weights[sent_idx]
										id_to_weight[k] = total_wt/num_sentences

								sorted_id_to_weight = sorted(id_to_weight.items(), key = operator.itemgetter(1), reverse = True)
								np.savetxt(checkpoint_dir + 'sorted_id_to_weight', sorted_id_to_weight, delimiter = ',')
								new_train_data = []
								i = 0
								sent_included = 0                      
								while sent_included < np.floor(num_sent/2):
										start_key = sorted_id_to_weight[i][0]
										sentence_additions = id_to_sentence_num_dict[start_key]
										for idx in sentence_additions:
												new_train_data += train_sentence_list[idx]
												sent_included += 1
										id_to_model[start_key].append(iii + 1)
										i += 1

						

						if train:

								np.savetxt(checkpoint_dir + 'train_case_weights.out', train_case_weights, delimiter = ',')
								np.savetxt(checkpoint_dir + 'train_sentence_weights.out', train_sentence_weights, delimiter = ',')

								for i in range(config.max_max_epoch):
										lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
										m.assign_lr(session, config.learning_rate * lr_decay)

										print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
										train_perplexity = run_epoch(session, m, new_train_data, m.train_op, verbose=False)
										print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
										valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
										print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
										
										if (i+1) % 5 == 0 or (i+1) == config.max_max_epoch:
												saver.save(session, checkpoint_dir + 'model.ckpt', global_step = i+1)
						else:
								ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
								if ckpt and ckpt.model_checkpoint_path:
										saver.restore(session, ckpt.model_checkpoint_path)

						if train:

								case_scores = output_training_set_error_for_boosting(session, m2, train_data, tf.no_op())
						

								score = sum(case_scores)
								norm = len(case_scores) * -1 * gpu.log(float(1.0) / eval_config.vocab_size)
								epsilon_t =  (1 - (norm - score) / norm) / 2.0
								alpha_t = 0.5 * gpu.log((1 - epsilon_t)/ epsilon_t)
								alpha_t_list.append(alpha_t)

								# alpha_t values for every ensemble member are appended to one file in the shared parent folder
								alpha_path = 'simple-examples/ckpt/' + ('random training order ' if random_training_order else '') + algo_name + '_' + model_name + '/' + 'alpha_t.out'
								if iii == 0 and os.path.exists(alpha_path):
										# start a fresh alpha_t log on the first ensemble (os.remove, since this is a file rather than a directory)
										os.remove(alpha_path)

								with open(alpha_path, 'ab') as f:
										f.write(str(alpha_t))
										f.write(',')
								
								train_case_weights = gpu.sqrt((1 - epsilon_t)/ epsilon_t) * np.multiply(train_case_weights, np.asarray(case_scores))
								train_case_weights = np.ravel(normalize(np.asarray(train_case_weights).reshape(1,-1), norm = 'l1'))
								
								if method == 'stddev':
										new_train_case_weights = reject_outliers(train_case_weights)
								elif method == 'sqrt':
										new_train_case_weights = sqrt_norm(train_case_weights)
								else:
										new_train_case_weights = train_case_weights
					
								start_idx = 0

								for i in range(len(train_sentence_list)):
										
										this_sentence_length = len(train_sentence_list[i])
										
										sentence_tokens = [v for v in new_train_case_weights[start_idx:(this_sentence_length+start_idx)] if np.isfinite(v)]
										if len(sentence_tokens) == 0:
												# no finite token weights survived outlier rejection, so zero out this sentence
												train_sentence_weights[i] = 0
										else:
												train_sentence_weights[i] = np.mean(sentence_tokens)
										


										start_idx += this_sentence_length


								train_sentence_weights = np.ravel(normalize(np.asarray(train_sentence_weights).reshape(1,-1), norm = 'l1'))

								for k,v in id_to_sentence_num_dict.items():
										total_wt = 0
										num_sentences = len(v)
										for sent_idx in v:
												total_wt += train_sentence_weights[sent_idx]
										id_to_weight[k] = total_wt/num_sentences

						test_set_probs = output_test_set_probs(session, mtest, test_data, tf.no_op(), partition = True)
						test_set_probs_no_alpha = test_set_probs
						np.savetxt(checkpoint_dir + 'test_set_probs_no_alpha.out', np.squeeze(test_set_probs_no_alpha), delimiter = ',')

						train_set_probs = output_test_set_probs(session, m2, train_data, tf.no_op(), partition = True)
						train_set_probs_no_alpha = train_set_probs
						
						np.savetxt(checkpoint_dir + 'train_set_probs_no_alpha.out', np.squeeze(train_set_probs_no_alpha), delimiter = ',')            
					
						test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
						print("Test Perplexity: %.3f" % test_perplexity)
		
		if random_training_order:
				new_folder = 'random training order ' + algo_name + '_' + model_name
		else:
				new_folder = algo_name + '_' + model_name
						
		checkpoint_dir = 'simple-examples/ckpt/' + new_folder + '/'
		with open(checkpoint_dir + 'id_to_model.out', 'w') as f:
				for k,v in id_to_model.items():
						f.write(str(k) + ',' + ','.join(str(x) for x in v) + '\n')

		with open(checkpoint_dir + 'id_to_sent_num.out', 'w') as ff:
				for k,v in id_to_sentence_num_dict.items():
						ff.write(str(k) + ',' + ','.join(str(x) for x in v) + '\n')

		print('Test PPL: ' + str(evaluate_unigram_partition(data = test_data, batch_size = 1, num_steps = 1, num_ensembles = num_ensembles, eos_id = eos_id, fp = checkpoint_dir, probs_fn = 'test_set_probs_no_alpha.out')))
		print('Train PPL: ' + str(evaluate_unigram_partition(data = train_data, batch_size = 1, num_steps = 1, num_ensembles = num_ensembles, eos_id = eos_id, fp = checkpoint_dir, probs_fn = 'train_set_probs_no_alpha.out')))
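The weight update above follows the AdaBoost recipe: the summed per-token cross-entropy is normalised by the worst-case loss of -log(1/V) per token, converted into an error rate epsilon_t, and turned into a model weight alpha_t = 0.5 * log((1 - epsilon_t) / epsilon_t). Below is a minimal NumPy sketch of that arithmetic; the loss values and vocabulary size are invented for illustration, and gnumpy's gpu.log behaves the same way on these scalars.

import numpy as np

# hypothetical per-token cross-entropy losses from one ensemble member
case_scores = np.array([4.2, 6.1, 9.3, 2.8])
vocab_size = 10000                                     # assumed PTB vocabulary size

norm = len(case_scores) * -np.log(1.0 / vocab_size)    # worst-case total loss
score = case_scores.sum()

epsilon_t = (1 - (norm - score) / norm) / 2.0          # simplifies to score / (2 * norm)
alpha_t = 0.5 * np.log((1 - epsilon_t) / epsilon_t)

print(epsilon_t, alpha_t)                              # a low error rate yields a large model weight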
Example #43
0
 def invert_output(self, z):
     return 0.5 * gnp.log((1 + z) / (1 - z))
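Example #43 inverts a tanh output unit: 0.5 * log((1 + z) / (1 - z)) is arctanh(z). A quick NumPy check of that identity, not part of the original code:

import numpy as np

z = np.tanh(1.3)
print(0.5 * np.log((1 + z) / (1 - z)))   # ~1.3, i.e. np.arctanh(z)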
Example #44
0
def safe_log(x):
    return gnp.log(x + _SMALL_CONSTANT)
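safe_log only shifts its argument away from zero before taking the log, so exact zeros give a large negative value instead of -inf. The guard _SMALL_CONSTANT is defined elsewhere in the module; the value below is an assumed stand-in for illustration:

import numpy as np

_SMALL_CONSTANT = 1e-20                  # assumed; the real module-level constant is not shown here
probs = np.array([0.3, 0.0, 0.7])
print(np.log(probs + _SMALL_CONSTANT))   # finite everywhere, no -inf or divide-by-zero warning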
Example #45
0
def ABISS(data_path, num_ensembles, model_name, method = 'stddev', train = True, random_training_order = False):
		
		algo_name = method + ' ' + 'ABISS'

		raw_data = reader.ptb_raw_data(data_path)
		train_data, valid_data, test_data, _, word_to_id = raw_data

		eos_id = word_to_id['<eos>']
		case_weight_length = len(train_data)-1
		train_case_weights = np.repeat(1.0/case_weight_length, case_weight_length).tolist()

		train_sentence_list = reader.get_sentence_list(train_data, eos_id)
		if random_training_order:
				perm = range(len(train_sentence_list))
				np.random.shuffle(perm)
				train_data = []
				for idx in perm:
						train_data += train_sentence_list[idx]
				train_sentence_list = reader.get_sentence_list(train_data, eos_id)
		num_sent = len(train_sentence_list)
		train_sentence_weights = np.repeat(1.0/num_sent, num_sent).tolist()

		new_train_data = reader.weighted_sentence_selection(train_sentence_list, train_sentence_weights, random_training_order)
	
		FLAGS.model = model_name

		config = get_config()
		eval_config = get_config()
		eval_config.batch_size = 1
		eval_config.num_steps = 1

		alpha_t_list = []
		full_test_set_logits = []
		for i in range(len(test_data)-1):
				full_test_set_logits.append(np.zeros((1,eval_config.vocab_size)))
		
		gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = 0.33)

		for iii in range(num_ensembles):
				with tf.Graph().as_default(), tf.Session(config = tf.ConfigProto(gpu_options = gpu_options)) as session:
						initializer = tf.random_uniform_initializer(-config.init_scale, config.init_scale)

						with tf.variable_scope("model", reuse=None, initializer=initializer):
								sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))
								m = PTBModel(is_training=True, config=config)
								sess.close()


						with tf.variable_scope("model", reuse=True, initializer=initializer):
								mvalid = PTBModel(is_training=False, config=config)
								mtest = PTBModel(is_training=False, config=eval_config)
								#config.batch_size = 1
								m2 = PTBModel(is_training=False, config=eval_config)

						tf.initialize_all_variables().run()
						saver = tf.train.Saver()
						
						if random_training_order:
								new_folder = 'random training order ' + algo_name + '_' + model_name + '/' + 'ensemble' + str(iii + 1)
						else:
								new_folder = algo_name + '_' + model_name + '/' + 'ensemble' + str(iii + 1)
						
						checkpoint_dir = 'simple-examples/ckpt/' + new_folder + '/'
						if not os.path.exists(checkpoint_dir):
								os.makedirs(checkpoint_dir)

						if train:
								np.savetxt(checkpoint_dir + 'train_case_weights.out', train_case_weights, delimiter = ',')
								np.savetxt(checkpoint_dir + 'train_sentence_weights.out', train_sentence_weights, delimiter = ',')

								for i in range(config.max_max_epoch):
										lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
										m.assign_lr(session, config.learning_rate * lr_decay)

										print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
										train_perplexity = run_epoch(session, m, new_train_data, m.train_op, verbose=False)
										print("Epoch: %d Train Perplexity: %.3f" % (i + 1, train_perplexity))
										valid_perplexity = run_epoch(session, mvalid, valid_data, tf.no_op())
										print("Epoch: %d Valid Perplexity: %.3f" % (i + 1, valid_perplexity))
										
										if (i+1) % 5 == 0 or (i+1) == config.max_max_epoch:
												saver.save(session, checkpoint_dir + 'model.ckpt', global_step = i+1)
						else:
								ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
								if ckpt and ckpt.model_checkpoint_path:
										saver.restore(session, ckpt.model_checkpoint_path)

						case_scores = output_training_set_error_for_boosting(session, m2, train_data, tf.no_op())

						score = sum(case_scores)
						norm = len(case_scores) * -1 * gpu.log(float(1.0) / eval_config.vocab_size)
						epsilon_t =  (1 - (norm - score) / norm) / 2.0
						alpha_t = 0.5 * gpu.log((1 - epsilon_t)/ epsilon_t)
						alpha_t_list.append(alpha_t)

						# alpha_t values for every ensemble member are appended to one file in the shared parent folder
						alpha_path = 'simple-examples/ckpt/' + ('random training order ' if random_training_order else '') + algo_name + '_' + model_name + '/' + 'alpha_t.out'
						if iii == 0 and os.path.exists(alpha_path):
								# start a fresh alpha_t log on the first ensemble (os.remove, since this is a file rather than a directory)
								os.remove(alpha_path)

						with open(alpha_path, 'ab') as f:
								f.write(str(alpha_t))
								f.write(',')
						
						train_case_weights = gpu.sqrt((1 - epsilon_t)/ epsilon_t) * np.multiply(train_case_weights, np.asarray(case_scores))
						train_case_weights = np.ravel(normalize(np.asarray(train_case_weights).reshape(1,-1), norm = 'l1'))

						if method == 'stddev':
								new_train_case_weights = reject_outliers(train_case_weights)
						elif method == 'sqrt':
								new_train_case_weights = sqrt_norm(train_case_weights)
						else:
								new_train_case_weights = train_case_weights
						
						start_idx = 0

						for i in range(len(train_sentence_list)):
								this_sentence_length = len(train_sentence_list[i])
								sentence_tokens = [v for v in new_train_case_weights[start_idx:(this_sentence_length+start_idx)] if np.isfinite(v)]
								if len(sentence_tokens) == 0:
										# no finite token weights survived outlier rejection, so zero out this sentence
										train_sentence_weights[i] = 0
								else:
										train_sentence_weights[i] = np.mean(sentence_tokens)


								start_idx += this_sentence_length

						train_sentence_weights = np.ravel(normalize(np.asarray(train_sentence_weights).reshape(1,-1), norm = 'l1'))

						new_train_data = reader.weighted_sentence_selection(train_sentence_list, train_sentence_weights, random_training_order)

						test_set_probs = output_test_set_probs(session, mtest, test_data, tf.no_op())

						for i in range(len(test_set_probs)):
								test_set_probs[i] = test_set_probs[i] * alpha_t
								full_test_set_logits[i] += test_set_probs[i]
						
						test_perplexity = run_epoch(session, mtest, test_data, tf.no_op())
						print("Test Perplexity: %.3f" % test_perplexity)
		
		alpha_t_sum = np.sum(alpha_t_list)
		for i in range(len(full_test_set_logits)):
				full_test_set_logits[i] = full_test_set_logits[i] / alpha_t_sum

		ensemble_perplexity = classify_ensemble(test_data, full_test_set_logits, 1, 1)
		print(ensemble_perplexity)
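ABISS blends its members by scaling each model's per-token output by its alpha_t, accumulating the results, and dividing by the sum of the alphas before classify_ensemble turns the blend into a perplexity (classify_ensemble itself is not shown here). A minimal NumPy sketch of that weighted average, with probabilities and alpha values invented for illustration:

import numpy as np

# probabilities three members assigned to the true next word at two time steps (made up)
member_probs = [np.array([0.010, 0.020]),
                np.array([0.030, 0.010]),
                np.array([0.020, 0.025])]
alphas = [1.2, 0.8, 0.5]                           # alpha_t values, also made up

combined = sum(a * p for a, p in zip(alphas, member_probs)) / np.sum(alphas)
print(combined)                                    # blended per-token probabilities
print(np.exp(np.mean(-np.log(combined))))          # one plausible perplexity of the blend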
Example #46
0
def evaluate_unigram_partition(data, batch_size, num_steps, num_ensembles, eos_id, fp = 'simple-examples/ckpt/random training order unigram_partition_small/', probs_fn = 'test_set_probs_no_alpha.out'):
		epoch_size = ((len(data) // batch_size) - 1) // num_steps
		start_time = time.time()
		costs = 0.0
		iters = 0
		
		full_probs = []
		for i in range(num_ensembles):
				print(i)
				#full_probs[i] = np.loadtxt(fp + 'ensemble' + str(i+1) + '/test_set_probs_no_alpha.out', delimiter = ',')
				full_probs.append(np.asarray(pd.read_csv(fp + 'ensemble' + str(i+1) + '/' + probs_fn, delimiter = ',', header = None)))
				print(np.shape(full_probs[i]))
				# for ii in range(len(full_probs[i])):
				#     full_probs[i][ii] = full_probs[i][ii] / np.sum(full_probs[i][ii])

		print('reading in probs done')
		id_to_model = {}
		with open(fp + 'id_to_model.out', 'rb') as f:
				csv_reader = csv.reader(f, delimiter = ',', quotechar = '|')
				for row in csv_reader:
						row_list = [x for x in row if (x != '[' and x != ']' and x != '' and x != ' ')]
						#print(row_list)
						#row_list = row.split(',')
						row_list = [int(i) for i in row_list]
						id_to_model[row_list[0]] = row_list[1:len(row_list)]

		print('reading in id_to_model done')
		#print(id_to_model[1344])
		#probs = tf.nn.softmax(probs)
		# print(np.sum(probs[0]))
		# print(np.sum(probs[50]))
		#print(len(probs[0]))
		next_is_start_of_sentence = True
		flaggg = True
		#sent_list = reader.get_sentence_list(data = data, eos_id = eos_id)
		for step, (x, y) in enumerate(reader.ptb_iterator(data, batch_size, num_steps)):
				if next_is_start_of_sentence:
						x = x[0,0]
						if x in id_to_model:
								models_included = id_to_model[x]
								coef = 1
						else:
								models_included = [1,2,3,4,5,6,7,8,9]
								coef = 1
				if x == eos_id:
						#cost = -1 * gpu.log(full_probs[0][step])
						models_included = [1]
						coef = 1
						next_is_start_of_sentence = True 
				else:
						next_is_start_of_sentence = False
						#coef = 0.5
						#models_included = id_to_model[x]
				probs = 0
				denom = 0
				for m in models_included:
						if m == 1:
								probs += full_probs[m-1][step]
								denom += 1
						else:
								#coef = 0.5
								probs += coef*full_probs[m-1][step]
								denom += coef
				probs = probs / float(denom)
				cost = -1 * gpu.log(probs)

				# print(step)
				# print(x)
				# print(y)
				# print(probs)
				#print(probs[0])
				#cost = -1 * gpu.log(probs[step][0,y[0,0]])
				#print(cost)
				'''
				loss = tf.nn.seq2seq.sequence_loss_by_example(
						[logits],
						[tf.reshape(y, [-1])],
						[tf.ones([batch_size * num_steps], dtype=tf.float64)])

				print(loss)
				cost = tf.reduce_sum(loss) / batch_size
				print(cost)
				'''
				costs += cost
				iters += num_steps
		
				if step % (epoch_size // 10) == 10:
						print("%.3f perplexity: %.3f speed: %.0f wps" %
								(step * 1.0 / epoch_size, gpu.exp(costs / iters),
								iters * batch_size / (time.time() - start_time)))

		return gpu.exp(costs / iters) 
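The returned value is ordinary perplexity: the exponential of the average negative log-probability assigned to each true next word. A tiny worked example of that final line, with made-up probabilities:

import numpy as np

true_word_probs = np.array([0.05, 0.20, 0.01, 0.10])   # made-up per-token probabilities

costs = np.sum(-np.log(true_word_probs))
iters = len(true_word_probs)
print(np.exp(costs / iters))   # ~17.8, the geometric mean of 1/p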
Example #47
0
 def loss(self, Y, Z, A=None):
     # cross entropy loss
     return -(Z * gnp.log(Y + const.epsilon)).sum()
Example #48
0
File: loss.py Project: yujiali/pynn
def safe_log(x):
    return gnp.log(x + _SMALL_CONSTANT)
Example #50
0
 def H(X):
     from gnumpy import log
     return -(X*log(X+1e-10) + (1-X)*log(1-X+1e-10))
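H(X) is the element-wise binary (Bernoulli) entropy, with 1e-10 guarding against log(0); it is near zero for deterministic units and peaks at log 2 when a unit sits at 0.5. A NumPy transcription for illustration:

import numpy as np

def H(X):
    return -(X * np.log(X + 1e-10) + (1 - X) * np.log(1 - X + 1e-10))

print(H(np.array([0.0, 0.5, 1.0])))   # ~[0.0, 0.693, 0.0]; maximal at p = 0.5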
Example #51
0
 def softmax_old(x):
     y = gp.max(x, axis=1)[:, gp.newaxis]
     logsumexp = y + gp.log(gp.sum((gp.exp(x - y)), axis=1))[:, gp.newaxis]
     return gp.exp(x - logsumexp)
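softmax_old uses the standard log-sum-exp shift: subtracting the row maximum before exponentiating keeps the intermediate values bounded without changing the result. A NumPy illustration with logits that would overflow a naive exp (the gnumpy calls above are the direct analogues):

import numpy as np

x = np.array([[1000.0, 1001.0, 1002.0]])     # naive np.exp(x) would overflow
y = np.max(x, axis=1)[:, np.newaxis]
logsumexp = y + np.log(np.sum(np.exp(x - y), axis=1))[:, np.newaxis]
print(np.exp(x - logsumexp))                 # ~[[0.090, 0.245, 0.665]], each row sums to 1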
Example #52
0
 def invert_output(self, z):
     return gnp.log(z / (1 - z))
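This invert_output is the logit function, the inverse of a sigmoid output layer. A quick check, not part of the original code:

import numpy as np

z = 1.0 / (1.0 + np.exp(-0.7))   # sigmoid(0.7)
print(np.log(z / (1 - z)))       # ~0.7, recovering the pre-activation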
Example #53
0
    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        y = gnp.exp(pred - pred.max(axis=1)[:,gnp.newaxis])
        y = y / y.sum(axis=1)[:,gnp.newaxis]

        return -(self.target * gnp.log(y + _SMALL_CONSTANT)).sum(), y - self.target
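The pair returned above uses the standard simplification that, for softmax followed by cross-entropy, the gradient with respect to the pre-softmax scores is simply y - target. A small NumPy check of both quantities; the guard value stands in for the module's _SMALL_CONSTANT, which is not shown here:

import numpy as np

pred = np.array([[2.0, 1.0, 0.1]])
target = np.array([[1.0, 0.0, 0.0]])

y = np.exp(pred - pred.max(axis=1)[:, np.newaxis])
y = y / y.sum(axis=1)[:, np.newaxis]

loss = -(target * np.log(y + 1e-20)).sum()
grad = y - target                    # gradient of the loss w.r.t. pred
print(loss, grad)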
Example #54
0
def score_softmax(y_target, y_predicted):
    assert (type(y_target) == type(y_predicted))
    if type(y_target) is g.garray:
        return g.sum(y_target * g.log(y_predicted + 1e-30))
    else:
        return np.sum(y_target * np.log(y_predicted + 1e-300))
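The two guard constants differ most likely because garrays are single precision while the NumPy branch typically holds float64 data: 1e-300 underflows to zero in float32, so the GPU path needs the larger 1e-30. A quick demonstration:

import numpy as np

print(np.float32(1e-300))   # 0.0 -- underflows in single precision
print(np.float32(1e-30))    # representable, hence the larger guard on the garray branch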