Example #1
 def _softmax_loss(self, X, y, *args):
     N = X.shape[0]
     scores = self._forward(X, *args)
     scores = np.exp(scores - np.max(scores, axis=1, keepdims=True))
     prob = scores / np.sum(scores, axis=1, keepdims=True)
     loss = np.sum(-np.log(prob[np.arange(N), y])) / float(N)
     return loss
Example #2
def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
      for the ith input.
    - y: Either of the following:
      - One-hot encoding of labels, of shape (N, C)
      - Label indices of shape (N,), where y[i] is the label of the i-th example
        (0 <= y[i] < C)

    Returns a tuple of:
    - loss: Scalar giving the loss
    """
    N = x.shape[0]
    C = x.shape[1]
    if len(y.shape) == 1:
        # convert integer labels to a one-hot encoding
        onehot_y = np.zeros([N, C])
        np.onehot_encode(y, onehot_y)
    else:
        onehot_y = y
    probs = x - np.max(x, axis=1, keepdims=True)
    loss = -np.sum(probs * onehot_y) / N
    loss += np.sum(np.log(np.sum(np.exp(probs), axis=1, keepdims=True))) / N
    return loss
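For reference, a minimal usage sketch of the softmax_loss above, run with plain NumPy and a one-hot label matrix so the minpy-specific np.onehot_encode branch is never hit; the scores and labels are illustrative values, not part of the original example.

# Hypothetical usage sketch (plain NumPy, one-hot labels to avoid np.onehot_encode).
import numpy as np

scores = np.array([[2.0, 1.0, 0.1],
                   [0.5, 2.5, 0.3]])          # (N=2, C=3) class scores
onehot = np.array([[1, 0, 0],
                   [0, 1, 0]], dtype=float)   # one-hot labels, shape (N, C)

print(softmax_loss(scores, onehot))           # mean cross-entropy, a small positive scalar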
Example #3
def softmax_loss(x, y):
  """
  Computes the loss and gradient for softmax classification.

  Inputs:
  - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
    for the ith input.
  - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
    0 <= y[i] < C

  Returns a tuple of:
  - loss: Scalar giving the loss
  - dx: Gradient of the loss with respect to x
  """
  # Shift scores by the per-row maximum for numerical stability.
  probs = np.exp(x - np.max(x, axis=1, keepdims=True))
  probs /= np.expand_dims(np.sum(probs, axis=1), axis=1)
  N = x.shape[0]
  loss = -np.sum(np.log(probs[np.arange(N), y])) / N

  dx = probs.copy()
  dx[np.arange(N), y] -= 1
  dx /= N

  return loss, dx
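As a quick sanity check on the two-return-value softmax_loss just above (a sketch assuming plain NumPy, not part of the original example), every row of dx should sum to zero, since each row is a softmax distribution minus a one-hot vector scaled by 1/N.

# Hypothetical sanity check for the loss/gradient pair above (plain NumPy assumed).
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((4, 5))        # N=4 examples, C=5 classes
y = np.array([0, 2, 1, 4])             # integer labels in [0, C)

loss, dx = softmax_loss(x, y)
print(loss)                            # scalar cross-entropy loss
print(np.abs(dx.sum(axis=1)).max())    # ~0: each row of dx sums to zero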
Example #4
def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
      for the ith input.
    - y: Either of the following:
      - One-hot encoding of labels, of shape (N, C)
      - Label indices of shape (N,), where y[i] is the label of the i-th example
        (0 <= y[i] < C)

    Returns a tuple of:
    - loss: Scalar giving the loss
    """
    N = x.shape[0]
    C = x.shape[1]
    if len(y.shape) == 1:
        # convert integer labels to a one-hot encoding
        onehot_y = np.zeros([N, C])
        np.onehot_encode(y, onehot_y)
    else:
        onehot_y = y
    probs = x - np.max(x, axis=1, keepdims=True)
    loss = -np.sum(probs * onehot_y) / N
    loss += np.sum(np.log(np.sum(np.exp(probs), axis=1, keepdims=True))) / N
    return loss
Example #5
def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
      for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    # Shift scores by the per-row maximum for numerical stability.
    probs = np.exp(x - np.max(x, axis=1, keepdims=True))
    probs /= np.expand_dims(np.sum(probs, axis=1), axis=1)
    N = x.shape[0]
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N

    dx = probs.copy()
    dx[np.arange(N), y] -= 1
    dx /= N

    return loss, dx
Example #6
def rel_error(x, y):
    """Returns the relative error between two scalars or arrays (assumes `from numbers import Number` and NumPy)."""
    if isinstance(x, (int, float, Number)):
        x = float(x)
        y = float(y)
        return abs(x - y) / max(1e-8, abs(x) + abs(y))
    else:
        return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))
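rel_error is typically used to compare an analytic gradient against a numerical one. Below is a small illustrative sketch, assuming plain NumPy, that everything lives in one module, and that the two-return-value softmax_loss from Example #3 is in scope; the step size and shapes are arbitrary choices.

# Hypothetical gradient check using rel_error (plain NumPy assumed).
from numbers import Number   # needed by rel_error's scalar branch
import numpy as np

rng = np.random.default_rng(1)
x = rng.standard_normal((3, 4))
y = np.array([1, 0, 3])

_, dx = softmax_loss(x, y)             # analytic gradient from Example #3

h = 1e-6
dx_num = np.zeros_like(x)
for i in range(x.shape[0]):
    for j in range(x.shape[1]):
        xp = x.copy(); xp[i, j] += h
        xm = x.copy(); xm[i, j] -= h
        dx_num[i, j] = (softmax_loss(xp, y)[0] - softmax_loss(xm, y)[0]) / (2 * h)

print(rel_error(dx, dx_num))           # should be on the order of 1e-7 or smaller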
Example #7
 def forward(self, X):
     # Hidden layer: fully connected followed by ReLU.
     a = np.dot(self.params['fc1'], X.T)
     h = np.maximum(0, a)
     # Policy head: softmax over the logits, stabilized by subtracting the row max.
     logits = np.dot(h.T, self.params['policy_fc_last'].T)
     ps = np.exp(logits - np.max(logits, axis=1, keepdims=True))
     ps /= np.sum(ps, axis=1, keepdims=True)
     # Value head: linear output.
     vs = np.dot(h.T, self.params['vf_fc_last'].T) + self.params['vf_fc_last_bias']
     return ps, vs
Example #8
def softmax(x, y):
    import numpy as np
    y = y.astype(int)
    probs = np.exp(x - np.max(x, axis=1, keepdims=True))
    probs /= np.sum(probs, axis=1, keepdims=True)
    N = x.shape[0]
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    return loss
Example #9
def softmax_probability(p, channel):
    N, C = p.shape
    p -= np.max(p, axis=1).reshape((N, 1))
    code = np.zeros((N, C))
    np.onehot_encode(channel, code)
    p = np.exp(p)
    selected_p = p * code
    total_p = np.sum(p, axis=1).reshape((N, 1))
    return np.sum(selected_p / total_p, axis=1)
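The onehot_encode call above is not part of plain NumPy; with standard NumPy the same per-row selection can be written with integer indexing. A minimal equivalent sketch (softmax_probability_np is a hypothetical name introduced here):

# Plain-NumPy equivalent of softmax_probability (illustrative sketch).
import numpy as np

def softmax_probability_np(p, channel):
    # Probability assigned by the softmax of each row of p to the class index in `channel`.
    p = p - np.max(p, axis=1, keepdims=True)   # stabilize
    e = np.exp(p)
    probs = e / np.sum(e, axis=1, keepdims=True)
    return probs[np.arange(p.shape[0]), channel]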
Example #10
def softmax_crossentropy(x, y):
    # x should be (batch, prob)
    # y should be (batch, )

    x_dev = x - np.max(x, axis=1, keepdims=True) # minpy doesn't support x.max()
    sm = x_dev - np.log(np.sum(np.exp(x_dev), axis=1, keepdims=True))
    ids = np.arange(0, y.shape[0])*x.shape[1] + y
    ce = -np.sum(sm.reshape((sm.shape[0]*sm.shape[1],))[ids])/(1.0*y.shape[0])  # minpy doesn't support -1 in shape inference
    return ce
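The flat indexing above works around the minpy limitations noted in the comments; the same call also runs unchanged under plain NumPy. A hypothetical usage sketch with illustrative values:

# Hypothetical usage of softmax_crossentropy (plain NumPy assumed).
import numpy as np

x = np.array([[3.0, 1.0, 0.2],
              [1.0, 2.0, 0.5]])   # (batch=2, prob=3) scores
y = np.array([0, 1])              # integer labels, shape (batch,)

print(softmax_crossentropy(x, y)) # mean cross-entropy over the batch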
Example #11
 def _loss_function(*args):
     normal_loss = model.loss(model.forward(X, 'train'), Y)
     noisy_output = model.forward(noisy_X, 'train')
     noisy_output -= np.max(noisy_output, axis=1).reshape((K, 1))
     noisy_output = np.exp(noisy_output)
     model_p_noisy_X = noisy_output / np.sum(noisy_output, axis=1).reshape(
         (K, 1))
     kl = KL(1.0 / N_CLASSES, model_p_noisy_X)
     noisy_loss = gamma * np.sum(kl) / float(K)
     return gamma * normal_loss + (1 - gamma) * noisy_loss
Example #12
 def grad(g):
     import numpy as np
     y = label.astype(int)
     probs = np.exp(x - np.max(x, axis=1, keepdims=True))
     probs /= np.sum(probs, axis=1, keepdims=True)
     N = x.shape[0]
     dx = probs.copy()
     dx[np.arange(N), y] -= 1
     dx /= N
     return dx
Example #13
def softmax_crossentropy(x, y):
    EPSI = 1e-6
    batch_size, seq_len, prob_dim = x.shape
    x = x.reshape((x.shape[0] * x.shape[1], x.shape[2]))
    y = y.reshape((y.shape[0] * y.shape[1], ))

    #print x.shape, y.shape
    # x should be (batch, prob)
    # y should be (batch, )

    x_dev = x - np.max(x, axis=1,
                       keepdims=True)  # minpy doesn't support x.max()
    sm = x_dev - np.log(EPSI + np.sum(np.exp(x_dev), axis=1, keepdims=True))
    ids = np.arange(0, y.shape[0]) * prob_dim + y  # flat-index stride is the class dimension, not seq_len
    ce = -np.sum(sm.reshape((sm.shape[0] * sm.shape[1], ))[ids]) / (
        1.0 * y.shape[0])  # minpy doesn't support -1 in shape inference
    return ce
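A sketch of calling this sequence-level version with a (batch, seq_len, prob_dim) score tensor; plain NumPy is assumed and the shapes are illustrative only.

# Hypothetical usage of the sequence-level softmax_crossentropy (plain NumPy assumed).
import numpy as np

rng = np.random.default_rng(2)
x = rng.standard_normal((2, 3, 5))          # (batch=2, seq_len=3, prob_dim=5)
y = rng.integers(0, 5, size=(2, 3))         # integer targets per timestep

print(softmax_crossentropy(x, y))           # mean cross-entropy over all (example, timestep) pairs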
Example #14
def softmax_loss(x, y):
  """
  Computes the loss and gradient for softmax classification.

  Inputs:
  - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
    for the ith input.
  - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
    0 <= y[i] < C

  Returns a tuple of:
  - loss: Scalar giving the loss
  """
  # Shift scores by the per-row maximum for numerical stability.
  probs = np.exp(x - np.expand_dims(np.max(x, axis=1), axis = 1))
  probs = probs / np.expand_dims(np.sum(probs, axis=1), axis = 1)
  N = x.shape[0]
  loss = -np.sum(np.log(probs[np.arange(N), y])) / N

  return loss
Example #15
def softmax_loss(x, y):
    """
    Computes the loss and gradient for softmax classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
      for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
      0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    """
    # Shift scores by the per-row maximum for numerical stability.
    probs = np.exp(x - np.expand_dims(np.max(x, axis=1), axis=1))
    probs = probs / np.expand_dims(np.sum(probs, axis=1), axis=1)
    N = x.shape[0]
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N

    return loss
Example #16
def temporal_softmax_loss(x, y, mask, verbose=False):
    """
    A temporal version of softmax loss for use in RNNs. We assume that we are
    making predictions over a vocabulary of size V for each timestep of a
    timeseries of length T, over a minibatch of size N. The input x gives scores
    for all vocabulary elements at all timesteps, and y gives the indices of the
    ground-truth element at each timestep. We use a cross-entropy loss at each
    timestep, summing the loss over all timesteps and averaging across the
    minibatch.

    As an additional complication, we may want to ignore the model output at some
    timesteps, since sequences of different length may have been combined into a
    minibatch and padded with NULL tokens. The optional mask argument tells us
    which elements should contribute to the loss.

    Inputs:
    - x: Input scores, of shape (N, T, V)
    - y: Ground-truth indices, of shape (N, T) where each element is in the range
       0 <= y[i, t] < V
    - mask: Boolean array of shape (N, T) where mask[i, t] tells whether or not
      the scores at x[i, t] should contribute to the loss.

    Returns:
    - loss: Scalar giving the loss
    """
    N, T, V = x.shape

    x_flat = x.reshape(N * T, V)
    y_flat = y.reshape(N * T)
    mask_flat = mask.reshape(N * T)

    probs = np.exp(x_flat - np.max(x_flat, axis=1, keepdims=True))
    probs = probs / np.sum(probs, axis=1, keepdims=True)
    loss = -np.sum(mask_flat * np.log(probs[np.arange(N * T), y_flat])) / N

    return loss
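A small usage sketch (assuming plain NumPy) showing how the mask drops padded timesteps from the loss; the sizes and mask pattern are arbitrary illustrative choices.

# Hypothetical usage of temporal_softmax_loss with padding masked out (plain NumPy assumed).
import numpy as np

N, T, V = 2, 4, 6
rng = np.random.default_rng(3)
x = rng.standard_normal((N, T, V))                  # scores for every timestep
y = rng.integers(0, V, size=(N, T))                 # ground-truth indices
mask = np.array([[True, True, True, False],         # last timestep of sequence 0 is padding
                 [True, True, False, False]])       # last two of sequence 1 are padding

print(temporal_softmax_loss(x, y, mask))            # loss ignores the masked timesteps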
Example #17
def temporal_softmax_loss(x, y, mask, verbose=False):
    """
    A temporal version of softmax loss for use in RNNs. We assume that we are
    making predictions over a vocabulary of size V for each timestep of a
    timeseries of length T, over a minibatch of size N. The input x gives scores
    for all vocabulary elements at all timesteps, and y gives the indices of the
    ground-truth element at each timestep. We use a cross-entropy loss at each
    timestep, summing the loss over all timesteps and averaging across the
    minibatch.

    As an additional complication, we may want to ignore the model output at some
    timesteps, since sequences of different length may have been combined into a
    minibatch and padded with NULL tokens. The optional mask argument tells us
    which elements should contribute to the loss.

    Inputs:
    - x: Input scores, of shape (N, T, V)
    - y: Ground-truth indices, of shape (N, T) where each element is in the range
       0 <= y[i, t] < V
    - mask: Boolean array of shape (N, T) where mask[i, t] tells whether or not
      the scores at x[i, t] should contribute to the loss.

    Returns:
    - loss: Scalar giving the loss
    """
    N, T, V = x.shape

    x_flat = x.reshape(N * T, V)
    y_flat = y.reshape(N * T)
    mask_flat = mask.reshape(N * T)

    probs = np.exp(x_flat - np.max(x_flat, axis=1, keepdims=True))
    probs = probs / np.sum(probs, axis=1, keepdims=True)
    loss = -np.sum(mask_flat * np.log(probs[np.arange(N * T), y_flat])) / N

    return loss
Example #18
 def red3(x):
     return mp.max(x, axis=1, keepdims=True)
Example #19
def logsumexp(X, axis=1):
    max_X = np.max(X)
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=axis, keepdims=True))
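A quick check (plain NumPy assumed, illustrative values) that the max-shifted computation matches the naive log-sum-exp on well-scaled inputs while remaining finite on large ones:

# Hypothetical check of logsumexp against the naive formula (plain NumPy assumed).
import numpy as np

X = np.array([[1.0, 2.0, 3.0],
              [0.5, 0.5, 0.5]])

naive = np.log(np.sum(np.exp(X), axis=1, keepdims=True))
print(np.allclose(logsumexp(X), naive))     # True for moderate values

X_big = X + 1000.0                          # the naive formula would overflow here
print(np.isfinite(logsumexp(X_big)).all())  # True: the max shift keeps it finite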
Example #20
def logsoftmax(x, valid_idx):
    x[np.array(valid_idx)] += 1e6
    x_max = np.max(x)
    return x - x_max - np.log(np.sum(np.exp(x - x_max)))
Example #21
minimum = {key: [] for key in model.params}
maximum = {key: [] for key in model.params}

for i in range(iterations):
    X_batch = data[0][batch_index * batch_size:(batch_index + 1) * batch_size]
    Y_batch = data[1][batch_index * batch_size:(batch_index + 1) * batch_size]
    batch_index = (batch_index + 1) % batches

    gradients, loss = gradient_loss(model, X_batch, Y_batch)
    loss = loss.asnumpy()[0]
    loss_history.append(loss)

    for key, value in zip(model.params.keys(), gradients):
        mean[key].append(np.mean(value).asnumpy())
        std[key].append(np.std(value).asnumpy())
        L_2[key].append(np.mean(value**2).asnumpy())
        minimum[key].append(np.min(value).asnumpy())
        maximum[key].append(np.max(value).asnumpy())

    updater.update(gradients)

    if (i + 1) % rescaling_interval == 0:
        rescale(mlp, data[2], model.params)  # validation data
        print('rescaled')

    if (i + 1) % interval == 0:
        print('iteration %d loss %f' % (i + 1, loss))

pickle.dump((loss_history, mean, std, L_2, minimum, maximum),
            open('dr-g-norm-%d' % rescaling_interval, 'wb'))
Example #22
 def red1(x):
     return mp.max(x)
Example #23
 def red1(x):
     return mp.max(x)
Example #24
def logsumexp(X, axis=1):
    max_X = np.max(X)
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=axis, keepdims=True))
Example #25
 def red2(x):
     return mp.max(x, axis=1)
Example #26
def logsumexp(X, axis, keepdims=False):
    max_X = np.max(X)
    return max_X + np.log(np.sum(np.exp(X - max_X), axis=axis, keepdims=keepdims))
Example #27
 def red3(x):
     return mp.max(x, axis=1, keepdims=True)
Example #28
 def red5(x):
     return mp.max(x, axis=0, keepdims=True)
Example #29
 def red4(x):
     return mp.max(x, axis=0)
Example #30
 def red5(x):
     return mp.max(x, axis=0, keepdims=True)
Example #31
 def red4(x):
     return mp.max(x, axis=0)
Example #32
 def red2(x):
     return mp.max(x, axis=1)