Exemple #1
0
  def train(self, X, Y, random_features=100, regularization=1, 
            multiclass=True, zerobased=False, subtype=None):
    """
    Train the model.

    Builds a random feature transform from the kernel, applies it to X and
    solves the regularized normal equations
    (Z^T Z + regularization * I) W = Z^T Y for the weights W.

    Parameters
    ----------
    X: m x n input matrix

    Y: m x 1 label vector (if multi-class classification problem, 
       labels are from 0 to K-1 - trains one-vs-rest)

    random_features: number of random features to use

    regularization: regularization parameter

    multiclass: is it a multiclass problem or not

    zerobased: for multiclass, whether the labels start with 0 or 1

    subtype: subtype for random features sketching

    Returns
    --------
    Nothing. Internally sets the model parameters (self._rft and self.model).

    """

    # The transform is kept on the instance and in the model dictionary.
    self._rft = self._kernel.rft(random_features, subtype)
    # `/` applies the transform to X; Z is expected to have `random_features`
    # columns so that Z^T Z matches the identity added below.
    Z = self._rft / X

    if multiclass:
      # Expand labels with the project's dummy coding (presumably a 0/1
      # indicator matrix - confirm in utils) and map it to -1/+1 targets.
      Y = utils.dummycoding(Y, zerobased=zerobased)
      Y = 2*Y - 1

    A = numpy.dot(Z.T, Z) + regularization*numpy.identity(random_features)
    # `sym_pos=True` was removed from scipy.linalg.solve; `assume_a='pos'`
    # is the supported way to request the symmetric positive definite solver.
    weights = scipy.linalg.solve(A, numpy.dot(Z.T, Y), assume_a='pos')
    self.model = {"kernel": self._kernel,
                  "rft": self._rft,
                  "weights": weights, 
                  "random_features": random_features,  
                  "regularization": regularization, 
                  "multiclass":multiclass,
                  "zerobased":zerobased}
Exemple #2
0
  def train(self,X, Y, random_features=100, regularization=1, 
            probdist='uniform', multiclass=True, zerobased=False):
    """
    Train the model using Nystrom features built from sampled rows of X.

    Rows of X are sampled according to `probdist`, the Gram matrix of the
    sample is eigendecomposed, and the resulting features are used to solve
    the regularized normal equations (Z^T Z + regularization * I) W = Z^T Y.

    :param X: m x n input matrix
    :param Y: label vector (dummy-coded to -1/+1 targets when multiclass)
    :param random_features: number of Nystrom random samples to take
    :param regularization: regularization parameter
    :param probdist: probability distribution of rows. Either 'uniform' or 'leverages'.
    :param multiclass: is it a multiclass problem or not
    :param zerobased: for multiclass, whether the labels start with 0 or 1

    Raises skylark.errors.InvalidParamterError for an unknown `probdist`.

    Returns nothing. Internally sets the model parameters (self.model).
    """
    m = X.shape[0]

    if probdist == 'uniform':
      nz_prob_dist = numpy.ones((m,1))/m
    elif probdist ==  'leverages':
      # TODO the following is probably not correct as leverages are define w.r. 
      #      to rank.
      K = self._kernel.gram(X)
      Im = numpy.identity(m)
      # Matrix product (not element-wise `*`): scores are diag(K (K + reg*I)^-1).
      nz_prob_dist = numpy.diag(numpy.dot(K, scipy.linalg.inv(K+regularization*Im)))
      nz_prob_dist = nz_prob_dist/sum(nz_prob_dist)
    else:
      raise skylark.errors.InvalidParamterError("Unknown probability distribution strategy")

    SX = skylark.sketch.NonUniformSampler(m, random_features, nz_prob_dist) * X
    K_II = self._kernel.gram(SX)
    I = numpy.identity(random_features)
    # Small diagonal shift keeps the eigenvalues away from zero before the
    # inverse square root below.
    eps = 1e-8
    (evals, evecs) = scipy.linalg.eigh(K_II + eps*I)
    Z = self._kernel.gram(SX, X)
    # Scale each eigenvector (column) by 1/sqrt(eigenvalue), i.e.
    # evecs . diag(evals^-1/2). The former `evecs * diagflat(...)` multiplied
    # two ndarrays element-wise and kept only the diagonal of evecs.
    U = evecs / numpy.sqrt(evals)
    Z = numpy.dot(Z, U)
    if multiclass:
      # Expand labels with the project's dummy coding (presumably a 0/1
      # indicator matrix - confirm in utils) and map it to -1/+1 targets.
      Y = utils.dummycoding(Y, zerobased=zerobased)
      Y = 2*Y - 1

    A = numpy.dot(Z.T, Z) + regularization*I
    # `sym_pos=True` was removed from scipy.linalg.solve; `assume_a='pos'`
    # is the supported way to request the symmetric positive definite solver.
    weights = scipy.linalg.solve(A, numpy.dot(Z.T, Y), assume_a='pos')
    self.model = {"kernel": self._kernel, 
                  "weights": weights, 
                  "random_features": random_features,  
                  "regularization": regularization, 
                  "multiclass":multiclass, 
                  "zerobased":zerobased,
                  "SX":SX, 
                  "U":U }
Exemple #3
0
  def train(self, X, Y, regularization=1,  multiclass=True, zerobased=False):
    """
    Train the model.

    Computes the kernel Gram matrix K of X and solves
    (K + regularization * I) alpha = Y for the coefficients alpha.

    Parameters
    ----------
    X: m x n input matrix

    Y: m x 1 label vector (if multi-class classification problem, 
       labels are from 0 to K-1 - trains one-vs-rest)

    regularization: regularization parameter

    multiclass: is it a multiclass problem or not

    zerobased: for multiclass, whether the labels start with 0 or 1

    Returns
    --------
    Nothing. Internally sets the model parameters (self.model), which also
    keeps a reference to the training data X.

    """
    m = X.shape[0]
    K = self._kernel.gram(X)
    if multiclass:
      # Expand labels with the project's dummy coding (presumably a 0/1
      # indicator matrix - confirm in utils) and map it to -1/+1 targets.
      Y = utils.dummycoding(Y, zerobased=zerobased)
      Y = 2*Y - 1
    A = K + regularization*numpy.identity(m)
    # `sym_pos=True` was removed from scipy.linalg.solve; `assume_a='pos'`
    # is the supported way to request the symmetric positive definite solver.
    alpha = scipy.linalg.solve(A, Y, assume_a='pos')
    self.model = {"kernel": self._kernel, 
                  "alpha": alpha, 
                  "regularization": regularization, 
                  "data": X, 
                  "multiclass":multiclass,
                  "zerobased":zerobased}
Exemple #4
0
  def train(self, X, Y, rank, s=None, t=None, samplesize=None, 
            multiclass=True, zerobased=False, subtype=None):
    """
    Fit the model on an approximate dominant-subspace basis of the data.


    Parameters
    ----------
    X: m x n input matrix

    Y: m x 1 label vector (for a multi-class classification problem the
       labels are from 0 to K-1 and one-vs-rest is trained)

    rank: number of principal components to use.

    s: First parameter for sketching. When None, 2 * rank is used.

    t: Second parameter for sketching. When None, 2 * s is used.

    samplesize: When not None, this many examples are sampled from X and
                only they are used to produce the projection.

    multiclass: whether this is a multiclass problem

    zerobased: for multiclass, whether the labels start with 0 or 1

    subtype: subtype for kernel sketching.

    Returns
    --------
    Nothing. Stores the sketch in self._rft and the fitted parameters in
    self.model.

    """

    # Fill in the sketching defaults.
    s = 2 * rank if s is None else s
    t = 2 * s if t is None else t

    use_subsample = samplesize is not None

    # Pick the rows from which the basis is computed.
    if use_subsample:
      sampler = skylark.sketch.UniformSampler(X.shape[0], samplesize)
      basis_rows = sampler * X
    else:
      basis_rows = X

    Z, S, R, V = lr.approximate_domsubspace_basis(basis_rows, rank, s, t, 
                                                  self._kernel, subtype)

    if multiclass:
      # Dummy-code the labels and shift them to -1/+1 targets.
      Y = 2 * utils.dummycoding(Y, zerobased=zerobased) - 1

    if use_subsample:
      # The basis came from the subsample only, so features for all of X are
      # rebuilt from the sketch before a least-squares fit.
      sketched = S / X
      Z = numpy.dot(sketched, scipy.linalg.solve_triangular(R, V, lower=False))
      coeffs = numpy.linalg.lstsq(Z, Y)[0]
    else:
      # Z holds the features and is orthogonal, so a projection is enough.
      coeffs = numpy.dot(Z.T, Y)

    # Map the coefficients back through V and the upper-triangular R.
    rhs = numpy.dot(V, coeffs)
    weights = scipy.linalg.solve_triangular(R, rhs, lower=False)

    self._rft = S
    self.model = {"kernel": self._kernel,
                  "rft": self._rft,
                  "weights": weights, 
                  "s": s,
                  "t": t,
                  "rank": rank,
                  "multiclass":multiclass,
                  "zerobased":zerobased}