Ejemplo n.º 1
0
  def train(self, data, samples):
    """Fit one NuSVR per (gamma, data column) on a random sample of sequences.

    Gammas come from ``self.determine_gammas_from(data)`` and sequences from
    ``self.make_sequences(data)``.  Labels are the rows of ``data`` from index
    ``self.sequence_length`` onwards, cast to float32 (assumes make_sequences
    yields exactly one [point][dimension] sequence per such row -- TODO confirm).

    Args:
      data: 2-D array; ``data.shape[1]`` is used as the number of dimensions.
      samples: how many shuffled sequence/label pairs to keep for training.

    Side effects:
      Sets ``self.gammas``, ``self.sequences``, ``self.labels`` and
      ``self.SVMs`` (indexed [gamma][dimension]) and prints progress.
    """
    self.gammas = self.determine_gammas_from(data)

    windows = self.make_sequences(data)
    targets = data[self.sequence_length:, :].astype('float32')

    # Append each label row to the end of its sequence (axis 1) so that a
    # single in-place shuffle along axis 0 keeps sequences and labels paired.
    paired = np.hstack([windows, np.expand_dims(targets, 1)])
    np.random.shuffle(paired)
    chosen = paired[:samples]
    self.sequences = chosen[:, :-1, :]
    self.labels = chosen[:, -1, :]

    n_dimensions = data.shape[1]

    # [gamma][dimension]
    self.SVMs = []
    for gamma in self.gammas:
      print("Computing kernel with gamma=%s" % gamma)
      # The kernel matrix depends only on gamma, so it is shared by every
      # dimension's machine.
      gram = kernel_matrix(self.sequences, self.sequences, gamma)

      per_dimension = []
      for column in range(n_dimensions):
        # NOTE: this is where you would branch for nu/C
        machine = NuSVR(nu=.5)
        machine.train(gram, self.labels[:, column])
        per_dimension.append(machine)

        print("--> SVM Trained: %s percent SV's, risk=%s" % (machine.SV_percent, machine.risk))
      self.SVMs.append(per_dimension)
Ejemplo n.º 2
0
  def make_subsets(self, X, Y):
    """Build the combined kernel matrix over every active slice and gamma.

    For each entry ``[offset, dims]`` of ``self.active_slices`` the matching
    sub-arrays of ``X`` and ``Y`` are extracted, and one kernel matrix is
    computed per gamma in ``self.gammas``.

    Returns:
      With a single kernel matrix, that matrix exactly as ``kernel_matrix``
      returned it.  With several (k0, k1, ..., kn), the dense result of
      stacking

          [ k0 k1 ... kn ]
          [ k1           ]
          [ ..     0     ]
          [ kn           ]

      i.e. all kernels side by side in the first block-row, the remaining
      kernels down the first block-column, and zeros elsewhere.

    Raises:
      IndexError: if no kernel matrix was produced (no slices or no gammas).
    """
    kernels = []
    for active in self.active_slices:
      offset, dims = active[0], active[1]
      # A bare int selects a single dimension; anything else is treated as a
      # sized collection of dimensions.
      width = 1 if isinstance(dims, int) else len(dims)
      subset_X = X[:, offset, dims].reshape(X.shape[0], offset + 1, width)
      subset_Y = Y[:, offset, dims].reshape(Y.shape[0], offset + 1, width)

      # NOTE:  returning to test on single matrix
      kernels.extend([kernel_matrix(subset_X, subset_Y, gamma) for gamma in self.gammas])

    if len(kernels) <= 1:
      return kernels[0]

    first = kernels[0]
    extra = len(kernels) - 1

    top = np.hstack(kernels)
    # Zero padding sized from the first kernel, one block per remaining kernel
    # in each direction.
    padding = np.zeros((first.shape[0] * extra, first.shape[1] * extra))
    left = np.vstack(kernels[1:])

    bottom = sp.sparse.csr_matrix(np.hstack([left, padding]))
    return sp.sparse.vstack([top, bottom]).todense()
Ejemplo n.º 3
0
  def train(self, data, slices=None):
    """Record the one-step-ahead training pairs taken from ``data``.

    No optimisation is performed: the method only stores the inputs a later
    stage needs.

    Args:
      data: 2-D array.  Its first two columns of every row but the last become
        the inputs; column ``self.dimension`` of every row but the first
        becomes the labels.
      slices: list of ``[sequence offset, [dimensions]]`` entries stored as
        ``self.active_slices``.  Defaults to a new ``[[0, [0]]]`` per call.

    Side effects:
      Sets ``self.gammas`` (fixed to ``[.1]``), ``self.active_slices``,
      ``self.sequences`` and ``self.labels`` (both float32) and prints the
      gammas.
    """
    # Build the default here rather than in the signature: a list literal
    # default is created once and would be shared by every instance that
    # stores it in self.active_slices.
    if slices is None:
      slices = [[0, [0]]]

    self.gammas = [.1,]
    print("Gammas determined: %s" % str(self.gammas))

    # [gamma][sequence offset][dimension]
    self.active_slices = slices

    # Working with 1 sequence element for now
    self.sequences = data[:-1, :2].astype('float32')
    self.labels = data[1:, self.dimension].astype('float32')

    # NOTE: an experimental QP solve used to sit below an unconditional
    # `return` at this point and could never execute; it was removed as dead
    # code.
Ejemplo n.º 4
0
 def predict(self, data):
   """Run every trained SVM over the sequences built from ``data``.

   Args:
     data: 2-D array; ``data.shape[1]`` fixes the size of the last axis of
       both results.

   Returns:
     A ``(predictions, risks)`` pair.  ``predictions`` is indexed
     [test_sequence][gamma][dimension]; ``risks`` has shape
     (1, gammas, dimensions) and holds each SVM's ``risk`` attribute.
   """
   # [sequence][point][dimension]
   points = self.make_sequences(data)
   n_points = points.shape[0]
   n_dimensions = data.shape[1]

   # Each list starts with a zero-width array so that the final stack has the
   # right shape even when there is nothing to append.
   # [test_sequence][gamma][dimension]
   prediction_blocks = [np.array([]).reshape(n_points, 0, n_dimensions)]
   risk_blocks = [np.array([]).reshape(1, 0, n_dimensions)]

   for i, g_SVMs in enumerate(self.SVMs):
     gamma = self.gammas[i]

     # [train_i][test_j]
     kk = kernel_matrix(self.sequences, points, gamma).T
     print("Computed kernel with gamma=%s, %s non-null entries" % (gamma, (kk > .00001).sum()))

     prediction_columns = [np.array([]).reshape(n_points, 0)]
     risk_columns = [np.array([]).reshape(1, 0)]
     for SVM in g_SVMs:
       # One [test] column per dimension
       prediction_columns.append(np.expand_dims(SVM.predict(kk), 1))
       risk_columns.append(np.array(SVM.risk).reshape(1, 1))

     # Insert the gamma axis before joining with the other gammas.
     prediction_blocks.append(np.expand_dims(np.hstack(prediction_columns), 1))
     risk_blocks.append(np.expand_dims(np.hstack(risk_columns), 1))

   # Every gamma is returned separately; combining them is left to the caller.
   return np.hstack(prediction_blocks), np.hstack(risk_blocks)
Ejemplo n.º 5
0
 def predict(self, data):
   """Predict as a coefficient-weighted kernel average over the support vectors.

   For each input point the result is

       sum_i(SVy_i * beta_i * k(point, SVx_i)) / sum_i(beta_i * k(point, SVx_i))

   using the kernel with the last gamma in ``self.gammas``.

   Args:
     data: array of input points, cast to float32 before use (presumably laid
       out as [sequence][point][dimension] like the reshaped support vectors
       -- TODO confirm against kernel_matrix).

   Returns:
     The per-point predictions (sum over axis 1 of the weighted kernel).

   Side effects:
     Prints ``self.beta``.
   """
   # [sequence][point][dimension]
   points = data.astype('float32')

   kk = kernel_matrix( points, self.SVx.reshape(self.nSV,1,1), self.gammas[-1] )

   print(self.beta)

   # The kernel columns and beta are per support vector, so the labels must be
   # the support-vector labels (SVy).  The full training labels only line up
   # when every training point happens to be a support vector.
   prediction = (self.SVy.T * self.beta.T * kk ).sum(1) / (self.beta.T * kk).sum(1)

   return prediction
Ejemplo n.º 6
0
  def train(self, data, slices=None):
    """Fit the support-vector coefficients with CVXOPT's cone QP solver.

    Inputs are ``data[:-1]`` and labels ``data[1:]`` (one-step-ahead pairs),
    each reshaped to a column.  Gaussian jitter (standard normal / 10) is
    added to both before they are stored and before the kernel is built.

    The problem handed to ``solvers.coneqp`` is

        minimise    (1/2) x' P x          with P = (y y') * K
        subject to  (y' * K) x  <=  sigma + y
                   -(y' * K) x  <=  sigma - y
                    K x = 1

    where K is the kernel matrix of the jittered inputs, y the jittered
    labels, ``*`` is element-wise with broadcasting, and sigma is fixed at 1000.

    Args:
      data: 1-D array (it is reshaped to ``(len(data) - 1, 1)`` after slicing).
      slices: list of ``[sequence offset, [dimensions]]`` entries stored as
        ``self.active_slices``.  Defaults to a new ``[[0, [0]]]`` per call.

    Side effects:
      Sets ``self.gammas`` (fixed to ``[.1]``), ``self.active_slices``,
      ``self.sequences`` and ``self.labels``.  When the solver reports
      ``'optimal'`` it also sets ``self.SV_mask``, ``self.beta``, ``self.SVx``,
      ``self.SVy`` and ``self.nSV``; otherwise those attributes are left
      untouched and the solver status is printed.  Prints progress throughout.
    """
    # Build the default here rather than in the signature: a list literal
    # default is created once and would be shared by every instance that
    # stores it in self.active_slices.
    if slices is None:
      slices = [[0, [0]]]

    self.gammas = [.1,]
    print("Gammas determined: %s" % str(self.gammas))

    # [gamma][sequence offset][dimension]
    self.active_slices = slices

    # Working with 1 sequence element for now
    sequences = data[:-1].astype('float32').reshape(data.shape[0]-1,1)
    labels = data[1:].astype('float32').reshape(data.shape[0]-1,1)

    l = sequences.shape[0]
    jitter = ( ( np.random.randn(l,1) / 10 ) ).astype('float32')
    jittery = ( ( np.random.randn(l,1) / 10 ) ).astype('float32')
    self.sequences = sequences + jitter
    self.labels = labels + jittery

    print("Calculating kernel matrix")
    kx = kernel_matrix(self.sequences.reshape(l,1,1), self.sequences.reshape(l,1,1), self.gammas[-1])
    sigma = 1000

    print("Constructing constraints")

    P = self.labels * self.labels.T * kx
    q = np.zeros((l,1))

    # Two-sided bound: the same left-hand side once with each sign.
    G_1 = self.labels.T * kx
    G_2 = -self.labels.T * kx
    h_1 = sigma + self.labels
    h_2 = sigma - self.labels

    G = np.vstack([G_1,G_2])
    h = np.vstack([h_1,h_2])

    A = kx
    b = np.ones((l,1))

    # Diagnostics (`rank` is whatever the module has imported under that name)
    print("p(A[0])=%s" % A.shape[0])
    print("n(G[1],A[1])=%s or %s" % (G.shape[1], A.shape[1]))
    print("rank P: %s" % rank(P))
    print("rank G: %s" % rank(G))
    print("rank A: %s" % rank(A))
    print("rand kernel: %s" % rank(kx))
    print("unique source: %s" % np.unique(self.sequences).shape[0])

    print("Solving")
    # The positional None is coneqp's `dims` argument.
    solution = solvers.coneqp(
      matrix(P.astype('float')),
      matrix(q.astype('float')),
      matrix(G.astype('float')),
      matrix(h.astype('float')),
      None,
      matrix(A.astype('float')),
      matrix(b.astype('float'))
    )

    print("Handling Solution")
    if solution['status'] == 'optimal':
      X = np.array( solution['x'] )
      # Despite its name, SV_mask is True for the rows to DROP: coefficients
      # that are numerically zero.  compress_rows removes the masked rows.
      self.SV_mask = ( np.abs(X) < 1e-8 )
      self.beta = np.ma.compress_rows( np.ma.array( X, mask = self.SV_mask ) ).astype('float32')
      # Support vectors are taken from the un-jittered inputs and labels.
      self.SVx = np.ma.compress_rows( np.ma.array( sequences, mask = np.repeat( self.SV_mask, sequences.shape[1], 1) ) ).astype('float32')
      self.SVy = np.ma.compress_rows( np.ma.array( labels.reshape(labels.shape[0],1), mask = self.SV_mask ) ).astype('float32')
      self.nSV = self.beta.shape[0]

      print("--> SVM Trained: %s SV's of %s" % ( self.nSV, self.SV_mask.shape[0] ))
    else:
      # nSV and SV_mask are only assigned on success, so the summary above
      # cannot be printed here.
      print("--> SVM training failed: solver status=%s" % solution['status'])
Ejemplo n.º 7
0
  def train(self, data, slices=None):
    """Fit the support-vector coefficients with CVXOPT's LP solver.

    Inputs are ``data[:-1]`` and labels ``data[1:]`` (one-step-ahead pairs),
    each reshaped to a column.  The decision vector holds ``l`` coefficients
    followed by two extra variables (call them u and v); the objective is to
    minimise v.  With s = column sums of the kernel matrix and y the labels,
    the constraints built below are:

        sum_i (s_i / l) * beta_i  = 1
        u - v                     = 0
        sum_i y_i s_i beta_i - v <=  sum(y)
       -sum_i y_i s_i beta_i - u <= -sum(y)
        beta_i >= 0,  u >= 0,  v >= 0

    Args:
      data: 1-D array (it is reshaped to ``(len(data) - 1, 1)`` after slicing).
      slices: list of ``[sequence offset, [dimensions]]`` entries stored as
        ``self.active_slices``.  Defaults to a new ``[[0, [0]]]`` per call.

    Side effects:
      Sets ``self.gammas`` (fixed to ``[.5]``), ``self.active_slices``,
      ``self.sequences`` and ``self.labels``.  When the solver reports
      ``'optimal'`` it also sets ``self.SV_mask``, ``self.beta``, ``self.SVx``,
      ``self.SVy`` and ``self.nSV``; otherwise those attributes are left
      untouched and the solver status is printed.  Prints progress and the
      constraint matrices throughout.
    """
    # Build the default here rather than in the signature: a list literal
    # default is created once and would be shared by every instance that
    # stores it in self.active_slices.
    if slices is None:
      slices = [[0, [0]]]

    self.gammas = [.5,]
    print("Gammas determined: %s" % str(self.gammas))

    # [gamma][sequence offset][dimension]
    self.active_slices = slices

    # Working with 1 sequence element for now
    sequences = data[:-1].astype('float32').reshape(data.shape[0]-1,1)
    labels = data[1:].astype('float32').reshape(data.shape[0]-1,1)

    self.sequences = sequences
    self.labels = labels
    l = self.sequences.shape[0]

    print("Calculating kernel matrix")
    kk = kernel_matrix(self.sequences.reshape(l,1,1), self.sequences.reshape(l,1,1), self.gammas[-1])
    # Column sums are reused by three constraints below.
    kk_sums = kk.sum(0)

    print("Constructing constraints")
    # Objective: minimise the last variable only.
    c = np.hstack( [
      np.zeros(l),
      [0,1]
    ])

    # Equality rows, laid out as [constraint][variable]
    A_A = np.hstack( [
      kk_sums / l,
      [0,0]
    ] )
    #[constraint]
    b_A = np.ones(1)

    # Ties the two extra variables together.
    A_G = np.hstack( [
      np.zeros(l),
      [1,-1]
    ] )
    b_G = np.zeros(1)

    # Two-sided bound on the label-weighted kernel sum, slack taken from the
    # extra variables.
    G_B = np.hstack( [
      self.labels.flatten() * kk_sums,
      [0,-1]
    ] )
    h_B = np.array( [self.labels.sum() ] )

    G_C = np.hstack( [
      -self.labels.flatten() * kk_sums,
      [-1,0]
    ] )
    h_C = np.array( [ -self.labels.sum() ] )

    # Non-negativity of every coefficient: [variable][variable]
    G_D = np.hstack( [
      -np.identity(l),
      np.zeros((l,2))
    ])
    h_D = np.zeros(l)

    # Non-negativity of each extra variable.
    G_E = np.hstack( [
      np.zeros(l),
      [-1,0]
    ])
    h_E = np.zeros(1)

    G_F = np.hstack( [
      np.zeros(l),
      [0,-1]
    ])
    h_F = np.zeros(1)

    G = np.vstack([G_B,G_C,G_D,G_E,G_F])
    h = np.hstack([h_B,h_C,h_D,h_E,h_F])

    print(G)
    print(h)
    A = np.vstack([A_A,A_G])
    b = np.vstack([b_A,b_G])

    print(A.shape)
    print(b.shape)

    print("Solving")
    solution = lp( matrix(c), matrix(G), matrix(h), matrix(A), matrix(b) )

    print("Handling Solution")
    if solution['status'] == 'optimal':
      # Split the solution: coefficients first, then the two extra variables.
      X = np.array( solution['x'][:-2] )
      R_emp = np.array( solution['x'][-1] )

      # Despite its name, SV_mask is True for the rows to DROP; compress_rows
      # removes the masked rows.
      # NOTE(review): G_D already forces these entries to be >= 0, so this
      # mask presumably only catches solver round-off -- confirm whether
      # zero coefficients were meant to be dropped as well.
      self.SV_mask = ( X < 0 )
      self.beta = np.ma.compress_rows( np.ma.array( X, mask = self.SV_mask ) ).astype('float32')
      self.SVx = np.ma.compress_rows( np.ma.array( sequences, mask = np.repeat( self.SV_mask, sequences.shape[1], 1) ) ).astype('float32')
      self.SVy = np.ma.compress_rows( np.ma.array( labels.reshape(labels.shape[0],1), mask = self.SV_mask ) ).astype('float32')
      self.nSV = self.beta.shape[0]

      print(solution['x'])
      print("--> SVM Trained: %s SV's of %s, Risk=%s" % ( self.nSV, self.SV_mask.shape[0], R_emp/X.shape[0] ))
    else:
      # X and R_emp only exist on success, so the summary above cannot be
      # printed here.
      print("--> SVM training failed: solver status=%s" % solution['status'])