def splitonmaxarg(self, x_tl, y_tl, features, D_t, isSparse=0):
    """Pick the feature whose decision stump has its error furthest from 0.5.

    One stump is fitted with ``ds.stump_fit`` on each of the columns
    ``0 .. features - 1`` of ``x_tl``, using the labels ``y_tl`` and the
    sample weights ``D_t``.

    ``isSparse`` is accepted but not used by this implementation.

    Returns the winning row of the ``(feature index, decision variable,
    error)`` table after it has been converted with ``array``.
    """
    # Weight mass of the samples with a positive label; it is the same for
    # every column, so it is computed once and passed to each fit.
    positive_weight = sum(D_t * (y_tl > 0))

    def fit_column(col):
        # Fit a single stump and tag the result with its column index.
        decision_var, error = ds.stump_fit(
            x_tl[:, col], y_tl, D_t, positive_weight)
        return (col, decision_var, error)

    table = array([fit_column(col) for col in range(features)])

    # Column 2 holds the errors; the best stump is the one furthest from 0.5.
    margin_from_half = abs(0.5 - table[:, 2])
    return table[argmax(margin_from_half)]
def adaboost_train(x, y, T):
    """Train an AdaBoost ensemble of single-feature decision stumps.

    In every round a stump is fitted on each column of ``x`` with
    ``ds.stump_fit`` under the current sample weights. The column whose
    error is furthest from 0.5 is kept, its alpha is computed, and the
    sample weights are updated and renormalised.

    Parameters:
      x -- 2-d array indexed as ``x[:, idx]``; ``x.shape[1]`` features.
      y -- array of ``y.shape[0]`` labels. The code only relies on the sign
           (``y > 0``, ``y_hat * y < 0``); presumably labels are +1/-1 --
           confirm against the caller.
      T -- number of boosting rounds.

    Returns ``(A, H, I, TE)``, four lists with one entry per round:
      A  -- alpha of the chosen stump,
      H  -- decision variable of the chosen stump,
      I  -- index of the chosen feature,
      TE -- fraction of training samples misclassified by the ensemble
            built so far (as judged by ``adaboost_predict``).
    """
    # A perfect stump (error 0) or an always-wrong one (error 1) would make
    # the alpha formula divide by zero / take log(0). Errors are clamped to
    # [err_floor, 1 - err_floor] so alpha stays finite.
    err_floor = 1e-10

    cf = x.shape[1]
    n = y.shape[0]
    weights = np.ones(n) / n  # start from a uniform distribution

    H = []
    A = []
    I = []
    TE = []

    for t in range(T):
        pplus = sum(weights * (y > 0))

        # Train a stump on every feature and score it for this round.
        fits = [ds.stump_fit(x[:, idx], y, weights, pplus)
                for idx in range(cf)]
        score = [abs(.5 - err) for _, err in fits]

        # Single-argument call form: same output on Python 2 and Python 3.
        print(" ".join(["Round: ", str(t), str(datetime.now())]))

        # Choose the one feature used for this round's classifier.
        best = np.argmax(score)
        dv, eps = fits[best]
        I.append(best)
        H.append(dv)

        # Calculate alpha from the (clamped) error of the chosen stump.
        eps = min(max(eps, err_floor), 1.0 - err_floor)
        A.append(.5 * math.log((1 - eps) / eps))

        # Update the weights and renormalise them so they sum to one.
        numerators = weights * np.exp(
            -A[t] * y * ds.stump_predict(x[:, best], dv))
        weights = numerators / numerators.sum()

        # Training error of the ensemble made of all rounds so far.
        y_hat = adaboost_predict(A, H, I, x, len(A))
        TE.append((y_hat * y < 0).sum() / float(n))

    return A, H, I, TE
# Example #3
# 0
#!/usr/bin/python2
from scipy import *
import scipy.sparse as sp
import dstump as ds

# Sample data from ds.two_clusters: a feature vector f and its labels y.
f, y = ds.two_clusters(100)

# Uniform weight for every sample.
pr = ones(len(y)) / len(y)

# Weight mass of the positively labelled samples. It does not change within
# one Adaboost step, and it helps the stump routine take advantage of
# sparsity.
pplus = sum(pr * (y > 0))

# Dense case. The decision stump training routine accepts either a dense
# 1-d array or a sparse 1-d CSC matrix. The resulting decision variable
# might differ between dense and sparse data, but the errors are the same.
# See the implementation for details.
dv, err = ds.stump_fit(f, y, pr, pplus)

# Sparse case. Transposing a CSR matrix yields a CSC matrix (a new object;
# f itself is left untouched).
fs = sp.csr_matrix(f).transpose()
dvs, errs = ds.stump_fit(fs, y, pr, pplus)