def train(self, data, samples):
    """Train one NuSVR per gamma and per column of ``data`` on a random sample.

    Windows are built from ``data`` with ``self.make_sequences`` and paired
    with the rows of ``data`` from index ``self.sequence_length`` onward as
    targets.  The pairs are shuffled together and the first ``samples`` of
    them are kept in ``self.sequences`` and ``self.labels``.

    For every gamma returned by ``self.determine_gammas_from(data)`` a kernel
    matrix of the kept windows against themselves is computed, and a
    ``NuSVR(nu=.5)`` is trained on it once per column of ``data``.  The
    trained machines are stored in ``self.SVMs``, indexed [gamma][dimension].

    Args:
        data: 2-D array; rows are indexed from ``self.sequence_length`` to
            form the targets and ``data.shape[1]`` is the number of
            dimensions.
        samples: Number of shuffled (window, target) pairs to keep.
    """
    self.gammas = self.determine_gammas_from(data)
    windows = self.make_sequences(data)
    targets = data[self.sequence_length:, :].astype('float32')

    # Append each target as one extra "point" behind its window so that a
    # single shuffle along the first axis keeps windows and targets aligned.
    paired = np.hstack([windows, np.expand_dims(targets, 1)])
    np.random.shuffle(paired)

    # Randomly sample |samples| sequences
    chosen = paired[:samples]
    self.sequences = chosen[:, :-1, :]
    self.labels = chosen[:, -1, :]

    n_dimensions = data.shape[1]

    # [gamma][dimension]
    self.SVMs = []
    for gamma in self.gammas:
        print("Computing kernel with gamma=%s" % gamma)
        gram = kernel_matrix(self.sequences, self.sequences, gamma)

        machines = []
        for dimension in range(n_dimensions):
            dimension_targets = self.labels[:, dimension]
            # NOTE: this is where you would branch for nu/C
            machine = NuSVR(nu=.5)
            machine.train(gram, dimension_targets)
            machines.append(machine)
            print("--> SVM Trained: %s percent SV's, risk=%s" % (machine.SV_percent, machine.risk))
        self.SVMs.append(machines)
def make_subsets(self, X, Y):
    """Build the combined kernel matrix for every active slice and gamma.

    For each entry ``[offset, columns]`` of ``self.active_slices`` the
    matching sub-arrays of ``X`` and ``Y`` are extracted, and one kernel
    matrix is computed per gamma in ``self.gammas``.

    With a single kernel matrix, that matrix is returned unchanged.  With
    several, they are combined into one dense matrix laid out as::

        [ K0 | K1 | ... | Kn ]
        [ K1 |               ]
        [ .. |     zeros     ]
        [ Kn |               ]

    Args:
        X: Array indexed as ``X[:, offset, columns]``.
        Y: Array indexed the same way as ``X``.

    Returns:
        The single kernel matrix, or the dense combined matrix above.
    """
    kernels = []
    for active in self.active_slices:
        offset = active[0]
        columns = active[1]
        # A bare int selects one column; anything else is treated as a
        # collection of column indices.
        width = 1 if isinstance(columns, int) else len(columns)
        # NOTE(review): the reshape asks for offset+1 points per row, which
        # appears to match the selected data only when offset is 0 - confirm.
        subset_X = X[:, offset, columns].reshape(X.shape[0], offset + 1, width)
        subset_Y = Y[:, offset, columns].reshape(Y.shape[0], offset + 1, width)
        # NOTE: returning to test on single matrix
        kernels.extend(
            kernel_matrix(subset_X, subset_Y, gamma) for gamma in self.gammas
        )

    if len(kernels) <= 1:
        return kernels[0]

    first = kernels[0]
    n_extra = len(kernels) - 1
    top_row = np.hstack(kernels)
    left_column = np.vstack(kernels[1:])
    padding = np.zeros((first.shape[0] * n_extra, first.shape[1] * n_extra))
    lower_rows = sp.sparse.csr_matrix(np.hstack([left_column, padding]))
    return sp.sparse.vstack([top_row, lower_rows]).todense()
def train(self, data, slices=None):
    """Store the training inputs and targets taken from ``data``.

    Inputs are the first two columns of every row but the last; targets are
    column ``self.dimension`` of every row but the first, so each input row
    is paired with the following row's value.  Both are cast to float32.

    NOTE: the original body continued after an unconditional ``return`` with
    a quadratic-program solve that could never execute; that unreachable
    code has been removed, so no support vectors are computed here.

    Args:
        data: 2-D array with at least two columns.
        slices: Value stored as ``self.active_slices``.  Defaults to a fresh
            ``[[0, [0]]]`` on every call, so instances trained with the
            default no longer share (and cannot corrupt) one list object.
    """
    if slices is None:
        slices = [[0, [0]]]

    self.gammas = [.1]
    print("Gammas determined: %s" % str(self.gammas))

    self.active_slices = slices

    # Working with 1 sequence element for now
    self.sequences = data[:-1, :2].astype('float32')
    self.labels = data[1:, self.dimension].astype('float32')
def predict(self, data):
    """Run every trained SVM on the sequences built from ``data``.

    For each gamma a kernel matrix between the stored training sequences and
    the new sequences is computed and transposed, then passed to the
    ``predict`` method of each SVM trained for that gamma.

    Args:
        data: 2-D array; ``data.shape[1]`` is the number of dimensions.

    Returns:
        Tuple ``(predictions, risks)``.  ``predictions`` is indexed
        [test_sequence][gamma][dimension]; ``risks`` has shape
        (1, number of gammas, number of dimensions) and holds each SVM's
        ``risk`` attribute.
    """
    # [sequence][point][dimension]
    points = self.make_sequences(data)
    n_points = points.shape[0]
    n_dimensions = data.shape[1]

    # Each list starts with an empty, correctly shaped array so the final
    # stack has the right shape and dtype even when nothing is appended.
    # [test_sequence][gamma][dimension]
    prediction_blocks = [np.array([]).reshape(n_points, 0, n_dimensions)]
    risk_blocks = [np.array([]).reshape(1, 0, n_dimensions)]

    for index, g_SVMs in enumerate(self.SVMs):
        gamma = self.gammas[index]

        # kernel_matrix gives [train_i][test_j]; transposed for predict
        kk = kernel_matrix(self.sequences, points, gamma).T
        print("Computed kernel with gamma=%s, %s non-null entries" % (gamma, (kk > .00001).sum()))

        prediction_columns = [np.array([]).reshape(n_points, 0)]
        risk_columns = [np.array([]).reshape(1, 0)]
        for SVM in g_SVMs:
            # One column per dimension: [test][dimension]
            prediction_columns.append(np.expand_dims(SVM.predict(kk), 1))
            risk_columns.append(np.array(SVM.risk).reshape(1, 1))

        prediction_blocks.append(np.expand_dims(np.hstack(prediction_columns), 1))
        risk_blocks.append(np.expand_dims(np.hstack(risk_columns), 1))

    return np.hstack(prediction_blocks), np.hstack(risk_blocks)
def predict(self, data):
    """Predict targets as a kernel-weighted average of support-vector targets.

    Each prediction is ``sum_j(SVy_j * beta_j * k_j) / sum_j(beta_j * k_j)``,
    where ``k_j`` is the kernel value between the query point and support
    vector ``j`` for the last gamma in ``self.gammas``.

    The numerator previously used ``self.labels`` (every training target)
    where ``self.beta`` and the kernel matrix only cover the support
    vectors; those shapes broadcast only when every training sample is a
    support vector.  ``self.SVy`` is the target array with matching rows.

    Args:
        data: Array of query points; cast to float32 and passed to
            ``kernel_matrix`` as is.

    Returns:
        Array with one prediction per row of the kernel matrix.
    """
    # [sequence][point][dimension]
    points = data.astype('float32')

    # presumably indexed [query point][support vector] - confirm against
    # kernel_matrix
    kk = kernel_matrix(points, self.SVx.reshape(self.nSV, 1, 1), self.gammas[-1])

    print(self.beta)

    weighted_targets = (self.SVy.T * self.beta.T * kk).sum(1)
    total_weight = (self.beta.T * kk).sum(1)
    return weighted_targets / total_weight
def train(self, data, slices=None):
    """Fit coefficients with a cone QP over next-step pairs of ``data``.

    Each value of ``data`` but the last is an input and the following value
    is its target.  Gaussian jitter (standard normal / 10) is added to both
    before the kernel matrix is built; the jittered arrays are stored as
    ``self.sequences`` and ``self.labels``.

    The problem handed to ``solvers.coneqp`` (presumably CVXOPT's, which
    minimises ``1/2 x'Px + q'x`` subject to ``Gx <= h`` and ``Ax = b``) is::

        P = labels * labels.T * kx        q = 0
        G = [ labels.T * kx ; -labels.T * kx ]
        h = [ sigma + labels ; sigma - labels ]     (sigma = 1000)
        A = kx                            b = 1

    When the solver reports ``'optimal'``, rows whose coefficient magnitude
    is at least 1e-8 are kept as support vectors and ``self.SV_mask``,
    ``self.beta``, ``self.SVx``, ``self.SVy`` and ``self.nSV`` are set.
    ``SVx`` / ``SVy`` are taken from the un-jittered inputs and targets.
    For any other status those attributes are left untouched.

    Args:
        data: Array that reshapes to ``(data.shape[0] - 1, 1)`` once one row
            is dropped, i.e. effectively a 1-D series.
        slices: Value stored as ``self.active_slices``.  Defaults to a fresh
            ``[[0, [0]]]`` on every call so that instances do not share one
            mutable default list.
    """
    if slices is None:
        slices = [[0, [0]]]

    self.gammas = [.1]
    print("Gammas determined: %s" % str(self.gammas))

    self.active_slices = slices

    # Working with 1 sequence element for now
    sequences = data[:-1].astype('float32').reshape(data.shape[0] - 1, 1)
    labels = data[1:].astype('float32').reshape(data.shape[0] - 1, 1)
    l = sequences.shape[0]

    # Draw order (inputs first, then targets) is kept so seeded runs
    # reproduce the same jitter as before.
    jitter = (np.random.randn(l, 1) / 10).astype('float32')
    jittery = (np.random.randn(l, 1) / 10).astype('float32')
    self.sequences = sequences + jitter
    self.labels = labels + jittery

    print("Calculating kernel matrix")
    jittered_points = self.sequences.reshape(l, 1, 1)
    kx = kernel_matrix(jittered_points, jittered_points, self.gammas[-1])
    # The kernel matrix over the labels (ky) was computed here but never
    # read; it has been dropped.

    sigma = 1000

    print("Constructing constraints")
    P = self.labels * self.labels.T * kx
    q = np.zeros((l, 1))
    G = np.vstack([self.labels.T * kx, -self.labels.T * kx])
    h = np.vstack([sigma + self.labels, sigma - self.labels])
    A = kx
    b = np.ones((l, 1))

    print("p(A[0])=%s" % A.shape[0])
    print("n(G[1],A[1])=%s or %s" % (G.shape[1], A.shape[1]))
    print("rank P: %s" % rank(P))
    print("rank G: %s" % rank(G))
    print("rank A: %s" % rank(A))
    print("rand kernel: %s" % rank(kx))
    print("unique source: %s" % np.unique(self.sequences).shape[0])

    print("Solving")
    # The fifth positional argument (None) is passed where CVXOPT's coneqp
    # takes its ``dims`` argument.
    solution = solvers.coneqp(
        matrix(P.astype('float')),
        matrix(q.astype('float')),
        matrix(G.astype('float')),
        matrix(h.astype('float')),
        None,
        matrix(A.astype('float')),
        matrix(b.astype('float')),
    )

    print("Handling Solution")
    if solution['status'] == 'optimal':
        X = np.array(solution['x'])

        # True marks a numerically-zero coefficient; compress_rows drops
        # every row that contains a masked entry, leaving the SVs.
        self.SV_mask = (np.abs(X) < 1e-8)
        row_mask = np.repeat(self.SV_mask, sequences.shape[1], 1)
        self.beta = np.ma.compress_rows(
            np.ma.array(X, mask=self.SV_mask)).astype('float32')
        self.SVx = np.ma.compress_rows(
            np.ma.array(sequences, mask=row_mask)).astype('float32')
        self.SVy = np.ma.compress_rows(
            np.ma.array(labels, mask=self.SV_mask)).astype('float32')
        self.nSV = self.beta.shape[0]

        print("--> SVM Trained: %s SV's of %s" % (self.nSV, self.SV_mask.shape[0]))
def train(self, data, slices=None):
    """Fit coefficients with a linear program over next-step pairs of ``data``.

    Each value of ``data`` but the last is an input and the following value
    is its target; both are stored (float32, shape ``(l, 1)``) as
    ``self.sequences`` and ``self.labels``.

    The LP has ``l + 2`` variables: one coefficient per training pair plus
    two auxiliary variables.  The objective minimises the last variable.
    Equality rows ``A_A`` / ``A_G`` and inequality rows ``G_B`` .. ``G_F``
    are assembled below and passed to ``lp`` (presumably CVXOPT's
    ``solvers.lp`` - confirm against the file's imports).

    When the solver reports ``'optimal'``, rows with a non-negative
    coefficient are kept and ``self.SV_mask``, ``self.beta``, ``self.SVx``,
    ``self.SVy`` and ``self.nSV`` are set.  For any other status those
    attributes are left untouched.

    Args:
        data: Array that reshapes to ``(data.shape[0] - 1, 1)`` once one row
            is dropped, i.e. effectively a 1-D series.
        slices: Value stored as ``self.active_slices``.  Defaults to a fresh
            ``[[0, [0]]]`` on every call so that instances do not share one
            mutable default list.
    """
    if slices is None:
        slices = [[0, [0]]]

    self.gammas = [.5]
    print("Gammas determined: %s" % str(self.gammas))

    self.active_slices = slices

    # Working with 1 sequence element for now
    sequences = data[:-1].astype('float32').reshape(data.shape[0] - 1, 1)
    labels = data[1:].astype('float32').reshape(data.shape[0] - 1, 1)
    self.sequences = sequences
    self.labels = labels
    l = self.sequences.shape[0]

    print("Calculating kernel matrix")
    points = self.sequences.reshape(l, 1, 1)
    kk = kernel_matrix(points, points, self.gammas[-1])

    print("Constructing constraints")
    kernel_sums = kk.sum(0)
    flat_labels = self.labels.flatten()

    # Objective: only the last variable has a non-zero cost.
    c = np.hstack([np.zeros(l), [0, 1]])

    # Equality constraints, [constraint][variable]
    A_A = np.hstack([kernel_sums / l, [0, 0]])
    b_A = np.ones(1)
    A_G = np.hstack([np.zeros(l), [1, -1]])
    b_G = np.zeros(1)

    # Inequality constraints G x <= h
    G_B = np.hstack([flat_labels * kernel_sums, [0, -1]])
    h_B = np.array([self.labels.sum()])
    G_C = np.hstack([-flat_labels * kernel_sums, [-1, 0]])
    h_C = np.array([-self.labels.sum()])
    # [variable][variable]: every coefficient is non-negative
    G_D = np.hstack([-np.identity(l), np.zeros((l, 2))])
    h_D = np.zeros(l)
    # Both auxiliary variables are non-negative
    G_E = np.hstack([np.zeros(l), [-1, 0]])
    h_E = np.zeros(1)
    G_F = np.hstack([np.zeros(l), [0, -1]])
    h_F = np.zeros(1)

    G = np.vstack([G_B, G_C, G_D, G_E, G_F])
    h = np.hstack([h_B, h_C, h_D, h_E, h_F])
    print(G)
    print(h)

    A = np.vstack([A_A, A_G])
    b = np.vstack([b_A, b_G])
    print(A.shape)
    print(b.shape)

    print("Solving")
    solution = lp(matrix(c), matrix(G), matrix(h), matrix(A), matrix(b))

    print("Handling Solution")
    if solution['status'] == 'optimal':
        # The last two entries are the auxiliary variables.
        X = np.array(solution['x'][:-2])
        R_emp = np.array(solution['x'][-1])

        # True marks a negative coefficient; compress_rows drops every row
        # that contains a masked entry.
        self.SV_mask = (X < 0)
        row_mask = np.repeat(self.SV_mask, sequences.shape[1], 1)
        self.beta = np.ma.compress_rows(
            np.ma.array(X, mask=self.SV_mask)).astype('float32')
        self.SVx = np.ma.compress_rows(
            np.ma.array(sequences, mask=row_mask)).astype('float32')
        self.SVy = np.ma.compress_rows(
            np.ma.array(labels.reshape(labels.shape[0], 1), mask=self.SV_mask)).astype('float32')
        self.nSV = self.beta.shape[0]

        print(solution['x'])
        print("--> SVM Trained: %s SV's of %s, Risk=%s" % (self.nSV, self.SV_mask.shape[0], R_emp / X.shape[0]))