def __init__(self, algorithm_name=None, test_name=None):
    """Create an empty result table with counters zeroed.

    algorithm_name / test_name: optional labels identifying which
    algorithm and test produced the results accumulated here.
    """
    Table.__init__(self, default_value=0)

    self.algorithm_name = algorithm_name
    self.test_name = test_name

    # Running statistics over all observations added so far.
    self.classes = set()
    self.successes = 0
    self.failures = 0
    self.total = 0

    # The three numeric columns share one fixed-precision format.
    for column in ('Rate', 'Lower', 'Upper'):
        self.setColumnFormat(column, "%0.4f")
    self.setColumnFormat('Bar', "%-10s")
def __init__(self, algorithm_name=None, test_name=None):
    """Initialize counters, the class set, and column display formats.

    algorithm_name / test_name: optional labels recorded on the table.
    """
    Table.__init__(self, default_value=0)

    # Identifying metadata.
    self.algorithm_name = algorithm_name
    self.test_name = test_name

    # Accumulators updated as results are recorded.
    self.classes = set()
    self.successes = 0
    self.failures = 0
    self.total = 0

    # Display formats for the computed columns.
    self.setColumnFormat('Rate', "%0.4f")
    self.setColumnFormat('Lower', "%0.4f")
    self.setColumnFormat('Upper', "%0.4f")
    self.setColumnFormat('Bar', "%-10s")
def test_regression_linear(self):
    """Train RidgeRegression on 50 synthetic samples and check held-out MSE."""
    rega = RidgeRegression()
    filename = os.path.join(pyvision.__path__[0], 'data', 'synthetic', 'regression.dat')

    labels = []
    vectors = []
    # Column 0 is the regression target; columns 3-5 are the features used.
    # FIX: context manager ensures the data file is closed (was leaked).
    with open(filename, 'r') as reg_file:
        for line in reg_file:
            datapoint = line.split()
            labels.append(float(datapoint[0]))
            vectors.append([float(datapoint[3]), float(datapoint[4]), float(datapoint[5])])

    # First 50 samples train the model.
    for i in range(50):
        rega.addTraining(labels[i], vectors[i])
    rega.train()

    # Remaining samples are held out to estimate mean squared error.
    mse = 0.0
    table = Table()
    for i in range(50, len(labels)):
        p = rega.predict(vectors[i])
        e = p - labels[i]
        table.setElement(i, 'truth', labels[i])
        table.setElement(i, 'pred', p)
        table.setElement(i, 'Residual', e)
        mse += e * e
    mse = mse / (len(labels) - 50)

    self.assertAlmostEqual(mse, 0.24301122718491874, places=4)
def __init__(self, probe_set, gallery_set, algorithm_name=None, data_set=None, distances=True, is_match=firstFive):
    """Build a probe-by-gallery score matrix.

    probe_set / gallery_set: iterables of identifiers; each identifier is
        assigned a row/column index in iteration order.
    algorithm_name: optional label for the algorithm being scored.
    data_set: recorded by callers elsewhere; unused here (kept for interface).
    distances: True when scores are distances (lower is better) rather
        than similarities.
    is_match: callable deciding whether a probe/gallery pair is a match.
    """
    Table.__init__(self)
    self.algorithm_name = algorithm_name

    # Map each probe identifier to its row index.
    self.probe_set = {}
    probe_size = 0
    for each in probe_set:
        self.probe_set[each] = probe_size
        probe_size += 1

    # Map each gallery identifier to its column index.
    self.gallery_set = {}
    gallery_size = 0
    for each in gallery_set:
        self.gallery_set[each] = gallery_size
        gallery_size += 1

    # FIX: honor the 'distances' argument; it was hard-coded to True,
    # silently ignoring similarity-score callers.
    self.distances = distances

    # Dense double-precision score matrix, initialized to zero.
    self.data = zeros((probe_size, gallery_size), 'd')
def test_regression_linear(self):
    """Fit a linear ridge regression on synthetic data; verify test-set MSE."""
    rega = RidgeRegression()
    filename = os.path.join(pyvision.__path__[0], 'data', 'synthetic', 'regression.dat')

    labels = []
    vectors = []
    # Target is column 0; the three features are columns 3, 4, and 5.
    # FIX: open the data file with a context manager (it was never closed).
    with open(filename, 'r') as reg_file:
        for line in reg_file:
            datapoint = line.split()
            labels.append(float(datapoint[0]))
            vectors.append([
                float(datapoint[3]),
                float(datapoint[4]),
                float(datapoint[5])
            ])

    # Train on the first 50 samples.
    for i in range(50):
        rega.addTraining(labels[i], vectors[i])
    rega.train()

    # Evaluate mean squared error on the remaining samples.
    mse = 0.0
    table = Table()
    for i in range(50, len(labels)):
        p = rega.predict(vectors[i])
        e = p - labels[i]
        table.setElement(i, 'truth', labels[i])
        table.setElement(i, 'pred', p)
        table.setElement(i, 'Residual', e)
        mse += e * e
    mse = mse / (len(labels) - 50)

    self.assertAlmostEqual(mse, 0.24301122718491874, places=4)
def train_SVR_Linear(self, labels, vectors, verbose, C_range, callback=None): '''Private use only''' # combine the labels and vectors into one set. data = [] for i in range(len(labels)): data.append([labels[i], vectors[i]]) #shuffle the data rng = random.Random() if self.random_seed != None: rng.seed(self.random_seed) rng.shuffle(data) # partition into validation and training if type( self.validation_size ) == float and self.validation_size > 0.0 and self.validation_size < 1.0: training_cutoff = int(len(data) * (1.0 - self.validation_size)) elif type(self.validation_size ) == int and self.validation_size < len(labels): training_cutoff = len(labels) - self.validation_size else: raise NotImplementedError( "Cannot determine validation set from %s" % self.validation_size) if verbose: print "Training Cutoff:", len(labels), training_cutoff training_data = data[:training_cutoff] validation_data = data[training_cutoff:] tmp_labels = [] tmp_vectors = [] for each in training_data: tmp_labels.append(each[0]) tmp_vectors.append(each[1]) prob = svm.svm_problem(tmp_labels, tmp_vectors) training_info = [] training_svm = [] training_table = Table() self.training_table = training_table i = 0 for C in C_range: param = svm.svm_parameter(svm_type=self.svm_type, kernel_type=svm.LINEAR, C=C, p=self.epsilon, nu=self.nu) test_svm = svm.svm_model(prob, param) mse = 0.0 total = len(validation_data) for label, vector in validation_data: pred = test_svm.predict(vector) error = label - pred mse += error * error mse = mse / total training_svm.append(test_svm) training_info.append([C, mse]) training_table.setElement(i, 'C', C) training_table.setElement(i, 'mse', mse) i += 1 if callback != None: callback(int(100 * float(i) / len(C_range))) if verbose: print if verbose: print "------------------------------" if verbose: print " Tuning Information:" if verbose: print " C error" if verbose: print "------------------------------" best = training_info[0] best_svm = training_svm[0] for i in 
range(len(training_info)): each = training_info[i] if verbose: print " %8.3e %0.8f" % (each[0], each[1]) if best[-1] > each[-1]: best = each best_svm = training_svm[i] if verbose: print "------------------------------" if verbose: print if verbose: print "------------------------------" if verbose: print " Best Tuning:" if verbose: print " C error" if verbose: print "------------------------------" if verbose: print " %8.3e %0.8f" % (best[0], best[1]) if verbose: print "------------------------------" if verbose: print self.training_info = training_info self.C = best[0] self.error = best[1] self.svm = best_svm
def train_SVR_Linear(self,labels,vectors,verbose, C_range, callback=None): '''Private use only''' # combine the labels and vectors into one set. data = [] for i in range(len(labels)): data.append([labels[i],vectors[i]]) #shuffle the data rng = random.Random() if self.random_seed != None: rng.seed(self.random_seed) rng.shuffle(data) # partition into validation and training if type(self.validation_size) == float and self.validation_size > 0.0 and self.validation_size < 1.0: training_cutoff = int(len(data)*(1.0-self.validation_size)) elif type(self.validation_size) == int and self.validation_size < len(labels): training_cutoff = len(labels)-self.validation_size else: raise NotImplementedError("Cannot determine validation set from %s"%self.validation_size) if verbose: print "Training Cutoff:",len(labels),training_cutoff training_data = data[:training_cutoff] validation_data = data[training_cutoff:] tmp_labels = [] tmp_vectors = [] for each in training_data: tmp_labels.append(each[0]) tmp_vectors.append(each[1]) prob = svm.svm_problem(tmp_labels,tmp_vectors) training_info = [] training_svm = [] training_table = Table() self.training_table = training_table i=0 for C in C_range: param = svm.svm_parameter(svm_type=self.svm_type,kernel_type = svm.LINEAR, C = C, p=self.epsilon,nu=self.nu) test_svm = svm.svm_model(prob, param) mse = 0.0 total = len(validation_data) for label,vector in validation_data: pred = test_svm.predict(vector) error = label - pred mse += error*error mse = mse/total training_svm.append(test_svm) training_info.append([C,mse]) training_table.setElement(i,'C',C) training_table.setElement(i,'mse',mse) i+=1 if callback != None: callback(int(100*float(i)/len(C_range))) if verbose: print if verbose: print "------------------------------" if verbose: print " Tuning Information:" if verbose: print " C error" if verbose: print "------------------------------" best = training_info[0] best_svm = training_svm[0] for i in range(len(training_info)): each = 
training_info[i] if verbose: print " %8.3e %0.8f"%(each[0],each[1]) if best[-1] > each[-1]: best = each best_svm = training_svm[i] if verbose: print "------------------------------" if verbose: print if verbose: print "------------------------------" if verbose: print " Best Tuning:" if verbose: print " C error" if verbose: print "------------------------------" if verbose: print " %8.3e %0.8f"%(best[0],best[1]) if verbose: print "------------------------------" if verbose: print self.training_info = training_info self.C = best[0] self.error = best[1] self.svm = best_svm
def test_kernel_regression_rbf4(self):
    """Kernel ridge regression on quadratic synthetic data; check MSE and ASE."""
    rega = KernelRidgeRegression(random_seed=28378)
    filename = os.path.join(pyvision.__path__[0], 'data', 'synthetic', 'synth1_quad.txt')

    labels = []
    vectors = []
    truth = []
    # Each row: x value, noisy measurement, noise-free truth.
    # FIX: context manager ensures the data file is closed (was leaked).
    with open(filename, 'r') as reg_file:
        for line in reg_file:
            datapoint = line.split()
            labels.append(float(datapoint[1]))
            vectors.append([float(datapoint[0])])
            truth.append(float(datapoint[2]))

    # Train on the first 100 samples.
    for i in range(100):
        rega.addTraining(labels[i], vectors[i])
    rega.train(verbose=False)

    mse = 0.0  # error against the noisy measurements
    ase = 0.0  # error against the noise-free truth
    table = Table()
    for i in range(100, len(labels)):
        p = rega.predict(vectors[i])
        e = p - labels[i]
        mse += e * e
        a = p - truth[i]
        ase += a * a
        table.setElement(i, 'x', vectors[i][0])
        table.setElement(i, 'measure', labels[i])
        table.setElement(i, 'truth', truth[i])
        table.setElement(i, 'pred', p)
        table.setElement(i, 'e', e)
        table.setElement(i, 'a', a)
    mse = mse / (len(labels) - 100)
    ase = ase / (len(labels) - 100)

    self.assertAlmostEqual(mse, 0.063883259792847411, places=7)
    self.assertAlmostEqual(ase, 0.028811752673991175, places=7)
def test_kernel_regression_rbf2(self):
    """RBF kernel ridge regression on mixed synthetic data; check MSE and ASE."""
    rega = KernelRidgeRegression(lams=0.1, kernels=RBF())
    filename = os.path.join(pyvision.__path__[0], 'data', 'synthetic', 'synth1_mix.txt')

    labels = []
    vectors = []
    truth = []
    # Each row: x value, noisy measurement, noise-free truth.
    # FIX: open the data file with a context manager (it was never closed).
    with open(filename, 'r') as reg_file:
        for line in reg_file:
            datapoint = line.split()
            labels.append(float(datapoint[1]))
            vectors.append([float(datapoint[0])])
            truth.append(float(datapoint[2]))

    # Train on the first 100 samples.
    for i in range(100):
        rega.addTraining(labels[i], vectors[i])
    rega.train()

    mse = 0.0  # error against the noisy measurements
    ase = 0.0  # error against the noise-free truth
    table = Table()
    for i in range(100, len(labels)):
        p = rega.predict(vectors[i])
        e = p - labels[i]
        mse += e * e
        a = p - truth[i]
        ase += a * a
        table.setElement(i, 'x', vectors[i][0])
        table.setElement(i, 'measure', labels[i])
        table.setElement(i, 'truth', truth[i])
        table.setElement(i, 'pred', p)
        table.setElement(i, 'e', e)
        table.setElement(i, 'a', a)
    mse = mse / (len(labels) - 100)
    ase = ase / (len(labels) - 100)

    self.assertAlmostEqual(mse, 0.563513669235162, places=7)
    self.assertAlmostEqual(ase, 0.51596869146460422, places=7)