def classifier_multiclasslinearmachine_modular (fm_train_real=traindat,
                                                fm_test_real=testdat,
                                                label_train_multiclass=label_traindat,
                                                label_test_multiclass=label_testdat,
                                                lawidth=2.1,
                                                C=1,
                                                epsilon=1e-5):
    """Train an ECOC linear multiclass machine and predict the test labels.

    A LibLinear learner (solver L2R_L2LOSS_SVC, bias enabled) is used as the
    binary base machine.  The multiclass strategy is an ECOCStrategy built
    from an ECOCDiscriminantEncoder (50 SFFS iterations) and an
    ECOCHDDecoder.

    Args:
        fm_train_real: training feature matrix, wrapped in RealFeatures.
        fm_test_real: test feature matrix, wrapped in RealFeatures.
        label_train_multiclass: training labels, wrapped in MulticlassLabels.
        label_test_multiclass: optional test labels; when not None the
            multiclass accuracy is evaluated and printed.
        lawidth: unused; kept so existing callers keep working.
        C: regularization constant, applied to both classes of the base
            LibLinear learner.
        epsilon: stopping tolerance of the base LibLinear learner.

    Returns:
        The predicted test labels (``get_labels()`` of the apply() result).
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    base_learner = LibLinear(L2R_L2LOSS_SVC)
    # C used to be accepted but silently ignored; hand it to the learner.
    # NOTE(review): presumably C=1 equals LibLinear's built-in default, so
    # default callers see no change -- confirm against the shogun version used.
    base_learner.set_C(C, C)
    base_learner.set_epsilon(epsilon)
    base_learner.set_bias_enabled(True)

    # The discriminant encoder needs the training data to build its codebook.
    encoder = ECOCDiscriminantEncoder()
    encoder.set_features(feats_train)
    encoder.set_labels(labels)
    encoder.set_sffs_iterations(50)

    strategy = ECOCStrategy(encoder, ECOCHDDecoder())

    machine = LinearMulticlassMachine(strategy, feats_train, base_learner, labels)
    machine.train()
    label_pred = machine.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
def liblinear ():
    """Train LibLinear on sparse features and print the test predictions.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass``.  Dense matrices are wrapped in RealFeatures and
    converted to SparseRealFeatures before training/classification.
    """
    print('LibLinear')

    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import LibLinear

    def _as_sparse(matrix):
        # Wrap the dense matrix, then let the sparse container copy from it.
        dense = RealFeatures(matrix)
        sparse = SparseRealFeatures()
        sparse.obtain_from_simple(dense)
        return sparse

    train_sparse = _as_sparse(fm_train_real)
    test_sparse = _as_sparse(fm_test_real)

    cost = 0.9
    tolerance = 1e-5
    thread_count = 1

    train_labels = Labels(label_train_twoclass)

    machine = LibLinear(cost, train_sparse, train_labels)
    machine.set_epsilon(tolerance)
    machine.parallel.set_num_threads(thread_count)
    machine.set_bias_enabled(True)
    machine.train()

    # Switch the machine over to the test set before classifying.
    machine.set_features(test_sparse)
    print(machine.classify().get_labels())
def train_svm(feats_train, labels, C=1):
    """Train and return a bias-free LibLinear SVM (solver L2R_L2LOSS_SVC).

    Args:
        feats_train: training features handed to LibLinear.
        labels: training labels handed to LibLinear.
        C: regularization constant (default 1).

    Returns:
        The trained LibLinear machine (epsilon fixed at 1e-3).
    """
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, L2R_L2LOSS_SVC_DUAL

    machine = LibLinear(C, feats_train, labels)
    machine.set_liblinear_solver_type(L2R_L2LOSS_SVC)
    machine.set_epsilon(1e-3)
    machine.set_bias_enabled(False)
    machine.train()
    return machine
def features_director_dot_modular (fm_train_real, fm_test_real, label_train_twoclass, C, epsilon):
    """Train LibLinear on RealFeatures and on NumpyFeatures of the same data.

    ``NumpyFeatures`` is taken from the enclosing scope.  Both machines use
    the L2R_L2LOSS_SVC_DUAL solver with bias enabled.  Only the results of
    the RealFeatures-based machine are returned; the NumpyFeatures-based
    machine is trained and applied but its output is discarded.

    Returns:
        (predictions, svm, predictions.get_labels()) for the RealFeatures
        machine applied to the test features.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random

    Math_init_random(17)

    # Reference features/labels ...
    real_train = RealFeatures(fm_train_real)
    real_test = RealFeatures(fm_test_real)
    real_labels = BinaryLabels(label_train_twoclass)

    # ... and their director-based counterparts built from the same arrays.
    director_train = NumpyFeatures(fm_train_real)
    director_test = NumpyFeatures(fm_test_real)
    director_labels = BinaryLabels(label_train_twoclass)

    print(real_train.get_computed_dot_feature_matrix())
    print(director_train.get_computed_dot_feature_matrix())

    # Reference machine.
    svm = LibLinear(C, real_train, real_labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(real_test)
    svm.apply().get_labels()
    predictions = svm.apply()

    # Director machine: disown the Python proxy and force a single thread
    # before the features are handed to the solver.
    director_train.__disown__()
    director_train.parallel.set_num_threads(1)
    director_svm = LibLinear(C, director_train, director_labels)
    director_svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    director_svm.set_epsilon(epsilon)
    director_svm.set_bias_enabled(True)
    director_svm.train()

    director_test.__disown__()
    director_test.parallel.set_num_threads(1)
    director_svm.set_features(director_test)
    director_svm.apply().get_labels()
    director_predictions = director_svm.apply()

    return predictions, svm, predictions.get_labels()
def features_director_dot_modular(fm_train_real, fm_test_real, label_train_twoclass, C, epsilon):
    """Run LibLinear once on RealFeatures and once on NumpyFeatures.

    ``NumpyFeatures`` must be available in the enclosing scope.  The same
    solver settings (L2R_L2LOSS_SVC_DUAL, given epsilon, bias enabled) are
    applied to both machines.  The returned tuple describes the
    RealFeatures run only: ``(predictions, svm, predictions.get_labels())``.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random

    def _fit(machine):
        # Shared solver configuration followed by training.
        machine.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
        machine.set_epsilon(epsilon)
        machine.set_bias_enabled(True)
        machine.train()
        return machine

    def _single_threaded(director_feats):
        # Release Python ownership of the director object and pin it to
        # one thread.
        director_feats.__disown__()
        director_feats.parallel.set_num_threads(1)
        return director_feats

    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = BinaryLabels(label_train_twoclass)

    dfeats_train = NumpyFeatures(fm_train_real)
    dfeats_test = NumpyFeatures(fm_test_real)
    dlabels = BinaryLabels(label_train_twoclass)

    print(feats_train.get_computed_dot_feature_matrix())
    print(dfeats_train.get_computed_dot_feature_matrix())

    svm = _fit(LibLinear(C, feats_train, labels))
    svm.set_features(feats_test)
    svm.apply().get_labels()
    predictions = svm.apply()

    _single_threaded(dfeats_train)
    dsvm = _fit(LibLinear(C, dfeats_train, dlabels))

    _single_threaded(dfeats_test)
    dsvm.set_features(dfeats_test)
    dsvm.apply().get_labels()
    dpredictions = dsvm.apply()

    return predictions, svm, predictions.get_labels()
def classifier_multiclasslinearmachine_modular(
        fm_train_real=traindat,
        fm_test_real=testdat,
        label_train_multiclass=label_traindat,
        label_test_multiclass=label_testdat,
        lawidth=2.1,
        C=1,
        epsilon=1e-5):
    """Fit a LinearMulticlassMachine with an ECOC strategy and classify.

    The binary base learner is LibLinear (L2R_L2LOSS_SVC, bias enabled).
    The code book comes from an ECOCDiscriminantEncoder run for 50 SFFS
    iterations on the training data, and decoding is done by ECOCHDDecoder.

    Args:
        fm_train_real: training feature matrix (wrapped in RealFeatures).
        fm_test_real: test feature matrix (wrapped in RealFeatures).
        label_train_multiclass: training labels (wrapped in MulticlassLabels).
        label_test_multiclass: test labels, or None to skip the accuracy
            report.
        lawidth: not used by this function; retained for compatibility.
        C: regularization constant of the base learner (set for both
            classes).
        epsilon: stopping tolerance of the base learner.

    Returns:
        Predicted labels for the test features.
    """
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = MulticlassLabels(label_train_multiclass)

    binary_svm = LibLinear(L2R_L2LOSS_SVC)
    # Fix: the C argument was previously never forwarded to LibLinear.
    # NOTE(review): default C=1 is assumed to match LibLinear's own default;
    # verify for the installed shogun release.
    binary_svm.set_C(C, C)
    binary_svm.set_epsilon(epsilon)
    binary_svm.set_bias_enabled(True)

    encoder = ECOCDiscriminantEncoder()
    encoder.set_features(feats_train)
    encoder.set_labels(labels)
    encoder.set_sffs_iterations(50)

    strategy = ECOCStrategy(encoder, ECOCHDDecoder())

    # Separate name for the multiclass wrapper; the original rebound
    # `classifier`, hiding the base learner.
    multiclass_machine = LinearMulticlassMachine(
        strategy, feats_train, binary_svm, labels)
    multiclass_machine.train()

    label_pred = multiclass_machine.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out
def solver_dcd_shogun_debug(C, all_xt, all_lt, task_indicator, M, L):
    """
    Train one standard LibLinear SVM on all examples, for debugging purposes.

    The task structure is only used for sanity checks; a single model is
    trained on the pooled data with the L2R_L1LOSS_SVC_DUAL solver, no bias
    and debug log level.

    Args:
        C: regularization constant, used for both classes.
        all_xt: examples; either strings (hashed via
            create_hashed_features_wdk) or numeric vectors (RealFeatures).
        all_lt: labels, one per example.
        task_indicator: task id per example (cast to int32).
        M, L: arrays of identical shape; L.shape[0] must equal the number
            of distinct task ids.

    Returns:
        (W, 42, 42) where W is a one-element list holding the weight vector;
        the two trailing values are placeholders.
    """
    xt = numpy.array(all_xt)
    lt = numpy.array(all_lt)
    tt = numpy.array(task_indicator, dtype=numpy.int32)

    num_tasks = L.shape[0]

    # sanity checks
    assert len(xt) == len(lt) == len(tt)
    assert M.shape == L.shape
    assert num_tasks == len(set(tt))

    # set up shogun objects
    # Elements of a numpy string array are numpy string scalars, so an exact
    # `type(...) == str` comparison never matched; isinstance accepts them.
    if isinstance(xt[0], str):
        feat = create_hashed_features_wdk(xt, 8)
    else:
        feat = RealFeatures(xt.T)

    lab = Labels(lt)

    # set up machinery
    svm = LibLinear()
    svm.set_liblinear_solver_type(L2R_L1LOSS_SVC_DUAL)
    svm.io.set_loglevel(MSG_DEBUG)
    svm.set_C(C, C)
    svm.set_bias_enabled(False)

    # invoke training
    svm.set_labels(lab)
    svm.train(feat)

    # get model parameters
    W = [svm.get_w()]

    return W, 42, 42
def classifier_liblinear_modular(fm_train_real, fm_test_real, label_train_twoclass, C, epsilon):
    """Train LibLinear (L2R_L2LOSS_SVC_DUAL, bias enabled) and classify.

    The shogun random generator is seeded with 17 before training.

    Args:
        fm_train_real: training feature matrix (wrapped in RealFeatures).
        fm_test_real: test feature matrix (wrapped in RealFeatures).
        label_train_twoclass: training labels (wrapped in Labels).
        C: regularization constant.
        epsilon: stopping tolerance.

    Returns:
        (predictions, svm, predictions.get_labels()) for the test features.
    """
    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random

    Math_init_random(17)

    train_features = RealFeatures(fm_train_real)
    test_features = RealFeatures(fm_test_real)
    train_labels = Labels(label_train_twoclass)

    svm = LibLinear(C, train_features, train_labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    # Classify the test set; the first pass mirrors the original call whose
    # result is discarded.
    svm.set_features(test_features)
    svm.apply().get_labels()
    predictions = svm.apply()
    predicted_labels = predictions.get_labels()

    return predictions, svm, predicted_labels
#############################################
#    compute pre-svm
#############################################

# Convert the examples to a float64 matrix and transpose it before wrapping
# it in RealFeatures.
examples_presvm = numpy.transpose(
    numpy.array(examples_presvm, dtype=numpy.float64))
feat_presvm = RealFeatures(examples_presvm)
lab_presvm = Labels(numpy.array(labels_presvm))

# Linear pre-SVM trained with LibLinear (no bias, up to 10000 iterations).
presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
presvm_liblinear.set_max_iterations(10000)
presvm_liblinear.set_bias_enabled(False)
presvm_liblinear.train()

# Kernel-based pre-SVM on the same data, using a linear kernel.
wdk_presvm = LinearKernel(feat_presvm, feat_presvm)
presvm_libsvm = LibSVM(1, wdk_presvm, lab_presvm)
#presvm_libsvm = SVMLight(1, wdk_presvm, lab_presvm)
#presvm_libsvm.io.set_loglevel(MSG_DEBUG)
presvm_libsvm.set_bias_enabled(False)
presvm_libsvm.train()

# Rebuild the LibLinear machine untrained and inject the learned weights.
my_w = presvm_liblinear.get_w()
presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
presvm_liblinear.set_w(my_w)

#############################################
#    compute linear term manually
#############################################
def classifier_perceptron_graphical(n=100, distance=5, learn_rate=1., max_iter=1000, num_threads=1, seed=None, nperceptrons=5):
    """Plot decision boundaries of several perceptrons and of a linear SVM.

    Two 2-D Gaussian clouds of ``n`` points each are generated, one shifted
    by ``distance``.  ``nperceptrons`` perceptrons are trained from random
    initial weights/bias and their boundaries are drawn on two figures; a
    LibLinear SVM (L2R_L2LOSS_SVC, C=1, epsilon=1e-3) is drawn on the second
    figure.  The minimum distance reported by ``min_distance`` is printed
    for each perceptron and for the SVM.

    Args:
        n: number of points per class.
        distance: offset added to the first class.
        learn_rate: perceptron learning rate.
        max_iter: perceptron iteration limit.
        num_threads: unused; kept for interface compatibility.
        seed: seed passed to ``np.random.seed``.
        nperceptrons: number of randomly initialised perceptron runs.

    Returns:
        The Perceptron object (in the state left by its last training run).
    """
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Classifier import Perceptron, LibLinear, L2R_L2LOSS_SVC

    # 2D data
    _DIM = 2

    # NOTE: to see shogun's convergence message, raise the log level on any
    # shogun object, e.g. labels.io.set_loglevel(MSG_INFO) with MSG_INFO
    # imported from modshogun (this was disabled in the original as well).

    np.random.seed(seed)

    # Produce some (probably) linearly separable training data by hand
    # Two Gaussians at a far enough distance
    X = np.random.randn(_DIM, n) + distance
    Y = np.random.randn(_DIM, n)
    label_train_twoclass = np.hstack((np.ones(n), -np.ones(n)))

    fm_train_real = np.hstack((X, Y))
    feats_train = RealFeatures(fm_train_real)
    labels = BinaryLabels(label_train_twoclass)

    perceptron = Perceptron(feats_train, labels)
    perceptron.set_learn_rate(learn_rate)
    perceptron.set_max_iter(max_iter)
    perceptron.set_initialize_hyperplane(False)

    # Find limits for visualization
    x_min = min(np.min(X[0, :]), np.min(Y[0, :]))
    x_max = max(np.max(X[0, :]), np.max(Y[0, :]))

    y_min = min(np.min(X[1, :]), np.min(Y[1, :]))
    y_max = max(np.max(X[1, :]), np.max(Y[1, :]))

    # Abscissae for every boundary line; independent of the model, so
    # computed once.
    hx = np.linspace(x_min - 1, x_max + 1)

    fig1, axes1 = plt.subplots(1, 1)
    fig2, axes2 = plt.subplots(1, 1)

    for _ in range(nperceptrons):
        # Initialize randomly weight vector and bias
        perceptron.set_w(np.random.random(_DIM))
        perceptron.set_bias(np.random.random())

        # Run the perceptron algorithm
        perceptron.train()

        # Construct the hyperplane for visualization
        # Equation of the decision boundary is w^T x + b = 0
        b = perceptron.get_bias()
        w = perceptron.get_w()

        boundary = -1/w[1]*(w[0]*hx+b)
        axes1.plot(hx, boundary)
        axes2.plot(hx, boundary, alpha=0.5)

        print('minimum distance with perceptron is %f' % min_distance(w, b, feats_train))

    C = 1
    epsilon = 1e-3
    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    b = svm.get_bias()
    w = svm.get_w()

    print('minimum distance with svm is %f' % min_distance(w, b, feats_train))

    axes2.plot(hx, -1/w[1]*(w[0]*hx+b), 'k', linewidth=2.0)

    # Plot the two-class data
    axes1.scatter(X[0, :], X[1, :], s=40, marker='o', facecolors='none', edgecolors='b')
    axes1.scatter(Y[0, :], Y[1, :], s=40, marker='s', facecolors='none', edgecolors='r')
    axes2.scatter(X[0, :], X[1, :], s=40, marker='o', facecolors='none', edgecolors='b')
    axes2.scatter(Y[0, :], Y[1, :], s=40, marker='s', facecolors='none', edgecolors='r')

    # Customize the plot
    axes1.axis([x_min - 1, x_max + 1, y_min - 1, y_max + 1])
    axes1.set_title('Rosenblatt\'s Perceptron Algorithm')
    axes1.set_xlabel('x')
    axes1.set_ylabel('y')

    axes2.axis([x_min - 1, x_max + 1, y_min - 1, y_max + 1])
    axes2.set_title('Support Vector Machine')
    axes2.set_xlabel('x')
    axes2.set_ylabel('y')

    plt.show()

    return perceptron
def features_director_dot_modular (fm_train_real, fm_test_real, label_train_twoclass, C, epsilon):
    """Compare LibLinear on RealFeatures with a Python DirectorDotFeatures.

    A numpy-backed ``NumpyFeatures`` subclass of DirectorDotFeatures is
    defined locally and used to train a second LibLinear machine on the same
    data.  If the director classes cannot be imported, a hint is printed and
    the function returns None.

    Returns:
        (predictions, svm, predictions.get_labels()) of the RealFeatures
        machine on the test data, or None when directors are unavailable.
    """
    try:
        from shogun.Features import DirectorDotFeatures
        from shogun.Library import RealVector
    except ImportError:
        print("recompile shogun with --enable-swig-directors")
        return

    class NumpyFeatures(DirectorDotFeatures):
        """Dot features backed by a numpy matrix, one example per column."""

        # variables
        data = numpy.empty((1, 1))

        # constructor
        def __init__(self, d):
            DirectorDotFeatures.__init__(self)
            self.data = d

        # overloaded methods
        def add_to_dense_sgvec(self, alpha, vec_idx1, vec2, abs):
            # Accumulate alpha * column (or its absolute value) into vec2
            # in place.
            column = self.data[:, vec_idx1]
            if abs:
                column = numpy.abs(column)
            vec2 += alpha * column

        def dot(self, vec_idx1, df, vec_idx2):
            other = df.get_computed_dot_feature_vector(vec_idx2)
            return numpy.dot(self.data[:, vec_idx1], other)

        def dense_dot_sgvec(self, vec_idx1, vec2):
            return numpy.dot(self.data[:, vec_idx1], vec2[0:vec2.vlen])

        def get_num_vectors(self):
            return self.data.shape[1]

        def get_dim_feature_space(self):
            return self.data.shape[0]

    from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
    from shogun.Mathematics import Math_init_random

    Math_init_random(17)

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    labels = BinaryLabels(label_train_twoclass)

    dfeats_train = NumpyFeatures(fm_train_real)
    dfeats_test = NumpyFeatures(fm_test_real)
    dlabels = BinaryLabels(label_train_twoclass)

    print(feats_train.get_computed_dot_feature_matrix())
    print(dfeats_train.get_computed_dot_feature_matrix())

    # Reference run on shogun's own dense features.
    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    svm.set_features(feats_test)
    svm.apply().get_labels()
    predictions = svm.apply()

    # Same run on the director features; each director object is disowned
    # and limited to one thread before use.
    dfeats_train.__disown__()
    dfeats_train.parallel.set_num_threads(1)
    dsvm = LibLinear(C, dfeats_train, dlabels)
    dsvm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
    dsvm.set_epsilon(epsilon)
    dsvm.set_bias_enabled(True)
    dsvm.train()

    dfeats_test.__disown__()
    dfeats_test.parallel.set_num_threads(1)
    dsvm.set_features(dfeats_test)
    dsvm.apply().get_labels()
    dpredictions = dsvm.apply()

    return predictions, svm, predictions.get_labels()
# Real-valued features first: float64 matrix, transposed, wrapped for shogun.
examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64).transpose()

feat_presvm = RealFeatures(examples_presvm)
lab_presvm = Labels(numpy.array(labels_presvm))
wdk_presvm = LinearKernel(feat_presvm, feat_presvm)

# LibLinear pre-SVM without bias.
presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
presvm_liblinear.set_max_iterations(10000)
presvm_liblinear.set_bias_enabled(False)
presvm_liblinear.train()

# LibSVM pre-SVM on the linear kernel, also without bias.
presvm_libsvm = LibSVM(1, wdk_presvm, lab_presvm)
#presvm_libsvm = SVMLight(1, wdk_presvm, lab_presvm)
#presvm_libsvm.io.set_loglevel(MSG_DEBUG)
presvm_libsvm.set_bias_enabled(False)
presvm_libsvm.train()

# Keep the trained weights, then replace the machine by a fresh one that
# carries those weights.
my_w = presvm_liblinear.get_w()
presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
presvm_liblinear.set_w(my_w)

#############################################
def classifier_non_separable_svm(n=100, m=10, distance=5, seed=None):
    '''
    Train and plot a linear SVM on data that is not linearly separable.

    n is the number of examples per class and m is the number of examples per
    class that gets its label swapped to force non-linear separability.
    distance is the offset added to the first Gaussian cloud and seed is
    passed to np.random.seed.

    Returns the trained LibLinear machine (L2R_L2LOSS_SVC, C=1,
    epsilon=1e-3, bias enabled).
    '''
    from shogun.Features import RealFeatures, BinaryLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # 2D data
    _DIM = 2

    np.random.seed(seed)

    # Produce some (probably) linearly separable training data by hand
    # Two Gaussians at a far enough distance
    X = np.random.randn(_DIM, n) + distance
    Y = np.random.randn(_DIM, n)

    # The last m points of each class get the other class's label, which
    # makes the data non-linearly separable
    label_train_twoclass = np.hstack(
        (np.ones(n - m), -np.ones(m), -np.ones(n - m), np.ones(m)))

    fm_train_real = np.hstack((X, Y))
    feats_train = RealFeatures(fm_train_real)
    labels = BinaryLabels(label_train_twoclass)

    # Train linear SVM
    C = 1
    epsilon = 1e-3
    svm = LibLinear(C, feats_train, labels)
    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC)
    svm.set_epsilon(epsilon)
    svm.set_bias_enabled(True)
    svm.train()

    # Get hyperplane parameters
    b = svm.get_bias()
    w = svm.get_w()

    # Find limits for visualization
    x_min = min(np.min(X[0, :]), np.min(Y[0, :]))
    x_max = max(np.max(X[0, :]), np.max(Y[0, :]))

    y_min = min(np.min(X[1, :]), np.min(Y[1, :]))
    y_max = max(np.max(X[1, :]), np.max(Y[1, :]))

    # Decision boundary w^T x + b = 0 solved for the second coordinate.
    hx = np.linspace(x_min - 1, x_max + 1)
    plt.plot(hx, -1 / w[1] * (w[0] * hx + b), 'k', linewidth=2.0)

    # Plot the two-class data
    pos_idxs = label_train_twoclass == +1
    plt.scatter(fm_train_real[0, pos_idxs], fm_train_real[1, pos_idxs],
                s=40, marker='o', facecolors='none', edgecolors='b')

    neg_idxs = label_train_twoclass == -1
    plt.scatter(fm_train_real[0, neg_idxs], fm_train_real[1, neg_idxs],
                s=40, marker='s', facecolors='none', edgecolors='r')

    # Customize the plot
    plt.axis([x_min - 1, x_max + 1, y_min - 1, y_max + 1])
    plt.title('SVM with non-linearly separable data')
    plt.xlabel('x')
    plt.ylabel('y')

    plt.show()

    return svm
def get_presvm(B=2.0):
    """Check that pickled DomainAdaptationSVMLinear machines behave the same.

    A LibLinear pre-SVM is trained on fixed data, a domain-adaptation SVM is
    trained on top of it, and the same is repeated with a pre-SVM that went
    through ``helper.save``/``helper.load`` ("/tmp/svm").  The adapted SVM
    itself is also saved to and loaded from "/tmp/dasvm".  Outputs of the
    two adapted machines on fixed test data are compared with a tolerance
    of 0.001; differing pairs are printed, and "classification agrees." is
    printed only if every pair matches.

    Args:
        B: passed as the last argument to DomainAdaptationSVMLinear.

    Returns:
        None.
    """
    examples_presvm = [
        [ 2.1788894 ,  3.89163458,  5.55086917,  6.4022742 ,  3.14964751, -0.4622959 ,  5.38538904,  5.9962938 ,  6.29690849],
        [ 2.1788894 ,  3.89163458,  5.55086917,  6.4022742 ,  3.14964751, -0.4622959 ,  5.38538904,  5.9962938 ,  6.29690849],
        [ 0.93099452,  0.38871617,  1.57968949,  1.25672527, -0.8123137 ,  0.20786586,  1.378121  ,  1.15598866,  0.80265343],
        [ 0.68705535,  0.15144113, -0.81306157, -0.7664577 ,  1.16452945, -0.2712956 ,  0.483094  , -0.16302007, -0.39094812],
        [-0.71374437, -0.16851719,  1.43826895,  0.95961166, -0.2360497 , -0.30425755,  1.63157052,  1.15990427,  0.63801465],
        [ 0.68705535,  0.15144113, -0.81306157, -0.7664577 ,  1.16452945, -0.2712956 ,  0.483094  , -0.16302007, -0.39094812],
        [-0.71374437, -0.16851719,  1.43826895,  0.95961166, -0.2360497 , -0.30425755,  1.63157052,  1.15990427,  0.63801465],
        [-0.98028302, -0.23974489,  2.1687206 ,  1.99338824, -0.67070205, -0.33167281,  1.3500379 ,  1.34915685,  1.13747975],
        [ 0.67109612,  0.12662017, -0.48254886, -0.49091898,  1.31522237, -0.34108933,  0.57832179, -0.01992828, -0.26581628],
        [ 0.3193611 ,  0.44903416,  3.62187778,  4.1490827 ,  1.58832961,  1.95583397,  1.36836023,  1.92521945,  2.41114998],
    ]
    labels_presvm = [-1.0, -1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0, 1.0]

    examples = [
        [-0.49144487, -0.19932263, -0.00408188, -0.21262012,  0.14621013, -0.50415481,  0.32317317, -0.00317602, -0.21422637],
        [ 0.0511817 , -0.04226666, -0.30454651, -0.38759116,  0.31639514,  0.32558471,  0.49364473,  0.04515591, -0.06963456],
        [-0.30324369, -0.11909251, -0.03210278, -0.2779561 ,  1.31488853, -0.33165365,  0.60176018, -0.00384946, -0.15603975],
        [ 0.59282756, -0.0039991 , -0.26028983, -0.26722552,  1.63314995, -0.51199338,  0.33340685, -0.0170519 , -0.19211039],
        [-0.18338766, -0.07783465,  0.42019824,  0.201753  ,  2.01160098,  0.33326111,  0.75591909,  0.36631525,  0.1761829 ],
        [ 0.10273793, -0.02189574,  0.91092358,  0.74827973,  0.51882902, -0.1286531 ,  0.64463658,  0.67468349,  0.55587266],
        [-0.09727099, -0.13413522,  0.18771062,  0.19411594,  1.48547364, -0.43169608,  0.55064534,  0.24331473,  0.10878847],
        [-0.22494375, -0.15492964,  0.28017737,  0.29794467,  0.96403895,  0.43880289,  0.08053425,  0.07456818,  0.12102371],
        [-0.18161417, -0.17692039,  0.19554942, -0.00785625,  1.38315115, -0.05923183, -0.05723568, -0.15463646, -0.24249483],
        [-0.36538359, -0.20040061, -0.38384388, -0.40206556, -0.25040256,  0.94205875,  0.40162798,  0.00327328, -0.24107393],
    ]
    labels = [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, -1.0]

    examples_test = [
        [-0.45159799, -0.11401394,  1.28574573,  1.09144306,  0.92253119, -0.47230164,  0.77032486,  0.83047366,  0.74768906],
        [ 0.42613105,  0.0092778 , -0.78640296, -0.71632445,  0.41154244,  0.88380309,  0.19475759, -0.14195876, -0.30479425],
        [-0.09727099, -0.13413522,  0.18771062,  0.19411594,  1.48547364, -0.43169608,  0.55064534,  0.24331473,  0.10878847],
        [ 0.11558796, -0.08867647, -0.26432074, -0.30924546, -1.08243017, -0.1339607 , -0.1956124 , -0.2428358 , -0.25761213],
        [ 1.23679696,  0.18753081, -0.25593329, -0.12051991,  0.64976989, -0.17184101,  0.14951337,  0.01988587, -0.0356698 ],
        [ 1.03355002,  0.05316195, -0.97905368, -0.75482121,  0.28673776,  2.27142733,  0.02654739, -0.31109851, -0.44555277],
        [-0.53662325, -0.21434756, -0.12105795, -0.27531257,  0.66947047,  0.05474302, -0.00717455, -0.17700575, -0.22253444],
        [ 0.11272632, -0.12674826, -0.49736457, -0.51445609,  0.88518932, -0.51558669, -0.12000557, -0.32973613, -0.38488736],
        [ 0.8372111 ,  0.06972199, -1.00454229, -0.79869642,  1.19376333, -0.40160273, -0.25122157, -0.46417918, -0.50234858],
        [-0.36325018, -0.12206184,  0.10525247, -0.15663416,  1.03616948, -0.51699463,  0.59566286,  0.35363369,  0.10545559],
    ]

    #############################################
    #    compute pre-svm
    #############################################

    # create real-valued features as first step (float64, transposed)
    examples_presvm = numpy.array(examples_presvm, dtype=numpy.float64)
    examples_presvm = numpy.transpose(examples_presvm)

    feat_presvm = RealFeatures(examples_presvm)
    lab_presvm = Labels(numpy.array(labels_presvm))

    presvm_liblinear = LibLinear(1, feat_presvm, lab_presvm)
    presvm_liblinear.set_max_iterations(10000)
    presvm_liblinear.set_bias_enabled(False)
    presvm_liblinear.train()

    #############################################
    #    train domain-adaptation SVMs
    #############################################

    examples = numpy.array(examples, dtype=numpy.float64)
    examples = numpy.transpose(examples)

    feat = RealFeatures(examples)
    lab = Labels(numpy.array(labels))

    dasvm_liblinear = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_liblinear, B)
    dasvm_liblinear.set_bias_enabled(False)
    dasvm_liblinear.train()

    # Round-trip the pre-SVM through the pickling helper and train a second
    # adapted machine on the reloaded copy.
    helper.save("/tmp/svm", presvm_liblinear)
    presvm_pickle = helper.load("/tmp/svm")

    dasvm_pickle = DomainAdaptationSVMLinear(1.0, feat, lab, presvm_pickle, B)
    dasvm_pickle.set_bias_enabled(False)
    dasvm_pickle.train()

    # Also exercise save/load of the adapted machine itself; the reloaded
    # object is not used further.
    helper.save("/tmp/dasvm", dasvm_liblinear)
    dasvm_pickle2 = helper.load("/tmp/dasvm")

    #############################################
    #    load test data
    #############################################

    examples_test = numpy.array(examples_test, dtype=numpy.float64)
    examples_test = numpy.transpose(examples_test)
    feat_test = RealFeatures(examples_test)

    # check if pickled and unpickled classifiers behave the same
    out1 = dasvm_liblinear.classify(feat_test).get_labels()
    out2 = dasvm_pickle.classify(feat_test).get_labels()

    # compare outputs; an explicit comparison replaces the former
    # assert-inside-bare-except, which also hid unrelated errors
    all_agree = True
    for first, second in zip(out1, out2):
        if not abs(first - second) <= 0.001:
            all_agree = False
            print("(%.5f, %.5f)" % (first, second))

    # Only claim agreement when no pair differed.
    if all_agree:
        print("classification agrees.")
def SVMLinear(traindata, trainlabs, testdata, C=1.0, eps=1e-5, threads=1,
              getw=False, useLibLinear=False, useL1R=False):
    """
    Does efficient linear SVM using the OCAS subgradient solver (as interfaced
    by shogun), or optionally LibLinear.  Handles multiclass problems using a
    one-versus-all approach.

    NOTE: the training and testing data should both be scaled such that each
    dimension ranges from 0 to 1

    traindata = n by d training data array
    trainlabs = n-length training data label vector (should be normalized
        so labels range from 0 to c-1, where c is the number of classes)
    testdata = m by d array of data to test
    C = SVM regularization constant
    eps = precision parameter passed to the solver
    threads = number of threads to use
    getw = whether or not to return the learned weight vector from the SVM
        (note: only works for 2-class problems)
    useLibLinear = use LibLinear instead of SVMOcas (multiclass path only;
        the 2-class path always uses SVMOcas)
    useL1R = with useLibLinear, select the L1R_LR solver instead of
        L2R_L2LOSS_SVC; also makes the training data be passed transposed

    returns:
        m-length vector containing the predicted labels of the instances
            in testdata
        if problem is 2-class and getw == True, then a d-length weight
            vector is also returned
    """
    numc = trainlabs.max() + 1

    if not useL1R:
        ### traindata directly here for LR2_L2LOSS_SVC
        trainfeats = wrapFeatures(traindata, sparse=False)
    else:
        ### traindata.T here for L1R_LR
        trainfeats = wrapFeatures(traindata.T, sparse=False)
    testfeats = wrapFeatures(testdata, sparse=False)

    if numc > 2:
        preds = np.zeros(testdata.shape[0], dtype=np.int32)
        predprobs = np.zeros(testdata.shape[0])
        predprobs[:] = -np.inf

        for i in range(numc):
            # one-versus-all labels: +1 for class i, -1 for everything else
            tlabs = np.int32(trainlabs == i)
            tlabs[tlabs == 0] = -1
            labels = BinaryLabels(np.float64(tlabs))

            if useLibLinear:
                # Use LibLinear and set the solver type
                svm = LibLinear(C, trainfeats, labels)
                if useL1R:
                    # this is L1 regularization on logistic loss
                    svm.set_liblinear_solver_type(L1R_LR)
                else:
                    # most of my results were computed with this (ucf50)
                    svm.set_liblinear_solver_type(L2R_L2LOSS_SVC)
            else:
                # Or Use SVMOcas
                svm = SVMOcas(C, trainfeats, labels)

            svm.set_epsilon(eps)
            svm.parallel.set_num_threads(threads)
            svm.set_bias_enabled(True)

            # train
            svm.train()

            # test
            # NOTE(review): the argmax below relies on get_labels() returning
            # real-valued scores rather than +/-1 -- confirm for the shogun
            # version in use.
            res = svm.apply(testfeats).get_labels()

            # keep, per test instance, the class with the highest output so far
            thisclass = res > predprobs
            preds[thisclass] = i
            predprobs[thisclass] = res[thisclass]

        return preds

    # 2-class problem: map labels {0, 1} to {-1, +1}
    tlabs = trainlabs.copy()
    tlabs[tlabs == 0] = -1

    # Same label/apply API as the multiclass path above (the original mixed
    # Labels/classify here with BinaryLabels/apply there).
    labels = BinaryLabels(np.float64(tlabs))
    svm = SVMOcas(C, trainfeats, labels)
    svm.set_epsilon(eps)
    svm.parallel.set_num_threads(threads)
    svm.set_bias_enabled(True)

    # train
    svm.train()

    # test, then map outputs back to {0, 1}
    res = svm.apply(testfeats).get_labels()
    res[res > 0] = 1
    res[res <= 0] = 0

    if getw:
        return res, svm.get_w()
    return res
descs_neg, origdescs_neg = sel_descs(config, imnos, ratio_keep, use_masks2=2)

# SVM
# Balance the classes: draw as many random negatives as there are positives.
npos = descs.shape[0]
# list(...) so the index sequence is shuffleable on any Python version
sel = list(range(descs_neg.shape[0]))
random.shuffle(sel)
sel = sel[:npos]

# Stack positives over the selected negatives, normalise, and hand the
# transposed matrix to shogun.
feats = np.vstack((descs.astype('float64'), descs_neg[sel].astype('float64')))
feats = norm_descs(feats)
feats = RealFeatures(feats.T)

# +1 for every positive descriptor, -1 for every selected negative.
labels = Labels(np.hstack((np.ones((1, descs.shape[0])), -1 * np.ones((1, len(sel)))))[0])

svm = LibLinear(1, feats, labels)
#k = Chi2Kernel(feats,feats, 1.0, 100)
#svm = LibSVM(1, k, labels)
svm.train()

visualize_classes(svm, config, range(max(imnos), config.nimg))

print("Writting SVM")
# The with-block closes the file even if pickling raises.
with open('last_classifier.pkl', 'w') as pf:
    cPickle.dump(svm, pf)

#VISUALIZE CLASSES
#pylab.ion()
#for i in range(len(centroids[0])):
    #pylab.figure(2)
    #visualize_one_desc(centroids[0][i],nori=4,nspa=4)
    #pylab.draw()
#pylab.draw()