def fit(self, x, y_multiclass, kernel=poly(1), C=0.001):
    """Train every pairwise binary SVM by solving its dual problem.

    For each of the ``self.n_svm`` classifiers the dual variables
    ``self.a[k]`` are optimised with cvxpy and the bias ``self.b[k, 0]``
    is set midway between the two classes' decision values.

    Args:
        x: 2-D sample array (rows are samples); it is cast to float64 and
            indexed with a boolean row mask.
        y_multiclass: class labels; flattened to 1-D and cast to float64.
        kernel: callable taking two sample matrices and returning their
            kernel matrix.
        C: upper bound applied to every dual variable.

    Raises:
        AssertionError: if the dual objective is not DCP-compliant.
        RuntimeError: if the solver returns no solution for an SVM.
    """
    y_multiclass = y_multiclass.reshape(-1).astype(np.float64)
    self.x = sparse.COO(x.astype(np.float64))
    self.m = self.x.shape[0]
    self.y_multiclass = y_multiclass
    self.kernel = kernel
    self.C = C

    # One row of binary targets per SVM, as produced by self.cast.
    self.y_matrix = sparse.stack(
        [sparse.COO(self.cast(y_multiclass, k)) for k in range(self.n_svm)],
        0,
    )

    # Small diagonal term added to every Gram matrix; it depends only on
    # the sample count, so build it once instead of once per SVM.
    compensate = (sparse.eye(self.m) * 1e-7).astype(np.float64)

    for k in range(self.n_svm):
        print("training ",k,"th SVM in ",self.n_svm)
        y = self.y_matrix[k, :].reshape((-1, 1))
        y_dense = y.todense()

        # NOTE(review): the kernel is evaluated on label-scaled samples
        # (y_i * x_i). That matches y_i * y_j * K(x_i, x_j) for a linear
        # kernel; confirm it is intended for other kernels.
        yx = y * self.x
        G = kernel(yx, yx) + compensate  # Gram matrix

        objective = cp.Maximize(
            cp.sum(self.a[k]) - 0.5 * cp.quad_form(self.a[k], G.tocsr())
        )
        if not objective.is_dcp():
            print("Not solvable!")
            # Explicit raise so the check survives `python -O`.
            raise AssertionError("SVM dual objective is not DCP-compliant")

        constraints = [
            self.a[k] <= C,  # box constraint
            cp.sum(cp.multiply(self.a[k], y_dense)) == 0,
        ]
        prob = cp.Problem(objective, constraints)
        prob.solve()
        if self.a[k].value is None:
            # Without a solution the bias and a_matrix steps below would
            # fail with an opaque error on None.
            raise RuntimeError(
                "solver returned no solution for SVM %d (status: %s)"
                % (k, prob.status)
            )

        x_pos = x[y_dense[:, 0] == 1, :]
        x_neg = x[y_dense[:, 0] == -1, :]
        b_min = -np.min(self.wTx(k, x_pos)) if x_pos.shape[0] != 0 else 0
        b_max = -np.max(self.wTx(k, x_neg)) if x_neg.shape[0] != 0 else 0
        self.b[k, 0] = 0.5 * (b_min + b_max)

    # Stack the solved dual variables, one row per SVM.
    self.a_matrix = sparse.COO(
        np.stack([var.value.reshape(-1) for var in self.a], 0)
    )
def main():
    """Train and evaluate an SVM on the ionosphere data for each kernel.

    Reads ``ionosphere.data`` (comma separated, label in the last column),
    uses the first 200 rows for training and the rest for testing, then
    prints an evaluation summary per kernel.

    Raises:
        ValueError: if the number of feature vectors and labels differ.
    """
    data_file = 'ionosphere.data'
    data = np.genfromtxt(data_file, delimiter=',', dtype='|S10')
    instances = np.array(data[:, :-1], dtype='float')
    # The '|S10' dtype yields byte strings, so compare against a bytes
    # literal; comparing with the text 'g' never matches on Python 3.
    labels = np.array(data[:, -1] == b'g', dtype='int')

    n, d = instances.shape
    nlabels = labels.size

    if n != nlabels:
        raise ValueError('Expected same no. of feature vector as no. of labels')

    train_data = instances[:200]  # first 200 examples
    train_labels = labels[:200]  # first 200 labels

    test_data = instances[200:]  # example 201 onwards
    test_labels = labels[200:]  # label 201 onwards

    # parameters for the kernels we'll use
    gamma = 1.0 / d

    kernel_dict = {
        'linear': ker.linear,
        'polynomial': ker.poly(degree=3, gamma=gamma),
        'rbf/gaussian': ker.rbf(gamma=gamma),
        'sigmoid/arctan': ker.sigmoid(gamma=gamma),
    }

    for kernel_name in sorted(kernel_dict):
        # Single-argument print calls behave the same on Python 2 and 3.
        print('Training an SVM using the %s kernel...' % kernel_name)
        svm_classifier = svm_train(train_data, train_labels,
                                   kernel_dict[kernel_name])
        confusion_mat = evaluate_classifier(svm_classifier, test_data,
                                            test_labels)
        print_evaluation_summary(confusion_mat)
        print('')
def main():
    """Run the ionosphere SVM experiment once per available kernel.

    Loads ``ionosphere.data`` (comma separated, class label in the last
    column), trains on the first 200 rows, evaluates on the remainder and
    prints a summary for every kernel.

    Raises:
        ValueError: if the number of feature vectors and labels differ.
    """
    data_file = 'ionosphere.data'
    data = np.genfromtxt(data_file, delimiter=',', dtype='|S10')
    instances = np.array(data[:, :-1], dtype='float')
    # Fields are read as byte strings ('|S10'); a bytes literal is needed
    # for the comparison to match on Python 3 as well as Python 2.
    labels = np.array(data[:, -1] == b'g', dtype='int')

    n, d = instances.shape
    nlabels = labels.size
    if n != nlabels:
        raise ValueError('Expected same no. of feature vector as no. of labels')

    train_data = instances[:200]  # first 200 examples
    train_labels = labels[:200]  # first 200 labels
    test_data = instances[200:]  # example 201 onwards
    test_labels = labels[200:]  # label 201 onwards

    # parameters for the kernels we'll use
    gamma = 1.0/d
    kernel_dict = {'linear': ker.linear,
                   'polynomial': ker.poly(degree=3, gamma=gamma),
                   'rbf/gaussian': ker.rbf(gamma=gamma),
                   'sigmoid/arctan': ker.sigmoid(gamma=gamma)}

    for kernel_name in sorted(kernel_dict):
        # print() with one argument is valid on both Python 2 and 3.
        print('Training an SVM using the %s kernel...' % kernel_name)
        svm_classifier = svm_train(train_data, train_labels,
                                   kernel_dict[kernel_name])
        confusion_mat = evaluate_classifier(svm_classifier, test_data,
                                            test_labels)
        print_evaluation_summary(confusion_mat)
        print('')
def __init__(self, m, n_class):
    """Set up one binary SVM per unordered pair of classes.

    Args:
        m: number of samples; each dual variable has shape ``(m, 1)``.
        n_class: number of classes; ``n_class * (n_class - 1) // 2``
            pairwise SVMs are created.
    """
    self.m = m  # number of samples
    self.n_class = n_class
    self.n_svm = n_class * (n_class - 1)//2

    # multiplier: one dual-variable vector per SVM
    self.a = [cp.Variable(shape=(m,1),pos=True) for _ in range(self.n_svm)]
    # bias: one scalar per SVM
    self.b = np.zeros((self.n_svm,1))

    # kernel function should input x [n,d] y [m,d] output [n,m]
    # Example of kernels: poly(3)
    self.kernel = poly(1)

    # Binary setting for every SVM:
    # lookup_matrix[i, j] is the label SVM j should give
    # to a sample of class i (0 when SVM j ignores class i).
    self.lookup_matrix = np.zeros((self.n_class, self.n_svm))
    # The two classes SVM i concerns:
    # lookup_class[i] = [pos, neg]
    self.lookup_class = np.zeros((self.n_svm, 2))

    # Enumerate every (pos, neg) pair with pos < neg and fill both tables
    # in one pass.
    class_pairs = [
        (pos, neg)
        for pos in range(n_class - 1)
        for neg in range(pos + 1, n_class)
    ]
    for svm_idx, (pos, neg) in enumerate(class_pairs):
        self.lookup_class[svm_idx, 0] = pos
        self.lookup_class[svm_idx, 1] = neg
        self.lookup_matrix[pos, svm_idx] = 1.0
        self.lookup_matrix[neg, svm_idx] = -1.0
args = parser.parse_args() if args.dataset == "himoon": x, y, _, _, xtest, ytest = data_gen.himoon(n_samples=args.n_samples, n_dims=args.n_dims) elif args.dataset == "mmgauss": x, y, _, _, xtest, ytest = data_gen.mmgauss(n_samples=args.n_samples, n_dims=args.n_dims) else: raise ValueError("Unknown dataset") kernels = dict( zip( ["rbf", "linear", "poly", "sigmoid"], [k.rbf(), k.linear(), k.poly(), k.sigmoid()], )) try: kernel = kernels.get(args.kernel) except KeyError as e: kernel = "linear" df = pd.DataFrame() print(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: Running " + f"{args.dataset} with {args.dimension} dimensions and " + f"epsilon={args.epsilon} with {args.kernel} kernel for " + f"{args.repetitions} repetitions.") for run in range(args.repetitions):