def svm_chi2(training, labels, test, real):
    chi2 = chi2_kernel(training, gamma=0.02)
    chi2_test = chi2_kernel(test, training, gamma=0.02)
    model = SVC(C=1, kernel='precomputed', max_iter=-1)
    model.fit(chi2, labels)
    accuracy = model.score(chi2_test, real)
    print(accuracy)
def test_chi_square_kernel():
    rng = np.random.RandomState(0)
    X = rng.random_sample((5, 4))
    Y = rng.random_sample((10, 4))

    K_add = additive_chi2_kernel(X, Y)
    gamma = 0.1
    K = chi2_kernel(X, Y, gamma=gamma)
    assert_equal(K.dtype, float)
    for i, x in enumerate(X):
        for j, y in enumerate(Y):
            chi2 = -np.sum((x - y) ** 2 / (x + y))
            chi2_exp = np.exp(gamma * chi2)
            assert_almost_equal(K_add[i, j], chi2)
            assert_almost_equal(K[i, j], chi2_exp)

    # check diagonal is ones for data with itself
    K = chi2_kernel(Y)
    assert_array_equal(np.diag(K), 1)
    # check off-diagonal is < 1 but > 0:
    assert np.all(K > 0)
    assert np.all(K - np.diag(np.diag(K)) < 1)

    # check that float32 is preserved
    X = rng.random_sample((5, 4)).astype(np.float32)
    Y = rng.random_sample((10, 4)).astype(np.float32)
    K = chi2_kernel(X, Y)
    assert_equal(K.dtype, np.float32)

    # check integer type gets converted,
    # check that zeros are handled
    X = rng.random_sample((10, 4)).astype(np.int32)
    K = chi2_kernel(X, X)
    assert np.isfinite(K).all()
    assert_equal(K.dtype, float)

    # check that kernel of similar things is greater than dissimilar ones
    X = [[.3, .7], [1., 0]]
    Y = [[0, 1], [.9, .1]]
    K = chi2_kernel(X, Y)
    assert_greater(K[0, 0], K[0, 1])
    assert_greater(K[1, 1], K[1, 0])

    # test negative input
    assert_raises(ValueError, chi2_kernel, [[0, -1]])
    assert_raises(ValueError, chi2_kernel, [[0, -1]], [[-1, -1]])
    assert_raises(ValueError, chi2_kernel, [[0, 1]], [[-1, -1]])

    # different n_features in X and Y
    assert_raises(ValueError, chi2_kernel, [[0, 1]], [[.2, .2, .6]])

    # sparse matrices
    assert_raises(ValueError, chi2_kernel, csr_matrix(X), csr_matrix(Y))
    assert_raises(ValueError, additive_chi2_kernel,
                  csr_matrix(X), csr_matrix(Y))
def svm_with_cv(data, labels):
    """SVM with chi2 kernel and 5-fold cross validation."""
    best_params, best_cv_score = grid_cv(data, labels)
    if ARGS.verbose:
        print('CV:', best_cv_score, best_params['clf__C'])

    svm_clf = SVC(C=best_params['clf__C'], kernel='precomputed')
    gram_matrix = chi2_kernel(data)
    svm_clf = svm_clf.fit(gram_matrix, labels)

    # Train a logistic regression to convert the output of
    # the SVM into probabilities
    out = svm_clf.decision_function(gram_matrix)
    out = out.reshape(-1, 1)
    # print('out:', out.shape, 'labels:', labels.shape)
    lr_clf = LogisticRegression()
    lr_clf.fit(out, labels)
    if ARGS.verbose:
        lr_pred = lr_clf.predict(out)
        print("LR:", np.mean(labels == lr_pred))
    return svm_clf, lr_clf
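# Note: the decision_function + LogisticRegression step above is hand-rolled
# Platt scaling. A minimal alternative sketch (not from the original code):
# SVC accepts chi2_kernel as a callable and can calibrate probabilities
# itself via probability=True.
from sklearn.svm import SVC
from sklearn.metrics.pairwise import chi2_kernel

def svm_with_builtin_platt(data, labels, C=1.0):
    # probability=True runs internal cross-validated Platt scaling,
    # replacing the manual LogisticRegression step.
    clf = SVC(C=C, kernel=chi2_kernel, probability=True)
    clf.fit(data, labels)
    return clf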
def _get_kernel_matrix(self, X1, X2):
    # K is a len(X1)-by-len(X2) matrix
    if self._kernel == 'rbf':
        K = pairwise.rbf_kernel(X1, X2, gamma=self._gamma)
    elif self._kernel == 'poly':
        K = pairwise.polynomial_kernel(X1, X2, degree=self._degree,
                                       gamma=self._gamma, coef0=self._coef0)
    elif self._kernel == 'linear':
        K = pairwise.linear_kernel(X1, X2)
    elif self._kernel == 'laplacian':
        K = pairwise.laplacian_kernel(X1, X2, gamma=self._gamma)
    elif self._kernel == 'chi2':
        K = pairwise.chi2_kernel(X1, X2, gamma=self._gamma)
    elif self._kernel == 'additive_chi2':
        K = pairwise.additive_chi2_kernel(X1, X2)
    elif self._kernel == 'sigmoid':
        K = pairwise.sigmoid_kernel(X1, X2, gamma=self._gamma,
                                    coef0=self._coef0)
    else:
        print('[Error] Unknown kernel')
        K = None
    return K
def process_similarity(self, similarity):
    if similarity == "cosine":
        x, y = np.triu_indices(self._similarity_matrix.shape[0], k=1)
        self._similarity_matrix[x, y] = cosine_similarity(
            self._data.sp_i_train_ratings)[x, y]
    elif similarity == "dot":
        self._similarity_matrix = (self._data.sp_i_train_ratings
                                   @ self._data.sp_i_train_ratings.T).toarray()
    elif similarity == "euclidean":
        x, y = np.triu_indices(self._similarity_matrix.shape[0], k=1)
        self._similarity_matrix[x, y] = (
            1 / (1 + euclidean_distances(self._data.sp_i_train_ratings)))[x, y]
    elif similarity == "manhattan":
        x, y = np.triu_indices(self._similarity_matrix.shape[0], k=1)
        self._similarity_matrix[x, y] = (
            1 / (1 + manhattan_distances(self._data.sp_i_train_ratings)))[x, y]
    elif similarity == "haversine":
        x, y = np.triu_indices(self._similarity_matrix.shape[0], k=1)
        self._similarity_matrix[x, y] = (
            1 / (1 + haversine_distances(self._data.sp_i_train_ratings)))[x, y]
    elif similarity == "chi2":
        x, y = np.triu_indices(self._similarity_matrix.shape[0], k=1)
        self._similarity_matrix[x, y] = (
            1 / (1 + chi2_kernel(self._data.sp_i_train_ratings)))[x, y]
    elif similarity in ['cityblock', 'l1', 'l2']:
        x, y = np.triu_indices(self._similarity_matrix.shape[0], k=1)
        self._similarity_matrix[x, y] = (
            1 / (1 + pairwise_distances(self._data.sp_i_train_ratings,
                                        metric=similarity)))[x, y]
    elif similarity in ['braycurtis', 'canberra', 'chebyshev', 'correlation',
                        'dice', 'hamming', 'jaccard', 'kulsinski',
                        'mahalanobis', 'minkowski', 'rogerstanimoto',
                        'russellrao', 'seuclidean', 'sokalmichener',
                        'sokalsneath', 'sqeuclidean', 'yule']:
        x, y = np.triu_indices(self._similarity_matrix.shape[0], k=1)
        self._similarity_matrix[x, y] = (
            1 / (1 + pairwise_distances(self._data.sp_i_train_ratings.toarray(),
                                        metric=similarity)))[x, y]
    else:
        raise Exception("Similarity not implemented")
def _apply_kernel(self, x, y):
    """Apply the selected kernel function to the data."""
    if self.kernel == 'linear':
        phi = linear_kernel(x, y)
    elif self.kernel == 'rbf':
        phi = rbf_kernel(x, y, self.coef1)
    elif self.kernel == 'poly':
        phi = polynomial_kernel(x, y, self.degree, self.coef1, self.coef0)
    elif self.kernel == 'sigmoid':
        coef0 = self.coef0 if self.coef0 is not None else 1
        phi = sigmoid_kernel(x, y, self.gamma, coef0)
    elif self.kernel == 'chi2':
        gamma = self.gamma if self.gamma is not None else 1
        # pass the resolved local default, not self.gamma (which may be None)
        phi = chi2_kernel(x, y, gamma)
    elif self.kernel == 'laplacian':
        phi = laplacian_kernel(x, y, self.gamma)
    elif callable(self.kernel):
        phi = self.kernel(x, y)
        if len(phi.shape) != 2:
            raise ValueError(
                "Custom kernel function did not return 2D matrix")
        if phi.shape[0] != x.shape[0]:
            raise ValueError(
                "Custom kernel function did not return matrix with rows"
                " equal to number of data points.")
    else:
        raise ValueError("Kernel selection is invalid.")

    if self.bias_used:
        phi = np.append(phi, np.ones((phi.shape[0], 1)), axis=1)
    return phi
def myKernel(x, y):
    gamma_hist = 1

    now = time()
    # chi2 on the three histogram blocks of the feature vector
    hist_kernel = chi2_kernel(x[:, :25], y[:, :25], gamma_hist)
    hist_kernel += chi2_kernel(x[:, 25:33], y[:, 25:33], gamma_hist)
    hist_kernel += chi2_kernel(x[:, 33:41], y[:, 33:41], gamma_hist)
    print(time() - now, flush=True)

    now = time()
    # RBF on the remaining (non-histogram) features
    rbf_kern = rbf_kernel(x[:, 41:], y[:, 41:], 1 / 40)
    print(time() - now, flush=True)

    return hist_kernel + rbf_kern
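# A callable such as myKernel can be passed straight to SVC; a minimal usage
# sketch (X_train, y_train, X_test are hypothetical placeholders):
from sklearn.svm import SVC

clf = SVC(kernel=myKernel, C=1.0)
# SVC evaluates myKernel(X_train, X_train) at fit time and
# myKernel(X_test, X_train) at predict time.
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)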
def cross_validate(X, y):
    svc = svm.SVC(kernel='linear', C=0.0625)
    lin_svc = svm.LinearSVC(C=4.0, dual=False)
    rbf_svc = svm.SVC(kernel='rbf', gamma=0.0009765625, C=32.0)
    poly_svc = svm.SVC(kernel='poly', degree=2, C=2048.0)
    hist_svc = svm.SVC(kernel='precomputed')
    chi2_svc = svm.SVC(kernel='precomputed')
    # random_forest = RandomForestClassifier(n_estimators=200, max_features=50,
    #                                        min_samples_split=20, random_state=100)

    # 10-fold cross validation
    for model in [svc, lin_svc, rbf_svc, poly_svc]:
        print(model)
        scores = cross_val_score(model, X, y, cv=10)
        print(scores)
        print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    print(hist_svc)
    K = hist_intersection(X, X)
    scores = cross_val_score(hist_svc, K, y, cv=10)
    print(scores)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

    print(chi2_svc)
    K = chi2_kernel(X, gamma=0.3)
    scores = cross_val_score(chi2_svc, K, y, cv=10)
    print(scores)
    print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
def evaluate_chi2_kernel(data, gamma, C):
    train_data, train_labels, test_data, test_labels = data

    log.message('Training chi^2 kernel ...')
    start = time.time()
    svc = svm.SVC(kernel='precomputed', C=C)
    train_kernel = chi2_kernel(train_data, gamma=gamma)
    svc.fit(train_kernel, train_labels)
    log.time(time.time() - start)

    log.message('Evaluating ...')
    test_kernel = chi2_kernel(test_data, train_data, gamma=gamma)
    predicted_labels = svc.predict(test_kernel)
    log.message(classification_report(test_labels, predicted_labels))
    total_accuracy = (np.count_nonzero(np.array(test_labels) ==
                                       np.array(predicted_labels))
                      / len(test_labels))
    log.accuracy(total_accuracy)
def createMatrixLocLoc(self):
    matrix = []
    t = Task5()
    for i in range(1, len(t.locations) + 1):
        listofmax = []
        for x in t.Models:
            listofmaximages = []
            for j in range(1, len(t.locations) + 1):
                list1 = t.searchFirstFile(i, x)
                list2 = t.searchFirstFile(j, x)
                if len(list1) == 0 or len(list2) == 0:
                    listofmaximages.append(0)
                    continue
                df = np.array(list1)
                df2 = np.array(list2)
                if x in ["CM3x3", "GLRLM", "CN3x3", "CN", "CM"]:
                    dist = euclidean_distances(df[:, 1:], df2[:, 1:])
                elif x in ["LBP", "GLRLM3x3", "LBP3x3"]:
                    dist = chi2_kernel(df[:, 1:], df2[:, 1:])
                elif x in ["HOG", "CSD"]:
                    dist = cosine_distances(df[:, 1:], df2[:, 1:])
                min_dist = dist.min(axis=1)  # renamed to avoid shadowing builtin min
                s = np.sum(np.array(min_dist))
                listofmaximages.append(s)
            listofmax.append(listofmaximages)
        listofm = np.array(listofmax)
        indexes = np.argsort(listofm, axis=1)
        locations = np.argsort(indexes, axis=1)
        sumlocation = np.sum(locations, axis=0)
        matrix.append(sumlocation)
        print(i)
    return matrix
def build_train_kernels(categories, datamanager):
    kernels = []
    gammas = []
    for c in categories:
        X = datamanager.build_sample_matrix("train", c)
        gamma = approximate_gamma(X)
        gammas.append(gamma)
        kernels.append(chi2_kernel(X, X, gamma=1.0 / gamma))
    return kernels, gammas
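# approximate_gamma is not shown in this snippet. A plausible reconstruction
# (an assumption, not the original helper) is the mean additive chi2 distance
# over the training sample, matching the gamma=1.0/gamma usage above:
import numpy as np
from sklearn.metrics.pairwise import additive_chi2_kernel

def approximate_gamma(X):
    # additive_chi2_kernel returns -sum((x - y)^2 / (x + y)); negate to get
    # the chi2 distance and average over all training pairs.
    return (-additive_chi2_kernel(X)).mean()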
def histogram_similarity(hist1, hist2):
    """Calculate the similarity between two molecule histograms.

    Args:
        hist1: molecule histogram
        hist2: molecule histogram
    """
    return pairwise.chi2_kernel(hist1, hist2)
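# A minimal usage sketch with two toy normalized histograms (values invented
# for illustration); chi2_kernel returns a (1, 1) similarity matrix here:
import numpy as np
from sklearn.metrics import pairwise

hist1 = np.array([[0.2, 0.5, 0.3]])
hist2 = np.array([[0.25, 0.45, 0.3]])
print(histogram_similarity(hist1, hist2))  # close to 1.0 for similar histograms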
def svms(trainData, testData, trainOutcomes):
    linear = SVC(kernel='linear', class_weight='balanced', probability=True)
    linear.fit(trainData, trainOutcomes)
    svm_linear_posterior = linear.predict_proba(testData)

    rbf = SVC(class_weight='balanced', probability=True)
    rbf.fit(trainData, trainOutcomes)
    svm_rbf_posterior = rbf.predict_proba(testData)

    # chi2_kernel values used as a precomputed Gram matrix
    trainDistances = chi2_kernel(trainData, trainData)
    testDistances = chi2_kernel(testData, trainData)

    svc = SVC(kernel='precomputed', class_weight='balanced', probability=True)
    svc.fit(trainDistances, trainOutcomes)
    chi2svm_posterior = svc.predict_proba(testDistances)

    return svm_linear_posterior, svm_rbf_posterior, chi2svm_posterior
def similarityEuclidean(self, LocationId, k):
    t = Task5()
    listofmax = []
    distSum = {}
    listofmaximages = []
    for x in t.Models:
        for j in range(1, 31):
            if j != LocationId:
                list1 = t.searchFirstFile(LocationId, x)
                list2 = t.searchFirstFile(j, x)
                df = np.array(list1)
                df2 = np.array(list2)
                if x in ["CM3x3", "GLRLM", "CN3x3", "CN", "CM"]:
                    dist = euclidean_distances(df[:, 1:], df2[:, 1:])
                elif x in ["LBP", "GLRLM3x3", "LBP3x3"]:
                    dist = chi2_kernel(df[:, 1:], df2[:, 1:])
                elif x in ["HOG", "CSD"]:
                    dist = cosine_distances(df[:, 1:], df2[:, 1:])
                nearest = dist.argmin(axis=1)  # closest image in df2 per row
                s = 0
                for i in range(len(df)):
                    s += dist[i, nearest[i]]
                distSum = {}
                distSum["value"] = s
                distSum["Model"] = x
                distSum["Location"] = j
                listofmaximages.append(distSum)
        listofmax.append(sorted(listofmaximages, key=lambda entry: entry["value"]))
        listofmaximages = []

    rankmap = {}
    for i in range(31):
        if i != LocationId:
            rank = 0
            for ranking in listofmax:
                count = 1
                for x in ranking:
                    if i == x["Location"]:
                        rank += count
                        break
                    else:
                        count += 1
            rankmap[i + 1] = rank
    sorted_dict = sorted(rankmap.items(), key=operator.itemgetter(1))
    for i in range(k):
        print("Matched Locations")
        print(t.locations[sorted_dict[i][0]])
        for ranking in listofmax:
            for x in ranking:
                if x["Location"] == sorted_dict[i][0]:
                    print("Model: ", end="")
                    print(x["Model"])
                    print("Value: ", end="")
                    print(x["value"])
def calc_gaussian_sim(data_matrix, method):
    if method == "rbf":
        return rbf_kernel(data_matrix)
    elif method == "chi2":
        return chi2_kernel(data_matrix)
    elif method == "laplacian":
        return laplacian_kernel(data_matrix)
    elif method == "sigmoid":
        return sigmoid_kernel(data_matrix)
    else:
        raise ValueError("Wrong method parameter in calc_gaussian_sim()")
def transform(self, X, Y):
    if self.type == 'rbf':
        return rbf_kernel(X, Y, self.gamma)[0]
    elif self.type == 'Chi2':
        return chi2_kernel(X, Y, self.gamma)[0]
    elif self.type == 'AChi2':
        return -additive_chi2_kernel(X, Y)[0]
    elif self.type == 'laplacian':
        return laplacian_kernel(X, Y, self.gamma)[0]
    elif self.type == 'sigmoid':
        return sigmoid_kernel(X, Y, self.gamma, self.coef0)[0]
def __init__(self, kernel_name='rbf', type='classification'):
    self.kernel_name = kernel_name
    self.type = type
    self.kernel_dict = {
        "rbf": lambda x, y=None: rbf_kernel(x, y),
        "linear": lambda x, y=None: linear_kernel(x, y),
        "add_chi2": lambda x, y=None: additive_chi2_kernel(x, y),
        "chi2": lambda x, y=None: chi2_kernel(x, y),
        "poly": lambda x, y=None: polynomial_kernel(x, y),
        "laplace": lambda x, y=None: laplacian_kernel(x, y),
    }
def coding_unified(self, codebook_, feats_):
    feats = feats_.copy()
    codebook = codebook_.copy()
    # feats = np.concatenate(feats)
    if self.debug:
        print('\t- coding features ...')
        sys.stdout.flush()

    if 'hard' in self.coding_poling:
        coded_feats = np.zeros((feats.shape[:2] + (self.codebook_size,)),
                               dtype=int)
        feats = feats.reshape(feats.shape[0], feats.shape[1], -1)
        idxs_cuboid = np.arange(feats.shape[1])
        codebook -= codebook.min(axis=1).reshape(-1, 1)
        for sample in range(feats.shape[0]):
            feats[sample] -= feats[sample].min(axis=1).reshape(-1, 1)
            idxs = np.argmin(pairwise_distances(feats[sample], codebook,
                                                metric="cosine"), axis=1)
            coded_feats[sample, idxs_cuboid, idxs] = 1

    elif 'soft' in self.coding_poling:
        coded_feats = np.zeros((feats.shape[:2] + (self.codebook_size,)),
                               dtype=float)
        beta = 1.0 / (2.0 * self.variance)
        codebook -= codebook.min(axis=1).reshape(-1, 1)
        for sample in range(feats.shape[0]):
            feats[sample] -= feats[sample].min(axis=1).reshape(-1, 1)
            coded_feats[sample] = chi2_kernel(feats[sample], codebook,
                                              gamma=beta)
            cfnorm = coded_feats[sample].sum(axis=1).reshape(-1, 1)
            cfnorm[cfnorm == 0] = 1.
            coded_feats[sample] /= cfnorm
    else:
        raise ValueError('Coding method not implemented')

    return coded_feats
def calculateMultipleKernel(x, y):
    theta = random.sample(range(1, 47), 46)  # given a random theta for now

    # Convert our 2d arrays to numpy arrays
    x = np.array(x)
    y = np.array(y)

    # Reshape the array-like input vectors since we only have one sample
    x = x.reshape(1, -1)
    y = y.reshape(1, -1)

    # Variables to aggregate the kernel result
    kernelResult = 0
    index = 0

    for i in range(0, 3):
        kernelResult += theta[index] * additive_chi2_kernel(x, y)
        index += 1
    for i in range(0, 3):
        kernelResult += theta[index] * chi2_kernel(x, y, theta[index + 1])
        index += 2
    for i in range(0, 3):
        kernelResult += theta[index] * cosine_similarity(x, y)
        index += 1
    for i in range(0, 3):
        kernelResult += theta[index] * linear_kernel(x, y)
        index += 1
    for i in range(0, 3):
        kernelResult += theta[index] * polynomial_kernel(
            x, y, theta[index + 1], theta[index + 2], theta[index + 3])
        index += 4
    for i in range(0, 3):
        kernelResult += theta[index] * rbf_kernel(x, y, theta[index + 1])
        index += 2
    for i in range(0, 3):
        kernelResult += theta[index] * laplacian_kernel(x, y, theta[index + 1])
        index += 2
    for i in range(0, 3):
        kernelResult += theta[index] * sigmoid_kernel(x, y, theta[index + 1])
        index += 2

    return kernelResult
def chi2Vector(imgG, imgQ, windowSize):
    hImgG = ce.horizontalS2(imgG, 0, imgG.shape[1], 0, imgG.shape[0], 4)
    vImgG = ce.verticalS2(imgG, 0, imgG.shape[1], 0, imgG.shape[0], 4)
    hImgQ = ce.horizontalS2(imgQ, 0, imgQ.shape[1], 0, imgQ.shape[0], 4)
    vImgQ = ce.verticalS2(imgQ, 0, imgQ.shape[1], 0, imgQ.shape[0], 4)

    histG = HoGLBP([hImgG, vImgG], imgG, windowSize)
    histQ = HoGLBP([hImgQ, vImgQ], imgQ, windowSize)

    # the first 1200 values are HoG bins, the remainder LBP bins
    fFHoG = histG["HoG"][:1200]
    fFLBP = histG["HoG"][1200:]
    sFHoG = histQ["HoG"][:1200]
    sFLBP = histQ["HoG"][1200:]

    # per-cell chi2: 150 HoG cells of 8 bins, 30 LBP cells of 10 bins
    HoG_chi2 = [
        chi2_kernel([fFHoG[i * 8:(i + 1) * 8]],
                    [sFHoG[i * 8:(i + 1) * 8]]).ravel()[0]
        for i in range(0, 150)
    ]
    LBP_chi2 = [
        chi2_kernel([fFLBP[i * 10:(i + 1) * 10]],
                    [sFLBP[i * 10:(i + 1) * 10]]).ravel()[0]
        for i in range(0, 30)
    ]

    t1H = np.concatenate(histG["imageSurroundingPointsH"], axis=1)
    t2H = np.concatenate(histQ["imageSurroundingPointsH"], axis=1)
    tH = np.concatenate((t1H, t2H), axis=0)
    t1V = np.concatenate(histG["imageSurroundingPointsV"], axis=1)
    t2V = np.concatenate(histQ["imageSurroundingPointsV"], axis=1)
    tV = np.concatenate((t1V, t2V), axis=0)
    tT = np.concatenate((tH, tV), axis=0)

    return {
        "chi2_vector": np.concatenate((HoG_chi2, LBP_chi2), axis=0).tolist(),
        "combined_image": tT
    }
def chi_kernel(x1, x2, sigma=0.5):
    #######################################################################
    # TODO                                                                #
    # Compute chi-squared kernel                                          #
    # 1 line of code expected                                             #
    #######################################################################
    k = chi2_kernel(x1, x2, gamma=sigma)
    #######################################################################
    # end of code                                                         #
    #######################################################################
    return k
def tracks_2_kernel(tracks_X, fields, tracks_Y=None, gamma=1.0, weights=None):
    """Compute a chi2 kernel from trajectories.

    Parameters
    ----------
    tracks_X : structured array
    tracks_Y : structured array or None
    gamma : float
    fields : sequence of str
        Fields from trajectories to use for building the kernel.
    weights : sequence of float or None
        Weights for fields.

    Returns
    -------
    K : ndarray of shape (len(tracks_X), len(tracks_Y))
    """
    if not weights:
        weights = np.ones(shape=(len(fields),))
    weights = weights / np.sum(weights)

    if tracks_Y is None:
        tracks_Y = tracks_X

    K = np.zeros(shape=(len(tracks_X), len(tracks_Y)))
    print(humansize(K.nbytes))
    # print('Kernel {} - {}'.format(K.shape, humansize(K.nbytes)))
    for name, weight in zip(fields, weights):
        print('Kernel {} - {}'.format(K.shape, name))
        X = tracks_X[name]
        Y = tracks_Y[name]
        if name == 'trajectory':
            X = X.reshape((len(X), -1))
            Y = Y.reshape((len(Y), -1))
            # rescale coordinates from [-1, 1] to [0, 1]; chi2 requires
            # nonnegative input
            old_min, old_max = -1, 1
            new_min, new_max = 0, 1
            X = ((X - old_min) / (old_max - old_min)) * (new_max - new_min) + new_min
            Y = ((Y - old_min) / (old_max - old_min)) * (new_max - new_min) + new_min
        chi2 = pairwise.chi2_kernel(X, Y, gamma=gamma)
        # mu = 1.0 / kernel.mean()
        # K_train += weight * np.exp(-mu * kernel)
        K += weight * chi2
    return K
def test_nystroem_default_parameters():
    rnd = np.random.RandomState(42)
    X = rnd.uniform(size=(10, 4))

    # rbf kernel should behave as gamma=None by default
    # aka gamma = 1 / n_features
    nystroem = Nystroem(n_components=10)
    X_transformed = nystroem.fit_transform(X)
    K = rbf_kernel(X, gamma=None)
    K2 = np.dot(X_transformed, X_transformed.T)
    assert_array_almost_equal(K, K2)

    # chi2 kernel should behave as gamma=1 by default
    nystroem = Nystroem(kernel='chi2', n_components=10)
    X_transformed = nystroem.fit_transform(X)
    K = chi2_kernel(X, gamma=1)
    K2 = np.dot(X_transformed, X_transformed.T)
    assert_array_almost_equal(K, K2)
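# Beyond this test, Nystroem makes the chi2 kernel usable with linear models
# on larger datasets; a minimal sketch (synthetic data, kept nonnegative
# since chi2 requires it):
import numpy as np
from sklearn.kernel_approximation import Nystroem
from sklearn.linear_model import SGDClassifier
from sklearn.pipeline import make_pipeline

rng = np.random.RandomState(0)
X = rng.uniform(size=(100, 4))   # nonnegative features
y = rng.randint(0, 2, size=100)

# Map into an approximate chi2 feature space, then fit a linear classifier.
clf = make_pipeline(Nystroem(kernel='chi2', n_components=50),
                    SGDClassifier(max_iter=1000, tol=1e-3))
clf.fit(X, y)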
def test_1():
    emb1 = load_embeddings("embeddings_elias.pkl")
    emb2 = load_embeddings("embeddings_matthias.pkl")
    emb3 = load_embeddings("embeddings_laia.pkl")
    emb_lfw = load_embeddings("embeddings_lfw.pkl")

    # prepare ds
    np.random.shuffle(emb2)
    if len(emb2) % 2 != 0:
        emb2 = emb2[:-1]
    split_set = np.array_split(emb2, 2)
    X_train = split_set[0]
    X_test = split_set[1]
    X_outliers = emb_lfw

    K = chi2_kernel(X_train, gamma=.5)
    print(K)
def _kernel(self, X, Y=None):
    kernel = None
    if self.kernel == 'chi2':
        kernel = chi2_kernel(X, Y, gamma=self.gamma)
    elif self.kernel == 'laplacian':
        kernel = laplacian_kernel(X, Y, gamma=self.gamma)
    elif self.kernel == 'linear':
        kernel = linear_kernel(X, Y)
    elif self.kernel == 'polynomial':
        kernel = polynomial_kernel(X, Y, degree=self.degree,
                                   gamma=self.gamma, coef0=self.coef0)
    elif self.kernel == 'rbf':
        kernel = rbf_kernel(X, Y, gamma=self.gamma)
    elif self.kernel == 'sigmoid':
        kernel = sigmoid_kernel(X, Y, gamma=self.gamma, coef0=self.coef0)
    return kernel
def grid_cv(X, y, k=5):
    """Grid search over the parameter space with k-fold cross validation."""
    K = chi2_kernel(X)
    pipeline = Pipeline([
        ('clf', SVC(kernel='precomputed')),
    ])
    params = {
        'clf__C': (1e-2, 1e-1, 1, 1e+1, 1e+2),
    }
    grid_search = GridSearchCV(pipeline, params, n_jobs=1, verbose=0, cv=k)
    grid_search.fit(K, y)
    best_params = grid_search.best_estimator_.get_params()
    return best_params, grid_search.best_score_
def predict_features(train_data, test_data, test_labels, clfs):
    """Predict class probabilities for a given set of test features.

    SVM followed by logistic regression.
    """
    svm_clf, lr_clf = clfs
    gram_matrix = chi2_kernel(test_data, train_data)

    # y_out has perpendicular distances between decision boundary
    # and each point
    y_out = svm_clf.decision_function(gram_matrix)
    y_out = y_out.reshape(-1, 1)

    # convert the above distances into probabilities
    y_prob = lr_clf.predict_proba(y_out)
    if ARGS.verbose:
        y_pred = lr_clf.predict(y_out)
        print("LR test acc:", np.mean(y_pred == test_labels))
    return y_prob
def get_kernel_matrix(X1, X2=None, kernel='rbf', gamma=1, degree=3, coef0=1):
    """Obtain the N1xN2 kernel matrix from N1xM and N2xM data matrices."""
    if kernel == 'rbf':
        K = pairwise.rbf_kernel(X1, X2, gamma=gamma)
    elif kernel == 'poly':
        K = pairwise.polynomial_kernel(X1, X2, degree=degree,
                                       gamma=gamma, coef0=coef0)
    elif kernel == 'linear':
        K = pairwise.linear_kernel(X1, X2)
    elif kernel == 'laplacian':
        K = pairwise.laplacian_kernel(X1, X2, gamma=gamma)
    elif kernel == 'chi2':
        K = pairwise.chi2_kernel(X1, X2, gamma=gamma)
    elif kernel == 'additive_chi2':
        K = pairwise.additive_chi2_kernel(X1, X2)
    elif kernel == 'sigmoid':
        K = pairwise.sigmoid_kernel(X1, X2, gamma=gamma, coef0=coef0)
    else:
        print('[Error] Unknown kernel')
        K = None
    return K
from sklearn.svm import SVC
from sklearn.metrics.pairwise import chi2_kernel

'''
The chi-squared kernel is a very popular choice for training non-linear SVMs
in computer vision applications.
'''
X = [[0, 1], [1, 0], [.2, .8], [.7, .3]]
y = [0, 1, 0, 1]
K = chi2_kernel(X, gamma=.5)

# Usage 1: pass the precomputed kernel to an sklearn.svm.SVC
# with kernel="precomputed":
svm = SVC(kernel='precomputed').fit(K, y)
svm.predict(K)

# Usage 2: use chi2_kernel directly as the kernel argument:
svm = SVC(kernel=chi2_kernel).fit(X, y)
svm.predict(X)
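# For unseen samples, the precomputed variant needs the kernel between test
# and training data; a minimal continuation of the example above (X_new is
# an invented test set):
X_new = [[.1, .9], [.8, .2]]
K_new = chi2_kernel(X_new, X, gamma=.5)  # shape (n_test, n_train)
svm = SVC(kernel='precomputed').fit(K, y)
print(svm.predict(K_new))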
def coding_class_based(self, codebook_pos_, codebook_neg_, feats_):
    feats = feats_.copy()
    codebook_pos = codebook_pos_.copy()
    codebook_neg = codebook_neg_.copy()
    if self.debug:
        print('\t- coding features ...')
        sys.stdout.flush()

    if 'hard' in self.coding_poling:
        print("\t- feats.shape", feats.shape)
        coded_feats = np.zeros(
            (feats.shape[:2] + (self.codebook_size + self.codebook_size,)),
            dtype=int)
        feats = feats.reshape(feats.shape[0], feats.shape[1], -1)
        idxs_cuboid = np.arange(feats.shape[1])
        codebook_pos -= codebook_pos.min(axis=1).reshape(-1, 1)
        codebook_neg -= codebook_neg.min(axis=1).reshape(-1, 1)
        for sample in range(feats.shape[0]):
            feats[sample] -= feats[sample].min(axis=1).reshape(-1, 1)
            dists_pos = pairwise_distances(feats[sample], codebook_pos,
                                           metric="cosine")
            dists_neg = pairwise_distances(feats[sample], codebook_neg,
                                           metric="cosine")
            dists = np.hstack((dists_neg, dists_pos))
            idxs = np.argmin(dists, axis=1)
            coded_feats[sample, idxs_cuboid, idxs] = 1

    elif 'soft' in self.coding_poling:
        print("\t- feats.shape", feats.shape)
        coded_feats = np.zeros(
            (feats.shape[:2] + (self.codebook_size + self.codebook_size,)),
            dtype=float)
        feats = feats.reshape(feats.shape[0], feats.shape[1], -1)
        beta = 1.0 / (2.0 * self.variance)
        codebook_pos -= codebook_pos.min(axis=1).reshape(-1, 1)
        codebook_neg -= codebook_neg.min(axis=1).reshape(-1, 1)
        for sample in range(feats.shape[0]):
            feats[sample] -= feats[sample].min(axis=1).reshape(-1, 1)
            dists_pos = chi2_kernel(feats[sample], codebook_pos, gamma=beta)
            dists_neg = chi2_kernel(feats[sample], codebook_neg, gamma=beta)
            cfnorm = dists_pos.sum(axis=1).reshape(-1, 1)
            cfnorm[cfnorm == 0] = 1.
            dists_pos /= cfnorm
            cfnorm = dists_neg.sum(axis=1).reshape(-1, 1)
            cfnorm[cfnorm == 0] = 1.
            dists_neg /= cfnorm
            coded_feats[sample] = np.hstack((dists_neg, dists_pos))
    else:
        raise ValueError('Coding method not implemented')

    return coded_feats
import pandas as pd
from sklearn.metrics.pairwise import chi2_kernel

# Rank all rows of X by chi2 similarity to the first row.
k_sim = chi2_kernel(X[0].reshape(1, -1), X)
kf = pd.DataFrame(k_sim).T
kf.columns = ['similarity']
kf.sort_values('similarity', ascending=False)
import numpy as np
from sklearn.metrics.pairwise import chi2_kernel
import code

# Load name/count data, drop the name column, and row-normalize the counts.
l = np.genfromtxt('./names_and_counts.txt', dtype=str, delimiter=',')
l = l[:, 1:]
l = l.astype(float)
row_sums = l.sum(axis=1)
new_matrix = l / row_sums[:, np.newaxis]

xx, yy = new_matrix.shape
simMatrix = chi2_kernel(new_matrix)
code.interact(local=locals())  # drop into a shell to inspect simMatrix
np.savetxt("chi_sim.csv", simMatrix, delimiter=",")
def compute_kernel(X):
    print('computing kernel...')
    K = chi2_kernel(X)
    print('precomputed kernel')
    return K  # return the Gram matrix to the caller
def main(top_folder, image_folder):
    init_time = timeit.default_timer()

    # Download labels for pictures
    print('Getting Labels --------------------------------------------------')
    mat = loadmat(top_folder + '/' + top_folder + 'imagelabels.mat')
    labels = mat['labels'][0].tolist()
    print('Done')
    print('------------------------------------------------------------------\n')

    print('Getting Info on Train, Val, Test Data --------------------------')
    mat = loadmat(top_folder + '/' + top_folder + 'datasplits.mat')
    trn = [mat['trn1'][0].tolist(), mat['trn2'][0].tolist(), mat['trn3'][0].tolist()]
    tst = [mat['tst1'][0].tolist(), mat['tst2'][0].tolist(), mat['tst3'][0].tolist()]
    val = [mat['val1'][0].tolist(), mat['val2'][0].tolist(), mat['val3'][0].tolist()]
    print('Done')
    print('------------------------------------------------------------------\n')

    print('Getting Images Names --------------------------------------------')
    imagefiles = []
    # Training model on 32X32 images
    for imagefile in glob(top_folder + image_folder + '/image_*.jpg'):
        imagefiles.append(imagefile)
    imagefiles = sorted(imagefiles)
    print('Done')
    print('------------------------------------------------------------------\n')

    fout_test = open('image_labels.txt', 'w')
    for ind, element in enumerate(imagefiles):
        fout_test.write(str(labels[ind]) + ' ' + element + '\n')

    print('Getting Images H,S,V --------------------------------------------')
    print('and Separating Training, Validation, and Test Data ---------------')
    trn_hsv_images = []
    val_hsv_images = []
    tst_hsv_images = []
    trn_labels = []
    val_labels = []
    tst_labels = []
    for idx, imagefile in enumerate(imagefiles):
        img = Image.open(imagefile)
        img.load()
        # Get tuple of r,g,b values for each pixel
        rgb_image = list(img.getdata())
        # transform r,g,b to h,s,v
        hsv_image = hsv_image_calculate(rgb_image)
        '''
        hsv_image = []
        for pixel in rgb_image:
            r = pixel[0]
            g = pixel[1]
            b = pixel[2]
            h, s, v = colorsys.rgb_to_hsv(r, g, b)
            hsv_image.append((h, s, v))
        '''
        # sort between training, testing, and validation
        if idx + 1 in trn[0]:
            trn_hsv_images.append(hsv_image)
            trn_labels.append(labels[idx])
        elif idx + 1 in val[0]:
            val_hsv_images.append(hsv_image)
            val_labels.append(labels[idx])
        elif idx + 1 in tst[0]:
            tst_hsv_images.append(hsv_image)
            tst_labels.append(labels[idx])
        else:
            print(idx)

    print('# of training images: ' + str(len(trn_hsv_images)))
    print('# of validation images: ' + str(len(val_hsv_images)))
    print('# of test images: ' + str(len(tst_hsv_images)))
    print('')
    print('# of training labels: ' + str(len(trn_labels)))
    print('# of validation labels: ' + str(len(val_labels)))
    print('# of test labels: ' + str(len(tst_labels)))
    print('------------------------------------------------------------------\n')

    print('Getting H,S,V Features from Training Set ------------------------')
    start_time = timeit.default_timer()
    hsv_features = []
    for image in trn_hsv_images:
        # getting features from 1/4 of the training images
        if random.randrange(0, 4, 1) == 1:
            hsv_features = hsv_features + image
    hsv_features = numpy.asarray(hsv_features)
    print('Feature matrix: ' + str(hsv_features.shape))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time))
    print('------------------------------------------------------------------\n')

    print('Training and Validation ----------------------------------------')
    training_time = timeit.default_timer()
    minError = -1
    bestWords = -1
    for numWords in range(200, 1001, 100):
        start_time2 = timeit.default_timer()

        print(str(numWords) + ' Word Vocabulary ---------------------------------------------')
        print('Clustering Features...')
        start_time3 = timeit.default_timer()
        kmodel = kmeans_cluster(numWords, hsv_features)
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Hot Encoding Training Data...')
        start_time3 = timeit.default_timer()
        trn_hsv_hot_vectors = []
        for image in trn_hsv_images:
            image_as_clusters = kmodel.predict(numpy.asarray(image)).tolist()
            hot_vector = [0] * numWords
            for element in image_as_clusters:
                hot_vector[element] += 1
            trn_hsv_hot_vectors.append(hot_vector)
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Calculate uf...')
        start_time3 = timeit.default_timer()
        sum_chi_distance = 0
        count = 0
        for i in range(0, len(trn_hsv_hot_vectors), 1):
            for j in range(i + 1, len(trn_hsv_hot_vectors), 1):
                x = numpy.asarray(trn_hsv_hot_vectors[i])
                y = numpy.asarray(trn_hsv_hot_vectors[j])
                with numpy.errstate(divide='ignore', invalid='ignore'):
                    z = numpy.true_divide((x - y) ** 2, x + y)
                    z[z == numpy.inf] = 0
                    z = numpy.nan_to_num(z)
                sum_chi_distance += z.sum()
                count += 1
        uf = 2 * count / sum_chi_distance
        # uf = 0.5
        print('uf = ' + str(uf))
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Computing Training Kernel...')
        start_time3 = timeit.default_timer()
        # K = linear_kernel(numpy.asarray(trn_hsv_hot_vectors))
        K = chi2_kernel(numpy.asarray(trn_hsv_hot_vectors), gamma=uf)
        print('Size of kernel: ' + str(K.shape))
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Applying Kernel to SVM...')
        start_time3 = timeit.default_timer()
        svm = SVC(kernel='precomputed').fit(K, numpy.asarray(trn_labels))
        trn_predict = svm.predict(K).tolist()
        trn_error = 0
        for i in range(0, len(trn_predict), 1):
            if trn_labels[i] != trn_predict[i]:
                trn_error += 1
        print('Error on training data: ' + str(trn_error) + '/' + str(len(trn_labels)))
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Hot Encoding Validation Data...')
        start_time3 = timeit.default_timer()
        val_hsv_hot_vectors = []
        for image in val_hsv_images:
            image_as_clusters = kmodel.predict(numpy.asarray(image)).tolist()
            hot_vector = [0] * numWords
            for element in image_as_clusters:
                hot_vector[element] += 1
            val_hsv_hot_vectors.append(hot_vector)
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Computing Validation Kernel...')
        start_time3 = timeit.default_timer()
        # K = linear_kernel(X=numpy.asarray(val_hsv_hot_vectors), Y=numpy.asarray(trn_hsv_hot_vectors))
        K = chi2_kernel(X=numpy.asarray(val_hsv_hot_vectors),
                        Y=numpy.asarray(trn_hsv_hot_vectors), gamma=uf)
        print('Size of kernel: ' + str(K.shape))
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Predicting Values for Validation Data...')
        start_time3 = timeit.default_timer()
        val_predict = svm.predict(K)
        print('Prediction array size: ' + str(val_predict.shape))
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        print('Calculating Error...')
        start_time3 = timeit.default_timer()
        val_predict = val_predict.tolist()
        error = 0
        for i in range(0, len(val_predict), 1):
            if val_labels[i] != val_predict[i]:
                error += 1
        print('Error = ' + str(error) + '/' + str(len(val_labels)))
        print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

        if minError == -1 or error <= minError:
            minError = error
            bestWords = numWords
        print('------------------------------------------------------------------')
        print('Elapsed Time for ' + str(numWords) + ' words: ' + str(timeit.default_timer() - start_time2))
        print('------------------------------------------------------------------\n')

    print('Training elapsed Time: ' + str(timeit.default_timer() - training_time))
    print('Best number of words is ' + str(bestWords))
    print('Error = ' + str(minError) + '\n')
    print('------------------------------------------------------------------\n')

    print('Testing --------------------------------------------------------')
    testing_time = timeit.default_timer()

    print('Getting H,S,V Features from Training and Validation Set...')
    start_time = timeit.default_timer()
    hsv_features = []
    for image in trn_hsv_images + val_hsv_images:
        # getting features from 1/4 of the training and validation images
        if random.randrange(0, 4, 1) == 1:
            hsv_features = hsv_features + image
    hsv_features = numpy.asarray(hsv_features)
    print('Feature matrix: ' + str(hsv_features.shape))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Clustering Features...')
    start_time = timeit.default_timer()
    kmodel = kmeans_cluster(bestWords, hsv_features)
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Hot Encoding Training Data...')
    start_time = timeit.default_timer()
    trn_hsv_hot_vectors = []
    for image in trn_hsv_images + val_hsv_images:
        image_as_clusters = kmodel.predict(numpy.asarray(image)).tolist()
        hot_vector = [0] * bestWords
        for element in image_as_clusters:
            hot_vector[element] += 1
        trn_hsv_hot_vectors.append(hot_vector)
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Calculate uf...')
    start_time = timeit.default_timer()
    sum_chi_distance = 0
    count = 0
    for i in range(0, len(trn_hsv_hot_vectors), 1):
        for j in range(i + 1, len(trn_hsv_hot_vectors), 1):
            x = numpy.asarray(trn_hsv_hot_vectors[i])
            y = numpy.asarray(trn_hsv_hot_vectors[j])
            with numpy.errstate(divide='ignore', invalid='ignore'):
                z = numpy.true_divide((x - y) ** 2, x + y)
                z[z == numpy.inf] = 0
                z = numpy.nan_to_num(z)
            sum_chi_distance += z.sum()
            count += 1
    uf = count / sum_chi_distance
    print('uf = ' + str(uf))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Computing Training Kernel...')
    start_time = timeit.default_timer()
    # K = linear_kernel(numpy.asarray(trn_hsv_hot_vectors))
    K = chi2_kernel(numpy.asarray(trn_hsv_hot_vectors), gamma=uf)
    print('Size of kernel: ' + str(K.shape))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Applying Kernel to SVM...')
    start_time = timeit.default_timer()
    svm = SVC(kernel='precomputed').fit(K, numpy.asarray(trn_labels + val_labels))
    trn_predict = svm.predict(K).tolist()
    trn_error = 0
    trn_labels = trn_labels + val_labels  # Overwrites trn_labels
    for i in range(0, len(trn_predict), 1):
        if trn_labels[i] != trn_predict[i]:
            trn_error += 1
    # trn_labels already includes val_labels here
    print('Error on training data: ' + str(trn_error) + '/' + str(len(trn_labels)))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Hot Encoding Testing Data...')
    start_time = timeit.default_timer()
    tst_hsv_hot_vectors = []
    for image in tst_hsv_images:
        image_as_clusters = kmodel.predict(numpy.asarray(image)).tolist()
        hot_vector = [0] * bestWords
        for element in image_as_clusters:
            hot_vector[element] += 1
        tst_hsv_hot_vectors.append(hot_vector)
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Computing Testing Kernel...')
    start_time = timeit.default_timer()
    # K = linear_kernel(X=numpy.asarray(tst_hsv_hot_vectors), Y=numpy.asarray(trn_hsv_hot_vectors))
    K = chi2_kernel(X=numpy.asarray(tst_hsv_hot_vectors),
                    Y=numpy.asarray(trn_hsv_hot_vectors), gamma=uf)
    print('Size of kernel: ' + str(K.shape))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Predicting Values for Testing Data...')
    start_time = timeit.default_timer()
    tst_predict = svm.predict(K)
    print('Prediction array size: ' + str(tst_predict.shape))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time) + '\n')

    print('Calculating Error...')
    start_time3 = timeit.default_timer()
    tst_predict = tst_predict.tolist()
    error = 0
    for i in range(0, len(tst_predict), 1):
        if tst_labels[i] != tst_predict[i]:
            error += 1
    print('Error = ' + str(error) + '/' + str(len(tst_labels)))
    print('Elapsed Time: ' + str(timeit.default_timer() - start_time3) + '\n')

    print('Testing Elapsed Time: ' + str(timeit.default_timer() - testing_time) + '\n')
    full_time = timeit.default_timer() - init_time  # elapsed time, not negated

    print('Writing to file...')
    fout = open('svm_results.txt', 'w')
    fout.write('Execution Time: ' + str(full_time) + '\n')
    fout.write('Minimum Validation Error: ' + str(minError) + '/' + str(len(val_labels)) + '\n')
    fout.write('Testing Error: ' + str(error) + '/' + str(len(tst_labels)) + '\n\n')
    fout.write('Real: Predict:' + '\n')
    for i in range(0, len(tst_labels), 1):
        fout.write(str(tst_labels[i]) + ' ' + str(tst_predict[i]))
        if tst_labels[i] == tst_predict[i]:
            fout.write(' X')
        fout.write('\n------------------------------\n')
    fout.close()
def __call__(self, x, y):
    return chi2_kernel(x, y, gamma=self.gamma)
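# A minimal sketch of the wrapper class this __call__ presumably belongs to
# (the class name and constructor are assumptions; only __call__ appears
# above). Any scikit-learn estimator that accepts a kernel callable can use it:
from sklearn.metrics.pairwise import chi2_kernel
from sklearn.svm import SVC

class Chi2Kernel:
    def __init__(self, gamma=1.0):
        self.gamma = gamma

    def __call__(self, x, y):
        return chi2_kernel(x, y, gamma=self.gamma)

# e.g. clf = SVC(kernel=Chi2Kernel(gamma=0.5)).fit(X, y)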