def predict(self, X, return_projection=False):
    """Predict the class of input test data.

    Parameters
    ----------
    X : numpy.array
        The test data to classify. A single sample given as a 1-D array
        is promoted to a batch of one row.
    return_projection : bool, optional
        When True, also return the projection of each test sample on
        the margin.

    Returns
    -------
    predictions : numpy.array
        Integer array with one entry per sample. When the model was
        fitted on labels {-1, 1} it holds the sign of the projection
        (1, 0 or -1). Otherwise it holds the label of the single
        one-vs-rest classifier that claimed the sample, or 0 when no
        classifier or more than one classifier claimed it.
    projections : numpy.array, optional
        The projection of each sample on the margin (0 for samples
        left unassigned by the one-vs-rest vote). Only returned when
        ``return_projection`` is True.

    Raises
    ------
    ModelNotFittedError
        If called before the model has been fitted.
    """
    X = np.asarray(X)
    if X.ndim == 1:
        X = np.expand_dims(X, axis=0)

    if not self.classifiers:
        raise ModelNotFittedError(
            "Predict called before fitting the model.")

    projections = np.zeros(X.shape[0])
    predictions = np.zeros(X.shape[0], dtype=np.int64)

    # Labels other than [-1, 1] are handled by one-vs-rest classifiers.
    if sorted(self.uniques) != [-1, 1]:
        for j, x in enumerate(X):
            for i, clas in enumerate(self.classifiers):
                prediction, projection = clas.predict(
                    x, return_projection=True)
                # Sub-classifiers answer with one-element arrays; extract
                # the scalar explicitly because int()/float() on arrays
                # with ndim > 0 is deprecated in recent NumPy releases.
                if int(np.asarray(prediction).item()) != 1:
                    continue
                if projections[j] != 0:
                    # A previous classifier already claimed this sample:
                    # the vote is ambiguous, so mark it as unassigned.
                    projections[j] = 0
                    predictions[j] = 0
                    break
                # self.ind[i] is the first training index of class i.
                predictions[j] = self.y[self.ind[i]]
                projections[j] = float(np.asarray(projection).item())
    else:
        for j, x in enumerate(X):
            # Decision function: b + sum_i alpha_i * y_i * K(sv_i, x).
            projection = self.b
            for alpha, label, vector in zip(
                    self.support_lagrange_multipliers,
                    self.support_vectors_y,
                    self.support_vectors):
                projection += alpha * label * self.kernel(vector, x)
            projections[j] = projection
            predictions[j] = np.sign(projection)

    if return_projection:
        return predictions, projections
    return predictions
def predict(self, X_test):
    """Predict labels by majority vote among the nearest training samples.

    For every test sample the euclidean distance to each stored training
    sample (``self.X``) is computed, the ``self.n_neighbors`` closest
    ones are selected, and the most frequent label among them
    (``self.y``) becomes the prediction.

    Parameters
    ----------
    X_test : numpy.ndarray
        The testing features.

    Returns
    -------
    y_target : numpy.ndarray
        The class label corresponding to each testing feature, stored
        as 64-bit integers.
    """
    n_samples = len(X_test)
    y_target = np.zeros(n_samples, dtype=np.int64)

    for row, sample in enumerate(X_test):
        # Euclidean distance from this sample to every training sample.
        distances = np.array(
            [np.linalg.norm(sample - train_sample) for train_sample in self.X])

        # Indices of the n_neighbors closest training samples.
        nearest = distances.argsort()[:self.n_neighbors]

        # Tally the neighbours' labels; the most common label wins.
        ballot = Counter(self.y[nearest])
        y_target[row] = ballot.most_common(1)[0][0]

    return y_target
def vocab_one_hot(Y, vocab_size):
    """One-hot encode a 2-D array of vocabulary indices.

    Parameters
    ----------
    Y : numpy.ndarray
        Integer array of shape ``(n_rows, n_cols)`` whose entries are
        indices in ``[0, vocab_size)``.
    vocab_size : int
        Size of the vocabulary, i.e. the length of each one-hot vector.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_rows, n_cols, vocab_size)`` in which
        ``result[i, j, Y[i, j]] == 1`` and every other entry is 0.
    """
    encoded = np.zeros((Y.shape[0], Y.shape[1], vocab_size))
    if Y.shape[0] > 0:
        # Build the identity matrix once and pick its rows in a single
        # fancy-indexing step instead of rebuilding it for every row of Y.
        encoded[...] = np.eye(vocab_size)[Y]
    return encoded
def setUp(self):
    """Build the 5 x 3 fixture matrix ``self.X`` and set ``self.n_comp``.

    Column 2 holds the row index, column 1 holds ``5 * index + 2`` and
    column 0 is derived from the other entries of the same row.
    """
    n_rows = 5
    n_dim = 3
    self.n_comp = 3

    data = np.zeros([n_rows, n_dim])
    row_index = np.arange(n_rows)

    data[:, 2] = row_index
    data[:, 1] = 5 * row_index + 2
    # Column 0 is still all zeros when it is read on the right-hand side.
    data[:, 0] = (5 - 2 * data[:, 0] - 3 * data[:, 1]) / 2

    self.X = data
def to_onehot(y):
    """Convert a 1-D sequence of class labels to a one-hot matrix.

    Parameters
    ----------
    y : array-like
        Class labels, one per sample. A CuPy array is copied to host
        memory first when CuPy is installed.

    Returns
    -------
    array
        Matrix of shape ``(len(y), n_classes)`` allocated through the
        module-level ``np`` alias. Row ``i`` has a single 1 in the column
        that corresponds to the position of ``y[i]`` among the sorted
        unique labels.
    """
    try:
        import cupy
    except ImportError:
        # CuPy is optional; without it y cannot be a CuPy array.
        pass
    else:
        # Use the public cupy.ndarray name: the private cupy.core.core
        # path is not available in current CuPy releases.
        if isinstance(y, cupy.ndarray):
            y = cupy.asnumpy(y)

    # inverse[i] is the index of y[i] within the sorted unique labels, so
    # labels need not be the contiguous integers 0..n_classes-1.
    classes, inverse = numpy.unique(y, return_inverse=True)
    inverse = numpy.asarray(inverse).reshape(-1)

    a = np.zeros((len(y), len(classes)))
    for i, column in enumerate(inverse):
        a[i][int(column)] = 1.
    return a
def setUp(self):
    """Create a two-class dataset and split it into train and test sets.

    Two point sets of 300 samples each are drawn with
    ``Distribution.radial_binary`` (presumably radial bands between
    ``st`` and ``ed`` around ``mean`` -- confirm against that helper).
    The first set is labelled 1 and the second 0; the labels are one-hot
    encoded and 50 samples are held out for testing.
    """
    X11 = Distribution.radial_binary(pts=300, mean=[0, 0], st=1, ed=2,
                                     seed=20)
    X22 = Distribution.radial_binary(pts=300, mean=[0, 0], st=4, ed=5,
                                     seed=10)

    Y11 = np.ones(X11.shape[0])
    # Size the second label vector from its own sample set so features and
    # labels stay aligned even if the two point counts ever differ.
    Y22 = np.zeros(X22.shape[0])

    X = np.vstack((X11, X22))
    y = np.hstack((Y11, Y22))
    y = to_onehot(y)

    self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
        X, y, test_size=50, random_state=42)
def fit(self, X, y, multiplier_threshold=1e-5):
    """Fit the svc model on training data.

    Labels exactly equal to {-1, 1} are fitted directly by solving the
    soft-margin dual problem with cvxopt. Any other label set is handled
    one-vs-rest: one binary classifier is fitted per unique label.

    Parameters
    ----------
    X : numpy.array
        The training features.
    y : numpy.array
        The training labels.
    multiplier_threshold : float
        Lagrange multipliers greater than or equal to this value mark
        the support vectors.

    Returns
    -------
    self : SVC
        The fitted model. ``self.classifiers`` holds ``[self]`` for
        {-1, 1} labels, or one fitted binary classifier per unique label
        otherwise.

    Raises
    ------
    ValueError
        If ``y`` contains fewer than two distinct labels.
    """
    X = np.array(X)
    self.y = np.asarray(y)
    self.n = self.y.shape[0]
    self.uniques, self.ind = np.unique(self.y, return_index=True)
    self.n_classes = len(self.uniques)

    if self.n_classes < 2:
        # A single label would be re-encoded to a single label again by
        # the one-vs-rest branch below and recurse without end.
        raise ValueError("At least two distinct class labels are required.")

    # Do multi class classification (one-vs-rest).
    if sorted(self.uniques) != [-1, 1]:
        # Start from a clean list so refitting does not keep stale
        # classifiers from an earlier call.
        self.classifiers = []
        for label in self.uniques:
            # Relabel: the current class becomes 1, everything else -1.
            y_i = np.where(self.y == label, 1, -1)
            # Copy the current initializer
            clf = SVC()
            clf.kernel = self.kernel
            clf.C = self.C
            self.classifiers.append(clf.fit(X, y_i, multiplier_threshold))
        return self

    # create a gram matrix by taking the outer product of y
    gram_matrix_y = np.outer(self.y, self.y)
    K = self.__create_kernel_matrix(X)
    gram_matrix_xy = gram_matrix_y * K

    # Dual problem in cvxopt's standard form:
    #   minimize (1/2) a^T P a + q^T a  subject to  G a <= h,  A a = b
    P = cvxopt.matrix(gram_matrix_xy)
    q = cvxopt.matrix(-np.ones(self.n))

    # Box constraints 0 <= a_i <= C written as -a <= 0 and a <= C.
    G1 = cvxopt.spmatrix(-1.0, range(self.n), range(self.n))
    G2 = cvxopt.spmatrix(1, range(self.n), range(self.n))
    G = cvxopt.matrix([[G1, G2]])

    h1 = cvxopt.matrix(np.zeros(self.n))
    h2 = cvxopt.matrix(np.ones(self.n) * self.C)
    h = cvxopt.matrix([[h1, h2]])

    # Equality constraint sum_i a_i * y_i = 0.
    A = cvxopt.matrix(self.y.astype(np.double)).trans()
    b = cvxopt.matrix(0.0)

    lagrange_multipliers = np.array(
        list(cvxopt.solvers.qp(P, q, G, h, A, b)['x']))

    # Samples whose multiplier reaches the threshold are support vectors.
    lagrange_multiplier_indices = np.flatnonzero(
        np.greater_equal(lagrange_multipliers, multiplier_threshold))

    self.support_vectors = X[lagrange_multiplier_indices]
    self.support_vectors_y = self.y[lagrange_multiplier_indices]
    self.support_lagrange_multipliers = lagrange_multipliers[
        lagrange_multiplier_indices]

    # The bias is the average, over all support vectors, of
    # y_i - sum_j a_j * y_j * K(x_i, x_j).
    self.b = 0
    self.n_support_vectors = self.support_vectors.shape[0]
    for i in range(self.n_support_vectors):
        # Kernel values between support vector i and every support vector.
        kernel_trick = K[[lagrange_multiplier_indices[i]],
                         lagrange_multiplier_indices]
        self.b += self.support_vectors_y[i] - np.sum(
            self.support_lagrange_multipliers * self.support_vectors_y *
            kernel_trick)
    self.b /= self.n_support_vectors

    self.classifiers = [self]
    return self