def fit(self, X, y):
    """
    Validate the training data and store it on the instance.

    Inputs:
        X (N, M) : A design matrix with N M-dimensional vectors as rows
        y (N,) OR (N, D): The output labels. Can be binary vector of 0's and
            1's or True's and False's or a one-hot encoding of D classes

    Raises:
        AssertionError: if X or y is not a numpy array, X is not
            2-dimensional, or X and y have a different number of rows
        TypeError: if y is neither 1- nor 2-dimensional
    """
    assert is_numpy(X), 'X must be a numpy array'
    assert is_numpy(y), 'y must be a numpy array'
    assert has_dims(X, 2), 'X must be 2-dimensional'

    if has_dims(y, 1):
        # A flat label vector is stored as a single column.
        one_class = True
        labels = y.reshape(-1, 1)
    elif has_dims(y, 2):
        # A 2-D y with one column is still the single-output case.
        one_class = y.shape[1] == 1
        labels = y
    else:
        raise TypeError('y must be 1 or 2 dimensional')

    # Every row of X needs exactly one label row; checked after the dimension
    # test so a badly shaped y still raises TypeError as before.
    assert labels.shape[0] == X.shape[0], \
        'X and y must have the same 1st dimension length'

    # Assign only once everything has been validated, so a failed call does
    # not leave the instance half-updated.
    self.one_class = one_class
    self.y = labels
    self.X = X
    self.fitted = True
def predict(self, X):
    """
    Predict binary labels for the rows of X from a vote among the self.k
    closest stored training rows.

    Inputs:
        X (N, M) : A design matrix with N M-dimensional vectors as rows

    Returns:
        guess (N, 1): boolean array, True where the mean of the neighbour
            labels (plain for weights == 'uniform', inverse-distance
            weighted for weights == 'distance') is >= 0.5

    Raises:
        AssertionError: if the model has not been fitted, or X is not a
            2-dimensional numpy array
        NotImplementedError: if the stored labels are not a single column
        ValueError: if self.weights is neither 'uniform' nor 'distance'
    """
    assert self.fitted, 'Fit data before predicting'
    assert is_numpy(X), 'X must be a numpy array'
    assert has_dims(X, 2), 'X must be 2-dimensional'

    if not self.one_class:
        # TODO implement one-hot encoding for KNN
        raise NotImplementedError('this hasnt been done yet... ')
    if self.weights not in ('uniform', 'distance'):
        # Reject a bad setting before paying for the distance matrix.
        raise ValueError('Weights set incorrectly')

    # distances[i, j] is the distance from stored row i to query row j.
    distances = self.pairwise_distances(self.X, X)
    # For every query (column), the row indices of its k closest stored rows.
    sortd = np.argsort(distances, axis=0)[:self.k, :]
    # Indexing the label column with an (N, k) index gives shape (N, k, 1).
    labels = self.y[sortd.T]

    if self.weights == 'uniform':
        return np.mean(labels, axis=1) >= 0.5

    # Inverse-distance weights, shape (N, k). A zero distance gives inf, which
    # nan_to_num maps to the largest finite float; NaN becomes weight 0.
    ws = np.nan_to_num(1 / np.take_along_axis(distances, sortd, axis=0).T)
    # Rescale by the row maximum before normalising: with two or more
    # zero-distance neighbours the plain sum would overflow to inf and wipe
    # out every weight. After rescaling, exact matches share the weight.
    ws = ws / ws.max(1)[:, None]
    ws = ws / ws.sum(1)[:, None]
    # Add a trailing axis so the weights line up with the (N, k, 1) labels;
    # multiplying by the bare (N, k) array would mis-broadcast.
    return (labels * ws[:, :, None]).sum(1) >= 0.5
def pairwise_distances(self, X1, X2):
    """
    Return an (N1, N2) matrix where the element (i, j) is the euclidean
    distance between the vectors represented by the ith row of X1 and the
    jth row of X2.

    Inputs:
        X1 (N1, M): A matrix with N1 M-dimensional vectors as rows
        X2 (N2, M): A matrix with N2 M-dimensional vectors as rows

    Returns:
        D (N1, N2): The pairwise euclidean distances

    Raises:
        AssertionError: if X1 or X2 is not a 2-dimensional numpy array
    """
    assert is_numpy(X1), 'X1 must be a numpy array'
    assert is_numpy(X2), 'X2 must be a numpy array'
    assert has_dims(X1, 2), 'X1 must be 2-dimensional'
    assert has_dims(X2, 2), 'X2 must be 2-dimensional'

    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2, evaluated for all pairs at once.
    squared = (- 2 * X1 @ X2.T
               + (X1 ** 2).sum(1)[:, None]
               + (X2 ** 2).sum(1))
    # Cancellation in the expanded form can leave a tiny negative value for
    # (near-)identical rows; clamp at zero so the root never yields NaN.
    return np.maximum(squared, 0) ** 0.5
def sigmoid(x):
    """
    Logistic sigmoid, 1 / (1 + exp(-x)), evaluated without overflow.

    Inputs:
        x: float or int or numpy array

    Returns:
        σ(x), element-wise for array input

    Raises:
        AssertionError: if x is not an integer, a float or a numpy array
    """
    assert any([is_integer(x), is_float(x), is_numpy(x)]), \
        'x must be an integer or a float or numpy array'
    # σ(x) = exp(-log(1 + exp(-x))). logaddexp(0, -x) computes the inner log
    # without forming exp(-x), so large negative x no longer overflows.
    return np.exp(-np.logaddexp(0, -x))
def fit(self, X, y, method='L-BFGS-B', display_opt=False):
    """
    Fit the weight vector by numerically minimising self.loss.

    Inputs:
        X (N, M) : A design matrix with N M-dimensional vectors as rows
        y (N,) OR (N, D): The output labels. Can be binary vector of 0's and
            1's or True's and False's or a one-hot encoding of D classes
        method: solver name handed to minimize
        display_opt: value passed to the solver as its 'disp' option

    Raises:
        AssertionError: if X or y is not a numpy array, X is not
            2-dimensional, or X and y have a different number of rows
        TypeError: if y is neither 1- nor 2-dimensional
    """
    assert is_numpy(X), 'X must be a numpy array'
    assert is_numpy(y), 'y must be a numpy array'
    assert has_dims(X, 2), 'X must be 2-dimensional'
    assert y.shape[0] == X.shape[0], \
        'X and y must have the same 1st dimension length'

    # Normalise the labels to a 2-D array and note whether there is only a
    # single output column.
    if has_dims(y, 1):
        single_column, targets = True, y.reshape(-1, 1)
    elif has_dims(y, 2):
        single_column, targets = y.shape[1] == 1, y
    else:
        raise TypeError('y must be 1 or 2 dimensional')
    self.one_class = single_column
    self.y = targets

    n_rows, n_features = X.shape
    self.N = n_rows
    self.M = n_features
    # Affine recoding of the labels: 0 -> -1 and 1 -> +1.
    self.z = 2 * self.y - 1
    self.X = X

    # Start from a random normal draw, one entry per feature, and let the
    # solver use self.grad as the gradient of self.loss.
    start = np.random.normal(size=self.M)
    solution = minimize(self.loss, start, method=method, jac=self.grad,
                        options={'maxiter': 500, 'disp': display_opt})
    # Keep the optimum as a column vector.
    self.w = solution.x.reshape(-1, 1)