def mle_batch(self, data, batch, k):
    """
    Calculates LID values of data w.r.t. batch.

    Args:
        data: samples to calculate LIDs of
        batch: samples to calculate LIDs against
        k: the number of nearest neighbors to consider

    Returns:
        the calculated LID values
    """
    k = min(k, len(data) - 1)
    f = lambda v: -k / np.sum(np.log(v / v[-1]))
    gamma = self.classifier.kernel.gamma
    if gamma is None:
        gamma = 1.0 / self.training_data_ndarray.shape[1]
    if batch is None:
        # K = cdist(data, data)
        K = rbf_kernel(data, Y=data, gamma=gamma)
        K = np.reciprocal(K)
        # get the closest k neighbours, skipping each sample itself
        a = np.apply_along_axis(np.sort, axis=1, arr=K)[:, 1:k + 1]
    else:
        batch = np.asarray(batch, dtype=np.float32)
        # K = cdist(data, batch)
        K = rbf_kernel(data, Y=batch, gamma=gamma)
        K = np.reciprocal(K)
        # get the closest k neighbours
        a = np.apply_along_axis(np.sort, axis=1, arr=K)[:, 0:k]
    a = np.apply_along_axis(f, axis=1, arr=a)
    return np.nan_to_num(a)
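# A minimal, self-contained sketch of the same maximum-likelihood LID
# estimator on plain Euclidean distances, for comparison with the
# reciprocal-RBF variant above. All names here are illustrative and not
# part of the class above.
import numpy as np
from scipy.spatial.distance import cdist

def mle_lid_euclidean_sketch(data, batch, k):
    dists = np.sort(cdist(data, batch), axis=1)[:, :k]  # k smallest distances per row
    # LID(x) ~= -k / sum_i log(r_i / r_k), with r_k the k-th neighbour distance
    lids = -k / np.sum(np.log(dists / dists[:, -1:] + 1e-9), axis=1)
    return np.nan_to_num(lids)

# e.g. mle_lid_euclidean_sketch(np.random.randn(10, 3), np.random.randn(100, 3), k=5)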
def lid_cost(self, xc, closest_neighbours, k):
    """Penalty keeping the LID of the attack point xc close to its original value."""
    gamma = self.classifier.kernel.gamma
    if gamma is None:
        gamma = 1.0 / self.training_data_ndarray.shape[1]
    r_max = np.sum((closest_neighbours[-1, :] - xc) ** 2, axis=1)
    r_max *= -gamma
    r_max = np.exp(r_max)  # exponentiate r_max
    r_max = np.reciprocal(r_max)
    log_ratio_sum = 0  # renamed from `sum` to avoid shadowing the builtin
    for i in range(closest_neighbours.shape[0]):
        r_i = np.sum((closest_neighbours[i, :] - xc) ** 2, axis=1)
        r_i *= -gamma
        r_i = np.exp(r_i)  # exponentiate r_i
        r_i = np.reciprocal(r_i)
        log_ratio_sum += np.log((r_i / r_max) + 1e-9)
    lid = -k / log_ratio_sum
    # Euclidean variant, kept for reference:
    # r_max = np.sqrt(np.sum((closest_neighbours[-1, :] - xc) ** 2, axis=1))
    # sum = 0
    # for i in range(closest_neighbours.shape[0]):
    #     r_i = np.sqrt(np.sum((closest_neighbours[i, :] - xc) ** 2, axis=1))
    #     sum += np.log((r_i / r_max) + 1e-9)
    # lid = -k / sum
    # return lid
    cost = self.lid_cost_coefficient * (lid - self.original_lid_values[self._idx]) ** 2
    return cost
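# Hedged reading of lid_cost (inferred from the code above): with
# reciprocal-RBF "distances"
#     r_i = 1 / exp(-gamma * ||x_i - xc||^2),
# it evaluates the MLE estimate
#     LID(xc) = -k / sum_i log(r_i / r_max + 1e-9)
# and returns the penalty
#     cost = lid_cost_coefficient * (LID(xc) - LID_original)^2,
# which discourages the attack point from changing its local intrinsic
# dimensionality.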
def mle_batch_euclidean(self, data, k):
    """
    Calculates LID values of data w.r.t. the training data.

    Args:
        data: samples to calculate LIDs of
        k: the number of nearest neighbors to consider

    Returns:
        the calculated LID values
    """
    batch = self.training_data_ndarray
    f = lambda v: -k / np.sum(np.log((v / v[-1]) + 1e-9))
    gamma = self.classifier.kernel.gamma
    if gamma is None:
        gamma = 1.0 / self.training_data_ndarray.shape[1]
    K = rbf_kernel(data, Y=batch, gamma=gamma)
    K = np.reciprocal(K)
    # K = cdist(data, batch)
    # get the closest k neighbours
    if self.xc is not None and self.xc.shape[0] == 1:
        # only one attack sample: skip the sample itself
        sorted_distances = np.sort(K)[0, 1:1 + k]
    else:
        sorted_distances = np.sort(K)[0, 0:k]
    a = np.apply_along_axis(f, axis=0, arr=sorted_distances)
    return np.nan_to_num(a)
def squared_error(self, performances: np.ndarray, features: np.ndarray,
                  labels: np.ndarray, weights: np.ndarray,
                  sample_weights: np.ndarray):
    """Compute the squared error for regression.

    Arguments:
        performances {np.ndarray} -- observed performance values
        features {np.ndarray} -- instance feature values
        labels {np.ndarray} -- 1-based indices of the labelled algorithms
        weights {np.ndarray} -- model weights, one row per algorithm
        sample_weights {np.ndarray} -- weights of the individual samples

    Returns:
        float -- the (weighted) mean squared error
    """
    loss = 0
    # add one column for bias
    feature_values = np.hstack((features, np.ones((features.shape[0], 1))))
    if self.use_exp_for_regression:
        utilities = np.exp(np.dot(weights, feature_values.T))
    else:
        utilities = np.dot(weights, feature_values.T)
    inverse_utilities = utilities
    if self.use_reciprocal_for_regression:
        inverse_utilities = np.reciprocal(utilities)
    indices = labels.T - 1
    loss += np.mean(sample_weights[:, np.newaxis] * np.square(
        np.subtract(performances,
                    inverse_utilities.T[np.arange(len(labels)), indices].T)))
    return loss
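# Illustrative one-sample, one-algorithm sketch of the loss above with
# both exp-utilities and reciprocals enabled. All names and values are
# hypothetical; the label indexing in the real method reduces to a
# scalar pick in this special case.
import numpy as np

weights = np.array([[0.1, 0.2, 0.0]])      # last column is the bias
features = np.array([[1.0, 2.0]])
performances = np.array([[0.5]])
sample_weights = np.array([1.0])
feats = np.hstack((features, np.ones((1, 1))))
pred = np.reciprocal(np.exp(weights @ feats.T))  # 1 / exp(w . [x, 1])
loss = np.mean(sample_weights[:, np.newaxis] * (performances - pred.T) ** 2)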
def log_entropy(w, b, mean_w, std_w, mean_b, std_b):
    """
    Computes the approximate posterior q, or the log entropy.
    Returns a scalar.

    Arguments:
    - w: Weights array of size x
    - b: Biases array of size y
    - mean_w: Mean-weights array of size x
    - std_w: Standard deviation of the weights, size x
    - mean_b: Mean-biases array of size y
    - std_b: Standard deviation of the biases, size y
    """
    q_weights = np.dot(np.square(w - mean_w),
                       np.reciprocal(np.exp(std_w))) / -2
    q_biases = np.dot(np.square(b - mean_b),
                      np.reciprocal(np.exp(std_b))) / -2
    return q_weights + q_biases
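# Hedged usage sketch: log_entropy evaluates the unnormalised log-density
# of diagonal Gaussians with variances exp(std_w) and exp(std_b) at the
# sampled weights and biases. Shapes and values below are illustrative.
import numpy as np

w, mean_w, std_w = np.random.randn(4), np.zeros(4), np.zeros(4)  # var = exp(0) = 1
b, mean_b, std_b = np.random.randn(2), np.zeros(2), np.zeros(2)
q = log_entropy(w, b, mean_w, std_w, mean_b, std_b)  # scalar, <= 0 in this case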
def _LOOCrossValidation(self, hyperparameters):  # scales, nuggets
    self._CalculateNecessaryMatrices(scales=hyperparameters[1:],
                                     nuggets=hyperparameters[0])
    # Closed-form leave-one-out quantities (assuming alpha = K^{-1} y):
    # residual_i = [K^{-1} y]_i / [K^{-1}]_ii and sigma_i^2 = 1 / [K^{-1}]_ii
    Kinv_diag = np.diag(np.linalg.inv(self.cov_matrix)).reshape(-1, 1)
    LOO_mean_minus_target = self.alpha / Kinv_diag
    LOO_sigma = np.reciprocal(Kinv_diag)
    # sum of the LOO log predictive densities log N(y_i | mu_i, sigma_i^2)
    log_CV = -0.5 * (np.log(LOO_sigma)
                     + np.square(LOO_mean_minus_target) * Kinv_diag
                     + np.log(2 * np.pi))
    # print(self.alpha.shape, self.cholesky.shape, self.cov_matrix.shape, Kinv_diag.shape)
    return log_CV.sum()
def testFindSufficientStatisticNodes(self):

    def log_joint(x, y, matrix):
        # Linear in x: y^T x
        result = np.einsum('i,i->', x, y)
        # Quadratic form: x^T matrix x
        result += np.einsum('ij,i,j->', matrix, x, x)
        # Rank-1 quadratic form: (x**2)^T(y**2)
        result += np.einsum('i,i,j,j->', x, y, x, y)
        # Linear in log(x): y^T log(x)
        result += np.einsum('i,i->', y, np.log(x))
        # Linear in reciprocal(x): y^T reciprocal(x)
        result += np.einsum('i,i->', y, np.reciprocal(x))
        # More obscurely linear in log(x): y^T matrix log(x)
        result += np.einsum('i,ij,j->', y, matrix, np.log(x))
        # Linear in x * log(x): y^T (x * log(x))
        result += np.einsum('i,i->', y, x * np.log(x))
        return result

    n_dimensions = 5
    x = np.exp(np.random.randn(n_dimensions))
    y = np.random.randn(n_dimensions)
    matrix = np.random.randn(n_dimensions, n_dimensions)
    env = {'x': x, 'y': y, 'matrix': matrix}

    expr = make_expr(log_joint, x, y, matrix)
    expr = canonicalize(expr)
    sufficient_statistic_nodes = find_sufficient_statistic_nodes(expr, 'x')
    suff_stats = [eval_expr(GraphExpr(node, expr.free_vars), env)
                  for node in sufficient_statistic_nodes]
    correct_suff_stats = [x, x.dot(matrix.dot(x)), np.square(x.dot(y)),
                          np.log(x), np.reciprocal(x), y.dot(x * np.log(x))]
    self.assertTrue(_perfect_match_values(suff_stats, correct_suff_stats))

    expr = make_expr(log_joint, x, y, matrix)
    expr = canonicalize(expr)
    sufficient_statistic_nodes = find_sufficient_statistic_nodes(
        expr, 'x', split_einsums=True)
    suff_stats = [eval_expr(GraphExpr(node, expr.free_vars), env)
                  for node in sufficient_statistic_nodes]
    correct_suff_stats = [x, np.outer(x, x), x * x, np.log(x),
                          np.reciprocal(x), x * np.log(x)]
    self.assertTrue(_match_values(suff_stats, correct_suff_stats))
def log_joint(x, y, matrix):
    # Linear in x: y^T x
    result = np.einsum('i,i->', x, y)
    # Quadratic form: x^T matrix x
    result += np.einsum('ij,i,j->', matrix, x, x)
    # Rank-1 quadratic form: (x**2)^T(y**2)
    result += np.einsum('i,i,j,j->', x, y, x, y)
    # Linear in log(x): y^T log(x)
    result += np.einsum('i,i->', y, np.log(x))
    # Linear in reciprocal(x): y^T reciprocal(x)
    result += np.einsum('i,i->', y, np.reciprocal(x))
    # More obscurely linear in log(x): y^T matrix log(x)
    result += np.einsum('i,ij,j->', y, matrix, np.log(x))
    # Linear in x * log(x): y^T (x * log(x))
    result += np.einsum('i,i->', y, x * np.log(x))
    return result
def predict_performances(self, features: np.ndarray):
    """Predict a vector of performance values.

    Arguments:
        features {np.ndarray} -- Instance feature values

    Returns:
        np.ndarray -- Predicted performance value per algorithm
    """
    # compute utility scores
    features = np.hstack((features, [1]))
    if self.use_exp_for_regression:
        utility_scores = np.exp(np.dot(self.weights, features))
    else:
        utility_scores = np.dot(self.weights, features)
    if self.use_reciprocal_for_regression:
        return np.reciprocal(utility_scores)
    return utility_scores
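# Hypothetical usage sketch mirroring predict_performances with both
# flags enabled: small utilities map to large predicted performances.
# Weights and features below are illustrative.
import numpy as np

weights = np.array([[0.5, -0.2, 0.1],    # one row per algorithm,
                    [0.0,  0.3, -0.4]])  # last column is the bias
features = np.array([1.0, 2.0])
scores = np.exp(weights @ np.hstack((features, [1])))
predicted = np.reciprocal(scores)  # what the method returns with both flags set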
def test_reciprocal():
    fun = lambda x: np.reciprocal(x)
    d_fun = grad(fun)
    check_grads(fun, npr.rand())
    check_grads(d_fun, npr.rand())
def test_reciprocal():
    fun = lambda x: np.reciprocal(x)
    check_grads(fun)(npr.rand())
def TransformCov(self, data_cov):
    transform = np.diag(np.reciprocal(self.Ysigma))
    return np.matmul(np.matmul(transform.T, data_cov), transform)
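# Sketch of what TransformCov computes: with D = diag(1 / Ysigma), the
# result is D^T C D, i.e. the covariance after each output dimension is
# divided by its sigma. Values below are illustrative.
import numpy as np

Ysigma = np.array([2.0, 0.5])
C = np.array([[4.0, 1.0],
              [1.0, 0.25]])
D = np.diag(np.reciprocal(Ysigma))
rescaled = D.T @ C @ D  # unit variances on the diagonal in this example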
def fun(x):
    return np.reciprocal(x)