def masks(mask: np.ndarray) -> Sequence[np.ndarray]:
    masks = [mask]
    mask2 = mask.copy()
    mask2[0, 0, 0] = 1
    masks.append(mask2)
    mask3 = mask.copy()
    mask3[2, 2, 2] = 0
    masks.append(mask3)
    return masks
def __init__(self, train_x: np.ndarray, train_y: np.ndarray, features_name=None, do_standardization=True):
    # ensure that train_y is (N x 1)
    train_y = train_y.reshape((train_y.shape[0], 1))
    self.train_x = train_x
    self._raw_train_x = train_x.copy()
    self._raw_train_y = train_y.copy()
    self.train_y = train_y
    self.features_name = features_name
    self.do_standardization = do_standardization
    self._x_std_ = None
    self._x_mean_ = None
def inv_zform(data: np.ndarray, out: Optional[np.ndarray] = None, clone: bool = True,
              sigma: float = 1, mu: float = 1) -> np.ndarray:
    if clone or out is None:
        out = data.copy()
    # assumed completion (the original snippet is truncated after the copy):
    # undo the z-transform z = (x - mu) / sigma, i.e. x = z * sigma + mu
    out[:] = data * sigma + mu
    return out
def inverse_additive_log_ratio(Y: np.ndarray, ind=-1):
    """
    Inverse additive log ratio transform.
    """
    assert Y.ndim in [1, 2]
    X = Y.copy()
    dimensions = X.shape[X.ndim - 1]
    idx = np.arange(0, dimensions + 1)
    if ind != -1:
        idx = np.array(list(idx[idx < ind]) +
                       [-1] +
                       list(idx[idx >= ind + 1] - 1))
    # Add a zero-column and reorder columns
    if Y.ndim == 2:
        X = np.concatenate((X, np.zeros((X.shape[0], 1))), axis=1)
        X = X[:, idx]
    else:
        X = np.append(X, np.array([0]))
        X = X[idx]
    # Inverse log and closure operations
    X = np.exp(X)
    X = close(X)
    return X
def make_move(self, board: np.ndarray, move: int) -> np.ndarray:
    moving_player = self.get_active_player(board)
    new_board: np.ndarray = board.copy()
    available_idx, = np.where(new_board[:, move] == 0)
    new_board[available_idx[-1]][move] = moving_player
    return new_board
def old_sweep(A: np.ndarray, ind: range):
    """This subroutine executes the sweep operator.

    "As input, SWEEP requires a symmetric matrix A where mean vector m and
    covariance matrix S are arranged in a special manner that simplifies the
    calculations." See Dempster (1969), Goodnight (1979).

    The SWEEP operator allows a statistician to quickly regress all variables
    against one specified variable, obtaining OLS estimates for regression
    coefficients and variances in a single application. Subsequent applications
    of the SWP operator allow for regressing against more variables.
    """
    S = A.copy()
    p = A.shape[1]
    for j in ind:
        S[j, j] = -1 / A[j, j]
        for i in range(0, p):
            if i != j:
                S[i, j] = -A[i, j] * S[j, j]
                S[j, i] = S[i, j]
        for i in range(0, p):
            if i != j:
                for k in range(0, p):
                    if k != j:
                        # note: reads from the original A, not S, so this form
                        # only sweeps correctly on a single index
                        S[i, k] = A[i, k] - S[i, j] * A[j, k]
                        S[k, i] = S[i, k]
    return S
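# A hedged sanity-check sketch (not from the original source) for old_sweep above:
# sweeping the 2 x 2 centered cross-product matrix [[Sxx, Sxy], [Sxy, Syy]] on
# index 0 should leave the OLS slope of y on x in S[0, 1] and the residual sum
# of squares in S[1, 1].
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=200)
y = 2.0 * x + rng.normal(scale=0.1, size=200)
xc, yc = x - x.mean(), y - y.mean()
A = np.array([[xc @ xc, xc @ yc],
              [yc @ xc, yc @ yc]])
S = old_sweep(A, ind=range(1))   # a single sweep, on index 0
print(S[0, 1])                   # OLS slope, close to 2.0
print(S[1, 1])                   # residual sum of squares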
def make_move(self, board: np.ndarray, move: int) -> np.ndarray:
    new_board: np.ndarray = board.copy()
    player = new_board[-1]
    new_board[-1] = -player
    if move == self.board_width * self.board_height:
        return new_board  # It's a pass.
    spaces = self.get_spaces(new_board)
    start_row = move // self.board_width
    start_column = move % self.board_width
    for di in range(-1, 2):
        for dj in range(-1, 2):
            if not (di or dj):
                continue
            to_flip: typing.List[typing.Tuple[int, int]] = []  # [(i, j)]
            i = start_row + di
            j = start_column + dj
            while 0 <= i < self.board_height and 0 <= j < self.board_width:
                piece = spaces[i, j]
                if piece == player:
                    for i, j in to_flip:
                        spaces[i, j] *= -1
                    break
                if piece == self.NO_PLAYER:
                    break
                else:
                    to_flip.append((i, j))
                i += di
                j += dj
    spaces[start_row, start_column] = player
    return new_board
def logsumexp_double_complement(a: np.ndarray, rel_tol: float = 1e-3) -> float:
    r"""Calculates the following expression in a numerically stable fashion:

        log(1 - (1 - exp(a_0)) x (1 - exp(a_1)) x ...)

    where a_i are the entries of `a` and assumed to be non-positive.

    The algorithm is as follows. We define:

        exp(x_n) = 1 - \prod_{i=0}^n (1 - exp(a_i)),

    thus, x_0 = a_0 and we have the recursion relation:

        exp(x_{n+1}) = exp(x_n) + exp(b_{n+1}),  where  b_{n+1} = a_{n+1} + log(1 - exp(x_n)).

    We sort `a` in descending order and update `x` term by term. It is easy to
    show that x_n is monotonically increasing and that
    |x_N - x_n| < (N - n) |x_n - x_{n-1}|.
    We use the last inequality to bound the error for early stopping.

    Args:
        a: a float array
        rel_tol: relative error tolerance for early stopping of calculation

    Returns:
        a float scalar
    """
    try:
        assert isinstance(a, np.ndarray)
        a = np.asarray(a.copy(), dtype=float)
    except AssertionError:
        try:
            a = np.asarray(a, dtype=float)
        except ValueError:
            raise ValueError("The input argument must be castable to a float ndarray.")
    assert len(a) > 0
    assert 0. <= rel_tol < 1.0

    # enforce all entries of a to be negative or zero
    a[a > 0.] = 0.

    if len(a) == 1:
        return a.item()
    else:
        a = np.sort(a.flatten())[::-1]
        x = a[0]
        sz = len(a)
        for i, entry in enumerate(a[1:]):
            x_new = np.logaddexp(x, entry + logp_complement(x))
            if np.abs(x_new - x) * (sz - i - 1) < rel_tol * np.abs(x):
                return x_new
            else:
                x = x_new
        return x
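# Hedged check for logsumexp_double_complement() above against the direct
# evaluation. logp_complement(x) is assumed to compute log(1 - exp(x)) for
# x <= 0; a minimal stand-in is given so the sketch is self-contained.
import numpy as np

def logp_complement(x: float) -> float:
    return np.log1p(-np.exp(x))

a = np.array([-0.5, -1.0, -3.0, -7.0])
stable = logsumexp_double_complement(a, rel_tol=1e-12)
naive = np.log(1.0 - np.prod(1.0 - np.exp(a)))
print(stable, naive)  # the two values should agree closely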
def luv_to_lch(luv_nd: ndarray) -> ndarray:
    uv_nd = luv_nd[..., slice(1, 2)]
    uv_nd[uv_nd == -0.0] = 0.0  # -0.0 screws up atan2
    lch_nd = luv_nd.copy()
    U, V = luv_nd[..., 1], luv_nd[..., 2]
    C, H = lch_nd[..., 1], lch_nd[..., 2]
    C[:] = ne.evaluate("(U ** 2 + V ** 2) ** 0.5")
    H[:] = np.degrees(ne.evaluate("arctan2(V, U)"))
    H[H < 0.0] += 360.0
    return lch_nd
def luv_to_lch(luv_nd: ndarray) -> ndarray:
    uv_nd = _channel(luv_nd, slice(1, 2))
    uv_nd[uv_nd == -0.0] = 0.0  # -0.0 screws up atan2
    lch_nd = luv_nd.copy()
    U, V = (_channel(luv_nd, n) for n in range(1, 3))
    C, H = (_channel(lch_nd, n) for n in range(1, 3))
    C[:] = (U ** 2 + V ** 2) ** 0.5
    hrad = np.arctan2(V, U)
    H[:] = np.degrees(hrad)
    H[H < 0.0] += 360.0
    return lch_nd
def find_zilany_scaling_factor(anr: np.ndarray) -> float:
    w1_max = 0.0
    w1_target_amplitude = 0.15e-6
    tolerance = 1e-9
    step = 1e-18
    v1 = 1e-18
    while abs(w1_max - w1_target_amplitude) >= tolerance:
        temp_anr = anr.copy() * v1
        w1_max = _total_hack(temp_anr)
        v1 += step
        if v1 > 0.5:
            print("couldn't converge")
            break
    print("V1 set to {:0.5E} and gave a wave 1 amplitude of {:0.5E}".format(v1, w1_max))
    # return the fitted scaling factor, as the annotated return type promises a float
    return v1
def add_mask(field: np.ndarray):
    # `mask`, `shaft_const` and `mask_const` are module-level constants
    t_field = None
    for k in range(4):
        good = True
        t_field = field.copy()
        for i in range(field.shape[0]):
            for j in range(mask[(i + k) % 4], field.shape[1], 4):
                if t_field[i, j] != shaft_const:
                    t_field[i, j] = mask_const
                else:
                    good = False
        else:
            # all rows processed: keep this offset k if it fit cleanly
            if good:
                break
    return t_field
def additive_log_ratio(X: np.ndarray, ind: int = -1):
    """Additive log ratio transform."""
    Y = X.copy()
    assert Y.ndim in [1, 2]
    dimensions = Y.shape[Y.ndim - 1]
    if ind < 0:
        ind += dimensions
    if Y.ndim == 2:
        Y = np.divide(Y, Y[:, ind][:, np.newaxis])
        Y = np.log(Y[:, [i for i in range(dimensions) if not i == ind]])
    else:
        Y = np.divide(X, X[ind])
        Y = np.log(Y[[i for i in range(dimensions) if not i == ind]])
    return Y
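# Hedged round-trip sketch for the two ALR transforms above. The `close`
# (closure) helper used by inverse_additive_log_ratio is assumed to renormalize
# compositions to unit sum; a minimal stand-in is defined here so the sketch is
# self-contained.
import numpy as np

def close(X: np.ndarray) -> np.ndarray:
    if X.ndim == 2:
        return X / X.sum(axis=1, keepdims=True)
    return X / X.sum()

comp = np.array([[0.2, 0.3, 0.5],
                 [0.1, 0.6, 0.3]])
alr = additive_log_ratio(comp, ind=-1)          # shape (2, 2)
back = inverse_additive_log_ratio(alr, ind=-1)  # shape (2, 3)
print(np.allclose(comp, back))                  # expected: True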
def _find(self, goban_img: np.ndarray):
    """The stones detection main algorithm. Delegate work to each Region and
    show some results on an image.

    Baselines:
    - trust contours to find single stones, but this will fail to detect all
      stones in a chain or cluster.
    - trust clustering to find all stones if their density is high enough,
      but this will fail if there are too few stones.
    -> dynamically assign one finder per region, periodically trying to
       introduce clustering where it's not assigned.
    """
    # 0. if startup phase: let background model get initialized (see StonesFinder._doframe())
    if self.total_f_processed < self.bg_init_frames:
        self.display_message("BACKGROUND SAMPLING ({0}/{1})".format(
            self.total_f_processed, self.bg_init_frames))
        return
    # 1. delegate all the work to Regions
    else:
        ref_stones = self.get_stones()
        canvas = goban_img.copy()
        for r in range(self.split):
            for c in range(self.split):
                self.regions[r, c].process(goban_img, ref_stones, canvas=canvas)
        self._show(canvas)
def calculate(X: np.ndarray, V: np.ndarray, E: np.array, B: np.array,
              t0: float, dt: float, iter_count: int) -> np.ndarray:
    """
    Computes electron trajectories in crossed fields.
    All coordinates, velocities and force vectors are assumed to be three-dimensional.
    Positions at time t + dt are computed with the Euler method.

    :param X: initial electron positions, a 2D numpy array
    :param V: initial electron velocities
    :param E: electric field strength vector
    :param B: magnetic induction vector
    :param t0: start time of the simulation
    :param dt: time step
    :param iter_count: number of algorithm steps
    :return: a 3D numpy array with the layout
        (number of iterations) x (number of electrons) x (space dimension, 3),
        holding the electron positions at every iteration of the algorithm.
        The array dtype is taken from the position array X.
    """
    # Electron charge-to-mass ratio:
    EM = -1.602176565e-19 / 9.10938356e-31
    t = t0
    electron_count = len(X)
    result = np.empty((iter_count, electron_count, DIMENSION), dtype=X.dtype)
    # Over the requested time span
    for j in range(iter_count):
        # Store the current coordinates in the result array
        result[j] = X.copy()
        # For every electron...
        for i in range(electron_count):
            # First update its position
            X[i] += V[i] * dt
            # Then compute the instantaneous acceleration
            a = (E + np.cross(V[i], B)) * EM
            # Then update the electron velocity
            V[i] += a * dt
        # With all electron positions updated, advance to the next time instant
        t += dt
    return result
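# Hedged usage sketch for calculate() above, assuming the module-level constant
# DIMENSION == 3. A single electron starts at rest in crossed E and B fields.
import numpy as np

DIMENSION = 3  # assumption: constant referenced inside calculate()

X = np.zeros((1, 3))            # one electron at the origin
V = np.zeros((1, 3))            # starting at rest
E = np.array([1e3, 0.0, 0.0])   # V/m, along x
B = np.array([0.0, 0.0, 1e-3])  # T, along z
trajectory = calculate(X, V, E, B, t0=0.0, dt=1e-12, iter_count=1000)
print(trajectory.shape)         # (1000, 1, 3): positions at every Euler step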
def shared_nearest_neighbors(D:np.ndarray, k:int=10, metric='distance'): """Transform distance matrix using shared nearest neighbors [1]_. SNN similarity is based on computing the overlap between the `k` nearest neighbors of two objects. SNN approaches try to symmetrize nearest neighbor relations using only rank and not distance information [2]_. Parameters ---------- D : np.ndarray The ``n x n`` symmetric distance (similarity) matrix. k : int, optional (default: 10) Neighborhood radius: The `k` nearest neighbors are used to calculate SNN. metric : {'distance', 'similarity'}, optional (default: 'distance') Define, whether the matrix `D` is a distance or similarity matrix Returns ------- D_snn : ndarray Secondary distance SNN matrix References ---------- .. [1] R. Jarvis and E. A. Patrick, “Clustering using a similarity measure based on shared near neighbors,” IEEE Transactions on Computers, vol. 22, pp. 1025–1034, 1973. .. [2] Flexer, A., & Schnitzer, D. (2013). Can Shared Nearest Neighbors Reduce Hubness in High-Dimensional Spaces? 2013 IEEE 13th International Conference on Data Mining Workshops, 460–467. http://doi.org/10.1109/ICDMW.2013.101 """ IO._check_distance_matrix_shape(D) IO._check_valid_metric_parameter(metric) if metric == 'distance': self_value = 0. sort_order = 1 exclude = np.inf if metric == 'similarity': self_value = 1. sort_order = -1 exclude = -np.inf distance = D.copy() np.fill_diagonal(distance, exclude) n = np.shape(distance)[0] knn = np.zeros_like(distance, bool) # find nearest neighbors for each point for i in range(n): di = distance[i, :] nn = np.argsort(di)[::sort_order] knn[i, nn[0:k]] = True D_snn = np.zeros_like(distance) for i in range(n): knn_i = knn[i, :] j_idx = slice(i+1, n) # using broadcasting Dij = np.sum(np.logical_and(knn_i, knn[j_idx, :]), 1) if metric == 'distance': D_snn[i, j_idx] = 1. - Dij / k else: # metric == 'similarity': D_snn[i, j_idx] = Dij / k D_snn += D_snn.T np.fill_diagonal(D_snn, self_value) return D_snn
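# Hedged usage sketch for shared_nearest_neighbors() above: build a Euclidean
# distance matrix and compute the secondary SNN distances (values lie in [0, 1]).
import numpy as np
from scipy.spatial.distance import pdist, squareform

rng = np.random.default_rng(42)
X = rng.normal(size=(100, 20))   # 100 points in 20 dimensions
D = squareform(pdist(X))         # primary Euclidean distances
D_snn = shared_nearest_neighbors(D, k=10, metric='distance')
print(D_snn.shape, D_snn.min(), D_snn.max())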
def get_sample_fn(self, image: np.ndarray, p_sample: float = 0.5) -> Tuple[np.ndarray, Callable]: """ Create sampling function and superpixel mask. Parameters ---------- image Image to be explained p_sample Probability for a pixel to be represented by the average value of its superpixel or the pixel value of a superimposed image Returns ------- segments Superpixels generated from image sample_fn Function returning the sampled images with label """ # check if grayscale images need to be converted to RGB for superpixel generation if not self.custom_segmentation and image.shape[-1] == 1: image_segm = np.repeat(image, 3, axis=2) else: image_segm = image.copy() segments = self.segmentation_fn(image_segm) # generate superpixels # each superpixel is a feature features = list(np.unique(segments)) n_features = len(features) # true label is prediction on original image true_label = self.predict_fn(np.expand_dims(image, axis=0))[0] def sample_fn_image( present: list, num_samples: int, compute_labels: bool = True ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """ Create sampling function by masking certain superpixels from the original image and replacing them with the pixel values from superimposed images. Parameters ---------- present List with features (= superpixels) present in the proposed anchor num_samples Number of samples used compute_labels Boolean whether to use labels coming from model predictions as 'true' labels Returns ------- raw_data "data" output concatenated with the indices of the chosen background images for each sample data Nb of samples times nb of features matrix indicating whether a feature (= a superpixel) is present in the sample or masked labels Create labels using model predictions if compute_labels equals True """ if not compute_labels: # for each sample, randomly sample whether a superpixel is represented by its average value or not data = np.random.randint(0, 2, num_samples * n_features).reshape( (num_samples, n_features)) data[:, present] = 1 # superpixels in candidate anchor need to be present return np.array([]), data, np.array([]) # for each sample, randomly sample whether a superpixel is represented by its # average value or not according to p_sample data = np.random.choice([0, 1], num_samples * n_features, p=[p_sample, 1 - p_sample]).reshape( (num_samples, n_features)) data[:, present] = 1 # superpixels in candidate anchor need to be present # for each sample, need to sample one of the background images chosen = np.random.choice(range(len(self.images_background)), data.shape[0], replace=True) # create masked images imgs = [] for d, r in zip(data, chosen): temp = copy.deepcopy(image) zeros = np.where( d == 0)[0] # unused superpixels for the sample # create mask for each superpixel not present in the sample mask = np.zeros(segments.shape).astype(bool) for z in zeros: mask[segments == z] = True # for mask: replace values with those of background image temp[mask] = self.images_background[r][mask] imgs.append(temp) imgs = np.array(imgs) preds = self.predict_fn(imgs) # make prediction on masked images # check if label for the masked images are the same as the true label labels = np.array((preds == true_label).astype(int)) # concat data and indices of chosen background images for each sample raw_data = np.hstack( (data, chosen.reshape(-1, 1))) # nb of samples * (nb of superpixels + 1) return raw_data, data, labels if type(self.images_background) == np.ndarray: return segments, sample_fn_image # create fudged image where the pixel value in each superpixel is set to the 
average over the # superpixel for each channel fudged_image = image.copy() for x in np.unique(segments): fudged_image[segments == x] = [ np.mean(image[segments == x][:, i]) for i in range(image.shape[-1]) ] def sample_fn_fudged( present: list, num_samples: int, compute_labels: bool = True ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: """ Create sampling function by masking certain superpixels from the original image and replacing them with that superpixel's average value. Parameters ---------- present List with features (= superpixels) present in the proposed anchor num_samples Number of samples used compute_labels Boolean whether to use labels coming from model predictions as 'true' labels Returns ------- raw_data Same as data data Nb of samples times nb of features matrix indicating whether a feature (= a superpixel) is present in the sample or masked labels Create labels using model predictions if compute_labels equals True """ if not compute_labels: # for each sample, randomly sample whether a superpixel is represented by its average value or not data = np.random.randint(0, 2, num_samples * n_features).reshape( (num_samples, n_features)) data[:, present] = 1 # superpixels in candidate anchor need to be present return np.array([]), data, np.array([]) # for each sample, randomly sample whether a superpixel is represented by its # average value or not according to p_sample data = np.random.choice([0, 1], num_samples * n_features, p=[p_sample, 1 - p_sample]).reshape( (num_samples, n_features)) data[:, present] = 1 # superpixels in candidate anchor need to be present # create perturbed (fudged) image for each sample using image masks imgs = [] for row in data: temp = copy.deepcopy(image) zeros = np.where( row == 0)[0] # superpixels to be averaged for the sample # create mask for each pixel in the superpixels that are averaged mask = np.zeros(segments.shape).astype(bool) for z in zeros: mask[segments == z] = True temp[mask] = fudged_image[mask] imgs.append(temp) imgs = np.array(imgs) preds = self.predict_fn(imgs) # make prediction on masked images # check if labels for the masked images are the same as the true label labels = (preds == true_label).astype(int) raw_data = data return raw_data, data, labels return segments, sample_fn_fudged
def mutual_proximity_empiric( D: np.ndarray, metric: str = "distance", test_set_ind: np.ndarray = None, verbose: int = 0 ): """Transform a distance matrix with Mutual Proximity (empiric distribution). Applies Mutual Proximity (MP) [1]_ on a distance/similarity matrix using the empiric data distribution (EXACT, rather SLOW). The resulting secondary distance/similarity matrix should show lower hubness. Parameters ---------- D : ndarray or csr_matrix - ndarray: The ``n x n`` symmetric distance or similarity matrix. - csr_matrix: The ``n x n`` symmetric similarity matrix. NOTE: In case of sparse ``D`, zeros are interpreted as missing values and ignored during calculations. Thus, results may differ from using a dense version. metric : {'distance', 'similarity'}, optional (default: 'distance') Define, whether matrix `D` is a distance or similarity matrix. NOTE: In case of sparse `D`, only 'similarity' is supported. test_sed_ind : ndarray, optional (default: None) Define data points to be hold out as part of a test set. Can be: - None : Rescale all distances - ndarray : Hold out points indexed in this array as test set. verbose : int, optional (default: 0) Increasing level of output (progress report). Returns ------- D_mp : ndarray Secondary distance MP empiric matrix. References ---------- .. [1] Schnitzer, D., Flexer, A., Schedl, M., & Widmer, G. (2012). Local and global scaling reduce hubs in space. The Journal of Machine Learning Research, 13(1), 2871–2902. """ # Initialization n = D.shape[0] log = Logging.ConsoleLogging() # Check input IO._check_distance_matrix_shape(D) IO._check_valid_metric_parameter(metric) if metric == "similarity": self_value = 1 exclude_value = np.inf else: # metric == 'distance': self_value = 0 exclude_value = -np.inf if issparse(D): raise ValueError("MP sparse only supports similarity matrices.") if test_set_ind is None: pass # TODO implement # train_set_ind = slice(0, n) elif not np.all(~test_set_ind): raise NotImplementedError("MP empiric does not yet support train/" "test splits.") # train_set_ind = np.setdiff1d(np.arange(n), test_set_ind) # Start MP D = D.copy() if issparse(D): return _mutual_proximity_empiric_sparse(D, test_set_ind, verbose, log) # ensure correct self distances (NOT done for sparse matrices!) np.fill_diagonal(D, exclude_value) D_mp = np.zeros_like(D) # Calculate MP empiric for i in range(n - 1): if verbose and ((i + 1) % 1000 == 0 or i == n - 2): log.message("MP_empiric: {} of {}.".format(i + 1, n - 1), flush=True) # Calculate only triu part of matrix j_idx = i + 1 dI = D[i, :][np.newaxis, :] dJ = D[j_idx:n, :] d = D[j_idx:n, i][:, np.newaxis] if metric == "similarity": D_mp[i, j_idx:] = np.sum((dI <= d) & (dJ <= d), 1) / (n - 1) else: # metric == 'distance': D_mp[i, j_idx:] = 1 - (np.sum((dI > d) & (dJ > d), 1) / (n - 1)) # Mirror, so that matrix is symmetric D_mp += D_mp.T np.fill_diagonal(D_mp, self_value) return D_mp
def hubness(D:np.ndarray, k:int=5, metric='distance', verbose:int=0): """Compute hubness of a distance matrix. Hubness [1]_ is the skewness of the `k`-occurrence histogram (reverse nearest neighbor count, i.e. how often does a point occur in the `k`-nearest neighbor lists of other points). Parameters ---------- D : ndarray The ``n x n`` symmetric distance (similarity) matrix. k : int, optional (default: 5) Neighborhood size for `k`-occurence. metric : {'distance', 'similarity'}, optional (default: 'distance') Define, whether matrix `D` is a distance or similarity matrix verbose : int, optional (default: 0) Increasing level of output (progress report). Returns ------- S_k : float Hubness (skewness of `k`-occurence distribution) D_k : ndarray `k`-nearest neighbor lists N_k : ndarray `k`-occurence list References ---------- .. [1] Radovanović, M., Nanopoulos, A., & Ivanović, M. (2010). Hubs in Space : Popular Nearest Neighbors in High-Dimensional Data. Journal of Machine Learning Research, 11, 2487–2531. Retrieved from http://jmlr.csail.mit.edu/papers/volume11/radovanovic10a/ radovanovic10a.pdf """ log = Logging.ConsoleLogging() IO._check_distance_matrix_shape(D) IO._check_valid_metric_parameter(metric) if metric == 'distance': d_self = np.inf sort_order = 1 if metric == 'similarity': d_self = -np.inf sort_order = -1 if verbose: log.message("Hubness calculation (skewness of {}-occurence)".format(k)) D = D.copy() D_k = np.zeros((k, D.shape[1]), dtype=np.float32) n = D.shape[0] if issparse(D): pass # correct self-distance must be ensured upstream for sparse else: # Set self dist to inf np.fill_diagonal(D, d_self) # make non-finite (NaN, Inf) appear at the end of the sorted list D[~np.isfinite(D)] = d_self for i in range(n): if verbose and ((i+1)%10000==0 or i+1==n): log.message("NN: {} of {}.".format(i+1, n), flush=True) if issparse(D): d = D[i, :].toarray().ravel() # dense copy of one row else: # normal ndarray d = D[i, :] d[i] = d_self d[~np.isfinite(d)] = d_self # Randomize equal values in the distance matrix rows to avoid the # problem case if all numbers to sort are the same, which would yield # high hubness, even if there is none. rp = np.random.permutation(n) d2 = d[rp] d2idx = np.argsort(d2, axis=0)[::sort_order] D_k[:, i] = rp[d2idx[0:k]] # N-occurence N_k = np.bincount(D_k.astype(int).ravel(), minlength=n) # Hubness S_k = stats.skew(N_k) # return k-hubness, k-nearest neighbors, k-occurence if verbose: log.message("Hubness calculation done.", flush=True) return S_k, D_k, N_k
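# Hedged usage sketch for hubness() above: i.i.d. Gaussian data in high
# dimensions typically shows clearly positive k-occurrence skewness.
import numpy as np
from scipy.spatial.distance import pdist, squareform

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 100))  # 500 points, 100 dimensions
D = squareform(pdist(X))
S_k, D_k, N_k = hubness(D, k=5, metric='distance')
print(S_k)                       # skewness of the 5-occurrence distribution
print(N_k.sum())                 # 500 * 5: every point contributes k neighbors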
def __init__(self, mean: np.ndarray, cov: np.ndarray) -> None:
    super().__init__()
    self._mean = mean.flatten()
    self._cov = cov.copy()
def _update_obs(array: np.ndarray):
    return torch.unsqueeze(torch.from_numpy(array.copy()), dim=0)
def _projected_sinkhorn( self, x: np.ndarray, x_init: np.ndarray, cost_matrix: np.ndarray, eps: np.ndarray ) -> np.ndarray: """ The projected sinkhorn_optimizer. :param x: Current adversarial examples. :param x_init: An array with the original inputs. :param cost_matrix: A non-negative cost matrix. :param eps: Maximum perturbation that the attacker can introduce. :return: Adversarial examples. """ # Normalize inputs normalization = x_init.reshape(x.shape[0], -1).sum(-1).reshape(x.shape[0], 1, 1, 1) x = x.copy() / normalization x_init = x_init.copy() / normalization # Dimension size for each example m = np.prod(x_init.shape[1:]) # Initialize beta = np.log(np.ones(x.shape) / m) exp_beta = np.exp(-beta) psi = np.ones(x.shape[0]) var_k = np.expand_dims(np.expand_dims(np.expand_dims(psi, -1), -1), -1) var_k = np.exp(-var_k * cost_matrix - 1) convergence = -np.inf for _ in range(self.projected_sinkhorn_max_iter): # Block coordinate descent iterates x_init[x_init == 0.0] = EPS_LOG # Prevent divide by zero in np.log alpha = np.log(self._local_transport(var_k, exp_beta, self.kernel_size)) - np.log(x_init) exp_alpha = np.exp(-alpha) beta = ( self.regularization * np.exp(self.regularization * x) * self._local_transport(var_k, exp_alpha, self.kernel_size) ) beta[beta > 1e-10] = np.real(lambertw(beta[beta > 1e-10])) beta -= self.regularization * x exp_beta = np.exp(-beta) # Newton step var_g = -eps + self._batch_dot( exp_alpha, self._local_transport(cost_matrix * var_k, exp_beta, self.kernel_size) ) var_h = -self._batch_dot( exp_alpha, self._local_transport(cost_matrix * cost_matrix * var_k, exp_beta, self.kernel_size) ) delta = var_g / var_h # Ensure psi >= 0 tmp = np.ones(delta.shape) neg = psi - tmp * delta < 0 while neg.any() and np.min(tmp) > 1e-2: tmp[neg] /= 2 neg = psi - tmp * delta < 0 psi = np.maximum(psi - tmp * delta, 0) # Update K var_k = np.expand_dims(np.expand_dims(np.expand_dims(psi, -1), -1), -1) var_k = np.exp(-var_k * cost_matrix - 1) # Check for convergence next_convergence = self._projected_sinkhorn_evaluation( x, x_init, alpha, exp_alpha, beta, exp_beta, psi, var_k, eps, ) if (np.abs(convergence - next_convergence) <= 1e-4 + 1e-4 * np.abs(next_convergence)).all(): break convergence = next_convergence result = (beta / self.regularization + x) * normalization return result
def spectral_heart_rate(filtered_sig: np.ndarray, fs: Real, hr_fs_band: Optional[Sequence[Real]] = None, sig_fmt: str = "channel_first", mode: str = 'hr', verbose: int = 0) -> Real: """ finished, NOT checked, compute heart rate of a ecg signal using spectral method (from the frequency domain) Parameters: ----------- filtered_sig: ndarray, the filtered 12-lead ecg signal, with units in mV fs: real number, sampling frequency of `filtered_sig` hr_fs_band: sequence of real number, optional, frequency band (bounds) of heart rate sig_fmt: str, default "channel_first", format of the multi-lead ecg signal, 'channel_last' (alias 'lead_last'), or 'channel_first' (alias 'lead_first', original) mode: str, default 'hr', mode of computation (return mean heart rate or mean rr intervals), can also be 'heart_rate' (alias of 'hr'), and 'rr' (with an alias of 'rr_interval'), case insensitive verbose: int, default 0, print verbosity Returns: -------- ret_val: real number, mean heart rate of the ecg signal, with units in bpm; or mean rr intervals, with units in ms NOTE: for high frequency signal with short duration, the lowest frequency of the spectrogram might be too high for computing heart rate """ assert sig_fmt.lower() in [ 'channel_first', 'lead_first', 'channel_last', 'lead_last' ] if sig_fmt.lower() in ['channel_last', 'lead_last']: s = filtered_sig.T else: s = filtered_sig.copy() # psd of shape (c,n,k), freqs of shape (n,) # where n = length of signal, c = number of leads, k rel. to freq bands # freqs, _, psd = SS.spectrogram(s, fs, axis=-1) freqs, psd = SS.welch(s, fs, axis=-1) if not _check_feasibility(freqs): raise ValueError( "it is not feasible to compute heart rate in frequency domain") fs_band = hr_fs_band or FeatureCfg.spectral_hr_fs_band assert len( fs_band ) >= 2, "frequency band of heart rate should at least has 2 bounds" fs_band = sorted(fs_band) fs_band = [fs_band[0], fs_band[-1]] if verbose >= 1: print(f"signal shape = {s.shape}") print(f"fs_band = {fs_band}") print(f"freqs.shape = {freqs.shape}, psd.shape = {psd.shape}") print(f"freqs = {freqs.tolist()}") inds_of_interest = np.where((fs_band[0] <= freqs) & (freqs <= fs_band[-1]))[0] # psd_of_interest of shape (c, m), freqs_of_interest of shape (m,) # where m = length of inds_of_interest freqs_of_interest = freqs[inds_of_interest] psd_of_interest = psd[..., inds_of_interest] peak_inds = np.argmax(psd_of_interest, axis=-1) if verbose >= 1: print(f"inds_of_interest = {inds_of_interest.tolist()}") print(f"freqs_of_interest = {freqs_of_interest.tolist()}") print( f"peak_inds.shape = {peak_inds.shape}, peak_inds = {peak_inds.tolist()}" ) print(f"psd_of_interest.shape = {psd_of_interest.shape}") # averaging at a neighborhood of `peak_idx` n_nbh = 1 psd_mask = np.zeros_like(psd_of_interest, dtype=int) for l in range(psd_mask.shape[0]): psd_mask[l, max(0, peak_inds[l] - n_nbh):min(psd_mask.shape[-1], peak_inds[l] + n_nbh)] = 1 psd_of_interest = psd_of_interest * psd_mask # ret_val with units in second^{-1} ret_val = np.mean( np.dot(psd_of_interest, freqs_of_interest) / np.sum(psd_of_interest, axis=-1)) if mode.lower() in ['hr', 'heart_rate']: ret_val = 60 * ret_val elif mode.lower() in ['rr', 'rr_interval']: ret_val = 1000 / ret_val return ret_val
def extract_buildings(x: np.ndarray):
    """ Returns a mask of the buildings in x """
    buildings = x.copy()
    buildings[x < 4] = 1
    buildings[x >= 4] = 0
    return buildings
def _conjugate_sinkhorn(self, x: np.ndarray, grad: np.ndarray, cost_matrix: np.ndarray) -> np.ndarray: """ The conjugate sinkhorn_optimizer. :param x: Current adversarial examples. :param grad: The loss gradients. :param cost_matrix: A non-negative cost matrix. :return: Adversarial examples. """ # Normalize inputs normalization = x.reshape(x.shape[0], -1).sum(-1).reshape(x.shape[0], 1, 1, 1) x = x.copy() / normalization # Dimension size for each example m = np.prod(x.shape[1:]) # Initialize alpha = np.log(np.ones(x.shape) / m) + 0.5 exp_alpha = np.exp(-alpha) beta = -self.regularization * grad beta = beta.astype(np.float64) exp_beta = np.exp(-beta) # Check for overflow if (exp_beta == np.inf).any(): raise ValueError("Overflow error in `_conjugate_sinkhorn` for exponential beta.") cost_matrix_new = cost_matrix.copy() + 1 cost_matrix_new = np.expand_dims(np.expand_dims(cost_matrix_new, 0), 0) i_nonzero = self._batch_dot(x, self._local_transport(cost_matrix_new, grad, self.kernel_size)) != 0 i_nonzero_ = np.zeros(alpha.shape).astype(bool) i_nonzero_[:, :, :, :] = np.expand_dims(np.expand_dims(np.expand_dims(i_nonzero, -1), -1), -1) psi = np.ones(x.shape[0]) var_k = np.expand_dims(np.expand_dims(np.expand_dims(psi, -1), -1), -1) var_k = np.exp(-var_k * cost_matrix - 1) convergence = -np.inf for _ in range(self.conjugate_sinkhorn_max_iter): # Block coordinate descent iterates x[x == 0.0] = EPS_LOG # Prevent divide by zero in np.log alpha[i_nonzero_] = (np.log(self._local_transport(var_k, exp_beta, self.kernel_size)) - np.log(x))[ i_nonzero_ ] exp_alpha = np.exp(-alpha) # Newton step var_g = -self.eps_step + self._batch_dot( exp_alpha, self._local_transport(cost_matrix * var_k, exp_beta, self.kernel_size) ) var_h = -self._batch_dot( exp_alpha, self._local_transport(cost_matrix * cost_matrix * var_k, exp_beta, self.kernel_size) ) delta = var_g / var_h # Ensure psi >= 0 tmp = np.ones(delta.shape) neg = psi - tmp * delta < 0 while neg.any() and np.min(tmp) > 1e-2: tmp[neg] /= 2 neg = psi - tmp * delta < 0 psi[i_nonzero] = np.maximum(psi - tmp * delta, 0)[i_nonzero] # Update K var_k = np.expand_dims(np.expand_dims(np.expand_dims(psi, -1), -1), -1) var_k = np.exp(-var_k * cost_matrix - 1) # Check for convergence next_convergence = self._conjugated_sinkhorn_evaluation(x, alpha, exp_alpha, exp_beta, psi, var_k) if (np.abs(convergence - next_convergence) <= 1e-4 + 1e-4 * np.abs(next_convergence)).all(): break convergence = next_convergence result = exp_beta * self._local_transport(var_k, exp_alpha, self.kernel_size) result[~i_nonzero] = 0 result *= normalization return result
def _sink_callback(self, y: np.ndarray, meta: FrameMetaData):
    r"""Callback where features are collected."""
    self._y.append(y.copy())
    self._starts.append(meta.time)
    self._ends.append(meta.time + meta.lengthSec)
def train(self, x_array: np.ndarray, y_array: np.ndarray, epochs: int = None, batch_size: int = None, epoch_callback=None, scale_data=False, shuffle_data=False): if not epochs: raise ValueError("Missing required kwarg: epochs") if not batch_size: batch_size = int(1e20) optimizer = torch.optim.Adam(self.mlp.parameters()) # optimizer = torch.optim.RMSprop(self.mlp.parameters()) x_array = x_array.copy() y_array = y_array.copy() if scale_data: self._input_scaler = StandardScaler() self._input_scaler.fit(x_array) self._target_scaler = StandardScaler() self._target_scaler.fit(y_array) self._input_scaler.transform(x_array, copy=False) self._target_scaler.transform(y_array, copy=False) n_batches = int(np.ceil(1.0 * x_array.shape[0] / batch_size)) losses = np.zeros(epochs) for t in range(epochs): if shuffle_data: idx = np.random.permutation(x_array.shape[0]) x_array = x_array[idx] y_array = y_array[idx] for batch in range(n_batches): idx_lb = batch_size * batch idx_up = min(batch_size * (batch + 1), x_array.shape[0]) x_batch = x_array[idx_lb:idx_up] y_batch = y_array[idx_lb:idx_up] x_variable = torch.from_numpy(x_batch.astype(np.float32)).to( self.device) # type: tensor.Tensor y_variable = torch.from_numpy(y_batch.astype(np.float32)).to( self.device) # type: tensor.Tensor assert hasattr(y_variable, 'requires_grad') y_variable.requires_grad = False output = self.mlp.forward(x_variable) loss = self.loss_function(output, y_variable) optimizer.zero_grad() loss.backward() optimizer.step() losses[t] += loss.item() losses[t] /= n_batches if callable(epoch_callback): epoch_callback(t, losses[t]) else: print(t, losses[t]) return losses
def __init__(self, grid: np.ndarray):
    self._grid = grid.copy()
def intrinsic_dimension(X:np.ndarray, k1:int=6, k2:int=12, estimator:str='levina', metric:str='vector', trafo:str='var', mem_threshold:int=5000): """Calculate intrinsic dimension based on the MLE by Levina and Bickel [1]_. Parameters ---------- X : ndarray - An ``m x n`` vector data matrix with ``n`` objects in an ``m`` dimensional feature space - An ``n x n`` distance matrix. NOTE: The type must be defined via parameter `metric`! k1 : int, optional (default: 6) Start of neighborhood range to search in. k2 : int, optional (default: 12) End of neighborhood range to search in. estimator : {'levina', 'mackay'}, optional (default: 'levina') Determine the summation strategy: see [2]_. metric : {'vector', 'distance'}, optional (default: 'vector') Determine data type of `X`. NOTE: the MLE was derived for euclidean distances. Using other dissimilarity measures may lead to undefined results. trafo : {None, 'std', 'var'}, optional (default: 'var') Transform vector data. - None: no transformation - 'std': standardization - 'var': subtract mean, divide by variance (default behavior of Laurens van der Maaten's DR toolbox; most likely for other ID/DR techniques). mem_treshold : int, optional, default: 5000 Controls speed-memory usage trade-off: If number of points is higher than the given value, don't calculate complete distance matrix at once (fast, high memory), but per row (slower, less memory). Returns ------- d_mle : int Intrinsic dimension estimate (rounded to next integer) References ---------- .. [1] Levina, E., & Bickel, P. (2004). Maximum likelihood estimation of intrinsic dimension. Advances in Neural Information …, 17, 777–784. http://doi.org/10.2307/2335172 .. [2] http://www.inference.phy.cam.ac.uk/mackay/dimension/ """ n = X.shape[0] if estimator not in ['levina', 'mackay']: raise ValueError("Parameter 'estimator' must be 'levina' or 'mackay'.") if k1 < 1 or k2 < k1 or k2 >= n: raise ValueError("Invalid neighborhood: Please make sure that " "0 < k1 <= k2 < n. (Got k1={} and k2={}).". 
format(k1, k2)) X = X.copy().astype(float) if metric == 'vector': # New array with unique rows X = X[np.lexsort(np.fliplr(X).T)] if trafo is None: pass elif trafo == 'var': X -= X.mean(axis=0) # broadcast X /= X.var(axis=0) + 1e-7 # broadcast elif trafo == 'std': # Standardization X -= X.mean(axis=0) # broadcast X /= X.std(axis=0) + 1e-7 # broadcast else: raise ValueError("Transformation must be None, 'std', or 'var'.") # Compute matrix of log nearest neighbor distances X2 = (X**2).sum(1) if n <= mem_threshold: # speed-memory trade-off distance = X2.reshape(-1, 1) + X2 - 2*np.dot(X, X.T) #2x br.cast distance.sort(1) # Replace invalid values with a small number distance[distance<0] = 1e-7 knnmatrix = .5 * np.log(distance[:, 1:k2+1]) else: knnmatrix = np.zeros((n, k2)) for i in range(n): distance = np.sort(X2[i] + X2 - 2 * np.dot(X, X[i, :])) # Replace invalid values with a small number distance[distance < 0] = 1e-7 knnmatrix[i, :] = .5 * np.log(distance[1:k2+1]) elif metric == 'distance': raise NotImplementedError("ID currently only supports vector data.") #======================================================================= # # TODO calculation WRONG # X.sort(1) # X[X < 0] = 1e-7 # knnmatrix = np.log(X[:, 1:k2+1]) #======================================================================= elif metric == 'similarity': raise NotImplementedError("ID currently only supports vector data.") #======================================================================= # # TODO calculation WRONG # print("WARNING: using similarity data may return " # "undefined results.", file=sys.stderr) # X[X < 0] = 0 # distance = 1 - (X / X.max()) # knnmatrix = np.log(distance[:, 1:k2+1]) #======================================================================= else: raise ValueError("Parameter 'metric' must be 'vector' or 'distance'.") # Compute the ML estimate S = np.cumsum(knnmatrix, 1) indexk = np.arange(k1, k2+1) # broadcasted afterwards dhat = -(indexk - 2) / (S[:, k1-1:k2] - knnmatrix[:, k1-1:k2] * indexk) if estimator == 'levina': # Average over estimates and over values of k no_dims = dhat.mean() if estimator == 'mackay': # Average over inverses dhat **= -1 dhat_k = dhat.mean(0) no_dims = (dhat_k ** -1).mean() return int(no_dims.round())
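# Hedged usage sketch for intrinsic_dimension() above: data living on a
# 5-dimensional linear subspace of a 50-dimensional space should be estimated
# at (approximately) 5.
import numpy as np

rng = np.random.default_rng(1)
latent = rng.normal(size=(1000, 5))   # 5 intrinsic dimensions
A = rng.normal(size=(5, 50))
X = latent @ A                        # embedded in 50 ambient dimensions
d_mle = intrinsic_dimension(X, k1=6, k2=12, estimator='levina', metric='vector')
print(d_mle)                          # expected to be close to 5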
def fill_in_data(data_all_original: np.ndarray, data_mean):
    # fill every NaN in data_all_original with data_mean, at the corresponding locations
    data_all_original = data_all_original.copy()
    assert np.isscalar(data_mean)
    data_all_original[np.isnan(data_all_original)] = data_mean
    return data_all_original
def hubness(D:np.ndarray, k:int=5, metric='distance', verbose:int=0, n_jobs:int=-1): """Compute hubness of a distance matrix. Hubness [1]_ is the skewness of the `k`-occurrence histogram (reverse nearest neighbor count, i.e. how often does a point occur in the `k`-nearest neighbor lists of other points). Parameters ---------- D : ndarray The ``n x n`` symmetric distance (similarity) matrix. k : int, optional (default: 5) Neighborhood size for `k`-occurence. metric : {'distance', 'similarity'}, optional (default: 'distance') Define, whether matrix `D` is a distance or similarity matrix verbose : int, optional (default: 0) Increasing level of output (progress report). n_jobs : int, optional (default: -1) Number of parallel processes spawned for hubness calculation. Default value (-1): number of available CPUs. Returns ------- S_k : float Hubness (skewness of `k`-occurence distribution) D_k : ndarray `k`-nearest neighbor lists N_k : ndarray `k`-occurence list References ---------- .. [1] Radovanović, M., Nanopoulos, A., & Ivanović, M. (2010). Hubs in Space : Popular Nearest Neighbors in High-Dimensional Data. Journal of Machine Learning Research, 11, 2487–2531. Retrieved from http://jmlr.csail.mit.edu/papers/volume11/radovanovic10a/ radovanovic10a.pdf """ log = Logging.ConsoleLogging() IO._check_distance_matrix_shape(D) IO._check_valid_metric_parameter(metric) if metric == 'distance': d_self = np.inf sort_order = 1 if metric == 'similarity': d_self = -np.inf sort_order = -1 if verbose: log.message("Hubness calculation (skewness of {}-occurence)".format(k)) # Initialization n = D.shape[0] D = D.copy() D_k = np.zeros((k, D.shape[1]), dtype=np.float32 ) if issparse(D): pass # correct self-distance must be ensured upstream for sparse else: # Set self dist to inf np.fill_diagonal(D, d_self) # make non-finite (NaN, Inf) appear at the end of the sorted list D[~np.isfinite(D)] = d_self # Parallelization if n_jobs == -1: # take all cpus NUMBER_OF_PROCESSES = mp.cpu_count() # @UndefinedVariable else: NUMBER_OF_PROCESSES = n_jobs tasks = [] batches = [] batch_size = n // NUMBER_OF_PROCESSES for i in range(NUMBER_OF_PROCESSES-1): batches.append( np.arange(i*batch_size, (i+1)*batch_size) ) batches.append( np.arange((NUMBER_OF_PROCESSES-1)*batch_size, n) ) for idx, batch in enumerate(batches): submatrix = D[batch[0]:batch[-1]+1] tasks.append((_partial_hubness, (k, d_self, log, sort_order, batch, submatrix, idx, n, verbose))) task_queue = mp.Queue() # @UndefinedVariable done_queue = mp.Queue() # @UndefinedVariable for task in tasks: task_queue.put(task) for i in range(NUMBER_OF_PROCESSES): # @UnusedVariable mp.Process(target=_worker, args=(task_queue, done_queue)).start() # @UndefinedVariable for i in range(len(tasks)): # @UnusedVariable rows, Dk_part = done_queue.get() D_k[:, rows[0]:rows[-1]+1] = Dk_part for i in range(NUMBER_OF_PROCESSES): # @UnusedVariable task_queue.put('STOP') # k-occurence N_k = np.bincount(D_k.astype(int).ravel()) # Hubness S_k = stats.skew(N_k) if verbose: log.message("Hubness calculation done.", flush=True) # return hubness, k-nearest neighbors, N occurence return S_k, D_k, N_k
def soft_convert_objects( values: np.ndarray, datetime: bool = True, numeric: bool = True, timedelta: bool = True, coerce: bool = False, copy: bool = True, ): """ if we have an object dtype, try to coerce dates and/or numbers """ validate_bool_kwarg(datetime, "datetime") validate_bool_kwarg(numeric, "numeric") validate_bool_kwarg(timedelta, "timedelta") validate_bool_kwarg(coerce, "coerce") validate_bool_kwarg(copy, "copy") conversion_count = sum((datetime, numeric, timedelta)) if conversion_count == 0: raise ValueError("At least one of datetime, numeric or timedelta must be True.") elif conversion_count > 1 and coerce: raise ValueError( "Only one of 'datetime', 'numeric' or " "'timedelta' can be True when when coerce=True." ) if not is_object_dtype(values.dtype): # If not object, do not attempt conversion values = values.copy() if copy else values return values # If 1 flag is coerce, ensure 2 others are False if coerce: # Immediate return if coerce if datetime: from pandas import to_datetime return to_datetime(values, errors="coerce").to_numpy() elif timedelta: from pandas import to_timedelta return to_timedelta(values, errors="coerce").to_numpy() elif numeric: from pandas import to_numeric return to_numeric(values, errors="coerce") # Soft conversions if datetime: # GH 20380, when datetime is beyond year 2262, hence outside # bound of nanosecond-resolution 64-bit integers. try: values = lib.maybe_convert_objects(values, convert_datetime=True) except OutOfBoundsDatetime: pass if timedelta and is_object_dtype(values.dtype): # Object check to ensure only run if previous did not convert values = lib.maybe_convert_objects(values, convert_timedelta=True) if numeric and is_object_dtype(values.dtype): try: converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) # If all NaNs, then do not-alter values = converted if not isna(converted).all() else values values = values.copy() if copy else values except Exception: pass return values
def create_dla_partitions( dataset: XY, dirichlet_dist: np.ndarray = np.empty(0), num_partitions: int = 100, concentration: float = 0.5, ) -> Tuple[np.ndarray, XYList]: """Create ibalanced non-iid partitions using Dirichlet Latent Allocation(LDA) without resampling. Args: dataset (XY): Datasets containing samples X and labels Y. dirichlet_dist (numpy.ndarray, optional): previously generated distribution to be used. This s useful when applying the same distribution for train and validation sets. num_partitions (int, optional): Number of partitions to be created. Defaults to 100. concentration (float, optional): Dirichlet Concentration (:math:`\\alpha`) parameter. An :math:`\\alpha \\to \\Inf` generates uniform distributions over classes. An :math:`\\alpha \\to 0.0` generates on class per client. Defaults to 0.5. Returns: Tuple[numpy.ndarray, XYList]: List of XYList containing partitions for each dataset. """ x, y = dataset x, y = shuffle(x, y) x, y = sort_by_label(x, y) x_l: List[np.ndarray] = list(x) # Get number of classes and verify if they matching with classes, num_samples_per_class = np.unique(y, return_counts=True) num_classes: int = classes.size remaining_indices = [j for j in range(num_classes)] if dirichlet_dist.size != 0: dist_num_partitions, dist_num_classes = dirichlet_dist.shape if dist_num_classes != num_classes: raise ValueError(f"""Number of classes in dataset ({num_classes}) differs from the one in the provided partitions {dist_num_classes}.""" ) if dist_num_partitions != num_partitions: raise ValueError( f"""The value in `num_partitions` ({num_partitions}) differs from the one from `dirichlet_dist` {dist_num_partitions}.""" ) # Assuming balanced distribution num_samples = x.shape[0] num_samples_per_partition = num_samples // num_partitions boundaries: List[int] = np.append([0], np.cumsum(num_samples_per_class, dtype=np.int)) list_samples_per_class: List[List[np.ndarray]] = [ x_l[boundaries[idx]:boundaries[idx + 1]] # noqa: E203 for idx in range(num_classes) # noqa: E203 ] if dirichlet_dist.size == 0: dirichlet_dist = np.random.dirichlet(alpha=[concentration] * num_classes, size=num_partitions) original_dirichlet_dist = dirichlet_dist.copy() data: List[List[Optional[np.ndarray]]] = [[] for _ in range(num_partitions)] target: List[List[Optional[np.ndarray]]] = [[] for _ in range(num_partitions)] for partition_id in range(num_partitions): for _ in range(num_samples_per_partition): sample_class: int = np.where( np.random.multinomial(1, dirichlet_dist[partition_id]) == 1)[0][0] sample: np.ndarray = list_samples_per_class[sample_class].pop() data[partition_id].append(sample) target[partition_id].append(sample_class) # If last sample of the class was drawn, # then set pdf to zero for that class. num_samples_per_class[sample_class] -= 1 if num_samples_per_class[sample_class] == 0: remaining_indices.remove( np.where(classes == sample_class)[0][0]) # Be careful to distinguish between original zero-valued # classes and classes that are empty dirichlet_dist[:, sample_class] = 0.0 dirichlet_dist[:, remaining_indices] += 1e-5 sum_rows = np.sum(dirichlet_dist, axis=1) dirichlet_dist = dirichlet_dist / (sum_rows[:, np.newaxis] + np.finfo(float).eps) partitions = [(np.concatenate([data[idx]]), np.concatenate([target[idx]])[..., np.newaxis]) for idx in range(num_partitions)] return partitions, original_dirichlet_dist
def _minimal_perturbation(self, x: np.ndarray, y: np.ndarray, mask: np.ndarray) -> np.ndarray: """ Iteratively compute the minimal perturbation necessary to make the class prediction change. Stop when the first adversarial example was found. :param x: An array with the original inputs. :param y: Target values (class labels) one-hot-encoded of shape (nb_samples, nb_classes). :return: An array holding the adversarial examples. """ adv_x = x.copy() # Compute perturbation with implicit batching for batch_id in range( int(np.ceil(adv_x.shape[0] / float(self.batch_size)))): batch_index_1, batch_index_2 = ( batch_id * self.batch_size, (batch_id + 1) * self.batch_size, ) batch = adv_x[batch_index_1:batch_index_2] batch_labels = y[batch_index_1:batch_index_2] mask_batch = mask if mask is not None: # Here we need to make a distinction: if the masks are different for each input, we need to index # those for the current batch. Otherwise (i.e. mask is meant to be broadcasted), keep it as it is. if len(mask.shape) == len(x.shape): mask_batch = mask[batch_index_1:batch_index_2] # Get perturbation perturbation = self._compute_perturbation(batch, batch_labels, mask_batch) # Get current predictions active_indices = np.arange(len(batch)) if isinstance(self.eps, np.ndarray): if len(self.eps.shape) == len( x.shape) and self.eps.shape[0] == x.shape[0]: current_eps = self.eps_step[batch_index_1:batch_index_2] partial_stop_condition = ( current_eps <= self.eps[batch_index_1:batch_index_2]).all() else: current_eps = self.eps_step partial_stop_condition = (current_eps <= self.eps).all() else: current_eps = self.eps_step partial_stop_condition = current_eps <= self.eps while active_indices.size > 0 and partial_stop_condition: # Adversarial crafting current_x = self._apply_perturbation( x[batch_index_1:batch_index_2], perturbation, current_eps) # Update batch[active_indices] = current_x[active_indices] adv_preds = self.estimator.predict(batch) # If targeted active check to see whether we have hit the target, otherwise head to anything but if self.targeted: active_indices = np.where( np.argmax(batch_labels, axis=1) != np.argmax( adv_preds, axis=1))[0] else: active_indices = np.where( np.argmax(batch_labels, axis=1) == np.argmax( adv_preds, axis=1))[0] # Update current eps and check the stop condition if isinstance(self.eps, np.ndarray): if len(self.eps.shape) == len( x.shape) and self.eps.shape[0] == x.shape[0]: current_eps = current_eps + self.eps_step[ batch_index_1:batch_index_2] partial_stop_condition = ( current_eps <= self.eps[batch_index_1:batch_index_2]).all() else: current_eps = current_eps + self.eps_step partial_stop_condition = (current_eps <= self.eps).all() else: current_eps = current_eps + self.eps_step partial_stop_condition = current_eps <= self.eps adv_x[batch_index_1:batch_index_2] = batch return adv_x
def __init__(self, x: np.ndarray):
    """
    :param x: list of the rocket's coordinates, m
    """
    self.x = x.copy()
    self.dim = len(x)
def _compute( self, x: np.ndarray, x_init: np.ndarray, y: np.ndarray, mask: Optional[np.ndarray], eps: Union[int, float, np.ndarray], eps_step: Union[int, float, np.ndarray], project: bool, random_init: bool, ) -> np.ndarray: if random_init: n = x.shape[0] m = np.prod(x.shape[1:]).item() random_perturbation = random_sphere(n, m, eps, self.norm).reshape( x.shape).astype(ART_NUMPY_DTYPE) if mask is not None: random_perturbation = random_perturbation * ( mask.astype(ART_NUMPY_DTYPE)) x_adv = x.astype(ART_NUMPY_DTYPE) + random_perturbation if self.estimator.clip_values is not None: clip_min, clip_max = self.estimator.clip_values x_adv = np.clip(x_adv, clip_min, clip_max) else: if x.dtype == np.object: x_adv = x.copy() else: x_adv = x.astype(ART_NUMPY_DTYPE) # Compute perturbation with implicit batching for batch_id in range(int(np.ceil(x.shape[0] / float(self.batch_size)))): batch_index_1, batch_index_2 = batch_id * self.batch_size, ( batch_id + 1) * self.batch_size batch_index_2 = min(batch_index_2, x.shape[0]) batch = x_adv[batch_index_1:batch_index_2] batch_labels = y[batch_index_1:batch_index_2] mask_batch = mask if mask is not None: # Here we need to make a distinction: if the masks are different for each input, we need to index # those for the current batch. Otherwise (i.e. mask is meant to be broadcasted), keep it as it is. if len(mask.shape) == len(x.shape): mask_batch = mask[batch_index_1:batch_index_2] # Get perturbation perturbation = self._compute_perturbation(batch, batch_labels, mask_batch) # Compute batch_eps and batch_eps_step if isinstance(eps, np.ndarray): if len(eps.shape) == len( x.shape) and eps.shape[0] == x.shape[0]: batch_eps = eps[batch_index_1:batch_index_2] batch_eps_step = eps_step[batch_index_1:batch_index_2] else: batch_eps = eps batch_eps_step = eps_step else: batch_eps = eps batch_eps_step = eps_step # Apply perturbation and clip x_adv[batch_index_1:batch_index_2] = self._apply_perturbation( batch, perturbation, batch_eps_step) if project: if x_adv.dtype == np.object: for i_sample in range(batch_index_1, batch_index_2): if isinstance( batch_eps, np.ndarray ) and batch_eps.shape[0] == x_adv.shape[0]: perturbation = projection( x_adv[i_sample] - x_init[i_sample], batch_eps[i_sample], self.norm) else: perturbation = projection( x_adv[i_sample] - x_init[i_sample], batch_eps, self.norm) x_adv[i_sample] = x_init[i_sample] + perturbation else: perturbation = projection( x_adv[batch_index_1:batch_index_2] - x_init[batch_index_1:batch_index_2], batch_eps, self.norm) x_adv[batch_index_1:batch_index_2] = x_init[ batch_index_1:batch_index_2] + perturbation return x_adv
def mutual_proximity_gammai(D: np.ndarray, metric: str = "distance", test_set_ind: np.ndarray = None, verbose: int = 0): """Transform a distance matrix with Mutual Proximity (indep. Gamma distr.). Applies Mutual Proximity (MP) [1]_ on a distance/similarity matrix. Gammai variant assumes independent Gamma distributed distances (FAST). The resulting second. distance/similarity matrix should show lower hubness. Parameters ---------- D : ndarray or csr_matrix - ndarray: The ``n x n`` symmetric distance or similarity matrix. - csr_matrix: The ``n x n`` symmetric similarity matrix. NOTE: In case of sparse `D`, zeros are interpreted as missing values and ignored during calculations. Thus, results may differ from using a dense version. metric : {'distance', 'similarity'}, optional (default: 'distance') Define, whether matrix `D` is a distance or similarity matrix. NOTE: In case of sparse `D`, only 'similarity' is supported. test_sed_ind : ndarray, optional (default: None) Define data points to be hold out as part of a test set. Can be: - None : Rescale all distances - ndarray : Hold out points indexed in this array as test set. verbose : int, optional (default: 0) Increasing level of output (progress report). Returns ------- D_mp : ndarray Secondary distance MP gammai matrix. References ---------- .. [1] Schnitzer, D., Flexer, A., Schedl, M., & Widmer, G. (2012). Local and global scaling reduce hubs in space. The Journal of Machine Learning Research, 13(1), 2871–2902. """ # Initialization n = D.shape[0] log = Logging.ConsoleLogging() # Checking input IO._check_distance_matrix_shape(D) IO._check_valid_metric_parameter(metric) if metric == "similarity": self_value = 1 else: # metric == 'distance': self_value = 0 if test_set_ind is None: train_set_ind = slice(0, n) else: train_set_ind = np.setdiff1d(np.arange(n), test_set_ind) # Start MP if verbose: log.message("Mutual proximity Gammai rescaling started.", flush=True) D = D.copy() if issparse(D): return _mutual_proximity_gammai_sparse(D, test_set_ind, verbose, log) np.fill_diagonal(D, np.nan) mu = np.nanmean(D[train_set_ind], 0) va = np.nanvar(D[train_set_ind], 0, ddof=1) A = (mu ** 2) / va B = va / mu D_mp = np.zeros_like(D) # MP gammai for i in range(n): if verbose and ((i + 1) % 1000 == 0 or i + 1 == n): log.message("MP_gammai: {} of {}".format(i + 1, n), flush=True) j_idx = slice(i + 1, n) if metric == "similarity": p1 = _local_gamcdf(D[i, j_idx], A[i], B[i]) p2 = _local_gamcdf(D[j_idx, i], A[j_idx], B[j_idx]) D_mp[i, j_idx] = (p1 * p2).ravel() else: # distance p1 = 1 - _local_gamcdf(D[i, j_idx], A[i], B[i]) p2 = 1 - _local_gamcdf(D[j_idx, i], A[j_idx], B[j_idx]) D_mp[i, j_idx] = (1 - p1 * p2).ravel() # Mirroring the matrix D_mp += D_mp.T # set correct self dist/sim np.fill_diagonal(D_mp, self_value) return D_mp
def homogeneous_func(values: np.ndarray):
    if values.size == 0:
        return values.copy()
    return np.apply_along_axis(func, self.axis, values)
def time_conv( dt: float, c_in: _np.ndarray, rtd: _np.ndarray, c_equilibration: _typing.Optional[_np.ndarray] = None, logger: _typing.Optional[_logger.RtdLogger] = None) -> _np.ndarray: """Perform convolution on time axis. First time-point of `c_in` and `c_rtd` is at t == 0 (and not `dt`). Convolution is applied to all species of `c_in`. Parameters ---------- dt Time step. c_in Starting concentration profile for each specie. `c_in`.shape == [n_species, n_time_steps] rtd Residence time distribution (= unit impulse response). c_equilibration Initial concentrations inside the unit operation. E.g.: Composition of equilibration buffer for flow-through chromatography. logger Logger for messaging events. Returns ------- c_out: ndarray Final concentration profile for each specie. `c_out`.shape == `c_in`.shape """ # it can happen that array is empty, then just return empty one if c_in.size == 0: if logger: logger.i("Convolution: Got empty c_in") return c_in.copy() if rtd.size == 0: if logger: logger.w("Convolution: Got empty bio_rtd") return c_in.copy() if c_equilibration is not None and _np.all(c_equilibration == 0): c_equilibration = None c_out = _np.zeros_like(c_in) # simulate pre-flushing and washout c_ext = c_in n_prepend = rtd.size if c_equilibration is not None else 0 if c_equilibration is not None: c_ext = _np.pad(c_ext, ((0, 0), (n_prepend, 0)), mode="constant") c_ext[:, :n_prepend] = c_equilibration # convolution for j in range(c_out.shape[0]): c_out[j] = _np.convolve(c_ext[j], rtd)[n_prepend:n_prepend + c_in.shape[1]] * dt return c_out
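# Hedged usage sketch for time_conv() above: a step inlet profile convolved
# with an exponential residence-time distribution normalized to unit area.
import numpy as np

dt = 0.1
t = np.arange(0, 60, dt)
rtd = np.exp(-t / 5.0)
rtd /= rtd.sum() * dt               # unit-area RTD
c_in = np.ones((1, t.size))         # one species, step inlet at c = 1
c_out = time_conv(dt, c_in, rtd)
print(c_out[0, -1])                 # approaches 1 as the unit washes in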
def make_move(self, board: np.ndarray, move: int) -> np.ndarray:
    moving_player = self.get_active_player(board)
    new_board: np.ndarray = board.copy()
    i, j = move // self.board_width, move % self.board_width
    new_board[i, j] = moving_player
    return new_board
def proximal_l0(self, x: np.ndarray, eta: float) -> np.ndarray:
    z = x.copy()
    z[np.abs(x) < np.sqrt(2 * eta)] = 0
    return z
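# proximal_l0() above is the proximal operator of eta * ||x||_0, i.e. hard
# thresholding: entries with |x_i| < sqrt(2 * eta) are zeroed, the rest are
# kept unchanged. A hedged illustration of the same rule as a plain function:
import numpy as np

x = np.array([-2.0, -0.5, 0.3, 1.5])
eta = 0.5                            # threshold sqrt(2 * eta) = 1.0
z = x.copy()
z[np.abs(x) < np.sqrt(2 * eta)] = 0
print(z)                             # [-2.   0.   0.   1.5]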
def _impute_inactive(self, X: np.ndarray) -> np.ndarray:
    X = X.copy()
    X[~np.isfinite(X)] = -1
    return X
def draw_inference_on_hook2(img: np.ndarray, cleaned_keypoints, kpt_labels: List[str], kpt_skeleton: List[list], score: float, bbox: BBox, vis_keypoints: list, kpt_confidences: list, conf_idx_list: list, not_conf_idx_list: list, conf_keypoints, conf_kpt_labels, not_conf_keypoints, not_conf_kpt_labels, conf_thresh: float = 0.3, show_bbox_border: bool = False, bbox_label_mode: str = 'euler', index_offset: int = 0, diameter=1): # printj.red(len(vis_keypoints)) result = img.copy() # diameter = 10 # printj.yellow(f'bbox = {bbox}') # printj.yellow(f'vis_keypoints = {vis_keypoints}') # printj.yellow(f'dist = {self.dist([0, 1], [1, 2])}') point_a = vis_keypoints[0] point_b = vis_keypoints[1] point_cb = vis_keypoints[2] point_c = vis_keypoints[3] point_cd = vis_keypoints[4] point_d = vis_keypoints[5] point_e = vis_keypoints[6] # point_dl = vis_keypoints[5] # point_dr = vis_keypoints[6] len_ab = dist(point_a, point_b) # printj.red(len_ab) if diameter <= 0: length_ratio = np.inf else: length_ratio = len_ab / diameter pass_condition = (length_ratio > 4) bbox_color = [0, 0, 255] # fail: ab < 4*d_rl kpt_ab_color = [50, 255, 255] c_text = 'ab < 4D' if pass_condition: bbox_color = [0, 255, 0] # pass: ab > 4*d_rl kpt_ab_color = [0, 255, 0] c_text = 'ab > 4D' # bbox_height = np.absolute(bbox.ymax - bbox.ymin) # bbox_width = np.absolute(bbox.xmax - bbox.xmin) # length_diff = np.absolute(bbox_height - bbox_width) # if bbox_height > bbox_width: # bbox.xmin = bbox.xmin - int(length_diff/2) # bbox.xmax = bbox.xmax + int(length_diff/2) # else: # bbox.ymin = bbox.ymin - int(length_diff/2) # bbox.ymax = bbox.ymax + int(length_diff/2) # printj.cyan(bbox) # printj.cyan(bbox.to_int()) # printj.cyan(bbox.to_int().to_list()) if bbox_label_mode == 'euler': # bbox_text = str(round(length_ratio, 2)) + 'D' bbox_text = f'h {score}' result = draw_bbox(img=result, color=bbox_color, bbox=bbox, text=bbox_text, label_only=not show_bbox_border, label_orientation='top') # result = draw_bbox(img=result, color=bbox_color, bbox=bbox, text=c_text, # label_only=not show_bbox_border, label_orientation='bottom') result = draw_bbox(img=result, color=bbox_color, bbox=bbox, text=str(score), label_only=not show_bbox_border, label_orientation='bottom') result = draw_skeleton(img=result, keypoints=vis_keypoints, keypoint_skeleton=kpt_skeleton, index_offset=index_offset, thickness=2, color=[255, 0, 0], ignore_kpt_idx=[]) # ab result = draw_skeleton(img=result, keypoints=vis_keypoints, keypoint_skeleton=kpt_skeleton, index_offset=index_offset, thickness=2, color=kpt_ab_color, ignore_kpt_idx=[2, 3, 4, 5, 6]) # d_lr # result = draw_skeleton( # img=result, keypoints=vis_keypoints, keypoint_skeleton=kpt_skeleton, index_offset=index_offset, thickness=2, color=[255, 255, 0], # ignore_kpt_idx=[0, 1, 2, 3, 4] # ) result = draw_keypoints(img=result, keypoints=vis_keypoints, radius=2, color=[0, 0, 255], keypoint_labels=kpt_labels, show_keypoints_labels=True, label_thickness=1, ignore_kpt_idx=conf_idx_list) if len(conf_keypoints) > 0: result = draw_keypoints(img=result, keypoints=vis_keypoints, radius=2, color=[0, 255, 0], keypoint_labels=kpt_labels, show_keypoints_labels=True, label_thickness=1, ignore_kpt_idx=not_conf_idx_list) return result, len_ab
def nankurt( values: np.ndarray, axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, ) -> float: """ Compute the sample excess kurtosis The statistic computed here is the adjusted Fisher-Pearson standardized moment coefficient G2, computed directly from the second and fourth central moment. Parameters ---------- values : ndarray axis: int, optional skipna : bool, default True mask : ndarray[bool], optional nan-mask if known Returns ------- result : float64 Unless input is a float array, in which case use the same precision as the input array. Examples -------- >>> import pandas.core.nanops as nanops >>> s = pd.Series([1, np.nan, 1, 3, 2]) >>> nanops.nankurt(s) -1.2892561983471076 """ values = extract_array(values, extract_numpy=True) mask = _maybe_get_mask(values, skipna, mask) if not is_float_dtype(values.dtype): values = values.astype("f8") count = _get_counts(values.shape, mask, axis) else: count = _get_counts(values.shape, mask, axis, dtype=values.dtype) if skipna and mask is not None: values = values.copy() np.putmask(values, mask, 0) mean = values.sum(axis, dtype=np.float64) / count if axis is not None: mean = np.expand_dims(mean, axis) adjusted = values - mean if skipna and mask is not None: np.putmask(adjusted, mask, 0) adjusted2 = adjusted ** 2 adjusted4 = adjusted2 ** 2 m2 = adjusted2.sum(axis, dtype=np.float64) m4 = adjusted4.sum(axis, dtype=np.float64) with np.errstate(invalid="ignore", divide="ignore"): adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3)) numer = count * (count + 1) * (count - 1) * m4 denom = (count - 2) * (count - 3) * m2 ** 2 # floating point error # # #18044 in _libs/windows.pyx calc_kurt follow this behavior # to fix the fperr to treat denom <1e-14 as zero numer = _zero_out_fperr(numer) denom = _zero_out_fperr(denom) if not isinstance(denom, np.ndarray): # if ``denom`` is a scalar, check these corner cases first before # doing division if count < 4: return np.nan if denom == 0: return 0 with np.errstate(invalid="ignore", divide="ignore"): result = numer / denom - adj dtype = values.dtype if is_float_dtype(dtype): result = result.astype(dtype) if isinstance(result, np.ndarray): result = np.where(denom == 0, 0, result) result[count < 4] = np.nan return result
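# A self-contained check of the G2 formula used above, written with plain NumPy instead of
# the pandas helpers; it reproduces the docstring value for [1, nan, 1, 3, 2] once the NaN
# is dropped. This is a sketch, not pandas' API.
import numpy as np

vals = np.array([1.0, 1.0, 3.0, 2.0])   # NaN already removed
n = vals.size
dev = vals - vals.mean()
m2 = (dev ** 2).sum()                    # sum of squared deviations
m4 = (dev ** 4).sum()                    # sum of fourth-power deviations
adj = 3 * (n - 1) ** 2 / ((n - 2) * (n - 3))
g2 = n * (n + 1) * (n - 1) * m4 / ((n - 2) * (n - 3) * m2 ** 2) - adj
print(g2)                                # -> approximately -1.2892561983471076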
def score(D:np.ndarray, target:np.ndarray, k=5, metric:str='distance', test_set_ind:np.ndarray=None, verbose:int=0): """Perform `k`-nearest neighbor classification. Use the ``n x n`` symmetric distance matrix `D` and target class labels `target` to perform a `k`-NN experiment (leave-one-out cross-validation or evaluation of test set; see parameter `test_set_ind`). Ties are broken by the nearest neighbor. Parameters ---------- D : ndarray The ``n x n`` symmetric distance (similarity) matrix. target : ndarray (of dtype=int) The ``n x 1`` target class labels (ground truth). k : int or array_like (of dtype=int), optional (default: 5) Neighborhood size for `k`-NN classification. For each value in `k`, one `k`-NN experiment is performed. HINT: Providing more than one value for `k` is a cheap means to perform multiple `k`-NN experiments at once. Try e.g. ``k=[1, 5, 20]``. metric : {'distance', 'similarity'}, optional (default: 'distance') Define whether matrix `D` is a distance or similarity matrix. test_set_ind : ndarray, optional (default: None) Define data points to be held out as part of a test set. Can be: - None : Perform a LOO-CV experiment - ndarray : Hold out points indexed in this array as test set. Fit model to remaining data. Evaluate model on test set. verbose : int, optional (default: 0) Increasing level of output (progress report). Returns ------- acc : ndarray (shape=(n_k x 1), dtype=float) Classification accuracy (`n_k`... number of items in parameter `k`) HINT: Referring to the above example... ... ``acc[0]`` gives the accuracy of the ``k=1`` experiment. corr : ndarray (shape=(n_k x n), dtype=int) Raw vectors of correctly classified items HINT: ... ``corr[1, :]`` gives these items for the ``k=5`` experiment. cmat : ndarray (shape=(n_k x n_t x n_t), dtype=int) Confusion matrix (``n_t`` number of unique items in parameter target) HINT: ... ``cmat[2, :, :]`` gives the confusion matrix of the ``k=20`` experiment. """ # Check input sanity log = Logging.ConsoleLogging() IO._check_distance_matrix_shape(D) IO._check_distance_matrix_shape_fits_labels(D, target) IO._check_valid_metric_parameter(metric) if metric == 'distance': d_self = np.inf sort_order = 1 if metric == 'similarity': d_self = -np.inf sort_order = -1 # Copy, because data is changed D = D.copy() target = target.astype(int) if verbose: log.message("Start k-NN experiment.") # Handle LOO-CV vs. test set mode if test_set_ind is None: n = D.shape[0] test_set_ind = range(n) # dummy train_set_ind = n # dummy else: # number of points to be classified n = test_set_ind.size # Indices of training examples train_set_ind = np.setdiff1d(np.arange(n), test_set_ind) # Number of k-NN parameters try: k_length = k.size except AttributeError as e: if isinstance(k, int): k = np.array([k]) k_length = k.size elif isinstance(k, list): k = np.array(k) k_length = k.size else: raise e acc = np.zeros((k_length, 1)) corr = np.zeros((k_length, D.shape[0])) cl = np.sort(np.unique(target)) cmat = np.zeros((k_length, len(cl), len(cl))) classes = target.copy() for idx, cur_class in enumerate(cl): # change labels to 0, 1, ..., len(cl)-1 classes[target == cur_class] = idx cl = range(len(cl)) # Classify each point in test set for i in test_set_ind: seed_class = classes[i] if issparse(D): row = D.getrow(i).toarray().ravel() else: row = D[i, :] row[i] = d_self # Sort points in training set according to distance # Randomize, in case there are several points of same distance # (this is especially relevant for SNN rescaling) rp = train_set_ind rp = np.random.permutation(rp) d2 = row[rp] d2idx = np.argsort(d2, axis=0)[::sort_order] idx = rp[d2idx] # More than one k is useful for cheap multiple k-NN experiments at once for j in range(k_length): nn_class = classes[idx[0:k[j]]] cs = np.bincount(nn_class.astype(int)) max_cs = np.where(cs == np.max(cs))[0] # "tie": use nearest neighbor if len(max_cs) > 1: if seed_class == nn_class[0]: acc[j] += 1/n corr[j, i] = 1 cmat[j, seed_class, nn_class[0]] += 1 # majority vote else: if cl[max_cs[0]] == seed_class: acc[j] += 1/n corr[j, i] = 1 cmat[j, seed_class, cl[max_cs[0]]] += 1 if verbose: log.message("Finished k-NN experiment.") return acc, corr, cmat
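# Hypothetical usage sketch for score(): a leave-one-out k-NN experiment on a small
# symmetric distance matrix. It assumes the surrounding module's dependencies (Logging, IO,
# issparse) are importable; the toy data and exact accuracies are illustrative only.
import numpy as np
from scipy.spatial.distance import pdist, squareform

X = np.random.rand(20, 3)              # toy data
target = np.repeat([0, 1], 10)         # two classes
D = squareform(pdist(X))               # n x n distance matrix
acc, corr, cmat = score(D, target, k=[1, 5], metric='distance')
print(acc.ravel())                     # one LOO-CV accuracy per value of k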
def _get_values( values: np.ndarray, skipna: bool, fill_value: Any = None, fill_value_typ: Optional[str] = None, mask: Optional[np.ndarray] = None, ) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]: """ Utility to get the values view, mask, dtype, dtype_max, and fill_value. If both mask and fill_value/fill_value_typ are not None and skipna is True, the values array will be copied. For input arrays of boolean or integer dtypes, copies will only occur if a precomputed mask, a fill_value/fill_value_typ, and skipna=True are provided. Parameters ---------- values : ndarray input array to potentially compute mask for skipna : bool boolean for whether NaNs should be skipped fill_value : Any value to fill NaNs with fill_value_typ : str Set to '+inf' or '-inf' to handle dtype-specific infinities mask : Optional[np.ndarray] nan-mask if known Returns ------- values : ndarray Potential copy of input value array mask : Optional[ndarray[bool]] Mask for values, if deemed necessary to compute dtype : dtype dtype for values dtype_max : dtype platform independent dtype fill_value : Any fill value used """ # In _get_values is only called from within nanops, and in all cases # with scalar fill_value. This guarantee is important for the # maybe_upcast_putmask call below assert is_scalar(fill_value) values = extract_array(values, extract_numpy=True) mask = _maybe_get_mask(values, skipna, mask) dtype = values.dtype if needs_i8_conversion(values.dtype): # changing timedelta64/datetime64 to int64 needs to happen after # finding `mask` above values = np.asarray(values.view("i8")) dtype_ok = _na_ok_dtype(dtype) # get our fill value (in case we need to provide an alternative # dtype for it) fill_value = _get_fill_value( dtype, fill_value=fill_value, fill_value_typ=fill_value_typ ) if skipna and (mask is not None) and (fill_value is not None): values = values.copy() if dtype_ok and mask.any(): np.putmask(values, mask, fill_value) # promote if needed else: values, _ = maybe_upcast_putmask(values, mask, fill_value) # return a platform independent precision dtype dtype_max = dtype if is_integer_dtype(dtype) or is_bool_dtype(dtype): dtype_max = np.int64 elif is_float_dtype(dtype): dtype_max = np.float64 return values, mask, dtype, dtype_max, fill_value
def etrs_to_enu(positions: np.ndarray, location: EarthLocation = nenufar_position) -> np.ndarray: r""" Local east, north, up (ENU) coordinates centered on the position ``location``. The conversion from cartesian coordinates :math:`(x, y, z)` to ENU :math:`(e, n, u)` is done as follows: .. math:: \pmatrix{ e \\ n \\ u } = \pmatrix{ -\sin(b) & \cos(b) & 0\\ -\sin(l) \cos(b) & -\sin(l) \sin(b) & \cos(l)\\ \cos(l)\cos(b) & \cos(l) \sin(b) & \sin(l) } \pmatrix{ \delta x\\ \delta y\\ \delta z } where :math:`b` is the longitude, :math:`l` is the latitude and :math:`(\delta x, \delta y, \delta z)` are the cartesian coordinates with respect to the center ``location``. :param positions: ETRS positions :type positions: :class:`~numpy.ndarray` :param location: Center of ENU frame. Default is NenuFAR's location. :type location: :class:`~astropy.coordinates.EarthLocation` :returns: ENU coordinates in meters, same shape as ``positions``. :rtype: :class:`~numpy.ndarray` :Example: .. code-block:: python from nenupy import nenufar_position from nenupy.astro import etrs_to_enu etrs_positions = np.array([ [4323934.57369062, 165585.71569665, 4670345.01314493], [4323949.24009871, 165567.70236494, 4670332.18016874] ]) enu = etrs_to_enu( positions=etrs_positions, location=nenufar_position ) """ assert (len(positions.shape)==2) and positions.shape[1]==3,\ 'positions should be an array of shape (n, 3)' xyz = positions.copy() xyz_center = geo_to_etrs(location) xyz -= xyz_center cos_lat = np.cos(location.lat.rad) sin_lat = np.sin(location.lat.rad) cos_lon = np.cos(location.lon.rad) sin_lon = np.sin(location.lon.rad) transformation = np.array( [[-sin_lon, cos_lon, 0], [-sin_lat * cos_lon, -sin_lat * sin_lon, cos_lat], [cos_lat * cos_lon, cos_lat * sin_lon, sin_lat]]) return np.matmul(xyz, transformation.T)
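# Quick sanity sketch (NumPy only): the ENU matrix built from latitude and longitude is a
# proper rotation, so R @ R.T should be the identity and det(R) should be close to 1.
# The coordinates below are arbitrary, not NenuFAR's actual location.
import numpy as np

lat, lon = np.radians(47.38), np.radians(2.19)
R = np.array([
    [-np.sin(lon),                np.cos(lon),               0.0],
    [-np.sin(lat) * np.cos(lon), -np.sin(lat) * np.sin(lon), np.cos(lat)],
    [ np.cos(lat) * np.cos(lon),  np.cos(lat) * np.sin(lon), np.sin(lat)],
])
print(np.allclose(R @ R.T, np.eye(3)))    # -> True
print(np.isclose(np.linalg.det(R), 1.0))  # -> True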
def nanskew( values: np.ndarray, axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, ) -> float: """ Compute the sample skewness. The statistic computed here is the adjusted Fisher-Pearson standardized moment coefficient G1. The algorithm computes this coefficient directly from the second and third central moment. Parameters ---------- values : ndarray axis: int, optional skipna : bool, default True mask : ndarray[bool], optional nan-mask if known Returns ------- result : float64 Unless input is a float array, in which case use the same precision as the input array. Examples -------- >>> import pandas.core.nanops as nanops >>> s = pd.Series([1, np.nan, 1, 2]) >>> nanops.nanskew(s) 1.7320508075688787 """ values = extract_array(values, extract_numpy=True) mask = _maybe_get_mask(values, skipna, mask) if not is_float_dtype(values.dtype): values = values.astype("f8") count = _get_counts(values.shape, mask, axis) else: count = _get_counts(values.shape, mask, axis, dtype=values.dtype) if skipna and mask is not None: values = values.copy() np.putmask(values, mask, 0) mean = values.sum(axis, dtype=np.float64) / count if axis is not None: mean = np.expand_dims(mean, axis) adjusted = values - mean if skipna and mask is not None: np.putmask(adjusted, mask, 0) adjusted2 = adjusted ** 2 adjusted3 = adjusted2 * adjusted m2 = adjusted2.sum(axis, dtype=np.float64) m3 = adjusted3.sum(axis, dtype=np.float64) # floating point error # # #18044 in _libs/windows.pyx calc_skew follow this behavior # to fix the fperr to treat m2 <1e-14 as zero m2 = _zero_out_fperr(m2) m3 = _zero_out_fperr(m3) with np.errstate(invalid="ignore", divide="ignore"): result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5) dtype = values.dtype if is_float_dtype(dtype): result = result.astype(dtype) if isinstance(result, np.ndarray): result = np.where(m2 == 0, 0, result) result[count < 3] = np.nan return result else: result = 0 if m2 == 0 else result if count < 3: return np.nan return result
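# Cross-check sketch for nanskew's adjusted Fisher-Pearson G1: scipy's bias-corrected
# skewness should agree with the docstring value for [1, nan, 1, 2] once the NaN is
# dropped. Only assumes scipy.stats is available.
import numpy as np
from scipy.stats import skew

print(skew(np.array([1.0, 1.0, 2.0]), bias=False))  # -> approximately 1.7320508075688787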
def logmart(A: np.ndarray, b: np.ndarray, *, relax: float = 1., x0: float = None, sigma: float = None, max_iter: int = 200) -> tuple: """ Log-MART iterative solver for A @ x = b. Iteration stops when chi-squared increases (after the first two iterations), falls below 0.7, or max_iter is reached. A is NxM array b is Nx1 vector returns (x, y_est, chi2, iterations) relax user specified relaxation constant (default is 1.) x0 user specified initial guess (N vector) (default is backprojection of b) max_iter user specified max number of iterations (default is 200) AUTHOR: Joshua Semeter LAST MODIFIED: 5-2015 Simple test problem A = np.diag([5, 5, 5]) x = np.array([1,2,3]) b = A @ x """ # %% parameter check if b.ndim != 1: raise ValueError('b must be a 1-D vector') if A.ndim != 2: raise ValueError('A must be a matrix') if A.shape[0] != b.size: raise ValueError('A and b number of rows must match') if not isinstance(relax, float): raise ValueError('relax must be a scalar float') b = b.copy() # needed to avoid modifying outside this function! # %% set defaults if sigma is None: sigma = np.ones_like(b) if x0 is None: # backproject x = A.T @ b / A.ravel().sum() xA = A @ x x = x * b.max() / xA.max() elif isinstance(x0, (float, int)) or x0.size == 1: # replicate x = x0 * np.ones_like(b) else: x = x0 # %% make sure there are no 0's in b b[b <= 1e-8] = 1e-8 # W=sigma; # W=linspace(1,0,size(A,1))'; # W=rand(size(A,1),1); W = np.ones(A.shape[0]) W = W / W.sum() i = 0 done = False arg = ((A @ x - b)/sigma)**2. chi2 = np.sqrt(arg.sum()) # %% iterate solution, plot estimated data (diag elems of x#A) while not done: i += 1 xold = x xA = A @ x t = (1/xA).min() C = relax*t*(1.-(xA/b)) x = x / (1 - x*(A.T @ (W*C))) # %% monitor solution chiold = chi2 chi2 = np.sqrt((((xA - b)/sigma)**2).sum()) # dchi2=(chi2-chiold); done = ((chi2 > chiold) & (i > 2)) | (i == max_iter) | (chi2 < 0.7) # %% plot # figure(9); clf; hold off; # Nest=reshape(x,69,83); # imagesc(Nest); caxis([0,1e11]); # set(gca,'YDir','normal'); set(gca,'XDir','normal'); # pause(0.02) y_est = A @ xold return xold, y_est, chi2, i
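# Usage sketch based on the simple test problem mentioned in the docstring; it only assumes
# logmart() as defined above. For this exact, noise-free diagonal system the backprojected
# starting guess already matches the solution, so the recovered x is [1, 2, 3].
import numpy as np

A = np.diag([5.0, 5.0, 5.0])
x_true = np.array([1.0, 2.0, 3.0])
b = A @ x_true
x_est, y_est, chi2, n_iter = logmart(A, b)
print(x_est)    # -> [1. 2. 3.]
print(n_iter)   # number of iterations actually used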
def piece_wise_time_conv( dt: float, f_in: _np.ndarray, c_in: _np.ndarray, t_cycle: float, rt_mean: float, rtd: _np.ndarray, c_equilibration: _typing.Optional[_np.ndarray] = None, c_wash: _typing.Optional[_np.ndarray] = None, logger: _typing.Optional[_logger.RtdLogger] = None) -> _np.ndarray: """Perform convolution on time axis with periodic switching. First time-point of `c_in` and `c_rtd` is at t == 0 (and not `dt`). Convolution is applied to all species of `c_in`. Parameters ---------- dt Time step. f_in Flow rate profile. It has to be either constant or box-shaped. c_in Starting concentration profile for each specie. `c_in`.shape == [n_species, n_time_steps] t_cycle Switch cycle duration. rt_mean Delay between inlet and outlet switch times. rtd Residence time distribution (= unit impulse response). c_equilibration Composition of equilibration buffer. c_wash Composition of wash buffer. logger Logger for messaging events. Returns ------- c_out: ndarray Final concentration profile for each specie. `c_out`.shape == `c_in`.shape """ assert c_in.shape[1] == f_in.size assert t_cycle > 0 assert rt_mean >= 0 # If input array is empty, then return empty. if c_in.size == 0: if logger: logger.i("Convolution: Got empty c_in") return c_in.copy() elif rtd.size == 0: if logger: logger.w("Convolution: Got empty bio_rtd") return c_in.copy() elif f_in.sum() == 0: if logger: logger.i("Convolution: Got empty f_in") return _np.zeros_like(c_in) i_cycle = int(round(t_cycle / dt)) i_rt_mean = int(round(rt_mean / dt)) i_start, i_end = _vectors.true_start_and_end(f_in > 0) assert _np.all(f_in[i_start:i_end] == f_in.max()), \ "Flow rate profile must be boxed shaped" i_switch_inlet = _np.rint(_np.arange(i_start, i_end, t_cycle / dt)).astype(int) i_switch_inlet_off = _np.append(i_switch_inlet[1:], i_end) i_switch_outlet = (i_switch_inlet + i_rt_mean).clip(max=f_in.size) i_switch_outlet_off = _np.append( i_switch_outlet[1:], min(i_switch_outlet[-1] + i_cycle, f_in.size)) c_out = _np.zeros_like(c_in) for i in range(i_switch_inlet.size): # Inlet concentration profile for the cycle. # Profile is prolonged by wash buffer. c_conv_inlet = c_in[:, i_switch_inlet[i]:i_switch_outlet_off[i]].copy() c_conv_inlet[:, i_switch_inlet_off[i] - i_switch_inlet[i]:] = \ c_wash if c_wash is not None else 0 # Calculate outlet concentration profile. c_conv_outlet = time_conv(dt, c_conv_inlet, rtd, c_equilibration, logger) # Insert the result into outlet vector. c_out[:, i_switch_outlet[i]:i_switch_outlet_off[ i]] = c_conv_outlet[:, i_switch_outlet[i] - i_switch_inlet[i]:i_switch_outlet_off[i] - i_switch_inlet[i]] return c_out
async def output(self, offset: Tuple[int, ...], value: np.ndarray, present: np.ndarray) -> None: self.calls.append((offset, value.copy(), present.copy()))
def add_shafts(field: np.ndarray): t_field = field.copy() for shaft in conf.shafts: t_field[shaft[1], shaft[0]] = shaft_const return t_field
def transition(self, state: np.ndarray, action: np.ndarray) -> np.ndarray: """Action a = (i, j) swaps elements in positions i and j.""" new_state = state.copy() i, j = action new_state[[i, j]] = new_state[[j, i]] return new_state
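# Tiny illustration of the swap transition (NumPy only); state and action values are arbitrary.
import numpy as np

state = np.array([3, 1, 2])
i, j = (0, 2)
new_state = state.copy()
new_state[[i, j]] = new_state[[j, i]]
print(new_state)    # -> [2 1 3]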
def mutual_proximity_gauss(D: np.ndarray, metric: str = "distance", test_set_ind: np.ndarray = None, verbose: int = 0): """Transform a distance matrix with Mutual Proximity (normal distribution). Applies Mutual Proximity (MP) [1]_ on a distance/similarity matrix. Gauss variant assumes dependent normal distributions (VERY SLOW). The resulting secondary distance/similarity matrix should show lower hubness. Parameters ---------- D : ndarray The ``n x n`` symmetric distance or similarity matrix. metric : {'distance', 'similarity'}, optional (default: 'distance') Define whether matrix `D` is a distance or similarity matrix. test_set_ind : ndarray, optional (default: None) Define data points to be held out as part of a test set. Can be: - None : Rescale all distances - ndarray : Hold out points indexed in this array as test set. verbose : int, optional (default: 0) Increasing level of output (progress report). Returns ------- D_mp : ndarray Secondary distance MP gauss matrix. References ---------- .. [1] Schnitzer, D., Flexer, A., Schedl, M., & Widmer, G. (2012). Local and global scaling reduce hubs in space. The Journal of Machine Learning Research, 13(1), 2871–2902. """ # Initialization n = D.shape[0] log = Logging.ConsoleLogging() # Checking input IO._check_distance_matrix_shape(D) IO._check_valid_metric_parameter(metric) if metric == "similarity": self_value = 1 else: # metric == 'distance': self_value = 0 if issparse(D): log.error("Sparse matrices not supported by MP Gauss.") raise TypeError("Sparse matrices not supported by MP Gauss.") if test_set_ind is None: train_set_ind = slice(0, n) else: train_set_ind = np.setdiff1d(np.arange(n), test_set_ind) # Start MP D = D.copy() np.fill_diagonal(D, self_value) # np.fill_diagonal(D, np.nan) mu = np.mean(D[train_set_ind], 0) sd = np.std(D[train_set_ind], 0, ddof=0) # =========================================================================== # mu = np.nanmean(D[train_set_ind], 0) # sd = np.nanstd(D[train_set_ind], 0, ddof=0) # =========================================================================== # Code for the BadMatrixSigma error [derived from matlab] # =========================================================================== # eps = np.spacing(1) # epsmat = np.array([[1e5 * eps, 0], [0, 1e5 * eps]]) # =========================================================================== D_mp = np.zeros_like(D) # MP Gauss for i in range(n): if verbose and ((i + 1) % 1000 == 0 or i + 1 == n): log.message("MP_gauss: {} of {}.".format(i + 1, n)) for j in range(i + 1, n): # =================================================================== # mask = np.isnan(D[[i, j], :]) # D_mask = np.ma.array(D[[i, j], :], mask=mask) # c = np.ma.cov(D_mask, ddof=0) # =================================================================== c = np.cov(D[[i, j], :], ddof=0) x = np.array([D[i, j], D[j, i]]) m = np.array([mu[i], mu[j]]) low = np.tile(np.finfo(np.float32).min, 2) p12 = mvn.mvnun(low, x, m, c)[0] # [0]...p, [1]...inform if np.isnan(p12): # =============================================================== # power = 7 # while np.isnan(p12): # c += epsmat * (10**power) # p12 = mvn.mvnun(low, x, m, c)[0] # power += 1 # log.warning("p12 is NaN: i={}, j={}. Increased cov matrix by " # "O({}).".format(i, j, epsmat[0, 0]*(10**power))) # =============================================================== p12 = 0.0 log.warning("p12 is NaN: i={}, j={}. Set to zero.".format(i, j)) if metric == "similarity": D_mp[i, j] = p12 else: # distance p1 = norm.cdf(D[i, j], mu[i], sd[i]) p2 = norm.cdf(D[i, j], mu[j], sd[j]) D_mp[i, j] = p1 + p2 - p12 D_mp += D_mp.T np.fill_diagonal(D_mp, self_value) return D_mp
def sample(self, angles: np.ndarray, out: np.ndarray = None) -> np.ndarray: r""" Sample the continuous basis elements on the discrete set of angles in ``angles``. Optionally, store the resulting multidimensional array in ``out``. A value of ``nan`` is interpreted as the angle of a point placed on the origin of the axes. ``angles`` must be an array of shape `(1, N)`, where `N` is the number of points. Args: angles (~numpy.ndarray): angles at which to evaluate the basis elements out (~numpy.ndarray, optional): pre-existing array to use to store the output Returns: the sampled basis """ assert len(angles.shape) == 2 assert angles.shape[0] == 1 if out is None: out = np.empty( (self.shape[0], self.shape[1], self.dim, angles.shape[1])) assert out.shape == (self.shape[0], self.shape[1], self.dim, angles.shape[1]) # find points in the origin origin = np.isnan(angles) angles = angles.copy() angles[origin] = 0. angles -= self.axis # the basis vectors depend on the shape of the input and output irreps, # while their frequencies depend on the irreps frequencies if self.shape[0] == 2 and self.shape[1] == 2: out = psichi(angles, s=self.s, k=self.mu, gamma=self.gamma, out=out) elif self.shape[0] == 1 and self.shape[1] == 2: out[0, 0, ...] = np.cos(self.mu * angles + self.gamma) out[0, 1, ...] = np.sin(self.mu * angles + self.gamma) elif self.shape[0] == 2 and self.shape[1] == 1: out[0, 0, ...] = np.cos(self.mu * angles + self.gamma) out[1, 0, ...] = np.sin(self.mu * angles + self.gamma) elif self.shape[0] == 1 and self.shape[1] == 1: out[0, 0, ...] = np.cos(self.mu * angles + self.gamma) else: raise ValueError(f"Shape {self.shape} not recognized!") if self._has_non_zero_frequencies: # In the origin, only 0-frequencies are permitted. # Therefore, any non-0 frequency base is set to 0 in the origin if np.any(origin): mask = self._non_zero_frequencies * origin out *= 1 - mask return out
def gradient(self, input_tensor: np.ndarray) -> np.ndarray: _result = input_tensor.copy() _result[input_tensor >= 0] = 1 _result[input_tensor < 0] = 0 return _result
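# Minimal check (NumPy only): the gradient above is the subgradient of ReLU, i.e. a unit
# step that is 1 for non-negative inputs and 0 otherwise. Values are illustrative.
import numpy as np

x = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
g = x.copy()
g[x >= 0] = 1
g[x < 0] = 0
print(g)    # -> [0. 0. 1. 1. 1.]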