def from_macro_pkas(cls, macropkas: np.ndarray, ph_values: np.ndarray):
    """Instantiate a titration curve specified using pKas

    Parameters
    ----------
    macropkas - 1D-array of float pKa values
    ph_values - 1D-array of pH values that correspond to the curve
    """
    macropkas.sort()
    instance = cls()
    instance.pkas = macropkas
    instance.sems = np.zeros_like(macropkas)
    instance.populations = populations_from_macro_pka(macropkas, ph_values)
    instance.free_energies = free_energy_from_population(instance.populations)
    instance.ph_values = ph_values
    state_ids: List[str] = ["Deprotonated"]
    nbound: List[int] = [0]
    for n, pKa in enumerate(macropkas, start=1):
        state_ids.append(f"+{n:d} protons (pKa={pKa:.2f})")
        nbound.append(n)
    instance.state_ids = state_ids
    instance.charges = np.asarray(nbound)
    instance.mean_charge = instance.charges @ instance.populations
    # Set lowest value to 0
    instance.mean_charge -= int(round(min(instance.mean_charge)))
    return instance
def from_micro_pkas(cls, micropkas: np.ndarray, ph_values: np.ndarray):
    """Instantiate a titration curve specified using independent site micro pKas

    Parameters
    ----------
    micropkas - 1D-array of float pKa values
    ph_values - 1D-array of pH values that correspond to the curve
    """
    # Sort for convenience
    micropkas.sort()
    instance = cls()
    energies: List[np.ndarray] = list()
    state_ids: List[str] = list()
    # If we assume every pKa is an independent proton,
    # then the number of possible states is the powerset
    # of each equilibrium.
    for s, included_pks in enumerate(powerset(micropkas)):
        bound_protons = len(included_pks)
        # Free energy according to Ullmann (2003)
        energies.append(
            free_energy_from_pka(bound_protons, np.sum(included_pks), ph_values))
        # Identifier for each state
        state_ids.append("+".join(["{:.2f}".format(pk) for pk in included_pks]))
    instance.free_energies = np.asarray(energies)
    instance.populations = populations_from_free_energies(instance.free_energies)
    instance.ph_values = ph_values
    instance._state_ids = state_ids
    return instance
def _sort(img: np.ndarray):
    # Flatten the spatial dimensions, sort all pixels per channel,
    # then restore the original image shape.
    shape = img.shape
    img = img.reshape((img.shape[0] * img.shape[1], img.shape[2]))
    img.sort(0)
    return img.reshape(shape)
def convexPolygonArea(pivotPoint: typing.Tuple[float, float],
                      vertices: np.ndarray) -> float:
    """Determine the area of a convex polygon

    .. todo::
        This function must be reworked. The fact that it requires a known
        pivot point is a hack. There is certainly a better way to do this.

    :param pivotPoint: A point that is known to be within the polygon
    :type pivotPoint: tuple[float,float]
    :param vertices: An array of points that defines the shape
    :type vertices: np.ndarray[[float,float]]
    :return: The area of the polygon
    :rtype: float
    """
    # Order the vertices counterclockwise around the pivot point
    vertices = np.subtract(vertices, pivotPoint)
    vertices = list(vertices)
    vertices.sort(key=lambda coord: math.atan2(coord[1], coord[0]))
    vertices = np.array(vertices)
    x = vertices[:, 0]
    y = vertices[:, 1]
    # This one-line implementation of Gauss's Shoelace Formula
    # was written by Stack Overflow user Mahdi
    return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))
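# A minimal sanity check for convexPolygonArea (a sketch, assuming numpy and
# math are imported as in the function above): the unit square, pivoted at
# its centroid, should come out with area 1.0.
square = np.array([[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]])
assert np.isclose(convexPolygonArea((0.5, 0.5), square), 1.0)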
def sort(img: numpy.ndarray):
    img.sort(0)
    img.sort(1)
    img = PILImage.fromarray(img)
    return img
def _sort_data_by_column_inplace(data: np.ndarray, dupe_rank: np.ndarray):
    assert data.ndim == 2 and dupe_rank.shape == data.shape
    data.sort(0)
    for i in range(data.shape[1]):
        data[:, i] = data[dupe_rank[:, i], i]
    return data
def convert_sample_to_wait_times(sample: np.ndarray):
    # Wait times are the gaps between consecutive sorted event times.
    sample.sort()
    wait_times = []
    last = 0
    for time in sample:
        wait_times.append(time - last)
        last = time
    return wait_times
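# Usage sketch: the wait times are just the first differences of the sorted
# sample, so a vectorized equivalent is np.diff with a zero prepended.
sample = np.array([3.0, 1.0, 6.0])
assert convert_sample_to_wait_times(sample) == [1.0, 2.0, 3.0]
assert np.array_equal(np.diff(np.sort(sample), prepend=0.0), [1.0, 2.0, 3.0])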
def get_canonical_4_polygon(points: np.ndarray):
    # Interleaved [x0, y0, x1, y1, ...] coordinates -> list of (x, y) pairs
    points = list(zip(points[0::2], points[1::2]))
    points.sort(key=lambda x: x[0])
    # Order each side's pair of points by their y coordinate
    if points[0][1] > points[1][1]:
        points[0], points[1] = points[1], points[0]
    if points[2][1] < points[3][1]:
        points[2], points[3] = points[3], points[2]
    return points
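# Usage sketch: a flat [x0, y0, x1, y1, ...] unit square comes back as (x, y)
# pairs with the left side first, each side ordered by y.
quad = np.array([0, 0, 1, 0, 1, 1, 0, 1])
assert get_canonical_4_polygon(quad) == [(0, 0), (0, 1), (1, 1), (1, 0)]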
def calculateRMSE(originalData: np.ndarray, valuesFromDistribution: np.ndarray):
    # Compare the two samples rank-by-rank after sorting both in place.
    originalData.sort()
    valuesFromDistribution.sort()
    total = 0
    for index, i in enumerate(originalData):
        total += pow(i - valuesFromDistribution[index], 2)
    return math.sqrt(total / len(originalData))
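# Quick check: because both inputs are sorted before comparison, identical
# samples in different orders give an RMSE of exactly 0.
assert calculateRMSE(np.array([3.0, 1.0, 2.0]), np.array([1.0, 2.0, 3.0])) == 0.0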
def _create_masks(indices: np.ndarray) -> np.ndarray:
    indices.sort()
    masks = np.empty(len(indices) + 1, dtype=_QSMask_dtype)
    for i, x in enumerate(indices):
        masks[i] = (1 << (x - i)) - 1
    masks[-1] = ~0
    # Clear each mask's overlap with the one before it, so the masks
    # partition the bit positions into disjoint ranges.
    for i in range(len(indices), 0, -1):
        masks[i] &= ~masks[i - 1]
    return masks
def interpolate_precision(pr: np.ndarray, recalls: np.ndarray) -> np.ndarray:
    """
    Interpolates precision for a given set of recall values

    Interpolated precision takes the maximum precision over all recalls
    greater than r.

    Args:
        pr: (precision, recall) column-table
        recalls: specific recalls to interpolate a precision value for
    """
    recalls.sort()
    return np.array([np.max(pr[pr[:, 1] >= r, 0], initial=0) for r in recalls])
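# Worked example, assuming pr rows are (precision, recall) pairs: each recall
# gets the best precision achievable at that recall or beyond, and recalls no
# row reaches fall back to the `initial=0` default.
pr = np.array([[1.0, 0.1], [0.8, 0.4], [0.6, 0.5]])
print(interpolate_precision(pr, np.array([0.0, 0.45, 0.9])))  # [1.  0.6 0. ]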
def _sort_data_by_column_inplace(self, data: np.ndarray):
    """
    Use the pre-calculated duplicate-ranks array to sort random data with.
    This ensures that the resulting data (column by column) is ordered
    from lowest to highest.
    """
    assert data.ndim == 2 and self.data.duplicated_ranks_array.shape == data.shape
    data.sort(0)
    for i in range(data.shape[1]):
        data[:, i] = data[self.data.duplicated_ranks_array[:, i], i]
    return data
def union_find(scored_pairs: numpy.ndarray) -> Sequence[int]:
    root: Dict[RecordID, int] = {}
    components = {}

    edgelist = scored_pairs['pairs']
    labels = scored_pairs['label']

    it = numpy.nditer(edgelist, ['external_loop'])

    for i, (a, b) in enumerate(it):
        root_a = root.get(a)
        root_b = root.get(b)

        if root_a is None and root_b is None:
            root[a] = root[b] = i
            components[i] = array.array('I', [i])
        elif root_a is None or root_b is None:
            if root_a is None:
                b = a
                root_a = root_b
            root_a = cast(int, root_a)
            components[root_a].append(i)
            root[b] = root_a
        elif root_a != root_b:
            # Union by size: merge the smaller component into the larger one
            if len(components[root_a]) < len(components[root_b]):
                root_a, root_b = root_b, root_a
            components[root_a].extend(components[root_b])
            components[root_a].append(i)
            component_b = numpy.unique(edgelist[components[root_b]])
            for node in component_b:
                root[node] = root_a
            del components[root_b]
        else:
            components[root_a].append(i)

    for label, component in components.items():
        labels[component] = label

    # we want our selections to remain memmapped arrays
    # so we sort and get the indices where the components
    # change. This will allow us to slice pieces of the
    # memmapped array. Those slices will also be memmapped
    # arrays.
    scored_pairs.sort(order='label')

    return numpy.cumsum(
        numpy.unique(scored_pairs['label'], return_counts=True)[1])
def greedyMatching(dupes: numpy.ndarray) -> Links:
    A: Set[RecordID] = set()
    B: Set[RecordID] = set()

    # Consider candidate pairs from highest to lowest score
    dupes.sort(order='score')
    dupes = dupes[::-1]

    for (a, b), score in dupes:
        if a not in A and b not in B:
            A.add(a)
            B.add(b)
            yield (a, b), score
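# Usage sketch with a hypothetical structured dtype (the real record layout
# comes from the surrounding library): 'x' takes its best match 'y', which
# blocks the lower-scoring ('x', 'z') pair.
pairs = numpy.array([(('x', 'z'), 0.7), (('x', 'y'), 0.9)],
                    dtype=[('pairs', 'U1', 2), ('score', 'f4')])
print(list(greedyMatching(pairs)))  # [(('x', 'y'), 0.9)]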
def pair_gazette_matching(scored_pairs: numpy.ndarray,
                          threshold: float = 0.0,
                          n_matches: int = 1) -> Links:
    scored_pairs.sort(order='pairs')

    # Split into contiguous blocks that share the same first record of the pair
    group_key = scored_pairs['pairs'][:, 0]
    change_points = numpy.where(numpy.roll(group_key, 1) != group_key)[0]
    scored_blocks = numpy.split(scored_pairs, change_points)

    for match in gazetteMatching(scored_blocks, threshold, n_matches):
        if match:
            yield from match
def greedyMatching(dupes: numpy.ndarray, threshold: float = 0.5) -> Links:
    # AH upgrade threshold
    # NOTE: threshold is accepted but not yet applied in the body below.
    A: Set[RecordID] = set()
    B: Set[RecordID] = set()

    dupes.sort(order='score')
    dupes = dupes[::-1]

    for (a, b), score in dupes:
        if a not in A and b not in B:
            A.add(a)
            B.add(b)
            yield (a, b), score
def add_dates(self, dates: np.ndarray) -> None:
    dates.sort()
    for d in dates:
        current_date = d.date()
        if self.start_date > current_date:
            self.start_date = current_date
        else:
            break
    for d in dates[::-1]:
        current_date = d.date()
        if self.end_date < current_date:
            self.end_date = current_date
        else:
            break
def get_median_nan(arr: np.ndarray):
    """
    Replaces np.nanmedian, which is slower. We do not compute the median as a
    weighted average when the array has an even length, but take the middle
    element, as for uneven length. So for even length, where one would usually
    compute something like (e1 + e2) / 2, we simply take the e1 value.
    :param arr: array whose NaN values will be ignored.
    :return: median
    """
    arr = arr[~np.isnan(arr)]
    arr = arr.flatten()
    arr.sort()
    if len(arr) % 2 == 0:
        median_it1 = int(len(arr) / 2 - 1)
    else:
        median_it1 = int(len(arr) / 2)
    return arr[median_it1]
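# Behaviour sketch: for an even number of valid values the lower middle
# element is returned, unlike np.nanmedian, which averages the two middles.
vals = np.array([4.0, np.nan, 1.0, 3.0, 2.0])
print(get_median_nan(vals))  # 2.0 (valid values [1, 2, 3, 4] -> lower middle)
print(np.nanmedian(vals))    # 2.5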
def __init__(self, X: np.ndarray, majority_label: int = -1, minority_label: int = 1):
    assert len(X) >= 2, f'X: {X} has length less than 2!'

    # Sort the array if it is not already sorted
    if not np.all(X[:-1] <= X[1:]):
        X.sort()

    self.X = X
    self.min = X[0]
    self.max = X[-1]
    assert self.min < self.max, 'minimum greater than or equal to maximum!'
    self.majority_label = majority_label
    self.minority_label = minority_label
def get_thresholds(scores: np.ndarray, num_gt, num_sample_pts=41):
    # Walk the detection scores from highest to lowest and keep the score
    # reached at each recall sample point.
    scores.sort()
    scores = scores[::-1]
    current_recall = 0
    thresholds = []
    for i, score in enumerate(scores):
        l_recall = (i + 1) / num_gt
        if i < (len(scores) - 1):
            r_recall = (i + 2) / num_gt
        else:
            r_recall = l_recall
        if (((r_recall - current_recall) < (current_recall - l_recall))
                and (i < (len(scores) - 1))):
            continue
        thresholds.append(score)
        current_recall += 1 / (num_sample_pts - 1.0)
    return thresholds
def convex_hull(points: np.ndarray) -> np.ndarray:
    """Calculates the convex hull of the given points

    Args:
        points: np.ndarray (of shape n x 2)

    Returns:
        np.ndarray (of shape k x 2) containing the points in the convex hull
    """
    if len(points) < 4:
        return points

    # get points as list
    points = points.tolist()
    n = len(points)

    # sort points first by x coord and then by y coord
    points.sort(key=lambda x: [x[0], x[1]])

    # select the point with min x as the start point
    start = points.pop(0)

    # sort points counterclockwise around the start point
    points.sort(key=lambda x: (slope(x, start), -x[1], x[0]))

    # get np representation
    points = np.array(points)

    # initialize hull
    hull = [start, points[0]]

    # consider points one by one
    for i in range(1, n - 1):
        # remove the previous point while the turn is concave
        while np.cross(hull[-1] - hull[-2], points[i] - hull[-2]) <= 0:
            hull.pop()
        hull.append(points[i])

    # remove the last point if it is already inside
    if len(hull) > 3:
        if np.cross(hull[-1] - hull[-2], hull[0] - hull[-2]) <= 0:
            hull.pop()

    return np.array(hull)
def remove_dates(self, dates: np.ndarray) -> None:
    """
    Remove dates at the extremities; google does not do it in transitfeed:
    https://github.com/google/transitfeed/blob/master/transitfeed/serviceperiod.py#L80
    """
    dates.sort()
    for d in dates:
        current_date = d.date()
        if self.start_date == current_date:
            self.start_date = self.start_date + timedelta(days=1)
        else:
            break
    for d in dates[::-1]:
        current_date = d.date()
        if self.end_date == current_date:
            self.end_date = self.end_date - timedelta(days=1)
        else:
            break
def from_macro_pkas(cls, macropkas: np.ndarray, ph_values: np.ndarray):
    """Instantiate a titration curve specified using pKas

    Parameters
    ----------
    macropkas - 1D-array of float pKa values
    ph_values - 1D-array of pH values that correspond to the curve
    """
    macropkas.sort()
    instance = cls()
    instance.populations = populations_from_macro_pka(macropkas, ph_values)
    instance.free_energies = free_energy_from_population(instance.populations)
    instance.ph_values = ph_values
    state_ids = ["Deprotonated"]
    for n, pKa in enumerate(macropkas, start=1):
        state_ids.append(f"+{n:d} protons (pKa={pKa:.2f})")
    instance._state_ids = state_ids
    return instance
def get_quantile(data: np.ndarray, prob: float):
    """
    Computes the quantile of a numpy array.

    :param data: The array.
    :param prob: The probability to be under the desired quantile.
    :return: The quantile value.
    """
    data = data.flatten()
    # Not the fastest algorithm, but a really simple implementation.
    # We first sort the array, so that wherever we pick a value, every value
    # before it is smaller and every value after it is larger.
    data.sort()
    # That means that when we pick the value that is at xx% of the length of
    # the array, xx% of the values are smaller. This is then the quantile of 0.xx.
    # Computing the index of the quantile (rounded)
    i = int(len(data) * prob + 0.5)
    if i >= len(data):
        # The computation leads to an invalid index if prob ~= 1
        return data[-1]
    else:
        return data[i]
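# Usage sketch: with 100 evenly spaced values, the 0.25 quantile resolves to
# index int(100 * 0.25 + 0.5) = 25 of the sorted data.
data = np.arange(100, dtype=float)
assert get_quantile(data, 0.25) == 25.0
assert get_quantile(data, 1.0) == 99.0  # prob ~= 1 falls back to the last element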
def _calculate_cross_species_rank_ratio_with_order_statistics(
        motif_id_rank_ratios_for_one_region_or_gene: np.ndarray) -> np.ndarray:
    """
    Calculate the cross-species combined rank ratio for a region/gene from
    rank ratios of a certain region/gene scored for a certain motif in
    multiple species, using order statistics.

    Code based on the applyOrderStatistics function:
      https://github.com/aertslab/orderstatistics/blob/master/OrderStatistics.java

    Paper:
      https://www.nature.com/articles/nbt1203

    :param motif_id_rank_ratios_for_one_region_or_gene:
        Numpy array of rank ratios of a certain region/gene scored for a
        certain motif in multiple species. This array is sorted inplace, so
        if the original array is required afterwards, provide a copy to this
        function.
    :return: Cross-species combined rank ratio.
    """
    # Number of species for which to calculate a cross-species combined rank ratio score.
    rank_ratios_size = motif_id_rank_ratios_for_one_region_or_gene.shape[0]

    if rank_ratios_size == 0:
        return np.float64(1.0)
    else:
        # Sort rank ratios inplace.
        motif_id_rank_ratios_for_one_region_or_gene.sort()

        w = np.zeros((rank_ratios_size + 1,), dtype=np.float64)
        w[0] = np.float64(1.0)
        w[1] = motif_id_rank_ratios_for_one_region_or_gene[rank_ratios_size - 1]

        for k in range(2, rank_ratios_size + 1):
            f = np.float64(-1.0)
            for j in range(0, k):
                f = (-(f * (k - j)
                       * motif_id_rank_ratios_for_one_region_or_gene[rank_ratios_size - k])
                     / (j + 1.0))
                w[k] = w[k] + (w[k - j - 1] * f)

        # Cross-species combined rank ratio.
        return w[rank_ratios_size]
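# Minimal check: with a single species the combined rank ratio is just that
# species' rank ratio, since w[1] is seeded with the last (only) element.
assert _calculate_cross_species_rank_ratio_with_order_statistics(
    np.array([0.2])) == np.float64(0.2)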
def __init__(self, values_for_month: np.ndarray):
    # ndarray.sort() sorts in place and returns None, so sort first and
    # then keep a reference to the sorted array.
    values_for_month.sort()
    self.sort_values = values_for_month
    size = len(values_for_month)
    self.dx_hour = size / 12
def input_cleanup(arr: np.ndarray) -> np.ndarray:
    # Gaps between consecutive sorted values, padded with a leading 0 and a
    # trailing value three above the maximum.
    arr.sort()
    return np.diff(np.concatenate(([0], arr, [arr[-1] + 3])))
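# Usage sketch: [1, 4, 5] is bracketed as [0, 1, 4, 5, 8], whose first
# differences are the returned gaps.
assert np.array_equal(input_cleanup(np.array([1, 4, 5])), [1, 3, 1, 3])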
def torch_nms(detections: np.ndarray, scores: np.ndarray,
              threshold: float = .5, top: int = 0) -> torch.Tensor:
    """Apply non-maximum suppression based on Torch

    Arguments:
        detections: (tensor, (num, 4)) The location predictions for the image.
        scores: (tensor, (num)) The class prediction scores for the image.
        threshold: (float) The overlap thresh for suppressing unnecessary boxes.
        top: (int) slice top k
    Return:
        The indices of the kept boxes with respect to num.
    """
    detections = torch.from_numpy(detections)
    scores = torch.from_numpy(scores)
    keep = scores.new(scores.size(0)).zero_().long()
    if detections.numel() == 0:
        return keep

    x1, y1 = detections[:, 0], detections[:, 1]
    x2, y2 = detections[:, 2], detections[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)

    v, idx = scores.sort(0)  # ascending order
    if top:
        idx = idx[-top:]

    xx1, yy1, xx2, yy2 = [detections.new() for _ in range(4)]
    w, h = detections.new(), detections.new()

    count = 0
    while idx.numel() > 0:
        keep[count] = i = idx[-1]  # keep the highest-scoring remaining box
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]
        # Intersection of the remaining boxes with box i
        torch.index_select(x1, 0, idx, out=xx1)
        torch.index_select(y1, 0, idx, out=yy1)
        torch.index_select(x2, 0, idx, out=xx2)
        torch.index_select(y2, 0, idx, out=yy2)
        xx1 = torch.clamp(xx1, min=x1[i])
        yy1 = torch.clamp(yy1, min=y1[i])
        xx2 = torch.clamp(xx2, max=x2[i])
        yy2 = torch.clamp(yy2, max=y2[i])
        w.resize_as_(xx2)
        h.resize_as_(yy2)
        w, h = xx2 - xx1, yy2 - yy1
        w = torch.clamp(w, min=0.0)
        h = torch.clamp(h, min=0.0)
        inter = w * h
        # IoU = intersection / (area(a) + area(b) - intersection)
        rem_areas = torch.index_select(area, 0, idx)
        union = (rem_areas - inter) + area[i]
        IoU = inter / union
        # Suppress boxes that overlap box i by more than the threshold
        idx = idx[IoU.le(threshold)]
    # Only the first `count` entries of `keep` are valid indices
    return keep[:count]
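# Usage sketch: two heavily overlapping boxes (IoU = 0.81 > 0.5), so only the
# higher-scoring box index survives suppression.
boxes = np.array([[0., 0., 10., 10.], [1., 1., 10., 10.]], dtype=np.float32)
confidences = np.array([0.9, 0.8], dtype=np.float32)
print(torch_nms(boxes, confidences, threshold=0.5))  # tensor([0])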
def which(a: np.ndarray, k: int):
    # k-th smallest element; note that this sorts `a` in place.
    a.sort()
    return a[k]
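# Usage sketch: the k-th order statistic via a full in-place sort;
# np.partition(a, k)[k] gives the same answer without sorting everything.
a = np.array([5, 1, 4])
assert which(a, 1) == 4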
def _nan_quantile(
    arr: np.ndarray,
    quantiles: np.ndarray,
    axis: int = 0,
    alpha: float = 1.0,
    beta: float = 1.0,
) -> Union[float, np.ndarray]:
    """
    Get the quantiles of the array for the given axis.
    A linear interpolation is performed using alpha and beta.

    By default, alpha == beta == 1, which performs the 7th method of
    [Hyndman&Fan]_. With alpha == beta == 1/3 we get the 8th method.
    """
    # --- Setup
    data_axis_length = arr.shape[axis]
    if data_axis_length == 0:
        return np.NAN
    if data_axis_length == 1:
        result = np.take(arr, 0, axis=axis)
        return np.broadcast_to(result, (quantiles.size,) + result.shape)
    # The dimensions of `q` are prepended to the output shape, so we need the
    # axis being sampled from `arr` to be last.
    DATA_AXIS = 0
    if axis != DATA_AXIS:  # But moveaxis is slow, so only call it if axis != 0.
        arr = np.moveaxis(arr, axis, destination=DATA_AXIS)
    # nan_count is not a scalar
    nan_count = np.isnan(arr).sum(axis=DATA_AXIS).astype(float)
    valid_values_count = data_axis_length - nan_count
    # We need at least two values to do an interpolation
    too_few_values = valid_values_count < 2
    if too_few_values.any():
        # This will result in getting the only available value if it exists
        valid_values_count[too_few_values] = np.NaN
    # --- Computation of indexes
    # Add axis for quantiles
    valid_values_count = valid_values_count[..., np.newaxis]
    virtual_indexes = _compute_virtual_index(valid_values_count, quantiles, alpha, beta)
    virtual_indexes = np.asanyarray(virtual_indexes)
    previous_indexes, next_indexes = _get_indexes(arr, virtual_indexes, valid_values_count)
    # --- Sorting
    arr.sort(axis=DATA_AXIS)
    # --- Get values from indexes
    arr = arr[..., np.newaxis]
    previous = np.squeeze(
        np.take_along_axis(arr, previous_indexes.astype(int)[np.newaxis, ...], axis=0),
        axis=0,
    )
    next_elements = np.squeeze(
        np.take_along_axis(arr, next_indexes.astype(int)[np.newaxis, ...], axis=0),
        axis=0,
    )
    # --- Linear interpolation
    gamma = _get_gamma(virtual_indexes, previous_indexes)
    interpolation = _linear_interpolation(previous, next_elements, gamma)
    # When an interpolation lands in the NaN range (near the end of the sorted
    # array), it means we can clip to the array max value.
    result = np.where(np.isnan(interpolation), np.nanmax(arr, axis=0), interpolation)
    # Move the quantile axis in front
    result = np.moveaxis(result, axis, 0)
    return result