def split_molecules(
    umis: np.ndarray,
    data_split: float,
    overlap_factor: float = 0.0,
    random_state: np.random.RandomState = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """Splits molecules into two (potentially overlapping) groups.

    Sampling happens in two binomial draws: first the molecules that go only
    to the first group, then, out of what is left, the molecules that go only
    to the second group. Whatever remains after both draws is the overlap and
    is added to *both* outputs.

    :param umis: Array of molecules to split
    :param data_split: Proportion of molecules to assign to the first group
    :param overlap_factor: Overlap correction factor, if desired
    :param random_state: For reproducible sampling; a fresh
                         ``np.random.RandomState()`` is created when ``None``
    :return: umis_X and umis_Y, representing ``split`` and ``~(1 - split)``
             counts sampled from the input array
    :raises ValueError: propagated from ``binomial`` when a sampling
                        probability derived from ``data_split`` and
                        ``overlap_factor`` is not a valid probability
    """
    if random_state is None:
        random_state = np.random.RandomState()

    # Molecules assigned exclusively to the first group.
    umis_X_disjoint = random_state.binomial(umis, data_split - overlap_factor)

    y_share = 1 - data_split
    y_denominator = y_share + overlap_factor
    if np.ndim(y_denominator) == 0 and y_denominator == 0:
        # Here ``data_split - overlap_factor`` is 1, so the first draw took
        # every molecule and the second draw runs over zero trials. Any
        # probability gives the same (all-zero) result; use 0.0 rather than
        # dividing zero by zero.
        y_probability = 0.0
    else:
        y_probability = y_share / y_denominator

    # Molecules assigned exclusively to the second group, drawn from the
    # molecules the first draw left behind.
    umis_Y_disjoint = random_state.binomial(umis - umis_X_disjoint, y_probability)

    # Molecules picked by neither disjoint draw are shared by both groups.
    # Kept in its own name so the ``overlap_factor`` parameter is not rebound.
    overlap = umis - umis_X_disjoint - umis_Y_disjoint
    umis_X = umis_X_disjoint + overlap
    umis_Y = umis_Y_disjoint + overlap

    return umis_X, umis_Y
def f(rng: np.random.RandomState, shape, p):
    """Draw a boolean mask from independent single-trial binomial samples.

    :param rng: Random state used for sampling
    :param shape: Passed to ``binomial`` as ``size``
    :param p: Passed to ``binomial`` as the success probability
    :return: ``True`` where the single-trial draw was positive, else ``False``
    """
    draws = rng.binomial(n=1, p=p, size=shape)
    mask = draws > 0
    return mask