def rand_floats(
    gen: np.random.Generator,
    low: float = 0.0,
    high: float = 1.0,
    size: Optional[int] = None,
    dtype: np.dtype = np.float64,
    include_invalid: bool = False,
) -> np.ndarray:
    """
    Generate a random array of floating-point values with the specified length and dtype.

    The elements of the array are drawn from the uniform distribution over the
    range ``[low, high)``.
    """
    # Generate a random array for the given floating-point type.
    arr = gen.uniform(low=low, high=high, size=size).astype(dtype)

    # If we're including invalid values (NaN for floating-point types), draw a random
    # integer indicating how many elements we'll set to NaN; then generate a random
    # integer array of that length whose elements will be a fancy index we'll use to
    # assign NaNs into the generated floating-point array.
    # NOTE: The nancount we generate is approximate because we don't enforce that all
    # the elements of the fancy index are unique.
    if include_invalid:
        nancount = gen.integers(0, size, endpoint=True)
        nan_indices = gen.integers(0, size, size=nancount)
        arr[nan_indices] = np.nan

    return arr
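# Usage sketch (illustrative, not part of the original source): exercises
# rand_floats with a seeded generator; the seed and sizes here are arbitrary.
def _demo_rand_floats() -> None:
    gen = np.random.default_rng(42)
    vals = rand_floats(gen, low=-1.0, high=1.0, size=256, include_invalid=True)
    assert vals.shape == (256,)
    assert vals.dtype == np.float64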
def try_get_inter_island_crossover_parents( self, rng: np.random.Generator) -> Optional[Tuple[CnnGenome, ...]]: # peek since we may not actually be able to give this island a turn (i.e. get a genome from this island # and another island) island_turn = self.peek_next_island_turn() if self.islands[island_turn].population: # islands with a population that we can grab a genome from valid_islands = [] for i, island in enumerate(self.islands): if i == island_turn: continue if island.population: valid_islands.append(island) if not valid_islands: return None population_0 = self.islands[island_turn].population population_1 = valid_islands[rng.integers( 0, len(valid_islands))].population i0 = rng.integers(0, len(population_0)) i1 = rng.integers(0, len(population_1)) g0 = population_0[i0] g1 = population_1[i1] # we did use the peeked island, so this will increment the island turn self.next_island_turn() return (g0, g1) else: return None
def test_nonbonded_pair_list_interpolated_correctness( ixn_group_size, precision, rtol, atol, cutoff, beta, lamb, example_nonbonded_params, example_conf, example_box, rng: np.random.Generator, ): "Compares with jax reference implementation, with parameter interpolation." num_atoms, _ = example_conf.shape params = gen_params(example_nonbonded_params, rng) # randomly select 2 interaction groups and construct all pairwise interactions atom_idxs = rng.choice( num_atoms, size=( 2, ixn_group_size, ), replace=False, ).astype(np.int32) pair_idxs = np.stack(np.meshgrid(atom_idxs[0, :], atom_idxs[1, :])).reshape(2, -1).T num_pairs, _ = pair_idxs.shape scales = rng.uniform(0, 1, size=(num_pairs, 2)) lambda_plane_idxs = rng.integers(-2, 3, size=(num_atoms, ), dtype=np.int32) lambda_offset_idxs = rng.integers(-2, 3, size=(num_atoms, ), dtype=np.int32) ref_potential = nonbonded.interpolated( make_ref_potential(pair_idxs, scales, lambda_plane_idxs, lambda_offset_idxs, beta, cutoff)) test_potential = NonbondedPairListInterpolated(pair_idxs, scales, lambda_plane_idxs, lambda_offset_idxs, beta, cutoff) GradientTest().compare_forces( example_conf, params, example_box, lamb, ref_potential, test_potential, precision=precision, rtol=rtol, atol=atol, )
def rand_fancyindex( rng: np.random.Generator, index_length: int, dtype: np.dtype, source_arr_len: int, invalid_ratio: Optional[float] = None, ) -> np.ndarray: """Create a random fancy index with the specified length and dtype.""" check_params(dtype, invalid_ratio) if dtype.kind not in "iu": # TODO: Also support floats, since mbget allows that raise ValueError( f"Only integer dtypes are currently supported by this method. dtype={dtype.name}" ) # Generate the fancy index from the uniform integer distribution. fancyindex = FastArray( rng.integers(0, source_arr_len, size=index_length, dtype=dtype) ) # If the fancy index should have some invalids/NA values, add those in now. if invalid_ratio is not None and invalid_ratio > 0.0: # TODO: Also add in some out-of-bounds accesses (and not just invalid/NA values) here? invalid_outcomes = FastArray(rng.random(size=index_length)) putmask(fancyindex, invalid_outcomes < invalid_ratio, fancyindex.inv) return fancyindex
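# Usage sketch (illustrative, not part of the original source): builds a fancy
# index into a hypothetical 100-element source array, with roughly a quarter of
# the entries replaced by the integer NA sentinel via putmask above.
def _demo_rand_fancyindex() -> None:
    rng = np.random.default_rng(8)
    idx = rand_fancyindex(rng, index_length=32, dtype=np.dtype(np.int32),
                          source_arr_len=100, invalid_ratio=0.25)
    assert len(idx) == 32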
def _iter_random_indices(rng: np.random.Generator, buffer_size: int, random_batch_size=1000) -> Iterator[int]: while True: yield from ( int(i) for i in rng.integers(0, buffer_size, size=random_batch_size))
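# Usage sketch (illustrative, not part of the original source): the iterator is
# infinite, so bound it with itertools.islice when sampling a fixed number of
# buffer positions.
def _demo_iter_random_indices() -> None:
    import itertools

    rng = np.random.default_rng(7)
    first_ten = list(itertools.islice(_iter_random_indices(rng, buffer_size=50), 10))
    assert len(first_ten) == 10
    assert all(0 <= i < 50 for i in first_ten)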
def test_pickle_frame_dyncodec(tmp_path, rng: np.random.Generator): file = tmp_path / 'data.bpk' df = pd.DataFrame({ 'key': np.arange(0, 5000, dtype='i4'), 'count': rng.integers(0, 1000, 5000), 'score': rng.normal(10, 2, 5000) }) def codec(buf): obj = memoryview(buf).obj if isinstance(obj, np.ndarray) and obj.dtype == np.float64: print('compacting double array') return codecs.Chain([numcodecs.AsType('f4', 'f8'), codecs.Blosc('zstd', 9)]) else: return codecs.Blosc('zstd', 9) with BinPickler.compressed(file, codec) as w: w.dump(df) with BinPickleFile(file) as bpf: assert not bpf.find_errors() df2 = bpf.load() print(df2) assert all(df2.columns == df.columns) assert all(df2['key'] == df['key']) assert all(df2['count'] == df['count']) assert all(df2['score'].astype('f4') == df['score'].astype('f4')) del df2
def update_params(thi_nil: np.ndarray, z: seed.Partition, mod: Model, ctrl: Controls,
                  param_samplers: List[MyopicRwSampler],
                  ome: np.random.Generator) -> Tuple[np.ndarray, seed.Partition]:

    def coin() -> Generator[Tuple[bool, seed.Partition], bool, None]:
        # Closes over thi_prime, which is assigned below before this generator
        # is first consumed.
        new_z = z
        state = (yield None, new_z)
        while True:
            success, new_z = flip_param_coins(thi_nil, thi_prime, new_z, mod, ctrl, state, ome)
            state = (yield success, new_z)

    sector = ome.integers(0, len(thi_nil))
    prop, log_p_for, log_p_back = param_samplers[sector].propose(ome)
    thi_prime = np.array([thi_nil[i] if i != sector else prop for i in range(len(thi_nil))])

    if ctrl.precoin:
        if not flip_param_precoin(thi_nil, thi_prime, log_p_for, log_p_back, mod, ome):
            param_samplers[sector].adapt(thi_nil[sector], 0)
            return thi_nil, z
        weight_nil = weight_prime = 0
    else:
        weight_nil = eval_param_weight(thi_nil, mod) + log_p_for
        weight_prime = eval_param_weight(thi_prime, mod) + log_p_back

    accept, _, z_acc = sample_twocoin_joint(weight_prime, weight_nil, coin(), ctrl.pr_portkey, ome)
    thi_acc = thi_prime if accept else thi_nil
    param_samplers[sector].adapt(thi_acc[sector], float(accept))
    return thi_acc, z_acc
def try_make_new_layer(self, upper_bound_layer: Layer, lower_bound_layer: Layer,
                       rng: np.random.Generator) -> Optional[Layer]:
    """
    When making a new layer, you must specify the volume size. Thus, two other layers
    are required to randomly select a valid volume size - a minimum and a maximum
    volume size. If the lower bound layer is an output layer, then only the upper
    bound layer will be considered.
    """
    if type(upper_bound_layer) == OutputLayer:
        return None

    depth: int = hp.get_random_volume_depth(rng)

    upper_width, upper_height, upper_depth = upper_bound_layer.output_shape
    assert upper_width == upper_height  # assume a square volume size

    if type(lower_bound_layer) == OutputLayer:
        # The minimum square size is 2, so if the upper bound is 2 we can't create
        # a smaller layer.
        if upper_width <= 2:
            return None
        volume_size = rng.integers(2, upper_width)
        width, height = volume_size, volume_size
    else:
        lower_width, lower_height, lower_depth = lower_bound_layer.output_shape
        assert lower_width == lower_height

        if upper_width <= lower_width:
            return None

        # If the upper width is 3 then the lower width must be 2, and we cannot
        # create a size in between 2 and 3.
        if upper_width <= 3:
            assert lower_width == 2
            return None

        volume_size = rng.integers(lower_width, upper_width)
        width, height = volume_size, volume_size

    layer = Layer(Layer.get_next_layer_innovation_number(), width, height, depth)

    assert layer.layer_innovation_number not in self.layer_map
    self.layer_map[layer.layer_innovation_number] = layer

    return layer
def bootstrap_idxs(n, rng: np.random.Generator = None):
    """
    Generate a set of bootstrap indexes of length n, returning the pair
    (in_bag, out_bag) containing the in-bag and out-of-bag indexes as numpy arrays.
    """
    if not isinstance(rng, np.random.Generator):
        # Accept None or a seed; anything that isn't already a Generator is
        # passed through default_rng().
        rng = np.random.default_rng(rng)
    in_bag = rng.integers(low=0, high=n, size=n)
    out_bag = np.array(list(set(range(n)) - set(in_bag)))
    return in_bag, out_bag
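# Usage sketch (illustrative, not part of the original source): the in-bag and
# out-of-bag index sets are disjoint by construction.
def _demo_bootstrap_idxs() -> None:
    in_bag, out_bag = bootstrap_idxs(100, np.random.default_rng(0))
    assert len(in_bag) == 100
    assert set(in_bag).isdisjoint(out_bag)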
def test_policy_log_policy_grad_vs_empirical(policy: policies.DiscretePolicy, rng: np.random.Generator): x0 = rng.uniform(-1, 1, policy.num_params()) n = 10 actions = rng.integers(policy.num_actions, size=n) states = rng.integers(policy.num_states, size=n) weights = rng.uniform(0, 1, n) def f(x): policy_matrix = policy.policy_matrix(x) return np.sum(weights * np.log(policy_matrix[states, actions])) def f_grad(x): return policy.log_policy_grad(actions=actions, states=states, weights=weights, x=x) assert scipy.optimize.check_grad(f, f_grad, x0) < 1e-6
def remove_one_element( rng: np.random.Generator, array: np.ndarray) -> Tuple[Tuple[int, int, int], np.ndarray]: ys, xs = np.where(array >= 0) # Arrays are y,x i = rng.integers(len(xs)) pos = ys[i], xs[i] array_new = array.copy() array_new[pos] = -1 return (*pos, array[pos]), array_new
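# Usage sketch (illustrative, not part of the original source): cells holding
# -1 are treated as empty, so only the non-negative cells are candidates.
def _demo_remove_one_element() -> None:
    rng = np.random.default_rng(1)
    grid = np.array([[0, -1], [-1, 3]])
    (y, x, val), new_grid = remove_one_element(rng, grid)
    assert grid[y, x] == val
    assert new_grid[y, x] == -1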
def random_flip_cx(self, rng: np.random.Generator) -> Tuple[int, int]: """ Returns a randomly selected flippable CX gate in this CX circuit layer, using the given random number generator. """ cx_idx = rng.integers(self._num_flippable_cxs) for i, cx in enumerate(self._iter_flippable_cxs()): if i == cx_idx: return cx raise Exception("Cannot get here!")
def test_nonbonded_all_pairs_interpolated_correctness( num_atoms, precision, rtol, atol, cutoff, beta, lamb, example_nonbonded_params, example_conf, example_box, rng: np.random.Generator, ): "Compares with jax reference implementation, with parameter interpolation." conf = example_conf[:num_atoms] params_initial = example_nonbonded_params[:num_atoms, :] params = gen_params(params_initial, rng) lambda_plane_idxs = rng.integers(-2, 3, size=(num_atoms, ), dtype=np.int32) lambda_offset_idxs = rng.integers(-2, 3, size=(num_atoms, ), dtype=np.int32) ref_potential = nonbonded.interpolated( make_ref_potential(lambda_plane_idxs, lambda_offset_idxs, beta, cutoff)) test_potential = NonbondedAllPairsInterpolated(lambda_plane_idxs, lambda_offset_idxs, beta, cutoff) GradientTest().compare_forces(conf, params, example_box, lamb, ref_potential, test_potential, precision=precision, rtol=rtol, atol=atol)
def try_get_intra_island_crossover_parents( self, rng: np.random.Generator) -> Optional[Tuple[CnnGenome, ...]]: # Peek since we may or may not actually give this island a turn (i.e. actually get parent genomes from it) island_turn = self.peek_next_island_turn() if len(self.islands[island_turn].population) < 2: return None else: population: List[CnnGenome] = self.islands[island_turn].population i0 = rng.integers(0, len(population)) i1 = rng.integers(0, len(population) - 1) if i1 == i0: i1 += 1 assert i1 != i0 # we did use the peeked island turn so this will increment the island turn self.next_island_turn() return (population[i0], population[i1])
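# Illustrative sketch (not from the original source) of the collision-bump trick
# used above: i1 is drawn from a range one element smaller than i0's and bumped
# on collision, which guarantees distinct indices. (The classic fully uniform
# variant bumps whenever i1 >= i0 rather than only on equality.)
def _demo_distinct_pair_trick() -> None:
    rng = np.random.default_rng(0)
    n = 5
    for _ in range(1000):
        i0 = rng.integers(0, n)
        i1 = rng.integers(0, n - 1)
        if i1 == i0:
            i1 += 1
        assert i0 != i1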
def rand_array(rng: np.random.Generator, length: int, dtype: np.dtype,
               invalid_ratio: Optional[float] = None) -> np.ndarray:
    check_params(dtype, invalid_ratio)

    if dtype.kind in "iu":
        info = np.iinfo(dtype)
        arr = FastArray(rng.integers(info.min, info.max, size=length, dtype=dtype))

    elif dtype.kind == "f":
        # PERF: Use an FMA function here if we ever implement one
        arr = (FastArray(rng.random(size=length, dtype=dtype)) * 1e10) - 0.5e10

    elif dtype.kind == "S":
        # Generate integers in the uppercase ASCII letter range (65-90, 'A'-'Z'),
        # then use a view to expose those values as fixed-length ASCII strings.
        # TODO: Support other character ranges (lower-range ASCII 0-127, full ASCII
        #       0-255, lowercase+uppercase+digits).
        arr = FastArray(rng.integers(
            65, 90, size=length * dtype.itemsize, dtype=np.int8, endpoint=True
        ).view(dtype))

    elif dtype.kind == "U":
        # Generate integers in the uppercase ASCII letter range (65-90, 'A'-'Z').
        # TODO: Support other character ranges (lower-range ASCII 0-127, full ASCII
        #       0-255, lowercase+uppercase+digits, Unicode chars >255).
        arr = FastArray(rng.integers(
            65, 90, size=length * (dtype.itemsize // 4), dtype=np.int32, endpoint=True
        ).view(dtype))

    else:
        # TODO: Handle other dtypes
        raise NotImplementedError(
            f"The dtype {dtype} is not yet supported by this function."
        )

    # If the array should have some invalids/NA values, add those in now.
    if invalid_ratio is not None and invalid_ratio > 0.0:
        invalid_outcomes = FastArray(rng.random(size=length))
        putmask(arr, invalid_outcomes < invalid_ratio, arr.inv)

    return arr
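# Usage sketch (illustrative, not part of the original source); assumes the
# returned FastArray supports plain len() like an ndarray.
def _demo_rand_array() -> None:
    rng = np.random.default_rng(5)
    arr = rand_array(rng, length=64, dtype=np.dtype(np.float64), invalid_ratio=0.1)
    assert len(arr) == 64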
def roll_signals(
    sig_original: ty.List[np.ndarray],
    max_time_shifts: ty.List[int],
    rng: np.random.Generator,
) -> ty.List[np.ndarray]:
    """Augment signals by circularly rolling each one by a random time shift.

    For every signal and every entry in ``max_time_shifts``, draws a shift
    uniformly from ``[-max_shift, max_shift]`` and appends the rolled copy, so
    the output contains ``len(sig_original) * len(max_time_shifts)`` signals.
    """
    sig_rolled: ty.List[np.ndarray] = []
    for s in tqdm(sig_original):
        for max_shift in max_time_shifts:
            time_shift = rng.integers(-max_shift, max_shift + 1)
            rolled = np.roll(s, time_shift)
            sig_rolled.append(rolled)
    return sig_rolled
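# Usage sketch (illustrative, not part of the original source): one signal and
# two shift budgets yield two rolled copies of the same length.
def _demo_roll_signals() -> None:
    rng = np.random.default_rng(3)
    signals = [np.arange(10)]
    rolled = roll_signals(signals, max_time_shifts=[2, 4], rng=rng)
    assert len(rolled) == 2
    assert all(r.shape == (10,) for r in rolled)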
def _iter_random_indices( rng: np.random.Generator, num_sources: int, random_batch_size=1000, p: Optional[List[float]] = None, ) -> Iterator[int]: """Get an infinite iterator that randomly samples the index of the source to pick examples from.""" if p is None: while True: yield from (int(i) for i in rng.integers(0, num_sources, size=random_batch_size)) else: while True: yield from (int(i) for i in rng.choice(num_sources, size=random_batch_size, p=p))
def generate_weights(
    rng: np.random.Generator,
    year_begin: int,
    base_periods: Sequence[int],
    no_of_years: int,
    headers: IndexLabels,
) -> np.ndarray:
    """Generates fake weights.

    Selects random ints between 1 and 19 for weights in the first
    period. Then adds a random int between -2 and 2 for each subsequent
    weights update. Returns a numpy matrix of weights, clipped so that
    no weight can be lower than 1.

    Parameters
    ----------
    rng: Generator
        Numpy generator for generating random numbers.
    year_begin: int
        The start year for the first set of weights.
    base_periods: sequence of int
        A list of months, given by the int equivalent, for a weights
        update each year.
    no_of_years: int
        The number of years to generate weights for.
    headers: label, or sequence of labels
        A label or list of labels for each time series column name.

    Returns
    -------
    ndarray:
        A matrix of time series weights.
    """
    x = no_of_years * len(base_periods)
    y = len(headers)

    # Weights randomly initiated as an int between 1 and 19.
    first_year_weights = rng.integers(1, 20, (1, y))

    # Rearrange to length needed.
    weights = np.tile(first_year_weights, (x, 1))

    # Assumes that weights increase or decrease by no more than an
    # increment of 2 each base price refresh.
    change = rng.integers(-2, 2, (x, y), endpoint=True)
    change[0, :] = 0  # No change at weights start.
    change = change.cumsum(axis=0)

    # Add change to weights and ensure weights stay >= 1.
    return np.clip(weights + change, 1, None)
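# Usage sketch (illustrative, not part of the original source): three years of
# weights with two base periods per year for two hypothetical series gives a
# 6 x 2 matrix, floored at 1.
def _demo_generate_weights() -> None:
    rng = np.random.default_rng(11)
    weights = generate_weights(rng, year_begin=2020, base_periods=[1, 7],
                               no_of_years=3, headers=['series_a', 'series_b'])
    assert weights.shape == (6, 2)
    assert (weights >= 1).all()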
def test_pickle_array(tmp_path, rng: np.random.Generator): "Pickle a NumPy array" file = tmp_path / 'data.bpk' a = rng.integers(0, 5000, 1024, dtype='i4') with BinPickler(file) as w: w.dump(a) with BinPickleFile(file) as bpf: assert len(bpf.entries) == 2 a2 = bpf.load() assert len(a2) == len(a) assert all(a2 == a)
def enable_edge_mut(self, rng: np.random.Generator) -> bool: logging.info("attempting enable_edge mutation") if not self.disabled_edges: logging.info("failed to complete enable_edge mutation") return False disabled_edges: List[int] = list(self.disabled_edges) index: int = rng.integers(0, len(disabled_edges)) edge_in: int = disabled_edges[index] self.enable_edge(edge_in) logging.info("successfully completed enable_edge mutation") return True
def test_dump_frame(tmp_path, rng: np.random.Generator): "Pickle a Pandas data frame" file = tmp_path / 'data.bpk' df = pd.DataFrame({ 'key': np.arange(0, 5000), 'count': rng.integers(0, 1000, 5000), 'score': rng.normal(10, 2, 5000) }) dump(df, file) df2 = load(file) assert all(df2.columns == df.columns) for c in df2.columns: assert all(df2[c] == df[c])
def enable_layer_mut(self, rng: np.random.Generator) -> bool: logging.info("attempting enable_layer mutation") if not self.disabled_layers: logging.info("failed to complete enable_layer mutation") return False disabled_layers = list(self.disabled_layers) index: int = rng.integers(0, len(disabled_layers)) layer_in: int = disabled_layers[index] self.enable_layer(layer_in) logging.info(f"enabling layer {layer_in}") logging.info("successfully completed enable_layer mutation") return True
def rand_integers( gen: np.random.Generator, size: Optional[int] = None, dtype: np.dtype = np.int64, include_invalid: bool = False, ) -> np.ndarray: """ Generate a random array of integers with the specified length and dtype. The elements of the array will span the representable range of the dtype, optionally including the 'invalid' value for the type. The elements of the array are drawn from the 'discrete uniform' distribution. """ # Determine the range for the dtype. lo, hi = integer_range(dtype, include_invalid) return gen.integers(lo, hi, size, dtype=dtype, endpoint=True)
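# Usage sketch (illustrative, not part of the original source); relies on the
# surrounding module's integer_range helper to bound the draw.
def _demo_rand_integers() -> None:
    gen = np.random.default_rng(99)
    ints = rand_integers(gen, size=128, dtype=np.int16)
    assert ints.dtype == np.int16
    assert ints.shape == (128,)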
def test_raise_on_zero_diagonal( N: int, L: np.ndarray, v: np.ndarray, rng: np.random.Generator, method_kwargs: Dict[str, Any], ): """Tests whether a :class:`numpy.linalg.LinAlgError` is raised if the diagonal of the Cholesky factor contains zeros.""" L = L.copy(order="K") k = rng.integers(N) L[k, k] = 0.0 with pytest.raises(np.linalg.LinAlgError): cholupdates.rank_1.downdate(L, v, **method_kwargs)
def __init__(
    self,
    unlabeled_publisher_data_list: Iterable[PublisherData],
    order: OrderOptions,
    correlated_sets: CorrelatedSetsOptions,
    shared_prop: float,
    random_generator: np.random.Generator = None,
    name: str = "sequentially_correlated",
) -> None:
    """Constructor for SequentiallyCorrelatedOverlapDataSet.

    Args:
        unlabeled_publisher_data_list: a list of PublisherDataSet indicating the
            reach curve of a publisher.
        order: The order of the sets to be returned. It should be one of
            'original', 'reversed' and 'random'. Here a 'set' means the reached
            ids of a publisher.
        correlated_sets: One of 'all' and 'one', indicating how the current set
            is correlated with the previously generated sets when the order is
            'original'.
        shared_prop: A number between 0 and 1 that specifies the proportion of
            ids in the current set that are overlapped with the previous set(s).
            See wfa_cardinality_estimation_evaluation_framework.simulations.set_generator
            for more explanations on the args order, correlated_sets and shared_prop.
        random_generator: a random Generator for generating the sequentially
            correlated reached ids.
        name: If specified, a human-readable name that will be associated to
            this DataSet.
    """
    if random_generator:
        random_state = np.random.RandomState(
            seed=random_generator.integers(low=0, high=10**9))
    else:
        random_state = np.random.RandomState(seed=1)
    super().__init__(
        unlabeled_publisher_data_list=unlabeled_publisher_data_list,
        overlap_generator=SequentiallyCorrelatedSetGenerator,
        overlap_generator_kwargs={
            "order": order,
            "correlated_sets": correlated_sets,
            "shared_prop": shared_prop,
            "random_state": random_state,
        },
        name=name,
    )
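# Illustrative sketch (not from the original source) of the seeding bridge used
# above: derive a legacy RandomState from a modern Generator so that code which
# only accepts RandomState stays reproducible under a single seed.
def _demo_seed_bridge() -> None:
    gen = np.random.default_rng(2021)
    legacy = np.random.RandomState(seed=int(gen.integers(low=0, high=10**9)))
    assert 0 <= legacy.randint(10) < 10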
def try_make_new_conv_edge(self, input_layer: Layer, output_layer: Layer,
                           rng: np.random.Generator, conv_edge_type=ConvEdge) -> Optional[Edge]:
    if not self.valid_connection(input_layer, output_layer):
        return None

    # No negative filter sizes.
    input_width, input_height, input_depth = input_layer.output_shape
    output_width, output_height, output_depth = output_layer.output_shape
    if input_width < output_width or input_height < output_height:
        return None

    possible_strides: List[int] = get_possible_strides(
        *input_layer.output_shape, *output_layer.output_shape)

    # No duplicate output edges with the same stride.
    for edge_in in input_layer.outputs:
        edge = self.edge_map[edge_in]
        if edge.input_layer_in == input_layer.layer_innovation_number and \
           edge.output_layer_in == output_layer.layer_innovation_number and \
           type(edge) == conv_edge_type:
            conv_edge: ConvEdge = cast(ConvEdge, edge)
            if conv_edge.stride in possible_strides:
                possible_strides.remove(conv_edge.stride)

    if not possible_strides:
        return None

    stride: int = possible_strides[rng.integers(0, len(possible_strides))]
    conv_edge = conv_edge_type(Edge.get_next_edge_innovation_number(), stride,
                               input_layer.layer_innovation_number,
                               output_layer.layer_innovation_number, self.layer_map)
    self.register_edge(conv_edge)
    edge = cast(Edge, conv_edge)

    logging.info(f"creating {conv_edge_type.__name__} from layer {input_layer.layer_innovation_number} " +
                 f"to layer {output_layer.layer_innovation_number}")

    return edge
def test_raise_on_vector_dimension_mismatch( N: int, L: np.ndarray, rng: np.random.Generator, method_kwargs: Dict[str, Any], ): """Tests whether a :class:`ValueError` is raised if the shape of the vector is not compatible with the shape of the Cholesky factor""" # Generate arbitrary v with incompatible length v_len = N + rng.integers(-N, N, endpoint=True) + 1 if v_len == N: v_len += 1 v = rng.random(v_len) with pytest.raises(ValueError): cholupdates.rank_1.downdate(L=L, v=v, **method_kwargs)
def _generate_random_examples(
    rng: np.random.Generator,
    num_markers: int = 3,
    num_sessions_min: int = 2,
    num_sessions_max: int = 10,
) -> Tuple[List[PerMarkerResults], PerMarkerCollectedNumbers]:
    """Generates a random number of random marker extraction results for some markers.

    Args:
        rng: a random number generator
        num_markers: the number of markers to be imitated
        num_sessions_min: the minimum number of sessions to generate (inclusive)
        num_sessions_max: the maximum number of sessions to generate (inclusive)
    Returns:
        a list containing a dictionary of the marker extraction results per
        marker, as well as a collection of the plain list of numbers used as
        "preceding user turns" in that extraction results
    """
    num_sessions = int(
        rng.integers(low=num_sessions_min, high=num_sessions_max + 1))
    markers = [f"marker{idx}" for idx in range(num_markers)]
    per_session_results: List[PerMarkerResults] = []
    preceding_user_turn_numbers_used_per_marker: PerMarkerCollectedNumbers = {
        marker: [] for marker in markers
    }
    for _ in range(num_sessions - 1):  # we append one more later
        result_dict = {}
        for marker in markers:
            (
                event_list,
                num_list,
            ) = _generate_random_example_for_one_session_and_one_marker(rng=rng)
            result_dict[marker] = event_list
            preceding_user_turn_numbers_used_per_marker[marker].append(num_list)
        per_session_results.append(result_dict)
    # append a session where we didn't find any marker
    per_session_results.append({marker: [] for marker in markers})
    for marker in preceding_user_turn_numbers_used_per_marker:
        preceding_user_turn_numbers_used_per_marker[marker].append([])
    return per_session_results, preceding_user_turn_numbers_used_per_marker
def test_pickle_frame(tmp_path, rng: np.random.Generator, writer, direct): "Pickle a Pandas data frame" file = tmp_path / 'data.bpk' df = pd.DataFrame({ 'key': np.arange(0, 5000), 'count': rng.integers(0, 1000, 5000), 'score': rng.normal(10, 2, 5000) }) with writer(file) as w: w.dump(df) with BinPickleFile(file, direct=direct) as bpf: assert not bpf.find_errors() df2 = bpf.load() print(df2) assert all(df2.columns == df.columns) for c in df2.columns: assert all(df2[c] == df[c]) del df2
def test_write_buf(tmp_path, rng: np.random.Generator): "Write a file with a single array" file = tmp_path / 'data.bpk' a = rng.integers(0, 5000, 1024, dtype='i4') with BinPickler(file) as w: w._write_buffer(a) w._finish_file() with BinPickleFile(file, direct=True) as bpf: assert len(bpf.entries) == 1 e = bpf.entries[0] assert e.dec_length == a.nbytes assert e.enc_length == a.nbytes b2 = bpf._read_buffer(e) assert b2.nbytes == e.dec_length a2 = np.frombuffer(b2, dtype='i4') assert len(a2) == len(a) assert all(a2 == a) del a2 del b2