Example #1
def rand_floats(
    gen: np.random.Generator,
    low: float = 0.0,
    high: float = 1.0,
    size: Optional[int] = None,
    dtype: np.dtype = np.float64,
    include_invalid: bool = False,
) -> np.ndarray:
    """
    Generate a random array of floating-point values with the specified length and dtype.

    The elements of the array are drawn from the uniform distribution over the
    range ``[low, high)``.
    """
    # Generate a random array for the given floating-point type.
    arr = gen.uniform(low=low, high=high, size=size).astype(dtype)

    # If we're including invalid values (NaN for floating-point types), draw a random integer
    # indicating how many elements we'll set to NaN; then generate a random integer array of
    # that length whose elements will be a fancy index we'll use to assign NaNs into the generated
    # floating-point array.
    # NOTE: The nancount we generate is approximate because we don't enforce that all the
    #       elements of the fancy index are unique.
    if include_invalid:
        nancount = gen.integers(0, size, endpoint=True)
        nan_indices = gen.integers(0, size, size=nancount)
        arr[nan_indices] = np.nan

    return arr
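A minimal usage sketch (assuming only NumPy; note that include_invalid=True requires an explicit size, because the NaN-count draw above needs an integer upper bound):

# Usage sketch, not from the original source.
gen = np.random.default_rng(42)
sample = rand_floats(gen, low=-1.0, high=1.0, size=8, include_invalid=True)
# -> 8 float64 values in [-1, 1), a random subset of which are NaN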
Example #2
    def try_get_inter_island_crossover_parents(
            self, rng: np.random.Generator) -> Optional[Tuple[CnnGenome, ...]]:
        # peek since we may not actually be able to give this island a turn (i.e. get a genome from this island
        # and another island)
        island_turn = self.peek_next_island_turn()

        if self.islands[island_turn].population:
            # islands with a population that we can grab a genome from
            valid_islands = []
            for i, island in enumerate(self.islands):
                if i == island_turn:
                    continue
                if island.population:
                    valid_islands.append(island)

            if not valid_islands:
                return None

            population_0 = self.islands[island_turn].population
            population_1 = valid_islands[rng.integers(
                0, len(valid_islands))].population

            i0 = rng.integers(0, len(population_0))
            i1 = rng.integers(0, len(population_1))

            g0 = population_0[i0]
            g1 = population_1[i1]

            # we did use the peeked island, so this will increment the island turn
            self.next_island_turn()

            return (g0, g1)
        else:
            return None
Example #3
def test_nonbonded_pair_list_interpolated_correctness(
    ixn_group_size,
    precision,
    rtol,
    atol,
    cutoff,
    beta,
    lamb,
    example_nonbonded_params,
    example_conf,
    example_box,
    rng: np.random.Generator,
):
    "Compares with jax reference implementation, with parameter interpolation."

    num_atoms, _ = example_conf.shape
    params = gen_params(example_nonbonded_params, rng)

    # randomly select 2 interaction groups and construct all pairwise interactions
    atom_idxs = rng.choice(
        num_atoms,
        size=(
            2,
            ixn_group_size,
        ),
        replace=False,
    ).astype(np.int32)

    pair_idxs = np.stack(np.meshgrid(atom_idxs[0, :],
                                     atom_idxs[1, :])).reshape(2, -1).T
    num_pairs, _ = pair_idxs.shape

    scales = rng.uniform(0, 1, size=(num_pairs, 2))

    lambda_plane_idxs = rng.integers(-2, 3, size=(num_atoms, ), dtype=np.int32)
    lambda_offset_idxs = rng.integers(-2,
                                      3,
                                      size=(num_atoms, ),
                                      dtype=np.int32)

    ref_potential = nonbonded.interpolated(
        make_ref_potential(pair_idxs, scales, lambda_plane_idxs,
                           lambda_offset_idxs, beta, cutoff))
    test_potential = NonbondedPairListInterpolated(pair_idxs, scales,
                                                   lambda_plane_idxs,
                                                   lambda_offset_idxs, beta,
                                                   cutoff)

    GradientTest().compare_forces(
        example_conf,
        params,
        example_box,
        lamb,
        ref_potential,
        test_potential,
        precision=precision,
        rtol=rtol,
        atol=atol,
    )
Example #4
def rand_fancyindex(
    rng: np.random.Generator,
    index_length: int,
    dtype: np.dtype,
    source_arr_len: int,
    invalid_ratio: Optional[float] = None,
) -> np.ndarray:
    """Create a random fancy index with the specified length and dtype."""
    check_params(dtype, invalid_ratio)
    if dtype.kind not in "iu":  # TODO: Also support floats, since mbget allows that
        raise ValueError(
            f"Only integer dtypes are currently supported by this method. dtype={dtype.name}"
        )

    # Generate the fancy index from the uniform integer distribution.
    fancyindex = FastArray(
        rng.integers(0, source_arr_len, size=index_length, dtype=dtype)
    )

    # If the fancy index should have some invalids/NA values, add those in now.
    if invalid_ratio is not None and invalid_ratio > 0.0:
        # TODO: Also add in some out-of-bounds accesses (and not just invalid/NA values) here?
        invalid_outcomes = FastArray(rng.random(size=index_length))
        putmask(fancyindex, invalid_outcomes < invalid_ratio, fancyindex.inv)

    return fancyindex
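For comparison, a plain-NumPy version of the same idea is sketched below (an assumption for illustration: FastArray and putmask come from riptable, whose integer 'invalid' is a dtype-specific sentinel; here a plain -1 stands in for it):

def rand_fancyindex_np(rng: np.random.Generator, index_length: int,
                       source_arr_len: int, invalid_ratio: float = 0.0) -> np.ndarray:
    # Draw the index, then overwrite a random fraction with a sentinel value
    # (NumPy integers have no NA, so -1 is used here purely for illustration).
    idx = rng.integers(0, source_arr_len, size=index_length)
    idx[rng.random(size=index_length) < invalid_ratio] = -1
    return idx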
Example #5
def _iter_random_indices(rng: np.random.Generator,
                         buffer_size: int,
                         random_batch_size=1000) -> Iterator[int]:
    while True:
        yield from (
            int(i)
            for i in rng.integers(0, buffer_size, size=random_batch_size))
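The generator is infinite, so callers would typically bound it, e.g. with itertools.islice (usage sketch):

import itertools

rng = np.random.default_rng(0)
first_ten = list(itertools.islice(_iter_random_indices(rng, buffer_size=50), 10))
# ten ints drawn uniformly from range(50), batched internally in blocks of 1000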
Example #6
def test_pickle_frame_dyncodec(tmp_path, rng: np.random.Generator):
    file = tmp_path / 'data.bpk'

    df = pd.DataFrame({
        'key': np.arange(0, 5000, dtype='i4'),
        'count': rng.integers(0, 1000, 5000),
        'score': rng.normal(10, 2, 5000)
    })

    def codec(buf):
        obj = memoryview(buf).obj
        if isinstance(obj, np.ndarray) and obj.dtype == np.float64:
            print('compacting double array')
            return codecs.Chain([numcodecs.AsType('f4', 'f8'), codecs.Blosc('zstd', 9)])
        else:
            return codecs.Blosc('zstd', 9)

    with BinPickler.compressed(file, codec) as w:
        w.dump(df)

    with BinPickleFile(file) as bpf:
        assert not bpf.find_errors()
        df2 = bpf.load()
        print(df2)
        assert all(df2.columns == df.columns)
        assert all(df2['key'] == df['key'])
        assert all(df2['count'] == df['count'])
        assert all(df2['score'].astype('f4') == df['score'].astype('f4'))
        del df2
Example #7
def update_params(thi_nil: np.ndarray, z: seed.Partition, mod: Model,
                  ctrl: Controls, param_samplers: List[MyopicRwSampler],
                  ome: np.random.Generator) -> Tuple[seed.Partition, np.ndarray]:
    def coin() -> Generator[Tuple[bool, seed.Partition], bool, None]:
        new_z = z
        state = (yield None, new_z)
        while True:
            success, new_z = flip_param_coins(thi_nil, thi_prime, new_z, mod,
                                              ctrl, state, ome)
            state = (yield success, new_z)

    sector = ome.integers(0, len(thi_nil))
    prop, log_p_for, log_p_back = param_samplers[sector].propose(ome)
    thi_prime = np.array(
        [thi_nil[i] if i != sector else prop for i in range(len(thi_nil))])

    if ctrl.precoin:
        if not flip_param_precoin(thi_nil, thi_prime, log_p_for, log_p_back,
                                  mod, ome):
            param_samplers[sector].adapt(thi_nil[sector], 0)
            return thi_nil, z
        weight_nil = weight_prime = 0
    else:
        weight_nil = eval_param_weight(thi_nil, mod) + log_p_for
        weight_prime = eval_param_weight(thi_prime, mod) + log_p_back

    accept, _, z_acc = sample_twocoin_joint(weight_prime, weight_nil, coin(),
                                            ctrl.pr_portkey, ome)
    thi_acc = thi_prime if accept else thi_nil
    param_samplers[sector].adapt(thi_acc[sector], float(accept))
    return thi_acc, z_acc
Example #8
    def try_make_new_layer(self, upper_bound_layer: Layer,
                           lower_bound_layer: Layer,
                           rng: np.random.Generator) -> Optional[Layer]:
        """
        When making a new layer, you must specify the volume size. Thus, two other layers are required to
        randomly select a valid volume size - a minimum and maximum volume size. If the lower bound layer is
        an output layer then only the upper bound layer will be considered.
        """
        if type(upper_bound_layer) == OutputLayer:
            return None

        depth: int = hp.get_random_volume_depth(rng)

        upper_width, upper_height, upper_depth = upper_bound_layer.output_shape
        assert upper_width == upper_height

        # assume a square volume size
        if type(lower_bound_layer) == OutputLayer:
            # Minimum square size must be at least 2; if the upper bound is 2, we can't create a smaller layer.
            if upper_width <= 2:
                return None

            volume_size = rng.integers(2, upper_width)
            width, height = volume_size, volume_size
        else:
            lower_width, lower_height, lower_depth = lower_bound_layer.output_shape
            assert lower_width == lower_height

            if upper_width <= lower_width:
                return None

            # If this is 3 then the lower must be 2, and we cannot create a size in between 2 and 3.
            if upper_width <= 3:
                assert lower_width == 2
                return None

            volume_size = rng.integers(lower_width, upper_width)
            width, height = volume_size, volume_size

        layer = Layer(Layer.get_next_layer_innovation_number(), width, height,
                      depth)

        assert layer.layer_innovation_number not in self.layer_map
        self.layer_map[layer.layer_innovation_number] = layer

        return layer
Example #9
def bootstrap_idxs(n, rng: Optional[np.random.Generator] = None):
    """
    Generate a set of bootstrap indexes of length n, returning the pair (in_bag, out_bag) containing the in-bag
    and out-of-bag indexes as numpy arrays.
    """
    if rng is None or type(rng) is not np.random.Generator:
        rng = np.random.default_rng(rng)
    in_bag = rng.integers(low=0, high=n, size=n)
    out_bag = np.array(list(set(range(n)) - set(in_bag)))
    return in_bag, out_bag
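Usage sketch (in_bag always has length n; out_bag holds the indexes never drawn, roughly 37% of them on average for large n):

rng = np.random.default_rng(7)
in_bag, out_bag = bootstrap_idxs(10, rng)
assert len(in_bag) == 10
assert set(in_bag).isdisjoint(out_bag)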
Example #10
def test_policy_log_policy_grad_vs_empirical(policy: policies.DiscretePolicy,
                                             rng: np.random.Generator):
    x0 = rng.uniform(-1, 1, policy.num_params())
    n = 10
    actions = rng.integers(policy.num_actions, size=n)
    states = rng.integers(policy.num_states, size=n)
    weights = rng.uniform(0, 1, n)

    def f(x):
        policy_matrix = policy.policy_matrix(x)
        return np.sum(weights * np.log(policy_matrix[states, actions]))

    def f_grad(x):
        return policy.log_policy_grad(actions=actions,
                                      states=states,
                                      weights=weights,
                                      x=x)

    assert scipy.optimize.check_grad(f, f_grad, x0) < 1e-6
Example #11
def remove_one_element(
        rng: np.random.Generator,
        array: np.ndarray) -> Tuple[Tuple[int, int, int], np.ndarray]:
    ys, xs = np.where(array >= 0)  # Arrays are y,x
    i = rng.integers(len(xs))
    pos = ys[i], xs[i]

    array_new = array.copy()
    array_new[pos] = -1
    return (*pos, array[pos]), array_new
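Usage sketch on a toy grid (negative entries mark empty cells, matching the function's convention):

rng = np.random.default_rng(3)
grid = np.array([[5, -1],
                 [-1, 2]])
(y, x, value), grid_after = remove_one_element(rng, grid)
# (y, x) was one of the non-negative cells; grid_after[y, x] == -1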
Example #12
    def random_flip_cx(self, rng: np.random.Generator) -> Tuple[int, int]:
        """
        Returns a randomly selected flippable CX gate in this CX circuit layer,
        using the given random number generator.
        """
        cx_idx = rng.integers(self._num_flippable_cxs)
        for i, cx in enumerate(self._iter_flippable_cxs()):
            if i == cx_idx:
                return cx
        raise Exception("Cannot get here!")
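The enumerate scan above could equivalently use itertools.islice; a sketch of that alternative, written as a hypothetical free function taking the layer object:

import itertools

def random_flip_cx_via_islice(layer, rng: np.random.Generator) -> Tuple[int, int]:
    # Skip cx_idx gates and take the next one instead of scanning with enumerate.
    cx_idx = rng.integers(layer._num_flippable_cxs)
    return next(itertools.islice(layer._iter_flippable_cxs(), cx_idx, None))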
Example #13
def test_nonbonded_all_pairs_interpolated_correctness(
    num_atoms,
    precision,
    rtol,
    atol,
    cutoff,
    beta,
    lamb,
    example_nonbonded_params,
    example_conf,
    example_box,
    rng: np.random.Generator,
):
    "Compares with jax reference implementation, with parameter interpolation."

    conf = example_conf[:num_atoms]
    params_initial = example_nonbonded_params[:num_atoms, :]
    params = gen_params(params_initial, rng)

    lambda_plane_idxs = rng.integers(-2, 3, size=(num_atoms, ), dtype=np.int32)
    lambda_offset_idxs = rng.integers(-2,
                                      3,
                                      size=(num_atoms, ),
                                      dtype=np.int32)

    ref_potential = nonbonded.interpolated(
        make_ref_potential(lambda_plane_idxs, lambda_offset_idxs, beta,
                           cutoff))
    test_potential = NonbondedAllPairsInterpolated(lambda_plane_idxs,
                                                   lambda_offset_idxs, beta,
                                                   cutoff)

    GradientTest().compare_forces(conf,
                                  params,
                                  example_box,
                                  lamb,
                                  ref_potential,
                                  test_potential,
                                  precision=precision,
                                  rtol=rtol,
                                  atol=atol)
Example #14
    def try_get_intra_island_crossover_parents(
            self, rng: np.random.Generator) -> Optional[Tuple[CnnGenome, ...]]:
        # Peek since we may or may not actually give this island a turn (i.e. actually get parent genomes from it)
        island_turn = self.peek_next_island_turn()

        if len(self.islands[island_turn].population) < 2:
            return None
        else:
            population: List[CnnGenome] = self.islands[island_turn].population
            i0 = rng.integers(0, len(population))
            # Draw i1 from one fewer option and shift it past i0: the pair is
            # guaranteed distinct and every other index stays equally likely.
            i1 = rng.integers(0, len(population) - 1)

            if i1 >= i0:
                i1 += 1

            assert i1 != i0

            # we did use the peeked island turn so this will increment the island turn
            self.next_island_turn()

            return (population[i0], population[i1])
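The shift trick above is a standard way to sample two distinct indices without a retry loop; isolated as a hypothetical helper (shifting on >= rather than == is what keeps every pair equally likely):

def sample_two_distinct(rng: np.random.Generator, n: int) -> Tuple[int, int]:
    """Draw two distinct indices from range(n), uniformly and without rejection."""
    i0 = int(rng.integers(0, n))
    i1 = int(rng.integers(0, n - 1))
    if i1 >= i0:  # shift past i0 so i1 lands on any index except i0
        i1 += 1
    return i0, i1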
Example #15
def rand_array(rng: np.random.Generator, length: int, dtype: np.dtype, invalid_ratio: Optional[float] = None) -> np.ndarray:
    # TODO: Implement a flag that controls whether invalid values are included in the array? Or (instead) an invalid_ratio parameter like our other functions?
    check_params(dtype, invalid_ratio)

    if dtype.kind in "iu":
        info = np.iinfo(dtype)
        arr = FastArray(rng.integers(info.min, info.max, size=length, dtype=dtype))

    elif dtype.kind == "f":
        # PERF: Use an FMA function here if we ever implement one
        arr = (FastArray(rng.random(size=length, dtype=dtype)) * 1e10) - 0.5e10

    elif dtype.kind == "S":
        # Generate integers in the upper ASCII range, then use a view to expose those
        # values as fixed-length ASCII strings.
        # TODO: Support other character ranges (lower-range ASCII 0-127, full ASCII 0-255, lowercase+uppercase+digits).
        arr = FastArray(rng.integers(
            65, 90, size=length * dtype.itemsize, dtype=np.int8, endpoint=True
        ).view(dtype))

    elif dtype.kind == "U":
        # Generate integers in the upper ASCII range.
        # TODO: Support other character ranges (lower-range ASCII 0-127, full ASCII 0-255, lowercase+uppercase+digits, Unicode chars >255).
        arr = FastArray(rng.integers(
            65, 90, size=length * (dtype.itemsize // 4), dtype=np.int32, endpoint=True
        ).view(dtype))

    else:
        # TODO: Handle other dtypes
        raise NotImplementedError(
            f"The dtype {dtype} is not yet supported by this function."
        )

    # If the array should have some invalids/NA values, add those in now.
    if invalid_ratio is not None and invalid_ratio > 0.0:
        # TODO: Also add in some out-of-bounds accesses (and not just invalid/NA values) here?
        invalid_outcomes = FastArray(rng.random(size=length))
        putmask(arr, invalid_outcomes < invalid_ratio, arr.inv)

    return arr
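The view trick used in the 'S' and 'U' branches, isolated as a pure-NumPy sketch (no FastArray): random code points are generated as integers and reinterpreted in place as fixed-width strings.

rng = np.random.default_rng(1)
dt = np.dtype("S5")
# Five 8-bit code points per string, viewed as one fixed-width bytestring each.
codes = rng.integers(65, 90, size=4 * dt.itemsize, dtype=np.uint8, endpoint=True)
strings = codes.view(dt)  # e.g. array([b'QKXEM', ...], dtype='|S5'), shape (4,)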
Example #16
def roll_signals(
    sig_original: ty.List[np.ndarray],
    max_time_shifts: ty.List[int],
    rng: np.random.Generator,
) -> ty.List[np.ndarray]:
    """MAKEDOC: what is roll_signals doing?"""
    sig_rolled: ty.List[np.ndarray] = []
    for s in tqdm(sig_original):
        for max_shift in max_time_shifts:
            time_shift = rng.integers(-max_shift, max_shift + 1)
            rolled = np.roll(s, time_shift)
            sig_rolled.append(rolled)
    return sig_rolled
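Usage sketch (tqdm only draws a progress bar here; each input signal yields one rolled copy per entry in max_time_shifts):

rng = np.random.default_rng(0)
sigs = [np.arange(10), np.arange(10, 20)]
rolled = roll_signals(sigs, max_time_shifts=[2, 5], rng=rng)
assert len(rolled) == len(sigs) * 2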
Example #17
def _iter_random_indices(
    rng: np.random.Generator,
    num_sources: int,
    random_batch_size=1000,
    p: Optional[List[float]] = None,
) -> Iterator[int]:
    """Get an infinite iterator that randomly samples the index of the source to pick examples from."""
    if p is None:
        while True:
            yield from (int(i) for i in rng.integers(0, num_sources, size=random_batch_size))
    else:
        while True:
            yield from (int(i) for i in rng.choice(num_sources, size=random_batch_size, p=p))
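Usage sketch for the weighted branch (p must sum to 1; islice bounds the infinite stream):

import itertools

rng = np.random.default_rng(0)
stream = _iter_random_indices(rng, num_sources=3, p=[0.7, 0.2, 0.1])
sample = list(itertools.islice(stream, 20))  # indices 0-2, mostly 0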
Example #18
def generate_weights(
    rng: np.random.Generator,
    year_begin: int,
    base_periods: Sequence[int],
    no_of_years: int,
    headers: IndexLabels,
) -> np.ndarray:
    """Generates fake weights.

    Selects random ints between 1 and 19 for weights in the first
    period. Then adds a random int between -2 and 2 for each subsequent
    weights update. Returns a numpy matrix of weights, clipped so that
    no weight can be lower than 1.

    Parameters
    ----------
    rng: Generator
        Numpy generator for generating random numbers.
    year_begin: int
        The start year for the first set of weights.
    base_periods: sequence of int
        A list of months, given by the int equivalent, for a weights
        update each year.
    no_of_years: int
        The number of years to generate weights for.
    headers: label, or sequence of labels
        A label or list of labels for each time series column name.

    Returns
    -------
    ndarray:
        A matrix of time series weights.

    """
    x = no_of_years * len(base_periods)
    y = len(headers)

    # Weights randomly initiated as an int between 1 and 19.
    first_year_weights = rng.integers(1, 20, (1, y))
    # Rearrange to length needed.
    weights = np.tile(first_year_weights, (x, 1))

    # Assumes that weights increase or decrease by no more than an
    # increment of 2 each base price refresh.
    change = rng.integers(-2, 2, (x, y), endpoint=True)
    change[0, :] = 0  # No change at weights start.

    change = change.cumsum(axis=0)

    # Add change to weights and ensure weights stay >= 1.
    return np.clip(weights + change, 1, None)
Example #19
def test_pickle_array(tmp_path, rng: np.random.Generator):
    "Pickle a NumPy array"
    file = tmp_path / 'data.bpk'

    a = rng.integers(0, 5000, 1024, dtype='i4')

    with BinPickler(file) as w:
        w.dump(a)

    with BinPickleFile(file) as bpf:
        assert len(bpf.entries) == 2
        a2 = bpf.load()
        assert len(a2) == len(a)
        assert all(a2 == a)
Example #20
    def enable_edge_mut(self, rng: np.random.Generator) -> bool:
        logging.info("attempting enable_edge mutation")

        if not self.disabled_edges:
            logging.info("failed to complete enable_edge mutation")
            return False

        disabled_edges: List[int] = list(self.disabled_edges)
        index: int = rng.integers(0, len(disabled_edges))
        edge_in: int = disabled_edges[index]

        self.enable_edge(edge_in)

        logging.info("successfully completed enable_edge mutation")
        return True
Example #21
def test_dump_frame(tmp_path, rng: np.random.Generator):
    "Pickle a Pandas data frame"
    file = tmp_path / 'data.bpk'

    df = pd.DataFrame({
        'key': np.arange(0, 5000),
        'count': rng.integers(0, 1000, 5000),
        'score': rng.normal(10, 2, 5000)
    })

    dump(df, file)
    df2 = load(file)

    assert all(df2.columns == df.columns)
    for c in df2.columns:
        assert all(df2[c] == df[c])
Example #22
    def enable_layer_mut(self, rng: np.random.Generator) -> bool:
        logging.info("attempting enable_layer mutation")

        if not self.disabled_layers:
            logging.info("failed to complete enable_layer mutation")
            return False

        disabled_layers = list(self.disabled_layers)
        index: int = rng.integers(0, len(disabled_layers))
        layer_in: int = disabled_layers[index]

        self.enable_layer(layer_in)

        logging.info(f"enabling layer {layer_in}")
        logging.info("successfully completed enable_layer mutation")
        return True
Example #23
def rand_integers(
    gen: np.random.Generator,
    size: Optional[int] = None,
    dtype: np.dtype = np.int64,
    include_invalid: bool = False,
) -> np.ndarray:
    """
    Generate a random array of integers with the specified length and dtype.

    The elements of the array will span the representable range of the dtype,
    optionally including the 'invalid' value for the type. The elements of the
    array are drawn from the 'discrete uniform' distribution.
    """
    # Determine the range for the dtype.
    lo, hi = integer_range(dtype, include_invalid)
    return gen.integers(lo, hi, size, dtype=dtype, endpoint=True)
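integer_range is an external helper not shown here; a plausible reconstruction is sketched below purely as an assumption (it treats the signed dtype's minimum as the reserved 'invalid' value, riptable-style, and excludes it unless invalids are requested):

def integer_range(dtype: np.dtype, include_invalid: bool) -> Tuple[int, int]:
    # Hypothetical sketch: reserve iinfo.min as the invalid sentinel for
    # signed integer dtypes and only hand it out when include_invalid is set.
    info = np.iinfo(dtype)
    lo = info.min if (include_invalid or dtype.kind == "u") else info.min + 1
    return lo, info.max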
Example #24
def test_raise_on_zero_diagonal(
    N: int,
    L: np.ndarray,
    v: np.ndarray,
    rng: np.random.Generator,
    method_kwargs: Dict[str, Any],
):
    """Tests whether a :class:`numpy.linalg.LinAlgError` is raised if the diagonal of
    the Cholesky factor contains zeros."""
    L = L.copy(order="K")

    k = rng.integers(N)

    L[k, k] = 0.0

    with pytest.raises(np.linalg.LinAlgError):
        cholupdates.rank_1.downdate(L, v, **method_kwargs)
Example #25
    def __init__(
        self,
        unlabeled_publisher_data_list: Iterable[PublisherData],
        order: OrderOptions,
        correlated_sets: CorrelatedSetsOptions,
        shared_prop: float,
        random_generator: Optional[np.random.Generator] = None,
        name: str = "sequentially_correlated",
    ) -> None:
        """Constructor for SequentiallyCorrelatedOverlapDataSet.

        Args:
          unlabeled_publisher_data_list:  a list of PublisherDataSet indicating the
            reach curve of a publisher.
          order: The order of the sets to be returned. It should be one of
            'original', 'reversed' and 'random'.
            Here a 'set' means the reached ids of a publisher.
          correlated_sets: One of 'all' and 'one', indicating how the current set
            is correlated with the previously generated sets when the order is
            'original'.
          shared_prop: A number between 0 and 1 that specifies the proportion of ids
            in the current set that are overlapped with the previous set(s).
            See wfa_cardinality_estimation_evaluation_framework.simulations.set_generator
            for more explanations on the args order, correlated_sets, shared_prop.
          random_generator: a random Generator for generating the sequentially correlated
            reached ids.
          name:  If specified, a human-readable name that will be associated to this
            DataSet.
        """
        if random_generator:
            random_state = np.random.RandomState(
                seed=random_generator.integers(low=0, high=int(1e9)))
        else:
            random_state = np.random.RandomState(seed=1)

        super().__init__(
            unlabeled_publisher_data_list=unlabeled_publisher_data_list,
            overlap_generator=SequentiallyCorrelatedSetGenerator,
            overlap_generator_kwargs={
                "order": order,
                "correlated_sets": correlated_sets,
                "shared_prop": shared_prop,
                "random_state": random_state,
            },
            name=name,
        )
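The Generator-to-RandomState bridge used in the constructor, isolated as a sketch: it derives a legacy seed from the modern Generator so downstream code that still expects a RandomState stays reproducible.

def make_random_state(gen: Optional[np.random.Generator]) -> np.random.RandomState:
    # Fall back to a fixed seed when no Generator is supplied, as the constructor does.
    seed = int(gen.integers(low=0, high=int(1e9))) if gen is not None else 1
    return np.random.RandomState(seed)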
Example #26
    def try_make_new_conv_edge(self,
                               input_layer: Layer,
                               output_layer: Layer,
                               rng: np.random.Generator,
                               conv_edge_type=ConvEdge) -> Optional[Edge]:
        if not self.valid_connection(input_layer, output_layer):
            return None

        # No negative filter sizes
        input_width, input_height, input_depth = input_layer.output_shape
        output_width, output_height, output_depth = output_layer.output_shape

        if input_width < output_width or input_height < output_height:
            return None

        possible_strides: List[int] = get_possible_strides(
            *input_layer.output_shape, *output_layer.output_shape)

        # No duplicate output edges with the same stride
        for edge_in in input_layer.outputs:
            edge = self.edge_map[edge_in]
            if  edge.input_layer_in == input_layer.layer_innovation_number and \
                edge.output_layer_in == output_layer.layer_innovation_number and \
                type(edge) == conv_edge_type:
                conv_edge: ConvEdge = cast(ConvEdge, edge)
                if conv_edge.stride in possible_strides:
                    possible_strides.remove(conv_edge.stride)

        if not possible_strides:
            return None

        stride: int = possible_strides[rng.integers(0, len(possible_strides))]

        conv_edge = conv_edge_type(Edge.get_next_edge_innovation_number(),
                                   stride, input_layer.layer_innovation_number,
                                   output_layer.layer_innovation_number,
                                   self.layer_map)
        self.register_edge(conv_edge)

        edge = cast(Edge, conv_edge)

        logging.info(f"creating separable conv edge from layer {input_layer.layer_innovation_number} to layer " + \
                     f"{output_layer.layer_innovation_number}")

        return edge
Example #27
def test_raise_on_vector_dimension_mismatch(
    N: int,
    L: np.ndarray,
    rng: np.random.Generator,
    method_kwargs: Dict[str, Any],
):
    """Tests whether a :class:`ValueError` is raised if the shape of the vector is not
    compatible with the shape of the Cholesky factor"""

    # Generate arbitrary v with incompatible length
    v_len = N + rng.integers(-N, N, endpoint=True) + 1

    if v_len == N:
        v_len += 1

    v = rng.random(v_len)

    with pytest.raises(ValueError):
        cholupdates.rank_1.downdate(L=L, v=v, **method_kwargs)
Example #28
def _generate_random_examples(
    rng: np.random.Generator,
    num_markers: int = 3,
    num_sessions_min: int = 2,
    num_sessions_max: int = 10,
) -> Tuple[List[PerMarkerResults], PerMarkerCollectedNumbers]:
    """Generates a random number of random marker extraction results for some markers.

    Args:
        rng: a random number generator
        num_markers: the number of markers to be imitated
        num_sessions_min: the minimum number of sessions to generate
        num_sessions_max: the maximum number of sessions to generate (inclusive)
    Returns:
        a list containing a dictionary of the marker extraction results per marker,
        as well as a collection of the plain lists of numbers used as "preceding user
        turns" in those extraction results
    """
    num_sessions = int(
        rng.integers(low=num_sessions_min, high=num_sessions_max + 1))
    markers = [f"marker{idx}" for idx in range(num_markers)]
    per_session_results: List[PerMarkerResults] = []
    preceding_user_turn_numbers_used_per_marker: PerMarkerCollectedNumbers = {
        marker: []
        for marker in markers
    }
    for _ in range(num_sessions - 1):  # we append one more later
        result_dict = {}
        for marker in markers:
            (
                event_list,
                num_list,
            ) = _generate_random_example_for_one_session_and_one_marker(
                rng=rng)
            result_dict[marker] = event_list
            preceding_user_turn_numbers_used_per_marker[marker].append(
                num_list)
        per_session_results.append(result_dict)
    # append a session where we didn't find any marker
    per_session_results.append({marker: [] for marker in markers})
    for marker in preceding_user_turn_numbers_used_per_marker:
        preceding_user_turn_numbers_used_per_marker[marker].append([])
    return per_session_results, preceding_user_turn_numbers_used_per_marker
Example #29
def test_pickle_frame(tmp_path, rng: np.random.Generator, writer, direct):
    "Pickle a Pandas data frame"
    file = tmp_path / 'data.bpk'

    df = pd.DataFrame({
        'key': np.arange(0, 5000),
        'count': rng.integers(0, 1000, 5000),
        'score': rng.normal(10, 2, 5000)
    })

    with writer(file) as w:
        w.dump(df)

    with BinPickleFile(file, direct=direct) as bpf:
        assert not bpf.find_errors()
        df2 = bpf.load()
        print(df2)
        assert all(df2.columns == df.columns)
        for c in df2.columns:
            assert all(df2[c] == df[c])
        del df2
Example #30
def test_write_buf(tmp_path, rng: np.random.Generator):
    "Write a file with a single array"
    file = tmp_path / 'data.bpk'

    a = rng.integers(0, 5000, 1024, dtype='i4')

    with BinPickler(file) as w:
        w._write_buffer(a)
        w._finish_file()

    with BinPickleFile(file, direct=True) as bpf:
        assert len(bpf.entries) == 1
        e = bpf.entries[0]
        assert e.dec_length == a.nbytes
        assert e.enc_length == a.nbytes
        b2 = bpf._read_buffer(e)
        assert b2.nbytes == e.dec_length
        a2 = np.frombuffer(b2, dtype='i4')
        assert len(a2) == len(a)
        assert all(a2 == a)
        del a2
        del b2