def do_filtered_draw(self, data):
    # Set of indices that have been tried so far, so that we never test
    # the same element twice during a draw.
    known_bad_indices = set()

    # Start with ordinary rejection sampling. It's fast if it works, and
    # if it doesn't work then it was only a small amount of overhead.
    for _ in range(3):
        i = cu.integer_range(data, 0, len(self.elements) - 1)
        if i not in known_bad_indices:
            element = self.get_element(i)
            if element is not filter_not_satisfied:
                return element
            if not known_bad_indices:
                FilteredStrategy.note_retried(self, data)
            known_bad_indices.add(i)

    # If we've tried all the possible elements, give up now.
    max_good_indices = len(self.elements) - len(known_bad_indices)
    if not max_good_indices:
        return filter_not_satisfied

    # Figure out the bit-length of the index that we will write back after
    # choosing an allowed element.
    write_length = len(self.elements).bit_length()

    # Impose an arbitrary cutoff to prevent us from wasting too much time
    # on very large element lists.
    cutoff = 10000
    max_good_indices = min(max_good_indices, cutoff)

    # Before building the list of allowed indices, speculatively choose
    # one of them. We don't yet know how many allowed indices there will be,
    # so this choice might be out-of-bounds, but that's OK.
    speculative_index = cu.integer_range(data, 0, max_good_indices - 1)

    # Calculate the indices of allowed values, so that we can choose one
    # of them at random. But if we encounter the speculatively-chosen one,
    # just use that and return immediately. Note that we also track the
    # allowed elements, in case of .map(some_stateful_function)
    allowed = []
    for i in range(min(len(self.elements), cutoff)):
        if i not in known_bad_indices:
            element = self.get_element(i)
            if element is not filter_not_satisfied:
                allowed.append((i, element))
                if len(allowed) > speculative_index:
                    # Early-exit case: We reached the speculative index, so
                    # we just return the corresponding element.
                    data.draw_bits(write_length, forced=i)
                    return element

    # The speculative index didn't work out, but at this point we've built
    # and can choose from the complete list of allowed indices and elements.
    if allowed:
        i, element = cu.choice(data, allowed)
        data.draw_bits(write_length, forced=i)
        return element

    # If there are no allowed indices, the filter couldn't be satisfied.
    return filter_not_satisfied
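# Illustrative sketch only (not Hypothesis internals): the speculative-index
# trick from do_filtered_draw above, restated with the standard library's
# `random` module standing in for the engine's byte-stream draws. The name
# `toy_filtered_choice` and its parameters are hypothetical.
import random


def toy_filtered_choice(elements, condition, rng=random):
    # Phase 1: plain rejection sampling, cheap when the filter is easy.
    bad = set()
    for _ in range(3):
        i = rng.randrange(len(elements))
        if i not in bad:
            if condition(elements[i]):
                return elements[i]
            bad.add(i)

    # Phase 2: guess an index into the (not yet built) list of allowed
    # elements, then scan; stop early as soon as the guess is reached.
    max_good = len(elements) - len(bad)
    if not max_good:
        return None
    speculative = rng.randrange(max_good)
    allowed = []
    for i, element in enumerate(elements):
        if i not in bad and condition(element):
            allowed.append(element)
            if len(allowed) > speculative:
                return element

    # Phase 3: the guess was out of range, so fall back to choosing from
    # everything that satisfied the filter (or give up if nothing did).
    return rng.choice(allowed) if allowed else None


# Example: toy_filtered_choice(range(100), lambda x: x % 7 == 0)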
def draw_capped_multipart(
    data, min_value, max_value, duration_names=DATENAMES + TIMENAMES
):
    assert isinstance(min_value, (dt.date, dt.time, dt.datetime))
    assert type(min_value) == type(max_value)
    assert min_value <= max_value
    result = {}
    cap_low, cap_high = True, True
    for name in duration_names:
        low = getattr(min_value if cap_low else dt.datetime.min, name)
        high = getattr(max_value if cap_high else dt.datetime.max, name)
        if name == "day" and not cap_high:
            _, high = monthrange(**result)
        if name == "year":
            val = utils.integer_range(data, low, high, 2000)
        else:
            val = utils.integer_range(data, low, high)
        result[name] = val
        cap_low = cap_low and val == low
        cap_high = cap_high and val == high
    if hasattr(min_value, "fold"):
        # The `fold` attribute is ignored in comparison of naive datetimes.
        # In tz-aware datetimes it would require *very* invasive changes to
        # the logic above, and be very sensitive to the specific timezone
        # (at the cost of efficient shrinking and mutation), so at least for
        # now we stick with the status quo and generate it independently.
        result["fold"] = utils.integer_range(data, 0, 1)
    return result
def do_draw(self, data):
    result = {}
    cap_low, cap_high = True, True
    for name in ("year", "month", "day", "hour", "minute", "second", "microsecond"):
        low = getattr(self.min_dt if cap_low else dt.datetime.min, name)
        high = getattr(self.max_dt if cap_high else dt.datetime.max, name)
        if name == "day" and not cap_high:
            _, high = monthrange(**result)
        if name == "year":
            val = utils.integer_range(data, low, high, 2000)
        else:
            val = utils.integer_range(data, low, high)
        result[name] = val
        cap_low = cap_low and val == low
        cap_high = cap_high and val == high
    result = dt.datetime(**result)
    tz = data.draw(self.tz_strat)
    try:
        if is_pytz_timezone(tz):
            # Can't just construct; see http://pytz.sourceforge.net
            return tz.normalize(tz.localize(result))
        return result.replace(tzinfo=tz)
    except (ValueError, OverflowError):
        msg = "Failed to draw a datetime between %r and %r with timezone from %r."
        data.note_event(msg % (self.min_dt, self.max_dt, self.tz_strat))
        data.mark_invalid()
def do_draw(self, data):
    if len(self.intervals) > 256:
        if biased_coin(data, 0.2):
            i = integer_range(data, 256, len(self.intervals) - 1)
        else:
            i = integer_range(data, 0, 255)
    else:
        i = integer_range(data, 0, len(self.intervals) - 1)
    i = self.rewrite_integer(i)
    return chr(self.intervals[i])
def test_restricted_bits():
    assert (
        cu.integer_range(
            ConjectureData.for_buffer([1, 0, 0, 0, 0]), lower=0, upper=2 ** 64 - 1
        )
        == 0
    )
def do_draw(self, data):
    should_draw = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=self.average_size,
    )
    seen_sets = tuple(set() for _ in self.keys)
    result = []
    remaining = LazySequenceCopy(self.element_strategy.elements)

    while should_draw.more():
        i = len(remaining) - 1
        j = cu.integer_range(data, 0, i)
        if j != i:
            remaining[i], remaining[j] = remaining[j], remaining[i]
        value = remaining.pop()
        if all(key(value) not in seen for (key, seen) in zip(self.keys, seen_sets)):
            for key, seen in zip(self.keys, seen_sets):
                seen.add(key(value))
            result.append(value)
        else:
            should_draw.reject()
    assert self.max_size >= len(result) >= self.min_size
    return result
def do_draw(self, data):
    return integer_range(
        data,
        self.lower,
        self.upper,
        center=self.center,
    )
def do_draw(self, data):
    should_draw = cu.many(
        data,
        min_size=self.min_size,
        max_size=self.max_size,
        average_size=self.average_size,
    )
    seen_sets = tuple(set() for _ in self.keys)
    result = []
    remaining = LazySequenceCopy(self.element_strategy.elements)

    while remaining and should_draw.more():
        i = len(remaining) - 1
        j = cu.integer_range(data, 0, i)
        if j != i:
            remaining[i], remaining[j] = remaining[j], remaining[i]
        value = self.element_strategy._transform(remaining.pop())
        if value is not filter_not_satisfied and all(
            key(value) not in seen for key, seen in zip(self.keys, seen_sets)
        ):
            for key, seen in zip(self.keys, seen_sets):
                seen.add(key(value))
            if self.tuple_suffixes is not None:
                value = (value,) + data.draw(self.tuple_suffixes)
            result.append(value)
        else:
            should_draw.reject()
    assert self.max_size >= len(result) >= self.min_size
    return result
def do_draw(self, data):
    if self.start is None and self.end is None:
        return d.unbounded_integers(data)

    if self.start is None:
        if self.end <= 0:
            return self.end - abs(d.unbounded_integers(data))
        else:
            probe = self.end + 1
            while self.end < probe:
                data.start_example(ONE_BOUND_INTEGERS_LABEL)
                probe = d.unbounded_integers(data)
                data.stop_example(discard=self.end < probe)
            return probe

    if self.end is None:
        if self.start >= 0:
            return self.start + abs(d.unbounded_integers(data))
        else:
            probe = self.start - 1
            while probe < self.start:
                data.start_example(ONE_BOUND_INTEGERS_LABEL)
                probe = d.unbounded_integers(data)
                data.stop_example(discard=probe < self.start)
            return probe

    return d.integer_range(data, self.start, self.end, center=0)
def do_draw(self, data):
    machine = data.draw(self_strategy)
    bundle = machine.bundle(self.name)
    if not bundle:
        data.mark_invalid()
    reference = bundle.pop()
    bundle.insert(integer_range(data, 0, len(bundle)), reference)
    return machine.names_to_values[reference.name]
def do_draw(self, data):
    return integer_range(
        data,
        self.lower,
        self.upper,
        center=self.center,
        distribution=self.distribution,
    )
def do_draw(self, data):
    i = integer_range(
        data,
        0,
        len(self.intervals) - 1,
        center=self.zero_point,
    )
    return hunichr(self.intervals[i])
def do_draw(self, data):
    # Reversed Fisher-Yates shuffle. Reverse order so that it shrinks
    # properly: this way we prefer things that are lexicographically
    # closer to the identity.
    result = list(values)
    for i in hrange(len(result)):
        j = integer_range(data, i, len(result) - 1)
        result[i], result[j] = result[j], result[i]
    return result
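# Illustrative sketch only: the shuffle above draws each swap index from
# [i, n - 1], so when every draw shrinks to its lower bound (j == i) no swap
# happens and the identity permutation falls out. `toy_shuffle` and its
# `draw_int` parameter are hypothetical stand-ins for the engine's draws.
import random


def toy_shuffle(values, draw_int=random.randint):
    result = list(values)
    for i in range(len(result)):
        j = draw_int(i, len(result) - 1)
        result[i], result[j] = result[j], result[i]
    return result


# Minimal draws -> identity permutation, which is why this ordering shrinks well:
assert toy_shuffle("abcd", draw_int=lambda lo, hi: lo) == list("abcd")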
def do_draw(self, data):
    while True:
        i = integer_range(
            data,
            0,
            len(self.intervals) - 1,
            center=self.zero_point,
        )
        c = hunichr(self.intervals[i])
        if c not in self.blacklist_characters:
            return c
def do_draw(self, data):
    # type: (ConjectureData) -> Ex
    n = len(self.element_strategies)
    assert n > 0
    if n == 1:
        return data.draw(self.element_strategies[0])
    i = cu.integer_range(data, 0, n - 1)
    return data.draw(self.element_strategies[i], label=self.branch_labels[i])
def do_draw(self, data):
    n = len(self.element_strategies)
    assert n > 0
    if n == 1:
        return data.draw(self.element_strategies[0])
    elif self.sampler is None:
        i = cu.integer_range(data, 0, n - 1)
    else:
        i = self.sampler.sample(data)
    return data.draw(self.element_strategies[i])
def do_draw(self, data):
    machine = data.draw(self_strategy)
    bundle = machine.bundle(self.name)
    if not bundle:
        data.mark_invalid()
    # Shrink towards the right rather than the left. This makes it easier
    # to delete data generated earlier, as when the error is towards the
    # end there can be a lot of hard to remove padding.
    return bundle[integer_range(data, 0, len(bundle) - 1, center=len(bundle))]
def do_draw(self, data):
    result = dict()
    low_bound = True
    high_bound = True
    for name in ("days", "seconds", "microseconds"):
        low = getattr(self.min_value if low_bound else dt.timedelta.min, name)
        high = getattr(self.max_value if high_bound else dt.timedelta.max, name)
        val = utils.integer_range(data, low, high, 0)
        result[name] = val
        low_bound = low_bound and val == low
        high_bound = high_bound and val == high
    return dt.timedelta(**result)
def do_draw(self, data):
    machine = data.draw(self_strategy)
    bundle = machine.bundle(self.name)
    if not bundle:
        data.mark_invalid()
    # Shrink towards the right rather than the left. This makes it easier
    # to delete data generated earlier, as when the error is towards the
    # end there can be a lot of hard to remove padding.
    return bundle[integer_range(data, 0, len(bundle) - 1, center=len(bundle))]
def do_draw(self, data):
    result = {}
    low_bound = True
    high_bound = True
    for name in ("days", "seconds", "microseconds"):
        low = getattr(self.min_value if low_bound else dt.timedelta.min, name)
        high = getattr(self.max_value if high_bound else dt.timedelta.max, name)
        val = utils.integer_range(data, low, high, 0)
        result[name] = val
        low_bound = low_bound and val == low
        high_bound = high_bound and val == high
    return dt.timedelta(**result)
def do_draw(self, data):
    result = data.draw(self.fixed)
    remaining = [k for k in self.optional_keys if not self.optional[k].is_empty]
    should_draw = cu.many(
        data, min_size=0, max_size=len(remaining), average_size=len(remaining) / 2
    )
    while should_draw.more():
        j = cu.integer_range(data, 0, len(remaining) - 1)
        remaining[-1], remaining[j] = remaining[j], remaining[-1]
        key = remaining.pop()
        result[key] = data.draw(self.optional[key])
    return result
def do_draw(self, data):
    n = len(self.element_strategies)
    if n == 0:
        data.mark_invalid()
    elif n == 1:
        return self.element_strategies[0].do_draw(data)
    elif self.sampler is None:
        i = cu.integer_range(data, 0, n - 1)
    else:
        i = self.sampler.sample(data)
    return data.draw(self.element_strategies[i])
def do_draw(self, data):
    # This strategy is slightly strange in its implementation.
    # We don't want the interpretation of the rule we draw to change based
    # on whether other rules satisfy their preconditions or have data in
    # their bundles. Therefore the index into the rule list needs to stay
    # stable. BUT we don't want to draw invalid rules. So what we do is we
    # draw an index. We *could* just loop until it's valid, but if most
    # rules are invalid then that could result in a very long loop.
    # So what we do is the following:
    #
    #   1. We first draw a rule unconditionally, and check if it's valid.
    #      If it is, great. Nothing more to do, that's our rule.
    #   2. If it is invalid, we now calculate the list of valid rules and
    #      draw from that list (if there are none, that's an error in the
    #      definition of the machine and we complain to the user about it).
    #   3. Once we've drawn a valid rule, we write that back to the byte
    #      stream. As a result, when shrinking runs the shrinker can delete
    #      the initial failed draw + the draw that led to us finding an
    #      index into valid_rules, leaving just the written value of i.
    #      When this is run, it will look as if we got lucky and just
    #      happened to pick a valid rule.
    #
    # Easy, right?
    n = len(self.rules)
    i = cu.integer_range(data, 0, n - 1)
    u, v = data.blocks[-1]
    block_length = v - u
    rule = self.rules[i]
    if not self.is_valid(rule):
        valid_rules = [j for j, r in enumerate(self.rules) if self.is_valid(r)]
        if not valid_rules:
            raise InvalidDefinition(
                u'No progress can be made from state %r' % (self.machine,)
            )
        i = valid_rules[cu.integer_range(data, 0, len(valid_rules) - 1)]
        data.write(int_to_bytes(i, block_length))
        rule = self.rules[i]
    return (rule, data.draw(rule.arguments_strategy))
def do_draw(self, data):
    # This strategy is slightly strange in its implementation.
    # We don't want the interpretation of the rule we draw to change based
    # on whether other rules satisfy their preconditions or have data in
    # their bundles. Therefore the index into the rule list needs to stay
    # stable. BUT we don't want to draw invalid rules. So what we do is we
    # draw an index. We *could* just loop until it's valid, but if most
    # rules are invalid then that could result in a very long loop.
    # So what we do is the following:
    #
    #   1. We first draw a rule unconditionally, and check if it's valid.
    #      If it is, great. Nothing more to do, that's our rule.
    #   2. If it is invalid, we now calculate the list of valid rules and
    #      draw from that list (if there are none, that's an error in the
    #      definition of the machine and we complain to the user about it).
    #   3. Once we've drawn a valid rule, we write that back to the byte
    #      stream. As a result, when shrinking runs the shrinker can delete
    #      the initial failed draw + the draw that led to us finding an
    #      index into valid_rules, leaving just the written value of i.
    #      When this is run, it will look as if we got lucky and just
    #      happened to pick a valid rule.
    #
    # Easy, right?
    n = len(self.rules)
    i = cu.integer_range(data, 0, n - 1)
    u, v = data.blocks[-1].bounds
    block_length = v - u
    rule = self.rules[i]
    if not self.is_valid(rule):
        valid_rules = [j for j, r in enumerate(self.rules) if self.is_valid(r)]
        if not valid_rules:
            raise InvalidDefinition(
                u'No progress can be made from state %r' % (self.machine,)
            )
        i = valid_rules[cu.integer_range(data, 0, len(valid_rules) - 1)]
        data.write(int_to_bytes(i, block_length))
        rule = self.rules[i]
    return (rule, data.draw(rule.arguments_strategy))
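# Illustrative sketch only: the two-step rule selection above, with a plain
# list of booleans standing in for rule validity and `random` standing in for
# the byte stream. The real code also writes the chosen index back to the
# stream so the shrinker can drop the failed draws; here that is modelled as
# simply returning the final index. Names are hypothetical.
import random


def toy_pick_rule(valid_flags, rng=random):
    # 1. Draw an index unconditionally so its interpretation stays stable.
    i = rng.randrange(len(valid_flags))
    if valid_flags[i]:
        return i
    # 2. Otherwise draw only from the currently-valid indices...
    valid = [j for j, ok in enumerate(valid_flags) if ok]
    if not valid:
        raise ValueError("no valid rules to draw from")
    # 3. ...and return that index (the real code writes it back as well).
    return valid[rng.randrange(len(valid))]


# Example: toy_pick_rule([False, True, False, True])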
def _attempt_one_draw(self, data):
    result = dict()
    cap_low, cap_high = True, True
    for name in ("year", "month", "day", "hour", "minute", "second", "microsecond"):
        low = getattr(self.min_dt if cap_low else dt.datetime.min, name)
        high = getattr(self.max_dt if cap_high else dt.datetime.max, name)
        if name == "year":
            val = utils.integer_range(data, low, high, 2000)
        else:
            val = utils.integer_range(data, low, high)
        result[name] = val
        cap_low = cap_low and val == low
        cap_high = cap_high and val == high
    tz = data.draw(self.tz_strat)
    try:
        result = dt.datetime(**result)
        if is_pytz_timezone(tz):
            # Can't just construct; see http://pytz.sourceforge.net
            return tz.normalize(tz.localize(result))
        return result.replace(tzinfo=tz)
    except (ValueError, OverflowError):
        return None
def do_draw(self, data):
    n = len(self.element_strategies)
    if self.bias is None:
        i = cu.integer_range(data, 0, n - 1)
    else:

        def biased_i(random):
            while True:
                i = random.randint(0, n - 1)
                if random.random() <= self.weights[i]:
                    return i

        i = cu.integer_range_with_distribution(data, 0, n - 1, biased_i)
    return data.draw(self.element_strategies[i])
def do_draw(self, data):
    # type: (ConjectureData) -> Ex
    n = len(self.element_strategies)
    assert n > 0
    if n == 1:
        return data.draw(self.element_strategies[0])
    if self.bias is None:
        i = cu.integer_range(data, 0, n - 1)
    else:
        i = self.sampler.sample(data)
    assert 0 <= i < n
    return data.draw(self.element_strategies[i], label=self.branch_labels[i])
def draw_capped_multipart(data, min_value, max_value):
    assert isinstance(min_value, (dt.date, dt.time, dt.datetime))
    assert type(min_value) == type(max_value)
    assert min_value <= max_value
    result = {}
    cap_low, cap_high = True, True
    duration_names_by_type = {
        dt.date: DATENAMES,
        dt.time: TIMENAMES,
        dt.datetime: DATENAMES + TIMENAMES,
    }
    for name in duration_names_by_type[type(min_value)]:
        low = getattr(min_value if cap_low else dt.datetime.min, name)
        high = getattr(max_value if cap_high else dt.datetime.max, name)
        if name == "day" and not cap_high:
            _, high = monthrange(**result)
        if name == "year":
            val = utils.integer_range(data, low, high, 2000)
        else:
            val = utils.integer_range(data, low, high)
        result[name] = val
        cap_low = cap_low and val == low
        cap_high = cap_high and val == high
    return result
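# Illustrative sketch only: the cap_low/cap_high pattern used by
# draw_capped_multipart above, applied to plain digit tuples instead of
# datetime fields. Each component is only constrained by the corresponding
# bound while every earlier component still equals that bound, which keeps
# the whole tuple within [lo, hi]. `draw_digits_between` is a hypothetical name.
import random


def draw_digits_between(lo, hi, rng=random):
    assert len(lo) == len(hi) and lo <= hi
    result = []
    cap_low, cap_high = True, True
    for lo_d, hi_d in zip(lo, hi):
        low = lo_d if cap_low else 0
        high = hi_d if cap_high else 9
        val = rng.randint(low, high)
        result.append(val)
        cap_low = cap_low and val == low
        cap_high = cap_high and val == high
    return tuple(result)


# Always yields a tuple t with lo <= t <= hi, e.g.:
# draw_digits_between((1, 9, 5), (2, 0, 3))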
def do_draw(self, data):
    # type: (ConjectureData) -> Ex
    n = len(self.element_strategies)
    assert n > 0
    if n == 1:
        return data.draw(self.element_strategies[0])
    if self.bias is None:
        i = cu.integer_range(data, 0, n - 1)
    else:
        i = self.sampler.sample(data)
    assert 0 <= i < n
    return data.draw(self.element_strategies[i], label=self.branch_labels[i])
def do_draw(self, data):
    denom = math.log1p(-1 / 127)

    def d(random):
        if self.special and random.randint(0, 10) == 0:
            return random.choice(self.special)
        if len(self.intervals) <= 256 or random.randint(0, 1):
            i = random.randint(0, len(self.intervals.offsets) - 1)
            u, v = self.intervals.intervals[i]
            return self.intervals.offsets[i] + random.randint(0, v - u + 1)
        else:
            return min(
                len(self.intervals) - 1, int(math.log(random.random()) / denom)
            )

    while True:
        i = integer_range(
            data, 0, len(self.intervals) - 1, center=self.zero_point, distribution=d
        )
        c = hunichr(self.intervals[i])
        if c not in self.blacklist_characters:
            return c
def do_draw(self, data):
    while True:
        try:
            result = dt.datetime(
                year=cu.centered_integer_range(
                    data, self.min_year, self.max_year, 2000
                ),
                month=cu.integer_range(data, 1, 12),
                day=cu.integer_range(data, 1, 31),
                hour=cu.integer_range(data, 0, 24),
                minute=cu.integer_range(data, 0, 59),
                second=cu.integer_range(data, 0, 59),
                microsecond=cu.integer_range(data, 0, 999999),
            )
            if not self.allow_naive or (self.timezones and cu.boolean(data)):
                result = cu.choice(data, self.timezones).localize(result)
            return result
        except (OverflowError, ValueError):
            pass
def do_draw(self, data):
    while True:
        try:
            result = dt.datetime(
                year=cu.centered_integer_range(
                    data, self.min_year, self.max_year, 2000
                ),
                month=cu.integer_range(data, 1, 12),
                day=cu.integer_range(data, 1, 31),
                hour=cu.integer_range(data, 0, 24),
                minute=cu.integer_range(data, 0, 59),
                second=cu.integer_range(data, 0, 59),
                microsecond=cu.integer_range(data, 0, 999999),
            )
            if not self.allow_naive or (self.timezones and cu.boolean(data)):
                result = cu.choice(data, self.timezones).localize(result)
            return result
        except (OverflowError, ValueError):
            pass
def do_filtered_draw(self, data, filter_strategy):
    # Set of indices that have been tried so far, so that we never test
    # the same element twice during a draw.
    known_bad_indices = set()

    def check_index(i):
        """Return ``True`` if the element at ``i`` satisfies the filter
        condition."""
        if i in known_bad_indices:
            return False
        ok = filter_strategy.condition(self.elements[i])
        if not ok:
            if not known_bad_indices:
                filter_strategy.note_retried(data)
            known_bad_indices.add(i)
        return ok

    # Start with ordinary rejection sampling. It's fast if it works, and
    # if it doesn't work then it was only a small amount of overhead.
    for _ in hrange(3):
        i = d.integer_range(data, 0, len(self.elements) - 1)
        if check_index(i):
            return self.elements[i]

    # If we've tried all the possible elements, give up now.
    max_good_indices = len(self.elements) - len(known_bad_indices)
    if not max_good_indices:
        return filter_not_satisfied

    # Figure out the bit-length of the index that we will write back after
    # choosing an allowed element.
    write_length = bit_length(len(self.elements))

    # Impose an arbitrary cutoff to prevent us from wasting too much time
    # on very large element lists.
    cutoff = 10000
    max_good_indices = min(max_good_indices, cutoff)

    # Before building the list of allowed indices, speculatively choose
    # one of them. We don't yet know how many allowed indices there will be,
    # so this choice might be out-of-bounds, but that's OK.
    speculative_index = d.integer_range(data, 0, max_good_indices - 1)

    # Calculate the indices of allowed values, so that we can choose one
    # of them at random. But if we encounter the speculatively-chosen one,
    # just use that and return immediately.
    allowed_indices = []
    for i in hrange(min(len(self.elements), cutoff)):
        if check_index(i):
            allowed_indices.append(i)
            if len(allowed_indices) > speculative_index:
                # Early-exit case: We reached the speculative index, so
                # we just return the corresponding element.
                data.draw_bits(write_length, forced=i)
                return self.elements[i]

    # The speculative index didn't work out, but at this point we've built
    # the complete list of allowed indices, so we can just choose one of
    # them.
    if allowed_indices:
        i = d.choice(data, allowed_indices)
        data.draw_bits(write_length, forced=i)
        return self.elements[i]

    # If there are no allowed indices, the filter couldn't be satisfied.
    return filter_not_satisfied
def test_does_draw_data_for_empty_range():
    data = ConjectureData.for_buffer(b"\1")
    assert cu.integer_range(data, 1, 1) == 1
    data.freeze()
    assert data.buffer == hbytes(b"\0")
def do_draw(self, data):
    days = utils.integer_range(data, 0, self.days_apart, center=self.center)
    return self.min_value + dt.timedelta(days=days)
def _hypothesis_do_random(self, method, kwargs):
    if method == "choices":
        key = (method, len(kwargs["population"]), kwargs.get("k"))
    elif method == "choice":
        key = (method, len(kwargs["seq"]))
    elif method == "shuffle":
        key = (method, len(kwargs["x"]))
    else:
        key = (method,) + tuple(sorted(kwargs))

    try:
        result, self.__state = self.__state.next_states[key]
    except KeyError:
        pass
    else:
        return self.__convert_result(method, kwargs, result)

    if method == "_randbelow":
        result = cu.integer_range(self.__data, 0, kwargs["n"] - 1)
    elif method in ("betavariate", "random"):
        result = self.__data.draw(UNIFORM)
    elif method == "uniform":
        a = normalize_zero(kwargs["a"])
        b = normalize_zero(kwargs["b"])
        result = self.__data.draw(st.floats(a, b))
    elif method in ("weibullvariate", "gammavariate"):
        result = self.__data.draw(st.floats(min_value=0.0, allow_infinity=False))
    elif method in ("gauss", "normalvariate"):
        mu = kwargs["mu"]
        result = mu + self.__data.draw(
            st.floats(allow_nan=False, allow_infinity=False)
        )
    elif method == "vonmisesvariate":
        result = self.__data.draw(st.floats(0, 2 * math.pi))
    elif method == "randrange":
        if kwargs["stop"] is None:
            stop = kwargs["start"]
            start = 0
        else:
            start = kwargs["start"]
            stop = kwargs["stop"]
        step = kwargs["step"]
        if start == stop:
            raise ValueError(
                "empty range for randrange(%d, %d, %d)" % (start, stop, step)
            )
        if step != 1:
            endpoint = (stop - start) // step
            if (start - stop) % step == 0:
                endpoint -= 1
            i = cu.integer_range(self.__data, 0, endpoint)
            result = start + i * step
        else:
            result = cu.integer_range(self.__data, start, stop - 1)
    elif method == "randint":
        result = cu.integer_range(self.__data, kwargs["a"], kwargs["b"])
    elif method == "choice":
        seq = kwargs["seq"]
        result = cu.integer_range(self.__data, 0, len(seq) - 1)
    elif method == "choices":
        k = kwargs["k"]
        result = self.__data.draw(
            st.lists(
                st.integers(0, len(kwargs["population"]) - 1),
                min_size=k,
                max_size=k,
            )
        )
    elif method == "sample":
        k = kwargs["k"]
        seq = kwargs["population"]
        if k > len(seq) or k < 0:
            raise ValueError(
                "Sample size %d not in expected range 0 <= k <= %d" % (k, len(seq))
            )
        result = self.__data.draw(
            st.lists(
                st.sampled_from(range(len(seq))),
                min_size=k,
                max_size=k,
                unique=True,
            )
        )
    elif method == "getrandbits":
        result = self.__data.draw_bits(kwargs["n"])
    elif method == "triangular":
        low = normalize_zero(kwargs["low"])
        high = normalize_zero(kwargs["high"])
        mode = normalize_zero(kwargs["mode"])
        if mode is None:
            result = self.__data.draw(st.floats(low, high))
        elif self.__data.draw_bits(1):
            result = self.__data.draw(st.floats(mode, high))
        else:
            result = self.__data.draw(st.floats(low, mode))
    elif method in ("paretovariate", "expovariate", "lognormvariate"):
        result = self.__data.draw(st.floats(min_value=0.0))
    elif method == "shuffle":
        result = self.__data.draw(st.permutations(range(len(kwargs["x"]))))
    # This is tested for but only appears in 3.9 so doesn't appear in coverage.
    elif method == "randbytes":  # pragma: no cover
        n = kwargs["n"]
        result = self.__data.draw(st.binary(min_size=n, max_size=n))
    else:
        raise NotImplementedError(method)

    new_state = RandomState()
    self.__state.next_states[key] = (result, new_state)
    self.__state = new_state

    return self.__convert_result(method, kwargs, result)
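# Illustrative sketch only: the arithmetic from the "randrange" branch above,
# pulled out so it can be checked against the standard library's semantics.
# `draw_int` stands in for cu.integer_range and the helper name is hypothetical.
import random


def toy_randrange(start, stop, step, draw_int=random.randint):
    if step != 1:
        endpoint = (stop - start) // step
        if (start - stop) % step == 0:
            endpoint -= 1
        return start + draw_int(0, endpoint) * step
    return draw_int(start, stop - 1)


# The largest and smallest drawable values are both members of the real range:
assert toy_randrange(3, 20, 4, draw_int=lambda lo, hi: hi) == 19
assert toy_randrange(3, 20, 4, draw_int=lambda lo, hi: lo) == 3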
def test_does_not_draw_data_for_empty_range():
    assert integer_range(TestData.for_buffer(b""), 1, 1) == 1
def do_draw(self, data):
    return d.integer_range(data, self.start, self.end)
def do_draw(self, data):
    return integer_range(data, self.lower, self.upper, center=self.center)
def do_draw(self, data):
    if 0 in self.shape:
        return np.zeros(dtype=self.dtype, shape=self.shape)

    # This could legitimately be a np.empty, but the performance gains for
    # that would be so marginal that there's really not much point risking
    # undefined behaviour shenanigans.
    result = np.zeros(shape=self.array_size, dtype=self.dtype)

    if self.fill.is_empty:
        # We have no fill value (either because the user explicitly
        # disabled it or because the default behaviour was used and our
        # elements strategy does not produce reusable values), so we must
        # generate a fully dense array with a freshly drawn value for each
        # entry.
        if self.unique:
            seen = set()
            elements = cu.many(
                data,
                min_size=self.array_size,
                max_size=self.array_size,
                average_size=self.array_size,
            )
            i = 0
            while elements.more():
                # We assign first because this means we check for
                # uniqueness after numpy has converted it to the relevant
                # type for us. Because we don't increment the counter on
                # a duplicate we will overwrite it on the next draw.
                result[i] = data.draw(self.element_strategy)
                if result[i] not in seen:
                    seen.add(result[i])
                    i += 1
                else:
                    elements.reject()
        else:
            for i in hrange(len(result)):
                result[i] = data.draw(self.element_strategy)
    else:
        # We draw numpy arrays as "sparse with an offset". We draw a
        # collection of index assignments within the array and assign
        # fresh values from our elements strategy to those indices. If at
        # the end we have not assigned every element then we draw a single
        # value from our fill strategy and use that to populate the
        # remaining positions with that strategy.
        elements = cu.many(
            data,
            min_size=0,
            max_size=self.array_size,
            # sqrt isn't chosen for any particularly principled reason. It
            # just grows reasonably quickly but sublinearly, and for small
            # arrays it represents a decent fraction of the array size.
            average_size=math.sqrt(self.array_size),
        )

        needs_fill = np.full(self.array_size, True)
        seen = set()
        while elements.more():
            i = cu.integer_range(data, 0, self.array_size - 1)
            if not needs_fill[i]:
                elements.reject()
                continue
            result[i] = data.draw(self.element_strategy)
            if self.unique:
                if result[i] in seen:
                    elements.reject()
                    continue
                else:
                    seen.add(result[i])
            needs_fill[i] = False
        if needs_fill.any():
            # We didn't fill all of the indices in the early loop, so we
            # put a fill value into the rest.

            # We have to do this hilarious little song and dance to work
            # around numpy's special handling of iterable values. If the
            # value here were e.g. a tuple then neither array creation
            # nor putmask would do the right thing. But by creating an
            # array of size one and then assigning the fill value as a
            # single element, we both get an array with the right value in
            # it and putmask will do the right thing by repeating the
            # values of the array across the mask.
            one_element = np.zeros(shape=1, dtype=self.dtype)
            one_element[0] = data.draw(self.fill)
            fill_value = one_element[0]
            if self.unique:
                try:
                    is_nan = np.isnan(fill_value)
                except TypeError:
                    is_nan = False

                if not is_nan:
                    raise InvalidArgument(
                        'Cannot fill unique array with non-NaN '
                        'value %r' % (fill_value,)
                    )

            np.putmask(result, needs_fill, one_element)

    return result.reshape(self.shape)
def do_draw(self, data):
    i = integer_range(data, 0, len(self.intervals) - 1, center=self.zero_point)
    return hunichr(self.intervals[i])
def do_draw(self, data):
    if 0 in self.shape:
        return self.xp.zeros(self.shape, dtype=self.dtype)

    if self.fill.is_empty:
        # We have no fill value (either because the user explicitly
        # disabled it or because the default behaviour was used and our
        # elements strategy does not produce reusable values), so we must
        # generate a fully dense array with a freshly drawn value for each
        # entry.
        elems = data.draw(
            st.lists(
                self.elements_strategy,
                min_size=self.array_size,
                max_size=self.array_size,
                unique=self.unique,
            )
        )
        try:
            result = self.xp.asarray(elems, dtype=self.dtype)
        except Exception as e:
            if len(elems) <= 6:
                f_elems = str(elems)
            else:
                f_elems = f"[{elems[0]}, {elems[1]}, ..., {elems[-2]}, {elems[-1]}]"
            types = tuple(
                sorted({type(e) for e in elems}, key=lambda t: t.__name__)
            )
            f_types = f"type {types[0]}" if len(types) == 1 else f"types {types}"
            raise InvalidArgument(
                f"Generated elements {f_elems} from strategy "
                f"{self.elements_strategy} could not be converted "
                f"to array of dtype {self.dtype}. "
                f"Consider if elements of {f_types} "
                f"are compatible with {self.dtype}."
            ) from e
        for i in range(self.array_size):
            self.check_set_value(elems[i], result[i], self.elements_strategy)
    else:
        # We draw arrays as "sparse with an offset". We assume not every
        # element will be assigned and so first draw a single value from our
        # fill strategy to create a full array. We then draw a collection of
        # index assignments within the array and assign fresh values from
        # our elements strategy to those indices.
        fill_val = data.draw(self.fill)
        try:
            result = self.xp.full(self.array_size, fill_val, dtype=self.dtype)
        except Exception as e:
            raise InvalidArgument(
                f"Could not create full array of dtype={self.dtype} "
                f"with fill value {fill_val!r}"
            ) from e
        sample = result[0]
        self.check_set_value(fill_val, sample, self.fill)
        if self.unique and not self.xp.all(self.xp.isnan(result)):
            raise InvalidArgument(
                f"Array module {self.xp.__name__} did not recognise fill "
                f"value {fill_val!r} as NaN - instead got {sample!r}. "
                "Cannot fill unique array with non-NaN values."
            )

        elements = cu.many(
            data,
            min_size=0,
            max_size=self.array_size,
            # sqrt isn't chosen for any particularly principled reason. It
            # just grows reasonably quickly but sublinearly, and for small
            # arrays it represents a decent fraction of the array size.
            average_size=min(
                0.9 * self.array_size,  # ensure small arrays sometimes use fill
                max(10, math.sqrt(self.array_size)),  # ...but *only* sometimes
            ),
        )

        assigned = set()
        seen = set()

        while elements.more():
            i = cu.integer_range(data, 0, self.array_size - 1)
            if i in assigned:
                elements.reject()
                continue
            val = data.draw(self.elements_strategy)
            if self.unique:
                if val in seen:
                    elements.reject()
                    continue
                else:
                    seen.add(val)
            try:
                result[i] = val
            except Exception as e:
                raise InvalidArgument(
                    f"Could not add generated array element {val!r} "
                    f"of type {type(val)} to array of dtype {result.dtype}."
                ) from e
            self.check_set_value(val, result[i], self.elements_strategy)
            assigned.add(i)

    result = self.xp.reshape(result, self.shape)

    return result
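# Illustrative sketch only: the "sparse with an offset" idea used by both
# array-drawing functions above, on a plain Python list rather than a
# numpy/array-API array. A fill value covers every position first, then a
# handful of randomly chosen indices are overwritten with freshly drawn
# elements. Names and parameters are hypothetical.
import random


def toy_sparse_array(size, draw_element, fill_value, n_assignments, rng=random):
    result = [fill_value] * size
    assigned = set()
    for _ in range(n_assignments):
        i = rng.randrange(size)
        if i in assigned:
            continue  # the real code rejects the draw instead
        result[i] = draw_element()
        assigned.add(i)
    return result


# Example: mostly zeros, with a few random one-digit entries.
# toy_sparse_array(10, lambda: random.randint(1, 9), 0, 3)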
def test_does_not_draw_data_for_empty_range():
    assert integer_range(ConjectureData.for_buffer(b''), 1, 1) == 1