Esempio n. 1
0
        def just_draw_columns(draw):
            index = draw(index_strategy)
            local_index_strategy = st.just(index)

            data = OrderedDict((c.name, None) for c in rewritten_columns)

            # Depending on how the columns are going to be generated we group
            # them differently to get better shrinking. For columns with fill
            # enabled, the elements can be shrunk independently of the size,
            # so we can just shrink by shrinking the index then shrinking the
            # length and are generally much more free to move data around.

            # For columns with no filling the problem is harder, and drawing
            # them like that would result in rows being very far apart from
            # each other in the underlying data stream, which gets in the way
            # of shrinking. So what we do is reorder and draw those columns
            # row wise, so that the values of each row are next to each other.
            # This makes life easier for the shrinker when deleting blocks of
            # data.
            columns_without_fill = [
                c for c in rewritten_columns if c.fill.is_empty
            ]

            if columns_without_fill:
                for c in columns_without_fill:
                    data[c.name] = pandas.Series(np.zeros(shape=len(index),
                                                          dtype=c.dtype),
                                                 index=index)
                seen = {
                    c.name: set()
                    for c in columns_without_fill if c.unique
                }

                for i in range(len(index)):
                    for c in columns_without_fill:
                        if c.unique:
                            for _ in range(5):
                                value = draw(c.elements)
                                if value not in seen[c.name]:
                                    seen[c.name].add(value)
                                    break
                            else:
                                reject()
                        else:
                            value = draw(c.elements)
                        data[c.name][i] = value

            for c in rewritten_columns:
                if not c.fill.is_empty:
                    data[c.name] = draw(
                        series(
                            index=local_index_strategy,
                            dtype=c.dtype,
                            elements=c.elements,
                            fill=c.fill,
                            unique=c.unique,
                        ))

            return pandas.DataFrame(data, index=index)
Esempio n. 2
0
def urls() -> SearchStrategy[str]:
    """A strategy for :rfc:`3986`, generating http/https URLs."""
    def url_encode(s):
        return "".join(c if c in URL_SAFE_CHARACTERS else "%%%02X" % ord(c)
                       for c in s)

    schemes = st.sampled_from(["http", "https"])
    ports = st.integers(min_value=0, max_value=2**16 - 1).map(":{}".format)
    paths = st.lists(st.text(string.printable).map(url_encode)).map("/".join)

    return st.builds("{}://{}{}/{}".format, schemes, domains(),
                     st.just("") | ports, paths)
Esempio n. 3
0
def dates(min_value: dt.date = dt.date.min,
          max_value: dt.date = dt.date.max) -> SearchStrategy[dt.date]:
    """dates(min_value=datetime.date.min, max_value=datetime.date.max)

    A strategy for dates between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards January 1st 2000.
    """
    check_type(dt.date, min_value, "min_value")
    check_type(dt.date, max_value, "max_value")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    if min_value == max_value:
        return just(min_value)
    return DateStrategy(min_value, max_value)
Esempio n. 4
0
def timedeltas(
    min_value: dt.timedelta = dt.timedelta.min,
    max_value: dt.timedelta = dt.timedelta.max,
) -> SearchStrategy[dt.timedelta]:
    """timedeltas(min_value=datetime.timedelta.min, max_value=datetime.timedelta.max)

    A strategy for timedeltas between ``min_value`` and ``max_value``.

    Examples from this strategy shrink towards zero.
    """
    check_type(dt.timedelta, min_value, "min_value")
    check_type(dt.timedelta, max_value, "max_value")
    check_valid_interval(min_value, max_value, "min_value", "max_value")
    if min_value == max_value:
        return just(min_value)
    return TimedeltaStrategy(min_value=min_value, max_value=max_value)