Ejemplo n.º 1
0
class OneCharStringStrategy(SearchStrategy):
    """A strategy which generates single character strings of text type.

    The drawable characters are the code points returned by
    ``charmap.query`` for the given categories and codepoint bounds.
    Characters in ``blacklist_characters`` are rejected at draw time.
    """
    specifier = text_type
    # Class-level default; ``__init__`` replaces it per instance with an
    # index into ``self.intervals``.
    zero_point = ord('0')

    def __init__(self,
                 whitelist_categories=None,
                 blacklist_categories=None,
                 blacklist_characters=None,
                 min_codepoint=None,
                 max_codepoint=None):
        """Build the set of drawable characters.

        Raises ``InvalidArgument`` when the query matches no characters, or
        when the blacklist removes every character the query matched.
        """
        intervals = charmap.query(
            include_categories=whitelist_categories,
            exclude_categories=blacklist_categories,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
        )
        if not intervals:
            raise InvalidArgument('No valid characters in set')
        self.intervals = IntervalSet(intervals)
        if blacklist_characters:
            # Blacklisted characters outside the intervals can never be
            # drawn, so only the ones inside count towards exhaustion.
            self.blacklist_characters = set(b for b in blacklist_characters
                                            if ord(b) in self.intervals)
            if len(self.blacklist_characters) == len(self.intervals):
                raise InvalidArgument('No valid characters in set')
        else:
            self.blacklist_characters = set()
        self.zero_point = self.intervals.index_above(ord('0'))
        # Indices the draw distribution returns with extra probability:
        # only the newline character, and only when it is drawable.
        self.special = []
        if '\n' not in self.blacklist_characters:
            n = ord('\n')
            try:
                self.special.append(self.intervals.index(n))
            except ValueError:
                # Newline is not part of the interval set.
                pass

    def do_draw(self, data):
        """Draw one permitted character, retrying on blacklisted ones."""
        denom = math.log1p(-1 / 127)

        def d(random):
            """Pick an index into ``self.intervals`` using ``random``."""
            if self.special and random.randint(0, 10) == 0:
                return random.choice(self.special)
            if len(self.intervals) <= 256 or random.randint(0, 1):
                # Choose an interval uniformly, then a position inside it.
                i = random.randint(0, len(self.intervals.offsets) - 1)
                u, v = self.intervals.intervals[i]
                # ``randint`` includes both endpoints and the inclusive
                # range (u, v) holds v - u + 1 code points, so the largest
                # valid offset is v - u.  Using v - u + 1 stepped into the
                # next interval, or past the end of the set.
                return self.intervals.offsets[i] + random.randint(0, v - u)
            else:
                # Geometric-style draw favouring small indices, capped at
                # the last valid index.
                return min(
                    len(self.intervals) - 1,
                    int(math.log(random.random()) / denom))

        while True:
            i = integer_range(data,
                              0,
                              len(self.intervals) - 1,
                              center=self.zero_point,
                              distribution=d)
            c = hunichr(self.intervals[i])
            if c not in self.blacklist_characters:
                return c
Ejemplo n.º 2
0
 def __init__(self,
              whitelist_categories=None,
              blacklist_categories=None,
              blacklist_characters=None,
              min_codepoint=None,
              max_codepoint=None,
              whitelist_characters=None):
     """Resolve the arguments into the set of drawable characters.

     Raises ``InvalidArgument`` when ``charmap.query`` returns nothing for
     this combination of arguments.
     """
     query_args = dict(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )
     allowed = charmap.query(**query_args)
     if not allowed:
         raise InvalidArgument('No valid characters in set')
     self.intervals = IntervalSet(allowed)
     # Remember the explicitly whitelisted characters as a set.
     self.whitelist_characters = (
         set(whitelist_characters) if whitelist_characters else set()
     )
     zero = ord('0')
     self.zero_point = self.intervals.index_above(zero)
Ejemplo n.º 3
0
 def __init__(self,
              whitelist_categories=None,
              blacklist_categories=None,
              blacklist_characters=None,
              min_codepoint=None,
              max_codepoint=None):
     """Resolve the arguments into the set of drawable characters.

     Raises ``InvalidArgument`` when the query matches no characters, or
     when the blacklist covers every character the query matched.
     """
     allowed = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
     )
     if not allowed:
         raise InvalidArgument('No valid characters in set')
     self.intervals = IntervalSet(allowed)

     if not blacklist_characters:
         self.blacklist_characters = set()
     else:
         # Keep only the blacklisted characters that are in the intervals.
         blocked = set()
         for ch in blacklist_characters:
             if ord(ch) in self.intervals:
                 blocked.add(ch)
         self.blacklist_characters = blocked
         if len(blocked) == len(self.intervals):
             raise InvalidArgument('No valid characters in set')

     self.zero_point = self.intervals.index_above(ord('0'))

     # Record the index of the newline character when it can be drawn.
     self.special = []
     if '\n' not in self.blacklist_characters:
         try:
             newline_index = self.intervals.index(ord('\n'))
         except ValueError:
             pass
         else:
             self.special.append(newline_index)
Ejemplo n.º 4
0
 def __init__(self,
              whitelist_categories=None,
              blacklist_categories=None,
              blacklist_characters=None,
              min_codepoint=None,
              max_codepoint=None,
              whitelist_characters=None):
     """Resolve the arguments into the set of drawable characters.

     Raises ``InvalidArgument``, listing every argument that was not
     ``None``, when the combination allows no characters at all.
     """
     # Both category arguments must only name categories charmap knows.
     for requested in (whitelist_categories, blacklist_categories):
         assert set(requested or ()).issubset(charmap.categories())
     allowed = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )
     if not allowed:
         candidates = (
             ('whitelist_categories', whitelist_categories),
             ('blacklist_categories', blacklist_categories),
             ('whitelist_characters', whitelist_characters),
             ('blacklist_characters', blacklist_characters),
             ('min_codepoint', min_codepoint),
             ('max_codepoint', max_codepoint),
         )
         described = ', '.join(
             '%s=%r' % (label, value)
             for label, value in candidates
             if value is not None
         )
         raise InvalidArgument(
             'No characters are allowed to be generated by this '
             'combination of arguments: ' + described
         )
     self.intervals = IntervalSet(allowed)
     zero = ord('0')
     self.zero_point = self.intervals.index_above(zero)
Ejemplo n.º 5
0
 def __init__(
     self,
     whitelist_categories=None,
     blacklist_categories=None,
     blacklist_characters=None,
     min_codepoint=None,
     max_codepoint=None,
     whitelist_characters=None,
 ):
     """Resolve the arguments into the set of drawable characters.

     Raises ``InvalidArgument``, listing every argument that was not
     ``None``, when the combination allows no characters at all.
     """
     # Both category arguments must only name categories charmap knows.
     for requested in (whitelist_categories, blacklist_categories):
         assert set(requested or ()).issubset(charmap.categories())
     allowed = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )
     if not allowed:
         candidates = (
             ("whitelist_categories", whitelist_categories),
             ("blacklist_categories", blacklist_categories),
             ("whitelist_characters", whitelist_characters),
             ("blacklist_characters", blacklist_characters),
             ("min_codepoint", min_codepoint),
             ("max_codepoint", max_codepoint),
         )
         described = ", ".join(
             "%s=%r" % (label, value)
             for label, value in candidates
             if value is not None
         )
         raise InvalidArgument(
             "No characters are allowed to be generated by this "
             "combination of arguments: " + described
         )
     self.intervals = IntervalSet(allowed)
     zero = ord("0")
     self.zero_point = self.intervals.index_above(zero)
Ejemplo n.º 6
0
def test_validates_index():
    """Indexing an IntervalSet outside its bounds raises IndexError."""
    out_of_range = (
        ([], 1),
        ([[1, 10]], 11),
        ([[1, 10]], -11),
    )
    for spec, position in out_of_range:
        with pytest.raises(IndexError):
            IntervalSet(spec)[position]
Ejemplo n.º 7
0
 def __init__(self,
              whitelist_categories=None,
              blacklist_categories=None,
              blacklist_characters=None,
              min_codepoint=None,
              max_codepoint=None):
     """Resolve the arguments into the set of drawable characters.

     Raises ``InvalidArgument`` when the query matches no characters, or
     when the blacklist covers every character the query matched.
     """
     query_args = dict(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
     )
     allowed = charmap.query(**query_args)
     if not allowed:
         raise InvalidArgument('No valid characters in set')
     self.intervals = IntervalSet(allowed)

     if not blacklist_characters:
         self.blacklist_characters = set()
     else:
         # Keep only the blacklisted characters that are in the intervals.
         blocked = set()
         for ch in blacklist_characters:
             if ord(ch) in self.intervals:
                 blocked.add(ch)
         self.blacklist_characters = blocked
         if len(blocked) == len(self.intervals):
             raise InvalidArgument('No valid characters in set')

     self.zero_point = self.intervals.index_above(ord('0'))

     # Record the index of the newline character when it can be drawn.
     self.special = []
     if '\n' not in self.blacklist_characters:
         try:
             newline_index = self.intervals.index(ord('\n'))
         except ValueError:
             pass
         else:
             self.special.append(newline_index)
Ejemplo n.º 8
0
 def __init__(self,
              whitelist_categories=None,
              blacklist_categories=None,
              blacklist_characters=None,
              min_codepoint=None,
              max_codepoint=None,
              whitelist_characters=None):
     """Resolve the arguments into the set of drawable characters.

     Raises ``InvalidArgument`` when the query matches no characters, or
     when no characters are whitelisted and the blacklist covers every
     character the query matched.
     """
     allowed = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
     )
     if not allowed:
         raise InvalidArgument('No valid characters in set')
     self.intervals = IntervalSet(allowed)
     self.whitelist_characters = (
         set(whitelist_characters) if whitelist_characters else set()
     )

     if not blacklist_characters:
         self.blacklist_characters = set()
     else:
         # Keep only the blacklisted characters that are in the intervals.
         blocked = set()
         for ch in blacklist_characters:
             if ord(ch) in self.intervals:
                 blocked.add(ch)
         self.blacklist_characters = blocked
         # NOTE(review): a non-empty whitelist skips this check even when
         # every character in the intervals is blacklisted -- confirm that
         # the draw path copes with that case.
         if (not self.whitelist_characters
                 and len(blocked) == len(self.intervals)):
             raise InvalidArgument('No valid characters in set')

     self.zero_point = self.intervals.index_above(ord('0'))
Ejemplo n.º 9
0
 def __init__(
     self,
     whitelist_categories=None,
     blacklist_categories=None,
     blacklist_characters=None,
     min_codepoint=None,
     max_codepoint=None,
     whitelist_characters=None,
 ):
     """Resolve the arguments into the set of drawable characters.

     Raises ``InvalidArgument``, listing every argument that was not
     ``None``, when the combination allows no characters at all.
     """
     known_categories_checked = (whitelist_categories, blacklist_categories)
     for requested in known_categories_checked:
         # Category arguments must only name categories charmap knows.
         assert set(requested or ()).issubset(charmap.categories())
     query_args = dict(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )
     allowed = charmap.query(**query_args)
     if not allowed:
         shown = []
         for label, value in (
             ("whitelist_categories", whitelist_categories),
             ("blacklist_categories", blacklist_categories),
             ("whitelist_characters", whitelist_characters),
             ("blacklist_characters", blacklist_characters),
             ("min_codepoint", min_codepoint),
             ("max_codepoint", max_codepoint),
         ):
             if value is not None:
                 shown.append("%s=%r" % (label, value))
         raise InvalidArgument(
             "No characters are allowed to be generated by this "
             "combination of arguments: " + ", ".join(shown)
         )
     self.intervals = IntervalSet(allowed)
     self.zero_point = self.intervals.index_above(ord("0"))
Ejemplo n.º 10
0
 def __init__(
     self,
     whitelist_categories=None,
     blacklist_categories=None,
     blacklist_characters=None,
     min_codepoint=None,
     max_codepoint=None,
     whitelist_characters=None,
 ):
     """Resolve the arguments into the set of drawable characters.

     Also stores ``_arg_repr``, a comma-separated ``name=value`` summary
     of the arguments, which is used in the ``InvalidArgument`` message
     raised when the combination allows no characters.
     """
     for requested in (whitelist_categories, blacklist_categories):
         # Category arguments must only name categories charmap knows.
         assert set(requested or ()).issubset(charmap.categories())
     allowed = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )

     shown = []
     for name, value in (
         ("whitelist_categories", whitelist_categories),
         ("blacklist_categories", blacklist_categories),
         ("whitelist_characters", whitelist_characters),
         ("blacklist_characters", blacklist_characters),
         ("min_codepoint", min_codepoint),
         ("max_codepoint", max_codepoint),
     ):
         # Leave out unset/empty arguments ...
         if value in (None, ""):
             continue
         # ... and a blacklist_categories value of exactly ("Cs",).
         if name == "blacklist_categories" and value == ("Cs",):
             continue
         shown.append(f"{name}={value!r}")
     self._arg_repr = ", ".join(shown)

     if not allowed:
         raise InvalidArgument(
             "No characters are allowed to be generated by this "
             f"combination of arguments: {self._arg_repr}"
         )
     self.intervals = IntervalSet(allowed)
     self.zero_point = self.intervals.index_above(ord("0"))
     # Clamp so Z_point is always a valid index into the intervals.
     z_index = self.intervals.index_above(ord("Z"))
     last_index = len(self.intervals) - 1
     self.Z_point = min(z_index, last_index)
Ejemplo n.º 11
0
def build_intervals(ls):
    """Merge ``(start, length)`` pairs into an ``IntervalSet``.

    Each pair ``(u, l)`` describes the range from ``u`` to ``u + l``.
    Pairs are processed in sorted order, and a range that starts at or
    before the end of the previous one is folded into it.

    The input is not modified.
    """
    result = []
    # sorted() rather than ls.sort() so the caller's list is left untouched.
    for u, length in sorted(ls):
        v = u + length
        if result:
            a, b = result[-1]
            if u <= b:
                # A nested range can end before the one it overlaps; keep
                # the larger end so merging never shrinks what is covered.
                result[-1] = (a, max(b, v))
                continue
        result.append((u, v))
    return IntervalSet(result)
Ejemplo n.º 12
0
class OneCharStringStrategy(SearchStrategy):
    """Strategy producing text-type strings that are one character long."""

    specifier = text_type
    # Class-level default; ``__init__`` replaces it per instance.
    zero_point = ord('0')

    def __init__(self,
                 whitelist_categories=None,
                 blacklist_categories=None,
                 blacklist_characters=None,
                 min_codepoint=None,
                 max_codepoint=None,
                 whitelist_characters=None):
        """Resolve the arguments into the set of drawable characters.

        Raises ``InvalidArgument``, listing every argument that was not
        ``None``, when the combination allows no characters at all.
        """
        # Both category arguments must only name categories charmap knows.
        for requested in (whitelist_categories, blacklist_categories):
            assert set(requested or ()).issubset(charmap.categories())
        allowed = charmap.query(
            include_categories=whitelist_categories,
            exclude_categories=blacklist_categories,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
            include_characters=whitelist_characters,
            exclude_characters=blacklist_characters,
        )
        if not allowed:
            candidates = (
                ('whitelist_categories', whitelist_categories),
                ('blacklist_categories', blacklist_categories),
                ('whitelist_characters', whitelist_characters),
                ('blacklist_characters', blacklist_characters),
                ('min_codepoint', min_codepoint),
                ('max_codepoint', max_codepoint),
            )
            described = ', '.join(
                '%s=%r' % (label, value)
                for label, value in candidates
                if value is not None
            )
            raise InvalidArgument(
                'No characters are allowed to be generated by this '
                'combination of arguments: ' + described
            )
        self.intervals = IntervalSet(allowed)
        self.whitelist_characters = (
            set(whitelist_characters) if whitelist_characters else set()
        )
        self.zero_point = self.intervals.index_above(ord('0'))

    def do_draw(self, data):
        """Draw an index into the intervals and return its character."""
        last_index = len(self.intervals) - 1
        index = integer_range(data, 0, last_index, center=self.zero_point)
        codepoint = self.intervals[index]
        return hunichr(codepoint)
Ejemplo n.º 13
0
class OneCharStringStrategy(SearchStrategy):

    """A strategy which generates single character strings of text type.

    The drawable characters are the code points returned by
    ``charmap.query``.  Characters in ``blacklist_characters`` are
    rejected at draw time.
    """
    specifier = text_type
    # Class-level default; ``__init__`` replaces it per instance with an
    # index into ``self.intervals``.
    zero_point = ord('0')

    def __init__(self,
                 whitelist_categories=None,
                 blacklist_categories=None,
                 blacklist_characters=None,
                 min_codepoint=None,
                 max_codepoint=None,
                 whitelist_characters=None):
        """Build the set of drawable characters.

        Raises ``InvalidArgument`` when the query matches no characters, or
        when the blacklist removes every character the query matched.
        """
        intervals = charmap.query(
            include_categories=whitelist_categories,
            exclude_categories=blacklist_categories,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
            include_characters=whitelist_characters,
        )
        if not intervals:
            raise InvalidArgument(
                'No valid characters in set'
            )
        self.intervals = IntervalSet(intervals)
        if whitelist_characters:
            self.whitelist_characters = set(whitelist_characters)
        else:
            self.whitelist_characters = set()
        if blacklist_characters:
            # Blacklisted characters outside the intervals can never be
            # drawn, so only the ones inside count towards exhaustion.
            self.blacklist_characters = set(
                b for b in blacklist_characters if ord(b) in self.intervals
            )
            # ``do_draw`` rejects blacklisted characters even when they
            # were whitelisted, so a blacklist covering every interval
            # leaves nothing to draw whether or not a whitelist was given.
            # Fail here instead of looping forever in ``do_draw``.
            if len(self.blacklist_characters) == len(self.intervals):
                raise InvalidArgument(
                    'No valid characters in set'
                )
        else:
            self.blacklist_characters = set()
        self.zero_point = self.intervals.index_above(ord('0'))

    def do_draw(self, data):
        """Draw one permitted character, retrying on blacklisted ones."""
        while True:
            i = integer_range(
                data, 0, len(self.intervals) - 1,
                center=self.zero_point,
            )
            c = hunichr(self.intervals[i])
            if c not in self.blacklist_characters:
                return c
Ejemplo n.º 14
0
class OneCharStringStrategy(SearchStrategy):
    """Strategy producing text-type strings that are one character long."""

    specifier = text_type
    # Class-level default; ``__init__`` replaces it per instance.
    zero_point = ord('0')

    def __init__(self,
                 whitelist_categories=None,
                 blacklist_categories=None,
                 blacklist_characters=None,
                 min_codepoint=None,
                 max_codepoint=None,
                 whitelist_characters=None):
        """Resolve the arguments into the set of drawable characters.

        Raises ``InvalidArgument``, listing every argument that was not
        ``None``, when the combination allows no characters at all.
        """
        known = (whitelist_categories, blacklist_categories)
        for requested in known:
            # Category arguments must only name categories charmap knows.
            assert set(requested or ()).issubset(charmap.categories())
        query_args = dict(
            include_categories=whitelist_categories,
            exclude_categories=blacklist_categories,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
            include_characters=whitelist_characters,
            exclude_characters=blacklist_characters,
        )
        allowed = charmap.query(**query_args)
        if not allowed:
            shown = []
            for label, value in (
                ('whitelist_categories', whitelist_categories),
                ('blacklist_categories', blacklist_categories),
                ('whitelist_characters', whitelist_characters),
                ('blacklist_characters', blacklist_characters),
                ('min_codepoint', min_codepoint),
                ('max_codepoint', max_codepoint),
            ):
                if value is not None:
                    shown.append('%s=%r' % (label, value))
            raise InvalidArgument(
                'No characters are allowed to be generated by this '
                'combination of arguments: ' + ', '.join(shown)
            )
        self.intervals = IntervalSet(allowed)
        self.whitelist_characters = (
            set(whitelist_characters) if whitelist_characters else set()
        )
        self.zero_point = self.intervals.index_above(ord('0'))

    def do_draw(self, data):
        """Draw an index into the intervals and return its character."""
        last_index = len(self.intervals) - 1
        index = integer_range(data, 0, last_index, center=self.zero_point)
        return hunichr(self.intervals[index])
Ejemplo n.º 15
0
class OneCharStringStrategy(SearchStrategy):
    """A strategy which generates single character strings of text type.

    The drawable characters are the code points returned by
    ``charmap.query``.  Characters in ``blacklist_characters`` are
    rejected at draw time.
    """
    specifier = text_type
    # Class-level default; ``__init__`` replaces it per instance with an
    # index into ``self.intervals``.
    zero_point = ord('0')

    def __init__(self,
                 whitelist_categories=None,
                 blacklist_categories=None,
                 blacklist_characters=None,
                 min_codepoint=None,
                 max_codepoint=None,
                 whitelist_characters=None):
        """Build the set of drawable characters.

        Raises ``InvalidArgument`` when the query matches no characters, or
        when the blacklist removes every character the query matched.
        """
        intervals = charmap.query(
            include_categories=whitelist_categories,
            exclude_categories=blacklist_categories,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
            include_characters=whitelist_characters,
        )
        if not intervals:
            raise InvalidArgument('No valid characters in set')
        self.intervals = IntervalSet(intervals)
        if whitelist_characters:
            self.whitelist_characters = set(whitelist_characters)
        else:
            self.whitelist_characters = set()
        if blacklist_characters:
            # Blacklisted characters outside the intervals can never be
            # drawn, so only the ones inside count towards exhaustion.
            self.blacklist_characters = set(b for b in blacklist_characters
                                            if ord(b) in self.intervals)
            # ``do_draw`` rejects blacklisted characters even when they
            # were whitelisted, so a blacklist covering every interval
            # leaves nothing to draw whether or not a whitelist was given.
            # Fail here instead of looping forever in ``do_draw``.
            if len(self.blacklist_characters) == len(self.intervals):
                raise InvalidArgument('No valid characters in set')
        else:
            self.blacklist_characters = set()
        self.zero_point = self.intervals.index_above(ord('0'))

    def do_draw(self, data):
        """Draw one permitted character, retrying on blacklisted ones."""
        while True:
            i = integer_range(
                data,
                0,
                len(self.intervals) - 1,
                center=self.zero_point,
            )
            c = hunichr(self.intervals[i])
            if c not in self.blacklist_characters:
                return c
Ejemplo n.º 16
0
class OneCharStringStrategy(SearchStrategy):
    """Strategy producing text-type strings that are one character long."""
    def __init__(
        self,
        whitelist_categories=None,
        blacklist_categories=None,
        blacklist_characters=None,
        min_codepoint=None,
        max_codepoint=None,
        whitelist_characters=None,
    ):
        """Resolve the arguments into the set of drawable characters.

        Raises ``InvalidArgument``, listing every argument that was not
        ``None``, when the combination allows no characters at all.
        """
        # Both category arguments must only name categories charmap knows.
        for requested in (whitelist_categories, blacklist_categories):
            assert set(requested or ()).issubset(charmap.categories())
        allowed = charmap.query(
            include_categories=whitelist_categories,
            exclude_categories=blacklist_categories,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
            include_characters=whitelist_characters,
            exclude_characters=blacklist_characters,
        )
        if not allowed:
            candidates = (
                ("whitelist_categories", whitelist_categories),
                ("blacklist_categories", blacklist_categories),
                ("whitelist_characters", whitelist_characters),
                ("blacklist_characters", blacklist_characters),
                ("min_codepoint", min_codepoint),
                ("max_codepoint", max_codepoint),
            )
            described = ", ".join(
                "%s=%r" % (label, value)
                for label, value in candidates
                if value is not None
            )
            raise InvalidArgument(
                "No characters are allowed to be generated by this "
                "combination of arguments: " + described
            )
        self.intervals = IntervalSet(allowed)
        self.zero_point = self.intervals.index_above(ord("0"))

    def do_draw(self, data):
        """Draw an index into the intervals and return its character."""
        last_index = len(self.intervals) - 1
        index = integer_range(data, 0, last_index, center=self.zero_point)
        codepoint = self.intervals[index]
        return hunichr(codepoint)
Ejemplo n.º 17
0
class OneCharStringStrategy(SearchStrategy):
    """Strategy producing text-type strings that are one character long."""

    def __init__(
        self,
        whitelist_categories=None,
        blacklist_categories=None,
        blacklist_characters=None,
        min_codepoint=None,
        max_codepoint=None,
        whitelist_characters=None,
    ):
        """Resolve the arguments into the set of drawable characters.

        Raises ``InvalidArgument``, listing every argument that was not
        ``None``, when the combination allows no characters at all.
        """
        known = (whitelist_categories, blacklist_categories)
        for requested in known:
            # Category arguments must only name categories charmap knows.
            assert set(requested or ()).issubset(charmap.categories())
        query_args = dict(
            include_categories=whitelist_categories,
            exclude_categories=blacklist_categories,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
            include_characters=whitelist_characters,
            exclude_characters=blacklist_characters,
        )
        allowed = charmap.query(**query_args)
        if not allowed:
            shown = []
            for label, value in (
                ("whitelist_categories", whitelist_categories),
                ("blacklist_categories", blacklist_categories),
                ("whitelist_characters", whitelist_characters),
                ("blacklist_characters", blacklist_characters),
                ("min_codepoint", min_codepoint),
                ("max_codepoint", max_codepoint),
            ):
                if value is not None:
                    shown.append("%s=%r" % (label, value))
            raise InvalidArgument(
                "No characters are allowed to be generated by this "
                "combination of arguments: " + ", ".join(shown)
            )
        self.intervals = IntervalSet(allowed)
        self.zero_point = self.intervals.index_above(ord("0"))

    def do_draw(self, data):
        """Draw an index into the intervals and return its character."""
        last_index = len(self.intervals) - 1
        index = integer_range(data, 0, last_index, center=self.zero_point)
        return hunichr(self.intervals[index])
Ejemplo n.º 18
0
 def __init__(self,
              whitelist_categories=None,
              blacklist_categories=None,
              blacklist_characters=None,
              min_codepoint=None,
              max_codepoint=None,
              whitelist_characters=None):
     """Resolve the arguments into the set of drawable characters.

     Raises ``InvalidArgument``, listing every argument that was not
     ``None``, when the combination allows no characters at all.
     """
     # Both category arguments must only name categories charmap knows.
     for requested in (whitelist_categories, blacklist_categories):
         assert set(requested or ()).issubset(charmap.categories())
     allowed = charmap.query(
         include_categories=whitelist_categories,
         exclude_categories=blacklist_categories,
         min_codepoint=min_codepoint,
         max_codepoint=max_codepoint,
         include_characters=whitelist_characters,
         exclude_characters=blacklist_characters,
     )
     if not allowed:
         candidates = (
             ('whitelist_categories', whitelist_categories),
             ('blacklist_categories', blacklist_categories),
             ('whitelist_characters', whitelist_characters),
             ('blacklist_characters', blacklist_characters),
             ('min_codepoint', min_codepoint),
             ('max_codepoint', max_codepoint),
         )
         described = ', '.join(
             '%s=%r' % (label, value)
             for label, value in candidates
             if value is not None
         )
         raise InvalidArgument(
             'No characters are allowed to be generated by this '
             'combination of arguments: ' + described
         )
     self.intervals = IntervalSet(allowed)
     # Remember the explicitly whitelisted characters as a set.
     self.whitelist_characters = (
         set(whitelist_characters) if whitelist_characters else set()
     )
     self.zero_point = self.intervals.index_above(ord('0'))
Ejemplo n.º 19
0
def intervals_to_set(ints):
    """Return a plain ``set`` of the members of ``IntervalSet(ints)``."""
    interval_set = IntervalSet(ints)
    return set(interval_set)
Ejemplo n.º 20
0
def test_index_above_is_length_if_higher():
    """index_above(100) on the single interval [1, 10] gives 10."""
    interval_set = IntervalSet([[1, 10]])
    position = interval_set.index_above(100)
    assert position == 10
Ejemplo n.º 21
0
def test_index_above_is_index_if_present():
    """For a value in the set, index_above gives that value's index."""
    for value, expected in ((1, 0), (2, 1)):
        assert IntervalSet([[1, 10]]).index_above(value) == expected
Ejemplo n.º 22
0
    ls = list(intervals)
    assert len(ls) == len(intervals)
    for i in range(len(ls)):
        assert ls[i] == intervals[i]
    for i in range(1, len(ls) - 1):
        assert ls[-i] == intervals[-i]


@given(Intervals)
def test_intervals_match_indexes(intervals):
    """IntervalSet.index agrees with list.index on the listed members."""
    members = list(intervals)
    for member in members:
        expected = members.index(member)
        assert expected == intervals.index(member)


@example(intervals=IntervalSet(((1, 1),)), v=0)
@example(intervals=IntervalSet(()), v=0)
@given(Intervals, st.integers())
def test_error_for_index_of_not_present_value(intervals, v):
    """Asking for the index of a value not in the set raises ValueError."""
    # Discard generated cases where v happens to be a member.
    assume(v not in intervals)
    with pytest.raises(ValueError):
        intervals.index(v)


def test_validates_index():
    """Indexing an IntervalSet past its end raises IndexError."""
    out_of_range = (
        ([], 1),
        ([[1, 10]], 11),
    )
    for spec, position in out_of_range:
        with pytest.raises(IndexError):
            IntervalSet(spec)[position]
Ejemplo n.º 23
0
    ls = list(intervals)
    assert len(ls) == len(intervals)
    for i in range(len(ls)):
        assert ls[i] == intervals[i]
    for i in range(1, len(ls) - 1):
        assert ls[-i] == intervals[-i]


@given(Intervals)
def test_intervals_match_indexes(intervals):
    """IntervalSet.index agrees with list.index on the listed members."""
    members = list(intervals)
    for member in members:
        expected = members.index(member)
        assert expected == intervals.index(member)


@example(intervals=IntervalSet(()), v=0)
@given(Intervals, st.integers())
def test_error_for_index_of_not_present_value(intervals, v):
    """Asking for the index of a value not in the set raises ValueError."""
    # Discard generated cases where v happens to be a member.
    assume(v not in intervals)
    with pytest.raises(ValueError):
        intervals.index(v)


def test_validates_index():
    with pytest.raises(IndexError):
        IntervalSet([])[1]

    with pytest.raises(IndexError):
        IntervalSet([[1, 10]])[11]

    with pytest.raises(IndexError):
Ejemplo n.º 24
0
class OneCharStringStrategy(SearchStrategy):
    """Strategy producing strings that are exactly one character long."""

    def __init__(
        self,
        whitelist_categories=None,
        blacklist_categories=None,
        blacklist_characters=None,
        min_codepoint=None,
        max_codepoint=None,
        whitelist_characters=None,
    ):
        """Resolve the arguments into the set of drawable characters.

        Raises ``InvalidArgument``, listing every argument that was not
        ``None``, when the combination allows no characters at all.
        """
        # Both category arguments must only name categories charmap knows.
        for requested in (whitelist_categories, blacklist_categories):
            assert set(requested or ()).issubset(charmap.categories())
        allowed = charmap.query(
            include_categories=whitelist_categories,
            exclude_categories=blacklist_categories,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
            include_characters=whitelist_characters,
            exclude_characters=blacklist_characters,
        )
        if not allowed:
            candidates = (
                ("whitelist_categories", whitelist_categories),
                ("blacklist_categories", blacklist_categories),
                ("whitelist_characters", whitelist_characters),
                ("blacklist_characters", blacklist_characters),
                ("min_codepoint", min_codepoint),
                ("max_codepoint", max_codepoint),
            )
            described = ", ".join(
                "%s=%r" % (label, value)
                for label, value in candidates
                if value is not None
            )
            raise InvalidArgument(
                "No characters are allowed to be generated by this "
                "combination of arguments: " + described
            )
        self.intervals = IntervalSet(allowed)
        self.zero_point = self.intervals.index_above(ord("0"))
        # Clamp so Z_point is always a valid index into the intervals.
        z_index = self.intervals.index_above(ord("Z"))
        last_index = len(self.intervals) - 1
        self.Z_point = min(z_index, last_index)

    def do_draw(self, data):
        """Draw an index, remap it for shrinking, and return its character."""
        size = len(self.intervals)
        if size <= 256:
            lower, upper = 0, size - 1
        elif biased_coin(data, 0.2):
            # Large sets: with probability 0.2, draw from index 256 upwards.
            lower, upper = 256, size - 1
        else:
            lower, upper = 0, 255
        index = self.rewrite_integer(integer_range(data, lower, upper))
        return chr(self.intervals[index])

    def rewrite_integer(self, i):
        """Permute indices up to ``Z_point`` so small draws land near '0'.

        Indices above ``Z_point`` are returned unchanged.  Below that, the
        layout ``abc0yyyZ...`` is rearranged to ``0yyyZcba...`` so that the
        smallest drawn values map to simple ASCII characters, which gives a
        better shrinking order.
        """
        if i > self.Z_point:
            return i
        # Distance between the two anchors: draws in [0, span] are shifted
        # up onto [zero_point, Z_point].
        span = self.Z_point - self.zero_point
        if i <= span:
            i += self.zero_point
        else:
            # Draws in [span + 1, Z_point] are mirrored onto the indices
            # below zero_point, in reverse order, so that codepoints below
            # zero_point shrink upwards.
            i = self.zero_point - (i - span)
            assert i < self.zero_point
        assert 0 <= i <= self.Z_point
        return i
Ejemplo n.º 25
0
class OneCharStringStrategy(SearchStrategy):

    """A strategy which generates single character strings of text type.

    The drawable characters are the code points returned by
    ``charmap.query`` for the given categories and codepoint bounds.
    Characters in ``blacklist_characters`` are rejected at draw time.
    """
    specifier = text_type
    # Class-level default; ``__init__`` replaces it per instance with an
    # index into ``self.intervals``.
    zero_point = ord('0')

    def __init__(self,
                 whitelist_categories=None,
                 blacklist_categories=None,
                 blacklist_characters=None,
                 min_codepoint=None,
                 max_codepoint=None):
        """Build the set of drawable characters.

        Raises ``InvalidArgument`` when the query matches no characters, or
        when the blacklist removes every character the query matched.
        """
        intervals = charmap.query(
            include_categories=whitelist_categories,
            exclude_categories=blacklist_categories,
            min_codepoint=min_codepoint,
            max_codepoint=max_codepoint,
        )
        if not intervals:
            raise InvalidArgument(
                'No valid characters in set'
            )
        self.intervals = IntervalSet(intervals)
        if blacklist_characters:
            # Blacklisted characters outside the intervals can never be
            # drawn, so only the ones inside count towards exhaustion.
            self.blacklist_characters = set(
                b for b in blacklist_characters if ord(b) in self.intervals
            )
            if len(self.blacklist_characters) == len(self.intervals):
                raise InvalidArgument(
                    'No valid characters in set'
                )
        else:
            self.blacklist_characters = set()
        self.zero_point = self.intervals.index_above(ord('0'))
        # Indices the draw distribution returns with extra probability:
        # only the newline character, and only when it is drawable.
        self.special = []
        if '\n' not in self.blacklist_characters:
            n = ord('\n')
            try:
                self.special.append(self.intervals.index(n))
            except ValueError:
                # Newline is not part of the interval set.
                pass

    def do_draw(self, data):
        """Draw one permitted character, retrying on blacklisted ones."""
        denom = math.log1p(-1 / 127)

        def d(random):
            """Pick an index into ``self.intervals`` using ``random``."""
            if self.special and random.randint(0, 10) == 0:
                return random.choice(self.special)
            if len(self.intervals) <= 256 or random.randint(0, 1):
                # Choose an interval uniformly, then a position inside it.
                i = random.randint(0, len(self.intervals.offsets) - 1)
                u, v = self.intervals.intervals[i]
                # ``randint`` includes both endpoints and the inclusive
                # range (u, v) holds v - u + 1 code points, so the largest
                # valid offset is v - u.  Using v - u + 1 stepped into the
                # next interval, or past the end of the set.
                return self.intervals.offsets[i] + random.randint(0, v - u)
            else:
                # Geometric-style draw favouring small indices, capped at
                # the last valid index.
                return min(
                    len(self.intervals) - 1,
                    int(math.log(random.random()) / denom))

        while True:
            i = integer_range(
                data, 0, len(self.intervals) - 1,
                center=self.zero_point, distribution=d
            )
            c = hunichr(self.intervals[i])
            if c not in self.blacklist_characters:
                return c