Exemple #1
0
    def test_3(self):
        """Cross-check MaxBy against MinBy on buckets keyed by ``x % 5``.

        Feeding ``MaxBy`` a comparator built on the negated value should make
        it select the smallest element of each bucket, so its output is
        expected to equal what ``MinBy`` produces for the same data.
        """
        def remainder_of_five(value: int):
            return value % 5

        def negated(value):
            return -value

        source = random()
        numbers = source.int_range(1, 100, size=10)

        # "Maximum" under the reversed ordering, i.e. the per-bucket minimum.
        reversed_max = GroupingBy(remainder_of_five, MaxBy(comparing(negated)))
        via_max_by = Stream(numbers).collect(reversed_max)

        # Reference result obtained directly from MinBy.
        via_min_by = Stream(numbers).collect(
            GroupingBy(remainder_of_five, MinBy()))

        self.assertDictEqual(via_max_by, via_min_by)
Exemple #2
0
    def test_conditional2(self):
        """Compare ``Stream.conditional`` with a hand-written tally.

        Each supplied value is mapped to 10, 20 or 30 (the first bound it does
        not exceed) or to -1 when it is above 30, and the mapped values are
        counted. The expected counts are rebuilt by drawing from the same
        source again after ``reset()``; the test relies on that call making
        the source replay the values it produced for the stream.
        """
        source = random()
        total = 1000
        bounds = (10, 20, 30)

        def supply():
            return source.randint(1, 100)

        # Build the rule chain one step at a time.
        rules = ChainedCondition()
        rules = rules.if_then(lambda x: x <= 10, lambda x: 10)
        rules = rules.if_then(lambda x: x <= 20, lambda x: 20)
        rules = rules.if_then(lambda x: x <= 30, lambda x: 30)
        rules = rules.otherwise(lambda x: -1)

        pipeline = Stream.from_supplier(supply).limit(total).conditional(rules)
        actual = pipeline.collect(GroupingBy(identity, Counting()))

        # Rewind the source before drawing the reference values.
        source.reset()

        expected = defaultdict(int)
        for _ in range(total):
            drawn = supply()
            # First bound that covers the value, or -1 when none does.
            label = next((bound for bound in bounds if drawn <= bound), -1)
            expected[label] += 1

        self.assertDictEqual(actual, expected)
    def test_1a(self):
        """Per-value counts from ``GroupingBy(identity, Counting())`` must equal ``Counter(data)``."""
        low, high = 1, 100
        count = 1000

        source = random()
        numbers = source.int_range(low, high, size=count)

        # Group every element under its own value and count each bucket.
        frequency_collector = GroupingBy(identity, Counting())
        actual = Stream(numbers).collect(frequency_collector)

        expected = Counter(numbers)
        self.assertDictEqual(actual, expected)
Exemple #4
0
    def test_group_by(self):
        """Exercise ``GroupingBy`` on small literal inputs.

        Covers four cases: no downstream collector (buckets compared against
        lists), a ``Mapping`` downstream, input with repeated values, and a
        ``ToSet`` downstream that drops the repeats.
        """
        def mod_3(n):
            return n % 3

        def parity(n):
            return n % 2

        def squared(n):
            return n**2

        # Plain grouping by remainder modulo 3.
        by_remainder = Stream(range(10)).collect(GroupingBy(mod_3))
        expected_by_remainder = {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
        self.assertDictEqual(by_remainder, expected_by_remainder)

        # Same grouping, but each element is squared before being stored.
        squares = Stream(range(10)).collect(
            GroupingBy(mod_3, Mapping(squared)))
        expected_squares = {0: [0, 9, 36, 81], 1: [1, 16, 49], 2: [4, 25, 64]}
        self.assertDictEqual(squares, expected_squares)

        # Repeated values stay in the bucket when no downstream is given.
        with_repeats = Stream([1, 2, 3, 4, 2, 4]).collect(GroupingBy(parity))
        self.assertDictEqual(with_repeats, {1: [1, 3], 0: [2, 4, 2, 4]})

        # Collecting each bucket into a set removes the repeats.
        as_sets = Stream([1, 2, 3, 4, 2, 4]).collect(
            GroupingBy(parity, ToSet()))
        self.assertDictEqual(as_sets, {1: {1, 3}, 0: {2, 4}})
    def test_8(self):
        """Three-level nested ``GroupingBy`` with ``Counting`` on filtered people.

        Randomly generated ``Person`` objects aged 20 to 50 (both inclusive)
        are counted per country, then per state, then per sex, and the result
        is compared with a tally built from nested ``defaultdict`` objects.
        """
        source = random()
        population = 1000
        countries = tuple('ABC')
        sex = ('M', 'F')

        class Person:
            def __init__(self, country, state, age, sex):
                self.country, self.state = country, state
                self.age, self.sex = age, sex

        def random_person():
            # Draw the attributes in the order: country, state, age, sex.
            country = source.choice(countries)
            state = source.randrange(1, 4)
            age = source.randrange(0, 60)
            gender = source.choice(sex)
            return Person(country, state, age, gender)

        def in_age_window(person: Person):
            # Keep persons whose age lies between 20 and 50, both inclusive.
            return 20 <= person.age <= 50

        people = [random_person() for _ in range(population)]

        # Count by country, then by state, then by M/F.
        by_sex = GroupingBy(attrgetter('sex'), Counting())
        by_state = GroupingBy(attrgetter('state'), by_sex)
        by_country = GroupingBy(attrgetter('country'), by_state)

        actual = Stream(people).filter(in_age_window).collect(by_country)

        expected = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        for person in people:
            if not in_age_window(person):
                continue
            expected[person.country][person.state][person.sex] += 1

        self.assertDictEqual(actual, expected)
Exemple #6
0
    def test_4(self):
        """``MinBy`` inside ``GroupingBy`` must give the minimum of every ``x % 5`` bucket.

        The expected mapping is built by grouping the same data by hand and
        wrapping the ``min`` of each group in ``Optional``.
        """
        def remainder_of_five(value: int):
            return value % 5

        source = random()
        numbers = source.int_range(1, 100, size=10)

        actual = Stream(numbers).collect(GroupingBy(remainder_of_five, MinBy()))

        # Manual grouping: bucket key -> list of elements in encounter order.
        groups = {}
        for number in numbers:
            groups.setdefault(remainder_of_five(number), []).append(number)

        expected = {}
        for key, members in groups.items():
            expected[key] = Optional(min(members))

        self.assertDictEqual(actual, expected)
    def test_1(self):
        """Frequency of each value, computed as the length of its collected list.

        Elements are grouped by their own value, each bucket is collected
        into a list and ``len`` is applied to that list. The result must match
        ``Counter(data)``. "test_1a" shows a more direct way to do the same.
        """
        low, high = 1, 100
        count = 1000

        source = random()
        numbers = source.int_range(low, high, size=count)

        # Collect each bucket to a list, then reduce the list to its length.
        list_length = CollectAndThen(ToList(), len)
        actual = Stream(numbers).collect(GroupingBy(identity, list_length))

        expected = Counter(numbers)
        self.assertDictEqual(actual, expected)
Exemple #8
0
    def test_if_else(self):
        """Compare ``Stream.if_else`` with a hand-written two-way tally.

        Supplied values below 50 are mapped to 0 and all others to 1, then the
        mapped values are counted. The expected counts are rebuilt by drawing
        from the same source again after ``reset()``; the test relies on that
        call making the source replay the values it produced for the stream.
        """
        source = random()
        total = 1000

        def supply():
            return source.randint(1, 100)

        def below_fifty(value):
            return value < 50

        branched = Stream.from_supplier(supply).limit(total).if_else(
            below_fifty, lambda x: 0, lambda x: 1)
        actual = branched.collect(GroupingBy(identity, Counting()))

        # Rewind the source before drawing the reference values.
        source.reset()

        expected = defaultdict(int)
        for _ in range(total):
            if supply() < 50:
                expected[0] += 1
            else:
                expected[1] += 1

        self.assertDictEqual(actual, expected)
    def test_4(self):
        """Sum of elements per ``x % 10`` bucket via ``GroupingBy(..., Summing())``.

        The expected totals are accumulated by hand over the same data.
        """
        def last_digit(value):
            return value % 10

        low, high = 1, 100
        count = 1000

        source = random()
        numbers = source.int_range(low, high, size=count)

        # Bucket by remainder modulo 10, then add up each bucket.
        actual = Stream(numbers).collect(GroupingBy(last_digit, Summing()))

        expected = {}
        for number in numbers:
            bucket = last_digit(number)
            expected[bucket] = expected.get(bucket, 0) + number

        self.assertDictEqual(actual, expected)
    def test_2(self):
        """Distinct elements per ``x % 10`` bucket via ``GroupingBy(..., ToSet())``.

        The expected mapping is a ``defaultdict(set)`` filled by hand from the
        same data.
        """
        def last_digit(value):
            return value % 10

        low, high = 1, 100
        count = 1000

        source = random()
        numbers = source.int_range(low, high, size=count)

        # Bucket by remainder modulo 10 and keep each bucket as a set.
        set_per_bucket = GroupingBy(last_digit, ToSet())
        actual = Stream(numbers).collect(set_per_bucket)

        expected = defaultdict(set)
        for number in numbers:
            bucket = last_digit(number)
            expected[bucket].add(number)

        self.assertDictEqual(actual, expected)
    def test_6(self):
        """Number of distinct elements per ``x % 100`` bucket.

        The collector gathers each bucket into a set and applies ``len`` to
        it. The expected counts come from sets built by hand over the same
        data.
        """
        def last_two_digits(value):
            return value % 100

        low, high = 1, 10000
        count = 1000

        source = random()
        numbers = source.int_range(low, high, size=count)

        # Collect each bucket into a set, then reduce the set to its size.
        distinct_count = CollectAndThen(ToSet(), len)
        actual = Stream(numbers).collect(
            GroupingBy(last_two_digits, distinct_count))

        seen = defaultdict(set)
        for number in numbers:
            seen[last_two_digits(number)].add(number)

        expected = {}
        for bucket, members in seen.items():
            expected[bucket] = len(members)

        self.assertDictEqual(actual, expected)
Exemple #12
0
    def test_2(self):
        """``MaxBy`` with a key-based comparator must give the per-bucket maximum.

        Integers wrapped in a local ``Data`` class are grouped by
        ``_num % 5``. For every bucket, ``MaxBy(comparing(attrgetter('_num')))``
        is expected to select the same object that
        ``max(bucket, key=attrgetter('_num'))`` selects, wrapped in ``Optional``.
        """
        class Data:
            """Small wrapper around a number, compared by that number."""

            def __init__(self, num):
                self._num = num

            def __str__(self) -> str:
                return '[' + str(self._num) + ']'

            def __repr__(self):
                return str(self)

            def __eq__(self, other):
                # Let Python try the reflected comparison for foreign types
                # instead of failing with AttributeError on ``other._num``.
                if not isinstance(other, Data):
                    return NotImplemented
                return self._num == other._num

            def __hash__(self):
                # Defining __eq__ alone would set __hash__ to None; keep the
                # hash consistent with equality (equal _num -> equal hash).
                return hash(self._num)

        rnd = random()

        data = [Data(x) for x in rnd.int_range(1, 100, size=10)]

        def mod_5(x: Data):
            return x._num % 5

        comp_key = attrgetter('_num')

        bkt_max = Stream(data).collect(
            GroupingBy(mod_5, MaxBy(comparing(comp_key))))

        # Reference result: group by hand, then take the max of each group.
        temp = defaultdict(list)

        for e in data:
            temp[mod_5(e)].append(e)

        out_target = {
            k: Optional(max(v, key=comp_key))
            for k, v in temp.items()
        }

        self.assertDictEqual(bkt_max, out_target)
    def test_5(self):
        """Sum of squares per ``x % 10`` bucket via ``Mapping(square, Summing())``.

        The expected totals are accumulated by hand over the same data.
        """
        def last_digit(value):
            return value % 10

        def squared(value):
            return value**2

        low, high = 1, 100
        count = 1000

        source = random()
        numbers = source.int_range(low, high, size=count)

        # Square every element of a bucket, then add the squares up.
        sum_of_squares = Mapping(squared, Summing())
        actual = Stream(numbers).collect(GroupingBy(last_digit, sum_of_squares))

        expected = defaultdict(int)
        for number in numbers:
            bucket = last_digit(number)
            expected[bucket] += squared(number)

        self.assertDictEqual(actual, expected)
    def test_4a(self):
        """Sum of the distinct elements in each ``x % 10`` bucket.

        The collector gathers each bucket into a set and applies ``sum`` to
        it. The expected totals come from sets built by hand over the same
        data.
        """
        def last_digit(value):
            return value % 10

        low, high = 1, 100
        count = 1000

        source = random()
        numbers = source.int_range(low, high, size=count)

        # Collect each bucket into a set, then add up its members.
        distinct_sum = CollectAndThen(ToSet(), sum)
        actual = Stream(numbers).collect(GroupingBy(last_digit, distinct_sum))

        seen = defaultdict(set)
        for number in numbers:
            seen[last_digit(number)].add(number)

        expected = {}
        for bucket, members in seen.items():
            expected[bucket] = sum(members)

        self.assertDictEqual(actual, expected)
    def test_7(self):
        """``GroupingBy`` over an empty stream is expected to collect to an empty dict."""
        def last_digit(e):
            return e % 10

        # There are no elements in the stream, so the result has no buckets.
        grouped = Stream(()).collect(GroupingBy(last_digit))
        self.assertDictEqual(grouped, {})