def test_3(self):
    # MaxBy driven by a negated key must agree with MinBy inside GroupingBy.
    source = random()
    numbers = source.int_range(1, 100, size=10)

    def mod_5(x: int):
        return x % 5

    # Negating the key reverses the comparator, so the "max" chosen in each
    # bucket of the group-by is really that bucket's minimum.
    reversed_order = comparing(lambda x: -x)
    via_max_by = Stream(numbers).collect(
        GroupingBy(mod_5, MaxBy(reversed_order)))

    # Reference result: per-bucket minimum obtained directly from MinBy.
    via_min_by = Stream(numbers).collect(GroupingBy(mod_5, MinBy()))

    self.assertDictEqual(via_max_by, via_min_by)
def test_conditional2(self):
    # A ChainedCondition applied through Stream.conditional must label
    # elements exactly like an equivalent hand-written threshold ladder.
    rnd = random()
    size = 1000

    def get():
        return rnd.randint(1, 100)

    # Build the chain step by step: <= 10 -> 10, <= 20 -> 20, <= 30 -> 30,
    # anything else -> -1.
    chain = ChainedCondition()
    chain = chain.if_then(lambda x: x <= 10, lambda x: 10)
    chain = chain.if_then(lambda x: x <= 20, lambda x: 20)
    chain = chain.if_then(lambda x: x <= 30, lambda x: 30)
    conditions = chain.otherwise(lambda x: -1)

    labelled = Stream.from_supplier(get).limit(size).conditional(conditions)
    out = labelled.collect(GroupingBy(identity, Counting()))

    # NOTE(review): reset() presumably rewinds the random source so that
    # get() replays the values the stream consumed -- confirm.
    rnd.reset()
    out_target = defaultdict(int)
    for _ in range(size):
        value = get()
        # The first threshold the value does not exceed decides the label.
        for bound in (10, 20, 30):
            if value <= bound:
                label = bound
                break
        else:
            label = -1
        out_target[label] += 1

    self.assertDictEqual(out, out_target)
def test_1a(self):
    # Element frequencies via GroupingBy(identity, Counting()) must match
    # what collections.Counter reports for the same data.
    low, high = 1, 100
    sample_count = 1000
    values = random().int_range(low, high, size=sample_count)

    frequency_collector = GroupingBy(identity, Counting())
    observed = Stream(values).collect(frequency_collector)

    self.assertDictEqual(observed, Counter(values))
def test_group_by(self):
    # GroupingBy on fixed inputs: default downstream, Mapping and ToSet.
    def mod_3(x):
        return x % 3

    def parity(x):
        return x % 2

    repeated = [1, 2, 3, 4, 2, 4]

    # Without a downstream collector each bucket is a list of its elements.
    by_remainder = Stream(range(10)).collect(GroupingBy(mod_3))
    self.assertDictEqual(
        by_remainder,
        {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]})

    # Mapping squares every element before it lands in its bucket.
    squared = Stream(range(10)).collect(
        GroupingBy(mod_3, Mapping(lambda x: x**2)))
    self.assertDictEqual(
        squared,
        {0: [0, 9, 36, 81], 1: [1, 16, 49], 2: [4, 25, 64]})

    # Duplicates are kept when buckets are lists ...
    as_lists = Stream(repeated).collect(GroupingBy(parity))
    self.assertDictEqual(as_lists, {0: [2, 4, 2, 4], 1: [1, 3]})

    # ... and collapsed when buckets are sets.
    as_sets = Stream(repeated).collect(GroupingBy(parity, ToSet()))
    self.assertDictEqual(as_sets, {0: {2, 4}, 1: {1, 3}})
def test_8(self):
    # Three-level GroupingBy (country -> state -> sex) with Counting must
    # equal the same tally built by hand in nested defaultdicts.
    rnd = random()
    data_size = 1000
    countries = tuple('ABC')
    sex = ('M', 'F')

    class Person:
        def __init__(self, country, state, age, sex):
            self.country = country
            self.state = state
            self.age = age
            self.sex = sex

    # Draw the attributes in a fixed order (country, state, age, sex) for
    # every generated person.
    ps = []
    for _ in range(data_size):
        country = rnd.choice(countries)
        state = rnd.randrange(1, 4)
        age = rnd.randrange(0, 60)
        gender = rnd.choice(sex)
        ps.append(Person(country, state, age, gender))

    # Counting persons by country, then by state, then by M/F; the
    # collector is assembled from the innermost level outwards.
    per_sex = GroupingBy(attrgetter('sex'), Counting())
    per_state = GroupingBy(attrgetter('state'), per_sex)
    collector = GroupingBy(attrgetter('country'), per_state)

    def _filter(x: Person):
        # Keep ages from 20 to 50, both ends inclusive.
        return not (x.age < 20 or x.age > 50)

    out = Stream(ps).filter(_filter).collect(collector)

    out_target = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    for person in filter(_filter, ps):
        out_target[person.country][person.state][person.sex] += 1

    self.assertDictEqual(out, out_target)
def test_4(self):
    # GroupingBy(mod_5, MinBy()) must yield, for every bucket, an Optional
    # wrapping the smallest element of that bucket.
    source = random()
    numbers = source.int_range(1, 100, size=10)

    def mod_5(x: int):
        return x % 5

    observed = Stream(numbers).collect(GroupingBy(mod_5, MinBy()))

    # Bucket the numbers by hand, then take the minimum of each bucket.
    buckets = {}
    for number in numbers:
        buckets.setdefault(mod_5(number), []).append(number)

    expected = {}
    for remainder, members in buckets.items():
        expected[remainder] = Optional(min(members))

    self.assertDictEqual(observed, expected)
def test_1(self):
    # Frequency of the integers produced by the random source, obtained the
    # long way round: group equal values, collect each group into a list,
    # then take the length of that list. "test_1a" shows a better method.
    low, high = 1, 100
    sample_count = 1000
    source = random()
    values = source.int_range(low, high, size=sample_count)

    list_length = CollectAndThen(ToList(), len)
    observed = Stream(values).collect(GroupingBy(identity, list_length))

    expected = Counter(values)
    self.assertDictEqual(observed, expected)
def test_if_else(self):
    # Stream.if_else maps values below 50 to 0 and all others to 1; the
    # resulting counts must match a manual tally over the same values.
    rnd = random()
    size = 1000

    def get():
        return rnd.randint(1, 100)

    supplied = Stream.from_supplier(get).limit(size)
    labelled = supplied.if_else(lambda x: x < 50, lambda x: 0, lambda x: 1)
    out = labelled.collect(GroupingBy(identity, Counting()))

    # NOTE(review): reset() presumably rewinds the random source so that
    # get() replays the values the stream consumed -- confirm.
    rnd.reset()
    out_target = defaultdict(int)
    for _ in range(size):
        if get() < 50:
            out_target[0] += 1
        else:
            out_target[1] += 1

    self.assertDictEqual(out, out_target)
def test_4(self):
    # Group elements by their remainder modulo 10 and sum each bucket with
    # Summing(); compare against sums accumulated by hand.
    low, high = 1, 100
    sample_count = 1000
    source = random()

    def mod_10(x):
        return x % 10

    values = source.int_range(low, high, size=sample_count)

    observed = Stream(values).collect(GroupingBy(mod_10, Summing()))

    expected = {}
    for value in values:
        bucket = mod_10(value)
        expected[bucket] = expected.get(bucket, 0) + value

    self.assertDictEqual(observed, expected)
def test_2(self):
    # Group elements by their remainder modulo 10 and collect the distinct
    # elements of every bucket with ToSet().
    low, high = 1, 100
    sample_count = 1000
    source = random()

    def mod_10(x):
        return x % 10

    values = source.int_range(low, high, size=sample_count)

    observed = Stream(values).collect(GroupingBy(mod_10, ToSet()))

    # Hand-built reference: one set per remainder.
    expected = {}
    for value in values:
        expected.setdefault(mod_10(value), set()).add(value)

    self.assertDictEqual(observed, expected)
def test_6(self):
    # Number of distinct elements per bucket when grouping by the remainder
    # modulo 100: collect each bucket into a set, then take its size.
    low, high = 1, 10000
    sample_count = 1000
    source = random()

    def mod_100(x):
        return x % 100

    values = source.int_range(low, high, size=sample_count)

    distinct_size = CollectAndThen(ToSet(), len)
    observed = Stream(values).collect(GroupingBy(mod_100, distinct_size))

    # Reference: gather the distinct members by hand, then count them.
    members = defaultdict(set)
    for value in values:
        members[mod_100(value)].add(value)

    expected = {}
    for remainder, distinct in members.items():
        expected[remainder] = len(distinct)

    self.assertDictEqual(observed, expected)
def test_2(self):
    # MaxBy with a custom comparator inside GroupingBy: for every bucket
    # (remainder modulo 5) the result must be an Optional wrapping the Data
    # object with the largest wrapped number.
    class Data:
        # Minimal wrapper around a number; instances compare by that number.
        def __init__(self, num):
            self._num = num

        def __str__(self) -> str:
            return '[' + str(self._num) + ']'

        def __repr__(self):
            return str(self)

        def __eq__(self, other: object):
            # Hand foreign types back to Python's default comparison rather
            # than failing with AttributeError on a missing "_num"; such an
            # error would hide the real mismatch in an assertion failure.
            if not isinstance(other, Data):
                return NotImplemented
            return self._num == other._num

    rnd = random()
    data = [Data(x) for x in rnd.int_range(1, 100, size=10)]

    def mod_5(x: Data):
        return x._num % 5

    comp_key = attrgetter('_num')

    bkt_max = Stream(data).collect(
        GroupingBy(mod_5, MaxBy(comparing(comp_key))))

    # Reference: bucket the objects by hand and pick each bucket's maximum
    # with the same key function.
    temp = defaultdict(list)
    for e in data:
        temp[mod_5(e)].append(e)
    out_target = {
        k: Optional(max(v, key=comp_key)) for k, v in temp.items()
    }

    self.assertDictEqual(bkt_max, out_target)
def test_5(self):
    # Sum of squares of the elements, grouped by their remainder modulo 10:
    # Mapping squares each element before Summing adds it to its bucket.
    low, high = 1, 100
    sample_count = 1000
    source = random()

    def mod_10(x):
        return x % 10

    def square(x):
        return x**2

    values = source.int_range(low, high, size=sample_count)

    sum_of_squares = Mapping(square, Summing())
    observed = Stream(values).collect(GroupingBy(mod_10, sum_of_squares))

    expected = {}
    for value in values:
        bucket = mod_10(value)
        expected[bucket] = expected.get(bucket, 0) + square(value)

    self.assertDictEqual(observed, expected)
def test_4a(self):
    # Group elements by their remainder modulo 10 and sum only the distinct
    # elements of each bucket (collect into a set, then apply sum).
    low, high = 1, 100
    sample_count = 1000
    source = random()

    def mod_10(x):
        return x % 10

    values = source.int_range(low, high, size=sample_count)

    distinct_sum = CollectAndThen(ToSet(), sum)
    observed = Stream(values).collect(GroupingBy(mod_10, distinct_sum))

    # Reference: collect the distinct members by hand, then total them.
    members = defaultdict(set)
    for value in values:
        members[mod_10(value)].add(value)

    expected = {}
    for bucket, distinct in members.items():
        expected[bucket] = sum(distinct)

    self.assertDictEqual(observed, expected)
def test_7(self):
    # Since there is no element in the Stream, collecting with GroupingBy
    # is expected to produce an empty dict.
    def mod_10(e):
        return e % 10

    grouped = Stream(()).collect(GroupingBy(mod_10))

    self.assertDictEqual(grouped, {})