def test_conditional2(self):
    """A ChainedCondition closed with otherwise() maps every value to a bucket."""
    source = random()
    sample_count = 1000

    def draw():
        return source.randint(1, 100)

    chain = (ChainedCondition()
             .if_then(lambda x: x <= 10, lambda x: 10)
             .if_then(lambda x: x <= 20, lambda x: 20)
             .if_then(lambda x: x <= 30, lambda x: 30)
             .otherwise(lambda x: -1))
    out = (Stream.from_supplier(draw)
           .limit(sample_count)
           .conditional(chain)
           .collect(GroupingBy(identity, Counting())))

    def bucket(value):
        # Mirrors the chained condition above, including the otherwise-branch.
        for bound in (10, 20, 30):
            if value <= bound:
                return bound
        return -1

    expected = defaultdict(int)
    source.reset()
    for _ in range(sample_count):
        expected[bucket(draw())] += 1
    self.assertDictEqual(out, expected)
def test_4(self):
    """Sorting mapped wrapper objects by a key function, descending."""
    size = 10
    eps = 1e-9

    class Data:
        """Thin wrapper around a float with eps-tolerant equality."""

        def __init__(self, e):
            self._e = e

        def e(self):
            return self._e

        def __eq__(self, other) -> bool:
            return abs(self._e - other._e) <= eps

    rnd = random()
    out = (Stream.from_supplier(rnd.random).limit(size).map(Data).sort(
        key=Data.e, reverse=True).collect(ToList()))
    rnd.reset()
    out_target = sorted((Data(rnd.random()) for _ in range(size)),
                        key=Data.e,
                        reverse=True)
    for o, t in zip(out, out_target):
        with self.subTest(o=o):
            # Bug fix: assertAlmostEqual only passed via its "first == second"
            # short-circuit; on a mismatch it would try "first - second" and
            # raise TypeError (Data has no __sub__/__round__) instead of a
            # clean assertion failure. assertEqual uses the eps-tolerant
            # Data.__eq__ directly.
            self.assertEqual(o, t)
def test_1(self):
    """window_function applies an aggregator over a fixed-size sliding window."""
    rnd = random()
    data = rnd.int_range(1, 100, size=1000)
    window_size = 4

    def mean(es):
        return sum(es) / len(es)

    out = Stream(data).window_function(mean, window_size).collect(ToList())

    # Recompute the sliding-window means directly for comparison.
    window = data[:window_size]
    expected = [mean(window)]
    for e in data[window_size:]:
        window = window[1:] + [e]
        expected.append(mean(window))

    for got, want in zip(out, expected):
        with self.subTest():
            self.assertAlmostEqual(got, want, delta=1e-8)
def test_1(self):
    """get_chunk consumes an iterator in fixed-size pieces."""
    rnd = random()
    a, b, size = 0, 10, 10
    chunk_size = 3
    data = rnd.int_range(a, b, size=size)
    itr = iter(data.copy())
    # Bug fix: chunk offsets must iterate over the number of elements
    # (size), not over the value range (a, b); the original
    # range(a, b, chunk_size) only worked because a == 0 and b == size
    # happened to coincide in this test.
    for i in range(0, size, chunk_size):
        chunk = get_chunk(itr, chunk_size, list)
        self.assertListEqual(chunk, data[i:i + chunk_size])
def test_1a(self):
    """GroupingBy(identity, Counting) yields per-element frequencies."""
    rnd = random()
    low, high = 1, 100
    sample_count = 1000
    data = rnd.int_range(low, high, size=sample_count)
    frequencies = Stream(data).collect(GroupingBy(identity, Counting()))
    # Counter gives the reference frequency table.
    self.assertDictEqual(frequencies, Counter(data))
def test_5(self):
    """Exponentially weighted moving average via window_function, in Decimal."""
    ctx = Context(prec=40)

    def dot_product(v1, v2):
        l1, l2 = len(v1), len(v2)
        assert l1 == l2, "dimension mismatch; v1's dim: {} and v2's dim: {}".format(
            l1, l2)
        assert v1 and v2, 'each vector must have at least one element.'
        acc = 0
        for x, y in zip(v1, v2):
            acc += x * y
        return acc

    class WeightedAverage:
        """Callable applying a fixed weight vector to each window."""

        def __init__(self, weight: tuple):
            assert sum(weight) == 1, 'weight array is not normalised'
            self._weight = weight

        def __call__(self, es: tuple):
            return dot_product(self._weight, es)

    # -------------------------------------------------------------------
    data = tuple(
        D(e, ctx) for e in random().float_range(1, 100, size=1000))
    # -------------------------------------------------------------------
    alpha = D(0.1, ctx)
    window_size = 10
    norm = alpha * (1 - alpha**window_size) / (1 - alpha)
    # dividing by "norm" makes sum(weight) == 1
    weight = tuple(alpha**n / norm for n in range(1, window_size + 1))
    wa = WeightedAverage(weight)
    # -------------------------------------------------------------------
    out = Stream(data).window_function(wa, window_size).collect(ToList())
    # -------------------------------------------------------------------
    window = data[:window_size]
    self.assertEqual(out[0], dot_product(window, weight))
    for n, e in enumerate(data[window_size:], start=1):
        window = window[1:] + (e, )
        self.assertEqual(out[n], dot_product(window, weight))
def test_3(self):
    """MaxBy with a negated comparator agrees with MinBy in every bucket."""
    rnd = random()
    data = rnd.int_range(1, 100, size=10)

    def mod_5(x: int):
        return x % 5

    # Reversing the comparator inside MaxBy turns it into a per-bucket
    # minimum, which must match what MinBy produces directly.
    via_reversed_max = Stream(data).collect(
        GroupingBy(mod_5, MaxBy(comparing(lambda x: -x))))
    via_min = Stream(data).collect(GroupingBy(mod_5, MinBy()))
    self.assertDictEqual(via_reversed_max, via_min)
def test_1(self):
    """Counting via CollectAndThen(ToList, len) matches Counter.

    Frequencies are obtained by grouping elements on their value,
    collecting each group into a list, and taking its length.
    For the more direct approach see "test_1a".
    """
    rnd = random()
    low, high = 1, 100
    sample_count = 1000
    data = rnd.int_range(low, high, size=sample_count)
    frequencies = Stream(data).collect(
        GroupingBy(identity, CollectAndThen(ToList(), len)))
    self.assertDictEqual(frequencies, Counter(data))
def test_2(self):
    """map + sort(reverse) matches sorting the squared values directly."""
    sample_count = 1000
    rnd = random()

    def square(x):
        return x**2

    out = (Stream.from_supplier(rnd.random)
           .limit(sample_count)
           .map(square)
           .sort(reverse=True)
           .collect(ToList()))
    rnd.reset()
    expected = sorted((square(rnd.random()) for _ in range(sample_count)),
                      reverse=True)
    for got, want in zip(out, expected):
        with self.subTest(o=got):
            self.assertAlmostEqual(got, want, delta=1e-9)
def test_4(self):
    """MinBy per bucket wraps each bucket's minimum in an Optional."""
    rnd = random()
    data = rnd.int_range(1, 100, size=10)

    def mod_5(x: int):
        return x % 5

    bkt_min = Stream(data).collect(GroupingBy(mod_5, MinBy()))

    buckets = defaultdict(list)
    for e in data:
        buckets[mod_5(e)].append(e)
    expected = {key: Optional(min(es)) for key, es in buckets.items()}
    self.assertDictEqual(bkt_min, expected)
def test_4(self):
    """An unbounded window_function can maintain running statistics."""
    data = random().int_range(1, 100, size=200)

    class Statistic:
        """Incrementally tracks mean and population std of values seen so far."""

        def __init__(self):
            self._mean = 0
            self._mean_sqr = 0  # running mean of squared values
            self._n = 0

        @property
        def mean(self):
            return self._mean

        @property
        def std(self):
            # population std: sqrt(E[x^2] - E[x]^2)
            return (self._mean_sqr - self.mean**2)**0.5

        def __call__(self, es: list):
            n = self._n
            e = es[-1]
            self._mean = (self._mean * n + e) / (n + 1)
            self._mean_sqr = (self._mean_sqr * n + e**2) / (n + 1)
            self._n = n + 1
            return dict(mean=self.mean, std=self.std)

    out: List[dict] = Stream(data).window_function(Statistic(),
                                                   None).collect(ToList())

    eps = 1e-9
    seen = []
    for stats, value in zip(out, data):
        seen.append(value)
        with self.subTest():
            mu = mean(seen)
            self.assertAlmostEqual(stats['mean'], mu, delta=eps)
            self.assertAlmostEqual(stats['std'],
                                   pstdev(seen, mu=mu),
                                   delta=eps)
def test_if_else(self):
    """if_else splits the stream into two labelled groups."""
    rnd = random()
    sample_count = 1000

    def draw():
        return rnd.randint(1, 100)

    out = (Stream.from_supplier(draw)
           .limit(sample_count)
           .if_else(lambda x: x < 50, lambda x: 0, lambda x: 1)
           .collect(GroupingBy(identity, Counting())))

    expected = defaultdict(int)
    rnd.reset()
    for _ in range(sample_count):
        label = 0 if draw() < 50 else 1
        expected[label] += 1
    self.assertDictEqual(out, expected)
def test_2(self):
    """GroupingBy with ToSet collects the distinct elements of each bucket."""
    rnd = random()
    low, high = 1, 100
    sample_count = 1000

    def mod_10(x):
        return x % 10

    data = rnd.int_range(low, high, size=sample_count)
    # Distinct elements per remainder-mod-10 bucket.
    distinct = Stream(data).collect(GroupingBy(mod_10, ToSet()))

    expected = defaultdict(set)
    for e in data:
        expected[mod_10(e)].add(e)
    self.assertDictEqual(distinct, expected)
def test_4(self):
    """GroupingBy with Summing totals the elements of each bucket."""
    rnd = random()
    low, high = 1, 100
    sample_count = 1000

    def mod_10(x):
        return x % 10

    data = rnd.int_range(low, high, size=sample_count)
    # Sum of elements per remainder-mod-10 bucket.
    out = Stream(data).collect(GroupingBy(mod_10, Summing()))

    expected = defaultdict(int)
    for e in data:
        expected[mod_10(e)] += e
    self.assertDictEqual(out, expected)
def test_6(self):
    """CollectAndThen(ToSet, len) counts distinct elements per bucket."""
    rnd = random()
    low, high = 1, 10000
    sample_count = 1000

    def mod_100(x):
        return x % 100

    data = rnd.int_range(low, high, size=sample_count)
    # Distinct-element count per remainder-mod-100 bucket.
    distinct_counts = Stream(data).collect(
        GroupingBy(mod_100, CollectAndThen(ToSet(), len)))

    buckets = defaultdict(set)
    for e in data:
        buckets[mod_100(e)].add(e)
    expected = {key: len(es) for key, es in buckets.items()}
    self.assertDictEqual(distinct_counts, expected)
def test_conditional1(self):
    """A ChainedCondition without otherwise() passes unmatched values through."""
    rnd = random()
    sample_count = 1000

    def draw():
        return rnd.randint(1, 100)

    chain = (ChainedCondition()
             .if_then(lambda x: x <= 10, lambda x: 10)
             .if_then(lambda x: x <= 20, lambda x: 20)
             .if_then(lambda x: x <= 30, lambda x: 30)
             .done())
    out = (Stream.from_supplier(draw)
           .limit(sample_count)
           .conditional(chain)
           .mapping(identity, lambda x: 1, resolve=lambda x, y: x + y))

    def bucket(value):
        # Mirrors the chain; with no otherwise() an unmatched value
        # is kept unchanged.
        for bound in (10, 20, 30):
            if value <= bound:
                return bound
        return value

    expected = defaultdict(int)
    rnd.reset()
    for _ in range(sample_count):
        expected[bucket(draw())] += 1
    self.assertDictEqual(out, expected)
def test_8(self):
    """Nested GroupingBy counts people by country, then state, then sex."""
    rnd = random()
    data_size = 1000
    countries = tuple('ABC')
    sexes = ('M', 'F')

    class Person:
        def __init__(self, country, state, age, sex):
            self.country = country
            self.state = state
            self.age = age
            self.sex = sex

    people = [
        Person(rnd.choice(countries), rnd.randrange(1, 4),
               rnd.randrange(0, 60), rnd.choice(sexes))
        for _ in range(data_size)
    ]

    # Count each Person by country, then state, then M/F.
    collector = GroupingBy(
        attrgetter('country'),
        GroupingBy(attrgetter('state'),
                   GroupingBy(attrgetter('sex'), Counting())))

    def in_age_band(p: Person):
        # considering person of age between 20 and 50 (both inclusive)
        return 20 <= p.age <= 50

    out = Stream(people).filter(in_age_band).collect(collector)

    expected = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    for p in people:
        if in_age_band(p):
            expected[p.country][p.state][p.sex] += 1
    self.assertDictEqual(out, expected)
def test_2(self):
    """MaxBy with a custom comparator picks each bucket's maximum object."""

    class Data:
        def __init__(self, num):
            self._num = num

        def __str__(self) -> str:
            return '[' + str(self._num) + ']'

        def __repr__(self):
            return str(self)

        def __eq__(self, other: 'Data'):
            return self._num == other._num

    rnd = random()
    data = [Data(x) for x in rnd.int_range(1, 100, size=10)]

    def mod_5(d: Data):
        return d._num % 5

    comp_key = attrgetter('_num')
    bkt_max = Stream(data).collect(
        GroupingBy(mod_5, MaxBy(comparing(comp_key))))

    buckets = defaultdict(list)
    for e in data:
        buckets[mod_5(e)].append(e)
    expected = {
        key: Optional(max(es, key=comp_key))
        for key, es in buckets.items()
    }
    self.assertDictEqual(bkt_max, expected)
def test_4a(self):
    """CollectAndThen(ToSet, sum) sums the distinct elements of each bucket."""
    rnd = random()
    low, high = 1, 100
    sample_count = 1000

    def mod_10(x):
        return x % 10

    data = rnd.int_range(low, high, size=sample_count)
    # Sum of distinct elements per remainder-mod-10 bucket.
    out = Stream(data).collect(
        GroupingBy(mod_10, CollectAndThen(ToSet(), sum)))

    buckets = defaultdict(set)
    for e in data:
        buckets[mod_10(e)].add(e)
    expected = {bkt: sum(es) for bkt, es in buckets.items()}
    self.assertDictEqual(out, expected)
def test_5(self):
    """Mapping(square) composed with Summing sums squares per bucket."""
    rnd = random()
    low, high = 1, 100
    sample_count = 1000

    def mod_10(x):
        return x % 10

    def square(x):
        return x**2

    data = rnd.int_range(low, high, size=sample_count)
    # Sum of squared elements per remainder-mod-10 bucket.
    out = Stream(data).collect(
        GroupingBy(mod_10, Mapping(square, Summing())))

    expected = defaultdict(int)
    for e in data:
        expected[mod_10(e)] += square(e)
    self.assertDictEqual(out, expected)
def setUp(self):
    # Fresh random source per test; presumably the project's resettable
    # generator (sibling tests call rnd.reset() and replay draws) —
    # confirm against the random() helper's definition.
    self.rnd = random()