def certain_kind_tap(data_items):
    """
    Collect the fruit and the metal found in a stream of data items.

    Each kind of information is gathered by its own spigot (a Bucket built
    around a getter function) while the items flow past.

    stream_tap is a generator, so it does not consume data_items on its
    own; something else has to pull the items through. Here that is done
    by casting the stream to a tuple one batch at a time. No batch is
    referenced once it has been built, so the garbage collector can free
    it, and only a little memory is needed no matter how large data_items
    is. The only things retained are the results, which should be just a
    subset of the items; in this case the getter functions return only a
    portion of each item they match.

    :param data_items: A sequence of unicode strings
    :return: A pair holding the fruit spigot's contents followed by the
        metal spigot's contents
    """
    fruit_tap = Bucket(get_fruit)
    metal_tap = Bucket(get_metal)
    spigots = (fruit_tap, metal_tap)

    tapped_stream = stream_tap(spigots, data_items)
    for chunk in i_batch(100, tapped_stream):
        # Materialise and immediately discard the batch: this is what
        # actually drives the generator and fills the spigots.
        tuple(chunk)

    fruit_found = fruit_tap.contents()
    metal_found = metal_tap.contents()
    return fruit_found, metal_found
def test_stream_tap_one_bucket(self):
    """
    Check stream_tap when a single bucket is attached.

    Consuming the tapped stream must yield (1, 2, 3, 4), and the bucket
    built on get_odd must end up holding (1, 3).
    """
    odds = Bucket(get_odd)
    # stream_tap expects a collection of buckets, hence the 1-tuple.
    tapped = stream_tap((odds,), self.stream)
    self.assertEqual((1, 2, 3, 4), tuple(tapped))
    collected = tuple(odds.contents())
    self.assertEqual((1, 3), collected)
def test_stream_tap_two_bucket(self):
    """
    Check stream_tap when two buckets are attached at once.

    Consuming the tapped stream must yield (1, 2, 3, 4); the get_odd
    bucket must hold (1, 3) and the get_company_crowd bucket must hold
    ("3 >= 3", "4 >= 3").
    """
    odds = Bucket(get_odd)
    crowd = Bucket(get_company_crowd)
    tapped = stream_tap((odds, crowd), self.stream)
    # The buckets only fill as the stream is consumed, so drain it first.
    self.assertEqual((1, 2, 3, 4), tuple(tapped))
    odd_items = tuple(odds.contents())
    crowd_items = tuple(crowd.contents())
    self.assertEqual((1, 3), odd_items)
    self.assertEqual(("3 >= 3", "4 >= 3"), crowd_items)
def certain_kind_tap(data_items):
    """
    Tap data_items with a fruit bucket and a metal bucket.

    The items are routed through stream_tap, the resulting stream is
    pulled through i_batch(100, ...) with every batch cast to a tuple
    and discarded, and the contents of both buckets are returned.

    :param data_items: A sequence of unicode strings
    :return: A pair: the fruit bucket's contents, then the metal
        bucket's contents
    """
    taps = (Bucket(get_fruit), Bucket(get_metal))
    flow = stream_tap(taps, data_items)
    for group in i_batch(100, flow):
        # The tuple itself is thrown away; building it consumes the batch.
        tuple(group)
    return tuple(tap.contents() for tap in taps)
def test_bucket_drain(self):
    """
    Check that a Bucket can drain its contents.

    After every stream item has been fed to a get_odd bucket,
    drain_contents() must give (1, 3) and contents() must afterwards
    compare equal to an empty deque.
    """
    bucket = Bucket(get_odd)
    for element in self.stream:
        bucket(element)
    drained = tuple(bucket.drain_contents())
    self.assertEqual((1, 3), drained)
    # Draining is expected to leave nothing behind in the bucket.
    leftover = bucket.contents()
    self.assertEqual(deque(), leftover)
def test_spigot_collects(self):
    """
    Check that a spigot (a Bucket) collects its contents.

    After every stream item has been fed to a get_odd bucket,
    contents() must give (1, 3), and a second contents() call must
    still not compare equal to an empty deque.
    """
    spigot = Bucket(get_odd)
    for element in self.stream:
        spigot(element)
    collected = tuple(spigot.contents())
    self.assertEqual((1, 3), collected)
    # Unlike draining, reading the contents must not empty the spigot.
    still_held = spigot.contents()
    self.assertNotEqual(deque(), still_held)