def test_it_should_map_a_function_to_the_stream(self):
    """Check ``map`` with a negating function, then values/skip/limit on a dict."""
    negated = Stream(range(10)).map(lambda item: -item)
    self.assertEqual(max(negated), 0)

    pairs = dict((v, v) for v in xrange(100))
    window = Stream(pairs).values().skip(10).limit(3)
    self.assertListEqual(list(window), [10, 11, 12])
def test_any(self):
    """Exercise ``any`` with and without a predicate, serially and in parallel."""

    def above_five(item):
        return item > 5

    def below_minus_one(item):
        return item < -1

    self.assertTrue(Stream(xrange(10)).any())
    self.assertFalse(Stream([]).any())
    self.assertTrue(Stream(xrange(10)).any(above_five, parallel=True))
    self.assertTrue(Stream(xrange(10)).any(above_five))
    self.assertFalse(Stream(xrange(10)).any(below_minus_one, parallel=True))
def test_divisibleby(self):
    """Check ``divisible_by`` on an int-cast stream and on a raw stream."""
    stream = Stream(xrange(2000))
    stream = stream.ints().divisible_by(10)
    self.assertEqual(stream.count(), 200)

    stream = Stream(xrange(2000))
    stream = stream.divisible_by(1000)
    # ``assertEquals`` is a deprecated alias that was removed in Python 3.12;
    # ``assertEqual`` is the supported spelling on every version.
    self.assertEqual(list(stream), [0, 1000])
def test_it_should_limit_the_size_of_the_stream(self):
    """Check ``limit`` below and above the number of available items."""
    first_ten = Stream(xrange(10000000000)).limit(10)
    self.assertListEqual(list(first_ten), list(xrange(10)))

    # A limit larger than the source must yield every item.
    everything = Stream(xrange(100)).limit(1000)
    self.assertListEqual(list(everything), list(xrange(100)))
def test_it_should_concatenate_iterables(self):
    """Check ``Stream.concat`` over streams and plain iterables."""
    doubled = Stream.concat(Stream.range(10), Stream.range(10))
    self.assertListEqual(list(doubled),
                         list(xrange(10)) + list(xrange(10)))

    tripled = Stream.concat(xrange(10), xrange(10), xrange(10))
    self.assertListEqual(list(tripled.distinct()), list(xrange(10)))

    tripled = Stream.concat(xrange(10), xrange(10), xrange(10))
    self.assertEqual(tripled.count(), 30)
def test_it_should_filter_evens(self):
    """Check ``evens`` on a small range and on a larger int-cast stream."""
    small = Stream(range(6)).evens()
    self.assertListEqual(list(small), [0, 2, 4])

    elements = list(Stream(xrange(200)).ints().evens())
    self.assertEqual(len(elements), 100)
    self.assertTrue(all(item % 2 == 0 for item in elements))
def test_it_should_filter_odds(self):
    """Check ``odds`` on a small range and on a larger stream."""
    small = Stream(range(6)).odds()
    self.assertListEqual(list(small), [1, 3, 5])

    elements = list(Stream(xrange(200)).odds())
    self.assertEqual(len(elements), 100)
    self.assertFalse(any(item % 2 == 0 for item in elements))
def test_it_should_sum_a_stream(self):
    """Check ``sum`` value and result type for int, float and Decimal casts."""
    elements = list(xrange(5))

    int_result = Stream(elements).ints().sum()
    float_result = Stream(elements).floats().sum()
    decimal_result = Stream(elements).decimals().sum()

    self.assertEqual(int_result, 10)
    self.assertIsInstance(int_result, int)

    self.assertAlmostEqual(float_result, 10)
    self.assertIsInstance(float_result, float)

    self.assertEqual(decimal_result, Decimal("10"))
    self.assertIsInstance(decimal_result, Decimal)
def test_it_should_filter_by_regular_expression(self):
    """Check ``regexp`` filtering on streams of numeric strings."""
    ones = Stream((text_type(x) for x in xrange(100))).regexp(r'^1')
    expected_ones = ['1'] + ['1%d' % digit for digit in range(10)]
    self.assertListEqual(list(ones), expected_ones)

    powers = Stream(str(item) for item in xrange(1000))
    powers = powers.regexp(r"^10*$").ints()
    self.assertListEqual(list(powers), [1, 10, 100])
def test_it_should_find_the_median(self):
    """Check ``median`` on ordered and shuffled ranges."""
    for size, expected in ((10, 5), (11, 5), (12, 6)):
        self.assertEqual(Stream(xrange(size)).median(), expected)

    # Input order must not affect the result.
    for size, expected in ((12, 6), (11, 5)):
        arr = list(xrange(size))
        shuffle(arr)
        self.assertEqual(Stream(arr).median(), expected)
def test_it_should_filter_by_divisibility(self):
    """Check ``divisible_by`` on small, int-cast and raw streams."""
    stream = Stream(range(6))
    stream = stream.divisible_by(2)
    self.assertListEqual(list(stream), [0, 2, 4])

    stream = Stream(xrange(2000))
    stream = stream.ints().divisible_by(10)
    self.assertEqual(stream.count(), 200)

    stream = Stream(xrange(2000))
    stream = stream.divisible_by(1000)
    # ``assertEquals`` is a deprecated alias that was removed in Python 3.12;
    # ``assertEqual`` is the supported spelling on every version.
    self.assertEqual(list(stream), [0, 1000])
def sounds_to_key_and_stream(sounds):
    """Build a ``Stream`` from *sounds* together with a hashable key.

    Each sound is added to a fresh ``Stream`` and also recorded as a
    ``KeySound`` built from its frequency, duration and volume.

    Returns:
        A ``(key, stream)`` tuple where ``key`` is a ``frozenset`` of the
        ``KeySound`` objects and ``stream`` is the populated ``Stream``.
    """
    stream = Stream()
    key_sounds = set()
    for sound in sounds:
        stream.add_sound(sound)
        descriptor = KeySound(
            frequency=sound.frequency,
            duration=sound.duration,
            volume=sound.volume,
        )
        key_sounds.add(descriptor)
    return frozenset(key_sounds), stream
def test_it_should_filter_items(self):
    """Check ``filter`` on a plain range and, chained, on a dict-backed stream."""
    odds = Stream(range(10)).filter(lambda item: item % 2)
    self.assertEqual(odds.sum(), 25)

    pairs = dict((v, v) for v in xrange(100))
    keys = (
        Stream(pairs)
        .filter(lambda kv: kv[0] % 2)
        .filter(lambda kv: kv[0] % 10, parallel=6)
        .limit(5)
        .keys()
    )
    self.assertListEqual(list(keys), [1, 3, 5, 7, 9])
def test_it_should_filter_instances_of_a_class(self):
    """Check ``instances_of`` against mixed int/str/None input."""
    mixed = list(xrange(10)) + ['foo', 'bar', 'baz']
    only_strings = Stream(mixed).instances_of(string_types)
    self.assertListEqual(list(only_strings), ['foo', 'bar', 'baz'])

    numbers = list(xrange(100))
    # noinspection PyTypeChecker
    elements = numbers + [str(item) for item in numbers] + [None, None]

    strings = list(Stream(elements).instances_of(str))
    ints = list(Stream(elements).instances_of(int))

    self.assertEqual(len(strings), 100)
    self.assertTrue(all(isinstance(item, str) for item in strings))
    self.assertEqual(len(ints), 100)
    self.assertTrue(all(isinstance(item, int) for item in ints))
def test_it_should_count_the_number_of_occurrences_in_the_stream(self):
    """Check ``count`` after limiting below and above the source length."""
    halved = Stream(xrange(100)).limit(50)
    self.assertEqual(halved.count(), 50)

    untouched = Stream(xrange(100)).limit(1000)
    self.assertEqual(untouched.count(), 100)
def test_no_cache(self):
    """An uncached stream yields its items once and is empty afterwards."""
    plain = Stream.range(10)

    first_pass = list(plain)
    self.assertEqual(first_pass, list(range(10)))

    # The second pass finds the stream already consumed.
    second_pass = list(plain)
    self.assertEqual(second_pass, [])
def preprocess(self, tokens: [str]):
    """Filter and normalise *tokens*, returning the results as a list.

    Steps, in order: keep tokens for which ``isalpha()`` is true, drop
    tokens contained in the stop-word collection, lower-case, then apply
    ``self.stem``.
    """
    # NOTE(review): stop words are matched before lower-casing, so a
    # capitalised stop word passes the filter — confirm this is intended.
    alphabetic = Stream(tokens).filter(lambda token: token.isalpha())
    meaningful = alphabetic.filter(
        lambda token: token not in self.__stopWords)
    lowered = meaningful.map(lambda token: token.lower())
    stemmed = lowered.map(lambda token: self.stem(token))
    return list(stemmed)
def test_all(self):
    """Exercise ``all`` with and without a predicate, serially and in parallel."""
    # Ranges without a falsy element, and empty streams, satisfy ``all``.
    self.assertTrue(Stream(xrange(1, 10)).all(parallel=True))
    self.assertTrue(Stream(xrange(1, 10)).all())
    for _ in range(2):
        self.assertTrue(Stream([]).all())

    # ``xrange(10)`` contains 0, which is falsy.
    self.assertFalse(Stream(xrange(10)).all(parallel=True))
    self.assertFalse(Stream(xrange(10)).all())

    # Explicit predicates.
    self.assertFalse(Stream(xrange(10)).all(lambda item: item < 5))
    self.assertTrue(Stream(xrange(10)).all(lambda item: item < 100))
def shanks_transformation(stream):
    """Return a stream of Shanks-transformed terms of *stream*.

    The head is computed from the first three values of *stream*; the tail
    is produced lazily by applying the same transformation to
    ``stream.next``. When the denominator is zero the middle value is used
    unchanged, avoiding a division by zero.
    """
    s0 = stream.value
    s1 = stream.next.value
    s2 = stream.next.next.value

    denominator = s0 - s1 - (s1 - s2)
    if denominator == 0:
        head = s1
    else:
        head = s2 - (s2 - s1)**2 / denominator

    return Stream(head, lambda: shanks_transformation(stream.next))
def _add_new_stream(self, stream_id):
    """
    internal function, no thread protect

    Registers a new ``Stream`` under *stream_id*. Returns ``False`` when
    the id is already registered; otherwise returns ``None``.
    """
    logger.info("Add new stream: %s", stream_id)

    if stream_id not in self.streams:
        # FIXME RON check this
        self.streams[stream_id] = Stream(stream_id, self.model, self.sender)
        return None

    logger.warning("Stream %s already existed", stream_id)
    return False
def test_for_each(self):
    """``for_each`` calls the callback once for every item of the stream."""

    class Accumulator:
        """Keeps a running total of the values passed to ``apply``."""

        def __init__(self):
            self.total = 0

        def apply(self, val):
            self.total += val

    accumulator = Accumulator()
    Stream.range(3).for_each(accumulator.apply)
    self.assertEqual(accumulator.total, 0 + 1 + 2)
def test_limit(self):
    """Check ``limit`` below and above the source length via ``count``."""
    capped = Stream(xrange(100)).limit(50)
    self.assertEqual(capped.count(), 50)

    uncapped = Stream(xrange(100)).limit(1000)
    self.assertEqual(uncapped.count(), 100)
def test_cache(self):
    """Check replay from an unbounded cache and from a size-5 cache."""
    full = list(range(10))

    # Unbounded cache: both passes see every value.
    cached = Stream.range(10).cache()
    self.assertEqual(list(cached), full)
    self.assertEqual(list(cached), full)

    # Derive a smaller cached stream from the cached one.
    bounded = cached.cache(5)
    # The first pass still sees everything.
    self.assertEqual(list(bounded), full)
    # The second pass comes from the cache: only the last 5 values remain.
    self.assertEqual(list(bounded), list(range(5, 10)))
def test_it_should_expand_a_stream_to_tuples(self):
    """Check ``tuplify`` with its default and with ``clones=4``."""
    pairs = list(Stream.range(10).tuplify())
    self.assertListEqual(pairs, [(i, i) for i in range(10)])

    quads = list(Stream.range(10).tuplify(clones=4))
    self.assertListEqual(quads, [(i, i, i, i) for i in range(10)])
def test_it_should_iterate_on_a_seed_value(self):
    """``Stream.iterate`` yields the seed, then repeated function applications."""
    powers = iter(Stream.iterate(lambda x: x * 10, 1))
    for expected in (1, 10, 100, 1000):
        self.assertEqual(next(powers), expected)
# Streams is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the # Free Software Foundation, either version 3 of the License, or (at # your option) any later version. # # Streams is distributed in the hope that it will be useful, but # WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with Streams. If not, see <https://www.gnu.org/licenses/>. # See README.rst at the top level directory of this repository for an # explanation of the code. from operator import add from streams import SinglyLinkedStream as Stream if __name__ == '__main__': ones = Stream(1, lambda: ones) print('Stream of ones:') print(list(ones[:10])) print() ints = Stream(1, lambda: Stream.map(add, ones, ints)) print(ints) print('Stream of natural numbers:') print(list(ints[:10]))
def test_it_should_produce_a_range(self):
    """``Stream.range`` returns a ``Stream`` with the same items as ``range``."""
    produced = Stream.range(10)
    self.assertIsInstance(produced, Stream)

    items = list(produced)
    self.assertEqual(items, list(range(10)))
return [] else: links = [link.get('href') for link in soup.findAll('a')] return [domain + link for link in links if link and link.startswith('/') and '?' not in link and link != '/'] def topk_dict(d, k=10): return dict(toolz.topk(k, d.items(), key=lambda x: x[1])) source = Stream() pages = source.unique().rate_limit(0.050) pages.sink(print) content = (pages.to_dask().map(requests.get) .map(lambda x: x.content)) links = (content.zip(pages) .map(links_of_page) .gather().concat()) links.sink(source.emit) """ from nltk.corpus import stopwords stopwords = set(stopwords.words('english')) word_counts = (content.map(str.split)
def test_reversed_should_reverse_the_stream(self):
    """The builtin ``reversed`` works on a stream and flips its order."""
    backwards = reversed(Stream.range(10))
    expected = list(reversed(range(10)))
    self.assertListEqual(list(backwards), expected)
def test_enumerate(self):
    """``enumerate`` pairs each item with its zero-based index."""
    indexed = Stream(['A', 'B', 'C']).enumerate()
    expected = [(0, 'A'), (1, 'B'), (2, 'C')]
    self.assertEqual(indexed.to_list(), expected)
def test_it_should_reduce_the_stream(self):
    """Reducing with ``add`` gives the same total as the builtin ``sum``."""
    total = Stream.range(10).reduce(add)
    self.assertEqual(total, sum(range(10)))
def test_range(self):
    """``Stream.range(100)`` yields the same items as ``xrange(100)``."""
    produced = list(Stream.range(100))
    self.assertListEqual(produced, list(xrange(100)))
def test_it_should_apply_a_side_effect_to_the_stream(self):
    """``peek`` runs its callback on the items while passing them through."""
    seen = []
    passed_through = list(Stream.range(10).peek(seen.append))
    # The items collected by the side effect must equal the items yielded.
    self.assertEqual(seen, passed_through)
def test_it_should_map_a_predicate_to_values_in_key_value_pairs(self):
    """``value_map`` changes the value of each pair and keeps the key."""
    squared = Stream.range(10).tuplify().value_map(lambda v: v ** 2)
    expected = [(x, x ** 2) for x in xrange(10)]
    self.assertListEqual(list(squared), expected)
def test_it_should_include_only_values(self):
    """Check ``values`` on 3-tuples followed by a bare string item."""
    triples = list(zip(range(10), range(100, 110), range(20, 30)))
    extracted = Stream(triples + ['foo']).values()
    # Expected: the last element of each tuple, then 'foo' unchanged.
    self.assertListEqual(list(extracted), list(range(20, 30)) + ['foo'])
def test_it_should_skip_the_first_n_items(self):
    """``skip(10)`` drops the first ten items and keeps the rest."""
    remainder = Stream(range(20)).skip(10)
    self.assertListEqual(list(remainder), list(range(10, 20)))
def test_list_is_created_correctly(self):
    """``to_list`` returns the stream's items as a list."""
    as_list = Stream.range(3).to_list()
    self.assertListEqual(as_list, [0, 1, 2])
def test_it_should_reverse_sort_the_stream(self):
    """``sorted(reverse=True)`` yields the items in descending order."""
    descending = Stream.range(10).sorted(reverse=True)
    expected = list(reversed(range(10)))
    self.assertListEqual(list(descending), expected)
def test_it_should_iterate_over_an_iterable_multiple_times(self):
    """A second pass over an already consumed stream yields nothing."""
    stream = Stream.range(10)

    first_pass = list(stream)
    self.assertListEqual(first_pass, list(xrange(10)))

    second_pass = list(stream)
    self.assertListEqual(second_pass, [])
def test_it_should_sort_the_stream_by_key(self):
    """Sorting by the second tuple element restores ascending pair order."""
    pairs = list(zip(reversed(list(xrange(10))), range(10)))
    # Feed the pairs in reverse so that sorting has work to do.
    ordered = Stream(reversed(pairs)).sorted(key=itemgetter(1))
    expected = list(zip(reversed(range(10)), range(10)))
    self.assertListEqual(list(ordered), expected)
def test_join_works_correctly(self):
    """``join`` renders mixed-type items into one separator-joined string."""
    mixed = [1, 2.0, "3", "4.0", None, {}]
    joined = Stream(mixed).join(", ")
    self.assertEqual(joined, "1, 2.0, 3, 4.0, None, {}")
def test_it_should_reverse_sort_the_stream_by_key(self):
    """Reverse-sorting by the second tuple element flips the pair order."""
    pairs = zip(reversed(range(10)), range(10))
    ordered = Stream(pairs).sorted(itemgetter(1), reverse=True)

    expected = list(zip(reversed(range(10)), range(10)))
    expected.reverse()
    self.assertListEqual(list(ordered), expected)
soup = BeautifulSoup(content) except: return [] else: links = [link.get('href') for link in soup.findAll('a')] return [ domain + link for link in links if link and link.startswith('/') and '?' not in link and link != '/' ] def topk_dict(d, k=10): return dict(toolz.topk(k, d.items(), key=lambda x: x[1])) source = Stream() pages = source.unique() pages.sink(print) content = (pages.map(requests.get).map(lambda x: x.content)) links = (content.zip(pages).map(links_of_page).concat()) links.sink(source.emit) """ from nltk.corpus import stopwords stopwords = set(stopwords.words('english')) word_counts = (content.map(str.split) .concat() .filter(str.isalpha) .remove(stopwords.__contains__) .frequencies())
def test_it_should_cast_a_stream_to_strings(self):
    """``strings`` converts every item to its text representation."""
    items = range(10)
    as_text = Stream(items).strings()
    expected = [text_type(i) for i in items]
    self.assertListEqual(list(as_text), expected)
def test_it_should_filter_the_n_smallest_items(self):
    """``smallest(10)`` yields the ten lowest items in ascending order."""
    lowest = Stream(range(100)).smallest(10)
    expected = sorted(range(10))
    self.assertListEqual(list(lowest), expected)