def test_to():
    """Conversions from a Stream into concrete collection types."""
    expected = [5, 6, 7]
    assert Stream.range(5, 8).to_array() == Array(expected)
    assert Stream([(1, 2), (3, 4)]).to_dict() == {1: 2, 3: 4}
    assert Stream.range(5, 8).to_set() == {5, 6, 7}
    assert list(Stream.range(5, 8)) == expected
    assert Stream.range(5, 8).to_series().equals(pd.Series(expected))
def test_filtering():
    """filter keeps matches, filter_false drops them, without removes values."""

    def even(value):
        return value % 2 == 0

    assert Stream.range(5, 10).filter(even).to_list() == [6, 8]
    assert Stream.range(5, 10).filter_false(even).to_list() == [5, 7, 9]
    assert Stream.range(5, 10).without(6, 8).to_list() == [5, 7, 9]
def ndcg_score_of_prediction(y_truth, y_pred):
    """Mean NDCG over fixed-size groups, ranking truths by prediction score.

    Predictions and truths are chunked into parallel groups of 20; within
    each group the truth values are reordered by descending prediction score
    and the reordered list is scored with ``ndcg_at_k``.  Returns the mean
    NDCG across all groups.
    """
    # Assumes y_truth/y_pred come pre-grouped in blocks of 20 — TODO confirm
    # against the data-producing code.
    group_size = 20

    def rank_truth_by_pred(pred_group, truth_group):
        # Sort (pred, truth) pairs by prediction, best first; ties fall back
        # to comparing the truth values themselves (tuple ordering).
        ordered = sorted(zip(pred_group, truth_group), reverse=True)
        return [truth for _pred, truth in ordered]

    pred_groups = Stream(y_pred).chunk(group_size).map(lambda row: row.to_list())
    truth_groups = Stream(y_truth).chunk(group_size).map(lambda row: row.to_list())
    return (pred_groups
            .zip(truth_groups)
            .starmap(rank_truth_by_pred)
            .map(ndcg_at_k)  # was wrapped in a redundant lambda
            .mean())
def test_slice():
    """take/drop by count and their predicate-based while variants."""

    def below_three(n):
        return n < 3

    assert Stream.range(5, 10).take(3).to_list() == [5, 6, 7]
    assert Stream.range(5, 10).drop(3).to_list() == [8, 9]
    assert Stream.range(0, 5).takewhile(below_three).to_list() == [0, 1, 2]
    assert Stream.range(0, 5).dropwhile(below_three).to_list() == [3, 4]
def test_groupby():
    """Grouping yields (key, values) pairs or a key -> Array mapping."""

    def thirds(n):
        return n // 3

    pairs = (Stream.range(10)
             .group_by_as_stream(thirds)
             .starmap(lambda key, values: (key, list(values)))
             .to_list())
    assert pairs == [(0, [0, 1, 2]), (1, [3, 4, 5]), (2, [6, 7, 8]), (3, [9])]

    expected_map = {
        0: Array([0, 1, 2]),
        1: Array([3, 4, 5]),
        2: Array([6, 7, 8]),
        3: Array([9]),
    }
    assert Stream.range(10).group_by_as_map(thirds) == expected_map
def test_map():
    """map / flat_map / flatten transformations, plus len counting."""

    def double(n):
        return n * 2

    def twice(value):
        return [value, value]

    assert Stream.range(10).map(double).to_list() == list(range(0, 20, 2))
    assert Stream.range(3).flat_map(twice).to_list() == [0, 0, 1, 1, 2, 2]
    assert Stream([(5, 1), (3, 4)]).flatten().to_list() == [5, 1, 3, 4]
    assert Stream.range(10).len() == 10
    assert Stream([(5, 1), (3, 4)]).flatten().len() == 4
def test_split():
    """split_before/split_after cut around elements matching the predicate."""

    def at_multiple_of_three(n):
        return n % 3 == 0

    before = Stream.range(10).split_before(at_multiple_of_three).to_list()
    assert before == [Array([0, 1, 2]), Array([3, 4, 5]),
                      Array([6, 7, 8]), Array([9])]

    after = Stream.range(10).split_after(at_multiple_of_three).to_list()
    assert after == [Array([0]), Array([1, 2, 3]),
                     Array([4, 5, 6]), Array([7, 8, 9])]
def test_pluck():
    """pluck / pluck_opt / pluck_attr extract values from dicts and objects."""
    records = [{'a': 1, 'b': 2}, {'a': 4, 'b': 5}]
    assert Stream(records).pluck('a').to_list() == [1, 4]

    with_gap = [{'a': 1, 'b': 2}, {'b': 3}, {'a': 4, 'b': 5}]
    assert Stream(with_gap).pluck_opt('a').to_list() == [
        Some(1), Nothing, Some(4)]

    people = [Person('Johnny', 18), Person('Amy', 15)]
    assert Stream(people).pluck_attr('name').to_list() == ['Johnny', 'Amy']
def test_reduce():
    """Reductions: reduce, fold_left, mean, sum, and accumulate."""
    assert Stream.range(5, 8).reduce(op.add) == 18
    assert Stream.range(5, 8).fold_left(op.add, 2) == 20
    assert Stream.range(5, 8).mean() == 6
    assert Stream.range(5, 8).sum() == 18
    assert Stream.range(5, 8).accumulate().to_list() == [5, 11, 18]
    assert Stream.range(5, 8).accumulate(op.mul).to_list() == [5, 30, 210]
def test_zip():
    """zip family: zip, zip_longest, zip_prev/zip_next (with fill), zip_index."""
    # NOTE: the original repeated the first zip assertion verbatim; the
    # duplicate has been removed.
    assert Stream.range(5, 8).zip(itt.count(1)).to_list() == [
        (5, 1), (6, 2), (7, 3)]
    assert Stream.range(5, 8).zip_longest(range(1, 5)).to_list() == [
        (5, 1), (6, 2), (7, 3), (None, 4)]
    assert (Stream.range(5, 8).zip_longest(range(1, 5), fillvalue=100)
            .to_list() == [(5, 1), (6, 2), (7, 3), (100, 4)])
    # Default fill value for the missing neighbour is None.
    assert Stream.range(5, 8).zip_prev().to_list() == [
        CurrPrev(5, None), CurrPrev(6, 5), CurrPrev(7, 6)]
    assert Stream.range(5, 8).zip_next().to_list() == [
        CurrNext(5, 6), CurrNext(6, 7), CurrNext(7, None)]
    # Explicit fill value for the missing neighbour.
    assert Stream.range(5, 8).zip_prev(99).to_list() == [
        CurrPrev(5, 99), CurrPrev(6, 5), CurrPrev(7, 6)]
    assert Stream.range(5, 8).zip_next(99).to_list() == [
        CurrNext(5, 6), CurrNext(6, 7), CurrNext(7, 99)]
    assert Stream.range(5, 8).zip_index().to_list() == [
        ValueIndex(5, 0), ValueIndex(6, 1), ValueIndex(7, 2)]
def handle(self):
    """Command entry point: parse email-to-Trello-id mappings, back-fill once,
    then keep syncing roughly one minute of history per real minute, forever.
    """
    # Each mapping argument looks like "email:trello_id"; split on the first
    # ':' only, so ids containing ':' survive intact.
    self.email_id_mappings = (Stream(
        self.argument('email_id_mappings')).map(lambda s: Row.from_values(
            s.split(':', 1), fields=('email', 'trello_id'))).to_list())
    # Optional lower bound for the initial back-fill; None when not given.
    first_updated_min = (Optional.from_value(
        self.option('first-updated-min')).map(pdl.parse).get_or_none())
    self.sync(first_updated_min)
    now = pdl.now(tz='Asia/Taipei')
    # Truncate to whole minutes so sync boundaries line up.
    now = now.set(second=0, microsecond=0)
    # Start 10 minutes in the past so the loop re-syncs a safety window
    # before catching up to the present; it then stays ~10 minutes behind
    # real time, syncing one minute per 60-second sleep.
    last_minute = now.subtract(minutes=10)
    while True:
        last_minute = last_minute.add(minutes=1)
        self.sync(last_minute)
        time.sleep(60)
def test_general_case(ipsum, capsys):
    """End-to-end pipeline over the ipsum fixture, checking tap output too."""
    # Stage 1: tokenize into distinct non-empty words, stripped of ',.'.
    words = (Stream(ipsum.splitlines())
             .flat_map(lambda line: line.split(' '))
             .map(lambda word: word.strip(',.'))
             .filter(lambda word: len(word) > 0)
             .distincted())
    # Stage 2: group words by length and count each group.
    counts = (words
              .sorted(key=lambda word: len(word))
              .group_by_as_stream(lambda word: len(word))
              .map(lambda keyvalues: keyvalues.transform(
                  values=lambda stream: stream.to_array()))
              .map(lambda keyvalues: Row(length=keyvalues.key,
                                         count=keyvalues.values.len())))
    # Stage 3: tap the first two rows, then take the 3 most common lengths.
    out = (counts
           .tap(tag='length count', n=2)
           .nlargest(3, key=lambda row: row.count)
           .pluck_attr('length')
           .to_list())
    assert out == [6, 7, 8]
    captured = capsys.readouterr()
    assert captured.out == '''length count:0: Row(length=1, count=2)
length count:1: Row(length=2, count=10)
'''
    assert captured.err == ''
def get_tfidf(joblist, tokens):
    """Return per-job TF-IDF field scores for the given query tokens.

    For each job id in ``joblist`` produces a tuple
    ``(job, score_1, ..., score_n)`` following the order of
    ``Job.tfidf_fields``.  With no tokens every score is 0.0.
    """
    if not tokens:
        # No query: every field score is trivially zero.
        return [
            tuple([job] + [0.0 for field in Job.tfidf_fields])
            for job in joblist
        ]
    joined_tokens = ' '.join(tokens)
    q_multi_match = Q('multi_match',
                      query=joined_tokens,
                      type='most_fields',
                      fields=Job.tfidf_fields,
                      analyzer='whitespace')
    # Restrict scoring to exactly the requested job ids.
    q_ids = Q('ids', values=joblist)
    q_overall = Q('bool', must=q_multi_match, filter=q_ids)
    # NOTE(review): hard-coded local ES endpoint — consider making this
    # configurable.
    client = connections.create_connection(hosts=['localhost:9201'],
                                           timeout=20)
    search = (Search(using=client, index='hack104')
              .query(q_overall)
              .extra(explain=True))
    # Was wrapped in a pointless generator expression; iterate directly.
    hits = search.execute().hits.hits

    def ugly_extract_fieldname(description):
        # Explanation descriptions start with a fixed 7-char prefix followed
        # by "<field>:..."; slice out the field name.
        return description[7:description.find(':')]

    def extract_fields_scores(expl):
        # Map each per-field explanation detail to {field: score}.
        return (Stream(expl['_explanation']['details'][0]['details'])
                .map(lambda d: Row(
                    field=ugly_extract_fieldname(
                        d['details'][0]['description']),
                    score=d['value']))
                .to_map())

    # (removed a no-op self-assignment `joblist = joblist`)
    scores_of_job = Stream(hits).map(extract_fields_scores).to_list()
    return [
        tuple([job] + [scores.get(field, 0.0) for field in Job.tfidf_fields])
        for job, scores in zip(joblist, scores_of_job)
    ]
def test_extend():
    """extended appends a whole iterable; appended appends a single value."""
    extended = Stream.range(5, 8).extended([1, 3]).to_list()
    assert extended == [5, 6, 7, 1, 3]
    appended = Stream.range(5, 8).appended(5).to_list()
    assert appended == [5, 6, 7, 5]
def ndcg_score_of_truth(y_truth):
    """Mean NDCG of the truth values in their given order, per group of 20."""
    from carriage import Stream
    # Assumes y_truth comes pre-grouped in blocks of 20 — TODO confirm.
    group_size = 20
    return (Stream(y_truth)
            .chunk(group_size)
            .map(lambda row: row.to_list())
            .map(ndcg_at_k)  # was wrapped in a redundant lambda
            .mean())
def test_access():
    """Positional access: get / __getitem__ / get_opt / first variants."""
    # get: in-range, default fallback, and None for missing without default.
    assert Stream.range(5, 10).get(3) == 8
    assert Stream.range(5, 10).get(5, 0) == 0
    assert Stream.range(5, 10).get(7, 0) == 0
    assert Stream.range(5, 10).get(6) is None
    with pytest.raises(ValueError):
        Stream.range(5, 10).get(-1)
    # __getitem__: in-range, negative index rejection, out-of-range.
    assert Stream.range(5, 10)[3] == 8
    with pytest.raises(ValueError):
        Stream(iter(range(5, 10)))[-1]
    with pytest.raises(IndexError):
        Stream.range(5, 10)[5]
    # get_opt: Some for hits, Nothing for misses, ValueError for negatives.
    assert Stream.range(5, 10).get_opt(3) == Some(8)
    assert Stream.range(5, 10).get_opt(5) is Nothing
    with pytest.raises(ValueError):
        Stream(iter(range(5, 10))).get_opt(-1)
    # first / first_opt, including empty streams.
    assert Stream.range(5, 10).first() == 5
    assert Stream.range(5, 10).first_opt() == Some(5)
    assert Stream([5]).first_opt() == Some(5)
    assert Stream([]).first_opt() is Nothing
    assert Stream(iter([])).first_opt() is Nothing
def test_init():
    """Streams wrap lists and ranges, or are built via Stream.range."""
    expected = list(range(10))
    assert Stream([1, 2, 3]).to_list() == [1, 2, 3]
    assert Stream(range(10)).to_list() == expected
    assert Stream.range(10).to_list() == expected
def test_tuple_as_row():
    """tuple_as_row names tuple positions, yielding Row objects."""
    rows = Stream([(1, 2), (3, 4)]).tuple_as_row(['x', 'y']).to_list()
    expected = [Row(x=1, y=2), Row(x=3, y=4)]
    assert rows == expected
def test_counter():
    """value_counts agrees with collections.Counter on the same data."""
    data = [3, 1, 1, 3, 5, 4, 9, 3, 1, 5, 3]
    assert Stream(data).value_counts() == Counter(data)
def test_sliding_window():
    """sliding_window yields overlapping tuples of the given width."""
    windows = Stream.range(5, 10).sliding_window(3).to_list()
    assert windows == [(5, 6, 7), (6, 7, 8), (7, 8, 9)]
def extract_fields_scores(expl):
    """Turn an ES explanation payload into a {field_name: score} mapping.

    NOTE(review): relies on ``ugly_extract_fieldname`` being defined at this
    scope — elsewhere it appears only nested inside ``get_tfidf``; verify.
    """
    details = expl['_explanation']['details'][0]['details']
    rows = Stream(details).map(lambda d: Row(
        field=ugly_extract_fieldname(d['details'][0]['description']),
        score=d['value']))
    return rows.to_map()
def tlist_of_name_opt(self, name):
    """Find the first tlist whose name contains *name* (optional result)."""
    def name_matches(tlist):
        return name in tlist.name

    return Stream(self.tlists).find_opt(name_matches)
def test_interpose():
    """interpose inserts the separator between elements; empty stays empty."""
    assert Stream([]).interpose(0).to_list() == []
    assert Stream.range(5, 9).interpose(0).to_list() == [5, 0, 6, 0, 7, 0, 8]
def test_ordering():
    """reversed and sorted (optionally keyed) reorder elements."""

    def negate(x):
        return -x

    def parity(x):
        return x % 2

    assert Stream.range(5, 8).reversed().to_list() == [7, 6, 5]
    assert Stream([1, 3, 2, 4]).sorted().to_list() == [1, 2, 3, 4]
    assert Stream([1, 3, 2, 4]).sorted(negate).to_list() == [4, 3, 2, 1]
    assert Stream([1, 3, 2, 4]).sorted(parity).to_list() == [2, 4, 1, 3]
def test_distincted():
    """distincted drops duplicates, keeping first-seen order."""
    result = Stream([1, 5, 1, 3, 3, 5, 6]).distincted().to_list()
    assert result == [1, 5, 3, 6]
def tokenize(text):
    """Segment *text* with jieba's search-mode cut after openCC conversion,
    dropping bare-space tokens.  Returns None when *text* is None."""
    if text is None:
        return None
    converted = openCC.convert(text)
    tokens = jieba.cut_for_search(converted)
    return Stream(tokens).filter(X != ' ').to_list()