Exemple #1
0
def test_any_select():
    """Check AnyStream.select() with positional and with keyword descriptions.

    Case 1 passes callables positionally and expects one tuple per item.
    Case 2 passes keyword descriptions and expects one dict per item,
    keyed by the keyword names.
    """
    example = ['12', '123', '1234']

    expected_1 = [
        (2, 12.0, '12'),
        (3, 123.0, '123'),
        (4, 1234.0, '1234'),
    ]
    received_1 = sm.AnyStream(example).select(len, float, str).get_list()
    assert received_1 == expected_1, 'test case 1: AnyStream to RowStream'

    expected_2 = [
        {'a': 2, 'b': 2.0, 'c': '12'},
        {'a': 3, 'b': 3.0, 'c': '123'},
        {'a': 4, 'b': 4.0, 'c': '1234'},
    ]
    received_2 = sm.AnyStream(example).select(
        a=len,
        b=lambda i: float(len(i)),
        c=(str, ),
    ).get_list()
    # The failure message used to repeat the case 1 label, which made a
    # failing case 2 indistinguishable from a failing case 1.
    assert received_2 == expected_2, 'test case 2: AnyStream to RecordStream'
Exemple #2
0
def test_sort():
    """Check that sort() returns 9..1 for three different configurations.

    Case 0 sorts with reverse=True after set_meta(max_items_in_memory=4),
    case 1 sorts by a negating key with step=4, and case 2 sorts by three
    keys, the first of which is constant.
    """
    descending = list(range(9, 0, -1))

    limited_stream = sm.AnyStream(EXAMPLE_INT_SEQUENCE).set_meta(max_items_in_memory=4)
    received_0 = limited_stream.sort(reverse=True).get_list()
    assert received_0 == descending, 'test case 0'

    single_key_sorted = sm.AnyStream(EXAMPLE_INT_SEQUENCE).sort(
        lambda i: -i,
        reverse=False,
        step=4,
    )
    received_1 = single_key_sorted.get_list()
    assert received_1 == descending, 'test case 1'

    multi_key_sorted = sm.AnyStream(EXAMPLE_INT_SEQUENCE).sort(
        lambda i: 100,
        lambda i: -i,
        lambda i: i,
        reverse=False,
        step=4,
    )
    received_2 = multi_key_sorted.get_list()
    assert received_2 == descending, 'test case 2'
Exemple #3
0
def test_group_by():
    """Check group_by('x') on records with as_pairs=True and as_pairs=False.

    In both cases the 'y' values of each group are extracted and compared
    with the same expected list of lists.
    """
    pairs = [
        (1, 11), (1, 12),
        (2, 21),
        (3, 31), (3, 32), (3, 33),
    ]
    grouped_values = [[11, 12], [21], [31, 32, 33]]

    # Case 0: each group arrives as a pair, the records sit at index 1.
    records_0 = sm.AnyStream(pairs).to_row_stream().to_record_stream(columns=('x', 'y'))
    groups_0 = records_0.group_by('x', as_pairs=True)
    received_0 = groups_0.map_to_type(
        lambda a: [i.get('y') for i in a[1]],
        stream_type=sm.StreamType.RowStream,
    ).get_list()
    assert received_0 == grouped_values, 'test case 0'

    # Case 1: each group arrives as a plain collection of records.
    records_1 = sm.AnyStream(pairs).to_row_stream().to_record_stream(columns=('x', 'y'))
    groups_1 = records_1.group_by('x', as_pairs=False)
    received_1 = groups_1.map_to_type(
        lambda a: [i.get('y') for i in a],
        stream_type=sm.StreamType.RowStream,
    ).get_list()
    assert received_1 == grouped_values, 'test case 1'
Exemple #4
0
def test_records_select():
    """Check select() on records (case 1) and on rows (case 2).

    Both cases start from EXAMPLE_CSV_ROWS split into rows by ','.
    """
    expected_1 = [
        {'a': '1', 'd': None, 'e': None, 'f': '11', 'g': None, 'h': None},
        {'a': None, 'd': '2,22', 'e': None, 'f': 'NoneNone', 'g': None, 'h': None},
        {'a': None, 'd': None, 'e': '3', 'f': 'NoneNone', 'g': '3', 'h': '3'},
    ]
    rows_1 = sm.AnyStream(EXAMPLE_CSV_ROWS).to_line_stream().to_row_stream(delimiter=',')
    # Every row becomes a single-key record: first cell -> second cell.
    records_1 = rows_1.map_to_records(lambda p: {p[0]: p[1]})
    received_1 = records_1.select(
        'a',
        h='g',
        g='e',
        d='b',
        e=lambda r: r.get('c'),
        f=('a', lambda v: str(v) * 2),
    ).get_list()
    assert received_1 == expected_1, 'test case 1: records'

    expected_2 = [
        (1.00, ('a', '1'), 'a'),
        (2.22, ('b', '2.22'), 'b'),
        (3.00, ('c', '3'), 'c'),
    ]
    rows_2 = sm.AnyStream(EXAMPLE_CSV_ROWS).to_line_stream().to_row_stream(delimiter=',')
    # First pass swaps ',' for '.' in the second column so float() can parse it.
    normalized = rows_2.select(
        0,
        lambda s: s[1].replace(',', '.'),
    )
    received_2 = normalized.select(
        (float, 1),
        '*',
        0,
    ).get_list()
    assert received_2 == expected_2, 'test case 2: rows'
Exemple #5
0
def test_split_by_step():
    """Check that splitting by step=4 yields chunks of sizes 4, 4 and 1.

    The same expectation is applied to split_to_disk_by_step() and
    split_to_iter_by_step().
    """
    expected = [
        [1, 3, 5, 7],
        [9, 2, 4, 6],
        [8],
    ]

    received_0 = []
    for chunk in sm.AnyStream(EXAMPLE_INT_SEQUENCE).split_to_disk_by_step(step=4):
        received_0.append(chunk.get_list())
    assert received_0 == expected, 'test case 0'

    received_1 = []
    for chunk in sm.AnyStream(EXAMPLE_INT_SEQUENCE).split_to_iter_by_step(step=4):
        received_1.append(chunk.get_list())
    assert received_1 == expected, 'test case 1'
Exemple #6
0
def test_split_by_pos():
    """Check split() by one position and by a tuple of two positions.

    In case 2 every part is passed through count_to_items(), and the
    expected lists start with the size of the part.
    """
    pos_1 = 3
    pos_2 = 5

    expected_1 = (EXAMPLE_INT_SEQUENCE[:pos_1], EXAMPLE_INT_SEQUENCE[pos_1:])
    head, tail = sm.AnyStream(EXAMPLE_INT_SEQUENCE).split(pos_1)
    received_1 = (head.get_list(), tail.get_list())
    assert received_1 == expected_1, 'test case 1'

    total = len(EXAMPLE_INT_SEQUENCE)
    expected_2 = (
        [pos_1] + EXAMPLE_INT_SEQUENCE[:pos_1],
        [pos_2 - pos_1] + EXAMPLE_INT_SEQUENCE[pos_1:pos_2],
        [total - pos_2] + EXAMPLE_INT_SEQUENCE[pos_2:],
    )
    first, second, third = sm.AnyStream(EXAMPLE_INT_SEQUENCE).split((pos_1, pos_2))
    # Parts are consumed strictly in order: first, second, third.
    first_items = first.count_to_items().get_list()
    second_items = second.count_to_items().get_list()
    third_items = third.count_to_items().get_list()
    received_2 = (first_items, second_items, third_items)
    assert received_2 == expected_2, 'test case 2'
Exemple #7
0
def test_add_records():
    """Check add() on record streams, appending and (before=True) prepending."""
    addition = list(reversed(EXAMPLE_INT_SEQUENCE))
    expected_1 = [dict(item=v) for v in EXAMPLE_INT_SEQUENCE + addition]
    expected_2 = [dict(item=v) for v in addition + EXAMPLE_INT_SEQUENCE]

    base_records_1 = sm.AnyStream(EXAMPLE_INT_SEQUENCE).map_to_records(
        lambda i: dict(item=i),
    )
    received_1 = base_records_1.add(
        sm.AnyStream(addition).to_record_stream(),
    ).get_list()
    assert received_1 == expected_1, 'test case 1i'

    base_records_2 = sm.AnyStream(EXAMPLE_INT_SEQUENCE).to_record_stream()
    received_2 = base_records_2.add(
        sm.AnyStream(addition).to_record_stream(),
        before=True,
    ).get_list()
    assert received_2 == expected_2, 'test case 2i'
Exemple #8
0
def test_map():
    """Check map() and map_to_type() results and the resulting stream classes.

    The class name of each mapped stream is collected through submit() into
    one shared list, which is compared with expected_types at the end.
    map_to_type() is called with the stream type given as a class, as a
    StreamType member and as a string.
    """
    expected_types = ['AnyStream', 'LineStream', 'LineStream', 'LineStream']
    received_types = []

    negated = [-i for i in EXAMPLE_INT_SEQUENCE]
    negated_as_text = [str(-i) for i in EXAMPLE_INT_SEQUENCE]

    mapped_0 = sm.AnyStream(EXAMPLE_INT_SEQUENCE).to_iter().map(
        lambda i: -i,
    )
    received_0 = mapped_0.submit(
        received_types,
        lambda f: f.get_class_name(),
    ).get_list()
    assert received_0 == negated, 'test case 0'

    # Stream type given as a class.
    mapped_1 = sm.AnyStream(EXAMPLE_INT_SEQUENCE).map_to_type(
        lambda i: str(-i),
        stream_type=sm.LineStream,
    )
    received_1 = mapped_1.submit(
        received_types,
        lambda f: f.get_class_name(),
    ).get_list()
    assert received_1 == negated_as_text, 'test case 1'

    # Stream type given as a StreamType member.
    mapped_2 = sm.AnyStream(EXAMPLE_INT_SEQUENCE).map_to_type(
        lambda i: str(-i),
        stream_type=sm.StreamType.LineStream,
    )
    received_2 = mapped_2.submit(
        received_types,
        lambda f: f.get_class_name(),
    ).get_list()
    assert received_2 == negated_as_text, 'test case 2'

    # Stream type given by name.
    mapped_3 = sm.AnyStream(EXAMPLE_INT_SEQUENCE).map_to_type(
        lambda i: str(-i),
        stream_type='LineStream',
    )
    received_3 = mapped_3.submit(
        received_types,
        lambda f: f.get_class_name(),
    ).get_list()
    assert received_3 == negated_as_text, 'test case 3'

    assert received_types == expected_types, 'test for types'
Exemple #9
0
def test_filter():
    """Check that filter() with two predicates keeps only items passing both."""
    expected = [7, 6, 8]
    filtered = sm.AnyStream(EXAMPLE_INT_SEQUENCE).filter(
        lambda i: i > 5,
        lambda i: i <= 8,
    )
    received = filtered.get_list()
    assert received == expected
Exemple #10
0
def test_memory_sort():
    """Check memory_sort() with a custom key and reverse=True.

    The key maps 7 to 777, so 7 is expected first in the reversed order.
    """
    expected = [7, 9, 8, 6, 5, 4, 3, 2, 1]
    ordered = sm.AnyStream(EXAMPLE_INT_SEQUENCE).memory_sort(
        key=lambda i: 777 if i == 7 else i,
        reverse=True,
    )
    received = ordered.get_list()
    assert received == expected
Exemple #11
0
def test_sum_by_keys():
    """Check ms.sum_by_keys() applied to the stream data via apply_to_data().

    Records are keyed by ('b', 'a') and the 'h' counter is summed per key.
    """
    example = [
        {'a': 1, 'b': 2, 'h': 1},
        {'a': 3, 'b': 4, 'h': 5},
        {'a': 1, 'b': 2, 'h': 2},
    ]
    expected = [
        ((2, 1), {'h': 3}),
        ((4, 3), {'h': 5}),
    ]
    summed = sm.AnyStream(example).apply_to_data(
        lambda a: ms.sum_by_keys(
            a,
            keys=('b', 'a'),
            counters=('h', ),
        ),
    )
    received = summed.get_list()
    assert received == expected
Exemple #12
0
def test_flat_map():
    """Check that flat_map() flattens the list produced for every item."""
    source = ['a', 'b']
    expected = ['a', 'a', 'b', 'b']
    doubled = sm.AnyStream(source).flat_map(
        lambda i: [i, i],
    )
    received = doubled.get_list()
    assert received == expected
Exemple #13
0
def test_skip():
    """Check that skip(5) leaves [2, 4, 6, 8] of EXAMPLE_INT_SEQUENCE."""
    expected = [2, 4, 6, 8]
    remainder = sm.AnyStream(EXAMPLE_INT_SEQUENCE).skip(5)
    received = remainder.get_list()
    assert received == expected
Exemple #14
0
def test_save_and_read():
    """Check writing lines to EXAMPLE_FILENAME and reading them back.

    Case 0 consumes the stream returned by lazy_save(), case 1 re-reads the
    file afterwards, and case 2 writes with to_text_file() before reading.
    The final gzip column-file write is executed without any assertion.
    """
    expected = list(map(str, EXAMPLE_INT_SEQUENCE))

    # The lazily saved stream must be consumed before the file is re-read.
    lazily_saved = sm.AnyStream(EXAMPLE_INT_SEQUENCE).to_line_stream().lazy_save(
        EXAMPLE_FILENAME,
    )
    received_0 = lazily_saved.get_list()
    received_1 = sm.LineStream.from_text_file(EXAMPLE_FILENAME).get_list()
    assert received_0 == expected, 'test case 0: lazy_save()'
    assert received_1 == expected, 'test case 1: secondary fileholder'

    line_stream = sm.AnyStream(EXAMPLE_INT_SEQUENCE).to_line_stream()
    line_stream.to_text_file(EXAMPLE_FILENAME)
    received_2 = sm.LineStream.from_text_file(EXAMPLE_FILENAME).get_list()
    assert received_2 == expected, 'test case 2: to_text_file()'

    row_stream = sm.AnyStream(EXAMPLE_INT_SEQUENCE).to_row_stream(
        function=lambda i: [i],
    )
    row_stream.to_column_file(
        EXAMPLE_FILENAME,
        gzip=True,
    )
Exemple #15
0
def test_take():
    """Check that take(5) returns [1, 3, 5, 7, 9] from EXAMPLE_INT_SEQUENCE."""
    expected = [1, 3, 5, 7, 9]
    taken = sm.AnyStream(EXAMPLE_INT_SEQUENCE).take(5)
    received = taken.get_list()
    assert received == expected
Exemple #16
0
def test_parse_json():
    """Check parse_json() on lines, with default_value used for the invalid line."""
    lines = ['{"a": "b"}', 'abc', '{"d": "e"}']
    fallback = {'err': 'err'}
    expected = [{'a': 'b'}, {'err': 'err'}, {'d': 'e'}]
    parsed = sm.AnyStream(lines).to_line_stream().parse_json(
        default_value=fallback,
    )
    received = parsed.get_list()
    assert received == expected
Exemple #17
0
def test_split_by_func():
    """Check split() by predicate: items below 5 first, items >= 5 second."""
    expected = ([1, 3, 2, 4], [5, 7, 9, 6, 8])
    below, not_below = sm.AnyStream(EXAMPLE_INT_SEQUENCE).split(
        lambda i: i >= 5,
    )
    received = (below.get_list(), not_below.get_list())
    assert received == expected
Exemple #18
0
def test_separate_first():
    """Check that separate_first() gives the first item and a stream of the rest."""
    expected = [EXAMPLE_INT_SEQUENCE[0], EXAMPLE_INT_SEQUENCE[1:]]
    separated = sm.AnyStream(EXAMPLE_INT_SEQUENCE).separate_first()
    received = list(separated)
    # The second element is a stream; materialise it for comparison.
    rest_items = received[1].get_list()
    received[1] = rest_items
    assert received == expected
Exemple #19
0
def test_to_rows():
    """Check conversion of EXAMPLE_CSV_ROWS lines into rows with ',' passed positionally."""
    expected = [
        ['a', '1'],
        ['b', '2,22'],
        ['c', '3'],
    ]
    line_stream = sm.AnyStream(EXAMPLE_CSV_ROWS).to_line_stream()
    received = line_stream.to_row_stream(',').get_list()
    assert received == expected
Exemple #20
0
def test_disk_sort_by_key():
    """Check disk_sort_by_key(step=5) on pairs built from EXAMPLE_INT_SEQUENCE.

    Each pair is (k, str(k) repeated k times); the result is expected in
    ascending key order as two-element lists.
    """
    expected = [[k, str(k) * k] for k in range(1, 10)]
    unsorted_pairs = [(k, str(k) * k) for k in EXAMPLE_INT_SEQUENCE]
    pair_stream = sm.AnyStream(unsorted_pairs).to_pairs()
    received = pair_stream.disk_sort_by_key(step=5).get_list()
    assert received == expected
Exemple #21
0
def test_map_filter_take():
    """Check a map -> filter -> take chain: negate, keep odd values, take three."""
    expected = [-1, -3, -5]
    negated = sm.AnyStream(EXAMPLE_INT_SEQUENCE).map(
        lambda i: -i,
    )
    odd_only = negated.filter(
        lambda i: i % 2,
    )
    received = odd_only.take(3).get_list()
    assert received == expected
Exemple #22
0
def test_add():
    """Check add() with a plain list and with a stream, appended and prepended.

    Cases suffixed 'i' pass the addition as a list, cases suffixed 'f' pass
    it wrapped in AnyStream; before=True is expected to put it first.
    """
    addition = list(reversed(EXAMPLE_INT_SEQUENCE))
    appended = EXAMPLE_INT_SEQUENCE + addition
    prepended = addition + EXAMPLE_INT_SEQUENCE

    received_1i = sm.AnyStream(EXAMPLE_INT_SEQUENCE).add(addition).get_list()
    assert received_1i == appended, 'test case 1i'

    received_2i = sm.AnyStream(EXAMPLE_INT_SEQUENCE).add(addition, before=True).get_list()
    assert received_2i == prepended, 'test case 2i'

    received_1f = sm.AnyStream(EXAMPLE_INT_SEQUENCE).add(sm.AnyStream(addition)).get_list()
    assert received_1f == appended, 'test case 1f'

    received_2f = sm.AnyStream(EXAMPLE_INT_SEQUENCE).add(
        sm.AnyStream(addition),
        before=True,
    ).get_list()
    assert received_2f == prepended, 'test case 2f'
Exemple #23
0
def smoke_test_show():
    """Call show() on a selected record stream and on its collect() result.

    There are no assertions; the calls only have to complete.
    """
    rows = sm.AnyStream(EXAMPLE_CSV_ROWS).to_line_stream().to_row_stream(delimiter=',')
    records = rows.map_to_records(lambda p: {p[0]: p[1]})
    stream0 = records.select(
        'a',
        h='g',
        g='e',
        d='b',
        e=lambda r: r.get('c'),
        f=('a', lambda v: str(v) * 2),
    )
    stream0.show()
    collected = stream0.collect()
    collected.show()
Exemple #24
0
def test_save_and_read():
    """Check writing lines to EXAMPLE_FILENAME and reading them back.

    Case 0 consumes the stream returned by lazy_save(), case 1 re-reads the
    file afterwards, and case 2 writes with to_text_file() before reading.
    """
    expected = list(map(str, EXAMPLE_INT_SEQUENCE))

    # The lazily saved stream must be consumed before the file is re-read.
    lazily_saved = sm.AnyStream(EXAMPLE_INT_SEQUENCE).to_line_stream().lazy_save(
        EXAMPLE_FILENAME,
    )
    received_0 = lazily_saved.get_list()
    received_1 = sm.LineStream.from_text_file(EXAMPLE_FILENAME).get_list()
    assert received_0 == expected, 'test case 0: lazy_save(), {} != {}'.format(received_0, expected)
    assert received_1 == expected, 'test case 1: secondary fileholder, {} != {}'.format(received_1, expected)

    line_stream = sm.AnyStream(EXAMPLE_INT_SEQUENCE).to_line_stream()
    line_stream.to_text_file(EXAMPLE_FILENAME)
    received_2 = sm.LineStream.from_text_file(EXAMPLE_FILENAME).get_list()
    assert received_2 == expected, 'test case 2: to_text_file(), {} != {}'.format(received_2, expected)
Exemple #25
0
def test_sorted_group_by_key():
    """Check that sorted_group_by_key() collects the values of each key in a pair stream."""
    pairs = [
        (1, 11), (1, 12),
        (2, 21),
        (3, 31), (3, 32), (3, 33),
    ]
    expected = [
        (1, [11, 12]),
        (2, [21]),
        (3, [31, 32, 33]),
    ]
    pair_stream = sm.AnyStream(pairs).to_pairs()
    received = pair_stream.sorted_group_by_key().get_list()
    assert received == expected
Exemple #26
0
def test_enumerated():
    """Check that the stream's enumerate() matches the built-in enumerate()."""
    expected = [pair for pair in enumerate(EXAMPLE_INT_SEQUENCE)]
    numbered = sm.AnyStream(EXAMPLE_INT_SEQUENCE).enumerate()
    received = numbered.get_list()
    assert received == expected
Exemple #27
0
def test_records_join():
    """Check map_side_join() and join() of two record collections on key 'y'.

    Cases 0-3 use map_side_join() with varying `how` / `right_is_uniq`;
    cases 4 and 5 use join() with how='inner' and how='right'.
    """
    example_a = [{'x': 0, 'y': 0, 'z': 0}, {'y': 2, 'z': 7}, {'x': 8, 'y': 9}]
    example_b = [{'x': 1, 'y': 2, 'z': 3}, {'x': 4, 'y': 2}, {'x': 6, 'y': 0}]

    expected_0 = [{'x': 6, 'y': 0, 'z': 0}, {'x': 4, 'y': 2, 'z': 7}, {'x': 8, 'y': 9}]
    received_0 = sm.AnyStream(
        example_a,
    ).map_side_join(
        sm.AnyStream(example_b),
        key='y',
        right_is_uniq=True,
    ).get_list()
    assert received_0 == expected_0, 'test case 0: right is uniq'

    expected_1 = [{'x': 6, 'y': 0, 'z': 0}, {'x': 1, 'y': 2, 'z': 3}, {'x': 4, 'y': 2, 'z': 7}, {'x': 8, 'y': 9}]
    received_1 = sm.AnyStream(
        example_a,
    ).map_side_join(
        sm.AnyStream(example_b),
        key='y',
        right_is_uniq=False,
    ).get_list()
    assert received_1 == expected_1, 'test case 1: right is not uniq'

    expected_2 = [{'x': 6, 'y': 0, 'z': 0}, {'x': 1, 'y': 2, 'z': 3}, {'x': 4, 'y': 2, 'z': 7}, {'x': 8, 'y': 9}]
    received_2 = sm.AnyStream(
        example_a,
    ).map_side_join(
        sm.AnyStream(example_b),
        key='y',
        how='left',
        right_is_uniq=False,
    ).get_list()
    assert received_2 == expected_2, 'test case 2: left join'

    expected_3 = [{'x': 6, 'y': 0, 'z': 0}, {'x': 1, 'y': 2, 'z': 3}, {'x': 4, 'y': 2, 'z': 7}, {'x': 8, 'y': 9}]
    received_3 = sm.AnyStream(
        example_a,
    ).map_side_join(
        sm.AnyStream(example_b),
        key='y',
        how='full',
        right_is_uniq=False,
    ).get_list()
    assert received_3 == expected_3, 'test case 3: full join'

    expected_4 = [{'x': 6, 'y': 0, 'z': 0}, {'x': 1, 'y': 2, 'z': 3}, {'x': 4, 'y': 2, 'z': 7}]
    received_4 = sm.AnyStream(
        example_a,
    ).join(
        sm.AnyStream(example_b),
        key='y',
        how='inner',
    ).get_list()
    # The message used to say "left join" although the call uses how='inner'.
    assert received_4 == expected_4, 'test case 4: sorted inner join'

    expected_5 = [{'x': 6, 'y': 0, 'z': 0}, {'x': 1, 'y': 2, 'z': 3}, {'x': 4, 'y': 2, 'z': 7}]
    received_5 = sm.AnyStream(
        example_a,
    ).join(
        sm.AnyStream(example_b),
        key='y',
        how='right',
    ).get_list()
    assert received_5 == expected_5, 'test case 5: sorted right join'
Exemple #28
0
def test_any_join():
    """Check map_side_join() and join() of two plain-item collections.

    Items are matched by type (cases 0-1) or by a composite key of type and
    string length (cases 2-5). From case 1 on, results are compared as sets,
    so the order of the joined pairs is not checked.
    """
    example_a = ['a', 'b', 1]
    example_b = ['c', 2, 33]

    expected_0 = [('a', 'c'), ('b', 'c'), (1, 33)]
    received_0 = sm.AnyStream(
        example_a,
    ).map_side_join(
        sm.AnyStream(example_b),
        key=type,
        right_is_uniq=True,
    ).get_list()
    assert received_0 == expected_0, 'test case 0: right is uniq'

    expected_1 = [('a', 'c'), ('b', 'c'), (1, 2), (1, 33)]
    received_1 = sm.AnyStream(
        example_a,
    ).map_side_join(
        sm.AnyStream(example_b),
        key=type,
        right_is_uniq=False,
    ).get_list()
    assert set(received_1) == set(expected_1), 'test case 1: right is not uniq'

    expected_2 = [('a', 'c'), ('b', 'c'), (1, 2)]
    received_2 = sm.AnyStream(
        example_a,
    ).map_side_join(
        sm.AnyStream(example_b),
        key=(type, lambda i: len(str(i))),
        how='left',
        right_is_uniq=False,
    ).get_list()
    assert set(received_2) == set(expected_2), 'test case 2: left join using composite key'

    expected_3 = [('a', 'c'), ('b', 'c'), (1, 2), (None, 33)]
    received_3 = sm.AnyStream(
        example_a,
    ).map_side_join(
        sm.AnyStream(example_b),
        key=(type, lambda i: len(str(i))),
        how='full',
        right_is_uniq=False,
    ).get_list()
    assert set(received_3) == set(expected_3), 'test case 3: full join using composite key'

    expected_4 = [(1, 2), ('a', 'c'), ('b', 'c')]
    received_4 = sm.AnyStream(
        example_a,
    ).join(
        sm.AnyStream(example_b),
        key=(lambda i: str(type(i)), lambda i: len(str(i))),
        how='inner',
    ).get_list()
    # The message used to say "left join" although the call uses how='inner'.
    assert set(received_4) == set(expected_4), 'test case 4: sorted inner join'

    expected_5 = [(1, 2), (None, 33), ('a', 'c'), ('b', 'c')]
    received_5 = sm.AnyStream(
        example_a,
    ).join(
        sm.AnyStream(example_b),
        key=(lambda i: str(type(i)), lambda i: len(str(i))),
        how='right',
    ).get_list()
    assert set(received_5) == set(expected_5), 'test case 5: sorted right join'