def test_appends():
    """Partition the same input twice and confirm every row ends up stored twice.

    Both `bpartition -l 10 prefix` runs must report the same three file names;
    `bcat -l -p` must then show each row duplicated, and `ls` must still list
    exactly those three files.
    """
    partition_cmd = 'bsv | bpartition -l 10 prefix'
    with shell.tempdir():
        rows = """
        b,c,d
        e,f,g
        h,i,j
        """
        files = """
        prefix_02
        prefix_04
        prefix_05
        """
        # Two identical runs: the second one targets files that already exist.
        for _ in range(2):
            reported = shell.run(partition_cmd, stdin=unindent(rows))
            assert rm_whitespace(unindent(files)) == reported
        contents = """
        prefix_02:h,i,j
        prefix_02:h,i,j
        prefix_04:e,f,g
        prefix_04:e,f,g
        prefix_05:b,c,d
        prefix_05:b,c,d
        """
        assert unindent(contents).strip() == shell.run('bcat -l -p prefix*')
        assert unindent(files).strip() == shell.run('ls prefix*')
def test_appends():
    """Partition keyed rows twice and verify both runs land in the same files.

    The input's first column (0, 1, 2) is followed by the row payload. Each
    `bpartition 10 prefix` run must report `prefix00`..`prefix02`; afterwards
    `bcat --prefix` must show every payload twice and `ls` must list the same
    three files.
    """
    with shell.tempdir():
        stdin = """
        0,b,c,d
        1,e,f,g
        2,h,i,j
        """
        stdout = """
        prefix00
        prefix01
        prefix02
        """
        # The commands need no interpolation, so they are plain (non-f) strings.
        assert rm_whitespace(unindent(stdout)) == shell.run(
            'bsv | bpartition 10 prefix', stdin=unindent(stdin))
        assert rm_whitespace(unindent(stdout)) == shell.run(
            'bsv | bpartition 10 prefix', stdin=unindent(stdin))
        stdout = """
        prefix00:b,c,d
        prefix00:b,c,d
        prefix01:e,f,g
        prefix01:e,f,g
        prefix02:h,i,j
        prefix02:h,i,j
        """
        assert unindent(stdout).strip() == shell.run('bcat --prefix prefix*')
        stdout = """
        prefix00
        prefix01
        prefix02
        """
        assert unindent(stdout).strip() == shell.run('ls prefix*')
def test_cycling4():
    """Feed four short rows to `_csv.8` and expect them back unchanged."""
    text = rm_whitespace("""
    a
    b
    c
    de
    """)
    produced = run(text, '_csv.8').strip()
    assert text == produced
def test_cycling4():
    """Feed four short rows to `bin/_csv.8` and compare with their `typed` form."""
    text = rm_whitespace("""
    a
    b
    c
    de
    """)
    produced = run(text, 'bin/_csv.8').strip()
    assert typed(text) == produced
def test_basic():
    """Check that `bsort` places `a` ahead of `aa`."""
    unsorted = """
    aa
    a
    """
    ordered = """
    a
    aa
    """
    actual = run(rm_whitespace(unsorted), 'bsv | bsort | csv')
    assert rm_whitespace(ordered) + '\n' == actual
def test_compatability():
    """Check that `brsort` returns single-column rows in descending order."""
    unsorted = """
    b
    c
    a
    """
    descending = """
    c
    b
    a
    """
    actual = run(rm_whitespace(unsorted), 'bsv | brsort | bin/csv')
    assert rm_whitespace(descending) + '\n' == actual
def test_compatability():
    """Check that `bsort --reversed` returns single-column rows in descending order."""
    unsorted = """
    b
    c
    a
    """
    descending = """
    c
    b
    a
    """
    actual = run(rm_whitespace(unsorted), 'bsv | bsort --reversed | csv')
    assert rm_whitespace(descending) + '\n' == actual
def test_basic():
    """Run two comma-separated rows through `_csv.8` and expect one value per line."""
    rows = """
    a,b
    cd,e
    """
    values = """
    a
    b
    cd
    e
    """
    actual = run(rm_whitespace(rows), '_csv.8')
    assert rm_whitespace(values) + '\n' == actual
def test_holes():
    """Check that `bcut 2,3` keeps empty fields in the selected columns."""
    rows = """
    a,b,
    1,,3
    x,y,z
    """
    selected = """
    b,
    ,3
    y,z
    """
    actual = run(rm_whitespace(rows), 'bsv | bcut 2,3 | csv')
    assert rm_whitespace(selected) + '\n' == actual
def test_types():
    """Feed text and numeric-looking values to `bin/_csv.8` and compare with `typed`."""
    text = rm_whitespace("""
    a
    b
    c
    1
    2
    3.1
    4.
    """)
    produced = run(text, 'bin/_csv.8').strip()
    assert typed(text) == produced
def test_repeats():
    """Check that `bcut 1,3,1,1` may select the same column more than once."""
    rows = """
    x,y,z
    1,2,3
    a,b,c,d
    """
    selected = """
    x,z,x,x
    1,3,1,1
    a,c,a,a
    """
    actual = run(rm_whitespace(rows), 'bsv | bcut 1,3,1,1 | csv')
    assert rm_whitespace(selected) + '\n' == actual
def test_compatability():
    """Check that `bcut 1,2` and `cut -d, -f1,2` produce the same output."""
    rows = rm_whitespace("""
    a,b,c,d
    1,2,3
    x,y
    """)
    first_two = rm_whitespace("""
    a,b
    1,2
    x,y
    """) + '\n'
    # Same expectation for the bsv pipeline and for the stock `cut` command.
    assert first_two == run(rows, 'bsv | bcut 1,2 | csv')
    assert first_two == run(rows, 'cut -d, -f1,2')
def test_basic():
    """Check that `bcountrows` reports 6 for six input rows."""
    rows = """
    a
    a
    a
    b
    b
    a
    """
    count = """
    6
    """
    actual = run(rm_whitespace(rows), 'bsv | bcountrows | bschema i64:a | csv')
    assert rm_whitespace(count) + '\n' == actual
def test_basic2():
    """Check that `bcombine 2,1` prepends a `col2:col1` field to every row."""
    rows = """
    a,b,c,d
    1,2,3
    x,y
    """
    combined = """
    b:a,a,b,c,d
    2:1,1,2,3
    y:x,x,y
    """
    actual = run(rm_whitespace(rows), 'bsv | bcombine 2,1 | csv')
    assert rm_whitespace(combined) + '\n' == actual
def test_single_column():
    """Check `bbucket 4` on single-column rows.

    Each row is expected back with a leading number column (3, 3 and 2 for
    this input).
    """
    rows = """
    a
    y
    x
    """
    bucketed = """
    3,a
    3,y
    2,x
    """
    actual = run(rm_whitespace(rows), 'bsv | bbucket 4 | bin/csv')
    assert rm_whitespace(bucketed) + '\n' == actual
def test_basic():
    """Check `bbucket 4` on multi-column rows.

    Each row is expected back intact with a leading number column (3, 3 and 2
    for this input).
    """
    rows = """
    a,b,c,d
    e,f,g
    x,y
    """
    bucketed = """
    3,a,b,c,d
    3,e,f,g
    2,x,y
    """
    actual = run(rm_whitespace(rows), 'bsv | bbucket 4 | bin/csv')
    assert rm_whitespace(bucketed) + '\n' == actual
def test_compatability2():
    """Check that `bsort` orders two-column rows ascending."""
    unsorted = """
    c,c
    b,b
    a,a
    """
    ascending = """
    a,a
    b,b
    c,c
    """
    actual = run(rm_whitespace(unsorted), 'bsv | bsort | csv')
    assert rm_whitespace(ascending) + '\n' == actual
def test_basic1():
    """Check that `bcombine 1,2` prepends a `col1:col2` field to every row."""
    rows = """
    a,b,c,d
    1,2,3
    x,y
    """
    combined = """
    a:b,a,b,c,d
    1:2,1,2,3
    x:y,x,y
    """
    actual = run(rm_whitespace(rows), 'bsv | bcombine 1,2 | csv')
    assert rm_whitespace(combined) + '\n' == actual
def test_basic():
    """Check that `bcounteach` counts each run of consecutive equal rows.

    The trailing `a` is reported separately from the leading three, so the
    expected output has three lines rather than two.
    """
    rows = """
    a
    a
    a
    b
    b
    a
    """
    run_counts = """
    a,3
    b,2
    a,1
    """
    actual = run(rm_whitespace(rows), 'bsv | bcounteach | bschema *,i64:a | csv')
    assert rm_whitespace(run_counts) + '\n' == actual
def test_basic():
    """Check that `bdedupe-hash` keeps only the first row seen for each first column."""
    rows = """
    a,1
    a,2
    a,3
    b,4
    b,5
    a,6
    a,7
    """
    firsts = """
    a,1
    b,4
    """
    actual = run(rm_whitespace(rows), 'bsv | bdedupe-hash | csv')
    assert rm_whitespace(firsts) + '\n' == actual
def test_basic():
    """Check that `bsumeach-hash i64` totals the second column per key.

    Non-adjacent `a` rows are summed together (1 + 2 + 3 + 6 = 12); the result
    is piped through `bsort` before being compared.
    """
    rows = """
    a,1
    a,2
    a,3
    b,4
    b,5
    a,6
    """
    totals = """
    a,12
    b,9
    """
    pipeline = 'bsv | bschema *,a:i64 | bsumeach-hash i64 | bschema *,i64:a | bsort | csv'
    actual = run(rm_whitespace(rows), pipeline)
    assert rm_whitespace(totals) + '\n' == actual
def test_basic3():
    """Check that `bcombine 2,1` fails when a row has no second column."""
    rows = """
    a,b,c,d
    1,2,3
    x
    """
    cleaned = rm_whitespace(rows)
    # The last row has a single field, so the pipeline is expected to raise.
    with pytest.raises(Exception):
        run(cleaned, 'bsv | bcombine 2,1 | csv')
def test_basic():
    """Check that `bmerge` interleaves two sorted files into one sorted stream.

    The file names are supplied three ways: space separated on one line, one
    per line, and one per line with a blank line in between.
    """
    merge_cmds = (
        'echo a.bsv b.bsv | bmerge | csv',
        '(echo a.bsv; echo b.bsv) | bmerge | csv',
        '(echo a.bsv; echo; echo b.bsv) | bmerge | csv',
    )
    with shell.tempdir():
        shell.run('echo -e "a,a\nc,c\ne,e\n" | bsv > a.bsv')
        shell.run('echo -e "b,b\nd,d\nf,f\n" | bsv > b.bsv')
        merged = """
        a,a
        b,b
        c,c
        d,d
        e,e
        f,f
        """
        for cmd in merge_cmds:
            assert rm_whitespace(unindent(merged)) == shell.run(cmd, stream=True)
def test_basic():
    """Check that `bsumeach f64` sums the second column per run of equal keys.

    The sums are rounded to three decimals before comparison so float noise in
    the tool's output does not fail the test.
    """
    rows = """
    a,1.1
    a,2.1
    a,3.1
    b,4.1
    b,5.1
    a,6.1
    """
    sums = """
    a,6.3
    b,9.2
    a,6.1
    """
    raw = run(
        rm_whitespace(rows),
        'bsv | bschema *,a:f64 | bsumeach f64 | bschema *,f64:a | csv')
    rounded = []
    for line in raw.splitlines():
        key, value = line.split(',')
        rounded.append(f'{key},{round(float(value), 3)}')
    normalized = '\n'.join(rounded) + '\n'
    assert rm_whitespace(sums) + '\n' == normalized
def test_basic():
    """Exercise `bcat` with and without `--prefix` and `--head`.

    Three files `a`, `b`, `c` are created, each holding its own name twice.
    """
    with shell.tempdir():
        shell.run('for char in a a b b c c; do echo $char | bsv >> $char; done')

        # --head 1 with --prefix: one prefixed row per file.
        first_prefixed = """
        a:a
        b:b
        c:c
        """
        assert rm_whitespace(unindent(first_prefixed)) == shell.run('bcat --prefix --head 1 a b c')

        # All rows with --prefix, whatever the flag order or head limit.
        all_prefixed = rm_whitespace(unindent("""
        a:a
        a:a
        b:b
        b:b
        c:c
        c:c
        """))
        assert all_prefixed == shell.run('bcat --prefix --head 2 a b c')
        assert all_prefixed == shell.run('bcat --head 2 --prefix a b c')
        assert all_prefixed == shell.run('bcat --prefix a b c')

        # --head 1 without --prefix: one bare row per file.
        first_plain = """
        a
        b
        c
        """
        assert rm_whitespace(unindent(first_plain)) == shell.run('bcat --head 1 a b c')

        # No flags: every row of every file.
        all_plain = """
        a
        a
        b
        b
        c
        c
        """
        assert rm_whitespace(unindent(all_plain)) == shell.run('bcat a b c')
def test_single_column():
    """Check `bcut` with one column index on rows of differing widths."""
    # Column 1, rows growing wider.
    rows = """
    x,y
    1,2,3
    a,b,c,d
    """
    picked = """
    x
    1
    a
    """
    actual = run(rm_whitespace(rows), 'bsv | bcut 1 | csv')
    assert rm_whitespace(picked) + '\n' == actual

    # Column 1, rows growing narrower.
    rows = """
    a,b,c,d
    1,2,3
    x,y
    """
    picked = """
    a
    1
    x
    """
    actual = run(rm_whitespace(rows), 'bsv | bcut 1 | csv')
    assert rm_whitespace(picked) + '\n' == actual

    # Column 2 of the same rows.
    rows = """
    a,b,c,d
    1,2,3
    x,y
    """
    picked = """
    b
    2
    y
    """
    actual = run(rm_whitespace(rows), 'bsv | bcut 2 | csv')
    assert rm_whitespace(picked) + '\n' == actual
def test_forward():
    """Check `bcut` with ascending column indexes (`1,2` and `1,3`)."""
    # Columns 1,2.
    rows = """
    a,b,c,d
    1,2,3
    x,y
    """
    picked = """
    a,b
    1,2
    x,y
    """
    actual = run(rm_whitespace(rows), 'bsv | bcut 1,2 | csv')
    assert rm_whitespace(picked) + '\n' == actual

    # Columns 1,3 with the widest row first.
    rows = """
    a,b,c,d
    1,2,3
    x,y,z
    """
    picked = """
    a,c
    1,3
    x,z
    """
    actual = run(rm_whitespace(rows), 'bsv | bcut 1,3 | csv')
    assert rm_whitespace(picked) + '\n' == actual

    # Columns 1,3 with the widest row last.
    rows = """
    x,y,z
    1,2,3
    a,b,c,d
    """
    picked = """
    x,z
    1,3
    a,c
    """
    actual = run(rm_whitespace(rows), 'bsv | bcut 1,3 | csv')
    assert rm_whitespace(picked) + '\n' == actual
def test_reverse():
    """Check `bcut` with descending column indexes (`2,1` and `3,1`)."""
    # Columns 2,1.
    rows = """
    a,b,c,d
    1,2,3
    x,y
    """
    picked = """
    b,a
    2,1
    y,x
    """
    actual = run(rm_whitespace(rows), 'bsv | bcut 2,1 | csv')
    assert rm_whitespace(picked) + '\n' == actual

    # Columns 3,1 with the widest row first.
    rows = """
    a,b,c,d
    1,2,3
    x,y,z
    """
    picked = """
    c,a
    3,1
    z,x
    """
    actual = run(rm_whitespace(rows), 'bsv | bcut 3,1 | csv')
    assert rm_whitespace(picked) + '\n' == actual

    # Columns 3,1 with the widest row last.
    rows = """
    x,y,z
    1,2,3
    a,b,c,d
    """
    picked = """
    z,x
    3,1
    c,a
    """
    actual = run(rm_whitespace(rows), 'bsv | bcut 3,1 | csv')
    assert rm_whitespace(picked) + '\n' == actual
def test_encoding():
    """Pin the exact bytes `bsv` emits for small inputs and check the csv round trip.

    Covers a lone newline, a one-column row and a three-column row. The
    expected layout is a chunk header followed by the chunk body; see load.h
    for the field definitions.
    """
    # A lone newline encodes to no bytes at all and round-trips unchanged.
    newline = '\n'
    assert b'' == runb(newline, 'bsv')
    assert newline == run(newline, 'bsv | csv')

    # One row with a single column.
    one_column = rm_whitespace("""
    a
    """)
    expected = b''.join([
        struct.pack('i', 6),  # chunk header: 6 bytes of chunk body follow
        struct.pack('H', 0),  # uint16 max, see load.h
        struct.pack('H', 1),  # uint16 size of the only value
        b'a\0',
    ])
    assert expected == runb(one_column, 'bsv')
    assert one_column + '\n' == run(one_column, 'bsv | csv')

    # One row with three columns of lengths 1, 2 and 3.
    three_columns = rm_whitespace("""
    a,bb,ccc
    """)
    expected = b''.join([
        struct.pack('i', 17),  # chunk header: 17 bytes of chunk body follow
        struct.pack('H', 2),  # uint16 max, see load.h
        struct.pack('H', 1),  # uint16 size of the first value
        struct.pack('H', 2),  # uint16 size of the second value
        struct.pack('H', 3),  # uint16 size of the third value
        b'a\0bb\0ccc\0',
    ])
    assert expected == runb(three_columns, 'bsv')
    assert three_columns + '\n' == run(three_columns, 'bsv | csv')

    # Same newline input again, this time passed through rm_whitespace first.
    cleaned = rm_whitespace('\n')
    assert cleaned + '\n' == run(cleaned, 'bsv | csv')
def test_without_prefix():
    """Check that `bpartition -l 10` with no prefix reports bare bucket names."""
    with shell.tempdir():
        rows = """
        b,c,d
        e,f,g
        h,i,j
        """
        buckets = """
        02
        04
        05
        """
        reported = shell.run('bsv | bpartition -l 10', stdin=unindent(rows))
        assert rm_whitespace(unindent(buckets)) == reported