def test_resum_equal_in_memory():
    """Exercise ``resum_equal`` on an in-memory corpus with both ``keep_all`` modes.

    With ``keep_all=False`` the test expects a single remaining line whose
    "qux" value equals the column total computed beforehand. With
    ``keep_all=True`` it expects the line count to be unchanged and every
    line's "qux" value to equal that same total.
    """
    # NOTE(review): a test with this same name is defined more than once in
    # this file; only the last definition is visible after import.

    def load_corpus():
        # Build a fresh instance so each scenario starts from unmodified data.
        return LineFileInMemory(
            "tests/smallcorpus.txt.bz2",
            header="foo bar baz qux".split(),
            path="tests/tmp/testcorpus",
        )

    corpus = load_corpus()
    original_length = len(corpus)
    expected_total = corpus.sum_column("qux")

    # Scenario 1: drop all but one line.
    corpus.resum_equal("foo", "qux", assert_sorted=True, keep_all=False)
    assert_equal(len(corpus), 1)
    for row in corpus.lines():
        qux_value = corpus.extract_columns(row, "qux")[0]
        assert_equal(int(qux_value), expected_total)

    # Scenario 2: keep every line.
    corpus = load_corpus()
    corpus.resum_equal("foo", "qux", assert_sorted=True, keep_all=True)
    assert_equal(len(corpus), original_length)
    for row in corpus.lines():
        qux_value = corpus.extract_columns(row, "qux")[0]
        assert_equal(int(qux_value), expected_total)
def test_clean_in_memory():
    """Exercise ``clean`` on the malformed in-memory corpus under four configurations.

    Each scenario reloads the fixture so earlier cleaning does not affect
    later assertions. The expected line counts (two lines dropped, 8562
    lines kept) are specific to the ``smallcorpus-malformed`` fixture.
    """
    # NOTE(review): a test with this same name is defined more than once in
    # this file; only the last definition is visible after import.

    def load_malformed():
        # Fresh instance of the malformed fixture for each scenario.
        return LineFileInMemory(
            "tests/smallcorpus-malformed.txt.bz2",
            header="foo bar baz qux".split(),
            path="tests/tmp/testcorpus",
        )

    # Scenario 1: column-count check only; exactly two lines should be tossed.
    corpus = load_malformed()
    original_length = len(corpus)
    corpus.clean(
        columns=4,
        lower=False,
        alphanumeric=False,
        count_columns=True,
        nounderscores=False,
        echo_toss=True,
    )
    assert_equal(len(corpus), original_length - 2)

    # Scenario 2: lowercase + alphanumeric filtering, no column counting.
    corpus = load_malformed()
    corpus.clean(
        lower=True,
        alphanumeric=True,
        count_columns=False,
        echo_toss=True,
    )
    assert_equal(len(corpus), 8562)

    # Scenario 3: a filter that rejects everything should leave no lines.
    corpus = load_malformed()
    corpus.clean(
        lower=True,
        alphanumeric=True,
        count_columns=False,
        echo_toss=True,
        filter_fn=lambda x: False,
    )
    assert_equal(len(corpus), 0)

    # Scenario 4: a modifier that rewrites every line to a constant string.
    corpus = load_malformed()
    corpus.clean(
        lower=True,
        alphanumeric=False,
        count_columns=False,
        echo_toss=True,
        modifier_fn=lambda x: "hello",
    )
    assert_equal(len(corpus), original_length)
    for raw_line in corpus.lines(parts=False):
        assert_equal(raw_line, "hello")
def test_resum_equal_in_memory():
    """Verify ``resum_equal`` for ``keep_all=False`` and ``keep_all=True``.

    The "qux" column total is taken before any modification. After
    ``resum_equal("foo", "qux", ...)`` the test asserts that every remaining
    line carries that total in "qux": one line when ``keep_all`` is false,
    the original number of lines when it is true.
    """
    # NOTE(review): a test with this same name is defined more than once in
    # this file; only the last definition is visible after import.
    source_file = "tests/smallcorpus.txt.bz2"
    work_path = "tests/tmp/testcorpus"

    def check_all_rows_carry(linefile, total):
        # Every line's "qux" field, read as an int, must equal the total.
        for entry in linefile.lines():
            fields = linefile.extract_columns(entry, "qux")
            assert_equal(int(fields[0]), total)

    linefile = LineFileInMemory(
        source_file, header="foo bar baz qux".split(), path=work_path
    )
    line_count = len(linefile)
    qux_total = linefile.sum_column("qux")

    linefile.resum_equal("foo", "qux", assert_sorted=True, keep_all=False)
    assert_equal(len(linefile), 1)
    check_all_rows_carry(linefile, qux_total)

    # Reload so the second mode runs against untouched data.
    linefile = LineFileInMemory(
        source_file, header="foo bar baz qux".split(), path=work_path
    )
    linefile.resum_equal("foo", "qux", assert_sorted=True, keep_all=True)
    assert_equal(len(linefile), line_count)
    check_all_rows_carry(linefile, qux_total)
def test_basics_in_memory():
    """Check header handling and column add/delete/copy on an in-memory corpus.

    The header is passed as a single space-separated string; the test
    asserts that ``header`` is exposed as the corresponding list of names.
    It then creates a constant column, deletes it, and re-creates it as a
    copy of "qux", checking the header and per-line values at each step.
    """
    # NOTE(review): a test with this same name is defined more than once in
    # this file; only the last definition is visible after import.
    corpus = LineFileInMemory(
        "tests/smallcorpus.txt.bz2",
        header="foo bar baz qux",
        path="tests/tmp/testcorpus",
    )
    assert_equal(corpus.header, "foo bar baz qux".split())
    assert_equal(corpus.files, ["tests/smallcorpus.txt.bz2"])

    # Add a column whose value is the constant "cat" on every line.
    corpus.make_column("quux", lambda x, y, z, w: "cat", "foo bar baz qux")
    assert_equal(corpus.header, "foo bar baz qux quux".split())
    for raw_line in corpus.lines(parts=False):
        new_column = corpus.extract_columns(raw_line, "quux")
        assert_equal(new_column, ["cat"])

    # Removing the column should restore the original header.
    corpus.delete_columns("quux")
    assert_equal(corpus.header, "foo bar baz qux".split())

    # Copying "qux" into "quux" should yield identical values per line.
    corpus.copy_column("quux", "qux")
    assert_equal(corpus.header, "foo bar baz qux quux".split())
    for raw_line in corpus.lines(parts=False):
        source_values = corpus.extract_columns(raw_line, "qux")
        copied_values = corpus.extract_columns(raw_line, "quux")
        assert_equal(source_values, copied_values)
def test_basics_in_memory():
    """Smoke-test basic ``LineFileInMemory`` operations.

    Covers: a string header being exposed as a list, the ``files``
    attribute, ``make_column`` with a constant-valued function,
    ``delete_columns``, and ``copy_column``.
    """
    # NOTE(review): a test with this same name is defined more than once in
    # this file; only the last definition is visible after import.
    source_file = "tests/smallcorpus.txt.bz2"

    linefile = LineFileInMemory(
        source_file, header="foo bar baz qux", path="tests/tmp/testcorpus"
    )
    assert_equal(linefile.header, "foo bar baz qux".split())
    assert_equal(linefile.files, [source_file])

    # make_column: the new "quux" column should read "cat" on every line.
    linefile.make_column("quux", lambda x, y, z, w: "cat", "foo bar baz qux")
    assert_equal(linefile.header, "foo bar baz qux quux".split())
    for entry in linefile.lines(parts=False):
        assert_equal(linefile.extract_columns(entry, "quux"), ["cat"])

    # delete_columns: header goes back to the four original names.
    linefile.delete_columns("quux")
    assert_equal(linefile.header, "foo bar baz qux".split())

    # copy_column: "quux" must mirror "qux" line by line.
    linefile.copy_column("quux", "qux")
    assert_equal(linefile.header, "foo bar baz qux quux".split())
    for entry in linefile.lines(parts=False):
        original = linefile.extract_columns(entry, "qux")
        duplicate = linefile.extract_columns(entry, "quux")
        assert_equal(original, duplicate)
def test_clean_in_memory():
    """Run ``clean`` with four option sets against the malformed fixture.

    The fixture is reloaded before each call. The assertions pin the
    resulting line counts: two fewer than the original when only column
    counting is enabled, 8562 with lowercase/alphanumeric cleaning, zero
    when ``filter_fn`` rejects every line, and unchanged when only a
    ``modifier_fn`` is applied (in which case every line must read "hello").
    """
    # NOTE(review): a test with this same name is defined more than once in
    # this file; only the last definition is visible after import.
    source_file = "tests/smallcorpus-malformed.txt.bz2"
    work_path = "tests/tmp/testcorpus"

    def reload():
        # Start each scenario from an unmodified copy of the fixture.
        return LineFileInMemory(
            source_file, header="foo bar baz qux".split(), path=work_path
        )

    linefile = reload()
    line_count = len(linefile)
    linefile.clean(
        columns=4,
        lower=False,
        alphanumeric=False,
        count_columns=True,
        nounderscores=False,
        echo_toss=True,
    )
    assert_equal(len(linefile), line_count - 2)

    linefile = reload()
    linefile.clean(lower=True, alphanumeric=True, count_columns=False, echo_toss=True)
    assert_equal(len(linefile), 8562)

    linefile = reload()
    linefile.clean(
        lower=True,
        alphanumeric=True,
        count_columns=False,
        echo_toss=True,
        filter_fn=lambda x: False,
    )
    assert_equal(len(linefile), 0)

    linefile = reload()
    linefile.clean(
        lower=True,
        alphanumeric=False,
        count_columns=False,
        echo_toss=True,
        modifier_fn=lambda x: "hello",
    )
    assert_equal(len(linefile), line_count)
    for entry in linefile.lines(parts=False):
        assert_equal(entry, "hello")