Ejemplo n.º 1
0
def test_resum_equal_in_memory():
    """Exercise ``resum_equal`` on an in-memory copy of the small corpus.

    Two scenarios are checked, each on a freshly loaded corpus:

    * ``keep_all=False``: exactly one line is expected to remain.
    * ``keep_all=True``: the original number of lines is expected.

    In both scenarios every line's "qux" value must equal the total of the
    "qux" column measured before any resumming took place.
    """
    def load_corpus():
        # A new object (and a new header list) per scenario, so nothing
        # carries over from the previous resum_equal call.
        return LineFileInMemory("tests/smallcorpus.txt.bz2",
                                header="foo bar baz qux".split(),
                                path="tests/tmp/testcorpus")

    # Scenario 1: collapse to a single line.
    G = load_corpus()
    len_G = len(G)
    total = G.sum_column("qux")
    G.resum_equal("foo", "qux", assert_sorted=True, keep_all=False)
    assert_equal(len(G), 1)
    for row in G.lines():
        qux_field = G.extract_columns(row, "qux")[0]
        assert_equal(int(qux_field), total)

    # Scenario 2: keep every line, each carrying the resummed value.
    G = load_corpus()
    G.resum_equal("foo", "qux", assert_sorted=True, keep_all=True)
    assert_equal(len(G), len_G)
    for row in G.lines():
        qux_field = G.extract_columns(row, "qux")[0]
        assert_equal(int(qux_field), total)
Ejemplo n.º 2
0
def test_clean_in_memory():
    """Exercise ``clean`` on an in-memory copy of the malformed small corpus.

    Four independent scenarios, each on a freshly loaded corpus:

    1. Column counting with ``columns=4``: two lines are expected to be
       dropped.
    2. Lower-casing plus alphanumeric filtering: 8562 lines are expected to
       remain.
    3. A ``filter_fn`` that always returns ``False``: no lines are expected
       to remain.
    4. A ``modifier_fn`` that always returns ``"hello"``: the line count is
       unchanged and every raw line equals ``"hello"``.
    """
    def load_malformed():
        # Fresh instance per scenario; clean() modifies the object in place.
        return LineFileInMemory("tests/smallcorpus-malformed.txt.bz2",
                                header="foo bar baz qux".split(),
                                path="tests/tmp/testcorpus")

    # Scenario 1: only the column count is enforced.
    G = load_malformed()
    len_G = len(G)
    G.clean(
        columns=4,
        lower=False,
        alphanumeric=False,
        count_columns=True,
        nounderscores=False,
        echo_toss=True,
    )
    expected_after_count = len_G - 2
    assert_equal(len(G), expected_after_count)

    # Scenario 2: lower-case + alphanumeric filtering, no column counting.
    G = load_malformed()
    G.clean(
        lower=True,
        alphanumeric=True,
        count_columns=False,
        echo_toss=True,
    )
    assert_equal(len(G), 8562)

    # Scenario 3: a filter that rejects every line.
    G = load_malformed()
    G.clean(
        lower=True,
        alphanumeric=True,
        count_columns=False,
        echo_toss=True,
        filter_fn=lambda x: False,
    )
    assert_equal(len(G), 0)

    # Scenario 4: a modifier that rewrites every line to a constant.
    G = load_malformed()
    G.clean(
        lower=True,
        alphanumeric=False,
        count_columns=False,
        echo_toss=True,
        modifier_fn=lambda x: "hello",
    )
    assert_equal(len(G), len_G)
    for raw_line in G.lines(parts=False):
        assert_equal(raw_line, "hello")
Ejemplo n.º 3
0
def test_resum_equal_in_memory():
    """Verify ``resum_equal`` with and without ``keep_all``.

    The "qux" column total is taken from an untouched corpus. After
    ``resum_equal("foo", "qux", ...)`` every line's "qux" value must equal
    that total; the expected line count is 1 when ``keep_all=False`` and
    the original count when ``keep_all=True``.
    """
    def fresh_corpus():
        # Reload from the source file so each scenario starts unmodified.
        return LineFileInMemory(
            "tests/smallcorpus.txt.bz2",
            header="foo bar baz qux".split(),
            path="tests/tmp/testcorpus",
        )

    G = fresh_corpus()
    len_G = len(G)
    total = G.sum_column("qux")

    # keep_all=False: a single line is expected to survive.
    G.resum_equal("foo", "qux", assert_sorted=True, keep_all=False)
    assert_equal(len(G), 1)
    for entry in G.lines():
        columns = G.extract_columns(entry, "qux")
        assert_equal(int(columns[0]), total)

    # keep_all=True: all lines are expected to survive.
    G = fresh_corpus()
    G.resum_equal("foo", "qux", assert_sorted=True, keep_all=True)
    assert_equal(len(G), len_G)
    for entry in G.lines():
        columns = G.extract_columns(entry, "qux")
        assert_equal(int(columns[0]), total)
Ejemplo n.º 4
0
def test_basics_in_memory():
    """Check header handling and basic column operations in memory.

    The header is passed as a single space-separated string; the test
    asserts that ``G.header`` holds the split list. It then checks
    ``make_column``, ``delete_columns`` and ``copy_column`` by inspecting
    the header and the per-line column values after each step.
    """
    G = LineFileInMemory("tests/smallcorpus.txt.bz2",
                         header="foo bar baz qux",
                         path="tests/tmp/testcorpus")
    base_header = "foo bar baz qux".split()
    assert_equal(G.header, base_header)
    assert_equal(G.files, ["tests/smallcorpus.txt.bz2"])

    # make_column: add "quux", computed by a function that always yields "cat".
    G.make_column("quux", lambda x, y, z, w: "cat", "foo bar baz qux")
    extended_header = "foo bar baz qux quux".split()
    assert_equal(G.header, extended_header)
    for raw_line in G.lines(parts=False):
        quux_values = G.extract_columns(raw_line, "quux")
        assert_equal(quux_values, ["cat"])

    # delete_columns: the header is expected to return to its original form.
    G.delete_columns("quux")
    assert_equal(G.header, "foo bar baz qux".split())

    # copy_column: "quux" is expected to mirror "qux" on every line.
    G.copy_column("quux", "qux")
    assert_equal(G.header, "foo bar baz qux quux".split())
    for raw_line in G.lines(parts=False):
        source_values = G.extract_columns(raw_line, "qux")
        copied_values = G.extract_columns(raw_line, "quux")
        assert_equal(source_values, copied_values)
Ejemplo n.º 5
0
def test_basics_in_memory():
    """Smoke-test construction and column add/delete/copy in memory.

    Asserts that a string header is exposed as a list on ``G.header``, that
    ``G.files`` lists the single source file, and that ``make_column``,
    ``delete_columns`` and ``copy_column`` change the header and the line
    contents as expected.
    """
    source_file = "tests/smallcorpus.txt.bz2"
    G = LineFileInMemory(
        source_file,
        header="foo bar baz qux",
        path="tests/tmp/testcorpus",
    )
    assert_equal(G.header, "foo bar baz qux".split())
    assert_equal(G.files, [source_file])

    # Add a fifth column whose value is the constant "cat".
    G.make_column("quux", lambda x, y, z, w: "cat", "foo bar baz qux")
    assert_equal(G.header, "foo bar baz qux quux".split())
    for text in G.lines(parts=False):
        extracted = G.extract_columns(text, "quux")
        assert_equal(extracted, ["cat"])

    # Remove it again; the header should be back to four columns.
    G.delete_columns("quux")
    assert_equal(G.header, "foo bar baz qux".split())

    # Re-create "quux" as a copy of "qux" and compare the two per line.
    G.copy_column("quux", "qux")
    assert_equal(G.header, "foo bar baz qux quux".split())
    for text in G.lines(parts=False):
        original = G.extract_columns(text, "qux")
        duplicate = G.extract_columns(text, "quux")
        assert_equal(original, duplicate)
Ejemplo n.º 6
0
def test_clean_in_memory():
    """Run ``clean`` with four option sets against the malformed corpus.

    Each option set is applied to a newly loaded corpus and followed by a
    line-count assertion: original count minus 2 for column counting, 8562
    for lower-case/alphanumeric cleaning, 0 for an always-false
    ``filter_fn``, and the original count for a constant ``modifier_fn``
    (whose output ``"hello"`` must then be every raw line).
    """
    def reload():
        # Each scenario needs an unmodified corpus to start from.
        return LineFileInMemory("tests/smallcorpus-malformed.txt.bz2",
                                header="foo bar baz qux".split(),
                                path="tests/tmp/testcorpus")

    # Column-count check only.
    G = reload()
    len_G = len(G)
    G.clean(columns=4, lower=False, alphanumeric=False, count_columns=True,
            nounderscores=False, echo_toss=True)
    assert_equal(len(G), len_G - 2)

    # Lower-casing and alphanumeric filtering.
    G = reload()
    G.clean(lower=True, alphanumeric=True, count_columns=False,
            echo_toss=True)
    assert_equal(len(G), 8562)

    # filter_fn rejects everything.
    G = reload()
    G.clean(lower=True, alphanumeric=True, count_columns=False,
            echo_toss=True, filter_fn=lambda x: False)
    assert_equal(len(G), 0)

    # modifier_fn replaces every line with "hello".
    G = reload()
    G.clean(lower=True, alphanumeric=False, count_columns=False,
            echo_toss=True, modifier_fn=lambda x: "hello")
    assert_equal(len(G), len_G)
    for content in G.lines(parts=False):
        assert_equal(content, "hello")