Example #1
0
def test_issue998():
    """Read the 2M-row HIGGS sample single-threaded and verify its contents.

    The input file is 1.46GB; no smaller file was found that reproduces the
    problem. The issue showed up only in single-threaded mode, hence
    ``nthreads=1`` and the slow read (about 8s on the original author's
    laptop).
    """
    higgs_path = find_file("h2o-3", "bigdata", "laptop", "higgs_head_2M.csv")
    frame = dt.fread(higgs_path, nthreads=1, fill=True, na_strings=["-999"])

    # Structural checks: dimensions, auto-generated names, column types.
    assert frame.shape == (2000000, 29)
    assert frame.names == tuple("C%d" % i for i in range(frame.ncols))
    assert frame.stypes == (dt.stype.float64,) * frame.ncols

    # Per-column sums, one single-element list per column.
    expected_sums = [
        [1058818.0], [1981919.6107614636], [701.7858121241807],
        [-195.48500674014213], [1996390.3476011853], [-1759.5364254778178],
        [1980743.446578741], [-1108.7512905876065], [1712.947751407064],
        [2003064.4534490108], [1985100.3810670376], [1190.8404791812281],
        [384.00605312064], [1998592.0739881992], [1984490.1900614202],
        [2033.9754767678387], [-1028.0810855487362], [2001341.0813384056],
        [1971311.3271338642], [-943.92552991907], [-1079.3848229270661],
        [1996588.295421958], [2068619.2163415626], [2049516.5437491536],
        [2100795.4839400873], [2019540.6562294513], [1946283.046177674],
        [2066298.020782411], [1919714.12131235],
    ]
    actual_sums = frame.sum().to_list()
    assert list_equals(actual_sums, expected_sums)
Example #2
0
def test_fread_maxnrows_with_large_file():
    """Read the airlines CSV with ``max_nrows=111`` and expect 111 rows."""
    row_limit = 111
    airlines_path = find_file(
        "h2o-3", "bigdata", "laptop", "airlines_all.05p.csv")
    frame = dt.fread(airlines_path, max_nrows=row_limit)
    frame_integrity_check(frame)
    assert frame.nrows == row_limit
Example #3
0
def test_excel_testbook_xlsx_2():
    """Read ``Sheet2`` of the Excel test workbook and verify source and data.

    The sheet is addressed by appending ``/Sheet2`` to the workbook path; the
    resulting frame's ``source`` must be the absolute workbook path with the
    same suffix.
    """
    workbook = find_file("h2o-3", "fread", "excelTestbook.xlsx")
    sheet_path = workbook + "/Sheet2"
    DT2 = dt.fread(sheet_path)
    assert DT2.source == os.path.abspath(workbook) + "/Sheet2"

    # The third value contains embedded newlines within a single cell.
    expected = dt.Frame(
        day=["today", "tomorrow", "yes\nter\nday", "everyday"])
    assert_equals(DT2, expected)