Beispiel #1
0
def test_buffer(tmpdir):
    """Reading the buffered head of a stream twice must yield the same bytes.

    The first ``max_buffer`` lines are read, the stream is reset, and the
    same number of lines is read again; both reads must be identical.
    """
    def _read_head(stream, limit):
        # Concatenate lines until `limit` lines have been consumed.
        pieces = []
        for seen, line in enumerate(stream, start=1):
            pieces.append(line)
            if seen >= limit:
                break
        return b''.join(pieces)

    data_dir = tmpdir.mkdir("tmp.csvs").mkdir("data")
    table_path = _create_table(data_dir, rows=200)

    max_buffer = 50
    stream = BufferedAutoEncodingStream(table_path, max_buffer=max_buffer)

    first_pass = _read_head(stream, max_buffer)
    stream.reset()
    second_pass = _read_head(stream, max_buffer)

    assert len(first_pass) == len(second_pass)
    assert first_pass == second_pass
Beispiel #2
0
def test_buffer_not_reset(tmpdir):
    """Resetting after reading more lines than ``max_buffer`` must raise IOError.

    The stream is created with ``max_buffer=10`` and 12 lines are consumed
    before ``reset()`` is called.
    """
    data_dir = tmpdir.mkdir("tmp.csvs").mkdir("data")
    table_path = _create_table(data_dir, rows=200)

    stream = BufferedAutoEncodingStream(table_path, max_buffer=10)

    # Consume more lines than the buffer is configured to hold.
    for _ in range(12):
        next(stream)

    reset_failed = False
    try:
        stream.reset()
    except IOError:
        reset_failed = True

    # Without this check the test would pass even if reset() silently succeeded.
    assert reset_failed, "reset() should raise IOError once the buffer is exceeded"
Beispiel #3
0
def reader(csv,
           skip_guess_encoding=anycsvconfig.SKIP_GUESS_ENCODING,
           delimiter=None,
           sniff_lines=anycsvconfig.NO_SNIFF_LINES,
           max_file_size=anycsvconfig.MAX_FILE_SIZE,
           encoding=anycsvconfig.DEFAULT_ENCODING):
    """Open a CSV input, determine its encoding and dialect, and return a Table.

    :param csv: the CSV input handed to ``BufferedAutoEncodingStream``;
        a falsy value is rejected.
    :param skip_guess_encoding: when true, ``encoding`` is used as-is and
        no encoding detection is run.
    :param delimiter: not used by this function body.
    :param sniff_lines: used as the stream's ``max_buffer`` and as
        ``max_lines`` for the encoding detection calls.
    :param max_file_size: forwarded to ``BufferedAutoEncodingStream``.
    :param encoding: encoding used when detection is skipped.
    :return: a ``Table`` built from the input, the stream, the guessed
        dialect and the final encoding.
    :raises exceptions.AnyCSVException: if ``csv`` is falsy.
    """
    if not csv:
        raise exceptions.AnyCSVException('No CSV input specified')

    stream = BufferedAutoEncodingStream(
        csv, max_buffer=sniff_lines, max_file_size=max_file_size)

    if skip_guess_encoding:
        final_encoding = encoding
    else:
        # Each detection step reads from the stream, so rewind after each one.
        candidates = E.detect_encoding(
            stream, min_lines=10, max_lines=sniff_lines)
        stream.reset()

        final_encoding = E.prob_encoding(
            stream, candidates, max_lines=sniff_lines)
        stream.reset()

    dialect = D.guessDialect(stream, final_encoding)
    # Rewind once more so the Table receives the stream from its start.
    stream.reset()

    return Table(csv, stream, dialect, encoding=final_encoding)
Beispiel #4
0
def test_single_file():
    """The dialect guessed for a fixed local file uses the unix_dialect delimiter.

    NOTE(review): the input is a hard-coded absolute path, so this test only
    works on a machine where that file exists.
    """
    # Aliased so the stdlib module does not rebind the path variable.
    import csv as stdlib_csv

    csv_path = "/Users/jumbrich/data/mimesis_csvs/encoding/latin.csv"
    stream = BufferedAutoEncodingStream(csv_path, max_buffer=50)

    dialect = D.guessDialect(stream, 'cp737')

    assert dialect.delimiter == stdlib_csv.unix_dialect.delimiter
Beispiel #5
0
def test_file_gzipped(tmpdir):
    """The dialect guessed for a gzipped table uses the unix_dialect delimiter."""
    # Aliased so the stdlib module does not rebind the path variable.
    import csv as stdlib_csv

    data_dir = tmpdir.mkdir("tmp.csvs").mkdir("data")
    csv_path = _create_table(data_dir, rows=200, gzipped=True)

    stream = BufferedAutoEncodingStream(csv_path, max_buffer=50)

    dialect = D.guessDialect(stream, 'utf-8')

    assert dialect.delimiter == stdlib_csv.unix_dialect.delimiter
Beispiel #6
0
def test_read_all(tmpdir):
    """Iterating the whole stream yields 201 lines and leaves a digest set.

    The last zero-based line index must be 200 for a table created with
    ``rows=200`` (presumably one header line plus 200 rows; confirm against
    ``_create_table``).
    """
    data_dir = tmpdir.mkdir("tmp.csvs").mkdir("data")
    table_path = _create_table(data_dir, rows=200)

    stream = BufferedAutoEncodingStream(table_path, max_buffer=50)

    # Start at -1 so an empty stream fails the assertion below instead of
    # raising NameError on an unbound loop variable.
    last_index = -1
    for last_index, _line in enumerate(stream):
        pass

    assert last_index == 200
    assert stream.digest is not None
Beispiel #7
0
def test_max_file_size(tmpdir):
    """Reading a stream limited by ``max_file_size=1024`` must raise FileSizeException."""
    data_dir = tmpdir.mkdir("tmp.csvs").mkdir("data")
    table_path = _create_table(data_dir, rows=200)

    stream = BufferedAutoEncodingStream(
        table_path, max_buffer=10, max_file_size=1024)

    limit_hit = False
    try:
        for _row in stream:
            pass
    except FileSizeException:
        limit_hit = True

    # Without this check the test would pass even if the limit was never enforced.
    assert limit_hit, "reading past max_file_size should raise FileSizeException"
Beispiel #8
0
def test_buffer_gzipped(tmpdir):
    """Reading the buffered head of a remote stream twice must yield the same bytes.

    The input is an HTTPS URL, so this test needs network access. The
    ``tmpdir`` argument is not used.
    """
    def _read_head(stream, limit):
        # Concatenate lines until `limit` lines have been consumed.
        pieces = []
        for seen, line in enumerate(stream, start=1):
            pieces.append(line)
            if seen >= limit:
                break
        return b''.join(pieces)

    url = "https://datascience.ai.wu.ac.at/ws1718_dataprocessing1_1823/data/allcampusrooms.csv"
    max_buffer = 50
    stream = BufferedAutoEncodingStream(url, max_buffer=max_buffer)

    first_pass = _read_head(stream, max_buffer)
    stream.reset()
    second_pass = _read_head(stream, max_buffer)

    assert len(first_pass) == len(second_pass)
    assert first_pass == second_pass
Beispiel #9
0
def test_http(tmpdir):
    """The first line of a remote stream is the same before and after reset().

    The input is an HTTPS URL, so this test needs network access. The
    ``tmpdir`` argument is not used.
    """
    url = "https://datascience.ai.wu.ac.at/ws1718_dataprocessing1_1823/data/allcampusrooms.csv"
    stream = BufferedAutoEncodingStream(url, max_buffer=50)

    before_reset = stream.readline()
    stream.reset()
    after_reset = stream.readline()

    assert before_reset == after_reset
Beispiel #10
0
def test_file_gzipped(tmpdir):
    """The first line of a gzipped table is the same before and after reset()."""
    data_dir = tmpdir.mkdir("tmp.csvs").mkdir("data")
    gz_path = _create_table(data_dir, rows=200, gzipped=True)

    stream = BufferedAutoEncodingStream(gz_path, max_buffer=50)

    before_reset = stream.readline()
    stream.reset()
    after_reset = stream.readline()

    assert before_reset == after_reset