Exemple #1
0
def test_archive_zip():
    """Check that Archive lists, opens and sizes members of a deflated zip.

    Two temp files are written into a zip archive. The test then compares
    the member names with the source paths (leading slash stripped), the
    content of each member with the source data (carriage returns replaced
    by newlines), and the size reported for the first file.
    """
    data1 = gen_data(5, 10)
    data2 = gen_data().replace('\n', '\r')
    with TempFile(data1) as f1:
        with TempFile(data2) as f2:
            with TempFile(suffix='.zip') as fname:
                with zipfile.ZipFile(fname,
                                     'w',
                                     compression=zipfile.ZIP_DEFLATED) as z:
                    z.write(f1)
                    z.write(f2)
                with Archive(fname) as archive:
                    members = archive.get_members()
                    # Parenthesised tuple: the bare ``for f in f1, f2`` form
                    # inside a comprehension is a SyntaxError on Python 3.
                    assert sorted(members) == sorted(
                        [f.lstrip('/') for f in (f1, f2)])
                # Pair each source path with its data and order by path so
                # the pairs line up with the sorted member names.
                ordered_members = sorted(members)
                expected = sorted(zip([f1, f2], [data1, data2]))
                for index in (0, 1):
                    with Archive(fname) as archive:
                        member = archive.open_member(ordered_members[index])
                        assert ''.join(member) == (
                            expected[index][1].replace('\r', '\n'))
                with Archive(fname) as archive:
                    size = archive.get_member_size(f1.lstrip('/'))
                    # NOTE(review): 59 is presumably the byte length of
                    # gen_data(5, 10) -- confirm against gen_data.
                    assert size == 59
Exemple #2
0
def test_zip_write():
    """zip_write must produce a smaller file whose first member is the data.

    The payload is padded with 1000 spaces, and the test asserts that the
    resulting zip is smaller than the source file.
    """
    payload = gen_data() + ' ' * 1000
    with TempFile(payload) as source_path:
        with TempFile() as archive_path:
            zip_write(archive_path, source_path)
            source_size = os.stat(source_path).st_size
            packed_size = os.stat(archive_path).st_size
            assert source_size > packed_size
            with zipfile.ZipFile(archive_path) as packed:
                first_member = packed.namelist()[0]
                assert payload == packed.read(first_member)
Exemple #3
0
def test_archive_invalid():
    """Archive must raise InvalidArchive for non-archive data.

    The same gen_data() content is tried with no suffix, with '.zip' and
    with '.tar.bz'.
    """
    payload = gen_data()
    # An empty dict reproduces the call that passes no suffix at all.
    suffix_variants = ({}, {'suffix': '.zip'}, {'suffix': '.tar.bz'})
    for extra in suffix_variants:
        with TempFile(payload, **extra) as path:
            with pytest.raises(InvalidArchive):
                Archive(path)
Exemple #4
0
def test_empty_archive():
    """An empty zip and an empty tar.gz must both report no members."""
    with TempFile(suffix='.zip') as td:
        # Opening in write mode and closing immediately yields an empty zip.
        with zipfile.ZipFile(td, 'w'):
            pass
        # Use Archive as a context manager (as other tests in this file do)
        # so it is closed before the temp file goes away.
        with Archive(td) as archive:
            assert [] == archive.get_members()

    with TempFile(suffix='.tar.gz') as td:
        with tarfile.open(td, 'w:gz'):
            pass
        with Archive(td) as archive:
            assert [] == archive.get_members()
Exemple #5
0
def test_open_gz():
    """open_gz must yield the newline-split content of a gzip file."""
    data = gen_data()
    with TempFile() as fname:
        # The with-block closes the stream on exit; the extra explicit
        # close() inside it was redundant.
        with gzip.open(fname, 'wb') as gz:
            gz.write(data)
        assert data.split('\n') == list(open_gz(fname))
Exemple #6
0
def test_open_bz():
    """open_bz must yield the newline-split content of a bzip2 file."""
    data = gen_data()
    with TempFile() as fname:
        # The with-block closes the stream on exit; the extra explicit
        # close() inside it was redundant.
        with bz2.BZ2File(fname, 'wb') as bz:
            bz.write(data)
        assert data.split('\n') == list(open_bz(fname))
Exemple #7
0
def test_parse_with_archive():
    """parse() on a zip of class folders must report an IMAGES dataset.

    The expected metadata counts two files for class1 and three for class2;
    class3 (holding only a .txt file) and the root-level f1.jpeg do not
    appear in the expected classes.
    """
    data = gen_data()
    with tempdir() as td:
        for class_dir in ('class1', 'class2', 'class3', 'class4'):
            td.child(class_dir).mkdir()
        files = [
            td.child('class1', 'f1.jpg'),
            td.child('class1', 'f2.JPG'),
            td.child('class2', 'f1.jpg'),
            td.child('class2', 'f2.bMp'),
            td.child('class2', 'f4jpg.Jpeg'),
            td.child('class3', 'test.txt'),
            td.child('f1.jpeg'),
        ]
        for path in files:
            # Context manager closes the handle even if the write fails.
            with open(path, 'w') as handle:
                handle.write(data)
        with TempFile(suffix='.zip') as fname:
            with zipfile.ZipFile(fname, 'w',
                                 compression=zipfile.ZIP_DEFLATED) as z:
                with cwd(td):
                    for path in files:
                        # Replace the temp-dir prefix with './' so member
                        # names do not contain the absolute temp path.
                        z.write(path.replace(td, './'))
            meta = parse(fname)
            assert meta == {
                'data_type': 'IMAGES',
                'classes': {
                    'class1': 2,
                    'class2': 3
                }
            }
Exemple #8
0
def test_parse_with_invalid_csv():
    """A single non-numeric cell must be reported as an invalid row.

    Text columns are accepted, so strings are presumed to be errors only
    when they make up less than 10% of a column. The data therefore has
    11 rows of 4 fields, with one string ('a') in row 3, column 3.
    """
    data = '1 2 3 4\n1 2 3 4\n5 6 a 8\n7 6 5 2\n8 8 8 8\n2 2 3 9\n5 6 7 8\n12 13 45 56\n12 43 6 7\n9 9 9 0\n1 2 5 0\n'
    log = """
    Parsing CSV with whitespace (tab) as delimiter.
    Found 4 fields in first row, assume all the rows have this number of fields.
    Parsing...
    Analyzing data...
    No header found, first row contains data.
    Found 1 row with invalid values:
    - row 3, column 3
    Found 10 samples.
    """
    # Expected messages: one per line of the log, indentation removed.
    expected = '\n'.join(line.strip() for line in log.strip().split('\n'))
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with TempFile(data, suffix='.csv') as csv_path, global_notify(notify):
        parse(csv_path)
    # First positional argument of every notify.send call, in call order.
    sent = '\n'.join(call[0][0] for call in notify.send.call_args_list)
    assert sent == expected
    assert not notify.admin_send.called
def test_notify_data_invalid():
    """parse_archive must raise InvalidDataFile and notify the user.

    The tar.bz2 archive holds only a .txt file inside class3 and a .jpg at
    the archive root. The test checks the exact sequence of messages passed
    to notify.send and that notify.admin_send was never called.
    """
    data = gen_data()
    log = """
    Image dataset unpacked. Parsing...
    This file doesn't contain a supported data format."""
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with tempdir() as td:
        for class_dir in ('class1', 'class2', 'class3', 'class4'):
            td.child(class_dir).mkdir()
        files = [
            td.child('class3', 'test.txt'),
            td.child('1.jpg'),
        ]
        for path in files:
            # Context manager closes the handle even if the write fails.
            with open(path, 'w') as handle:
                handle.write(data)
        with TempFile(suffix='.tar.bz2') as fname:
            with tarfile.open(fname, 'w:bz2') as z:
                with cwd(td):
                    for path in files:
                        # Replace the temp-dir prefix with './' so member
                        # names do not contain the absolute temp path.
                        z.add(path.replace(td, './'))
            with global_notify(notify):
                with pytest.raises(InvalidDataFile):
                    parse_archive(Archive(fname))
    # First positional argument of every notify.send call, in call order.
    rval = '\n'.join(x[0][0] for x in notify.send.call_args_list)
    assert rval == '\n'.join(x.strip() for x in log.strip().split('\n'))
    assert not notify.admin_send.called
Exemple #10
0
def fail_template(data,log):
    """Assert that parsing ``data`` as a .ts file fails with the given log.

    ``data`` is written to a temp file with a '.ts' suffix and parse() must
    raise InvalidTimeseries. ``log`` is the expected sequence of
    notify.send messages, one per line (surrounding whitespace is ignored).
    notify.admin_send must not have been called.
    """
    expected = '\n'.join(line.strip() for line in log.strip().split('\n'))
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with TempFile(data, suffix='.ts') as ts_path, global_notify(notify):
        with pytest.raises(InvalidTimeseries):
            parse(ts_path)
    # First positional argument of every notify.send call, in call order.
    sent = '\n'.join(call[0][0] for call in notify.send.call_args_list)
    assert sent == expected
    assert not notify.admin_send.called
Exemple #11
0
def test_archive_tar_bz():
    """Check that Archive lists, opens and sizes members of a bz2 tarball.

    Both payloads use carriage returns in place of newlines. The content
    check joins the iterated member pieces with a carriage return and
    compares the result with the source data of the second member in
    sorted order.
    """
    data1 = gen_data(5, 10).replace('\n', '\r')
    data2 = gen_data().replace('\n', '\r')
    with TempFile(data1) as f1:
        with TempFile(data2) as f2:
            with TempFile(suffix='.tar.bz') as fname:
                with tarfile.open(fname, 'w:bz2') as z:
                    z.add(f1)
                    z.add(f2)
                with Archive(fname) as archive:
                    members = archive.get_members()
                    # Parenthesised tuple: the bare ``for f in f1, f2`` form
                    # inside a comprehension is a SyntaxError on Python 3.
                    assert sorted(members) == sorted(
                        [f.lstrip('/') for f in (f1, f2)])
                with Archive(fname) as archive:
                    member = archive.open_member(sorted(members)[1])
                    # Sorting the (path, data) pairs by path lines them up
                    # with the sorted member names.
                    assert '\r'.join(x for x in member) == sorted(
                        zip([f1, f2], [data1, data2]))[1][1]
                with Archive(fname) as archive:
                    size = archive.get_member_size(f1.lstrip('/'))
                    # NOTE(review): 59 is presumably the byte length of
                    # gen_data(5, 10) -- confirm against gen_data.
                    assert size == 59
Exemple #12
0
def test_zip_get_members():
    """get_members must return unusual member names unchanged.

    The names cover whitespace, nested directories, a quote character,
    leading dots and a './' prefix.
    """
    names = [
        'whitespace in name.jpg', '1/2/test 1.jpg', '33/a\'a.bin', '.test.jpg',
        '1/.2 3/3/test.jpg', './.1/test/test.jpg'
    ]
    with TempFile(suffix='.zip') as td:
        with zipfile.ZipFile(td, 'w') as z:
            for name in names:
                z.writestr(name, 'null')
        # Use Archive as a context manager (as other tests in this file do)
        # so it is closed before the temp file goes away.
        with Archive(td) as archive:
            members = archive.get_members()
    assert sorted(names) == sorted(members)
Exemple #13
0
def pass_template(data,log,exp_meta):
    """Assert that parsing ``data`` as a .ts file succeeds as described.

    ``data`` is written to a temp file with a '.ts' suffix and parsed.
    Every key in ``exp_meta`` must match the returned metadata (extra keys
    in the result are ignored). ``log`` is the expected sequence of
    notify.send messages, one per line (surrounding whitespace is ignored).
    notify.admin_send must not have been called.
    """
    expected = '\n'.join(line.strip() for line in log.strip().split('\n'))
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with TempFile(data, suffix='.ts') as ts_path, global_notify(notify):
        meta = parse(ts_path)
    for field in exp_meta:
        assert meta[field] == exp_meta[field]
    # First positional argument of every notify.send call, in call order.
    sent = '\n'.join(call[0][0] for call in notify.send.call_args_list)
    assert sent == expected
    assert not notify.admin_send.called
def test_notify_archive_invalid():
    """parse() on a '.foo' file must raise InvalidArchive and say why.

    Exactly one message, 'Unknown file format.', is expected on
    notify.send, and notify.admin_send must not have been called.
    """
    data = 'thequickbrownfoxjumpsoverthelazydog'
    log = 'Unknown file format.'
    expected = '\n'.join(line.strip() for line in log.strip().split('\n'))
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with TempFile(data, suffix='.foo') as foo_path, global_notify(notify):
        with pytest.raises(InvalidArchive):
            parse(foo_path)
    # First positional argument of every notify.send call, in call order.
    sent = '\n'.join(call[0][0] for call in notify.send.call_args_list)
    assert sent == expected
    assert not notify.admin_send.called
Exemple #15
0
def test_parse_no_data():
    """Parsing an empty .csv must raise InvalidCSV and explain the failure.

    The messages passed to notify.send must match the log below line by
    line, and notify.admin_send must not have been called.
    """
    data = ''
    log = """
    First row is empty, it must contain headers or data.
    This means your file isn't properly formatted
    (or you submitted another type of file).
    """
    expected = '\n'.join(line.strip() for line in log.strip().split('\n'))
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with TempFile(data, suffix='.csv') as csv_path, global_notify(notify):
        with pytest.raises(InvalidCSV):
            parse(csv_path)
    # First positional argument of every notify.send call, in call order.
    sent = '\n'.join(call[0][0] for call in notify.send.call_args_list)
    assert sent == expected
    assert not notify.admin_send.called
Exemple #16
0
def test_parse_with_invalid_csv_other_file_type():
    """A PDF header saved as .csv must be rejected for lacking a delimiter.

    parse() must raise InvalidCSV, the messages passed to notify.send must
    match the log below line by line, and notify.admin_send must not have
    been called.
    """
    data = '%PDF-1.4\n'
    log = """
    CSV doesn't contain a valid delimiter.
    This means your file isn't properly formatted
    (or you submitted another type of file).
    """
    expected = '\n'.join(line.strip() for line in log.strip().split('\n'))
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with TempFile(data, suffix='.csv') as csv_path, global_notify(notify):
        with pytest.raises(InvalidCSV):
            parse(csv_path)
    # First positional argument of every notify.send call, in call order.
    sent = '\n'.join(call[0][0] for call in notify.send.call_args_list)
    assert sent == expected
    assert not notify.admin_send.called
Exemple #17
0
def test_parse_with_ts():
    """parse() on a small .ts file must return the full timeseries metadata.

    The input is a single line with two timesteps, each having three inputs
    and two outputs.
    """
    with TempFile('1,2,3|1, 0;2,3,4|1,0', suffix='.ts') as ts_path:
        result = parse(ts_path)
    expected = {
        'data_type': 'TIMESERIES',
        'data_rows': 1,
        'empty_rows': 0,
        'min_timesteps': 2,
        'max_timesteps': 2,
        'classes': {
            '0': 2
        },
        'binary_input': False,
        'binary_output': True,
        'input_size': 3,
        'output_size': 2
    }
    assert result == expected
Exemple #18
0
def test_parse_ts_archive():
    """parse_archive must detect a .ts file among images in a tar.gz.

    The archive mixes image-named files with class3/test.ts, which is
    rewritten with real timeseries content. The expected metadata is of
    type TIMESERIES and records the member path of the .ts file.
    """
    data = gen_data()
    with tempdir() as td:
        for class_dir in ('class1', 'class2', 'class3', 'class4'):
            td.child(class_dir).mkdir()
        files = [
            td.child('class1', 'f1.jpg'),
            td.child('class1', 'f2.JPG'),
            td.child('class2', 'f1.jpg'),
            td.child('class2', 'f2.bMp'),
            td.child('class2', 'f4jpg.Jpeg'),
            td.child('class3', 'test.ts'),
            td.child('f1.jpeg'),
        ]
        for path in files:
            # Context manager closes the handle even if the write fails.
            with open(path, 'w') as handle:
                handle.write(data)
        # Overwrite the placeholder content with real timeseries rows.
        with open(td.child('class3', 'test.ts'), 'w') as f:
            f.write('1,2,3|0,1; 2.3,4,1|0,1; 1.1, 0., 0.0|1,0\n\n2,2,2|0,1;')
        with TempFile(suffix='.tar.gz') as fname:
            with tarfile.open(fname, 'w:gz') as z:
                with cwd(td):
                    for path in files:
                        # Replace the temp-dir prefix with './' so member
                        # names do not contain the absolute temp path.
                        z.add(path.replace(td, './'))
            meta = parse_archive(Archive(fname))
            assert meta == {
                'data_type': 'TIMESERIES',
                'data_rows': 2,
                'empty_rows': 1,
                'min_timesteps': 1,
                'max_timesteps': 3,
                'input_size': 3,
                'output_size': 2,
                'classes': {
                    '0': 1,
                    '1': 3
                },
                'binary_input': False,
                'binary_output': True,
                'archive_path': './/class3/test.ts'
            }
def test_notify_archive_csv_valid():
    """parse_archive must report CSV parsing steps for a CSV inside a tar.bz2.

    The archive mixes image-named files with 1.csv, which is rewritten with
    a header row and two data rows separated by carriage returns. The test
    checks the exact sequence of messages passed to notify.send and that
    notify.admin_send was never called.
    """
    data = gen_data()
    log = """
    Image dataset unpacked. Parsing...
    CSV file .//1.csv unpacked.
    Parsing CSV with whitespace (tab) as delimiter.
    Found 3 fields in first row, assume all the rows have this number of fields.
    Parsing...
    Analyzing data...
    The dataset appears to have a header.
    Found 2 samples."""
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with tempdir() as td:
        for class_dir in ('class1', 'class2', 'class3', 'class4'):
            td.child(class_dir).mkdir()
        files = [
            td.child('class1', 'f1.jpg'),
            td.child('class1', 'f2.JPG'),
            td.child('class2', 'f1.jpg'),
            td.child('class2', 'f2.bMp'),
            td.child('class2', 'f4jpg.Jpeg'),
            td.child('class3', 'test.txt'),
            td.child('1.csv'),
        ]
        for path in files:
            # Context manager closes the handle even if the write fails.
            with open(path, 'w') as handle:
                handle.write(data)
        # Overwrite the placeholder content with real CSV rows.
        with open(td.child('1.csv'), 'w') as f:
            f.write('one two free\r1 2 3\r4 5 6')
        with TempFile(suffix='.tar.bz2') as fname:
            with tarfile.open(fname, 'w:bz2') as z:
                with cwd(td):
                    for path in files:
                        # Replace the temp-dir prefix with './' so member
                        # names do not contain the absolute temp path.
                        z.add(path.replace(td, './'))
            with global_notify(notify):
                parse_archive(Archive(fname))
    # First positional argument of every notify.send call, in call order.
    rval = '\n'.join(x[0][0] for x in notify.send.call_args_list)
    assert rval == '\n'.join(x.strip() for x in log.strip().split('\n'))
    assert not notify.admin_send.called
Exemple #20
0
def test_parse_with_invalid_csv_other_file_type_valid_delimiter():
    """Non-CSV content that happens to contain a comma must still fail.

    parse() must raise InvalidCSV after reporting the comma delimiter and a
    two-field first row. This test makes no assertion about
    notify.admin_send.
    """
    data = '%PDF,-1.4\nadsadsadsad'
    log = """
    Parsing CSV with comma as delimiter.
    Found 2 fields in first row, assume all the rows have this number of fields.
    Parsing...
    Analyzing data...
    The dataset is empty or isn't properly formatted.
    """
    expected = '\n'.join(line.strip() for line in log.strip().split('\n'))
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with TempFile(data, suffix='.csv') as csv_path, global_notify(notify):
        with pytest.raises(InvalidCSV):
            parse(csv_path)
    # First positional argument of every notify.send call, in call order.
    sent = '\n'.join(call[0][0] for call in notify.send.call_args_list)
    assert sent == expected
Exemple #21
0
def test_parse_not_enough_columns():
    """A first row with a single value must be rejected with InvalidCSV.

    The first line is '3,' and the expected log reports that only one
    column was found where at least two are required. notify.admin_send
    must not have been called.
    """
    data = '3,\n 4,,5\n6,8,9\n'
    log = """
    Parsing CSV with comma as delimiter.
    With selected delimiter found only 1 columns in first row, must be at least 2.
    This means your file isn't properly formatted
    (or you submitted another type of file).
    """
    expected = '\n'.join(line.strip() for line in log.strip().split('\n'))
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with TempFile(data, suffix='.csv') as csv_path, global_notify(notify):
        with pytest.raises(InvalidCSV):
            parse(csv_path)
    # First positional argument of every notify.send call, in call order.
    sent = '\n'.join(call[0][0] for call in notify.send.call_args_list)
    assert sent == expected
    assert not notify.admin_send.called
Exemple #22
0
def test_parse_different_delimiters_per_Row():
    """Rows using different delimiters must be rejected with InvalidCSV.

    The first row is space-separated and the second comma-separated. The
    expected log shows whitespace chosen as the delimiter and the dataset
    then reported as empty or badly formatted. notify.admin_send must not
    have been called.
    """
    data = '4 5\n4,6\n'
    log = """
    Parsing CSV with whitespace (tab) as delimiter.
    Found 2 fields in first row, assume all the rows have this number of fields.
    Parsing...
    Analyzing data...
    The dataset is empty or isn't properly formatted.
    """
    expected = '\n'.join(line.strip() for line in log.strip().split('\n'))
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with TempFile(data, suffix='.csv') as csv_path, global_notify(notify):
        with pytest.raises(InvalidCSV):
            parse(csv_path)
    # First positional argument of every notify.send call, in call order.
    sent = '\n'.join(call[0][0] for call in notify.send.call_args_list)
    assert sent == expected
    assert not notify.admin_send.called
def test_notify_archive_image_skipped():
    """parse_archive must report how many images were found and skipped.

    The zip contains image-named files in class folders, one with a leading
    dot, one inside a dot-prefixed sub-folder and one at the archive root.
    The expected log reports 8 images found and 3 skipped. The test checks
    the exact sequence of messages passed to notify.send and that
    notify.admin_send was never called.
    """
    data = gen_data()
    log = """
    Image dataset unpacked. Parsing...
    8 images found.
    Skipped 3 images with leading dot or without class.
    """
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with tempdir() as td:
        td.child('class1').mkdir()
        td.child('class2').mkdir()
        # Must come after class2 is created, since it is nested inside it.
        td.child('class2').child('.class22').mkdir()
        td.child('class3').mkdir()
        td.child('class4').mkdir()
        files = [
            td.child('class1', 'f1.jpg'),
            td.child('class1', 'f2.JPG'),
            td.child('class2', 'f1.jpg'),
            td.child('class2', '.f1.jpg'),
            td.child('class2', 'f2.bMp'),
            td.child('class2', '.class22', 'ff2.jpg'),
            td.child('class2', 'f4jpg.Jpeg'),
            td.child('class3', 'test.txt'),
            td.child('f1.jpeg'),
        ]
        for path in files:
            # Context manager closes the handle even if the write fails.
            with open(path, 'w') as handle:
                handle.write(data)
        with TempFile(suffix='.zip') as fname:
            with zipfile.ZipFile(fname, 'w',
                                 compression=zipfile.ZIP_DEFLATED) as z:
                with cwd(td):
                    for path in files:
                        # Replace the temp-dir prefix with './' so member
                        # names do not contain the absolute temp path.
                        z.write(path.replace(td, './'))
            with global_notify(notify):
                parse_archive(Archive(fname))
    # First positional argument of every notify.send call, in call order.
    rval = '\n'.join(x[0][0] for x in notify.send.call_args_list)
    assert rval == '\n'.join(x.strip() for x in log.strip().split('\n'))
    assert not notify.admin_send.called
Exemple #24
0
def test_parse_bad_column_data_first_row():
    """A first row containing a bad cell must end in InvalidCSV.

    The second field of the first row is the literal text backslash-x00
    (the backslash is escaped in the source, so it is not a NUL byte).
    notify.admin_send must not have been called.
    """
    data = '3,\\x00,4,3\n4,\n6,8,9\n'
    log = """
    Parsing CSV with comma as delimiter.
    Found 4 fields in first row, assume all the rows have this number of fields.
    Parsing...
    Analyzing data...
    The dataset is empty or isn't properly formatted.
    """
    expected = '\n'.join(line.strip() for line in log.strip().split('\n'))
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with TempFile(data, suffix='.csv') as csv_path, global_notify(notify):
        with pytest.raises(InvalidCSV):
            parse(csv_path)
    # First positional argument of every notify.send call, in call order.
    sent = '\n'.join(call[0][0] for call in notify.send.call_args_list)
    assert sent == expected
    assert not notify.admin_send.called
Exemple #25
0
def test_parse_with_csv():
    """A comma-separated file with a header must parse as GENERAL data.

    The data has a header row, an empty line and two data rows. The
    expected log reports a detected header and two samples.
    notify.admin_send must not have been called.

    NOTE(review): another function named test_parse_with_csv appears later
    in this listing; if both live in one module the later definition
    replaces this one and this test never runs.
    """
    data = 'a,b,c,d\n\n1,2,3,4\n5,6,7,8\n'
    log = """
    Parsing CSV with comma as delimiter.
    Found 4 fields in first row, assume all the rows have this number of fields.
    Parsing...
    Analyzing data...
    The dataset appears to have a header.
    Found 2 samples.
    """
    expected = '\n'.join(line.strip() for line in log.strip().split('\n'))
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with TempFile(data, suffix='.csv') as csv_path, global_notify(notify):
        result = parse(csv_path)
    # First positional argument of every notify.send call, in call order.
    sent = '\n'.join(call[0][0] for call in notify.send.call_args_list)
    assert result['data_type'] == 'GENERAL'
    assert sent == expected
    assert not notify.admin_send.called
Exemple #26
0
def test_parse_nested_img_archive():
    """parse_archive must key image classes by their nested folder path.

    Images sit at several directory depths. The expected classes use the
    full relative folder path (for example 'class2/class2B/class2Bi') as
    the class name.
    """
    data = gen_data()
    with tempdir() as td:
        # Parents are created before their children.
        td.child('class1').mkdir()
        td.child('class2').mkdir()
        td.child('class3').mkdir()
        td.child('class4').mkdir()
        td.child('class1').child('class1B').mkdir()
        td.child('class2').child('class2B').mkdir()
        td.child('class2').child('class2B').child('class2Bi').mkdir()
        td.child('class3').child('class3B').mkdir()
        files = [
            td.child('class1', 'f1.jpg'),
            td.child('class1', 'f1a.jpg'),
            td.child('class1').child('class1B', 'f2.jpg'),
            td.child('class2', 'f5.jpg'),
            td.child('class2').child('class2B').child('class2Bi', 'f3.jpg'),
            td.child('class3', 'f4.jpg'),
        ]
        for path in files:
            # Context manager closes the handle even if the write fails.
            with open(path, 'w') as handle:
                handle.write(data)
        with TempFile(suffix='.zip') as fname:
            with zipfile.ZipFile(fname, 'w',
                                 compression=zipfile.ZIP_DEFLATED) as z:
                with cwd(td):
                    for path in files:
                        # Replace the temp-dir prefix with './' so member
                        # names do not contain the absolute temp path.
                        z.write(path.replace(td, './'))
            meta = parse_archive(Archive(fname))
            assert meta == {
                'data_type': 'IMAGES',
                'classes': {
                    'class1': 2,
                    'class1/class1B': 1,
                    'class2': 1,
                    'class2/class2B/class2Bi': 1,
                    'class3': 1
                }
            }
Exemple #27
0
def test_parse_with_csv_zip():
    """parse() on a zip holding one CSV must return the full CSV metadata.

    The CSV has two whitespace-separated rows of three integers. The
    expected metadata also records the member name under 'archive_path'.
    """
    with TempFile(suffix='.zip') as t:
        with zipfile.ZipFile(t, 'w') as z:
            z.writestr('test.csv', '1 2 3\n2 3 4')
        meta = parse(t)
    assert meta == {
        'data_type': 'GENERAL',
        'version': 3,
        'size': 12,
        'data_rows': 2,
        'empty_rows': 0,
        'invalid_rows': 0,
        'num_columns': 3,
        # Raw string: same value as before, without the invalid '\s'
        # escape sequence that newer Python versions warn about.
        'delimeter': r'\s+',
        'with_header': False,
        'archive_path': 'test.csv',
        'last_column_info': {
            'classes': {
                '3': 1,
                '4': 1
            },
            'distrib': {
                '3': 0.5,
                '4': 0.5
            },
            'max': 4.,
            'min': 3.,
            'unique': 2,
        },
        'histogram': [[1, 1], [1, 1], [1, 1]],
        'bins': [[1.0, 1.5, 2.0], [2.0, 2.5, 3.0], [3.0, 3.5, 4.0]],
        'uniques_per_col': [2, 2, 2],
        'classes': [[], [], []],
        'dtypes': ['i', 'i', 'i'],
        'locked': [False, False, False],
        'names': ['1', '2', '3'],
        'mean': [1.5, 2.5, 3.5],
        'stdev': [0.707107, 0.707107, 0.707107],
        'max': [2, 3, 4],
        'min': [1, 2, 3]
    }
Exemple #28
0
def test_parse_with_invalid_csv_with_null_bytes():
    """A cell holding the text backslash-x00 must be reported as invalid.

    The data has 11 rows of 4 fields. The bad cell in row 3, column 3 is
    the literal characters backslash, 'x', '0', '0' (the backslash is
    escaped in the source, so it is not a NUL byte). notify.admin_send must
    not have been called.
    """
    data = "1 2 3 4\n1 2 3 4\n5 6 \\x00 8\n7 6 5 2\n8 8 8 8\n2 2 3 9\n5 6 7 8\n12 13 45 56\n12 43 6 7\n9 9 9 0\n1 2 5 0\n"
    log = """
    Parsing CSV with whitespace (tab) as delimiter.
    Found 4 fields in first row, assume all the rows have this number of fields.
    Parsing...
    Analyzing data...
    No header found, first row contains data.
    Found 1 row with invalid values:
    - row 3, column 3
    Found 10 samples.
    """
    expected = '\n'.join(line.strip() for line in log.strip().split('\n'))
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with TempFile(data, suffix='.csv') as csv_path, global_notify(notify):
        parse(csv_path)
    # First positional argument of every notify.send call, in call order.
    sent = '\n'.join(call[0][0] for call in notify.send.call_args_list)
    assert sent == expected
    assert not notify.admin_send.called
def test_notify_archive_ts_valid():
    """parse_archive must report timeseries parsing for a .ts in a tar.gz.

    The archive mixes image-named files with class3/test.ts, which is
    rewritten with real timeseries content. The test checks the exact
    sequence of messages passed to notify.send and that notify.admin_send
    was never called.
    """
    data = gen_data()
    log = """
    Image dataset unpacked. Parsing...
    Timeseries data .//class3/test.ts unpacked. Parsing...
    First timestep has 3 inputs and 2 outputs. Applying this requirement to the entire file."""
    notify = mock.MagicMock()
    notify.send = mock.MagicMock()
    notify.admin_send = mock.MagicMock()
    with tempdir() as td:
        for class_dir in ('class1', 'class2', 'class3', 'class4'):
            td.child(class_dir).mkdir()
        files = [
            td.child('class1', 'f1.jpg'),
            td.child('class1', 'f2.JPG'),
            td.child('class2', 'f1.jpg'),
            td.child('class2', 'f2.bMp'),
            td.child('class2', 'f4jpg.Jpeg'),
            td.child('class3', 'test.ts'),
            td.child('f1.jpeg'),
        ]
        for path in files:
            # Context manager closes the handle even if the write fails.
            with open(path, 'w') as handle:
                handle.write(data)
        # Overwrite the placeholder content with real timeseries rows.
        with open(td.child('class3', 'test.ts'), 'w') as f:
            f.write('1,2,3|0,1; 2.3,4,1|0,1; 1.1, 0., 0.0|1,0\n\n2,2,2|0,1;')
        with TempFile(suffix='.tar.gz') as fname:
            with tarfile.open(fname, 'w:gz') as z:
                with cwd(td):
                    for path in files:
                        # Replace the temp-dir prefix with './' so member
                        # names do not contain the absolute temp path.
                        z.add(path.replace(td, './'))
            with global_notify(notify):
                parse_archive(Archive(fname))
    # First positional argument of every notify.send call, in call order.
    rval = '\n'.join(x[0][0] for x in notify.send.call_args_list)
    assert rval == '\n'.join(x.strip() for x in log.strip().split('\n'))
    assert not notify.admin_send.called
Exemple #30
0
def test_parse_with_csv():
    """parse() on a two-row comma-separated file must return full metadata.

    The rows are separated by a carriage return plus newline, and the file
    has no header.

    NOTE(review): an earlier function in this listing is also named
    test_parse_with_csv; if both live in one module this definition
    replaces the earlier one.
    """
    with TempFile('1,2,3\r\n2,3,4', suffix='.csv') as csv:
        meta = parse(csv)
    assert meta == {
        'data_type': 'GENERAL',
        'version': 3,
        'size': 12,
        'data_rows': 2,
        'empty_rows': 0,
        'invalid_rows': 0,
        'num_columns': 3,
        # Raw string: same value as before, without the invalid '\s'
        # escape sequence that newer Python versions warn about.
        'delimeter': r'\s*,\s*',
        'with_header': False,
        'last_column_info': {
            'classes': {
                '3': 1,
                '4': 1
            },
            'distrib': {
                '3': 0.5,
                '4': 0.5
            },
            'max': 4.,
            'min': 3.,
            'unique': 2,
        },
        'histogram': [[1, 1], [1, 1], [1, 1]],
        'bins': [[1.0, 1.5, 2.0], [2.0, 2.5, 3.0], [3.0, 3.5, 4.0]],
        'uniques_per_col': [2, 2, 2],
        'classes': [[], [], []],
        'dtypes': ['i', 'i', 'i'],
        'locked': [False, False, False],
        'names': ['1', '2', '3'],
        'mean': [1.5, 2.5, 3.5],
        'stdev': [0.707107, 0.707107, 0.707107],
        'max': [2, 3, 4],
        'min': [1, 2, 3]
    }