예제 #1
0
def test_archive_fileexistserror(archive_params, archive_dir):
    """Expect ``archive`` to raise ``FileExistsError`` without overwrite.

    Every path returned by ``_get_expected_paths`` is created up front
    with placeholder content, and ``context.overwrite`` is ``False``, so
    the ``archive`` call must fail with ``FileExistsError``.

    Args:
        archive_params: ``(filename, schema)`` pair supplied by a fixture.
        archive_dir: Fixture whose string form is used as the archive
            directory.
    """
    filename, schema = archive_params

    context = Context()
    context.archive = str(archive_dir)
    context.data = os.path.join(get_data_path(), filename)
    context.overwrite = False
    context.schema = schema

    init(context)

    source_frame = DataFrame(read_csv(context.data, dtype=str))
    occupied_paths = _get_expected_paths(
        context.archive,
        schema,
        source_frame,
        filename
    )

    # Occupy every target path before archiving.
    for target in occupied_paths:
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, mode='w') as handle:
            handle.write('content')

    with pytest.raises(FileExistsError):
        archive(context)

    # Remove the '#lock' file from the data directory (presumably left
    # behind by the failed archive call -- confirm).  Any OSError raised
    # here, including a missing lock file, propagates and fails the test.
    os.remove(os.path.join(get_data_path(), '#lock'))
예제 #2
0
    def test_build_iris_no_schema(self, archive_dir, cache_file, overwrite):
        """Archive ``iris.csv`` with an empty schema, build, and compare.

        The cache written by ``build`` is read back as strings and must
        match the source CSV (row/column order ignored via
        ``check_like=True``).

        Args:
            archive_dir: Fixture whose string form is the archive directory.
            cache_file: Fixture whose string form is the cache file path.
            overwrite: Value assigned to ``context.overwrite``; when truthy
                a placeholder cache file is written before ``build`` runs.
        """
        # A missing cache file is acceptable; any other error propagates.
        try:
            TestBuild._delete_cache(str(cache_file))
        except FileNotFoundError:
            pass

        context = Context()
        context.archive = str(archive_dir)
        context.cache = str(cache_file)
        context.data = os.path.join(get_data_path(), 'iris.csv')
        context.overwrite = overwrite
        context.schema = SortedDict()

        archive(context)
        lock_path = os.path.join(get_data_path(), '#lock')
        assert not os.path.exists(lock_path)

        source_frame = DataFrame(read_csv(context.data, dtype=str))

        # Put a file in the way so that build has something to overwrite.
        if context.overwrite:
            with open(context.cache, mode='w') as handle:
                handle.write('content')

        build(context)

        built_frame = DataFrame(read_csv(context.cache, dtype=str))

        assert_frame_equal(source_frame, built_frame, check_like=True)
예제 #3
0
def test_archive(archive_params, archive_dir, overwrite):
    """Archive a data file and verify the CSV files found afterwards.

    The source CSV is loaded as strings and sorted on all of its columns.
    After ``archive`` runs, every file under the archive directory whose
    name contains ``'.csv'`` is loaded and concatenated; the resulting
    paths and (sorted) contents must equal the expected ones.

    Args:
        archive_params: ``(filename, schema)`` pair supplied by a fixture.
        archive_dir: Fixture whose string form is the archive directory.
        overwrite: Value assigned to ``context.overwrite``; when truthy
            the expected paths are pre-created with placeholder content.
    """
    filename, schema = archive_params

    context = Context()
    context.archive = str(archive_dir)
    context.data = os.path.join(get_data_path(), filename)
    context.overwrite = overwrite
    context.schema = schema

    init(context)

    expected_df = DataFrame(read_csv(context.data, dtype=str))
    expected_df = expected_df.sort_values(list(expected_df.columns))
    expected_df = expected_df.reset_index(drop=True)

    expected_paths = _get_expected_paths(context.archive, schema,
                                         expected_df, filename)

    # Pre-populate the targets so the archive step has files to replace.
    if context.overwrite:
        for target in expected_paths:
            os.makedirs(os.path.dirname(target), exist_ok=True)
            with open(target, mode='w') as handle:
                handle.write('content')

    archive(context)
    lock_path = os.path.join(get_data_path(), '#lock')
    assert not os.path.exists(lock_path)

    # Gather every CSV below the archive directory.
    actual_frame = DataFrame()
    actual_paths = SortedList()
    for dirpath, _, names in os.walk(context.archive):
        for name in names:
            if '.csv' not in name:
                continue
            csv_path = os.path.join(dirpath, name)
            actual_paths.add(csv_path)
            loaded = DataFrame(read_csv(csv_path, dtype=str))
            actual_frame = concat([actual_frame, loaded])

    # Normalise row order the same way as the expected frame.
    actual_frame = actual_frame.sort_values(list(actual_frame.columns))
    actual_frame = actual_frame.reset_index(drop=True)

    assert expected_paths == actual_paths
    assert_frame_equal(expected_df, actual_frame)
예제 #4
0
def test_archive_no_schema(archive_params, archive_dir, overwrite):
    """Archive a data file with an empty schema and verify the result.

    With an empty ``SortedDict`` as schema the only expected path is
    ``<archive>/<filename>``.  After ``archive`` runs, every file under
    the archive directory whose name contains ``'.csv'`` is loaded and
    concatenated; paths and (sorted) contents must equal the expected
    ones.

    Args:
        archive_params: ``(filename, schema)`` pair supplied by a fixture;
            the schema part is ignored here.
        archive_dir: Fixture used both via ``str()`` as the archive
            directory and via its ``new()``/``mkdir()`` methods
            (presumably a py.path-style object -- confirm).
        overwrite: Value assigned to ``context.overwrite``; when truthy
            the expected path is pre-created with placeholder content.
    """
    filename, _ = archive_params

    context = Context()
    context.archive = str(archive_dir)
    context.data = os.path.join(get_data_path(), filename)
    context.overwrite = overwrite
    context.schema = SortedDict()

    expected_df = DataFrame(read_csv(context.data, dtype=str))
    expected_df = expected_df.sort_values(list(expected_df.columns))
    expected_df = expected_df.reset_index(drop=True)

    expected_paths = SortedList([os.path.join(context.archive, filename)])

    # Pre-populate the target so the archive step has a file to replace.
    if context.overwrite:
        for target in expected_paths:
            base = archive_dir.new()
            parent_name = os.path.basename(os.path.dirname(target))
            base.mkdir(parent_name)
            with open(target, mode='w') as handle:
                handle.write('content')

    archive(context)

    # Gather every CSV below the archive directory.
    actual_frame = DataFrame()
    actual_paths = SortedList()
    for dirpath, _, names in os.walk(context.archive):
        for name in names:
            if '.csv' not in name:
                continue
            csv_path = os.path.join(dirpath, name)
            actual_paths.add(csv_path)
            loaded = DataFrame(read_csv(csv_path, dtype=str))
            actual_frame = concat([actual_frame, loaded])

    # Normalise row order the same way as the expected frame.
    actual_frame = actual_frame.sort_values(list(actual_frame.columns))
    actual_frame = actual_frame.reset_index(drop=True)

    assert expected_paths == actual_paths
    assert_frame_equal(expected_df, actual_frame)
예제 #5
0
    def test_build_fileexistserror(self, archive_dir, cache_file):
        """Expect ``build`` to raise ``FileExistsError`` without overwrite.

        ``iris.csv`` is archived with an empty schema, a placeholder cache
        file is written, and ``build`` is then called with
        ``context.overwrite`` set to ``False``.

        Args:
            archive_dir: Fixture whose string form is the archive directory.
            cache_file: Fixture whose string form is the cache file path.
        """
        # A missing cache file is acceptable; any other error propagates.
        try:
            TestBuild._delete_cache(str(cache_file))
        except FileNotFoundError:
            pass

        context = Context()
        context.archive = str(archive_dir)
        context.cache = str(cache_file)
        context.data = os.path.join(get_data_path(), 'iris.csv')
        context.overwrite = False
        context.schema = SortedDict()

        archive(context)
        lock_path = os.path.join(get_data_path(), '#lock')
        assert not os.path.exists(lock_path)

        # Occupy the cache path so that build finds an existing file.
        with open(context.cache, mode='w') as handle:
            handle.write('content')

        with pytest.raises(FileExistsError):
            build(context)
예제 #6
0
    def test_build_iris(self, archive_dir, cache_file, overwrite):
        """Archive ``iris.csv`` with a schema, build, and compare exactly.

        The source CSV and the cache written by ``build`` are both read
        with ``'Index'`` as index column and sorted by index.  Before the
        final ``assert_frame_equal`` a series of diagnostics is printed
        for any shape, index, column or cell mismatch, so a failing run
        shows where the frames diverge.

        Args:
            archive_dir: Fixture whose string form is the archive directory.
            cache_file: Fixture whose string form is the cache file path.
            overwrite: Value assigned to ``context.overwrite``; when truthy
                a placeholder cache file is written before ``build`` runs.
        """
        # A missing cache file is acceptable; any other error propagates.
        try:
            TestBuild._delete_cache(str(cache_file))
        except FileNotFoundError:
            pass

        context = Context()
        context.archive = str(archive_dir)
        context.cache = str(cache_file)
        context.data = os.path.join(get_data_path(), 'iris.csv')
        context.overwrite = overwrite
        context.schema = SortedDict({'0': 'Name'})
        context.verbose = True

        init(context)
        archive(context)
        assert not os.path.exists(os.path.join(get_data_path(), '#lock'))

        expected_frame = DataFrame(read_csv(context.data, index_col='Index'))
        expected_frame.sort_index(inplace=True)

        # Put a file in the way so that build has something to overwrite.
        if context.overwrite:
            with open(context.cache, mode='w') as f:
                f.write('content')

        build(context)

        actual_frame = DataFrame(read_csv(context.cache, index_col='Index'))
        actual_frame.sort_index(inplace=True)

        # Frame-level diagnostics.
        equal_shapes = expected_frame.shape == actual_frame.shape
        equal_indices = (expected_frame.index).equals(actual_frame.index)
        equal_columns = (expected_frame.columns).equals(actual_frame.columns)

        if not equal_shapes:
            print('unequal shapes: expected {0} found {1}'.format(
                expected_frame.shape, actual_frame.shape))

        if not equal_indices:
            print('unequal indices: expected {0} found {1}'.format(
                expected_frame.index, actual_frame.index))

        if not equal_columns:
            print('unequal columns: expected {0} found {1}'.format(
                expected_frame.columns, actual_frame.columns))

        # Column-level diagnostics, compared position by position.
        for i, col in enumerate(expected_frame.columns):
            if col not in actual_frame:
                continue

            expected_col = expected_frame.iloc[:, i]
            actual_col = actual_frame.iloc[:, i]
            equal_names = expected_col.name == actual_col.name
            equal_lengths = len(expected_col) == len(actual_col)
            # Named separately so the frame-level flag is not overwritten.
            equal_col_indices = (expected_col.index).equals(actual_col.index)

            if not equal_names:
                print('  col {0}: unequal names: expected {1} found {2}'.
                      format(i, expected_col.name, actual_col.name))

            if not equal_lengths:
                print('  col {0}: unequal lengths: expected {1} found {2}'.
                      format(i, len(expected_col), len(actual_col)))

            if not equal_col_indices:
                # The placeholder here used to be {3}, which raised
                # IndexError because only three arguments are supplied.
                print('  col {0}: unequal indices: expected {1} found {2}'.
                      format(i, expected_col.index, actual_col.index))

            if equal_lengths:
                # NOTE(review): ``col[j]`` is a label lookup on an integer
                # index; this assumes the 'Index' labels cover
                # 0..size-1 -- confirm against the data file.
                for j in range(expected_col.size):
                    if expected_col[j] != actual_col[j]:
                        msg = '  col {0}: unequal @ row {1}: '
                        msg += 'expected {2} found {3}'
                        print(
                            msg.format(i, j, expected_col[j],
                                       actual_col[j]))

        assert_frame_equal(expected_frame, actual_frame, check_exact=True)