def test_archive_fileexistserror(archive_params, archive_dir):
    """Check that ``archive`` raises ``FileExistsError`` when overwrite is off.

    Every path returned by ``_get_expected_paths`` is pre-created with
    placeholder content before ``archive`` is called with
    ``context.overwrite = False``.
    """
    filename, schema = archive_params

    context = Context()
    context.archive = str(archive_dir)
    context.data = os.path.join(get_data_path(), filename)
    context.overwrite = False
    context.schema = schema
    init(context)

    source_frame = DataFrame(read_csv(context.data, dtype=str))
    occupied_paths = _get_expected_paths(
        context.archive, schema, source_frame, filename)

    # Occupy each expected path so that archiving collides with it.
    for target in occupied_paths:
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, mode='w') as handle:
            handle.write('content')

    with pytest.raises(FileExistsError):
        archive(context)

    # Remove the '#lock' file from the data directory. Any OSError raised by
    # the removal (including a missing file) propagates and fails the test.
    lock_path = os.path.join(get_data_path(), '#lock')
    os.remove(lock_path)
def test_build_iris_no_schema(self, archive_dir, cache_file, overwrite):
    """Archive ``iris.csv`` with an empty schema, build, and compare frames.

    The frame read back from the cache must equal the frame read from the
    source CSV, ignoring row/column order (``check_like=True``).
    """
    # A missing cache file is fine; any other OSError propagates.
    try:
        TestBuild._delete_cache(str(cache_file))
    except FileNotFoundError:
        pass

    context = Context()
    context.archive = str(archive_dir)
    context.cache = str(cache_file)
    context.data = os.path.join(get_data_path(), 'iris.csv')
    context.overwrite = overwrite
    context.schema = SortedDict()

    archive(context)
    lock_path = os.path.join(get_data_path(), '#lock')
    assert not os.path.exists(lock_path)

    expected_frame = DataFrame(read_csv(context.data, dtype=str))

    if context.overwrite:
        # Pre-populate the cache so build() has an existing file to replace.
        with open(context.cache, mode='w') as handle:
            handle.write('content')

    build(context)

    actual_frame = DataFrame(read_csv(context.cache, dtype=str))
    assert_frame_equal(expected_frame, actual_frame, check_like=True)
def test_init_fileexistserror(archive_dir, init_schema_fixture):
    """Check that ``init`` raises ``FileExistsError`` when overwrite is off.

    A file is written at ``context.schema_file`` inside the archive directory
    before ``init`` is called with ``context.overwrite = False``.
    """
    context = Context()
    context.archive = str(archive_dir)
    context.overwrite = False
    context.schema = init_schema_fixture

    # Create the schema file up front so init() finds it already present.
    schema_path = str(archive_dir.join(context.schema_file))
    with open(schema_path, mode='w') as handle:
        handle.write('content')

    with pytest.raises(FileExistsError):
        init(context)
def test_archive(archive_params, archive_dir, overwrite):
    """Archive a data file and verify the written paths and their contents.

    The CSV files found under the archive directory must match the paths from
    ``_get_expected_paths``, and their concatenated rows (sorted by all
    columns) must equal the sorted source data.
    """
    filename, schema = archive_params

    context = Context()
    context.archive = str(archive_dir)
    context.data = os.path.join(get_data_path(), filename)
    context.overwrite = overwrite
    context.schema = schema
    init(context)

    # Normalise row order so the comparison does not depend on it.
    expected_df = DataFrame(read_csv(context.data, dtype=str))
    expected_df = expected_df.sort_values(list(expected_df.columns))
    expected_df = expected_df.reset_index(drop=True)

    expected_paths = _get_expected_paths(
        context.archive, schema, expected_df, filename)

    if context.overwrite:
        # Pre-create every expected file so archive() has to overwrite it.
        for target in expected_paths:
            os.makedirs(os.path.dirname(target), exist_ok=True)
            with open(target, mode='w') as handle:
                handle.write('content')

    archive(context)
    lock_path = os.path.join(get_data_path(), '#lock')
    assert not os.path.exists(lock_path)

    # Collect every file whose name contains '.csv' under the archive.
    actual_paths = SortedList()
    actual_frame = DataFrame()
    for root, _, names in os.walk(context.archive):
        for name in names:
            if '.csv' not in name:
                continue
            csv_path = os.path.join(root, name)
            actual_paths.add(csv_path)
            chunk = DataFrame(read_csv(csv_path, dtype=str))
            actual_frame = concat([actual_frame, chunk])

    actual_frame = actual_frame.sort_values(list(actual_frame.columns))
    actual_frame = actual_frame.reset_index(drop=True)

    assert expected_paths == actual_paths
    assert_frame_equal(expected_df, actual_frame)
def test_archive_no_schema(archive_params, archive_dir, overwrite):
    """Archive a data file with an empty schema and verify the result.

    With ``context.schema = SortedDict()`` the only expected output is a
    single file named ``filename`` directly inside the archive directory; its
    rows (sorted by all columns) must equal the sorted source data.
    """
    filename, _ = archive_params

    context = Context()
    context.archive = str(archive_dir)
    context.data = os.path.join(get_data_path(), filename)
    context.overwrite = overwrite
    context.schema = SortedDict()

    # Normalise row order so the comparison does not depend on it.
    expected_df = DataFrame(read_csv(context.data, dtype=str))
    expected_df = expected_df.sort_values(list(expected_df.columns))
    expected_df = expected_df.reset_index(drop=True)

    expected_paths = SortedList([os.path.join(context.archive, filename)])

    if context.overwrite:
        for target in expected_paths:
            # NOTE(review): this creates a sub-directory of archive_dir named
            # after the archive directory itself — confirm this is intended.
            parent_name = os.path.basename(os.path.dirname(target))
            archive_dir.new().mkdir(parent_name)
            with open(target, mode='w') as handle:
                handle.write('content')

    archive(context)

    # Collect every file whose name contains '.csv' under the archive.
    actual_paths = SortedList()
    actual_frame = DataFrame()
    for root, _, names in os.walk(context.archive):
        for name in names:
            if '.csv' not in name:
                continue
            csv_path = os.path.join(root, name)
            actual_paths.add(csv_path)
            chunk = DataFrame(read_csv(csv_path, dtype=str))
            actual_frame = concat([actual_frame, chunk])

    actual_frame = actual_frame.sort_values(list(actual_frame.columns))
    actual_frame = actual_frame.reset_index(drop=True)

    assert expected_paths == actual_paths
    assert_frame_equal(expected_df, actual_frame)
def test_build_fileexistserror(self, archive_dir, cache_file):
    """Check that ``build`` raises ``FileExistsError`` when overwrite is off.

    ``iris.csv`` is archived with an empty schema, a cache file is written by
    hand, and ``build`` is then called with ``context.overwrite = False``.
    """
    # A missing cache file is fine; any other OSError propagates.
    try:
        TestBuild._delete_cache(str(cache_file))
    except FileNotFoundError:
        pass

    context = Context()
    context.archive = str(archive_dir)
    context.cache = str(cache_file)
    context.data = os.path.join(get_data_path(), 'iris.csv')
    context.overwrite = False
    context.schema = SortedDict()

    archive(context)
    lock_path = os.path.join(get_data_path(), '#lock')
    assert not os.path.exists(lock_path)

    # Occupy the cache path so build() collides with it.
    with open(context.cache, mode='w') as handle:
        handle.write('content')

    with pytest.raises(FileExistsError):
        build(context)
def test_build_iris(self, archive_dir, cache_file, overwrite):
    """Archive ``iris.csv`` with a schema, build, and compare frames exactly.

    The source CSV and the built cache are both read with ``Index`` as the
    index column and sorted by index. Before the final
    ``assert_frame_equal(..., check_exact=True)``, a diagnostic report of any
    differences is printed to help debug a failure.
    """
    # A missing cache file is fine; any other OSError propagates.
    try:
        TestBuild._delete_cache(str(cache_file))
    except FileNotFoundError:
        pass

    context = Context()
    context.archive = str(archive_dir)
    context.cache = str(cache_file)
    context.data = os.path.join(get_data_path(), 'iris.csv')
    context.overwrite = overwrite
    context.schema = SortedDict({'0': 'Name'})
    context.verbose = True
    init(context)
    archive(context)
    assert not os.path.exists(os.path.join(get_data_path(), '#lock'))

    expected_frame = DataFrame(read_csv(context.data, index_col='Index'))
    expected_frame.sort_index(inplace=True)

    if context.overwrite:
        # Pre-populate the cache so build() has an existing file to replace.
        with open(context.cache, mode='w') as f:
            f.write('content')

    build(context)

    actual_frame = DataFrame(read_csv(context.cache, index_col='Index'))
    actual_frame.sort_index(inplace=True)

    # --- Diagnostics only: nothing below may raise before the assertion. ---
    if expected_frame.shape != actual_frame.shape:
        print('unequal shapes: expected {0} found {1}'.format(
            expected_frame.shape, actual_frame.shape))
    if not expected_frame.index.equals(actual_frame.index):
        print('unequal indices: expected {0} found {1}'.format(
            expected_frame.index, actual_frame.index))
    if not expected_frame.columns.equals(actual_frame.columns):
        print('unequal columns: expected {0} found {1}'.format(
            expected_frame.columns, actual_frame.columns))

    actual_width = actual_frame.shape[1]
    for i, col in enumerate(expected_frame.columns):
        # Columns are compared by position; skip positions the actual frame
        # does not have so the report cannot fail with an IndexError.
        if col not in actual_frame or i >= actual_width:
            continue

        expected_col = expected_frame.iloc[:, i]
        actual_col = actual_frame.iloc[:, i]
        equal_lengths = len(expected_col) == len(actual_col)

        if expected_col.name != actual_col.name:
            print(' col {0}: unequal names: expected {1} found {2}'.
                  format(i, expected_col.name, actual_col.name))
        if not equal_lengths:
            print(' col {0}: unequal lengths: expected {1} found {2}'.
                  format(i, len(expected_col), len(actual_col)))
        if not expected_col.index.equals(actual_col.index):
            # The original used '{3}' here with only three arguments, which
            # raised IndexError instead of printing the report.
            print(' col {0}: unequal indices: expected {1} found {2}'.
                  format(i, expected_col.index, actual_col.index))
        if equal_lengths:
            for j in range(expected_col.size):
                # Positional access: plain [] on an integer index is a label
                # lookup and raises KeyError when labels are not 0..n-1.
                expected_value = expected_col.iloc[j]
                actual_value = actual_col.iloc[j]
                if expected_value != actual_value:
                    msg = ' col {0}: unequal @ row {1}: '
                    msg += 'expected {2} found {3}'
                    print(msg.format(i, j, expected_value, actual_value))

    assert_frame_equal(expected_frame, actual_frame, check_exact=True)
def init_context_fixture(archive_dir, init_schema_fixture, overwrite):
    """Return a ``Context`` configured from the given fixtures.

    The context's ``archive`` is the string form of ``archive_dir``; its
    ``schema`` and ``overwrite`` attributes are set from the matching
    arguments.
    """
    ctx = Context()
    ctx.archive = str(archive_dir)
    ctx.schema = init_schema_fixture
    ctx.overwrite = overwrite
    return ctx