def test_read_raw_data(tmpdir):
    """Exercise ``read_raw_data`` on a small zero-filled file for several dtypes.

    For each dtype the test writes a (2, 4) array to ``tmp.data`` inside
    ``tmpdir`` and then checks the plain read, the memmap read, and the
    error raised when the requested shape does not match the file.
    """
    from xmitgcm.utils import read_raw_data

    good_shape = (2, 4)
    bad_shape = (2, 5)
    candidate_dtypes = (np.dtype('f8'), np.dtype('f4'), np.dtype('i4'))

    for dtype in candidate_dtypes:
        # Reference array and its on-disk copy.
        reference = np.zeros(good_shape, dtype)
        target = tmpdir.join("tmp.data")
        target.write_binary(reference.tobytes())
        path = str(target)

        # Default read must give back the same values as a plain ndarray.
        # A memmap is also an ndarray (but not the other way round), hence
        # the two separate checks.
        loaded = read_raw_data(path, dtype, good_shape)
        np.testing.assert_allclose(loaded, reference)
        assert isinstance(loaded, np.ndarray)
        assert not isinstance(loaded, np.memmap)

        # Requesting a memmap must actually return one.
        mapped = read_raw_data(path, dtype, good_shape, use_mmap=True)
        assert isinstance(mapped, np.memmap)

        # A shape that does not match the file content is reported as IOError.
        with pytest.raises(IOError):
            read_raw_data(path, dtype, bad_shape)
def test_llc_facets_2d_to_compact(llc_mds_datadirs):
    """Round-trip XC through facets and compact form and compare with XC.data.

    The dataset's ``XC`` variable is split into facets, flattened back to
    the compact layout, and compared against the raw ``XC.data`` file:
    first by length/min/max, then byte-for-byte (md5) after writing the
    compact data back to disk.
    """
    from xmitgcm.utils import llc_facets_2d_to_compact, get_extra_metadata
    from xmitgcm.utils import rebuild_llc_facets, read_raw_data
    from xmitgcm.utils import write_to_binary
    from xmitgcm import open_mdsdataset

    dirname, expected = llc_mds_datadirs
    xc_file = os.path.join(dirname, 'XC.data')

    # open dataset
    ds = open_mdsdataset(dirname,
                         iters=expected['test_iternum'],
                         geometry=expected['geometry'])

    # the leading (time) entry of the expected shape is not needed here
    _, nfaces, ny, nx = expected['shape']
    md = get_extra_metadata(domain=expected['geometry'], nx=nx)

    # split in facets, then flatten back to the compact layout
    facets = rebuild_llc_facets(ds['XC'], md)
    flatdata = llc_facets_2d_to_compact(facets, md)

    # compare with raw data
    raw = read_raw_data(xc_file, np.dtype('>f'), (nfaces, ny, nx))
    flatraw = raw.flatten()

    assert len(flatdata) == len(flatraw)
    assert flatdata.min() == flatraw.min()
    assert flatdata.max() == flatraw.max()

    # write new file and compare checksums; the scratch file is removed even
    # when an assertion fails so it cannot leak into later test runs
    outfile = 'tmp.bin'
    try:
        write_to_binary(flatdata, outfile, dtype=np.dtype('f'))
        md5new = file_md5_checksum(outfile)
        md5old = file_md5_checksum(xc_file)
        assert md5new == md5old
    finally:
        if os.path.exists(outfile):
            os.remove(outfile)
def read_pigbin_xy(fname, ds):
    """Read a binary file with dimensions (YC,XC) and return dataarray

    The file content is decoded as big-endian float64 and returned in the
    host's native float64 representation.

    Parameters
    ----------
    fname : str
        full path filename to binary file
    ds : xarray Dataset
        with coordinates to create the DataArray; the shape, coordinates
        and dimensions are taken from ``ds.Depth``

    Returns
    -------
    xda : xarray DataArray
    """
    # Declare the byte order in the dtype instead of reading native-order
    # values and byte-swapping afterwards: the result is identical on
    # little-endian hosts and stays correct on big-endian ones.
    raw = read_raw_data(fname, np.dtype('>f8'), ds.Depth.shape)
    arr = raw.astype(np.float64)
    return xr.DataArray(arr, ds.Depth.coords, ds.Depth.dims)
def read_pigbin_yz(fname, ds):
    """Read a binary file with dimensions (Z,YC) and return dataarray

    The file content is decoded as big-endian float64 and returned in the
    host's native float64 representation.

    Parameters
    ----------
    fname : str
        full path filename to binary file
    ds : xarray Dataset
        with coordinates to create the DataArray; ``ds.Z`` and ``ds.YC``
        provide both the expected shape and the coordinates

    Returns
    -------
    xda : xarray DataArray
    """
    shape = [len(ds.Z), len(ds.YC)]
    # Declare the byte order in the dtype instead of reading native-order
    # values and byte-swapping afterwards: the result is identical on
    # little-endian hosts and stays correct on big-endian ones.
    raw = read_raw_data(fname, np.dtype('>f8'), shape)
    arr = raw.astype(np.float64)
    return xr.DataArray(arr, {'Z': ds.Z, 'YC': ds.YC}, ('Z', 'YC'))
def test_read_raw_data(tmpdir, dtype):
    """Check ``read_raw_data``: whole-file reads, partial reads and error paths.

    ``dtype`` is supplied by the test setup; the same scenarios are run
    for the plain (ndarray) and the memory-mapped read.
    """
    from xmitgcm.utils import read_raw_data

    def dump(array):
        """Write ``array`` to ``tmp.data`` in ``tmpdir``; return its path."""
        target = tmpdir.join("tmp.data")
        target.write_binary(array.tobytes())
        return str(target)

    # Extra keyword arguments for the plain read and the memmap read, in the
    # order every error scenario below is exercised.
    read_variants = ({}, {'use_mmap': True})

    # ---- whole-file read -------------------------------------------------
    small_shape = (2, 4)
    reference = np.zeros(small_shape, dtype)
    fname = dump(reference)

    data = read_raw_data(fname, dtype, small_shape)
    np.testing.assert_allclose(data, reference)
    # a memmap is also an ndarray (but not vice versa), so check both
    assert isinstance(data, np.ndarray)
    assert not isinstance(data, np.memmap)

    mdata = read_raw_data(fname, dtype, small_shape, use_mmap=True)
    assert isinstance(mdata, np.memmap)

    # a shape that does not match the file must be reported
    with pytest.raises(IOError):
        read_raw_data(fname, dtype, (2, 5))

    # ---- optional functionalities: offset / partial_read ----------------
    shape = (5, 15, 10)
    shape_subset = (15, 10)

    # every 2D slab k is filled with the constant value k
    slab_values = np.arange(shape[0], dtype=dtype)
    reference = np.zeros(shape, dtype)
    reference[...] = slab_values[:, np.newaxis, np.newaxis]
    fname = dump(reference)

    slab_nbytes = shape[1] * shape[2] * dtype.itemsize
    for k in np.arange(shape[0]):
        offset = k * slab_nbytes

        data = read_raw_data(fname, dtype, shape_subset,
                             offset=offset, partial_read=True)
        np.testing.assert_allclose(data, reference[k, :, :])
        assert isinstance(data, np.ndarray)
        assert not isinstance(data, np.memmap)

        mdata = read_raw_data(fname, dtype, shape_subset,
                              offset=offset, partial_read=True,
                              use_mmap=True)
        assert isinstance(mdata, np.memmap)

    # ---- it must break when it should ------------------------------------
    # wrong shape for a full (non-partial) read
    for variant in read_variants:
        with pytest.raises(IOError):
            read_raw_data(fname, dtype, shape_subset,
                          offset=0, partial_read=False, **variant)

    # an offset combined with a full (non-partial) read
    for variant in read_variants:
        with pytest.raises(ValueError):
            read_raw_data(fname, dtype, shape_subset,
                          offset=4, partial_read=False, **variant)

    # offset is too big: it equals the byte size of the whole test array
    total_nbytes = shape[0] * shape[1] * shape[2] * dtype.itemsize
    for variant in read_variants:
        with pytest.raises(ValueError):
            read_raw_data(fname, dtype, shape,
                          offset=total_nbytes, partial_read=True,
                          **variant)
def test_get_grid_from_input(all_grid_datadirs, usedask):
    """Check the dataset built by ``get_grid_from_input`` from grid files.

    Verifies the returned types, that every expected grid variable is
    present with the expected shape and, for the 'llc' geometry, that the
    XC/YC extrema match those read directly from the five facet files.
    Finally checks that 'llc' without extra metadata raises ValueError.
    """
    from xmitgcm.utils import get_grid_from_input, get_extra_metadata
    from xmitgcm.utils import read_raw_data

    dirname, expected = all_grid_datadirs
    gridfile = expected['gridfile']
    grid_template = os.path.join(dirname, gridfile)

    md = get_extra_metadata(domain=expected['domain'], nx=expected['nx'])
    ds = get_grid_from_input(grid_template,
                             geometry=expected['geometry'],
                             dtype=np.dtype('d'), endian='>',
                             use_dask=usedask,
                             extra_metadata=md)

    # test types
    assert isinstance(ds, xarray.Dataset)
    assert isinstance(ds['XC'], xarray.DataArray)

    if usedask:
        ds.load()

    # check all variables are in
    expected_variables = [
        'XC', 'YC', 'DXF', 'DYF', 'RAC', 'XG', 'YG', 'DXV', 'DYU', 'RAZ',
        'DXC', 'DYC', 'RAW', 'RAS', 'DXG', 'DYG',
    ]
    for var in expected_variables:
        assert isinstance(ds[var], xarray.DataArray)
        assert ds[var].values.shape == expected['shape']

    # check we don't leave points behind
    if expected['geometry'] == 'llc':
        nx = expected['nx'] + 1
        nvars = len(expected_variables)
        facet_dtype = np.dtype('>d')
        sizeofd = facet_dtype.itemsize  # 8 bytes per value

        # (facet number, extra read options): facets 4 and 5 are read in
        # Fortran order, the first three with the reader's default order.
        facet_options = (
            ('001', {}),
            ('002', {}),
            ('003', {}),
            ('004', {'order': 'F'}),
            ('005', {'order': 'F'}),
        )

        xc_parts = []
        yc_parts = []
        for facet, options in facet_options:
            facet_file = os.path.join(dirname,
                                      gridfile.replace('<NFACET>', facet))
            # the row count follows from the file size: nvars records of
            # (ny, nx) values each
            ny = os.path.getsize(facet_file) // (sizeofd * nvars * nx)

            # first record of the file is compared against XC, the second
            # (one record further in) against YC
            xc_facet = read_raw_data(facet_file, dtype=facet_dtype,
                                     shape=(ny, nx), partial_read=True,
                                     **options)
            yc_facet = read_raw_data(facet_file, dtype=facet_dtype,
                                     shape=(ny, nx), partial_read=True,
                                     offset=nx * ny * sizeofd,
                                     **options)

            # drop the last row and column before comparing
            xc_parts.append(xc_facet[:-1, :-1].flatten())
            yc_parts.append(yc_facet[:-1, :-1].flatten())

        xc = np.concatenate(xc_parts)
        yc = np.concatenate(yc_parts)

        xc_from_ds = ds['XC'].values.flatten()
        yc_from_ds = ds['YC'].values.flatten()

        assert xc.min() == xc_from_ds.min()
        assert xc.max() == xc_from_ds.max()
        assert yc.min() == yc_from_ds.min()
        assert yc.max() == yc_from_ds.max()

    # passing llc without metadata should fail
    if expected['geometry'] == 'llc':
        with pytest.raises(ValueError):
            get_grid_from_input(grid_template,
                                geometry=expected['geometry'],
                                dtype=np.dtype('d'), endian='>',
                                use_dask=False,
                                extra_metadata=None)