Ejemplo n.º 1
0
def test_read_raw_data(tmpdir):
    """Check our utility for reading raw data.

    For every dtype in the list below, a small array is written to a
    temporary file and read back, both as a plain ndarray and as a memmap.
    A read requesting a shape that does not match the file size must raise
    ``IOError``; this is verified for each dtype, not only the last one.

    Parameters
    ----------
    tmpdir : py.path.local
        pytest fixture providing a per-test temporary directory
    """

    from xmitgcm.utils import read_raw_data
    shape = (2, 4)
    # holds more elements than what is written to disk
    wrongshape = (2, 5)
    for dtype in [np.dtype('f8'), np.dtype('f4'), np.dtype('i4')]:
        # create some test data; distinct values so that a misread
        # (wrong item size or ordering) cannot go unnoticed
        testdata = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape)
        # write to a file
        datafile = tmpdir.join("tmp.data")
        datafile.write_binary(testdata.tobytes())
        fname = str(datafile)
        # now test the function
        data = read_raw_data(fname, dtype, shape)
        np.testing.assert_allclose(data, testdata)
        # interestingly, memmaps are also ndarrays, but not vice versa
        assert isinstance(data, np.ndarray)
        assert not isinstance(data, np.memmap)
        # check memmap
        mdata = read_raw_data(fname, dtype, shape, use_mmap=True)
        assert isinstance(mdata, np.memmap)

        # make sure errors are correct
        with pytest.raises(IOError):
            read_raw_data(fname, dtype, wrongshape)
Ejemplo n.º 2
0
def test_llc_facets_2d_to_compact(llc_mds_datadirs):
    """Check that facets converted to compact form reproduce the raw file.

    The XC field of the dataset is split into facets, flattened back into
    compact form, and compared with the content of ``XC.data``: first by
    length and extrema, then by writing it to disk and comparing md5
    checksums with the original file.

    Parameters
    ----------
    llc_mds_datadirs : tuple
        fixture yielding ``(dirname, expected)`` for an llc test dataset
    """
    import shutil
    import tempfile

    from xmitgcm.utils import llc_facets_2d_to_compact, get_extra_metadata
    from xmitgcm.utils import rebuild_llc_facets, read_raw_data
    from xmitgcm.utils import write_to_binary
    from xmitgcm import open_mdsdataset

    dirname, expected = llc_mds_datadirs
    rawfile = os.path.join(dirname, 'XC.data')

    # open dataset
    ds = open_mdsdataset(dirname,
                         iters=expected['test_iternum'],
                         geometry=expected['geometry'])

    nt, nfaces, ny, nx = expected['shape']
    md = get_extra_metadata(domain=expected['geometry'], nx=nx)
    # split in facets
    facets = rebuild_llc_facets(ds['XC'], md)
    flatdata = llc_facets_2d_to_compact(facets, md)
    # compare with raw data
    raw = read_raw_data(rawfile, np.dtype('>f'), (nfaces, ny, nx))
    flatraw = raw.flatten()

    assert len(flatdata) == len(flatraw)
    assert flatdata.min() == flatraw.min()
    assert flatdata.max() == flatraw.max()

    # write new file into a private scratch directory so that nothing is
    # left in the working directory, even when the checksum assertion fails
    scratch = tempfile.mkdtemp()
    try:
        newfile = os.path.join(scratch, 'tmp.bin')
        write_to_binary(flatdata, newfile, dtype=np.dtype('f'))
        md5new = file_md5_checksum(newfile)
        md5old = file_md5_checksum(rawfile)
        assert md5new == md5old
    finally:
        shutil.rmtree(scratch)
Ejemplo n.º 3
0
def read_pigbin_xy(fname, ds):
    """Read a binary file with dimensions (YC,XC)
    and return dataarray

    Parameters
    ----------
    fname : str
        full path filename to binary file
    ds : xarray Dataset
        with coordinates to create the DataArray; the shape, coordinates
        and dimensions are taken from ``ds.Depth``

    Returns
    -------
    xda : xarray DataArray
        float64 values in native byte order
    """
    # Assumes the file stores big-endian float64: that is what the former
    # "read as native, then byteswap()" implied on little-endian hosts.
    # Declaring the byte order in the dtype gives the same values there and
    # no longer depends on the endianness of the machine running the code.
    raw = read_raw_data(fname, np.dtype('>f8'), ds.Depth.shape)
    # convert to the native float64 representation, as returned before
    arr = raw.astype(np.float64)
    return xr.DataArray(arr, ds.Depth.coords, ds.Depth.dims)
Ejemplo n.º 4
0
def read_pigbin_yz(fname, ds):
    """Read a binary file with dimensions (Z,YC)
    and return dataarray

    Parameters
    ----------
    fname : str
        full path filename to binary file
    ds : xarray Dataset
        with coordinates to create the DataArray; ``ds.Z`` and ``ds.YC``
        provide both the expected shape and the coordinates

    Returns
    -------
    xda : xarray DataArray
        float64 values in native byte order, with dimensions ('Z', 'YC')
    """
    shape = (len(ds.Z), len(ds.YC))
    # Assumes the file stores big-endian float64: that is what the former
    # "read as native, then byteswap()" implied on little-endian hosts.
    # Declaring the byte order in the dtype gives the same values there and
    # no longer depends on the endianness of the machine running the code.
    raw = read_raw_data(fname, np.dtype('>f8'), shape)
    # convert to the native float64 representation, as returned before
    arr = raw.astype(np.float64)
    return xr.DataArray(arr, {'Z': ds.Z, 'YC': ds.YC}, ('Z', 'YC'))
Ejemplo n.º 5
0
def test_read_raw_data(tmpdir, dtype):
    """Check our utility for reading raw data.

    Two scenarios are covered:

    1. a whole-file read of a small 2D array, as ndarray and as memmap,
       including the ``IOError`` raised for a mismatching shape;
    2. partial reads of one 2D slab at a time out of a 3D file using
       ``offset`` / ``partial_read``, followed by the error cases
       (wrong shape, offset without partial read, offset past end of file).

    Parameters
    ----------
    tmpdir : py.path.local
        pytest fixture providing a per-test temporary directory
    dtype : numpy.dtype
        data type under test (its ``itemsize`` is used to compute offsets)
    """

    from xmitgcm.utils import read_raw_data
    shape = (2, 4)
    # create some test data
    testdata = np.zeros(shape, dtype)
    # write to a file
    datafile = tmpdir.join("tmp.data")
    datafile.write_binary(testdata.tobytes())
    fname = str(datafile)
    # now test the function
    data = read_raw_data(fname, dtype, shape)
    np.testing.assert_allclose(data, testdata)
    # interestingly, memmaps are also ndarrays, but not vice versa
    assert isinstance(data, np.ndarray) and not isinstance(data, np.memmap)
    # check memmap
    mdata = read_raw_data(fname, dtype, shape, use_mmap=True)
    assert isinstance(mdata, np.memmap)

    # make sure errors are correct
    wrongshape = (2, 5)
    with pytest.raises(IOError):
        read_raw_data(fname, dtype, wrongshape)

    # test optional functionalities
    shape = (5, 15, 10)
    shape_subset = shape[1:]
    # create some test data: slab k is uniformly filled with the value k,
    # so reading at the wrong offset yields a detectably different slab
    testdata = np.zeros(shape, dtype)
    testdata[...] = np.arange(shape[0],
                              dtype=dtype)[:, np.newaxis, np.newaxis]
    # write to a file
    datafile = tmpdir.join("tmp.data")
    datafile.write_binary(testdata.tobytes())
    fname = str(datafile)

    slab_nbytes = shape[1] * shape[2] * dtype.itemsize
    file_nbytes = shape[0] * slab_nbytes

    # now test the function, one slab at a time
    for k in range(shape[0]):
        offset = k * slab_nbytes
        data = read_raw_data(fname,
                             dtype,
                             shape_subset,
                             offset=offset,
                             partial_read=True)
        np.testing.assert_allclose(data, testdata[k, :, :])
        assert isinstance(data, np.ndarray) and not isinstance(data, np.memmap)
        # check memmap
        mdata = read_raw_data(fname,
                              dtype,
                              shape_subset,
                              offset=offset,
                              partial_read=True,
                              use_mmap=True)
        assert isinstance(mdata, np.memmap)

    # test it breaks when it should; these checks do not depend on the slab
    # index, so they run once, for the default reader and for the memmap one
    for mode in ({}, {'use_mmap': True}):
        # read with wrong shape
        with pytest.raises(IOError):
            read_raw_data(fname,
                          dtype,
                          shape_subset,
                          offset=0,
                          partial_read=False,
                          **mode)
        # use offset when trying to read global file
        with pytest.raises(ValueError):
            read_raw_data(fname,
                          dtype,
                          shape_subset,
                          offset=4,
                          partial_read=False,
                          **mode)
        # offset is too big (points right past the end of the file)
        with pytest.raises(ValueError):
            read_raw_data(fname,
                          dtype,
                          shape,
                          offset=file_nbytes,
                          partial_read=True,
                          **mode)
Ejemplo n.º 6
0
def test_get_grid_from_input(all_grid_datadirs, usedask):
    """Check the grid dataset built from the model's grid input files.

    The dataset returned by ``get_grid_from_input`` must be an xarray
    Dataset holding every expected grid variable with the expected shape.
    For llc geometries, XC and YC are additionally re-read straight from the
    five facet files and their extrema compared with those of the dataset,
    and a call without extra metadata must raise ``ValueError``.

    Parameters
    ----------
    all_grid_datadirs : tuple
        fixture yielding ``(dirname, expected)`` for a grid test case
    usedask : bool
        fixture selecting whether the grid is read through dask
    """
    from xmitgcm.utils import get_grid_from_input, get_extra_metadata
    from xmitgcm.utils import read_raw_data
    dirname, expected = all_grid_datadirs
    gridfile = expected['gridfile']
    md = get_extra_metadata(domain=expected['domain'], nx=expected['nx'])
    ds = get_grid_from_input(dirname + '/' + gridfile,
                             geometry=expected['geometry'],
                             dtype=np.dtype('d'),
                             endian='>',
                             use_dask=usedask,
                             extra_metadata=md)
    # test types
    assert isinstance(ds, xarray.Dataset)
    assert isinstance(ds['XC'], xarray.DataArray)

    if usedask:
        ds.load()

    # check all variables are in
    expected_variables = [
        'XC', 'YC', 'DXF', 'DYF', 'RAC', 'XG', 'YG', 'DXV', 'DYU', 'RAZ',
        'DXC', 'DYC', 'RAW', 'RAS', 'DXG', 'DYG'
    ]

    for var in expected_variables:
        assert isinstance(ds[var], xarray.DataArray)
        assert ds[var].values.shape == expected['shape']

    if expected['geometry'] != 'llc':
        return

    # check we don't leave points behind
    nx = expected['nx'] + 1
    nvars = len(expected_variables)
    sizeofd = 8  # bytes per value, matching the '>d' dtype used below

    xc_parts = []
    yc_parts = []
    for facet in range(1, 6):
        facetfile = dirname + '/' + gridfile.replace('<NFACET>',
                                                     '%03d' % facet)
        # number of rows, deduced from the file size assuming nvars records
        # of (ny, nx) doubles each
        ny = os.path.getsize(facetfile) // (sizeofd * nvars * nx)

        read_kwargs = dict(dtype=np.dtype('>d'),
                           shape=(ny, nx),
                           partial_read=True)
        if facet >= 4:
            # facets 4 and 5 are read in Fortran order
            # NOTE(review): presumably because these facets are stored
            # rotated -- confirm against the grid file layout
            read_kwargs['order'] = 'F'

        # XC is read from the start of the file, YC one record further
        xc_facet = read_raw_data(facetfile, **read_kwargs)
        yc_facet = read_raw_data(facetfile,
                                 offset=nx * ny * sizeofd,
                                 **read_kwargs)

        # drop the last row and column before comparing
        # (nx is expected['nx'] + 1, i.e. one extra point per direction)
        xc_parts.append(xc_facet[:-1, :-1].flatten())
        yc_parts.append(yc_facet[:-1, :-1].flatten())

    xc = np.concatenate(xc_parts)
    yc = np.concatenate(yc_parts)

    xc_from_ds = ds['XC'].values.flatten()
    yc_from_ds = ds['YC'].values.flatten()

    assert xc.min() == xc_from_ds.min()
    assert xc.max() == xc_from_ds.max()
    assert yc.min() == yc_from_ds.min()
    assert yc.max() == yc_from_ds.max()

    # passing llc without metadata should fail
    with pytest.raises(ValueError):
        get_grid_from_input(dirname + '/' + gridfile,
                            geometry=expected['geometry'],
                            dtype=np.dtype('d'),
                            endian='>',
                            use_dask=False,
                            extra_metadata=None)