Example #1
def test_parse_size():

    tests = [('1000000.0b', 1000000), ('1MiB', 1048576), ('1.0MB', 1000000),
             ('0.001Gb', 1000000), ('500Mb', 500000000)]

    for test, check in tests:
        assert (parse_size(test) == check)
Example #2
def test_parse_size():

    tests = [
        ("1000000.0b", 1000000),
        ("1MiB", 1048576),
        ("1.0MB", 1000000),
        ("0.001Gb", 1000000),
        ("500Mb", 500000000),
    ]

    for test, check in tests:
        assert parse_size(test) == check
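
The parse_size helper exercised by these tests is not shown in the examples. A minimal sketch that satisfies the test cases above could look like the following; the unit table and regex here are assumptions for illustration, not the library's actual implementation.

import re

def parse_size(size_str):
    """Convert a human-readable size string such as "1.5MiB" to a number of bytes."""
    # Decimal (kB/MB/GB) and binary (KiB/MiB/GiB) multipliers, matched case-insensitively.
    units = {
        "b": 1,
        "kb": 10**3, "mb": 10**6, "gb": 10**9, "tb": 10**12,
        "kib": 2**10, "mib": 2**20, "gib": 2**30, "tib": 2**40,
    }
    match = re.fullmatch(r"\s*([\d.]+)\s*([A-Za-z]+)\s*", size_str)
    if not match:
        raise ValueError(f"Cannot parse size string: {size_str!r}")
    number, unit = match.groups()
    return int(float(number) * units[unit.lower()])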
Example #3
def get_chunk_length(da):
    size = da.nbytes
    n_times = len(da.time.values)
    mem_limit = parse_size(chunk_memory_limit)

    if size > 0:
        n_chunks = math.ceil(size / mem_limit)
    else:
        n_chunks = 1

    chunk_length = math.ceil(n_times / n_chunks)

    return chunk_length
Example #4
def get_chunk_length(da):
    """
    Calculate the chunk length to use when chunking xarray datasets.

    Based on the memory limit provided in the config and the size of the dataset.
    """
    size = da.nbytes
    n_times = len(da.time.values)
    mem_limit = parse_size(chunk_memory_limit)

    if size > 0:
        n_chunks = math.ceil(size / mem_limit)
    else:
        n_chunks = 1

    chunk_length = math.ceil(n_times / n_chunks)

    return chunk_length
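
To make the arithmetic in get_chunk_length concrete, here is a worked example with hypothetical numbers: an 800 MB array with 365 daily time steps and a chunk_memory_limit of "250MiB". The figures are illustrative only.

import math

size = 800_000_000                            # da.nbytes (hypothetical)
n_times = 365                                 # len(da.time.values)
mem_limit = parse_size("250MiB")              # 262_144_000 bytes

n_chunks = math.ceil(size / mem_limit)        # ceil(3.05...) == 4
chunk_length = math.ceil(n_times / n_chunks)  # ceil(91.25)  == 92

Each chunk of 92 time steps is then roughly 200 MB, comfortably below the 250MiB limit.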
Example #5
def get_time_slices(ds,
                    split_method,
                    start=None,
                    end=None,
                    file_size_limit=None):
    """
    Take an xarray Dataset or DataArray, assume it can be split on the time axis
    into a sequence of slices. Optionally, take a start and end date to specify
    a sub-slice of the main time axis.

    Use the prescribed file size limit to generate a list of
    ("YYYY-MM-DD", "YYYY-MM-DD") slices so that the output files do
    not (significantly) exceed the file size limit.

    :param ds: xarray Dataset
    :param split_method:
    :param start:
    :param end:
    :param file_size_limit: a string specifying "<number><units>".
    :return: list of tuples of date strings.
    """

    if split_method != "time:auto":
        raise NotImplementedError(
            f"The split method {split_method} is not implemeted.")

    # Use default file size limit if not provided
    if not file_size_limit:
        file_size_limit = parse_size(
            CONFIG["clisops:write"]["file_size_limit"])

    da = get_da(ds)

    times = filter_times_within(da.time.values, start=start, end=end)
    n_times = len(times)

    if n_times == 0:
        raise Exception("Zero time steps found between {start} and {end}.")

    n_slices = da.nbytes / file_size_limit
    slice_length = int(n_times // n_slices)

    if slice_length == 0:
        raise Exception(
            "Unable to calculate slice length for splitting output files.")

    slices = []
    indx = 0
    final_indx = n_times - 1

    while indx <= final_indx:

        start_indx = indx
        indx += slice_length
        end_indx = indx - 1

        if end_indx > final_indx:
            end_indx = final_indx
        slices.append((f"{_format_time(times[start_indx])}",
                       f"{_format_time(times[end_indx])}"))

    return slices
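
The slice-length calculation follows the same pattern. A worked example with hypothetical numbers (a 2.5 GB array with 3650 daily time steps and a "1GB" file size limit) shows how the while loop above ends up producing output files at or below the limit.

nbytes = 2_500_000_000                   # da.nbytes (hypothetical)
n_times = 3650                           # ten years of daily time steps
file_size_limit = parse_size("1GB")      # 1_000_000_000 bytes

n_slices = nbytes / file_size_limit      # 2.5
slice_length = int(n_times // n_slices)  # int(1460.0) == 1460

# The while loop then yields the index ranges 0-1459, 1460-2919 and 2920-3649,
# i.e. two files of roughly 1 GB followed by a final file of roughly 0.5 GB.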
Example #6
def get_time_slices(
    ds: Union[xr.Dataset, xr.DataArray],
    split_method,
    start=None,
    end=None,
    file_size_limit: Optional[str] = None,
) -> List[Tuple[str, str]]:
    """

    Take an xarray Dataset or DataArray, assume it can be split on the time axis
    into a sequence of slices. Optionally, take a start and end date to specify
    a sub-slice of the main time axis.

    Use the prescribed file size limit to generate a list of
    ("YYYY-MM-DD", "YYYY-MM-DD") slices so that the output files do
    not (significantly) exceed the file size limit.

    Parameters
    ----------
    ds: Union[xr.Dataset, xr.DataArray]
        The dataset (or data array) to split along its time axis.
    split_method
        One of SUPPORTED_SPLIT_METHODS, e.g. "time:auto".
    start
        Optional start date bounding the sub-slice of the time axis.
    end
        Optional end date bounding the sub-slice of the time axis.
    file_size_limit: str
        A string specifying "<number><units>".

    Returns
    -------
    List[Tuple[str, str]]
    """

    if split_method not in SUPPORTED_SPLIT_METHODS:
        raise NotImplementedError(
            f"The split method {split_method} is not implemented.")

    # Use default file size limit if not provided
    if not file_size_limit:
        file_size_limit = parse_size(
            CONFIG["clisops:write"]["file_size_limit"])

    da = get_da(ds)
    slices = []

    try:
        times = filter_times_within(da.time.values, start=start, end=end)
    # catch where "time" attribute cannot be accessed in ds
    except AttributeError:
        slices.append(None)
        return slices

    n_times = len(times)

    if n_times == 0:
        raise Exception(f"Zero time steps found between {start} and {end}.")

    n_slices = da.nbytes / file_size_limit
    slice_length = int(n_times // n_slices)

    if slice_length == 0:
        raise Exception(
            "Unable to calculate slice length for splitting output files.")

    indx = 0
    final_indx = n_times - 1

    while indx <= final_indx:

        start_indx = indx
        indx += slice_length
        end_indx = indx - 1

        if end_indx > final_indx:
            end_indx = final_indx
        slices.append((f"{_format_time(times[start_indx])}",
                       f"{_format_time(times[end_indx])}"))

    return slices
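
A usage sketch for this final version, assuming the surrounding module provides CONFIG, SUPPORTED_SPLIT_METHODS and the helpers used above, and that the dataset has a usable time coordinate. The file name and the resulting date ranges are purely illustrative.

import xarray as xr

ds = xr.open_dataset("tas_day_example.nc")  # hypothetical input file

# Rely on the configured default file size limit.
slices = get_time_slices(ds, split_method="time:auto")
# e.g. [("1950-01-01", "1975-06-30"), ("1975-07-01", "2000-12-31")]

# Write one output file per time slice.
for start, end in slices:
    ds.sel(time=slice(start, end)).to_netcdf(f"output_{start}_{end}.nc")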