Example #1
def func_for_map_blocks(*args):
    additional_params = args[-1]

    func = additional_params["func"] # the actual function to be called
    func_args = additional_params["func_args"]

    v_names = additional_params["variable_names"]
    time_limit_in_min = additional_params["time_limit_in_min"]
    return_shape = additional_params["return_shape"]
    logfile_name = additional_params["logfile_name"]

    # create a dictionary that acts as a dataset replacement
    d = {v_names[i]: args[i].reshape((-1,)) for i in range(len(args) - 1)}

    res = -np.inf * np.ones(return_shape)
    start_time = time.time()
    error_msg = ""
    info = tuple()
    try:
        res, info = custom_timeout(
            time_limit_in_min*60,
            func,
            d,
            **func_args
        )
    except TimeoutError:
        duration = (time.time() - start_time) / 60
        error_msg = "Timeout after %2.2f min" % duration
        print(error_msg, flush=True)
    except Exception as e:
        tb = traceback.format_exc()
#        print(tb, flush=True)
        res = np.nan * np.ones(return_shape)
        error_msg = "Error: " + str(e)
        print(error_msg, flush=True)

    if error_msg == "":
        error_msg = "done"

    write_to_logfile(
        logfile_name,
        d["lat"],
        d["lon"],
        d["prob"],
        error_msg,
        *info
    )

    return res.reshape(return_shape)
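
Both func_for_map_blocks variants in this listing rely on a custom_timeout(seconds, func, *args, **kwargs) helper that is not shown. A minimal sketch, assuming a POSIX system and execution in the main thread, built on signal.alarm (the real helper may instead run func in a separate thread or process; passing np.inf disables the timeout, matching the call sites in Example #5):

import signal

def custom_timeout(seconds, func, *args, **kwargs):
    # Hypothetical sketch: raise TimeoutError if `func` runs longer than
    # `seconds`; seconds = np.inf switches the timeout off entirely.
    if seconds == float("inf"):
        return func(*args, **kwargs)

    def _handler(signum, frame):
        raise TimeoutError("function call timed out")

    old_handler = signal.signal(signal.SIGALRM, _handler)
    signal.alarm(int(seconds))
    try:
        return func(*args, **kwargs)
    finally:
        signal.alarm(0)  # cancel any pending alarm
        signal.signal(signal.SIGALRM, old_handler)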
Example #2
                          task["result_chunks"],
                          overwrite=task["overwrite"])

    nr_incomplete_sites, _, _ = CARDAMOMlib.get_incomplete_sites(z, slices)
    print("Number of incomplete sites:", nr_incomplete_sites)
    logfile_name = str(project_path.joinpath(task["computation"] + ".log"))
    print("Logfile:", logfile_name)

    for timeout in task["timeouts"]:
        CARDAMOMlib.compute_incomplete_sites(timeout, z, nr_times,
                                             variable_names, variables,
                                             non_data_variables, slices, task,
                                             logfile_name)

    nr_incomplete_sites, _, _ = CARDAMOMlib.get_incomplete_sites(z, slices)
    write_to_logfile(logfile_name, nr_incomplete_sites,
                     "incomplete sites remaining")
    print(nr_incomplete_sites, "incomplete sites remaining")
    print()
# -

# +
import pandas as pd

task = task_list[-1]
logfile_name = str(project_path.joinpath(task["computation"] + ".log"))
csv = pd.read_csv(
    logfile_name,
    sep=",",
    skiprows=1,
    skipfooter=2,
    engine="python",  # skipfooter is only supported by the python engine
    names=["time", "lat", "lon", "prob", "msg", "max_abs_err", "max_rel_err"])
Example #3
def compute_incomplete_sites_with_mr(
    time_limit_in_min,
    z,
    nr_pools,
    time_step_in_days,
    times_da,
    start_values_zarr,
    us_zarr,
    Bs_zarr,
    slices,
    task,
    logfile_name,
):
    start_values_da = da.from_zarr(start_values_zarr)
    us_da = da.from_zarr(us_zarr)
    Bs_da = da.from_zarr(Bs_zarr)
    # copy nans from start_values, us, or Bs to z
    nr_nan_sites, nan_coords_tuples = get_nan_site_tuples_for_mr_computation(
        start_values_zarr,
        us_zarr,
        Bs_zarr,
        slices
    )

    for nan_coords in nan_coords_tuples:
        z[nan_coords] = np.nan

    # identify non-nan computed sites in start_values, us, and Bs
    # combine with not yet computed sites in z
    nr_incomplete_sites, incomplete_coords_tuples, _ = get_incomplete_site_tuples_for_mr_computation(
        start_values_zarr,
        us_zarr,
        Bs_zarr,
        z,
        slices
    )

    if nr_incomplete_sites > 0:
        print('number of incomplete sites:', nr_incomplete_sites)
    else:
        print('no incomplete sites remaining')
        return True

    # select incomplete sites from variables
    incomplete_variables = []

    nr_times = len(times_da)
    shapes = [
        (nr_times, nr_pools, nr_pools),
        (1, nr_pools, 1),
        (nr_times, nr_pools, 1)
    ]
    for v, shape in zip([Bs_da, start_values_da, us_da], shapes):
        v_stack_list = []
        for ic in incomplete_coords_tuples:
            v_stack_list.append(v[ic].reshape(shape))

        incomplete_variables.append(da.stack(v_stack_list))

    # add lat, lon, prob
    for k, name in enumerate(["lat", "lon", "prob"]):
        incomplete_variables.append(
            da.from_array(
                np.array(
                    [ic[k] for ic in incomplete_coords_tuples]
                ).reshape(-1, 1, 1, 1),
                chunks=(1, 1, 1, 1)
            )
        )

    # add time
    incomplete_variables.append(times_da.reshape((1, -1, 1, 1)).rechunk((1, nr_times, 1, 1)))

    # prepare the delayed computation
    additional_params = {
        "model_type": task["model_type"],
        "computation": task["computation"],
        "nr_pools": nr_pools,
        "time_step_in_days": time_step_in_days,
        "return_shape": task["return_shape"],
        "func": task["func"],
        "func_args": task["func_args"],
        "time_limit_in_min": time_limit_in_min,
        "logfile_name": logfile_name
    }

    meta_shape = list(task["meta_shape"])
    meta_shape[0] = nr_incomplete_sites
    meta_shape = tuple(meta_shape)

    res_da = incomplete_variables[0].map_blocks(
        func_for_map_blocks_with_mr,
        *incomplete_variables[1:], # incomplete_variables[0] is passed automatically as the first argument
        additional_params,
        drop_axis=task["drop_axis"],
        new_axis=task["new_axis"],
        chunks=task["return_shape"],
        dtype=np.float64,
        meta=np.ndarray(meta_shape, dtype=np.float64)
    )

    # write header to logfile
    print(write_header_to_logfile(logfile_name, res_da, time_limit_in_min))
    print('starting, timeout (min) = ', time_limit_in_min, flush=True)

    # do the computation
    linear_batchwise_to_zarr(
        res_da, # dask array
        z, # target zarr archive
        slices, # slices of interest
        incomplete_coords_tuples,
        task["batch_size"]
    )

    write_to_logfile(logfile_name, 'done, timeout (min) = ' + str(time_limit_in_min))
    print('done, timeout (min) =', time_limit_in_min, flush=True)
    return False
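
linear_batchwise_to_zarr, used in Examples #3 and #4, is not part of the listing either. Judging from its call sites, it computes the stacked result array a batch of sites at a time and writes each site's result back to its coordinates in the target zarr archive. A rough sketch under those assumptions (handling of slices is omitted here):

def linear_batchwise_to_zarr(res_da, z, slices, coords_tuples, batch_size):
    # Hypothetical sketch: compute `batch_size` sites at a time so that
    # partial progress survives worker failures, then scatter each
    # site's result to its (lat, lon, prob) slot in the zarr archive.
    nr_sites = res_da.shape[0]
    for start in range(0, nr_sites, batch_size):
        stop = min(start + batch_size, nr_sites)
        batch = res_da[start:stop].compute()
        for k, coords in enumerate(coords_tuples[start:stop]):
            z[coords] = batch[k]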
Example #4
def compute_incomplete_sites(
    time_limit_in_min,
    z,
    nr_times,
    variable_names,
    variables,
    non_data_variables,
    slices,
    task,
    logfile_name,
):
    nr_incomplete_sites, incomplete_coords_tuples, incomplete_sliced_coords_tuples = \
        get_incomplete_sites(z, slices)

    if nr_incomplete_sites > 0:
        print('number of incomplete sites:', nr_incomplete_sites)
    else:
        print('no incomplete sites remaining')
        return

    # select incomplete sites from variables
    incomplete_variables = []
    for v, name in zip(variables, variable_names):
        if name not in non_data_variables:
            v_stack_list = []
            for coords in incomplete_sliced_coords_tuples:
                v_stack_list.append(v[coords])

            incomplete_variables.append(da.stack(v_stack_list))

    # add lat, lon, prob
    for k in range(3):
        incomplete_variables.append(
            da.from_array(
                np.array([c[k] for c in incomplete_coords_tuples]).reshape(-1, 1),
                chunks=(1, 1)
            )
        )

    # add time
    time_da = variables[variable_names.index('time')].reshape(1, -1).rechunk((1, nr_times))
    incomplete_variables.append(time_da)

    # prepare the delayed computation
    additional_params = {
        "func": task["func"],
        "func_args": task["func_args"],
        "variable_names": variable_names,
        "time_limit_in_min": time_limit_in_min,
        "return_shape": task["return_shape"],
        "logfile_name": logfile_name
    }
    meta_shape = list(task["meta_shape"])
    meta_shape[0] = nr_incomplete_sites
    meta_shape = tuple(meta_shape)

    res_da = incomplete_variables[0].map_blocks(
        func_for_map_blocks,
        *incomplete_variables[1:], # incomplete_variables[0] is passed automatically as the first argument
        additional_params,
        drop_axis=task["drop_axis"],
        new_axis=task["new_axis"],
        chunks=task["return_shape"],
        dtype=np.float64,
        meta=np.ndarray(meta_shape, dtype=np.float64)
    )

    # write header to logfile
    print(write_header_to_logfile(logfile_name, res_da, time_limit_in_min))
    print('starting, timeout (min) = ', time_limit_in_min, flush=True)

    # do the computation
    linear_batchwise_to_zarr(
        res_da, # dask array
        z, # target zarr archive
        slices, # slices of interest
        incomplete_coords_tuples,
        task["batch_size"]
    )

    write_to_logfile(logfile_name, 'done, timeout (min) = ' + str(time_limit_in_min))
    print('done, timeout (min) =', time_limit_in_min, flush=True)
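
get_incomplete_sites is also assumed rather than shown. Because Example #1 initializes every result to -inf and overwrites it on success or error, a site can plausibly be treated as incomplete while its slot in z still holds -inf. A sketch under that assumption, taking slices to be a (lat, lon, prob) tuple of slice objects:

import numpy as np

def get_incomplete_sites(z, slices):
    # Hypothetical sketch: flag sites whose result slot still holds the
    # -inf marker (cf. func_for_map_blocks above); return both global
    # coordinates and coordinates relative to the sliced region.
    sub = z[slices[0], slices[1], slices[2]]
    flags = np.isneginf(sub).any(axis=tuple(range(3, sub.ndim)))
    sliced_coords = list(zip(*np.where(flags)))
    offsets = [s.start or 0 for s in slices[:3]]
    coords = [tuple(c + o for c, o in zip(sc, offsets))
              for sc in sliced_coords]
    return len(coords), coords, sliced_coords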
Example #5
def func_for_map_blocks_with_mr(*args):
    additional_params = args[-1]

    model_type = additional_params["model_type"]
    computation = additional_params["computation"]
    nr_pools = additional_params["nr_pools"]
    time_step_in_days = additional_params["time_step_in_days"]
    return_shape = additional_params["return_shape"]
    func = additional_params["func"]
    func_args = additional_params["func_args"]
    time_limit_in_min = additional_params["time_limit_in_min"]
    logfile_name = additional_params["logfile_name"]
                                            
    Bs = args[0].reshape((-1, nr_pools, nr_pools))
    start_values = args[1].reshape(nr_pools)
    us = args[2].reshape((-1, nr_pools))
                                                            
    lat = args[3].reshape(-1)
    lon = args[4].reshape(-1)
    prob = args[5].reshape(-1)
    times = args[6].reshape(-1)

    nr_times = len(times)
    data_times = np.arange(0, nr_times, 1) * time_step_in_days
                                                                                        
    log_msg = "done"
    res = -np.inf * np.ones(return_shape)
    start_time = time.time()
    try:
        time_symbol = symbols('t')

        # using the custom_timeout function (even with timeout switched off) 
        # makes the worker report to the scheduler
        # and prevents frequent timeouts

        if model_type == "continuous":
            mr = custom_timeout(
                np.inf,
                PWCModelRunFD.from_Bs_and_us,
                time_symbol,
                data_times,
                start_values,
                Bs[:-1],
                us[:-1]
            )
        elif model_type == "discrete":       
            mr = custom_timeout(
                np.inf,
                DMR.from_Bs_and_net_Us,
                start_values,
                data_times,
                Bs[:-1],
                us[:-1]
            )
        else:
            raise(ValueError("model_type not recognized"))

        print("computing", computation, lat, lon, prob, flush=True)
        res = custom_timeout(
            time_limit_in_min*60,
            func,
            mr,
            **func_args
        )           
        print("done", lat, lon, prob, flush=True)
    except TimeoutError:
        duration = (time.time() - start_time) / 60
        log_msg = "Timeout after %2.2f min" % duration
        print(log_msg, flush=True)
    except Exception as e:
        tb = traceback.format_exc()
        res = np.nan * np.ones_like(res)
        print(str(e), flush=True)
        print(tb, flush=True)
        log_msg = "Error: " + str(e) + str(tb)

    write_to_logfile(
        logfile_name,
        lat,
        lon,
        prob,
        log_msg
    )
    
    return res.reshape(return_shape)
Example #6
def run_task_with_mr(
    project_path,
    task,
    nr_pools,
    time_step_in_days,
    times_da,
    start_values_zarr,
    us_zarr,
    Bs_zarr,
    slices
):
    print("task: computing", task["computation"])
    print()
            
    zarr_path = Path(project_path.joinpath(task["computation"]))
    print("zarr archive:", str(zarr_path))
    z = load_zarr_archive(
        zarr_path,
        task["result_shape"],
        task["result_chunks"],
        overwrite=task["overwrite"]
    )

    logfile_name = str(project_path.joinpath(task["computation"] + ".log"))
    print("Logfile:", logfile_name)

    for timeout in task["timeouts"]:
        done = False
        done = compute_incomplete_sites_with_mr(
            timeout,
            z,
            nr_pools,
            time_step_in_days,
            times_da,
            start_values_zarr,
            us_zarr,
            Bs_zarr,
            slices,
            task,
            logfile_name
        )
        if done:
            break

    if done:
        nr_incomplete_sites = 0
    else:
        nr_incomplete_sites, _, _ = get_incomplete_site_tuples_for_mr_computation(
            start_values_zarr,
            us_zarr,
            Bs_zarr,
            z,
            slices
        )

    write_to_logfile(logfile_name, nr_incomplete_sites, "incomplete sites remaining")
    print(nr_incomplete_sites, "incomplete sites remaining")
    print()
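
Finally, load_zarr_archive (Example #6, and implicitly Example #2) presumably creates the result archive or reopens an existing one. A minimal sketch with zarr; the -inf fill value, chosen here to match the incomplete-site marker, is an assumption:

import shutil
import numpy as np
import zarr

def load_zarr_archive(zarr_path, result_shape, result_chunks, overwrite=False):
    # Hypothetical sketch: recreate the archive on overwrite, otherwise
    # reopen it in append mode so already completed sites are kept.
    if overwrite and zarr_path.exists():
        shutil.rmtree(zarr_path)
    return zarr.open(
        str(zarr_path),
        mode="a",
        shape=result_shape,
        chunks=result_chunks,
        dtype=np.float64,
        fill_value=-np.inf,
    )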