Esempio n. 1
0
def execute(tasks, filter_files=True, multi_threaded=False):
    """Execute all tasks, handling fx (time-invariant) targets separately.

    Tasks whose target frequency is 0 are run single-threaded in a single
    pass (once=True); all remaining tasks are run with the requested
    threading mode. Returns the concatenated lists of valid tasks.
    """
    fx_tasks, other_tasks = [], []
    for task in tasks:
        if cmor_target.get_freq(task.target) == 0:
            fx_tasks.append(task)
        else:
            other_tasks.append(task)
    valid_fx_tasks = execute_tasks(fx_tasks,
                                   filter_files,
                                   multi_threaded=False,
                                   once=True)
    valid_other_tasks = execute_tasks(other_tasks,
                                      filter_files,
                                      multi_threaded=multi_threaded,
                                      once=False)
    return valid_fx_tasks + valid_other_tasks
Esempio n. 2
0
def validate_tasks(tasks):
    """Validate IFS tasks against the field inventory of the first day.

    Each task's root grib codes are checked against the global ``varsfreq``
    mapping (key: (var_id, tab_id, levtype, level, grid) -> frequency).
    Tasks whose fields are missing, or present only at a lower frequency
    than the target requires, are marked failed and dismissed.

    Side effect: rebuilds the global ``varstasks`` dict mapping each
    required field key to the list of tasks that need it.

    Returns the list of tasks that passed validation.
    """
    global varstasks
    varstasks = {}
    valid_tasks = []
    for task in tasks:
        # Only IFS sources can be validated against the grib inventory.
        if not isinstance(task.source, cmor_source.ifs_source):
            continue
        codes = task.source.get_root_codes()
        target_freq = cmor_target.get_freq(task.target)
        grid_key = task.source.grid_
        for c in codes:
            levtype, levels = get_levels(task, c)
            for l in levels:
                # A previous level/code may already have failed this task.
                if task.status == cmor_task.status_failed:
                    break
                key = (c.var_id, c.tab_id, levtype, l, task.source.grid_)
                match_key = key
                if levtype == grib_file.hybrid_level_code:
                    # Hybrid (model) levels: match on code/table/levtype and
                    # grid, ignoring the specific level index (key[3]).
                    matches = [
                        k for k in varsfreq.keys()
                        if k[:3] == key[:3] and k[4] == key[4]
                    ]
                    match_key = key if not any(matches) else matches[0]
                if c.var_id == 134 and len(codes) == 1:
                    # Code 134 (surface pressure) may be found on either grid;
                    # match regardless of grid and adopt the grid it was on.
                    matches = [k for k in varsfreq.keys() if k[:3] == key[:3]]
                    match_key = key if not any(matches) else matches[0]
                    if any(matches):
                        grid_key = match_key[4]
                if match_key not in varsfreq:
                    log.error(
                        "Field missing in the first day of file: "
                        "code %d.%d, level type %d, level %d. Dismissing task %s in table %s"
                        % (key[0], key[1], key[2], key[3],
                           task.target.variable, task.target.table))
                    task.set_failed()
                    break
                if 0 < target_freq < varsfreq[match_key]:
                    log.error(
                        "Field has too low frequency for target %s: "
                        "code %d.%d, level type %d, level %d. Dismissing task %s in table %s"
                        % (task.target.variable, key[0], key[1], key[2],
                           key[3], task.target.variable, task.target.table))
                    task.set_failed()
                    break
        if task.status != cmor_task.status_failed:
            # Register every (possibly grid-corrected) field key this task
            # depends on, so filtering knows which tasks need which fields.
            for c in codes:
                levtype, levels = get_levels(task, c)
                for l in levels:
                    key = (c.var_id, c.tab_id, levtype, l, grid_key)
                    if key in varstasks:
                        varstasks[key].append(task)
                    else:
                        varstasks[key] = [task]
            valid_tasks.append(task)
    return valid_tasks
Esempio n. 3
0
def validate_tasks(tasks):
    """Validate IFS tasks against the field inventory of the first day.

    Variant that delegates key matching to ``soft_match_key`` and keeps the
    task registry local instead of in a module global. Tasks whose fields
    are missing, or present only at a lower frequency than the target
    requires, are marked failed and dismissed. When the matched fields live
    on a different grid than the task's source claims, the source grid is
    overwritten with the grid actually found.

    Returns a pair (valid_tasks, varstasks) where varstasks maps each
    matched field key (var_id, tab_id, levtype, level, grid) to the list of
    tasks that need it.
    """
    varstasks = {}
    valid_tasks = []
    for task in tasks:
        # Only IFS sources can be validated against the grib inventory.
        if not isinstance(task.source, cmor_source.ifs_source):
            continue
        codes = task.source.get_root_codes()
        target_freq = cmor_target.get_freq(task.target)
        matched_keys = []
        matched_grid = None
        for c in codes:
            levtype, levels = get_levels(task, c)
            for level in levels:
                # A previous level/code may already have failed this task.
                if task.status == cmor_task.status_failed:
                    break
                match_key = soft_match_key(c.var_id, c.tab_id, levtype, level,
                                           task.source.grid_, varsfreq.keys())
                if match_key is None:
                    log.error(
                        "Field missing in the first day of file: "
                        "code %d.%d, level type %d, level %d. Dismissing task %s in table %s"
                        % (c.var_id, c.tab_id, levtype, level,
                           task.target.variable, task.target.table))
                    task.set_failed()
                    break
                if 0 < target_freq < varsfreq[match_key]:
                    log.error(
                        "Field has too low frequency for target %s: "
                        "code %d.%d, level type %d, level %d. Dismissing task %s in table %s"
                        % (task.target.variable, c.var_id, c.tab_id, levtype,
                           level, task.target.variable, task.target.table))
                    task.set_failed()
                    break
                # Track the grid of the first match; warn if later codes of
                # the same task resolve to a different grid.
                if matched_grid is None:
                    matched_grid = match_key[4]
                else:
                    if match_key[4] != matched_grid:
                        log.warning(
                            "Task %s in table %s depends on both gridpoint and spectral fields"
                            % (task.target.variable, task.target.table))
                matched_keys.append(match_key)
        if task.status != cmor_task.status_failed:
            # Fix for zg and ps on gridpoints and spectral fields on height levels:
            task.source.grid_ = matched_grid
            for key in matched_keys:
                if key in varstasks:
                    varstasks[key].append(task)
                else:
                    varstasks[key] = [task]
            valid_tasks.append(task)
    return valid_tasks, varstasks
Esempio n. 4
0
def execute_netcdf_task(task):
    """Cmorize a single task from its post-processed netcdf file.

    Looks up the task's output file, finds the netcdf variable matching the
    task's grib code, creates the cmor variable on the task's stored axis
    ids, and streams the data through ``cmor_utils.netcdf2cmor``. Model
    level variables additionally require a surface pressure file (the
    ``sp_task`` attribute). Errors are logged and the task is dismissed or
    marked failed; nothing is returned.

    Fixes vs. the original: ``e.message`` (removed in Python 3) replaced by
    ``str(e)``, and the "fro cmorizing" typo in the error message corrected.
    """
    global log
    task.next_state()
    filepath = getattr(task, cmor_task.output_path_key, None)
    if not filepath:
        log.error(
            "Could not find file containing data for variable %s in table %s" %
            (task.target.variable, task.target.table))
        return
    store_var = getattr(task, "store_with", None)
    surf_pressure_task = getattr(task, "sp_task", None)
    surf_pressure_path = getattr(surf_pressure_task, "path",
                                 None) if surf_pressure_task else None
    # Model-level variables must be stored together with surface pressure.
    if store_var and not surf_pressure_path:
        log.error(
            "Could not find file containing surface pressure for model level variable...skipping variable %s in table "
            "%s" % (task.target.variable, task.target.table))
        return
    # Collect the cmor axis ids previously attached to the task.
    axes = []
    t_bnds = []
    if hasattr(task, "grid_id"):
        task_grid_id = getattr(task, "grid_id")
        if isinstance(task_grid_id, tuple):
            axes.extend([a for a in task_grid_id if a is not None])
        else:
            axes.append(task_grid_id)
    if hasattr(task, "z_axis_id"):
        axes.append(getattr(task, "z_axis_id"))
    if hasattr(task, "t_axis_id"):
        axes.append(getattr(task, "t_axis_id"))
        t_bnds = time_axis_bnds.get(getattr(task, "t_axis_id"), [])
    try:
        dataset = netCDF4.Dataset(filepath, 'r')
    except Exception as e:
        log.error(
            "Could not read netcdf file %s while cmorizing variable %s in table %s. Cause: %s"
            % (filepath, task.target.variable, task.target.table, str(e)))
        return
    try:
        ncvars = dataset.variables
        dataset.set_auto_mask(False)
        # Find the variable by its grib "code" attribute, falling back to
        # the cdo naming convention "var<code>".
        codestr = str(task.source.get_grib_code().var_id)
        varlist = [
            v for v in ncvars
            if str(getattr(ncvars[v], "code", None)) == codestr
        ]
        if len(varlist) == 0:
            varlist = [v for v in ncvars if str(v) == "var" + codestr]
        if task.target.variable == "areacella":
            varlist = ["cell_area"]
        if len(varlist) == 0:
            log.error(
                "No suitable variable found in cdo-produced file %s for cmorizing variable %s in table %s... "
                "dismissing task" %
                (filepath, task.target.variable, task.target.table))
            task.set_failed()
            return
        if len(varlist) > 1:
            log.warning(
                "CDO variable retrieval resulted in multiple (%d) netcdf variables; will take first"
                % len(varlist))
        ncvar = ncvars[varlist[0]]
        # Prefer the target units when the file has none or a conversion
        # has been requested for this task.
        unit = getattr(ncvar, "units", None)
        if (not unit) or hasattr(task, cmor_task.conversion_key):
            unit = getattr(task.target, "units")
        if len(getattr(task.target, "positive", "")) > 0:
            var_id = cmor.variable(table_entry=str(task.target.variable),
                                   units=str(unit),
                                   axis_ids=axes,
                                   positive="down")
        else:
            var_id = cmor.variable(table_entry=str(task.target.variable),
                                   units=str(unit),
                                   axis_ids=axes)
        # Variables declared positive "up" are written with flipped sign.
        flip_sign = (getattr(task.target, "positive", None) == "up")
        factor, term = get_conversion_constants(
            getattr(task, cmor_task.conversion_key, None),
            getattr(task, cmor_task.output_frequency_key))
        # Locate the (first) time dimension of the netcdf variable.
        time_dim, index = -1, 0
        for d in ncvar.dimensions:
            if d.startswith("time"):
                time_dim = index
                break
            index += 1

        # Map each requested time interval onto a slice of the file's time
        # stamps; -1 marks intervals with no data (filled with missval).
        time_selection = None
        time_stamps = cmor_utils.read_time_stamps(filepath)
        if any(time_stamps) and len(t_bnds) > 0:
            time_slice_map = []
            for bnd in t_bnds:
                candidates = [t for t in time_stamps if bnd[0] <= t <= bnd[1]]
                if any(candidates):
                    time_slice_map.append(time_stamps.index(candidates[0]))
                else:
                    log.warning(
                        "For variable %s in table %s, no valid time point could be found at %s...inserting "
                        "missing values" %
                        (task.target.variable, task.target.table, str(bnd[0])))
                    time_slice_map.append(-1)
            time_selection = numpy.array(time_slice_map)

        mask = getattr(task.target, cmor_target.mask_key, None)
        mask_array = masks[mask].get("array", None) if mask in masks else None
        missval = getattr(task.target, cmor_target.missval_key, 1.e+20)
        if flip_sign:
            missval = -missval
        cmor_utils.netcdf2cmor(var_id,
                               ncvar,
                               time_dim,
                               factor,
                               term,
                               store_var,
                               get_sp_var(surf_pressure_path),
                               swaplatlon=False,
                               fliplat=True,
                               mask=mask_array,
                               missval=missval,
                               time_selection=time_selection,
                               force_fx=(cmor_target.get_freq(
                                   task.target) == 0))
        cmor.close(var_id)
        task.next_state()
        if store_var:
            cmor.close(store_var)
    finally:
        # Always release the netcdf handle, even on failure paths.
        dataset.close()
Esempio n. 5
0
def execute(tasks, nthreads=1):
    """Top-level driver for IFS task processing.

    Splits the supported tasks into mask, fx (time-invariant), surface
    pressure and regular tasks, assigns grib filter output files (either by
    running grib_filter or by pointing tasks at the raw model output),
    post-processes the mask/surface-pressure prerequisites, then cmorizes
    the remaining tasks — in parallel when nthreads > 1.
    """
    global log, start_date_, auto_filter_

    supported_tasks = [
        t for t in filter_tasks(tasks)
        if t.status == cmor_task.status_initialized
    ]
    log.info("Executing %d IFS tasks..." % len(supported_tasks))
    mask_tasks = get_mask_tasks(supported_tasks)
    fx_tasks = [
        t for t in supported_tasks if cmor_target.get_freq(t.target) == 0
    ]
    surf_pressure_tasks = get_sp_tasks(supported_tasks)
    regular_tasks = [
        t for t in supported_tasks
        if t not in surf_pressure_tasks and cmor_target.get_freq(t.target) != 0
    ]

    # No fx filtering needed, cdo can handle this file
    if ifs_init_gridpoint_file_.endswith("+000000"):
        tasks_to_filter = surf_pressure_tasks + regular_tasks
        tasks_no_filter = fx_tasks + mask_tasks
        for task in tasks_no_filter:
            # dirty hack for orography being in ICMGG+000000 file...
            if task.target.variable in ["orog", "areacella"]:
                task.source.grid_ = cmor_source.ifs_grid.point
            if task.source.grid_id() == cmor_source.ifs_grid.spec:
                setattr(task, cmor_task.filter_output_key,
                        [ifs_init_spectral_file_])
            else:
                setattr(task, cmor_task.filter_output_key,
                        [ifs_init_gridpoint_file_])
            # fx tasks read from the initial-state file at frequency 0.
            setattr(task, cmor_task.output_frequency_key, 0)
    else:
        tasks_to_filter = mask_tasks + fx_tasks + surf_pressure_tasks + regular_tasks
        tasks_no_filter = []

    if auto_filter_:
        tasks_todo = tasks_no_filter + grib_filter.execute(
            tasks_to_filter,
            filter_files=do_post_process(),
            multi_threaded=(nthreads > 1))
    else:
        # Without auto-filtering, point each task directly at the raw
        # gridpoint/spectral model output files.
        tasks_todo = tasks_no_filter
        for task in tasks_to_filter:
            if task.source.grid_id() == cmor_source.ifs_grid.point:
                setattr(task, cmor_task.filter_output_key,
                        ifs_gridpoint_files_.values())
                tasks_todo.append(task)
            elif task.source.grid_id() == cmor_source.ifs_grid.spec:
                setattr(task, cmor_task.filter_output_key,
                        ifs_spectral_files_.values())
                tasks_todo.append(task)
            else:
                log.error(
                    "Task ifs source has unknown grid for %s in table %s" %
                    (task.target.variable, task.target.table))
                task.set_failed()

    for task in tasks_todo:
        setattr(task, cmor_task.output_frequency_key, get_output_freq(task))

    # First post-process surface pressure and mask tasks
    for task in list(
            set(tasks_todo).intersection(mask_tasks + surf_pressure_tasks)):
        postproc.post_process(task, temp_dir_, do_post_process())
    for task in list(set(tasks_todo).intersection(mask_tasks)):
        read_mask(task.target.variable, getattr(task,
                                                cmor_task.output_path_key))
    proctasks = list(set(tasks_todo).intersection(regular_tasks + fx_tasks))
    if nthreads == 1:
        for task in proctasks:
            cmor_worker(task)
    else:
        pool = multiprocessing.Pool(processes=nthreads)
        pool.map(cmor_worker, proctasks)
        # Attributes set inside worker processes are not propagated back to
        # this process, so recompute the output paths here.
        for task in proctasks:
            setattr(task, cmor_task.output_path_key,
                    postproc.get_output_path(task, temp_dir_))
    if cleanup_tmpdir():
        clean_tmp_data(tasks_todo)
Esempio n. 6
0
def execute(tasks, nthreads=1):
    """Top-level driver for IFS task processing (variant with script tasks).

    Like the plain driver, but additionally launches external
    post-processing scripts: scripts that do their own filtering are run as
    separate processes (consuming worker slots from nthreads), while
    single-variable script tasks go through the normal filtering pipeline.
    Surface pressure tasks are split into required and extra ones.
    """
    global log, start_date_, auto_filter_

    supported_tasks = [t for t in filter_tasks(tasks) if t.status == cmor_task.status_initialized]
    log.info("Executing %d IFS tasks..." % len(supported_tasks))
    mask_tasks = get_mask_tasks(supported_tasks)
    fx_tasks = [t for t in supported_tasks if cmor_target.get_freq(t.target) == 0]
    regular_tasks = [t for t in supported_tasks if cmor_target.get_freq(t.target) != 0]
    script_tasks = [t for t in supported_tasks if validate_script_task(t) is not None]
    # Scripts in charge of their own filtering, can create a group of variables at once
    script_tasks_no_filter = [t for t in script_tasks if validate_script_task(t) == "false"]
    # Scripts creating single variable, filtering done by ece2cmor3
    script_tasks_filter = list(set(script_tasks) - set(script_tasks_no_filter))
    req_ps_tasks, extra_ps_tasks = get_sp_tasks(supported_tasks)

    # No fx filtering needed, cdo can handle this file
    if ifs_init_gridpoint_file_.endswith("+000000"):
        tasks_to_filter = extra_ps_tasks + regular_tasks + script_tasks_filter
        tasks_no_filter = fx_tasks + mask_tasks
        for task in tasks_no_filter:
            # dirty hack for orography being in ICMGG+000000 file...
            if task.target.variable in ["orog", "areacella"]:
                task.source.grid_ = cmor_source.ifs_grid.point
            if task.source.grid_id() == cmor_source.ifs_grid.spec:
                setattr(task, cmor_task.filter_output_key, [ifs_init_spectral_file_])
            else:
                setattr(task, cmor_task.filter_output_key, [ifs_init_gridpoint_file_])
            # fx tasks read from the initial-state file at frequency 0.
            setattr(task, cmor_task.output_frequency_key, 0)
    else:
        tasks_to_filter = mask_tasks + fx_tasks + extra_ps_tasks + regular_tasks + script_tasks_filter
        tasks_no_filter = []
    # np counts the worker slots left after script processes are launched.
    np = nthreads
    # Launch no-filter scripts
    jobs = []
    tasks_per_script = cmor_utils.group(script_tasks_no_filter, lambda tsk: getattr(tsk, cmor_task.postproc_script_key))
    for s, tasklist in tasks_per_script.items():
        log.info("Launching script %s to process variables %s" %
                 (s, ','.join([t.target.variable + " in " + t.target.table for t in tasklist])))
        script_args = (s, str(scripts[s]["src"]), tasklist)
        if np == 1:
            # Only one slot left: run the script synchronously.
            script_worker(*script_args)
        else:
            p = multiprocessing.Process(name=s, target=script_worker, args=script_args)
            p.start()
            jobs.append(p)
            np -= 1

    # Do filtering
    if auto_filter_:
        tasks_todo = tasks_no_filter + grib_filter.execute(tasks_to_filter, filter_files=do_post_process(),
                                                           multi_threaded=(nthreads > 1))
    else:
        # Without auto-filtering, point each task directly at the raw
        # gridpoint/spectral model output files.
        tasks_todo = tasks_no_filter
        for task in tasks_to_filter:
            if task.source.grid_id() == cmor_source.ifs_grid.point:
                setattr(task, cmor_task.filter_output_key, ifs_gridpoint_files_.values())
                tasks_todo.append(task)
            elif task.source.grid_id() == cmor_source.ifs_grid.spec:
                setattr(task, cmor_task.filter_output_key, ifs_spectral_files_.values())
                tasks_todo.append(task)
            else:
                log.error("Task ifs source has unknown grid for %s in table %s" % (task.target.variable,
                                                                                   task.target.table))
                task.set_failed()

    for task in tasks_todo:
        setattr(task, cmor_task.output_frequency_key, get_output_freq(task))

    # First post-process surface pressure and mask tasks
    for task in list(set(tasks_todo).intersection(mask_tasks + req_ps_tasks + extra_ps_tasks)):
        postproc.post_process(task, temp_dir_, do_post_process())
    for task in list(set(tasks_todo).intersection(mask_tasks)):
        read_mask(task.target.variable, getattr(task, cmor_task.output_path_key))
    proctasks = list(set(tasks_todo).intersection(regular_tasks + fx_tasks))
    if np == 1:
        for task in proctasks:
            cmor_worker(task)
    else:
        pool = multiprocessing.Pool(processes=np)
        pool.map(cmor_worker, proctasks)
        # Attributes set inside worker processes are not propagated back to
        # this process, so recompute the output paths here.
        for task in proctasks:
            setattr(task, cmor_task.output_path_key, postproc.get_output_path(task, temp_dir_))
    # Wait for all launched script processes before cleaning up.
    for job in jobs:
        job.join()
    if cleanup_tmpdir():
        clean_tmp_data(tasks_todo)