Ejemplo n.º 1
0
def send_products_to_server(job_id):
    """
    Send the postprocessed products of an existing job to the server.

    Loads ``job.json`` from the job's directory in the workspace, derives the
    description, the products directory and the manifest file name from it
    (falling back to defaults built from the job id and the grid code) and
    passes them on to ``send_product_to_server``.

    Terminates the process with exit status 1 if the job description cannot
    be loaded.

    :param job_id: the job identifier, also used as the job directory name
                   inside the workspace
    """
    args = load_sys_cfg()
    jobfile = osp.abspath(osp.join(args.workspace_path, job_id, 'job.json'))
    logging.info('send_products_to_server: loading job description from %s' %
                 jobfile)
    try:
        # context manager so the handle is released even if parsing fails
        with open(jobfile, 'r') as f:
            js = Dict(json.load(f))
    except Exception as e:
        logging.error('Cannot load the job description file %s' % jobfile)
        logging.error('%s' % e)
        sys.exit(1)

    # the user-supplied description wins, otherwise label the upload by job id
    if 'description' in js.postproc:
        desc = js.postproc['description']
    else:
        desc = js.job_id

    default_pp_dir = osp.abspath(
        osp.join(args.workspace_path, job_id, "products"))
    pp_dir = js.get('pp_dir', default_pp_dir)
    manifest_filename = js.get('manifest_filename',
                               'wfc-' + js.grid_code + '.json')
    send_product_to_server(args, pp_dir, job_id, job_id, manifest_filename,
                           desc)
Ejemplo n.º 2
0
def execute(args):
    """
    Executes a weather/fire simulation.

    Workflow: clone WPS, run GEOGRID and GRIB retrieval/UNGRIB as two parallel
    processes, run METGRID and REAL, optionally assimilate fuel moisture
    observations, submit WRF, then follow rsl.error.0000 and postprocess each
    history write until the log reports successful completion.

    The function returns early, before METGRID, if either parallel stream
    reports anything other than 'SUCCESS' through the result queue.

    The args dictionary contains

    :param args: a dictionary with the following keys
    :param grid_code: the (unique) code of the grid that is used
    :param sys_install_path: system installation directory
    :param start_utc: start time of simulation in UTC
    :param end_utc: end time of simulation in UTC
    :param workspace_path: workspace directory
    :param wps_install_path: installation directory of WPS that will be used
    :param wrf_install_path: installation directory of WRF that will be used
    :param grib_source: a string identifying a valid GRIB2 source
    :param wps_namelist_path: the path to the namelist.wps file that will be used as template
    :param wrf_namelist_path: the path to the namelist.input file that will be used as template
    :param fire_namelist_path: the path to the namelist.fire file that will be used as template
    :param wps_geog_path: the path to the geogrid data directory providing terrain/fuel data
    :param email_notification: dictionary containing keys address and events indicating when a mail should be fired off

    The optional keys 'emissions_namelist_path' and 'ignitions' are also
    honoured when present.
    """
    logging.basicConfig(level=logging.INFO)

    # initialize the job state from the arguments
    js = JobState(args)

    logging.info("job %s starting [%d hours to forecast]." % (js.job_id, js.fc_hrs))
    send_email(js, 'start', 'Job %s started.' % js.job_id)

    # read in all namelists
    js.wps_nml = f90nml.read(args['wps_namelist_path'])
    js.wrf_nml = f90nml.read(args['wrf_namelist_path'])
    js.fire_nml = f90nml.read(args['fire_namelist_path'])
    js.ems_nml = None
    if 'emissions_namelist_path' in args:
        js.ems_nml = f90nml.read(args['emissions_namelist_path'])

    # Parse and setup the domain configuration
    js.domain_conf = WPSDomainConf(js.domains)

    num_doms = len(js.domain_conf)
    js.wps_nml['share']['start_date'] = [utc_to_esmf(js.start_utc)] * num_doms
    js.wps_nml['share']['end_date'] = [utc_to_esmf(js.end_utc)] * num_doms
    js.wps_nml['share']['interval_seconds'] = 3600

    logging.info("number of domains defined is %d." % num_doms)

    # build directories in workspace
    js.wps_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wps'))
    js.wrf_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wrf'))

    logging.info("cloning WPS into %s" % js.wps_dir)

    # step 1: clone WPS and WRF directories
    cln = WRFCloner(args)
    cln.clone_wps(js.wps_dir, js.grib_source.vtables(), [])

    # step 2: process domain information and patch namelist for geogrid
    js.wps_nml['geogrid']['geog_data_path'] = args['wps_geog_path']
    js.domain_conf.prepare_for_geogrid(js.wps_nml, js.wrf_nml, js.wrfxpy_dir, js.wps_dir)
    f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True)

    # do steps 2 & 3 & 4 in parallel (two execution streams)
    #  -> GEOGRID ->
    #  -> GRIB2 download ->  UNGRIB ->

    proc_q = Queue()
    geogrid_proc = Process(target=run_geogrid, args=(js, proc_q))
    grib_proc = Process(target=retrieve_gribs_and_run_ungrib, args=(js, proc_q))

    geogrid_proc.start()
    grib_proc.start()

    # wait until both tasks are done
    geogrid_proc.join()
    grib_proc.join()

    # each stream posts one status on the queue; both must be 'SUCCESS'
    if proc_q.get() != 'SUCCESS':
        return

    if proc_q.get() != 'SUCCESS':
        return

    proc_q.close()

    # step 5: execute metgrid after ensuring all grids will be processed
    js.domain_conf.prepare_for_metgrid(js.wps_nml)
    f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True)

    logging.info("running METGRID")
    Metgrid(js.wps_dir).execute().check_output()

    send_email(js, 'metgrid', 'Job %s - metgrid complete.' % js.job_id)
    logging.info("cloning WRF into %s" % js.wrf_dir)

    # step 6: clone wrf directory, symlink all met_em* files
    cln.clone_wrf(js.wrf_dir, [])
    symlink_matching_files(js.wrf_dir, js.wps_dir, "met_em*")

    logging.info("running REAL")

    # step 7: patch input namelist, fire namelist, emissions namelist (if required)
    #         and execute real.exe
    time_ctrl = update_time_control(js.start_utc, js.end_utc, num_doms)
    js.wrf_nml['time_control'].update(time_ctrl)
    update_namelist(js.wrf_nml, js.grib_source.namelist_keys())
    if 'ignitions' in args:
        update_namelist(js.wrf_nml, render_ignitions(js, num_doms))

    # if we have an emissions namelist, automatically turn on the tracers
    if js.ems_nml is not None:
        f90nml.write(js.ems_nml, osp.join(js.wrf_dir, 'namelist.fire_emissions'), force=True)
        js.wrf_nml['dynamics']['tracer_opt'] = [2] * num_doms

    f90nml.write(js.wrf_nml, osp.join(js.wrf_dir, 'namelist.input'), force=True)

    f90nml.write(js.fire_nml, osp.join(js.wrf_dir, 'namelist.fire'), force=True)

    # try to run Real twice as it sometimes fails the first time
    # it's not clear why this error happens
    try:
        Real(js.wrf_dir).execute().check_output()
    except Exception as e:
        logging.error('Real step failed with exception %s, retrying ...' % str(e))
        Real(js.wrf_dir).execute().check_output()

    # step 8: if requested, do fuel moisture DA
    if js.fmda is not None:
        logging.info('running fuel moisture data assimilation')
        for dom in js.fmda.domains:
            # the WRF directory lives on the job state; there is no local wrf_dir
            assimilate_fm10_observations(osp.join(js.wrf_dir, 'wrfinput_d%02d' % dom), None, js.fmda.token)

    logging.info('submitting WRF job')
    send_email(js, 'wrf_submit', 'Job %s - wrf job submitted.' % js.job_id)

    # step 8: execute wrf.exe on parallel backend
    js.task_id = "sim-" + js.grid_code + "-" + utc_to_esmf(js.start_utc)[:10]
    WRF(js.wrf_dir, js.qsys).submit(js.task_id, js.num_nodes, js.ppn, js.wall_time_hrs)

    send_email(js, 'wrf_exec', 'Job %s - wrf job starting now with id %s.' % (js.job_id, js.task_id))
    logging.info("WRF job submitted with id %s, waiting for rsl.error.0000" % js.task_id)

    # step 9: wait for appearance of rsl.error.0000 and open it
    rsl_path = osp.join(js.wrf_dir, 'rsl.error.0000')
    wrf_out = None
    while wrf_out is None:
        try:
            wrf_out = open(rsl_path)
        except IOError:
            # the message states the actual polling interval used below
            logging.info('forecast: waiting 5 seconds for rsl.error.0000 file')
            time.sleep(5)

    logging.info('Detected rsl.error.0000')

    # step 10: track log output and check for history writes from WRF
    pp = None
    already_sent_files, max_pp_dom = [], -1
    if js.postproc is not None:
        js.pp_dir = osp.join(js.workspace_path, js.job_id, "products")
        make_dir(js.pp_dir)
        pp = Postprocessor(js.pp_dir, 'wfc-' + js.grid_code)
        # single-character keys of postproc are treated as domain ids
        pp_doms = [int(x) for x in js.postproc if len(x) == 1]
        if pp_doms:
            max_pp_dom = max(pp_doms)

    # NOTE: the loop below only ends when the completion marker shows up in the log
    try:
        while True:
            line = wrf_out.readline().strip()
            if not line:
                time.sleep(0.2)
                continue

            if "SUCCESS COMPLETE WRF" in line:
                send_email(js, 'complete', 'Job %s - wrf job complete SUCCESS.' % js.job_id)
                logging.info("WRF completion detected.")
                break

            if "Timing for Writing wrfout" in line:
                esmf_time, domain_str = re.match(r'.*wrfout_d.._([0-9_\-:]{19}) for domain\ +(\d+):', line).groups()
                dom_id = int(domain_str)
                logging.info("Detected history write for domain %d for time %s." % (dom_id, esmf_time))

                if js.postproc is None:
                    # nothing to postprocess or upload for this job
                    continue

                if str(dom_id) in js.postproc:
                    var_list = [str(x) for x in js.postproc[str(dom_id)]]
                    logging.info("Executing postproc instructions for vars %s for domain %d." % (str(var_list), dom_id))
                    wrfout_path = find_fresh_wrfout(js.wrf_dir, dom_id)
                    # only postprocess when this domain has instructions, so that
                    # var_list and wrfout_path always belong to this history write
                    try:
                        pp.process_vars(wrfout_path, dom_id, esmf_time, var_list)
                    except Exception as e:
                        logging.warning('Failed to postprocess for time %s with error %s.' % (esmf_time, str(e)))

                # if this is the last processed domain for this timestamp in incremental mode, upload to server
                if dom_id == max_pp_dom and js.postproc.get('shuttle', None) == 'incremental':
                    desc = js.postproc['description'] if 'description' in js.postproc else js.job_id
                    sent_files_1 = send_product_to_server(args, js.pp_dir, js.job_id, js.job_id, desc, already_sent_files)
                    logging.info('sent %d files to visualization server.' % len(sent_files_1))
                    # keep a real list (not a filter object) so it can be concatenated
                    # again on the next upload; json files are dropped from the record
                    already_sent_files = [x for x in already_sent_files + sent_files_1
                                          if not x.endswith('json')]
    finally:
        wrf_out.close()

    # if we are to send out the postprocessed files after completion, this is the time
    if js.postproc is not None and js.postproc.get('shuttle', None) == 'on_completion':
        desc = js.postproc['description'] if 'description' in js.postproc else js.job_id
        send_product_to_server(args, js.pp_dir, js.job_id, js.job_id, desc)
Ejemplo n.º 3
0
def process_output(job_id):
    """
    Postprocess the WRF output of a job and optionally upload the products.

    Loads ``job.json`` from the job's workspace directory, records the current
    process id and the state 'Processing' in it, and then works in one of two
    modes selected by ``postproc['from']``:

    * ``'wrfout'``: every ``wrfout_d??_*`` file found in the WRF directory is
      postprocessed for all the times it contains, then the function returns.
    * otherwise: ``rsl.error.0000`` is followed and each history write
      announced there is postprocessed, until WRF reports completion or the
      parallel job is no longer running.

    Products are uploaded after each processed frame when
    ``postproc['shuttle']`` is 'incremental', or once at the end when it is
    'on_completion'.

    Terminates the process with exit status 1 if the job description cannot
    be loaded; returns immediately if the job has no postprocessing section.

    :param job_id: the job identifier, also used as the job directory name
                   inside the workspace
    """
    args = load_sys_cfg()
    jobfile = osp.abspath(osp.join(args.workspace_path, job_id, 'job.json'))
    logging.info('process_output: loading job description from %s' % jobfile)
    try:
        with open(jobfile, 'r') as f:
            js = Dict(json.load(f))
    except Exception as e:
        logging.error('Cannot load the job description file %s' % jobfile)
        logging.error('%s' % e)
        sys.exit(1)

    def save_job_state():
        # write the job state back; the context manager flushes and closes the file
        with open(jobfile, 'w') as f:
            json.dump(js, f, indent=4, separators=(',', ': '))

    js.old_pid = js.pid
    js.pid = os.getpid()
    js.state = 'Processing'
    save_job_state()

    js.wrf_dir = osp.abspath(osp.join(args.workspace_path, js.job_id, 'wrf'))

    if js.postproc is None:
        logging.info('No postprocessing specified, exiting.')
        return

    # set up postprocessing
    delete_visualization(js.job_id)
    js.pp_dir = osp.join(args.workspace_path, js.job_id, "products")
    make_clean_dir(js.pp_dir)
    pp = Postprocessor(js.pp_dir, 'wfc-' + js.grid_code)
    js.manifest_filename = 'wfc-' + js.grid_code + '.json'
    logging.debug('Postprocessor created manifest %s', js.manifest_filename)

    shuttle = js.postproc.get('shuttle', None)
    if 'description' in js.postproc:
        desc = js.postproc['description']
    else:
        desc = js.job_id
    already_sent_files = []

    def upload_increment(sent_so_far):
        # upload what is new and return the updated record of sent files;
        # json files are left out of the record, and a real list is returned
        # so it can be concatenated again on the next call
        newly_sent = send_product_to_server(
            args, js.pp_dir, js.job_id, js.job_id, js.manifest_filename, desc,
            sent_so_far)
        return [x for x in sent_so_far + newly_sent if not x.endswith('json')]

    if js.postproc.get('from', None) == 'wrfout':
        logging.info('Postprocessing all wrfout files.')
        # postprocess all wrfouts
        pattern = osp.join(js.wrf_dir, 'wrfout_d??_????-??-??_??:??:??')
        for wrfout_path in sorted(glob.glob(pattern)):
            logging.info("Found %s" % wrfout_path)
            name_match = re.match(r'.*wrfout_d(0[0-9])_([0-9_\-:]{19})',
                                  wrfout_path)
            if name_match is None:
                # the glob is looser than the regex (e.g. a two-digit domain >= 10)
                logging.warning('Cannot parse wrfout file name %s, skipping.' %
                                wrfout_path)
                continue
            dom_id = int(name_match.group(1))
            d = nc4.Dataset(wrfout_path)
            try:
                # extract ESMF string times
                times = [''.join(x) for x in d.variables['Times'][:]]
            finally:
                d.close()
            for esmf_time in sorted(times):
                logging.info("Processing domain %d for time %s." %
                             (dom_id, esmf_time))
                if str(dom_id) not in js.postproc:
                    continue
                var_list = [str(x) for x in js.postproc[str(dom_id)]]
                logging.info(
                    "Executing postproc instructions for vars %s for domain %d."
                    % (str(var_list), dom_id))
                try:
                    pp.process_vars(osp.join(js.wrf_dir, wrfout_path), dom_id,
                                    esmf_time, var_list)
                    # in incremental mode, upload to server
                    if shuttle == 'incremental':
                        already_sent_files = upload_increment(
                            already_sent_files)
                except Exception as e:
                    logging.warning(
                        'Failed to postprocess for time %s with error %s.' %
                        (esmf_time, str(e)))

        # if we are to send out the postprocessed files after completion, this is the time
        if shuttle == 'on_completion':
            send_product_to_server(args, js.pp_dir, js.job_id, js.job_id,
                                   js.manifest_filename, desc)

        # NOTE(review): this path leaves js.state at 'Processing' - confirm intended
        save_job_state()
        return

    # step 9: wait for appearance of rsl.error.0000 and open it
    wrf_out = None
    rsl_path = osp.join(js.wrf_dir, 'rsl.error.0000')
    while wrf_out is None:
        try:
            wrf_out = open(rsl_path)
        except IOError:
            logging.info(
                'process_output: waiting 5 seconds for rsl.error.0000 file')
            time.sleep(5)

    try:
        logging.info('process_output: Detected rsl.error.0000')
        js.run_utc = time.ctime(os.path.getmtime(rsl_path))
        js.processed_utc = time.asctime(time.gmtime())

        # step 10: track log output and check for history writes from WRF
        wait_lines = 0
        wait_wrfout = 0
        while True:
            line = wrf_out.readline().strip()
            if not line:
                if not parallel_job_running(js):
                    logging.warning('WRF did not run to completion.')
                    break
                # log the wait only once per stretch of missing output
                if not wait_lines:
                    logging.info('Waiting for more output lines')
                wait_lines = wait_lines + 1
                time.sleep(5)
                continue
            wait_lines = 0

            if "SUCCESS COMPLETE WRF" in line:
                logging.info("WRF completion detected.")
                js.old_job_num = js.job_num
                js.job_num = None
                save_job_state()
                break

            if "Timing for Writing wrfout" in line:
                wait_wrfout = 0
                line_match = re.match(
                    r'.*wrfout_d.._([0-9_\-:]{19}) for domain\ +(\d+):', line)
                if line_match is None:
                    logging.warning(
                        'Cannot parse history write line: %s' % line)
                    continue
                esmf_time, domain_str = line_match.groups()
                dom_id = int(domain_str)
                logging.info(
                    "Detected history write for domain %d for time %s." %
                    (dom_id, esmf_time))
                if str(dom_id) in js.postproc:
                    var_list = [str(x) for x in js.postproc[str(dom_id)]]
                    logging.info(
                        "Executing postproc instructions for vars %s for domain %d."
                        % (str(var_list), dom_id))
                    wrfout_path = find_wrfout(js.wrf_dir, dom_id, esmf_time)
                    try:
                        pp.process_vars(osp.join(js.wrf_dir, wrfout_path),
                                        dom_id, esmf_time, var_list)
                    except Exception as e:
                        logging.warning(
                            'Failed to postprocess for time %s with error %s.'
                            % (esmf_time, str(e)))
                    else:
                        # in incremental mode, upload to server
                        if shuttle == 'incremental':
                            already_sent_files = upload_increment(
                                already_sent_files)
            else:
                # log the wait only once per stretch of non-wrfout lines
                if not wait_wrfout:
                    logging.info('Waiting for wrfout')
                wait_wrfout = wait_wrfout + 1
    finally:
        wrf_out.close()

    # if we are to send out the postprocessed files after completion, this is the time
    if shuttle == 'on_completion':
        send_product_to_server(args, js.pp_dir, js.job_id, js.job_id,
                               js.manifest_filename, desc)

    if shuttle is not None:
        make_kmz(js.job_id)  # arguments can be added to the job id string

    js.old_pid = js.pid
    js.pid = None
    js.state = 'Completed'
    save_job_state()
Ejemplo n.º 4
0
        dont_have_vars, have_vars = rtma.retrieve_rtma(cycle)
        if dont_have_vars:
            logging.info('RTMA variables %s not yet available for cycle %s.' % (str(dont_have_vars), str(cycle)))
            cycle -= timedelta(hours=1)
            lookback_length -= 1
        else:
            break
            
    if dont_have_vars:
        logging.error('CYCLER could not find useable cycle, exiting.')
        sys.exit(1)
        
    logging.info('Have RTMA data for cycle %s.' % str(cycle))
      
    # check for each region, if we are up to date w.r.t. RTMA data available
    for region_id,region_cfg in cfg.regions.iteritems():
        wrapped_cfg = Dict(region_cfg)
        #if 1:   # to run every time for debugging
        if not is_cycle_computed(cycle, wrapped_cfg, cfg.workspace_path):
            logging.info('CYCLER processing region %s for cycle %s' % (region_id, str(cycle)))
            fmda_advance_region(cycle, wrapped_cfg, rtma, cfg.workspace_path, lookback_length, meso_token)
            pp_path = postprocess_cycle(cycle, wrapped_cfg, cfg.workspace_path)   
            if 'shuttle_remote_host' in sys_cfg:
                sim_code = 'fmda-' + wrapped_cfg.code
                send_product_to_server(sys_cfg, pp_path, sim_code, sim_code, region_id + ' FM')
        else:
            logging.info('CYCLER the cycle %s for region %s is already complete, skipping ...' % (str(cycle), str(region_id)))

    # done
    logging.info('CYCLER cycle %s complete.' % str(cycle))
Ejemplo n.º 5
0
def fmda_advance_region(cycle, cfg, rtma, wksp_path, lookback_length,
                        meso_token):
    """
    Advance the fuel moisture estimates in the region specified by the configuration.
    The function assumes that the fuel moisture model has not been advanced to this
    cycle yet and will overwrite any previous computations.

    Control flow:

    1) look for a stored FM model of the previous cycle
    1a) found -> load it
    1b) not found -> if lookback_length > 0, recursively compute the previous
        cycle first and use its model; otherwise continue without a model
    2) read in RTMA variables
    3a) have a model -> advance it one time-step
    3b) no model -> initialize from equilibrium with the background covariance
    4) perform DA with the retrieved fm-10 observations
    5) write geogrid/WPS files, store the model, postprocess and upload

    Terminates the process with exit status 1 if the RTMA retrieval raises
    ValueError.

    :param cycle: the datetime indicating the processed cycle in UTC
    :param cfg: the configuration dictionary specifying the region
    :param rtma: the RTMA object that can be used to retrieve variables for this cycle
    :param wksp_path: the workspace path for the cycler
    :param lookback_length: number of cycles to search before we find a computed cycle
    :param meso_token: the mesowest API access token or a list of them
    :return: the model advanced and assimilated at the current cycle
    """
    logging.info("rtma_cycler.fmda_advance_region: %s" % str(cycle))
    model = None
    prev_cycle = cycle - timedelta(hours=1)
    prev_model_path = compute_model_path(prev_cycle, cfg.code, wksp_path)
    if not osp.exists(prev_model_path):
        logging.info('CYCLER cannot find model from previous cycle %s' %
                     str(prev_cycle))
        if lookback_length > 0:
            # compute the missing previous cycle first, with one less cycle to look back
            model = fmda_advance_region(prev_cycle, cfg, rtma, wksp_path,
                                        lookback_length - 1, meso_token)
    else:
        logging.info('CYCLER found previous model for cycle %s.' %
                     str(prev_cycle))
        model = FuelMoistureModel.from_netcdf(prev_model_path)

    # retrieve the variables and make sure they are available (we should not be here if they are not)
    try:
        dont_have_vars, have_vars = rtma.retrieve_rtma(cycle)
    except ValueError as e:
        logging.error(e)
        sys.exit(1)
    assert not dont_have_vars

    logging.info('CYCLER loading RTMA data for cycle %s.' % str(cycle))
    TD, T2, RH, precipa, hgt, lats, lons = load_rtma_data(have_vars, cfg.bbox)
    Ed, Ew = compute_equilibria(T2, RH)

    # "+ 0" makes a copy, so the thresholding below leaves precipa untouched
    rain = precipa[:, :] + 0
    # remove rain that is too small to make any difference
    rain[rain < 0.01] = 0
    # remove bogus rain that is too large
    rain[rain > 1e10] = 0

    dom_shape = T2.shape

    # store the lons/lats for this domain
    latlon_path = osp.join(wksp_path, '%s-geo.nc' % cfg.code)
    if not osp.isfile(latlon_path):
        logging.info('CYCLER initializing new file %s.' % (latlon_path))
        d = netCDF4.Dataset(latlon_path, 'w', format='NETCDF4')
        try:
            d.createDimension('south_north', dom_shape[0])
            d.createDimension('west_east', dom_shape[1])
            xlat = d.createVariable('XLAT', 'f4', ('south_north', 'west_east'))
            xlat[:, :] = lats
            xlong = d.createVariable('XLONG', 'f4',
                                     ('south_north', 'west_east'))
            xlong[:, :] = lons
        finally:
            # close the dataset even if writing a variable fails
            d.close()
    else:
        logging.info('CYCLER file already exists:  %s.' % (latlon_path))

    # the process noise matrix
    Q = np.diag([1e-4, 5e-5, 1e-5, 1e-6, 1e-6])

    # background covariance
    P0 = np.diag([0.01, 0.01, 0.01, 0.001, 0.001])

    # check if we must start from equilibrium
    if model is None:
        logging.info('CYCLER initializing from equilibrium for cycle %s.' %
                     (str(cycle)))
        # setup model parameters: three fuel classes, all starting from the
        # mean of the drying and wetting equilibria
        Tk = np.array([1.0, 10.0, 100.0])
        m0 = np.expand_dims(0.5 * (Ed + Ew), axis=2)
        model = FuelMoistureModel(m0[:, :, [0, 0, 0]], Tk, P0)
    else:
        logging.info('CYCLER advancing model one hour to cycle %s.' %
                     (str(cycle)))
        dt = 3600  # always 1 hr step in RTMA
        model.advance_model(Ed, Ew, rain, dt, Q)

    logging.info('CYCLER retrieving fm-10 observations for cycle %s.' %
                 (str(cycle)))

    # perform assimilation with mesowest observations
    tm_start = cycle - timedelta(minutes=30)
    tm_end = cycle + timedelta(minutes=30)
    fm10 = retrieve_mesowest_observations(meso_token, tm_start, tm_end, lats,
                                          lons, hgt)
    fm10v = [obs.get_value() for fm10_obs in fm10.values() for obs in fm10_obs]

    if fm10v:
        logging.info(
            'CYCLER retrieved %d valid observations, min/mean/max [%g/%g/%g].'
            % (len(fm10), np.amin(fm10v), np.mean(fm10v), np.amax(fm10v)))
    else:
        # min/max of an empty sequence raise ValueError, so report separately
        logging.info('CYCLER retrieved no valid observations.')

    # run the data assimilation step
    covs = [np.ones(dom_shape), hgt / 2000.0]
    covs_names = ['const', 'hgt/2000']
    if np.any(rain > 0.01):
        covs.append(rain)
        covs_names.append('rain')
    execute_da_step(model, cycle, covs, covs_names, fm10)

    # make geogrid files for WPS; datasets and lines to add to GEOGRID.TBL
    geogrid_path = compute_model_path(cycle, cfg.code, wksp_path, ext="geo")
    index = rtma.geogrid_index()
    logging.debug('CYCLER geogrid index %s' % str(index))
    model.to_geogrid(geogrid_path, index, lats, lons)

    # make wps format files for WPS
    fmda_path = osp.join(wksp_path, cfg.code,
                         '{:04d}{:02d}'.format(cycle.year, cycle.month))
    time_tag = '{:04d}-{:02d}-{:02d}_{:02d}'.format(cycle.year, cycle.month,
                                                    cycle.day, cycle.hour)
    model.to_wps_format(fmda_path, index, lats, lons, time_tag)

    # store the new model
    model_path = compute_model_path(cycle, cfg.code, wksp_path)
    logging.info('CYCLER writing model variables to:  %s.' % model_path)
    model.to_netcdf(
        ensure_dir(model_path), {
            'EQUILd FM': Ed,
            'EQUILw FM': Ew,
            'TD': TD,
            'T2': T2,
            'RH': RH,
            'PRECIPA': precipa,
            'PRECIP': rain,
            'HGT': hgt
        })

    # create visualization and send results
    bounds = (lons.min(), lons.max(), lats.min(), lats.max())
    pp_path = postprocess_cycle(cycle, cfg, wksp_path, bounds)
    if pp_path is not None and 'shuttle_remote_host' in sys_cfg:
        sim_code = 'fmda-' + cfg.code
        send_product_to_server(sys_cfg, pp_path, sim_code, sim_code,
                               sim_code + '.json', cfg.region_id + ' FM')

    return model
Ejemplo n.º 6
0
    if dont_have_vars:
        logging.warning('CYCLER could not find useable cycle.')
        logging.warning('CYCLER copying previous post-processing.')
        for region_id, region_cfg in six.iteritems(cfg.regions):
            wrapped_cfg = Dict(region_cfg)
            wrapped_cfg.update({'region_id': region_id})
            try:
                bounds = compute_rtma_bounds(wrapped_cfg.bbox)
                pp_path = postprocess_cycle(cycle, wrapped_cfg,
                                            cfg.workspace_path, bounds)
                if pp_path != None:
                    if 'shuttle_remote_host' in sys_cfg:
                        sim_code = 'fmda-' + wrapped_cfg.code
                        send_product_to_server(sys_cfg, pp_path, sim_code,
                                               sim_code, sim_code + '.json',
                                               region_id + ' FM')
            except Exception as e:
                logging.warning('CYCLER exception {}'.format(e))
                logging.error('CYCLER skipping region {} for cycle {}'.format(
                    region_id, str(cycle)))
        sys.exit(1)

    logging.info('Have RTMA data for cycle %s.' % str(cycle))

    # check for each region, if we are up to date w.r.t. RTMA data available
    for region_id, region_cfg in six.iteritems(cfg.regions):
        wrapped_cfg = Dict(region_cfg)
        wrapped_cfg.update({'region_id': region_id})
        #if 1:   # to run every time for debugging
        if not is_cycle_computed(cycle, wrapped_cfg, cfg.workspace_path):
Ejemplo n.º 7
0
def process_output(job_id):
    """
    Follow the WRF log of a job, postprocess history writes and upload products.

    Loads ``job.json`` from the job's workspace directory, records the current
    process id and the state 'Processing' in it, waits for ``rsl.error.0000``
    to appear in the WRF directory and then reads it line by line. Each
    announced history write is postprocessed for the variables listed in
    ``postproc`` for that domain. Tracking stops when WRF reports completion
    or when the parallel job is no longer running. Finally the state
    'Completed' is written back to ``job.json``.

    Products are uploaded after each history write when
    ``postproc['shuttle']`` is 'incremental', or once at the end when it is
    'on_completion'. A job without a postprocessing section is only tracked.

    Terminates the process with exit status 1 if the job description cannot
    be loaded.

    :param job_id: the job identifier, also used as the job directory name
                   inside the workspace
    """
    args = load_sys_cfg()
    jobfile = osp.abspath(osp.join(args.workspace_path, job_id, 'job.json'))
    logging.info('process_output: loading job description from %s' % jobfile)
    try:
        with open(jobfile, 'r') as f:
            js = Dict(json.load(f))
    except Exception as e:
        logging.error('Cannot load the job description file %s' % jobfile)
        logging.error('%s' % e)
        sys.exit(1)

    def save_job_state():
        # write the job state back; the context manager flushes and closes the file
        with open(jobfile, 'w') as f:
            json.dump(js, f, indent=4, separators=(',', ': '))

    js.old_pid = js.pid
    js.pid = os.getpid()
    js.state = 'Processing'
    save_job_state()

    js.wrf_dir = osp.abspath(osp.join(args.workspace_path, js.job_id, 'wrf'))

    # step 9: wait for appearance of rsl.error.0000 and open it
    rsl_path = osp.join(js.wrf_dir, 'rsl.error.0000')
    wrf_out = None
    while wrf_out is None:
        try:
            wrf_out = open(rsl_path)
        except IOError:
            logging.info('process_output: waiting 5 seconds for rsl.error.0000 file')
            time.sleep(5)

    try:
        logging.info('process_output: Detected rsl.error.0000')

        # step 10: track log output and check for history writes from WRF
        pp = None
        already_sent_files = []
        have_postproc = js.postproc is not None
        shuttle = None
        desc = js.job_id
        if have_postproc:
            js.pp_dir = osp.join(args.workspace_path, js.job_id, "products")
            make_clean_dir(js.pp_dir)
            pp = Postprocessor(js.pp_dir, 'wfc-' + js.grid_code)
            shuttle = js.postproc.get('shuttle', None)
            if 'description' in js.postproc:
                desc = js.postproc['description']

        wait_lines = 0
        wait_wrfout = 0
        while True:
            line = wrf_out.readline().strip()
            if not line:
                if not parallel_job_running(js):
                    logging.warning('WRF did not run to completion.')
                    break
                # log the wait only once per stretch of missing output
                if not wait_lines:
                    logging.info('Waiting for more output lines')
                wait_lines = wait_lines + 1
                time.sleep(0.5)
                continue
            wait_lines = 0

            if "SUCCESS COMPLETE WRF" in line:
                logging.info("WRF completion detected.")
                js.old_job_num = js.job_num
                js.job_num = None
                save_job_state()
                break

            if "Timing for Writing wrfout" in line:
                line_match = re.match(r'.*wrfout_d.._([0-9_\-:]{19}) for domain\ +(\d+):', line)
                if line_match is None:
                    logging.warning('Cannot parse history write line: %s' % line)
                    continue
                esmf_time, domain_str = line_match.groups()
                dom_id = int(domain_str)
                logging.info("Detected history write for domain %d for time %s." % (dom_id, esmf_time))
                if have_postproc and str(dom_id) in js.postproc:
                    var_list = [str(x) for x in js.postproc[str(dom_id)]]
                    logging.info("Executing postproc instructions for vars %s for domain %d." % (str(var_list), dom_id))
                    wrfout_path = find_wrfout(js.wrf_dir, dom_id, esmf_time)
                    try:
                        pp.process_vars(osp.join(js.wrf_dir, wrfout_path), dom_id, esmf_time, var_list)
                    except Exception as e:
                        logging.warning('Failed to postprocess for time %s with error %s.' % (esmf_time, str(e)))

                # in incremental mode, upload to server
                if shuttle == 'incremental':
                    sent_files_1 = send_product_to_server(args, js.pp_dir, js.job_id, js.job_id, desc, already_sent_files)
                    # keep a real list (not a filter object) so it can be concatenated
                    # again on the next upload; json files are dropped from the record
                    already_sent_files = [x for x in already_sent_files + sent_files_1
                                          if not x.endswith('json')]
                wait_wrfout = 0
            else:
                # log the wait only once per stretch of non-wrfout lines
                if not wait_wrfout:
                    logging.info('Waiting for wrfout')
                wait_wrfout = wait_wrfout + 1
    finally:
        wrf_out.close()

    # if we are to send out the postprocessed files after completion, this is the time
    if shuttle == 'on_completion':
        send_product_to_server(args, js.pp_dir, js.job_id, js.job_id, desc)

    js.old_pid = js.pid
    js.pid = None
    js.state = 'Completed'
    save_job_state()
Ejemplo n.º 8
0
def execute(args):
    """
    Executes a weather/fire simulation.

    The pipeline is: clone WPS, run GEOGRID in parallel with GRIB retrieval
    and UNGRIB, run METGRID, clone WRF, run REAL, optionally assimilate fuel
    moisture observations, submit WRF to the queueing system and then follow
    rsl.error.0000 to postprocess (and optionally upload) every history
    write until WRF reports successful completion.

    :param args: a dictionary with the following keys
    :param grid_code: the (unique) code of the grid that is used
    :param sys_install_path: system installation directory
    :param start_utc: start time of simulation in UTC
    :param end_utc: end time of simulation in UTC
    :param workspace_path: workspace directory
    :param wps_install_path: installation directory of WPS that will be used
    :param wrf_install_path: installation directory of WRF that will be used
    :param grib_source: a string identifying a valid GRIB2 source
    :param wps_namelist_path: the path to the namelist.wps file that will be used as template
    :param wrf_namelist_path: the path to the namelist.input file that will be used as template
    :param fire_namelist_path: the path to the namelist.fire file that will be used as template
    :param wps_geog_path: the path to the geogrid data directory providing terrain/fuel data
    :param email_notification: dictionary containing keys address and events indicating when a mail should be fired off
    :param emissions_namelist_path: (optional) the path to the emissions namelist; when present, tracers are switched on
    :param ignitions: (optional) ignition specification; when present, it is rendered into the WRF namelist
    :return: None; returns early (before METGRID) if either parallel preprocessing stream does not report 'SUCCESS'
    """
    logging.basicConfig(level=logging.INFO)

    # initialize the job state from the arguments
    js = JobState(args)

    logging.info("job %s starting [%d hours to forecast]." %
                 (js.job_id, js.fc_hrs))
    send_email(js, 'start', 'Job %s started.' % js.job_id)

    # read in all namelists
    js.wps_nml = f90nml.read(args['wps_namelist_path'])
    js.wrf_nml = f90nml.read(args['wrf_namelist_path'])
    js.fire_nml = f90nml.read(args['fire_namelist_path'])
    js.ems_nml = None
    if 'emissions_namelist_path' in args:
        js.ems_nml = f90nml.read(args['emissions_namelist_path'])

    # Parse and setup the domain configuration
    js.domain_conf = WPSDomainConf(js.domains)

    num_doms = len(js.domain_conf)
    js.wps_nml['share']['start_date'] = [utc_to_esmf(js.start_utc)] * num_doms
    js.wps_nml['share']['end_date'] = [utc_to_esmf(js.end_utc)] * num_doms
    js.wps_nml['share']['interval_seconds'] = 3600

    logging.info("number of domains defined is %d." % num_doms)

    # build directories in workspace
    js.wps_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wps'))
    js.wrf_dir = osp.abspath(osp.join(js.workspace_path, js.job_id, 'wrf'))

    logging.info("cloning WPS into %s" % js.wps_dir)

    # step 1: clone WPS and WRF directories
    cln = WRFCloner(args)
    cln.clone_wps(js.wps_dir, js.grib_source.vtables(), [])

    # step 2: process domain information and patch namelist for geogrid
    js.wps_nml['geogrid']['geog_data_path'] = args['wps_geog_path']
    js.domain_conf.prepare_for_geogrid(js.wps_nml, js.wrf_nml, js.wrfxpy_dir,
                                       js.wps_dir)
    f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True)

    # do steps 2 & 3 & 4 in parallel (two execution streams)
    #  -> GEOGRID ->
    #  -> GRIB2 download ->  UNGRIB ->

    proc_q = Queue()
    geogrid_proc = Process(target=run_geogrid, args=(js, proc_q))
    grib_proc = Process(target=retrieve_gribs_and_run_ungrib,
                        args=(js, proc_q))

    geogrid_proc.start()
    grib_proc.start()

    # wait until both tasks are done
    geogrid_proc.join()
    grib_proc.join()

    # one status message is expected from each stream; abort on any failure
    if proc_q.get() != 'SUCCESS':
        return

    if proc_q.get() != 'SUCCESS':
        return

    proc_q.close()

    # step 5: execute metgrid after ensuring all grids will be processed
    js.domain_conf.prepare_for_metgrid(js.wps_nml)
    f90nml.write(js.wps_nml, osp.join(js.wps_dir, 'namelist.wps'), force=True)

    logging.info("running METGRID")
    Metgrid(js.wps_dir).execute().check_output()

    send_email(js, 'metgrid', 'Job %s - metgrid complete.' % js.job_id)
    logging.info("cloning WRF into %s" % js.wrf_dir)

    # step 6: clone wrf directory, symlink all met_em* files
    cln.clone_wrf(js.wrf_dir, [])
    symlink_matching_files(js.wrf_dir, js.wps_dir, "met_em*")

    logging.info("running REAL")

    # step 7: patch input namelist, fire namelist, emissions namelist (if required)
    #         and execute real.exe
    time_ctrl = update_time_control(js.start_utc, js.end_utc, num_doms)
    js.wrf_nml['time_control'].update(time_ctrl)
    update_namelist(js.wrf_nml, js.grib_source.namelist_keys())
    if 'ignitions' in args:
        update_namelist(js.wrf_nml, render_ignitions(js, num_doms))

    # if we have an emissions namelist, automatically turn on the tracers
    if js.ems_nml is not None:
        f90nml.write(js.ems_nml,
                     osp.join(js.wrf_dir, 'namelist.fire_emissions'),
                     force=True)
        js.wrf_nml['dynamics']['tracer_opt'] = [2] * num_doms

    f90nml.write(js.wrf_nml,
                 osp.join(js.wrf_dir, 'namelist.input'),
                 force=True)

    f90nml.write(js.fire_nml,
                 osp.join(js.wrf_dir, 'namelist.fire'),
                 force=True)

    # try to run Real twice as it sometimes fails the first time
    # it's not clear why this error happens
    try:
        Real(js.wrf_dir).execute().check_output()
    except Exception as e:
        logging.error('Real step failed with exception %s, retrying ...' %
                      str(e))
        Real(js.wrf_dir).execute().check_output()

    # step 8: if requested, do fuel moisture DA
    if js.fmda is not None:
        logging.info('running fuel moisture data assimilation')
        for dom in js.fmda.domains:
            # the WRF directory lives on the job state; there is no local wrf_dir
            assimilate_fm10_observations(
                osp.join(js.wrf_dir, 'wrfinput_d%02d' % dom), None,
                js.fmda.token)

    logging.info('submitting WRF job')
    send_email(js, 'wrf_submit', 'Job %s - wrf job submitted.' % js.job_id)

    # step 9: execute wrf.exe on parallel backend
    js.task_id = "sim-" + js.grid_code + "-" + utc_to_esmf(js.start_utc)[:10]
    WRF(js.wrf_dir, js.qsys).submit(js.task_id, js.num_nodes, js.ppn,
                                    js.wall_time_hrs)

    send_email(
        js, 'wrf_exec',
        'Job %s - wrf job starting now with id %s.' % (js.job_id, js.task_id))
    logging.info("WRF job submitted with id %s, waiting for rsl.error.0000" %
                 js.task_id)

    # step 10: wait for appearance of rsl.error.0000 and open it
    rsl_path = osp.join(js.wrf_dir, 'rsl.error.0000')
    wrf_out = None
    while wrf_out is None:
        try:
            wrf_out = open(rsl_path)
        except IOError:
            logging.info(
                'forecast: waiting 5 seconds for rsl.error.0000 file')
            time.sleep(5)

    # make sure the log file handle is released however the tracking loop ends
    try:
        logging.info('Detected rsl.error.0000')

        # step 11: track log output and check for history writes from WRF
        pp = None
        already_sent_files, max_pp_dom = [], -1
        if js.postproc is not None:
            js.pp_dir = osp.join(js.workspace_path, js.job_id, "products")
            make_dir(js.pp_dir)
            pp = Postprocessor(js.pp_dir, 'wfc-' + js.grid_code)
            # single-character keys of postproc are taken to be domain ids;
            # keep -1 (never matches a domain) when there are none
            pp_doms = [int(x) for x in js.postproc if len(x) == 1]
            if pp_doms:
                max_pp_dom = max(pp_doms)

        while True:
            line = wrf_out.readline().strip()
            if not line:
                # nothing new in the log yet, poll again shortly
                time.sleep(0.2)
                continue

            if "SUCCESS COMPLETE WRF" in line:
                send_email(js, 'complete',
                           'Job %s - wrf job complete SUCCESS.' % js.job_id)
                logging.info("WRF completion detected.")
                break

            if "Timing for Writing wrfout" in line:
                esmf_time, domain_str = re.match(
                    r'.*wrfout_d.._([0-9_\-:]{19}) for domain\ +(\d+):',
                    line).groups()
                dom_id = int(domain_str)
                logging.info(
                    "Detected history write for domain %d for time %s." %
                    (dom_id, esmf_time))
                if js.postproc is not None and str(dom_id) in js.postproc:
                    var_list = [str(x) for x in js.postproc[str(dom_id)]]
                    logging.info(
                        "Executing postproc instructions for vars %s for domain %d."
                        % (str(var_list), dom_id))
                    wrfout_path = find_fresh_wrfout(js.wrf_dir, dom_id)
                    # postprocessing is best-effort and only attempted for
                    # domains that actually have postproc instructions
                    try:
                        pp.process_vars(wrfout_path, dom_id, esmf_time,
                                        var_list)
                    except Exception as e:
                        logging.warning(
                            'Failed to postprocess for time %s with error %s.'
                            % (esmf_time, str(e)))

                # if this is the last processed domain for this timestamp in incremental mode, upload to server
                # (max_pp_dom stays -1 without postproc, so the .get is never reached on None)
                if dom_id == max_pp_dom and js.postproc.get(
                        'shuttle', None) == 'incremental':
                    desc = js.postproc.get('description', js.job_id)
                    sent_files_1 = send_product_to_server(
                        args, js.pp_dir, js.job_id, js.job_id, desc,
                        already_sent_files)
                    logging.info('sent %d files to visualization server.' %
                                 len(sent_files_1))
                    # keep a real list (not a lazy filter object) so it can be
                    # concatenated again on the next upload; json files are
                    # dropped from the list so they are not treated as sent
                    already_sent_files = [
                        x for x in already_sent_files + sent_files_1
                        if not x.endswith('json')
                    ]
    finally:
        wrf_out.close()

    # if we are to send out the postprocessed files after completion, this is the time
    if js.postproc is not None and js.postproc.get('shuttle',
                                                   None) == 'on_completion':
        desc = js.postproc.get('description', js.job_id)
        send_product_to_server(args, js.pp_dir, js.job_id, js.job_id, desc)