Example #1
0
def create_process_output_script(job_id):
    """
    Write an executable bash wrapper that runs process_output.sh for a job.

    The wrapper is stored as job_process_output.sh in the job's workspace
    directory.  It changes into the system install path and runs
    process_output.sh with the job id, redirecting stdout and stderr to
    job_process_output.log in the same job directory.

    :param job_id: the job identifier, also the name of the job directory
    """
    cfg = load_sys_cfg()
    job_dir = osp.join(cfg.workspace_path, job_id)
    wrapper_path = osp.join(job_dir, 'job_process_output.sh')
    wrapper_lines = [
        '#!/usr/bin/env bash\n',
        'cd ' + cfg.sys_install_path + '\n',
        'LOG=' + osp.join(job_dir, 'job_process_output.log') + '\n',
        osp.join(cfg.sys_install_path, 'process_output.sh') + ' ' + job_id +
        ' &> $LOG \n',
    ]
    with open(wrapper_path, 'w') as wrapper:
        wrapper.writelines(wrapper_lines)

    # add the owner-execute bit on top of whatever mode the file received
    current_mode = os.stat(wrapper_path).st_mode
    os.chmod(wrapper_path, current_mode | stat.S_IEXEC)
Example #2
0
    def execute(self):
        """
        Execute real.exe in given working directory.

        This method is redefined here as real.exe needs special treatment.  DMPAR compilation of WRF
        causes real.exe to output stdout and stderr into rsl.out.0000 and rsl.error.0000 respectively.
        We don't redirect these (we can't) but we rename the output files after the fact.

        NOTE: on some machines it is OK to run real.exe from command line, but generally mpirun is required!

        If the key "wrf_serial_install_path" is present in the system configuration, real.exe is run
        as a serial binary with stdout/stderr redirected directly to the configured paths.
        This is to support systems that do not allow executing mpi binary from command line.
        We do not run real.exe by mpirun because mpirun on head node may not be allowed.

        If self.check_output() already succeeds, nothing is executed.

        :return: self
        :raises: whatever check_call raises when the executable fails
                 (NOTE(review): presumably subprocess.CalledProcessError - confirm)
        """
        exec_name = self.exec_name
        stdout_path = self.stdout_path
        stderr_path = self.stderr_path

        # first verify if we have already done our job
        try:
            self.check_output()
            return self
        except OutputCheckFailed:
            pass

        cfg = load_sys_cfg()
        if "wrf_serial_install_path" in cfg:
            logging.info("Executing serial %s" % exec_name)
            # the with-block closes (and flushes) both log files even if
            # check_call raises, so no file handles are leaked
            with open(stdout_path, 'w') as stdout_file, \
                    open(stderr_path, 'w') as stderr_file:
                check_call(exec_name,
                           cwd=self.work_dir,
                           stdout=stdout_file,
                           stderr=stderr_file)
        else:
            logging.info("Executing MPI %s directly without mpirun" %
                         exec_name)
            check_call(exec_name, cwd=self.work_dir)
            # the MPI build writes its own rsl files; move them to the expected paths
            os.rename(osp.join(self.work_dir, "rsl.out.0000"), stdout_path)
            os.rename(osp.join(self.work_dir, "rsl.error.0000"), stderr_path)

        return self
Example #3
0
def send_products_to_server(job_id):
    """
    Send the postprocessed products of a job to the server.

    Loads job.json from the job's workspace directory and calls
    send_product_to_server with the products directory, manifest file name
    and description derived from it.  Exits the process with status 1 if the
    job description cannot be loaded.

    :param job_id: the job identifier, also the name of the job directory
    """
    args = load_sys_cfg()
    jobfile = osp.abspath(osp.join(args.workspace_path, job_id, 'job.json'))
    logging.info('send_products_to_server: loading job description from %s' %
                 jobfile)
    try:
        # the with-block closes the job file as soon as it has been parsed
        with open(jobfile, 'r') as f:
            js = Dict(json.load(f))
    except Exception as e:
        logging.error('Cannot load the job description file %s' % jobfile)
        logging.error('%s' % e)
        sys.exit(1)
    # fall back to the job id when no description was given
    desc = js.postproc[
        'description'] if 'description' in js.postproc else js.job_id
    # values stored in the job description take precedence over the defaults
    pp_dir = js.get(
        'pp_dir', osp.abspath(osp.join(args.workspace_path, job_id,
                                       "products")))
    manifest_filename = js.get('manifest_filename',
                               'wfc-' + js.grid_code + '.json')
    send_product_to_server(args, pp_dir, job_id, job_id, manifest_filename,
                           desc)
Example #4
0
def test_time():
    """
    Print the forecast times and file names NAM218 derives for a fixed window.

    This is a manual smoke test: it prints intermediate time differences and
    the results of forecast_times, file_times and file_names for a cycle
    starting 2005-01-05 00:00:00 and a window ending 2005-01-06 22:00:00.
    It makes no assertions.
    """
    cfg = load_sys_cfg()

    g = NAM218(cfg)

    cycle_start = esmf_to_utc("2005-01-05_00:00:00")
    from_utc = esmf_to_utc("2005-01-05_00:00:00")
    to_utc = esmf_to_utc("2005-01-06_22:00:00")

    print(cycle_start.year, cycle_start.month, cycle_start.day,
          cycle_start.hour)

    delta = from_utc - cycle_start
    print(str(delta))
    print(delta.days, delta.seconds, delta.total_seconds())
    print((to_utc - cycle_start).total_seconds())
    print(timedelta_hours(to_utc - cycle_start))
    print(timedelta_hours(to_utc - cycle_start, False))

    # fc_hours comes from the source object itself, not from a local estimate
    fc_start, fc_hours = g.forecast_times(cycle_start, from_utc, to_utc)
    print('fc_start = ', fc_start)
    print('fc_hours = ', fc_hours)
    fc_list, colmet_list_utc = g.file_times(cycle_start, fc_start, fc_hours)
    grib_files, colmet_prefix, colmet_files = g.file_names(
        cycle_start, fc_list, colmet_list_utc)
    print('fc_list = ', fc_list)
    print('colmet_list_utc = ')
    for x in colmet_list_utc:
        print(x)
    print('grib_files = ')
    for x in grib_files:
        print(x)
    print('colmet_files = ')
    for x in colmet_files:
        print(colmet_prefix + '/' + x)
Example #5
0
from utils import esmf_to_utc, load_sys_cfg

import requests
from datetime import datetime, timedelta
import pytz
import logging
import os.path as osp
from utils import readhead
import time
from .grib_file import grib_messages
from six.moves import range

# global parameter
# NOTE(review): presumably the smallest acceptable size (in bytes) of a
# downloaded file - confirm at the point of use
min_content_size = 10000

# system configuration, loaded once when this module is imported
cfg = load_sys_cfg()
# optional settings read from the configuration, defaulting to 20 and 3
# NOTE(review): presumably the pause between download attempts and the number
# of attempts - confirm at the point of use
sleep_seconds = cfg.get('sleep_seconds', 20)
max_retries = cfg.get('max_retries', 3)


class RTMA(object):
    """
    This class supports the ingest of NOAA RTMA (Real-Time Mesoscale Analysis) data.

    RTMA is different from other sources since there is only one set of files
    covering 24 hours (one fileset per hour), and these files are continuously
    overwritten.  This class therefore checks the file time of each file to
    verify that it belongs to 'today' rather than to the day before.

    Note that RTMA operates in the UTC time zone.
    """
Example #6
0
def ssh_command(command):
    """
    Run a single command through an SSHShuttle session.

    A shuttle is created from the system configuration, connected, used to
    run the command via simple_command, and disconnected again.

    :param command: the command passed to SSHShuttle.simple_command
    """
    cfg = load_sys_cfg()
    s = SSHShuttle(cfg)
    s.connect()
    try:
        s.simple_command(command)
    finally:
        # always release the connection, even when the command raises
        s.disconnect()
Example #7
0
def process_output(job_id):
    """
    Postprocess the WRF output of a job and optionally ship the products.

    The job description is loaded from job.json in the job's workspace
    directory (the process exits with status 1 if that fails) and is rewritten
    whenever the job state changes.

    If the 'postproc' section has 'from' set to 'wrfout', every existing wrfout
    file in the job's wrf directory is postprocessed and the function returns.
    Otherwise the function waits for rsl.error.0000 to appear, follows it line
    by line, and postprocesses each history write reported there until either
    'SUCCESS COMPLETE WRF' is seen or parallel_job_running reports that the job
    is no longer running.

    Products are sent with send_product_to_server after each processed frame
    when postproc 'shuttle' is 'incremental', or once at the end when it is
    'on_completion'.

    :param job_id: the job identifier, also the name of the job directory
    """
    args = load_sys_cfg()
    jobfile = osp.abspath(osp.join(args.workspace_path, job_id, 'job.json'))
    logging.info('process_output: loading job description from %s' % jobfile)
    try:
        with open(jobfile, 'r') as f:
            js = Dict(json.load(f))
    except Exception as e:
        logging.error('Cannot load the job description file %s' % jobfile)
        logging.error('%s' % e)
        sys.exit(1)

    def _save_job_state():
        # rewrite job.json; the with-block guarantees the file is flushed and closed
        with open(jobfile, 'w') as f:
            json.dump(js, f, indent=4, separators=(',', ': '))

    def _description():
        # fall back to the job id when no description was given
        return js.postproc[
            'description'] if 'description' in js.postproc else js.job_id

    def _send_all():
        # ship everything in the products directory
        send_product_to_server(args, js.pp_dir, js.job_id, js.job_id,
                               js.manifest_filename, _description())

    def _send_incremental(already_sent):
        # ship what is new and return the updated list of files already sent;
        # json files are left out of that list (as in the original filter), and a
        # real list is returned (not a lazy filter object) so that it can be
        # concatenated again on the next call
        sent_now = send_product_to_server(args, js.pp_dir, js.job_id,
                                          js.job_id, js.manifest_filename,
                                          _description(), already_sent)
        return [
            x for x in list(already_sent) + list(sent_now)
            if not x.endswith('json')
        ]

    js.old_pid = js.pid
    js.pid = os.getpid()
    js.state = 'Processing'
    _save_job_state()

    js.wrf_dir = osp.abspath(osp.join(args.workspace_path, js.job_id, 'wrf'))

    already_sent_files = []
    if js.postproc is None:
        logging.info('No postprocessing specified, exiting.')
        return

    # set up postprocessing
    delete_visualization(js.job_id)
    js.pp_dir = osp.join(args.workspace_path, js.job_id, "products")
    make_clean_dir(js.pp_dir)
    pp = Postprocessor(js.pp_dir, 'wfc-' + js.grid_code)
    js.manifest_filename = 'wfc-' + js.grid_code + '.json'
    logging.debug('Postprocessor created manifest %s', js.manifest_filename)

    if js.postproc.get('from', None) == 'wrfout':
        logging.info('Postprocessing all wrfout files.')
        # postprocess all wrfouts
        for wrfout_path in sorted(
                glob.glob(
                    osp.join(js.wrf_dir, 'wrfout_d??_????-??-??_??:??:??'))):
            logging.info("Found %s" % wrfout_path)
            domain_str = re.match(r'.*wrfout_d(0[0-9])_([0-9_\-:]{19})',
                                  wrfout_path).group(1)
            dom_id = int(domain_str)
            d = nc4.Dataset(wrfout_path)
            try:
                # extract ESMF string times; astype(str) turns byte characters
                # into text so that ''.join also works on Python 3
                times = [
                    ''.join(x) for x in d.variables['Times'][:].astype(str)
                ]
            finally:
                d.close()
            for esmf_time in sorted(times):
                logging.info("Processing domain %d for time %s." %
                             (dom_id, esmf_time))
                if str(dom_id) in js.postproc:
                    var_list = [str(x) for x in js.postproc[str(dom_id)]]
                    logging.info(
                        "Executing postproc instructions for vars %s for domain %d."
                        % (str(var_list), dom_id))
                    try:
                        pp.process_vars(osp.join(js.wrf_dir, wrfout_path),
                                        dom_id, esmf_time, var_list)
                        # in incremental mode, upload to server
                        if js.postproc.get('shuttle', None) == 'incremental':
                            already_sent_files = _send_incremental(
                                already_sent_files)
                    except Exception as e:
                        logging.warning(
                            'Failed to postprocess for time %s with error %s.'
                            % (esmf_time, str(e)))

        # if we are to send out the postprocessed files after completion, this is the time
        if js.postproc.get('shuttle', None) == 'on_completion':
            _send_all()

        _save_job_state()
        return

    # step 9: wait for appearance of rsl.error.0000 and open it
    wrf_out = None
    rsl_path = osp.join(js.wrf_dir, 'rsl.error.0000')
    while wrf_out is None:
        try:
            wrf_out = open(rsl_path)
        except IOError:
            logging.info(
                'process_output: waiting 5 seconds for rsl.error.0000 file')
            time.sleep(5)

    # the with-block closes the log file however the tracking loop ends
    with wrf_out:
        logging.info('process_output: Detected rsl.error.0000')
        # both timestamps are in UTC (time.ctime would give local time)
        js.run_utc = time.asctime(time.gmtime(os.path.getmtime(rsl_path)))
        js.processed_utc = time.asctime(time.gmtime())

        # step 10: track log output and check for history writes from WRF
        wait_lines = 0
        wait_wrfout = 0
        while True:
            raw_line = wrf_out.readline()
            if not raw_line:
                # end of file reached: either WRF is gone or we wait for more
                if not parallel_job_running(js):
                    logging.warning('WRF did not run to completion.')
                    break
                if not wait_lines:
                    logging.info('Waiting for more output lines')
                wait_lines = wait_lines + 1
                time.sleep(5)
                continue
            wait_lines = 0

            line = raw_line.strip()
            if not line:
                # a blank line inside the log is not end of file; just skip it
                continue

            if "SUCCESS COMPLETE WRF" in line:
                logging.info("WRF completion detected.")
                js.old_job_num = js.job_num
                js.job_num = None
                _save_job_state()
                break

            if "Timing for Writing wrfout" in line:
                wait_wrfout = 0
                esmf_time, domain_str = re.match(
                    r'.*wrfout_d.._([0-9_\-:]{19}) for domain\ +(\d+):',
                    line).groups()
                dom_id = int(domain_str)
                logging.info(
                    "Detected history write for domain %d for time %s." %
                    (dom_id, esmf_time))
                if str(dom_id) in js.postproc:
                    var_list = [str(x) for x in js.postproc[str(dom_id)]]
                    logging.info(
                        "Executing postproc instructions for vars %s for domain %d."
                        % (str(var_list), dom_id))
                    wrfout_path = find_wrfout(js.wrf_dir, dom_id, esmf_time)
                    try:
                        pp.process_vars(osp.join(js.wrf_dir, wrfout_path),
                                        dom_id, esmf_time, var_list)
                    except Exception as e:
                        logging.warning(
                            'Failed to postprocess for time %s with error %s.'
                            % (esmf_time, str(e)))
                    else:
                        # in incremental mode, upload to server
                        if js.postproc.get('shuttle', None) == 'incremental':
                            already_sent_files = _send_incremental(
                                already_sent_files)
            else:
                if not wait_wrfout:
                    logging.info('Waiting for wrfout')
                wait_wrfout = wait_wrfout + 1

    # if we are to send out the postprocessed files after completion, this is the time
    if js.postproc.get('shuttle', None) == 'on_completion':
        _send_all()

    if js.postproc.get('shuttle', None) is not None:
        make_kmz(js.job_id)  # arguments can be added to the job id string

    js.old_pid = js.pid
    js.pid = None
    js.state = 'Completed'
    _save_job_state()
Example #8
0
def process_outputs_tiff(job_id):
    """
    Postprocess all wrfout files of a job into tiff products.

    The job description is loaded from job.json in the job's workspace
    directory (the process exits with status 1 if that fails).  The satellite
    description sat.json is optional: if it cannot be loaded, processing
    continues with no satellite data.  Every wrfout file found in the job's
    wrf directory is passed to process_vars_tiff with the variables listed for
    its domain in the 'postproc' section.  Products go to products/tiffs in
    the job directory, and job.json is rewritten with state 'Processing' at
    the start and 'Completed' at the end.

    :param job_id: the job identifier, also the name of the job directory
    """
    args = load_sys_cfg()
    jobfile = osp.abspath(osp.join(args.workspace_path, job_id,'job.json'))
    satfile = osp.abspath(osp.join(args.workspace_path, job_id,'sat.json'))
    logging.info('process_tiffs: loading job description from %s' % jobfile)
    try:
        with open(jobfile,'r') as f:
            js = Dict(json.load(f))
    except Exception as e:
        logging.error('Cannot load the job description file %s' % jobfile)
        logging.error('%s' % e)
        sys.exit(1)
    logging.info('process_tiffs: loading satellite description from %s' % satfile)
    try:
        with open(satfile,'r') as f:
            jsat = Dict(json.load(f))
        available_sats = [sat.upper()+'_AF' for sat in jsat.granules.keys()]
        not_empty_sats = [sat.upper()+'_AF' for sat in jsat.granules.keys() if jsat.granules[sat]]
    except Exception:
        # best effort: the satellite description is optional, but do not
        # swallow SystemExit/KeyboardInterrupt as a bare except would
        logging.warning('Cannot load the satellite data in satellite description file %s' % satfile)
        available_sats = []
        not_empty_sats = []
    logging.info('process_tiffs: available satellite data %s' % available_sats)
    logging.info('process_tiffs: not empty satellite data %s' % not_empty_sats)

    js.old_pid = js.pid
    js.pid = os.getpid()
    js.state = 'Processing'
    with open(jobfile,'w') as f:
        json.dump(js, f, indent=4, separators=(',', ': '))
    js.wrf_dir = osp.abspath(osp.join(args.workspace_path, js.job_id, 'wrf'))

    # set up postprocessing
    js.pp_dir = osp.join(args.workspace_path, js.job_id, "products", "tiffs")
    make_clean_dir(js.pp_dir)
    pp = Postprocessor(js.pp_dir, 'wfc-' + js.grid_code)
    js.manifest_filename= 'wfc-' + js.grid_code + '.json'
    logging.debug('Postprocessor created manifest %s',js.manifest_filename)

    logging.info('Postprocessing all wrfout files.')
    # postprocess all wrfouts
    for wrfout_path in sorted(glob.glob(osp.join(js.wrf_dir,'wrfout_d??_????-??-??_??:??:??'))):
        logging.info("Found %s" % wrfout_path)
        domain_str = re.match(r'.*wrfout_d(0[0-9])_([0-9_\-:]{19})',wrfout_path).group(1)
        dom_id = int(domain_str)
        d = nc4.Dataset(wrfout_path)
        try:
            # extract ESMF string times
            times = [''.join(x) for x in d.variables['Times'][:].astype(str)]
            if js.postproc is not None and str(dom_id) in js.postproc:
                if available_sats:
                    # satellite products are split off from the variable list;
                    # they are only logged here, tiff processing of satellite
                    # products is not wired in
                    sat_list = [sat for sat in available_sats if sat in js.postproc[str(dom_id)]]
                    var_list = [str(x) for x in js.postproc[str(dom_id)] if not str(x) in sat_list]
                    sat_list = [sat for sat in sat_list if sat in not_empty_sats]
                    logging.info("Executing postproc instructions for sats %s for domain %d." % (str(sat_list), dom_id))
                else:
                    var_list = [str(x) for x in js.postproc[str(dom_id)]]
                logging.info("Executing postproc tiff instructions for vars %s for domain %d." % (str(var_list), dom_id))
                try:
                    process_vars_tiff(pp, d, wrfout_path, dom_id, times, var_list)
                except Exception as e:
                    logging.warning('Failed to postprocess with error %s.' % str(e))
        finally:
            # close the dataset even if reading the times fails
            d.close()

    js.old_pid = js.pid
    js.pid = None
    js.state = 'Completed'
    with open(jobfile,'w') as f:
        json.dump(js, f, indent=4, separators=(',', ': '))
Example #9
0
from simulation import cancel_simulation, delete_simulation, delete_simulation_files, load_simulations, cleanup_sim_output, cleanup_sim_workspace
import json
import sys
import logging
from utils import load_sys_cfg

# system configuration, loaded once when this module is imported
conf = load_sys_cfg()
# value of the required 'sims_path' key; passed to load_simulations below
sims_path = conf['sims_path']


def cleanup_delete(sim_id):
    """
    Delete a simulation, or its stray files if the simulation is unknown.

    :param sim_id: the key of the simulation in the loaded simulations
    """
    simulations = load_simulations(sims_path)
    logging.info('Deleting simulation %s' % sim_id)
    # only the lookup is guarded, so that a KeyError raised inside
    # delete_simulation is not misreported as a missing simulation
    try:
        sim_info = simulations[sim_id]
    except KeyError:
        logging.error('Simulation %s not found.' % sim_id)
        delete_simulation_files(sim_id, conf)  # rm any stray files
    else:
        delete_simulation(sim_info, conf)


def cleanup_cancel(sim_id):
    """
    Cancel a simulation; log an error if the simulation is unknown.

    :param sim_id: the key of the simulation in the loaded simulations
    """
    simulations = load_simulations(sims_path)
    logging.info('Canceling simulation %s' % sim_id)
    # only the lookup is guarded, so that a KeyError raised inside
    # cancel_simulation is not misreported as a missing simulation
    try:
        sim_info = simulations[sim_id]
    except KeyError:
        logging.error('Simulation %s not found.' % sim_id)
    else:
        cancel_simulation(sim_info, conf)

Example #10
0
from simulation import cancel_simulation, delete_simulation, delete_simulation_files, load_simulations, cleanup_sim_output, cleanup_sim_workspace
import json
import sys
import logging
from utils import load_sys_cfg

# system configuration, loaded once when this module is imported
conf = load_sys_cfg()
# value of the required 'sims_path' key; passed to load_simulations below
sims_path = conf['sims_path']

def cleanup_delete(sim_id):
    """
    Delete a simulation, or its stray files if the simulation is unknown.

    :param sim_id: the key of the simulation in the loaded simulations
    """
    simulations = load_simulations(sims_path)
    logging.info('Deleting simulation %s' % sim_id)
    # only the lookup is guarded, so that a KeyError raised inside
    # delete_simulation is not misreported as a missing simulation
    try:
        sim_info = simulations[sim_id]
    except KeyError:
        logging.error('Simulation %s not found.' % sim_id)
        delete_simulation_files(sim_id,conf) # rm any stray files
    else:
        delete_simulation(sim_info,conf)

def cleanup_cancel(sim_id):
    """
    Cancel a simulation; log an error if the simulation is unknown.

    :param sim_id: the key of the simulation in the loaded simulations
    """
    simulations = load_simulations(sims_path)
    logging.info('Canceling simulation %s' % sim_id)
    # only the lookup is guarded, so that a KeyError raised inside
    # cancel_simulation is not misreported as a missing simulation
    try:
        sim_info = simulations[sim_id]
    except KeyError:
        logging.error('Simulation %s not found.' % sim_id)
    else:
        cancel_simulation(sim_info,conf)

def cleanup_output(sim_id):
    simulations = load_simulations(sims_path)
    try:
Example #11
0
import os.path as osp

## Standalone script that can be used to simply download files
if __name__ == '__main__':
    if len(sys.argv) != 5:
        print(
            'Usage: %s <grib_source_name> <esmf_from_utc> <esmf_to_utc> <target_directory>'
            % sys.argv[0])
        print('       supported GRIB sources: HRRR, NAM, CFSR_P, CFSR_S, NARR')
        sys.exit(-1)

    # configure the basic logger
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s')

    js = load_sys_cfg()
    grib_src_name = sys.argv[1]
    from_utc = esmf_to_utc(sys.argv[2])
    to_utc = esmf_to_utc(sys.argv[3])
    ingest_dir = sys.argv[4]
    js.ingest_dir = ingest_dir

    grib_src = None
    if grib_src_name == 'HRRR':
        grib_src = HRRR(js)
    elif grib_src_name == 'NAM':
        grib_src = NAM218(js)
    elif grib_src_name == 'NAM227':
        grib_src = NAM227(js)
    elif grib_src_name == 'CFSR_P':
        grib_src = CFSR_P(js)