Ejemplo n.º 1
0
def transform(message, data):
    """Serialize *data* according to the message's output type and merge it
    into *message*, optionally suffixing the name with a Spring profile."""
    logging.info("Transforming: message=[%s] data=[%s]" % (message, data))

    storage = message['storage']
    data_type = message['type']

    out_type = output_type(data_type, storage)
    logging.info("output type: %s" % out_type)

    # Serialize the payload; anything that is neither yaml nor json
    # passes through untouched.
    if out_type == 'yaml':
        serialized = yaml.dump(data)
    elif out_type == 'json':
        serialized = json.dumps(data)
    else:
        serialized = data

    message['app'] = message['name']
    message['data'] = serialized

    # Spring flavor: append the profile to the name when one is present.
    if utils.has_key('spring.profiles', data):
        spring_profile = utils.get_key('spring.profiles', data)
        name_pattern = "%s/%s" if storage == 'vault' else "%s-%s"
        message['name'] = name_pattern % (message['name'], spring_profile)

    return message
Ejemplo n.º 2
0
def get_article_urls(query='', page=0):
    """Yield (page, url, article) tuples scraped from hmetro.com.my search results.

    NOTE(review): `filter` and `map` are called here as (iterable, function) —
    the reverse of the Python builtins' signatures — so they are presumably
    project-local helpers imported alongside `open_soup`, `has_key`,
    `get_key`, `text_match` and `get_article`; confirm against the imports.
    """
    # Remember URLs already produced so each article is yielded once.
    yielded = []
    # `&page=N` is only appended for an integer page > 0.
    url = 'https://www.hmetro.com.my/search?s={}{}'.format(
        query, ''
        if page == 0 or not isinstance(page, int) else '&page={}'.format(page))
    # Keep <a> tags carrying an href, extract the href, then keep only the
    # paths shaped like article URLs: /section/YYYY/MM/NNNNNN/slug.
    for url in filter(
            map(filter(open_soup(url).find_all('a'), has_key('href')),
                get_key('href')), text_match(r'^/.+?/\d{4}/\d{2}/\d{6}/.+$')):
        url = 'https://www.hmetro.com.my{}'.format(url)
        if url in yielded: continue
        yielded.append(url)
        yield page, url, get_article(open_soup(url))
Ejemplo n.º 3
0
def config_wps(work_root, wps_root, geog_root, config, args):
    """Write namelist.wps for a WPS run under <work_root>/wps.

    Reads optional custom->start_time/end_time and mandatory
    domains->max_dom from *config*, edits a copy of the stock
    namelist.wps shipped with WPS, then plots the domains with NCL.
    Aborts via cli.error on missing keys or WPS older than 3.9.1.
    *args* is not used inside this function.
    """
    if has_key(config, ('custom', 'start_time')):
        start_time = config['custom']['start_time']
        start_time_str = start_time.format('YYYY-MM-DD_HH:mm:ss')
        if not has_key(config, ('custom', 'end_time')):
            cli.error('custom->end_time does not exist in config file!')
        end_time = config['custom']['end_time']
        end_time_str = end_time.format('YYYY-MM-DD_HH:mm:ss')

    if not has_key(config, ('domains', 'max_dom')):
        cli.error('domains->max_dom does not exist in config file!')
    max_dom = config['domains']['max_dom']

    wps_work_dir = work_root + '/wps'
    if not os.path.isdir(wps_work_dir): os.makedirs(wps_work_dir)
    # Everything below runs inside the WPS work directory.
    os.chdir(wps_work_dir)

    version = wrf_version(wps_root)
    if version < Version('3.9.1'):
        cli.error(
            f'WPS {version} may not handle GFS data correctly! Please use WPS >= 3.9.1.'
        )

    cli.notice('Edit namelist.wps for WPS.')
    copy(f'{wps_root}/namelist.wps', 'namelist.wps')
    namelist_wps = f90nml.read('namelist.wps')
    namelist_wps['share']['max_dom'] = max_dom
    if has_key(config, ('custom', 'start_time')):
        # Same start date for every domain.
        namelist_wps['share']['start_date'] = [
            start_time_str for i in range(max_dom)
        ]
    if has_key(config, ('custom', 'end_time')):
        # NOTE(review): only domain 1 gets the real end date; nested domains
        # end at the start time (presumably nests only need initial data).
        # Also, start_time_str/end_time_str are only bound when
        # custom->start_time exists, so end_time without start_time would
        # raise NameError here — confirm intended.
        namelist_wps['share']['end_date'] = [
            end_time_str if i == 0 else start_time_str for i in range(max_dom)
        ]
    if has_key(config, ('custom', 'background')) and has_key(
            config, ('custom', 'background', 'interval_seconds')):
        namelist_wps['share']['interval_seconds'] = config['custom'][
            'background']['interval_seconds']
    namelist_wps['geogrid']['geog_data_path'] = geog_root
    # Pass through every user-supplied geogrid (and optional metgrid) setting.
    for key, value in config['geogrid'].items():
        namelist_wps['geogrid'][key] = value
    namelist_wps['geogrid']['opt_geogrid_tbl_path'] = wps_work_dir
    namelist_wps['metgrid']['opt_metgrid_tbl_path'] = wps_work_dir
    if 'metgrid' in config:
        for key, value in config['metgrid'].items():
            namelist_wps['metgrid'][key] = value
    namelist_wps.write('./namelist.wps', force=True)
    # Render a domain-overview plot for visual inspection.
    run(f'ncl -Q {script_root}/../plots/plot_domains.ncl')
    cli.notice(f'Check {wps_work_dir}/wps_show_dom.pdf for domains.')

    cli.notice('Succeeded.')
Ejemplo n.º 4
0
def callback(ch, method, properties, body):
    """RabbitMQ consumer callback.

    Fetches the file referenced by the incoming message, splits it by its
    declared type (yaml multi-document, json with an optional splitter key,
    or raw) and publishes one message per chunk, then deletes the file.
    On a fetch failure the message is sent to the retry queue instead.
    """
    p_body = json.loads(body)
    file_id = p_body['id']      # renamed from `id` (shadowed builtin)
    file_type = p_body['type']  # renamed from `type` (shadowed builtin)
    message = dict(p_body)

    # get file by id
    resp = api_client.get_file(file_id)
    if resp.status_code != 200:
        error_msg = "Failed to get file %s. [HTTP_STATUS=%i] - %s" % (
            file_id, resp.status_code, resp.text)
        rabbitmq.send_to_retry(error_msg, properties)
        # BUG FIX: previously execution fell through here, publishing the
        # HTTP error body as file data and deleting the (unfetched) file.
        return

    raw_data = resp.text
    logging.info("raw_file=[%s]" % raw_data)

    if file_type == 'yaml':
        # YAML multi-document stream: one message per document.
        for data in raw_data.split('---'):
            message['data'] = data
            rabbitmq.publish(exchange, output_routing_key, message, conn)

    elif file_type == 'json':
        j_body = json.loads(raw_data)
        if utils.has_key('seeder.splitter-key', j_body):
            # Split on the list named by seeder.splitter-key.
            s_key = utils.get_key('seeder.splitter-key', j_body)
            for data in j_body[s_key]:
                message['data'] = data
                rabbitmq.publish(exchange, output_routing_key, message, conn)
        else:
            message['data'] = raw_data
            rabbitmq.publish(exchange, output_routing_key, message, conn)

    else:
        # Unknown type: forward the raw payload unmodified.
        message['data'] = raw_data
        rabbitmq.publish(exchange, output_routing_key, message, conn)

    api_client.delete_file(file_id)
Ejemplo n.º 5
0
def config_wrfda(work_root,
                 wrfda_root,
                 config,
                 args,
                 wrf_work_dir=None,
                 tag=None,
                 fg=None):
    """Write namelist.input for a WRFDA run under <work_root>/wrfda[_tag][/dNN].

    Grid attributes (NUM_LAND_CAT, HYPSOMETRIC_OPT) are read from an
    existing wrfinput/wrfout file in *wrf_work_dir*, or from *fg* (first
    guess) as a fallback.  The namelist skeleton is parsed out of WRFDA's
    var/README.namelist, patched for known bugs and version quirks, merged
    with the tutorial namelist.input, then overridden with *config*.
    Aborts via cli.error on missing inputs.  *args* is unused here.
    """
    start_time = config['custom']['start_time']
    end_time = config['custom']['end_time']
    datetime_fmt = 'YYYY-MM-DD_HH:mm:ss'
    start_time_str = start_time.format(datetime_fmt)
    max_dom = config['domains']['max_dom']

    # Need to take some parameters from wrfinput file.
    if not wrf_work_dir:
        if tag != None:
            wrf_work_dir = f'{work_root}/wrf_{tag}'
        else:
            wrf_work_dir = f'{work_root}/wrf'

    # Resolve which domain to assimilate and the matching work directory.
    if max_dom > 1:
        if not has_key(config, ('custom', 'wrfda', 'dom')):
            cli.error(
                'You need to set custom->wrfda->dom to set which domain to work on!'
            )
        dom_idx = config['custom']['wrfda']['dom']
        # dom is 0-based in config; directory names use d01, d02, ...
        dom_str = 'd' + str(dom_idx + 1).zfill(2)
        if tag != None:
            wrfda_work_dir = f'{work_root}/wrfda_{tag}/{dom_str}'
        else:
            wrfda_work_dir = f'{work_root}/wrfda/{dom_str}'
    else:
        dom_idx = 0
        dom_str = 'd01'
        if tag != None:
            wrfda_work_dir = f'{work_root}/wrfda_{tag}'
        else:
            wrfda_work_dir = f'{work_root}/wrfda'
    if not os.path.isdir(wrfda_work_dir): os.makedirs(wrfda_work_dir)
    os.chdir(wrfda_work_dir)

    version = wrf_version(wrfda_root)

    # Prefer wrfinput, then wrfout at the analysis time, then the first guess.
    if os.path.isfile(f'{wrf_work_dir}/wrfinput_{dom_str}'):
        f = Dataset(f'{wrf_work_dir}/wrfinput_{dom_str}')
    elif os.path.isfile(f'{wrf_work_dir}/wrfout_{dom_str}_{start_time_str}'):
        f = Dataset(f'{wrf_work_dir}/wrfout_{dom_str}_{start_time_str}')
    elif fg:
        f = Dataset(fg)
    else:
        cli.error(
            f'config_wrfda: Cannot find wrfinput or wrfout in {wrf_work_dir} or wrfvar!'
        )
    num_land_cat = f.getncattr('NUM_LAND_CAT')
    hypsometric_opt = f.getncattr('HYPSOMETRIC_OPT')
    f.close()

    # Assimilation time window in minutes, centered on start_time.
    time_window = get_value(config, ('custom', 'wrfda', 'time_window'), 360)
    # Read in namelist template (not exact Fortran namelist format, we need to change it).
    # The regexes strip the pre-&wrfvar1 preamble, the trailing ';' comments,
    # and the '(...)' dimension annotations so f90nml can parse it.
    template = open(f'{wrfda_root}/var/README.namelist').read()
    template = re.sub(r'^[^&]*', '', template, flags=re.DOTALL)
    template = re.sub(r';.*', '', template)
    template = re.sub(r'\([^\)]*\)', '', template)
    namelist_input = f90nml.read(StringIO(template))
    namelist_input['wrfvar1']['var4d_lbc'] = False
    namelist_input['wrfvar18']['analysis_date'] = start_time_str
    namelist_input['wrfvar21']['time_window_min'] = start_time.subtract(
        minutes=time_window / 2).format(datetime_fmt)
    namelist_input['wrfvar22']['time_window_max'] = start_time.add(
        minutes=time_window / 2).format(datetime_fmt)
    # Fix bugs
    namelist_input['wrfvar2']['qc_rej_both'] = False
    namelist_input['wrfvar14']['rtminit_satid'] = -1
    namelist_input['wrfvar14']['rtminit_sensor'] = -1
    if version == Version('3.6.1'):
        # 3.6.1's README uses misspelled option names; rename them.
        namelist_input['wrfvar4']['use_iasiobs'] = False
        del namelist_input['wrfvar4']['use_iasisobs']
        namelist_input['wrfvar4']['use_seviriobs'] = False
        del namelist_input['wrfvar4']['use_sevirisobs']
        namelist_input['wrfvar5']['max_omb_spd'] = namelist_input['wrfvar5'][
            'max_omb_sp']
        del namelist_input['wrfvar5']['max_omb_sp']
        namelist_input['wrfvar5']['max_error_spd'] = namelist_input['wrfvar5'][
            'max_error_sp']
        del namelist_input['wrfvar5']['max_error_sp']
    elif version > Version('3.8.1'):
        namelist_input['wrfvar11']['write_detail_grad_fn'] = True
    namelist_input['wrfvar11']['calculate_cg_cost_fn'] = True
    # Merge namelist.input in tutorial.
    tmp = f90nml.read(f'{wrfda_root}/var/test/tutorial/namelist.input')
    for key, value in tmp.items():
        if not key in namelist_input:
            namelist_input[key] = value
    namelist_input['time_control']['run_hours'] = config['custom'][
        'forecast_hours']
    # Expand start/end time components to one entry per domain.
    namelist_input['time_control']['start_year'] = [
        int(start_time.format("Y")) for i in range(max_dom)
    ]
    namelist_input['time_control']['start_month'] = [
        int(start_time.format("M")) for i in range(max_dom)
    ]
    namelist_input['time_control']['start_day'] = [
        int(start_time.format("D")) for i in range(max_dom)
    ]
    namelist_input['time_control']['start_hour'] = [
        int(start_time.format("H")) for i in range(max_dom)
    ]
    namelist_input['time_control']['end_year'] = [
        int(end_time.format("Y")) for i in range(max_dom)
    ]
    namelist_input['time_control']['end_month'] = [
        int(end_time.format("M")) for i in range(max_dom)
    ]
    namelist_input['time_control']['end_day'] = [
        int(end_time.format("D")) for i in range(max_dom)
    ]
    namelist_input['time_control']['end_hour'] = [
        int(end_time.format("H")) for i in range(max_dom)
    ]
    namelist_input['time_control']['frames_per_outfile'] = [
        1 for i in range(max_dom)
    ]
    for key, value in config['time_control'].items():
        namelist_input['time_control'][key] = value
    for key, value in config['domains'].items():
        namelist_input['domains'][key] = value
    # WRFDA only take grids parameters one domain at a time.
    namelist_input['domains']['max_dom'] = 1
    for key in ('e_we', 'e_sn', 'e_vert', 'dx', 'dy', 'grid_id', 'parent_id',
                'i_parent_start', 'j_parent_start', 'parent_grid_ratio',
                'parent_time_step_ratio'):
        if key in config['domains']:
            namelist_input['domains'][key] = config['domains'][key][dom_idx]
    namelist_input['domains']['hypsometric_opt'] = hypsometric_opt
    # Sync physics parameters.
    if 'physics' in config:
        for key, value in config['physics'].items():
            namelist_input['physics'][key] = value
    namelist_input['physics']['num_land_cat'] = num_land_cat
    if version == Version('3.9.1'):
        namelist_input['dynamics']['gwd_opt'] = 0
    # Write customized parameters.
    # NOTE(review): this loop variable shadows the `tag` parameter (harmless
    # only because `tag` is not used afterwards) and reads config[section]
    # unconditionally — a missing wrfvarN section would raise KeyError.
    for tag in range(1, 23):
        section = f'wrfvar{tag}'
        for key, value in config[section].items():
            namelist_input[section][key] = value

    # Validate some parameters.
    for key in ('as1', 'as2', 'as3', 'as4', 'as5'):
        if namelist_input['wrfvar7'][key] == -1:
            cli.error(f'wrfvar7->{key} is -1!')

    namelist_input.write(f'{wrfda_work_dir}/namelist.input', force=True)

    cli.notice('Succeeded.')
Ejemplo n.º 6
0
def run_wrfda_obsproc(work_root,
                      wrfda_root,
                      littler_root,
                      config,
                      args,
                      wrf_work_dir=None,
                      tag=None):
    """Configure and run WRFDA's obsproc.exe under <work_root>/wrfda[_tag]/obsproc.

    Builds namelist.obsproc from the 3DVar tutorial template, taking the
    d01 domain extent from an existing wrfinput file (falling back to the
    geogrid settings in *config*), links the LITTLE_R observation file,
    and submits obsproc.exe.  Skips the run if its output already exists
    unless args.force is set.  Aborts via cli.error on failure.
    """
    start_time = config['custom']['start_time']
    datetime_fmt = 'YYYY-MM-DD_HH:mm:ss'
    start_time_str = start_time.format(datetime_fmt)

    if not wrf_work_dir:
        if tag != None:
            wrf_work_dir = f'{work_root}/wrf_{tag}'
        else:
            wrf_work_dir = f'{work_root}/wrf'

    if tag != None:
        wrfda_work_dir = f'{work_root}/wrfda_{tag}/obsproc'
    else:
        wrfda_work_dir = f'{work_root}/wrfda/obsproc'
    if not os.path.isdir(wrfda_work_dir): os.mkdir(wrfda_work_dir)
    os.chdir(wrfda_work_dir)

    cli.notice('Use builtin obserr.')
    run(f'ln -sf {wrfda_root}/var/obsproc/obserr.txt {wrfda_work_dir}')

    # Use d01 domain extent.
    if check_files([f'{wrf_work_dir}/wrfinput_d01_{start_time_str}']):
        ncfile = Dataset(f'{wrf_work_dir}/wrfinput_d01_{start_time_str}', 'r')
        iproj = ncfile.getncattr('MAP_PROJ')
        phic = ncfile.getncattr('CEN_LAT')
        xlonc = ncfile.getncattr('CEN_LON')
        moad_cen_lat = ncfile.getncattr('MOAD_CEN_LAT')
        standard_lon = ncfile.getncattr('STAND_LON')
        ncfile.close()
    else:
        # No wrfinput yet: fall back to the geogrid reference point.
        iproj = config['geogrid']['map_proj']
        phic = config['geogrid']['ref_lat']
        xlonc = config['geogrid']['ref_lon']
        moad_cen_lat = config['geogrid']['ref_lat']
        standard_lon = config['geogrid']['ref_lon']

    output_format = get_value(config, ['custom', 'obsproc', 'output_format'],
                              default=2)
    # Assimilation window length in minutes, centered on start_time.
    time_window = get_value(config, ['custom', 'wrfda', 'time_window'],
                            default=360)

    # Only 3DVar is supported; both branches load the same template.
    if has_key(config, ('custom', 'da', 'type')):
        if config['custom']['da']['type'] == '3dvar':
            namelist_obsproc = f90nml.read(
                f'{wrfda_root}/var/obsproc/namelist.obsproc.3dvar.wrfvar-tut')
        else:
            cli.error('Currently, we only support 3DVar...')
    else:
        namelist_obsproc = f90nml.read(
            f'{wrfda_root}/var/obsproc/namelist.obsproc.3dvar.wrfvar-tut')

    namelist_obsproc['record1'][
        'obs_gts_filename'] = f'obs.gts.{start_time.format("YYYYMMDDHHmm")}'
    namelist_obsproc['record2']['time_window_min'] = start_time.subtract(
        minutes=time_window / 2).format('YYYY-MM-DD_HH:mm:ss')
    namelist_obsproc['record2']['time_analysis'] = start_time.format(
        'YYYY-MM-DD_HH:mm:ss')
    namelist_obsproc['record2']['time_window_max'] = start_time.add(
        minutes=time_window / 2).format('YYYY-MM-DD_HH:mm:ss')
    namelist_obsproc['record3']['max_number_of_obs'] = 1200000
    namelist_obsproc['record7']['PHIC'] = phic
    namelist_obsproc['record7']['XLONC'] = xlonc
    namelist_obsproc['record7']['MOAD_CEN_LAT'] = moad_cen_lat
    namelist_obsproc['record7']['STANDARD_LON'] = standard_lon
    namelist_obsproc['record8']['NESTIX'] = config['geogrid']['e_sn']
    namelist_obsproc['record8']['NESTJX'] = config['geogrid']['e_we']
    namelist_obsproc['record8']['DIS'] = config['geogrid']['dx']
    namelist_obsproc['record9']['OUTPUT_OB_FORMAT'] = output_format
    namelist_obsproc.write('./namelist.obsproc', force=True)

    cli.stage(f'Run obsproc.exe at {wrfda_work_dir} ...')
    expected_files = [
        f'obs_gts_{start_time.format("YYYY-MM-DD_HH:mm:ss")}.3DVAR'
    ]
    if not check_files(expected_files) or args.force:
        run('rm -f obs_gts_*')

        # Resolve the LITTLE_R input: either from the configured Jinja-style
        # dir/file patterns, or from the default date-based layout.
        if has_key(config, ('custom', 'littler')):
            if 'dir_pattern' in config['custom'][
                    'littler'] and 'file_pattern' in config['custom'][
                        'littler']:
                dir_name = Template(
                    config['custom']['littler']['dir_pattern']).render(
                        time=start_time)
                file_name = Template(
                    config['custom']['littler']['file_pattern']).render(
                        time=start_time)
                littler_path = f'{littler_root}/{dir_name}/{file_name}'
            else:
                cli.error(
                    'No dir_pattern and file_pattern in custom->littler section!'
                )
        else:
            littler_path = f'{littler_root}/{start_time.format("YYYYMMDD")}/obs.gts.{start_time.format("YYYYMMDDHHmm")}'

        if os.path.exists(littler_path):
            run(f'ln -sf {littler_path} {wrfda_work_dir}/obs.gts.{start_time.format("YYYYMMDDHHmm")}'
                )
        else:
            cli.error(f'Failed! {littler_path} Not Found.')
        submit_job(f'{wrfda_root}/var/obsproc/obsproc.exe',
                   1,
                   config,
                   args,
                   wait=True)
        if not check_files(expected_files):
            cli.error(f'Failed!')
        cli.notice('Succeeded.')
    else:
        cli.notice('File obs_gts_* already exist.')
    run('ls -l obs_gts_*')
Ejemplo n.º 7
0
    args.littler_root = os.path.abspath(args.littler_root)
    if not os.path.isdir(args.littler_root):
        cli.error(f'Directory {args.littler_root} does not exist!')

# --- driver fragment: read times, prepare work dirs, configure WPS ---
start_time = config['custom']['start_time']
end_time = config['custom']['end_time']
datetime_fmt = 'YYYY-MM-DD_HH:mm:ss'
start_time_str = start_time.format(datetime_fmt)
end_time_str = end_time.format(datetime_fmt)

# Working subdirectories: fb/fa/ref — presumably forecast-background,
# forecast-analysis and reference runs; confirm against the wider script.
if not os.path.isdir(args.work_root + '/fb'): os.mkdir(args.work_root + '/fb')
if not os.path.isdir(args.work_root + '/fa'): os.mkdir(args.work_root + '/fa')
if not os.path.isdir(args.work_root + '/ref'):
    os.mkdir(args.work_root + '/ref')

if not has_key(config, ('wrfvar7', 'cv_options')):
    cli.error('cv_options in wrfvar7 is not set!')

wrf.config_wps(args.work_root, args.wps_root, args.geog_root, config, args)
wrf.run_wps_geogrid(args.work_root, args.wps_root, config, args)

# Spin up 6 hours.
spinup_hours = get_value(config['custom'], 'spinup_hours', 6)
spinup_config = copy.deepcopy(config)
# Shift the start back by the spin-up window and lengthen the forecast to
# compensate.  NOTE(review): this second config_wps call overwrites the
# namelist written above — confirm that is intended.
spinup_config['custom']['start_time'] = config['custom'][
    'start_time'].subtract(hours=spinup_hours)
spinup_config['custom']['forecast_hours'] += spinup_hours
wrf.config_wps(args.work_root, args.wps_root, args.geog_root, spinup_config,
               args)

# Run forecast with xb as initial condition.
Ejemplo n.º 8
0
def is_spring_valid_profile(data):
    """Return True when *data* carries no spring profile at all, or when its
    spring profile equals the expected SPRING_PROFILE."""
    has_profile = utils.has_key('spring.profiles', data)
    if has_profile is False:
        # No profile present: nothing to validate.
        return True
    return has_profile and data['spring']['profiles'] == SPRING_PROFILE