Example #1
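All of the examples below appear to come from the same module, and none of them shows its imports. A hedged reconstruction of the module-level imports, inferred purely from usage (utils, constants, and wrf_exec are project-local modules; Variable is assumed to be Airflow's):

import datetime as dt
import logging
import multiprocessing
import os
import shutil
import tempfile
import time

import numpy as np
from joblib import Parallel, delayed      # parallel processing of zip files
from airflow.models import Variable       # used by Example #7 (assumption)

# project-local modules, not shown in the source:
# import constants
# import utils
# import wrf_exec
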
def extract_jaxa_satellite_data(start_ts_utc, end_ts_utc, output_dir):
    start = utils.datetime_floor(start_ts_utc, 3600)
    end = utils.datetime_floor(end_ts_utc, 3600)

    # default bounding box covering Sri Lanka (degrees)
    lat_min = 5.722969
    lon_min = 79.52146
    lat_max = 10.06425
    lon_max = 82.18992

    login = '******'

    url0 = 'ftp://' + login + '@hokusai.eorc.jaxa.jp/realtime/txt/05_AsiaSS/YYYY/MM/DD/gsmap_nrt.YYYYMMDD.HH00.05_AsiaSS.csv.zip'
    url1 = 'ftp://' + login + '@hokusai.eorc.jaxa.jp/now/txt/05_AsiaSS/gsmap_now.YYYYMMDD.HH00_HH59.05_AsiaSS.csv.zip'

    def get_jaxa_url(ts):
        # data older than ~5 hours lives in the near-real-time archive (url0);
        # newer data is only in the provisional 'now' feed (url1)
        url_switch = (dt.datetime.utcnow() - ts) > dt.timedelta(hours=5)
        _url = url0 if url_switch else url1
        ph = {'YYYY': ts.strftime('%Y'),
              'MM': ts.strftime('%m'),
              'DD': ts.strftime('%d'),
              'HH': ts.strftime('%H')}
        for k, v in ph.items():
            _url = _url.replace(k, v)
        return _url

    tmp_dir = os.path.join(output_dir, 'tmp_jaxa/')
    if not os.path.exists(tmp_dir):
        os.mkdir(tmp_dir)
    else:
        utils.cleanup_dir(tmp_dir)

    url_dest_list = []
    for timestamp in np.arange(start, end, dt.timedelta(hours=1)).astype(dt.datetime):
        url = get_jaxa_url(timestamp)
        url_dest_list.append((url, os.path.join(tmp_dir, os.path.basename(url)),
                              os.path.join(output_dir, 'jaxa_sat_rf_' + timestamp.strftime('%Y-%m-%d_%H:%M') + '.asc')))

    utils.download_parallel(url_dest_list)

    procs = multiprocessing.cpu_count()
    Parallel(n_jobs=procs)(
        delayed(_process_zip_file)(i[1], i[2], lat_min, lon_min, lat_max, lon_max) for i in url_dest_list)

    # clean up temp dir
    shutil.rmtree(tmp_dir)
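
Every example relies on utils.datetime_floor(ts, seconds), which is not shown in the source. A minimal sketch consistent with how it is called (flooring a datetime down to the previous multiple of seconds, e.g. 3600 for the hour, 3600 * 24 for the day):

def datetime_floor(ts, seconds):
    # sketch of the assumed helper; the project's real implementation may differ
    epoch = dt.datetime(1970, 1, 1)
    elapsed = (ts - epoch).total_seconds()
    return epoch + dt.timedelta(seconds=int(elapsed // seconds) * seconds)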
Example #2
def replace_file_with_values(wrf_config,
                             src,
                             dest,
                             aux_dict,
                             start_date=None,
                             end_date=None):
    if start_date is None:
        start_date = utils.datetime_floor(
            dt.datetime.strptime(wrf_config.get('start_date'),
                                 '%Y-%m-%d_%H:%M'),
            wrf_config.get('gfs_step') * 3600)

    if end_date is None:
        end_date = start_date + dt.timedelta(days=wrf_config.get('period'))

    period = wrf_config.get('period')

    d = {
        'YYYY1': start_date.strftime('%Y'),
        'MM1': start_date.strftime('%m'),
        'DD1': start_date.strftime('%d'),
        'hh1': start_date.strftime('%H'),
        'mm1': start_date.strftime('%M'),
        'YYYY2': end_date.strftime('%Y'),
        'MM2': end_date.strftime('%m'),
        'DD2': end_date.strftime('%d'),
        'hh2': end_date.strftime('%H'),
        'mm2': end_date.strftime('%M'),
        'GEOG': wrf_config.get('geog_dir'),
        # run length split into whole days (RD0) plus the leftover hours (RH0)
        # and minutes (RM0) when 'period' is fractional
        'RD0': str(int(period)),
        'RH0': str(int(period * 24 % 24)),
        'RM0': str(int(period * 60 * 24 % 60)),
        'hi1': '180',
        'hi2': '60',
        'hi3': '60',
    }

    if aux_dict and wrf_config.is_set(aux_dict):
        d.update(wrf_config.get(aux_dict))

    utils.replace_file_with_values(src, dest, d)
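
utils.replace_file_with_values(src, dest, d) is presumably a plain placeholder substitution over a template file; a sketch under that assumption:

def replace_file_with_values(src, dest, d):
    # sketch only: copy src to dest, replacing each placeholder key in d
    with open(src) as f:
        content = f.read()
    for k, v in d.items():
        content = content.replace(k, str(v))
    with open(dest, 'w') as f:
        f.write(content)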
Example #3
def extract_jaxa_satellite_data(start_ts_utc,
                                end_ts_utc,
                                output_dir,
                                cleanup=True,
                                cum=False,
                                tmp_dir=None,
                                lat_min=5.722969,
                                lon_min=79.52146,
                                lat_max=10.06425,
                                lon_max=82.18992,
                                output_prefix='jaxa_sat',
                                db_adapter_config=None):
    start = utils.datetime_floor(start_ts_utc, 3600)
    end = utils.datetime_floor(end_ts_utc, 3600)

    login = '******'

    url0 = 'ftp://' + login + '@hokusai.eorc.jaxa.jp/realtime/txt/05_AsiaSS/YYYY/MM/DD/gsmap_nrt.YYYYMMDD.HH00.05_AsiaSS.csv.zip'
    url1 = 'ftp://' + login + '@hokusai.eorc.jaxa.jp/now/txt/05_AsiaSS/gsmap_now.YYYYMMDD.HH00_HH59.05_AsiaSS.csv.zip'

    def get_jaxa_url(ts):
        url_switch = (dt.datetime.utcnow() - ts) > dt.timedelta(hours=5)
        _url = url0 if url_switch else url1
        ph = {
            'YYYY': ts.strftime('%Y'),
            'MM': ts.strftime('%m'),
            'DD': ts.strftime('%d'),
            'HH': ts.strftime('%H')
        }
        for k, v in list(ph.items()):
            _url = _url.replace(k, v)
        return _url

    if tmp_dir is None:
        tmp_dir = tempfile.mkdtemp(prefix='tmp_jaxa')

    url_dest_list = []
    for timestamp in np.arange(start, end,
                               dt.timedelta(hours=1)).astype(dt.datetime):
        url = get_jaxa_url(timestamp)
        url_dest_list.append(
            (url, os.path.join(tmp_dir, os.path.basename(url)),
             os.path.join(
                 output_dir, output_prefix + '_' +
                 timestamp.strftime('%Y-%m-%d_%H:%M') + '.asc')))

    procs = multiprocessing.cpu_count()

    logging.info('Downloading inventory in parallel')
    utils.download_parallel(url_dest_list, procs)
    logging.info('Downloading inventory complete')

    logging.info('Processing files in parallel')
    Parallel(n_jobs=procs)(delayed(process_jaxa_zip_file)
                           (i[1], i[2], lat_min, lon_min, lat_max, lon_max,
                            cum, output_prefix, db_adapter_config)
                           for i in url_dest_list)
    logging.info('Processing files complete')

    logging.info('Creating sat rf gif for today')
    create_daily_gif(start, output_dir, output_prefix + '_today.gif',
                     output_prefix)

    prev_day_gif = os.path.join(output_dir, output_prefix + '_yesterday.gif')
    if not utils.file_exists_nonempty(prev_day_gif) or start.strftime(
            '%H:%M') == '00:00':
        logging.info('Creating sat rf gif for yesterday')
        create_daily_gif(
            utils.datetime_floor(start, 3600 * 24) - dt.timedelta(days=1),
            output_dir, output_prefix + '_yesterday.gif', output_prefix)

    if cum:
        logging.info('Processing cumulative')
        process_cumulative_plot(url_dest_list, start_ts_utc, end_ts_utc,
                                output_dir, lat_min, lon_min, lat_max, lon_max)
        logging.info('Processing cumulative complete')

    # clean up temp dir
    if cleanup:
        logging.info('Cleaning up')
        shutil.rmtree(tmp_dir)
        utils.delete_files_with_prefix(output_dir, '*.archive')
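
A typical call would extract the last few hours of rainfall for the default Sri Lanka bounding box (illustrative only; the output path and time window are made up):

now = dt.datetime.utcnow()
extract_jaxa_satellite_data(now - dt.timedelta(hours=6), now,
                            output_dir='/tmp/jaxa_out',  # hypothetical path
                            cum=True)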
Example #4
def extract_jaxa_satellite_hourly_data(ts, output_dir):
    ts = utils.datetime_floor(ts, 3600)
    logging.info('Jaxa satellite data extraction for %s (previous hour)' %
                 str(ts))
    extract_jaxa_satellite_data(ts - dt.timedelta(hours=1), ts, output_dir)
Example #5
def extract_jaxa_satellite_data_every_half_hr(exe_ts_utc,
                                              output_dir,
                                              cleanup=True,
                                              cum=False,
                                              tmp_dir=None,
                                              lat_min=5.722969,
                                              lon_min=79.52146,
                                              lat_max=10.06425,
                                              lon_max=82.18992,
                                              output_prefix='jaxa_sat',
                                              db_adapter_config=None):
    # presumably a small grace period so the latest upload has completed
    exe_ts_utc = exe_ts_utc - dt.timedelta(minutes=2)
    print(
        '-------------extract_jaxa_satellite_data_half_hr---------------exe_ts_utc:',
        exe_ts_utc)
    exe_ts_utc = exe_ts_utc - dt.timedelta(minutes=30)
    run_minute = int(exe_ts_utc.strftime('%M'))
    print('run_minute : ', run_minute)
    year_str = exe_ts_utc.strftime('%Y')
    month_str = exe_ts_utc.strftime('%m')
    day_str = exe_ts_utc.strftime('%d')
    hour_str = exe_ts_utc.strftime('%H')
    hour_str1 = (exe_ts_utc + dt.timedelta(hours=1)).strftime('%H')

    if run_minute == 0:
        url = 'ftp://*****:*****@hokusai.eorc.jaxa.jp/now/txt/island/SriLanka/gsmap_now.{}{}{}.{}00_{}59.SriLanka.csv.zip' \
            .format(year_str, month_str, day_str, hour_str, hour_str)
    else:
        url = 'ftp://*****:*****@hokusai.eorc.jaxa.jp/now/txt/island/SriLanka/gsmap_now.{}{}{}.{}30_{}29.SriLanka.csv.zip' \
            .format(year_str, month_str, day_str, hour_str, hour_str1)

    print('Download url : ', url)
    start_time = exe_ts_utc
    end_time = start_time + dt.timedelta(hours=1)
    print('_get_start_end|[start_time, end_time] : ', [start_time, end_time])

    if tmp_dir is None:
        tmp_dir = tempfile.mkdtemp(prefix='tmp_jaxa')

    url_dest_list = [(url, os.path.join(tmp_dir, os.path.basename(url)),
                      os.path.join(
                          output_dir, output_prefix + '_' +
                          exe_ts_utc.strftime('%Y-%m-%d_%H:%M') + '.asc'))]

    procs = multiprocessing.cpu_count()

    logging.info('Downloading inventory in parallel')
    utils.download_parallel(url_dest_list, procs)
    logging.info('Downloading inventory complete')

    logging.info('Processing files in parallel')
    Parallel(n_jobs=procs)(delayed(process_jaxa_zip_file)
                           (i[1], i[2], lat_min, lon_min, lat_max, lon_max,
                            cum, output_prefix, db_adapter_config)
                           for i in url_dest_list)
    logging.info('Processing files complete')

    logging.info('Creating sat rf gif for today')
    create_daily_gif(exe_ts_utc, output_dir, output_prefix + '_today.gif',
                     output_prefix)

    prev_day_gif = os.path.join(output_dir, output_prefix + '_yesterday.gif')
    if not utils.file_exists_nonempty(prev_day_gif) or exe_ts_utc.strftime(
            '%H:%M') == '00:00':
        logging.info('Creating sat rf gif for yesterday')
        create_daily_gif(
            utils.datetime_floor(exe_ts_utc, 3600 * 24) - dt.timedelta(days=1),
            output_dir, output_prefix + '_yesterday.gif', output_prefix)

    if cum:
        logging.info('Processing cumulative')
        process_cumulative_plot(url_dest_list, start_time, end_time,
                                output_dir, lat_min, lon_min, lat_max, lon_max)
        logging.info('Processing cumulative complete')

    # clean up temp dir
    if cleanup:
        logging.info('Cleaning up')
        shutil.rmtree(tmp_dir)
        utils.delete_files_with_prefix(output_dir, '*.archive')
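
utils.download_parallel(url_dest_list, procs) is another helper the source does not show. Given that each entry is a (url, local_zip_path, output_asc_path) tuple, a minimal sketch using a process pool and urllib (error handling and retries omitted):

from multiprocessing import Pool
from urllib import request

def _download_one(entry):
    url, dest = entry[0], entry[1]   # entry[2] is the .asc target, unused here
    request.urlretrieve(url, dest)   # urllib handles ftp:// URLs as well

def download_parallel(url_dest_list, procs=4):
    # sketch of the assumed helper, not the project's actual implementation
    with Pool(procs) as pool:
        pool.map(_download_one, url_dest_list)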
Example #6
def extract_jaxa_satellite_data_half_hr(exe_ts_utc,
                                        output_dir,
                                        cleanup=True,
                                        cum=False,
                                        tmp_dir=None,
                                        lat_min=5.722969,
                                        lon_min=79.52146,
                                        lat_max=10.06425,
                                        lon_max=82.18992,
                                        output_prefix='jaxa_sat',
                                        db_adapter_config=None):
    print(
        '-------------extract_jaxa_satellite_data_half_hr---------------exe_ts_utc:',
        exe_ts_utc)
    exe_ts_utc = exe_ts_utc - dt.timedelta(hours=1)
    login = '******'
    url_hour = 'ftp://' + login + '@hokusai.eorc.jaxa.jp/now/txt/05_AsiaSS/gsmap_now.YYYYMMDD.HH00_HH59.05_AsiaSS.csv.zip'
    url_half_hour = 'ftp://' + login + '@hokusai.eorc.jaxa.jp/now/txt/05_AsiaSS/gsmap_now.YYYYMMDD.HH30_KK29.05_AsiaSS.csv.zip'

    run_minute = int(exe_ts_utc.strftime('%M'))
    print('run_minute : ', run_minute)

    remainder = run_minute % 30
    run_minute = run_minute - remainder
    start_time = exe_ts_utc - dt.timedelta(minutes=remainder)
    end_time = start_time + dt.timedelta(minutes=30)
    print('_get_start_end|[start_time, end_time] : ', [start_time, end_time])
    # snap the timestamp to the start of its half-hour slot
    exe_ts_utc = exe_ts_utc.replace(minute=0 if run_minute == 0 else 30,
                                    second=0, microsecond=0)

    def _get_download_url(run_minute):
        # run_minute has already been floored to a 0 or 30 boundary above
        return url_hour if run_minute == 0 else url_half_hour

    def _format_url(url):
        ph = {
            'YYYY': exe_ts_utc.strftime('%Y'),
            'MM': exe_ts_utc.strftime('%m'),
            'DD': exe_ts_utc.strftime('%d'),
            'KK': (exe_ts_utc + dt.timedelta(hours=1)).strftime('%H'),
            'HH': exe_ts_utc.strftime('%H')
        }
        for k, v in list(ph.items()):
            url = url.replace(k, v)
        print('url : ', url)
        return url

    if tmp_dir is None:
        tmp_dir = tempfile.mkdtemp(prefix='tmp_jaxa')

    url = _get_download_url(run_minute)
    formatted_url = _format_url(url)
    print('formatted_url : ', formatted_url)
    url_dest_list = [(formatted_url,
                      os.path.join(tmp_dir, os.path.basename(formatted_url)),
                      os.path.join(
                          output_dir, output_prefix + '_' +
                          exe_ts_utc.strftime('%Y-%m-%d_%H:%M') + '.asc'))]

    procs = multiprocessing.cpu_count()

    logging.info('Downloading inventory in parallel')
    utils.download_parallel(url_dest_list, procs)
    logging.info('Downloading inventory complete')

    logging.info('Processing files in parallel')
    Parallel(n_jobs=procs)(delayed(process_jaxa_zip_file)
                           (i[1], i[2], lat_min, lon_min, lat_max, lon_max,
                            cum, output_prefix, db_adapter_config)
                           for i in url_dest_list)
    logging.info('Processing files complete')

    logging.info('Creating sat rf gif for today')
    create_daily_gif(exe_ts_utc, output_dir, output_prefix + '_today.gif',
                     output_prefix)

    prev_day_gif = os.path.join(output_dir, output_prefix + '_yesterday.gif')
    if not utils.file_exists_nonempty(prev_day_gif) or exe_ts_utc.strftime(
            '%H:%M') == '00:00':
        logging.info('Creating sat rf gif for yesterday')
        create_daily_gif(
            utils.datetime_floor(exe_ts_utc, 3600 * 24) - dt.timedelta(days=1),
            output_dir, output_prefix + '_yesterday.gif', output_prefix)

    if cum:
        logging.info('Processing cumulative')
        process_cumulative_plot(url_dest_list, start_time, end_time,
                                output_dir, lat_min, lon_min, lat_max, lon_max)
        logging.info('Processing cumulative complete')

    # clean up temp dir
    if cleanup:
        logging.info('Cleaning up')
        shutil.rmtree(tmp_dir)
        utils.delete_files_with_prefix(output_dir, '*.archive')
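
create_daily_gif(day_ts, output_dir, gif_name, prefix) is also undefined here. Assuming process_jaxa_zip_file renders a .png plot per timestep alongside each .asc file, a sketch using imageio (the actual library and file naming are guesses):

import glob
import imageio

def create_daily_gif(day_ts, output_dir, gif_name, prefix):
    # sketch: stitch the day's rendered rainfall plots into an animated gif
    pattern = os.path.join(
        output_dir, prefix + '_' + day_ts.strftime('%Y-%m-%d') + '_*.png')
    with imageio.get_writer(os.path.join(output_dir, gif_name),
                            mode='I', duration=0.5) as writer:
        for frame in sorted(glob.glob(pattern)):
            writer.append_data(imageio.imread(frame))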
Example #7
def set_initial_parameters_fs(wrf_home_key='wrf_home',
                              wrf_start_date_key='wrf_start_date',
                              wrf_config_key='wrf_config',
                              ignore_namelists=False,
                              **kwargs):
    # set wrf_home --> wrf_home Var > WRF_HOME env var > wrf_home default
    try:
        wrf_home = Variable.get(wrf_home_key)
    except KeyError:
        try:
            wrf_home = os.environ['WRF_HOME']
        except KeyError:
            wrf_home = constants.DEFAULT_WRF_HOME
    logging.info('wrf_home: %s' % wrf_home)

    # set wrf_config --> wrf_config Var (YAML format) > get_wrf_config(wrf_home)
    try:
        wrf_config_dict = Variable.get(wrf_config_key, deserialize_json=True)
        wrf_config = wrf_exec.get_wrf_config(wrf_config_dict.pop('wrf_home'),
                                             **wrf_config_dict)
    except KeyError:
        wrf_config = wrf_exec.get_wrf_config(wrf_home)
        logging.warning('%s Variable not found; using defaults: %s' %
                        (wrf_config_key, wrf_config.to_json_string()))
    logging.info('wrf_config: %s' % wrf_config.to_string())

    if wrf_home != wrf_config.get('wrf_home'):
        logging.warning(
            'wrf_home and wrf_config[wrf_home] are different! Please check the inputs'
        )

    # set start_date --> wrf_start_date var > execution_date param in the workflow > today
    start_date_dt = None
    start_date = None
    try:
        start_date_dt_utc = dt.datetime.strptime(
            Variable.get(wrf_start_date_key),
            '%Y-%m-%d_%H:%M') + dt.timedelta(seconds=time.altzone)
        start_date_dt = utils.datetime_floor(start_date_dt_utc, 3600)
    except KeyError as e1:
        logging.warning(
            'wrf_start_date variable is not available. execution_date will be used - %s'
            % str(e1))
        try:
            start_date_dt = utils.datetime_floor(
                kwargs['execution_date'] + dt.timedelta(seconds=time.altzone),
                3600)
            # NOTE: airflow execution time is at the end of the (starting day + interval) period
            start_date_dt = start_date_dt + dt.timedelta(days=1)
        except KeyError as e2:
            logging.warning('execution_date is not available - %s' % str(e2))

    if start_date_dt is not None:
        start_date = (start_date_dt - dt.timedelta(
            hours=wrf_config.get('offset'))).strftime('%Y-%m-%d_%H:%M')
        logging.info('wrf_start_date: %s' % start_date)

    if start_date is not None and (not wrf_config.is_set('start_date') or
                                   wrf_config.get('start_date') != start_date):
        logging.info('Setting start date ' + start_date)
        wrf_config.set('start_date', start_date)
        Variable.set(wrf_config_key, wrf_config.to_json_string())
        logging.info('New wrf config: ' + wrf_config.to_json_string())

    if not ignore_namelists:
        logging.info('Replacing namelist.wps place-holders')
        wrf_exec.replace_namelist_wps(wrf_config)

        logging.info('Replacing namelist.input place-holders')
        wrf_exec.replace_namelist_input(wrf_config)

    if 'ti' in kwargs:
        kwargs['ti'].xcom_push(key=wrf_config_key,
                               value=wrf_config.to_json_string())
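
The wrf_home lookup at the top of this function is a three-level precedence chain: Airflow Variable, then the WRF_HOME environment variable, then a built-in default. The same pattern can be factored into a small helper (purely illustrative; not part of the source):

def resolve_setting(var_key, env_key, default):
    # illustrative helper mirroring the wrf_home lookup above:
    # Airflow Variable > environment variable > hard-coded default
    try:
        return Variable.get(var_key)
    except KeyError:
        return os.environ.get(env_key, default)

# e.g.: wrf_home = resolve_setting('wrf_home', 'WRF_HOME',
#                                  constants.DEFAULT_WRF_HOME)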