def extract_jaxa_satellite_data(start_ts_utc, end_ts_utc, output_dir):
    start = utils.datetime_floor(start_ts_utc, 3600)
    end = utils.datetime_floor(end_ts_utc, 3600)

    lat_min = 5.722969
    lon_min = 79.52146
    lat_max = 10.06425
    lon_max = 82.18992

    login = '******'

    url0 = 'ftp://' + login + '@hokusai.eorc.jaxa.jp/realtime/txt/05_AsiaSS/YYYY/MM/DD/gsmap_nrt.YYYYMMDD.HH00.05_AsiaSS.csv.zip'
    url1 = 'ftp://' + login + '@hokusai.eorc.jaxa.jp/now/txt/05_AsiaSS/gsmap_now.YYYYMMDD.HH00_HH59.05_AsiaSS.csv.zip'

    def get_jaxa_url(ts):
        # use the near-real-time (nrt) product for timestamps older than ~5 hours, the 'now' product otherwise
        url_switch = (dt.datetime.utcnow() - ts) > dt.timedelta(hours=5)
        _url = url0 if url_switch else url1
        ph = {'YYYY': ts.strftime('%Y'),
              'MM': ts.strftime('%m'),
              'DD': ts.strftime('%d'),
              'HH': ts.strftime('%H')}
        for k, v in ph.items():  # .iteritems() is Python 2 only; .items() works on Python 3
            _url = _url.replace(k, v)
        return _url

    tmp_dir = os.path.join(output_dir, 'tmp_jaxa/')
    if not os.path.exists(tmp_dir):
        os.mkdir(tmp_dir)
    else:
        utils.cleanup_dir(tmp_dir)

    url_dest_list = []
    # np.arange coerces the Python datetimes to datetime64; .astype(dt.datetime) converts the hourly steps back
    for timestamp in np.arange(start, end, dt.timedelta(hours=1)).astype(dt.datetime):
        url = get_jaxa_url(timestamp)
        url_dest_list.append((url,
                              os.path.join(tmp_dir, os.path.basename(url)),
                              os.path.join(output_dir,
                                           'jaxa_sat_rf_' + timestamp.strftime('%Y-%m-%d_%H:%M') + '.asc')))

    utils.download_parallel(url_dest_list)

    procs = multiprocessing.cpu_count()
    Parallel(n_jobs=procs)(
        delayed(_process_zip_file)(i[1], i[2], lat_min, lon_min, lat_max, lon_max) for i in url_dest_list)

    # clean up temp dir
    shutil.rmtree(tmp_dir)
def replace_file_with_values(wrf_config, src, dest, aux_dict, start_date=None, end_date=None):
    if start_date is None:
        start_date = utils.datetime_floor(
            dt.datetime.strptime(wrf_config.get('start_date'), '%Y-%m-%d_%H:%M'),
            wrf_config.get('gfs_step') * 3600)

    if end_date is None:
        end_date = start_date + dt.timedelta(days=wrf_config.get('period'))

    period = wrf_config.get('period')

    d = {
        'YYYY1': start_date.strftime('%Y'),
        'MM1': start_date.strftime('%m'),
        'DD1': start_date.strftime('%d'),
        'hh1': start_date.strftime('%H'),
        'mm1': start_date.strftime('%M'),
        'YYYY2': end_date.strftime('%Y'),
        'MM2': end_date.strftime('%m'),
        'DD2': end_date.strftime('%d'),
        'hh2': end_date.strftime('%H'),
        'mm2': end_date.strftime('%M'),
        'GEOG': wrf_config.get('geog_dir'),
        'RD0': str(int(period)),
        'RH0': str(int(period * 24 % 24)),
        'RM0': str(int(period * 60 * 24 % 60)),
        'hi1': '180',
        'hi2': '60',
        'hi3': '60',
    }

    if aux_dict and wrf_config.is_set(aux_dict):
        d.update(wrf_config.get(aux_dict))

    utils.replace_file_with_values(src, dest, d)
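# A minimal sketch (not part of the original module) of the substitution that
# utils.replace_file_with_values is assumed to perform with the placeholder dictionary
# built above: plain string replacement of each key in a namelist template line such as
# "start_date = 'YYYY1-MM1-DD1_hh1:mm1:00'". The helper name below is hypothetical and
# only illustrates the expected behaviour.
def _replace_placeholders_sketch(src, dest, d):
    """Copy src to dest, replacing every placeholder key in d with its value."""
    with open(src) as f:
        content = f.read()
    for k, v in d.items():
        content = content.replace(k, v)
    with open(dest, 'w') as f:
        f.write(content)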
def extract_jaxa_satellite_data(start_ts_utc, end_ts_utc, output_dir, cleanup=True, cum=False, tmp_dir=None,
                                lat_min=5.722969, lon_min=79.52146, lat_max=10.06425, lon_max=82.18992,
                                output_prefix='jaxa_sat', db_adapter_config=None):
    start = utils.datetime_floor(start_ts_utc, 3600)
    end = utils.datetime_floor(end_ts_utc, 3600)

    login = '******'

    url0 = 'ftp://' + login + '@hokusai.eorc.jaxa.jp/realtime/txt/05_AsiaSS/YYYY/MM/DD/gsmap_nrt.YYYYMMDD.HH00.05_AsiaSS.csv.zip'
    url1 = 'ftp://' + login + '@hokusai.eorc.jaxa.jp/now/txt/05_AsiaSS/gsmap_now.YYYYMMDD.HH00_HH59.05_AsiaSS.csv.zip'

    def get_jaxa_url(ts):
        url_switch = (dt.datetime.utcnow() - ts) > dt.timedelta(hours=5)
        _url = url0 if url_switch else url1
        ph = {
            'YYYY': ts.strftime('%Y'),
            'MM': ts.strftime('%m'),
            'DD': ts.strftime('%d'),
            'HH': ts.strftime('%H')
        }
        for k, v in list(ph.items()):
            _url = _url.replace(k, v)
        return _url

    # tmp_dir = os.path.join(output_dir, 'tmp_jaxa/')
    # if not os.path.exists(tmp_dir):
    #     os.mkdir(tmp_dir)
    if tmp_dir is None:
        tmp_dir = tempfile.mkdtemp(prefix='tmp_jaxa')

    url_dest_list = []
    for timestamp in np.arange(start, end, dt.timedelta(hours=1)).astype(dt.datetime):
        url = get_jaxa_url(timestamp)
        url_dest_list.append((url,
                              os.path.join(tmp_dir, os.path.basename(url)),
                              os.path.join(output_dir,
                                           output_prefix + '_' + timestamp.strftime('%Y-%m-%d_%H:%M') + '.asc')))

    procs = multiprocessing.cpu_count()

    logging.info('Downloading inventory in parallel')
    utils.download_parallel(url_dest_list, procs)
    logging.info('Downloading inventory complete')

    logging.info('Processing files in parallel')
    Parallel(n_jobs=procs)(
        delayed(process_jaxa_zip_file)(i[1], i[2], lat_min, lon_min, lat_max, lon_max, cum, output_prefix,
                                       db_adapter_config) for i in url_dest_list)
    logging.info('Processing files complete')

    logging.info('Creating sat rf gif for today')
    create_daily_gif(start, output_dir, output_prefix + '_today.gif', output_prefix)

    prev_day_gif = os.path.join(output_dir, output_prefix + '_yesterday.gif')
    if not utils.file_exists_nonempty(prev_day_gif) or start.strftime('%H:%M') == '00:00':
        logging.info('Creating sat rf gif for yesterday')
        create_daily_gif(utils.datetime_floor(start, 3600 * 24) - dt.timedelta(days=1), output_dir,
                         output_prefix + '_yesterday.gif', output_prefix)

    if cum:
        logging.info('Processing cumulative')
        process_cumulative_plot(url_dest_list, start_ts_utc, end_ts_utc, output_dir,
                                lat_min, lon_min, lat_max, lon_max)
        logging.info('Processing cumulative complete')

    # clean up temp dir
    if cleanup:
        logging.info('Cleaning up')
        shutil.rmtree(tmp_dir)
        utils.delete_files_with_prefix(output_dir, '*.archive')
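# Illustrative usage (a sketch, not part of the original module): extract the previous six
# hours of GSMaP rainfall into ./jaxa_output with the default Sri Lanka bounding box.
# Timestamps are assumed to be UTC and the output directory is assumed to exist; the
# directory name is hypothetical.
#
#     end_ts = dt.datetime.utcnow()
#     extract_jaxa_satellite_data(end_ts - dt.timedelta(hours=6), end_ts, 'jaxa_output',
#                                 cleanup=True, cum=False)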
def extract_jaxa_satellite_hourly_data(ts, output_dir):
    ts = utils.datetime_floor(ts, 3600)
    logging.info('Jaxa satellite data extraction for %s (previous hour)' % str(ts))
    extract_jaxa_satellite_data(ts - dt.timedelta(hours=1), ts, output_dir)
def extract_jaxa_satellite_data_every_half_hr(exe_ts_utc, output_dir, cleanup=True, cum=False, tmp_dir=None,
                                              lat_min=5.722969, lon_min=79.52146, lat_max=10.06425,
                                              lon_max=82.18992, output_prefix='jaxa_sat', db_adapter_config=None):
    exe_ts_utc = exe_ts_utc - dt.timedelta(minutes=2)
    print('-------------extract_jaxa_satellite_data_every_half_hr---------------exe_ts_utc:', exe_ts_utc)
    exe_ts_utc = exe_ts_utc - dt.timedelta(minutes=30)
    run_minute = int(exe_ts_utc.strftime('%M'))
    print('run_minute : ', run_minute)

    year_str = exe_ts_utc.strftime('%Y')
    month_str = exe_ts_utc.strftime('%m')
    day_str = exe_ts_utc.strftime('%d')
    hour_str = exe_ts_utc.strftime('%H')
    hour_str1 = (exe_ts_utc + dt.timedelta(hours=1)).strftime('%H')

    # if run_minute == 0:
    #     url = 'ftp://*****:*****@hokusai.eorc.jaxa.jp/now/txt/05_AsiaSS/gsmap_now.{}{}{}.{}00_{}59.05_AsiaSS.csv.zip' \
    #         .format(year_str, month_str, day_str, hour_str, hour_str)
    # else:
    #     url = 'ftp://*****:*****@hokusai.eorc.jaxa.jp/now/txt/05_AsiaSS/gsmap_now.{}{}{}.{}30_{}29.05_AsiaSS.csv.zip' \
    #         .format(year_str, month_str, day_str, hour_str, hour_str1)

    if run_minute == 0:
        url = 'ftp://*****:*****@hokusai.eorc.jaxa.jp/now/txt/island/SriLanka/gsmap_now.{}{}{}.{}00_{}59.SriLanka.csv.zip' \
            .format(year_str, month_str, day_str, hour_str, hour_str)
    else:
        url = 'ftp://*****:*****@hokusai.eorc.jaxa.jp/now/txt/island/SriLanka/gsmap_now.{}{}{}.{}30_{}29.SriLanka.csv.zip' \
            .format(year_str, month_str, day_str, hour_str, hour_str1)
    print('Download url : ', url)

    start_time = exe_ts_utc
    end_time = start_time + dt.timedelta(hours=1)
    print('_get_start_end|[start_time, end_time] : ', [start_time, end_time])

    if tmp_dir is None:
        tmp_dir = tempfile.mkdtemp(prefix='tmp_jaxa')

    url_dest_list = [(url,
                      os.path.join(tmp_dir, os.path.basename(url)),
                      os.path.join(output_dir,
                                   output_prefix + '_' + exe_ts_utc.strftime('%Y-%m-%d_%H:%M') + '.asc'))]

    procs = multiprocessing.cpu_count()

    logging.info('Downloading inventory in parallel')
    utils.download_parallel(url_dest_list, procs)
    logging.info('Downloading inventory complete')

    logging.info('Processing files in parallel')
    Parallel(n_jobs=procs)(
        delayed(process_jaxa_zip_file)(i[1], i[2], lat_min, lon_min, lat_max, lon_max, cum, output_prefix,
                                       db_adapter_config) for i in url_dest_list)
    logging.info('Processing files complete')

    logging.info('Creating sat rf gif for today')
    create_daily_gif(exe_ts_utc, output_dir, output_prefix + '_today.gif', output_prefix)

    prev_day_gif = os.path.join(output_dir, output_prefix + '_yesterday.gif')
    if not utils.file_exists_nonempty(prev_day_gif) or exe_ts_utc.strftime('%H:%M') == '00:00':
        logging.info('Creating sat rf gif for yesterday')
        create_daily_gif(utils.datetime_floor(exe_ts_utc, 3600 * 24) - dt.timedelta(days=1), output_dir,
                         output_prefix + '_yesterday.gif', output_prefix)

    if cum:
        logging.info('Processing cumulative')
        process_cumulative_plot(url_dest_list, start_time, end_time, output_dir,
                                lat_min, lon_min, lat_max, lon_max)
        logging.info('Processing cumulative complete')

    # clean up temp dir
    if cleanup:
        logging.info('Cleaning up')
        shutil.rmtree(tmp_dir)
        utils.delete_files_with_prefix(output_dir, '*.archive')
def extract_jaxa_satellite_data_half_hr(exe_ts_utc, output_dir, cleanup=True, cum=False, tmp_dir=None,
                                        lat_min=5.722969, lon_min=79.52146, lat_max=10.06425, lon_max=82.18992,
                                        output_prefix='jaxa_sat', db_adapter_config=None):
    print('-------------extract_jaxa_satellite_data_half_hr---------------exe_ts_utc:', exe_ts_utc)
    exe_ts_utc = exe_ts_utc - dt.timedelta(hours=1)

    login = '******'
    url_hour = 'ftp://' + login + '@hokusai.eorc.jaxa.jp/now/txt/05_AsiaSS/gsmap_now.YYYYMMDD.HH00_HH59.05_AsiaSS.csv.zip'
    url_half_hour = 'ftp://' + login + '@hokusai.eorc.jaxa.jp/now/txt/05_AsiaSS/gsmap_now.YYYYMMDD.HH30_KK29.05_AsiaSS.csv.zip'

    run_minute = int(exe_ts_utc.strftime('%M'))
    print('run_minute : ', run_minute)
    remainder = run_minute % 30
    run_minute = run_minute - remainder
    start_time = exe_ts_utc - dt.timedelta(minutes=remainder)
    end_time = start_time + dt.timedelta(minutes=30)
    print('_get_start_end|[start_time, end_time] : ', [start_time, end_time])

    if run_minute == 0:
        exe_ts_utc = exe_ts_utc.strftime('%Y-%m-%d %H:00:00')
    else:
        exe_ts_utc = exe_ts_utc.strftime('%Y-%m-%d %H:30:00')
    exe_ts_utc = dt.datetime.strptime(exe_ts_utc, '%Y-%m-%d %H:%M:%S')

    def _get_download_url(run_minute):
        remainder = run_minute % 30
        run_minute = run_minute - remainder
        if run_minute == 0:
            return url_hour
        else:
            return url_half_hour

    def _format_url(url):
        ph = {
            'YYYY': exe_ts_utc.strftime('%Y'),
            'MM': exe_ts_utc.strftime('%m'),
            'DD': exe_ts_utc.strftime('%d'),
            'KK': (exe_ts_utc + dt.timedelta(hours=1)).strftime('%H'),
            'HH': exe_ts_utc.strftime('%H')
        }
        for k, v in list(ph.items()):
            url = url.replace(k, v)
        print('url : ', url)
        return url

    if tmp_dir is None:
        tmp_dir = tempfile.mkdtemp(prefix='tmp_jaxa')

    url = _get_download_url(run_minute)
    formatted_url = _format_url(url)
    print('formatted_url : ', formatted_url)

    url_dest_list = [(formatted_url,
                      os.path.join(tmp_dir, os.path.basename(formatted_url)),
                      os.path.join(output_dir,
                                   output_prefix + '_' + exe_ts_utc.strftime('%Y-%m-%d_%H:%M') + '.asc'))]

    procs = multiprocessing.cpu_count()

    logging.info('Downloading inventory in parallel')
    utils.download_parallel(url_dest_list, procs)
    logging.info('Downloading inventory complete')

    logging.info('Processing files in parallel')
    Parallel(n_jobs=procs)(
        delayed(process_jaxa_zip_file)(i[1], i[2], lat_min, lon_min, lat_max, lon_max, cum, output_prefix,
                                       db_adapter_config) for i in url_dest_list)
    logging.info('Processing files complete')

    logging.info('Creating sat rf gif for today')
    create_daily_gif(exe_ts_utc, output_dir, output_prefix + '_today.gif', output_prefix)

    prev_day_gif = os.path.join(output_dir, output_prefix + '_yesterday.gif')
    if not utils.file_exists_nonempty(prev_day_gif) or exe_ts_utc.strftime('%H:%M') == '00:00':
        logging.info('Creating sat rf gif for yesterday')
        create_daily_gif(utils.datetime_floor(exe_ts_utc, 3600 * 24) - dt.timedelta(days=1), output_dir,
                         output_prefix + '_yesterday.gif', output_prefix)

    if cum:
        logging.info('Processing cumulative')
        process_cumulative_plot(url_dest_list, start_time, end_time, output_dir,
                                lat_min, lon_min, lat_max, lon_max)
        logging.info('Processing cumulative complete')

    # clean up temp dir
    if cleanup:
        logging.info('Cleaning up')
        shutil.rmtree(tmp_dir)
        utils.delete_files_with_prefix(output_dir, '*.archive')
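# Worked example (illustrative, not part of the original module) of the placeholder scheme
# used by _format_url above: HH is the snapped hour and KK the following hour, so for a
# hypothetical execution time snapped to 2024-01-15 13:30 UTC the half-hour template
# resolves as follows.
#
#     # 'gsmap_now.YYYYMMDD.HH30_KK29.05_AsiaSS.csv.zip'
#     # -> 'gsmap_now.20240115.1330_1429.05_AsiaSS.csv.zip'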
def set_initial_parameters_fs(wrf_home_key='wrf_home', wrf_start_date_key='wrf_start_date',
                              wrf_config_key='wrf_config', ignore_namelists=False, **kwargs):
    # set wrf_home --> wrf_home Var > WRF_HOME env var > wrf_home default
    try:
        wrf_home = Variable.get(wrf_home_key)
    except KeyError:
        try:
            wrf_home = os.environ['WRF_HOME']
        except KeyError:
            wrf_home = constants.DEFAULT_WRF_HOME
    logging.info('wrf_home: %s' % wrf_home)

    # set wrf_config --> wrf_config Var (YAML format) > get_wrf_config(wrf_home)
    try:
        wrf_config_dict = Variable.get(wrf_config_key, deserialize_json=True)
        wrf_config = wrf_exec.get_wrf_config(wrf_config_dict.pop('wrf_home'), **wrf_config_dict)
    except KeyError:
        wrf_config = wrf_exec.get_wrf_config(wrf_home)
        logging.warning('%s Variable is set to %s' % (wrf_config_key, wrf_config.to_json_string()))
    logging.info('wrf_config: %s' % wrf_config.to_string())

    if wrf_home != wrf_config.get('wrf_home'):
        logging.warning('wrf_home and wrf_config[wrf_home] are different! Please check the inputs')

    # set start_date --> wrf_start_date Var > execution_date param in the workflow > today
    start_date_dt = None
    start_date = None
    try:
        start_date_dt_utc = dt.datetime.strptime(Variable.get(wrf_start_date_key),
                                                 '%Y-%m-%d_%H:%M') + dt.timedelta(seconds=time.altzone)
        start_date_dt = utils.datetime_floor(start_date_dt_utc, 3600)
    except KeyError as e1:
        logging.warning('wrf_start_date variable is not available. execution_date will be used - %s' % str(e1))
        try:
            start_date_dt = utils.datetime_floor(kwargs['execution_date'] + dt.timedelta(seconds=time.altzone),
                                                 3600)
            # NOTE: airflow execution time is at the end of the (starting day + interval) period
            start_date_dt = start_date_dt + dt.timedelta(days=1)
        except KeyError as e2:
            logging.warning('execution_date is not available - %s' % str(e2))

    if start_date_dt is not None:
        start_date = (start_date_dt - dt.timedelta(hours=wrf_config.get('offset'))).strftime('%Y-%m-%d_%H:%M')
    logging.info('wrf_start_date: %s' % start_date)

    if start_date is not None and (not wrf_config.is_set('start_date') or
                                   wrf_config.get('start_date') != start_date):
        logging.info('Setting start date ' + start_date)
        wrf_config.set('start_date', start_date)
        # date_splits = re.split('[-_:]', start_date)
        Variable.set(wrf_config_key, wrf_config.to_json_string())
        logging.info('New wrf config: ' + wrf_config.to_json_string())

    if not ignore_namelists:
        logging.info('Replacing namelist.wps place-holders')
        wrf_exec.replace_namelist_wps(wrf_config)
        logging.info('Replacing namelist.input place-holders')
        wrf_exec.replace_namelist_input(wrf_config)

    if 'ti' in kwargs:
        kwargs['ti'].xcom_push(key=wrf_config_key, value=wrf_config.to_json_string())
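# Illustrative wiring sketch (not part of the original module): set_initial_parameters_fs
# reads Airflow Variables and pushes the resulting wrf_config via XCom, so it is assumed to
# run as a PythonOperator callable. The DAG name, schedule, and task_id below are
# hypothetical; provide_context=True is the Airflow 1.x way of passing execution_date/ti
# into **kwargs.
#
#     from airflow import DAG
#     from airflow.operators.python_operator import PythonOperator
#
#     with DAG('wrf_run', schedule_interval='@daily', start_date=dt.datetime(2024, 1, 1)) as dag:
#         init_params = PythonOperator(task_id='set_initial_parameters',
#                                      python_callable=set_initial_parameters_fs,
#                                      provide_context=True)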