def gen_all(self):
    """Register every analysis and figure task on ``self.task_ctrl``.

    A single ``verify_lats_lons`` task over all dataset paths is added first.
    Then, for each raster-scale family ('small_medium_large', 'sliding'):
    the matching scales are selected, the hydrobasins raster task is added,
    analysis tasks are generated for every (dataset, mode) pair,
    ``self.df_keys`` is rebuilt from ``self.df_keys_data`` and the figure
    tasks are generated.

    Side effects: sets ``self.raster_scales``, ``self.scales``,
    ``self.hb_raster_cubes_fn`` and ``self.df_keys``.
    """
    all_dataset_paths = [get_dataset_path(name) for name in DATASETS]
    verify_output = (PATHS['output_datadir'] / 'basin_diurnal_cycle_analysis'
                     / 'verify_lats_lons.txt')
    self.task_ctrl.add(Task(verify_lats_lons, all_dataset_paths, [verify_output]))

    scales_by_family = {
        'small_medium_large': SCALES,
        'sliding': SLIDING_SCALES,
    }
    # The loop target is an attribute on purpose: the value stays available
    # on ``self`` for the methods called inside the loop body.
    for self.raster_scales in ['small_medium_large', 'sliding']:
        self.scales = scales_by_family[self.raster_scales]
        self.hb_raster_cubes_fn = (
            PATHS['output_datadir']
            / f'basin_diurnal_cycle_analysis/hb_N1280_raster_{self.raster_scales}.nc'
        )
        self.task_ctrl.add(
            Task(gen_hydrobasins_raster_cubes,
                 [get_dataset_path('cmorph')],
                 [self.hb_raster_cubes_fn],
                 func_args=[self.scales]))

        for dataset, mode in itertools.product(DATASETS, MODES):
            self.gen_analysis_tasks(dataset, mode)

        self.df_keys = pd.DataFrame(self.df_keys_data)

        for scale, mode in itertools.product(self.scales, MODES):
            self.gen_fig_tasks(scale, mode)

        self.gen_cmorph_vs_datasets_fig_tasks()
def gen_task_ctrl():
    """Build the TaskControl holding the three figure-plotting tasks.

    All three tasks (``plot_fig2``, ``plot_fig3``, ``plot_fig4``) receive the
    same inputs dict: the three monthly 'al508' diagnostic paths plus the
    'direct' JJA mean-field paths of 'al508' and 'ak543'.

    Returns:
        The populated TaskControl.
    """
    tc = TaskControl(__file__)
    year = 2006
    dist_thresh = 100
    dotprod_thresh = 0.05

    inputs = {}
    for month in (6, 7, 8):
        inputs[f'al508_diag_{month:02}'] = fmtp(diag_orog_precip_path_tpl,
                                                model='al508',
                                                year=year,
                                                month=month)
    for model in ('al508', 'ak543'):
        inputs[f'{model}_direct'] = fmtp(orog_precip_mean_fields_tpl,
                                         model=model,
                                         year=year,
                                         season='jja',
                                         dotprod_thresh=dotprod_thresh,
                                         dist_thresh=dist_thresh)

    figure_tasks = (
        (plot_fig2, [D23_fig2]),
        (plot_fig3, [D23_fig3]),
        (plot_fig4, [D23_fig4, D23_fig5]),
    )
    for plot_fn, fig_paths in figure_tasks:
        tc.add(Task(plot_fn, inputs, fig_paths))
    return tc
def gen_task_ctrl():
    """Build the TaskControl producing the two basin statistics CSV files.

    One task summarises the basin shapefiles of the names S0..S10; the other
    summarises the weights files for every (resolution, basin name) pair.

    Returns:
        The populated TaskControl.
    """
    task_ctrl = TaskControl(__file__)
    hb_names = [f'S{i}' for i in range(11)]
    resolutions = ['N1280', 'N512', 'N216', 'N96']
    analysis_dir = PATHS['output_datadir'] / 'basin_weighted_analysis'

    vector_inputs = {}
    for hb_name in hb_names:
        vector_inputs[f'basin_vector_{hb_name}'] = (
            analysis_dir / hb_name / f'hb_{hb_name}.shp')
    vector_csv = PATHS['figsdir'] / 'basin_stats' / 'basin_vector_stats.csv'
    task_ctrl.add(
        Task(gen_vector_basin_stats, vector_inputs, [vector_csv],
             func_args=(hb_names,)))

    weights_inputs = {}
    for res in resolutions:
        for hb_name in hb_names:
            weights_inputs[f'basin_weights_{res}_{hb_name}'] = (
                analysis_dir / hb_name / f'weights_{res}_{hb_name}.nc')
    weights_csv = PATHS['figsdir'] / 'basin_stats' / 'basin_weights_stats.csv'
    task_ctrl.add(
        Task(gen_weights_basin_stats, weights_inputs, [weights_csv],
             func_args=(resolutions, hb_names)))

    return task_ctrl
def gen_task_ctrl():
    """Build the TaskControl for the weights-vs-hydrobasins analysis.

    For every (model, basin name) pair a ``gen_weights_cube`` task is added.
    Afterwards a single ``plot_weights_cube_table`` task is added that takes
    all weights files and all basin shapefiles as inputs.

    Returns:
        The populated TaskControl.
    """
    task_ctrl = TaskControl(__file__)
    shp_extensions = ['shp', 'dbf', 'prj', 'cpg', 'shx']

    for model, hb_name in itertools.product(MODELS, HB_NAMES):
        shp_parts = {}
        for ext in shp_extensions:
            shp_parts[f'hb_name_{ext}'] = (
                PATHS['output_datadir']
                / f'raster_vs_hydrobasins/hb_{hb_name}.{ext}')
        weights_inputs = {'model': FILENAMES[model], **shp_parts}
        weights_filename = (
            PATHS['output_datadir']
            / f'weights_vs_hydrobasins/weights_{model}_{hb_name}.nc')
        task_ctrl.add(
            Task(gen_weights_cube, weights_inputs, [weights_filename],
                 func_args=(hb_name, )))

        # Only consumed by the disabled plot_weights_cube task below.
        input_filenames = {
            'model': weights_filename,
            'hb_name': PATHS['output_datadir']
            / f'raster_vs_hydrobasins/hb_{hb_name}.shp',
        }
        # TODO: Has not necessarily been created (if e.g. just querying task_ctrl).
        # TODO: Also, creating loads of output files creates loads of metadata, which takes up space.
        # TODO: Perhaps zip all the pngs together?
        # hb = gpd.read_file(str(input_filenames['hb_name']))
        # output_filenames = {i: PATHS['figsdir'] / 'weights_vs_hydrobasins' / f'{model}_{hb_name}' / f'basin_{i}.png'
        #                     for i in range(len(hb))}
        # task_ctrl.add(Task(plot_weights_cube, input_filenames, output_filenames))

    # Inputs of the summary table: every weights file keyed by
    # (model, hb_name), followed by every shapefile keyed by hb_name.
    table_inputs = {}
    for model, hb_name in itertools.product(MODELS, HB_NAMES):
        table_inputs[(model, hb_name)] = (
            PATHS['output_datadir']
            / f'weights_vs_hydrobasins/weights_{model}_{hb_name}.nc')
    for hb_name in HB_NAMES:
        table_inputs[hb_name] = (
            PATHS['output_datadir']
            / f'raster_vs_hydrobasins/hb_{hb_name}.shp')

    table_outputs = [
        PATHS['figsdir'] / 'weights_vs_hydrobasins' / 'weights_cube_table'
        / 'basins_table.pdf'
    ]
    table_kwargs = {
        'rows': list(zip(HB_NAMES[::-1], [770, 201, 23])),
        'cols': MODELS,
    }
    task_ctrl.add(
        Task(plot_weights_cube_table, table_inputs, table_outputs,
             func_kwargs=table_kwargs))

    return task_ctrl
def basin_area_avg(self, dataset, diurnal_cycle_cube_path, scale, method, mode):
    """Add the two 'basin_area_avg' tasks for one dataset/scale/method/mode.

    Creates a ``gen_basin_area_avg_phase_mag`` task (writing an ``.hdf``
    file) and a ``gen_phase_mag_map`` task (writing an ``.nc`` file, with the
    ``.hdf`` file as an extra input). Both tasks are recorded in
    ``self.df_keys_data`` and added to ``self.task_ctrl``.

    Args:
        dataset: Dataset name; names whose first 7 characters are 'HadGEM3'
            use the upper-case 'JJA' cube name.
        diurnal_cycle_cube_path: Path whose ``stem`` is embedded in the
            output file name and which is passed as a task input.
        scale: Basin scale, passed through to both tasks.
        method: Analysis method, passed through to the first task.
        mode: Mode label embedded in the output file name.
    """
    stem = (f'basin_diurnal_cycle_analysis/{dataset}/basin_area_avg_'
            f'{diurnal_cycle_cube_path.stem}_{mode}_{scale}_{method}')
    hdf_path = PATHS['output_datadir'] / f'{stem}.hdf'

    record_base = {
        'dataset': dataset,
        'mode': mode,
        'basin_scale': scale,
        'analysis_order': 'basin_area_avg',
        'method': method,
    }
    shared_inputs = {
        'raster_cubes': self.hb_raster_cubes_fn,
        'diurnal_cycle_cubes': diurnal_cycle_cube_path,
    }
    cube_name = ('amount_of_precip_JJA' if dataset[:7] == 'HadGEM3'
                 else 'amount_of_precip_jja')

    phase_mag_task = Task(gen_basin_area_avg_phase_mag,
                          shared_inputs,
                          [hdf_path],
                          func_args=[scale, cube_name, method])
    self.df_keys_data.append(
        dict(record_base, type='phase_mag', task=phase_mag_task))

    nc_path = PATHS['output_datadir'] / f'{stem}.nc'
    map_inputs = dict(shared_inputs, df_phase_mag=hdf_path)
    phase_mag_cubes_task = Task(gen_phase_mag_map,
                                map_inputs,
                                [nc_path],
                                func_args=[scale, cube_name])
    self.df_keys_data.append(
        dict(record_base, type='phase_mag_cubes', task=phase_mag_cubes_task))

    for task in (phase_mag_task, phase_mag_cubes_task):
        self.task_ctrl.add(task)
def gen_cmorph_vs_datasets_fig_tasks(self):
    """Add the RMSE computation and the cmorph-vs-datasets figure tasks.

    The first output of every harmonic/'phase_mag_cubes'/'basin_area_avg'
    task recorded in ``self.df_keys`` is collected (keyed by
    ``(mode, dataset, scale)``) as inputs to ``gen_rmses``. The resulting
    pickle then feeds one combined figure task and one per-mode figure task.
    """
    keys = self.df_keys
    fig_dir = f'{self.figsdir}/cmorph_vs/{self.raster_scales}'
    rmses_filename = Path(
        PATHS['output_datadir']
        / f'basin_diurnal_cycle_analysis/rmses_{self.raster_scales}.pkl')

    def first_output(rows, scale):
        # First output of the (first) task recorded for this basin scale.
        return rows[rows.basin_scale == scale].task.values[0].outputs[0]

    inputs = {}
    for mode in MODES:
        base_mask = ((keys.method == 'harmonic')
                     & (keys.type == 'phase_mag_cubes')
                     & (keys.analysis_order == 'basin_area_avg')
                     & (keys['mode'] == mode))
        cmorph_rows = keys[base_mask & (keys.dataset == 'cmorph')]
        for dataset in DATASETS[1:]:
            dataset_rows = keys[base_mask & (keys.dataset == dataset)]
            for scale in self.scales:
                # The cmorph entry is (re)assigned on every dataset pass;
                # this keeps the key insertion order of the inputs dict.
                inputs[mode, 'cmorph', scale] = first_output(cmorph_rows, scale)
                inputs[mode, dataset, scale] = first_output(dataset_rows, scale)
    inputs['raster_cubes'] = self.hb_raster_cubes_fn

    self.task_ctrl.add(
        Task(gen_rmses, inputs, [rmses_filename], func_args=(self.scales, )))

    both_filename = Path(f'{fig_dir}/cmorph_vs_datasets.all.phase_and_mag.png')
    self.task_ctrl.add(
        Task(plot_cmorph_vs_all_datasets, [rmses_filename], [both_filename],
             func_args=[self.raster_scales]))

    for mode in MODES:
        per_mode_figs = [
            Path(f'{fig_dir}/cmorph_vs_datasets.{mode}.phase.circular_rmse.png'),
            Path(f'{fig_dir}/cmorph_vs_datasets.{mode}.mag.rmse.png'),
        ]
        self.task_ctrl.add(
            Task(plot_cmorph_vs_all_datasets2, [rmses_filename], per_mode_figs,
                 func_args=[mode, self.raster_scales]))
def gen_task_ctrl():
    """Build the TaskControl with the single input-less ``plot_dem`` task.

    Returns:
        The populated TaskControl.
    """
    fig_path = PATHS['figsdir'] / 'orog_precip' / 'dem_asia_with_regions.png'
    task_ctrl = TaskControl(__file__)
    task_ctrl.add(Task(plot_dem, [], [fig_path]))
    return task_ctrl
def gen_task_ctrl():
    """Build the TaskControl for the diagnostic orographic precip analysis.

    For every (model, year, month) combination a ``calc_orog_precip`` task
    and a ``calc_orog_precip_fracs`` task are added; ``month - 1`` is passed
    to both as their only positional function argument. A final
    ``combine_orog_precip_fracs`` task combines the fraction files of every
    (model, month) pair.

    Returns:
        The populated TaskControl.
    """
    tc = TaskControl(__file__)
    # /gws/nopw/j04/cosmic/mmuetz/data/era_interim_orog_precip
    years = [2006]
    # years = [2005, 2006, 2007, 2008]
    models = ['al508', 'ak543']
    months = [6, 7, 8]

    for model, year, month in product(models, years, months):
        # al508a.p9200606.asia_precip.nc
        precip_path = fmtp(precip_path_tpl, model=model, year=year, month=month)
        orog_precip_inputs = {
            'extended_rclim_mask': extended_rclim_mask,
            'land_sea_mask': land_sea_mask,
            'precip': precip_path,
        }
        diag_orog_precip_path = fmtp(diag_orog_precip_path_tpl,
                                     model=model, year=year, month=month)
        tc.add(Task(calc_orog_precip,
                    orog_precip_inputs,
                    [diag_orog_precip_path],
                    func_args=(month - 1, )))

        orog_precip_fracs_inputs = {
            'extended_rclim_mask': extended_rclim_mask,
            'land_sea_mask': land_sea_mask,
            'orog_precip': diag_orog_precip_path,
        }
        diag_orog_precip_frac_path = fmtp(diag_orog_precip_frac_path_tpl,
                                          model=model, year=year, month=month)
        tc.add(Task(calc_orog_precip_fracs,
                    orog_precip_fracs_inputs,
                    [diag_orog_precip_frac_path],
                    func_args=(month - 1, )))

    variables = list(product(models, months))
    columns = ['model', 'month']
    # The combine step covers a single year. Previously it silently relied on
    # the ``year`` loop variable leaking out of the loop above (i.e. the last
    # entry of ``years``); bind that choice explicitly instead.
    # NOTE(review): if ``years`` ever holds more than one entry, only the last
    # year is combined -- confirm that is intended before extending ``years``.
    combine_year = years[-1]
    combine_inputs = [fmtp(diag_orog_precip_frac_path_tpl,
                           model=model, year=combine_year, month=month)
                      for model, month in variables]
    combine_fracs_output = [diag_combine_frac_path]
    tc.add(Task(combine_orog_precip_fracs,
                combine_inputs,
                combine_fracs_output,
                func_args=(variables, columns)))

    return tc
def gen_task_ctrl():
    """Build the TaskControl for extracting and plotting mean orographic precip.

    For every (model, dotprod_thresh, dist_thresh) combination an
    ``extract_precip_mean_fields`` task (monthly files -> one JJA mean-fields
    file) and a ``plot_mean_orog_precip`` task (mean-fields file -> one figure
    per precip type) are added.

    Returns:
        The populated TaskControl.
    """
    tc = TaskControl(__file__)
    models = ['al508', 'ak543']
    # models = ['ak543']
    dist_threshs = [100]
    dotprod_threshs = [0.05]
    months = [6, 7, 8]
    # dist_threshs = [20, 100]
    # dotprod_threshs = [0.05, 0.1]
    year = 2006

    for model, dotprod_thresh, dist_thresh in product(models, dotprod_threshs, dist_threshs):
        # 1 model at a time.
        orog_precip_paths = [fmtp(orog_precip_path_tpl,
                                  model=model,
                                  year=year,
                                  month=month,
                                  dotprod_thresh=dotprod_thresh,
                                  dist_thresh=dist_thresh)
                             for month in months]
        orog_precip_mean_fields = [fmtp(orog_precip_mean_fields_tpl,
                                        model=model,
                                        year=year,
                                        season='jja',
                                        dotprod_thresh=dotprod_thresh,
                                        dist_thresh=dist_thresh)]
        tc.add(Task(extract_precip_mean_fields, orog_precip_paths, orog_precip_mean_fields))

    for model, dotprod_thresh, dist_thresh in product(models, dotprod_threshs, dist_threshs):
        orog_precip_mean_fields = [fmtp(orog_precip_mean_fields_tpl,
                                        model=model,
                                        year=year,
                                        season='jja',
                                        dotprod_thresh=dotprod_thresh,
                                        dist_thresh=dist_thresh)]
        orog_precip_figs = [fmtp(orog_precip_fig_tpl,
                                 model=model,
                                 year=year,
                                 season='jja',
                                 dotprod_thresh=dotprod_thresh,
                                 dist_thresh=dist_thresh,
                                 precip_type=precip_type)
                            for precip_type in ['orog', 'non_orog', 'ocean', 'orog_frac']]
        tc.add(Task(plot_mean_orog_precip, orog_precip_mean_fields, orog_precip_figs))
    # NOTE(review): this early return makes everything below unreachable, so
    # the plot_compare_mean_orog_precip tasks are never registered. It looks
    # like a deliberate way of disabling the comparison -- confirm, then
    # either delete the dead code or remove this return.
    return tc

    for dotprod_thresh, dist_thresh in product(dotprod_threshs, dist_threshs):
        # Compare 2 models.
        orog_precip_paths = {(model, month): fmtp(orog_precip_path_tpl,
                                                  model=model,
                                                  year=year,
                                                  month=month,
                                                  dotprod_thresh=dotprod_thresh,
                                                  dist_thresh=dist_thresh)
                             for model in models
                             for month in months}
        orog_precip_figs = [fmtp(orog_precip_fig_tpl,
                                 model='-'.join(models),
                                 year=year,
                                 season='jja',
                                 dotprod_thresh=dotprod_thresh,
                                 dist_thresh=dist_thresh,
                                 precip_type=precip_type)
                            for precip_type in ['orog', 'non_orog', 'ocean']]
        tc.add(Task(plot_compare_mean_orog_precip,
                    orog_precip_paths,
                    orog_precip_figs,
                    func_args=(models, months)))

    return tc
def gen_task_ctrl():
    """Build the TaskControl with the gridpoint mean precip figure task.

    The task's inputs map every name in ``DATASETS`` to its dataset path.

    Returns:
        The populated TaskControl.
    """
    task_ctrl = TaskControl(__file__)
    input_paths = {}
    for dataset in DATASETS:
        input_paths[dataset] = get_dataset_path(dataset)
    fig_path = (PATHS['figsdir'] / 'gridpoint_analysis'
                / 'gridpoint_mean_precip_asia.pdf')
    task_ctrl.add(Task(plot_gridpoint_mean_precip_asia, input_paths, [fig_path]))
    return task_ctrl
def gen_task_ctrl():
    """Build the TaskControl with one ``download_year`` task per year.

    Each task has no inputs, writes the per-year file inside the aphrodite
    data directory and is created with ``atomic_write=False``.

    Returns:
        The populated TaskControl.
    """
    tc = TaskControl(__file__)
    datadir = PATHS['datadir'] / 'aphrodite_data/025deg'
    for year in ALL_YEARS:
        year_path = datadir / FILE_TPL.format(year=year)
        download_task = Task(download_year, [], [year_path],
                             func_args=(year, datadir),
                             atomic_write=False)
        tc.add(download_task)
    return tc
def gen_task_ctrl():
    """Build the TaskControl with one gauge-data plotting task per config.

    ``all_plot_gauge_data_gen()`` yields ``(fn, args, kwargs)`` triples; the
    kwargs are encoded as ``key-value`` pairs joined by '.' in the output
    figure name.

    Returns:
        The populated TaskControl.
    """
    task_ctrl = TaskControl(__file__)
    fig_dir = PATHS['figsdir'] / 'gauge_data'
    for fn, args, kwargs in all_plot_gauge_data_gen():
        kwargs_str = '.'.join(f'{key}-{value}' for key, value in kwargs.items())
        fig_path = fig_dir / f'precip_station_jja_cressman.{kwargs_str}.png'
        task_ctrl.add(
            Task(fn, [], [fig_path], func_args=args, func_kwargs=kwargs))
    return task_ctrl
def gen_task_ctrl():
    """Build the TaskControl with the single ``combine_years`` task.

    The task reads the per-year files of ``ALL_YEARS`` and writes the
    'all' and 'jja' combined files into the same directory.

    Returns:
        The populated TaskControl.
    """
    tc = TaskControl(__file__)
    datadir = PATHS['datadir'] / 'aphrodite_data/025deg'
    yearly_files = [datadir / FILE_TPL.format(year=year) for year in ALL_YEARS]
    combined_files = [
        datadir / 'aphrodite_combined_all.nc',
        datadir / 'aphrodite_combined_jja.nc',
    ]
    tc.add(Task(combine_years, yearly_files, combined_files))
    return tc
def gen_task_ctrl():
    """Build the TaskControl with the two aphrodite seasonal-analysis tasks.

    Both tasks read the same 2009 aphrodite file and write an 'asia' and a
    'china' figure. They differ in the boolean passed as the only positional
    function argument (``True`` first, then ``False``) and in the output
    suffix ('.png' vs '.lognorm.png').

    Returns:
        The populated TaskControl.
    """
    task_ctrl = TaskControl(__file__)
    aphrodite_dir = Path('aphrodite_data/025deg')
    # The key (including its leading space) is reproduced exactly.
    inputs = {
        ' daily precipitation analysis interpolated onto 0.25deg grids':
        (PATHS['datadir'] / aphrodite_dir / 'APHRO_MA_025deg_V1901.2009.nc')
    }

    for flag, suffix in ((True, '.png'), (False, '.lognorm.png')):
        outputs = {}
        for region in ('asia', 'china'):
            outputs[region] = (PATHS['figsdir'] / 'aphrodite'
                               / f'{region}_aphrodite_2009_jja{suffix}')
        task_ctrl.add(
            Task(plot_aphrodite_seasonal_analysis, inputs, outputs,
                 func_args=(flag, )))
    return task_ctrl
def gen_task_ctrl():
    """Build the TaskControl for the monthly CMORPH download pipeline.

    For every month of 2019 four tasks are added, each consuming the previous
    one's outputs: download the tar file, convert it to one netCDF file per
    day of the month, extract the Asia file, and regrid it against a fixed
    target file.

    Returns:
        The populated TaskControl.
    """
    task_ctrl = TaskControl(__file__)
    years = [2019]
    months = range(1, 13)
    regrid_target = Path('/gws/nopw/j04/cosmic/mmuetz/data/u-al508/ap9.pp/'
                         'precip_200501/al508a.p9200501.asia_precip.nc')

    for year, month in itertools.product(years, months):
        yyyymm = f'{year}{month:02}'
        month_dir = BASEDIR / f'precip_{yyyymm}'

        # Downloaded tar archive: output of the first task, input of the second.
        tar_path = (BASEDIR / 'raw' / f'precip_{yyyymm}'
                    / f'CMORPH_V1.0_ADJ_8km-30min_{yyyymm}.tar')
        task_ctrl.add(Task(download_year, [], [tar_path],
                           func_args=(year, month)))

        _, days_in_month = calendar.monthrange(year, month)
        nc_filenames = {}
        for day in range(1, days_in_month + 1):
            nc_filenames[day] = month_dir / f'cmorph_ppt_{yyyymm}{day:02}.nc'
        task_ctrl.add(Task(convert_year_month, [tar_path], nc_filenames,
                           func_args=(year, month)))

        asia_path = month_dir / f'cmorph_ppt_{yyyymm}.asia.nc'
        task_ctrl.add(Task(extract_year_month, nc_filenames, [asia_path],
                           func_args=(year, month)))

        regrid_inputs = {'target': regrid_target, 'cmorph': asia_path}
        regrid_output = asia_path.with_name(asia_path.stem + '.N1280.nc')
        task_ctrl.add(Task(regrid_asia, regrid_inputs, [regrid_output]))

    return task_ctrl
def gen_task_ctrl():
    """Build the TaskControl with one task per seasonal-analysis config.

    ``all_seasonal_analysis_gen()`` yields ``(fn, args, kwargs)`` triples.
    Each task lists the ``plot_seasonal_analysis`` module file as its only
    input and writes to a file named after the unique task name.

    Returns:
        The populated TaskControl.
    """
    task_ctrl = TaskControl(__file__)
    for fn, args, kwargs in all_seasonal_analysis_gen():
        unique_name = task_unique_name_from_fn_args_kwargs(fn, args, kwargs)
        output_path = PATHS['figsdir'] / 'seasonal_analysis' / unique_name
        task_ctrl.add(
            Task(fn,
                 [plot_seasonal_analysis.__file__],
                 [output_path],
                 func_args=args,
                 func_kwargs=kwargs))
    return task_ctrl
def gen_phase_mag_maps_tasks(self, scale, mode, row):
    """Add the ``plot_phase_mag`` task producing phase and magnitude maps.

    Args:
        scale: Basin scale, embedded in the figure names and passed on.
        mode: Mode label, used as a sub-directory and passed on.
        row: Record with ``dataset``, ``analysis_order``, ``method`` and
            ``task`` attributes; the first output of ``row.task`` is an input.
    """
    stem = (f'{self.figsdir}/map/{mode}/'
            f'{row.dataset}_{row.analysis_order}_{row.method}.{scale}')
    figure_paths = [Path(f'{stem}.{kind}.png') for kind in ('phase', 'mag')]

    inputs = {
        'raster_cubes': self.hb_raster_cubes_fn,
        'phase_mag_cubes': row.task.outputs[0],
    }
    map_task = Task(plot_phase_mag, inputs, figure_paths,
                    func_args=[scale, mode, row])
    self.task_ctrl.add(map_task)
def gen_task_ctrl():
    """Build the TaskControl for the CMORPH min/max analysis.

    The first task (``calc_cmorph_min_max``) reads one file per start year in
    1998..2015 and writes a text file. The second task
    (``plot_cmorph_min_max``) reads the same per-year files plus the full
    1998-2018 file and the text file, and writes a PDF figure.

    Returns:
        The populated TaskControl.
    """
    task_ctrl = TaskControl(__file__)
    start_years = range(1998, 2016)
    cmorph_dir = PATHS['datadir'] / 'cmorph_data' / '8km-30min'

    min_max_inputs = {
        y: (cmorph_dir /
            f'cmorph_8km_N1280.{y}06-{y + 3}08.jja.asia_precip_afi.ppt_thresh_0p1.nc')
        for y in start_years
    }
    min_max_path = PATHS['figsdir'] / 'cmorph' / 'min_max_data.txt'
    task_ctrl.add(
        Task(calc_cmorph_min_max, min_max_inputs, [min_max_path],
             func_args=(start_years, )))

    # Build a separate dict for the plot task instead of mutating the dict
    # already handed to the first task: adding 'min_max' (the first task's own
    # output) to a shared dict would make that task depend on itself if Task
    # keeps a reference to its inputs.
    plot_inputs = {
        **min_max_inputs,
        'full': (cmorph_dir /
                 'cmorph_8km_N1280.199801-201812.jja.asia_precip_afi.ppt_thresh_0p1.nc'),
        'min_max': min_max_path,
    }
    fig_path = PATHS['figsdir'] / 'cmorph' / 'cmorph_full_min_max_asia.pdf'
    task_ctrl.add(Task(plot_cmorph_min_max, plot_inputs, [fig_path]))

    return task_ctrl
def gen_task_ctrl():
    """Build the TaskControl converting surface-wind ``.pp`` files to ``.nc``.

    The 2006 surface-wind ``.pp`` files of 'u-al508' and 'u-ak543' are globbed
    when this function runs; one ``convert_wrapper`` task is added per file
    found.

    Returns:
        The populated TaskControl.
    """
    tc = TaskControl(__file__)
    # pp_files = list((PATHS['datadir'] / 'u-al508' / 'ap8.pp' / 'lowlevel_wind_200901').glob('al508a.p8200901??.pp'))
    # for pp_file in pp_files:
    #     tc.add(Task(convert_wrapper, [pp_file], [pp_file.with_suffix('.nc')]))

    pp_files = []
    for suite, file_prefix in (('u-al508', 'al508a'), ('u-ak543', 'ak543a')):
        suite_dir = PATHS['datadir'] / suite / 'ap9.pp'
        pp_files.extend(
            suite_dir.glob(f'surface_wind_2006??/{file_prefix}.p9????????.pp'))

    for pp_file in pp_files:
        nc_file = pp_file.with_suffix('.nc')
        tc.add(Task(convert_wrapper, [pp_file], [nc_file]))

    return tc
def gen_task_ctrl():
    """Build the TaskControl for the experimental orographic-mask tasks.

    All tasks read ``R_clim.nc``. Tasks are added for plotting the masks,
    computing the extended masks (all months and JJA), the JJA climatology,
    two combined plots (full range via ``slice(None)`` and ``slice(5, 8)``)
    and a regridding to N1280.

    Returns:
        The populated TaskControl.
    """
    tc = TaskControl(__file__)
    r_clim = PATHS['datadir'] / 'era_interim_orog_precip' / 'R_clim.nc'
    data_dir = PATHS['datadir'] / 'experimental'
    figs_dir = PATHS['figsdir'] / 'experimental'
    extended_mask = data_dir / 'extended_orog_mask.nc'
    extended_mask_jja = data_dir / 'extended_orog_mask_JJA.nc'

    # Single-input tasks that only need R_clim.
    tc.add(Task(plot_masks, [r_clim], [figs_dir / 'sinclair_orog_masks.png']))
    tc.add(Task(calc_extended_orog_mask, [r_clim], [extended_mask]))
    tc.add(Task(calc_extended_orog_mask_JJA, [r_clim], [extended_mask_jja]))
    tc.add(Task(calc_JJA_clim, [r_clim], [data_dir / 'R_clim_JJA.nc']))

    # Combined plots: same function, different mask file, suffix and slice.
    combined_variants = (
        (extended_mask, '', slice(None)),
        (extended_mask_jja, '_JJA', slice(5, 8)),
    )
    for mask_path, suffix, month_slice in combined_variants:
        tc.add(
            Task(plot_masks_combined,
                 [r_clim, mask_path],
                 [
                     figs_dir / f'sinclair_orog_data_combined{suffix}.png',
                     figs_dir / f'sinclair_orog_masks_combined{suffix}.png',
                 ],
                 func_args=(month_slice, )))

    tc.add(Task(regrid_to_N1280, [r_clim], [data_dir / 'R_clim.N1280.nc']))

    return tc
def gen_dataset_comparison_tasks(self, scale, mode, row1, row2):
    """Add the ``plot_dataset_scatter`` task comparing two recorded tasks.

    Args:
        scale: Basin scale, embedded in the figure names and passed on.
        mode: Mode label, used as a sub-directory and passed on.
        row1: First record (``dataset``, ``analysis_order``, ``method``,
            ``task``); the first output of its task is an input.
        row2: Second record, used in the same way.
    """
    def label(row):
        return f'{row.dataset}_{row.analysis_order}_{row.method}'

    stem = (f'{self.figsdir}/comparison/{mode}/'
            f'{label(row1)}_vs_{label(row2)}.{scale}')
    scatter_paths = [Path(f'{stem}.{kind}.png') for kind in ('phase', 'mag')]

    compared_outputs = [row.task.outputs[0] for row in (row1, row2)]
    self.task_ctrl.add(
        Task(plot_dataset_scatter,
             compared_outputs,
             scatter_paths,
             func_args=[scale, mode, row1, row2]))
def gen_task_ctrl():
    """Build the TaskControl for regridding/extracting surface wind over Asia.

    For each model ('u-al508', 'u-ak543') and each of June, July and August
    2006, one ``regrid_extract_asia`` task is added. Its inputs are the
    orography ancillary followed by the sorted ``.nc`` files found (at
    call time) in that month's surface-wind directory.

    Returns:
        The populated TaskControl.
    """
    tc = TaskControl(__file__)
    year = 2006
    for model, month in itertools.product(['u-al508', 'u-ak543'], [6, 7, 8]):
        run_id = model[2:]  # drop the leading 'u-'
        wind_dir = (PATHS['datadir'] / model / 'ap9.pp'
                    / f'surface_wind_{year}{month:02}')
        orog_ancil = (PATHS['gcosmic'] / 'share' / 'ancils' / 'N1280'
                      / 'qrparm.orog')
        nc_files = [orog_ancil,
                    *sorted(wind_dir.glob(f'{run_id}a.p9????????.nc'))]
        outpath = wind_dir / f'{run_id}a.p9{year}{month:02}.asia.nc'
        tc.add(Task(regrid_extract_asia, nc_files, [outpath]))
    return tc
def gen_task_ctrl():
    """Build the TaskControl with the single input-less ``plot_dem`` task.

    Returns:
        The populated TaskControl.
    """
    dem_figure = PATHS['figsdir'] / 'dem' / 'dem_asia.pdf'
    task_ctrl = TaskControl(__file__)
    task_ctrl.add(Task(plot_dem, [], [dem_figure]))
    return task_ctrl
def gen_task_ctrl():
    """Build the TaskControl for the basin-weighted analysis.

    For each basin-scale family ('small_medium_large' and 'sliding') tasks
    are added, in this order, for: the hydrobasins raster cubes, the
    hydrobasins shapefiles per basin name, the weights cubes, the weighted
    mean precip per (dataset, basin name), the mean precip RMSE/correlation
    and bias summaries per reference dataset, the mean precip max/min per
    basin name, the weighted diurnal-cycle phase/magnitude per
    (dataset, basin name, mode), and the phase/magnitude RMSEs (area weighted
    and not).

    Returns:
        The populated TaskControl.
    """
    task_ctrl = TaskControl(__file__)

    for basin_scales in ['small_medium_large', 'sliding']:
        hb_raster_cubes_fn = PATHS[
            'output_datadir'] / f'basin_weighted_analysis/hb_N1280_raster_{basin_scales}.nc'
        cmorph_path = get_dataset_path('cmorph')
        # cmorph_path = (PATHS['datadir'] /
        #                'cmorph_data/8km-30min/cmorph_ppt_jja.199801-201812.asia_precip.ppt_thresh_0p1.N1280.nc')
        task_ctrl.add(
            Task(gen_hydrobasins_raster_cubes, [cmorph_path],
                 [hb_raster_cubes_fn],
                 func_args=[
                     SLIDING_SCALES if basin_scales == 'sliding' else SCALES
                 ]))

        # Basin names: the predefined HB_NAMES, or S0..S10 for 'sliding'.
        if basin_scales == 'small_medium_large':
            hb_names = HB_NAMES
        else:
            hb_names = [f'S{i}' for i in range(11)]
        shp_path_tpl = 'basin_weighted_analysis/{hb_name}/hb_{hb_name}.{ext}'
        for hb_name in hb_names:
            # Creates a few different files with different extensions - need to have them all in outputs
            # so that they are moved to the right place after run by Task.atomic_write.
            task_ctrl.add(
                Task(
                    gen_hydrobasins_files,
                    [],
                    {
                        ext: PATHS['output_datadir'] /
                        shp_path_tpl.format(hb_name=hb_name, ext=ext)
                        for ext in ['shp', 'dbf', 'prj', 'cpg', 'shx']
                    },
                    func_args=[hb_name],
                ))

        # N.B. Need to do this once for one dataset at each resolution.
        # I.e. only need one N1280 res dataset -- u-ak543.
        for dataset, hb_name in itertools.product(DATASETS[:4] + ['aphrodite'],
                                                  hb_names):
            # NOTE(review): there is no else branch -- a dataset matching none
            # of the cases below would reuse the previous iteration's
            # dataset_cube_path (or raise NameError on the first iteration).
            # Confirm that DATASETS[:4] only holds 'u-ak543'/'HadGEM3*' names.
            if dataset == 'u-ak543':
                dataset_cube_path = PATHS[
                    'datadir'] / 'u-ak543/ap9.pp/precip_200601/ak543a.p9200601.asia_precip.nc'
            elif dataset[:7] == 'HadGEM3':
                dataset_cube_path = HADGEM_FILENAMES[dataset]
            elif dataset == 'aphrodite':
                dataset_cube_path = PATHS[
                    'datadir'] / 'aphrodite_data/025deg/aphrodite_combined_all.nc'
            input_filenames = {
                dataset: dataset_cube_path,
                hb_name: PATHS['output_datadir'] /
                shp_path_tpl.format(hb_name=hb_name, ext='shp')
            }

            # The weights file is named by resolution, not by dataset.
            resolution = DATASET_RESOLUTION[dataset]
            weights_filename = (
                PATHS['output_datadir'] /
                f'basin_weighted_analysis/{hb_name}/weights_{resolution}_{hb_name}.nc'
            )
            task_ctrl.add(
                Task(gen_weights_cube, input_filenames, [weights_filename]))

        weighted_mean_precip_tpl = 'basin_weighted_analysis/{hb_name}/' \
                                   '{dataset}.{hb_name}.area_weighted.mean_precip.hdf'

        # Collects, per basin name, the mean precip files of all datasets;
        # consumed by the calc_mean_precip_max_min tasks further down.
        weighted_mean_precip_filenames = defaultdict(list)
        for dataset, hb_name in itertools.product(DATASETS + ['aphrodite'],
                                                  hb_names):
            fmt_kwargs = {'dataset': dataset, 'hb_name': hb_name}
            dataset_path = get_dataset_path(dataset)
            resolution = DATASET_RESOLUTION[dataset]
            weights_filename = (
                PATHS['output_datadir'] /
                f'basin_weighted_analysis/{hb_name}/weights_{resolution}_{hb_name}.nc'
            )
            weighted_mean_precip_filename = PATHS[
                'output_datadir'] / weighted_mean_precip_tpl.format(
                    **fmt_kwargs)
            weighted_mean_precip_filenames[hb_name].append(
                weighted_mean_precip_filename)

            task_ctrl.add(
                Task(native_weighted_basin_mean_precip_analysis, {
                    'dataset_path': dataset_path,
                    'weights': weights_filename
                }, [weighted_mean_precip_filename]))

        # One RMSE/correlation pickle and one bias CSV per reference dataset;
        # both tasks take every (dataset, basin name) mean precip file as input.
        for obs in ['cmorph', 'aphrodite', 'u-al508', 'u-ak543']:
            mean_precip_rmse_data_filename = (
                PATHS['output_datadir'] /
                f'basin_weighted_analysis/{obs}.mean_precip_all_rmses.{basin_scales}.pkl'
            )
            gen_mean_precip_rmses_inputs = {
                (ds, hb_name): PATHS['output_datadir'] /
                weighted_mean_precip_tpl.format(dataset=ds, hb_name=hb_name)
                for ds, hb_name in
                itertools.product(DATASETS + ['aphrodite'], hb_names)
            }
            task_ctrl.add(
                Task(gen_mean_precip_rmses_corrs,
                     inputs=gen_mean_precip_rmses_inputs,
                     outputs=[mean_precip_rmse_data_filename],
                     func_kwargs={
                         'hb_names': hb_names,
                         'obs': obs
                     }))

            mean_precip_bias_data_filename = (
                PATHS['output_datadir'] /
                f'basin_weighted_analysis/{obs}.mean_precip_all_bias.{basin_scales}.csv'
            )
            task_ctrl.add(
                Task(gen_mean_precip_highest_percentage_bias,
                     inputs=gen_mean_precip_rmses_inputs,
                     outputs=[mean_precip_bias_data_filename],
                     func_kwargs={
                         'hb_names': hb_names,
                         'obs': obs
                     }))

        for hb_name in hb_names:
            # N.B. out of order.
            max_min_path = PATHS[
                'output_datadir'] / f'basin_weighted_analysis/{hb_name}/mean_precip_max_min.pkl'
            task_ctrl.add(
                Task(calc_mean_precip_max_min,
                     weighted_mean_precip_filenames[hb_name], [max_min_path]))

        weighted_phase_mag_tpl = 'basin_weighted_analysis/{hb_name}/' \
                                 '{dataset}.{hb_name}.{mode}.area_weighted.phase_mag.hdf'
        for dataset, hb_name, mode in itertools.product(
                DATASETS, hb_names, PRECIP_MODES):
            fmt_kwargs = {'dataset': dataset, 'hb_name': hb_name, 'mode': mode}
            # HadGEM3 datasets use an upper-case 'JJA' in the cube name.
            if dataset[:7] == 'HadGEM3':
                cube_name = f'{mode}_of_precip_JJA'
            else:
                cube_name = f'{mode}_of_precip_jja'
            dataset_path = get_dataset_path(dataset)
            resolution = DATASET_RESOLUTION[dataset]
            weights_filename = PATHS[
                'output_datadir'] / f'basin_weighted_analysis/{hb_name}/weights_{resolution}_{hb_name}.nc'
            weighted_phase_mag_filename = PATHS[
                'output_datadir'] / weighted_phase_mag_tpl.format(**fmt_kwargs)

            task_ctrl.add(
                Task(native_weighted_basin_diurnal_cycle_analysis, {
                    'diurnal_cycle': dataset_path,
                    'weights': weights_filename
                }, [weighted_phase_mag_filename],
                     func_args=[cube_name]))

        # Two RMSE pickles per basin-scale family: area weighted and not.
        for area_weighted in [True, False]:
            weighted = 'area_weighted' if area_weighted else 'not_area_weighted'

            vrmse_data_filename = (
                PATHS['output_datadir'] /
                f'basin_weighted_analysis/all_rmses.{weighted}.{basin_scales}.pkl'
            )
            gen_rmses_inputs = {
                (ds, mode, hb_name): PATHS['output_datadir'] /
                weighted_phase_mag_tpl.format(
                    dataset=ds, hb_name=hb_name, mode=mode)
                for ds, mode, hb_name in itertools.product(
                    DATASETS, PRECIP_MODES, hb_names)
            }
            gen_rmses_inputs['raster_cubes'] = hb_raster_cubes_fn

            task_ctrl.add(
                Task(gen_phase_mag_rmses,
                     inputs=gen_rmses_inputs,
                     outputs=[vrmse_data_filename],
                     func_kwargs={
                         'area_weighted': area_weighted,
                         'hb_names': hb_names
                     }))

    return task_ctrl
def gen_task_ctrl():
    """Build the TaskControl for the direct orographic precip analysis.

    One ``gen_dist_cache`` task is added per distance threshold. Then, for
    every (year, model, dotprod_thresh, dist_thresh) combination and every
    month, three chained tasks are added: ``gen_orog_mask``,
    ``calc_orog_precip`` and ``calc_orog_precip_fracs``. A final
    ``combine_orog_precip_fracs`` task combines the fraction files of every
    (model, dotprod_thresh, dist_thresh, month) combination.

    Returns:
        The populated TaskControl.
    """
    tc = TaskControl(__file__)
    years = [2006]
    # years = [2005, 2006, 2007, 2008]
    models = ['al508', 'ak543']
    dist_threshs = [50, 100]
    dotprod_threshs = [0.05]
    months = [6, 7, 8]
    # dist_threshs = [20, 100]
    # dotprod_threshs = [0.05, 0.1]

    for dist_thresh in dist_threshs:
        cache_key = fmtp(cache_key_tpl, dist_thresh=dist_thresh)
        tc.add(
            Task(gen_dist_cache, {'orog': orog_path}, [cache_key],
                 func_args=(dist_thresh, )))

    for year, model, dotprod_thresh, dist_thresh in product(
            years, models, dotprod_threshs, dist_threshs):
        cache_key = fmtp(cache_key_tpl, dist_thresh=dist_thresh)
        for month in months:
            surf_wind_path = fmtp(surf_wind_path_tpl,
                                  model=model,
                                  year=year,
                                  month=month)
            orog_mask_path = fmtp(orog_mask_path_tpl,
                                  model=model,
                                  year=year,
                                  month=month,
                                  dotprod_thresh=dotprod_thresh,
                                  dist_thresh=dist_thresh)
            inputs = {
                'orog': orog_path,
                'cache_key': cache_key,
                'surf_wind': surf_wind_path,
            }
            tc.add(
                Task(gen_orog_mask, inputs, [orog_mask_path],
                     func_args=(dotprod_thresh, dist_thresh)))

            precip_path = fmtp(precip_path_tpl,
                               model=model,
                               year=year,
                               month=month)
            orog_precip_path = fmtp(orog_precip_path_tpl,
                                    model=model,
                                    year=year,
                                    month=month,
                                    dotprod_thresh=dotprod_thresh,
                                    dist_thresh=dist_thresh)
            orog_precip_inputs = {
                'orog_mask': orog_mask_path,
                'land_sea_mask': land_sea_mask,
                'precip': precip_path,
            }
            tc.add(
                Task(calc_orog_precip, orog_precip_inputs,
                     [orog_precip_path]))

            orog_precip_frac_inputs = {
                'orog_mask': orog_mask_path,
                'land_sea_mask': land_sea_mask,
                'orog_precip': orog_precip_path,
            }
            orog_precip_frac_path = fmtp(orog_precip_frac_path_tpl,
                                         model=model,
                                         year=year,
                                         month=month,
                                         dotprod_thresh=dotprod_thresh,
                                         dist_thresh=dist_thresh)
            tc.add(
                Task(calc_orog_precip_fracs, orog_precip_frac_inputs,
                     [orog_precip_frac_path]))

    variables = list(product(models, dotprod_threshs, dist_threshs, months))
    columns = ['model', 'dotprod_thresh', 'dist_thresh', 'month']
    # The combine step covers a single year. Previously it silently relied on
    # the ``year`` loop variable leaking out of the loop above (i.e. the last
    # entry of ``years``); bind that choice explicitly instead.
    # NOTE(review): if ``years`` ever holds more than one entry, only the last
    # year is combined -- confirm that is intended before extending ``years``.
    combine_year = years[-1]
    combine_inputs = [
        fmtp(orog_precip_frac_path_tpl,
             model=model,
             year=combine_year,
             month=month,
             dotprod_thresh=dotprod_thresh,
             dist_thresh=dist_thresh)
        for model, dotprod_thresh, dist_thresh, month in variables
    ]
    combine_fracs_output = [combine_frac_path]
    tc.add(
        Task(combine_orog_precip_fracs,
             combine_inputs,
             combine_fracs_output,
             func_args=(variables, columns)))

    return tc
def gen_task_ctrl():
    """Build the TaskControl for the basin-weighted analysis figures.

    For each basin-scale family ('small_medium_large' and 'sliding') plotting
    tasks are added for: the hydrobasins maps, the mean precip maps and their
    differences against 'cmorph' and 'aphrodite', the per-reference summary
    figures, the phase/magnitude maps, and the phase/magnitude RMSE figures.
    The combined Asia figures are only added for 'small_medium_large'.

    Returns:
        The populated TaskControl.
    """
    task_ctrl = TaskControl(__file__)

    for basin_scales in ['small_medium_large', 'sliding']:
        hb_raster_cubes_fn = PATHS[
            'output_datadir'] / f'basin_weighted_analysis/hb_N1280_raster_{basin_scales}.nc'
        # Basin names: the predefined HB_NAMES, or S0..S10 for 'sliding'.
        if basin_scales == 'small_medium_large':
            hb_names = HB_NAMES
        else:
            hb_names = [f'S{i}' for i in range(11)]
        shp_path_tpl = 'basin_weighted_analysis/{hb_name}/hb_{hb_name}.{ext}'
        for hb_name in hb_names:
            # All five shapefile components are listed as inputs.
            shp_inputs = {
                ext: PATHS['output_datadir'] /
                shp_path_tpl.format(hb_name=hb_name, ext=ext)
                for ext in ['shp', 'dbf', 'prj', 'cpg', 'shx']
            }
            task_ctrl.add(
                Task(
                    plot_hydrobasins_files,
                    shp_inputs,
                    [
                        PATHS['figsdir'] / 'basin_weighted_analysis' / 'map' /
                        'hydrobasins_size' / f'map_{hb_name}.png'
                    ],
                    func_args=[hb_name],
                ))

        weighted_mean_precip_tpl = 'basin_weighted_analysis/{hb_name}/' \
                                   '{dataset}.{hb_name}.area_weighted.mean_precip.hdf'

        # NOTE(review): filled below but not read anywhere in this function.
        weighted_mean_precip_filenames = defaultdict(list)
        for dataset, hb_name in itertools.product(DATASETS + ['aphrodite'],
                                                  hb_names):
            fmt_kwargs = {'dataset': dataset, 'hb_name': hb_name}
            max_min_path = PATHS[
                'output_datadir'] / f'basin_weighted_analysis/{hb_name}/mean_precip_max_min.pkl'
            weighted_mean_precip_filename = PATHS[
                'output_datadir'] / weighted_mean_precip_tpl.format(
                    **fmt_kwargs)
            weighted_mean_precip_filenames[hb_name].append(
                weighted_mean_precip_filename)

            task_ctrl.add(
                Task(plot_mean_precip, {
                    'weighted': weighted_mean_precip_filename,
                    'raster_cubes': hb_raster_cubes_fn,
                    'mean_precip_max_min': max_min_path
                }, [
                    PATHS['figsdir'] / 'basin_weighted_analysis' / 'map' /
                    'mean_precip' /
                    f'map_{dataset}.{hb_name}.area_weighted.png'
                ],
                     func_args=[dataset, hb_name]))

            # Difference against cmorph for every other dataset.
            if dataset != 'cmorph':
                fmt_kwargs = {'dataset': 'cmorph', 'hb_name': hb_name}
                cmorph_weighted_mean_precip_filename = (
                    PATHS['output_datadir'] /
                    weighted_mean_precip_tpl.format(**fmt_kwargs))

                task_ctrl.add(
                    Task(plot_obs_mean_precip_diff, {
                        'dataset_weighted': weighted_mean_precip_filename,
                        'obs_weighted': cmorph_weighted_mean_precip_filename,
                        'raster_cubes': hb_raster_cubes_fn,
                        'mean_precip_max_min': max_min_path
                    }, [
                        PATHS['figsdir'] / 'basin_weighted_analysis' / 'map' /
                        'cmorph_mean_precip_diff' /
                        f'map_{dataset}.{hb_name}.area_weighted.png'
                    ],
                         func_args=[dataset, hb_name]))

            # Difference against aphrodite for the remaining datasets.
            # NOTE(review): these figures are also written below the
            # 'cmorph_mean_precip_diff' directory -- confirm that is intended.
            if dataset not in ['cmorph', 'aphrodite']:
                fmt_kwargs = {'dataset': 'aphrodite', 'hb_name': hb_name}
                obs_weighted_mean_precip_filename = (
                    PATHS['output_datadir'] /
                    weighted_mean_precip_tpl.format(**fmt_kwargs))

                task_ctrl.add(
                    Task(plot_obs_mean_precip_diff, {
                        'dataset_weighted': weighted_mean_precip_filename,
                        'obs_weighted': obs_weighted_mean_precip_filename,
                        'raster_cubes': hb_raster_cubes_fn,
                        'mean_precip_max_min': max_min_path
                    }, [
                        PATHS['figsdir'] / 'basin_weighted_analysis' / 'map' /
                        'cmorph_mean_precip_diff' /
                        f'map_aphrodite_vs_{dataset}.{hb_name}.area_weighted.png'
                    ],
                         func_args=[dataset, hb_name]))

        # One pair of summary figures (rmse, corr) per reference dataset.
        for obs in ['cmorph', 'aphrodite', 'u-al508', 'u-ak543']:
            mean_precip_rmse_data_filename = (
                PATHS['output_datadir'] /
                f'basin_weighted_analysis/{obs}.mean_precip_all_rmses.{basin_scales}.pkl'
            )

            task_ctrl.add(
                Task(plot_obs_vs_all_datasets_mean_precip,
                     inputs=[mean_precip_rmse_data_filename],
                     outputs=[
                         PATHS['figsdir'] / 'basin_weighted_analysis' /
                         'cmorph_vs' / 'mean_precip' /
                         f'{obs}_vs_all_datasets.all_{f}.{basin_scales}.pdf'
                         for f in ['rmse', 'corr']
                     ]))

        if basin_scales == 'small_medium_large':
            # NOTE(review): input_paths is created once and updated on every
            # pass, so the dict handed to the first Task is mutated again for
            # the second -- confirm Task copies its inputs.
            input_paths = {'raster_cubes': hb_raster_cubes_fn}
            for name, datasets in zip(
                ['', 'full_'],
                (['cmorph', 'u-al508', 'u-ak543'
                  ], ['cmorph', 'u-al508', 'u-am754', 'u-ak543'])):
                paths = {
                    f'weighted_{hb_name}_{dataset}':
                    (PATHS['output_datadir'] / weighted_mean_precip_tpl.format(
                        hb_name=hb_name, dataset=dataset))
                    for hb_name, dataset in itertools.product(
                        hb_names, datasets)
                }
                input_paths.update(paths)
                task_ctrl.add(
                    Task(plot_mean_precip_asia_combined,
                         input_paths, [
                             PATHS['figsdir'] / 'basin_weighted_analysis' /
                             'map' / 'mean_precip_asia_combined' /
                             f'{name}asia_combined_basin_scales.pdf'
                         ],
                         func_args=[datasets, hb_names]))

        weighted_phase_mag_tpl = 'basin_weighted_analysis/{hb_name}/' \
                                 '{dataset}.{hb_name}.{mode}.area_weighted.phase_mag.hdf'

        for dataset, hb_name, mode in itertools.product(
                DATASETS, hb_names, PRECIP_MODES):
            fmt_kwargs = {'dataset': dataset, 'hb_name': hb_name, 'mode': mode}
            weighted_phase_mag_filename = PATHS[
                'output_datadir'] / weighted_phase_mag_tpl.format(**fmt_kwargs)

            # Three figures per task: phase, alpha_phase and mag.
            task_ctrl.add(
                Task(plot_phase_mag, {
                    'weighted': weighted_phase_mag_filename,
                    'raster_cubes': hb_raster_cubes_fn
                }, [
                    PATHS['figsdir'] / 'basin_weighted_analysis' / 'map' /
                    mode /
                    f'map_{dataset}.{hb_name}.{mode}.area_weighted.{v}.png'
                    for v in ['phase', 'alpha_phase', 'mag']
                ],
                     func_args=[dataset, hb_name, mode]))

        if basin_scales == 'small_medium_large':
            for name, datasets in zip(
                ['', 'full_'],
                (['cmorph', 'u-al508', 'u-ak543'
                  ], ['cmorph', 'u-al508', 'u-am754', 'u-ak543'])):
                for mode in PRECIP_MODES:
                    input_paths = {
                        f'weighted_{hb_name}_{dataset}':
                        (PATHS['output_datadir'] /
                         weighted_phase_mag_tpl.format(
                             hb_name=hb_name, dataset=dataset, mode=mode))
                        for hb_name, dataset in itertools.product(
                            hb_names, datasets)
                    }
                    input_paths.update({'raster_cubes': hb_raster_cubes_fn})
                    task_ctrl.add(
                        Task(plot_phase_alpha_combined,
                             input_paths, [
                                 PATHS['figsdir'] / 'basin_weighted_analysis' /
                                 'map' / 'phase_alpha_combined' /
                                 f'{name}{mode}_phase_alpha_combined_asia.pdf'
                             ],
                             func_args=(datasets, hb_names, mode)))

        # One RMSE figure each for the area weighted / not weighted pickles.
        for area_weighted in [True, False]:
            weighted = 'area_weighted' if area_weighted else 'not_area_weighted'

            vrmse_data_filename = (
                PATHS['output_datadir'] /
                f'basin_weighted_analysis/all_rmses.{weighted}.{basin_scales}.pkl'
            )

            task_ctrl.add(
                Task(
                    plot_cmorph_vs_all_datasets_phase_mag,
                    [vrmse_data_filename],
                    [
                        PATHS['figsdir'] / 'basin_weighted_analysis' /
                        'cmorph_vs' / 'phase_mag' /
                        f'cmorph_vs_all_datasets.all_rmse.{weighted}.{basin_scales}.pdf'
                    ],
                ))

    return task_ctrl
def gen_task_ctrl(test=False):
    """Build the TaskControl for the regional raw-data figures and animation.

    For every (model, month, region) combination four tasks are added: hourly
    precip/wind figures for each day, 24 diurnal-cycle figures, 24
    diurnal-cycle anomalous-wind figures, and an animation built from the
    anomalous-wind figures.

    Args:
        test: When true, only the first model and first month are used and
            figures are generated for 2 days instead of 30.

    Returns:
        The populated TaskControl.
    """
    tc = TaskControl(__file__)
    year = 2006
    models = ['al508', 'ak543']
    months = [6, 7, 8]
    regions = ['TP_southern_flank', 'sichuan_basin']
    # The same day count is used for every month.
    days_in_month = 30
    if test:
        models, months, days_in_month = models[:1], months[:1], 2
        # regions = regions[:1]

    for model, month, region in product(models, months, regions):
        when = {'model': model, 'year': year, 'month': month}
        surf_wind_path = fmtp(surf_wind_path_tpl, **when)
        precip_path = fmtp(precip_path_tpl, **when)
        orog_mask_path = fmtp(orog_mask_path_tpl, **when,
                              dotprod_thresh=0.05, dist_thresh=100)

        def region_inputs():
            # A fresh dict per task, so no two tasks share an inputs object.
            return {
                'orog': orog_path,
                'surf_wind': surf_wind_path,
                'precip': precip_path,
                'orog_mask': orog_mask_path,
            }

        raw_data_fig_paths = []
        for day in range(1, days_in_month + 1):
            for h in range(24):
                raw_data_fig_paths.append(
                    fmtp(raw_data_fig_tpl, **when,
                         day=day, hour=h, region=region))
        tc.add(
            Task(plot_precip_wind_region, region_inputs(), raw_data_fig_paths,
                 func_args=(region, month)))

        raw_data_dc_fig_paths = [
            fmtp(raw_data_dc_fig_tpl, **when, hour=h, region=region)
            for h in range(24)
        ]
        tc.add(
            Task(plot_dc_region, region_inputs(), raw_data_dc_fig_paths,
                 func_args=(region, 24)))

        raw_data_dc_anom_wind_fig_paths = [
            fmtp(raw_data_dc_anom_wind_fig_tpl, **when, hour=h, region=region)
            for h in range(24)
        ]
        tc.add(
            Task(plot_dc_anom_wind_region, region_inputs(),
                 raw_data_dc_anom_wind_fig_paths,
                 func_args=(region, 24)))

        anim_path = fmtp(anim_raw_data_dc_anom_wind_fig_tpl, **when,
                         region=region)
        tc.add(
            Task(create_animation, raw_data_dc_anom_wind_fig_paths,
                 [anim_path], (120, )))

    return tc
from remake import Task, TaskControl # from cosmic.task import Task, TaskControl BSUB_KWARGS = { 'job_name': 'demo', 'queue': 'short-serial', 'max_runtime': '00:10', # 'mem': '64000', } def task_fn(inputs, outputs): print(inputs) print(outputs) for o in outputs: o.touch() task_ctrl = TaskControl() task_ctrl.add(Task(task_fn, [], ['task_demo1.out'])) task_ctrl.add(Task(task_fn, [], ['task_demo2.out'])) task_ctrl.add(Task(task_fn, [], ['task_demo3.out'])) task_ctrl.add(Task(task_fn, ['task_demo1.out', 'task_demo2.out', 'task_demo3.out'], ['task_demo4.out'])) task_ctrl.add(Task(task_fn, ['task_demo4.out'], ['task_demo5.out'])) task_ctrl.add(Task(task_fn, ['task_demo1.out', 'task_demo5.out'], ['task_demo6.out']))