def eval_simulation(name, site, sim_file=None, plots=False, fix_closure=True,
                    qc=True, overwrite=False):
    """Main function for evaluating an existing simulation.

    Copies simulation data to source directory.

    TODO: skip running if cached, for easier page regeneration

    :name: name of the model
    :site: PALS site name to run the model at
    :sim_file: Path to simulation netcdf. Only required if simulation is at a
        non-standard place.
    :plots: also generate diagnostic plots after evaluating
    :fix_closure: passed through to the flux data loader
    :qc: passed through to evaluate_simulation
    :overwrite: re-evaluate even if an up-to-date metrics file exists
    """
    args = locals()
    args_str = '\n'.join([k + ': ' + str(args[k]) for k in sorted(args.keys())])

    logger = setup_logger(__name__, 'logs/eval/{m}/{s}/{m}_{s}.log'.format(m=name, s=site))
    logger.info("Evaluating model.\nArgs:\n{a}".format(a=args_str))

    nc_path = get_sim_nc_path(name, site)

    if sim_file is None:
        filename = nc_path
    else:
        filename = sim_file

    eval_path = 'source/models/{n}/metrics/{n}_{s}_metrics.csv'.format(n=name, s=site)

    # Only consider skipping when a metrics file already exists. (Previously,
    # a missing metrics file fell into the "already exists, skipping" branch,
    # so first-time evaluations never ran without --overwrite.)
    if not overwrite and os.path.exists(eval_path):
        if os.path.exists(filename) and \
                os.path.getmtime(filename) > os.path.getmtime(eval_path):
            logger.warning("Overwriting evaluation file because simulation is newer")
        else:
            logger.warning("Evaluation file already exists, skipping")
            return

    try:
        sim_data = xr.open_dataset(filename)
    except (OSError, RuntimeError) as e:
        logger.error("Sim file ({f}) doesn't exist. What are you doing? {e}".format(f=filename, e=e))
        return

    if sim_file is not None:
        # Cache an externally supplied simulation at the standard location.
        logger.warning("Overwriting existing sim!")
        sim_data.to_netcdf(nc_path)

    flux_data = get_flux_data([site], fix_closure=fix_closure)[site]

    evaluate_simulation(sim_data, flux_data, name, site=site, qc=qc)

    if plots:
        diagnostic_plots(sim_data, flux_data, name, site=site)

    sim_data.close()
    return
from docopt import docopt import os import xarray as xr import numpy as np import matplotlib.pyplot as plt from mpl_toolkits import basemap from mpl_toolkits.axes_grid1 import ImageGrid from matplotlib.colors import LinearSegmentedColormap from empirical_lsm.gridded_datasets import get_MODIS_data # , get_GLEAM3a_data, get_MPI_data from pals_utils.logging import setup_logger logger = setup_logger(__name__, 'logs/plot_dataset.log') def get_range(x): return np.quantile(x, [0, 100]) def colormap(x0): """custom colourmap for map plots""" if x0 > 0: x2 = (1 + 2 * x0) / 3.0 x3 = (2 + x0) / 3.0 # blue, white, red, yellow, purple cdict1 = {
    -h, --help  Show this screen and exit.
"""

from docopt import docopt

import os
from glob import glob

from empirical_lsm.data import get_sites
from empirical_lsm.checks import check_model_data, check_metrics
from empirical_lsm.offline_simulation import run_model_site_tuples_mp
from empirical_lsm.offline_eval import eval_simulation

from pals_utils.logging import setup_logger
logger = setup_logger(None, 'logs/check_sanity.log')


def main(args):
    # `args` is a docopt-parsed options dict.
    # Resolve the site list: default site set, else a named site set,
    # falling back to treating --sites as a comma-separated list of names.
    if args['--sites'] is None:
        sites = get_sites('PLUMBER_ext')
    else:
        try:
            sites = get_sites(args['--sites'])
        except:  # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit — consider `except Exception:`
            sites = args['--sites'].split(',')

    if args['all']:
        if args['data']:
            # One model per directory under model_data/
            models = [os.path.basename(f) for f in glob('model_data/*')]
        else:
Usage: run_model.py run <name> <site> [--no-mp] [--multivariate] [--overwrite] [--no-fix-closure] Options: -h, --help Show this screen and exit. """ from docopt import docopt from pals_utils.data import set_config from empirical_lsm.offline_simulation import run_simulation_mp from pals_utils.logging import setup_logger logger = setup_logger(__name__, 'logs/run_model.log') set_config(['vars', 'flux'], ['NEE', 'Qle', 'Qh']) def main(args): name = args['<name>'] site = args['<site>'] run_simulation_mp(name, site, no_mp=args['--no-mp'], multivariate=args['--multivariate'], overwrite=args['--overwrite'], fix_closure=not args['--no-fix-closure'])
def run_simulation(model, name, site, multivariate=False, overwrite=False, fix_closure=True):
    """Main function for fitting and running a model.

    :model: sklearn-style model or pipeline (regression estimator)
    :name: name of the model
    :site: PALS site name to run the model at (or 'all', or 'debug')
    :multivariate: fit all flux variables jointly
    :overwrite: re-run even if the sim netcdf already exists
    :fix_closure: correct energy balance closure in the training data
    """
    args = locals()
    args_str = '\n'.join(
        [k + ': ' + str(args[k]) for k in sorted(args.keys())])

    sim_dir = 'model_data/{n}'.format(n=name)
    os.makedirs(sim_dir, exist_ok=True)

    nc_file = '{d}/{n}_{s}.nc'.format(d=sim_dir, n=name, s=site)

    if os.path.isfile(nc_file) and not overwrite:
        logger.warning(
            "Sim netcdf already exists for {n} at {s}, use --overwrite to re-run."
            .format(n=name, s=site))
        return

    run_logger = setup_logger(
        __name__, 'logs/run/{m}/{s}/{m}_{s}.log'.format(m=name, s=site))
    run_logger.info("Running model.\nArgs:\n{a}".format(a=args_str))

    # Attempt the run up to 3 times, in case of transient numerical problems.
    for i in range(3):
        try:
            sim_data = fit_predict(model, name, site,
                                   multivariate=multivariate,
                                   fix_closure=fix_closure)
        except AssertionError as e:
            logger.exception("Model failed: " + str(e))
            return

        try:
            sim_data = model_sanity_check(sim_data, name, site) or sim_data
        except RuntimeError as e:
            logger.warning(str(e))

            if i < 2:
                logger.warning('Attempting a %s run.' % ['2nd', '3rd'][i])
                continue
            else:
                # Typo fix: "structres" -> "structures" in the error message.
                logger.error(
                    'Giving up after 3 failed runs. Check your model structures or met data.')
                # Save the result anyway, but flag it in the netcdf attributes.
                sim_data.attrs.update(
                    {'Warning': 'model failed after 3 attempts, saved anyway'})
        else:
            # model run successful, presumably
            break

    if os.path.isfile(nc_file):
        logger.warning("Overwriting sim file at {f}".format(f=nc_file))
    else:
        logger.info("Writing sim file at {f}".format(f=nc_file))

    sim_data.to_netcdf(nc_file)
    return
from docopt import docopt

import os
import datetime
import numpy as np
import xarray as xr

import pals_utils.data as pud

from empirical_lsm.data import get_sites, get_data_dir
from empirical_lsm.gridded_datasets import get_dataset_data, get_dataset_freq
from empirical_lsm.models import get_model

from pals_utils.logging import setup_logger
logger = setup_logger(__name__, 'logs/gridded_benchmarks.log')


def predict_gridded(model, dataset_data, flux_vars, datafreq=None):
    """predict model results for gridded data

    :model: scikit-learn style model/pipeline
    :dataset_data: xarray-style dataset of gridded driver data
    :flux_vars: flux variable names to predict — presumably; TODO confirm against callers
    :datafreq: optional data frequency — NOTE(review): units unclear from here, confirm
    :returns: xarray-style dataset
    """
    # set prediction metadata: start from the coordinate variables only,
    # so the result inherits the input grid's coords.
    prediction = dataset_data[list(dataset_data.coords)]

    # Arrays like (var, lon, lat, time)
    result = np.full([
inspect_cluster_regression.py <model_path> inspect_cluster_regression.py (-h | --help | --version) Options: -h, --help Show this screen and exit. --option=<n> Option description [default: 3] """ from docopt import docopt import pickle import numpy as np import empirical_lsm from pals_utils.logging import setup_logger logger = setup_logger(__name__, 'logs/inspect_cluster_regression.log') def get_wrapper_vars(wrapper): return [v + '_' + l for v, lags in wrapper.var_lags.items() for l in lags] def get_cluster_regression_model(wrapper): """Unwraps a model""" if isinstance(wrapper, empirical_lsm.clusterregression.ModelByCluster): return wrapper else: return get_cluster_regression_model(wrapper.model) def get_cluster_regression_centers(model):
    import_sim.py benchmark <name> [<site>...]
    import_sim.py sim <name> <site> <file>

Options:
    -h, --help  Show this screen and exit.
"""

from docopt import docopt

import numpy as np
import xarray as xr

from pals_utils.data import get_flux_data

from empirical_lsm.data import get_sites, get_sim_nc_path

from pals_utils.logging import setup_logger
logger = setup_logger(__name__, 'logs/check_sanity.log')


def fix_benchmark(site_data, name, site):
    """Performs checks on broken benchmarks, and fixes them inplace

    :name: benchmark name
    :site_data: xarray dataset (will be modified inplace)
    :site: PALS site name — NOTE(review): not used in the visible portion; confirm
    """
    if name in ['Manabe_Bucket.2', 'Penman_Monteith.1']:
        # These benchmarks carry cross-attached coordinates (longitude holds a
        # 'latitude' coord and vice versa). Detach both variables, strip the
        # stray coordinate from each, then re-attach them cleanly.
        lon = site_data['longitude']
        lat = site_data['latitude']
        del site_data['longitude'], site_data['latitude'], lon['latitude'], lat['longitude']
        site_data['longitude'] = lon
Options:
    -h, --help       Show this screen and exit.
    --sites=<sites>  Sites to run the models at [default: PLUMBER_ext]
"""

from docopt import docopt

import subprocess

from empirical_lsm.offline_simulation import run_simulation_mp
from empirical_lsm.offline_eval import eval_simulation_mp, main_rst_gen_mp
from empirical_lsm.model_sets import get_model_set
from empirical_lsm.model_search import model_site_index_rst_mp, model_search_index_rst, get_available_models

from pals_utils.logging import setup_logger
logger = setup_logger(__name__, 'logs/eval_all.log')


def eval_simulation_all(names, sites, run=False, multivariate=True, evalu=False,
                        plots=False, rst=False, html=False, rebuild=False,
                        no_mp=False, overwrite=False, fix_closure=True):
    """Run and/or evaluate the given models at the given sites.

    Flags presumably gate pipeline stages (run, evalu, plots, rst, html,
    rebuild) — body not visible in this view; TODO confirm semantics.
    """
Description:
    Evaluates a model (sim or set of sims) and produces rst output with diagnostics

Usage:
    eval_model.py eval <name> <site> [<file>] [--no-mp] [--plot] [--no-fix-closure] [--no-qc]
    eval_model.py rst-gen <name> <site> [--no-mp]

Options:
    -h, --help  Show this screen and exit.
"""

from docopt import docopt

from empirical_lsm.offline_eval import eval_simulation_mp, main_rst_gen_mp

from pals_utils.logging import setup_logger
logger = setup_logger(__name__, 'logs/eval_model.log')


def main(args):
    # `args` is a docopt-parsed options dict; pull out the positionals.
    name = args['<name>']
    site = args['<site>']
    sim_file = args['<file>']
    plots = args['--plot']

    if args['eval']:
        eval_simulation_mp(name, site, sim_file, plots,
                           no_mp=args['--no-mp'],
                           fix_closure=not args['--no-fix-closure'],
model_combos.py (-h | --help | --version) Options: -h, --help Show this screen and exit. --sites=<sites> Sites to run the models at [default: PLUMBER_ext] """ from docopt import docopt from empirical_lsm.model_sets import get_combo_model_names from empirical_lsm.offline_simulation import run_simulation_mp from empirical_lsm.offline_eval import eval_simulation_mp from pals_utils.logging import setup_logger logger = setup_logger(__name__, 'logs/model_combos.log') def main(sites, run=False, multivariate=True, evalu=False, plots=False, no_mp=False, overwrite=False, fix_closure=True): names = get_combo_model_names() if args['--run']: for name in names: run_simulation_mp(name, sites, no_mp=no_mp, multivariate=multivariate, overwrite=overwrite, fix_closure=fix_closure) if args['--eval']: for name in names: eval_simulation_mp(name, sites, plots=plots, no_mp=no_mp,