def check_metrics(models, sites):
    """Flag simulations whose stored metrics contain implausible values.

    For each model/site pair the metrics CSV at
    ``source/models/<model>/metrics/<model>_<site>_metrics.csv`` is read
    (first column used as the index). Pairs with no CSV are skipped
    silently. A pair is flagged when any metric exceeds 500, when any value
    in the ``corr`` row lies outside [-1, 1], or when any value in the
    ``overlap`` row lies outside [0, 1].

    :models: iterable of model names
    :sites: iterable of site names
    :returns: list of ``(model, site)`` tuples for the flagged simulations
    """
    path_template = 'source/models/{m}/metrics/{m}_{s}_metrics.csv'
    flagged = []

    for model in models:
        for site in sites:
            metrics_file = path_template.format(m=model, s=site)
            if not os.path.exists(metrics_file):
                continue

            table = pd.read_csv(metrics_file, index_col=0)

            # The checks run lazily and in a fixed order, so the 'corr' and
            # 'overlap' rows are only looked up when no earlier check fired.
            crazy = (table > 500).any().any()
            if not crazy:
                corr = table.loc['corr']
                crazy = (corr > 1).any() or (corr < -1).any()
            if not crazy:
                overlap = table.loc['overlap']
                crazy = (overlap > 1).any() or (overlap < 0).any()

            if crazy:
                print_bad("Crazy value for {m} at {s}".format(m=model, s=site))
                flagged.append((model, site))

    return flagged
def check_model_data(models, sites):
    """Sanity-check every stored simulation for the given models and sites.

    Looks for ``model_data/<model>/<model>_<site>.nc`` for each pair and
    prints a one-character progress marker per site: ``x`` when the file is
    missing, ``.`` when the sanity check passes. When the sanity check
    raises ``RuntimeError`` the message is reported and the pair recorded.

    :models: list of model names
    :sites: list of site names
    :returns: list of ``(model, site)`` tuples that failed the sanity check
    """
    failed = []
    print("Checking {nm} models at {ns} sites.".format(nm=len(models),
                                                        ns=len(sites)))

    for model in models:
        print('Checking {m}:'.format(m=model))

        for site in sites:
            nc_path = 'model_data/{m}/{m}_{s}.nc'.format(m=model, s=site)

            if os.path.exists(nc_path):
                with xr.open_dataset(nc_path) as dataset:
                    try:
                        model_sanity_check(dataset, model, site)
                    except RuntimeError as err:
                        print_bad('\n' + str(err))
                        failed.append((model, site))
                    else:
                        print('.', end='', flush=True)
            else:
                # No simulation has been run for this model/site pair.
                print('x', end='', flush=True)

        # Terminate this model's row of progress markers.
        print('')

    return failed
def run_simulation(model, name, site, multivariate=False, overwrite=False,
                   fix_closure=True):
    """Main function for fitting and running a model.

    The simulation is saved to ``model_data/<name>/<name>_<site>.nc``. If
    that file already exists and ``overwrite`` is false, a warning is
    printed and nothing is run. Otherwise the model is fitted and run up to
    three times; a run is accepted as soon as it passes
    ``model_sanity_check``. If all three runs fail, the last result is
    still saved, with a ``Warning`` attribute recording the failure.

    :model: sklearn-style model or pipeline (regression estimator)
    :name: name of the model
    :site: PALS site name to run the model at (or 'all', or 'debug')
    :multivariate: passed through to ``fit_predict``
    :overwrite: if true, re-run and replace an existing simulation file
    :fix_closure: passed through to ``fit_predict``
    :returns: None
    """
    sim_dir = 'model_data/{n}'.format(n=name)
    os.makedirs(sim_dir, exist_ok=True)

    nc_file = '{d}/{n}_{s}.nc'.format(d=sim_dir, n=name, s=site)

    if os.path.isfile(nc_file) and not overwrite:
        print_warn(
            "Sim netcdf already exists for {n} at {s}, use --overwrite to re-run."
            .format(n=name, s=site))
        return

    # We attempt to run the model up to 3 times, in case of numerical
    # problems.
    max_attempts = 3
    for i in range(max_attempts):
        sim_data = fit_predict(model, name, site,
                               multivariate=multivariate,
                               fix_closure=fix_closure)
        try:
            model_sanity_check(sim_data, name, site)
        except RuntimeError as e:
            print_warn(str(e))
            if i < max_attempts - 1:
                print_warn('Attempting a %s run.' % ['2nd', '3rd'][i])
            else:
                print_bad(
                    'Giving up after 3 failed runs. Check your model structures or met data.'
                )
                # The failed result is still written below; tag it so the
                # failure is visible in the saved file.
                sim_data.attrs.update(
                    {'Warning': 'model failed after 3 attempts, saved anyway'})
        else:
            # Sanity check passed; stop retrying.
            break

    if os.path.isfile(nc_file):
        print_warn("Overwriting sim file at {f}".format(f=nc_file))
    else:
        print_good("Writing sim file at {f}".format(f=nc_file))

    sim_data.to_netcdf(nc_file)
    return
def eval_simulation(name, site, sim_file=None, plots=False, fix_closure=True,
                    qc=True):
    """Main function for evaluating an existing simulation.

    Opens the simulation dataset (from ``sim_file`` if given, otherwise from
    the path returned by ``get_sim_nc_path``), evaluates it against the
    site's flux data and optionally produces diagnostic plots. When
    ``sim_file`` is given, its contents are also written to the standard
    simulation path, overwriting any existing file there.

    If the dataset cannot be opened, an error is printed and the function
    returns without evaluating anything.

    TODO: skip running if cached, for easier page regeneration

    :name: name of the model
    :site: PALS site name to run the model at
    :sim_file: Path to simulation netcdf
    :plots: if true, also call ``diagnostic_plots``
    :fix_closure: passed through to ``get_flux_data``
    :qc: passed through to ``evaluate_simulation``
    :returns: None
    """
    nc_path = get_sim_nc_path(name, site)

    if sim_file is None:
        filename = nc_path
    else:
        filename = sim_file

    try:
        sim_data = xr.open_dataset(filename)
    except (OSError, RuntimeError) as e:
        print_bad(
            "Sim file ({f}) doesn't exist. What are you doing? {e}".format(
                f=filename, e=e))
        return

    # Close the dataset even if evaluation or plotting raises, so the
    # underlying file handle is never leaked.
    try:
        if sim_file is not None:
            # WARNING! over writes existing sim!
            sim_data.to_netcdf(nc_path)

        flux_data = get_flux_data([site], fix_closure=fix_closure)[site]

        evaluate_simulation(sim_data, flux_data, name, qc=qc)

        if plots:
            diagnostic_plots(sim_data, flux_data, name)
    finally:
        sim_data.close()

    return
def main_rst_gen(name, site):
    """Write the RST page for an already-evaluated simulation.

    Loads the stored evaluation results and the list of existing plots for
    the model/site pair and passes them to ``model_site_rst_write``. If
    loading either raises ``OSError`` or ``RuntimeError``, an error is
    printed and nothing is written.

    :name: name of the model
    :site: PALS site name to run the model at
    """
    try:
        results = load_sim_evaluation(name, site)
        plots = get_existing_plots(name, site)
    except (OSError, RuntimeError) as err:
        message = 'one or more files missing for {n} at {s}: {e}'.format(
            n=name, s=site, e=err)
        print_bad(message)
    else:
        # Only reached when both inputs loaded; errors raised while writing
        # are deliberately not caught here.
        model_site_rst_write(name, site, results, plots)