def eval_simulation_mp(name, site, sim_file=None, plots=False, no_mp=False, fix_closure=True, qc=True):
    """Evaluate a model at one or many sites, using multiple processes if necessary.

    Only works if the simulations have already been run.

    :param name: model name used to locate simulation output
    :param site: single site name, or a site-set name resolvable by get_sites
    :param sim_file: explicit simulation file — only honoured for a
        single-site evaluation; multi-site runs pass None so each site
        resolves its own default file
    :param plots: whether to generate evaluation plots
    :param no_mp: run serially instead of using a process pool
    :param fix_closure: whether to fix energy budget closure
    :param qc: whether to use quality-controlled data
    """
    try:
        datasets = get_sites(site)
    except KeyError:
        # Not a known site set: treat `site` as a single site name.
        eval_simulation(name, site, sim_file, plots=plots,
                        fix_closure=fix_closure, qc=qc)
    else:
        if no_mp:
            for s in datasets:
                eval_simulation(name, s, plots=plots,
                                fix_closure=fix_closure, qc=qc)
        else:
            # sim_file is None per site: each worker finds its own file.
            f_args = [[name, s, None, plots, fix_closure, qc]
                      for s in datasets]
            # Use roughly half the cores (plus one), capped at the total.
            total_cores = os.cpu_count()
            ncores = min(total_cores, 1 + total_cores // 2)
            with Pool(ncores) as p:
                p.starmap(eval_simulation, f_args)
def run_simulation_mp(name, site, no_mp=False, multivariate=False, overwrite=False, fix_closure=True):
    """Run a model simulation at one site or a whole site set, with optional
    multi-processing.

    :param name: model name, resolved to a model object via get_model
    :param site: single site name, or one of 'all'/'PLUMBER_ext'/'PLUMBER'
    :param no_mp: run serially instead of using a process pool
    :param multivariate: passed through to run_simulation
    :param overwrite: passed through to run_simulation
    :param fix_closure: whether to fix energy budget closure
    """
    # TODO: refactor to work with above caller.
    model = get_model(name)

    if site in ['all', 'PLUMBER_ext', 'PLUMBER']:
        logger.info('Running {n} at {s} sites'.format(n=name, s=site))
        datasets = get_sites(site)
        if no_mp:
            for s in datasets:
                run_simulation(model, name, s, multivariate, overwrite,
                               fix_closure)
        else:
            f_args = [(model, name, s, multivariate, overwrite, fix_closure)
                      for s in datasets]
            ncores = get_suitable_ncores()
            # BUG FIX: was `site is not 'debug'` — identity comparison with a
            # string literal (interning-dependent, SyntaxWarning on 3.8+).
            # Note the condition is always true in this branch, since `site`
            # is one of the set names checked above.
            if site != 'debug' and hasattr(model, 'memory_requirement'):
                # Limit workers so total memory use stays under half of RAM.
                ncores = max(1, int((psutil.virtual_memory().total / 2)
                                    // model.memory_requirement))
            logger.info("Running on %d core(s)" % ncores)
            with Pool(ncores) as p:
                p.starmap(run_simulation, f_args)
    else:
        run_simulation(model, name, site, multivariate, overwrite, fix_closure)

    return
def get_PLUMBER_metrics(name, site='all', variables=('Qle', 'Qh', 'NEE')):
    """Get a metrics dataframe for a model at one or all sites, with
    benchmark model metrics included for comparison.

    Sites whose metric CSVs are missing or unreadable are skipped (logged
    as a warning); returns None if no site could be loaded at all.

    :param name: model name used in the metrics CSV path
    :param site: a single site name, or 'all' for the PLUMBER_ext site set
    :param variables: flux variables to keep (default avoids a mutable
        default argument by using a tuple)
    :returns: ranked long-format dataframe of metrics, or None on total failure
    """
    csv_file = './source/models/{n}/metrics/{n}_{s}_metrics.csv'

    # benchmark_names = ['1lin', '2lin', '3km27']
    benchmark_names = ['S_lin', 'ST_lin', 'STH_km27']

    if site == 'all':
        sites = get_sites('PLUMBER_ext')
    else:
        sites = [site]

    metric_df = []
    failures = []
    for s in sites:
        try:
            site_metrics = pd.read_csv(csv_file.format(n=name, s=s))
            site_metrics = pd.melt(site_metrics, id_vars='metric')
            site_metrics['name'] = name
            site_metrics['site'] = s
            metric_df.append(
                site_metrics[site_metrics.variable.isin(variables)])

            for b in benchmark_names:
                benchmark_metrics = pd.read_csv(csv_file.format(n=b, s=s))
                benchmark_metrics = pd.melt(benchmark_metrics,
                                            id_vars='metric')
                benchmark_metrics['name'] = b
                benchmark_metrics['site'] = s
                metric_df.append(benchmark_metrics[
                    benchmark_metrics.variable.isin(variables)])
        except Exception:
            # Deliberate best-effort load: any failure (missing file, bad
            # CSV, ...) just drops this site and is reported below.
            failures.append(s)
            continue

    if len(failures) > 0:
        # Use the module logger (was logging.warning, inconsistent with
        # logger.error below).
        logger.warning('Skipped {l} sites: {f}'.format(
            l=len(failures), f=', '.join(failures)))

    if len(metric_df) == 0:
        logger.error(
            'Failed to load any csv files for {n} at {s} - skipping plot.'.
            format(n=name, s=site))
        return

    metric_df = pd.concat(metric_df).reset_index(drop=True)
    metric_df = rank_metric_df(metric_df)

    return metric_df
def get_train_test_data(site, met_vars, flux_vars, use_names, qc=True, fix_closure=True):
    """Gets training and testing data, PLUMBER style (leave one out)

    Set the training set using pals.data.set_config(['datasets', 'train'])

    :param site: test site name, or 'debug' for a tiny fixed train/test pair
    :param met_vars: met forcing variables to load
    :param flux_vars: flux variables to load
    :param use_names: passed through to the data loaders
    :param qc: use quality-controlled data (forced off in debug mode so
        there is enough data to train)
    :param fix_closure: whether to fix energy budget closure
    :returns: dict of training data, testing data, and the site name
    """
    if site == 'debug':
        train_sites = ['Amplero']
        test_site = 'Tumba'
        # Use non-quality controlled data, to ensure there's enough to train
        qc = False
    else:
        train_sites = get_sites(get_config(['datasets', 'train']))
        test_site = site
        # PLUMBER-style leave-one-out: if the test site is in the training
        # set, exclude it; otherwise train on all training sites.
        # (Removed a dead no-op branch that assigned train_sites to itself.)
        if test_site in train_sites:
            train_sites = [s for s in train_sites if s != test_site]

    print("Training with {n} datasets".format(n=len(train_sites)))

    train_dict = get_train_data(train_sites, met_vars, flux_vars,
                                use_names=use_names, qc=qc,
                                fix_closure=fix_closure)
    test_dict = get_test_data(test_site, met_vars, use_names=use_names, qc=qc)

    train_test_data = train_dict
    train_test_data.update(test_dict)
    train_test_data['site'] = site

    # Debug dump of everything loaded, one key per section.
    with open('new_data.txt', 'w') as f:
        for k in sorted(train_test_data.keys()):
            print(k, ':\n', train_test_data[k], file=f)

    return train_test_data
def main_rst_gen_mp(name, site, sim_file=None, no_mp=False):
    """Generate rst report files, using multiple processes if necessary.

    Only works if the simulations have already been evaluated.

    :param name: model name to generate reports for
    :param site: single site name, or 'all'/'PLUMBER_ext' for the whole
        PLUMBER_ext site set
    :param sim_file: accepted for interface compatibility; not used by
        main_rst_gen calls below
    :param no_mp: run serially instead of using a process pool
    """
    if site in ['all', 'PLUMBER_ext']:
        datasets = get_sites('PLUMBER_ext')
        if no_mp:
            for s in datasets:
                main_rst_gen(name, s)
        else:
            f_args = [[name, s] for s in datasets]
            # rst generation is light: use about a quarter of the cores
            # (plus two), capped at the total.
            total_cores = os.cpu_count()
            ncores = min(total_cores, 2 + total_cores // 4)
            with Pool(ncores) as p:
                p.starmap(main_rst_gen, f_args)
    else:
        main_rst_gen(name, site)