def RDT_grabby(expname, rank, output_fname=None, plot_bairros=True):
    """Plot RDT prevalence for the rank-th best sample of a calibration experiment."""
    calib_folder = calib_base + expname + "/"
    if not output_fname:
        output_fname = calib_folder + "rank{}_rdt".format(rank)

    # Rank samples by total log-likelihood; rank 0 is the best-fitting sample.
    LL_fname = calib_folder + "_plots/LL_all.csv"
    LL_df = pd.read_csv(LL_fname)
    LL_df.sort_values(by='total', ascending=False, inplace=True)
    LL_df.reset_index(inplace=True)

    sample = LL_df.loc[rank, 'sample']
    iteration = LL_df.loc[rank, 'iteration']

    start_date = "2009-01-01"

    am = AnalyzeManager()
    am.add_analyzer(PrevAnalyzer(start_date=start_date,
                                 save_file=output_fname,
                                 cait_output_mode=True,
                                 plot_bairros=plot_bairros))

    # Look up the simulation(s) that ran this sample in the relevant iteration.
    with open(calib_folder + "iter{}/IterationState.json".format(iteration)) as fin:
        iteration_state = json.loads(fin.read())

    siminfo = OrderedDict(iteration_state['simulations'])
    for item in list(siminfo.items()):
        if item[1]['__sample_index__'] == sample:
            simid = item[0]
            # print("Sim ID: ", simid)
            am.add_simulation(simid)

    am.analyze()
def incidence_grabby(expname, hfca, rank, output_fname=None):
    """Plot clinical-case incidence for the rank-th best sample of a calibration experiment."""
    calib_folder = calib_base + expname + "/"
    if not output_fname:
        output_fname = calib_folder + "rank{}_cases".format(rank)

    LL_fname = calib_folder + "_plots/LL_all.csv"
    LL_df = pd.read_csv(LL_fname)
    LL_df.sort_values(by='total', ascending=False, inplace=True)
    LL_df.reset_index(inplace=True)

    sample = LL_df.loc[rank, 'sample']
    iteration = LL_df.loc[rank, 'iteration']

    am = AnalyzeManager()
    # am.add_analyzer(IncidencePlotter(GriddedCalibSite(hfca), save_file=output_fname))
    am.add_analyzer(IncidencePlotter(hfca, save_file=output_fname, save_csv=True))

    with open(calib_folder + "iter{}/IterationState.json".format(iteration)) as fin:
        iteration_state = json.loads(fin.read())

    siminfo = OrderedDict(iteration_state['simulations'])
    for item in list(siminfo.items()):
        if item[1]['__sample_index__'] == sample:
            simid = item[0]
            am.add_simulation(simid)

    am.analyze()
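# Usage sketch for the two helpers above (hypothetical experiment and facility names; assumes
# calib_base, pandas, and the analyzers are importable in this module):
#
#     RDT_grabby("example_calib_experiment", rank=0)                        # best-fit RDT prevalence
#     incidence_grabby("example_calib_experiment", "example_hfca", rank=0)  # best-fit case counts
#
# rank indexes LL_all.csv after sorting by total log-likelihood, so rank 0 is the best fit.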
def _analyze(self, experiment, analyzers, points_ran):
    """
    This method is the in-common route for Resamplers to analyze simulations for likelihood.
    :param experiment: the experiment to analyze, should be from self._run()
    :param analyzers: the likelihood analyzers to run on the experiment
    :param points_ran: Point objects that were just _run()
    :return: The supplied points_ran with their .likelihood attribute set, AND the direct
             results of the analyzer as a list.
    """
    am = AnalyzeManager(analyzers=analyzers, exp_list=experiment)
    am.analyze()

    # The provided likelihood analyzer MUST set self.result in its .finalize() method to an
    # array-like of likelihood values, one per point and in the same order as points_ran.
    results = am.analyzers[0].result.tolist()

    for i in range(len(results)):
        # Add the likelihood
        points_ran[i].likelihood = results[i]

    # verify that the returned points all have a likelihood attribute set
    likelihoods_are_missing = any(point.likelihood is None for point in points_ran)
    if likelihoods_are_missing:
        raise Exception('At least one Point object returned by the provided analyzer does not have '
                        'its .likelihood attribute set.')

    return points_ran, results
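# Hedged sketch of the contract _analyze() relies on; nothing below is a class from this
# codebase. Any analyzer handed to _analyze() must, by the end of its finalize(), expose
# .result as an array-like of likelihoods (one per point, ordered like points_ran) that
# supports .tolist():
#
#     import numpy as np
#
#     class ExampleLikelihoodAnalyzer(BaseAnalyzer):            # hypothetical subclass
#         def finalize(self):
#             # one likelihood value per resampled point
#             self.result = np.array(self.per_point_log_likelihoods)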
def plot_vectors(exp_id, sample, save_file=None):
    am = AnalyzeManager()
    am.add_experiment(retrieve_experiment(exp_id))
    am.add_analyzer(VectorSpeciesReportAnalyzer(sample, save_file=save_file, channel='Daily HBR'))
    am.analyze()
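# Example call (hypothetical experiment id and sample index; assumes retrieve_experiment and
# VectorSpeciesReportAnalyzer are imported at module level):
#
#     plot_vectors('12345678-abcd-ef01-2345-6789abcdef01', sample=0, save_file='rank0_hbr')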
def analyze(args, unknownArgs, builtinAnalyzers):
    # validate parameters
    if args.config_name is None:
        logger.error('Please provide Analyzer (-a or --config_name).')
        exit()

    # Retrieve what we need
    itemids = args.itemids
    batch_name = args.batch_name

    # collect all experiments and simulations
    exp_dict, sim_dict = collect_experiments_simulations(itemids)

    # consider batch existing case
    exp_dict, sim_dict = consolidate_experiments_with_options(exp_dict, sim_dict, batch_name)

    # check status for each experiment
    if not args.force:
        check_status(exp_dict.values())

    # collect all analyzers
    analyzers = collect_analyzers(args, builtinAnalyzers)

    if not exp_dict and not sim_dict:
        # No experiment specified -> using latest experiment
        latest = DataStore.get_most_recent_experiment()
        exp_dict[latest.exp_id] = latest

    # create instance of AnalyzeManager
    analyzeManager = AnalyzeManager(exp_list=exp_dict.values(), sim_list=sim_dict.values(), analyzers=analyzers)

    exp_ids_to_be_saved = list(set(exp_dict.keys()) - set(analyzeManager.experiments_simulations.keys()))
    exp_to_be_saved = [exp_dict[exp_id] for exp_id in exp_ids_to_be_saved]

    # if batch name exists, always save experiments
    if batch_name:
        # save/create batch
        save_batch(batch_name, exp_to_be_saved, sim_dict.values())
    # Only create a batch if we pass more than one experiment or simulation in total
    elif len(exp_dict) + len(sim_dict) > 1:
        # check if there is any existing batch containing the same experiments
        batch_existing = check_existing_batch(exp_dict, sim_dict)

        if batch_existing is None:
            # save/create batch
            save_batch(batch_name, exp_to_be_saved, sim_dict.values())
        else:
            # display the existing batch
            logger.info('\nBatch: %s (id=%s)' % (batch_existing.name, batch_existing.id))

    # start to analyze
    analyzeManager.analyze()

    # remove empty batches
    clean_batch()

    return analyzeManager
def analyze_iteration(self):
    """
    Analyze the output of completed simulations by using the relevant analyzers by site.
    Cache the results that are returned by those analyzers.
    """
    if self.results:
        logger.info('Reloading results from cached iteration state.')
        return self.results['total']

    if not self.exp_manager:
        self.exp_manager = ExperimentManagerFactory.from_experiment(self.experiment_id)

    # Use the new-style AnalyzeManager when every analyzer derives from BaseAnalyzer;
    # otherwise fall back to the AnalyzeManager imported at module level.
    from simtools.Analysis.BaseAnalyzers.BaseAnalyzer import BaseAnalyzer
    from simtools.Analysis.AnalyzeManager import AnalyzeManager as am
    if all(isinstance(a, BaseAnalyzer) for a in self.analyzer_list):
        analyzerManager = am(exp_list=self.exp_manager.experiment,
                             analyzers=self.analyzer_list,
                             working_dir=self.iteration_directory,
                             verbose=True)
    else:
        analyzerManager = AnalyzeManager(exp_list=self.exp_manager.experiment,
                                         analyzers=self.analyzer_list,
                                         working_dir=self.iteration_directory)
    analyzerManager.analyze()

    # Ask the analyzers to cache themselves
    cached_analyses = {a.uid if not callable(a.uid) else a.uid(): a.cache()
                       for a in analyzerManager.analyzers}
    logger.debug(cached_analyses)

    # Get the results from the analyzers and ask the next point how it wants to cache them
    results = pd.DataFrame({a.uid if not callable(a.uid) else a.uid(): a.result
                            for a in analyzerManager.analyzers})
    cached_results = self.next_point_algo.get_results_to_cache(results)

    # Store the analyzers and results in the iteration state
    self.analyzers = cached_analyses
    self.results = cached_results

    # Set those results in the next point algorithm
    self.next_point_algo.set_results_for_iteration(self.iteration, results)

    # Update the summary table and all the results
    self.all_results, self.summary_table = self.next_point_algo.update_summary_table(self, self.all_results)
    logger.info(self.summary_table)
def vector(expname, calib_stage, rank):
    """Plot the vector species report (daily HBR) for the rank-th best sample of a calibration experiment."""
    calib_folder = calib_base + expname + "/"
    output_fname = calib_folder + "rank{}_vectors".format(rank)

    LL_fname = calib_folder + "_plots/LL_all.csv"
    LL_df = pd.read_csv(LL_fname)
    LL_df.sort_values(by='total', ascending=False, inplace=True)
    LL_df.reset_index(inplace=True)

    sample = LL_df.loc[rank, 'sample']
    iteration = LL_df.loc[rank, 'iteration']

    am = AnalyzeManager()
    am.add_analyzer(VectorSpeciesReportAnalyzer(save_file=output_fname, channel='Daily HBR'))

    with open(calib_folder + "iter{}/IterationState.json".format(iteration)) as fin:
        iteration_state = json.loads(fin.read())

    siminfo = OrderedDict(iteration_state['simulations'])
    for item in list(siminfo.items()):
        if item[1]['__sample_index__'] == sample:
            simid = item[0]
            am.add_simulation(simid)

    am.analyze()
def stdout(args, unknownArgs):
    exp_manager = reload_experiment(args)
    states, msgs = exp_manager.get_simulation_status()

    if not exp_manager.status_succeeded(states):
        logger.warning('WARNING: not all jobs have finished successfully yet...')

    # Find the first simulation matching the requested filter (args.succeeded, args.failed, or any).
    found = False
    for sim_id, state in states.items():
        if (state is SimulationState.Succeeded and args.succeeded) or \
           (state is SimulationState.Failed and args.failed) or \
           (not args.succeeded and not args.failed):
            found = True
            break

    if not found:
        print("No simulations found...")
    else:
        am = AnalyzeManager(exp_list=[exp_manager.experiment],
                            analyzers=StdoutAnalyzer([sim_id], args.error),
                            force_analyze=True,
                            verbose=False)
        am.analyze()
        # Excerpt: tail of the analyzer's data-combining step, gathering each parser's
        # selected data into a single DataFrame.
        selected = [p.selected_data[id(self)]
                    for p in parsers.values()
                    if id(self) in p.selected_data]
        self.data = pd.concat(selected)

    def finalize(self):
        import seaborn as sns
        sns.set_style("darkgrid")

        fig = plt.figure(figsize=(10, 6))
        ax = fig.gca()
        for a, adf in self.data.groupby('sim_id'):
            for s, sdf in adf.groupby('species'):
                ax.plot(sdf['date'], sdf[self.channel], label=s)
        ax.legend()
        plt.ylabel(self.channel)

        if self.save_file:
            plt.savefig(self.save_file + ".png")
            plt.savefig(self.save_file + ".pdf")
        else:
            plt.show()


if __name__ == '__main__':
    am = AnalyzeManager()
    am.add_analyzer(VectorSpeciesReportAnalyzer())
    am.add_simulation('4047a20f-b33d-e811-a2bf-c4346bcb7274')
    am.analyze()
def plot_RDT(exp_id, sample, save_file=None, **kwargs):
    # Note: 'catch' is not a parameter here; it is expected to be defined at module level
    # before this function is called.
    am = AnalyzeManager()
    am.add_experiment(retrieve_experiment(exp_id))
    am.add_analyzer(prevalence_plot_analyzer(catch, sample, save_file=save_file, **kwargs))
    am.analyze()
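# Example call (hypothetical id and catchment name; 'catch' must be set at module scope first):
#
#     catch = 'example_catchment'
#     plot_RDT('12345678-abcd-ef01-2345-6789abcdef01', sample=0, save_file='rank0_rdt')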
def catalyst(args, unknownArgs):
    """
    Catalyst run-and-analyze process as ported from the test team.
    Programmatic-only arguments:
        args.mode : used by FidelityReportExperimentDefinition, default: 'prod'
        args.report_label : attached to the experiment name
        args.debug : True/False, passed into FidelityReportAnalyzer, default: False

    :param args:
    :param unknownArgs:
    :return:
    """
    from dtk.utils.builders.sweep import GenericSweepBuilder
    from catalyst_report.fidelity_report_analyzer import FidelityReportAnalyzer
    from catalyst_report.fidelity_report_experiment_definition import FidelityReportExperimentDefinition
    import catalyst_report.utils as catalyst_utils
    from simtools.Analysis.AnalyzeManager import AnalyzeManager

    # we're going to do a dtk run, then a set-piece analysis. But first we need to do some overrides
    # to get the run part to do the desired parameter sweep.
    mod = args.loaded_module

    # when run with 'dtk catalyst', run_sim_args['exp_name'] will have additional information appended.
    mod.run_sim_args['exp_name'] = mod.run_sim_args['exp_name'] + '-development'

    # lining up the arguments expected by FidelityReportExperimentDefinition
    args.sweep = args.sweep_method

    # hidden, programmatic arguments
    args.mode = args.mode if hasattr(args, 'mode') else 'prod'
    args.report_label = args.report_label if hasattr(args, 'report_label') else None
    args.debug = args.debug if hasattr(args, 'debug') else False

    # determine which report is being asked for. If not specified, default to what the config.json file says
    # ck4, this should go somewhere else, on a Config object of some sort? (prob not the builder, though)
    report_type_mapping = {
        'DENGUE_SIM': 'dengue',
        'GENERIC_SIM': 'generic',
        'HIV_SIM': 'hiv',
        'MALARIA_SIM': 'malaria',
        'POLIO_SIM': 'polio',
        'STI_SIM': 'sti',
        'TB_SIM': 'tb',
        'TYPHOID_SIM': 'typhoid',
        'VECTOR_SIM': 'generic'
    }
    if args.report_type:
        report_type = args.report_type
    else:
        sim_type = mod.run_sim_args['config_builder'].config['parameters']['Simulation_Type']
        report_type = report_type_mapping.get(sim_type, None)
        if not report_type:
            raise KeyError('Default report type could not be determined for sim_type: %s. Report type must be specified'
                           ' via -r flag.' % sim_type)

    # Create and set a builder to sweep over population scaling or model timestep
    reports = catalyst_utils.load_report_definitions(definitions_filename=args.report_definitions)
    if report_type in reports:
        args.report_channel_list = reports[report_type]['inset_channel_names']
    else:
        raise Exception('Invalid report: %s.\nAvailable reports: %s' % (report_type, sorted(reports.keys())))

    catalyst_config = catalyst_utils.load_sweep_configs(sweep_type=args.sweep_type,
                                                        config_filename=args.sweep_definitions)
    defn = FidelityReportExperimentDefinition(catalyst_config, args)

    # redefine the experiment name so it doesn't conflict with the likely follow-up non-catalyst experiment
    mod.run_sim_args['exp_name'] = 'Catalyst-' + mod.run_sim_args['exp_name']

    # define the sweep to perform
    sweep_dict = {
        'Run_Number': range(1, int(defn['nruns']) + 1),
        defn['sweep_param']: defn['sweep_values']
    }
    mod.run_sim_args['exp_builder'] = GenericSweepBuilder.from_dict(sweep_dict)

    # overwrite spatial output channels to those used in the catalyst report
    spatial_channel_names = defn['spatial_channel_names']
    if len(spatial_channel_names) > 0:
        mod.run_sim_args['config_builder'].enable('Spatial_Output')
        mod.run_sim_args['config_builder'].params['Spatial_Output_Channels'] = spatial_channel_names
    else:
        mod.run_sim_args['config_builder'].disable('Spatial_Output')
        mod.run_sim_args['config_builder'].params['Spatial_Output_Channels'] = []

    # now run if no preexisting experiment id was provided
    if not args.experiment_id:
        # we must always block so that we can run the analysis at the end; run and analyze!
        args.blocking = True
        experiment_manager = run(args, unknownArgs)
        experiment = experiment_manager.experiment
        print('Done running experiment: %s' % experiment.exp_id)
    else:
        experiment = retrieve_experiment(args.experiment_id)

    # Create an analyze manager
    am = AnalyzeManager(exp_list=[experiment], verbose=False)

    # Add the TimeSeriesAnalyzer to the manager and do analysis
    # ck4, is there a better way to specify the first 4 arguments? The DTKCase from Test-land might be nicer.
    # After all, the names COULD be different
    analyzer = FidelityReportAnalyzer(
        'catalyst_report',
        'config.json',
        mod.run_sim_args['config_builder'].get_param('Demographics_Filenames')[0],
        experiment_definition=defn,
        experiment_id=experiment.exp_id,
        experiment_name=experiment.exp_name,
        label=args.report_label,
        time_series_step_from=defn['step_from'],
        time_series_step_to=defn['step_to'],
        time_series_equal_step_count=True,
        raw_data=True,
        debug=args.debug)
    am.add_analyzer(analyzer)
    am.analyze()

    import webbrowser
    webbrowser.open_new("file:///{}".format(os.path.join(os.getcwd(), "catalyst_report", "summary_report.html")))