Example 1
def RDT_grabby(expname, rank, output_fname=None, plot_bairros=True):
    calib_folder = calib_base + expname + "/"
    if not output_fname:
        output_fname = calib_folder + "rank{}_rdt".format(rank)

    LL_fname = calib_folder + "_plots/LL_all.csv"
    LL_df = pd.read_csv(LL_fname)
    LL_df.sort_values(by='total', ascending=False, inplace=True)
    LL_df.reset_index(inplace=True)

    sample = LL_df.loc[rank, 'sample']
    iteration = LL_df.loc[rank, 'iteration']

    start_date = "2009-01-01"

    am = AnalyzeManager()
    am.add_analyzer(PrevAnalyzer(start_date=start_date,
                                 save_file=output_fname,
                                 cait_output_mode=True,
                                 plot_bairros=plot_bairros))

    with open(calib_folder+"iter{}/IterationState.json".format(iteration)) as fin:
        iteration_state = json.loads(fin.read())
    siminfo = OrderedDict(iteration_state['simulations'])
    for simid, info in siminfo.items():
        if info['__sample_index__'] == sample:
            am.add_simulation(simid)
    am.analyze()
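
A hypothetical call, assuming calib_base is defined at module level and that the calibration folder contains _plots/LL_all.csv; rank 0 selects the best fit because the table is sorted by descending total log-likelihood above:

# Hypothetical usage: plot RDT prevalence for the best-ranked sample of a
# calibration named "my_calibration" (placeholder name).
RDT_grabby("my_calibration", rank=0, plot_bairros=False)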
Example 2
def incidence_grabby(expname, hfca, rank, output_fname=None):
    calib_folder = calib_base + expname + "/"
    if not output_fname:
        output_fname = calib_folder + "rank{}_cases".format(rank)

    LL_fname = calib_folder + "_plots/LL_all.csv"
    LL_df = pd.read_csv(LL_fname)
    LL_df.sort_values(by='total', ascending=False, inplace=True)
    LL_df.reset_index(inplace=True)

    sample = LL_df.loc[rank, 'sample']
    iteration = LL_df.loc[rank, 'iteration']

    am = AnalyzeManager()
    # am.add_analyzer(IncidencePlotter(GriddedCalibSite(hfca),save_file=output_fname))
    am.add_analyzer(IncidencePlotter(hfca, save_file=output_fname, save_csv=True))

    with open(calib_folder+"iter{}/IterationState.json".format(iteration)) as fin:
        iteration_state = json.loads(fin.read())
    siminfo = OrderedDict(iteration_state['simulations'])
    for simid, info in siminfo.items():
        if info['__sample_index__'] == sample:
            am.add_simulation(simid)
    am.analyze()
Example 3

    def _analyze(self, experiment, analyzers, points_ran):
        """
        This method is the common route for Resamplers to analyze simulations for likelihood.
        :param experiment: the experiment to analyze; should be from self._run()
        :param analyzers: the analyzers to run; the first must follow the likelihood contract noted below
        :param points_ran: Point objects that were just _run()
        :return: the supplied points_ran with their .likelihood attribute set, AND the direct results of the analyzer
                 as a list.
        """
        am = AnalyzeManager(analyzers=analyzers, exp_list=experiment)
        am.analyze()

        # The provided likelihood analyzer MUST set self.result to be a list of Point objects
        # with the .likelihood attribute set to the likelihood value in its .finalize() method.
        results = am.analyzers[0].result.tolist()

        for i in range(len(results)):
            # Add the likelihood
            points_ran[i].likelihood = results[i]

        # verify that the returned points all have a likelihood attribute set
        likelihoods_are_missing = any(point.likelihood is None for point in points_ran)
        if likelihoods_are_missing:
            raise Exception(
                'At least one Point object returned by the provided analyzer does not have '
                'its .likelihood attribute set.')

        return points_ran, results
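
The comment above states a contract: by the end of its finalize() method, the first analyzer must leave .result as an array-like (supporting .tolist()) with one likelihood per point, in run order. A minimal self-contained sketch of an analyzer honoring that contract (the class, its methods, and the scoring rule are all hypothetical; a real analyzer would subclass the framework's BaseAnalyzer):

import numpy as np

class SketchLikelihoodAnalyzer:
    """Hypothetical sketch: only the .result contract used by _analyze() is real."""

    def __init__(self):
        self.likelihoods_by_sample = {}  # sample index -> log-likelihood
        self.result = None

    def score(self, sample_index, reference, simulated):
        # Placeholder scoring rule: negative sum of squared errors.
        errors = np.asarray(simulated, dtype=float) - np.asarray(reference, dtype=float)
        self.likelihoods_by_sample[sample_index] = -float(np.sum(errors ** 2))

    def finalize(self):
        # The contract: result[i] must line up with points_ran[i].
        self.result = np.array([self.likelihoods_by_sample[k]
                                for k in sorted(self.likelihoods_by_sample)])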
Example 4
def plot_vectors(exp_id, sample, save_file=None):
    am = AnalyzeManager()
    am.add_experiment(retrieve_experiment(exp_id))
    am.add_analyzer(
        VectorSpeciesReportAnalyzer(sample,
                                    save_file=save_file,
                                    channel='Daily HBR'))
    am.analyze()
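
A hypothetical invocation (the experiment id and file name are placeholders):

# Hypothetical usage: plot the Daily HBR channel for sample 0 of an experiment.
plot_vectors("11111111-2222-3333-4444-555555555555", sample=0,
             save_file="vectors_sample0")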
Example 5

def analyze(args, unknownArgs, builtinAnalyzers):
    # validate parameters
    if args.config_name is None:
        logger.error('Please provide Analyzer (-a or --config_name).')
        exit()

    # Retrieve what we need
    itemids = args.itemids
    batch_name = args.batch_name

    # collect all experiments and simulations
    exp_dict, sim_dict = collect_experiments_simulations(itemids)

    # account for the case where the batch already exists
    exp_dict, sim_dict = consolidate_experiments_with_options(exp_dict, sim_dict, batch_name)

    # check status for each experiment
    if not args.force:
        check_status(exp_dict.values())

    # collect all analyzers
    analyzers = collect_analyzers(args, builtinAnalyzers)

    if not exp_dict and not sim_dict:
        # No experiment specified -> using latest experiment
        latest = DataStore.get_most_recent_experiment()
        exp_dict[latest.exp_id] = latest

    # create instance of AnalyzeManager
    analyzeManager = AnalyzeManager(exp_list=exp_dict.values(), sim_list=sim_dict.values(), analyzers=analyzers)

    exp_ids_to_be_saved = list(set(exp_dict.keys()) - set(analyzeManager.experiments_simulations.keys()))
    exp_to_be_saved = [exp_dict[exp_id] for exp_id in exp_ids_to_be_saved]

    # if batch name exists, always save experiments
    if batch_name:
        # save/create batch
        save_batch(batch_name, exp_to_be_saved, sim_dict.values())
    # Only create a batch if we pass more than one experiment or simulation in total
    elif len(exp_dict) + len(sim_dict) > 1:
        # check if there is any existing batch containing the same experiments
        batch_existing = check_existing_batch(exp_dict, sim_dict)

        if batch_existing is None:
            # save/create batch
            save_batch(batch_name, exp_to_be_saved, sim_dict.values())
        else:
            # display the existing batch
            logger.info('\nBatch: %s (id=%s)' % (batch_existing.name, batch_existing.id))

    # start to analyze
    analyzeManager.analyze()

    # remove empty batches
    clean_batch()

    return analyzeManager
Example 6
    def analyze_iteration(self):
        """
        Analyze the output of completed simulations by using the relevant analyzers by site.
        Cache the results that are returned by those analyzers.
        """
        if self.results:
            logger.info('Reloading results from cached iteration state.')
            return self.results['total']

        if not self.exp_manager:
            self.exp_manager = ExperimentManagerFactory.from_experiment(
                self.experiment_id)

        from simtools.Analysis.BaseAnalyzers.BaseAnalyzer import BaseAnalyzer
        from simtools.Analysis.AnalyzeManager import AnalyzeManager as NewAnalyzeManager
        if all(isinstance(a, BaseAnalyzer) for a in self.analyzer_list):
            # every analyzer uses the new-style API: use the new AnalyzeManager
            analyzerManager = NewAnalyzeManager(exp_list=self.exp_manager.experiment,
                                                analyzers=self.analyzer_list,
                                                working_dir=self.iteration_directory,
                                                verbose=True)
        else:
            # otherwise fall back to the legacy AnalyzeManager imported at module level
            analyzerManager = AnalyzeManager(
                exp_list=self.exp_manager.experiment,
                analyzers=self.analyzer_list,
                working_dir=self.iteration_directory)
        analyzerManager.analyze()

        # Ask the analyzers to cache themselves
        cached_analyses = {
            a.uid if not callable(a.uid) else a.uid(): a.cache()
            for a in analyzerManager.analyzers
        }
        logger.debug(cached_analyses)

        # Get the results from the analyzers and ask the next point how it wants to cache them
        results = pd.DataFrame({
            a.uid if not callable(a.uid) else a.uid(): a.result
            for a in analyzerManager.analyzers
        })
        cached_results = self.next_point_algo.get_results_to_cache(results)

        # Store the analyzers and results in the iteration state
        self.analyzers = cached_analyses
        self.results = cached_results

        # Set those results in the next point algorithm
        self.next_point_algo.set_results_for_iteration(self.iteration, results)

        # Update the summary table and all the results
        self.all_results, self.summary_table = self.next_point_algo.update_summary_table(
            self, self.all_results)
        logger.info(self.summary_table)
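
The expression a.uid if not callable(a.uid) else a.uid() appears twice above because analyzers in this codebase expose uid either as a plain attribute or as a method. A stand-alone sketch of that pattern (both analyzer classes are hypothetical):

class AttrUidAnalyzer:
    uid = 'prevalence_analyzer'      # uid exposed as a plain attribute

class MethodUidAnalyzer:
    def uid(self):                   # uid exposed as a method
        return 'incidence_analyzer'

def resolve_uid(analyzer):
    # Mirrors the inline expression used twice in analyze_iteration() above.
    return analyzer.uid if not callable(analyzer.uid) else analyzer.uid()

assert resolve_uid(AttrUidAnalyzer()) == 'prevalence_analyzer'
assert resolve_uid(MethodUidAnalyzer()) == 'incidence_analyzer'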
Example 7
def vector(expname, calib_stage, rank):  # calib_stage is currently unused
    calib_folder = calib_base + expname + "/"
    output_fname = calib_folder + "rank{}_vectors".format(rank)

    LL_fname = calib_folder + "_plots/LL_all.csv"
    LL_df = pd.read_csv(LL_fname)
    LL_df.sort_values(by='total', ascending=False, inplace=True)
    LL_df.reset_index(inplace=True)

    sample = LL_df.loc[rank, 'sample']
    iteration = LL_df.loc[rank, 'iteration']

    am = AnalyzeManager()
    am.add_analyzer(VectorSpeciesReportAnalyzer(save_file=output_fname, channel='Daily HBR'))

    with open(calib_folder+"iter{}/IterationState.json".format(iteration)) as fin:
        iteration_state = json.loads(fin.read())
    siminfo = OrderedDict(iteration_state['simulations'])
    # add every simulation run for the selected sample
    for simid, info in siminfo.items():
        if info['__sample_index__'] == sample:
            am.add_simulation(simid)
    am.analyze()
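
A hypothetical invocation, as for the grabby helpers above (the experiment name is a placeholder and calib_base is assumed to be defined at module level):

# Hypothetical usage: plot the Daily HBR vector report for the best-ranked sample.
vector("my_calibration", calib_stage=1, rank=0)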
Example 8
def stdout(args, unknownArgs):
    exp_manager = reload_experiment(args)
    states, msgs = exp_manager.get_simulation_status()

    if not exp_manager.status_succeeded(states):
        logger.warning(
            'WARNING: not all jobs have finished successfully yet...')

    found = False
    for sim_id, state in states.items():
        if (state is SimulationState.Succeeded and args.succeeded) or\
               (state is SimulationState.Failed and args.failed) or \
               (not args.succeeded and not args.failed):
            found = True
            break
    if not found:
        print("No simulations found...")
    else:
        am = AnalyzeManager(exp_list=[exp_manager.experiment],
                            analyzers=StdoutAnalyzer([sim_id], args.error),
                            force_analyze=True,
                            verbose=False)
        am.analyze()
Example 9
    def combine(self, parsers):
        selected = [
            p.selected_data[id(self)] for p in parsers.values()
            if id(self) in p.selected_data
        ]
        self.data = pd.concat(selected)

    def finalize(self):
        import matplotlib.pyplot as plt
        import seaborn as sns
        sns.set_style("darkgrid")

        fig = plt.figure(figsize=(10, 6))
        ax = fig.gca()
        for a, adf in self.data.groupby('sim_id'):
            for s, sdf in adf.groupby('species'):
                ax.plot(sdf['date'], sdf[self.channel], label=s)
        ax.legend()
        plt.ylabel(self.channel)
        if self.save_file:
            plt.savefig(self.save_file + ".png")
            plt.savefig(self.save_file + ".pdf")
        else:
            plt.show()


if __name__ == '__main__':

    am = AnalyzeManager()
    am.add_analyzer(VectorSpeciesReportAnalyzer())
    am.add_simulation('4047a20f-b33d-e811-a2bf-c4346bcb7274')
    am.analyze()
Example 10
def plot_RDT(exp_id, sample, save_file=None, **kwargs):
    # NOTE: catch (the catchment name) is assumed to be defined at module scope
    am = AnalyzeManager()
    am.add_experiment(retrieve_experiment(exp_id))
    am.add_analyzer(
        prevalence_plot_analyzer(catch, sample, save_file=save_file, **kwargs))
    am.analyze()
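
A hypothetical invocation; because plot_RDT reads catch from module scope, it must be set before the call (all values are placeholders):

# Hypothetical usage: catch must exist at module scope before calling plot_RDT.
catch = "example_catchment"
plot_RDT("11111111-2222-3333-4444-555555555555", sample=0, save_file="rdt_rank0")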
Example 11
def catalyst(args, unknownArgs):
    """
    Catalyst run-and-analyze process as ported from the test team.
    Programmatic-only arguments:
        args.mode : used by FidelityReportExperimentDefinition, default: 'prod'
        args.report_label : attached to the experiment name
        args.debug : True/False, passed into FidelityReportAnalyzer, default: False
    :param args: parsed command-line arguments
    :param unknownArgs: any additional, unparsed arguments
    :return:
    """
    from dtk.utils.builders.sweep import GenericSweepBuilder
    from catalyst_report.fidelity_report_analyzer import FidelityReportAnalyzer
    from catalyst_report.fidelity_report_experiment_definition import FidelityReportExperimentDefinition
    import catalyst_report.utils as catalyst_utils
    from simtools.Analysis.AnalyzeManager import AnalyzeManager

    # we're going to do a dtk run, then a set-piece analysis. But first we need to do some overrides
    # to get the run part to do the desired parameter sweep.

    mod = args.loaded_module

    # when run with 'dtk catalyst', run_sim_args['exp_name'] will have additional information appended.
    mod.run_sim_args['exp_name'] = mod.run_sim_args['exp_name'] + '-development'

    # lining up the arguments expected by FidelityReportExperimentDefinition
    args.sweep = args.sweep_method

    # hidden, programmatic arguments
    args.mode = getattr(args, 'mode', 'prod')
    args.report_label = getattr(args, 'report_label', None)
    args.debug = getattr(args, 'debug', False)

    # determine which report is being asked for. If not specified, default to what the config.json file says
    # ck4, this should go somewhere else, on a Config object of some sort? (prob not the builder, though)
    report_type_mapping = {
        'DENGUE_SIM': 'dengue',
        'GENERIC_SIM': 'generic',
        'HIV_SIM': 'hiv',
        'MALARIA_SIM': 'malaria',
        'POLIO_SIM': 'polio',
        'STI_SIM': 'sti',
        'TB_SIM': 'tb',
        'TYPHOID_SIM': 'typhoid',
        'VECTOR_SIM': 'generic'
    }
    if args.report_type:
        report_type = args.report_type
    else:
        sim_type = mod.run_sim_args['config_builder'].config['parameters']['Simulation_Type']
        report_type = report_type_mapping.get(sim_type, None)
        if not report_type:
            raise KeyError(
                'Default report type could not be determined for sim_type: %s. Report type must be specified'
                ' via -r flag.' % sim_type)

    # Create and set a builder to sweep over population scaling or model timestep
    reports = catalyst_utils.load_report_definitions(
        definitions_filename=args.report_definitions)
    if report_type in reports:
        args.report_channel_list = reports[report_type]['inset_channel_names']
    else:
        raise Exception('Invalid report: %s. Available reports: %s' %
                        (report_type, sorted(reports.keys())))
    catalyst_config = catalyst_utils.load_sweep_configs(
        sweep_type=args.sweep_type, config_filename=args.sweep_definitions)
    defn = FidelityReportExperimentDefinition(catalyst_config, args)

    # redefine the experiment name so it doesn't conflict with the likely follow-up non-catalyst experiment
    mod.run_sim_args['exp_name'] = 'Catalyst-' + mod.run_sim_args['exp_name']

    # define the sweep to perform
    sweep_dict = {
        'Run_Number': range(1, int(defn['nruns']) + 1),
        defn['sweep_param']: defn['sweep_values']
    }
    mod.run_sim_args['exp_builder'] = GenericSweepBuilder.from_dict(sweep_dict)

    # overwrite spatial output channels to those used in the catalyst report
    spatial_channel_names = defn['spatial_channel_names']
    if spatial_channel_names:
        mod.run_sim_args['config_builder'].enable('Spatial_Output')
        mod.run_sim_args['config_builder'].params['Spatial_Output_Channels'] = spatial_channel_names
    else:
        mod.run_sim_args['config_builder'].disable('Spatial_Output')
        mod.run_sim_args['config_builder'].params['Spatial_Output_Channels'] = []

    # now run if no preexisting experiment id was provided
    if not args.experiment_id:
        # we must always block so that we can run the analysis at the end; run and analyze!
        args.blocking = True
        experiment_manager = run(args, unknownArgs)
        experiment = experiment_manager.experiment
        print('Done running experiment: %s' % experiment.exp_id)
    else:
        experiment = retrieve_experiment(args.experiment_id)

    # Create an analyze manager
    am = AnalyzeManager(exp_list=[experiment], verbose=False)

    # Add the TimeSeriesAnalyzer to the manager and do analysis
    # ck4, is there a better way to specify the first 4 arguments? The DTKCase from Test-land might be nicer.
    # After all, the names COULD be different
    analyzer = FidelityReportAnalyzer(
        'catalyst_report',
        'config.json',
        mod.run_sim_args['config_builder'].get_param('Demographics_Filenames')[0],
        experiment_definition=defn,
        experiment_id=experiment.exp_id,
        experiment_name=experiment.exp_name,
        label=args.report_label,
        time_series_step_from=defn['step_from'],
        time_series_step_to=defn['step_to'],
        time_series_equal_step_count=True,
        raw_data=True,
        debug=args.debug)
    am.add_analyzer(analyzer)
    am.analyze()

    import webbrowser
    webbrowser.open_new("file:///{}".format(
        os.path.join(os.getcwd(), "catalyst_report", "summary_report.html")))