def test_args_fitted_params(self):
    """Check GenerateQuantitiesArgs validation and command composition.

    A non-existent csv file must be rejected by ``validate``; the four
    'runset-good' Bernoulli csv files must validate, and the composed
    command must name the method and pass the first csv file as
    ``fitted_params``.
    """
    # A path that does not exist must fail validation.
    bad_args = GenerateQuantitiesArgs(csv_files=['no_such_file'])
    with self.assertRaises(ValueError):
        bad_args.validate(chains=1)

    # One csv file per chain, numbered from 1.
    runset_dir = os.path.join(DATAFILES_PATH, 'runset-good')
    csv_files = []
    for chain_id in range(1, 5):
        csv_files.append(
            os.path.join(runset_dir, 'bern-{}.csv'.format(chain_id))
        )

    good_args = GenerateQuantitiesArgs(csv_files=csv_files)
    good_args.validate(chains=4)

    # Compose once and inspect the flattened command line.
    composed = ' '.join(good_args.compose(idx=1, cmd=[]))
    self.assertIn('method=generate_quantities', composed)
    self.assertIn('fitted_params={}'.format(csv_files[0]), composed)
def generate_quantities(
    self,
    data: Union[Dict, str] = None,
    mcmc_sample: Union[CmdStanMCMC, List[str]] = None,
    seed: int = None,
    gq_output_dir: str = None,
) -> CmdStanGQ:
    """
    Run CmdStan's generate_quantities method, which runs the generated
    quantities block of a model given an existing sample.

    The sample is supplied either as a ``CmdStanMCMC`` object or as a list
    of stan-csv files; in the latter case the draws are assembled from the
    files before CmdStan is invoked, one run per chain.

    The ``CmdStanGQ`` object records the command, the return code,
    and the paths to the generate method output csv and console files.
    Output files are written either to the specified output directory or
    to a temporary directory which is deleted upon session exit.

    Output filenames correspond to the template
    '<model_name>-<YYYYMMDDHHMM>-<chain_id>' plus the file suffix, which is
    either '.csv' for the CmdStan output or '.txt' for the console
    messages, e.g. 'bernoulli-201912081451-1.csv'. Output files written to
    the temporary directory contain an additional 8-character random
    string, e.g. 'bernoulli-201912081451-1-5nm6as7u.csv'.

    :param data: Values for all data variables in the model, specified
        either as a dictionary with entries matching the data variables,
        or as the path of a data file in JSON or Rdump format.

    :param mcmc_sample: Can be either a ``CmdStanMCMC`` object returned by
        the ``sample`` method or a list of stan-csv files generated
        by fitting the model to the data using any Stan interface.

    :param seed: The seed for random number generator. Must be an integer
        between 0 and 2^32 - 1. If unspecified,
        ``numpy.random.RandomState()`` is used to generate a seed which
        will be used for all chains.
        *NOTE: Specifying the seed will guarantee the same result for
        multiple invocations of this method with the same inputs.
        However this will not reproduce results from the sample method
        given the same inputs because the RNG will be in a different
        state.*

    :param gq_output_dir: Name of the directory in which the CmdStan output
        files are saved. If unspecified, files will be written to a
        temporary directory which is deleted upon session exit.

    :raises ValueError: if ``mcmc_sample`` is neither a ``CmdStanMCMC``
        object nor a list, is an empty list, or the draws cannot be
        assembled from the csv files.
    :raises RuntimeError: if the run set reports a failed return code.

    :return: CmdStanGQ object
    """
    # Reject unsupported input types before doing any work.
    if not isinstance(mcmc_sample, (CmdStanMCMC, list)):
        raise ValueError(
            'MCMC sample must be either CmdStanMCMC object'
            ' or list of paths to sample csv_files.'
        )

    if isinstance(mcmc_sample, CmdStanMCMC):
        fitted_csvs = mcmc_sample.runset.csv_files
        draws = mcmc_sample.draws_pd()
        chains = mcmc_sample.chains
        chain_ids = mcmc_sample.chain_ids
    else:
        if not mcmc_sample:
            raise ValueError('MCMC sample cannot be empty list')
        fitted_csvs = mcmc_sample
        draws = None
        chains = len(fitted_csvs)
        chain_ids = list(range(1, chains + 1))

    if draws is None:
        # No draws available yet: assemble the sample from the csv files.
        try:
            # Scan the first csv file for the sampler configuration; on a
            # ValueError retry with the second positional argument set to
            # True (meaning defined by scan_sampler_csv -- TODO confirm).
            first_csv = fitted_csvs[0]
            try:
                config = scan_sampler_csv(first_csv)
            except ValueError:
                config = scan_sampler_csv(first_csv, True)

            # Missing config entries are passed on as None.
            iter_sampling, iter_warmup, thin = (
                int(config[key]) if key in config else None
                for key in ('num_samples', 'num_warmup', 'thin')
            )
            fit_runset = RunSet(
                args=CmdStanArgs(
                    self._name,
                    self._exe_file,
                    chain_ids=chain_ids,
                    method_args=SamplerArgs(
                        iter_sampling=iter_sampling,
                        iter_warmup=iter_warmup,
                        thin=thin,
                    ),
                ),
                chains=chains,
                chain_ids=chain_ids,
            )
            # Point the reconstructed run set at the supplied csv files.
            fit_runset._csv_files = fitted_csvs
            draws = CmdStanMCMC(fit_runset).draws_pd()
        except ValueError as exc:
            raise ValueError(
                'Invalid mcmc_sample, error:\n\t{}\n\t'
                ' while processing files\n\t{}'.format(
                    repr(exc), '\n\t'.join(fitted_csvs)
                )
            ) from exc

    gq_args = GenerateQuantitiesArgs(csv_files=fitted_csvs)
    gq_args.validate(chains)

    with MaybeDictToFilePath(data, None) as (_data, _inits):
        gq_runset = RunSet(
            args=CmdStanArgs(
                self._name,
                self._exe_file,
                chain_ids=chain_ids,
                data=_data,
                seed=seed,
                output_dir=gq_output_dir,
                method_args=gq_args,
            ),
            chains=chains,
            chain_ids=chain_ids,
        )

        # Use at most one worker per chain, leave two CPUs free when
        # possible, and always use at least one worker.
        workers = max(1, min(cpu_count() - 2, chains))
        with ThreadPoolExecutor(max_workers=workers) as pool:
            for chain_idx in range(chains):
                pool.submit(self._run_cmdstan, gq_runset, chain_idx)

        if not gq_runset._check_retcodes():
            raise RuntimeError(
                'Error during generate_quantities.\n{}'.format(
                    gq_runset.get_err_msgs()
                )
            )
        quantities = CmdStanGQ(runset=gq_runset, mcmc_sample=draws)
    return quantities
def generate_quantities(
    self,
    data: Union[Dict, str] = None,
    mcmc_sample: Union[CmdStanMCMC, List[str]] = None,
    seed: int = None,
    gq_output_dir: str = None,
) -> CmdStanGQ:
    """
    Run CmdStan's generate_quantities method which runs the generated
    quantities block of a model given an existing sample.

    This function takes a CmdStanMCMC object (or the stan-csv files of a
    sample) and the dataset used to generate that sample and calls the
    CmdStan ``generate_quantities`` method to generate additional
    quantities of interest.

    The ``CmdStanGQ`` object records the command, the return code,
    and the paths to the generate method output csv and console files.
    The output files are written either to a specified output directory
    or to a temporary directory which is deleted upon session exit.

    Output filenames are composed of the model name, a timestamp
    in the form YYYYMMDDhhmm and the chain id, plus the corresponding
    filetype suffix, either '.csv' for the CmdStan output or '.txt' for
    the console messages, e.g. `bernoulli_ppc-201912081451-1.csv`. Output
    files written to the temporary directory contain an additional
    8-character random string, e.g.
    `bernoulli_ppc-201912081451-1-5nm6as7u.csv`.

    :param data: Values for all data variables in the model, specified
        either as a dictionary with entries matching the data variables,
        or as the path of a data file in JSON or Rdump format.

    :param mcmc_sample: Can be either a CmdStanMCMC object returned by
        CmdStanPy's `sample` method or a non-empty list of stan-csv files
        generated by fitting the model to the data using any Stan
        interface.

    :param seed: The seed for random number generator. Must be an integer
        between ``0`` and ``2^32 - 1``. If unspecified,
        ``numpy.random.RandomState()`` is used to generate a seed which
        will be used for all chains.
        *NOTE: Specifying the seed will guarantee the same result for
        multiple invocations of this method with the same inputs.
        However this will not reproduce results from the sample method
        given the same inputs because the RNG will be in a different
        state.*

    :param gq_output_dir: Name of the directory in which the CmdStan output
        files are saved. If unspecified, files will be written to a
        temporary directory which is deleted upon session exit.

    :raises ValueError: if ``mcmc_sample`` is neither a CmdStanMCMC object
        nor a list, is an empty list, or the csv files cannot be assembled
        into a valid sample.
    :raises RuntimeError: if any chain finishes with a non-zero return code.

    :return: CmdStanGQ object
    """
    sample_drawset = None
    if isinstance(mcmc_sample, CmdStanMCMC):
        sample_csv_files = mcmc_sample.runset.csv_files
        sample_drawset = mcmc_sample.get_drawset()
    elif isinstance(mcmc_sample, list):
        # Fail early with a clear message instead of an obscure downstream
        # error (the code below indexes sample_csv_files[0]).
        if not mcmc_sample:
            raise ValueError('mcmc_sample cannot be an empty list')
        sample_csv_files = mcmc_sample
    else:
        raise ValueError(
            'mcmc_sample must be either CmdStanMCMC object'
            ' or list of paths to sample csv_files'
        )

    # One chain per csv file, for both kinds of input.
    chains = len(sample_csv_files)

    if sample_drawset is None:
        # Only csv paths were given: assemble the sample from the files.
        try:
            sampler_args = SamplerArgs()
            args = CmdStanArgs(
                self._name,
                self._exe_file,
                chain_ids=[x + 1 for x in range(chains)],
                method_args=sampler_args,
            )
            runset = RunSet(args=args, chains=chains)
            runset._csv_files = sample_csv_files
            sample_fit = CmdStanMCMC(runset)
            sample_fit._validate_csv_files()
            sample_drawset = sample_fit.get_drawset()
        except ValueError as e:
            # Chain explicitly so the original error is kept as the cause.
            raise ValueError(
                'Invalid mcmc_sample, error:\n\t{}\n\t'
                ' while processing files\n\t{}'.format(
                    repr(e), '\n\t'.join(sample_csv_files)
                )
            ) from e

    generate_quantities_args = GenerateQuantitiesArgs(
        csv_files=sample_csv_files
    )
    generate_quantities_args.validate(chains)
    with MaybeDictToFilePath(data, None) as (_data, _inits):
        args = CmdStanArgs(
            self._name,
            self._exe_file,
            chain_ids=[x + 1 for x in range(chains)],
            data=_data,
            seed=seed,
            output_dir=gq_output_dir,
            method_args=generate_quantities_args,
        )
        runset = RunSet(args=args, chains=chains)

        # At most one worker per chain, leaving two CPUs free when
        # possible, but never fewer than one worker.
        cores_avail = cpu_count()
        cores = max(min(cores_avail - 2, chains), 1)
        with ThreadPoolExecutor(max_workers=cores) as executor:
            for i in range(chains):
                executor.submit(self._run_cmdstan, runset, i)

        if not runset._check_retcodes():
            msg = 'Error during generate_quantities'
            # NOTE: ``i`` is the zero-based chain index, not the 1-based
            # chain id passed to CmdStanArgs.
            for i in range(chains):
                if runset._retcode(i) != 0:
                    msg = '{}, chain {} returned error code {}'.format(
                        msg, i, runset._retcode(i)
                    )
            raise RuntimeError(msg)
        quantities = CmdStanGQ(runset=runset, mcmc_sample=sample_drawset)
        quantities._set_attrs_gq_csv_files(sample_csv_files[0])
    return quantities
def run_generated_quantities(
    self,
    data: Union[Dict, str] = None,
    csv_files: List[str] = None,
    seed: int = None,
    gq_csv_basename: str = None,
) -> StanFit:
    """
    Wrapper for generated quantities call. Given the sampler output csv
    files from a fitted model, along with the corresponding dataset for
    that fit, run just the generated quantities block of the model in
    order to get additional quantities of interest. One run is started
    per csv file.

    :param data: Values for all data variables in the model, specified
        either as a dictionary with entries matching the data variables,
        or as the path of a data file in JSON or Rdump format.

    :param csv_files: A non-empty list of sampler output csv files
        generated by fitting the model to the data, either using
        CmdStanPy's `sample` method or via another Stan interface.

    :param seed: The seed for random number generator. Must be an integer
        between 0 and 2^32 - 1. If unspecified,
        numpy.random.RandomState() is used to generate a seed which will
        be used for all chains.
        *NOTE: Specifying the seed will guarantee the same result for
        multiple invocations of this method with the same inputs.
        However this will not reproduce results from the sample method
        given the same inputs because the RNG will be in a different
        state.*

    :param gq_csv_basename: A path or file name which will be used as the
        basename for the sampler output files. The csv output files
        for each chain are written to file ``<basename>-<chain_id>.csv``
        and the console output and error messages are written to file
        ``<basename>-<chain_id>.txt``.

    :raises ValueError: if ``csv_files`` is missing or empty.
    :raises RuntimeError: if any chain finishes with a non-zero return code.

    :return: StanFit object
    """
    # ``csv_files`` defaults to None; reject it (and an empty list) up
    # front rather than failing later on len() or csv_files[0].
    if csv_files is None or len(csv_files) == 0:
        raise ValueError(
            'csv_files must be a non-empty list of sampler output csv files'
        )

    # One chain per csv file.
    chains = len(csv_files)
    generate_quantities_args = GenerateQuantitiesArgs(csv_files=csv_files)
    generate_quantities_args.validate(chains)

    with MaybeDictToFilePath(data, None) as (_data, _inits):
        args = CmdStanArgs(
            self._name,
            self._exe_file,
            chain_ids=[x + 1 for x in range(chains)],
            data=_data,
            seed=seed,
            output_basename=gq_csv_basename,
            method_args=generate_quantities_args,
        )
        stanfit = StanFit(args=args, chains=chains)

        # At most one worker per chain, leaving two CPUs free when
        # possible, but never fewer than one worker.
        cores_avail = cpu_count()
        cores = max(min(cores_avail - 2, chains), 1)
        with ThreadPoolExecutor(max_workers=cores) as executor:
            for i in range(chains):
                # Pass the callable and its arguments separately so the
                # run happens on a worker thread; calling it inline would
                # run serially and submit its return value instead.
                executor.submit(self._run_cmdstan, stanfit, i)

        if not stanfit._check_retcodes():
            msg = 'Error during generate_quantities'
            # NOTE: ``i`` is the zero-based chain index, not the 1-based
            # chain id passed to CmdStanArgs.
            for i in range(chains):
                if stanfit._retcode(i) != 0:
                    msg = '{}, chain {} returned error code {}'.format(
                        msg, i, stanfit._retcode(i)
                    )
            raise RuntimeError(msg)
        stanfit._set_attrs_gq_csv_files(csv_files[0])
    return stanfit
def generate_quantities(
    self,
    data: Union[Dict, str] = None,
    mcmc_sample: Union[CmdStanMCMC, List[str]] = None,
    seed: int = None,
    gq_csv_basename: str = None,
) -> CmdStanGQ:
    """
    Wrapper for generated quantities call. Given a CmdStanMCMC object
    containing a sample from the fitted model (or the stan-csv files of
    such a sample), along with the corresponding dataset for that fit,
    run just the generated quantities block of the model in order to get
    additional quantities of interest.

    :param data: Values for all data variables in the model, specified
        either as a dictionary with entries matching the data variables,
        or as the path of a data file in JSON or Rdump format.

    :param mcmc_sample: Can be either a CmdStanMCMC object returned by
        CmdStanPy's `sample` method or a non-empty list of stan-csv files
        generated by fitting the model to the data using any Stan
        interface.

    :param seed: The seed for random number generator. Must be an integer
        between ``0`` and ``2^32 - 1``. If unspecified,
        ``numpy.random.RandomState()`` is used to generate a seed which
        will be used for all chains.
        *NOTE: Specifying the seed will guarantee the same result for
        multiple invocations of this method with the same inputs.
        However this will not reproduce results from the sample method
        given the same inputs because the RNG will be in a different
        state.*

    :param gq_csv_basename: A path or file name which will be used as the
        basename for the sampler output files. The csv output files
        for each chain are written to file ``<basename>-<chain_id>.csv``
        and the console output and error messages are written to file
        ``<basename>-<chain_id>.txt``.

    :raises ValueError: if ``mcmc_sample`` is neither a CmdStanMCMC object
        nor a list, is an empty list, or the csv files cannot be assembled
        into a valid sample.
    :raises RuntimeError: if any chain finishes with a non-zero return code.

    :return: CmdStanGQ object
    """
    sample_drawset = None
    if isinstance(mcmc_sample, CmdStanMCMC):
        sample_csv_files = mcmc_sample.runset.csv_files
        sample_drawset = mcmc_sample.get_drawset()
    elif isinstance(mcmc_sample, list):
        # Fail early with a clear message instead of an obscure downstream
        # error (the code below indexes sample_csv_files[0]).
        if not mcmc_sample:
            raise ValueError('mcmc_sample cannot be an empty list')
        sample_csv_files = mcmc_sample
    else:
        raise ValueError(
            'mcmc_sample must be either CmdStanMCMC object'
            ' or list of paths to sample csv_files'
        )

    # One chain per csv file, for both kinds of input.
    chains = len(sample_csv_files)

    if sample_drawset is None:
        # Only csv paths were given: assemble the sample from the files.
        try:
            sampler_args = SamplerArgs()
            args = CmdStanArgs(
                self._name,
                self._exe_file,
                chain_ids=[x + 1 for x in range(chains)],
                method_args=sampler_args,
            )
            runset = RunSet(args=args, chains=chains)
            runset._csv_files = sample_csv_files
            sample_fit = CmdStanMCMC(runset)
            sample_fit._validate_csv_files()
            sample_drawset = sample_fit.get_drawset()
        except ValueError as e:
            # Chain explicitly so the original error is kept as the cause.
            raise ValueError(
                'Invalid mcmc_sample, error:\n\t{}\n\t'
                ' while processing files\n\t{}'.format(
                    repr(e), '\n\t'.join(sample_csv_files)
                )
            ) from e

    generate_quantities_args = GenerateQuantitiesArgs(
        csv_files=sample_csv_files
    )
    generate_quantities_args.validate(chains)
    with MaybeDictToFilePath(data, None) as (_data, _inits):
        args = CmdStanArgs(
            self._name,
            self._exe_file,
            chain_ids=[x + 1 for x in range(chains)],
            data=_data,
            seed=seed,
            output_basename=gq_csv_basename,
            method_args=generate_quantities_args,
        )
        runset = RunSet(args=args, chains=chains)

        # At most one worker per chain, leaving two CPUs free when
        # possible, but never fewer than one worker.
        cores_avail = cpu_count()
        cores = max(min(cores_avail - 2, chains), 1)
        with ThreadPoolExecutor(max_workers=cores) as executor:
            for i in range(chains):
                executor.submit(self._run_cmdstan, runset, i)

        if not runset._check_retcodes():
            msg = 'Error during generate_quantities'
            # NOTE: ``i`` is the zero-based chain index, not the 1-based
            # chain id passed to CmdStanArgs.
            for i in range(chains):
                if runset._retcode(i) != 0:
                    msg = '{}, chain {} returned error code {}'.format(
                        msg, i, runset._retcode(i)
                    )
            raise RuntimeError(msg)
        quantities = CmdStanGQ(runset=runset, mcmc_sample=sample_drawset)
        quantities._set_attrs_gq_csv_files(sample_csv_files[0])
    return quantities