def test_set_mle_attrs(self):
    """Check CmdStanMLE state before and after loading an optimizer CSV.

    A freshly constructed ``CmdStanMLE`` must report the model and method
    in its repr and hold no column names or estimates.  After
    ``_set_mle_attrs`` is pointed at the saved Rosenbrock output, the
    column names and the optimized parameter values must be populated.
    """
    optimize_dir = os.path.join(datafiles_path, 'optimize')
    model = CmdStanModel(
        stan_file=os.path.join(optimize_dir, 'rosenbrock.stan')
    )
    runset = RunSet(
        args=CmdStanArgs(
            model_name=model.name,
            model_exe=model.exe_file,
            chain_ids=None,
            data={},  # the Rosenbrock test model is run without data
            method_args=OptimizeArgs(algorithm='Newton'),
        ),
        chains=1,
    )
    mle = CmdStanMLE(runset)

    # Before any CSV is read: descriptive repr, empty containers.
    for fragment in ('CmdStanMLE: model=rosenbrock', 'method=optimize'):
        self.assertIn(fragment, mle.__repr__())
    self.assertEqual(mle._column_names, ())
    self.assertEqual(mle._mle, {})

    # After reading the stored optimizer output.
    mle._set_mle_attrs(os.path.join(optimize_dir, 'rosenbrock_mle.csv'))
    self.assertEqual(mle.column_names, ('lp__', 'x', 'y'))
    for param in ('x', 'y'):
        self.assertAlmostEqual(
            mle.optimized_params_dict[param], 1, places=3
        )
def optimize(
    self,
    data: Union[Dict, str] = None,
    seed: int = None,
    inits: Union[Dict, float, str] = None,
    output_dir: str = None,
    algorithm: str = None,
    init_alpha: float = None,
    iter: int = None,
) -> CmdStanMLE:
    """
    Run the specified CmdStan optimize algorithm to produce a
    penalized maximum likelihood estimate of the model parameters.

    The configuration is validated, a call to the CmdStan ``optimize``
    method is composed, and a single subprocess is spawned and awaited.
    Arguments left unspecified are omitted from the CmdStan call, so
    CmdStan's own defaults apply to them.

    The returned ``CmdStanMLE`` object records the command, the return
    code, and the paths of the optimizer's output csv and console files.
    Those files go either to ``output_dir`` or, when it is unspecified,
    to a temporary directory which is deleted upon session exit.

    Output filenames follow the template
    '<model_name>-<YYYYMMDDHHMM>-<chain_id>' plus a suffix: '.csv' for
    the CmdStan output, '.txt' for the console messages, e.g.
    'bernoulli-201912081451-1.csv'.  Files written to the temporary
    directory carry an additional 8-character random string, e.g.
    'bernoulli-201912081451-1-5nm6as7u.csv'.

    :param data: Values for all data variables in the model, given
        either as a dictionary with entries matching the data variables
        or as the path of a data file in JSON or Rdump format.

    :param seed: The seed for the random number generator.  Must be an
        integer between 0 and 2^32 - 1.  If unspecified,
        ``numpy.random.RandomState()`` is used to generate a seed.

    :param inits: Specifies how parameter values are initialized.
        Initialization is either uniform random on a range centered on
        0, exactly 0, or a dictionary or file of initial values for some
        or all parameters in the model.  By default all parameters are
        initialized on the range [-2, 2] on the *unconstrained* support;
        if the expected parameter values are too far from this range,
        this option may improve estimation.  Allowed value types:

        * Single number, n > 0 - initialization range is [-n, n].
        * 0 - all parameters are initialized to 0.
        * dictionary - pairs parameter name : initial value.
        * string - pathname to a JSON or Rdump data file.

    :param output_dir: Name of the directory to which CmdStan output
        files are written.  If unspecified, output files are written to
        a temporary directory which is deleted upon session exit.

    :param algorithm: Algorithm to use. One of: 'BFGS', 'LBFGS', 'Newton'

    :param init_alpha: Line search step size for first iteration

    :param iter: Total number of iterations

    :return: CmdStanMLE object
    """
    method_args = OptimizeArgs(
        algorithm=algorithm, init_alpha=init_alpha, iter=iter
    )

    with MaybeDictToFilePath(data, inits) as (data_arg, inits_arg):
        cmdstan_args = CmdStanArgs(
            self._name,
            self._exe_file,
            chain_ids=None,
            data=data_arg,
            seed=seed,
            inits=inits_arg,
            output_dir=output_dir,
            save_diagnostics=False,
            method_args=method_args,
        )
        runset = RunSet(args=cmdstan_args, chains=1)
        # There is only one run, so index 0 is passed as its chain index.
        self._run_cmdstan(runset, 0)

    if runset._check_retcodes():
        return CmdStanMLE(runset)

    raise RuntimeError(
        'Error during optimization.\n{}'.format(runset.get_err_msgs())
    )
def optimize(
    self,
    data: Union[Dict, str] = None,
    seed: int = None,
    inits: Union[Dict, float, str] = None,
    csv_basename: str = None,
    algorithm: str = None,
    init_alpha: float = None,
    iter: int = None,
) -> CmdStanMLE:
    """
    Run the CmdStan optimizer once and return the resulting estimate.

    Builds the optimizer arguments, runs CmdStan a single time, checks
    the return code, and loads the estimates from the first output csv
    file into the returned ``CmdStanMLE`` object.

    :param data: Values for all data variables in the model, specified
        either as a dictionary with entries matching the data variables,
        or as the path of a data file in JSON or Rdump format.

    :param seed: The seed for random number generator. Must be an
        integer between ``0`` and ``2^32 - 1``. If unspecified,
        ``numpy.random.RandomState()`` is used to generate a seed.

    :param inits: Specifies how parameter values are initialized.
        Initialization is either uniform random on a range centered on
        0, exactly 0, or a dictionary or file of initial values for some
        or all parameters in the model. The default initialization
        behavior will initialize all parameter values on range [-2, 2]
        on the *unconstrained* support. If the expected parameter values
        are too far from this range, this option may improve estimation.
        The following value types are allowed:

        * Single number ``n > 0`` - initialization range is [-n, n].
        * ``0`` - all parameters are initialized to 0.
        * dictionary - pairs parameter name : initial value.
        * string - pathname to a JSON or Rdump data file.

    :param csv_basename: A path or file name which will be used as the
        basename for the CmdStan output files. The csv output files are
        written to file ``<basename>-0.csv`` and the console output and
        error messages are written to file ``<basename>-0.txt``.

    :param algorithm: Algorithm to use. One of: "BFGS", "LBFGS", "Newton"

    :param init_alpha: Line search step size for first iteration

    :param iter: Total number of iterations

    :return: CmdStanMLE object

    :raises RuntimeError: if the return-code check on the run fails.
    """
    optimize_args = OptimizeArgs(
        algorithm=algorithm, init_alpha=init_alpha, iter=iter
    )

    with MaybeDictToFilePath(data, inits) as (_data, _inits):
        args = CmdStanArgs(
            self._name,
            self._exe_file,
            chain_ids=None,
            data=_data,
            seed=seed,
            inits=_inits,
            output_basename=csv_basename,
            method_args=optimize_args,
        )

        # Only one run is made; it is addressed as index 0.
        dummy_chain_id = 0
        runset = RunSet(args=args, chains=1)
        self._run_cmdstan(runset, dummy_chain_id)

    if not runset._check_retcodes():
        msg = 'Error during optimizing'
        retcode = runset._retcode(dummy_chain_id)
        if retcode != 0:
            msg = '{}, error code {}'.format(msg, retcode)
        # Raise for every failed check, not only when a non-zero code is
        # available; otherwise a failed run would fall through and its
        # output csv would be parsed as if it were valid.
        raise RuntimeError(msg)

    mle = CmdStanMLE(runset)
    mle._set_mle_attrs(runset.csv_files[0])
    return mle