def test_args_algorithm(self):
     """Check algorithm validation and its rendering by ``compose``."""
     # An unrecognized algorithm name must fail validation.
     bad_args = OptimizeArgs(algorithm='non-valid_algorithm')
     self.assertRaises(ValueError, bad_args.validate)

     # 'Newton' validates and appears lower-cased in the composed command.
     good_args = OptimizeArgs(algorithm='Newton')
     good_args.validate()
     composed = ' '.join(good_args.compose(None, cmd=['output']))
     self.assertIn('algorithm=newton', composed)
 def test_args_algorithm_iter(self):
     """Check ``iter`` validation and its rendering by ``compose``."""
     valid_args = OptimizeArgs(iter=400)
     valid_args.validate()
     rendered = ' '.join(valid_args.compose(None, cmd=['output']))
     self.assertIn('iter=400', rendered)

     # A negative iteration count must be rejected.
     negative_args = OptimizeArgs(iter=-1)
     self.assertRaises(ValueError, negative_args.validate)
 def test_args_algorithm_iter(self):
     """Verify ``iter=400`` is accepted and ``iter=-1`` is rejected."""
     accepted = OptimizeArgs(iter=400)
     accepted.validate()
     self.assertIn('iter=400', accepted.compose(None, 'output'))

     # A negative iteration count must fail validation.
     rejected = OptimizeArgs(iter=-1)
     with self.assertRaises(ValueError):
         rejected.validate()
 def test_args_algorithm(self):
     """Verify algorithm names are validated and rendered by ``compose``."""
     unknown = OptimizeArgs(algorithm='non-valid_algorithm')
     with self.assertRaises(ValueError):
         unknown.validate()

     newton = OptimizeArgs(algorithm='Newton')
     newton.validate()
     # The composed command carries the algorithm name in lower case.
     self.assertIn('algorithm=newton', newton.compose(None, 'output'))
    def test_set_mle_attrs(self):
        """Exercise ``CmdStanMLE`` before and after loading an output CSV."""
        optimize_dir = os.path.join(datafiles_path, 'optimize')
        model = CmdStanModel(
            stan_file=os.path.join(optimize_dir, 'rosenbrock.stan')
        )
        method_args = OptimizeArgs(algorithm='Newton')
        runset = RunSet(
            args=CmdStanArgs(
                model_name=model.name,
                model_exe=model.exe_file,
                chain_ids=None,
                data={},
                method_args=method_args,
            ),
            chains=1,
        )
        mle = CmdStanMLE(runset)

        # The repr names both the model and the CmdStan method.
        description = repr(mle)
        self.assertIn('CmdStanMLE: model=rosenbrock', description)
        self.assertIn('method=optimize', description)

        # Before any CSV is loaded the private result fields are empty.
        self.assertEqual(mle._column_names, ())
        self.assertEqual(mle._mle, {})

        # Loading the CSV fills in the column names and the estimates.
        mle._set_mle_attrs(os.path.join(optimize_dir, 'rosenbrock_mle.csv'))
        self.assertEqual(mle.column_names, ('lp__', 'x', 'y'))
        for name in ('x', 'y'):
            self.assertAlmostEqual(
                mle.optimized_params_dict[name], 1, places=3
            )
    def test_args_algorithm_init_alpha(self):
        """Check ``init_alpha`` validation and its rendering by ``compose``."""
        valid_args = OptimizeArgs(init_alpha=2e-4)
        valid_args.validate()
        rendered = ' '.join(valid_args.compose(None, cmd=['output']))
        self.assertIn('init_alpha=0.0002', rendered)

        # A negative step size, and init_alpha combined with the Newton
        # algorithm, must both fail validation.
        for invalid_kwargs in (
            {'init_alpha': -1.0},
            {'init_alpha': 1.0, 'algorithm': 'Newton'},
        ):
            invalid_args = OptimizeArgs(**invalid_kwargs)
            with self.assertRaises(ValueError):
                invalid_args.validate()
Beispiel #7
0
    def optimize(
        self,
        data: Union[Dict, str] = None,
        seed: int = None,
        inits: Union[Dict, float, str] = None,
        output_dir: str = None,
        algorithm: str = None,
        init_alpha: float = None,
        iter: int = None,
    ) -> CmdStanMLE:
        """
        Run CmdStan's ``optimize`` method and return the resulting estimate.

        The configuration is validated, a call to the CmdStan ``optimize``
        method is composed, and a single subprocess is spawned and awaited.
        The result is a penalized maximum likelihood estimate of the model
        parameters.  Arguments left unspecified are omitted from the CmdStan
        call, so CmdStan's own defaults apply to them.

        The returned ``CmdStanMLE`` object records the command, the return
        code, and the paths to the output csv and console files.

        Output files are written to ``output_dir`` if given, otherwise to a
        temporary directory which is deleted upon session exit.  Output
        filenames follow the template
        '<model_name>-<YYYYMMDDHHMM>-<chain_id>' plus a suffix: '.csv' for
        the CmdStan output or '.txt' for the console messages, e.g.
        'bernoulli-201912081451-1.csv'.  Files written to the temporary
        directory contain an additional 8-character random string, e.g.
        'bernoulli-201912081451-1-5nm6as7u.csv'.

        :param data: Values for all data variables in the model, given
            either as a dictionary with entries matching the data variables,
            or as the path of a data file in JSON or Rdump format.

        :param seed: The seed for the random number generator. Must be an
            integer between 0 and 2^32 - 1. If unspecified,
            ``numpy.random.RandomState()`` is used to generate a seed.

        :param inits: Specifies how parameter values are initialized.
            Initialization is either uniform random on a range centered on
            0, exactly 0, or a dictionary or file of initial values for some
            or all parameters in the model.  By default all parameter values
            are initialized on the range [-2, 2] on the *unconstrained*
            support.  If the expected parameter values are too far from this
            range, this option may improve estimation.
            The following value types are allowed:

            * Single number, n > 0 - initialization range is [-n, n].
            * 0 - all parameters are initialized to 0.
            * dictionary - pairs parameter name : initial value.
            * string - pathname to a JSON or Rdump data file.

        :param output_dir: Name of the directory to which CmdStan output
            files are written. If unspecified, output files are written to
            a temporary directory which is deleted upon session exit.

        :param algorithm: Algorithm to use. One of: 'BFGS', 'LBFGS', 'Newton'

        :param init_alpha: Line search step size for first iteration

        :param iter: Total number of iterations

        :return: CmdStanMLE object

        :raises RuntimeError: if the run set reports a failed return code.
        """
        method_args = OptimizeArgs(
            algorithm=algorithm, init_alpha=init_alpha, iter=iter
        )

        with MaybeDictToFilePath(data, inits) as (data_arg, inits_arg):
            cmdstan_args = CmdStanArgs(
                self._name,
                self._exe_file,
                chain_ids=None,
                data=data_arg,
                seed=seed,
                inits=inits_arg,
                output_dir=output_dir,
                save_diagnostics=False,
                method_args=method_args,
            )
            runset = RunSet(args=cmdstan_args, chains=1)
            # Only one process is run; index 0 is a placeholder chain id.
            placeholder_chain = 0
            self._run_cmdstan(runset, placeholder_chain)

        if runset._check_retcodes():
            return CmdStanMLE(runset)

        raise RuntimeError(
            'Error during optimization.\n{}'.format(runset.get_err_msgs())
        )
Beispiel #8
0
    def optimize(
        self,
        data: Union[Dict, str] = None,
        seed: int = None,
        inits: Union[Dict, float, str] = None,
        csv_basename: str = None,
        algorithm: str = None,
        init_alpha: float = None,
        iter: int = None,
    ) -> StanFit:
        """
        Wrapper for the CmdStan optimize call.

        :param data: Values for all data variables in the model, given
            either as a dictionary with entries matching the data variables,
            or as the path of a data file in JSON or Rdump format.

        :param seed: The seed for the random number generator. Must be an
            integer between 0 and 2^32 - 1. If unspecified,
            numpy.random.RandomState() is used to generate a seed which
            will be used for all chains.

        :param inits: Specifies how parameter values are initialized.
            Initialization is either uniform random on a range centered on
            0, exactly 0, or a dictionary or file of initial values for some
            or all parameters in the model.  The default initialization
            behavior will initialize all parameter values on range [-2, 2]
            on the _unconstrained_ support.  If the expected parameter
            values are too far from this range, this option may improve
            adaptation.
            The following value types are allowed:

            * Single number ``n > 0`` - initialization range is [-n, n].
            * ``0`` - all parameters are initialized to 0.
            * dictionary - pairs parameter name : initial value.
            * string - pathname to a JSON or Rdump data file.

        :param csv_basename: A path or file name which will be used as the
            base name for the output files.  The csv output files
            for each chain are written to file ``<basename>-0.csv``
            and the console output and error messages are written to file
            ``<basename>-0.txt``.

        :param algorithm: Algorithm to use. One of: "BFGS", "LBFGS", "Newton"

        :param init_alpha: Line search step size for first iteration

        :param iter: Total number of iterations

        :return: StanFit object

        :raises RuntimeError: if the fit reports a failed return code.
        """
        method_args = OptimizeArgs(
            algorithm=algorithm, init_alpha=init_alpha, iter=iter
        )

        with MaybeDictToFilePath(data, inits) as (data_arg, inits_arg):
            cmdstan_args = CmdStanArgs(
                self._name,
                self._exe_file,
                chain_ids=None,
                data=data_arg,
                seed=seed,
                inits=inits_arg,
                output_basename=csv_basename,
                method_args=method_args,
            )
            stanfit = StanFit(args=cmdstan_args, chains=1)
            # Only one process is run; index 0 is a placeholder chain id.
            placeholder_chain = 0
            self._do_sample(stanfit, placeholder_chain)

        if stanfit._check_retcodes():
            stanfit._validate_csv_files()
            return stanfit

        # Append the return code to the message only when it is non-zero.
        msg = 'Error during optimizing'
        retcode = stanfit._retcode(placeholder_chain)
        if retcode != 0:
            msg = '{} Got returned error code {}'.format(msg, retcode)
        raise RuntimeError(msg)