예제 #1
0
    def test_no_chains(self):
        """Exercise CmdStanArgs construction when ``chain_ids`` is None.

        With a single inits file the composed command must contain
        ``init=``; a list of seeds or a list of inits files must be
        rejected with ValueError.
        """
        inits_file = os.path.join(datafiles_path, 'bernoulli.init.json')
        common = {
            'model_name': 'bernoulli',
            'model_exe': os.path.join(datafiles_path, 'bernoulli'),
            'chain_ids': None,
            'method_args': FixedParamArgs(),
        }

        # Accepted: one inits file and no chain ids.
        good_args = CmdStanArgs(inits=inits_file, **common)
        command = good_args.compose_command(None, 'out.csv')
        self.assertIn('init=', command)

        # Rejected: a list of seeds while chain_ids is None.
        with self.assertRaises(ValueError):
            CmdStanArgs(seed=[1, 2, 3], inits=inits_file, **common)

        # Rejected: a list of inits files while chain_ids is None.
        with self.assertRaises(ValueError):
            CmdStanArgs(inits=[inits_file], **common)
예제 #2
0
    def test_no_chains(self):
        """Check that list-valued seed/inits raise when ``chain_ids`` is None."""
        inits_file = os.path.join(datafiles_path, 'bernoulli.init.json')
        shared = {
            'model_name': 'bernoulli',
            'model_exe': os.path.join(datafiles_path, 'bernoulli'),
            'chain_ids': None,
            'data': os.path.join(datafiles_path, 'bernoulli.data.json'),
            'method_args': SamplerArgs(),
        }

        # A list of seeds is rejected.
        with self.assertRaises(ValueError):
            CmdStanArgs(seed=[1, 2, 3], inits=inits_file, **shared)

        # A list of inits files is rejected.
        with self.assertRaises(ValueError):
            CmdStanArgs(inits=[inits_file], **shared)
예제 #3
0
    def test_save_latent_dynamics(self):
        """Check where RunSet puts diagnostic files for latent dynamics.

        Without ``output_dir`` the first diagnostic file path must contain
        ``_TMPDIR``; with ``output_dir`` set to the current directory it
        must contain that directory's absolute path.
        """
        base_kwargs = {
            'model_name': 'bernoulli',
            'model_exe': os.path.join(DATAFILES_PATH, 'bernoulli' + EXTENSION),
            'chain_ids': [1, 2, 3, 4],
            'data': os.path.join(DATAFILES_PATH, 'bernoulli.data.json'),
            'method_args': SamplerArgs(),
            'save_latent_dynamics': True,
        }

        # Case 1: no output directory given.
        tmp_run = RunSet(args=CmdStanArgs(**base_kwargs), chains=4)
        self.assertIn(_TMPDIR, tmp_run.diagnostic_files[0])

        # Case 2: explicit output directory.
        explicit_args = CmdStanArgs(
            output_dir=os.path.abspath('.'), **base_kwargs
        )
        explicit_run = RunSet(args=explicit_args, chains=4)
        self.assertIn(os.path.abspath('.'), explicit_run.diagnostic_files[0])
예제 #4
0
 def test_validate_big_run(self):
     """Validate a CmdStanMCMC fit built from the 'runset-big' csv fixture.

     The same csv file is assigned to both chains; the test then checks
     column names, metric/step-size shapes, draw shapes, and that asking
     for an unknown parameter raises ValueError.
     """
     big_csv = os.path.join(
         DATAFILES_PATH, 'runset-big', 'output_icar_nyc-1.csv'
     )
     run_args = CmdStanArgs(
         model_name='bernoulli',
         model_exe=os.path.join(DATAFILES_PATH, 'bernoulli' + EXTENSION),
         chain_ids=[1, 2],
         seed=12345,
         output_dir=DATAFILES_PATH,
         method_args=SamplerArgs(iter_warmup=1500, iter_sampling=1000),
     )
     runset = RunSet(args=run_args, chains=2)
     runset._csv_files = [big_csv, big_csv]
     fit = CmdStanMCMC(runset)

     # Expected columns: sampler state followed by phi[1] .. phi[2095].
     phi_names = ['phi[{}]'.format(idx) for idx in range(1, 2096)]
     expected_columns = tuple(SAMPLER_STATE + phi_names)

     self.assertEqual(fit.num_draws_sampling, 1000)
     self.assertEqual(fit.column_names, expected_columns)
     self.assertEqual(fit.metric_type, 'diag_e')
     self.assertEqual(fit.step_size.shape, (2, ))
     self.assertEqual(fit.metric.shape, (2, 2095))
     self.assertEqual((1000, 2, 2102), fit.draws().shape)

     phi_draws = fit.draws_pd(params=['phi'])
     self.assertEqual((2000, 2095), phi_draws.shape)

     with self.assertRaisesRegex(ValueError, r'unknown parameter: gamma'):
         fit.draws_pd(params=['gamma'])
예제 #5
0
 def test_instantiate(self):
     """Build a CmdStanVB from an existing csv and check its contents."""
     stan_file = os.path.join(
         DATAFILES_PATH, 'variational', 'eta_should_be_big.stan'
     )
     model = CmdStanModel(stan_file=stan_file)
     vb_args = VariationalArgs(algorithm='meanfield')
     run_args = CmdStanArgs(
         model_name=model.name,
         model_exe=model.exe_file,
         chain_ids=None,
         data={},
         method_args=vb_args,
     )
     runset = RunSet(args=run_args, chains=1)
     # Point the run at a pre-computed output file.
     runset._csv_files = [
         os.path.join(DATAFILES_PATH, 'variational', 'eta_big_output.csv')
     ]
     variational = CmdStanVB(runset)

     description = repr(variational)
     for fragment in (
         'CmdStanVB: model=eta_should_be_big',
         'method=variational',
     ):
         self.assertIn(fragment, description)

     self.assertEqual(
         variational.column_names,
         ('lp__', 'log_p__', 'log_g__', 'mu[1]', 'mu[2]'),
     )
     for name, expected_mean in (('mu[1]', 31.0299), ('mu[2]', 28.8141)):
         self.assertAlmostEqual(
             variational.variational_params_dict[name],
             expected_mean,
             places=2,
         )
     self.assertEqual(variational.variational_sample.shape, (1000, 5))
예제 #6
0
 def __init__(self,
              args: CmdStanArgs,
              chains: int = 4,
              logger: logging.Logger = None) -> None:
     """Initialize object.

     Stores the run configuration and derives, for each chain, the output
     csv file name, the console output file name and the command.

     :param args: run arguments; this method reads ``output_basename``,
         ``model_name`` and ``method`` and calls ``compose_command``.
     :param chains: number of chains; must be at least 1.
     :param logger: logger to use; when falsy, ``get_logger()`` is used.
     :raises ValueError: if ``chains`` is less than 1.
     """
     self._args = args
     self._chains = chains
     self._logger = logger or get_logger()
     if chains < 1:
         # The placeholder used to be the malformed '{i]}', which made
         # str.format() fail before the ValueError could be raised.
         raise ValueError('chains must be positive integer value, '
                          'found {}'.format(chains))
     self._csv_files = []
     if args.output_basename is None:
         # No basename given: create one named text file per chain in
         # TMPDIR, tagged with model name, method and 1-based chain index.
         csv_basename = 'stan-{}-{}'.format(args.model_name, args.method)
         for i in range(chains):
             fd_name = create_named_text_file(
                 dir=TMPDIR,
                 prefix='{}-{}-'.format(csv_basename, i + 1),
                 suffix='.csv',
             )
             self._csv_files.append(fd_name)
     else:
         # Basename given: '<basename>-<chain index>.csv', 1-based.
         for i in range(chains):
             self._csv_files.append('{}-{}.csv'.format(
                 args.output_basename, i + 1))
     # Console files share the csv file's stem, with a '.txt' extension.
     self._console_files = [
         os.path.splitext(csv_file)[0] + '.txt'
         for csv_file in self._csv_files
     ]
     self._cmds = [
         args.compose_command(i, self._csv_files[i]) for i in range(chains)
     ]
     # -1 is the initial return code for every chain.
     self._retcodes = [-1 for _ in range(chains)]
예제 #7
0
 def test_variables(self):
     """Check Stan variable names, dimensions and extracted array shapes.

     The fit is built from the existing ``lotka-volterra.csv`` sampler
     output instead of running the sampler.
     """
     # construct fit using existing sampler output
     exe = os.path.join(DATAFILES_PATH, 'lotka-volterra' + EXTENSION)
     jdata = os.path.join(DATAFILES_PATH, 'lotka-volterra.data.json')
     sampler_args = SamplerArgs(iter_sampling=20)
     cmdstan_args = CmdStanArgs(
         model_name='lotka-volterra',
         model_exe=exe,
         chain_ids=[1],
         seed=12345,
         data=jdata,
         output_dir=DATAFILES_PATH,
         method_args=sampler_args,
     )
     runset = RunSet(args=cmdstan_args, chains=1)
     runset._csv_files = [
         os.path.join(DATAFILES_PATH, 'lotka-volterra.csv')
     ]
     runset._set_retcode(0, 0)
     fit = CmdStanMCMC(runset)
     self.assertEqual(20, fit.num_draws)
     self.assertEqual(8, len(fit._stan_variable_dims))
     # assertIn reports both operands on failure, unlike assertTrue(a in b).
     self.assertIn('z', fit._stan_variable_dims)
     self.assertEqual(fit._stan_variable_dims['z'], (20, 2))
     # Named stan_vars so the builtin vars() is not shadowed.
     stan_vars = fit.stan_variables()
     self.assertEqual(len(stan_vars), len(fit._stan_variable_dims))
     self.assertIn('z', stan_vars)
     self.assertEqual(stan_vars['z'].shape, (20, 20, 2))
     self.assertIn('theta', stan_vars)
     self.assertEqual(stan_vars['theta'].shape, (20, 4))
예제 #8
0
 def test_diagnose_divergences(self):
     """Check that diagnose() output contains the treedepth report."""
     fixture_csv = os.path.join(
         DATAFILES_PATH, 'diagnose-good', 'corr_gauss_depth8-1.csv'
     )
     run_args = CmdStanArgs(
         model_name='bernoulli',
         model_exe=os.path.join(DATAFILES_PATH, 'bernoulli' + EXTENSION),
         chain_ids=[1],
         output_dir=DATAFILES_PATH,
         method_args=SamplerArgs(),
     )
     runset = RunSet(args=run_args, chains=1)
     runset._csv_files = [fixture_csv]
     fit = CmdStanMCMC(runset)

     # TODO - use cmdstan test files instead
     report_lines = (
         'Checking sampler transitions treedepth.',
         '424 of 1000 (42%) transitions hit the maximum '
         'treedepth limit of 8, or 2^8 leapfrog steps.',
         'Trajectories that are prematurely terminated '
         'due to this limit will result in slow exploration.',
         'For optimal performance, increase this limit.',
     )
     expected = '\n'.join(report_lines)
     # Normalise Windows line endings before comparing.
     actual = fit.diagnose().replace('\r\n', '\n')
     self.assertIn(expected, actual)
예제 #9
0
    def test_set_mle_attrs(self):
        """Check CmdStanMLE state before and after loading an output csv."""
        model = CmdStanModel(
            stan_file=os.path.join(
                datafiles_path, 'optimize', 'rosenbrock.stan'
            )
        )
        optimize_args = OptimizeArgs(algorithm='Newton')
        run_args = CmdStanArgs(
            model_name=model.name,
            model_exe=model.exe_file,
            chain_ids=None,
            data={},
            method_args=optimize_args,
        )
        mle = CmdStanMLE(RunSet(args=run_args, chains=1))

        description = repr(mle)
        for fragment in ('CmdStanMLE: model=rosenbrock', 'method=optimize'):
            self.assertIn(fragment, description)

        # State right after construction.
        self.assertEqual(mle._column_names, ())
        self.assertEqual(mle._mle, {})

        # Load the csv fixture and check the parsed attributes.
        csv_path = os.path.join(
            datafiles_path, 'optimize', 'rosenbrock_mle.csv'
        )
        mle._set_mle_attrs(csv_path)
        self.assertEqual(mle.column_names, ('lp__', 'x', 'y'))
        for param in ('x', 'y'):
            self.assertAlmostEqual(
                mle.optimized_params_dict[param], 1, places=3
            )
예제 #10
0
    def test_check_retcodes(self):
        """Check RunSet return-code bookkeeping for four chains.

        All codes start at -1; ``_check_retcodes()`` must be false until
        every chain's code has been set to 0.
        """
        run_args = CmdStanArgs(
            model_name='bernoulli',
            model_exe=os.path.join(DATAFILES_PATH, 'bernoulli' + EXTENSION),
            chain_ids=[1, 2, 3, 4],  # default
            data=os.path.join(DATAFILES_PATH, 'bernoulli.data.json'),
            method_args=SamplerArgs(),
        )
        runset = RunSet(args=run_args, chains=4)

        codes = runset._retcodes
        self.assertEqual(4, len(codes))
        for chain_idx, _ in enumerate(codes):
            self.assertEqual(-1, runset._retcode(chain_idx))

        # Only the first chain has finished.
        runset._set_retcode(0, 0)
        self.assertEqual(0, runset._retcode(0))
        remaining = range(1, len(codes))
        for chain_idx in remaining:
            self.assertEqual(-1, runset._retcode(chain_idx))
        self.assertFalse(runset._check_retcodes())

        # Now every chain has finished.
        for chain_idx in range(1, len(codes)):
            runset._set_retcode(chain_idx, 0)
        self.assertTrue(runset._check_retcodes())
예제 #11
0
    def test_set_variational_attrs(self):
        """Check CmdStanVB state before and after loading an output csv."""
        model = CmdStanModel(
            stan_file=os.path.join(
                datafiles_path, 'variational', 'eta_should_be_big.stan'
            )
        )
        vb_args = VariationalArgs(algorithm='meanfield')
        run_args = CmdStanArgs(
            model_name=model.name,
            model_exe=model.exe_file,
            chain_ids=None,
            data={},
            method_args=vb_args,
        )
        vi = CmdStanVB(RunSet(args=run_args, chains=1))

        description = repr(vi)
        for fragment in (
            'CmdStanVB: model=eta_should_be_big',
            'method=variational',
        ):
            self.assertIn(fragment, description)

        # check CmdStanVB.__init__ state
        self.assertEqual(vi._column_names, ())
        self.assertEqual(vi._variational_mean, {})
        self.assertEqual(vi._variational_sample, None)

        # process csv file, check attrs
        csv_path = os.path.join(
            datafiles_path, 'variational', 'eta_big_output.csv'
        )
        vi._set_variational_attrs(csv_path)
        self.assertEqual(
            vi.column_names,
            ('lp__', 'log_p__', 'log_g__', 'mu.1', 'mu.2'),
        )
        for name, expected_mean in (('mu.1', 31.0299), ('mu.2', 28.8141)):
            self.assertAlmostEqual(
                vi.variational_params_dict[name], expected_mean, places=2
            )
        self.assertEqual(vi.variational_sample.shape, (1000, 5))
예제 #12
0
    def test_good(self):
        """Check InferenceMetadata built from an existing sampler csv.

        The config parsed by ``check_sampler_csv`` from the last csv file
        of the run must round-trip through ``InferenceMetadata``, and the
        sampler/Stan variable maps must match the bernoulli model.
        """
        # construct fit using existing sampler output
        exe = os.path.join(DATAFILES_PATH, 'bernoulli' + EXTENSION)
        jdata = os.path.join(DATAFILES_PATH, 'bernoulli.data.json')
        sampler_args = SamplerArgs(iter_sampling=100,
                                   max_treedepth=11,
                                   adapt_delta=0.95)
        cmdstan_args = CmdStanArgs(
            model_name='bernoulli',
            model_exe=exe,
            chain_ids=[1, 2, 3, 4],
            seed=12345,
            data=jdata,
            output_dir=DATAFILES_PATH,
            method_args=sampler_args,
        )
        runset = RunSet(args=cmdstan_args)
        runset._csv_files = [
            os.path.join(DATAFILES_PATH, 'runset-good', 'bern-1.csv'),
            os.path.join(DATAFILES_PATH, 'runset-good', 'bern-2.csv'),
            os.path.join(DATAFILES_PATH, 'runset-good', 'bern-3.csv'),
            os.path.join(DATAFILES_PATH, 'runset-good', 'bern-4.csv'),
        ]
        retcodes = runset._retcodes
        for chain_idx in range(len(retcodes)):
            runset._set_retcode(chain_idx, 0)
        # The last chain's file is selected explicitly; previously this
        # relied on the loop variable still being bound after the loop.
        last_idx = len(retcodes) - 1
        config = check_sampler_csv(
            path=runset.csv_files[last_idx],
            is_fixed_param=False,
            iter_sampling=100,
            iter_warmup=1000,
            save_warmup=False,
            thin=1,
        )
        expected = 'Metadata:\n{}\n'.format(config)
        metadata = InferenceMetadata(config)
        actual = '{}'.format(metadata)
        self.assertEqual(expected, actual)
        self.assertEqual(config, metadata.cmdstan_config)

        hmc_vars = {
            'lp__',
            'accept_stat__',
            'stepsize__',
            'treedepth__',
            'n_leapfrog__',
            'divergent__',
            'energy__',
        }

        sampler_vars_cols = metadata.sampler_vars_cols
        self.assertEqual(hmc_vars, sampler_vars_cols.keys())
        bern_model_vars = {'theta'}
        self.assertEqual(bern_model_vars, metadata.stan_vars_dims.keys())
        self.assertEqual((), metadata.stan_vars_dims['theta'])
        self.assertEqual(bern_model_vars, metadata.stan_vars_cols.keys())
        self.assertEqual((7, ), metadata.stan_vars_cols['theta'])
예제 #13
0
    def test_validate_good_run(self):
        """Validate a CmdStanMCMC fit built from the 'runset-good' csv files.

        Checks return-code handling, draw and column counts, the drawset
        shape, that ``summary()`` runs, and the text of ``diagnose()``.
        """
        # construct fit using existing sampler output
        exe = os.path.join(DATAFILES_PATH, 'bernoulli' + EXTENSION)
        jdata = os.path.join(DATAFILES_PATH, 'bernoulli.data.json')
        sampler_args = SamplerArgs(iter_sampling=100,
                                   max_treedepth=11,
                                   adapt_delta=0.95)
        cmdstan_args = CmdStanArgs(
            model_name='bernoulli',
            model_exe=exe,
            chain_ids=[1, 2, 3, 4],
            seed=12345,
            data=jdata,
            output_dir=DATAFILES_PATH,
            method_args=sampler_args,
        )
        runset = RunSet(args=cmdstan_args, chains=4)
        runset._csv_files = [
            os.path.join(DATAFILES_PATH, 'runset-good', 'bern-1.csv'),
            os.path.join(DATAFILES_PATH, 'runset-good', 'bern-2.csv'),
            os.path.join(DATAFILES_PATH, 'runset-good', 'bern-3.csv'),
            os.path.join(DATAFILES_PATH, 'runset-good', 'bern-4.csv'),
        ]
        self.assertEqual(4, runset.chains)
        retcodes = runset._retcodes
        for i in range(len(retcodes)):
            runset._set_retcode(i, 0)
        self.assertTrue(runset._check_retcodes())

        fit = CmdStanMCMC(runset)
        self.assertEqual(100, fit.num_draws)
        self.assertEqual(len(BERNOULLI_COLS), len(fit.column_names))
        self.assertEqual('lp__', fit.column_names[0])

        drawset = fit.get_drawset()
        self.assertEqual(
            drawset.shape,
            (fit.runset.chains * fit.num_draws, len(fit.column_names)),
        )
        # Smoke check only: the test fails if summary() raises. The old
        # follow-up assertTrue(True) could never fail and was removed.
        fit.summary()

        # TODO - use cmdstan test files instead
        expected = '\n'.join([
            'Checking sampler transitions treedepth.',
            'Treedepth satisfactory for all transitions.',
            '\nChecking sampler transitions for divergences.',
            'No divergent transitions found.',
            '\nChecking E-BFMI - sampler transitions HMC potential energy.',
            'E-BFMI satisfactory for all transitions.',
            '\nEffective sample size satisfactory.',
        ])
        self.assertIn(expected, fit.diagnose().replace('\r\n', '\n'))
예제 #14
0
    def test_args_good(self):
        """Check the command composed by CmdStanArgs for valid sampler args."""
        shared = {
            'model_name': 'bernoulli',
            'model_exe': os.path.join(datafiles_path, 'bernoulli'),
            'data': os.path.join(datafiles_path, 'bernoulli.data.json'),
            'method_args': SamplerArgs(),
        }

        # Chain ids 1..4: the first chain's command carries id=1.
        default_ids = CmdStanArgs(chain_ids=[1, 2, 3, 4], **shared)
        self.assertEqual(default_ids.method, Method.SAMPLE)
        command_text = ' '.join(
            default_ids.compose_command(idx=0, csv_file='bern-output-1.csv')
        )
        for fragment in (
            'id=1 random seed=',
            'data file=',
            'output file=',
            'method=sample algorithm=hmc',
        ):
            self.assertIn(fragment, command_text)

        # Custom chain ids: index 0 maps to the first listed id.
        custom_ids = CmdStanArgs(chain_ids=[7, 11, 18, 29], **shared)
        command_text = ' '.join(
            custom_ids.compose_command(idx=0, csv_file='bern-output-1.csv')
        )
        self.assertIn('id=7 random seed=', command_text)
예제 #15
0
    def test_gen_quantities_good(self):
        """Run generated quantities on existing sampler csv files.

        A StanFit is synthesized from the 'runset-good' fixtures, then
        ``run_generated_quantities`` is checked for chain count, return
        codes, output files, column names and draw count.
        """
        model = Model(
            stan_file=os.path.join(datafiles_path, 'bernoulli_ppc.stan')
        )
        model.compile()

        data_file = os.path.join(datafiles_path, 'bernoulli.data.json')

        # synthesize stanfit object -
        # see test_stanfit.py, method 'test_validate_good_run'
        good_dir = os.path.join(datafiles_path, 'runset-good')
        basename = os.path.join(good_dir, 'bern')
        method_args = SamplerArgs(
            sampling_iters=100, max_treedepth=11, adapt_delta=0.95
        )
        run_args = CmdStanArgs(
            model_name=model.name,
            model_exe=model.exe_file,
            chain_ids=[1, 2, 3, 4],
            seed=12345,
            data=data_file,
            output_basename=basename,
            method_args=method_args,
        )
        sampler_fit = StanFit(args=run_args, chains=4)
        for chain_idx in range(4):
            sampler_fit._set_retcode(chain_idx, 0)

        bern_fit = model.run_generated_quantities(
            csv_files=sampler_fit.csv_files,
            data=data_file,
        )

        # check results - output files, quantities of interest, draws
        self.assertEqual(bern_fit.chains, 4)
        for chain_idx in range(4):
            self.assertEqual(bern_fit._retcodes[chain_idx], 0)
            self.assertTrue(os.path.exists(bern_fit.csv_files[chain_idx]))
        expected_columns = tuple(
            'y_rep.{}'.format(k) for k in range(1, 11)
        )
        self.assertEqual(bern_fit.column_names, expected_columns)
        self.assertEqual(bern_fit.draws, 100)
예제 #16
0
 def test_commands(self):
     """Check that per-chain commands carry the matching chain id."""
     run_args = CmdStanArgs(
         model_name='bernoulli',
         model_exe=os.path.join(DATAFILES_PATH, 'bernoulli' + EXTENSION),
         chain_ids=[1, 2, 3, 4],
         data=os.path.join(DATAFILES_PATH, 'bernoulli.data.json'),
         method_args=SamplerArgs(),
     )
     commands = RunSet(args=run_args, chains=4)._cmds
     # First and last chain.
     for position, chain_id in ((0, 1), (3, 4)):
         self.assertIn('id={}'.format(chain_id), commands[position])
예제 #17
0
 def test_validate_big_run(self):
     """Validate a CmdStanMCMC fit built from the 'runset-big' csv fixture.

     The same csv file is assigned to both chains. After validating the
     csv files, the test checks columns, metric and step-size shapes, the
     sample shape, and drawset selection by parameter name.
     """
     big_csv = os.path.join(
         DATAFILES_PATH, 'runset-big', 'output_icar_nyc-1.csv'
     )
     run_args = CmdStanArgs(
         model_name='bernoulli',
         model_exe=os.path.join(DATAFILES_PATH, 'bernoulli' + EXTENSION),
         chain_ids=[1, 2],
         seed=12345,
         output_dir=DATAFILES_PATH,
         method_args=SamplerArgs(),
     )
     runset = RunSet(args=run_args, chains=2)
     runset._csv_files = [big_csv, big_csv]
     fit = CmdStanMCMC(runset)
     fit._validate_csv_files()

     # Expected columns: sampler state followed by phi.1 .. phi.2095.
     expected_columns = [
         'lp__',
         'accept_stat__',
         'stepsize__',
         'treedepth__',
         'n_leapfrog__',
         'divergent__',
         'energy__',
     ]
     expected_columns.extend(
         'phi.{}'.format(idx) for idx in range(1, 2096)
     )
     self.assertEqual(fit.columns, len(expected_columns))
     self.assertEqual(fit.column_names, tuple(expected_columns))
     self.assertEqual(fit.metric_type, 'diag_e')
     self.assertEqual(fit.stepsize.shape, (2, ))
     self.assertEqual(fit.metric.shape, (2, 2095))
     self.assertEqual((1000, 2, 2102), fit.sample.shape)

     # Valid selections and the drawset shape each must produce.
     selections = (
         (['phi'], (2000, 2095)),
         (['phi.1'], (2000, 1)),
         (['phi.1', 'phi.10', 'phi.100'], (2000, 3)),
         (['phi.2095'], (2000, 1)),
     )
     for params, expected_shape in selections:
         drawset = fit.get_drawset(params=params)
         self.assertEqual(expected_shape, drawset.shape)

     # Unknown names must raise.
     for unknown in ('phi.2096', 'ph'):
         with self.assertRaises(Exception):
             fit.get_drawset(params=[unknown])
예제 #18
0
파일: stanfit.py 프로젝트: clonyjr/prophet
 def __init__(self,
              args: CmdStanArgs,
              chains: int = 4,
              logger: logging.Logger = None) -> None:
     """Initialize object.

     Stores the run configuration, derives the per-chain csv file names,
     console file names and commands, and resets all result attributes.

     :param args: run arguments; this method reads ``method_args``,
         ``output_basename`` and ``model_name`` and calls
         ``compose_command``.
     :param chains: number of chains; must be at least 1.
     :param logger: logger to use; when falsy, ``get_logger()`` is used.
     :raises ValueError: if ``chains`` is less than 1.
     """
     self._args = args
     self._is_optimizing = isinstance(self._args.method_args, OptimizeArgs)
     self._chains = chains
     self._logger = logger or get_logger()
     if chains < 1:
         # The placeholder used to be the malformed '{i]}', which made
         # str.format() fail before the ValueError could be raised.
         raise ValueError('chains must be positive integer value, '
                          'found {}'.format(chains))
     self.csv_files = []
     """per-chain sample csv files."""
     if args.output_basename is None:
         csv_basename = 'stan-{}-draws'.format(args.model_name)
         for i in range(chains):
             # delete=False keeps the file on disk after the handle is
             # closed; the with-block closes the handle so it is not
             # leaked (only the file name is needed here).
             with tempfile.NamedTemporaryFile(
                     mode='w+',
                     prefix='{}-{}-'.format(csv_basename, i + 1),
                     suffix='.csv',
                     dir=TMPDIR,
                     delete=False,
             ) as fd:
                 self.csv_files.append(fd.name)
     else:
         # Basename given: '<basename>-<chain index>.csv', 1-based.
         for i in range(chains):
             self.csv_files.append('{}-{}.csv'.format(
                 args.output_basename, i + 1))
     self.console_files = []
     """per-chain sample console output files."""
     for i in range(chains):
         # Same stem as the csv file, with a '.txt' extension.
         txt_file = ''.join(
             [os.path.splitext(self.csv_files[i])[0], '.txt'])
         self.console_files.append(txt_file)
     self.cmds = [
         args.compose_command(i, self.csv_files[i]) for i in range(chains)
     ]
     """per-chain sampler command."""
     # -1 is the initial return code for every chain.
     self._retcodes = [-1 for _ in range(chains)]
     # Result attributes start unset.
     self._draws = None
     self._column_names = None
     self._num_params = None  # metric dim(s)
     self._metric_type = None
     self._metric = None
     self._stepsize = None
     self._sample = None
     self._first_draw = None
예제 #19
0
 def test_output_filenames(self):
     """Check the naming pattern of the per-chain csv files."""
     run_args = CmdStanArgs(
         model_name='bernoulli',
         model_exe=os.path.join(DATAFILES_PATH, 'bernoulli' + EXTENSION),
         chain_ids=[1, 2, 3, 4],
         data=os.path.join(DATAFILES_PATH, 'bernoulli.data.json'),
         method_args=SamplerArgs(),
     )
     csv_files = RunSet(args=run_args, chains=4)._csv_files
     first_file, last_file = csv_files[0], csv_files[3]
     self.assertIn('bernoulli-', first_file)
     self.assertIn('_1.csv', first_file)
     self.assertIn('_4.csv', last_file)
예제 #20
0
 def test_ctor_checks(self):
     """Check that RunSet rejects invalid constructor arguments."""
     run_args = CmdStanArgs(
         model_name='bernoulli',
         model_exe=os.path.join(DATAFILES_PATH, 'bernoulli' + EXTENSION),
         chain_ids=[11, 12, 13, 14],
         data=os.path.join(DATAFILES_PATH, 'bernoulli.data.json'),
         method_args=SamplerArgs(),
     )
     invalid_kwargs = (
         {'chains': 0},  # zero chains
         {'chains': 4, 'chain_ids': [1, 2, 3]},  # 4 chains, 3 chain ids
     )
     for kwargs in invalid_kwargs:
         with self.assertRaises(ValueError):
             RunSet(args=run_args, **kwargs)
예제 #21
0
 def test_chain_ids(self):
     """Check that custom chain ids show up in commands and csv names."""
     custom_ids = [11, 12, 13, 14]
     run_args = CmdStanArgs(
         model_name='bernoulli',
         model_exe=os.path.join(DATAFILES_PATH, 'bernoulli' + EXTENSION),
         chain_ids=custom_ids,
         data=os.path.join(DATAFILES_PATH, 'bernoulli.data.json'),
         method_args=SamplerArgs(),
     )
     runset = RunSet(args=run_args, chains=4, chain_ids=custom_ids)
     # First and last chain: (position, expected chain id).
     for position, chain_id in ((0, 11), (3, 14)):
         self.assertIn('id={}'.format(chain_id), runset.cmd(position))
         self.assertIn(
             '_{}.csv'.format(chain_id), runset._csv_files[position]
         )
예제 #22
0
    def test_validate_good_run(self):
        """Validate a StanFit built from the existing 'bern' csv output.

        Checks return codes, console messages, csv validation, chain,
        draw and column counts, the drawset shape, that ``summary()``
        runs, and the text of ``diagnose()``.
        """
        # construct fit using existing sampler output
        method_args = SamplerArgs(
            sampling_iters=100, max_treedepth=11, adapt_delta=0.95
        )
        run_args = CmdStanArgs(
            model_name='bernoulli',
            model_exe=os.path.join(datafiles_path, 'bernoulli' + EXTENSION),
            chain_ids=[1, 2, 3, 4],
            seed=12345,
            data=os.path.join(datafiles_path, 'bernoulli.data.json'),
            output_basename=os.path.join(goodfiles_path, 'bern'),
            method_args=method_args,
        )
        fit = StanFit(args=run_args, chains=4)
        for chain_idx, _ in enumerate(fit._retcodes):
            fit._set_retcode(chain_idx, 0)
        self.assertTrue(fit._check_retcodes())
        fit._check_console_msgs()
        fit._validate_csv_files()
        self.assertEqual(4, fit.chains)
        self.assertEqual(100, fit.draws)
        self.assertEqual(8, len(fit.column_names))
        self.assertEqual('lp__', fit.column_names[0])

        drawset = fit.get_drawset()
        expected_shape = (fit.chains * fit.draws, len(fit.column_names))
        self.assertEqual(drawset.shape, expected_shape)
        _ = fit.summary()

        # TODO - use cmdstan test files instead
        report_lines = (
            'Checking sampler transitions treedepth.',
            'Treedepth satisfactory for all transitions.',
            '\nChecking sampler transitions for divergences.',
            'No divergent transitions found.',
            '\nChecking E-BFMI - sampler transitions HMC potential energy.',
            'E-BFMI satisfactory for all transitions.',
            '\nEffective sample size satisfactory.',
        )
        expected = '\n'.join(report_lines)
        # Normalise Windows line endings before comparing.
        actual = fit.diagnose().replace("\r\n", "\n")
        self.assertIn(expected, actual)
예제 #23
0
    def test_validate_summary_sig_figs(self):
        """Check ``summary()`` output precision for several ``sig_figs``.

        The default summary is always checked; the 17- and 10-digit
        summaries are checked only when ``cmdstan_version_at(2, 25)`` is
        true. Out-of-range ``sig_figs`` values must raise ValueError.
        """
        # construct CmdStanMCMC from logistic model output, config
        run_args = CmdStanArgs(
            model_name='logistic',
            model_exe=os.path.join(DATAFILES_PATH, 'logistic' + EXTENSION),
            chain_ids=[1, 2, 3, 4],
            seed=12345,
            data=os.path.join(DATAFILES_PATH, 'logistic.data.R'),
            output_dir=DATAFILES_PATH,
            sig_figs=17,
            method_args=SamplerArgs(iter_sampling=100),
        )
        runset = RunSet(args=run_args)
        runset._csv_files = [
            os.path.join(DATAFILES_PATH, 'logistic_output_1.csv'),
            os.path.join(DATAFILES_PATH, 'logistic_output_2.csv'),
            os.path.join(DATAFILES_PATH, 'logistic_output_3.csv'),
            os.path.join(DATAFILES_PATH, 'logistic_output_4.csv'),
        ]
        for chain_idx, _ in enumerate(runset._retcodes):
            runset._set_retcode(chain_idx, 0)
        fit = CmdStanMCMC(runset)

        def beta1_text(summary_frame):
            # Cell at row 1, column 0, rendered with 18 significant digits.
            return format(summary_frame.iloc[1, 0], '.18g')

        self.assertTrue(beta1_text(fit.summary()).startswith('1.3'))

        if cmdstan_version_at(2, 25):
            text_17 = beta1_text(fit.summary(sig_figs=17))
            self.assertTrue(text_17.startswith('1.345767078273'))

            text_10 = beta1_text(fit.summary(sig_figs=10))
            self.assertTrue(text_10.startswith('1.34576707'))

        for invalid in (20, -1):
            with self.assertRaises(ValueError):
                fit.summary(sig_figs=invalid)
예제 #24
0
 def test_compose(self):
     """Check that compose_command rejects out-of-range chain indices."""
     run_args = CmdStanArgs(
         model_name='bernoulli',
         model_exe=os.path.join(datafiles_path, 'bernoulli'),
         chain_ids=[1, 2, 3, 4],
         method_args=SamplerArgs(),
     )
     # Four chain ids: index 4 is one past the end, -1 is negative.
     for bad_idx in (4, -1):
         with self.assertRaises(ValueError):
             run_args.compose_command(idx=bad_idx, csv_file='foo')
예제 #25
0
 def test_get_err_msgs(self):
     """Check that error messages are collected from stdout fixtures."""
     run_args = CmdStanArgs(
         model_name='logistic',
         model_exe=os.path.join(DATAFILES_PATH, 'logistic' + EXTENSION),
         chain_ids=[1, 2, 3],
         data=os.path.join(DATAFILES_PATH, 'logistic.data.R'),
         method_args=SamplerArgs(),
     )
     runset = RunSet(args=run_args, chains=3)
     for chain_no in (1, 2, 3):
         position = chain_no - 1
         # Mark the chain as failed and attach its stdout fixture.
         runset._set_retcode(position, 70)
         fixture_name = 'chain-{}-missing-data-stdout.txt'.format(chain_no)
         runset._stdout_files[position] = os.path.join(
             DATAFILES_PATH, fixture_name
         )
     joined_errors = '\n\t'.join(runset._get_err_msgs())
     self.assertIn('Exception', joined_errors)
예제 #26
0
 def test_check_repr(self):
     """Check the fragments present in (and absent from) RunSet's repr."""
     run_args = CmdStanArgs(
         model_name='bernoulli',
         model_exe=os.path.join(DATAFILES_PATH, 'bernoulli' + EXTENSION),
         chain_ids=[1, 2, 3, 4],  # default
         data=os.path.join(DATAFILES_PATH, 'bernoulli.data.json'),
         method_args=SamplerArgs(),
     )
     description = repr(RunSet(args=run_args, chains=4))
     required_fragments = (
         'RunSet: chains=4',
         'method=sample',
         'retcodes=[-1, -1, -1, -1]',
         'csv_file',
         'console_msgs',
     )
     for fragment in required_fragments:
         self.assertIn(fragment, description)
     self.assertNotIn('diagnostics_file', description)
예제 #27
0
    def test_args_good(self):
        # Well-formed arguments: check the pieces of the composed sampler
        # command line, that custom chain ids are used, and that a
        # not-yet-existing output_dir is present after construction.
        exe = os.path.join(DATAFILES_PATH, 'bernoulli')
        jdata = os.path.join(DATAFILES_PATH, 'bernoulli.data.json')
        sampler_args = SamplerArgs()

        cmdstan_args = CmdStanArgs(
            model_name='bernoulli',
            model_exe=exe,
            chain_ids=[1, 2, 3, 4],
            data=jdata,
            method_args=sampler_args,
            refresh=10,
        )
        self.assertEqual(cmdstan_args.method, Method.SAMPLE)
        cmd = cmdstan_args.compose_command(idx=0, csv_file='bern-output-1.csv')
        cmd_line = ' '.join(cmd)
        self.assertIn('id=1 random seed=', cmd_line)
        self.assertIn('data file=', cmd_line)
        self.assertIn('output file=', cmd_line)
        self.assertIn('method=sample algorithm=hmc', cmd_line)
        self.assertIn('refresh=10', cmd_line)

        # idx selects a position in chain_ids; position 0 here is id 7
        cmdstan_args = CmdStanArgs(
            model_name='bernoulli',
            model_exe=exe,
            chain_ids=[7, 11, 18, 29],
            data=jdata,
            method_args=sampler_args,
        )
        cmd = cmdstan_args.compose_command(idx=0, csv_file='bern-output-1.csv')
        self.assertIn('id=7 random seed=', ' '.join(cmd))

        # timestamped name so the directory should not already exist
        dirname = 'tmp' + str(time())
        if os.path.exists(dirname):
            os.rmdir(dirname)
        try:
            CmdStanArgs(
                model_name='bernoulli',
                model_exe='bernoulli.exe',
                chain_ids=[1, 2, 3, 4],
                output_dir=dirname,
                method_args=sampler_args,
            )
            self.assertTrue(os.path.exists(dirname))
        finally:
            # Clean up even when the constructor raises or the check fails,
            # so a failing run does not leave tmp* directories behind.
            if os.path.exists(dirname):
                os.rmdir(dirname)
예제 #28
0
 def test_args_sig_figs(self):
     # sig_figs is checked against the installed CmdStan version: before
     # 2.25 a warning is expected on the 'cmdstanpy' logger; otherwise the
     # argument must reach the command line and out-of-range values must
     # raise ValueError.
     sampler_args = SamplerArgs()

     def make_args(sig_figs):
         # every CmdStanArgs in this test differs only in sig_figs
         return CmdStanArgs(
             model_name='bernoulli',
             model_exe='bernoulli.exe',
             chain_ids=[1, 2, 3, 4],
             sig_figs=sig_figs,
             method_args=sampler_args,
         )

     cmdstan_path()  # sets os.environ['CMDSTAN']
     if not cmdstan_version_before(2, 25):
         cmd = make_args(12).compose_command(
             idx=0, csv_file='bern-output-1.csv'
         )
         self.assertIn('sig_figs=', ' '.join(cmd))
         for invalid in (-1, 20):
             with self.assertRaises(ValueError):
                 make_args(invalid)
         return

     with LogCapture() as log:
         # NOTE(review): the return value is unused; call kept as in the
         # original test.
         logging.getLogger()
         make_args(12)
     install_path = cmdstan_path()
     expect = (
         'Argument "sig_figs" invalid for CmdStan versions < 2.25, '
         'using version {} in directory {}').format(
             os.path.basename(install_path),
             os.path.dirname(install_path),
         )
     log.check_present(('cmdstanpy', 'WARNING', expect))
예제 #29
0
 def test_variables_3d(self):
     # construct fit using existing sampler output
     exe = os.path.join(DATAFILES_PATH, 'multidim_vars' + EXTENSION)
     rdata = os.path.join(DATAFILES_PATH, 'logistic.data.R')
     sampler_args = SamplerArgs(iter_sampling=20)
     cmdstan_args = CmdStanArgs(
         model_name='multidim_vars',
         model_exe=exe,
         chain_ids=[1],
         seed=12345,
         data=rdata,
         output_dir=DATAFILES_PATH,
         method_args=sampler_args,
     )
     runset = RunSet(args=cmdstan_args, chains=1)
     # point the RunSet at the canned CSV and mark chain 0 as finished OK
     runset._csv_files = [os.path.join(DATAFILES_PATH, 'multidim_vars.csv')]
     runset._set_retcode(0, 0)
     fit = CmdStanMCMC(runset)

     self.assertEqual(20, fit.num_draws_sampling)
     self.assertEqual(3, len(fit.stan_vars_dims))
     self.assertIn('y_rep', fit.stan_vars_dims)
     self.assertEqual(fit.stan_vars_dims['y_rep'], (5, 4, 3))

     # expected array shapes; the leading axis is the 20 sampling draws
     expected_shapes = (
         ('y_rep', (20, 5, 4, 3)),
         ('beta', (20, 2)),
         ('frac_60', (20, )),
     )

     # single-variable accessor
     for name, shape in expected_shapes:
         self.assertEqual(fit.stan_variable(name=name).shape, shape)

     # all-variables accessor; the local is not called `vars` so the
     # builtin is not shadowed
     stan_vars = fit.stan_variables()
     self.assertEqual(len(stan_vars), len(fit.stan_vars_dims))
     for name, shape in expected_shapes:
         self.assertIn(name, stan_vars)
         self.assertEqual(stan_vars[name].shape, shape)
예제 #30
0
 def test_diagnose_divergences(self):
     # diagnose() on the canned 'corr_gauss_depth8' output must report the
     # treedepth warning text assembled below.
     cmdstan_args = CmdStanArgs(
         model_name='bernoulli',
         # fake out validation
         model_exe=os.path.join(datafiles_path, 'bernoulli' + EXTENSION),
         chain_ids=[1],
         output_basename=os.path.join(
             datafiles_path, 'diagnose-good', 'corr_gauss_depth8'
         ),
         method_args=SamplerArgs(),
     )
     fit = StanFit(args=cmdstan_args, chains=1)
     # TODO - use cmdstan test files instead
     treedepth_report = (
         'Checking sampler transitions treedepth.',
         '424 of 1000 (42%) transitions hit the maximum '
         'treedepth limit of 8, or 2^8 leapfrog steps.',
         'Trajectories that are prematurely terminated '
         'due to this limit will result in slow exploration.',
         'For optimal performance, increase this limit.',
     )
     expected = '\n'.join(treedepth_report)
     # normalize CRLF line endings so the comparison is platform-neutral
     diagnosis = fit.diagnose().replace("\r\n", "\n")
     self.assertIn(expected, diagnosis)