def test_default_path(self):
    """Check cmdstan_path() with and without a preset ``CMDSTAN`` variable.

    When ``CMDSTAN`` is set, the path must equal it, and after deleting the
    variable a call to ``set_cmdstan_path`` must put it back.  Otherwise the
    path must be the latest install found under ``~/.cmdstanpy`` and the
    call must leave ``CMDSTAN`` set.  The environment is restored on exit.
    """
    had_value = 'CMDSTAN' in os.environ
    original = os.environ['CMDSTAN'] if had_value else None
    try:
        if 'CMDSTAN' not in os.environ:
            dot_dir = os.path.expanduser(os.path.join('~', '.cmdstanpy'))
            latest = os.path.expanduser(
                os.path.join(dot_dir, get_latest_cmdstan(dot_dir))
            )
            self.assertTrue(os.path.samefile(cmdstan_path(), latest))
            self.assertTrue('CMDSTAN' in os.environ)
        else:
            self.assertEqual(cmdstan_path(), os.environ['CMDSTAN'])
            path = os.environ['CMDSTAN']
            del os.environ['CMDSTAN']
            self.assertFalse('CMDSTAN' in os.environ)
            set_cmdstan_path(path)
            self.assertEqual(cmdstan_path(), path)
            self.assertTrue('CMDSTAN' in os.environ)
    finally:
        # Put the environment back exactly as it was found.
        if original is not None:
            os.environ['CMDSTAN'] = original
        elif 'CMDSTAN' in os.environ:
            del os.environ['CMDSTAN']
def test_cmdstan_version(self):
    """Check the messages logged by cmdstan_version() for a fake install.

    Builds a throwaway ``cmdstan-2.22.0`` directory that contains only an
    empty ``bin/stanc`` file, points ``CMDSTAN`` at it, and checks the INFO
    records captured for the ``cmdstanpy`` logger: first with no makefile
    present, then with a makefile whose version string is not of the form
    ``<major>.<minor>.<patch>``.
    """
    with tempfile.TemporaryDirectory(prefix="cmdstan_tests", dir=_TMPDIR) as tmpdir:
        # Lay out <tmpdir>/tmpdir_xxx/cmdstan-2.22.0/bin/stanc[EXTENSION].
        tdir = os.path.join(tmpdir, 'tmpdir_xxx')
        os.makedirs(tdir)
        fake_path = os.path.join(tdir, 'cmdstan-2.22.0')
        os.makedirs(os.path.join(fake_path))
        fake_bin = os.path.join(fake_path, 'bin')
        os.makedirs(fake_bin)
        Path(os.path.join(fake_bin, 'stanc' + EXTENSION)).touch()
        with self.modified_environ(CMDSTAN=fake_path):
            self.assertTrue(fake_path == cmdstan_path())
            # Case 1: the fake install has no makefile.
            expect = ('CmdStan installation {} missing makefile, '
                      'cannot get version.'.format(fake_path))
            with LogCapture() as log:
                logging.getLogger()  # return value is unused
                cmdstan_version()
            log.check_present(('cmdstanpy', 'INFO', expect))
            # Case 2: a makefile exists but its version cannot be parsed.
            fake_makefile = os.path.join(fake_path, 'makefile')
            with open(fake_makefile, 'w') as fd:
                fd.write('... CMDSTAN_VERSION := dont_need_no_mmp\n\n')
            expect = (
                'Cannot parse version, expected "<major>.<minor>.<patch>", '
                'found: "dont_need_no_mmp".')
            with LogCapture() as log:
                logging.getLogger()  # return value is unused
                cmdstan_version()
            log.check_present(('cmdstanpy', 'INFO', expect))
    # NOTE(review): the nesting level of this trailing call is not
    # recoverable from the collapsed source; it is placed at function level
    # here (after the temporary environment is gone) -- confirm.
    cmdstan_path()
def compile_model(
    stan_file: str = None,
    opt_lvl: int = 2,
    overwrite: bool = False,
    include_paths: List[str] = None,
) -> Model:
    """
    Compile the given Stan model file to an executable.

    The Stan program is first translated to C++ with ``stanc`` (skipped when
    the ``.hpp`` file already exists and ``overwrite`` is False), then the
    executable is built by running ``make`` in the CmdStan directory.

    :param stan_file: Path to Stan program
    :param opt_lvl: Optimization level for C++ compiler, one of {0, 1, 2, 3}
        where level 0 optimization results in the shortest compilation time
        with code that may run slowly and increasing optimization levels
        increase compile time and runtime performance.
    :param overwrite: When True, existing executable will be overwritten.
        Defaults to False.
    :param include_paths: List of paths to directories where Stan should look
        for files to include in compilation of the C++ executable.
    :return: ``Model(stan_file, exe_file)`` on success or when an existing
        executable is reused; ``Model(stan_file)`` when the ``make`` step
        raises.
    :raises Exception: if ``stan_file`` is missing or does not exist, if any
        include path does not exist, or if no ``.hpp`` file is present after
        translation.
    """
    if stan_file is None:
        raise Exception('must specify argument "stan_file"')
    if not os.path.exists(stan_file):
        raise Exception('no such stan_file {}'.format(stan_file))

    exe_file, _ = os.path.splitext(os.path.abspath(stan_file))
    hpp_file = '.'.join([exe_file, 'hpp'])
    if overwrite or not os.path.exists(hpp_file):
        print('translating to {}'.format(hpp_file))
        # Validate the caller's input before touching the CmdStan install.
        if include_paths is not None:
            bad_paths = [d for d in include_paths if not os.path.exists(d)]
            # Test the list itself: any() would let an empty-string path
            # slip through because '' is falsy.
            if bad_paths:
                raise Exception('invalid include paths: {}'.format(
                    ', '.join(bad_paths)))
        stanc_path = os.path.join(cmdstan_path(), 'bin', 'stanc')
        cmd = [stanc_path, '--o={}'.format(hpp_file), stan_file]
        if include_paths is not None:
            cmd.append('--include_paths=' + ','.join(include_paths))
        print('stan to c++: make args {}'.format(cmd))
        do_command(cmd)
        if not os.path.exists(hpp_file):
            # The original message had no placeholder, so the file name
            # passed to format() was silently dropped.
            raise Exception('syntax error in {}'.format(stan_file))

    if platform.system().lower().startswith('win'):
        exe_file += '.exe'
    if not overwrite and os.path.exists(exe_file):
        # Reuse the existing executable without notifying the user.
        return Model(stan_file, exe_file)

    exe_file_path = Path(exe_file).as_posix()
    cmd = ['make', 'O={}'.format(opt_lvl), exe_file_path]
    print('compiling c++: make args {}'.format(cmd))
    try:
        do_command(cmd, cmdstan_path())
    except Exception:
        # Best effort: hand back a model without an executable.
        return Model(stan_file)
    return Model(stan_file, exe_file)
def test_set_path(self):
    """Check that set_cmdstan_path() is reflected by cmdstan_path().

    With ``CMDSTAN`` already set, only check that ``cmdstan_path()`` equals
    it.  Otherwise point CmdStanPy at the latest install under
    ``~/.cmdstanpy`` and check both the getter and the environment variable.
    """
    if 'CMDSTAN' not in os.environ:
        base_dir = os.path.expanduser(os.path.join('~', '.cmdstanpy'))
        latest = os.path.join(base_dir, get_latest_cmdstan(base_dir))
        latest = os.path.expanduser(latest)
        set_cmdstan_path(latest)
        self.assertEqual(latest, cmdstan_path())
        self.assertEqual(latest, os.environ['CMDSTAN'])
        return
    self.assertEqual(cmdstan_path(), os.environ['CMDSTAN'])
def test_set_path(self):
    """Check that set_cmdstan_path() is reflected by cmdstan_path().

    With ``CMDSTAN`` already set, only check that ``cmdstan_path()`` equals
    it.  Otherwise use the ``_DOT_CMDSTAN`` directory under the home
    directory, falling back to ``_DOT_CMDSTANPY`` when the former does not
    exist, and set the path to the latest install found there.
    """
    if 'CMDSTAN' not in os.environ:
        home_install = os.path.expanduser(os.path.join('~', _DOT_CMDSTAN))
        if not os.path.exists(home_install):
            home_install = os.path.expanduser(
                os.path.join('~', _DOT_CMDSTANPY))
        latest = os.path.join(home_install, get_latest_cmdstan(home_install))
        set_cmdstan_path(latest)
        self.assertEqual(latest, cmdstan_path())
        self.assertEqual(latest, os.environ['CMDSTAN'])
        return
    self.assertEqual(cmdstan_path(), os.environ['CMDSTAN'])
def test_default_path(self):
    """Check cmdstan_path() with and without a preset ``CMDSTAN`` variable.

    With ``CMDSTAN`` set, the path must match it; then, inside a temporary
    environment where the variable is absent, ``set_cmdstan_path`` must
    bring it back.  Without it, the path must be the latest install under
    the ``_DOT_CMDSTAN`` directory and ``CMDSTAN`` must end up set.
    """
    if 'CMDSTAN' not in os.environ:
        dot_dir = os.path.expanduser(os.path.join('~', _DOT_CMDSTAN))
        latest = os.path.join(dot_dir, get_latest_cmdstan(dot_dir))
        self.assertTrue(os.path.samefile(cmdstan_path(), latest))
        self.assertTrue('CMDSTAN' in os.environ)
        return

    self.assertPathsEqual(cmdstan_path(), os.environ['CMDSTAN'])
    preset = os.environ['CMDSTAN']
    with self.modified_environ('CMDSTAN'):
        self.assertFalse('CMDSTAN' in os.environ)
        set_cmdstan_path(preset)
        self.assertPathsEqual(cmdstan_path(), preset)
        self.assertTrue('CMDSTAN' in os.environ)
def summary(self) -> pd.DataFrame:
    """
    Run cmdstan/bin/stansummary over all output csv files.

    stansummary writes its table to a temporary csv file, which is read
    back into a pandas DataFrame.  Rows whose name ends in ``__`` are
    dropped, except for ``lp__``.

    :return: pandas.DataFrame of summary statistics
    """
    # NOTE(review): the value is unused; the attribute access is kept as in
    # the original in case it has side effects -- confirm.
    names = self.column_names
    stansummary_exe = os.path.join(
        cmdstan_path(), 'bin', 'stansummary' + EXTENSION)
    csv_prefix = 'stansummary-{}-{}-chain-'.format(
        self.runset._args.model_name, self.runset.chains)
    out_csv = create_named_text_file(
        dir=TMPDIR, prefix=csv_prefix, suffix='.csv')
    cmd = [stansummary_exe, '--csv_file={}'.format(out_csv)]
    cmd = cmd + self.runset.csv_files
    do_command(cmd, logger=self.runset._logger)
    with open(out_csv, 'rb') as fd:
        table = pd.read_csv(
            fd, delimiter=',', header=0, index_col=0, comment='#')
    keep = [
        not (name != 'lp__' and name.endswith('__')) for name in table.index
    ]
    return table[keep]
def test_set_path(self):
    """Set the path to the latest install under ~/.cmdstanpy and read it back."""
    base_dir = os.path.expanduser(os.path.join('~', '.cmdstanpy'))
    newest = get_latest_cmdstan(base_dir)
    target = os.path.expanduser(os.path.join(base_dir, newest))
    set_cmdstan_path(target)
    self.assertEqual(target, cmdstan_path())
def summary(self) -> pd.DataFrame:
    """
    Run cmdstan/bin/stansummary over all output csv files.

    stansummary writes its table to a temporary csv file, which is read
    back into a pandas DataFrame.  Rows whose name ends in ``__`` are
    dropped, except for ``lp__``.

    :return: pandas.DataFrame of summary statistics
    """
    self._sampling_only()
    # NOTE(review): the value is unused; the attribute access is kept as in
    # the original in case it has side effects -- confirm.
    names = self.column_names
    cmd_path = os.path.join(cmdstan_path(), 'bin', 'stansummary' + EXTENSION)
    tmp_csv_file = 'stansummary-{}-{}-chains-'.format(
        self._args.model_name, self.chains)
    fd, tmp_csv_path = tempfile.mkstemp(
        suffix='.csv', prefix=tmp_csv_file, dir=TMPDIR, text=True)
    # mkstemp returns an open OS-level descriptor; only the path is needed
    # here, so close it right away instead of leaking it.
    os.close(fd)
    # Build the argument list directly: joining into one string and calling
    # split() would break any path that contains whitespace.
    cmd = [cmd_path, '--csv_file={}'.format(tmp_csv_path)]
    cmd.extend(self.csv_files)
    do_command(cmd, logger=self._logger)
    summary_data = pd.read_csv(
        tmp_csv_path, delimiter=',', header=0, index_col=0, comment='#')
    mask = [
        x == 'lp__' or not x.endswith('__') for x in summary_data.index
    ]
    return summary_data[mask]
def summary(runset: RunSet) -> pd.DataFrame:
    """
    Run cmdstan/bin/stansummary over all output csv files.

    stansummary writes its table to a temporary csv file, which is read
    back into a pandas DataFrame.  Rows whose name ends in ``__`` are
    dropped, except for ``lp__``.

    :param runset: record of completed run of NUTS sampler
    :return: pandas.DataFrame of summary statistics
    """
    # NOTE(review): the value is unused; the attribute access is kept as in
    # the original in case it has side effects -- confirm.
    names = runset.column_names
    cmd_path = os.path.join(cmdstan_path(), 'bin', 'stansummary')
    tmp_csv_file = 'stansummary-{}-{}-chains-'.format(
        runset.model, runset.chains)
    fd, tmp_csv_path = tempfile.mkstemp(
        suffix='.csv', prefix=tmp_csv_file, dir=TMPDIR, text=True)
    # mkstemp returns an open OS-level descriptor; only the path is needed
    # here, so close it right away instead of leaking it.
    os.close(fd)
    # Build the argument list directly: joining into one string and calling
    # split() would break any path that contains whitespace.
    cmd = [cmd_path, '--csv_file={}'.format(tmp_csv_path)]
    cmd.extend(runset.csv_files)
    do_command(cmd)
    summary_data = pd.read_csv(
        tmp_csv_path, delimiter=',', header=0, index_col=0, comment='#')
    mask = [x == 'lp__' or not x.endswith('__') for x in summary_data.index]
    return summary_data[mask]
def diagnose(self) -> str:
    """
    Run cmdstan/bin/diagnose over all output csv files.
    Returns output of diagnose (stdout/stderr).

    The diagnose utility reads the outputs of all chains
    and checks for the following potential problems:

    + Transitions that hit the maximum treedepth
    + Divergent transitions
    + Low E-BFMI values (sampler transitions HMC potential energy)
    + Low effective sample sizes
    + High R-hat values

    Any non-empty output is also logged as a warning.

    :return: output of the diagnose command; empty if no problems found
    """
    self._sampling_only()
    cmd_path = os.path.join(cmdstan_path(), 'bin', 'diagnose' + EXTENSION)
    # Build the argument list directly: joining into one string and calling
    # split() would break any path that contains whitespace.
    cmd = [cmd_path]
    cmd.extend(self.csv_files)
    result = do_command(cmd=cmd, logger=self._logger)
    if result:
        self._logger.warning(result)
    return result
def summary(self, percentiles: List[int] = None) -> pd.DataFrame:
    """
    Run cmdstan/bin/stansummary over all output csv files.

    stansummary writes its table to a temporary csv file, which is read
    back into a pandas DataFrame.  Rows whose name ends in ``__`` are
    dropped, except for ``lp__``.

    :param percentiles: Ordered non-empty list of percentiles to report.
        Must be integers from (1, 99), inclusive.  When omitted,
        ``5,50,95`` is requested.
    :return: pandas.DataFrame of summary statistics
    :raises ValueError: if ``percentiles`` is empty, not strictly
        increasing, or contains a value outside the allowed range.
    """
    if percentiles is None:
        percentiles_str = '--percentiles=5,50,95'
    else:
        if len(percentiles) == 0:
            raise ValueError(
                'invalid percentiles argument, must be ordered'
                ' non-empty list from (1, 99), inclusive.'
            )
        # Each value must exceed its predecessor; starting from 0 also
        # rejects anything below 1.
        previous = 0
        for pct in percentiles:
            if pct > 99 or not pct > previous:
                raise ValueError(
                    'invalid percentiles spec, must be ordered'
                    ' non-empty list from (1, 99), inclusive.'
                )
            previous = pct
        percentiles_str = '--percentiles=' + ','.join(
            [str(pct) for pct in percentiles]
        )

    stansummary_exe = os.path.join(
        cmdstan_path(), 'bin', 'stansummary' + EXTENSION
    )
    csv_prefix = 'stansummary-{}-{}-chain-'.format(
        self.runset._args.model_name, self.runset.chains
    )
    out_csv = create_named_text_file(
        dir=_TMPDIR, prefix=csv_prefix, suffix='.csv'
    )
    cmd = [stansummary_exe, percentiles_str, '--csv_file={}'.format(out_csv)]
    cmd = cmd + self.runset.csv_files
    do_command(cmd, logger=self.runset._logger)
    with open(out_csv, 'rb') as fd:
        table = pd.read_csv(
            fd,
            delimiter=',',
            header=0,
            index_col=0,
            comment='#',
            float_precision='high',
        )
    keep = [
        not (name != 'lp__' and name.endswith('__')) for name in table.index
    ]
    return table[keep]
def diagnose(self) -> str:
    """
    Run cmdstan/bin/diagnose over all output csv files.
    Returns output of diagnose (stdout/stderr).

    The diagnose utility reads the outputs of all chains
    and checks for the following potential problems:

    + Transitions that hit the maximum treedepth
    + Divergent transitions
    + Low E-BFMI values (sampler transitions HMC potential energy)
    + Low effective sample sizes
    + High R-hat values

    Any non-empty output is also logged as a warning.

    :return: output of the diagnose command
    """
    diagnose_exe = os.path.join(cmdstan_path(), 'bin', 'diagnose' + EXTENSION)
    output = do_command(
        cmd=[diagnose_exe] + self.runset.csv_files,
        logger=self.runset._logger,
    )
    if not output:
        return output
    self.runset._logger.warning(output)
    return output
def rebuild_cmdstan(verbose: bool = False,
                    progress: bool = True,
                    cores: int = 1) -> None:
    """
    Rebuild the existing CmdStan installation.

    Runs, from inside the CmdStan directory, a clean, a build, and a
    compilation of the example model.  This assumes CmdStan has already
    been installed, though it need not have been installed via CmdStanPy.

    :param verbose: Boolean value; when ``True``, show output from make
        command.  Default is ``False``.
    :param progress: Boolean value; when ``True`` display progress bar.
        Default is ``True``.
    :param cores: Integer, number of cores to use in the ``make`` command.
        Default is 1 core.
    :raises CmdStanInstallError: if a ``ValueError`` is raised while
        locating or rebuilding the installation.
    """
    try:
        install_dir = cmdstan_path()
        with pushd(install_dir):
            clean_all(verbose)
            build(verbose, progress, cores)
            compile_example(verbose)
    except ValueError as err:
        raise CmdStanInstallError(
            "Failed to rebuild CmdStan. Are you sure it is installed?"
        ) from err
def diagnose(runset: RunSet) -> None:
    """
    Run cmdstan/bin/diagnose over all output csv files.
    Echo diagnose stdout/stderr to console.

    The diagnose utility reads the outputs of all chains
    and checks for the following potential problems:

    + Transitions that hit the maximum treedepth
    + Divergent transitions
    + Low E-BFMI values (sampler transitions HMC potential energy)
    + Low effective sample sizes
    + High R-hat values

    :param runset: record of completed run of NUTS sampler
    """
    cmd_path = os.path.join(cmdstan_path(), 'bin', 'diagnose')
    # Build the argument list directly: joining into one string and calling
    # split() would break any path that contains whitespace.
    cmd = [cmd_path]
    cmd.extend(runset.csv_files)
    result = do_command(cmd=cmd)
    if result is None:
        print('No problems detected.')
    else:
        print(result)
def validate(self) -> None:
    """
    Check arguments correctness and consistency.

    * a model name and executable must be set
    * chain ids must be >= 1
    * ``output_dir`` is resolved, created if missing, and must be a
      writeable directory
    * ``refresh`` must be a positive integer; ``sig_figs`` an integer in
      [1, 18] (dropped with a warning when ``cmdstan_version_before(2, 25)``)
    * if no seed is specified, a random seed is set; otherwise the seed(s)
      must lie in [0, 2**32-1]
    * input files must exist
    * length of per-chain lists equals specified # of chains

    :raises ValueError: on the first check that fails.
    """
    if self.model_name is None:
        raise ValueError('no stan model specified')
    if self.model_exe is None:
        raise ValueError('model not compiled')

    if self.chain_ids is not None:
        for chain_id in self.chain_ids:
            if chain_id < 1:
                raise ValueError('invalid chain_id {}'.format(chain_id))

    if self.output_dir is not None:
        self.output_dir = os.path.realpath(
            os.path.expanduser(self.output_dir))
        if not os.path.exists(self.output_dir):
            try:
                os.makedirs(self.output_dir)
                get_logger().info('created output directory: %s',
                                  self.output_dir)
            except (RuntimeError, PermissionError) as exc:
                raise ValueError('Invalid path for output files, '
                                 'no such dir: {}.'.format(
                                     self.output_dir)) from exc
        if not os.path.isdir(self.output_dir):
            raise ValueError(
                'Specified output_dir is not a directory: {}.'.format(
                    self.output_dir))
        # Probe writability by creating and removing a scratch file.
        try:
            probe = os.path.join(self.output_dir, str(time()))
            with open(probe, 'w+'):
                pass
            os.remove(probe)
        except Exception as exc:
            raise ValueError('Invalid path for output files,'
                             ' cannot write to dir: {}.'.format(
                                 self.output_dir)) from exc

    if self.refresh is not None:
        if not isinstance(self.refresh, int) or self.refresh < 1:
            raise ValueError(
                'Argument "refresh" must be a positive integer value, '
                'found {}.'.format(self.refresh))

    if self.sig_figs is not None:
        if (not isinstance(self.sig_figs, int) or self.sig_figs < 1
                or self.sig_figs > 18):
            raise ValueError(
                'Argument "sig_figs" must be an integer between 1 and 18,'
                ' found {}'.format(self.sig_figs))
        # TODO: remove at some future release
        if cmdstan_version_before(2, 25):
            self.sig_figs = None
            get_logger().warning(
                'Argument "sig_figs" invalid for CmdStan versions < 2.25, '
                'using version %s in directory %s',
                os.path.basename(cmdstan_path()),
                os.path.dirname(cmdstan_path()),
            )

    if self.seed is None:
        self.seed = RandomState().randint(1, 99999 + 1)
    elif not isinstance(self.seed, (int, list)):
        raise ValueError('Argument "seed" must be an integer between '
                         '0 and 2**32-1, found {}.'.format(self.seed))
    elif isinstance(self.seed, int):
        if self.seed < 0 or self.seed > 2**32 - 1:
            raise ValueError(
                'Argument "seed" must be an integer between '
                '0 and 2**32-1, found {}.'.format(self.seed))
    else:
        # Per-chain seeds: one entry for every chain id.
        if self.chain_ids is None:
            raise ValueError(
                'List of per-chain seeds cannot be evaluated without '
                'corresponding list of chain_ids.')
        if len(self.seed) != len(self.chain_ids):
            raise ValueError(
                'Number of seeds must match number of chains,'
                ' found {} seed for {} chains.'.format(
                    len(self.seed), len(self.chain_ids)))
        for chain_seed in self.seed:
            if chain_seed < 0 or chain_seed > 2**32 - 1:
                raise ValueError(
                    'Argument "seed" must be an integer value'
                    ' between 0 and 2**32-1,'
                    ' found {}'.format(chain_seed))

    if isinstance(self.data, str):
        if not os.path.exists(self.data):
            raise ValueError('no such file {}'.format(self.data))
    elif self.data is not None and not isinstance(self.data, dict):
        raise ValueError('Argument "data" must be string or dict')

    if self.inits is None:
        return
    if isinstance(self.inits, (float, int)):
        if self.inits < 0:
            raise ValueError(
                'Argument "inits" must be > 0, found {}'.format(
                    self.inits))
    elif isinstance(self.inits, str):
        if not os.path.exists(self.inits):
            raise ValueError('no such file {}'.format(self.inits))
    elif isinstance(self.inits, list):
        if self.chain_ids is None:
            raise ValueError(
                'List of inits files cannot be evaluated without '
                'corresponding list of chain_ids.')
        if len(self.inits) != len(self.chain_ids):
            raise ValueError(
                'Number of inits files must match number of chains,'
                ' found {} inits files for {} chains.'.format(
                    len(self.inits), len(self.chain_ids)))
        for inits_file in self.inits:
            if not os.path.exists(inits_file):
                raise ValueError('no such file {}'.format(inits_file))
def __init__(
    self,
    model_name: str = None,
    stan_file: str = None,
    exe_file: str = None,
    compile: bool = True,
    stanc_options: Dict = None,
    cpp_options: Dict = None,
    logger: logging.Logger = None,
) -> None:
    """
    Initialize object given constructor args.

    At least one of ``stan_file`` and ``exe_file`` must be given.  When no
    model name is supplied it is taken from the Stan file name or, failing
    that, from the executable name.  If the Stan program text contains
    ``#include``, the directory of the Stan file is added to the stanc
    include paths.

    :param model_name: Model name, used for output file names.
    :param stan_file: Path to Stan program file.
    :param exe_file: Path to compiled executable file.
    :param compile: Whether or not to compile the model.
    :param stanc_options: Options for stanc compiler.
    :param cpp_options: Options for C++ compiler.
    :param logger: Python logger object.
    :raises ValueError: for a blank model name, missing or invalid file
        arguments, a name mismatch between Stan file and executable, or
        when compilation was requested but produced no executable.
    """
    self._name = None
    self._stan_file = None
    self._exe_file = None
    self._compiler_options = CompilerOptions(stanc_options=stanc_options,
                                             cpp_options=cpp_options)
    self._logger = logger or get_logger()

    if model_name is not None:
        if not model_name.strip():
            raise ValueError(
                'Invalid value for argument model name, found "{}"'.format(
                    model_name))
        self._name = model_name.strip()

    if stan_file is None:
        if exe_file is None:
            raise ValueError(
                'Missing model file arguments, you must specify '
                'either Stan source or executable program file or both.')
    else:
        self._stan_file = os.path.realpath(os.path.expanduser(stan_file))
        if not os.path.exists(self._stan_file):
            raise ValueError('no such file {}'.format(self._stan_file))
        _, filename = os.path.split(stan_file)
        if len(filename) < 6 or not filename.endswith('.stan'):
            raise ValueError('invalid stan filename {}'.format(
                self._stan_file))
        if self._name is None:
            self._name, _ = os.path.splitext(filename)
        # if program has include directives, record path
        with open(self._stan_file, 'r') as fd:
            program = fd.read()
        if '#include' in program:
            path, _ = os.path.split(self._stan_file)
            # self._compiler_options was assigned a CompilerOptions
            # instance above, so only its stanc options can be unset here.
            if self._compiler_options._stanc_options is None:
                self._compiler_options._stanc_options = {
                    'include_paths': [path]
                }
            else:
                self._compiler_options.add_include_path(path)

    if exe_file is not None:
        self._exe_file = os.path.realpath(os.path.expanduser(exe_file))
        if not os.path.exists(self._exe_file):
            raise ValueError('no such file {}'.format(self._exe_file))
        _, exename = os.path.split(self._exe_file)
        if self._name is None:
            self._name, _ = os.path.splitext(exename)
        else:
            if self._name != os.path.splitext(exename)[0]:
                raise ValueError(
                    'Name mismatch between Stan file and compiled'
                    ' executable, expecting basename: {}'
                    ' found: {}.'.format(self._name, exename))

    self._compiler_options.validate()

    if platform.system() == 'Windows':
        # Add tbb to the $PATH on Windows
        libtbb = os.environ.get('STAN_TBB')
        if libtbb is None:
            libtbb = os.path.join(cmdstan_path(), 'stan', 'lib',
                                  'stan_math', 'lib', 'tbb')
        # OrderedDict.fromkeys drops duplicate entries while keeping the
        # first occurrence, so libtbb ends up first and only once.
        os.environ['PATH'] = ';'.join(
            OrderedDict.fromkeys(
                [libtbb] + os.environ.get('PATH', '').split(';')))

    if compile and self._exe_file is None:
        self.compile()
        if self._exe_file is None:
            raise ValueError(
                'Unable to compile Stan model file: {}.'.format(
                    self._stan_file))
def compile(
    self,
    force: bool = False,
    stanc_options: Dict = None,
    cpp_options: Dict = None,
    override_options: bool = False,
) -> None:
    """
    Compile the given Stan program file.  Translates the Stan code to
    C++, then calls the C++ compiler.

    By default, this function compares the timestamps on the source and
    executable files; if the executable is newer than the source file, it
    will not recompile the file, unless argument ``force`` is ``True``.

    :param force: When ``True``, always compile, even if the executable file
        is newer than the source file.  Used for Stan models which have
        ``#include`` directives in order to force recompilation when changes
        are made to the included files.
    :param stanc_options: Options for stanc compiler.
    :param cpp_options: Options for C++ compiler.
    :param override_options: When ``True``, override existing option.
        When ``False``, add/replace existing options.  Default is ``False``.
    :raises RuntimeError: if the model has no Stan source file.
    """
    if not self._stan_file:
        raise RuntimeError('Please specify source file')

    if stanc_options is not None or cpp_options is not None:
        new_options = CompilerOptions(stanc_options=stanc_options,
                                      cpp_options=cpp_options)
        new_options.validate()
        if self._compiler_options is None or override_options:
            self._compiler_options = new_options
        else:
            self._compiler_options.add(new_options)

    build_failed = False
    with TemporaryCopiedFile(self._stan_file) as (stan_file, is_copied):
        stem = os.path.splitext(os.path.abspath(stan_file))[0]
        exe_file = Path(stem).as_posix() + EXTENSION

        # Skip the build when an executable newer than the source exists,
        # unless the caller forces it.
        needs_build = True
        if os.path.exists(exe_file):
            src_time = os.path.getmtime(self._stan_file)
            exe_time = os.path.getmtime(exe_file)
            if exe_time > src_time and not force:
                needs_build = False
                self._logger.info('found newer exe file, not recompiling')

        if needs_build:
            self._logger.info('compiling stan program, exe file: %s',
                              exe_file)
            if self._compiler_options is not None:
                self._compiler_options.validate()
                self._logger.info('compiler options: %s',
                                  self._compiler_options)
            if platform.system() == 'Windows':
                default_make = 'mingw32-make'
            else:
                default_make = 'make'
            cmd = [os.getenv('MAKE', default_make)]
            if self._compiler_options is not None:
                cmd.extend(self._compiler_options.compose())
            cmd.append(Path(exe_file).as_posix())
            try:
                do_command(cmd, cmdstan_path(), logger=self._logger)
            except RuntimeError as e:
                self._logger.error('file %s, exception %s', stan_file,
                                   str(e))
                build_failed = True

        if build_failed:
            self._logger.error('model compilation failed')
        else:
            if is_copied:
                # Copy the executable back next to the original source.
                target_dir = os.path.dirname(
                    os.path.abspath(self._stan_file))
                exe_name = os.path.basename(
                    os.path.splitext(self._stan_file)[0]) + EXTENSION
                self._exe_file = os.path.join(target_dir, exe_name)
                shutil.copy(exe_file, self._exe_file)
            else:
                self._exe_file = exe_file
            self._logger.info('compiled model file: %s', self._exe_file)
def test_default_path(self):
    """Check that cmdstan_path() starts with ~/.cmdstanpy/cmdstan (expanded)."""
    expected_prefix = os.path.expanduser(
        os.path.join('~', '.cmdstanpy', 'cmdstan'))
    actual = cmdstan_path()
    self.assertTrue(actual.startswith(expected_prefix))
def show_cmdstan_version(self):
    """Print the value of cmdstan_path(); the assertion always passes."""
    banner = '\n\nCmdStan version: {}\n\n'.format(cmdstan_path())
    print(banner)
    self.assertTrue(True)
def compile(
    self,
    opt_lvl: int = 2,
    overwrite: bool = False,
    include_paths: List[str] = None,
) -> None:
    """
    Compile the given Stan program file.  Translates the Stan code to
    C++, then calls the C++ compiler.

    If ``make`` fails, the error is logged and the method returns without
    recording or copying an executable.

    :param opt_lvl: Optimization level used by the C++ compiler, one of
        {0, 1, 2, 3}.  Defaults to level 2.  Level 0 optimization results
        in the shortest compilation time with code that may run slowly.
        Higher optimization levels increase runtime performance but will
        take longer to compile.
    :param overwrite: When True, existing executable will be overwritten.
        Defaults to False.
    :param include_paths: List of paths to directories where Stan should
        look for files to include in compilation of the C++ executable.
    :raises RuntimeError: if the model has no Stan source file.
    :raises Exception: if an include path does not exist, or if no ``.hpp``
        file is present after translation.
    """
    if not self._stan_file:
        raise RuntimeError('Please specify source file')

    if self._exe_file is not None and not overwrite:
        self._logger.warning('model is already compiled')
        return

    with TemporaryCopiedFile(self._stan_file) as (stan_file, is_copied):
        hpp_file = os.path.splitext(stan_file)[0] + '.hpp'
        hpp_file = Path(hpp_file).as_posix()
        if overwrite or not os.path.exists(hpp_file):
            self._logger.info('stan to c++ (%s)', hpp_file)
            stanc_path = os.path.join(cmdstan_path(), 'bin',
                                      'stanc' + EXTENSION)
            stanc_path = Path(stanc_path).as_posix()
            cmd = [
                stanc_path,
                '--o={}'.format(hpp_file),
                Path(stan_file).as_posix(),
            ]
            if include_paths is not None:
                bad_paths = [
                    d for d in include_paths if not os.path.exists(d)
                ]
                if any(bad_paths):
                    raise Exception('invalid include paths: {}'.format(
                        ', '.join(bad_paths)))
                cmd.append('--include_paths=' + ','.join(
                    Path(p).as_posix() for p in include_paths))
            do_command(cmd, logger=self._logger)
        if not os.path.exists(hpp_file):
            # The original message had no placeholder, so the file name
            # passed to format() was silently dropped.
            raise Exception('syntax error in {}'.format(stan_file))

        exe_file, _ = os.path.splitext(os.path.abspath(stan_file))
        exe_file = Path(exe_file).as_posix()
        exe_file += EXTENSION
        make = os.getenv('MAKE', 'make')
        cmd = [make, 'O={}'.format(opt_lvl), exe_file]
        self._logger.info('compiling c++')
        try:
            do_command(cmd, cmdstan_path(), self._logger)
        except Exception as e:
            self._logger.error('make cmd failed %s', e)
            # Stop here: falling through would copy or record an
            # executable that was not built and log a success message.
            return

        if is_copied:
            original_target_dir = os.path.dirname(self._stan_file)
            # reconstruct the output file name
            new_exec_name = (
                os.path.basename(os.path.splitext(self._stan_file)[0])
                + EXTENSION)
            self._exe_file = os.path.join(original_target_dir,
                                          new_exec_name)
            # copy the generated file back to the original directory
            shutil.copy(exe_file, self._exe_file)
        else:
            self._exe_file = exe_file
        self._logger.info('compiled model file: %s', self._exe_file)
def compile(self, opt_lvl: int = 3, force: bool = False) -> None:
    """
    Compile the given Stan program file.  Translates the Stan code to
    C++, then calls the C++ compiler.

    By default, this function compares the timestamps on the source and
    executable files; if the executable is newer than the source file, it
    will not recompile the file, unless argument ``force`` is True.

    :param opt_lvl: Optimization level used by the C++ compiler, one of
        {0, 1, 2, 3}.  Defaults to level 3.  Level 0 optimization results
        in the shortest compilation time with code that may run slowly.
        Higher optimization levels increase runtime performance but will
        take longer to compile.
    :param force: When ``True``, always compile, even if the executable file
        is newer than the source file.  Used for Stan models which have
        ``#include`` directives in order to force recompilation when changes
        are made to the included files.
    :raises RuntimeError: if the model has no Stan source file.
    :raises ValueError: if a configured include path does not exist.
    """
    if not self._stan_file:
        raise RuntimeError('Please specify source file')

    build_failed = False
    with TemporaryCopiedFile(self._stan_file) as (stan_file, is_copied):
        stem = os.path.splitext(os.path.abspath(stan_file))[0]
        exe_file = Path(stem).as_posix() + EXTENSION

        # Skip the build when an executable newer than the source exists,
        # unless the caller forces it.
        needs_build = True
        if os.path.exists(exe_file):
            src_time = os.path.getmtime(self._stan_file)
            exe_time = os.path.getmtime(exe_file)
            if exe_time > src_time and not force:
                needs_build = False
                self._logger.info('found newer exe file, not recompiling')

        if needs_build:
            if platform.system() == 'Windows':
                default_make = 'mingw32-make'
            else:
                default_make = 'make'
            make = os.getenv('MAKE', default_make)
            hpp_file = Path(os.path.splitext(stan_file)[0] + '.hpp').as_posix()

            # Step 1: Stan -> C++, only when no .hpp file exists yet.
            if not os.path.exists(hpp_file):
                self._logger.info('stan to c++ (%s)', hpp_file)
                cmd = [
                    make,
                    Path(exe_file).as_posix(),
                    'STANCFLAGS+=--o={}'.format(hpp_file),
                ]
                if self._include_paths is not None:
                    missing = [
                        d for d in self._include_paths
                        if not os.path.exists(d)
                    ]
                    if any(missing):
                        raise ValueError(
                            'invalid include paths: {}'.format(
                                ', '.join(missing)
                            )
                        )
                    posix_paths = [
                        Path(p).as_posix() for p in self._include_paths
                    ]
                    cmd.append(
                        'STANCFLAGS+=--include_paths=' + ','.join(posix_paths)
                    )
                try:
                    do_command(cmd, cmdstan_path(), logger=self._logger)
                except RuntimeError as e:
                    self._logger.error(
                        'file %s, exception %s', stan_file, str(e)
                    )
                    build_failed = True

            # Step 2: C++ -> executable, unless step 1 already failed.
            if not build_failed:
                cmd = [make, 'O={}'.format(opt_lvl), exe_file]
                self._logger.info('compiling c++')
                try:
                    do_command(cmd, cmdstan_path(), logger=self._logger)
                except RuntimeError as e:
                    self._logger.error('make cmd failed %s', repr(e))
                    build_failed = True

        if build_failed:
            self._logger.error('model compilation failed')
        else:
            if is_copied:
                # Copy the executable back next to the original source.
                target_dir = os.path.dirname(
                    os.path.abspath(self._stan_file)
                )
                exe_name = (
                    os.path.basename(os.path.splitext(self._stan_file)[0])
                    + EXTENSION
                )
                self._exe_file = os.path.join(target_dir, exe_name)
                shutil.copy(exe_file, self._exe_file)
            else:
                self._exe_file = exe_file
            self._logger.info('compiled model file: %s', self._exe_file)
def test_cmdstan_version_at(self):
    """Check that cmdstan_version_at(99, 99) is false for the current install."""
    cmdstan_path()  # sets os.environ['CMDSTAN']
    at_future_release = cmdstan_version_at(99, 99)
    self.assertFalse(at_future_release)
def test_cmdstan_version_before(self):
    """Check cmdstan_version_before() against versions 99.99 and 1.1."""
    cmdstan_path()  # sets os.environ['CMDSTAN']
    before_future_release = cmdstan_version_before(99, 99)
    self.assertTrue(before_future_release)
    before_ancient_release = cmdstan_version_before(1, 1)
    self.assertFalse(before_ancient_release)
def validate(self) -> None:
    """
    Check arguments correctness and consistency.

    * a model name and executable must be set
    * chain ids must be >= 1
    * ``output_dir`` is resolved, created if missing, and must be a
      writeable directory
    * ``refresh`` must be a positive integer; ``sig_figs`` an integer in
      [1, 18] (dropped with a warning unless ``cmdstan_version_at(2, 25)``)
    * if no seed is specified, a random seed is set; otherwise the seed(s)
      must lie in [0, 2**32-1]
    * input files must exist; per-chain inits files must be distinct
    * length of per-chain lists equals specified # of chains

    :raises ValueError: on the first check that fails.
    """
    if self.model_name is None:
        raise ValueError('no stan model specified')
    if self.model_exe is None:
        raise ValueError('model not compiled')

    if self.chain_ids is not None:
        for chain_id in self.chain_ids:
            if chain_id < 1:
                raise ValueError('invalid chain_id {}'.format(chain_id))

    if self.output_dir is not None:
        self.output_dir = os.path.realpath(
            os.path.expanduser(self.output_dir))
        if not os.path.exists(self.output_dir):
            try:
                os.makedirs(self.output_dir)
                self._logger.info('created output directory: %s',
                                  self.output_dir)
            except (RuntimeError, PermissionError) as exc:
                raise ValueError(
                    'invalid path for output files, no such dir: {}'.
                    format(self.output_dir)) from exc
        if not os.path.isdir(self.output_dir):
            raise ValueError(
                'specified output_dir not a directory: {}'.format(
                    self.output_dir))
        # Probe writability by creating and removing a scratch file.
        try:
            probe = os.path.join(self.output_dir, str(time()))
            with open(probe, 'w+'):
                pass
            os.remove(probe)
        except Exception as exc:
            raise ValueError('invalid path for output files,'
                             ' cannot write to dir: {}'.format(
                                 self.output_dir)) from exc

    if self.refresh is not None:
        if not isinstance(self.refresh, int) or self.refresh < 1:
            raise ValueError(
                'Argument refresh must be a positive integer value, '
                'found {}.'.format(self.refresh))

    if self.sig_figs is not None:
        if (not isinstance(self.sig_figs, int) or self.sig_figs < 1
                or self.sig_figs > 18):
            raise ValueError(
                'sig_figs must be an integer between 1 and 18,'
                ' found {}'.format(self.sig_figs))
        if not cmdstan_version_at(2, 25):
            self.sig_figs = None
            self._logger.warning(
                'arg sig_figs not valid, CmdStan version must be 2.25 '
                'or higher, using verson %s in directory %s',
                os.path.basename(cmdstan_path()),
                os.path.dirname(cmdstan_path()),
            )

    if self.seed is None:
        self.seed = RandomState().randint(1, 99999 + 1)
    elif not isinstance(self.seed, (int, list)):
        raise ValueError(
            'seed must be an integer between 0 and 2**32-1,'
            ' found {}'.format(self.seed))
    elif isinstance(self.seed, int):
        if self.seed < 0 or self.seed > 2**32 - 1:
            raise ValueError(
                'seed must be an integer between 0 and 2**32-1,'
                ' found {}'.format(self.seed))
    else:
        # Per-chain seeds: one entry for every chain id.
        if self.chain_ids is None:
            raise ValueError(
                'seed must not be a list when no chains used')
        if len(self.seed) != len(self.chain_ids):
            raise ValueError(
                'number of seeds must match number of chains,'
                ' found {} seed for {} chains '.format(
                    len(self.seed), len(self.chain_ids)))
        for chain_seed in self.seed:
            if chain_seed < 0 or chain_seed > 2**32 - 1:
                raise ValueError('seed must be an integer value'
                                 ' between 0 and 2**32-1,'
                                 ' found {}'.format(chain_seed))

    if isinstance(self.data, str):
        if not os.path.exists(self.data):
            raise ValueError('no such file {}'.format(self.data))
    elif self.data is not None and not isinstance(self.data, dict):
        raise ValueError('data must be string or dict')

    if self.inits is None:
        return
    if isinstance(self.inits, (Integral, Real)):
        if self.inits < 0:
            raise ValueError('inits must be > 0, found {}'.format(
                self.inits))
    elif isinstance(self.inits, str):
        if not os.path.exists(self.inits):
            raise ValueError('no such file {}'.format(self.inits))
    elif isinstance(self.inits, list):
        if self.chain_ids is None:
            raise ValueError(
                'inits must not be a list when no chains are used')
        if len(self.inits) != len(self.chain_ids):
            raise ValueError(
                'number of inits files must match number of chains,'
                ' found {} inits files for {} chains '.format(
                    len(self.inits), len(self.chain_ids)))
        if len(set(self.inits)) != len(self.inits):
            raise ValueError('each chain must have its own init file,'
                             ' found duplicates in inits files list.')
        for inits_file in self.inits:
            if not os.path.exists(inits_file):
                raise ValueError('no such file {}'.format(inits_file))
def summary(
    self, percentiles: List[int] = None, sig_figs: int = None
) -> pd.DataFrame:
    """
    Run cmdstan/bin/stansummary over all output csv files and load the
    CSV report it writes into a DataFrame.

    Rows whose label ends in a double underscore are dropped from the
    result, with the single exception of the total joint log probability
    `lp__`, which is kept.

    :param percentiles: Ordered non-empty list of percentiles to report.
        Must be strictly increasing values from (1, 99), inclusive.
        When omitted, ``5,50,95`` is requested.

    :param sig_figs: Number of significant figures to report.
        Must be an integer between 1 and 18.  When omitted, the flag
        ``--sig_figs=2`` is passed to stansummary.  A warning is logged
        if more digits are requested than the sampler CSV output holds
        (``self._sig_figs``, taken to be 6 when that is unset).  If
        precision above 6 is requested, sample must have been produced
        by CmdStan version 2.25 or later.

    :return: pandas.DataFrame

    :raises ValueError: if ``percentiles`` is empty, unordered or out of
        range, or if ``sig_figs`` is not an integer in [1, 18].
    """
    # --- percentiles flag ------------------------------------------------
    if percentiles is None:
        pct_flag = '--percentiles=5,50,95'
    else:
        if len(percentiles) == 0:
            raise ValueError(
                'invalid percentiles argument, must be ordered'
                ' non-empty list from (1, 99), inclusive.'
            )
        # Each entry must exceed the previous one (starting from 0) and
        # must not go beyond 99.
        floor = 0
        for value in percentiles:
            if value > 99 or not (value > floor):
                raise ValueError(
                    'invalid percentiles spec, must be ordered'
                    ' non-empty list from (1, 99), inclusive.'
                )
            floor = value
        pct_flag = '--percentiles=' + ','.join(
            str(value) for value in percentiles
        )

    # --- significant figures flag ----------------------------------------
    if sig_figs is None:
        sig_flag = '--sig_figs=2'
    else:
        is_valid = isinstance(sig_figs, int) and 1 <= sig_figs <= 18
        if not is_valid:
            raise ValueError(
                'sig_figs must be an integer between 1 and 18,'
                ' found {}'.format(sig_figs)
            )
        available_digits = self._sig_figs or 6
        if sig_figs > available_digits:
            self._logger.warning(
                'Requesting %d significant digits of output, but CSV files'
                ' only have %d digits of precision.',
                sig_figs,
                available_digits,
            )
        sig_flag = '--sig_figs=' + str(sig_figs)

    # --- assemble and run the stansummary command --------------------------
    stansummary_exe = os.path.join(
        cmdstan_path(), 'bin', 'stansummary' + EXTENSION
    )
    report_prefix = 'stansummary-{}-'.format(self.runset._args.model_name)
    report_path = create_named_text_file(
        dir=_TMPDIR, prefix=report_prefix, suffix='.csv', name_only=True
    )
    # The output-file option is spelled differently depending on the
    # result of the cmdstan_version_at(2, 24) check.
    if cmdstan_version_at(2, 24):
        csv_flag = '--csv_filename={}'.format(report_path)
    else:
        csv_flag = '--csv_file={}'.format(report_path)

    command = [stansummary_exe, pct_flag, sig_flag, csv_flag]
    command = command + self.runset.csv_files
    do_command(command, logger=self.runset._logger)

    # --- read the report back ----------------------------------------------
    with open(report_path, 'rb') as report:
        table = pd.read_csv(
            report,
            delimiter=',',
            header=0,
            index_col=0,
            comment='#',
            float_precision='high',
        )
    keep_row = [
        not (label != 'lp__' and label.endswith('__'))
        for label in table.index
    ]
    return table[keep_row]
def __init__(
    self,
    stan_file: str = None,
    exe_file: str = None,
    include_paths: List[str] = None,
    compile: bool = True,
    logger: logging.Logger = None,
) -> None:
    """
    Initialize the model from a Stan program file, a compiled
    executable, or both.

    :param stan_file: Path to the Stan program file.  ``~`` is expanded
        and the path is resolved with ``os.path.realpath``.  The file
        must exist and its name must end in ``.stan`` with at least one
        character before the suffix.

    :param exe_file: Path to a compiled executable.  The file must
        exist.  When ``stan_file`` is also given, the executable's
        basename (without extension) must equal the Stan file's.

    :param include_paths: Directories to search for included files.
        Every entry must exist.  If the program text contains
        ``#include``, the program's own directory is added to the
        search list.  The caller's list is copied, never modified.

    :param compile: When True and no ``exe_file`` was supplied,
        ``self.compile()`` is invoked during construction.

    :param logger: Logger to use; defaults to ``get_logger()``.

    :raises ValueError: if neither ``stan_file`` nor ``exe_file`` is
        given, if a named file does not exist, if the Stan filename is
        invalid, if the Stan and executable names disagree, if any
        include path does not exist, or if compilation leaves the model
        without an executable.
    """
    self._stan_file = None
    self._name = None
    self._exe_file = None
    self._include_paths = None
    self._logger = logger or get_logger()

    if include_paths is not None:
        # Work on a private copy: the program directory may be appended
        # below and the caller's list must not change as a side effect.
        include_paths = list(include_paths)

    if stan_file is None:
        if exe_file is None:
            raise ValueError(
                'must specify Stan source or executable program file'
            )
    else:
        self._stan_file = os.path.realpath(os.path.expanduser(stan_file))
        if not os.path.exists(self._stan_file):
            raise ValueError('no such file {}'.format(self._stan_file))
        _, filename = os.path.split(stan_file)
        # '.stan' is 5 characters, so a valid name is at least 6 long.
        if len(filename) < 6 or not filename.endswith('.stan'):
            raise ValueError(
                'invalid stan filename {}'.format(self._stan_file)
            )
        self._name, _ = os.path.splitext(filename)
        # if program has #includes, search program dir
        with open(self._stan_file, 'r') as fd:
            program = fd.read()
        if '#include' in program:
            path, _ = os.path.split(self._stan_file)
            if include_paths is None:
                include_paths = []
            if path not in include_paths:
                include_paths.append(path)

    if exe_file is not None:
        self._exe_file = os.path.realpath(os.path.expanduser(exe_file))
        if not os.path.exists(self._exe_file):
            raise ValueError('no such file {}'.format(self._exe_file))
        _, exename = os.path.split(self._exe_file)
        if self._name is None:
            self._name, _ = os.path.splitext(exename)
        elif self._name != os.path.splitext(exename)[0]:
            raise ValueError(
                'name mismatch between Stan file and compiled'
                ' executable, expecting basename: {}'
                ' found: {}'.format(self._name, exename)
            )

    if include_paths is not None:
        bad_paths = [d for d in include_paths if not os.path.exists(d)]
        # Test the list itself rather than any(bad_paths): any() looks at
        # the truthiness of each path string, so a non-existent
        # empty-string path would slip through unreported.
        if bad_paths:
            raise ValueError(
                'invalid include paths: {}'.format(', '.join(bad_paths))
            )
        self._include_paths = include_paths

    if platform.system() == 'Windows':
        # Add tbb to the $PATH on Windows
        libtbb = os.getenv('STAN_TBB')
        if libtbb is None:
            libtbb = os.path.join(
                cmdstan_path(), 'stan', 'lib', 'stan_math', 'lib', 'tbb'
            )
        # OrderedDict.fromkeys removes duplicate entries while keeping
        # first-seen order, so libtbb ends up first and appears once.
        os.environ['PATH'] = ';'.join(
            OrderedDict.fromkeys(
                [libtbb] + os.getenv('PATH', '').split(';')
            )
        )

    if compile and self._exe_file is None:
        self.compile()
        if self._exe_file is None:
            raise ValueError(
                'unable to compile Stan model file: {}'.format(
                    self._stan_file
                )
            )