def summary(self) -> pd.DataFrame:
    """
    Summarize all output csv files with CmdStan's ``stansummary`` utility.

    Invokes ``bin/stansummary`` from the CmdStan installation on every csv
    file of the run set, asking it to write its csv report to a temporary
    file, then loads that report into a DataFrame indexed by its first
    column.

    :return: pandas.DataFrame containing the ``lp__`` row and every row
        whose name does not end in ``__``.
    """
    # NOTE(review): the value is never used below; the attribute access is
    # retained in case ``column_names`` is a property with side effects.
    names = self.column_names

    stansummary_exe = os.path.join(
        cmdstan_path(), 'bin', 'stansummary' + EXTENSION
    )
    report_prefix = 'stansummary-{}-{}-chain-'.format(
        self.runset._args.model_name, self.runset.chains
    )
    report_path = create_named_text_file(
        dir=TMPDIR, prefix=report_prefix, suffix='.csv'
    )

    argv = [stansummary_exe, '--csv_file={}'.format(report_path)]
    argv = argv + self.runset.csv_files
    do_command(argv, logger=self.runset._logger)

    with open(report_path, 'rb') as report:
        summary_data = pd.read_csv(
            report, delimiter=',', header=0, index_col=0, comment='#'
        )

    # Keep lp__ but drop every other row whose name ends in a double
    # underscore.
    keep = []
    for row_name in summary_data.index:
        keep.append(row_name == 'lp__' or not row_name.endswith('__'))
    return summary_data[keep]
def summary(self) -> pd.DataFrame:
    """
    Run cmdstan/bin/stansummary over all output csv files and assemble
    the csv tempfile contents into a pandas DataFrame.

    The command is passed to ``do_command`` as an argument list, so paths
    containing whitespace are handled correctly.

    :return: pandas.DataFrame containing the ``lp__`` row and every row
        whose name does not end in ``__``.
    """
    # presumably rejects results not produced by sampling; see
    # _sampling_only
    self._sampling_only()
    cmd_path = os.path.join(cmdstan_path(), 'bin', 'stansummary' + EXTENSION)
    tmp_csv_file = 'stansummary-{}-{}-chains-'.format(
        self._args.model_name, self.chains
    )
    fd, tmp_csv_path = tempfile.mkstemp(
        suffix='.csv', prefix=tmp_csv_file, dir=TMPDIR, text=True
    )
    # mkstemp returns an already-open OS-level descriptor. Only the path is
    # needed here, so close it right away instead of leaking it.
    os.close(fd)

    # Build the argv list directly: joining into a string and splitting
    # again would break any path that contains whitespace.
    cmd = [cmd_path, '--csv_file={}'.format(tmp_csv_path)]
    cmd.extend(self.csv_files)
    do_command(cmd, logger=self._logger)

    summary_data = pd.read_csv(
        tmp_csv_path, delimiter=',', header=0, index_col=0, comment='#'
    )
    # Keep lp__ but drop every other row whose name ends in '__'.
    mask = [
        x == 'lp__' or not x.endswith('__') for x in summary_data.index
    ]
    return summary_data[mask]
def summary(self, percentiles: List[int] = None) -> pd.DataFrame:
    """
    Summarize all output csv files with CmdStan's ``stansummary`` utility.

    Invokes ``bin/stansummary`` on every csv file of the run set, has it
    write a csv report to a temporary file, and loads that report into a
    DataFrame indexed by its first column.

    :param percentiles: Ordered non-empty list of percentiles to report.
        Each value must be greater than the previous one, at least 1 and
        at most 99. When ``None``, ``5,50,95`` is requested.

    :return: pandas.DataFrame containing the ``lp__`` row and every row
        whose name does not end in ``__``.

    :raises ValueError: if ``percentiles`` is empty, not strictly
        increasing, or contains a value outside the accepted range.
    """
    percentiles_str = '--percentiles=5,50,95'
    if percentiles is not None:
        if len(percentiles) == 0:
            raise ValueError(
                'invalid percentiles argument, must be ordered'
                ' non-empty list from (1, 99), inclusive.'
            )
        # Starting from 0 rejects anything below 1 and enforces strictly
        # increasing order in one comparison.
        previous = 0
        for pct in percentiles:
            if not (previous < pct <= 99):
                raise ValueError(
                    'invalid percentiles spec, must be ordered'
                    ' non-empty list from (1, 99), inclusive.'
                )
            previous = pct
        percentiles_str = '--percentiles=' + ','.join(
            str(pct) for pct in percentiles
        )

    stansummary_exe = os.path.join(
        cmdstan_path(), 'bin', 'stansummary' + EXTENSION
    )
    report_prefix = 'stansummary-{}-{}-chain-'.format(
        self.runset._args.model_name, self.runset.chains
    )
    report_path = create_named_text_file(
        dir=_TMPDIR, prefix=report_prefix, suffix='.csv'
    )

    argv = [
        stansummary_exe,
        percentiles_str,
        '--csv_file={}'.format(report_path),
    ]
    argv = argv + self.runset.csv_files
    do_command(argv, logger=self.runset._logger)

    with open(report_path, 'rb') as report:
        summary_data = pd.read_csv(
            report,
            delimiter=',',
            header=0,
            index_col=0,
            comment='#',
            float_precision='high',
        )

    # Keep lp__ but drop every other row whose name ends in '__'.
    keep = []
    for row_name in summary_data.index:
        keep.append(row_name == 'lp__' or not row_name.endswith('__'))
    return summary_data[keep]
def clean_all(verbose: bool = False) -> None:
    """
    Run `make clean-all` in the current directory (must be a cmdstan library).

    :param verbose: Boolean value; when ``True``, ``do_command`` is called
        with its default output handling; otherwise ``fd_out=None`` is
        passed.

    :raises CmdStanInstallError: if ``do_command`` raises ``RuntimeError``.
    """
    # Only the non-verbose call overrides the fd_out argument.
    output_kwargs = {} if verbose else {'fd_out': None}
    try:
        do_command([MAKE, 'clean-all'], **output_kwargs)
    except RuntimeError as e:
        # pylint: disable=raise-missing-from
        raise CmdStanInstallError(f'Command "make clean-all" failed\n{str(e)}')
def diagnose(self) -> str:
    """
    Run cmdstan/bin/diagnose over all output csv files.
    Returns output of diagnose (stdout/stderr).

    The diagnose utility reads the outputs of all chains
    and checks for the following potential problems:

    + Transitions that hit the maximum treedepth
    + Divergent transitions
    + Low E-BFMI values (sampler transitions HMC potential energy)
    + Low effective sample sizes
    + High R-hat values

    Any non-empty output is also logged as a warning.

    :return: whatever ``do_command`` returns for the diagnose run;
        presumably empty when no problems are found.
    """
    # presumably rejects results not produced by sampling; see
    # _sampling_only
    self._sampling_only()
    cmd_path = os.path.join(cmdstan_path(), 'bin', 'diagnose' + EXTENSION)
    # Build the argv list directly: joining into a string and splitting
    # again would break any path that contains whitespace.
    cmd = [cmd_path]
    cmd.extend(self.csv_files)
    result = do_command(cmd=cmd, logger=self._logger)
    if result:
        self._logger.warning(result)
    return result
def compile_example(verbose: bool = False) -> None:
    """
    Compile the example model.
    The current directory must be a cmdstan installation, i.e.,
    contains the makefile, Stanc compiler, and all libraries.

    :param verbose: Boolean value; when ``True``, show output from make
        command.

    :raises CmdStanInstallError: if the make command fails.
    """
    # make expects a forward-slash target path on every platform.
    target = Path(
        os.path.join('examples', 'bernoulli', 'bernoulli' + EXTENSION)
    ).as_posix()
    cmd = [MAKE, target]
    try:
        if verbose:
            do_command(cmd)
        else:
            do_command(cmd, fd_out=None)
    except RuntimeError as e:
        # pylint: disable=raise-missing-from
        # Report the target that was actually built; the message previously
        # named "make clean-all".
        raise CmdStanInstallError(f'Command "make {target}" failed\n{e}')
def diagnose(self) -> str:
    """
    Run CmdStan's ``bin/diagnose`` utility over all output csv files.

    The diagnose utility reads the outputs of all chains
    and checks for the following potential problems:

    + Transitions that hit the maximum treedepth
    + Divergent transitions
    + Low E-BFMI values (sampler transitions HMC potential energy)
    + Low effective sample sizes
    + High R-hat values

    Any non-empty output is also logged as a warning on the run set's
    logger.

    :return: the value returned by ``do_command`` for the diagnose run.
    """
    diagnose_exe = os.path.join(cmdstan_path(), 'bin', 'diagnose' + EXTENSION)
    argv = [diagnose_exe] + self.runset.csv_files
    report = do_command(cmd=argv, logger=self.runset._logger)
    if not report:
        return report
    self.runset._logger.warning(report)
    return report
def build(verbose: bool = False, progress: bool = True, cores: int = 1) -> None:
    """
    Run command ``make build`` in the current directory, which must be
    the home directory of a CmdStan version (or GitHub repo).
    By default, displays a progress bar which tracks make command outputs.
    If argument ``verbose=True``, instead of a progress bar, streams
    make command outputs to sys.stdout.  When both ``verbose`` and
    ``progress`` are ``False``, runs silently.

    After a successful make, checks that the ``stansummary`` and
    ``diagnose`` executables exist under ``bin``. On Windows, the TBB
    library directory is prepended to ``PATH``.

    :param verbose: Boolean value; when ``True``, show output from make
        command. Default is ``False``.
    :param progress: Boolean value; when ``True`` display progress bar.
        Default is ``True``.
    :param cores: Integer, number of cores to use in the ``make`` command.
        Default is 1 core.

    :raises CmdStanInstallError: if the make command fails or an expected
        executable is missing afterwards.
    """
    cmd = [MAKE, 'build', f'-j{cores}']
    try:
        if verbose:
            do_command(cmd)
        elif progress and progbar.allow_show_progress():
            progress_hook: Any = _wrap_build_progress_hook()
            do_command(cmd, fd_out=None, pbar=progress_hook)
        else:
            do_command(cmd, fd_out=None)
    except RuntimeError as e:
        # pylint: disable=raise-missing-from
        raise CmdStanInstallError(f'Command "make build" failed\n{str(e)}')

    # Name the tool that is actually missing; the diagnose check previously
    # reported stansummary.
    for tool in ('stansummary', 'diagnose'):
        if not os.path.exists(os.path.join('bin', tool + EXTENSION)):
            raise CmdStanInstallError(
                f'bin/{tool}{EXTENSION} not found'
                ', please rebuild or report a bug!'
            )

    if platform.system() == 'Windows':
        # Add tbb to the $PATH on Windows
        libtbb = os.path.join(
            os.getcwd(), 'stan', 'lib', 'stan_math', 'lib', 'tbb'
        )
        # OrderedDict.fromkeys de-duplicates entries while keeping order,
        # with libtbb placed first.
        os.environ['PATH'] = ';'.join(
            list(
                OrderedDict.fromkeys(
                    [libtbb] + os.environ.get('PATH', '').split(';')
                )
            )
        )
def compile(
    self,
    force: bool = False,
    stanc_options: Dict = None,
    cpp_options: Dict = None,
    override_options: bool = False,
) -> None:
    """
    Compile the given Stan program file by running ``make`` on the
    executable target from the CmdStan directory.

    By default, this function compares the timestamps on the source and
    executable files; if the executable is newer than the source file, it
    will not recompile the file, unless argument ``force`` is ``True``.

    On success ``self._exe_file`` is set; when the source was compiled from
    a temporary copy, the executable is copied back next to the original
    source file. A failed make is logged, not raised.

    :param force: When ``True``, always compile, even if the executable
        file is newer than the source file.  Used for Stan models which
        have ``#include`` directives in order to force recompilation when
        changes are made to the included files.
    :param stanc_options: Options for stanc compiler.
    :param cpp_options: Options for C++ compiler.
    :param override_options: When ``True``, the supplied options replace
        the model's existing compiler options. When ``False``, they are
        added to the existing ones. Default is ``False``.

    :raises RuntimeError: if the model has no Stan source file.
    """
    if not self._stan_file:
        raise RuntimeError('Please specify source file')

    if stanc_options is not None or cpp_options is not None:
        requested = CompilerOptions(
            stanc_options=stanc_options, cpp_options=cpp_options
        )
        requested.validate()
        if self._compiler_options is None or override_options:
            self._compiler_options = requested
        else:
            self._compiler_options.add(requested)

    compilation_failed = False
    with TemporaryCopiedFile(self._stan_file) as (stan_file, is_copied):
        exe_root, _ = os.path.splitext(os.path.abspath(stan_file))
        exe_file = Path(exe_root).as_posix() + EXTENSION

        needs_build = True
        if os.path.exists(exe_file):
            source_mtime = os.path.getmtime(self._stan_file)
            binary_mtime = os.path.getmtime(exe_file)
            if binary_mtime > source_mtime and not force:
                needs_build = False
                self._logger.info('found newer exe file, not recompiling')

        if needs_build:
            self._logger.info(
                'compiling stan program, exe file: %s', exe_file
            )
            if self._compiler_options is not None:
                self._compiler_options.validate()
                self._logger.info(
                    'compiler options: %s', self._compiler_options
                )
            if platform.system() == 'Windows':
                default_make = 'mingw32-make'
            else:
                default_make = 'make'
            cmd = [os.getenv('MAKE', default_make)]
            if self._compiler_options is not None:
                cmd += self._compiler_options.compose()
            cmd.append(Path(exe_file).as_posix())
            try:
                do_command(cmd, cmdstan_path(), logger=self._logger)
            except RuntimeError as e:
                self._logger.error(
                    'file %s, exception %s', stan_file, str(e)
                )
                compilation_failed = True

        if compilation_failed:
            self._logger.error('model compilation failed')
        else:
            if is_copied:
                # Built from a temporary copy: place the executable next
                # to the original source file.
                target_dir = os.path.dirname(
                    os.path.abspath(self._stan_file)
                )
                source_stem = os.path.splitext(self._stan_file)[0]
                exe_name = os.path.basename(source_stem) + EXTENSION
                self._exe_file = os.path.join(target_dir, exe_name)
                shutil.copy(exe_file, self._exe_file)
            else:
                self._exe_file = exe_file
            self._logger.info('compiled model file: %s', self._exe_file)
def compile(
    self,
    opt_lvl: int = 2,
    overwrite: bool = False,
    include_paths: List[str] = None,
) -> None:
    """
    Compile the given Stan program file.  Translates the Stan code to
    C++, then calls the C++ compiler.

    On success ``self._exe_file`` is set; when the source was compiled from
    a temporary copy, the executable is copied back next to the original
    source file. If the make command fails, the failure is logged and
    ``self._exe_file`` is left untouched.

    :param opt_lvl: Optimization level used by the C++ compiler, one of
        {0, 1, 2, 3}.  Defaults to level 2. Level 0 optimization results
        in the shortest compilation time with code that may run slowly.
        Higher optimization levels increase runtime performance but will
        take longer to compile.
    :param overwrite: When True, existing executable will be overwritten.
        Defaults to False.
    :param include_paths: List of paths to directories where Stan should
        look for files to include in compilation of the C++ executable.

    :raises RuntimeError: if the model has no Stan source file.
    :raises Exception: if an include path does not exist, or stanc did not
        produce the expected ``.hpp`` file.
    """
    if not self._stan_file:
        raise RuntimeError('Please specify source file')

    if self._exe_file is not None and not overwrite:
        self._logger.warning('model is already compiled')
        return

    with TemporaryCopiedFile(self._stan_file) as (stan_file, is_copied):
        hpp_file = os.path.splitext(stan_file)[0] + '.hpp'
        hpp_file = Path(hpp_file).as_posix()
        if overwrite or not os.path.exists(hpp_file):
            self._logger.info('stan to c++ (%s)', hpp_file)
            stanc_path = os.path.join(
                cmdstan_path(), 'bin', 'stanc' + EXTENSION
            )
            stanc_path = Path(stanc_path).as_posix()
            cmd = [
                stanc_path,
                '--o={}'.format(hpp_file),
                Path(stan_file).as_posix(),
            ]
            if include_paths is not None:
                bad_paths = [
                    d for d in include_paths if not os.path.exists(d)
                ]
                # Test list emptiness, not any(): an empty-string path is
                # falsy and would slip through any().
                if bad_paths:
                    raise Exception(
                        'invalid include paths: {}'.format(
                            ', '.join(bad_paths)
                        )
                    )
                cmd.append(
                    '--include_paths='
                    + ','.join(Path(p).as_posix() for p in include_paths)
                )
            do_command(cmd, logger=self._logger)
            if not os.path.exists(hpp_file):
                # The message previously had no placeholder, so the file
                # name was silently dropped.
                raise Exception('syntax error in {}'.format(stan_file))

        exe_file, _ = os.path.splitext(os.path.abspath(stan_file))
        exe_file = Path(exe_file).as_posix()
        exe_file += EXTENSION
        make = os.getenv('MAKE', 'make')
        cmd = [make, 'O={}'.format(opt_lvl), exe_file]
        self._logger.info('compiling c++')
        try:
            do_command(cmd, cmdstan_path(), self._logger)
        except Exception as e:
            self._logger.error('make cmd failed %s', e)
            # Do not record or copy an executable that was not built.
            return

        if is_copied:
            original_target_dir = os.path.dirname(self._stan_file)
            # reconstruct the output file name
            new_exec_name = (
                os.path.basename(os.path.splitext(self._stan_file)[0])
                + EXTENSION
            )
            self._exe_file = os.path.join(original_target_dir, new_exec_name)
            # copy the generated file back to the original directory
            shutil.copy(exe_file, self._exe_file)
        else:
            self._exe_file = exe_file
        self._logger.info('compiled model file: %s', self._exe_file)
def summary(self, percentiles: List[int] = None, sig_figs: int = None) -> pd.DataFrame:
    """
    Run cmdstan/bin/stansummary over all output csv files and assemble
    the summary into a DataFrame indexed by the report's first column.
    The result keeps the row for the total joint log probability `lp__`
    and drops every other row whose name ends in `__`.

    :param percentiles: Ordered non-empty list of percentiles to report.
        Each value must be greater than the previous one, at least 1 and
        at most 99. When ``None``, ``5,50,95`` is requested.

    :param sig_figs: Number of significant figures to report.
        Must be an integer between 1 and 18.  When ``None``,
        ``--sig_figs=2`` is passed to stansummary.
        A warning is logged when the requested value exceeds the precision
        recorded for the csv files (``self._sig_figs``, or 6 when unset).

    :return: pandas.DataFrame

    :raises ValueError: if ``percentiles`` or ``sig_figs`` is invalid.
    """
    percentiles_str = '--percentiles=5,50,95'
    if percentiles is not None:
        if len(percentiles) == 0:
            raise ValueError(
                'invalid percentiles argument, must be ordered'
                ' non-empty list from (1, 99), inclusive.'
            )
        # Starting from 0 rejects anything below 1 and enforces strictly
        # increasing order in one comparison.
        previous = 0
        for pct in percentiles:
            if not (previous < pct <= 99):
                raise ValueError(
                    'invalid percentiles spec, must be ordered'
                    ' non-empty list from (1, 99), inclusive.'
                )
            previous = pct
        percentiles_str = '--percentiles=' + ','.join(
            str(pct) for pct in percentiles
        )

    sig_figs_str = '--sig_figs=2'
    if sig_figs is not None:
        if not (isinstance(sig_figs, int) and 1 <= sig_figs <= 18):
            raise ValueError(
                'sig_figs must be an integer between 1 and 18,'
                ' found {}'.format(sig_figs)
            )
        csv_sig_figs = self._sig_figs or 6
        if sig_figs > csv_sig_figs:
            self._logger.warning(
                'Requesting %d significant digits of output, but CSV files'
                ' only have %d digits of precision.',
                sig_figs,
                csv_sig_figs,
            )
        sig_figs_str = '--sig_figs=' + str(sig_figs)

    stansummary_exe = os.path.join(
        cmdstan_path(), 'bin', 'stansummary' + EXTENSION
    )
    report_prefix = 'stansummary-{}-'.format(self.runset._args.model_name)
    report_path = create_named_text_file(
        dir=_TMPDIR, prefix=report_prefix, suffix='.csv', name_only=True
    )

    # The output-file flag is spelled differently depending on the result
    # of cmdstan_version_at(2, 24).
    if cmdstan_version_at(2, 24):
        csv_flag = '--csv_filename'
    else:
        csv_flag = '--csv_file'
    csv_str = '{}={}'.format(csv_flag, report_path)

    argv = [stansummary_exe, percentiles_str, sig_figs_str, csv_str]
    argv = argv + self.runset.csv_files
    do_command(argv, logger=self.runset._logger)

    with open(report_path, 'rb') as report:
        summary_data = pd.read_csv(
            report,
            delimiter=',',
            header=0,
            index_col=0,
            comment='#',
            float_precision='high',
        )

    # Keep lp__ but drop every other row whose name ends in '__'.
    keep = []
    for row_name in summary_data.index:
        keep.append(row_name == 'lp__' or not row_name.endswith('__'))
    return summary_data[keep]
def compile(self, opt_lvl: int = 3, force: bool = False) -> None:
    """
    Compile the given Stan program file.  Translates the Stan code to
    C++ (when no ``.hpp`` file exists yet), then calls the C++ compiler,
    both through ``make`` run from the CmdStan directory.

    By default, this function compares the timestamps on the source and
    executable files; if the executable is newer than the source file, it
    will not recompile the file, unless argument ``force`` is True.

    On success ``self._exe_file`` is set; when the source was compiled from
    a temporary copy, the executable is copied back next to the original
    source file. A failed make is logged, not raised.

    :param opt_lvl: Optimization level used by the C++ compiler, one of
        {0, 1, 2, 3}.  Defaults to level 3. Level 0 optimization results
        in the shortest compilation time with code that may run slowly.
        Higher optimization levels increase runtime performance but will
        take longer to compile.
    :param force: When ``True``, always compile, even if the executable
        file is newer than the source file.  Used for Stan models which
        have ``#include`` directives in order to force recompilation when
        changes are made to the included files.

    :raises RuntimeError: if the model has no Stan source file.
    :raises ValueError: if one of the model's include paths does not exist.
    """
    if not self._stan_file:
        raise RuntimeError('Please specify source file')

    compilation_failed = False
    with TemporaryCopiedFile(self._stan_file) as (stan_file, is_copied):
        exe_root, _ = os.path.splitext(os.path.abspath(stan_file))
        exe_file = Path(exe_root).as_posix() + EXTENSION

        needs_build = True
        if os.path.exists(exe_file):
            source_mtime = os.path.getmtime(self._stan_file)
            binary_mtime = os.path.getmtime(exe_file)
            if binary_mtime > source_mtime and not force:
                needs_build = False
                self._logger.info('found newer exe file, not recompiling')

        if needs_build:
            if platform.system() == 'Windows':
                default_make = 'mingw32-make'
            else:
                default_make = 'make'
            make = os.getenv('MAKE', default_make)

            hpp_file = Path(os.path.splitext(stan_file)[0] + '.hpp').as_posix()
            if not os.path.exists(hpp_file):
                # First pass: generate the C++ source via make.
                self._logger.info('stan to c++ (%s)', hpp_file)
                cmd = [
                    make,
                    Path(exe_file).as_posix(),
                    'STANCFLAGS+=--o={}'.format(hpp_file),
                ]
                if self._include_paths is not None:
                    bad_paths = [
                        d for d in self._include_paths
                        if not os.path.exists(d)
                    ]
                    if any(bad_paths):
                        raise ValueError(
                            'invalid include paths: {}'.format(
                                ', '.join(bad_paths)
                            )
                        )
                    posix_paths = [
                        Path(p).as_posix() for p in self._include_paths
                    ]
                    cmd.append(
                        'STANCFLAGS+=--include_paths=' + ','.join(posix_paths)
                    )
                try:
                    do_command(cmd, cmdstan_path(), logger=self._logger)
                except RuntimeError as e:
                    self._logger.error(
                        'file %s, exception %s', stan_file, str(e)
                    )
                    compilation_failed = True

            if not compilation_failed:
                # Second pass: build the executable at the requested
                # optimization level.
                cmd = [make, 'O={}'.format(opt_lvl), exe_file]
                self._logger.info('compiling c++')
                try:
                    do_command(cmd, cmdstan_path(), logger=self._logger)
                except RuntimeError as e:
                    self._logger.error('make cmd failed %s', repr(e))
                    compilation_failed = True

        if compilation_failed:
            self._logger.error('model compilation failed')
        else:
            if is_copied:
                # Built from a temporary copy: place the executable next
                # to the original source file.
                target_dir = os.path.dirname(
                    os.path.abspath(self._stan_file)
                )
                source_stem = os.path.splitext(self._stan_file)[0]
                exe_name = os.path.basename(source_stem) + EXTENSION
                self._exe_file = os.path.join(target_dir, exe_name)
                shutil.copy(exe_file, self._exe_file)
            else:
                self._exe_file = exe_file
            self._logger.info('compiled model file: %s', self._exe_file)
def test_exit(self):
    """do_command must raise RuntimeError for the failing bash call."""
    captured_stdout = io.StringIO()
    failing_cmd = ['bash', '/bin/junk']
    # stdout is redirected into an in-memory buffer for the duration of
    # the call.
    with contextlib.redirect_stdout(captured_stdout), self.assertRaises(
        RuntimeError
    ):
        do_command(failing_cmd, HERE)
def test_capture_console(self):
    """Output of ``ls`` run in HERE must be written to the fd_out buffer."""
    console = io.StringIO()
    do_command(cmd=['ls'], cwd=HERE, fd_out=console)
    listing = console.getvalue()
    self.assertTrue('test_utils.py' in listing)