Beispiel #1
0
 def summary(self) -> pd.DataFrame:
     """
     Summarize the run's output csv files with cmdstan/bin/stansummary.

     The utility is asked (via ``--csv_file``) to write its results to a
     freshly created temporary csv file, which is then loaded into a
     pandas DataFrame whose index is the first csv column.

     :return: the rows for ``lp__`` plus every row whose name does not
         end in ``__``.
     """
     # NOTE(review): the value read here is never used below; the read is
     # kept in case the ``column_names`` accessor has side effects.
     names = self.column_names
     stansummary_exe = os.path.join(cmdstan_path(), 'bin',
                                    'stansummary' + EXTENSION)
     run = self.runset
     prefix = 'stansummary-{}-{}-chain-'.format(run._args.model_name,
                                                run.chains)
     out_path = create_named_text_file(dir=TMPDIR,
                                       prefix=prefix,
                                       suffix='.csv')
     argv = [stansummary_exe, '--csv_file={}'.format(out_path)]
     do_command(argv + self.runset.csv_files, logger=self.runset._logger)
     with open(out_path, 'rb') as stream:
         table = pd.read_csv(stream,
                             delimiter=',',
                             header=0,
                             index_col=0,
                             comment='#')
     # Drop rows named ``*__`` other than lp__ itself.
     keep = [
         not (label != 'lp__' and label.endswith('__'))
         for label in table.index
     ]
     return table[keep]
Beispiel #2
0
    def summary(self) -> pd.DataFrame:
        """
        Run cmdstan/bin/stansummary over all output csv files.
        Assemble csv tempfile contents into pandasDataFrame.

        The utility writes its results to a temporary csv file created
        under ``TMPDIR``; that file is then read back with pandas.

        :return: DataFrame indexed by the first csv column, restricted to
            ``lp__`` and the rows whose name does not end in ``__``.
        """
        self._sampling_only()

        cmd_path = os.path.join(cmdstan_path(), 'bin',
                                'stansummary' + EXTENSION)
        tmp_csv_file = 'stansummary-{}-{}-chains-'.format(
            self._args.model_name, self.chains)
        fd, tmp_csv_path = tempfile.mkstemp(suffix='.csv',
                                            prefix=tmp_csv_file,
                                            dir=TMPDIR,
                                            text=True)
        # mkstemp hands back an open OS-level descriptor.  Only the path is
        # needed here, so release the descriptor right away instead of
        # leaking it (an open handle can also block the writer on Windows).
        os.close(fd)
        # Build the argument vector directly: joining into one string and
        # splitting on whitespace would break paths that contain spaces.
        cmd = [cmd_path, '--csv_file={}'.format(tmp_csv_path)]
        cmd.extend(self.csv_files)
        do_command(cmd, logger=self._logger)
        summary_data = pd.read_csv(tmp_csv_path,
                                   delimiter=',',
                                   header=0,
                                   index_col=0,
                                   comment='#')
        # Keep lp__ and drop every other row whose name ends in ``__``.
        mask = [
            x == 'lp__' or not x.endswith('__') for x in summary_data.index
        ]
        return summary_data[mask]
Beispiel #3
0
    def summary(self, percentiles: List[int] = None) -> pd.DataFrame:
        """
        Summarize the run's output csv files with cmdstan/bin/stansummary.

        The utility writes its results to a freshly created temporary csv
        file, which is then loaded into a pandas DataFrame.

        :param percentiles: Ordered non-empty list of percentiles to report.
            Every entry must be greater than the one before it (the first
            greater than 0) and no larger than 99.  When omitted,
            ``5,50,95`` is requested.

        :return: the rows for ``lp__`` plus every row whose name does not
            end in ``__``.

        :raises ValueError: if ``percentiles`` is empty, not strictly
            increasing, or contains a value outside the allowed range.
        """
        if percentiles is None:
            percentiles_str = '--percentiles=5,50,95'
        else:
            if len(percentiles) == 0:
                raise ValueError(
                    'invalid percentiles argument, must be ordered'
                    ' non-empty list from (1, 99), inclusive.'
                )
            # Starting the floor at 0 forces the first entry to be positive.
            floor = 0
            for value in percentiles:
                if value > 99 or not value > floor:
                    raise ValueError(
                        'invalid percentiles spec, must be ordered'
                        ' non-empty list from (1, 99), inclusive.'
                    )
                floor = value
            percentiles_str = '--percentiles=' + ','.join(
                str(value) for value in percentiles
            )

        stansummary_exe = os.path.join(
            cmdstan_path(), 'bin', 'stansummary' + EXTENSION
        )
        run = self.runset
        prefix = 'stansummary-{}-{}-chain-'.format(
            run._args.model_name, run.chains
        )
        out_path = create_named_text_file(
            dir=_TMPDIR, prefix=prefix, suffix='.csv'
        )
        argv = [
            stansummary_exe,
            percentiles_str,
            '--csv_file={}'.format(out_path),
        ]
        do_command(argv + self.runset.csv_files, logger=self.runset._logger)
        with open(out_path, 'rb') as stream:
            table = pd.read_csv(
                stream,
                delimiter=',',
                header=0,
                index_col=0,
                comment='#',
                float_precision='high',
            )
        # Drop rows named ``*__`` other than lp__ itself.
        keep = [
            not (label != 'lp__' and label.endswith('__'))
            for label in table.index
        ]
        return table[keep]
Beispiel #4
0
def clean_all(verbose: bool = False) -> None:
    """
    Invoke ``make clean-all`` through ``do_command``.

    No working directory is passed, so the command is expected to run in
    the current directory (which must be a cmdstan library).

    :param verbose: Boolean value; when ``True``, ``do_command`` is called
        with its default output handling, otherwise with ``fd_out=None``.

    :raises CmdStanInstallError: if ``do_command`` raises ``RuntimeError``.
    """
    cmd = [MAKE, 'clean-all']
    # Only override the output destination when running quietly.
    output_args = {} if verbose else {'fd_out': None}
    try:
        do_command(cmd, **output_args)
    except RuntimeError as e:
        # pylint: disable=raise-missing-from
        raise CmdStanInstallError(f'Command "make clean-all" failed\n{str(e)}')
Beispiel #5
0
    def diagnose(self) -> str:
        """
        Run cmdstan/bin/diagnose over all output csv files.
        Returns output of diagnose (stdout/stderr)

        The diagnose utility reads the outputs of all chains
        and checks for the following potential problems:

        + Transitions that hit the maximum treedepth
        + Divergent transitions
        + Low E-BFMI values (sampler transitions HMC potential energy)
        + Low effective sample sizes
        + High R-hat values

        Any non-empty output is also logged as a warning.

        :return: the text returned by the diagnose command; empty if no
            problems found.
        """
        self._sampling_only()

        cmd_path = os.path.join(cmdstan_path(), 'bin', 'diagnose' + EXTENSION)
        # Build the argument vector directly: joining into one string and
        # splitting on whitespace would break paths that contain spaces.
        cmd = [cmd_path]
        cmd.extend(self.csv_files)
        result = do_command(cmd=cmd, logger=self._logger)
        if result:
            self._logger.warning(result)
        return result
Beispiel #6
0
def compile_example(verbose: bool = False) -> None:
    """
    Compile the example model.
    The current directory must be a cmdstan installation, i.e.,
    contains the makefile, Stanc compiler, and all libraries.

    :param verbose: Boolean value; when ``True``, show output from make command.

    :raises CmdStanInstallError: if the make command fails.
    """
    # make targets use forward slashes, also on Windows
    target = Path(os.path.join('examples', 'bernoulli',
                               'bernoulli' + EXTENSION)).as_posix()
    cmd = [MAKE, target]
    try:
        if verbose:
            do_command(cmd)
        else:
            do_command(cmd, fd_out=None)
    except RuntimeError as e:
        # Report the target that was actually built (the message used to
        # name "make clean-all", copied from a sibling function).
        # pylint: disable=raise-missing-from
        raise CmdStanInstallError(f'Command "make {target}" failed\n{e}')
Beispiel #7
0
    def diagnose(self) -> str:
        """
        Check the run's output csv files with cmdstan/bin/diagnose.

        The diagnose utility reads the outputs of all chains
        and checks for the following potential problems:

        + Transitions that hit the maximum treedepth
        + Divergent transitions
        + Low E-BFMI values (sampler transitions HMC potential energy)
        + Low effective sample sizes
        + High R-hat values

        Whatever the command returns is handed back to the caller; a
        non-empty result is additionally logged as a warning.
        """
        diagnose_exe = os.path.join(cmdstan_path(), 'bin',
                                    'diagnose' + EXTENSION)
        run = self.runset
        report = do_command(cmd=[diagnose_exe] + run.csv_files,
                            logger=run._logger)
        if not report:
            return report
        self.runset._logger.warning(report)
        return report
Beispiel #8
0
def build(verbose: bool = False,
          progress: bool = True,
          cores: int = 1) -> None:
    """
    Run command ``make build`` in the current directory, which must be
    the home directory of a CmdStan version (or GitHub repo).
    By default, displays a progress bar which tracks make command outputs.
    If argument ``verbose=True``, instead of a progress bar, streams
    make command outputs to sys.stdout.  When both ``verbose`` and ``progress``
    are ``False``, runs silently.

    After the build, checks that the ``stansummary`` and ``diagnose``
    utilities exist under ``bin``; on Windows, also puts the TBB library
    directory at the front of ``PATH``.

    :param verbose: Boolean value; when ``True``, show output from make command.
        Default is ``False``.
    :param progress: Boolean value; when ``True`` display progress bar.
        Default is ``True``.
    :param cores: Integer, number of cores to use in the ``make`` command.
        Default is 1 core.

    :raises CmdStanInstallError: if the make command fails or one of the
        expected utilities is missing afterwards.
    """
    cmd = [MAKE, 'build', f'-j{cores}']
    try:
        if verbose:
            do_command(cmd)
        elif progress and progbar.allow_show_progress():
            progress_hook: Any = _wrap_build_progress_hook()
            do_command(cmd, fd_out=None, pbar=progress_hook)
        else:
            do_command(cmd, fd_out=None)

    except RuntimeError as e:
        # pylint: disable=raise-missing-from
        raise CmdStanInstallError(f'Command "make build" failed\n{str(e)}')

    # Each missing utility is reported under its own name (the diagnose
    # check used to report stansummary).
    for tool in ('stansummary', 'diagnose'):
        if not os.path.exists(os.path.join('bin', tool + EXTENSION)):
            raise CmdStanInstallError(f'bin/{tool}{EXTENSION} not found'
                                      ', please rebuild or report a bug!')

    if platform.system() == 'Windows':
        # Add tbb to the $PATH on Windows
        libtbb = os.path.join(os.getcwd(), 'stan', 'lib', 'stan_math', 'lib',
                              'tbb')
        # OrderedDict.fromkeys de-duplicates entries while keeping order,
        # so libtbb ends up first and appears only once.
        os.environ['PATH'] = ';'.join(
            list(
                OrderedDict.fromkeys([libtbb] +
                                     os.environ.get('PATH', '').split(';'))))
Beispiel #9
0
    def compile(
        self,
        force: bool = False,
        stanc_options: Dict = None,
        cpp_options: Dict = None,
        override_options: bool = False,
    ) -> None:
        """
        Build the executable for this model's Stan program file with make.

        The build is skipped when an executable already exists and is newer
        than the source file, unless ``force`` is ``True``.  A failed make
        command is logged as an error rather than raised, and the recorded
        executable path is then left untouched.

        :param force: When ``True``, always compile, even if the executable file
            is newer than the source file.  Used for Stan models which have
            ``#include`` directives in order to force recompilation when changes
            are made to the included files.

        :param stanc_options: Options for stanc compiler.
        :param cpp_options: Options for C++ compiler.

        :param override_options: When ``True``, the options given here
            replace any options already stored on the model.  When ``False``,
            they are added to the stored options.  Default is ``False``.

        :raises RuntimeError: if the model has no Stan program file.
        """
        if not self._stan_file:
            raise RuntimeError('Please specify source file')

        if stanc_options is not None or cpp_options is not None:
            requested = CompilerOptions(stanc_options=stanc_options,
                                        cpp_options=cpp_options)
            requested.validate()
            if self._compiler_options is None or override_options:
                self._compiler_options = requested
            else:
                self._compiler_options.add(requested)

        with TemporaryCopiedFile(self._stan_file) as (stan_file, is_copied):
            stem = os.path.splitext(os.path.abspath(stan_file))[0]
            exe_file = Path(stem).as_posix() + EXTENSION

            up_to_date = False
            if os.path.exists(exe_file):
                source_mtime = os.path.getmtime(self._stan_file)
                binary_mtime = os.path.getmtime(exe_file)
                up_to_date = binary_mtime > source_mtime and not force
                if up_to_date:
                    self._logger.info('found newer exe file, not recompiling')

            if not up_to_date:
                self._logger.info('compiling stan program, exe file: %s',
                                  exe_file)
                options = self._compiler_options
                if options is not None:
                    options.validate()
                    self._logger.info('compiler options: %s', options)
                # The MAKE environment variable wins over the platform default.
                default_make = ('mingw32-make'
                                if platform.system() == 'Windows' else 'make')
                cmd = [os.getenv('MAKE', default_make)]
                if options is not None:
                    cmd.extend(options.compose())
                cmd.append(Path(exe_file).as_posix())
                try:
                    do_command(cmd, cmdstan_path(), logger=self._logger)
                except RuntimeError as e:
                    self._logger.error('file %s, exception %s', stan_file,
                                       str(e))
                    self._logger.error('model compilation failed')
                    return

            if is_copied:
                # The build ran on a temporary copy: place the executable
                # next to the original source file.
                target_dir = os.path.dirname(os.path.abspath(self._stan_file))
                source_stem = os.path.splitext(self._stan_file)[0]
                exe_name = os.path.basename(source_stem) + EXTENSION
                self._exe_file = os.path.join(target_dir, exe_name)
                shutil.copy(exe_file, self._exe_file)
            else:
                self._exe_file = exe_file
            self._logger.info('compiled model file: %s', self._exe_file)
Beispiel #10
0
    def compile(
        self,
        opt_lvl: int = 2,
        overwrite: bool = False,
        include_paths: List[str] = None,
    ) -> None:
        """
        Compile the given Stan program file.  Translates the Stan code to
        C++, then calls the C++ compiler.

        If the make command fails, the failure is logged and the recorded
        executable path is left unchanged.

        :param opt_lvl: Optimization level used by the C++ compiler, one of
            {0, 1, 2, 3}.  Defaults to level 2. Level 0 optimization results
            in the shortest compilation time with code that may run slowly.
            Higher optimization levels increase runtime performance but will
            take longer to compile.

        :param overwrite: When True, existing executable will be overwritten.
            Defaults to False.

        :param include_paths: List of paths to directories where Stan should
            look for files to include in compilation of the C++ executable.

        :raises RuntimeError: if the model has no Stan program file.
        :raises ValueError: if any entry of ``include_paths`` does not exist.
        :raises Exception: if stanc did not produce the C++ header file.
        """
        if not self._stan_file:
            raise RuntimeError('Please specify source file')

        if self._exe_file is not None and not overwrite:
            self._logger.warning('model is already compiled')
            return

        with TemporaryCopiedFile(self._stan_file) as (stan_file, is_copied):
            hpp_file = os.path.splitext(stan_file)[0] + '.hpp'
            hpp_file = Path(hpp_file).as_posix()
            if overwrite or not os.path.exists(hpp_file):
                self._logger.info('stan to c++ (%s)', hpp_file)
                stanc_path = os.path.join(cmdstan_path(), 'bin',
                                          'stanc' + EXTENSION)
                stanc_path = Path(stanc_path).as_posix()
                cmd = [
                    stanc_path,
                    '--o={}'.format(hpp_file),
                    Path(stan_file).as_posix(),
                ]
                if include_paths is not None:
                    bad_paths = [
                        d for d in include_paths if not os.path.exists(d)
                    ]
                    # Test the list itself: any() would overlook an
                    # empty-string entry, which is also a bad path.
                    if bad_paths:
                        raise ValueError('invalid include paths: {}'.format(
                            ', '.join(bad_paths)))
                    cmd.append('--include_paths=' +
                               ','.join((Path(p).as_posix()
                                         for p in include_paths)))

                do_command(cmd, logger=self._logger)
                if not os.path.exists(hpp_file):
                    # name the offending file (the placeholder was missing)
                    raise Exception('syntax error: {}'.format(stan_file))

            exe_file, _ = os.path.splitext(os.path.abspath(stan_file))
            exe_file = Path(exe_file).as_posix()
            exe_file += EXTENSION
            make = os.getenv('MAKE', 'make')
            cmd = [make, 'O={}'.format(opt_lvl), exe_file]
            self._logger.info('compiling c++')
            try:
                do_command(cmd, cmdstan_path(), self._logger)
            except Exception as e:  # pylint: disable=broad-except
                # Best effort: report the failure, but do not go on to copy
                # or record an executable that was not built.
                self._logger.error('make cmd failed %s', e)
                self._logger.error('model compilation failed')
                return

            if is_copied:

                original_target_dir = os.path.dirname(self._stan_file)
                # reconstruct the output file name
                new_exec_name = (
                    os.path.basename(os.path.splitext(self._stan_file)[0]) +
                    EXTENSION)

                self._exe_file = os.path.join(original_target_dir,
                                              new_exec_name)

                # copy the generated file back to the original directory
                shutil.copy(exe_file, self._exe_file)
            else:
                self._exe_file = exe_file

        self._logger.info('compiled model file: %s', self._exe_file)
Beispiel #11
0
    def summary(self,
                percentiles: List[int] = None,
                sig_figs: int = None) -> pd.DataFrame:
        """
        Run cmdstan/bin/stansummary over all output csv files, assemble
        summary into DataFrame object; first row contains summary statistics
        for total joint log probability `lp__`, remaining rows contain summary
        statistics for all parameters, transformed parameters, and generated
        quantities variables listed in the order in which they were declared
        in the Stan program.

        :param percentiles: Ordered non-empty list of percentiles to report.
            Every entry must be greater than the one before it (the first
            greater than 0) and no larger than 99.  When omitted,
            ``5,50,95`` is requested.

        :param sig_figs: Number of significant figures to report.
            Must be an integer between 1 and 18.  When omitted,
            ``--sig_figs=2`` is passed to stansummary.  A warning is logged
            when more digits are requested than the CSV files are recorded
            as holding (6 is assumed when that precision is not set).

        :return: pandas.DataFrame

        :raises ValueError: if ``percentiles`` or ``sig_figs`` is invalid.
        """
        if percentiles is None:
            percentiles_str = '--percentiles=5,50,95'
        else:
            if len(percentiles) == 0:
                raise ValueError(
                    'invalid percentiles argument, must be ordered'
                    ' non-empty list from (1, 99), inclusive.')
            # Starting the floor at 0 forces the first entry to be positive.
            floor = 0
            for value in percentiles:
                if value > 99 or not value > floor:
                    raise ValueError(
                        'invalid percentiles spec, must be ordered'
                        ' non-empty list from (1, 99), inclusive.')
                floor = value
            percentiles_str = '--percentiles=' + ','.join(
                str(value) for value in percentiles)

        if sig_figs is None:
            sig_figs_str = '--sig_figs=2'
        else:
            if not (isinstance(sig_figs, int) and 1 <= sig_figs <= 18):
                raise ValueError(
                    'sig_figs must be an integer between 1 and 18,'
                    ' found {}'.format(sig_figs))
            available = self._sig_figs or 6
            if sig_figs > available:
                self._logger.warning(
                    'Requesting %d significant digits of output, but CSV files'
                    ' only have %d digits of precision.',
                    sig_figs,
                    available,
                )
            sig_figs_str = '--sig_figs=' + str(sig_figs)

        stansummary_exe = os.path.join(cmdstan_path(), 'bin',
                                       'stansummary' + EXTENSION)
        prefix = 'stansummary-{}-'.format(self.runset._args.model_name)
        out_path = create_named_text_file(dir=_TMPDIR,
                                          prefix=prefix,
                                          suffix='.csv',
                                          name_only=True)
        # The output-file flag name depends on the CmdStan version check.
        if cmdstan_version_at(2, 24):
            csv_str = '--csv_filename={}'.format(out_path)
        else:
            csv_str = '--csv_file={}'.format(out_path)
        argv = [stansummary_exe, percentiles_str, sig_figs_str, csv_str]
        do_command(argv + self.runset.csv_files, logger=self.runset._logger)
        with open(out_path, 'rb') as stream:
            table = pd.read_csv(
                stream,
                delimiter=',',
                header=0,
                index_col=0,
                comment='#',
                float_precision='high',
            )
        # Drop rows named ``*__`` other than lp__ itself.
        keep = [
            not (label != 'lp__' and label.endswith('__'))
            for label in table.index
        ]
        return table[keep]
Beispiel #12
0
    def compile(self, opt_lvl: int = 3, force: bool = False) -> None:
        """
        Compile the given Stan program file.  Translates the Stan code to
        C++, then calls the C++ compiler.

        By default, this function compares the timestamps on the source and
        executable files; if the executable is newer than the source file, it
        will not recompile the file, unless argument ``force`` is True.

        A failed make command is logged as an error rather than raised, and
        the recorded executable path is then left unchanged.

        :param opt_lvl: Optimization level used by the C++ compiler, one of
            {0, 1, 2, 3}.  Defaults to level 3. Level 0 optimization results
            in the shortest compilation time with code that may run slowly.
            Higher optimization levels increase runtime performance but will
            take longer to compile.

        :param force: When ``True``, always compile, even if the executable file
            is newer than the source file.  Used for Stan models which have
            ``#include`` directives in order to force recompilation when changes
            are made to the included files.

        :raises RuntimeError: if the model has no Stan program file.
        :raises ValueError: if any of the model's include paths does not
            exist.
        """
        if not self._stan_file:
            raise RuntimeError('Please specify source file')

        compilation_failed = False

        with TemporaryCopiedFile(self._stan_file) as (stan_file, is_copied):
            exe_file, _ = os.path.splitext(os.path.abspath(stan_file))
            exe_file = Path(exe_file).as_posix()
            exe_file += EXTENSION
            do_compile = True
            if os.path.exists(exe_file):
                src_time = os.path.getmtime(self._stan_file)
                exe_time = os.path.getmtime(exe_file)
                if exe_time > src_time and not force:
                    do_compile = False
                    self._logger.info('found newer exe file, not recompiling')

            if do_compile:
                # The MAKE environment variable wins over the platform default.
                make = os.getenv(
                    'MAKE',
                    'make'
                    if platform.system() != 'Windows'
                    else 'mingw32-make',
                )
                hpp_file = os.path.splitext(stan_file)[0] + '.hpp'
                hpp_file = Path(hpp_file).as_posix()
                # First make pass, only when no generated header exists yet.
                if not os.path.exists(hpp_file):
                    self._logger.info('stan to c++ (%s)', hpp_file)
                    cmd = [
                        make,
                        Path(exe_file).as_posix(),
                        'STANCFLAGS+=--o={}'.format(hpp_file),
                    ]
                    if self._include_paths is not None:
                        bad_paths = [
                            d
                            for d in self._include_paths
                            if not os.path.exists(d)
                        ]
                        # Test the list itself: any() would overlook an
                        # empty-string entry, which is also a bad path.
                        if bad_paths:
                            raise ValueError(
                                'invalid include paths: {}'.format(
                                    ', '.join(bad_paths)
                                )
                            )
                        cmd.append(
                            'STANCFLAGS+=--include_paths='
                            + ','.join(
                                (
                                    Path(p).as_posix()
                                    for p in self._include_paths
                                )
                            )
                        )
                    try:
                        do_command(cmd, cmdstan_path(), logger=self._logger)
                    except RuntimeError as e:
                        self._logger.error(
                            'file %s, exception %s', stan_file, str(e)
                        )
                        compilation_failed = True

                # Second make pass with the requested optimization level.
                if not compilation_failed:
                    cmd = [make, 'O={}'.format(opt_lvl), exe_file]
                    self._logger.info('compiling c++')
                    try:
                        do_command(cmd, cmdstan_path(), logger=self._logger)
                    except RuntimeError as e:
                        self._logger.error('make cmd failed %s', repr(e))
                        compilation_failed = True

            if not compilation_failed:
                if is_copied:
                    # The build ran on a temporary copy: place the executable
                    # next to the original source file.
                    original_target_dir = os.path.dirname(
                        os.path.abspath(self._stan_file)
                    )
                    new_exec_name = (
                        os.path.basename(os.path.splitext(self._stan_file)[0])
                        + EXTENSION
                    )
                    self._exe_file = os.path.join(
                        original_target_dir, new_exec_name
                    )
                    shutil.copy(exe_file, self._exe_file)
                else:
                    self._exe_file = exe_file
                self._logger.info('compiled model file: %s', self._exe_file)
            else:
                self._logger.error('model compilation failed')
Beispiel #13
0
 def test_exit(self):
     # do_command must raise RuntimeError for this command; bash is pointed
     # at /bin/junk, presumably a script that does not exist.
     failing_cmd = ['bash', '/bin/junk']
     captured = io.StringIO()
     # stdout is redirected for the duration of the call
     with contextlib.redirect_stdout(captured), \
             self.assertRaises(RuntimeError):
         do_command(failing_cmd, HERE)
Beispiel #14
0
 def test_capture_console(self):
     # Output of ``ls`` run in HERE is collected through ``fd_out``; the
     # listing is expected to mention test_utils.py.
     sink = io.StringIO()
     do_command(cmd=['ls'], cwd=HERE, fd_out=sink)
     listing = sink.getvalue()
     self.assertTrue('test_utils.py' in listing)