Ejemplo n.º 1
0
 def test_default_path(self):
     """
     Check ``cmdstan_path()`` both with and without ``CMDSTAN`` set.

     Whatever happens inside the test, the ``CMDSTAN`` environment
     variable is put back into its initial state (set or unset) on exit.
     """
     saved = os.environ.get('CMDSTAN')
     try:
         if 'CMDSTAN' not in os.environ:
             # No override configured: the path must match the version
             # that get_latest_cmdstan() reports for ~/.cmdstanpy.
             install_dir = os.path.expanduser(
                 os.path.join('~', '.cmdstanpy')
             )
             latest = get_latest_cmdstan(install_dir)
             install_version = os.path.expanduser(
                 os.path.join(install_dir, latest)
             )
             self.assertTrue(
                 os.path.samefile(cmdstan_path(), install_version)
             )
             # The test expects the lookup to have exported the variable.
             self.assertTrue('CMDSTAN' in os.environ)
         else:
             self.assertEqual(cmdstan_path(), os.environ['CMDSTAN'])
             # Drop the variable, then check that setting the path
             # explicitly brings it back.
             path = os.environ.pop('CMDSTAN')
             self.assertFalse('CMDSTAN' in os.environ)
             set_cmdstan_path(path)
             self.assertEqual(cmdstan_path(), path)
             self.assertTrue('CMDSTAN' in os.environ)
     finally:
         if saved is None:
             os.environ.pop('CMDSTAN', None)
         else:
             os.environ['CMDSTAN'] = saved
Ejemplo n.º 2
0
 def test_cmdstan_version(self):
     """
     Check the messages logged by ``cmdstan_version()`` for a fake install.

     A directory named ``cmdstan-2.22.0`` containing only ``bin/stanc`` is
     created in a temporary location and exported as ``CMDSTAN``. The test
     expects an INFO record first for the missing makefile and then, once
     a makefile with an unparseable version exists, for the parse failure.
     """
     with tempfile.TemporaryDirectory(
         prefix="cmdstan_tests", dir=_TMPDIR
     ) as scratch:
         fake_path = os.path.join(scratch, 'tmpdir_xxx', 'cmdstan-2.22.0')
         fake_bin = os.path.join(fake_path, 'bin')
         # makedirs creates the intermediate directories as well.
         os.makedirs(fake_bin)
         (Path(fake_bin) / ('stanc' + EXTENSION)).touch()

         def check_logged(message):
             # Run cmdstan_version() under capture, then look for the record.
             with LogCapture() as log:
                 logging.getLogger()
                 cmdstan_version()
             log.check_present(('cmdstanpy', 'INFO', message))

         with self.modified_environ(CMDSTAN=fake_path):
             self.assertTrue(fake_path == cmdstan_path())
             check_logged(
                 'CmdStan installation {} missing makefile, '
                 'cannot get version.'.format(fake_path)
             )

             fake_makefile = os.path.join(fake_path, 'makefile')
             with open(fake_makefile, 'w') as makefile:
                 makefile.write(
                     '...  CMDSTAN_VERSION := dont_need_no_mmp\n\n')
             check_logged(
                 'Cannot parse version, expected '
                 '"<major>.<minor>.<patch>", '
                 'found: "dont_need_no_mmp".'
             )
     cmdstan_path()
Ejemplo n.º 3
0
def compile_model(
    stan_file: str = None,
    opt_lvl: int = 2,
    overwrite: bool = False,
    include_paths: List[str] = None,
) -> Model:
    """
    Compile the given Stan model file to an executable.

    The Stan program is first translated to a ``.hpp`` file with ``stanc``
    (skipped when the ``.hpp`` file already exists and ``overwrite`` is
    False), then the executable is built with ``make`` run from the CmdStan
    directory.

    :param stan_file: Path to Stan program

    :param opt_lvl: Optimization level for C++ compiler, one of {0, 1, 2, 3}
      where level 0 optimization results in the shortest compilation time
      with code that may run slowly and increasing optimization levels increase
      compile time and runtime performance.

    :param overwrite: When True, existing executable will be overwritten.
      Defaults to False.

    :param include_paths: List of paths to directories where Stan should look
      for files to include in compilation of the C++ executable.

    :return: ``Model(stan_file, exe_file)`` on success or when an existing
      executable is reused; ``Model(stan_file)`` when the C++ build fails.

    :raises Exception: if ``stan_file`` is missing or does not exist, if any
      include path does not exist, or if ``stanc`` produced no ``.hpp`` file.
    """
    if stan_file is None:
        raise Exception('must specify argument "stan_file"')
    if not os.path.exists(stan_file):
        raise Exception('no such stan_file {}'.format(stan_file))
    exe_file, _ = os.path.splitext(os.path.abspath(stan_file))
    hpp_file = '.'.join([exe_file, 'hpp'])
    if overwrite or not os.path.exists(hpp_file):
        print('translating to {}'.format(hpp_file))
        stanc_path = os.path.join(cmdstan_path(), 'bin', 'stanc')
        cmd = [stanc_path, '--o={}'.format(hpp_file), stan_file]
        if include_paths is not None:
            bad_paths = [d for d in include_paths if not os.path.exists(d)]
            # Test the list itself: any() would miss falsy entries like ''.
            if bad_paths:
                raise Exception('invalid include paths: {}'.format(
                    ', '.join(bad_paths)))
            cmd.append('--include_paths=' + ','.join(include_paths))
        print('stan to c++: make args {}'.format(cmd))
        do_command(cmd)
        if not os.path.exists(hpp_file):
            # The original format string had no placeholder, so the file
            # name never reached the message.
            raise Exception('syntax error in {}'.format(stan_file))

    if platform.system().lower().startswith('win'):
        exe_file += '.exe'
    if not overwrite and os.path.exists(exe_file):
        # print('model is up to date') # notify user or not?
        return Model(stan_file, exe_file)
    exe_file_path = Path(exe_file).as_posix()
    cmd = ['make', 'O={}'.format(opt_lvl), exe_file_path]
    print('compiling c++: make args {}'.format(cmd))
    try:
        do_command(cmd, cmdstan_path())
    except Exception as exc:
        # Best effort: still hand back an uncompiled model, but report why.
        print('compilation failed: {}'.format(exc))
        return Model(stan_file)
    return Model(stan_file, exe_file)
Ejemplo n.º 4
0
 def test_set_path(self):
     """
     Check that ``set_cmdstan_path`` is reflected by ``cmdstan_path()``.

     When ``CMDSTAN`` is already set, only verify that ``cmdstan_path()``
     agrees with it. Otherwise set the path to the version reported by
     ``get_latest_cmdstan`` for ``~/.cmdstanpy`` and check both the
     function result and the environment variable.
     """
     if 'CMDSTAN' not in os.environ:
         install_dir = os.path.expanduser(os.path.join('~', '.cmdstanpy'))
         latest = get_latest_cmdstan(install_dir)
         install_version = os.path.expanduser(
             os.path.join(install_dir, latest))
         set_cmdstan_path(install_version)
         self.assertEqual(install_version, cmdstan_path())
         self.assertEqual(install_version, os.environ['CMDSTAN'])
     else:
         self.assertEqual(cmdstan_path(), os.environ['CMDSTAN'])
Ejemplo n.º 5
0
 def test_set_path(self):
     """
     Check that ``set_cmdstan_path`` is reflected by ``cmdstan_path()``.

     When ``CMDSTAN`` is already set, only verify that ``cmdstan_path()``
     agrees with it. Otherwise look for an install directory under the
     home directory (``_DOT_CMDSTAN`` first, ``_DOT_CMDSTANPY`` if that
     does not exist), set the path to the version ``get_latest_cmdstan``
     reports there, and check the function result and the variable.
     """
     if 'CMDSTAN' not in os.environ:
         cmdstan_dir = os.path.expanduser(os.path.join('~', _DOT_CMDSTAN))
         if not os.path.exists(cmdstan_dir):
             # Fall back to the alternative dot-directory.
             cmdstan_dir = os.path.expanduser(
                 os.path.join('~', _DOT_CMDSTANPY))
         latest = get_latest_cmdstan(cmdstan_dir)
         install_version = os.path.join(cmdstan_dir, latest)
         set_cmdstan_path(install_version)
         self.assertEqual(install_version, cmdstan_path())
         self.assertEqual(install_version, os.environ['CMDSTAN'])
     else:
         self.assertEqual(cmdstan_path(), os.environ['CMDSTAN'])
Ejemplo n.º 6
0
 def test_default_path(self):
     """
     Check ``cmdstan_path()`` both with and without ``CMDSTAN`` set.

     With the variable set, the path must match it, and setting the path
     again inside a ``modified_environ('CMDSTAN')`` block (where the test
     expects the variable to be absent) must restore it. Without it, the
     path must be the version ``get_latest_cmdstan`` reports under the
     ``_DOT_CMDSTAN`` directory in the home directory.
     """
     if 'CMDSTAN' not in os.environ:
         cmdstan_dir = os.path.expanduser(os.path.join('~', _DOT_CMDSTAN))
         latest = get_latest_cmdstan(cmdstan_dir)
         install_version = os.path.join(cmdstan_dir, latest)
         self.assertTrue(os.path.samefile(cmdstan_path(), install_version))
         self.assertTrue('CMDSTAN' in os.environ)
         return

     self.assertPathsEqual(cmdstan_path(), os.environ['CMDSTAN'])
     path = os.environ['CMDSTAN']
     with self.modified_environ('CMDSTAN'):
         self.assertFalse('CMDSTAN' in os.environ)
         set_cmdstan_path(path)
         self.assertPathsEqual(cmdstan_path(), path)
         self.assertTrue('CMDSTAN' in os.environ)
Ejemplo n.º 7
0
 def summary(self) -> pd.DataFrame:
     """
     Summarize the run with CmdStan's ``stansummary`` utility.

     Runs ``bin/stansummary`` from the CmdStan installation over all CSV
     files of the run set, directing its report to a temporary CSV file,
     and loads that report into a pandas DataFrame.

     :return: the report rows for ``lp__`` and for every entry whose name
         does not end in ``__``.
     """
     # NOTE(review): the value is unused; the attribute access is kept in
     # case the property does work on first access - confirm and remove.
     names = self.column_names
     stansummary = os.path.join(cmdstan_path(), 'bin',
                                'stansummary' + EXTENSION)
     report_prefix = 'stansummary-{}-{}-chain-'.format(
         self.runset._args.model_name, self.runset.chains)
     report_path = create_named_text_file(dir=TMPDIR,
                                          prefix=report_prefix,
                                          suffix='.csv')
     cmd = [stansummary, '--csv_file={}'.format(report_path)]
     cmd = cmd + self.runset.csv_files
     do_command(cmd, logger=self.runset._logger)
     with open(report_path, 'rb') as report:
         table = pd.read_csv(report,
                             delimiter=',',
                             header=0,
                             index_col=0,
                             comment='#')
     # Keep lp__ but drop every other name that ends in a double underscore.
     keep = [
         not name.endswith('__') or name == 'lp__' for name in table.index
     ]
     return table[keep]
Ejemplo n.º 8
0
 def test_set_path(self):
     """
     Set the CmdStan path to the version ``get_latest_cmdstan`` reports
     for ``~/.cmdstanpy`` and check that ``cmdstan_path()`` returns it.
     """
     install_dir = os.path.expanduser(os.path.join('~', '.cmdstanpy'))
     latest = get_latest_cmdstan(install_dir)
     install_version = os.path.expanduser(
         os.path.join(install_dir, latest)
     )
     set_cmdstan_path(install_version)
     self.assertEqual(install_version, cmdstan_path())
Ejemplo n.º 9
0
    def summary(self) -> pd.DataFrame:
        """
        Summarize the run with CmdStan's ``stansummary`` utility.

        Runs ``bin/stansummary`` from the CmdStan installation over all
        output CSV files, directing its report to a temporary CSV file, and
        loads that report into a pandas DataFrame.

        :return: the report rows for ``lp__`` and for every entry whose name
            does not end in ``__``.
        """
        self._sampling_only()

        # NOTE(review): the value is unused; the attribute access is kept in
        # case the property does work on first access - confirm and remove.
        names = self.column_names
        cmd_path = os.path.join(cmdstan_path(), 'bin',
                                'stansummary' + EXTENSION)
        tmp_csv_file = 'stansummary-{}-{}-chains-'.format(
            self._args.model_name, self.chains)
        fd, tmp_csv_path = tempfile.mkstemp(suffix='.csv',
                                            prefix=tmp_csv_file,
                                            dir=TMPDIR,
                                            text=True)
        # mkstemp hands back an open OS-level descriptor; only the path is
        # needed here, so close it right away instead of leaking it.
        os.close(fd)
        # Build the argument list directly: joining into one string and
        # splitting on whitespace breaks any path that contains a space.
        cmd = [
            cmd_path,
            '--csv_file={}'.format(tmp_csv_path),
        ] + list(self.csv_files)
        do_command(cmd, logger=self._logger)
        summary_data = pd.read_csv(tmp_csv_path,
                                   delimiter=',',
                                   header=0,
                                   index_col=0,
                                   comment='#')
        # Keep lp__ but drop every other name ending in a double underscore.
        mask = [
            x == 'lp__' or not x.endswith('__') for x in summary_data.index
        ]
        return summary_data[mask]
Ejemplo n.º 10
0
def summary(runset: RunSet) -> pd.DataFrame:
    """
    Summarize a run with CmdStan's ``stansummary`` utility.

    Runs ``bin/stansummary`` from the CmdStan installation over all output
    CSV files of ``runset``, directing its report to a temporary CSV file,
    and loads that report into a pandas DataFrame.

    :param runset: record of completed run of NUTS sampler
    :return: the report rows for ``lp__`` and for every entry whose name
        does not end in ``__``.
    """
    # NOTE(review): the value is unused; the attribute access is kept in
    # case the property does work on first access - confirm and remove.
    names = runset.column_names
    cmd_path = os.path.join(cmdstan_path(), 'bin', 'stansummary')
    tmp_csv_file = 'stansummary-{}-{}-chains-'.format(runset.model,
                                                      runset.chains)
    fd, tmp_csv_path = tempfile.mkstemp(suffix='.csv',
                                        prefix=tmp_csv_file,
                                        dir=TMPDIR,
                                        text=True)
    # mkstemp hands back an open OS-level descriptor; only the path is
    # needed here, so close it right away instead of leaking it.
    os.close(fd)
    # Build the argument list directly: joining into one string and
    # splitting on whitespace breaks any path that contains a space.
    cmd = [
        cmd_path,
        '--csv_file={}'.format(tmp_csv_path),
    ] + list(runset.csv_files)
    do_command(cmd)
    summary_data = pd.read_csv(tmp_csv_path,
                               delimiter=',',
                               header=0,
                               index_col=0,
                               comment='#')
    # Keep lp__ but drop every other name ending in a double underscore.
    mask = [x == 'lp__' or not x.endswith('__') for x in summary_data.index]
    return summary_data[mask]
Ejemplo n.º 11
0
    def diagnose(self) -> str:
        """
        Run cmdstan/bin/diagnose over all output csv files.

        The diagnose utility reads the outputs of all chains
        and checks for the following potential problems:

        + Transitions that hit the maximum treedepth
        + Divergent transitions
        + Low E-BFMI values (sampler transitions HMC potential energy)
        + Low effective sample sizes
        + High R-hat values

        :return: whatever ``do_command`` returns for the diagnose run; a
            truthy result is also logged as a warning.
        """
        self._sampling_only()

        cmd_path = os.path.join(cmdstan_path(), 'bin', 'diagnose' + EXTENSION)
        # Build the argument list directly: joining into one string and
        # splitting on whitespace breaks any path that contains a space.
        cmd = [cmd_path] + list(self.csv_files)
        result = do_command(cmd=cmd, logger=self._logger)
        if result:
            self._logger.warning(result)
        return result
Ejemplo n.º 12
0
    def summary(self, percentiles: List[int] = None) -> pd.DataFrame:
        """
        Summarize the run with CmdStan's ``stansummary`` utility.

        Runs ``bin/stansummary`` from the CmdStan installation over all CSV
        files of the run set, directing its report to a temporary CSV file,
        and loads that report into a pandas DataFrame.

        :param percentiles: Strictly increasing, non-empty list of
            percentiles to report, each from 1 to 99. When omitted,
            ``5,50,95`` is requested.
        :return: the report rows for ``lp__`` and for every entry whose name
            does not end in ``__``.
        :raises ValueError: if ``percentiles`` is empty, not strictly
            increasing, or contains a value outside 1..99.
        """
        if percentiles is None:
            percentiles_str = '--percentiles=5,50,95'
        else:
            if len(percentiles) == 0:
                raise ValueError(
                    'invalid percentiles argument, must be ordered'
                    ' non-empty list from (1, 99), inclusive.'
                )
            # Starting from 0 makes the first value fail unless it is >= 1.
            previous = 0
            for pct in percentiles:
                if not (pct <= 99 and pct > previous):
                    raise ValueError(
                        'invalid percentiles spec, must be ordered'
                        ' non-empty list from (1, 99), inclusive.'
                    )
                previous = pct
            percentiles_str = '--percentiles=' + ','.join(
                str(pct) for pct in percentiles
            )

        stansummary = os.path.join(
            cmdstan_path(), 'bin', 'stansummary' + EXTENSION
        )
        report_prefix = 'stansummary-{}-{}-chain-'.format(
            self.runset._args.model_name, self.runset.chains
        )
        report_path = create_named_text_file(
            dir=_TMPDIR, prefix=report_prefix, suffix='.csv'
        )
        cmd = [
            stansummary,
            percentiles_str,
            '--csv_file={}'.format(report_path),
        ]
        cmd = cmd + self.runset.csv_files
        do_command(cmd, logger=self.runset._logger)
        with open(report_path, 'rb') as report:
            table = pd.read_csv(
                report,
                delimiter=',',
                header=0,
                index_col=0,
                comment='#',
                float_precision='high',
            )
        # Keep lp__ but drop every other name ending in a double underscore.
        keep = [
            not name.endswith('__') or name == 'lp__' for name in table.index
        ]
        return table[keep]
Ejemplo n.º 13
0
    def diagnose(self) -> str:
        """
        Run CmdStan's ``bin/diagnose`` utility over the run set's CSV files.

        The diagnose utility reads the outputs of all chains
        and checks for the following potential problems:

        + Transitions that hit the maximum treedepth
        + Divergent transitions
        + Low E-BFMI values (sampler transitions HMC potential energy)
        + Low effective sample sizes
        + High R-hat values

        :return: whatever ``do_command`` returns for the diagnose run; a
            truthy result is also logged as a warning.
        """
        executable = os.path.join(
            cmdstan_path(), 'bin', 'diagnose' + EXTENSION)
        output = do_command(cmd=[executable] + self.runset.csv_files,
                            logger=self.runset._logger)
        if output:
            self.runset._logger.warning(output)
        return output
Ejemplo n.º 14
0
def rebuild_cmdstan(verbose: bool = False,
                    progress: bool = True,
                    cores: int = 1) -> None:
    """
    Clean and rebuild the CmdStan installation found by ``cmdstan_path()``.

    CmdStan must already be installed, but it does not have to have been
    installed through CmdStanPy. The work is done from inside the CmdStan
    directory: ``clean_all``, then ``build``, then ``compile_example``.

    :param verbose: Boolean value; when ``True``, show output from make
        command. Default is ``False``.
    :param progress: Boolean value; when ``True``, display a progress bar.
        Default is ``True``.
    :param cores: Integer, number of cores to use in the ``make`` command.
        Default is 1 core.
    :raises CmdStanInstallError: when any step raises ``ValueError``.
    """
    try:
        install_path = cmdstan_path()
        with pushd(install_path):
            clean_all(verbose)
            build(verbose, progress, cores)
            compile_example(verbose)
    except ValueError as err:
        raise CmdStanInstallError(
            "Failed to rebuild CmdStan. Are you sure it is installed?"
        ) from err
Ejemplo n.º 15
0
def diagnose(runset: RunSet) -> None:
    """
    Run cmdstan/bin/diagnose over all output csv files and print the result.

    The diagnose utility reads the outputs of all chains
    and checks for the following potential problems:

    + Transitions that hit the maximum treedepth
    + Divergent transitions
    + Low E-BFMI values (sampler transitions HMC potential energy)
    + Low effective sample sizes
    + High R-hat values

    Prints ``No problems detected.`` when ``do_command`` returns ``None``,
    otherwise prints what it returned.

    :param runset: record of completed run of NUTS sampler
    """
    cmd_path = os.path.join(cmdstan_path(), 'bin', 'diagnose')
    # Build the argument list directly: joining into one string and
    # splitting on whitespace breaks any path that contains a space.
    cmd = [cmd_path] + list(runset.csv_files)
    result = do_command(cmd=cmd)
    if result is None:
        print('No problems detected.')
    else:
        print(result)
Ejemplo n.º 16
0
    def validate(self) -> None:
        """
        Verify that the run arguments are valid and mutually consistent.

        * a model name and a compiled executable must be set
        * chain ids must be >= 1
        * ``output_dir``, when given, is normalized, created if missing and
          probed for writability
        * ``refresh`` must be a positive integer
        * ``sig_figs`` must be an integer from 1 to 18; it is reset to
          ``None`` with a warning when ``cmdstan_version_before(2, 25)``
        * a missing ``seed`` is replaced by a random one; otherwise it must
          be an int from 0 to 2**32-1 or a list with one value per chain id
        * ``data`` must be an existing file path or a dict
        * ``inits`` must be a non-negative number, an existing file path, or
          a list of existing file paths, one per chain id

        :raises ValueError: on the first check that fails.
        """
        if self.model_name is None:
            raise ValueError('no stan model specified')
        if self.model_exe is None:
            raise ValueError('model not compiled')

        if self.chain_ids is not None:
            bad_id = next((cid for cid in self.chain_ids if cid < 1), None)
            if bad_id is not None:
                raise ValueError('invalid chain_id {}'.format(bad_id))

        if self.output_dir is not None:
            out_dir = os.path.realpath(os.path.expanduser(self.output_dir))
            self.output_dir = out_dir
            if not os.path.exists(out_dir):
                try:
                    os.makedirs(out_dir)
                    get_logger().info('created output directory: %s',
                                      out_dir)
                except (RuntimeError, PermissionError) as err:
                    raise ValueError(
                        'Invalid path for output files, no such dir: '
                        '{}.'.format(out_dir)) from err
            if not os.path.isdir(out_dir):
                raise ValueError(
                    'Specified output_dir is not a directory: '
                    '{}.'.format(out_dir))
            try:
                # Writability probe: create a throwaway file, then delete it.
                probe = os.path.join(out_dir, str(time()))
                with open(probe, 'w+'):
                    pass
                os.remove(probe)
            except Exception as err:
                raise ValueError(
                    'Invalid path for output files, cannot write to dir: '
                    '{}.'.format(out_dir)) from err

        if self.refresh is not None and (
                not isinstance(self.refresh, int) or self.refresh < 1):
            raise ValueError(
                'Argument "refresh" must be a positive integer value, '
                'found {}.'.format(self.refresh))

        if self.sig_figs is not None:
            if not (isinstance(self.sig_figs, int)
                    and 1 <= self.sig_figs <= 18):
                raise ValueError(
                    'Argument "sig_figs" must be an integer between '
                    '1 and 18, found {}'.format(self.sig_figs))
            # TODO: remove at some future release
            if cmdstan_version_before(2, 25):
                self.sig_figs = None
                get_logger().warning(
                    'Argument "sig_figs" invalid for CmdStan versions < 2.25, '
                    'using version %s in directory %s',
                    os.path.basename(cmdstan_path()),
                    os.path.dirname(cmdstan_path()),
                )

        max_seed = 2**32 - 1
        if self.seed is None:
            self.seed = RandomState().randint(1, 99999 + 1)
        elif isinstance(self.seed, int):
            if self.seed < 0 or self.seed > max_seed:
                raise ValueError(
                    'Argument "seed" must be an integer between '
                    '0 and 2**32-1, found {}.'.format(self.seed))
        elif isinstance(self.seed, list):
            if self.chain_ids is None:
                raise ValueError(
                    'List of per-chain seeds cannot be evaluated without '
                    'corresponding list of chain_ids.')
            if len(self.seed) != len(self.chain_ids):
                raise ValueError(
                    'Number of seeds must match number of chains,'
                    ' found {} seed for {} chains.'.format(
                        len(self.seed), len(self.chain_ids)))
            for chain_seed in self.seed:
                if chain_seed < 0 or chain_seed > max_seed:
                    raise ValueError(
                        'Argument "seed" must be an integer value'
                        ' between 0 and 2**32-1,'
                        ' found {}'.format(chain_seed))
        else:
            raise ValueError('Argument "seed" must be an integer between '
                             '0 and 2**32-1, found {}.'.format(self.seed))

        if isinstance(self.data, str):
            if not os.path.exists(self.data):
                raise ValueError('no such file {}'.format(self.data))
        elif self.data is not None and not isinstance(self.data, dict):
            raise ValueError('Argument "data" must be string or dict')

        # None matches none of the branches below, so no outer guard needed.
        if isinstance(self.inits, (float, int)):
            if self.inits < 0:
                raise ValueError(
                    'Argument "inits" must be > 0, found {}'.format(
                        self.inits))
        elif isinstance(self.inits, str):
            if not os.path.exists(self.inits):
                raise ValueError('no such file {}'.format(self.inits))
        elif isinstance(self.inits, list):
            if self.chain_ids is None:
                raise ValueError(
                    'List of inits files cannot be evaluated without '
                    'corresponding list of chain_ids.')
            if len(self.inits) != len(self.chain_ids):
                raise ValueError(
                    'Number of inits files must match number of chains,'
                    ' found {} inits files for {} chains.'.format(
                        len(self.inits), len(self.chain_ids)))
            for inits_file in self.inits:
                if not os.path.exists(inits_file):
                    raise ValueError('no such file {}'.format(inits_file))
Ejemplo n.º 17
0
    def __init__(
        self,
        model_name: str = None,
        stan_file: str = None,
        exe_file: str = None,
        compile: bool = True,
        stanc_options: Dict = None,
        cpp_options: Dict = None,
        logger: logging.Logger = None,
    ) -> None:
        """
        Initialize object given constructor args.

        At least one of ``stan_file`` and ``exe_file`` must be given. Both
        paths are user-expanded and resolved, and must exist. When no model
        name is given it is taken from the Stan file name, or failing that
        from the executable name, with the extension removed.

        :param model_name: Model name, used for output file names.
        :param stan_file: Path to Stan program file.
        :param exe_file: Path to compiled executable file.
        :param compile: Whether or not to compile the model.
        :param stanc_options: Options for stanc compiler.
        :param cpp_options: Options for C++ compiler.
        :param logger: Python logger object.
        :raises ValueError: if the model name is blank, no file is given, a
            file does not exist, the Stan file name does not end in
            ``.stan``, the model name and executable basename differ, or
            compilation leaves no executable.
        """
        self._name = None
        self._stan_file = None
        self._exe_file = None
        self._compiler_options = CompilerOptions(stanc_options=stanc_options,
                                                 cpp_options=cpp_options)
        # Fall back to get_logger() when no logger is supplied.
        self._logger = logger or get_logger()

        if model_name is not None:
            # Reject names that are empty or whitespace only.
            if not model_name.strip():
                raise ValueError(
                    'Invalid value for argument model name, found "{}"'.format(
                        model_name))
            self._name = model_name.strip()

        if stan_file is None:
            if exe_file is None:
                raise ValueError(
                    'Missing model file arguments, you must specify '
                    'either Stan source or executable program file or both.')
        else:
            self._stan_file = os.path.realpath(os.path.expanduser(stan_file))
            if not os.path.exists(self._stan_file):
                raise ValueError('no such file {}'.format(self._stan_file))
            # The file name is taken from the argument as given, not from
            # the resolved path stored above.
            _, filename = os.path.split(stan_file)
            # Needs at least one character in front of the '.stan' suffix.
            if len(filename) < 6 or not filename.endswith('.stan'):
                raise ValueError('invalid stan filename {}'.format(
                    self._stan_file))
            if self._name is None:
                self._name, _ = os.path.splitext(filename)
            # if program has include directives, record path
            with open(self._stan_file, 'r') as fd:
                program = fd.read()
            if '#include' in program:
                path, _ = os.path.split(self._stan_file)
                # NOTE(review): _compiler_options was assigned a
                # CompilerOptions instance above, so this first branch looks
                # unreachable - confirm.
                if self._compiler_options is None:
                    self._compiler_options = CompilerOptions(
                        stanc_options={'include_paths': [path]})
                elif self._compiler_options._stanc_options is None:
                    self._compiler_options._stanc_options = {
                        'include_paths': [path]
                    }
                else:
                    self._compiler_options.add_include_path(path)

        if exe_file is not None:
            self._exe_file = os.path.realpath(os.path.expanduser(exe_file))
            if not os.path.exists(self._exe_file):
                raise ValueError('no such file {}'.format(self._exe_file))
            _, exename = os.path.split(self._exe_file)
            if self._name is None:
                self._name, _ = os.path.splitext(exename)
            else:
                # A name set earlier must match the executable's basename.
                if self._name != os.path.splitext(exename)[0]:
                    raise ValueError(
                        'Name mismatch between Stan file and compiled'
                        ' executable, expecting basename: {}'
                        ' found: {}.'.format(self._name, exename))

        # NOTE(review): this check looks always true for the same reason as
        # the one above - confirm.
        if self._compiler_options is not None:
            self._compiler_options.validate()

        if platform.system() == 'Windows':
            # Add tbb to the $PATH on Windows
            libtbb = os.environ.get('STAN_TBB')
            if libtbb is None:
                libtbb = os.path.join(cmdstan_path(), 'stan', 'lib',
                                      'stan_math', 'lib', 'tbb')
            # Put libtbb first; OrderedDict.fromkeys drops repeated entries
            # while keeping the first occurrence of each.
            os.environ['PATH'] = ';'.join(
                list(
                    OrderedDict.fromkeys(
                        [libtbb] + os.environ.get('PATH', '').split(';'))))

        if compile and self._exe_file is None:
            self.compile()
            # compile() leaves _exe_file unset when the build fails.
            if self._exe_file is None:
                raise ValueError(
                    'Unable to compile Stan model file: {}.'.format(
                        self._stan_file))
Ejemplo n.º 18
0
    def compile(
        self,
        force: bool = False,
        stanc_options: Dict = None,
        cpp_options: Dict = None,
        override_options: bool = False,
    ) -> None:
        """
        Build the executable for this model's Stan program file.

        The build is delegated to ``make``, run from the CmdStan directory.
        It is skipped when an executable already exists with a modification
        time later than the source file, unless ``force`` is ``True``.

        :param force: When ``True``, always compile, even if the executable
            file is newer than the source file.  Used for Stan models which
            have ``#include`` directives in order to force recompilation
            when changes are made to the included files.

        :param stanc_options: Options for stanc compiler.
        :param cpp_options: Options for C++ compiler.

        :param override_options: When ``True``, the options given here
            replace the stored ones.  When ``False``, they are merged into
            the stored ones.  Default is ``False``.

        :raises RuntimeError: if the model has no Stan source file.
        """
        if not self._stan_file:
            raise RuntimeError('Please specify source file')

        if stanc_options is not None or cpp_options is not None:
            new_options = CompilerOptions(stanc_options=stanc_options,
                                          cpp_options=cpp_options)
            new_options.validate()
            if self._compiler_options is None or override_options:
                self._compiler_options = new_options
            else:
                self._compiler_options.add(new_options)

        with TemporaryCopiedFile(self._stan_file) as (stan_file, is_copied):
            exe_stem, _ = os.path.splitext(os.path.abspath(stan_file))
            exe_file = Path(exe_stem).as_posix() + EXTENSION

            needs_build = True
            if os.path.exists(exe_file):
                source_mtime = os.path.getmtime(self._stan_file)
                binary_mtime = os.path.getmtime(exe_file)
                if not force and binary_mtime > source_mtime:
                    needs_build = False
                    self._logger.info('found newer exe file, not recompiling')

            build_failed = False
            if needs_build:
                self._logger.info('compiling stan program, exe file: %s',
                                  exe_file)
                if self._compiler_options is not None:
                    self._compiler_options.validate()
                    self._logger.info('compiler options: %s',
                                      self._compiler_options)
                # MAKE in the environment wins over the platform default.
                if platform.system() == 'Windows':
                    default_make = 'mingw32-make'
                else:
                    default_make = 'make'
                cmd = [os.getenv('MAKE', default_make)]
                if self._compiler_options is not None:
                    cmd.extend(self._compiler_options.compose())
                cmd.append(Path(exe_file).as_posix())
                try:
                    do_command(cmd, cmdstan_path(), logger=self._logger)
                except RuntimeError as err:
                    self._logger.error('file %s, exception %s', stan_file,
                                       str(err))
                    build_failed = True

            if build_failed:
                self._logger.error('model compilation failed')
                return

            if is_copied:
                # The build ran on a copy: put the executable next to the
                # original source file, named after it.
                target_dir = os.path.dirname(
                    os.path.abspath(self._stan_file))
                target_name = os.path.basename(
                    os.path.splitext(self._stan_file)[0]) + EXTENSION
                self._exe_file = os.path.join(target_dir, target_name)
                shutil.copy(exe_file, self._exe_file)
            else:
                self._exe_file = exe_file
            self._logger.info('compiled model file: %s', self._exe_file)
Ejemplo n.º 19
0
 def test_default_path(self):
     """
     Check that ``cmdstan_path()`` starts with the expanded
     ``~/.cmdstanpy/cmdstan`` prefix.
     """
     expected_prefix = os.path.expanduser(
         os.path.join('~', '.cmdstanpy', 'cmdstan'))
     resolved = cmdstan_path()
     self.assertTrue(resolved.startswith(expected_prefix))
Ejemplo n.º 20
0
 def show_cmdstan_version(self):
     """Print the value of ``cmdstan_path()``; the assertion always passes."""
     banner = '\n\nCmdStan version: {}\n\n'.format(cmdstan_path())
     print(banner)
     self.assertTrue(True)
Ejemplo n.º 21
0
    def compile(
        self,
        opt_lvl: int = 2,
        overwrite: bool = False,
        include_paths: List[str] = None,
    ) -> None:
        """
        Compile the given Stan program file.  Translates the Stan code to
        C++, then calls the C++ compiler.

        On success ``self._exe_file`` is set to the path of the executable.
        If the ``make`` step raises, the error is logged and
        ``self._exe_file`` is left unchanged.

        :param opt_lvl: Optimization level used by the C++ compiler, one of
            {0, 1, 2, 3}.  Defaults to level 2. Level 0 optimization results
            in the shortest compilation time with code that may run slowly.
            Higher optimization levels increase runtime performance but will
            take longer to compile.

        :param overwrite: When True, existing executable will be overwritten.
            Defaults to False.

        :param include_paths: List of paths to directories where Stan should
            look for files to include in compilation of the C++ executable.

        :raises RuntimeError: if no Stan source file has been specified.
        :raises Exception: if an include path does not exist, or if no
            ``.hpp`` file exists after the stanc command has run.
        """
        if not self._stan_file:
            raise RuntimeError('Please specify source file')

        if self._exe_file is not None and not overwrite:
            self._logger.warning('model is already compiled')
            return

        with TemporaryCopiedFile(self._stan_file) as (stan_file, is_copied):
            hpp_file = os.path.splitext(stan_file)[0] + '.hpp'
            hpp_file = Path(hpp_file).as_posix()
            if overwrite or not os.path.exists(hpp_file):
                self._logger.info('stan to c++ (%s)', hpp_file)
                stanc_path = os.path.join(cmdstan_path(), 'bin',
                                          'stanc' + EXTENSION)
                stanc_path = Path(stanc_path).as_posix()
                cmd = [
                    stanc_path,
                    '--o={}'.format(hpp_file),
                    Path(stan_file).as_posix(),
                ]
                if include_paths is not None:
                    bad_paths = [
                        d for d in include_paths if not os.path.exists(d)
                    ]
                    # Test the list itself: any() would treat an invalid
                    # empty-string path as "no bad paths".
                    if bad_paths:
                        raise Exception('invalid include paths: {}'.format(
                            ', '.join(bad_paths)))
                    cmd.append('--include_paths=' +
                               ','.join((Path(p).as_posix()
                                         for p in include_paths)))

                do_command(cmd, logger=self._logger)
                if not os.path.exists(hpp_file):
                    raise Exception('syntax error: {}'.format(stan_file))

            exe_file, _ = os.path.splitext(os.path.abspath(stan_file))
            exe_file = Path(exe_file).as_posix()
            exe_file += EXTENSION
            make = os.getenv('MAKE', 'make')
            cmd = [make, 'O={}'.format(opt_lvl), exe_file]
            self._logger.info('compiling c++')
            try:
                do_command(cmd, cmdstan_path(), self._logger)
            except Exception as e:
                # Do not record (or try to copy) an executable that was
                # never produced.
                self._logger.error('make cmd failed %s', e)
                self._logger.error('model compilation failed')
                return

            if is_copied:
                original_target_dir = os.path.dirname(self._stan_file)
                # reconstruct the output file name
                new_exec_name = (
                    os.path.basename(os.path.splitext(self._stan_file)[0]) +
                    EXTENSION)
                target_exe = os.path.join(original_target_dir, new_exec_name)

                # copy the generated file back to the original directory;
                # only record the path once the copy has succeeded
                shutil.copy(exe_file, target_exe)
                self._exe_file = target_exe
            else:
                self._exe_file = exe_file

        self._logger.info('compiled model file: %s', self._exe_file)
Ejemplo n.º 22
0
    def compile(self, opt_lvl: int = 3, force: bool = False) -> None:
        """
        Compile the given Stan program file.  Translates the Stan code to
        C++, then calls the C++ compiler.

        By default, this function compares the timestamps on the source and
        executable files; if the executable is newer than the source file, it
        will not recompile the file, unless argument ``force`` is True.

        A ``RuntimeError`` raised by either ``make`` command is logged rather
        than propagated; in that case ``self._exe_file`` is not updated.

        :param opt_lvl: Optimization level used by the C++ compiler, one of
            {0, 1, 2, 3}.  Defaults to level 3. Level 0 optimization results
            in the shortest compilation time with code that may run slowly.
            Higher optimization levels increase runtime performance but will
            take longer to compile.

        :param force: When ``True``, always compile, even if the executable file
            is newer than the source file.  Used for Stan models which have
            ``#include`` directives in order to force recompilation when changes
            are made to the included files.

        :raises RuntimeError: if no Stan source file has been specified.
        :raises ValueError: if any configured include path does not exist.
        """
        if not self._stan_file:
            raise RuntimeError('Please specify source file')

        compilation_failed = False

        with TemporaryCopiedFile(self._stan_file) as (stan_file, is_copied):
            exe_file, _ = os.path.splitext(os.path.abspath(stan_file))
            exe_file = Path(exe_file).as_posix()
            exe_file += EXTENSION
            do_compile = True
            if os.path.exists(exe_file):
                src_time = os.path.getmtime(self._stan_file)
                exe_time = os.path.getmtime(exe_file)
                if exe_time > src_time and not force:
                    do_compile = False
                    self._logger.info('found newer exe file, not recompiling')

            if do_compile:
                make = os.getenv(
                    'MAKE',
                    'make'
                    if platform.system() != 'Windows'
                    else 'mingw32-make',
                )
                hpp_file = os.path.splitext(stan_file)[0] + '.hpp'
                hpp_file = Path(hpp_file).as_posix()
                if not os.path.exists(hpp_file):
                    self._logger.info('stan to c++ (%s)', hpp_file)
                    cmd = [
                        make,
                        Path(exe_file).as_posix(),
                        'STANCFLAGS+=--o={}'.format(hpp_file),
                    ]
                    if self._include_paths is not None:
                        bad_paths = [
                            d
                            for d in self._include_paths
                            if not os.path.exists(d)
                        ]
                        # Test the list itself: any() would treat an invalid
                        # empty-string path as "no bad paths".
                        if bad_paths:
                            raise ValueError(
                                'invalid include paths: {}'.format(
                                    ', '.join(bad_paths)
                                )
                            )
                        cmd.append(
                            'STANCFLAGS+=--include_paths='
                            + ','.join(
                                Path(p).as_posix()
                                for p in self._include_paths
                            )
                        )
                    try:
                        do_command(cmd, cmdstan_path(), logger=self._logger)
                    except RuntimeError as e:
                        self._logger.error(
                            'file %s, exception %s', stan_file, str(e)
                        )
                        compilation_failed = True

                if not compilation_failed:
                    cmd = [make, 'O={}'.format(opt_lvl), exe_file]
                    self._logger.info('compiling c++')
                    try:
                        do_command(cmd, cmdstan_path(), logger=self._logger)
                    except RuntimeError as e:
                        self._logger.error('make cmd failed %s', repr(e))
                        compilation_failed = True

            if not compilation_failed:
                if is_copied:
                    # The build ran on a temporary copy; place the executable
                    # next to the original Stan file.
                    original_target_dir = os.path.dirname(
                        os.path.abspath(self._stan_file)
                    )
                    new_exec_name = (
                        os.path.basename(os.path.splitext(self._stan_file)[0])
                        + EXTENSION
                    )
                    self._exe_file = os.path.join(
                        original_target_dir, new_exec_name
                    )
                    shutil.copy(exe_file, self._exe_file)
                else:
                    self._exe_file = exe_file
                self._logger.info('compiled model file: %s', self._exe_file)
            else:
                self._logger.error('model compilation failed')
Ejemplo n.º 23
0
 def test_cmdstan_version_at(self):
     """``cmdstan_version_at(99, 99)`` is expected to be falsy."""
     # Resolve the install first; per the original note this call
     # sets os.environ['CMDSTAN'].
     cmdstan_path()
     at_far_future = cmdstan_version_at(99, 99)
     self.assertFalse(at_far_future)
Ejemplo n.º 24
0
 def test_cmdstan_version_before(self):
     """``cmdstan_version_before`` is truthy for 99.99 and falsy for 1.1."""
     # Resolve the install first; per the original note this call
     # sets os.environ['CMDSTAN'].
     cmdstan_path()
     before_far_future = cmdstan_version_before(99, 99)
     self.assertTrue(before_far_future)
     before_ancient = cmdstan_version_before(1, 1)
     self.assertFalse(before_ancient)
Ejemplo n.º 25
0
    def validate(self) -> None:
        """
        Check arguments correctness and consistency.

        * a model name and a compiled executable must be set
        * chain ids must be >= 1
        * output files must be in a writeable directory (created if missing)
        * refresh must be a positive integer
        * sig_figs must be an integer in 1..18; it is reset to None with a
          warning when ``cmdstan_version_at(2, 25)`` is false
        * if no seed specified, set random seed.
        * length of per-chain lists equals specified # of chains
        * input files must exist

        Side effects: ``self.output_dir`` is replaced by its expanded real
        path, ``self.seed`` is filled in when None, and ``self.sig_figs`` may
        be reset to None.

        :raises ValueError: if any argument is invalid or inconsistent.
        """
        if self.model_name is None:
            raise ValueError('no stan model specified')
        if self.model_exe is None:
            raise ValueError('model not compiled')

        if self.chain_ids is not None:
            for chain_id in self.chain_ids:
                if chain_id < 1:
                    raise ValueError('invalid chain_id {}'.format(chain_id))
        if self.output_dir is not None:
            self.output_dir = os.path.realpath(
                os.path.expanduser(self.output_dir))
            if not os.path.exists(self.output_dir):
                try:
                    os.makedirs(self.output_dir)
                    self._logger.info('created output directory: %s',
                                      self.output_dir)
                except (RuntimeError, OSError) as exc:
                    # os.makedirs reports failures as OSError subclasses
                    # (PermissionError, NotADirectoryError, ...); surface all
                    # of them as the ValueError this method raises elsewhere.
                    raise ValueError(
                        'invalid path for output files, no such dir: {}'.
                        format(self.output_dir)) from exc
            if not os.path.isdir(self.output_dir):
                raise ValueError(
                    'specified output_dir not a directory: {}'.format(
                        self.output_dir))
            try:
                # Probe writeability by creating and removing a scratch file.
                testpath = os.path.join(self.output_dir, str(time()))
                with open(testpath, 'w+'):
                    pass
                os.remove(testpath)  # cleanup
            except Exception as exc:
                raise ValueError('invalid path for output files,'
                                 ' cannot write to dir: {}'.format(
                                     self.output_dir)) from exc
        if self.refresh is not None:
            if not isinstance(self.refresh, int) or self.refresh < 1:
                raise ValueError(
                    'Argument refresh must be a positive integer value, '
                    'found {}.'.format(self.refresh))

        if self.sig_figs is not None:
            if (not isinstance(self.sig_figs, int) or self.sig_figs < 1
                    or self.sig_figs > 18):
                raise ValueError(
                    'sig_figs must be an integer between 1 and 18,'
                    ' found {}'.format(self.sig_figs))
            if not cmdstan_version_at(2, 25):
                self.sig_figs = None
                self._logger.warning(
                    'arg sig_figs not valid, CmdStan version must be 2.25 '
                    'or higher, using version %s in directory %s',
                    os.path.basename(cmdstan_path()),
                    os.path.dirname(cmdstan_path()),
                )

        if self.seed is None:
            rng = RandomState()
            self.seed = rng.randint(1, 99999 + 1)
        else:
            if not isinstance(self.seed, (int, list)):
                raise ValueError(
                    'seed must be an integer between 0 and 2**32-1,'
                    ' found {}'.format(self.seed))
            if isinstance(self.seed, int):
                if self.seed < 0 or self.seed > 2**32 - 1:
                    raise ValueError(
                        'seed must be an integer between 0 and 2**32-1,'
                        ' found {}'.format(self.seed))
            else:
                if self.chain_ids is None:
                    raise ValueError(
                        'seed must not be a list when no chains used')

                if len(self.seed) != len(self.chain_ids):
                    raise ValueError(
                        'number of seeds must match number of chains,'
                        ' found {} seed for {} chains '.format(
                            len(self.seed), len(self.chain_ids)))
                for chain_seed in self.seed:
                    if chain_seed < 0 or chain_seed > 2**32 - 1:
                        raise ValueError('seed must be an integer value'
                                         ' between 0 and 2**32-1,'
                                         ' found {}'.format(chain_seed))

        if isinstance(self.data, str):
            if not os.path.exists(self.data):
                raise ValueError('no such file {}'.format(self.data))
        elif self.data is not None and not isinstance(self.data, dict):
            raise ValueError('data must be string or dict')

        if self.inits is not None:
            if isinstance(self.inits, (Integral, Real)):
                # Zero is accepted, so the message says ">= 0".
                if self.inits < 0:
                    raise ValueError('inits must be >= 0, found {}'.format(
                        self.inits))
            elif isinstance(self.inits, str):
                if not os.path.exists(self.inits):
                    raise ValueError('no such file {}'.format(self.inits))
            elif isinstance(self.inits, list):
                if self.chain_ids is None:
                    raise ValueError(
                        'inits must not be a list when no chains are used')

                if len(self.inits) != len(self.chain_ids):
                    raise ValueError(
                        'number of inits files must match number of chains,'
                        ' found {} inits files for {} chains '.format(
                            len(self.inits), len(self.chain_ids)))
                if len(set(self.inits)) != len(self.inits):
                    raise ValueError('each chain must have its own init file,'
                                     ' found duplicates in inits files list.')
                for inits_file in self.inits:
                    if not os.path.exists(inits_file):
                        raise ValueError('no such file {}'.format(inits_file))
Ejemplo n.º 26
0
    def summary(self,
                percentiles: List[int] = None,
                sig_figs: int = None) -> pd.DataFrame:
        """
        Run the ``stansummary`` program found under ``cmdstan_path()/bin``
        over the run's output csv files and load the csv it writes into a
        DataFrame.  Rows whose index name ends in ``__`` are dropped, except
        for ``lp__``.

        :param percentiles: Ordered non-empty list of percentiles to report.
            Values must be strictly increasing, greater than 0 and at most
            99.  When omitted, ``5,50,95`` is used.

        :param sig_figs: Number of significant figures to report.
            Must be an integer between 1 and 18.  When omitted,
            ``--sig_figs=2`` is passed to stansummary.  A warning is logged
            if the requested value exceeds the precision recorded for the
            csv files (``self._sig_figs``, or 6 when that is unset).

        :return: pandas.DataFrame

        :raises ValueError: if ``percentiles`` or ``sig_figs`` is invalid.
        """
        if percentiles is None:
            percentiles_str = '--percentiles=5,50,95'
        else:
            if len(percentiles) == 0:
                raise ValueError(
                    'invalid percentiles argument, must be ordered'
                    ' non-empty list from (1, 99), inclusive.')
            previous = 0
            for pct in percentiles:
                if pct > 99 or not pct > previous:
                    raise ValueError(
                        'invalid percentiles spec, must be ordered'
                        ' non-empty list from (1, 99), inclusive.')
                previous = pct
            percentiles_str = '--percentiles=' + ','.join(
                str(pct) for pct in percentiles)

        if sig_figs is None:
            sig_figs_str = '--sig_figs=2'
        else:
            if not (isinstance(sig_figs, int) and 1 <= sig_figs <= 18):
                raise ValueError(
                    'sig_figs must be an integer between 1 and 18,'
                    ' found {}'.format(sig_figs))
            available_digits = self._sig_figs or 6
            if sig_figs > available_digits:
                self._logger.warning(
                    'Requesting %d significant digits of output, but CSV files'
                    ' only have %d digits of precision.',
                    sig_figs,
                    available_digits,
                )
            sig_figs_str = '--sig_figs={}'.format(sig_figs)

        stansummary_exe = os.path.join(
            cmdstan_path(), 'bin', 'stansummary' + EXTENSION)
        out_prefix = 'stansummary-{}-'.format(self.runset._args.model_name)
        out_path = create_named_text_file(
            dir=_TMPDIR, prefix=out_prefix, suffix='.csv', name_only=True)
        # The output-file flag is spelled differently depending on
        # cmdstan_version_at(2, 24).
        if cmdstan_version_at(2, 24):
            csv_str = '--csv_filename={}'.format(out_path)
        else:
            csv_str = '--csv_file={}'.format(out_path)

        fixed_args = [stansummary_exe, percentiles_str, sig_figs_str, csv_str]
        do_command(fixed_args + self.runset.csv_files,
                   logger=self.runset._logger)

        with open(out_path, 'rb') as fd:
            summary_data = pd.read_csv(
                fd,
                delimiter=',',
                header=0,
                index_col=0,
                comment='#',
                float_precision='high',
            )
        keep_rows = [
            name == 'lp__' or not name.endswith('__')
            for name in summary_data.index
        ]
        return summary_data[keep_rows]
Ejemplo n.º 27
0
    def __init__(
        self,
        stan_file: str = None,
        exe_file: str = None,
        include_paths: List[str] = None,
        compile: bool = True,
        logger: logging.Logger = None,
    ) -> None:
        """
        Initialize object.

        :param stan_file: Path to a Stan program file; its name must end in
            ``.stan``.
        :param exe_file: Path to an existing compiled executable.
        :param include_paths: Directories to search for included files.  The
            caller's list is copied, never modified.  If the Stan program
            text contains ``#include``, the program's own directory is added.
        :param compile: When True and no executable was supplied, call
            ``self.compile()``.
        :param logger: Logger to use; defaults to ``get_logger()``.

        :raises ValueError: if neither file is given, a given file or include
            path does not exist, the Stan file name is invalid, the Stan file
            and executable base names differ, or compilation leaves no
            executable.
        """
        self._stan_file = None
        self._name = None
        self._exe_file = None
        self._include_paths = None
        self._logger = logger or get_logger()

        if include_paths is not None:
            # Work on a copy so that adding the program directory below does
            # not mutate the caller's list.
            include_paths = list(include_paths)

        if stan_file is None:
            if exe_file is None:
                raise ValueError(
                    'must specify Stan source or executable program file'
                )
        else:
            self._stan_file = os.path.realpath(os.path.expanduser(stan_file))
            if not os.path.exists(self._stan_file):
                raise ValueError('no such file {}'.format(self._stan_file))
            _, filename = os.path.split(stan_file)
            if len(filename) < 6 or not filename.endswith('.stan'):
                raise ValueError(
                    'invalid stan filename {}'.format(self._stan_file)
                )
            self._name, _ = os.path.splitext(filename)
            # if program has #includes, search program dir
            with open(self._stan_file, 'r') as fd:
                program = fd.read()
            if '#include' in program:
                path, _ = os.path.split(self._stan_file)
                if include_paths is None:
                    include_paths = []
                if path not in include_paths:
                    include_paths.append(path)

        if exe_file is not None:
            self._exe_file = os.path.realpath(os.path.expanduser(exe_file))
            if not os.path.exists(self._exe_file):
                raise ValueError('no such file {}'.format(self._exe_file))
            _, exename = os.path.split(self._exe_file)
            if self._name is None:
                self._name, _ = os.path.splitext(exename)
            else:
                if self._name != os.path.splitext(exename)[0]:
                    raise ValueError(
                        'name mismatch between Stan file and compiled'
                        ' executable, expecting basename: {}'
                        ' found: {}'.format(self._name, exename)
                    )

        if include_paths is not None:
            bad_paths = [d for d in include_paths if not os.path.exists(d)]
            # Test the list itself: any() would treat an invalid
            # empty-string path as "no bad paths".
            if bad_paths:
                raise ValueError(
                    'invalid include paths: {}'.format(', '.join(bad_paths))
                )
            self._include_paths = include_paths

        if platform.system() == 'Windows':
            # Add tbb to the $PATH on Windows
            libtbb = os.getenv('STAN_TBB')
            if libtbb is None:
                libtbb = os.path.join(
                    cmdstan_path(), 'stan', 'lib', 'stan_math', 'lib', 'tbb'
                )
            # OrderedDict.fromkeys drops duplicate entries while keeping
            # order, with the tbb directory first.
            os.environ['PATH'] = ';'.join(
                list(
                    OrderedDict.fromkeys(
                        [libtbb] + os.getenv('PATH', '').split(';')
                    )
                )
            )

        if compile and self._exe_file is None:
            self.compile()
            if self._exe_file is None:
                raise ValueError(
                    'unable to compile Stan model file: {}'.format(
                        self._stan_file
                    )
                )