def __init__(
    self,
    runset: RunSet,
    validate_csv: bool = True,
    logger: logging.Logger = None,
) -> None:
    """
    Wrap a sample-method runset and set up empty result fields.

    :param runset: Run to wrap; its ``method`` must equal ``Method.SAMPLE``.
    :param validate_csv: When true, ``validate_csv_files()`` is called
        before the constructor returns.
    :param logger: Logger to use; falls back to ``get_logger()``.
    :raises ValueError: If ``runset.method`` is not ``Method.SAMPLE``.
    """
    is_sample_run = runset.method == Method.SAMPLE
    if not is_sample_run:
        raise ValueError('Wrong runset method, expecting sample runset, '
                         'found method {}'.format(runset.method))

    self.runset = runset
    self._logger = logger if logger else get_logger()

    # sampler configuration, copied from the runset's method args
    sampler_args = runset._args.method_args
    self._is_fixed_param = sampler_args.fixed_param
    self._iter_sampling = sampler_args.iter_sampling
    self._iter_warmup = sampler_args.iter_warmup
    self._save_warmup = sampler_args.save_warmup
    self._thin = sampler_args.thin

    # everything below is filled in later from the csv files
    for unparsed in (
        '_draws_sampling',
        '_draws_warmup',
        '_num_params',  # metric dim(s)
        '_metric_type',
        '_metric',
        '_stepsize',
        '_draws',
        '_draws_as_df',
    ):
        setattr(self, unparsed, None)
    self._column_names = ()
    self._stan_variable_dims = {}

    self._validate_csv = validate_csv
    if validate_csv:
        self.validate_csv_files()
def __init__(self, args: CmdStanArgs, chains: int = 4,
             logger: logging.Logger = None) -> None:
    """
    Record the run configuration and derive per-chain file names/commands.

    :param args: Run arguments; ``output_basename``, ``model_name``,
        ``method`` and ``compose_command`` are used here.
    :param chains: Number of chains, must be >= 1.
    :param logger: Logger to use; falls back to ``get_logger()``.
    :raises ValueError: If ``chains`` is less than 1.
    """
    self._args = args
    self._chains = chains
    self._logger = logger or get_logger()
    if chains < 1:
        # The original placeholder was the malformed '{i]}', which made
        # str.format() fail before this ValueError could be raised.
        raise ValueError('chains must be positive integer value, '
                         'found {}'.format(chains))

    if args.output_basename is None:
        # No basename given: one named temp file per chain under TMPDIR.
        csv_basename = 'stan-{}-{}'.format(args.model_name, args.method)
        self._csv_files = [
            create_named_text_file(
                dir=TMPDIR,
                prefix='{}-{}-'.format(csv_basename, i + 1),
                suffix='.csv',
            )
            for i in range(chains)
        ]
    else:
        self._csv_files = [
            '{}-{}.csv'.format(args.output_basename, i + 1)
            for i in range(chains)
        ]

    # Console output file: same path as the csv file, '.txt' extension.
    self._console_files = [
        ''.join([os.path.splitext(csv_file)[0], '.txt'])
        for csv_file in self._csv_files
    ]
    self._cmds = [
        args.compose_command(i, csv_file)
        for i, csv_file in enumerate(self._csv_files)
    ]
    # -1 marks a chain whose return code has not been recorded yet.
    self._retcodes = [-1 for _ in range(chains)]
def __init__(self, stan_file: str = None, exe_file: str = None,
             logger: logging.Logger = None) -> None:
    """
    Record the Stan program file and/or executable and derive the model name.

    :param stan_file: Path to a ``.stan`` program file.
    :param exe_file: Path to an executable file.
    :param logger: Logger to use; falls back to ``get_logger()``.
    :raises ValueError: If neither file is given, a given file does not
        exist, the Stan filename is invalid, or the executable's name does
        not correspond to the Stan file's name.
    """
    self._stan_file = stan_file
    self._name = None
    self._exe_file = None
    self._logger = logger or get_logger()

    if stan_file is None:
        if exe_file is None:
            raise ValueError(
                'must specify Stan source or executable program file')
    else:
        if not os.path.exists(stan_file):
            raise ValueError('no such file {}'.format(self._stan_file))
        _, filename = os.path.split(stan_file)
        # shortest acceptable name is one character plus '.stan'
        if len(filename) < 6 or not filename.endswith('.stan'):
            raise ValueError('invalid stan filename {}'.format(
                self._stan_file))
        self._name, _ = os.path.splitext(filename)

    if exe_file is not None:
        if not os.path.exists(exe_file):
            raise ValueError('no such file {}'.format(exe_file))
        _, exename = os.path.split(exe_file)
        exe_basename, _ = os.path.splitext(exename)
        if self._name is None:
            self._name = exe_basename
        else:
            # The model name carries no extension, so compare it against
            # the executable name with its extension stripped (the same
            # rule used above when the name is taken from the executable).
            # The earlier check only accepted ``exename + EXTENSION``; that
            # spelling and the bare ``exename`` stay accepted so nothing
            # that used to pass is rejected now.
            # NOTE(review): EXTENSION is assumed to be the platform's
            # executable suffix - confirm against its definition.
            accepted = (exe_basename, exename, ''.join([exename, EXTENSION]))
            if self._name not in accepted:
                raise ValueError(
                    'name mismatch between Stan file and compiled'
                    ' executable, expecting basename: {}'
                    ' found: {}'.format(self._name, exename))
        self._exe_file = exe_file
def __init__(
    self,
    stanc_options: Dict = None,
    cpp_options: Dict = None,
    logger: logging.Logger = None,
) -> None:
    """
    Store the compiler option dictionaries and the logger.

    :param stanc_options: Options for the stanc compiler, stored as given.
    :param cpp_options: Options for the C++ compiler, stored as given.
    :param logger: Logger to use; falls back to ``get_logger()``.
    """
    self._logger = logger if logger else get_logger()
    self._cpp_options = cpp_options
    self._stanc_options = stanc_options
def validate_stanc_opts(self) -> None:
    """
    Check stanc compiler args and consistency between stanc and C++ options.

    Options listed in ``STANC_IGNORE_OPTS`` are logged and removed from
    ``self._stanc_options``. ``include_paths`` may be a list or a
    comma-separated string and is stored back as a list. ``use-opencl``
    sets ``STAN_OPENCL`` to ``'TRUE'`` in ``self._cpp_options``.

    :raises ValueError: For an unknown option, an ``include_paths`` value
        that is neither list nor string, or an include path that does not
        exist.
    """
    # pylint: disable=no-member
    if self._stanc_options is None:
        return
    ignore = []
    paths = None
    for key, val in self._stanc_options.items():
        if key in STANC_IGNORE_OPTS:
            get_logger().info('ignoring compiler option: %s', key)
            ignore.append(key)
        elif key not in STANC_OPTS:
            raise ValueError(f'unknown stanc compiler option: {key}')
        elif key == 'include_paths':
            paths = val
            if isinstance(val, str):
                paths = val.split(',')
            elif not isinstance(val, list):
                raise ValueError(
                    'Invalid include_paths, expecting list or '
                    f'string, found type: {type(val)}.')
        elif key == 'use-opencl':
            if self._cpp_options is None:
                self._cpp_options = {'STAN_OPENCL': 'TRUE'}
            else:
                self._cpp_options['STAN_OPENCL'] = 'TRUE'
    # Deletions are deferred: the dict cannot change size while iterated.
    for opt in ignore:
        del self._stanc_options[opt]
    if paths is not None:
        self._stanc_options['include_paths'] = paths
        bad_paths = [
            path for path in paths if not os.path.exists(path)
        ]
        # Test for emptiness rather than any(): an empty-string entry is
        # falsy, so any() would let it through unreported.
        if bad_paths:
            raise ValueError('invalid include paths: {}'.format(
                ', '.join(bad_paths)))
def __init__(self, args: CmdStanArgs, chains: int = 4,
             logger: logging.Logger = None) -> None:
    """
    Record the run configuration and derive per-chain file names/commands.

    :param args: Run arguments; ``method_args``, ``output_basename``,
        ``model_name`` and ``compose_command`` are used here.
    :param chains: Number of chains, must be >= 1.
    :param logger: Logger to use; falls back to ``get_logger()``.
    :raises ValueError: If ``chains`` is less than 1.
    """
    self._args = args
    self._is_optimizing = isinstance(self._args.method_args, OptimizeArgs)
    self._chains = chains
    self._logger = logger or get_logger()
    if chains < 1:
        # The original placeholder was the malformed '{i]}', which made
        # str.format() fail before this ValueError could be raised.
        raise ValueError('chains must be positive integer value, '
                         'found {}'.format(chains))
    self.csv_files = []
    """per-chain sample csv files."""
    if args.output_basename is None:
        csv_basename = 'stan-{}-draws'.format(args.model_name)
        for i in range(chains):
            # Only the name is needed; close the handle right away so it
            # is not leaked. delete=False keeps the file on disk.
            with tempfile.NamedTemporaryFile(
                mode='w+',
                prefix='{}-{}-'.format(csv_basename, i + 1),
                suffix='.csv',
                dir=TMPDIR,
                delete=False,
            ) as fd:
                self.csv_files.append(fd.name)
    else:
        for i in range(chains):
            self.csv_files.append('{}-{}.csv'.format(
                args.output_basename, i + 1))
    self.console_files = [
        ''.join([os.path.splitext(csv_file)[0], '.txt'])
        for csv_file in self.csv_files
    ]
    """per-chain sample console output files."""
    self.cmds = [
        args.compose_command(i, self.csv_files[i]) for i in range(chains)
    ]
    """per-chain sampler command."""
    # -1 marks a chain whose return code has not been recorded yet.
    self._retcodes = [-1 for _ in range(chains)]
    self._draws = None
    self._column_names = None
    self._num_params = None  # metric dim(s)
    self._metric_type = None
    self._metric = None
    self._stepsize = None
    self._sample = None
    self._first_draw = None
def __init__(
    self,
    model_name: str,
    model_exe: str,
    chain_ids: Union[List[int], None],
    method_args: Union[SamplerArgs, OptimizeArgs, GenerateQuantitiesArgs,
                       VariationalArgs],
    data: Union[str, dict] = None,
    seed: Union[int, List[int]] = None,
    inits: Union[int, float, str, List[str]] = None,
    output_dir: str = None,
    sig_figs: str = None,
    save_diagnostics: bool = False,
    save_profile: bool = False,
    refresh: int = None,
    logger: logging.Logger = None,
) -> None:
    """
    Store the run arguments, pick the method, then validate everything.

    ``self.method`` is chosen from the type of ``method_args``. The
    method args are validated first, with the number of chain ids (or
    ``None`` when there are none); then ``self.validate()`` is run.
    """
    # plain argument capture
    self.model_name, self.model_exe = model_name, model_exe
    self.chain_ids = chain_ids
    self.data, self.seed, self.inits = data, seed, inits
    self.output_dir = output_dir
    self.sig_figs = sig_figs
    self.save_diagnostics = save_diagnostics
    self.save_profile = save_profile
    self.refresh = refresh
    self.method_args = method_args

    # first matching args type decides the method
    method_by_args_type = (
        (SamplerArgs, Method.SAMPLE),
        (OptimizeArgs, Method.OPTIMIZE),
        (GenerateQuantitiesArgs, Method.GENERATE_QUANTITIES),
        (VariationalArgs, Method.VARIATIONAL),
    )
    for args_type, method in method_by_args_type:
        if isinstance(method_args, args_type):
            self.method = method
            break

    num_chains = len(chain_ids) if chain_ids else None
    self.method_args.validate(num_chains)
    self._logger = logger if logger else get_logger()
    self.validate()
def __init__(
    self,
    runset: RunSet,
    validate_csv: bool = True,
    logger: logging.Logger = None,
) -> None:
    """
    Wrap a sample-method runset and set up empty result fields.

    Sampler settings left as ``None`` in the runset's method args are
    replaced by ``_CMDSTAN_SAMPLING``, ``_CMDSTAN_WARMUP`` and
    ``_CMDSTAN_THIN`` respectively.

    :param runset: Run to wrap; its ``method`` must equal ``Method.SAMPLE``.
    :param validate_csv: When true, ``validate_csv_files()`` is called
        before the constructor returns.
    :param logger: Logger to use; falls back to ``get_logger()``.
    :raises ValueError: If ``runset.method`` is not ``Method.SAMPLE``.
    """
    is_sample_run = runset.method == Method.SAMPLE
    if not is_sample_run:
        raise ValueError(
            'Wrong runset method, expecting sample runset, '
            'found method {}'.format(runset.method)
        )

    self.runset = runset
    self._logger = logger if logger else get_logger()

    # sampler configuration, with fallbacks for unset values
    sampler_args = runset._args.method_args
    iter_sampling = sampler_args.iter_sampling
    self._iter_sampling = (
        _CMDSTAN_SAMPLING if iter_sampling is None else iter_sampling
    )
    iter_warmup = sampler_args.iter_warmup
    self._iter_warmup = (
        _CMDSTAN_WARMUP if iter_warmup is None else iter_warmup
    )
    thin = sampler_args.thin
    self._thin = _CMDSTAN_THIN if thin is None else thin
    self._is_fixed_param = sampler_args.fixed_param
    self._save_warmup = sampler_args.save_warmup
    self._sig_figs = runset._args.sig_figs

    # Stan CSV metadata, HMC tuning params and draws: not yet available
    for pending in (
        '_metadata',
        '_metric',
        '_step_size',
        '_draws',
        '_draws_pd',
    ):
        setattr(self, pending, None)

    self._validate_csv = validate_csv
    if validate_csv:
        self.validate_csv_files()
def __init__(self, args: CmdStanArgs, chains: int = 4,
             logger: logging.Logger = None) -> None:
    """
    Record the run configuration and derive per-chain file names/commands.

    For every chain a csv file name, a console text file name and a
    command are created; a diagnostic csv file name is added when
    ``args.save_diagnostics`` is set. File names go under
    ``args.output_dir`` when given, otherwise under ``TMPDIR``.

    :param args: Run arguments; ``model_name``, ``output_dir``,
        ``save_diagnostics`` and ``compose_command`` are used here.
    :param chains: Number of chains, must be >= 1.
    :param logger: Logger to use; falls back to ``get_logger()``.
    :raises ValueError: If ``chains`` is less than 1.
    """
    self._args = args
    self._chains = chains
    self._logger = logger or get_logger()
    if chains < 1:
        raise ValueError('chains must be positive integer value, '
                         'found {}'.format(chains))
    # -1 marks a chain whose return code has not been recorded yet.
    self._retcodes = [-1 for _ in range(chains)]

    # output and console messages are written to a text file:
    # ``<model_name>-<YYYYMMDDHHMM>-<chain_id>.txt``
    now_str = datetime.now().strftime('%Y%m%d%H%M')
    file_basename = '-'.join([args.model_name, now_str])
    if args.output_dir is not None:
        output_dir = args.output_dir
    else:
        output_dir = TMPDIR

    self._csv_files = []
    self._diagnostic_files = [None for _ in range(chains)]
    self._console_files = []
    self._cmds = []
    for i in range(chains):
        if args.output_dir is None:
            csv_file = create_named_text_file(
                dir=output_dir,
                prefix='{}-{}-'.format(file_basename, i + 1),
                suffix='.csv',
            )
        else:
            csv_file = os.path.join(
                output_dir, '{}-{}.{}'.format(file_basename, i + 1, 'csv'))
        self._csv_files.append(csv_file)
        txt_file = ''.join([os.path.splitext(csv_file)[0], '.txt'])
        self._console_files.append(txt_file)

        if args.save_diagnostics:
            if args.output_dir is None:
                diag_file = create_named_text_file(
                    dir=TMPDIR,
                    prefix='{}-diagnostic-{}-'.format(
                        file_basename, i + 1),
                    suffix='.csv',
                )
            else:
                diag_file = os.path.join(
                    output_dir,
                    '{}-diagnostic-{}.{}'.format(file_basename, i + 1,
                                                 'csv'),
                )
            # Fill this chain's pre-allocated slot. Appending instead left
            # slot i as None (so the command got no diagnostic file) and
            # grew the list past one entry per chain.
            self._diagnostic_files[i] = diag_file
            self._cmds.append(
                args.compose_command(i, csv_file, diag_file))
        else:
            self._cmds.append(args.compose_command(i, csv_file))
def __init__(
    self,
    args: CmdStanArgs,
    chains: int = 4,
    chain_ids: List[int] = None,
    logger: logging.Logger = None,
) -> None:
    """
    Record the run configuration and derive per-chain file names/commands.

    For every chain a csv file name, stdout/stderr text file names and a
    command are created; a diagnostic csv file name is added when
    ``args.save_diagnostics`` is set. File names go under
    ``args.output_dir`` when given, otherwise under ``_TMPDIR``.

    :param args: Run arguments; ``model_name``, ``output_dir``,
        ``save_diagnostics`` and ``compose_command`` are used here.
    :param chains: Number of chains, must be >= 1.
    :param chain_ids: One id per chain; defaults to ``1..chains``.
    :param logger: Logger to use; falls back to ``get_logger()``.
    :raises ValueError: If ``chains`` is less than 1 or the number of
        ``chain_ids`` differs from ``chains``.
    """
    self._args = args
    self._chains = chains
    self._logger = logger if logger else get_logger()
    if chains < 1:
        raise ValueError('chains must be positive integer value, '
                         'found {}'.format(chains))
    if chain_ids is None:
        chain_ids = list(range(1, chains + 1))
    elif len(chain_ids) != chains:
        raise ValueError(
            'mismatch between number of chains and chain_ids, '
            'found {} chains, but {} chain_ids'.format(
                chains, len(chain_ids)))
    self._chain_ids = chain_ids
    self._retcodes = [-1] * chains

    # stdout, stderr are written to text files
    # prefix: ``<model_name>-<YYYYMMDDHHMM>-<chain_id>``
    # suffixes: ``-stdout.txt``, ``-stderr.txt``
    timestamp = datetime.now().strftime('%Y%m%d%H%M')
    file_basename = '-'.join([args.model_name, timestamp])
    use_tmpdir = args.output_dir is None
    output_dir = _TMPDIR if use_tmpdir else args.output_dir

    self._csv_files = [None] * chains
    self._diagnostic_files = [None] * chains
    self._stdout_files = [None] * chains
    self._stderr_files = [None] * chains
    self._cmds = []
    for idx in range(chains):
        chain_label = str(chain_ids[idx])

        if use_tmpdir:
            csv_file = create_named_text_file(
                dir=output_dir,
                prefix='{}-{}-'.format(file_basename, chain_label),
                suffix='.csv',
            )
        else:
            csv_name = '{}-{}.{}'.format(file_basename, chain_label, 'csv')
            csv_file = os.path.join(output_dir, csv_name)
        self._csv_files[idx] = csv_file

        csv_root = os.path.splitext(csv_file)[0]
        self._stdout_files[idx] = ''.join([csv_root, '-stdout.txt'])
        self._stderr_files[idx] = ''.join([csv_root, '-stderr.txt'])

        if not args.save_diagnostics:
            self._cmds.append(args.compose_command(idx, csv_file))
            continue

        if use_tmpdir:
            diag_file = create_named_text_file(
                dir=_TMPDIR,
                prefix='{}-diagnostic-{}-'.format(
                    file_basename, chain_label),
                suffix='.csv',
            )
        else:
            diag_name = '{}-diagnostic-{}.{}'.format(
                file_basename, chain_label, 'csv')
            diag_file = os.path.join(output_dir, diag_name)
        self._diagnostic_files[idx] = diag_file
        self._cmds.append(
            args.compose_command(idx, csv_file, diag_file))
def validate(self) -> None:
    """
    Check arguments correctness and consistency.

    * input files must exist
    * output files must be in a writeable directory
    * if no seed specified, set random seed.
    * length of per-chain lists equals specified # of chains

    Side effects: ``self.output_dir`` is normalized (and created if
    missing), ``self.seed`` is set when it was ``None``, and
    ``self.sig_figs`` is reset to ``None`` when
    ``cmdstan_version_before(2, 25)`` is true.

    :raises ValueError: On the first invalid argument found.
    """
    if self.model_name is None:
        raise ValueError('no stan model specified')
    if self.model_exe is None:
        raise ValueError('model not compiled')

    if self.chain_ids is not None:
        for chain_id in self.chain_ids:
            if chain_id < 1:
                raise ValueError('invalid chain_id {}'.format(chain_id))

    if self.output_dir is not None:
        self.output_dir = os.path.realpath(
            os.path.expanduser(self.output_dir))
        if not os.path.exists(self.output_dir):
            try:
                os.makedirs(self.output_dir)
                get_logger().info('created output directory: %s',
                                  self.output_dir)
            except (RuntimeError, PermissionError) as exc:
                raise ValueError('Invalid path for output files, '
                                 'no such dir: {}.'.format(
                                     self.output_dir)) from exc
        if not os.path.isdir(self.output_dir):
            raise ValueError(
                'Specified output_dir is not a directory: {}.'.format(
                    self.output_dir))
        # Probe writeability by creating and removing a scratch file.
        try:
            testpath = os.path.join(self.output_dir, str(time()))
            with open(testpath, 'w+'):
                pass
            os.remove(testpath)  # cleanup
        except Exception as exc:
            raise ValueError('Invalid path for output files,'
                             ' cannot write to dir: {}.'.format(
                                 self.output_dir)) from exc

    if self.refresh is not None:
        if not isinstance(self.refresh, int) or self.refresh < 1:
            raise ValueError(
                'Argument "refresh" must be a positive integer value, '
                'found {}.'.format(self.refresh))

    if self.sig_figs is not None:
        if (not isinstance(self.sig_figs, int) or self.sig_figs < 1
                or self.sig_figs > 18):
            raise ValueError(
                'Argument "sig_figs" must be an integer between 1 and 18,'
                ' found {}'.format(self.sig_figs))
        # TODO: remove at some future release
        if cmdstan_version_before(2, 25):
            self.sig_figs = None
            get_logger().warning(
                'Argument "sig_figs" invalid for CmdStan versions < 2.25, '
                'using version %s in directory %s',
                os.path.basename(cmdstan_path()),
                os.path.dirname(cmdstan_path()),
            )

    if self.seed is None:
        rng = RandomState()
        self.seed = rng.randint(1, 99999 + 1)
    else:
        if not isinstance(self.seed, (int, list)):
            raise ValueError('Argument "seed" must be an integer between '
                             '0 and 2**32-1, found {}.'.format(self.seed))
        if isinstance(self.seed, int):
            if self.seed < 0 or self.seed > 2**32 - 1:
                raise ValueError(
                    'Argument "seed" must be an integer between '
                    '0 and 2**32-1, found {}.'.format(self.seed))
        else:
            if self.chain_ids is None:
                raise ValueError(
                    'List of per-chain seeds cannot be evaluated without '
                    'corresponding list of chain_ids.')
            if len(self.seed) != len(self.chain_ids):
                raise ValueError(
                    'Number of seeds must match number of chains,'
                    ' found {} seed for {} chains.'.format(
                        len(self.seed), len(self.chain_ids)))
            for seed in self.seed:
                # Check the type before the range: a float would pass
                # the comparison unnoticed and a string would raise
                # TypeError instead of ValueError.
                if (not isinstance(seed, int) or seed < 0
                        or seed > 2**32 - 1):
                    raise ValueError(
                        'Argument "seed" must be an integer value'
                        ' between 0 and 2**32-1,'
                        ' found {}'.format(seed))

    if isinstance(self.data, str):
        if not os.path.exists(self.data):
            raise ValueError('no such file {}'.format(self.data))
    elif self.data is not None and not isinstance(self.data, (str, dict)):
        raise ValueError('Argument "data" must be string or dict')

    if self.inits is not None:
        if isinstance(self.inits, (float, int)):
            if self.inits < 0:
                raise ValueError(
                    'Argument "inits" must be > 0, found {}'.format(
                        self.inits))
        elif isinstance(self.inits, str):
            if not os.path.exists(self.inits):
                raise ValueError('no such file {}'.format(self.inits))
        elif isinstance(self.inits, list):
            if self.chain_ids is None:
                raise ValueError(
                    'List of inits files cannot be evaluated without '
                    'corresponding list of chain_ids.')
            if len(self.inits) != len(self.chain_ids):
                raise ValueError(
                    'Number of inits files must match number of chains,'
                    ' found {} inits files for {} chains.'.format(
                        len(self.inits), len(self.chain_ids)))
            for inits in self.inits:
                if not os.path.exists(inits):
                    raise ValueError('no such file {}'.format(inits))
def __init__(
    self,
    model_name: str = None,
    stan_file: str = None,
    exe_file: str = None,
    compile: bool = True,
    stanc_options: Dict = None,
    cpp_options: Dict = None,
    logger: logging.Logger = None,
) -> None:
    """
    Set up the model from a Stan program file and/or an executable.

    :param model_name: Model name, used for output file names.
    :param stan_file: Path to Stan program file.
    :param exe_file: Path to compiled executable file.
    :param compile: Whether or not to compile the model.
    :param stanc_options: Options for stanc compiler.
    :param cpp_options: Options for C++ compiler.
    :param logger: Python logger object.
    :raises ValueError: If the model name is blank, neither file is
        given, a given file does not exist, the Stan filename is invalid,
        the executable's basename differs from the model name, or no
        executable is set after ``self.compile()`` has run.
    """
    self._name = None
    self._stan_file = None
    self._exe_file = None
    self._compiler_options = CompilerOptions(stanc_options=stanc_options,
                                             cpp_options=cpp_options)
    self._logger = logger if logger else get_logger()

    if model_name is not None:
        if not model_name.strip():
            raise ValueError(
                'Invalid value for argument model name, found "{}"'.format(
                    model_name))
        self._name = model_name.strip()

    if stan_file is not None:
        self._stan_file = os.path.realpath(os.path.expanduser(stan_file))
        if not os.path.exists(self._stan_file):
            raise ValueError('no such file {}'.format(self._stan_file))
        filename = os.path.basename(stan_file)
        has_stan_suffix = filename.endswith('.stan')
        if len(filename) < 6 or not has_stan_suffix:
            raise ValueError('invalid stan filename {}'.format(
                self._stan_file))
        if self._name is None:
            self._name = os.path.splitext(filename)[0]
        # if program has include directives, record path
        with open(self._stan_file, 'r') as stan_src:
            program = stan_src.read()
        if '#include' in program:
            program_dir = os.path.dirname(self._stan_file)
            if self._compiler_options is None:
                self._compiler_options = CompilerOptions(
                    stanc_options={'include_paths': [program_dir]})
            elif self._compiler_options._stanc_options is None:
                self._compiler_options._stanc_options = {
                    'include_paths': [program_dir]
                }
            else:
                self._compiler_options.add_include_path(program_dir)
    elif exe_file is None:
        raise ValueError(
            'Missing model file arguments, you must specify '
            'either Stan source or executable program file or both.')

    if exe_file is not None:
        self._exe_file = os.path.realpath(os.path.expanduser(exe_file))
        if not os.path.exists(self._exe_file):
            raise ValueError('no such file {}'.format(self._exe_file))
        exename = os.path.basename(self._exe_file)
        if self._name is None:
            self._name = os.path.splitext(exename)[0]
        elif self._name != os.path.splitext(exename)[0]:
            raise ValueError(
                'Name mismatch between Stan file and compiled'
                ' executable, expecting basename: {}'
                ' found: {}.'.format(self._name, exename))

    if self._compiler_options is not None:
        self._compiler_options.validate()

    if platform.system() == 'Windows':
        # Add tbb to the $PATH on Windows
        libtbb = os.environ.get('STAN_TBB')
        if libtbb is None:
            libtbb = os.path.join(cmdstan_path(), 'stan', 'lib',
                                  'stan_math', 'lib', 'tbb')
        # put libtbb first, then drop repeated entries keeping their order
        path_entries = [libtbb] + os.environ.get('PATH', '').split(';')
        unique_entries = list(OrderedDict.fromkeys(path_entries))
        os.environ['PATH'] = ';'.join(unique_entries)

    if compile and self._exe_file is None:
        self.compile()
        if self._exe_file is None:
            raise ValueError(
                'Unable to compile Stan model file: {}.'.format(
                    self._stan_file))
def __init__(
    self,
    stan_file: str = None,
    exe_file: str = None,
    include_paths: List[str] = None,
    compile: bool = True,
    logger: logging.Logger = None,
) -> None:
    """
    Set up the model from a Stan program file and/or an executable.

    :param stan_file: Path to Stan program file.
    :param exe_file: Path to compiled executable file.
    :param include_paths: Directories to search for ``#include`` files.
        The argument is copied, never modified; the program's own
        directory is added to the copy when the program text contains
        ``#include``.
    :param compile: When true and no executable was given,
        ``self.compile()`` is called.
    :param logger: Logger to use; falls back to ``get_logger()``.
    :raises ValueError: If neither file is given, a given file or include
        path does not exist, the Stan filename is invalid, the
        executable's basename differs from the Stan file's, or no
        executable is set after ``self.compile()`` has run.
    """
    self._stan_file = None
    self._name = None
    self._exe_file = None
    self._include_paths = None
    self._logger = logger or get_logger()
    # Work on a private copy so the caller's list is never mutated.
    if include_paths is not None:
        include_paths = list(include_paths)

    if stan_file is None:
        if exe_file is None:
            raise ValueError(
                'must specify Stan source or executable program file'
            )
    else:
        self._stan_file = os.path.realpath(os.path.expanduser(stan_file))
        if not os.path.exists(self._stan_file):
            raise ValueError('no such file {}'.format(self._stan_file))
        _, filename = os.path.split(stan_file)
        # shortest acceptable name is one character plus '.stan'
        if len(filename) < 6 or not filename.endswith('.stan'):
            raise ValueError(
                'invalid stan filename {}'.format(self._stan_file)
            )
        self._name, _ = os.path.splitext(filename)
        # if program has #includes, search program dir
        with open(self._stan_file, 'r') as fd:
            program = fd.read()
        if '#include' in program:
            path, _ = os.path.split(self._stan_file)
            if include_paths is None:
                include_paths = []
            if path not in include_paths:
                include_paths.append(path)

    if exe_file is not None:
        self._exe_file = os.path.realpath(os.path.expanduser(exe_file))
        if not os.path.exists(self._exe_file):
            raise ValueError('no such file {}'.format(self._exe_file))
        _, exename = os.path.split(self._exe_file)
        if self._name is None:
            self._name, _ = os.path.splitext(exename)
        elif self._name != os.path.splitext(exename)[0]:
            raise ValueError(
                'name mismatch between Stan file and compiled'
                ' executable, expecting basename: {}'
                ' found: {}'.format(self._name, exename)
            )

    if include_paths is not None:
        bad_paths = [d for d in include_paths if not os.path.exists(d)]
        # Test for emptiness rather than any(): an empty-string entry is
        # falsy, so any() would let it through unreported.
        if bad_paths:
            raise ValueError(
                'invalid include paths: {}'.format(', '.join(bad_paths))
            )
        self._include_paths = include_paths

    if platform.system() == 'Windows':
        # Add tbb to the $PATH on Windows
        libtbb = os.getenv('STAN_TBB')
        if libtbb is None:
            libtbb = os.path.join(
                cmdstan_path(), 'stan', 'lib', 'stan_math', 'lib', 'tbb'
            )
        # put libtbb first, then drop repeated entries keeping their order
        os.environ['PATH'] = ';'.join(
            list(
                OrderedDict.fromkeys(
                    [libtbb] + os.getenv('PATH', '').split(';')
                )
            )
        )

    if compile and self._exe_file is None:
        self.compile()
        if self._exe_file is None:
            raise ValueError(
                'unable to compile Stan model file: {}'.format(
                    self._stan_file
                )
            )