from .cubes import PerplexityStrategy, GreedyStrategy
from .model_constructor import init_simple_default_model, create_default_topics
from .rel_toolbox_lite import count_vocab_size, handle_regularizer

import artm

from inspect import signature, Parameter

from strictyaml import Map, Str, Int, Seq, Float, Bool
from strictyaml import Any, Optional, EmptyDict, EmptyNone, EmptyList
from strictyaml import dirty_load


# TODO: use stackoverflow.com/questions/37929851/parse-numpydoc-docstring-and-access-components
# for now just hardcode most common / important types
ARTM_TYPES = {
    "tau": Float(),
    "topic_names": Str() | Seq(Str()) | EmptyNone(),
    # TODO: handle class_ids in model and in regularizers separately
    "class_ids": Str() | Seq(Str()) | EmptyNone(),
    "gamma": Float() | EmptyNone(),
    "seed": Int(),
    "num_document_passes": Int(),
    "num_processors": Int(),
    "cache_theta": Bool(),
    "reuse_theta": Bool(),
    "theta_name": Str(),
}


# change log style
lc = artm.messages.ConfigureLoggingArgs()
lc.minloglevel = 3
lib = artm.wrapper.LibArtm(logging_config=lc)
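
# Illustrative sketch, not part of the original module: one way ARTM_TYPES can be
# combined with inspect.signature to derive a strictyaml schema for a regularizer's
# keyword arguments. The helper name build_schema_for() and the YAML literal below
# are assumptions made for this example only.
def build_schema_for(func):
    schema_fields = {
        Optional(name): ARTM_TYPES[name]
        for name in signature(func).parameters
        if name in ARTM_TYPES
    }
    return Map(schema_fields)


# Example: validate a flow-style YAML snippet of SmoothSparsePhiRegularizer
# parameters (dirty_load is used because flow style must be explicitly allowed).
_example_params = dirty_load(
    "{tau: 0.1, class_ids: '@default_class'}",
    build_schema_for(artm.SmoothSparsePhiRegularizer.__init__),
    allow_flow_style=True,
)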
        'report': _REPORT_DEFAULTS,
    },
    'onSuccess': {
        'report': _REPORT_DEFAULTS,
    },
    'environment': [],
    'executionTimeout': None,
    'killTimeout': 30,
    'statsd': None,
}

# Schema for the "report" section: sentry and mail notification settings.
_report_schema = Map({
    Opt("sentry"): Map({
        Opt("dsn"): Map({
            Opt("value"): EmptyNone() | Str(),
            Opt("fromFile"): EmptyNone() | Str(),
            Opt("fromEnvVar"): EmptyNone() | Str(),
        }),
        Opt("fingerprint"): Seq(Str()),
        Opt("level"): Str(),
        Opt("extra"): MapPattern(Str(), Str() | Int() | Bool()),
    }),
    Opt("mail"): Map({
        "from": EmptyNone() | Str(),
        "to": EmptyNone() | Str(),
        Opt("smtpHost"): Str(),
        Opt("smtpPort"): Int(),
        Opt("subject"): Str(),
        Opt("body"): Str(),
    }),
})
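
# Hedged usage sketch, not part of the config module above: validating a standalone
# reporting section against _report_schema. The YAML literal and variable names here
# are assumptions for demonstration only.
import strictyaml

_example_report_yaml = """\
mail:
  from: cron@example.com
  to: admin@example.com
  smtpHost: localhost
  smtpPort: 25
"""
_report = strictyaml.load(_example_report_yaml, _report_schema).data
# _report["mail"]["smtpPort"] is parsed as the integer 25 thanks to the Int() validator.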
import os

import torch
from strictyaml import Map, Str, Int, EmptyNone, load


class TestConfig:
    _str_to_dtype = {
        'float16': torch.float16,
        'float32': torch.float32,
        'float64': torch.float64,
        'uint8': torch.uint8,
        'int8': torch.int8,
        'int16': torch.int16,
        'int32': torch.int32,
        'int64': torch.int64,
    }

    _schema = Map({
        'simulation': Map({
            'Ts': Int(),          # Time discretization in milliseconds
            'tSample': Int(),     # Number of simulation steps
            'tStartLoss': Int(),  # Start computing loss at this time step
        }),
        'model': Map({
            'type': Str(),        # {cnn5-avgp-fc1}
            'CkptFile': Str(),    # Path to checkpoint
            # {float16, float32, float64, uint8, int8, int16, int32, int64}
            'dtype': EmptyNone() | Str(),
        }),
        'batchsize': Int(),
        'hardware': Map({
            # {empty: cpu_count, 0: main thread, >0: number of reader threads}
            'readerThreads': EmptyNone() | Int(),
            'gpuDevice': Int(),   # GPU to be used, by device number
        }),
    })

    def __init__(self, config_filepath):
        with open(config_filepath, 'r') as stream:
            self.dictionary = load(stream.read(), self._schema).data

        # Some sanity checks.
        assert self.dictionary['simulation']['Ts'] == 1, \
            "Only 1 ms time-step is tested"
        assert self.dictionary['simulation']['tSample'] == 100, \
            "Only 100 ms available"
        assert (self.dictionary['simulation']['tSample']
                > self.dictionary['simulation']['tStartLoss'])
        assert os.path.exists(self.dictionary['model']['CkptFile'])
        assert self.dictionary['batchsize'] >= 1

        # Resolve the dtype string to a torch dtype (default: float32).
        model_dtype_str = self.dictionary['model']['dtype']
        if model_dtype_str is None:
            self.dictionary['model']['dtype'] = torch.float32
        else:
            self.dictionary['model']['dtype'] = self._str_to_dtype[model_dtype_str]

        # Resolve the GPU device number to a torch.device.
        self.dictionary['hardware']['gpuDevice'] = torch.device(
            'cuda:{}'.format(self.dictionary['hardware']['gpuDevice']))

        # An empty readerThreads entry means "use all available CPU cores".
        if self.dictionary['hardware']['readerThreads'] is None:
            self.dictionary['hardware']['readerThreads'] = os.cpu_count()

    def __getitem__(self, key):
        return self.dictionary[key]

    def __setitem__(self, key, value):
        self.dictionary[key] = value
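
# Hedged usage sketch, not part of the class above: writing a minimal config that
# satisfies TestConfig._schema to a temporary file and loading it. All literal values
# below are illustrative assumptions; the checkpoint is an empty dummy file created
# only so the os.path.exists() check passes.
if __name__ == '__main__':
    import tempfile
    import textwrap

    with tempfile.TemporaryDirectory() as tmpdir:
        ckpt_path = os.path.join(tmpdir, 'dummy.ckpt')
        open(ckpt_path, 'wb').close()

        config_path = os.path.join(tmpdir, 'test_config.yaml')
        with open(config_path, 'w') as f:
            f.write(textwrap.dedent("""\
                simulation:
                  Ts: 1
                  tSample: 100
                  tStartLoss: 50
                model:
                  type: cnn5-avgp-fc1
                  CkptFile: {ckpt}
                  dtype: float32
                batchsize: 8
                hardware:
                  readerThreads:
                  gpuDevice: 0
            """).format(ckpt=ckpt_path))

        config = TestConfig(config_path)
        # dtype string is resolved to a torch dtype, empty readerThreads to cpu_count().
        assert config['model']['dtype'] is torch.float32
        assert config['hardware']['readerThreads'] == os.cpu_count()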