def test_explicit_run_finalize(self):
    """An explicit ``finalize()`` call must stamp the run's end time."""
    run = Run(system_tracking_interval=None)
    for value in range(10):
        run.track(value, name='seq')
    # Still open: no end time recorded yet.
    self.assertIsNone(run.end_time)
    run.finalize()
    self.assertIsNotNone(run.end_time)
def _func():
    """Closure helper: open a run, publish its hash via the enclosing
    ``run_hash`` list, track a short sequence, and assert the run stays
    open (no end time) throughout."""
    run = Run(system_tracking_interval=None)
    run_hash.append(run.hash)
    self.assertIsNone(run.end_time)
    for step in range(10):
        run.track(step, name='seq')
    # Run is never finalized inside this helper.
    self.assertIsNone(run.end_time)
def test_type_compatibility_for_empty_list(self):
    """Empty lists stay generically typed until a concrete element narrows
    the sequence dtype; afterwards incompatible elements are rejected."""
    run = Run(system_tracking_interval=None)
    context = {}
    ctx = Context(context)
    seq_name = 'obj_list'
    seq_subtree = run.meta_run_tree.subtree(('traces', ctx.idx, seq_name))
    types_subtree = run.meta_tree.subtree('traces_types')

    # First empty list registers the generic 'list' dtype only.
    run.track([], name=seq_name, context=context)
    self.assertEqual('list', seq_subtree['dtype'])
    self.assertEqual(1, types_subtree['list', ctx.idx, seq_name])
    self.assertIsNone(
        types_subtree.get(('list(float)', ctx.idx, seq_name), None))

    # A second empty list changes nothing.
    run.track([], name=seq_name, context=context)
    self.assertEqual('list', seq_subtree['dtype'])
    self.assertIsNone(
        types_subtree.get(('list(float)', ctx.idx, seq_name), None))

    # A float element narrows the dtype to 'list(float)'.
    run.track([1.], name=seq_name, context=context)
    self.assertEqual('list(float)', seq_subtree['dtype'])
    self.assertEqual(
        1, types_subtree['list(float)', ctx.idx, seq_name])

    # Empty lists remain compatible with the narrowed dtype.
    run.track([], name=seq_name, context=context)
    self.assertEqual('list(float)', seq_subtree['dtype'])

    # An int element conflicts with 'list(float)' and must be rejected.
    with self.assertRaises(ValueError) as cm:
        run.track([5], name=seq_name, context=context)
    exception = cm.exception
    self.assertEqual(
        f'Cannot log value \'{[5]}\' on sequence \'{seq_name}\'. Incompatible data types.',
        exception.args[0])
def test_explicit_run_delete(self):
    """Dropping the last reference to a run should finalize it in storage."""
    run = Run(system_tracking_interval=None)
    run_hash = run.hash
    for value in range(10):
        run.track(value, name='seq')
    del run
    # Give finalization a brief moment to land before querying.
    time.sleep(.1)
    self.assertIsNotNone(self._query_run_finalized_at(run_hash))
def test_incompatible_type_during_tracking(self):
    """Tracking an int on a sequence already typed as float must raise."""
    run = Run(system_tracking_interval=None)
    run.track(1., name='numbers', context={})
    with self.assertRaises(ValueError) as cm:
        run.track(1, name='numbers', context={})
    exception = cm.exception
    self.assertEqual(
        'Cannot log value \'1\' on sequence \'numbers\'. Incompatible data types.',
        exception.args[0])
def setUpClass(cls) -> None:
    """Fixture: one run tracking 100 steps of image batches and floats."""
    super().setUpClass()
    run = Run(repo=cls.repo)
    run['images_per_step'] = 16
    for step in range(100):
        batch = generate_image_set(img_count=16,
                                   caption_prefix=f'Image {step}')
        run.track(batch, name='random_images')
        run.track(random.random(), name='random_values')
    cls.run_hash = run.hash
def setUpClass(cls) -> None:
    """Fixture: one run tracking image lists and single images per step."""
    super().setUpClass()
    run = Run(system_tracking_interval=None)
    cls.run_hash = run.hash
    for step in range(5):
        batch = generate_image_set(img_count=5,
                                   caption_prefix=f'Image {step}')
        run.track(batch, name='image_lists', context={})
        run.track(batch[0], name='single_images', context={})
class _XgboostCallback(TrainingCallback):
    """XGBoost ``TrainingCallback`` that mirrors evaluation metrics into an
    Aim run.

    Args:
        repo: Aim repo path; ``None`` uses the default repo.
        experiment: Experiment name for the created run.
        system_tracking_interval: System-resource tracking interval;
            ``None`` disables system tracking.
    """

    def __init__(self, repo: Optional[str] = None,
                 experiment: Optional[str] = None,
                 system_tracking_interval: Optional[
                     int] = DEFAULT_SYSTEM_TRACKING_INT):
        super().__init__()
        self.repo = repo
        self.experiment = experiment
        self.system_tracking_interval = system_tracking_interval
        self.initialized = False
        self.aim_run = None

    def before_training(self, model):
        """Open the Aim run lazily, right before training starts."""
        self.aim_run = Run(
            repo=self.repo, experiment=self.experiment,
            system_tracking_interval=self.system_tracking_interval)
        self.initialized = True
        return model

    def after_iteration(self, model, epoch: int,
                        evals_log: CallbackContainer.EvalsLog) -> bool:
        """Track the latest score (and stdv, if present) for every metric.

        Returns ``False`` so XGBoost never stops training on our account.
        """
        if not evals_log:
            return False

        for data, metric in evals_log.items():
            for metric_name, log in metric.items():
                stdv: Optional[float] = None
                # CV-style logs yield (score, stdv) tuples; plain logs a scalar.
                if isinstance(log[-1], tuple):
                    score = log[-1][0]
                    stdv = log[-1][1]
                else:
                    score = log[-1]

                # NOTE(review): step is hard-coded to 0 as in the original —
                # confirm whether `epoch` was intended here.
                self.aim_run.track(score, step=0, name=metric_name,
                                   context={'stdv': False})
                if stdv is not None:
                    # BUGFIX: previously `score` was tracked again here,
                    # so the standard deviation was never logged; the
                    # {'stdv': True} context must carry `stdv`.
                    self.aim_run.track(stdv, step=0, name=metric_name,
                                       context={'stdv': True})
        return False

    def after_training(self, model):
        """Release the run reference so it finalizes; pass the model through."""
        if self.initialized and self.aim_run:
            del self.aim_run
            self.aim_run = None
        return model
def test_incompatible_type_after_tracking_restart(self):
    """Sequence type checks must survive finalizing and reopening a run."""
    run = Run(system_tracking_interval=None)
    run_hash = run.hash
    run.track(1., name='numbers', context={})
    run.finalize()
    del run

    # Reopen the same run by hash and try to log an incompatible type.
    new_run = Run(run_hash=run_hash, system_tracking_interval=None)
    with self.assertRaises(ValueError) as cm:
        new_run.track(1, name='numbers', context={})
    exception = cm.exception
    self.assertEqual(
        'Cannot log value \'1\' on sequence \'numbers\'. Incompatible data types.',
        exception.args[0])
def fill_up_test_data():
    """Populate the default test repo with 10 dummy runs.

    Each run tracks 'loss' over three contexts and 'accuracy' over the
    contexts that have no 'subset' key — 100 values per sequence.
    """
    remove_test_data()

    # put dummy data into test repo with 10 runs, tracking 2 metrics over 3 contexts
    repo = Repo.default_repo()
    run_hashes = [hex(random.getrandbits(64))[-7:] for _ in range(10)]
    contexts = [
        {'is_training': True, 'subset': 'train'},
        {'is_training': True, 'subset': 'val'},
        {'is_training': False},
    ]
    metrics = ['loss', 'accuracy']

    # BUGFIX: `run` must exist before the try block, otherwise an exception
    # raised before the first assignment makes `del run` in `finally`
    # raise NameError and mask the original error.
    run = None
    with repo.structured_db:
        try:
            for idx, hash_name in enumerate(run_hashes):
                run = Run(hashname=hash_name, repo=repo,
                          system_tracking_interval=None)
                run['hparams'] = create_run_params()
                run['run_index'] = idx
                run['start_time'] = datetime.datetime.utcnow().isoformat()
                run['name'] = f'Run # {idx}'
                run.props.name = run['name']

                for metric, context in itertools.product(metrics, contexts):
                    # 'accuracy' is only tracked for contexts without a subset.
                    if metric == 'accuracy' and 'subset' in context:
                        continue
                    # track 100 values per run
                    for step in range(100):
                        val = 1.0 - 1.0 / (step + 1)
                        run.track(val, name=metric, step=step, epoch=1,
                                  context=context)
        finally:
            if run is not None:
                del run
def setUpClass(cls) -> None:
    """Fixture: two runs mixing numeric metrics and image sequences."""
    super().setUpClass()

    # First run: a float metric plus images in two different contexts.
    first_run = Run(system_tracking_interval=None)
    first_run.track(1., name='metric1', context={'a': True})
    first_run.track(generate_image_set(1), name='images1', context={'a': True})
    first_run.track(generate_image_set(1), name='images1', context={'b': True})

    # Second run: an int metric and a single (non-list) image.
    second_run = Run(system_tracking_interval=None)
    second_run.track(1, name='metric2', context={'a': True})
    second_run.track(generate_image_set(1)[0], name='images2', context={'b': True})
def test_different_types_on_different_contexts_and_runs(self):
    """The same sequence name may carry different value types as long as
    the context or the run differs — no ValueError expected here."""
    first = Run(system_tracking_interval=None)
    # Same name, two distinct contexts: float vs int is allowed.
    first.track(1., name='numbers', context={'type': 'float'})
    first.track(1, name='numbers', context={'type': 'integer'})

    second = Run(system_tracking_interval=None)
    # Same name and context but a different run: also allowed.
    second.track(1, name='numbers', context={'type': 'float'})
class _HuggingFaceCallback(TrainerCallback):
    """HuggingFace ``TrainerCallback`` that mirrors trainer logs into an
    Aim run.

    The run is created lazily on first use; every logged value is tracked
    under a context whose 'subset' reflects the current phase
    ('train' / 'val' / 'pred').
    """

    def __init__(
        self,
        repo: Optional[str] = None,
        experiment: Optional[str] = None,
        system_tracking_interval: Optional[
            int] = DEFAULT_SYSTEM_TRACKING_INT,
    ):
        self._repo_path = repo
        self._experiment_name = experiment
        self._system_tracking_interval = system_tracking_interval
        self._initialized = False
        self._current_shift = None
        self._run = None

    def setup(self, args, state, model):
        """Create the Aim run and log the sanitized trainer arguments."""
        self._initialized = True
        self._run = Run(
            repo=self._repo_path,
            experiment=self._experiment_name,
            system_tracking_interval=self._system_tracking_interval,
        )
        combined_dict = {**args.to_sanitized_dict()}
        self._run['hparams'] = combined_dict

        # Store model configs as well
        # if hasattr(model, 'config') and model.config is not None:
        #     model_config = model.config.to_dict()
        #     self._run['model'] = model_config

    def on_train_begin(self, args, state, control, model=None, **kwargs):
        if not self._initialized:
            self.setup(args, state, model)
        self._current_shift = 'train'

    def on_evaluate(self, args, state, control, **kwargs):
        self._current_shift = 'val'

    def on_prediction_step(self, args, state, control, **kwargs):
        self._current_shift = 'pred'

    def on_log(self, args, state, control, model=None,
               logs=None, **kwargs):
        if not self._initialized:
            self.setup(args, state, model)
        # ROBUSTNESS: `logs` defaults to None; skip quietly instead of
        # crashing on `None.items()`.
        if logs is None:
            return
        context = {
            'subset': self._current_shift,
        }
        for log_name, log_value in logs.items():
            self._run.track(log_value, name=log_name, context=context)

    def on_epoch_end(self, args, state, control, **kwargs):
        pass

    def __del__(self):
        # Dropping the reference lets the run finalize.
        if self._initialized and self._run:
            del self._run
            self._run = None
class Session:
    """Deprecated v2 SDK facade wrapping a v3 ``Run``.

    Keeps a per-repo registry of open sessions and finalizes all of them
    on interpreter exit (atexit) or SIGTERM.
    """

    # repo path -> list of open Session objects
    sessions = {}

    _are_exit_listeners_set = False
    _original_sigint_handler = None
    _original_sigterm_handler = None

    @deprecated
    def __init__(self, repo: Optional[str] = None,
                 experiment: Optional[str] = None,
                 flush_frequency: int = 0,  # unused
                 block_termination: bool = True,  # unused
                 run: Optional[str] = None,
                 system_tracking_interval: Optional[
                     int] = DEFAULT_SYSTEM_TRACKING_INT):
        self._repo = Repo.from_path(repo) if repo else Repo.default_repo()
        self._repo_path = self._repo.path
        self._run = Run(run, repo=self._repo, experiment=experiment,
                        system_tracking_interval=system_tracking_interval)
        self._run_hash = self._run.hashname
        self.active = True
        Session.sessions.setdefault(self._repo_path, [])
        Session.sessions[self._repo_path].append(self)

        # Bind signal listeners
        self._set_exit_handlers()

    @property
    def run_hash(self):
        return self._run_hash

    @property
    def repo_path(self):
        return self._repo_path

    @exception_resistant
    def track(self, *args, **kwargs):
        """Forward a tracked value to the underlying run; leftover kwargs
        (minus dunder-prefixed keys) become the sequence context."""
        val = args[0]
        name = kwargs.pop('name')
        step = kwargs.pop('step', None)
        epoch = kwargs.pop('epoch', None)
        # BUGFIX: deleting entries while iterating the live keys() view
        # raises RuntimeError in Python 3 — iterate over a snapshot.
        for key in list(kwargs.keys()):
            if key.startswith('__'):
                del kwargs[key]
        self._run.track(val, name=name, step=step, epoch=epoch, context=kwargs)

    @exception_resistant
    def set_params(self, params: dict, name: Optional[str] = None):
        """Store params at the run root (name=None) or under `name`."""
        if name is None:
            self._run[...] = params
        else:
            self._run[name] = params

    def flush(self):
        # Kept for v2 API compatibility; the v3 Run flushes itself.
        pass

    @exception_resistant
    def close(self):
        """Finalize the run and deregister this session."""
        if not self.active:
            raise Exception('session is closed')
        if self._run:
            del self._run
            self._run = None
        if self._repo_path in Session.sessions \
                and self in Session.sessions[self._repo_path]:
            Session.sessions[self._repo_path].remove(self)
            if len(Session.sessions[self._repo_path]) == 0:
                del Session.sessions[self._repo_path]
        self.active = False

    @classmethod
    def _close_sessions(cls, *args, **kwargs):
        """Close every registered session concurrently; re-raise SIGTERM
        with the original handler restored when invoked as a signal handler."""
        threads = []
        for _, sessions in cls.sessions.items():
            for session in sessions:
                th = threading.Thread(target=session.close)
                th.daemon = True
                threads.append(th)

        for th in threads:
            th.start()
        for th in threads:
            th.join()

        if len(args):
            if args[0] == 15:
                signal.signal(signal.SIGTERM, cls._original_sigterm_handler)
                os.kill(os.getpid(), 15)
            # elif args[0] == 2:
            #     signal.signal(signal.SIGINT, cls._original_sigint_handler)
            #     os.kill(os.getpid(), 2)

    @classmethod
    def _set_exit_handlers(cls):
        """Install the atexit hook and SIGTERM handler exactly once."""
        if not cls._are_exit_listeners_set:
            cls._are_exit_listeners_set = True
            # cls._original_sigint_handler = signal.getsignal(signal.SIGINT)
            cls._original_sigterm_handler = signal.getsignal(signal.SIGTERM)
            atexit.register(cls._close_sessions)
            # signal.signal(signal.SIGINT, cls._close_sessions)
            signal.signal(signal.SIGTERM, cls._close_sessions)
def setUpClass(cls) -> None:
    """Fixture with two runs of mixed sequence types.

    run1: context {'subset': 'train'} -> image list, floats, integers;
          context {'subset': 'val'}   -> floats.
    run2: context {'subset': 'val'}   -> single image, floats;
          context {'subset': 'train'} -> floats.
    """
    super().setUpClass()

    run1 = Run(system_tracking_interval=None)
    cls.run1_hash = run1.hash
    images = generate_image_set(img_count=2, caption_prefix=f'Image 0')
    run1.track(images, name='image_lists', context={'subset': 'train'})
    run1.track(random.random(), name='floats', context={'subset': 'train'})
    run1.track(random.randint(100, 200), name='integers',
               context={'subset': 'train'})
    run1.track(random.random(), name='floats', context={'subset': 'val'})

    run2 = Run(system_tracking_interval=None)
    run2.track(images[0], name='single_images', context={'subset': 'val'})
    run2.track(random.random(), name='floats', context={'subset': 'train'})
    run2.track(random.random(), name='floats', context={'subset': 'val'})
    cls.run2_hash = run2.hash