Example 1
 def test_explicit_run_finalize(self):
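     # end_time stays unset until the run is explicitly finalized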
     run = Run(system_tracking_interval=None)
     for i in range(10):
         run.track(i, name='seq')
     self.assertIsNone(run.end_time)
     run.finalize()
     self.assertIsNotNone(run.end_time)
Example 2
 def _func():
     run = Run(system_tracking_interval=None)
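     # run_hash is a list from the enclosing test method's scope; the run's hash
     # is appended so it remains accessible after _func returns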
     run_hash.append(run.hash)
     self.assertIsNone(run.end_time)
     for i in range(10):
         run.track(i, name='seq')
     self.assertIsNone(run.end_time)
Example 3
    def test_type_compatibility_for_empty_list(self):
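        # an empty list is type-compatible with any list sequence; the dtype is
        # narrowed to 'list(float)' once a concrete float element is tracked,
        # after which incompatible element types are rejected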
        run = Run(system_tracking_interval=None)
        context = {}
        ctx = Context(context)
        seq_name = 'obj_list'

        sequence_info = run.meta_run_tree.subtree(
            ('traces', ctx.idx, seq_name))
        typed_sequences_info = run.meta_tree.subtree('traces_types')

        run.track([], name=seq_name, context=context)
        self.assertEqual('list', sequence_info['dtype'])
        self.assertEqual(1, typed_sequences_info['list', ctx.idx, seq_name])
        self.assertIsNone(
            typed_sequences_info.get(('list(float)', ctx.idx, seq_name), None))

        run.track([], name=seq_name, context=context)
        self.assertEqual('list', sequence_info['dtype'])
        self.assertIsNone(
            typed_sequences_info.get(('list(float)', ctx.idx, seq_name), None))

        run.track([1.], name=seq_name, context=context)
        self.assertEqual('list(float)', sequence_info['dtype'])
        self.assertEqual(
            1, typed_sequences_info['list(float)', ctx.idx, seq_name])

        run.track([], name=seq_name, context=context)
        self.assertEqual('list(float)', sequence_info['dtype'])

        with self.assertRaises(ValueError) as cm:
            run.track([5], name=seq_name, context=context)
        exception = cm.exception
        self.assertEqual(
            f'Cannot log value \'{[5]}\' on sequence \'{seq_name}\'. Incompatible data types.',
            exception.args[0])
Example 4
 def test_explicit_run_delete(self):
     run = Run(system_tracking_interval=None)
     run_hash = run.hash
     for i in range(10):
         run.track(i, name='seq')
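     # deleting the run triggers finalization; the short sleep lets it complete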
     del run
     time.sleep(.1)
     self.assertIsNotNone(self._query_run_finalized_at(run_hash))
Example 5
 def test_incompatible_type_during_tracking(self):
     run = Run(system_tracking_interval=None)
     run.track(1., name='numbers', context={})
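     # the sequence dtype is fixed by the first tracked value, so an int is rejected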
     with self.assertRaises(ValueError) as cm:
         run.track(1, name='numbers', context={})
     exception = cm.exception
     self.assertEqual(
         'Cannot log value \'1\' on sequence \'numbers\'. Incompatible data types.',
         exception.args[0])
Example 6
 @classmethod
 def setUpClass(cls) -> None:
     super().setUpClass()
     run = Run(repo=cls.repo)
     run['images_per_step'] = 16
     for step in range(100):
         images = generate_image_set(img_count=16,
                                     caption_prefix=f'Image {step}')
         run.track(images, name='random_images')
         run.track(random.random(), name='random_values')
     cls.run_hash = run.hash
Example 7
    @classmethod
    def setUpClass(cls) -> None:
        super().setUpClass()

        run = Run(system_tracking_interval=None)
        cls.run_hash = run.hash

        for step in range(5):
            images = generate_image_set(img_count=5,
                                        caption_prefix=f'Image {step}')
            run.track(images, name='image_lists', context={})
            run.track(images[0], name='single_images', context={})
Example 8
        class _XgboostCallback(TrainingCallback):
            def __init__(self,
                         repo: Optional[str] = None,
                         experiment: Optional[str] = None,
                         system_tracking_interval: Optional[
                             int] = DEFAULT_SYSTEM_TRACKING_INT):
                super().__init__()
                self.repo = repo
                self.experiment = experiment
                self.system_tracking_interval = system_tracking_interval
                self.initialized = False
                self.aim_run = None

            def before_training(self, model):
                self.aim_run = Run(
                    repo=self.repo,
                    experiment=self.experiment,
                    system_tracking_interval=self.system_tracking_interval)
                self.initialized = True
                return model

            def after_iteration(self, model, epoch: int,
                                evals_log: CallbackContainer.EvalsLog) -> bool:
                if not evals_log:
                    return False

                for data, metric in evals_log.items():
                    for metric_name, log in metric.items():
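                        # each log entry is either a plain score or a (score, stdv) tuple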
                        stdv: Optional[float] = None
                        if isinstance(log[-1], tuple):
                            score = log[-1][0]
                            stdv = log[-1][1]
                        else:
                            score = log[-1]

                        self.aim_run.track(score,
                                           step=0,
                                           name=metric_name,
                                           context={'stdv': False})
                        if stdv is not None:
                            self.aim_run.track(stdv,
                                               step=0,
                                               name=metric_name,
                                               context={'stdv': True})

                return False

            def after_training(self, model):
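                # dropping the Run reference finalizes the run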
                if self.initialized and self.aim_run:
                    del self.aim_run
                    self.aim_run = None
                return model
Example 9
    def test_incompatible_type_after_tracking_restart(self):
        run = Run(system_tracking_interval=None)
        run_hash = run.hash
        run.track(1., name='numbers', context={})
        run.finalize()
        del run

        new_run = Run(run_hash=run_hash, system_tracking_interval=None)
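        # the dtype recorded for 'numbers' persists, so tracking an int still fails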
        with self.assertRaises(ValueError) as cm:
            new_run.track(1, name='numbers', context={})
        exception = cm.exception
        self.assertEqual(
            'Cannot log value \'1\' on sequence \'numbers\'. Incompatible data types.',
            exception.args[0])
Example 10
def fill_up_test_data():
    remove_test_data()

    # put dummy data into test repo with 10 runs, tracking 2 metrics over 3 contexts
    repo = Repo.default_repo()
    run_hashes = [hex(random.getrandbits(64))[-7:] for _ in range(10)]

    contexts = [{
        'is_training': True,
        'subset': 'train'
    }, {
        'is_training': True,
        'subset': 'val'
    }, {
        'is_training': False
    }]
    metrics = ['loss', 'accuracy']

    with repo.structured_db:
        try:
            for idx, hash_name in enumerate(run_hashes):
                run = Run(hashname=hash_name,
                          repo=repo,
                          system_tracking_interval=None)
                run['hparams'] = create_run_params()
                run['run_index'] = idx
                run['start_time'] = datetime.datetime.utcnow().isoformat()
                run['name'] = f'Run # {idx}'
                run.props.name = run['name']

                metric_contexts = itertools.product(metrics, contexts)
                for metric_context in metric_contexts:
                    metric = metric_context[0]
                    context = metric_context[1]
                    if metric == 'accuracy' and 'subset' in context:
                        continue
                    else:
                        # track 100 values per run
                        for step in range(100):
                            val = 1.0 - 1.0 / (step + 1)
                            run.track(val,
                                      name=metric,
                                      step=step,
                                      epoch=1,
                                      context=context)
        finally:
            del run
Example 11
    @classmethod
    def setUpClass(cls) -> None:
        super().setUpClass()
        run1 = Run(system_tracking_interval=None)
        run1.track(1., name='metric1', context={'a': True})
        run1.track(generate_image_set(1), name='images1', context={'a': True})
        run1.track(generate_image_set(1), name='images1', context={'b': True})

        run2 = Run(system_tracking_interval=None)
        run2.track(1, name='metric2', context={'a': True})
        run2.track(generate_image_set(1)[0],
                   name='images2',
                   context={'b': True})
Example 12
    def test_different_types_on_different_contexts_and_runs(self):
        run = Run(system_tracking_interval=None)
        # same sequence name, different contexts
        run.track(1., name='numbers', context={'type': 'float'})
        run.track(1, name='numbers', context={'type': 'integer'})

        run2 = Run(system_tracking_interval=None)
        # same sequence name, different runs
        run2.track(1, name='numbers', context={'type': 'float'})
Example 13
        class _HuggingFaceCallback(TrainerCallback):
            def __init__(
                self,
                repo: Optional[str] = None,
                experiment: Optional[str] = None,
                system_tracking_interval: Optional[
                    int] = DEFAULT_SYSTEM_TRACKING_INT,
            ):
                self._repo_path = repo
                self._experiment_name = experiment
                self._system_tracking_interval = system_tracking_interval
                self._initialized = False
                self._current_shift = None
                self._run = None

            def setup(self, args, state, model):
                self._initialized = True

                self._run = Run(
                    repo=self._repo_path,
                    experiment=self._experiment_name,
                    system_tracking_interval=self._system_tracking_interval,
                )

                combined_dict = {**args.to_sanitized_dict()}
                self._run['hparams'] = combined_dict

                # Store model configs as well
                # if hasattr(model, 'config') and model.config is not None:
                #     model_config = model.config.to_dict()
                #     self._run['model'] = model_config

            def on_train_begin(self,
                               args,
                               state,
                               control,
                               model=None,
                               **kwargs):
                if not self._initialized:
                    self.setup(args, state, model)
                self._current_shift = 'train'

            def on_evaluate(self, args, state, control, **kwargs):
                self._current_shift = 'val'

            def on_prediction_step(self, args, state, control, **kwargs):
                self._current_shift = 'pred'

            def on_log(self,
                       args,
                       state,
                       control,
                       model=None,
                       logs=None,
                       **kwargs):
                if not self._initialized:
                    self.setup(args, state, model)

                context = {
                    'subset': self._current_shift,
                }
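                # forward every logged value to Aim, tagged with the current phase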
                for log_name, log_value in logs.items():
                    self._run.track(log_value, name=log_name, context=context)

            def on_epoch_end(self, args, state, control, **kwargs):
                pass

            def __del__(self):
                if self._initialized and self._run:
                    del self._run
                    self._run = None
Example 14
class Session:
    sessions = {}

    _are_exit_listeners_set = False
    _original_sigint_handler = None
    _original_sigterm_handler = None

    @deprecated
    def __init__(self, repo: Optional[str] = None,
                 experiment: Optional[str] = None,
                 flush_frequency: int = 0,  # unused
                 block_termination: bool = True,  # unused
                 run: Optional[str] = None,
                 system_tracking_interval: Optional[int] = DEFAULT_SYSTEM_TRACKING_INT):

        self._repo = Repo.from_path(repo) if repo else Repo.default_repo()
        self._repo_path = self._repo.path
        self._run = Run(run, repo=self._repo, experiment=experiment,
                        system_tracking_interval=system_tracking_interval)
        self._run_hash = self._run.hashname
        self.active = True

        Session.sessions.setdefault(self._repo_path, [])
        Session.sessions[self._repo_path].append(self)

        # Bind signal listeners
        self._set_exit_handlers()

    @property
    def run_hash(self):
        return self._run_hash

    @property
    def repo_path(self):
        return self._repo_path

    @exception_resistant
    def track(self, *args, **kwargs):
        val = args[0]
        name = kwargs.pop('name')
        step = kwargs.pop('step', None)
        epoch = kwargs.pop('epoch', None)
        # iterate over a snapshot of the keys: deleting from kwargs while iterating
        # over it directly would raise a RuntimeError
        for key in list(kwargs.keys()):
            if key.startswith('__'):
                del kwargs[key]

        self._run.track(val, name=name, step=step, epoch=epoch, context=kwargs)

    @exception_resistant
    def set_params(self, params: dict, name: Optional[str] = None):
        if name is None:
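            # an Ellipsis key assigns the dict at the top level of the run's parameters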
            self._run[...] = params
        else:
            self._run[name] = params

    def flush(self):
        pass

    @exception_resistant
    def close(self):
        if not self.active:
            raise Exception('session is closed')
        if self._run:
            del self._run
            self._run = None
        if self._repo_path in Session.sessions \
                and self in Session.sessions[self._repo_path]:
            Session.sessions[self._repo_path].remove(self)
            if len(Session.sessions[self._repo_path]) == 0:
                del Session.sessions[self._repo_path]
        self.active = False

    @classmethod
    def _close_sessions(cls, *args, **kwargs):
        threads = []
        for _, sessions in cls.sessions.items():
            for session in sessions:
                th = threading.Thread(target=session.close)
                th.daemon = True
                threads.append(th)

        for th in threads:
            th.start()

        for th in threads:
            th.join()

        if len(args):
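            # signal 15 (SIGTERM): restore the original handler and re-raise so the process exits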
            if args[0] == 15:
                signal.signal(signal.SIGTERM, cls._original_sigterm_handler)
                os.kill(os.getpid(), 15)
            # elif args[0] == 2:
            #     signal.signal(signal.SIGINT, cls._original_sigint_handler)
            #     os.kill(os.getpid(), 2)

    @classmethod
    def _set_exit_handlers(cls):
        if not cls._are_exit_listeners_set:
            cls._are_exit_listeners_set = True
            # cls._original_sigint_handler = signal.getsignal(signal.SIGINT)
            cls._original_sigterm_handler = signal.getsignal(signal.SIGTERM)

            atexit.register(cls._close_sessions)
            # signal.signal(signal.SIGINT, cls._close_sessions)
            signal.signal(signal.SIGTERM, cls._close_sessions)
Example 15
    @classmethod
    def setUpClass(cls) -> None:
        super().setUpClass()

        # run1 -> context {'subset': 'train'} -> Image[]
        #      |                              -> integers
        #      |                              -> floats
        #      -> context {'subset': 'val'}   -> floats
        # ------------------------------------------------
        # run2 -> context {'subset': 'train'} -> Image
        #      |                              -> floats
        #      -> context {'subset': 'val'}   -> floats

        run1 = Run(system_tracking_interval=None)
        cls.run1_hash = run1.hash
        images = generate_image_set(img_count=2, caption_prefix='Image 0')
        run1.track(images, name='image_lists', context={'subset': 'train'})
        run1.track(random.random(), name='floats', context={'subset': 'train'})
        run1.track(random.randint(100, 200),
                   name='integers',
                   context={'subset': 'train'})
        run1.track(random.random(), name='floats', context={'subset': 'val'})

        run2 = Run(system_tracking_interval=None)
        run2.track(images[0], name='single_images', context={'subset': 'val'})
        run2.track(random.random(), name='floats', context={'subset': 'train'})
        run2.track(random.random(), name='floats', context={'subset': 'val'})
        cls.run2_hash = run2.hash