Example #1
def _func():
    run = Run(system_tracking_interval=None)
    run_hash.append(run.hash)
    self.assertIsNone(run.end_time)
    for i in range(10):
        run.track(i, name='seq')
    self.assertIsNone(run.end_time)
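This is only the inner closure: run_hash is a list created in the enclosing test so the hash can be checked after the run goes out of scope. A minimal sketch of how the surrounding test might look, assuming the _query_run_finalized_at helper shown in Example #5:

def test_implicit_run_delete(self):
    run_hash = []

    def _func():
        ...  # body shown above; closes over run_hash and self

    _func()  # the Run created inside _func() goes out of scope here
    time.sleep(.1)
    self.assertIsNotNone(self._query_run_finalized_at(run_hash[0]))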
Example #2
def before_training(self, model):
    self.aim_run = Run(
        repo=self.repo,
        experiment=self.experiment,
        system_tracking_interval=self.system_tracking_interval)
    self.initialized = True
    return model
Example #3
def _track_legacy_run_step(run: Run, metric_name: str, context: dict, val):
    (value, step, epoch, timestamp) = val

    from aim.storage.context import Context, Metric
    if context is None:
        context = {}

    ctx = Context(context)
    metric = Metric(metric_name, ctx)

    if ctx not in run.contexts:
        run.meta_tree['contexts', ctx.idx] = ctx.to_dict()
        run.meta_run_tree['contexts', ctx.idx] = ctx.to_dict()
        run.contexts[ctx] = ctx.idx
        run._idx_to_ctx[ctx.idx] = ctx

    time_view = run.series_run_tree.view(metric.selector).array('time').allocate()
    val_view = run.series_run_tree.view(metric.selector).array('val').allocate()
    epoch_view = run.series_run_tree.view(metric.selector).array('epoch').allocate()

    max_idx = run.series_counters.get((ctx, metric_name), None)
    if max_idx is None:
        max_idx = len(val_view)
    if max_idx == 0:
        run.meta_tree['traces', ctx.idx, metric_name] = 1
    run.meta_run_tree['traces', ctx.idx, metric_name, "last"] = value

    run.series_counters[ctx, metric_name] = max_idx + 1

    time_view[step] = timestamp
    val_view[step] = value
    epoch_view[step] = epoch
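For reference, val is unpacked as a (value, step, epoch, timestamp) tuple, so a call might look like the following sketch (the run, metric name, context and values are placeholders):

import time

run = Run(system_tracking_interval=None)
_track_legacy_run_step(run, 'loss', {'subset': 'train'}, (0.42, 3, 1, time.time()))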
Example #4
async def custom_aligned_metrics_streamer(requested_runs: List[AlignedRunIn],
                                          x_axis: str) -> bytes:
    for run_data in requested_runs:
        run_hashname = run_data.run_id
        requested_traces = run_data.traces
        run = Run(hashname=run_hashname)

        traces_list = []
        for trace_data in requested_traces:
            context = Context(trace_data.context)
            trace = run.get_trace(metric_name=trace_data.metric_name,
                                  context=context)
            x_axis_trace = run.get_trace(metric_name=x_axis, context=context)
            if not (trace and x_axis_trace):
                continue

            _slice = slice(*trace_data.slice)
            iters = trace.values.sparse_numpy()[0]
            sliced_iters = sliced_np_array(iters, _slice)
            x_axis_iters, x_axis_values = collect_x_axis_data(
                x_axis_trace, sliced_iters)
            traces_list.append({
                'metric_name': trace.name,
                'context': trace.context.to_dict(),
                'x_axis_values': x_axis_values,
                'x_axis_iters': x_axis_iters,
            })
        run_dict = {run_hashname: traces_list}
        encoded_tree = encode_tree(run_dict)
        yield collect_run_streamable_data(encoded_tree)
Example #5
def test_explicit_run_delete(self):
    run = Run(system_tracking_interval=None)
    run_hash = run.hash
    for i in range(10):
        run.track(i, name='seq')
    del run
    time.sleep(.1)
    self.assertIsNotNone(self._query_run_finalized_at(run_hash))
Example #6
    def test_different_types_on_different_contexts_and_runs(self):
        run = Run(system_tracking_interval=None)
        # same sequence name, different contexts
        run.track(1., name='numbers', context={'type': 'float'})
        run.track(1, name='numbers', context={'type': 'integer'})

        run2 = Run(system_tracking_interval=None)
        # same sequence name, different runs
        run2.track(1, name='numbers', context={'type': 'float'})
Example #7
            def setup(self, args, state, model):
                self._initialized = True

                self._run = Run(
                    repo=self._repo_path,
                    experiment=self._experiment_name,
                    system_tracking_interval=self._system_tracking_interval,
                )

                combined_dict = {**args.to_sanitized_dict()}
                self._run['hparams'] = combined_dict
Example #8
        class _XgboostCallback(TrainingCallback):
            def __init__(self,
                         repo: Optional[str] = None,
                         experiment: Optional[str] = None,
                         system_tracking_interval: Optional[
                             int] = DEFAULT_SYSTEM_TRACKING_INT):
                super().__init__()
                self.repo = repo
                self.experiment = experiment
                self.system_tracking_interval = system_tracking_interval
                self.initialized = False
                self.aim_run = None

            def before_training(self, model):
                self.aim_run = Run(
                    repo=self.repo,
                    experiment=self.experiment,
                    system_tracking_interval=self.system_tracking_interval)
                self.initialized = True
                return model

            def after_iteration(self, model, epoch: int,
                                evals_log: CallbackContainer.EvalsLog) -> bool:
                if not evals_log:
                    return False

                for data, metric in evals_log.items():
                    for metric_name, log in metric.items():
                        stdv: Optional[float] = None
                        if isinstance(log[-1], tuple):
                            score = log[-1][0]
                            stdv = log[-1][1]
                        else:
                            score = log[-1]

                        self.aim_run.track(score,
                                           step=0,
                                           name=metric_name,
                                           context={'stdv': False})
                        if stdv is not None:
                            self.aim_run.track(stdv,
                                               step=0,
                                               name=metric_name,
                                               context={'stdv': True})

                return False

            def after_training(self, model):
                if self.initialized and self.aim_run:
                    del self.aim_run
                    self.aim_run = None
                return model
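A callback like _XgboostCallback is typically passed to xgboost.train through the callbacks argument; a minimal sketch with a placeholder dataset and parameters:

import numpy as np
import xgboost as xgb

dtrain = xgb.DMatrix(np.random.rand(100, 4), label=np.random.randint(0, 2, 100))
booster = xgb.train(
    {'objective': 'binary:logistic'},
    dtrain,
    num_boost_round=10,
    evals=[(dtrain, 'train')],  # needed so evals_log is populated in after_iteration
    callbacks=[_XgboostCallback(experiment='xgb-demo')],
)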
Example #9
    def test_incompatible_type_after_tracking_restart(self):
        run = Run(system_tracking_interval=None)
        run_hash = run.hash
        run.track(1., name='numbers', context={})
        run.finalize()
        del run

        new_run = Run(run_hash=run_hash, system_tracking_interval=None)
        with self.assertRaises(ValueError) as cm:
            new_run.track(1, name='numbers', context={})
        exception = cm.exception
        self.assertEqual(
            'Cannot log value \'1\' on sequence \'numbers\'. Incompatible data types.',
            exception.args[0])
Example #10
def collect_requested_traces(run: Run,
                             requested_traces: List[TraceBase],
                             steps_num: int = 200) -> List[dict]:
    processed_traces_list = []
    for requested_trace in requested_traces:
        metric_name = requested_trace.metric_name
        context = Context(requested_trace.context)
        trace = run.get_trace(metric_name=metric_name, context=context)
        if not trace:
            continue

        iters, values = trace.values.sparse_list()

        num_records = len(values)
        step = (num_records // steps_num) or 1
        _slice = slice(0, num_records, step)

        processed_traces_list.append({
            'metric_name': trace.name,
            'context': trace.context.to_dict(),
            'values': sliced_array(values, _slice),
            'iters': sliced_array(iters, _slice),
        })

    return processed_traces_list
Example #11
def experiment(self) -> Run:
    if self._run is None:
        self._run = Run(repo=self._repo_path,
                        experiment=self._experiment_name,
                        system_tracking_interval=self._system_tracking_interval)
    return self._run
Example #12
def requested_figure_object_traces_streamer(
        run: Run, requested_traces: List[TraceBase], rec_range, rec_num: int = 50
) -> List[dict]:
    for requested_trace in requested_traces:
        trace_name = requested_trace.name
        context = Context(requested_trace.context)
        trace = run.get_figure_sequence(name=trace_name, context=context)
        if not trace:
            continue

        record_range_missing = rec_range.start is None or rec_range.stop is None
        if record_range_missing:
            rec_range = IndexRange(trace.first_step(), trace.last_step() + 1)

        steps = []
        values = []
        steps_vals = trace.values.items_in_range(
            rec_range.start, rec_range.stop, rec_num
        )
        for step, val in steps_vals:
            steps.append(step)
            values.append(preparer(val, trace, step, decode=True))

        trace_dict = {
            'name': trace.name,
            'context': trace.context.to_dict(),
            'values': values,
            'iters': steps,
            'record_range': (trace.first_step(), trace.last_step() + 1),
        }
        encoded_tree = encode_tree(trace_dict)
        yield collect_run_streamable_data(encoded_tree)
Example #13
        def __init__(self,
                     repo: Optional[str] = None,
                     experiment: Optional[str] = None,
                     run: Optional[Run] = None):
            super(KerasTrackerCallback, self).__init__()

            if run is None:
                if repo is None and experiment is None:
                    self._run = Run()
                else:
                    self._run = Run(repo=repo, experiment=experiment)
            else:
                print('Passing Run instance to AimCallback will be '
                      'deprecated in future versions, '
                      'pass the callback arguments explicitly')
                self._run = run
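The callback is then attached like any other Keras callback; a minimal sketch, assuming a compiled model and training arrays x and y:

callback = KerasTrackerCallback(experiment='keras-demo')
model.fit(x, y, epochs=5, callbacks=[callback])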
Example #14
def fill_up_test_data():
    remove_test_data()

    # put dummy data into test repo with 10 runs, tracking 2 metrics over 3 contexts
    repo = Repo.default_repo()
    run_hashes = [hex(random.getrandbits(64))[-7:] for _ in range(10)]

    contexts = [{
        'is_training': True,
        'subset': 'train'
    }, {
        'is_training': True,
        'subset': 'val'
    }, {
        'is_training': False
    }]
    metrics = ['loss', 'accuracy']

    with repo.structured_db:
        try:
            for idx, hash_name in enumerate(run_hashes):
                run = Run(hashname=hash_name,
                          repo=repo,
                          system_tracking_interval=None)
                run['hparams'] = create_run_params()
                run['run_index'] = idx
                run['start_time'] = datetime.datetime.utcnow().isoformat()
                run['name'] = f'Run # {idx}'
                run.props.name = run['name']

                metric_contexts = itertools.product(metrics, contexts)
                for metric_context in metric_contexts:
                    metric = metric_context[0]
                    context = metric_context[1]
                    if metric == 'accuracy' and 'subset' in context:
                        continue
                    else:
                        # track 100 values per run
                        for step in range(100):
                            val = 1.0 - 1.0 / (step + 1)
                            run.track(val,
                                      name=metric,
                                      step=step,
                                      epoch=1,
                                      context=context)
        finally:
            del run
Example #15
    def iter_runs(self) -> Iterator['Run']:
        """Iterate over Repo runs.

        Yields:
            next :obj:`Run` in readonly mode .
        """
        self.meta_tree.preload()
        for run_name in self.meta_tree.subtree('chunks').keys():
            yield Run(run_name, repo=self, read_only=True)
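Iterating the repository then reduces to a plain loop over read-only Run objects; a minimal sketch, assuming a default repository with some runs in it:

repo = Repo.default_repo()
for run in repo.iter_runs():
    print(run.hash, run.name)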
Example #16
def finalize_stalled_runs(repo: 'Repo', runs: set):
    runs_in_progress = []
    for run_hash in tqdm.tqdm(runs,
                              desc='Finalizing stalled runs',
                              total=len(runs)):
        try:
            run = Run(run_hash=run_hash,
                      repo=repo,
                      system_tracking_interval=None)
        except filelock.Timeout:
            runs_in_progress.append(run_hash)
        else:
            # TODO: [AT] handle lock timeout on index db (retry logic).
            run.finalize()
    if runs_in_progress:
        click.echo('Skipped indexing for the following runs in progress:')
        for run_hash in runs_in_progress:
            click.secho(f'\t\'{run_hash}\'', fg='yellow')
Example #17
    def __init__(self, repo: Optional[str] = None,
                 experiment: Optional[str] = None,
                 flush_frequency: int = 0,  # unused
                 block_termination: bool = True,  # unused
                 run: Optional[str] = None,
                 system_tracking_interval: Optional[int] = DEFAULT_SYSTEM_TRACKING_INT):

        self._repo = Repo.from_path(repo) if repo else Repo.default_repo()
        self._repo_path = self._repo.path
        self._run = Run(run, repo=self._repo, experiment=experiment,
                        system_tracking_interval=system_tracking_interval)
        self._run_hash = self._run.hashname
        self.active = True

        Session.sessions.setdefault(self._repo_path, [])
        Session.sessions[self._repo_path].append(self)

        # Bind signal listeners
        self._set_exit_handlers()
Example #18
def setUpClass(cls) -> None:
    super().setUpClass()
    cls.image_blobs = {}
    run = Run(run_hash=cls.run_hash, read_only=True)
    empty_context = Context({})
    for step in range(10):
        for idx in range(5):
            img_view = run.series_run_tree.subtree(
                (empty_context.idx, 'random_images', 'val', step, idx))
            cls.image_blobs[img_view['caption']] = img_view['data'].load()
Example #19
def test_explicit_run_finalize(self):
    run = Run(system_tracking_interval=None)
    for i in range(10):
        run.track(i, name='seq')
    self.assertIsNone(run.end_time)
    run.finalize()
    self.assertIsNotNone(run.end_time)
Example #20
def iter_runs_from_cache(self, offset: str = None) -> Iterator['Run']:
    db = self.structured_db
    cache = db.caches.get('runs_cache')
    if cache:
        run_names = cache.keys()
        try:
            offset_idx = run_names.index(offset) + 1
        except ValueError:
            offset_idx = 0
        for run_name in run_names[offset_idx:]:
            yield Run(run_name, repo=self, read_only=True)
    else:
        return  # a bare return ends the generator; raising StopIteration is an error since PEP 479
Example #21
    def get_run(self, run_hash: str) -> Optional['Run']:
        """Get run if exists.

        Args:
            run_hash (str): Run hash.
        Returns:
            :obj:`Run` object if hash is found in repository. `None` otherwise.
        """
        # TODO: [MV] optimize existence check for run
        if run_hash is None or run_hash not in self.meta_tree.subtree(
                'chunks').keys():
            return None
        else:
            return Run(run_hash, repo=self, read_only=True)
Example #22
def test_incompatible_type_during_tracking(self):
    run = Run(system_tracking_interval=None)
    run.track(1., name='numbers', context={})
    with self.assertRaises(ValueError) as cm:
        run.track(1, name='numbers', context={})
    exception = cm.exception
    self.assertEqual(
        'Cannot log value \'1\' on sequence \'numbers\'. Incompatible data types.',
        exception.args[0])
Example #23
def setUpClass(cls) -> None:
    super().setUpClass()
    run = Run(repo=cls.repo)
    run['images_per_step'] = 16
    for step in range(100):
        images = generate_image_set(img_count=16,
                                    caption_prefix=f'Image {step}')
        run.track(images, name='random_images')
        run.track(random.random(), name='random_values')
    cls.run_hash = run.hash
Example #24
def convert_run(lrun: LegacyRun, repo: Repo, legacy_run_map, skip_failed):
    try:
        run = Run(
            repo=repo, system_tracking_interval=None
        )  # do not track system metrics as they already logged if needed

        lrun.open_storage()
        if lrun.params.get(AIM_MAP_METRICS_KEYWORD):
            del lrun.params[
                AIM_MAP_METRICS_KEYWORD]  # set internally. no need to copy
        run[...] = lrun.params
        run['v2_params'] = {'run_hash': lrun.run_hash}
        if 'process' in lrun.config:
            run['v2_params',
                'start_date'] = lrun.config['process']['start_date']
            run['v2_params',
                'finish_date'] = lrun.config['process']['finish_date']

        run.experiment = lrun.experiment_name
        if lrun.config.get('archived'):
            run.archived = True

        run_metrics = {}
        legacy_run_map[lrun.run_hash] = run_metrics
        for metric in lrun.get_all_metrics().values():
            try:
                metric.open_artifact()
                run_metrics[metric.name] = []
                for trace in metric.get_all_traces():
                    metric_name = metric.name
                    context = trace.context
                    run_metrics[metric.name].append(context)
                    for r in trace.read_records(slice(0, None, 1)):
                        step_record, metric_record = deserialize_pb(r)
                        # no need to track in a separate thread. use _track_impl directly.
                        run._track_impl(metric_record.value,
                                        step_record.timestamp,
                                        metric_name,
                                        step_record.step,
                                        step_record.epoch,
                                        context=context)
            except Exception:
                metric.close_artifact()
                raise
            finally:
                metric.close_artifact()
        del run
    except Exception as e:
        click.echo(
            f'\nFailed to convert run {lrun.run_hash}. Reason: {str(e)}.',
            err=True)
        if not skip_failed:
            raise
    finally:
        lrun.close_storage()
Example #25
    def setUpClass(cls) -> None:
        super().setUpClass()

        run = Run(system_tracking_interval=None)
        cls.run_hash = run.hash

        for step in range(5):
            images = generate_image_set(img_count=5,
                                        caption_prefix=f'Image {step}')
            run.track(images, name='image_lists', context={})
            run.track(images[0], name='single_images', context={})
Example #26
def requested_image_traces_streamer(run: Run,
                                    requested_traces: List[TraceBase],
                                    rec_range, idx_range,
                                    rec_num: int = 50, idx_num: int = 5) -> List[dict]:
    for requested_trace in requested_traces:
        trace_name = requested_trace.name
        context = Context(requested_trace.context)
        trace = run.get_image_sequence(name=trace_name, context=context)
        if not trace:
            continue

        record_range_missing = rec_range.start is None or rec_range.stop is None
        if record_range_missing:
            rec_range = IndexRange(trace.first_step(), trace.last_step() + 1)
        index_range_missing = idx_range.start is None or idx_range.stop is None
        if index_range_missing:
            idx_range = IndexRange(0, trace.record_length() or 1)

        rec_length = trace.record_length() or 1
        idx_step = rec_length // idx_num or 1
        idx_slice = slice(idx_range.start, idx_range.stop, idx_step)

        steps_vals = trace.values.items_in_range(rec_range.start, rec_range.stop, rec_num)
        steps = []
        values = []
        for step, val in steps_vals:
            steps.append(step)
            if isinstance(val, list):
                values.append(
                    img_collection_record_to_encodable(sliced_custom_object_record(val, idx_slice), trace, step)
                )
            elif idx_slice.start == 0:
                values.append(img_record_to_encodable(val, trace, step))
            else:
                values.append([])

        trace_dict = {
            'record_range': (trace.first_step(), trace.last_step() + 1),
            'index_range': (0, rec_length),
            'name': trace.name,
            'context': trace.context.to_dict(),
            'values': values,
            'iters': steps,
        }
        encoded_tree = encode_tree(trace_dict)
        yield collect_run_streamable_data(encoded_tree)
Example #27
    def _pack_run_data(run_: Run, traces_: list):
        _rec_range = (
            trcs_rec_range if record_range_missing or calc_total_ranges else rec_range
        )

        run_dict = {
            run_.hash: {
                'ranges': {
                    'record_range': [_rec_range.start, _rec_range.stop],
                    'record_slice': [rec_slice.start, rec_slice.stop, rec_slice.step],
                },
                'params': run_.get(...),
                'traces': traces_,
                'props': get_run_props(run_),
            }
        }
        encoded_tree = encode_tree(run_dict)
        return collect_run_streamable_data(encoded_tree)
Example #28
async def get_experiment_runs_api(exp_id: str,
                                  limit: Optional[int] = None,
                                  offset: Optional[str] = None,
                                  factory=Depends(object_factory)):
    project = Project()

    exp = factory.find_experiment(exp_id)
    if not exp:
        raise HTTPException(status_code=404)

    from aim.sdk.run import Run

    cache_name = 'exp_runs'
    project.repo.run_props_cache_hint = cache_name
    project.repo.structured_db.invalidate_cache(cache_name)
    project.repo.structured_db.init_cache(cache_name, exp.get_runs,
                                          lambda run_: run_.hash)
    exp_runs = []

    run_hashes = [run.hash for run in exp.runs]
    offset_idx = 0
    if offset:
        try:
            offset_idx = run_hashes.index(offset) + 1
        except ValueError:
            pass
    if limit:
        run_hashes = run_hashes[offset_idx:offset_idx + limit]

    for run_hash in run_hashes:
        run = Run(run_hash, repo=project.repo, read_only=True)
        exp_runs.append({
            'run_id': run.hash,
            'name': run.name,
            'creation_time': run.creation_time,
            'end_time': run.end_time
        })

    project.repo.structured_db.invalidate_cache(cache_name)
    project.repo.run_props_cache_hint = None

    response = {'id': exp.uuid, 'runs': exp_runs}
    return response
Example #29
async def get_tagged_runs_api(tag_id: str, factory=Depends(object_factory)):
    project = Project()

    tag = factory.find_tag(tag_id)
    if not tag:
        raise HTTPException

    from aim.sdk.run import Run

    cache_name = 'tag_runs'
    project.repo.run_props_cache_hint = cache_name
    project.repo.structured_db.invalidate_cache(cache_name)
    project.repo.structured_db.init_cache(cache_name, tag.get_runs,
                                          lambda run_: run_.hash)

    tag_runs = []
    for tagged_run in tag.runs:
        run = Run(tagged_run.hash, repo=project.repo, read_only=True)
        tag_runs.append({
            'run_id': tagged_run.hash,
            'name': tagged_run.name,
            'creation_time': run.creation_time,
            'end_time': run.end_time,
            'experiment': tagged_run.experiment if tagged_run.experiment else None
        })

    project.repo.structured_db.invalidate_cache(cache_name)
    project.repo.run_props_cache_hint = None

    response = {'id': tag.uuid, 'runs': tag_runs}
    return response
Example #30
        class _HuggingFaceCallback(TrainerCallback):
            def __init__(
                self,
                repo: Optional[str] = None,
                experiment: Optional[str] = None,
                system_tracking_interval: Optional[
                    int] = DEFAULT_SYSTEM_TRACKING_INT,
            ):
                self._repo_path = repo
                self._experiment_name = experiment
                self._system_tracking_interval = system_tracking_interval
                self._initialized = False
                self._current_shift = None
                self._run = None

            def setup(self, args, state, model):
                self._initialized = True

                self._run = Run(
                    repo=self._repo_path,
                    experiment=self._experiment_name,
                    system_tracking_interval=self._system_tracking_interval,
                )

                combined_dict = {**args.to_sanitized_dict()}
                self._run['hparams'] = combined_dict

                # Store model configs as well
                # if hasattr(model, 'config') and model.config is not None:
                #     model_config = model.config.to_dict()
                #     self._run['model'] = model_config

            def on_train_begin(self,
                               args,
                               state,
                               control,
                               model=None,
                               **kwargs):
                if not self._initialized:
                    self.setup(args, state, model)
                self._current_shift = 'train'

            def on_evaluate(self, args, state, control, **kwargs):
                self._current_shift = 'val'

            def on_prediction_step(self, args, state, control, **kwargs):
                self._current_shift = 'pred'

            def on_log(self,
                       args,
                       state,
                       control,
                       model=None,
                       logs=None,
                       **kwargs):
                if not self._initialized:
                    self.setup(args, state, model)

                context = {
                    'subset': self._current_shift,
                }
                for log_name, log_value in logs.items():
                    self._run.track(log_value, name=log_name, context=context)

            def on_epoch_end(self, args, state, control, **kwargs):
                pass

            def __del__(self):
                if self._initialized and self._run:
                    del self._run
                    self._run = None
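To use such a callback with a transformers Trainer, it would be registered through add_callback; a minimal sketch, assuming trainer is an already-constructed Trainer instance:

trainer.add_callback(_HuggingFaceCallback(experiment='hf-demo'))
trainer.train()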