def setUpClass(cls) -> None:
    """Populate two runs with mixed sequence types across contexts.

    run1 -> context {'subset': 'train'} -> Image[], integers, floats
         -> context {'subset': 'val'}   -> floats
    run2 -> context {'subset': 'train'} -> floats
         -> context {'subset': 'val'}   -> Image, floats
    """
    super().setUpClass()
    first_run = Run(system_tracking_interval=None)
    cls.run1_hash = first_run.hash
    images = generate_image_set(img_count=2, caption_prefix=f'Image 0')
    first_run.track(images, name='image_lists', context={'subset': 'train'})
    first_run.track(random.random(), name='floats', context={'subset': 'train'})
    first_run.track(random.randint(100, 200), name='integers',
                    context={'subset': 'train'})
    first_run.track(random.random(), name='floats', context={'subset': 'val'})

    second_run = Run(system_tracking_interval=None)
    second_run.track(images[0], name='single_images', context={'subset': 'val'})
    second_run.track(random.random(), name='floats', context={'subset': 'train'})
    second_run.track(random.random(), name='floats', context={'subset': 'val'})
    cls.run2_hash = second_run.hash
def test_different_types_on_different_contexts_and_runs(self):
    """Same sequence name is allowed when either the context or the run differs."""
    first = Run(system_tracking_interval=None)
    # Same sequence name, different contexts: no type conflict expected.
    first.track(1., name='numbers', context={'type': 'float'})
    first.track(1, name='numbers', context={'type': 'integer'})
    second = Run(system_tracking_interval=None)
    # Same sequence name and context, but tracked on a different run.
    second.track(1, name='numbers', context={'type': 'float'})
def setUpClass(cls) -> None:
    """Create two runs mixing scalar metrics with image sequences."""
    super().setUpClass()
    first = Run(system_tracking_interval=None)
    first.track(1., name='metric1', context={'a': True})
    first.track(generate_image_set(1), name='images1', context={'a': True})
    first.track(generate_image_set(1), name='images1', context={'b': True})

    second = Run(system_tracking_interval=None)
    second.track(1, name='metric2', context={'a': True})
    second.track(generate_image_set(1)[0], name='images2', context={'b': True})
def test_incompatible_type_after_tracking_restart(self):
    """Sequence type checks must survive a run re-open: floats reject ints."""
    original = Run(system_tracking_interval=None)
    saved_hash = original.hash
    original.track(1., name='numbers', context={})
    original.finalize()
    del original

    reopened = Run(run_hash=saved_hash, system_tracking_interval=None)
    with self.assertRaises(ValueError) as cm:
        reopened.track(1, name='numbers', context={})
    self.assertEqual(
        'Cannot log value \'1\' on sequence \'numbers\'. Incompatible data types.',
        cm.exception.args[0])
def before_training(self, model):
    """Open the Aim run lazily right before training starts.

    Returns the model unchanged so the hook can be chained.
    """
    self.aim_run = Run(
        repo=self.repo,
        experiment=self.experiment,
        system_tracking_interval=self.system_tracking_interval,
    )
    self.initialized = True
    return model
def experiment(self) -> Run:
    """Lazily create and memoize the underlying Aim :obj:`Run`."""
    if self._run is not None:
        return self._run
    self._run = Run(
        repo=self._repo_path,
        experiment=self._experiment_name,
        system_tracking_interval=self._system_tracking_interval,
    )
    return self._run
async def custom_aligned_metrics_streamer(requested_runs: List[AlignedRunIn],
                                          x_axis: str) -> bytes:
    """Stream, per requested run, metric traces re-aligned against a custom x-axis metric.

    For every run in `requested_runs`: look up each requested trace and the
    x-axis trace under the same context, slice the trace's iteration numbers by
    the requested slice, collect the matching x-axis values, and yield one
    encoded payload per run.
    """
    for run_data in requested_runs:
        run_hashname = run_data.run_id
        requested_traces = run_data.traces
        run = Run(hashname=run_hashname)
        traces_list = []
        for trace_data in requested_traces:
            context = Context(trace_data.context)
            trace = run.get_trace(metric_name=trace_data.metric_name,
                                  context=context)
            # The x-axis metric must exist under the SAME context as the trace.
            x_axis_trace = run.get_trace(metric_name=x_axis, context=context)
            # Skip silently when either the metric or its x-axis counterpart is missing.
            if not (trace and x_axis_trace):
                continue
            # trace_data.slice is a (start, stop, step)-style sequence.
            _slice = slice(*trace_data.slice)
            # sparse_numpy()[0] holds the iteration numbers of recorded points.
            iters = trace.values.sparse_numpy()[0]
            sliced_iters = sliced_np_array(iters, _slice)
            x_axis_iters, x_axis_values = collect_x_axis_data(
                x_axis_trace, sliced_iters)
            traces_list.append({
                'metric_name': trace.name,
                'context': trace.context.to_dict(),
                'x_axis_values': x_axis_values,
                'x_axis_iters': x_axis_iters,
            })
        run_dict = {run_hashname: traces_list}
        encoded_tree = encode_tree(run_dict)
        # One streamed chunk per run.
        yield collect_run_streamable_data(encoded_tree)
def test_type_compatibility_for_empty_list(self):
    """Empty lists are typed as generic 'list' until a concrete element narrows them."""
    run = Run(system_tracking_interval=None)
    ctx = Context({})
    seq_name = 'obj_list'
    sequence_info = run.meta_run_tree.subtree(('traces', ctx.idx, seq_name))
    typed_sequences_info = run.meta_tree.subtree('traces_types')

    # First empty list: generic 'list' dtype is registered.
    run.track([], name=seq_name, context={})
    self.assertEqual('list', sequence_info['dtype'])
    self.assertEqual(1, typed_sequences_info['list', ctx.idx, seq_name])
    self.assertIsNone(
        typed_sequences_info.get(('list(float)', ctx.idx, seq_name), None))

    # A second empty list keeps the generic dtype.
    run.track([], name=seq_name, context={})
    self.assertEqual('list', sequence_info['dtype'])
    self.assertIsNone(
        typed_sequences_info.get(('list(float)', ctx.idx, seq_name), None))

    # First typed element narrows the dtype to 'list(float)'.
    run.track([1.], name=seq_name, context={})
    self.assertEqual('list(float)', sequence_info['dtype'])
    self.assertEqual(1, typed_sequences_info['list(float)', ctx.idx, seq_name])

    # Empty lists stay compatible after the narrowing.
    run.track([], name=seq_name, context={})
    self.assertEqual('list(float)', sequence_info['dtype'])

    # An int list now clashes with 'list(float)'.
    with self.assertRaises(ValueError) as cm:
        run.track([5], name=seq_name, context={})
    self.assertEqual(
        f'Cannot log value \'{[5]}\' on sequence \'{seq_name}\'. Incompatible data types.',
        cm.exception.args[0])
def _func():
    """Closure body: open a run, track values, assert it is never finalized.

    NOTE(review): relies on `run_hash` and `self` from the enclosing test scope.
    """
    run = Run(system_tracking_interval=None)
    run_hash.append(run.hash)
    self.assertIsNone(run.end_time)
    for value in range(10):
        run.track(value, name='seq')
    self.assertIsNone(run.end_time)
def __init__(self,
             repo: Optional[str] = None,
             experiment: Optional[str] = None,
             run: Optional[Run] = None):
    """Create a Keras callback that either owns a fresh Run or wraps a given one."""
    super(KerasTrackerCallback, self).__init__()
    if run is not None:
        # Wrapping an externally-created Run is a deprecated usage path.
        print('Passing Run instance to AimCallback will be '
              'deprecated in future versions, '
              'pass the callback arguments explicitly')
        self._run = run
        return
    if repo is None and experiment is None:
        self._run = Run()
    else:
        self._run = Run(repo=repo, experiment=experiment)
def test_explicit_run_finalize(self):
    """finalize() must stamp end_time; it is unset while tracking is ongoing."""
    run = Run(system_tracking_interval=None)
    for step in range(10):
        run.track(step, name='seq')
    self.assertIsNone(run.end_time)
    run.finalize()
    self.assertIsNotNone(run.end_time)
def test_explicit_run_delete(self):
    """Dropping the last reference to a run finalizes it in the background."""
    run = Run(system_tracking_interval=None)
    saved_hash = run.hash
    for step in range(10):
        run.track(step, name='seq')
    del run
    time.sleep(.1)  # give the background finalizer a moment to flush
    self.assertIsNotNone(self._query_run_finalized_at(saved_hash))
def test_incompatible_type_during_tracking(self):
    """Tracking an int into an already-float sequence must raise ValueError."""
    run = Run(system_tracking_interval=None)
    run.track(1., name='numbers', context={})
    with self.assertRaises(ValueError) as cm:
        run.track(1, name='numbers', context={})
    self.assertEqual(
        'Cannot log value \'1\' on sequence \'numbers\'. Incompatible data types.',
        cm.exception.args[0])
def iter_runs(self) -> Iterator['Run']:
    """Iterate over Repo runs.

    Yields:
        next :obj:`Run` in readonly mode .
    """
    self.meta_tree.preload()
    chunk_names = self.meta_tree.subtree('chunks').keys()
    for chunk_name in chunk_names:
        yield Run(chunk_name, repo=self, read_only=True)
def setUpClass(cls) -> None:
    """Track 100 steps of 16 captioned images plus one scalar per step."""
    super().setUpClass()
    run = Run(repo=cls.repo)
    run['images_per_step'] = 16
    for step in range(100):
        batch = generate_image_set(img_count=16,
                                   caption_prefix=f'Image {step}')
        run.track(batch, name='random_images')
        run.track(random.random(), name='random_values')
    cls.run_hash = run.hash
def setUpClass(cls) -> None:
    """Preload raw image blobs keyed by caption for later assertions."""
    super().setUpClass()
    cls.image_blobs = {}
    run = Run(run_hash=cls.run_hash, read_only=True)
    empty_context = Context({})
    for step in range(10):
        for idx in range(5):
            subtree_key = (empty_context.idx, 'random_images', 'val', step, idx)
            img_view = run.series_run_tree.subtree(subtree_key)
            cls.image_blobs[img_view['caption']] = img_view['data'].load()
def convert_run(lrun: LegacyRun, repo: Repo, legacy_run_map, skip_failed):
    """Convert one legacy (v1) run into a v2 :obj:`Run` inside `repo`.

    Copies params, experiment name, archived flag and start/finish dates,
    then replays every metric record via `_track_impl`. Records the converted
    metric contexts into `legacy_run_map[lrun.run_hash]`. On failure, logs the
    error and re-raises unless `skip_failed` is truthy; the legacy storage is
    always closed.
    """
    try:
        run = Run(
            repo=repo,
            system_tracking_interval=None
        )  # do not track system metrics as they already logged if needed
        lrun.open_storage()
        if lrun.params.get(AIM_MAP_METRICS_KEYWORD):
            del lrun.params[
                AIM_MAP_METRICS_KEYWORD]  # set internally. no need to copy
        # Bulk-assign all remaining legacy params onto the new run.
        run[...] = lrun.params
        run['v2_params'] = {'run_hash': lrun.run_hash}
        if 'process' in lrun.config:
            run['v2_params', 'start_date'] = lrun.config['process']['start_date']
            run['v2_params', 'finish_date'] = lrun.config['process']['finish_date']
        run.experiment = lrun.experiment_name
        if lrun.config.get('archived'):
            run.archived = True
        run_metrics = {}
        legacy_run_map[lrun.run_hash] = run_metrics
        for metric in lrun.get_all_metrics().values():
            try:
                metric.open_artifact()
                run_metrics[metric.name] = []
                for trace in metric.get_all_traces():
                    metric_name = metric.name
                    context = trace.context
                    run_metrics[metric.name].append(context)
                    # Replay every record in order.
                    for r in trace.read_records(slice(0, None, 1)):
                        step_record, metric_record = deserialize_pb(r)
                        # no need to track in a separate thread. use _track_impl directly.
                        run._track_impl(metric_record.value,
                                        step_record.timestamp, metric_name,
                                        step_record.step, step_record.epoch,
                                        context=context)
            except Exception:
                # NOTE(review): `finally` below closes the artifact again after
                # this close — presumably close_artifact is idempotent; confirm.
                metric.close_artifact()
                raise
            finally:
                metric.close_artifact()
        # Drop the reference so the run is flushed/finalized before returning.
        del run
    except Exception as e:
        click.echo(
            f'\nFailed to convert run {lrun.run_hash}. Reason: {str(e)}.',
            err=True)
        if not skip_failed:
            raise
    finally:
        lrun.close_storage()
def setup(self, args, state, model):
    """Open the Aim run and record the sanitized trainer args as hyperparams."""
    self._initialized = True
    self._run = Run(
        repo=self._repo_path,
        experiment=self._experiment_name,
        system_tracking_interval=self._system_tracking_interval,
    )
    self._run['hparams'] = {**args.to_sanitized_dict()}
def setUpClass(cls) -> None:
    """Track 5 steps of image lists alongside a single image per step."""
    super().setUpClass()
    run = Run(system_tracking_interval=None)
    cls.run_hash = run.hash
    for step in range(5):
        batch = generate_image_set(img_count=5,
                                   caption_prefix=f'Image {step}')
        run.track(batch, name='image_lists', context={})
        run.track(batch[0], name='single_images', context={})
def iter_runs_from_cache(self, offset: str = None) -> Iterator['Run']:
    """Iterate over cached runs, optionally resuming after `offset`.

    Args:
        offset: run hash to resume after; iteration starts from the
            beginning when it is None or not found in the cache.

    Yields:
        next :obj:`Run` in read-only mode; nothing when the 'runs_cache'
        cache is absent.
    """
    db = self.structured_db
    cache = db.caches.get('runs_cache')
    if not cache:
        # BUG FIX: the original `raise StopIteration` inside this generator
        # is converted to RuntimeError by PEP 479 (Python 3.7+); a plain
        # `return` ends the iteration cleanly instead of crashing callers.
        return
    run_names = cache.keys()
    try:
        offset_idx = run_names.index(offset) + 1
    except ValueError:
        offset_idx = 0
    for run_name in run_names[offset_idx:]:
        yield Run(run_name, repo=self, read_only=True)
def get_run(self, run_hash: str) -> Optional['Run']:
    """Get run if exists.

    Args:
        run_hash (str): Run hash.
    Returns:
        :obj:`Run` object if hash is found in repository. `None` otherwise.
    """
    # TODO: [MV] optimize existence check for run
    if run_hash is None:
        return None
    if run_hash not in self.meta_tree.subtree('chunks').keys():
        return None
    return Run(run_hash, repo=self, read_only=True)
def fill_up_test_data():
    """Populate the default repo with 10 dummy runs: 2 metrics over 3 contexts."""
    remove_test_data()
    repo = Repo.default_repo()
    run_hashes = [hex(random.getrandbits(64))[-7:] for _ in range(10)]
    contexts = [
        {'is_training': True, 'subset': 'train'},
        {'is_training': True, 'subset': 'val'},
        {'is_training': False},
    ]
    metrics = ['loss', 'accuracy']
    with repo.structured_db:
        try:
            for idx, hash_name in enumerate(run_hashes):
                run = Run(hashname=hash_name,
                          repo=repo,
                          system_tracking_interval=None)
                run['hparams'] = create_run_params()
                run['run_index'] = idx
                run['start_time'] = datetime.datetime.utcnow().isoformat()
                run['name'] = f'Run # {idx}'
                run.props.name = run['name']
                for metric, context in itertools.product(metrics, contexts):
                    # 'accuracy' is only tracked for the subset-less context.
                    if metric == 'accuracy' and 'subset' in context:
                        continue
                    # track 100 values per run
                    for step in range(100):
                        val = 1.0 - 1.0 / (step + 1)
                        run.track(val,
                                  name=metric,
                                  step=step,
                                  epoch=1,
                                  context=context)
        finally:
            # Drop the last run reference so it is flushed before db exit.
            del run
def finalize_stalled_runs(repo: 'Repo', runs: set):
    """Finalize runs left open by crashed processes; skip ones still locked."""
    runs_in_progress = []
    progress = tqdm.tqdm(runs, desc='Finalizing stalled runs', total=len(runs))
    for run_hash in progress:
        try:
            stalled_run = Run(run_hash=run_hash,
                              repo=repo,
                              system_tracking_interval=None)
        except filelock.Timeout:
            # Still locked by a live process; leave it alone.
            runs_in_progress.append(run_hash)
            continue
        # TODO: [AT] handle lock timeout on index db (retry logic).
        stalled_run.finalize()
    if runs_in_progress:
        click.echo('Skipped indexing for the following runs in progress:')
        for run_hash in runs_in_progress:
            click.secho(f'\t\'{run_hash}\'', fg='yellow')
def __init__(self,
             repo: Optional[str] = None,
             experiment: Optional[str] = None,
             flush_frequency: int = 0,  # unused
             block_termination: bool = True,  # unused
             run: Optional[str] = None,
             system_tracking_interval: Optional[int] = DEFAULT_SYSTEM_TRACKING_INT):
    """Open (or resume) a Run and register this session under its repo path."""
    self._repo = Repo.from_path(repo) if repo else Repo.default_repo()
    self._repo_path = self._repo.path
    self._run = Run(run,
                    repo=self._repo,
                    experiment=experiment,
                    system_tracking_interval=system_tracking_interval)
    self._run_hash = self._run.hashname
    self.active = True
    # setdefault returns the per-repo session list, so append chains onto it.
    Session.sessions.setdefault(self._repo_path, []).append(self)
    # Bind signal listeners
    self._set_exit_handlers()
async def get_experiment_runs_api(exp_id: str,
                                  limit: Optional[int] = None,
                                  offset: Optional[str] = None,
                                  factory=Depends(object_factory)):
    """Return the runs of an experiment with optional offset/limit paging."""
    project = Project()
    exp = factory.find_experiment(exp_id)
    if not exp:
        raise HTTPException(status_code=404)

    from aim.sdk.run import Run

    cache_name = 'exp_runs'
    project.repo.run_props_cache_hint = cache_name
    project.repo.structured_db.invalidate_cache(cache_name)
    project.repo.structured_db.init_cache(cache_name, exp.get_runs,
                                          lambda run_: run_.hash)

    run_hashes = [run.hash for run in exp.runs]
    start = 0
    if offset:
        try:
            start = run_hashes.index(offset) + 1
        except ValueError:
            # Unknown offset hash: page from the beginning.
            start = 0
    # NOTE(review): as in the original, `offset` only takes effect when
    # `limit` is given — confirm whether that is intentional.
    if limit:
        run_hashes = run_hashes[start:start + limit]

    exp_runs = []
    for run_hash in run_hashes:
        run = Run(run_hash, repo=project.repo, read_only=True)
        exp_runs.append({
            'run_id': run.hash,
            'name': run.name,
            'creation_time': run.creation_time,
            'end_time': run.end_time
        })

    project.repo.structured_db.invalidate_cache(cache_name)
    project.repo.run_props_cache_hint = None
    return {'id': exp.uuid, 'runs': exp_runs}
async def get_tagged_runs_api(tag_id: str, factory=Depends(object_factory)):
    """Return every run carrying the given tag."""
    project = Project()
    tag = factory.find_tag(tag_id)
    if not tag:
        raise HTTPException

    from aim.sdk.run import Run

    cache_name = 'tag_runs'
    project.repo.run_props_cache_hint = cache_name
    project.repo.structured_db.invalidate_cache(cache_name)
    project.repo.structured_db.init_cache(cache_name, tag.get_runs,
                                          lambda run_: run_.hash)

    tag_runs = []
    for tagged_run in tag.runs:
        run = Run(tagged_run.hash, repo=project.repo, read_only=True)
        tag_runs.append({
            'run_id': tagged_run.hash,
            'name': tagged_run.name,
            'creation_time': run.creation_time,
            'end_time': run.end_time,
            'experiment': tagged_run.experiment if tagged_run.experiment else None
        })

    project.repo.structured_db.invalidate_cache(cache_name)
    project.repo.run_props_cache_hint = None
    return {'id': tag.uuid, 'runs': tag_runs}
def iter_runs(self) -> Iterator["Run"]: self.meta_tree.preload() for run_name in self.meta_tree.view('chunks').keys(): yield Run(run_name, repo=self, read_only=True)
def get_run(self, hashname: str) -> Optional['Run']:
    """Return the run with `hashname` in read-only mode, or None if absent."""
    if hashname is None:
        return None
    if hashname not in self.meta_tree.view('chunks').keys():
        return None
    return Run(hashname, repo=self, read_only=True)