def retrieve_scale_metrics(runs, metrics, scaled_steps):
    for run in runs:
        if run.get('source') == 'tf_summary':
            run_len = len(run['data'])
            run_range = range(run_len)[scaled_steps.start:
                                       scaled_steps.stop:
                                       scaled_steps.step]
            run_scaled_data = []
            for i in run_range:
                run_scaled_data.append(run['data'][i])
            run['data'] = run_scaled_data
        else:
            # Retrieve aim metrics
            branch_path = os.path.join(PROJECT_PATH, run['branch'])
            storage_path = get_run_objects_path(branch_path, run['hash'])
            run['data'] = []
            records_storage = Storage(storage_path, 'r')
            for metric in metrics:
                try:
                    records_storage.open(metric,
                                         uncommitted_bucket_visible=True)
                    for r in records_storage.read_records(metric,
                                                          scaled_steps):
                        base, metric_record = Metric.deserialize(r)
                        run['data'].append({
                            'value': metric_record.value,
                            'step': base.step,
                            'epoch': base.epoch if base.has_epoch else None,
                        })
                    records_storage.close()
                except:
                    pass
def test_simple_int(self):
    length = 1000
    with tempfile.TemporaryDirectory() as temp_dir:
        storage_writer = Storage(temp_dir, 'w')

        storage_writer.open('loss')
        for i in range(length):
            storage_writer.append_record('loss', str(i).encode())
        storage_writer.close('loss')

        storage_writer.open('accuracy')
        for i in range(length, 2 * length):
            storage_writer.append_record('accuracy', str(i).encode())
        storage_writer.close('accuracy')

        del storage_writer

        storage_reader = Storage(temp_dir, 'r')

        storage_reader.open('loss')
        assert storage_reader.get_records_num('loss') == length
        for i, record in enumerate(storage_reader.read_records('loss')):
            assert i == int(record.decode())
        storage_reader.close('loss')

        storage_reader.open('accuracy')
        assert storage_reader.get_records_num('accuracy') == length
        for i, record in enumerate(storage_reader.read_records('accuracy')):
            assert i + length == int(record.decode())
        storage_reader.close('accuracy')

        del storage_reader
def get_runs_metric(metrics, tag=None, experiments=None, params=None):
    if not metrics:
        return []

    filtered_runs = get_runs_hashes(tag, experiments, params)

    # Get commits data length
    max_commit_len = 0
    for run in filtered_runs:
        branch_path = os.path.join(PROJECT_PATH, run['branch'])
        storage_path = get_run_objects_path(branch_path, run['hash'])
        records_storage = Storage(storage_path, 'r')
        for metric in metrics:
            try:
                records_storage.open(metric,
                                     uncommitted_bucket_visible=True)
                run['num_steps'] = records_storage.get_records_num(metric)
                records_storage.close()
            except:
                run['num_steps'] = 0
        if run['num_steps'] > max_commit_len:
            max_commit_len = run['num_steps']

    # Remove empty runs
    filtered_runs = list(filter(lambda r: r['num_steps'] > 0,
                                filtered_runs))

    # Get tags and colors
    commit_models = db.session.query(Commit, Tag) \
        .join(Tag, Commit.tags) \
        .filter(Commit.hash.in_(map(lambda r: r['hash'],
                                    filtered_runs))).all()

    for i in commit_models:
        if len(i) <= 1 or not i[1].color:
            continue

        commit_model = i[0]
        commit_tag = i[1]
        for commit in filtered_runs:
            if commit['hash'] == commit_model.hash:
                commit['tag'] = {
                    'name': commit_tag.name,
                    'color': commit_tag.color,
                }

    return filtered_runs
import os

from aim.sdk.artifacts.proto.base_pb2 import BaseRecord
from aim.sdk.artifacts.proto.metric_pb2 import MetricRecord
from aimrecords import Storage

storage_path = os.path.join(os.getcwd(), '.aim', 'new-data-model',
                            'index', 'objects')

storage = Storage(storage_path, 'r')

storage.open('loss')
for r in storage.read_records('loss', slice(None, None)):
    base_pb = BaseRecord()
    base_pb.ParseFromString(r)
    metric_pb = MetricRecord()
    metric_pb.ParseFromString(base_pb.artifact)
    print('loss', base_pb, metric_pb)

storage.open('accuracy')
for r in storage.read_records('accuracy', slice(None, None)):
    base_pb = BaseRecord()
    base_pb.ParseFromString(r)
    metric_pb = MetricRecord()
    metric_pb.ParseFromString(base_pb.artifact)
    print('accuracy', base_pb, metric_pb)

storage.close()
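# A minimal sketch (not part of the aim/aimrecords API) that wraps the manual
# parsing above into a helper. The `Metric.deserialize(r)` call used in the
# server-side snippets presumably performs equivalent unpacking, but that is
# an assumption; only the two-step protobuf parsing shown above is verified.
def deserialize_metric_record(record_bytes):
    # Outer envelope: step/epoch bookkeeping plus the serialized artifact
    base_pb = BaseRecord()
    base_pb.ParseFromString(record_bytes)
    # Inner payload: the metric value itself
    metric_pb = MetricRecord()
    metric_pb.ParseFromString(base_pb.artifact)
    return base_pb, metric_pb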
import os

from aimrecords import Storage

storage_path = os.getcwd()

# Append records
writer_storage = Storage(storage_path, 'w')
writer_storage.open('loss')

for step in range(100):
    writer_storage.append_record('loss', str(step).encode(),
                                 indexing={'subset': 'train'})
    if step % 4 == 0:
        writer_storage.append_record('loss', str(step).encode(),
                                     indexing={'subset': 'val'})

writer_storage.close()

# Select records
reader_storage = Storage(storage_path, 'r')
reader_storage.open('loss')

for r in reader_storage.read_records('loss', slice(0, 20)):
    print(r)
print('-' * 25)

for r in reader_storage.read_records('loss', slice(0, 10),
                                     indexing={'subset': 'train'}):
    print(r)
print('-' * 25)
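# For completeness, a small sketch reusing the reader opened above: the same
# `indexing` selector can pull the other subset written by this example (the
# 'val' records appended on every 4th step). Assumes the storage produced by
# the writer in this snippet.
for r in reader_storage.read_records('loss', slice(0, 10),
                                     indexing={'subset': 'val'}):
    print(r)
reader_storage.close('loss')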
import os
import time

from aimrecords import Storage

storage_path = os.getcwd()

# Append records
writer_storage = Storage(storage_path, 'w')
writer_storage.open('loss')

for step in range(10000):
    writer_storage.append_record('loss', str(step).encode())
    writer_storage.flush()
    time.sleep(0.5)

writer_storage.close()
def get(self, experiment_name, commit_id):
    project = Project()

    if not project.exists():
        return make_response(jsonify({}), 404)

    dir_path = os.path.join('/store', experiment_name)

    # Check if experiment exists
    if not os.path.isdir(dir_path):
        return jsonify({
            'init': True,
            'branch_init': False,
        })

    # Get commits
    commits = get_branch_commits(dir_path)

    # Get specified commit
    commit = None
    if commit_id == 'latest':
        for commit_item, config in commits.items():
            if commit is None or config['date'] > commit['date']:
                commit = config
    elif commit_id == 'index':
        commit = {
            'hash': 'index',
            'date': time.time(),
            'index': True,
        }
    else:
        commit = commits.get(commit_id)

    if not commit:
        return make_response(jsonify({}), 404)

    objects_dir_path = os.path.join(dir_path, commit['hash'], 'objects')
    meta_file_path = os.path.join(objects_dir_path, 'meta.json')

    # Read meta file content
    try:
        with open(meta_file_path, 'r+') as meta_file:
            meta_file_content = json.loads(meta_file.read())
    except:
        meta_file_content = {}

    if commit['hash'] == 'index' and len(meta_file_content) == 0:
        return jsonify({
            'init': True,
            'branch_init': True,
            'index_empty': True,
            'commit': commit,
            'commits': commits,
        })

    # Get all artifacts (objects) listed in the meta file
    metric_objects = []
    model_objects = []
    dir_objects = []
    map_objects = []
    stats_objects = []

    records_storage = Storage(objects_dir_path, 'r')

    # Limit distributions
    for obj_key, obj in meta_file_content.items():
        if obj['type'] == 'dir':
            dir_objects.append({
                'name': obj['name'],
                'cat': obj['cat'],
                'data': obj['data'],
                'data_path': obj['data_path'],
            })
        elif obj['type'] == 'models':
            model_file_path = os.path.join(objects_dir_path,
                                           'models',
                                           '{}.aim'.format(obj['name']))
            model_file_size = os.stat(model_file_path).st_size
            model_objects.append({
                'name': obj['name'],
                'data': obj['data'],
                'size': model_file_size,
            })
        elif (obj['type'] == 'metrics'
              and obj['data_path'] != '__AIMRECORDS__') or \
                obj['type'] == 'map':
            # obj['type'] == 'distribution':
            # Get object's data file path
            obj_data_file_path = os.path.join(objects_dir_path,
                                              obj['data_path'],
                                              obj_key)

            # Incompatible version
            if obj_key.endswith('.json'):
                return make_response(jsonify({}), 501)

        if obj['type'] == 'metrics':
            comp_content = []
            if obj['data_path'] == '__AIMRECORDS__':
                format = 'aimrecords'
                records_storage.open(obj['name'],
                                     uncommitted_bucket_visible=True)
                for r in records_storage.read_records(
                        obj['name'], slice(-1000, None)):
                    base, metric_record = Metric.deserialize(r)
                    comp_content.append(metric_record.value)
                records_storage.close(obj['name'])
            else:
                format = 'json_log'
                obj_data_content = read_artifact_log(obj_data_file_path,
                                                     1000)
                comp_content = list(map(lambda x: float(x),
                                        obj_data_content))

            metric_objects.append({
                'name': obj['name'],
                'mode': 'plot',
                'data': comp_content,
                'format': format,
            })
        elif obj['type'] == 'map':
            try:
                params_str = read_artifact_log(obj_data_file_path, 1)
                if params_str:
                    map_objects.append({
                        'name': obj['name'],
                        'data': json.loads(params_str[0]),
                    })
            except:
                pass

    records_storage.close()

    # Return found objects
    return jsonify({
        'init': True,
        'branch_init': True,
        'commit': commit,
        'commits': commits,
        'metrics': metric_objects,
        'models': model_objects,
        'dirs': dir_objects,
        'maps': map_objects,
        'stats': stats_objects,
    })
import os
import time

from aimrecords import Storage

storage_path = os.getcwd()

# Select records
reader_storage = Storage(storage_path, 'r')

cursor = None
while True:
    reader_storage.open('loss', uncommitted_bucket_visible=True)
    for r in reader_storage.read_records('loss', slice(cursor, None)):
        print(r)
        cursor = int(r.decode()) + 1
    reader_storage.close('loss')
    time.sleep(0.01)
def get_commits(metric, tag=None, experiments=None):
    project_path = '/store'
    project_branches = get_project_branches(project_path)

    commit_storage_path = lambda b, c: os.path.join(b, c, 'objects')

    # Filter by experiments
    if experiments and isinstance(experiments, str):
        experiments = filter(lambda e: e,
                             map(lambda e: e.strip(),
                                 experiments.split(',')))
        project_branches = [e for e in experiments if e in project_branches]

    # Get all commit objects
    commit_objects = {}
    for branch in project_branches:
        branch_path = os.path.join(project_path, branch)
        branch_commits = get_branch_commits(branch_path)
        for c in branch_commits.values():
            commit_objects[c['hash']] = {
                'branch': branch,
                'hash': c['hash'],
                'date': c['date'],
                'msg': c['message'],
            }

    # Filter by tag
    commit_hashes_by_tag = set()
    if tag is not None:
        tags = Tag.query.filter(Tag.name.like('{}%'.format(tag))).all()
        for t in tags:
            for tag_commit in t.commits:
                commit_hashes_by_tag.add(tag_commit.hash)

        filtered_commits = {c_hash: commit_objects[c_hash]
                            for c_hash in commit_hashes_by_tag}
    else:
        filtered_commits = commit_objects

    # Get commits data length
    max_commit_len = 0
    for commit_hash, commit in filtered_commits.items():
        branch_path = os.path.join(project_path, commit['branch'])
        storage_path = commit_storage_path(branch_path, commit['hash'])
        records_storage = Storage(storage_path, 'r')
        try:
            records_storage.open(metric,
                                 uncommitted_bucket_visible=True)
            commit['num_steps'] = records_storage.get_records_num(metric)
            records_storage.close()
        except:
            commit['num_steps'] = 0
        if commit['num_steps'] > max_commit_len:
            max_commit_len = commit['num_steps']

    # Get commits data
    scaled_steps_len = 50
    if scaled_steps_len > max_commit_len:
        scaled_steps_len = max_commit_len
    if scaled_steps_len:
        scaled_steps = slice(0, max_commit_len,
                             max_commit_len // scaled_steps_len)
    else:
        scaled_steps = slice(0, 0)

    # Retrieve actual values from commits
    for commit_hash, commit in filtered_commits.items():
        branch_path = os.path.join(project_path, commit['branch'])
        storage_path = commit_storage_path(branch_path, commit['hash'])
        commit['data'] = []
        records_storage = Storage(storage_path, 'r')
        try:
            records_storage.open(metric,
                                 uncommitted_bucket_visible=True)
            for r in records_storage.read_records(metric, scaled_steps):
                base, metric_record = Metric.deserialize(r)
                commit['data'].append({
                    'value': metric_record.value,
                    'epoch': base.epoch,
                    'step': base.step,
                })
            records_storage.close()
        except:
            pass

    # Remove empty commits
    filtered_commits = {c_hash: filtered_commits[c_hash]
                        for c_hash in filtered_commits.keys()
                        if len(filtered_commits[c_hash]['data']) > 0}

    # Get tags and colors
    commit_models = db.session.query(Commit, Tag) \
        .join(Tag, Commit.tags) \
        .filter(Commit.hash.in_(filtered_commits.keys())).all()

    for i in commit_models:
        if len(i) <= 1 or not i[1].color:
            continue

        commit_model = i[0]
        commit_tag = i[1]
        for commit_hash, commit in filtered_commits.items():
            if commit_hash == commit_model.hash:
                commit['color'] = commit_tag.color
                commit['tag'] = commit_tag.name

    return filtered_commits
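# A small worked example of the downsampling arithmetic above (illustrative
# numbers, not taken from a real run): with max_commit_len = 1000 and the cap
# scaled_steps_len = 50, the slice step is 1000 // 50 = 20, so roughly 50
# evenly spaced records are read per commit instead of all 1000. The same
# slice object is applied to range(run_len) in retrieve_scale_metrics.
scaled_steps = slice(0, 1000, 1000 // 50)   # slice(0, 1000, 20)
print(len(range(1000)[scaled_steps]))       # 50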