Example #1
def retrieve_scale_metrics(runs, metrics, scaled_steps):
    for run in runs:
        if run.get('source') == 'tf_summary':
            run_len = len(run['data'])
            # Subsample the in-memory series with the same slice
            run_range = range(run_len)[scaled_steps]
            run['data'] = [run['data'][i] for i in run_range]
        else:
            # Retrieve aim metrics
            branch_path = os.path.join(PROJECT_PATH, run['branch'])
            storage_path = get_run_objects_path(branch_path, run['hash'])
            run['data'] = []
            records_storage = Storage(storage_path, 'r')
            for metric in metrics:
                try:
                    records_storage.open(metric,
                                         uncommitted_bucket_visible=True)
                    for r in records_storage.read_records(metric,
                                                          scaled_steps):
                        base, metric_record = Metric.deserialize(r)
                        run['data'].append({
                            'value': metric_record.value,
                            'step': base.step,
                            'epoch': base.epoch if base.has_epoch else None,
                        })
                    records_storage.close()
                except Exception:
                    # Skip metrics that can't be read from this run
                    pass
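
The tf_summary branch above subsamples an in-memory list, while the aim branch pushes the same slice straight into read_records. A minimal standalone sketch of that slice-based read, using only the Storage calls shown in these examples (the path and metric name are hypothetical):

import os

from aimrecords import Storage

# Hypothetical location; point this at a real run's objects directory
storage_path = os.path.join(os.getcwd(), '.aim', 'objects')

# Every 10th record among the first 100 steps
scaled_steps = slice(0, 100, 10)

storage = Storage(storage_path, 'r')
storage.open('loss', uncommitted_bucket_visible=True)
for r in storage.read_records('loss', scaled_steps):
    print(r)
storage.close()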
Example #2
    def test_simple_int(self):
        records_num = 1000

        with tempfile.TemporaryDirectory() as temp_dir:
            storage_writer = Storage(temp_dir, 'w')

            storage_writer.open('loss')
            for i in range(records_num):
                storage_writer.append_record('loss', str(i).encode())
            storage_writer.close('loss')

            storage_writer.open('accuracy')
            for i in range(records_num, 2 * records_num):
                storage_writer.append_record('accuracy', str(i).encode())
            storage_writer.close('accuracy')

            del storage_writer

            storage_reader = Storage(temp_dir, 'r')

            storage_reader.open('loss')
            assert storage_reader.get_records_num('loss') == records_num
            for i, record in enumerate(storage_reader.read_records('loss')):
                assert i == int(record.decode())
            storage_reader.close('loss')

            storage_reader.open('accuracy')
            assert storage_reader.get_records_num('accuracy') == records_num
            for i, record in enumerate(storage_reader.read_records('accuracy')):
                assert i + records_num == int(record.decode())
            storage_reader.close('accuracy')

            del storage_reader
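
The del storage_writer above finalizes the writer before a plain reader is created. Example #7 suggests an alternative: open the reader with uncommitted_bucket_visible=True to observe records while the writer is still live. A minimal sketch, assuming the flag exposes not-yet-committed buckets the way Example #7 implies:

import tempfile

from aimrecords import Storage

with tempfile.TemporaryDirectory() as temp_dir:
    writer = Storage(temp_dir, 'w')
    writer.open('loss')
    for i in range(10):
        writer.append_record('loss', str(i).encode())

    # Assumption: pending records are visible before the writer commits
    reader = Storage(temp_dir, 'r')
    reader.open('loss', uncommitted_bucket_visible=True)
    print(reader.get_records_num('loss'))  # expected: 10
    reader.close('loss')

    writer.close('loss')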
Example #3
def get_runs_metric(metrics, tag=None, experiments=None, params=None):
    if not metrics:
        return []

    filtered_runs = get_runs_hashes(tag, experiments, params)

    # Get the series length of each run
    max_commit_len = 0
    for run in filtered_runs:
        branch_path = os.path.join(PROJECT_PATH, run['branch'])
        storage_path = get_run_objects_path(branch_path, run['hash'])
        records_storage = Storage(storage_path, 'r')
        for metric in metrics:
            try:
                records_storage.open(metric,
                                     uncommitted_bucket_visible=True)
                run['num_steps'] = records_storage.get_records_num(metric)
                records_storage.close()
            except Exception:
                run['num_steps'] = 0
            if run['num_steps'] > max_commit_len:
                max_commit_len = run['num_steps']

    # Remove empty runs
    filtered_runs = list(filter(lambda r: r['num_steps'] > 0,
                                filtered_runs))

    # Get tags and colors
    commit_models = db.session.query(Commit, Tag) \
        .join(Tag, Commit.tags) \
        .filter(Commit.hash.in_(map(lambda r: r['hash'], filtered_runs))).all()
    for commit_model, commit_tag in commit_models:
        if commit_tag is None or not commit_tag.color:
            continue

        for commit in filtered_runs:
            if commit['hash'] == commit_model.hash:
                commit['tag'] = {
                    'name': commit_tag.name,
                    'color': commit_tag.color,
                }

    return filtered_runs
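
The tag-matching loop above rescans filtered_runs once per (Commit, Tag) row. Indexing the runs by hash first turns the pairing into a dictionary lookup; a sketch of that variant, intended as a drop-in for the loop above:

    runs_by_hash = {}
    for run in filtered_runs:
        runs_by_hash.setdefault(run['hash'], []).append(run)

    for commit_model, commit_tag in commit_models:
        if commit_tag is None or not commit_tag.color:
            continue
        for run in runs_by_hash.get(commit_model.hash, []):
            run['tag'] = {
                'name': commit_tag.name,
                'color': commit_tag.color,
            }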
Example #4
import os

from aim.sdk.artifacts.proto.base_pb2 import BaseRecord
from aim.sdk.artifacts.proto.metric_pb2 import MetricRecord

from aimrecords import Storage

storage_path = os.path.join(os.getcwd(), '.aim', 'new-data-model', 'index',
                            'objects')
storage = Storage(storage_path, 'r')

storage.open('loss')
for r in storage.read_records('loss', slice(None, None)):
    # Each record is a serialized BaseRecord whose `artifact` field
    # carries the nested, serialized MetricRecord
    base_pb = BaseRecord()
    base_pb.ParseFromString(r)
    metric_pb = MetricRecord()
    metric_pb.ParseFromString(base_pb.artifact)
    print('loss', base_pb, metric_pb)

storage.open('accuracy')
for r in storage.read_records('accuracy', slice(None, None)):
    base_pb = BaseRecord()
    base_pb.ParseFromString(r)
    metric_pb = MetricRecord()
    metric_pb.ParseFromString(base_pb.artifact)
    print('accuracy', base_pb, metric_pb)

storage.close()
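
The loss and accuracy loops above differ only in the series name, so the decoding can live in one helper. A sketch reusing the imports above (dump_series is a hypothetical name, not part of aimrecords):

def dump_series(storage, name):
    # Decode the outer BaseRecord, then the MetricRecord nested in it
    storage.open(name)
    for r in storage.read_records(name, slice(None, None)):
        base_pb = BaseRecord()
        base_pb.ParseFromString(r)
        metric_pb = MetricRecord()
        metric_pb.ParseFromString(base_pb.artifact)
        print(name, base_pb, metric_pb)

series_storage = Storage(storage_path, 'r')
for series in ('loss', 'accuracy'):
    dump_series(series_storage, series)
series_storage.close()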
Example #5
import os

from aimrecords import Storage


storage_path = os.getcwd()

# Append records
writer_storage = Storage(storage_path, 'w')

writer_storage.open('loss')
for step in range(100):
    writer_storage.append_record('loss',
                                 str(step).encode(),
                                 indexing={'subset': 'train'})
    if step % 4 == 0:
        writer_storage.append_record('loss',
                                     str(step).encode(),
                                     indexing={'subset': 'val'})

writer_storage.close()

# Select records
reader_storage = Storage(storage_path, 'r')

reader_storage.open('loss')
for r in reader_storage.read_records('loss', slice(0, 20)):
    print(r)

print('-' * 25)

reader_storage.close()
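
read_records accepts any slice, so a negative start reads just the tail of a series; Example #6 relies on this with slice(-1000, None) to cap payloads at the last 1000 points. A minimal sketch against the storage written above:

tail_reader = Storage(storage_path, 'r')
tail_reader.open('loss')
# Only the last 10 records of the series
for r in tail_reader.read_records('loss', slice(-10, None)):
    print('tail:', r.decode())
tail_reader.close()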
Example #6
    def get(self, experiment_name, commit_id):
        project = Project()

        if not project.exists():
            return make_response(jsonify({}), 404)

        dir_path = os.path.join('/store', experiment_name)

        # Check if experiment exists
        if not os.path.isdir(dir_path):
            return jsonify({
                'init': True,
                'branch_init': False,
            })

        # Get commits
        commits = get_branch_commits(dir_path)

        # Get specified commit
        commit = None
        if commit_id == 'latest':
            for commit_item, config in commits.items():
                if commit is None or config['date'] > commit['date']:
                    commit = config
        elif commit_id == 'index':
            commit = {
                'hash': 'index',
                'date': time.time(),
                'index': True,
            }
        else:
            commit = commits.get(commit_id)

        if not commit:
            return make_response(jsonify({}), 404)

        objects_dir_path = os.path.join(dir_path, commit['hash'], 'objects')
        meta_file_path = os.path.join(objects_dir_path, 'meta.json')

        # Read meta file content
        try:
            with open(meta_file_path, 'r') as meta_file:
                meta_file_content = json.loads(meta_file.read())
        except Exception:
            meta_file_content = {}

        if commit['hash'] == 'index' and len(meta_file_content) == 0:
            return jsonify({
                'init': True,
                'branch_init': True,
                'index_empty': True,
                'commit': commit,
                'commits': commits,
            })

        # Get all artifacts(objects) listed in the meta file
        metric_objects = []
        model_objects = []
        dir_objects = []
        map_objects = []
        stats_objects = []

        records_storage = Storage(objects_dir_path, 'r')

        # Limit distributions
        for obj_key, obj in meta_file_content.items():
            if obj['type'] == 'dir':
                dir_objects.append({
                    'name': obj['name'],
                    'cat': obj['cat'],
                    'data': obj['data'],
                    'data_path': obj['data_path'],
                })
            elif obj['type'] == 'models':
                model_file_path = os.path.join(objects_dir_path, 'models',
                                               '{}.aim'.format(obj['name']))
                model_file_size = os.stat(model_file_path).st_size
                model_objects.append({
                    'name': obj['name'],
                    'data': obj['data'],
                    'size': model_file_size,
                })
            elif (obj['type'] == 'metrics' and obj['data_path'] != '__AIMRECORDS__') or \
                    obj['type'] == 'map':
                # Get the object's data file path
                obj_data_file_path = os.path.join(objects_dir_path,
                                                  obj['data_path'], obj_key)

                # Incompatible version
                if obj_key.endswith('.json'):
                    return make_response(jsonify({}), 501)

            if obj['type'] == 'metrics':
                comp_content = []
                if obj['data_path'] == '__AIMRECORDS__':
                    metric_format = 'aimrecords'
                    records_storage.open(obj['name'],
                                         uncommitted_bucket_visible=True)
                    for r in records_storage.read_records(
                            obj['name'], slice(-1000, None)):
                        base, metric_record = Metric.deserialize(r)
                        comp_content.append(metric_record.value)
                    records_storage.close(obj['name'])
                else:
                    metric_format = 'json_log'
                    obj_data_content = read_artifact_log(
                        obj_data_file_path, 1000)
                    comp_content = [float(v) for v in obj_data_content]
                metric_objects.append({
                    'name': obj['name'],
                    'mode': 'plot',
                    'data': comp_content,
                    'format': metric_format,
                })
            elif obj['type'] == 'map':
                try:
                    params_str = read_artifact_log(obj_data_file_path, 1)
                    if params_str:
                        map_objects.append({
                            'name': obj['name'],
                            'data': json.loads(params_str[0]),
                        })
                except Exception:
                    pass

        records_storage.close()

        # Return found objects
        return jsonify({
            'init': True,
            'branch_init': True,
            'commit': commit,
            'commits': commits,
            'metrics': metric_objects,
            'models': model_objects,
            'dirs': dir_objects,
            'maps': map_objects,
            'stats': stats_objects,
        })
Example #7
import os
import time

from aimrecords import Storage

storage_path = os.getcwd()

# Select records
reader_storage = Storage(storage_path, 'r')

cursor = None
while True:
    # Re-open each pass so newly appended (uncommitted) buckets are visible
    reader_storage.open('loss', uncommitted_bucket_visible=True)
    for r in reader_storage.read_records('loss', slice(cursor, None)):
        print(r)
        # The payload is the step number, so it doubles as a resume cursor
        cursor = int(r.decode()) + 1
    reader_storage.close('loss')
    time.sleep(0.01)
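
Decoding the payload works here only because this writer stores the step number itself; with opaque payloads, the documented get_records_num call can drive the cursor instead. A minimal sketch under that assumption:

import os
import time

from aimrecords import Storage

reader = Storage(os.getcwd(), 'r')

cursor = 0
while True:
    reader.open('loss', uncommitted_bucket_visible=True)
    total = reader.get_records_num('loss')
    # Read only the records appended since the previous pass
    for r in reader.read_records('loss', slice(cursor, total)):
        print(r)
    cursor = total
    reader.close('loss')
    time.sleep(0.01)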
Example #8
def get_commits(metric, tag=None, experiments=None):
    project_path = '/store'
    project_branches = get_project_branches(project_path)
    commit_storage_path = lambda b, c: os.path.join(b, c, 'objects')

    # Filter by experiments
    if experiments and isinstance(experiments, str):
        experiments = filter(lambda e: e,
                             map(lambda e: e.strip(), experiments.split(',')))
        project_branches = [e for e in experiments if e in project_branches]

    # Get all commit objects
    commit_objects = {}
    for branch in project_branches:
        branch_path = os.path.join(project_path, branch)
        branch_commits = get_branch_commits(branch_path)
        for c in branch_commits.values():
            commit_objects[c['hash']] = {
                'branch': branch,
                'hash': c['hash'],
                'date': c['date'],
                'msg': c['message'],
            }

    # Filter by tag
    commit_hashes_by_tag = set()
    if tag is not None:
        tags = Tag.query.filter(Tag.name.like('{}%'.format(tag))).all()
        for t in tags:
            for tag_commit in t.commits:
                commit_hashes_by_tag.add(tag_commit.hash)

        filtered_commits = {
            c_hash: commit_objects[c_hash]
            for c_hash in commit_hashes_by_tag
        }
    else:
        filtered_commits = commit_objects

    # Get commits data length
    max_commit_len = 0
    for commit_hash, commit in filtered_commits.items():
        branch_path = os.path.join(project_path, commit['branch'])
        storage_path = commit_storage_path(branch_path, commit['hash'])
        records_storage = Storage(storage_path, 'r')
        try:
            records_storage.open(metric, uncommitted_bucket_visible=True)
            commit['num_steps'] = records_storage.get_records_num(metric)
            records_storage.close()
        except Exception:
            commit['num_steps'] = 0
        if commit['num_steps'] > max_commit_len:
            max_commit_len = commit['num_steps']

    # Subsample each series down to at most ~50 evenly spaced steps
    scaled_steps_len = 50
    if scaled_steps_len > max_commit_len:
        scaled_steps_len = max_commit_len
    if scaled_steps_len:
        scaled_steps = slice(0, max_commit_len,
                             max_commit_len // scaled_steps_len)
    else:
        scaled_steps = slice(0, 0)

    # Retrieve actual values from commits
    for commit_hash, commit in filtered_commits.items():
        branch_path = os.path.join(project_path, commit['branch'])
        storage_path = commit_storage_path(branch_path, commit['hash'])
        commit['data'] = []
        records_storage = Storage(storage_path, 'r')
        try:
            records_storage.open(metric, uncommitted_bucket_visible=True)
            for r in records_storage.read_records(metric, scaled_steps):
                base, metric_record = Metric.deserialize(r)
                commit['data'].append({
                    'value': metric_record.value,
                    'epoch': base.epoch,
                    'step': base.step,
                })
            records_storage.close()
        except Exception:
            pass

    # Remove empty commits
    filtered_commits = {
        c_hash: filtered_commits[c_hash]
        for c_hash in filtered_commits.keys()
        if len(filtered_commits[c_hash]['data']) > 0
    }

    # Get tags and colors
    commit_models = db.session.query(Commit, Tag) \
        .join(Tag, Commit.tags) \
        .filter(Commit.hash.in_(filtered_commits.keys())).all()
    for commit_model, commit_tag in commit_models:
        if commit_tag is None or not commit_tag.color:
            continue

        for commit_hash, commit in filtered_commits.items():
            if commit_hash == commit_model.hash:
                commit['color'] = commit_tag.color
                commit['tag'] = commit_tag.name

    return filtered_commits
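
The scaled_steps arithmetic above caps every series at roughly 50 evenly spaced points regardless of its length. A quick standalone check of the slice behavior (the length of 1000 is an arbitrary stand-in):

scaled_steps_len = 50
max_commit_len = 1000  # arbitrary example length

step = max_commit_len // scaled_steps_len          # 1000 // 50 == 20
scaled_steps = slice(0, max_commit_len, step)

indices = range(max_commit_len)[scaled_steps]
print(len(indices))  # 50 evenly spaced steps
print(indices[:3])   # range(0, 60, 20) -> steps 0, 20, 40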