Example #1
File: utils.py Project: roubkar/aimde
def retrieve_scale_metrics(runs, metrics, scaled_steps):
    # Downsample every run's metric data to the indices described by the
    # `scaled_steps` slice; aim runs are read back from their record storage.
    for run in runs:
        if run.get('source') == 'tf_summary':
            run_len = len(run['data'])
            run_range = range(run_len)[scaled_steps.start:
                                       scaled_steps.stop:
                                       scaled_steps.step]
            run_scaled_data = []
            for i in run_range:
                run_scaled_data.append(run['data'][i])
            run['data'] = run_scaled_data
        else:
            # Retrieve aim metrics
            branch_path = os.path.join(PROJECT_PATH, run['branch'])
            storage_path = get_run_objects_path(branch_path, run['hash'])
            run['data'] = []
            records_storage = Storage(storage_path, 'r')
            for metric in metrics:
                try:
                    records_storage.open(metric,
                                         uncommitted_bucket_visible=True)
                    for r in records_storage.read_records(metric,
                                                          scaled_steps):
                        base, metric_record = Metric.deserialize(r)
                        run['data'].append({
                            'value': metric_record.value,
                            'step': base.step,
                            'epoch': base.epoch if base.has_epoch else None,
                        })
                    records_storage.close()
                except:
                    pass
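A minimal call sketch for the helper above. The run dicts, metric name and slice are hypothetical, and PROJECT_PATH, Storage and the other helpers are assumed to be imported by the surrounding module:

# Hypothetical inputs, for illustration only
runs = [
    {'branch': 'default', 'hash': 'a1b2c3'},              # aim run, read from storage
    {'source': 'tf_summary', 'data': list(range(100))},   # tf.summary run, sliced in place
]
retrieve_scale_metrics(runs, ['loss'], slice(0, 100, 10))
# Afterwards each run carries a downsampled run['data']: aim runs hold dicts
# like {'value': ..., 'step': ..., 'epoch': ...}, the tf run keeps every 10th point.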
Example #2
File: file.py Project: roubkar/aimde
    def deserialize_item(self, item):
        if self.data.get('format') == 'aimrecords':
            if self.data.get('insight') == 'metric':
                _, metric = Metric.deserialize(item)
                return metric.value
            # Non-metric aimrecords items fall through and return None
        else:
            return item
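The same deserialize convention runs through all of these examples; as a minimal sketch (raw_record is assumed to come from records_storage.read_records or trace.read_records):

base, metric_record = Metric.deserialize(raw_record)
point = {
    'step': base.step,
    'epoch': base.epoch if base.has_epoch else None,
    'value': metric_record.value,
}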
Example #3
    def get(self):
        # Search endpoint: parse the query, collect matching aim and tf.summary
        # runs, then downsample every trace to a fixed number of steps.
        search_statement = request.args.get('q').strip()

        # TODO: get from request
        steps_num = 50

        runs = []

        # Parse statement
        try:
            parser = Statement()
            parsed_stmt = parser.parse(search_statement.strip())
        except:
            return make_response(jsonify({}), 403)

        statement_select = parsed_stmt.node['select']
        statement_expr = parsed_stmt.node['expression']

        aim_runs, tf_logs = separate_select_statement(statement_select)

        if 'run.archived' not in search_statement:
            default_expression = 'run.archived is not True'
        else:
            default_expression = None

        # Get project
        project = Project()
        if not project.exists():
            return make_response(jsonify({}), 404)

        aim_metrics = project.repo.select_metrics(aim_runs, statement_expr,
                                                  default_expression)
        if aim_metrics and len(aim_metrics):
            runs += aim_metrics

        # Get tf.summary logs
        if len(tf_logs) > 0:
            try:
                tf_runs = select_tf_summary_scalars(tf_logs, statement_expr)
                if tf_runs and len(tf_runs):
                    runs += tf_runs
            except:
                pass

        # Get the longest trace length
        max_num_records = 0
        for run in runs:
            if is_tf_run(run):
                for metric in run['metrics']:
                    for trace in metric['traces']:
                        if trace['num_steps'] > max_num_records:
                            max_num_records = trace['num_steps']
            else:
                run.open_storage()
                for metric in run.metrics.values():
                    try:
                        metric.open_artifact()
                        for trace in metric.traces:
                            if trace.num_records > max_num_records:
                                max_num_records = trace.num_records
                    except:
                        pass
                    finally:
                        pass
            #         metric.close_artifact()
            # run.close_storage()

        # Scale all traces
        steps = scale_trace_steps(max_num_records, steps_num)

        # Retrieve records
        for run in runs:
            if is_tf_run(run):
                for metric in run['metrics']:
                    for trace in metric['traces']:
                        trace_range = range(len(
                            trace['data']))[steps.start:steps.stop:steps.step]
                        trace_scaled_data = []
                        for i in trace_range:
                            trace_scaled_data.append(trace['data'][i])
                        trace['data'] = trace_scaled_data
            else:
                # run.open_storage()
                for metric in run.metrics.values():
                    try:
                        # metric.open_artifact()
                        for trace in metric.traces:
                            for r in trace.read_records(steps):
                                base, metric_record = MetricRecord.deserialize(
                                    r)
                                trace.append((
                                    metric_record.value,  # 0 => value
                                    base.step,  # 1 => step
                                    (
                                        base.epoch
                                        if base.has_epoch  # 2 => epoch
                                        else None),  #
                                    base.timestamp,  # 3 => time
                                ))
                    except:
                        pass
                    finally:
                        metric.close_artifact()
                run.close_storage()

        runs_list = []
        for run in runs:
            if not is_tf_run(run):
                runs_list.append(run.to_dict())
            else:
                runs_list.append(run)

        return jsonify({
            'runs': runs_list,
        })
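The tf.summary branch above downsamples by slicing a range object with the slice returned by scale_trace_steps. A standalone illustration with made-up numbers:

data = list(range(10))          # pretend trace['data'] has 10 points
steps = slice(0, 10, 3)         # stand-in for scale_trace_steps(...)
picked = [data[i] for i in range(len(data))[steps.start:steps.stop:steps.step]]
# picked == [0, 3, 6, 9]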
Example #4
    def get(self, experiment_name, commit_id):
        # Commit detail endpoint: resolve the requested commit, read its meta
        # file and collect the logged metric, model, dir and map objects.
        project = Project()

        if not project.exists():
            return make_response(jsonify({}), 404)

        dir_path = os.path.join('/store', experiment_name)

        # Check if experiment exists
        if not os.path.isdir(dir_path):
            return jsonify({
                'init': True,
                'branch_init': False,
            })

        # Get commits
        commits = get_branch_commits(dir_path)

        # Get specified commit
        commit = None
        if commit_id == 'latest':
            for commit_item, config in commits.items():
                if commit is None or config['date'] > commit['date']:
                    commit = config
        else:
            commit = commits.get(commit_id)

        if not commit:
            return jsonify({
                'init': True,
                'branch_init': True,
                'branch_empty': True,
            })

        if 'process' in commit.keys():
            if not commit['process']['finish']:
                if commit['process'].get('start_date'):
                    duration = time.time() - commit['process']['start_date']
                    commit['process']['time'] = duration
                else:
                    commit['process']['time'] = None
            elif commit['process'].get('start_date') is not None \
                    and commit['process'].get('finish_date') is not None:
                commit['process']['time'] = commit['process']['finish_date'] \
                                            - commit['process']['start_date']

        objects_dir_path = os.path.join(dir_path, commit['hash'], 'objects')
        meta_file_path = os.path.join(objects_dir_path, 'meta.json')

        # Read meta file content
        try:
            with open(meta_file_path, 'r+') as meta_file:
                meta_file_content = json.loads(meta_file.read())
        except:
            meta_file_content = {}

        # Get all artifacts(objects) listed in the meta file
        metric_objects = []
        model_objects = []
        dir_objects = []
        map_objects = []
        stats_objects = []

        # Limit distributions
        for obj_key, obj in meta_file_content.items():
            if obj['type'] == 'dir':
                dir_objects.append({
                    'name': obj['name'],
                    'cat': obj['cat'],
                    'data': obj['data'],
                    'data_path': obj['data_path'],
                })
            elif obj['type'] == 'models':
                model_file_path = os.path.join(objects_dir_path, 'models',
                                               '{}.aim'.format(obj['name']))
                model_file_size = os.stat(model_file_path).st_size
                model_objects.append({
                    'name': obj['name'],
                    'data': obj['data'],
                    'size': model_file_size,
                })
            elif (obj['type'] == 'metrics'
                  and obj['data_path'] != '__AIMRECORDS__') or \
                    ('map' in obj['type'] or obj['type'] == 'map'):
                # obj['type'] == 'distribution':
                # Get object's data file path
                obj_data_file_path = os.path.join(objects_dir_path,
                                                  obj['data_path'], obj_key)

                # Incompatible version
                if obj_key.endswith('.json'):
                    return make_response(jsonify({}), 501)

            if obj['type'] == 'metrics':
                steps = 75
                run = project.repo.select_run_metrics(experiment_name,
                                                      commit['hash'],
                                                      obj['name'])
                if run is not None and run.metrics.get(obj['name']) \
                        and len(run.metrics[obj['name']].traces):
                    metric = run.metrics[obj['name']]
                    run.open_storage()
                    metric.open_artifact()
                    traces = []
                    for trace in metric.traces:
                        num = trace.num_records
                        step = num // steps or 1
                        for r in trace.read_records(slice(0, num, step)):
                            base, metric_record = MetricRecord.deserialize(r)
                            trace.append((
                                base.step,  # 0 => step
                                metric_record.value,  # 1 => value
                            ))
                        traces.append(trace.to_dict())
                    metric.close_artifact()
                    run.close_storage()
                else:
                    traces = []

                metric_objects.append({
                    'name': obj['name'],
                    'mode': 'plot',
                    'traces': traces,
                })
            elif 'map' in obj['type'] or obj['type'] == 'map':
                try:
                    params_str = read_artifact_log(obj_data_file_path, 1)
                    if params_str:
                        map_objects.append({
                            'name':
                            obj['name'],
                            'data':
                            json.loads(params_str[0]),
                            'nested':
                            'nested_map' in obj['type']
                        })
                except:
                    pass

        # Return found objects
        return jsonify({
            'init': True,
            'branch_init': True,
            'branch_empty': False,
            'commit': commit,
            'commits': commits,
            'metrics': metric_objects,
            'models': model_objects,
            'dirs': dir_objects,
            'maps': map_objects,
            'stats': stats_objects,
        })
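The 'latest' lookup in this example scans the commits dict manually; assuming every commit config carries a 'date', it is equivalent to:

# Pick the commit config with the greatest 'date'; None when there are no commits
commit = max(commits.values(), key=lambda c: c['date']) if commits else None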
Example #5
File: views.py Project: jialin-wu-02/aimde
    def get(self, experiment_name, commit_id):
        # Older variant of the commit detail endpoint: also handles the special
        # 'index' commit and reads the most recent records straight from storage.
        project = Project()

        if not project.exists():
            return make_response(jsonify({}), 404)

        dir_path = os.path.join('/store', experiment_name)

        # Check if experiment exists
        if not os.path.isdir(dir_path):
            return jsonify({
                'init': True,
                'branch_init': False,
            })

        # Get commits
        commits = get_branch_commits(dir_path)

        # Get specified commit
        commit = None
        if commit_id == 'latest':
            for commit_item, config in commits.items():
                if commit is None or config['date'] > commit['date']:
                    commit = config
        elif commit_id == 'index':
            commit = {
                'hash': 'index',
                'date': time.time(),
                'index': True,
            }
        else:
            commit = commits.get(commit_id)

        if not commit:
            return make_response(jsonify({}), 404)

        objects_dir_path = os.path.join(dir_path, commit['hash'], 'objects')
        meta_file_path = os.path.join(objects_dir_path, 'meta.json')

        # Read meta file content
        try:
            with open(meta_file_path, 'r+') as meta_file:
                meta_file_content = json.loads(meta_file.read())
        except:
            meta_file_content = {}

        if commit['hash'] == 'index' and len(meta_file_content) == 0:
            return jsonify({
                'init': True,
                'branch_init': True,
                'index_empty': True,
                'commit': commit,
                'commits': commits,
            })

        # Get all artifacts(objects) listed in the meta file
        metric_objects = []
        model_objects = []
        dir_objects = []
        map_objects = []
        stats_objects = []

        records_storage = Storage(objects_dir_path, 'r')

        # Limit distributions
        for obj_key, obj in meta_file_content.items():
            if obj['type'] == 'dir':
                dir_objects.append({
                    'name': obj['name'],
                    'cat': obj['cat'],
                    'data': obj['data'],
                    'data_path': obj['data_path'],
                })
            elif obj['type'] == 'models':
                model_file_path = os.path.join(objects_dir_path, 'models',
                                               '{}.aim'.format(obj['name']))
                model_file_size = os.stat(model_file_path).st_size
                model_objects.append({
                    'name': obj['name'],
                    'data': obj['data'],
                    'size': model_file_size,
                })
            elif (obj['type'] == 'metrics' and obj['data_path'] != '__AIMRECORDS__') or \
                    obj['type'] == 'map':
                # obj['type'] == 'distribution':
                # Get object's data file path
                obj_data_file_path = os.path.join(objects_dir_path,
                                                  obj['data_path'], obj_key)

                # Incompatible version
                if obj_key.endswith('.json'):
                    return make_response(jsonify({}), 501)

            if obj['type'] == 'metrics':
                comp_content = []
                if obj['data_path'] == '__AIMRECORDS__':
                    format = 'aimrecords'
                    records_storage.open(obj['name'],
                                         uncommitted_bucket_visible=True)
                    for r in records_storage.read_records(
                            obj['name'], slice(-1000, None)):
                        base, metric_record = Metric.deserialize(r)
                        comp_content.append(metric_record.value)
                    records_storage.close(obj['name'])
                else:
                    format = 'json_log'
                    obj_data_content = read_artifact_log(
                        obj_data_file_path, 1000)
                    comp_content = list(
                        map(lambda x: float(x), obj_data_content))
                metric_objects.append({
                    'name': obj['name'],
                    'mode': 'plot',
                    'data': comp_content,
                    'format': format,
                })
            elif obj['type'] == 'map':
                try:
                    params_str = read_artifact_log(obj_data_file_path, 1)
                    if params_str:
                        map_objects.append({
                            'name': obj['name'],
                            'data': json.loads(params_str[0]),
                        })
                except:
                    pass

        records_storage.close()

        # Return found objects
        return jsonify({
            'init': True,
            'branch_init': True,
            'commit': commit,
            'commits': commits,
            'metrics': metric_objects,
            'models': model_objects,
            'dirs': dir_objects,
            'maps': map_objects,
            'stats': stats_objects,
        })
Example #6
File: utils.py Project: jialin-wu-02/aimde
def get_commits(metric, tag=None, experiments=None):
    # Collect commits across branches, optionally filter by experiments/tag,
    # downsample the requested metric per commit and attach tag colors.
    project_path = '/store'
    project_branches = get_project_branches(project_path)
    commit_storage_path = lambda b, c: os.path.join(b, c, 'objects')

    # Filter by experiments
    if experiments and isinstance(experiments, str):
        experiments = filter(lambda e: e,
                             map(lambda e: e.strip(), experiments.split(',')))
        project_branches = [e for e in experiments if e in project_branches]

    # Get all commit objects
    commit_objects = {}
    for branch in project_branches:
        branch_path = os.path.join(project_path, branch)
        branch_commits = get_branch_commits(branch_path)
        for c in branch_commits.values():
            commit_objects[c['hash']] = {
                'branch': branch,
                'hash': c['hash'],
                'date': c['date'],
                'msg': c['message'],
            }

    # Filter by tag
    commit_hashes_by_tag = set()
    if tag is not None:
        tags = Tag.query.filter(Tag.name.like('{}%'.format(tag))).all()
        for t in tags:
            for tag_commit in t.commits:
                commit_hashes_by_tag.add(tag_commit.hash)

        filtered_commits = {
            c_hash: commit_objects[c_hash]
            for c_hash in commit_hashes_by_tag
        }
    else:
        filtered_commits = commit_objects

    # Get commits data length
    max_commit_len = 0
    for commit_hash, commit in filtered_commits.items():
        branch_path = os.path.join(project_path, commit['branch'])
        storage_path = commit_storage_path(branch_path, commit['hash'])
        records_storage = Storage(storage_path, 'r')
        try:
            records_storage.open(metric, uncommitted_bucket_visible=True)
            commit['num_steps'] = records_storage.get_records_num(metric)
            records_storage.close()
        except:
            commit['num_steps'] = 0
        if commit['num_steps'] > max_commit_len:
            max_commit_len = commit['num_steps']

    # Get commits data
    scaled_steps_len = 50
    if scaled_steps_len > max_commit_len:
        scaled_steps_len = max_commit_len
    if scaled_steps_len:
        scaled_steps = slice(0, max_commit_len,
                             max_commit_len // scaled_steps_len)
    else:
        scaled_steps = slice(0, 0)

    # Retrieve actual values from commits
    for commit_hash, commit in filtered_commits.items():
        branch_path = os.path.join(project_path, commit['branch'])
        storage_path = commit_storage_path(branch_path, commit['hash'])
        commit['data'] = []
        records_storage = Storage(storage_path, 'r')
        try:
            records_storage.open(metric, uncommitted_bucket_visible=True)
            for r in records_storage.read_records(metric, scaled_steps):
                base, metric_record = Metric.deserialize(r)
                commit['data'].append({
                    'value': metric_record.value,
                    'epoch': base.epoch,
                    'step': base.step,
                })
            records_storage.close()
        except:
            pass

    # Remove empty commits
    filtered_commits = {
        c_hash: filtered_commits[c_hash]
        for c_hash in filtered_commits.keys()
        if len(filtered_commits[c_hash]['data']) > 0
    }

    # Get tags and colors
    commit_models = db.session.query(Commit, Tag) \
        .join(Tag, Commit.tags) \
        .filter(Commit.hash.in_(filtered_commits.keys())).all()
    for i in commit_models:
        if len(i) <= 1 or not i[1].color:
            continue

        commit_model = i[0]
        commit_tag = i[1]
        for commit_hash, commit in filtered_commits.items():
            if commit_hash == commit_model.hash:
                commit['color'] = commit_tag.color
                commit['tag'] = commit_tag.name

    return filtered_commits
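A hypothetical call to get_commits (the metric, tag and experiment names are illustrative only):

commits = get_commits('loss', tag='baseline', experiments='default, resnet18')
for commit_hash, commit in commits.items():
    # Each commit carries its branch, message, downsampled 'data' points and,
    # when tagged, a display color
    print(commit['branch'], commit['msg'], len(commit['data']), commit.get('color'))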
Example #7
    def get(self):
        # Search endpoint (newer variant): parse the query, select runs, params
        # and metrics from the repo, optionally merge tf.summary scalars, and
        # downsample the retrieved traces.
        try:
            steps_num = int(request.args.get('p').strip())
        except:
            steps_num = 50

        # Get project
        project = Project()
        if not project.exists():
            return make_response(jsonify({}), 404)

        search_statement = request.args.get('q').strip()

        # Parse statement
        try:
            parser = Statement()
            parsed_stmt = parser.parse(search_statement.strip())
        except Diagnostic as d:
            parser_error_logs = d.logs or []
            for error_log in reversed(parser_error_logs):
                if not isinstance(error_log, Notification):
                    continue
                if error_log.severity != Severity.ERROR:
                    continue
                error_location = error_log.location
                if error_location:
                    return make_response(jsonify({
                        'type': 'parse_error',
                        'statement': search_statement,
                        'location': error_location.col,
                    }), 403)
            return make_response(jsonify({}), 403)
        except Exception:
            return make_response(jsonify({}), 403)

        statement_select = parsed_stmt.node['select']
        statement_expr = parsed_stmt.node['expression']

        aim_select, tf_logs = separate_select_statement(statement_select)

        if 'run.archived' not in search_statement:
            default_expression = 'run.archived is not True'
        else:
            default_expression = None

        aim_select_result = project.repo.select(aim_select,
                                                statement_expr,
                                                default_expression)

        (
            aim_selected_runs,
            aim_selected_params,
            aim_selected_metrics,
        ) = (
            aim_select_result.runs,
            aim_select_result.get_selected_params(),
            aim_select_result.get_selected_metrics_context()
        )

        aim_selected_runs.sort(key=lambda r: r.config.get('date'), reverse=True)

        response = {
            'runs': [],
            'params': [],
            'agg_metrics': {},
            'meta': {
                'tf_selected': False,
                'params_selected': False,
                'metrics_selected': False,
            },
        }

        retrieve_traces = False
        retrieve_agg_metrics = False

        if len(aim_selected_params):
            response['meta']['params_selected'] = True
            response['params'] = aim_selected_params
            if len(aim_selected_metrics):
                response['meta']['metrics_selected'] = True
                response['agg_metrics'] = aim_selected_metrics
                retrieve_agg_metrics = True
        elif len(aim_selected_metrics):
            response['meta']['metrics_selected'] = True
            retrieve_traces = True

        runs = []

        if aim_selected_runs and len(aim_selected_runs):
            runs += aim_selected_runs
        if len(tf_logs) > 0:
            if not retrieve_traces:
                # TODO: aggregate tf logs and return aggregated values
                response['meta']['tf_selected'] = True
                pass
            else:
                try:
                    tf_runs = select_tf_summary_scalars(tf_logs, statement_expr)
                    if tf_runs and len(tf_runs):
                        runs += tf_runs
                except:
                    pass
                else:
                    response['meta']['tf_selected'] = True

        if retrieve_traces:
            # Get the longest trace length
            max_num_records = 0
            for run in runs:
                if is_tf_run(run):
                    for metric in run['metrics']:
                        for trace in metric['traces']:
                            if trace['num_steps'] > max_num_records:
                                max_num_records = trace['num_steps']
                else:
                    run.open_storage()
                    for metric in run.metrics.values():
                        try:
                            metric.open_artifact()
                            for trace in metric.traces:
                                if trace.num_records > max_num_records:
                                    max_num_records = trace.num_records
                        except:
                            pass
                        finally:
                            pass
                            # metric.close_artifact()
                    # run.close_storage()

            # Scale all traces
            steps = scale_trace_steps(max_num_records, steps_num)

            # Retrieve records
            for run in runs:
                if is_tf_run(run):
                    for metric in run['metrics']:
                        for trace in metric['traces']:
                            trace_range = range(len(trace['data']))[steps.start:
                                                                    steps.stop:
                                                                    steps.step]
                            trace_scaled_data = []
                            for i in trace_range:
                                trace_scaled_data.append(trace['data'][i])
                            trace['data'] = trace_scaled_data
                else:
                    # run.open_storage()
                    for metric in run.metrics.values():
                        try:
                            # metric.open_artifact()
                            for trace in metric.traces:
                                for r in trace.read_records(steps):
                                    base, metric_record = MetricRecord.deserialize(r)
                                    trace.append((
                                        metric_record.value,  # 0 => value
                                        base.step,  # 1 => step
                                        (base.epoch if base.has_epoch else None), # 2 => epoch
                                        base.timestamp,  # 3 => time
                                    ))
                        except:
                            pass
                        finally:
                            metric.close_artifact()
                    run.close_storage()

        if retrieve_agg_metrics:
            # TODO: Retrieve and return aggregated metrics
            pass

        runs_list = []
        for run in runs:
            if not is_tf_run(run):
                runs_list.append(run.to_dict(include_only_selected_agg_metrics=True))
            else:
                runs_list.append(run)

        response['runs'] = runs_list

        return response