def get_chart_reference(report):
    """
    Build the chart-reference map for the time-series of a report.

    :param report:
        Report data containing an ``output`` branch with nested
        ``(output, <cycle-like keys>, 'ts')`` time-series dicts.
    :type report: dict

    :return:
        Nested dict ``{<k[2]>: {<param_id>: {'series': [<ref>, ...], ...}}}``
        where each ``<ref>`` has the nested-key paths of the x/`times` and
        y/parameter series plus a display label, merged with the defaults
        from ``_map_cycle_report_graphs()``.
    :rtype: dict
    """
    r, _map = {}, _map_cycle_report_graphs()
    out = report.get('output', {})
    # Walk three levels under 'output'; only 'ts' leaves that carry a
    # 'times' vector can be charted against time.
    it = sh.stack_nested_keys(out, key=('output',), depth=3)
    for k, v in sorted(it):
        if k[-1] == 'ts' and 'times' in v:
            # label is a %-template: '{}' is filled now with the sheet name,
            # '%s' is filled below with each parameter name.
            label = '{}/%s'.format(co2_exl._sheet_name(k))
            for i, j in sorted(v.items()):
                # NOTE(review): assumes every key in a 'ts' dict matches
                # co2_exl._re_params_name (match() result is subscripted
                # unconditionally) — confirm with the writer of those dicts.
                param_id = co2_exl._re_params_name.match(i)['param']
                m = _map.get(param_id, None)
                if m:
                    d = {
                        'x': k + ('times',),
                        'y': k + (i,),
                        'label': label % i
                    }
                    # Group references by cycle-ish key k[2] and parameter id.
                    n = k[2], param_id, 'series'
                    sh.get_nested_dicts(r, *n, default=list).append(d)
    # Merge the per-parameter chart defaults into each collected entry,
    # dropping their 'label' so the per-series labels above win.
    for k, v in sh.stack_nested_keys(r, depth=2):
        m = _map[k[1]]
        m.pop('label', None)  # NOTE: mutates the shared _map entry in place.
        v.update(m)
    return r
def parse_data(raw_data, sets_mapping=None):
    """
    Extract and rename the data-sets to _process.

    :param raw_data:
        Raw Data.
    :type raw_data: dict[str, dict[str, numpy.array]]

    :param sets_mapping:
        Mapping of data-sets to _process.
        It is like `{"<set-name>": {"<new-name>": "<old-name>", ...}, ...}`.
    :type sets_mapping: dict[str, dict[str, str]]

    :return:
        Model data.
    :rtype: dict
    """
    if sets_mapping is None:
        data = raw_data
    else:
        # Re-key each selected variable from its old name to the new one.
        data = {}
        for (set_name, new_name), old_name in sh.stack_nested_keys(sets_mapping):
            sh.get_nested_dicts(data, set_name)[new_name] = \
                raw_data[set_name][old_name]

    # Drop variables that contain only NaNs.
    parsed_data = {}
    for (set_name, var_name), values in sh.stack_nested_keys(data):
        if not np.isnan(values).all():
            sh.get_nested_dicts(parsed_data, set_name)[var_name] = values
    return parsed_data
def split_prediction_models(scores, models, default_models):
    """
    Split prediction models.

    :param scores:
        Models score.
    :type scores: dict

    :param models:
        Calibrated models.
    :type models: dict

    :param default_models:
        Default calibrated models.
    :type default_models: dict

    :return:
        Scores and prediction models.
    :rtype: tuple
    """
    # sbm: score by model, model_sel: selection status, par: param selections.
    sbm, model_sel, par = {}, {}, {}
    for (k, c), v in sh.stack_nested_keys(scores, depth=2):
        r = sh.selector(['models'], v, allow_miss=True)
        for m in r.get('models', ()):
            # Record, per model, the calibration cycle it was scored on.
            sh.get_nested_dicts(par, m, 'calibration')[c] = c
        r.update(v.get('score', {}))
        sh.get_nested_dicts(sbm, k)[c] = r
        # Keep only the success flag, renamed to 'status', plus its origin.
        r = sh.selector(['success'], r, allow_miss=True)
        r = sh.map_dict({'success': 'status'}, r, {'from': c})
        sh.get_nested_dicts(model_sel, k, 'calibration')[c] = r

    # Start every prediction cycle from the defaults ('input' provenance).
    p = {i: dict.fromkeys(default_models, 'input') for i in prediction_cycles}
    mdls = {i: default_models.copy() for i in prediction_cycles}
    for k, n in sorted(models.items()):
        # sh.NONE holds the fallback (cycle, status, models) triple.
        d = n.get(sh.NONE, (None, True, {}))
        for i in prediction_cycles:
            c, s, m = n.get(i, d)
            if m:
                s = {'from': c, 'status': s}
                sh.get_nested_dicts(model_sel, k, 'prediction')[i] = s
                mdls[i].update(m)
                # Override provenance: these models come from cycle c.
                p[i].update(dict.fromkeys(m, c))

    for k, v in sh.stack_nested_keys(p, ('prediction',), depth=2):
        # Re-key as par[<model>][<'prediction'>][<cycle>] = provenance.
        sh.get_nested_dicts(par, k[-1], *k[:-2])[k[-2]] = v

    s = {
        'param_selections': par,
        'model_selections': model_sel,
        'score_by_model': sbm,
        'scores': scores
    }
    return (s,) + tuple(mdls.get(k, {}) for k in prediction_cycles)
def select_declaration_data(data, diff=None):
    """
    Select from `data` only the entries enabled in DECLARATION_DATA.

    :param data:
        Nested input data.
    :type data: dict

    :param diff:
        Optional set; if given it is cleared and filled with the nested keys
        (depth 4) present in `data` but not kept in the result.
    :type diff: set

    :return:
        Filtered declaration data.
    :rtype: dict
    """
    res = {}
    for keys, enabled in sh.stack_nested_keys(
            constants.con_vals.DECLARATION_DATA):
        if enabled and sh.are_in_nested_dicts(data, *keys):
            value = sh.get_nested_dicts(data, *keys)
            sh.get_nested_dicts(res, *keys, default=co2_utl.ret_v(value))
    if diff is not None:
        diff.clear()
        diff.update(k for k, _ in sh.stack_nested_keys(data, depth=4))
        kept = (k for k, _ in sh.stack_nested_keys(res, depth=4))
        diff.difference_update(kept)
    return res
def test_stack_nested_keys(self):
    """Check sh.stack_nested_keys with defaults, a key prefix, and a depth."""
    data = {'a': {'b': {'c': ('d', )}}, 'A': {'B': {'C': ('D', )}}}

    def flat(**kwargs):
        return sorted(sh.stack_nested_keys(data, **kwargs))

    self.assertEqual(
        flat(),
        [(('A', 'B', 'C'), ('D', )), (('a', 'b', 'c'), ('d', ))]
    )
    self.assertEqual(
        flat(key=(0, )),
        [((0, 'A', 'B', 'C'), ('D', )), ((0, 'a', 'b', 'c'), ('d', ))]
    )
    self.assertEqual(
        flat(depth=2),
        [(('A', 'B'), {'C': ('D', )}), (('a', 'b'), {'c': ('d', )})]
    )
def test_stack_nested_keys(self):
    """Verify stacking nested keys fully, with a prefix, and depth-limited."""
    d = {'a': {'b': {'c': ('d',)}}, 'A': {'B': {'C': ('D',)}}}
    cases = (
        ({}, [(('A', 'B', 'C'), ('D',)), (('a', 'b', 'c'), ('d',))]),
        ({'key': (0,)},
         [((0, 'A', 'B', 'C'), ('D',)), ((0, 'a', 'b', 'c'), ('d',))]),
        ({'depth': 2},
         [(('A', 'B'), {'C': ('D',)}), (('a', 'b'), {'c': ('d',)})]),
    )
    for kwargs, expected in cases:
        self.assertEqual(sorted(sh.stack_nested_keys(d, **kwargs)), expected)
def _summary2df(data):
    """
    Convert the report summary into named dataframes (results, selection,
    comparison).

    :param data:
        Report data with an optional 'summary' branch.
    :type data: dict

    :return:
        ``{'summary': [<DataFrame>, ...]}`` or ``{}`` when nothing applies.
    :rtype: dict
    """
    frames = []
    summary = data.get('summary', {})

    if 'results' in summary:
        grouped = {}
        index = ['cycle', 'stage', 'usage']
        for keys, value in sh.stack_nested_keys(summary['results'], depth=4):
            rows = sh.get_nested_dicts(grouped, keys[0], default=list)
            rows.append(
                sh.combine_dicts(sh.map_list(index, *keys[1:]), value)
            )
        if grouped:
            df = _dd2df(
                grouped, index=index, depth=2,
                col_key=functools.partial(_sort_key, p_keys=('param',) * 2),
                row_key=functools.partial(_sort_key, p_keys=index)
            )
            import pandas as pd
            df.columns = pd.MultiIndex.from_tuples(_add_units(df.columns))
            setattr(df, 'name', 'results')
            frames.append(df)

    if 'selection' in summary:
        df = _dd2df(
            summary['selection'], ['model_id'], depth=2,
            col_key=functools.partial(_sort_key, p_keys=('stage', 'cycle')),
            row_key=functools.partial(_sort_key, p_keys=())
        )
        setattr(df, 'name', 'selection')
        frames.append(df)

    if 'comparison' in summary:
        grouped = {}
        for keys, value in sh.stack_nested_keys(summary['comparison'], depth=3):
            value = sh.combine_dicts(value, base={'param_id': keys[-1]})
            sh.get_nested_dicts(grouped, *keys[:-1], default=list).append(value)
        if grouped:
            df = _dd2df(
                grouped, ['param_id'], depth=2,
                col_key=functools.partial(_sort_key, p_keys=('stage', 'cycle')),
                row_key=functools.partial(_sort_key, p_keys=())
            )
            setattr(df, 'name', 'comparison')
            frames.append(df)

    if frames:
        return {'summary': frames}
    return {}
def _extract_summary_from_output(report, extracted, augmented_summary=False):
    """
    Flatten selected parameters of the report 'output' branch into
    `extracted`, formatting each known parameter family differently.

    :param report:
        Report data with an 'output' branch.
    :type report: dict
    :param extracted:
        Destination nested dict, updated in place.
    :type extracted: dict
    :param augmented_summary:
        Also extract the extended parameter families (co2 params,
        calibration status, willans factors, rescaling scores).
    :type augmented_summary: bool
    """
    for k, v in sh.stack_nested_keys(report.get('output', {}), depth=2):
        k = k[::-1]  # reverse so the cycle-ish key comes first in `extracted`
        for u, i, j in _param_names_values(v.get('pa', {})):
            o = {}
            # Always extracted, regardless of `augmented_summary`.
            if i in ('has_sufficient_power', ):
                o = {i: j}
            elif augmented_summary:
                if i == 'co2_params_calibrated':
                    # j presumably is an lmfit Parameters-like object
                    # (has .valuesdict()) — TODO confirm.
                    o = _format_dict(j.valuesdict().items(), 'co2_params %s')
                elif i == 'calibration_status':
                    o = _format_dict(
                        enumerate(j), 'status co2_params step %d',
                        lambda x: x[0]
                    )
                elif i == 'willans_factors':
                    o = j
                elif i == 'phases_willans_factors':
                    # One formatted entry per phase index n.
                    for n, m in enumerate(j):
                        o.update(
                            _format_dict(m.items(), '%s phase {}'.format(n)))
                elif i == 'co2_rescaling_scores':
                    o = sh.map_list(
                        ['rescaling_mean', 'rescaling_std', 'rescaling_n'],
                        *j)
            if o:
                sh.get_nested_dicts(extracted, *(k + (u, ))).update(o)
def parse_solution(solution):
    """
    Parse the CO2MPAS model solution.

    :param solution:
        CO2MPAS model solution.
    :type solution: schedula.Solution

    :return:
        CO2MPAS outputs.
    :rtype: dict[dict]
    """
    res = {}
    # Turn dotted keys into nested dicts.
    for key, value in solution.items():
        *path, leaf = key.split('.')
        sh.get_nested_dicts(res, *path)[leaf] = value

    # Mirror the calibrated WLTP co2 values into the prediction targets.
    for key, value in list(sh.stack_nested_keys(res, depth=3)):
        parents, leaf = key[:-1], key[-1]
        if parents == ('output', 'calibration') and leaf in ('wltp_l', 'wltp_h'):
            value = sh.selector(
                ('co2_emission_value',), value, allow_miss=True
            )
            if value:
                target = sh.get_nested_dicts(res, 'target', 'prediction')
                target[leaf] = sh.combine_dicts(value, target.get(leaf, {}))

    res['pipe'] = solution.pipe
    return res
def run_model(base, model):
    """
    Run whole model (scale trace and gearshifts) for each case.

    :param base:
        Base inputs; ``base["case"]`` is iterated row by row
        (presumably a pandas DataFrame — it exposes ``iterrows()``).
    :type base: dict

    :param model:
        Callable model executed on each case's inputs.

    :return sol:
        List of dictionaries that contains the solution for different cases.
    :rtype: list
    """
    from tqdm import tqdm
    sol, case = [], base["case"]
    # BUG FIX: `len(list(case.iterrows()))` materialized every row just to
    # count them; `len(case.index)` is O(1). The context manager also
    # guarantees the progress bar is closed.
    with tqdm(total=len(case.index), desc="Executing gearshift model",
              position=0) as pbar:
        for _, row in case.iterrows():
            pbar.update(1)
            inputs = _obtain_inputs(row, base)  # renamed: `input` is a builtin
            sol_case = model(dict(inputs))
            dict_case = {
                "Case": row.to_dict()["case"],
                "NoOfGears": sol_case["shift_points"]["NoOfGearsFinal"],
            }
            for k, v in sh.stack_nested_keys(
                    sol_case.get("shift_points", {}), depth=2):
                if len(k) >= 2:
                    dict_case[str(k[1])] = v
            sol.append(dict_case)
    # BUG FIX: the documented return value was missing.
    return sol
def _run_variations(plan, bases, core_model, timestamp):
    """
    Run the simulation plan, yielding one core-model solution per variation.

    :param plan:
        Iterable of variation records; each has 'base', 'data' and 'id'.
    :param bases:
        Base inputs/solutions indexed by the variation's 'base' key.
    :type bases: dict
    :param core_model:
        Core model dispatcher to execute for each variation.
    :param timestamp:
        Run timestamp, forwarded to the core model inputs.
    """
    for r in _ProgressBar(plan, _format_meter=_format_meter):
        sol, data = bases[r['base']], r['data']
        if 'solution' in sol:
            # Overlay the variation data onto the base solution's values.
            s = sol['solution']
            base = _define_inputs(s, sh.combine_nested_dicts(sh.selector(
                data, s, allow_miss=True
            ), data))
        elif 'base' in sol:
            base = sh.combine_nested_dicts(sol['base'], data, depth=2)
        else:
            continue  # nothing to run this variation against
        # Drop values explicitly marked as EMPTY by the variation.
        for i, d in base.items():
            if hasattr(d, 'items'):
                base[i] = {k: v for k, v in d.items() if v is not sh.EMPTY}
        sol = core_model(_define_inputs(sol, dict(
            base=base,
            vehicle_name='-'.join((str(r['id']), sol['vehicle_name'])),
            timestamp=timestamp
        )))
        # `keys`: reversed dotted paths of the overridden 'output.*' inputs;
        # their summary entries are replaced by the plan values below.
        summary, keys = {}, {
            tuple(k.split('.')[:0:-1]) for k in base if k.startswith('output.')
        }
        for k, v in data.items():
            k = ('plan %s' % k).split('.')[::-1]
            sh.get_nested_dicts(summary, *k).update(v)
        for k, v in sh.stack_nested_keys(sol['summary'], depth=3):
            if k[:-1] not in keys:
                sh.get_nested_dicts(summary, *k).update(v)
        sol['summary'] = summary
        yield sol
def load_interpolation_methods(methods_fpath, interpolation_method='linear'):
    """
    Load interpolation methods for each variable of each data-set.

    :param methods_fpath:
        File path (`.json`) of interpolation methods.
        It is like `{"<set-name>": {"<var-name>": "<interp>", ...}, ...}`.
    :type methods_fpath: str

    :param interpolation_method:
        Default interpolation method.
    :type interpolation_method: str

    :return:
        Interpolation methods for each variable of each data-set.
        It is like `{"<set-name>": {"<var-name>": "<interp>", ...}, ...}`.
    :rtype: collections.defaultdict
    """
    import json
    from syncing.model.interp import METHODS
    from syncing.model import define_interpolation_methods

    # Start from the defaults, then override with the file's entries.
    methods = define_interpolation_methods(interpolation_method)
    with open(methods_fpath) as f:
        custom = json.load(f)
    for keys, name in sh.stack_nested_keys(custom):
        methods[keys[0]][sh.bypass(*keys[1:])] = METHODS[name]
    return methods
def _format_selection(score_by_model, depth=-1, index='model_id'):
    """
    Re-group scores: move the leading key of each path into the score dict
    under `index` and append under the remaining path.

    :param score_by_model:
        Nested scores keyed by model.
    :type score_by_model: dict
    :param depth:
        Stacking depth forwarded to ``sh.stack_nested_keys``.
    :type depth: int
    :param index:
        Field name receiving the leading key (the model id).
    :type index: str

    :return:
        Re-grouped scores with list leaves.
    :rtype: dict
    """
    res = {}
    for keys, score in sorted(
            sh.stack_nested_keys(score_by_model, depth=depth)):
        entry = dict(score)  # copy: don't mutate the caller's dicts
        entry[index] = keys[0]
        sh.get_nested_dicts(res, *keys[1:], default=list).append(entry)
    return res
def _extract_summary_from_model_scores(report, extracted):
    """
    Merge model-selection statuses and model scores from the report's
    calibration branch into `extracted`.

    :param report:
        Report data.
    :type report: dict
    :param extracted:
        Destination nested dict (depth 3), updated in place.
    :type extracted: dict
    :return:
        True when model scores were found and merged, False otherwise.
    :rtype: bool
    """
    n = ('data', 'calibration', 'model_scores', 'model_selections')
    if not sh.are_in_nested_dicts(report, *n):
        return False
    sel = sh.get_nested_dicts(report, *n)
    s = ('data', 'calibration', 'model_scores', 'score_by_model')
    score = sh.get_nested_dicts(report, *s)
    s = ('data', 'calibration', 'model_scores', 'scores')
    scores = sh.get_nested_dicts(report, *s)
    for k, v in sh.stack_nested_keys(extracted, depth=3):
        # k[1::-1]: first two key levels reversed, matching `sel`'s layout.
        n = k[1::-1]
        if k[-1] == 'output' and sh.are_in_nested_dicts(sel, *n):
            gen = sh.get_nested_dicts(sel, *n)
            gen = ((d['model_id'], d['status']) for d in gen if 'status' in d)
            o = _format_dict(gen, 'status %s')
            v.update(o)
        if k[1] == 'calibration' and k[0] in score:
            gen = score[k[0]]
            gen = ((d['model_id'], d['score']) for d in gen if 'score' in d)
            o = _format_dict(gen, 'score %s')
            v.update(o)
            # Per-parameter scores keyed as '<model_id>/<param_id>'.
            for i, j in scores[k[0]].items():
                gen = (('/'.join(
                    (d['model_id'], d['param_id'])), d['score'])
                    for d in j if 'score' in d)
                o = _format_dict(gen, 'score {}/%s'.format(i))
                v.update(o)
    return True
def save_excel(output_fpath, outputs):
    """
    Save dsp outputs in an Excel file.

    :param output_fpath:
        Output file path.
    :type output_fpath: str

    :param outputs:
        Model outputs.
    :type outputs: dict

    :return:
        File path where output are written.
    :rtype: str
    """
    import pandas as pd
    os.makedirs(osp.dirname(output_fpath) or '.', exist_ok=True)
    with pd.ExcelWriter(output_fpath) as writer:
        if 'shifts' in outputs:
            # FIX: pass the sheet name as `sheet_name=`; positional extra
            # args to `to_excel` are deprecated/removed in modern pandas.
            pd.DataFrame(outputs['shifts'], index=[0]).T.to_excel(
                writer, sheet_name='shifts', header=False
            )
        if 'resampled' in outputs:
            data = dict(sh.stack_nested_keys(outputs['resampled']))
            pd.DataFrame(data).to_excel(writer, sheet_name='synced')
        for name, data in outputs.get('data', {}).items():
            pd.DataFrame(data).to_excel(writer, sheet_name='origin.%s' % name)
    return output_fpath
def compare_outputs_vs_targets(data):
    """
    Compares model outputs vs targets.

    :param data:
        Model data.
    :type data: dict

    :return:
        Comparison results.
    :rtype: dict
    """
    res, metrics = {}, _get_metrics()
    for keys, target in sh.stack_nested_keys(data.get('target', {}), depth=3):
        # Only compare targets that have a matching output.
        if sh.are_in_nested_dicts(data, 'output', *keys):
            output = sh.get_nested_dicts(data, 'output', *keys)
            comparison = _compare(target, output, metrics=metrics)
            if comparison:
                sh.get_nested_dicts(
                    res, *keys, default=co2_utl.ret_v(comparison)
                )
    return res
def validate_meta(meta=None, hard_validation=False):
    """
    Validate meta data.

    :param meta:
        Meta data.
    :type meta: dict

    :param hard_validation:
        Add extra data validations.
    :type hard_validation: bool

    :return:
        Validated meta data.
    :rtype: dict
    """
    inputs, errors = _validate_base_with_schema(meta or {}, depth=2)
    if hard_validation:
        from schema import SchemaError
        from .hard import _hard_validation
        for key, value in sorted(sh.stack_nested_keys(inputs, depth=1)):
            for param, msg in _hard_validation(value, 'meta'):
                sh.get_nested_dicts(errors, *key)[param] = \
                    SchemaError([], [msg])
    if _log_errors_msg(errors):
        return sh.NONE  # signal failure to the dispatcher
    return inputs
def parse_dsp_solution(solution):
    """
    Parses the co2mpas model results.

    :param solution:
        Co2mpas model after dispatching.
    :type solution: schedula.Solution

    :return:
        Mapped outputs.
    :rtype: dict[dict]
    """
    res = {}
    # Expand dotted solution keys into a nested dict tree.
    for key, value in solution.items():
        sh.get_nested_dicts(
            res, *key.split('.'), default=co2_utl.ret_v(value)
        )

    # Copy calibrated WLTP co2 values over to the prediction targets.
    for key, value in list(sh.stack_nested_keys(res, depth=3)):
        parents, leaf = key[:-1], key[-1]
        if parents == ('output', 'calibration') and leaf in ('wltp_l', 'wltp_h'):
            value = sh.selector(
                ('co2_emission_value',), value, allow_miss=True
            )
            if value:
                target = sh.get_nested_dicts(res, 'target', 'prediction')
                target[leaf] = sh.combine_dicts(value, target.get(leaf, {}))

    res['pipe'] = solution.pipe
    return res
def read_json(input_fpath, data_names=None):
    """
    Reads the json file.

    :param input_fpath:
        Input file path.
    :type input_fpath: str

    :param data_names:
        Data names to filter out the data sets to synchronize.
    :type data_names: list

    :return:
        Raw data-sets.
    :rtype: dict[str, dict[str, numpy.array]]
    """
    import json
    import numpy as np
    with open(input_fpath) as file:
        raw = json.load(file)
    data = {}
    for keys, values in sh.stack_nested_keys(raw):
        # Skip data-sets not requested by the caller.
        if data_names and keys[0] not in data_names:
            continue
        sh.get_nested_dicts(data, keys[0])[sh.bypass(*keys[1:])] = \
            np.array(values)
    return data
def _validate_base_with_schema(data, depth=4):
    """
    Validate each nested input against the read data-schema.

    :param data:
        Nested input data.
    :type data: dict
    :param depth:
        Nesting depth at which individual values are validated.
    :type depth: int

    :return:
        Validated inputs and collected validation errors.
    :rtype: tuple[dict, dict]
    """
    validate = define_data_schema(read=True).validate
    inputs, errors = {}, {}
    for key, value in sorted(sh.stack_nested_keys(data, depth=depth)):
        parent = sh.get_nested_dicts(inputs, *key[:-1])
        _add_validated_input(parent, validate, key, value, errors)
    return inputs, errors
def _log_errors_msg(errors):
    """
    Log all collected validation errors.

    :param errors:
        Nested validation errors (depth 4).
    :type errors: dict
    :return:
        True when any error was logged, False otherwise.
    :rtype: bool
    """
    if not errors:
        return False
    lines = ['\nInput cannot be parsed, due to:']
    for key, err in sh.stack_nested_keys(errors, depth=4):
        lines.append('{} in {}: {}'.format(key[-1], '/'.join(key[:-1]), err))
    log.error('\n '.join(lines))
    return True
def _validate_base_with_schema(data, depth=4):
    """
    Validate each nested input value against the data schema.

    :param data:
        Nested input data.
    :type data: dict
    :param depth:
        Nesting depth at which individual values are validated.
    :type depth: int

    :return:
        Validated inputs and collected validation errors.
    :rtype: tuple[dict, dict]
    """
    from ..schema import define_data_schema
    validate = define_data_schema().validate
    inputs, errors = {}, {}
    for key, value in sorted(sh.stack_nested_keys(data, depth=depth)):
        parent = sh.get_nested_dicts(inputs, *key[:-1])
        _add_validated_input(parent, validate, key, value, errors)
    return inputs, errors
def _filter_data(report):
    """
    Yield the report's leaves as (dotted-key, value) pairs, skipping
    callables, predictors, Spline lists, and SKIP_PARAMETERS keys.

    :param report:
        Report data; the 'pipe' entry is always dropped.
    :type report: dict
    :return:
        Generator of ('a.b.c', value) pairs.
    """
    report = {k: v for k, v in report.items() if k != 'pipe'}
    for k, v in sh.stack_nested_keys(report):
        # BUG FIX: guard `v[0]` with a truthiness check — the original
        # raised IndexError on an empty list.
        if callable(v) or hasattr(v, 'predict') or \
                (isinstance(v, list) and v and isinstance(v[0], Spline)) or \
                k[-1] in SKIP_PARAMETERS:
            continue
        yield '.'.join(map(str, k)), v
def format_report_output(data):
    """
    Format the report's output branch, annotating each value with its
    matching target/input data and splitting by data format.

    :param data:
        Report data with 'output' (and possibly 'target'/'input') branches.
    :type data: dict
    :return:
        Formatted report output (depth-2 keys, format-split leaves).
    :rtype: dict
    """
    res = {}
    func = functools.partial(sh.get_nested_dicts,
                             default=collections.OrderedDict)
    for k, v in sh.stack_nested_keys(data.get('output', {}), depth=3):
        _add_special_data2report(data, res, k[:-1], 'target', *k)
        # s: whether an 'input' counterpart exists; iv: its value.
        s, iv = _add_special_data2report(data, res, k[:-1], 'input', *k)
        # Keep the output value unless it merely repeats the input.
        if not s or (s and not _is_equal(iv, v)):
            func(res, *k[:-1])[k[-1]] = v

    output = {}
    for k, v in sh.stack_nested_keys(res, depth=2):
        v = _split_by_data_format(v)
        sh.get_nested_dicts(output, *k, default=co2_utl.ret_v(v))
    return output
def _compare(self, books, results):
    """Assert each nested result (depth 3) matches the book value."""
    for key, other in sh.stack_nested_keys(results, depth=3):
        value = sh.get_nested_dicts(books, *key)
        msg = '[{}]{}!{}'.format(*key)
        both_numeric = is_number(value) and is_number(other)
        if both_numeric:
            # Numeric values are compared with a tolerance.
            self.assertAlmostEqual(float(value), float(other), msg=msg)
        else:
            self.assertEqual(value, other, msg=msg)
def test_3_conf(self, options):
    """Check the `conf` command dumps the full default configuration."""
    import yaml
    import schedula as sh
    from co2mpas.defaults import dfl
    from co2mpas.cli import conf
    kw = conf.make_context('conf', list(options)).params
    expected = {k for k, _ in sh.stack_nested_keys(dfl.to_dict())}
    with self.runner.isolated_filesystem():
        result = self.invoke(('conf', ) + options)
        self.assertEqual(result.exit_code, 0)
        with open(kw['output_file'], 'rb') as f:
            # BUG FIX: `yaml.load` without a Loader is unsafe and raises
            # TypeError on PyYAML >= 6.0; use `safe_load` instead.
            r = dict(sh.stack_nested_keys(yaml.safe_load(f)))
        self.assertSetEqual(set(r), expected)
        if kw['model_conf']:
            with open(kw['model_conf'], 'rb') as f:
                for k, v in sh.stack_nested_keys(yaml.safe_load(f)):
                    self.assertEqual(r[k], v)
def _add_times_base(data, scope='base', usage='input', **match):
    """
    Ensure every 'base'/'target' time-series dict has a 'times' vector,
    borrowing it from the matching usage branch or, failing that, from the
    first 'times' found anywhere in `data` (depth 4). Mutates `data`.

    :param data:
        Nested parsed data, updated in place.
    :type data: dict
    :param scope:
        Data scope; only 'base' is processed.
    :type scope: str
    :param usage:
        Usage branch to borrow 'times' from (e.g. 'input').
    :type usage: str
    :param match:
        Extra keys forwarded to `_get_sheet_type`.
    """
    if scope != 'base':
        return
    sh_type = _get_sheet_type(scope=scope, usage=usage, **match)
    n = (scope, 'target')
    # Only time-series sheets of the base/target branch need 'times'.
    if sh_type == 'ts' and sh.are_in_nested_dicts(data, *n):
        t = sh.get_nested_dicts(data, *n)
        for k, v in sh.stack_nested_keys(t, key=n, depth=2):
            if 'times' not in v:
                # Rebuild the path with the usage branch in place of
                # 'target' (position 1). NOTE: this rebinds `n`, which is
                # safe because the iterator above was already created.
                n = list(k + ('times',))
                n[1] = usage
                if sh.are_in_nested_dicts(data, *n):
                    v['times'] = sh.get_nested_dicts(data, *n)
                else:
                    # Fallback: first 'times' vector found anywhere.
                    for i, j in sh.stack_nested_keys(data, depth=4):
                        if 'times' in j:
                            v['times'] = j['times']
                            break
def test_sync(self, data):
    """Run the sync CLI and compare its JSON output with the reference."""
    args, exit_code, file = data
    self.maxDiff = None
    result = self.runner.invoke(cli.sync, args)
    self.assertEqual(exit_code, result.exit_code, result)
    if not file:
        return
    self.assertTrue(osp.isfile(args[1]))
    res = osp.join(results_dir, args[1])
    if not osp.isfile(res):
        return

    def _rounded(fp):
        # Round to 7 decimals so float noise doesn't fail the comparison.
        return {
            k: np.round(v, 7).tolist()
            for k, v in sh.stack_nested_keys(json.load(fp))
        }

    with open(res) as e, open(args[1]) as r:
        self.assertEqual(_rounded(e), _rounded(r))
def _extract_summary_from_summary(report, extracted):
    """
    Copy the emission/consumption summary results into `extracted`.

    :param report:
        Report data with an optional ('summary', 'results') branch.
    :type report: dict
    :param extracted:
        Destination nested dict, updated in place.
    :type extracted: dict
    """
    keys = ('summary', 'results')
    if not sh.are_in_nested_dicts(report, *keys):
        return
    wanted = ('declared_co2_emission', 'co2_emission', 'fuel_consumption')
    for name, results in sh.get_nested_dicts(report, *keys).items():
        if name not in wanted:
            continue
        for k, v in sh.stack_nested_keys(results, depth=3):
            if v:
                sh.get_nested_dicts(extracted, *k).update(v)
def _add2summary(total_summary, summary, base_keys=None):
    """
    Append summary entries (merged with `base_keys`) into `total_summary`.

    :param total_summary:
        Accumulated summary with list leaves, updated in place.
    :type total_summary: dict
    :param summary:
        Summary to merge in (depth 3).
    :type summary: dict
    :param base_keys:
        Extra keys combined into every appended entry.
    :type base_keys: dict
    """
    base_keys = base_keys or {}
    for keys, value in sh.stack_nested_keys(summary, depth=3):
        bucket = sh.get_nested_dicts(total_summary, *keys, default=list)
        entries = value if isinstance(value, list) else [value]
        bucket.extend(sh.combine_dicts(entry, base_keys) for entry in entries)
def _format_scores(scores):
    """
    Re-key the models' 'limits'/'errors' scores and group them for the
    report.

    :param scores:
        Nested scores (depth 3: model, cycle, 'limits'|'errors').
    :type scores: dict
    :return:
        Scores grouped by the trailing keys, with 'model_id'/'param_id'
        fields added to each entry and list leaves.
    :rtype: dict
    """
    res = {}
    for k, j in sh.stack_nested_keys(scores, depth=3):
        if k[-1] in ('limits', 'errors'):
            model_id = k[0]
            # 'errors' leaves get an extra trailing 'score' level.
            extra_field = ('score', ) if k[-1] == 'errors' else ()
            for i, v in sh.stack_nested_keys(j):
                # Re-key as (model, param, cycle, <rest...>, [score]).
                i = (
                    model_id,
                    i[-1],
                    k[1],
                ) + i[:-1] + extra_field
                sh.get_nested_dicts(res, *i, default=co2_utl.ret_v(v))
    sco = {}
    for k, v in sorted(sh.stack_nested_keys(res, depth=4)):
        # Move the leading (model_id, param_id) keys into the entry itself.
        v.update(sh.map_list(['model_id', 'param_id'], *k[:2]))
        sh.get_nested_dicts(sco, *k[2:], default=list).append(v)
    return sco