def filter_summary(changes, new_outputs, summary):
    """
    Collects the summary entries related to the given plan changes.

    :param changes:
        Mapping whose keys are tuples (they are sliced and concatenated with
        tuples below) and whose values are the changed data.
    :type changes: dict

    :param new_outputs:
        Iterable of dot-separated output identifiers.

    :param summary:
        Nested summary, walked three levels deep.
    :type summary: dict

    :return:
        Nested dictionary holding the changed values plus the matching
        summary entries.
    :rtype: dict
    """
    # Each output id is split on '.', its first component is dropped and the
    # rest is reversed.
    selected = {tuple(name.split('.')[:0:-1]) for name in new_outputs}
    variations = {}

    for key, value in changes.items():
        # Reversed key without its first two items and its last item.
        node = key[-2:1:-1]
        selected.add(node)
        # NOTE(review): the filter tests the whole `key` against `node`, not
        # the single part; it looks like `part not in node` was intended --
        # confirm before changing, since it alters the generated key.
        plan_id = 'plan.%s' % '.'.join(
            part for part in key[:-1] if key not in node
        )
        path = node + (plan_id, key[-1])
        sh.get_nested_dicts(variations, *path, default=co2_utl.ret_v(value))

    for path, value in sh.stack_nested_keys(summary, depth=3):
        if path[:-1] not in selected:
            continue
        sh.get_nested_dicts(variations, *path, default=co2_utl.ret_v(value))

    return variations
def format_report_scores(data):
    """
    Formats the calibration model scores for the report.

    :param data:
        Nested data; only the ('data', 'calibration', 'model_scores') branch
        is read.
    :type data: dict

    :return:
        Nested dictionary with the formatted 'selections' and 'scores'
        (each stored only when non-empty), under the same key path as in
        `data`.
    :rtype: dict
    """
    report = {}
    root = 'data', 'calibration', 'model_scores'
    if not co2_utl.are_in_nested_dicts(data, *root):
        return report

    path = root + ('selections',)
    selections = _format_selections(co2_utl.get_nested_dicts(data, *path))
    if selections:
        co2_utl.get_nested_dicts(
            report, *path, default=co2_utl.ret_v(selections)
        )

    path = root + ('scores',)
    formatted = _format_scores(co2_utl.get_nested_dicts(data, *path))
    if formatted:
        co2_utl.get_nested_dicts(
            report, *path, default=co2_utl.ret_v(formatted)
        )

    return report
def format_report_output(data):
    """
    Builds the report structure from the model outputs.

    :param data:
        Nested data; its 'output' branch is walked three levels deep and the
        matching 'target' and 'input' entries are looked up.
    :type data: dict

    :return:
        Nested dictionary whose two-level entries have been passed through
        `_split_by_data_format`.
    :rtype: dict
    """
    report = {}
    outputs = data.get('output', {})
    for path, value in co2_utl.stack_nested_keys(outputs, depth=3):
        parent = path[:-1]
        _add_special_data2report(data, report, parent, 'target', *path)
        found, given = _add_special_data2report(
            data, report, parent, 'input', *path
        )
        # The output is skipped only when an input exists and equals it.
        if not (found and _is_equal(given, value)):
            co2_utl.get_nested_dicts(
                report, *path, default=co2_utl.ret_v(value)
            )

    formatted = {}
    for path, group in co2_utl.stack_nested_keys(report, depth=2):
        split = _split_by_data_format(group)
        co2_utl.get_nested_dicts(
            formatted, *path, default=co2_utl.ret_v(split)
        )
    return formatted
def parse_dsp_solution(solution):
    """
    Maps the flat results of the co2mpas model into nested dictionaries.

    :param solution:
        Co2mpas model after dispatching.
    :type solution: co2mpas.dispatcher.Solution

    :return:
        Mapped outputs, plus the solution pipe under the 'pipe' key.
    :rtype: dict[dict]
    """
    parsed = {}
    for name, value in solution.items():
        # Dot-separated names become nested key paths.
        dsp_utl.get_nested_dicts(
            parsed, *name.split('.'), default=co2_utl.ret_v(value)
        )

    # The walk is materialised first because `parsed` is modified below.
    for path, value in list(dsp_utl.stack_nested_keys(parsed, depth=3)):
        parent, cycle = path[:-1], path[-1]
        if parent != ('output', 'calibration'):
            continue
        if cycle not in ('wltp_l', 'wltp_h'):
            continue

        co2 = dsp_utl.selector(
            ('co2_emission_value', ), value, allow_miss=True
        )
        if co2:
            targets = dsp_utl.get_nested_dicts(parsed, 'target', 'prediction')
            targets[cycle] = dsp_utl.combine_dicts(
                co2, targets.get(cycle, {})
            )

    parsed['pipe'] = solution.pipe
    return parsed
def compare_outputs_vs_targets(data):
    """
    Evaluates the model outputs against the corresponding targets.

    :param data:
        Model data.
    :type data: dict

    :return:
        Comparison results, keyed like the targets; only non-empty
        comparisons are stored.
    :rtype: dict
    """
    comparison = {}
    metrics = _get_metrics()
    targets = data.get('target', {})

    for path, target in sh.stack_nested_keys(targets, depth=3):
        # Targets without a matching output are ignored.
        if sh.are_in_nested_dicts(data, 'output', *path):
            output = sh.get_nested_dicts(data, 'output', *path)
            outcome = _compare(target, output, metrics=metrics)
            if outcome:
                sh.get_nested_dicts(
                    comparison, *path, default=co2_utl.ret_v(outcome)
                )

    return comparison
def compare_outputs_vs_targets(data):
    """
    Evaluates the model outputs against the corresponding targets.

    :param data:
        Model data.
    :type data: dict

    :return:
        Comparison results, keyed like the targets; only non-empty
        comparisons are stored.
    :rtype: dict
    """
    comparison = {}
    metrics = {
        'mean_absolute_error': mean_absolute_error,
        'correlation_coefficient': _correlation_coefficient,
        'accuracy_score': accuracy_score,
    }
    targets = data.get('target', {})

    for path, target in co2_utl.stack_nested_keys(targets, depth=3):
        # Targets without a matching output are ignored.
        if co2_utl.are_in_nested_dicts(data, 'output', *path):
            output = co2_utl.get_nested_dicts(data, 'output', *path)
            outcome = _compare(target, output, metrics=metrics)
            if outcome:
                co2_utl.get_nested_dicts(
                    comparison, *path, default=co2_utl.ret_v(outcome)
                )

    return comparison
def _summary2df(data): res = [] summary = data.get('summary', {}) if 'results' in summary: r = {} fun = partial(dsp_utl.map_list, [{}, 'cycle', 'stage', 'usage']) for n, m in summary['results'].items(): gen = ((fun(v, *k),) for k, v in co2_utl.stack_nested_keys(m, depth=3)) v = [v[0] for v in _yield_sorted_params(gen)] co2_utl.get_nested_dicts(r, n, default=co2_utl.ret_v(v)) df = _make_summarydf(r, index=['cycle', 'stage', 'usage'], depth=1) c = list(map(_rm_sub_parts, df.columns)) df.columns = pd.MultiIndex.from_tuples(c) setattr(df, 'name', 'results') res.append(df) if 'selection' in summary: df = pd.DataFrame(summary['selection']) df.set_index(['model_id'], inplace=True) setattr(df, 'name', 'selection') res.append(df) if 'comparison' in summary: df = _comparison2df(summary['comparison']) if df is not None: setattr(df, 'name', 'comparison') res.append(df) if res: return {'summary': res} return {}
def _add_special_data2report(data, report, to_keys, *from_keys): if from_keys[-1] != 'times' and \ co2_utl.are_in_nested_dicts(data, *from_keys): v = co2_utl.get_nested_dicts(data, *from_keys) n = to_keys + ('{}.{}'.format(from_keys[0], from_keys[-1]),) co2_utl.get_nested_dicts(report, *n, default=co2_utl.ret_v(v)) return True, v return False, None
def combine_scores(scores):
    """
    Merges the scores into per-model selections.

    :param scores:
        Mapping of score data. Falsy values are dropped and the last nine
        characters of every key are stripped (presumably a fixed suffix --
        confirm against the caller).
    :type scores: dict

    :return:
        ``{'selections': ..., 'scores': ...}``, or an empty dictionary when
        no score is left after filtering.
    :rtype: dict
    """
    scores = {name[:-9]: val for name, val in scores.items() if val}
    if not scores:
        return {}

    best_keys = {'models': 'selected_models', 'success': 'status'}
    selections = {}
    for (model_id, cycle), val in co2_utl.stack_nested_keys(scores, depth=2):
        entry = {}
        if 'models' in val:
            entry['models'] = val['models']
        entry.update(val.get('score', {}))
        co2_utl.get_nested_dicts(
            selections, model_id, cycle, default=co2_utl.ret_v(entry)
        )

        # Only the first entry met for a model fills its 'best' slot.
        if co2_utl.are_in_nested_dicts(selections, model_id, 'best'):
            continue
        best = dsp_utl.map_dict(best_keys, dsp_utl.selector(best_keys, entry))
        best['from'] = cycle
        co2_utl.get_nested_dicts(
            selections, model_id, 'best', default=co2_utl.ret_v(best)
        )

    return {'selections': selections, 'scores': scores}
def split_prediction_models(scores, calibrated_models, input_models, cycle_ids=()):
    """
    Splits the selection data and assembles one models dict per cycle.

    :param scores:
        Nested score data, walked two levels deep as ``(k, c)`` key pairs.
    :type scores: dict

    :param calibrated_models:
        Mapping ``k -> n`` where ``n`` maps a cycle id (or ``sh.NONE`` as
        fallback) to a 3-item sequence ``(c, s, m)``; ``m`` is used as a
        mapping of models.
    :type calibrated_models: dict

    :param input_models:
        Input models, copied once per cycle id.
    :type input_models: dict

    :param cycle_ids:
        Cycle ids to build the prediction models for. It is iterated several
        times.
    :type cycle_ids: tuple

    :return:
        Tuple whose first item is the selection summary (keys
        'param_selections', 'model_selections', 'score_by_model', 'scores')
        followed by one models dict per item of `cycle_ids`, in order.
    :rtype: tuple
    """
    sbm, model_sel, par = {}, {}, {}
    for (k, c), v in sh.stack_nested_keys(scores, depth=2):
        r = sh.selector(['models'], v, allow_miss=True)

        # Mark every listed model as coming from calibration cycle `c`.
        for m in r.get('models', ()):
            sh.get_nested_dicts(par, m, 'calibration')[c] = c

        r.update(v.get('score', {}))
        sh.get_nested_dicts(sbm, k, c, default=co2_utl.ret_v(r))
        # `r` is narrowed to the 'success' entry, renamed to 'status'.
        r = sh.selector(['success'], r, allow_miss=True)
        r = sh.map_dict({'success': 'status'}, r, {'from': c})
        sh.get_nested_dicts(model_sel, k, 'calibration')[c] = r

    # Every input model starts flagged as 'input' for each cycle.
    p = {i: dict.fromkeys(input_models, 'input') for i in cycle_ids}

    models = {i: input_models.copy() for i in cycle_ids}

    for k, n in sorted(calibrated_models.items()):
        # Fallback used for cycles that have no dedicated entry in `n`.
        d = n.get(sh.NONE, (None, True, {}))

        for i in cycle_ids:
            c, s, m = n.get(i, d)
            if m:
                s = {'from': c, 'status': s}
                sh.get_nested_dicts(model_sel, k, 'prediction')[i] = s
                models[i].update(m)
                p[i].update(dict.fromkeys(m, c))

    # Re-key the per-cycle flags so the last key of each path comes first.
    for k, v in sh.stack_nested_keys(p, ('prediction', ), depth=2):
        sh.get_nested_dicts(par, k[-1], *k[:-1], default=co2_utl.ret_v(v))

    s = {
        'param_selections': par,
        'model_selections': model_sel,
        'score_by_model': sbm,
        'scores': scores
    }
    return (s, ) + tuple(models.get(k, {}) for k in cycle_ids)
def format_report_scores(data):
    """
    Formats the calibration model scores and prediction models for the
    report.

    :param data:
        Nested data; the ('data', 'calibration', 'model_scores') branch and
        the ('data', 'prediction', 'models_<cycle>') entries are read.
    :type data: dict

    :return:
        Nested dictionary with the non-empty formatted sections and, when
        any prediction model exists, a 'models_uuid' list.
    :rtype: dict
    """
    report = {}
    root = 'data', 'calibration', 'model_scores'

    if sh.are_in_nested_dicts(data, *root):
        # (section name, formatter, extra positional arguments)
        sections = (
            ('param_selections', _format_selection, (2, 'param_id')),
            ('model_selections', _format_selection, (3,)),
            ('score_by_model', _format_selection, (2,)),
            ('scores', _format_scores, ()),
        )
        for name, formatter, extra in sections:
            path = root + (name,)
            formatted = formatter(sh.get_nested_dicts(data, *path), *extra)
            if formatted:
                sh.get_nested_dicts(
                    report, *path, default=co2_utl.ret_v(formatted)
                )

    models = []
    for cycle in ('nedc_h', 'nedc_l', 'wltp_h', 'wltp_l'):
        path = 'data', 'prediction', 'models_%s' % cycle
        if not sh.are_in_nested_dicts(data, *path):
            continue
        # The models are serialised with dill and base64-encoded.
        payload = dill.dumps(sh.get_nested_dicts(data, *path))
        models.append({'cycle': cycle, 'uuid': base64.encodebytes(payload)})

    if models:
        path = root + ('models_uuid',)
        sh.get_nested_dicts(report, *path, default=co2_utl.ret_v(models))

    return report
def select_declaration_data(data, diff=None):
    """
    Keeps only the entries of `data` enabled in the declaration template.

    :param data:
        Nested input data.
    :type data: dict

    :param diff:
        Optional set-like container, updated in place: it is cleared and then
        filled with the depth-4 key paths of `data` that are missing from the
        result.
    :type diff: set, optional

    :return:
        Nested dictionary with the selected entries.
    :rtype: dict
    """
    selected = {}
    template = constants.con_vals.DECLARATION_DATA
    for path, enabled in sh.stack_nested_keys(template):
        if not enabled or not sh.are_in_nested_dicts(data, *path):
            continue
        value = sh.get_nested_dicts(data, *path)
        sh.get_nested_dicts(selected, *path, default=co2_utl.ret_v(value))

    if diff is not None:
        diff.clear()
        diff.update(
            item[0] for item in sh.stack_nested_keys(data, depth=4)
        )
        diff.difference_update(
            item[0] for item in sh.stack_nested_keys(selected, depth=4)
        )

    return selected
def filter_summary(changes, summary):
    """
    Collects the summary entries related to the given plan changes.

    :param changes:
        Mapping from dot-separated keys to dictionaries of changed values.
    :type changes: dict

    :param summary:
        Nested summary, walked three levels deep.
    :type summary: dict

    :return:
        Nested dictionary holding the changes (under a 'plan.<name>' key),
        the matching summary entries and the related deltas.
    :rtype: dict
    """
    # A set gives constant-time membership tests in the summary walk below.
    selected, variations = set(), {}
    for k, v in changes.items():
        # Reverse the dotted key; the original first component ends up last.
        k = tuple(k.split('.')[::-1])
        selected.add(k[:-1])
        k = k[:-1] + ('plan.%s' % k[-1],)
        co2_utl.get_nested_dicts(variations, *k).update(v)

    for k, v in co2_utl.stack_nested_keys(summary, depth=3):
        if k[:-1] in selected:
            co2_utl.get_nested_dicts(variations, *k, default=co2_utl.ret_v(v))

    # `variations` is both the set of changes inspected and the output.
    _add_delta2filtered_summary(variations, summary, base=variations)
    return variations
def overwrite_declaration_config_data(data):
    """
    Returns a combined copy of `data` whose ('config', 'selector', 'all')
    entry is completed with the declaration selector configuration.

    :param data:
        Nested input data.
    :type data: dict

    :return:
        Result of ``sh.combine_nested_dicts(data, depth=3)`` with the
        selector configuration replaced by a deep copy that also holds the
        entries of ``DECLARATION_SELECTOR_CONFIG``.
    :rtype: dict
    """
    defaults = constants.con_vals.DECLARATION_SELECTOR_CONFIG
    merged = sh.combine_nested_dicts(data, depth=3)
    parents, leaf = ('config', 'selector'), 'all'

    # Work on a deep copy so the source configuration is left untouched.
    selector_cfg = copy.deepcopy(sh.get_nested_dicts(merged, *parents, leaf))
    for path, value in sh.stack_nested_keys(defaults):
        sh.get_nested_dicts(selector_cfg, *path, default=co2_utl.ret_v(value))

    sh.get_nested_dicts(merged, *parents)[leaf] = selector_cfg
    return merged
def _format_scores(scores):
    """
    Re-arranges the 'limits' and 'errors' score branches into lists of
    records.

    :param scores:
        Nested scores, walked three levels deep; only the paths ending with
        'limits' or 'errors' are used.
    :type scores: dict

    :return:
        Nested dictionary whose leaves are lists of records, each carrying
        'model_id' and 'param_id'.
    :rtype: dict
    """
    collected = {}
    for path, block in co2_utl.stack_nested_keys(scores, depth=3):
        kind = path[-1]
        if kind not in ('limits', 'errors'):
            continue
        model_id = path[0]
        # Error values are stored one level deeper, under 'score'.
        suffix = ('score',) if kind == 'errors' else ()
        for sub_path, value in co2_utl.stack_nested_keys(block):
            target = (model_id, sub_path[-1], path[1],) + sub_path[:-1]
            target += suffix
            co2_utl.get_nested_dicts(
                collected, *target, default=co2_utl.ret_v(value)
            )

    formatted = {}
    for path, record in co2_utl.stack_nested_keys(collected, depth=4):
        # The first two keys become fields of the record itself.
        record.update(dsp_utl.map_list(['model_id', 'param_id'], *path[:2]))
        co2_utl.get_nested_dicts(formatted, *path[2:], default=list).append(
            record
        )
    return formatted
def define_new_inputs(data, base):
    """
    Builds the inputs of a new run from a base solution and the changes.

    :param data:
        Mapping from key paths (tuples starting with 'base' or 'flag') to the
        new values; ``sh.EMPTY`` marks an entry to be removed.
    :type data: dict

    :param base:
        Base solution. It is accessed with ``.get``, item lookup and
        ``.get_node`` -- presumably a dispatcher solution; confirm against
        the caller.

    :return:
        The new nested inputs and the ids of the outputs to compute.
    :rtype: tuple
    """
    remove, new_base, new_flag, new_data = [], {}, set(), set()

    # Seed the new inputs with the base data and flags.
    for k, v in sh.stack_nested_keys(base.get('data', {}), ('base', ), 4):
        sh.get_nested_dicts(new_base, *k, default=co2_utl.ret_v(v))

    for k, v in sh.stack_nested_keys(base.get('flag', {}), ('flag', ), 1):
        sh.get_nested_dicts(new_base, *k, default=co2_utl.ret_v(v))

    for k, v in data.items():
        if v is sh.EMPTY:
            # Remember the entry; it is popped once everything is set.
            remove.append(k)

        sh.get_nested_dicts(new_base, *k[:-1])[k[-1]] = v

        if k[0] == 'base':
            new_data.add('.'.join(k[1:4]))
        elif k[0] == 'flag':
            new_flag.add(k[1:2])

    if 'dsp_solution' in _get_inputs(base, new_flag)[0]:
        # Values taken from the previous solution are stored under 'base'.
        sol = base['dsp_solution']
        n, out_id = _get_inputs(sol, new_data)
        for k in n.intersection(sol):
            sh.get_nested_dicts(new_base, 'base', *k.split('.'),
                                default=co2_utl.ret_v(sol[k]))
    else:
        # Otherwise every data node of the 'CO2MPAS model' is an output.
        d = base.get_node('CO2MPAS model', node_attr='function')[0].dsp
        out_id = set(d.data_nodes)

    for k in remove:
        sh.get_nested_dicts(new_base, *k[:-1]).pop(k[-1])

    return new_base, out_id
def get_values(data, keys, tag=(), update=lambda k, v: v, base=None):
    """
    Extracts the selected keys from the input, target and output data.

    :param data:
        Nested data; only its 'input', 'target' and 'output' branches are
        considered.
    :type data: dict

    :param keys:
        Keys selected from every depth-3 entry.

    :param tag:
        Key prefix (tuple) prepended to every stored path.
    :type tag: tuple

    :param update:
        Callable ``update(path, values)`` returning the values to store; a
        falsy result skips the entry.
    :type update: callable

    :param base:
        Optional dictionary to fill in place; a new one is created when
        omitted.
    :type base: dict, optional

    :return:
        The filled dictionary, keyed by `tag` plus the reversed data path.
    :rtype: dict
    """
    sections = ('input', 'target', 'output')
    data = sh.selector(sections, data, allow_miss=True)

    if base is None:
        base = {}

    for path, content in sh.stack_nested_keys(data, depth=3):
        reversed_path = path[::-1]
        picked = update(
            reversed_path, sh.selector(keys, content, allow_miss=True)
        )
        if not picked:
            continue
        sh.get_nested_dicts(
            base, *(tag + reversed_path), default=co2_utl.ret_v(picked)
        )

    return base
def calculate_delta(data):
    """
    Computes the CO2 emission delta between the WLTP and NEDC predictions.

    For each of the '_h' and '_l' variants, the first difference
    (``np.diff``) of the available NEDC and WLTP 'co2_emission_value'
    entries is stored; the variant is skipped when fewer than two values
    are available.

    :param data:
        Nested data holding ('output', 'prediction', <cycle>,
        'co2_emission_value') entries.
    :type data: dict

    :return:
        Nested dictionary keyed by NEDC cycle, then by the WLTP cycle and
        'co2_emission_value'.
    :rtype: dict
    """
    # delta
    path = ['output', 'prediction', 'cycle', 'co2_emission_value']
    deltas = {}
    for template in ('%s_h', '%s_l'):
        values = []
        for cycle in ('nedc', 'wltp'):
            path[2] = template % cycle
            if co2_utl.are_in_nested_dicts(data, *path):
                values.append(co2_utl.get_nested_dicts(data, *path))

        try:
            default = co2_utl.ret_v(np.diff(values)[0])
        except IndexError:
            # Fewer than two values: no delta for this variant.
            continue

        # `path[2]` still names the WLTP cycle of the inner loop.
        co2_utl.get_nested_dicts(
            deltas, template % 'nedc', *path[2:], default=default
        )
    return deltas
def format_report_output(data):
    """
    Builds the report structure from the model outputs.

    :param data:
        Nested data; its 'output' branch is walked three levels deep and the
        matching 'target' and 'input' entries are looked up.
    :type data: dict

    :return:
        Nested dictionary whose two-level entries have been passed through
        `_split_by_data_format`.
    :rtype: dict
    """
    report = {}
    for path, value in sh.stack_nested_keys(data.get('output', {}), depth=3):
        parent, leaf = path[:-1], path[-1]
        _add_special_data2report(data, report, parent, 'target', *path)
        found, given = _add_special_data2report(
            data, report, parent, 'input', *path
        )
        # The output is skipped only when an input exists and equals it.
        if found and _is_equal(given, value):
            continue
        node = sh.get_nested_dicts(
            report, *parent, default=collections.OrderedDict
        )
        node[leaf] = value

    formatted = {}
    for path, group in sh.stack_nested_keys(report, depth=2):
        split = _split_by_data_format(group)
        sh.get_nested_dicts(formatted, *path, default=co2_utl.ret_v(split))

    return formatted
def re_sample_targets(data):
    """
    Re-samples the target time series on the output time base.

    :param data:
        Nested data with 'target' and 'output' branches.
    :type data: dict

    :return:
        Nested dictionary with, for every target that has a matching output,
        the target values restricted to the output keys; time series are
        interpolated (``np.interp``) on the output times when the two time
        vectors differ, and dropped when either lacks 'times'.
    :rtype: dict
    """
    resampled = {}
    targets = data.get('target', {})
    for path, target in sh.stack_nested_keys(targets, depth=2):
        if not sh.are_in_nested_dicts(data, 'output', *path):
            continue

        output = _split_by_data_format(
            sh.get_nested_dicts(data, 'output', *path)
        )
        selected = sh.selector(
            output, _split_by_data_format(target), allow_miss=True
        )

        if 'times' in selected.get('ts', {}) and 'times' in output['ts']:
            series = selected['ts']
            new_times = output['ts']['times']
            old_times = series.pop('times')
            if not _is_equal(new_times, old_times):
                for name, samples in series.items():
                    series[name] = np.interp(new_times, old_times, samples)
        else:
            # Without both time vectors the series cannot be aligned.
            selected.pop('ts', None)

        merged = sh.combine_dicts(*selected.values())
        sh.get_nested_dicts(resampled, *path, default=co2_utl.ret_v(merged))

    return resampled
def _add_delta2filtered_summary(changes, summary, base=None):
    """
    Copies into `base` the summary deltas related to the changed cycles.

    A delta ``summary['delta'][c][k]['co2_emission_value']`` is copied when
    either cycle ``c`` or cycle ``k`` has a
    ('prediction', 'output', 'co2_emission_value') entry in `changes`.

    :param changes:
        Nested changes, keyed by cycle.
    :type changes: dict

    :param summary:
        Nested summary; only its 'delta' branch is read.
    :type summary: dict

    :param base:
        Optional dictionary to fill in place (it may be the same object as
        `changes`); a new one is created when omitted.
    :type base: dict, optional

    :return:
        The filled dictionary.
    :rtype: dict
    """
    # A tuple (not a set) keeps the insertion order into `base` identical
    # across interpreter runs.
    cycles = ('nedc_h', 'nedc_l', 'wltp_h', 'wltp_l')
    value = 'co2_emission_value'
    ref = 'prediction', 'output', value
    base = {} if base is None else base

    # Evaluated once per cycle; the writes below only touch the 'delta'
    # branch, so they cannot affect these checks even if `base is changes`.
    changed = {
        c for c in cycles if co2_utl.are_in_nested_dicts(changes, c, *ref)
    }

    for c in cycles:
        if not co2_utl.are_in_nested_dicts(summary, 'delta', c):
            continue
        for k in cycles:
            if k == c or not (c in changed or k in changed):
                continue
            n = 'delta', c, k, value
            if co2_utl.are_in_nested_dicts(summary, *n):
                v = co2_utl.get_nested_dicts(summary, *n)
                co2_utl.get_nested_dicts(base, *n, default=co2_utl.ret_v(v))
    return base