def parse_dsp_model(model):
    """
    Maps the dotted output names of a dispatched co2mpas model to nested dicts.

    :param model:
        Co2mpas model after dispatching.
    :type model: co2mpas.dispatcher.Dispatcher

    :return:
        Mapped outputs, with the model pipe stored under the `pipe` key.
    :rtype: dict[dict]
    """
    outputs = {}

    # Every dotted output name becomes a path of nested keys.
    for name, value in model.data_output.items():
        path = name.split('.')
        co2_utl.get_nested_dicts(outputs, *path, default=co2_utl.ret_v(value))

    wltp_cycles = ('wltp_l', 'wltp_h')

    # Iterate over a snapshot, because `outputs` is modified in the loop.
    for keys, value in list(co2_utl.stack_nested_keys(outputs, depth=3)):
        parents, cycle = keys[:-1], keys[-1]
        if parents != ('output', 'calibration') or cycle not in wltp_cycles:
            continue

        co2 = dsp_utl.selector(
            ('co2_emission_value', ), value, allow_miss=True
        )
        if not co2:
            continue

        # Copy the calibrated WLTP CO2 value among the prediction targets.
        targets = co2_utl.get_nested_dicts(outputs, 'target', 'prediction')
        targets[cycle] = dsp_utl.combine_dicts(co2, targets.get(cycle, {}))

    outputs['pipe'] = model.pipe

    return outputs
def get_chart_reference(report):
    """
    Collects the chart series references for the time series of a report.

    :param report:
        Report data; only its `output` branch is read.
    :type report: dict

    :return:
        Nested dict of chart definitions, each holding a `series` list of
        `x`/`y`/`label` references plus the mapped graph settings.
    :rtype: dict
    """
    charts, graphs = {}, _map_cycle_report_graphs()
    outputs = report.get('output', {})
    items = co2_utl.stack_nested_keys(outputs, key=('output',), depth=3)

    for keys, series in sorted(items):
        # Only time-series groups that carry a time vector can be plotted.
        if keys[-1] != 'ts' or 'times' not in series:
            continue

        label = '{}/%s'.format(_sheet_name(keys))
        for name in sorted(series):
            param_id = _re_params_name.match(name)['param']
            graph = graphs.get(param_id, None)
            if not graph:
                continue

            reference = {
                'x': keys + ('times',),
                'y': keys + (name,),
                'label': label % name
            }
            path = keys[2], param_id, 'series'
            series_list = co2_utl.get_nested_dicts(charts, *path, default=list)
            series_list.append(reference)

    # Complete every chart with its graph settings (the label is dropped).
    for keys, chart in co2_utl.stack_nested_keys(charts, depth=2):
        graph = graphs[keys[1]]
        graph.pop('label', None)
        chart.update(graph)

    return charts
def compare_outputs_vs_targets(data):
    """
    Compares the model outputs against the available targets.

    :param data:
        Model data.
    :type data: dict

    :return:
        Comparison results, stored under the same keys as the targets.
    :rtype: dict
    """
    metrics = {
        'mean_absolute_error': mean_absolute_error,
        'correlation_coefficient': _correlation_coefficient,
        'accuracy_score': accuracy_score,
    }
    targets = data.get('target', {})
    comparison = {}

    for keys, target in co2_utl.stack_nested_keys(targets, depth=3):
        # Only targets that have a matching output are compared.
        if co2_utl.are_in_nested_dicts(data, 'output', *keys):
            output = co2_utl.get_nested_dicts(data, 'output', *keys)
            scores = _compare(target, output, metrics=metrics)
            if scores:
                co2_utl.get_nested_dicts(
                    comparison, *keys, default=co2_utl.ret_v(scores)
                )

    return comparison
def _summary2df(data):
    """
    Converts the `summary` branch of the data into named data frames.

    :param data:
        Data that may contain a `summary` dict with the optional `results`,
        `selection` and `comparison` entries.
    :type data: dict

    :return:
        `{'summary': [frames]}` when at least one frame is built, otherwise
        an empty dict.
    :rtype: dict
    """
    summary = data.get('summary', {})
    frames = []

    if 'results' in summary:
        index = ['cycle', 'stage', 'usage']
        to_row = partial(dsp_utl.map_list, [{}, 'cycle', 'stage', 'usage'])
        results = {}
        for name, values in summary['results'].items():
            rows = (
                (to_row(value, *keys),)
                for keys, value in co2_utl.stack_nested_keys(values, depth=3)
            )
            ordered = [row[0] for row in _yield_sorted_params(rows)]
            co2_utl.get_nested_dicts(
                results, name, default=co2_utl.ret_v(ordered)
            )

        df = _make_summarydf(results, index=index, depth=1)
        columns = [_rm_sub_parts(column) for column in df.columns]
        df.columns = pd.MultiIndex.from_tuples(columns)
        df.name = 'results'
        frames.append(df)

    if 'selection' in summary:
        df = pd.DataFrame(summary['selection'])
        df.set_index(['model_id'], inplace=True)
        df.name = 'selection'
        frames.append(df)

    if 'comparison' in summary:
        df = _comparison2df(summary['comparison'])
        if df is not None:
            df.name = 'comparison'
            frames.append(df)

    return {'summary': frames} if frames else {}
def _add_special_data2report(data, report, to_keys, *from_keys):
    """
    Copies a value of `data` into `report` under a `<first>.<last>` key.

    :param data:
        Source nested dict.
    :type data: dict

    :param report:
        Destination nested dict, updated in place.
    :type report: dict

    :param to_keys:
        Parent keys of the destination.
    :type to_keys: tuple

    :param from_keys:
        Path of the value inside `data`; paths ending with `times` are
        skipped.
    :type from_keys: str

    :return:
        `(True, value)` when the value has been found, otherwise
        `(False, None)`.
    :rtype: tuple
    """
    if from_keys[-1] == 'times':
        return False, None

    if not co2_utl.are_in_nested_dicts(data, *from_keys):
        return False, None

    value = co2_utl.get_nested_dicts(data, *from_keys)
    name = '{}.{}'.format(from_keys[0], from_keys[-1])
    path = to_keys + (name,)
    co2_utl.get_nested_dicts(report, *path, default=co2_utl.ret_v(value))
    return True, value
def _format_selections(selections):
    """
    Flattens the model selections into lists of rows grouped by key.

    :param selections:
        Selections per model id; each one must contain a `best` entry.
    :type selections: dict

    :return:
        For every non-`best` key, the list of rows (one per model) merged
        with the `best` values and tagged with `model_id`.
    :rtype: dict
    """
    formatted = {}
    for model_id, selection in selections.items():
        # Work on a copy, so that the caller's data is left untouched.
        selection = deepcopy(selection)
        best = selection.pop('best')
        for name, row in selection.items():
            row.update(best)
            row['model_id'] = model_id
            rows = co2_utl.get_nested_dicts(formatted, name, default=list)
            rows.append(row)
    return formatted
def _extract_summary_from_summary(report, extracted):
    """
    Copies the CO2/fuel results and the delta of the summary to `extracted`.

    :param report:
        Report data that may contain `summary.results` and `summary.delta`.
    :type report: dict

    :param extracted:
        Extracted summary, updated in place.
    :type extracted: dict
    """
    results_path = ('summary', 'results')
    if co2_utl.are_in_nested_dicts(report, *results_path):
        results = co2_utl.get_nested_dicts(report, *results_path)
        for name, values in results.items():
            if name not in ('co2_emission', 'fuel_consumption'):
                continue

            for keys, params in co2_utl.stack_nested_keys(values, depth=3):
                if params:
                    co2_utl.get_nested_dicts(extracted, *keys).update(params)

    delta_path = ('summary', 'delta')
    if co2_utl.are_in_nested_dicts(report, *delta_path):
        extracted['delta'] = co2_utl.get_nested_dicts(report, *delta_path)
def _parse_base_data(res, match, sheet, sheet_name,
                     re_params_name=_re_params_name):
    """
    Parses the data of an input sheet and merges them into `res`.

    :param res:
        Nested dict where the parsed data are merged (updated in place).
    :type res: dict

    :param match:
        Sheet description. It must contain `scope`; `type` is added in place
        when missing (`ts` if `cycle` is given, otherwise `pa`).
    :type match: dict

    :param sheet:
        Sheet object handed to `lasso`.

    :param sheet_name:
        Name of the sheet, used to build the references and error messages.
    :type sheet_name: str

    :param re_params_name:
        Compiled regular expression forwarded to `parse_values`.

    :return:
        An empty dict when a time-series sheet cannot be read, otherwise
        None (the results are merged into `res`).
    :rtype: dict | None

    :raise ValueError:
        If a column of a time-series sheet contains nan values.
    """
    parsed = {}
    defaults = {'usage': 'input', 'stage': 'calibration'}

    if 'type' not in match:
        match['type'] = 'pa' if 'cycle' not in match else 'ts'

    match = dsp_utl.combine_dicts(defaults, match)

    if match['type'] == 'pa':
        xl_ref = '#%s!B2:C_:["pipe", ["dict", "recurse"]]' % sheet_name
        data = lasso(xl_ref, sheet=sheet)
    else:
        # Reading is best-effort: a sheet that cannot be captured is skipped.
        # `Exception` (not a bare `except`) is caught, so that
        # KeyboardInterrupt and SystemExit are never swallowed.
        # noinspection PyBroadException
        try:
            xl_ref = '#%s!A2(R):.3:RD:["df", {"header": 0}]' % sheet_name
            data = lasso(xl_ref, sheet=sheet)
        except Exception:
            return {}

        # Drop the completely empty rows and columns.
        data.dropna(how='all', inplace=True)
        data.dropna(axis=1, how='all', inplace=True)

        # Every remaining column has to be completely filled.
        is_full = data.count(0) == len(data.index)
        # noinspection PyUnresolvedReferences
        drop = [name for name, full in is_full.items() if not full]
        if drop:
            msg = 'Columns {} in {} sheet contains nan.\n ' \
                  'Please correct the inputs!'
            raise ValueError(msg.format(drop, sheet_name))

    for keys, value in parse_values(data, match, re_params_name):
        co2_utl.get_nested_dicts(parsed, *keys[:-1])[keys[-1]] = value

    target_keys = (match['scope'], 'target')
    if match['type'] == 'ts' and \
            co2_utl.are_in_nested_dicts(parsed, *target_keys):
        targets = co2_utl.get_nested_dicts(parsed, *target_keys)
        it = co2_utl.stack_nested_keys(targets, key=target_keys, depth=2)
        for keys, series in it:
            if 'times' in series:
                continue

            # First look for the time vector under the sheet usage...
            times_keys = list(keys + ('times', ))
            times_keys[1] = match['usage']
            if co2_utl.are_in_nested_dicts(parsed, *times_keys):
                series['times'] = co2_utl.get_nested_dicts(
                    parsed, *times_keys
                )
                continue

            # ... otherwise take the first time vector found in the sheet.
            for _, values in co2_utl.stack_nested_keys(parsed, depth=4):
                if 'times' in values:
                    series['times'] = values['times']
                    break

    co2_utl.combine_nested_dicts(parsed, depth=5, base=res)
def filter_summary(changes, summary):
    """
    Extracts from the summary the parts affected by the plan changes.

    :param changes:
        Plan changes, keyed by dotted names.
    :type changes: dict

    :param summary:
        Complete summary.
    :type summary: dict

    :return:
        The plan changes (stored under `plan.<name>` keys), the matching
        summary entries, and the related deltas.
    :rtype: dict
    """
    changed, variations = set(), {}

    for name, values in changes.items():
        # The dotted name is reversed to follow the key order of the summary.
        keys = tuple(name.split('.')[::-1])
        parents = keys[:-1]
        changed.add(parents)
        path = parents + ('plan.%s' % keys[-1],)
        co2_utl.get_nested_dicts(variations, *path).update(values)

    for keys, values in co2_utl.stack_nested_keys(summary, depth=3):
        if keys[:-1] in changed:
            co2_utl.get_nested_dicts(
                variations, *keys, default=co2_utl.ret_v(values)
            )

    _add_delta2filtered_summary(variations, summary, base=variations)

    return variations
def _format_scores(scores):
    """
    Re-arranges the `limits` and `errors` of the model scores into rows.

    :param scores:
        Nested model scores; only the entries whose third-level key is
        `limits` or `errors` are used.
    :type scores: dict

    :return:
        Lists of rows, each tagged with `model_id` and `param_id`.
    :rtype: dict
    """
    formatted = {}
    for keys, values in co2_utl.stack_nested_keys(scores, depth=3):
        kind = keys[-1]
        if kind not in ('limits', 'errors'):
            continue

        # The errors are stored one level deeper, under a `score` key.
        suffix = ('score',) if kind == 'errors' else ()
        for sub_keys, value in co2_utl.stack_nested_keys(values):
            path = (keys[0], sub_keys[-1], keys[1]) + sub_keys[:-1] + suffix
            co2_utl.get_nested_dicts(
                formatted, *path, default=co2_utl.ret_v(value)
            )

    grouped = {}
    for keys, row in co2_utl.stack_nested_keys(formatted, depth=4):
        row.update(dsp_utl.map_list(['model_id', 'param_id'], *keys[:2]))
        co2_utl.get_nested_dicts(grouped, *keys[2:], default=list).append(row)

    return grouped
def format_report_output(data):
    """
    Formats the model outputs (with related targets and inputs) for a report.

    :param data:
        Model data; its `output` branch drives the formatting.
    :type data: dict

    :return:
        Report outputs, split by data format at the second nesting level.
    :rtype: dict
    """
    report = {}
    outputs = data.get('output', {})

    for keys, value in co2_utl.stack_nested_keys(outputs, depth=3):
        parents = keys[:-1]
        _add_special_data2report(data, report, parents, 'target', *keys)

        found, input_value = _add_special_data2report(
            data, report, parents, 'input', *keys
        )

        # The output is reported unless it just replicates the input.
        if not found or not _is_equal(input_value, value):
            co2_utl.get_nested_dicts(
                report, *keys, default=co2_utl.ret_v(value)
            )

    formatted = {}
    for keys, values in co2_utl.stack_nested_keys(report, depth=2):
        split = _split_by_data_format(values)
        co2_utl.get_nested_dicts(
            formatted, *keys, default=co2_utl.ret_v(split)
        )

    return formatted
def get_values(data, keys, tag=(), update=lambda k, v: v, base=None):
    """
    Selects some values from the input, target and output branches of data.

    :param data:
        Model data.
    :type data: dict

    :param keys:
        Keys to be selected from every third-level dict.

    :param tag:
        Keys prepended to the path of every stored result.
    :type tag: tuple

    :param update:
        Function called with the (reversed) keys and the selected values; it
        returns the values to be stored.
    :type update: callable

    :param base:
        Dict where the results are stored; a new one is used when None.
    :type base: dict, optional

    :return:
        The `base` dict with the selected values, keyed in reversed order.
    :rtype: dict
    """
    branches = ('input', 'target', 'output')
    selected = dsp_utl.selector(branches, data, allow_miss=True)

    if base is None:
        base = {}

    for path, values in co2_utl.stack_nested_keys(selected, depth=3):
        reversed_path = path[::-1]
        values = dsp_utl.selector(keys, values, allow_miss=True)
        values = update(reversed_path, values)

        if not values:
            continue

        full_path = tag + reversed_path
        co2_utl.get_nested_dicts(
            base, *full_path, default=co2_utl.ret_v(values)
        )

    return base
def validate_inputs(data, soft_validation=False, read_schema=None):
    """
    Validates the input data against the reading schema.

    :param data:
        Nested input data (four levels of keys).
    :type data: dict

    :param soft_validation:
        If True the hard validation is skipped.
    :type soft_validation: bool

    :param read_schema:
        Schema whose `validate` method is applied to every input.

    :return:
        Validated inputs, or an empty dict if some error has been logged.
    :rtype: dict
    """
    validate = read_schema.validate
    validated, errors = {}, {}

    for keys, value in sorted(co2_utl.stack_nested_keys(data, depth=4)):
        target = co2_utl.get_nested_dicts(validated, *keys[:-1])
        _add_validated_input(target, validate, keys, value, errors)

    if not soft_validation:
        for keys, values in co2_utl.stack_nested_keys(validated, depth=3):
            for param, msg in hard_validation(values):
                error = SchemaError([], [msg])
                co2_utl.get_nested_dicts(errors, *keys)[param] = error

    return {} if _log_errors_msg(errors) else validated
def _add_validated_input(data, validate, keys, value, errors):
    """
    Validates a single input and stores either its value or its error.

    :param data:
        Dict where the validated value is stored.
    :type data: dict

    :param validate:
        Validation function; it is called with a `{name: value}` dict.
    :type validate: callable

    :param keys:
        Full path of the input; its last item is the input name.
    :type keys: tuple

    :param value:
        Value to be validated.

    :param errors:
        Nested dict collecting the schema errors, updated in place.
    :type errors: dict
    """
    name = keys[-1]
    try:
        validated = validate({name: value})
        key, val = next(iter(validated.items()))
        # Values validated as NONE are silently skipped.
        if val is not dsp_utl.NONE:
            data[key] = val
    except SchemaError as ex:
        co2_utl.get_nested_dicts(errors, *keys[:-1])[name] = ex
def calculate_delta(data):
    """
    Calculates the CO2 emission delta between the predicted nedc and wltp.

    :param data:
        Model data containing the `output.prediction` branch.
    :type data: dict

    :return:
        Deltas stored as `{nedc_x: {wltp_x: {'co2_emission_value': delta}}}`
        for the `h` and `l` cycles that have both predictions.
    :rtype: dict
    """
    value = 'co2_emission_value'
    deltas = {}

    for template in ('%s_h', '%s_l'):
        nedc, wltp = template % 'nedc', template % 'wltp'

        emissions = []
        for cycle in (nedc, wltp):
            path = ('output', 'prediction', cycle, value)
            if co2_utl.are_in_nested_dicts(data, *path):
                emissions.append(co2_utl.get_nested_dicts(data, *path))

        try:
            # With fewer than two values the diff is empty: nothing to store.
            delta = co2_utl.ret_v(np.diff(emissions)[0])
        except IndexError:
            continue

        co2_utl.get_nested_dicts(deltas, nedc, wltp, value, default=delta)

    return deltas
def define_new_inputs(data, base, dsp_model):
    """
    Combines the base inputs with the new data of a variation.

    :param data:
        New data (two levels of keys); values that are `dsp_utl.EMPTY` mark
        the entries to be removed from the result.
    :type data: dict

    :param base:
        Base data.
    :type base: dict

    :param dsp_model:
        Model used to find the data nodes affected by `data`.

    :return:
        New inputs.
    :rtype: dict
    """
    to_remove = [
        keys for keys, value in co2_utl.stack_nested_keys(data, depth=2)
        if value is dsp_utl.EMPTY
    ]

    sub_dsp = dsp_model.get_sub_dsp_from_workflow(data, check_inputs=False)

    # Keep the base nodes outside the affected workflow, plus the data ones.
    names = set(base) - set(sub_dsp.data_nodes)
    names.update(data)

    inputs = dsp_utl.selector(names, base, allow_miss=True)
    new_inputs = co2_utl.combine_nested_dicts(inputs, data, depth=2)

    for parent, key in to_remove:
        co2_utl.get_nested_dicts(new_inputs, parent).pop(key)

    return new_inputs
def combine_scores(scores):
    """
    Combines the model scores and defines the best selection of each model.

    :param scores:
        Scores per name; the last nine characters of every name are dropped
        and the empty scores are discarded.
    :type scores: dict

    :return:
        `{'selections': ..., 'scores': ...}`, or an empty dict when there
        are no scores.
    :rtype: dict
    """
    scores = {name[:-9]: value for name, value in scores.items() if value}
    if not scores:
        return {}

    selections = {}
    for (model, cycle), value in co2_utl.stack_nested_keys(scores, depth=2):
        row = {}
        if 'models' in value:
            row['models'] = value['models']
        row.update(value.get('score', {}))
        co2_utl.get_nested_dicts(
            selections, model, cycle, default=co2_utl.ret_v(row)
        )

        if co2_utl.are_in_nested_dicts(selections, model, 'best'):
            continue

        # The first score met for a model defines its best selection.
        renames = {'models': 'selected_models', 'success': 'status'}
        best = dsp_utl.map_dict(renames, dsp_utl.selector(renames, row))
        best['from'] = cycle
        co2_utl.get_nested_dicts(
            selections, model, 'best', default=co2_utl.ret_v(best)
        )

    return {'selections': selections, 'scores': scores}
def _split_by_data_format(data):
    """
    Splits the data into time series (`ts`) and parameters (`pa`).

    The arrays whose size equals the largest array size (full load curves
    excluded from the size search) are considered time series; everything
    else is a parameter.

    :param data:
        Data to be split.
    :type data: dict

    :return:
        Dict with the `ts` and/or `pa` groups.
    :rtype: dict
    """
    full_load = ('full_load_speeds', 'full_load_torques', 'full_load_powers')
    sizes = (
        value.size for name, value in data.items()
        if name not in full_load and isinstance(value, np.ndarray)
    )
    # None when there are no arrays: everything is then a parameter.
    series_size = max(sizes, default=None)

    split = {}
    for name, value in data.items():
        is_series = isinstance(value, np.ndarray) and series_size == value.size
        group = 'ts' if is_series else 'pa'
        co2_utl.get_nested_dicts(split, group)[name] = value

    return split
def _extract_summary_from_model_scores(report, extracted):
    """
    Adds the status of the model selections to the extracted summary.

    :param report:
        Report data that may contain
        `data.calibration.model_scores.selections`.
    :type report: dict

    :param extracted:
        Extracted summary, updated in place.
    :type extracted: dict
    """
    path = ('data', 'calibration', 'model_scores', 'selections')
    if not co2_utl.are_in_nested_dicts(report, *path):
        return

    selections = co2_utl.get_nested_dicts(report, *path)
    status = {}

    for cycle, rows in selections.items():
        success = (
            (row['model_id'], row['success'])
            for row in rows if 'success' in row
        )
        calibration = _format_dict(success, 'status %s')
        co2_utl.get_nested_dicts(
            extracted, cycle, 'calibration', 'output'
        ).update(calibration)

        selected = ((row['model_id'], row['status']) for row in rows)
        status.update(_format_dict(selected, 'status %s'))

    # The overall status is reported on every cycle that has a prediction.
    for cycle in extracted:
        if co2_utl.are_in_nested_dicts(extracted, cycle, 'prediction'):
            co2_utl.get_nested_dicts(
                extracted, cycle, 'prediction', 'output'
            ).update(status)
def get_selection(data):
    """
    Lists the best selection of every calibrated model.

    :param data:
        Data that may contain `data.calibration.model_scores.selections`.
    :type data: dict

    :return:
        One dict per model (sorted by model id) with the `from` and `status`
        of its best selection plus the `model_id`.
    :rtype: list
    """
    path = ('data', 'calibration', 'model_scores', 'selections')
    if not co2_utl.are_in_nested_dicts(data, *path):
        return []

    selections = co2_utl.get_nested_dicts(data, *path)

    rows = []
    for model_id, selection in sorted(selections.items()):
        row = dsp_utl.selector(('from', 'status'), selection['best'])
        row['model_id'] = model_id
        rows.append(row)

    return rows
def _add2summary(total_summary, summary, base_keys=None):
    """
    Appends the rows of a summary to the total summary.

    :param total_summary:
        Total summary (lists of rows), updated in place.
    :type total_summary: dict

    :param summary:
        Summary to be added; its third-level values are rows or lists of
        rows.
    :type summary: dict

    :param base_keys:
        Extra keys combined with every appended row.
    :type base_keys: dict, optional
    """
    base_keys = base_keys or {}

    for keys, value in co2_utl.stack_nested_keys(summary, depth=3):
        rows = co2_utl.get_nested_dicts(total_summary, *keys, default=list)
        new_rows = value if isinstance(value, list) else [value]
        for row in new_rows:
            rows.append(dsp_utl.combine_dicts(row, base_keys))
def _extract_summary_from_output(report, extracted):
    """
    Extracts some special output parameters into the summary.

    :param report:
        Report data; the `pa` groups of its `output` branch are read.
    :type report: dict

    :param extracted:
        Extracted summary, updated in place.
    :type extracted: dict
    """
    outputs = report.get('output', {})

    for keys, values in co2_utl.stack_nested_keys(outputs, depth=2):
        reversed_keys = keys[::-1]

        for usage, name, value in _param_names_values(values.get('pa', {})):
            if name == 'co2_params_calibrated':
                params = _format_dict(
                    value.valuesdict().items(), 'co2_params %s'
                )
            elif name == 'calibration_status':
                params = _format_dict(
                    enumerate(value), 'status co2_params step %d',
                    lambda x: x[0]
                )
            elif name == 'willans_factors':
                params = value
            elif name == 'phases_willans_factors':
                params = {}
                for phase, factors in enumerate(value):
                    fmt = '%s phase {}'.format(phase)
                    params.update(_format_dict(factors.items(), fmt))
            elif name == 'has_sufficient_power':
                params = {name: value}
            else:
                params = {}

            if params:
                path = reversed_keys + (usage,)
                co2_utl.get_nested_dicts(extracted, *path).update(params)
def re_sample_targets(data):
    """
    Re-samples the target time series on the time vector of the outputs.

    :param data:
        Model data with the `target` and `output` branches.
    :type data: dict

    :return:
        Targets that have a matching output, with the time series
        interpolated on the output times when the two time vectors differ.
    :rtype: dict
    """
    targets = data.get('target', {})
    re_sampled = {}

    for keys, target in co2_utl.stack_nested_keys(targets, depth=2):
        if not co2_utl.are_in_nested_dicts(data, 'output', *keys):
            continue

        output = co2_utl.get_nested_dicts(data, 'output', *keys)
        output = _split_by_data_format(output)
        split = dsp_utl.selector(
            output, _split_by_data_format(target), allow_miss=True
        )

        if 'times' in split.get('ts', {}) and 'times' in output['ts']:
            series = split['ts']
            x, xp = output['ts']['times'], series.pop('times')
            if not _is_equal(x, xp):
                for name, fp in series.items():
                    series[name] = np.interp(x, xp, fp)
        else:
            # Without both time vectors the series cannot be aligned.
            split.pop('ts', None)

        values = dsp_utl.combine_dicts(*split.values())
        co2_utl.get_nested_dicts(
            re_sampled, *keys, default=co2_utl.ret_v(values)
        )

    return re_sampled
def _comparison2df(comparison):
    """
    Converts the comparison results into a data frame.

    :param comparison:
        Nested comparison results (three levels of keys).
    :type comparison: dict

    :return:
        Data frame indexed by `param_id`, or None if there are no results.
    :rtype: pandas.DataFrame | None
    """
    fields = ['usage', 'cycle', 'param']
    items = [
        (dsp_utl.map_list(fields, *keys), keys, value)
        for keys, value in co2_utl.stack_nested_keys(comparison, depth=3)
    ]

    grouped = {}
    for _, keys, value in _yield_sorted_params(items, keys=fields):
        rows = co2_utl.get_nested_dicts(grouped, *keys[:-1], default=list)
        rows.append(dsp_utl.combine_dicts({'param_id': keys[-1]}, value))

    if not grouped:
        return None

    return _dd2df(grouped, 'param_id', depth=2)
def _add_delta2filtered_summary(changes, summary, base=None):
    """
    Adds to `base` the CO2 deltas of the summary affected by the changes.

    A delta between two cycles is copied when the first cycle has a changed
    CO2 prediction (all its deltas are taken) or when the second one has it.

    :param changes:
        Filtered summary, checked for the
        `<cycle>.prediction.output.co2_emission_value` entries.
    :type changes: dict

    :param summary:
        Complete summary containing the `delta` branch.
    :type summary: dict

    :param base:
        Dict where the deltas are stored; a new one is used when None.
        It may be the same object as `changes`.
    :type base: dict, optional

    :return:
        The `base` dict with the selected deltas.
    :rtype: dict
    """
    cycles = {'nedc_h', 'nedc_l', 'wltp_h', 'wltp_l'}
    value = 'co2_emission_value'
    ref = 'prediction', 'output', value
    base = {} if base is None else base

    def has_changed(cycle):
        return co2_utl.are_in_nested_dicts(changes, cycle, *ref)

    # Sorted iteration keeps the key insertion order of the result stable
    # between runs (the iteration order of a set of strings is not).
    for cycle in sorted(cycles):
        if not co2_utl.are_in_nested_dicts(summary, 'delta', cycle):
            continue

        others = sorted(cycles - {cycle})
        if not has_changed(cycle):
            # The writes below go under the `delta` key, which is not a
            # cycle, so evaluating the checks eagerly is safe even when
            # `base` is `changes`.
            others = [other for other in others if has_changed(other)]

        for other in others:
            path = 'delta', cycle, other, value
            if co2_utl.are_in_nested_dicts(summary, *path):
                delta = co2_utl.get_nested_dicts(summary, *path)
                co2_utl.get_nested_dicts(
                    base, *path, default=co2_utl.ret_v(delta)
                )

    return base
def _scores2df(data):
    """
    Converts the model scores into the selections and scores data frames.

    :param data:
        Data that may contain `data.calibration.model_scores`.
    :type data: dict

    :return:
        `{'data.calibration.model_scores': (selections, scores)}`, or an
        empty dict when the model scores are missing.
    :rtype: dict
    """
    path = ('data', 'calibration', 'model_scores')
    if not co2_utl.are_in_nested_dicts(data, *path):
        return {}

    model_scores = co2_utl.get_nested_dicts(data, *path)

    selections_index = ['model_id', 'from', 'status', 'selected_models']
    selections_df = _dd2df(
        model_scores['selections'], selections_index, depth=1
    )
    selections_df.name = 'selections'

    scores_index = ['model_id', 'param_id']
    scores_df = _dd2df(model_scores['scores'], scores_index, depth=2)
    scores_df.name = 'scores'

    return {'.'.join(path): (selections_df, scores_df)}
def validate_plan(plan, read_schema=None):
    """
    Validates the rows of a simulation plan against the reading schema.

    :param plan:
        Plan whose rows are iterated with `iterrows`; the column names are
        dotted parameter paths.
    :type plan: pandas.DataFrame

    :param read_schema:
        Schema whose `validate` method is applied to every value.

    :return:
        List of `(index, inputs)` pairs, or an empty list if some error has
        been logged.
    :rtype: list
    """
    validate = read_schema.validate
    validated_plan, errors = [], {}

    for index, row in plan.iterrows():
        row.dropna(how='all', inplace=True)
        plan_id = 'plan id:{}'.format(index[0])

        inputs = {}
        for name, value in row.items():
            keys = (plan_id, ) + tuple(name.split('.'))
            # The inputs are grouped by the dotted parent path of the name.
            group = co2_utl.get_nested_dicts(inputs, '.'.join(keys[1:-1]))
            _add_validated_input(group, validate, keys, value, errors)

        validated_plan.append((index, inputs))

    return [] if _log_errors_msg(errors) else validated_plan
def format_report_scores(data):
    """
    Formats the selections and scores of the calibrated models for a report.

    :param data:
        Data that may contain `data.calibration.model_scores`.
    :type data: dict

    :return:
        Formatted `selections` and `scores`, stored under the same path as
        in `data` (only the non-empty ones).
    :rtype: dict
    """
    report = {}
    base = 'data', 'calibration', 'model_scores'
    if not co2_utl.are_in_nested_dicts(data, *base):
        return report

    formatters = (
        ('selections', _format_selections),
        ('scores', _format_scores),
    )
    for name, formatter in formatters:
        path = base + (name,)
        formatted = formatter(co2_utl.get_nested_dicts(data, *path))
        if formatted:
            co2_utl.get_nested_dicts(
                report, *path, default=co2_utl.ret_v(formatted)
            )

    return report