Example #1
0
def get_chart_reference(report):
    """
    Defines the chart series for the time series stored in the report output.

    :param report:
        Report data; only its 'output' entry is read.
    :type report: dict

    :return:
        Nested dict of chart definitions. Each leaf holds a 'series' list (one
        item per plotted parameter, with its 'x' and 'y' keys and a 'label')
        updated with the settings of the matching graph mapping (without its
        'label').
    :rtype: dict
    """
    r, _map = {}, _map_cycle_report_graphs()
    out = report.get('output', {})
    it = co2_utl.stack_nested_keys(out, key=('output',), depth=3)
    for k, v in sorted(it):
        # Only time-series groups that carry the 'times' vector are charted.
        if k[-1] != 'ts' or 'times' not in v:
            continue

        sheet = _sheet_name(k)
        for i in sorted(v):
            param_id = _re_params_name.match(i)['param']
            # Parameters without a (non-empty) graph mapping are not plotted.
            if not _map.get(param_id, None):
                continue

            d = {
                'x': k + ('times',),
                'y': k + (i,),
                # Built in a single formatting step, so a '%' in the sheet
                # name cannot be mistaken for a format directive.
                'label': '{}/{}'.format(sheet, i)
            }
            n = k[2], param_id, 'series'
            co2_utl.get_nested_dicts(r, *n, default=list).append(d)

    for k, v in co2_utl.stack_nested_keys(r, depth=2):
        m = _map[k[1]]
        # The mapping's own label must not override the per-series labels.
        m.pop('label', None)
        v.update(m)

    return r
Example #2
0
def _parse_base_data(res,
                     match,
                     sheet,
                     sheet_name,
                     re_params_name=_re_params_name):
    """
    Parses the data of a base sheet and merges them into `res`.

    :param res:
        Container of the parsed data; it is passed as `base` to
        `co2_utl.combine_nested_dicts` to receive the parsed values.
    :type res: dict

    :param match:
        Items parsed from the sheet name. If 'type' is missing it is set
        (in place) to 'ts' when a 'cycle' is given, otherwise to 'pa'.
    :type match: dict

    :param sheet:
        Sheet object handed to `lasso`.

    :param sheet_name:
        Name of the sheet, used to build the xl-reference.
    :type sheet_name: str

    :param re_params_name:
        Regular expression to parse param names.

    :return:
        An empty dict when the time-series sheet cannot be read, otherwise
        None (results are delivered through `res`).
    :rtype: dict | None

    :raise ValueError:
        If a time-series column contains nan.
    """
    r = {}
    defaults = {'usage': 'input', 'stage': 'calibration'}

    if 'type' not in match:
        match['type'] = 'pa' if 'cycle' not in match else 'ts'

    match = dsp_utl.combine_dicts(defaults, match)

    if match['type'] == 'pa':
        xl_ref = '#%s!B2:C_:["pipe", ["dict", "recurse"]]' % sheet_name
        data = lasso(xl_ref, sheet=sheet)
    else:
        xl_ref = '#%s!A2(R):.3:RD:["df", {"header": 0}]' % sheet_name
        # Best effort: an unreadable sheet is skipped. `Exception` (instead
        # of a bare except) lets KeyboardInterrupt/SystemExit propagate.
        # noinspection PyBroadException
        try:
            data = lasso(xl_ref, sheet=sheet)
        except Exception:
            return {}
        data.dropna(how='all', inplace=True)
        data.dropna(axis=1, how='all', inplace=True)
        # A column is complete when its non-nan count equals the row count.
        mask = data.count(0) == len(data)
        # noinspection PyUnresolvedReferences
        drop = [k for k, v in mask.items() if not v]
        if drop:
            msg = 'Columns {} in {} sheet contains nan.\n ' \
                  'Please correct the inputs!'
            raise ValueError(msg.format(drop, sheet_name))

    for k, v in parse_values(data, match, re_params_name):
        co2_utl.get_nested_dicts(r, *k[:-1])[k[-1]] = v

    n = (match['scope'], 'target')
    if match['type'] == 'ts' and co2_utl.are_in_nested_dicts(r, *n):
        t = co2_utl.get_nested_dicts(r, *n)
        for k, v in co2_utl.stack_nested_keys(t, key=n, depth=2):
            if 'times' in v:
                continue

            # Target series without their own 'times': borrow them from the
            # same location under the sheet usage, if available ...
            times_key = list(k + ('times', ))
            times_key[1] = match['usage']
            if co2_utl.are_in_nested_dicts(r, *times_key):
                v['times'] = co2_utl.get_nested_dicts(r, *times_key)
            else:
                # ... otherwise from the first parsed group that has them.
                for i, j in co2_utl.stack_nested_keys(r, depth=4):
                    if 'times' in j:
                        v['times'] = j['times']
                        break

    co2_utl.combine_nested_dicts(r, depth=5, base=res)
Example #3
0
def _format_scores(scores):
    """
    Re-arranges the 'limits' and 'errors' entries of the model scores.

    :param scores:
        Nested scores; only third-level entries keyed 'limits' or 'errors'
        are considered.
    :type scores: dict

    :return:
        Nested dict whose leaves are lists of score dicts, each updated with
        the 'model_id'/'param_id' mapping of its origin.
    :rtype: dict
    """
    flat = {}
    for keys, values in co2_utl.stack_nested_keys(scores, depth=3):
        kind = keys[-1]
        if kind not in ('limits', 'errors'):
            continue

        # Values coming from 'errors' are stored one level deeper ('score').
        suffix = ('score',) if kind == 'errors' else ()
        head = keys[0]
        for path, value in co2_utl.stack_nested_keys(values):
            target = (head, path[-1], keys[1]) + path[:-1] + suffix
            co2_utl.get_nested_dicts(
                flat, *target, default=co2_utl.ret_v(value)
            )

    formatted = {}
    for keys, entry in co2_utl.stack_nested_keys(flat, depth=4):
        entry.update(dsp_utl.map_list(['model_id', 'param_id'], *keys[:2]))
        bucket = co2_utl.get_nested_dicts(formatted, *keys[2:], default=list)
        bucket.append(entry)
    return formatted
Example #4
0
def format_report_output(data):
    """
    Formats the output section of the report.

    :param data:
        Report data; its 'output' entry is scanned, while the whole dict is
        handed to `_add_special_data2report`.
    :type data: dict

    :return:
        Nested dict with the collected values split by data format.
    :rtype: dict
    """
    collected = {}
    outputs = data.get('output', {})
    for keys, value in co2_utl.stack_nested_keys(outputs, depth=3):
        parent = keys[:-1]
        _add_special_data2report(data, collected, parent, 'target', *keys)

        found, input_value = _add_special_data2report(
            data, collected, parent, 'input', *keys
        )
        # The output is skipped only when it equals the reported input.
        if found and _is_equal(input_value, value):
            continue
        co2_utl.get_nested_dicts(
            collected, *keys, default=co2_utl.ret_v(value)
        )

    formatted = {}
    for keys, value in co2_utl.stack_nested_keys(collected, depth=2):
        split = _split_by_data_format(value)
        co2_utl.get_nested_dicts(
            formatted, *keys, default=co2_utl.ret_v(split)
        )

    return formatted
Example #5
0
def compare_outputs_vs_targets(data):
    """
    Compares model outputs vs targets.

    :param data:
        Model data.
    :type data: dict

    :return:
        Comparison results.
    :rtype: dict
    """

    metrics = dict(
        mean_absolute_error=mean_absolute_error,
        correlation_coefficient=_correlation_coefficient,
        accuracy_score=accuracy_score,
    )

    comparison = {}
    targets = data.get('target', {})
    for keys, target in co2_utl.stack_nested_keys(targets, depth=3):
        # Targets without a matching output cannot be compared.
        if co2_utl.are_in_nested_dicts(data, 'output', *keys):
            output = co2_utl.get_nested_dicts(data, 'output', *keys)
            scores = _compare(target, output, metrics=metrics)
            if scores:
                co2_utl.get_nested_dicts(
                    comparison, *keys, default=co2_utl.ret_v(scores)
                )

    return comparison
Example #6
0
def _summary2df(data):
    """
    Converts the summary sections of the report into named data frames.

    :param data:
        Report data; only its 'summary' entry is read.
    :type data: dict

    :return:
        ``{'summary': [frames]}`` with one frame per available section
        ('results', 'selection', 'comparison'), or an empty dict when no
        frame has been built.
    :rtype: dict
    """
    summary = data.get('summary', {})
    frames = []

    if 'results' in summary:
        results = {}
        to_row = partial(dsp_utl.map_list, [{}, 'cycle', 'stage', 'usage'])
        for name, values in summary['results'].items():
            rows = ((to_row(v, *k),)
                    for k, v in co2_utl.stack_nested_keys(values, depth=3))
            ordered = [row[0] for row in _yield_sorted_params(rows)]
            co2_utl.get_nested_dicts(
                results, name, default=co2_utl.ret_v(ordered)
            )

        results_df = _make_summarydf(
            results, index=['cycle', 'stage', 'usage'], depth=1
        )
        columns = [_rm_sub_parts(c) for c in results_df.columns]
        results_df.columns = pd.MultiIndex.from_tuples(columns)
        results_df.name = 'results'
        frames.append(results_df)

    if 'selection' in summary:
        selection_df = pd.DataFrame(summary['selection'])
        selection_df.set_index(['model_id'], inplace=True)
        selection_df.name = 'selection'
        frames.append(selection_df)

    if 'comparison' in summary:
        comparison_df = _comparison2df(summary['comparison'])
        if comparison_df is not None:
            comparison_df.name = 'comparison'
            frames.append(comparison_df)

    return {'summary': frames} if frames else {}
Example #7
0
def validate_inputs(data, soft_validation=False, read_schema=None):
    """
    Validates the input data against the read schema.

    :param data:
        Nested input data (stacked at depth 4).
    :type data: dict

    :param soft_validation:
        If True, the hard validation step is skipped.
    :type soft_validation: bool

    :param read_schema:
        Schema object providing the `validate` method.

    :return:
        Validated inputs, or an empty dict if errors have been logged.
    :rtype: dict
    """
    validated, errors = {}, {}
    validate = read_schema.validate

    stacked = list(co2_utl.stack_nested_keys(data, depth=4))
    stacked.sort()
    for keys, value in stacked:
        group = co2_utl.get_nested_dicts(validated, *keys[:-1])
        _add_validated_input(group, validate, keys, value, errors)

    if not soft_validation:
        for keys, group in co2_utl.stack_nested_keys(validated, depth=3):
            for name, msg in hard_validation(group):
                error = SchemaError([], [msg])
                co2_utl.get_nested_dicts(errors, *keys)[name] = error

    return {} if _log_errors_msg(errors) else validated
Example #8
0
def parse_dsp_model(model):
    """
    Parses the co2mpas model results.

    :param model:
        Co2mpas model after dispatching.
    :type model: co2mpas.dispatcher.Dispatcher

    :return:
        Mapped outputs.
    :rtype: dict[dict]
    """

    res = {}
    for node_id, value in model.data_output.items():
        # Dotted node ids become paths of nested dicts.
        path = node_id.split('.')
        co2_utl.get_nested_dicts(res, *path, default=co2_utl.ret_v(value))

    wltp_cycles = ('wltp_l', 'wltp_h')
    # Snapshot the items, because `res` is extended inside the loop.
    for keys, value in list(co2_utl.stack_nested_keys(res, depth=3)):
        parent, cycle = keys[:-1], keys[-1]
        if parent != ('output', 'calibration') or cycle not in wltp_cycles:
            continue

        selected = dsp_utl.selector(
            ('co2_emission_value', ), value, allow_miss=True
        )
        if selected:
            targets = co2_utl.get_nested_dicts(res, 'target', 'prediction')
            targets[cycle] = dsp_utl.combine_dicts(
                selected, targets.get(cycle, {})
            )

    res['pipe'] = model.pipe

    return res
Example #9
0
def _log_errors_msg(errors):
    """
    Logs the validation errors, if any.

    :param errors:
        Nested errors (stacked at depth 4).
    :type errors: dict

    :return:
        True if there were errors (and they have been logged), else False.
    :rtype: bool
    """
    if not errors:
        return False

    lines = ['\nInput cannot be parsed, due to:']
    lines.extend(
        '{} in {}: {}'.format(keys[-1], '/'.join(keys[:-1]), error)
        for keys, error in co2_utl.stack_nested_keys(errors, depth=4)
    )
    log.error('\n  '.join(lines))
    return True
Example #10
0
def _add2summary(total_summary, summary, base_keys=None):
    """
    Appends the items of `summary` to the lists held in `total_summary`.

    :param total_summary:
        Accumulator, updated in place.
    :type total_summary: dict

    :param summary:
        Nested summary (stacked at depth 3); leaves are single items or
        lists of items.
    :type summary: dict

    :param base_keys:
        Extra data combined with every appended item.
    :type base_keys: dict, optional
    """
    if not base_keys:
        base_keys = {}

    for keys, value in co2_utl.stack_nested_keys(summary, depth=3):
        bucket = co2_utl.get_nested_dicts(total_summary, *keys, default=list)
        items = value if isinstance(value, list) else (value,)
        for item in items:
            bucket.append(dsp_utl.combine_dicts(item, base_keys))
Example #11
0
def validate_data(data, soft_validation, read_schema=None):
    """
    Validates the simulation plan and the base inputs.

    :param data:
        Parsed data; its 'plan' and 'base' entries are read.
    :type data: dict

    :param soft_validation:
        Forwarded to `validate_inputs`.
    :type soft_validation: bool

    :param read_schema:
        Schema forwarded to `validate_plan` and `validate_inputs`.

    :return:
        Validated inputs keyed by dotted names, and the validated plan.
    :rtype: (dict, object)
    """
    raw_plan = data.get('plan', pd.DataFrame([]))
    plan = validate_plan(raw_plan, read_schema)

    validated = validate_inputs(
        data.get('base', {}), soft_validation, read_schema
    )

    # Flatten the first three levels into dotted keys.
    inputs = {}
    for keys, value in co2_utl.stack_nested_keys(validated, depth=3):
        inputs['.'.join(keys)] = value

    return inputs, plan
Example #12
0
    def test_files(self):
        """
        Runs the vehicle processing model on the input files and either
        compares the results with the stored ones or stores them.

        The stored results are compared only when `OVERWRITE_SEATBELT` is
        falsy and the results file exists; otherwise the new results are
        saved to the results file.
        """
        mydir = osp.dirname(__file__)
        # Results file: the configured one if it exists, else a file in the
        # system temporary folder.
        if SEATBELT_FILE and osp.isfile(SEATBELT_FILE):
            res_file = SEATBELT_FILE
        else:
            tmpdir = tempfile.gettempdir()
            res_file = osp.join(tmpdir, 'co2mpas_seatbelt_demos.dill')

        log.info("\n  OVERWRITE_SEATBELT: %s \n"
                 "  RUN_INPUT_FOLDER: %s \n"
                 "  RUN_ALL_FILES: %s \n"
                 "  SEATBELT_FILE: %s",
                 OVERWRITE_SEATBELT, RUN_INPUT_FOLDER, RUN_ALL_FILES, res_file)

        if not OVERWRITE_SEATBELT and osp.isfile(res_file):
            old_results = dsp_utl.load_dispatcher(res_file)
            log.info("Old results loaded!")
        else:
            old_results = None

        # Process the whole folder when requested, else only the first demo.
        path = RUN_INPUT_FOLDER or osp.join(mydir, '..', 'co2mpas', 'demos')
        file = (path
                if (RUN_ALL_FILES or RUN_INPUT_FOLDER)
                else osp.join(path, 'co2mpas_demo-0.xlsx'))

        model = vehicle_processing_model()

        results = []

        inp_files = file_finder([file])
        if not inp_files:
            raise AssertionError("DataCheck found no input-files in %r!" % file)

        for fpath in inp_files:
            fname = osp.splitext(osp.basename(fpath))[0]
            log.info('Processing: %s', fname)

            inputs = {
                'vehicle_name': fname,
                'input_file_name': fpath,
                'prediction_wltp': True,
            }
            r = model.dispatch(inputs=inputs, outputs=['report', 'summary'])
            r = dsp_utl.selector(['report', 'summary'], r)
            # The 'pipe' entry of the report is excluded from the results.
            r.get('report', {}).pop('pipe', None)
            results.append(sorted(co2_utl.stack_nested_keys(r)))

        # NOTE(review): same condition as the loading step above, evaluated
        # again against the file system.
        if not OVERWRITE_SEATBELT and osp.isfile(res_file):
            log.info('Comparing...')
            self._check_results(results, old_results)
        else:
            # NOTE(review): presumably resets the flag for later runs in the
            # same process -- confirm where the variable is read.
            os.environ["OVERWRITE_SEATBELT"] = '0'
            dsp_utl.save_dispatcher(results, res_file)
            log.info('Overwritten seat belt %r.', res_file)
Example #13
0
def _comparison2df(comparison):
    """
    Converts the comparison results into a data frame.

    :param comparison:
        Nested comparison results (stacked at depth 3).
    :type comparison: dict

    :return:
        Data frame built by `_dd2df`, or None when there is nothing to show.
    :rtype: pandas.DataFrame | None
    """
    keys = ['usage', 'cycle', 'param']
    stacked = co2_utl.stack_nested_keys(comparison, depth=3)
    rows = [(dsp_utl.map_list(keys, *k), k, v) for k, v in stacked]

    grouped = {}
    for _, path, values in _yield_sorted_params(rows, keys=keys):
        bucket = co2_utl.get_nested_dicts(grouped, *path[:-1], default=list)
        bucket.append(dsp_utl.combine_dicts({'param_id': path[-1]}, values))

    if not grouped:
        return None
    return _dd2df(grouped, 'param_id', depth=2)
Example #14
0
def _extract_summary_from_summary(report, extracted):
    """
    Copies the relevant parts of the report summary into `extracted`.

    :param report:
        Report data; its 'summary' results and delta are read.
    :type report: dict

    :param extracted:
        Container of the extracted summary, updated in place.
    :type extracted: dict
    """
    results_path = ('summary', 'results')
    if co2_utl.are_in_nested_dicts(report, *results_path):
        results = co2_utl.get_nested_dicts(report, *results_path)
        for name, values in results.items():
            # Only these two result groups are extracted.
            if name not in ('co2_emission', 'fuel_consumption'):
                continue
            for keys, params in co2_utl.stack_nested_keys(values, depth=3):
                if params:
                    co2_utl.get_nested_dicts(extracted, *keys).update(params)

    delta_path = ('summary', 'delta')
    if co2_utl.are_in_nested_dicts(report, *delta_path):
        extracted['delta'] = co2_utl.get_nested_dicts(report, *delta_path)
Example #15
0
def parse_excel_file(file_path,
                     re_sheet_name=_re_input_sheet_name,
                     re_params_name=_re_params_name):
    """
    Reads cycle's data and simulation plans.

    :param file_path:
        Excel file path.
    :type file_path: str

    :param re_sheet_name:
        Regular expression to parse sheet names.
    :type re_sheet_name: regex.Regex

    :param re_params_name:
        Regular expression to parse param names.
    :type re_params_name: regex.Regex

    :return:
        Parsed data: the values read from the sheets, plus the simulation
        plan stored under the 'plan' key.
    :rtype: dict
    """

    excel_file = pd.ExcelFile(file_path)
    res, plans = {}, []

    defaults = {'scope': 'base'}

    # Release the workbook handle even if a sheet fails to parse.
    try:
        book = excel_file.book

        for sheet_name in excel_file.sheet_names:
            match = re_sheet_name.match(sheet_name)
            # Sheets whose name does not match the pattern are ignored.
            if not match:
                continue
            match = {k: v.lower() for k, v in match.groupdict().items() if v}

            match = dsp_utl.combine_dicts(defaults, match)

            sheet = _open_sheet_by_name_or_index(book, 'book', sheet_name)
            if match['scope'] == 'base':
                _parse_base_data(res, match, sheet, sheet_name, re_params_name)
            elif match['scope'] == 'plan':
                _parse_plan_data(
                    plans, match, sheet, sheet_name, re_params_name
                )
    finally:
        excel_file.close()

    for k, v in co2_utl.stack_nested_keys(res.get('base', {}), depth=3):
        if k[0] != 'target':
            # Defaults derived from the last key, unless given explicitly.
            v['cycle_type'] = v.get('cycle_type', k[-1].split('_')[0]).upper()
            v['cycle_name'] = v.get('cycle_name', k[-1]).upper()

    res['plan'] = _finalize_plan(res, plans, file_path)

    return res
Example #16
0
def filter_summary(changes, summary):
    """
    Builds the summary of the variations introduced by the plan changes.

    :param changes:
        Plan changes keyed by dotted names.
    :type changes: dict

    :param summary:
        Nested summary (stacked at depth 3).
    :type summary: dict

    :return:
        Variations: the changes (stored under a 'plan.<name>' last key) plus
        the summary entries sharing the same leading keys.
    :rtype: dict
    """
    changed, variations = [], {}
    for name, values in changes.items():
        # The dotted name is used in reverse order.
        parts = tuple(name.split('.')[::-1])
        head, last = parts[:-1], parts[-1]
        changed.append(head)
        path = head + ('plan.%s' % last,)
        co2_utl.get_nested_dicts(variations, *path).update(values)

    for keys, value in co2_utl.stack_nested_keys(summary, depth=3):
        if keys[:-1] in changed:
            co2_utl.get_nested_dicts(
                variations, *keys, default=co2_utl.ret_v(value)
            )

    _add_delta2filtered_summary(variations, summary, base=variations)
    return variations
Example #17
0
def extract_summary(report, vehicle_name):
    """
    Extracts the summary of a vehicle from its report.

    :param report:
        Report data.
    :type report: dict

    :param vehicle_name:
        Name stored as 'vehicle_name' in every third-level entry.
    :type vehicle_name: str

    :return:
        Extracted summary.
    :rtype: dict
    """
    extracted = {}

    extractors = (
        _extract_summary_from_summary,
        _extract_summary_from_output,
        _extract_summary_from_model_scores,
    )
    for extract in extractors:
        extract(report, extracted)

    for _, values in co2_utl.stack_nested_keys(extracted, depth=3):
        values['vehicle_name'] = vehicle_name

    return extracted
Example #18
0
def _cycle2df(data, data_descriptions, write_schema):
    """
    Converts the 'ts' and 'pa' groups of the output into data frames.

    :param data:
        Report data; only its 'output' entry is read.
    :type data: dict

    :param data_descriptions:
        Forwarded to `_time_series2df` and `_parameters2df`.

    :param write_schema:
        Forwarded to `_parameters2df`.

    :return:
        Data frames keyed by sheet name.
    :rtype: dict
    """
    frames = {}
    outputs = data.get('output', {})
    stacked = co2_utl.stack_nested_keys(outputs, key=('output',), depth=3)
    for keys, values in stacked:
        sheet, kind = _sheet_name(keys), keys[-1]
        if kind == 'ts':
            df = _time_series2df(values, data_descriptions)
        elif kind == 'pa':
            df = _parameters2df(values, data_descriptions, write_schema)
        else:
            # Other groups are not exported.
            df = None

        if df is not None:
            frames[sheet] = df
    return frames
Example #19
0
def get_values(data, keys, tag=(), update=lambda k, v: v, base=None):
    """
    Collects the selected values of the 'input', 'target' and 'output' data.

    :param data:
        Nested data.
    :type data: dict

    :param keys:
        Keys to select from every third-level entry.

    :param tag:
        Prefix of the keys under which the values are stored.
    :type tag: tuple

    :param update:
        Function applied to ``(reversed keys, selected values)``; falsy
        results are skipped.
    :type update: callable

    :param base:
        Container to fill; a new dict is created when None.
    :type base: dict, optional

    :return:
        The filled container.
    :rtype: dict
    """
    sections = dsp_utl.selector(
        ('input', 'target', 'output'), data, allow_miss=True
    )

    if base is None:
        base = {}

    for path, values in co2_utl.stack_nested_keys(sections, depth=3):
        reversed_path = path[::-1]
        selected = dsp_utl.selector(keys, values, allow_miss=True)
        selected = update(reversed_path, selected)
        if not selected:
            continue

        target = tag + reversed_path
        co2_utl.get_nested_dicts(
            base, *target, default=co2_utl.ret_v(selected)
        )

    return base
Example #20
0
def define_new_inputs(data, base, dsp_model):
    """
    Defines the new inputs combining the base data with the given changes.

    :param data:
        New data (two nested levels); values equal to `dsp_utl.EMPTY` mark
        entries to be removed from the result.
    :type data: dict

    :param base:
        Base data.
    :type base: dict

    :param dsp_model:
        Model providing `get_sub_dsp_from_workflow`.

    :return:
        Combined inputs.
    :rtype: dict
    """
    to_remove = [
        keys
        for keys, value in co2_utl.stack_nested_keys(data, depth=2)
        if value is dsp_utl.EMPTY
    ]

    sub_dsp = dsp_model.get_sub_dsp_from_workflow(data, check_inputs=False)
    # Keep the base entries that are not data nodes of the sub-model, plus
    # every entry touched by the new data.
    names = set(base).difference(sub_dsp.data_nodes)
    names.update(data)

    selected = dsp_utl.selector(names, base, allow_miss=True)
    new_inputs = co2_utl.combine_nested_dicts(selected, data, depth=2)

    for parent, name in to_remove:
        co2_utl.get_nested_dicts(new_inputs, parent).pop(name)

    return new_inputs
Example #21
0
def _dd2df(dd, index=None, depth=0):
    """
    Converts a nested dict of records into a single data frame.

    :param dd:
        Nested dict, stacked at `depth`; every leaf is turned into a frame.
    :type dd: dict

    :param index:
        Column(s) used to drop duplicated rows and, if not None, as index.

    :param depth:
        Depth used to stack the nested keys.
    :type depth: int

    :return:
        Frames concatenated by columns; the column labels are prefixed with
        the keys of the originating leaf.
    :rtype: pandas.DataFrame
    """

    def _leaf2df(keys, records):
        df = pd.DataFrame(records).drop_duplicates(subset=index)
        if index is not None:
            df = df.set_index(index)
        labels = [keys + (column,) for column in df.columns]
        df.columns = pd.MultiIndex.from_tuples(labels)
        return df

    frames = [
        _leaf2df(keys, records)
        for keys, records in co2_utl.stack_nested_keys(dd, depth=depth)
    ]

    return pd.concat(frames, copy=False, axis=1, verify_integrity=True)
Example #22
0
def combine_scores(scores):
    """
    Combines the model scores and picks the first available one as best.

    :param scores:
        Scores keyed by name; falsy values are discarded and the last nine
        characters of every key are stripped.
    :type scores: dict

    :return:
        ``{'selections': ..., 'scores': ...}`` or an empty dict when there
        are no scores.
    :rtype: dict
    """
    # NOTE(review): the 9 stripped characters are presumably a fixed key
    # suffix -- confirm against the caller.
    scores = {name[:-9]: value for name, value in scores.items() if value}
    if not scores:
        return {}

    selections = {}
    for (model, cycle), value in co2_utl.stack_nested_keys(scores, depth=2):
        result = {}
        if 'models' in value:
            result['models'] = value['models']
        result.update(value.get('score', {}))
        co2_utl.get_nested_dicts(
            selections, model, cycle, default=co2_utl.ret_v(result)
        )

        # Only the first entry met for a model defines its 'best'.
        if co2_utl.are_in_nested_dicts(selections, model, 'best'):
            continue

        keys = {'models': 'selected_models', 'success': 'status'}
        best = dsp_utl.map_dict(keys, dsp_utl.selector(keys, result))
        best['from'] = cycle
        co2_utl.get_nested_dicts(
            selections, model, 'best', default=co2_utl.ret_v(best)
        )

    return {'selections': selections, 'scores': scores}
Example #23
0
def _finalize_plan(res, plans, file_path):
    """
    Merges the parsed plans into a single, indexed data frame.

    :param res:
        Parsed data; the values under its 'plan' entry are used as defaults
        for the matching plan columns.
    :type res: dict

    :param plans:
        Data frames of the parsed plan sheets (may be updated in place).
    :type plans: list[pandas.DataFrame]

    :param file_path:
        Path of the parsed file; it is the default 'base' and its folder is
        the root of relative 'base'/'defaults' paths.
    :type file_path: str

    :return:
        Plan indexed by ('id', 'base', 'defaults'); an empty frame when
        there are no plans.
    :rtype: pandas.DataFrame
    """
    if not plans:
        return pd.DataFrame()

    for k, v in co2_utl.stack_nested_keys(res.get('plan', {}), depth=4):
        n = '.'.join(k)
        m = '.'.join(k[:-1])
        for p in plans:
            if any(c.startswith(m) for c in p.columns):
                if n in p:
                    # Re-assign the column: an in-place fill of the selected
                    # column may act on a copy and leave the frame untouched.
                    p[n] = p[n].fillna(value=v)
                else:
                    p[n] = v

    plan = pd.concat(plans, axis=1, copy=False, verify_integrity=True)
    func = partial(osp.join, osp.dirname(file_path))

    def _base_path(x):
        # Missing (nan/None) or empty entries refer to the parsed file
        # itself; other entries are relative to the folder of that file.
        if pd.isnull(x) or not x:
            return file_path
        return func(x)

    if 'base' not in plan:
        plan['base'] = file_path
    else:
        plan['base'] = plan['base'].apply(_base_path)

    plan['base'] = plan['base'].apply(osp.normpath)

    if 'defaults' not in plan:
        plan['defaults'] = ''
    else:
        def _func(x):
            if x:
                # NOTE(review): `eval` runs spreadsheet content as Python
                # code -- unsafe for untrusted files; consider
                # `ast.literal_eval` if only literals are expected.
                return str(tuple(
                    osp.normpath(func(v)) for v in tuple(eval(x))))
            else:
                return x

        # The filled series must be kept: `fillna` returns a new object,
        # and nan is truthy, so it would otherwise reach `eval`.
        plan['defaults'] = plan['defaults'].fillna('').apply(_func)

    plan['id'] = plan.index
    plan.set_index(['id', 'base', 'defaults'], inplace=True)

    return plan
Example #24
0
def _extract_summary_from_output(report, extracted):
    """
    Extracts selected output parameters of the report into `extracted`.

    :param report:
        Report data; the 'pa' entries of its 'output' are scanned.
    :type report: dict

    :param extracted:
        Container of the extracted summary, updated in place.
    :type extracted: dict
    """
    outputs = report.get('output', {})
    for keys, values in co2_utl.stack_nested_keys(outputs, depth=2):
        path = keys[::-1]
        params = _param_names_values(values.get('pa', {}))
        for group, name, value in params:
            if name == 'co2_params_calibrated':
                summary = _format_dict(
                    value.valuesdict().items(), 'co2_params %s'
                )
            elif name == 'calibration_status':
                summary = _format_dict(
                    enumerate(value), 'status co2_params step %d',
                    lambda x: x[0]
                )
            elif name == 'willans_factors':
                summary = value
            elif name == 'phases_willans_factors':
                summary = {}
                for phase, factors in enumerate(value):
                    label = '%s phase {}'.format(phase)
                    summary.update(_format_dict(factors.items(), label))
            elif name == 'has_sufficient_power':
                summary = {name: value}
            else:
                # Any other parameter is not part of the summary.
                summary = {}

            if summary:
                target = path + (group,)
                co2_utl.get_nested_dicts(extracted, *target).update(summary)
Example #25
0
def re_sample_targets(data):
    """
    Re-samples the target time series on the times of the outputs.

    :param data:
        Model data; its 'target' and 'output' entries are read.
    :type data: dict

    :return:
        Targets having a matching output, with their time series
        interpolated on the output times when the two time vectors differ.
    :rtype: dict
    """
    resampled = {}
    targets = data.get('target', {})
    for keys, target in co2_utl.stack_nested_keys(targets, depth=2):
        if not co2_utl.are_in_nested_dicts(data, 'output', *keys):
            continue

        output = _split_by_data_format(
            co2_utl.get_nested_dicts(data, 'output', *keys)
        )
        selected = dsp_utl.selector(
            output, _split_by_data_format(target), allow_miss=True
        )

        if 'times' in selected.get('ts', {}) and 'times' in output['ts']:
            series = selected['ts']
            x, xp = output['ts']['times'], series.pop('times')
            if not _is_equal(x, xp):
                for name, fp in series.items():
                    series[name] = np.interp(x, xp, fp)
        else:
            # Without both time vectors the series cannot be aligned.
            selected.pop('ts', None)

        merged = dsp_utl.combine_dicts(*selected.values())
        co2_utl.get_nested_dicts(
            resampled, *keys, default=co2_utl.ret_v(merged)
        )

    return resampled