Ejemplo n.º 1
0
    def _merge_models(feat, is_header, is_before_proj, is_gs, ds, covariance_type, dataset_name, model_name):
        """Merge the results of one model over every entry of ``ds`` into one csv.

        For each entry of ``ds`` the results are fetched with
        ``_merge_datasets`` and collected twice: keyed by the entry in a dict
        (dumped to ``<csv path>.dat``) and concatenated into one flat
        collection (written as the csv itself).

        Parameters
        ----------
        feat
            Feature-set name; forwarded in the path config and used as the
            prefix of the first output directory component.
        is_header, is_before_proj, is_gs
            Flags forwarded in the path config and encoded (via ``str``) in
            the output directory names.
        ds
            Iterable of 2-tuples. The second item of each tuple is forwarded
            in the path config; the whole tuple is the key under which that
            entry's results are stored in the dumped dict.
        covariance_type
            Forwarded in the path config for GMM models only (OCSVM models
            get ``None`` instead). Always part of the output directory name.
        dataset_name
            Passed to ``_merge_datasets`` as a single-element list and used
            in the output file name.
        model_name
            Must contain ``"OCSVM"`` or ``"GMM"``; used in the path config
            and in the output file name.

        Returns
        -------
        The path of the written csv file.

        Raises
        ------
        NotImplementedError
            If ``model_name`` contains neither ``"OCSVM"`` nor ``"GMM"``
            (only raised once ``ds`` yields at least one entry).

        Notes
        -----
        The output root ``in_dir`` is not a parameter; it is read from the
        enclosing scope.
        """
        res_ = {}
        vs = []  # flat collection that ends up in the csv
        for i, d_tup in enumerate(ds):
            _, d = d_tup
            # The two supported model families differ only in the covariance
            # slot of the path config ("OCSVM" is checked first, as before).
            if "OCSVM" in model_name:
                model_cov = None
            elif 'GMM' in model_name:
                model_cov = covariance_type
            else:
                # Report the value that actually caused the failure; the
                # current ds entry is kept for context.
                raise NotImplementedError(
                    f'unsupported model_name: {model_name!r} (ds entry: {d_tup!r})')

            pth_cfg = (feat, is_header, is_before_proj, is_gs, d, model_cov, model_name)
            data = _merge_datasets([dataset_name], pth_cfg)
            res_[d_tup] = data

            # Copy the first result so that extending ``vs`` below does not
            # also grow the object stored in ``res_``.
            # NOTE(review): assumes ``data`` is list-like (supports
            # ``extend``) -- confirm against ``_merge_datasets``.
            if i == 0:
                vs = copy.deepcopy(data)
            else:
                vs.extend(data)

        # Layout:
        # <in_dir>/<feat>-header_<..>/before_proj_<..>-gs_<..>/std_False_center_False-<cov>/<dataset>-<model>.csv
        out_file_ = pth.join(in_dir, f"{feat}-header_{is_header!s}",
                             f"before_proj_{is_before_proj!s}-gs_{is_gs!s}",
                             f"std_False_center_False-{covariance_type!s}",
                             f'{dataset_name}-{model_name}.csv')
        print(f'data_models: {out_file_}')
        # NOTE(review): presumably makes sure the parent directory exists -- confirm.
        check_path(out_file_)

        # Per-entry results are dumped next to the csv.
        out_file_dat = out_file_ + '.dat'
        dump_data(res_, out_file=out_file_dat)

        # Flat results as csv; 'utf-8-sig' writes a UTF-8 byte-order mark.
        pd.DataFrame(vs).to_csv(out_file_, index=False, encoding='utf-8-sig')

        # NOTE: only the .dat and .csv outputs are produced here; no
        # xlsx / ratio / latex export is performed.
        return out_file_
Ejemplo n.º 2
0
def main():
    """Run every (dataset, model) combination and export the collected results.

    Calls ``_main`` for each pair from ``DATASETS`` x ``MODELS``, groups the
    outputs as ``{data_name: {model_name: out}}`` and then writes, under the
    input directory: ``res.csv``, ``res.csv.dat``, ``res.csv.dat.xlsx`` and
    the ratio workbook ``res.csv-ratio.xlsx``.
    """
    feat_set = 'iat_size'
    # Alternative input directory:
    #   'speedup/out/kjl_serial_ind_32_threads-cProfile_perf_counter'
    in_dir = 'speedup/out/kjl_joblib_parallel_30'

    # Load models and evaluate them on test data, grouped by dataset.
    results = {}
    for data_name, model_name in itertools.product(DATASETS, MODELS):
        outcome = _main(in_dir, data_name, model_name, feat_set)
        results.setdefault(data_name, {})[model_name] = outcome
    lg.debug(results)

    # Save results: csv first, then the object returned by res2csv as .dat.
    csv_path = f'{in_dir}/res.csv'
    check_path(csv_path)
    _, outs = res2csv(results, csv_path, feat_set=feat_set)
    dat_path = csv_path + '.dat'
    dump_data(outs, out_file=dat_path)
    lg.info(csv_path)

    # Convert the .dat dump to xlsx.
    xlsx_path = dat2xlxs_new(dat_path, out_file=dat_path + '.xlsx', models=MODELS)

    # Compute the OCSVM/GMM ratio; splitext only strips the trailing '.dat'.
    ratio_target = os.path.splitext(dat_path)[0] + '-ratio.xlsx'
    out_xlsx_ratio = improvement(xlsx_path, feat_set=feat_set, out_file=ratio_target)
    print(out_xlsx_ratio)

    lg.info('finish!')