Example #1
0
File: main.py Project: QFSW/JiTBoy
def draw_histograms(data):
    """Render one histogram and one boxplot PNG per metric.

    Images are written to ``<output_base>/histogram/<metric>.png`` and
    ``<output_base>/boxplot/<metric>.png``.

    ``data`` is indexed by series name and must contain a ``'JIT'`` entry.
    Metrics flagged ``True`` below are plotted for every series in ``data``;
    the others are plotted for the ``'JIT'`` series only.
    """
    histogram_dir = "{}/histogram".format(output_base)
    boxplot_dir = "{}/boxplot".format(output_base)

    # (metric name, plot every series?) -- False restricts the plot to 'JIT'.
    metrics = (
        ('time', True),
        ('blocks', False),
        ('blocks executed', False),
        ('host instructions', False),
        ('source instructions', False),
        ('host instructions executed', False),
        ('source instructions emulated', False),
        ('mips', True),
        ('hotness', False),
        ('host block size', False),
        ('source block size', False),
        ('compilation inefficiency', False),
        ('execution inefficiency', False),
    )

    for metric, all_series in metrics:
        subset = data if all_series else {'JIT': data['JIT']}
        image_name = "{}.png".format(metric)
        plot.histogram(subset, metric,
                       "{}/{}".format(histogram_dir, image_name))
        plot.boxplot(subset, metric,
                     "{}/{}".format(boxplot_dir, image_name))
Example #2
0
def plot_factor_in_subtype(cad_factors, factor_id, factor_annotation,
                           clinical_annotation, clinical_var, out, stats_out):
    """Plot one factor against a clinical variable and test for a difference.

    Args:
        cad_factors: Dataset path opened with ``xr.open_dataset``; its
            ``'factors'`` variable is read and indexed along ``factor``.
        factor_id: Identifier looked up in the ``'id'`` field of the
            annotation entries; also used as the name of the selected factor.
        factor_annotation: Path to a YAML mapping of factor index to an
            entry that has an ``'id'`` key.
        clinical_annotation: Dataset path holding the clinical variables.
        clinical_var: Name of the clinical variable to group by; must also be
            a key of ``clin_display_names``.
        out: Destination of the boxplot, saved in SVG format.
        stats_out: Path of the text file that receives the Kruskal-Wallis
            statistic (``h``) and p-value (``p``).
    """
    factor_da = xr.open_dataset(cad_factors)['factors']
    with open(factor_annotation) as f:
        # safe_load: yaml.load() without an explicit Loader is rejected by
        # PyYAML >= 6 and can construct arbitrary objects on older versions.
        annotation = yaml.safe_load(f)
    # NOTE(review): `only` presumably returns the single matching index and
    # complains otherwise -- confirm against its definition.
    factor_index = only(
        [i for i, v in annotation.items() if v['id'] == factor_id])
    factor = factor_da.sel(factor=factor_index).load()
    factor.name = factor_id

    clin = xr.open_dataset(clinical_annotation)[clinical_var].load()
    # Drop missing values on both sides, then align the two arrays.
    factor, clin = xr.align(factor[factor.notnull()], clin[clin.notnull()])

    with plot.subplots(figsize=(3.5, 3.5)) as (fig, ax):
        plot.boxplot(
            clin,
            factor,
            title="",
            xlabel=clin_display_names[clinical_var],
            # The stored index is shown incremented by one in the label.
            ylabel=f"Factor {factor_index+1}",
            ax=ax,
        )
        fig.savefig(out, format='svg')

    # split_by returns a mapping; its values are the per-group samples
    # handed to the Kruskal-Wallis test.
    factor_by_clin = split_by(factor.values, clin.values)
    h, p = scipy.stats.kruskal(*factor_by_clin.values())
    with open(stats_out, 'w') as f:
        f.write(f"h: {h:.6e}\n")
        f.write(f"p: {p:.6e}\n")
Example #3
0
def plot_factor_in_subtype(mri_features, feature_id, clinical_annotation,
                           clinical_var, out, stats_out):
    """Plot one MRI feature against a clinical variable and test it.

    Reads ``feature_id`` from the dataset at ``mri_features`` and
    ``clinical_var`` from the dataset at ``clinical_annotation``, drops
    missing values from both, aligns them, saves a boxplot to ``out`` in SVG
    format, and writes the Kruskal-Wallis statistic and p-value to
    ``stats_out``.
    """
    mri_feature = xr.open_dataset(mri_features)[feature_id]
    groups = xr.open_dataset(clinical_annotation)[clinical_var].load()

    # Keep only non-null entries on each side before aligning.
    mri_feature, groups = xr.align(
        mri_feature[mri_feature.notnull()],
        groups[groups.notnull()],
    )

    with plot.subplots(figsize=(3.5, 3.5)) as (fig, ax):
        plot.boxplot(groups, mri_feature, title="",
                     xlabel=clin_display_names[clinical_var],
                     ylabel=feature_display_names[feature_id],
                     ax=ax)
        fig.savefig(out, format='svg')

    # One sample per group, passed positionally to the Kruskal-Wallis test.
    samples_by_group = split_by(mri_feature.values, groups.values)
    statistic, p_value = scipy.stats.kruskal(*samples_by_group.values())
    with open(stats_out, 'w') as stats_file:
        stats_file.write(f"h: {statistic:.6e}\n")
        stats_file.write(f"p: {p_value:.6e}\n")
Example #4
0
        _all_player_shaps = []
        _cf_shaps = calc_n_shapley_values(N_FEATS, N_SAMPLES, N_ITER,
                DATA_TYPE, _cf, data_dir=DATA_DIR)

        # --- Group shapley decompositions per player
        # --- Normalised:
        all_cf_shaps_per_player.append([normalise(numpy.array(_cf_shaps))[:,_player] for _player in PLAYERS])
        # --- Non-normalized:
        #all_cf_shaps_per_player.append([numpy.array(_cf_shaps)[:,_player] for _player in PLAYERS])
        # ---
        print("Done with {0}.".format(_cf))
    # ---

    cf_labels = [CF_DICT.get(_cf, 0) for _cf in cfs]
    violinplot(all_cf_shaps_per_player, PLAYERS, labels=cf_labels, multi=True)
    boxplot(all_cf_shaps_per_player, PLAYERS, labels=cf_labels, multi=True)

    # --- Plot average Shapley decomposition per player
    #SHAPS_AVG = [sum(x)/len(x) for x in zip(*all_shaps)]
    #barplot_all(PLAYERS, SHAPS_AVG)
    # ---

    try:
        plt.show()
    except Exception:
        pass


# NOTES
    #plt.title("Distance correlation")
    #plt.title(r"$R^2$")
Example #5
0
def average_varimp(X,
                   y,
                   ntrees,
                   replace,
                   mtry,
                   max_depth,
                   missing_branch,
                   balance,
                   vitype='err',
                   vimissing=True,
                   ntimes=25,
                   select=True,
                   printvi=False,
                   plotvi=False,
                   cutpoint=0.0,
                   mean=False,
                   title=None,
                   missing_rate=False,
                   random_subspace=False):
    """Fit several random forests and aggregate their variable importances.

    Args:
        X: Feature table with ``shape`` and ``columns``; columns are
            addressed by position.
        y: Targets passed to ``RandomForest.fit``.
        ntrees, replace, mtry, max_depth, missing_branch, balance,
        random_subspace: Forwarded unchanged to ``rf.RandomForest``.
        vitype, vimissing: Forwarded to ``variable_importance``.
        ntimes: Number of forests to fit (never more than the row count
            of ``X``).
        select: If true, return the feature indices whose mean importance is
            at least ``cutpoint``. Takes precedence over ``mean``.
        printvi: If true, print every feature with its mean importance, in
            decreasing order of importance.
        plotvi: If true, draw a boxplot of the per-run importances of the
            features whose mean importance is at least ``cutpoint``.
        cutpoint: Threshold on the mean importance used by ``select`` and
            ``plotvi``.
        mean: If true (and ``select`` is false), return the
            ``(index, mean importance)`` pairs sorted by decreasing mean.
        title: Title passed to ``plot.boxplot``.
        missing_rate: If true, scale each importance by
            ``utils.notNanProportion`` of the corresponding column.

    Returns:
        Depending on ``select`` / ``mean``: a sorted list of feature indices,
        a list of ``(index, mean importance)`` pairs, or otherwise a list of
        ``(index, [importance per run])`` pairs sorted by index.
    """
    vi = {a: [] for a in range(X.shape[1])}

    # The number of runs is capped by the number of rows, as it always was
    # (the loop used to iterate over the rows and break after `ntimes`).
    for _ in range(min(ntimes, X.shape[0])):
        seed = np.random.randint(0, 10000)
        clf = rf.RandomForest(ntrees=ntrees,
                              oob_error=True,
                              random_state=seed,
                              mtry=mtry,
                              missing_branch=missing_branch,
                              prob_answer=False,
                              max_depth=max_depth,
                              replace=replace,
                              balance=balance,
                              random_subspace=random_subspace)
        clf.fit(X, y)
        # Fix: honour the caller's `vitype` and `vimissing`; previously
        # `vimissing` was passed as `vitype` and `vimissing` was hard-coded.
        varimps = clf.variable_importance(vitype=vitype, vimissing=vimissing)
        for var in varimps.keys():
            importance = varimps[var]
            if missing_rate:
                importance = importance * utils.notNanProportion(
                    X[X.columns[var]])
            vi[var].append(importance)

    vimean = {var: np.mean(values) for var, values in vi.items()}

    if printvi:
        ranked = sorted(vimean.items(), key=lambda x: x[1], reverse=True)
        for v, i in ranked:
            print('feature: %r importance: %r' % (X.columns[v], i))

    if plotvi:
        print(cutpoint)
        importance_values = []
        features = []
        for v, runs in sorted(vi.items(), key=lambda x: x[0]):
            if vimean[v] >= cutpoint:
                importance_values.append(runs)
                features.append(X.columns[v])
        import plot
        plot.boxplot(importance_values, features, title)

    if select:
        return sorted(var for var, avg in vimean.items() if avg >= cutpoint)
    if mean:
        return sorted(vimean.items(), key=lambda x: x[1], reverse=True)

    return sorted(vi.items(), key=lambda x: x[0])
Example #6
0
     RejectOptionsLogisticLearner([privileged_group], [unprivileged_group],
                                  exclude_protected=False)),
    ("average odds",
     RejectOptionsLogisticLearner([privileged_group], [unprivileged_group],
                                  metric_name='Average odds difference',
                                  exclude_protected=False))
]

if C_EXECUTE:
    # execute: run do_sim (max_it=50) on every (name, learner) pair in
    # `learners`, keeping (name, result) pairs
    rss = list(map(lambda x: (x[0], do_sim(x[1], max_it=50)), learners))
    # save the results under a name that embeds COST_CONST
    save(rss, "notions_of_fairness_lr_1_bf_" + str(COST_CONST))
# -

# NOTE(review): when C_EXECUTE is false, `rss` is not assigned above, so it
# must already exist from earlier in the script -- confirm.
# NOTE(review): the `name` value looks like a throwaway placeholder -- confirm.
plot.boxplot(rss, unprivileged_group, privileged_group, name='sdffsdfdsfd')
#display(rss[0][1].feature_table([unprivileged_group, privileged_group]))

# +
# Replace `rss` with whatever `load` returns for this name and plot it.
# NOTE(review): presumably `load` reads results written earlier by `save`
# -- confirm.
filename = "notions_of_fairness_bf_8"
rss = load(filename)
plot.boxplot(rss, unprivileged_group, privileged_group, name=filename)

plot.plot_all_mutable_features_combined(rss,
                                        unprivileged_group,
                                        privileged_group,
                                        dataset,
                                        'purpose',
                                        filename=filename,
                                        kind='cdf',
                                        select_group='1',