def draw_histograms(data):
    """Draw one histogram and one box plot per metric.

    Images are written to ``<output_base>/histogram/<metric>.png`` and
    ``<output_base>/boxplot/<metric>.png``; ``output_base`` is read from the
    enclosing scope.

    Each metric carries a flag: when it is true the whole ``data`` mapping is
    plotted, otherwise only its ``'JIT'`` entry is.
    """
    base_hist = "%s/histogram" % output_base
    base_box = "%s/boxplot" % output_base

    # (metric name, plot every entry of ``data``?)
    metrics = (
        ('time', True),
        ('blocks', False),
        ('blocks executed', False),
        ('host instructions', False),
        ('source instructions', False),
        ('host instructions executed', False),
        ('source instructions emulated', False),
        ('mips', True),
        ('hotness', False),
        ('host block size', False),
        ('source block size', False),
        ('compilation inefficiency', False),
        ('execution inefficiency', False),
    )

    for name, use_all_entries in metrics:
        processed = data if use_all_entries else {'JIT': data['JIT']}
        plot.histogram(processed, name, '%s/%s.png' % (base_hist, name))
        plot.boxplot(processed, name, '%s/%s.png' % (base_box, name))
def plot_factor_in_subtype(cad_factors, factor_id, factor_annotation,
                           clinical_annotation, clinical_var, out, stats_out):
    """Box-plot one factor per clinical group and run a Kruskal-Wallis test.

    Parameters
    ----------
    cad_factors
        Dataset file readable by ``xr.open_dataset``; its ``'factors'``
        variable is selected along ``factor``.
    factor_id
        Identifier compared with the ``'id'`` field of every entry in the
        annotation file; also used as the name of the selected array.
    factor_annotation
        YAML file holding a mapping from factor index to a mapping with at
        least an ``'id'`` key.
    clinical_annotation
        Dataset file readable by ``xr.open_dataset`` containing
        ``clinical_var``.
    clinical_var
        Name of the clinical variable; also the key looked up in
        ``clin_display_names`` for the x-axis label.
    out
        Destination handed to ``fig.savefig`` (SVG format).
    stats_out
        Text file that receives the ``h`` statistic and ``p`` value.
    """
    # yaml.load() without an explicit Loader is deprecated since PyYAML 5.1
    # and raises TypeError on PyYAML >= 6; safe_load works on all versions.
    with open(factor_annotation) as f:
        annotation = yaml.safe_load(f)
    # NOTE(review): `only` presumably enforces exactly one match -- confirm.
    factor_index = only(
        [i for i, v in annotation.items() if v['id'] == factor_id])

    # Load the needed arrays into memory, then close the file handles
    # instead of leaving the datasets open.
    with xr.open_dataset(cad_factors) as factors_ds:
        factor = factors_ds['factors'].sel(factor=factor_index).load()
    factor.name = factor_id

    with xr.open_dataset(clinical_annotation) as clin_ds:
        clin = clin_ds[clinical_var].load()

    # Drop missing values on both sides, then keep only shared labels.
    factor, clin = xr.align(factor[factor.notnull()], clin[clin.notnull()])

    with plot.subplots(figsize=(3.5, 3.5)) as (fig, ax):
        plot.boxplot(
            clin, factor,
            title="",
            xlabel=clin_display_names[clinical_var],
            ylabel=f"Factor {factor_index+1}",
            ax=ax,
        )
        fig.savefig(out, format='svg')

    # One group of factor values per clinical category.
    factor_by_clin = split_by(factor.values, clin.values)
    h, p = scipy.stats.kruskal(*factor_by_clin.values())

    with open(stats_out, 'w') as f:
        f.write(f"h: {h:.6e}\n")
        f.write(f"p: {p:.6e}\n")
def plot_factor_in_subtype(mri_features, feature_id, clinical_annotation,
                           clinical_var, out, stats_out):
    """Box-plot one MRI feature per clinical group and run a Kruskal-Wallis test.

    Parameters
    ----------
    mri_features
        Dataset file readable by ``xr.open_dataset`` containing
        ``feature_id``.
    feature_id
        Name of the variable to plot; also the key looked up in
        ``feature_display_names`` for the y-axis label.
    clinical_annotation
        Dataset file readable by ``xr.open_dataset`` containing
        ``clinical_var``.
    clinical_var
        Name of the clinical variable; also the key looked up in
        ``clin_display_names`` for the x-axis label.
    out
        Destination handed to ``fig.savefig`` (SVG format).
    stats_out
        Text file that receives the ``h`` statistic and ``p`` value.
    """
    # Load the needed arrays into memory, then close the file handles
    # instead of leaving the datasets open.
    with xr.open_dataset(mri_features) as mri_ds:
        feature = mri_ds[feature_id].load()

    with xr.open_dataset(clinical_annotation) as clin_ds:
        clin = clin_ds[clinical_var].load()

    # Drop missing values on both sides, then keep only shared labels.
    feature, clin = xr.align(feature[feature.notnull()],
                             clin[clin.notnull()])

    with plot.subplots(figsize=(3.5, 3.5)) as (fig, ax):
        plot.boxplot(
            clin, feature,
            title="",
            xlabel=clin_display_names[clinical_var],
            ylabel=feature_display_names[feature_id],
            ax=ax,
        )
        fig.savefig(out, format='svg')

    # One group of feature values per clinical category.
    feature_by_clin = split_by(feature.values, clin.values)
    h, p = scipy.stats.kruskal(*feature_by_clin.values())

    with open(stats_out, 'w') as f:
        f.write(f"h: {h:.6e}\n")
        f.write(f"p: {p:.6e}\n")
_all_player_shaps = [] _cf_shaps = calc_n_shapley_values(N_FEATS, N_SAMPLES, N_ITER, DATA_TYPE, _cf, data_dir=DATA_DIR) # --- Group shapley decompositions per player # --- Normalised: all_cf_shaps_per_player.append([normalise(numpy.array(_cf_shaps))[:,_player] for _player in PLAYERS]) # --- Non-normalized: #all_cf_shaps_per_player.append([numpy.array(_cf_shaps)[:,_player] for _player in PLAYERS]) # --- print("Done with {0}.".format(_cf)) # --- cf_labels = [CF_DICT.get(_cf, 0) for _cf in cfs] violinplot(all_cf_shaps_per_player, PLAYERS, labels=cf_labels, multi=True) boxplot(all_cf_shaps_per_player, PLAYERS, labels=cf_labels, multi=True) # --- Plot average Shapley decomposition per player #SHAPS_AVG = [sum(x)/len(x) for x in zip(*all_shaps)] #barplot_all(PLAYERS, SHAPS_AVG) # --- try: plt.show() except Exception: pass # NOTES #plt.title("Distance correlation") #plt.title(r"$R^2$")
def average_varimp(X, y, ntrees, replace, mtry, max_depth, missing_branch,
                   balance, vitype='err', vimissing=True, ntimes=25,
                   select=True, printvi=False, plotvi=False, cutpoint=0.0,
                   mean=False, title=None, missing_rate=False,
                   random_subspace=False):
    """Collect variable importances over repeated random-forest fits.

    A fresh ``rf.RandomForest`` with a random seed is fitted on ``(X, y)`` up
    to ``ntimes`` times and the importance of every column is recorded for
    each fit.

    Parameters
    ----------
    X, y
        Training data; ``X`` must expose ``shape`` and, for the printing,
        plotting and ``missing_rate`` paths, ``columns``.
    ntrees, replace, mtry, max_depth, missing_branch, balance, random_subspace
        Forwarded unchanged to ``rf.RandomForest``.
    vitype, vimissing
        Forwarded to ``clf.variable_importance``.
    ntimes
        Requested number of fits; the effective number is capped by the
        number of rows of ``X``.
    select
        When true, return the sorted column indices whose mean importance is
        at least ``cutpoint``.
    printvi
        Print each column with its mean importance, highest first.
    plotvi
        Box-plot the per-fit importances of the columns whose mean is at
        least ``cutpoint``, titled ``title``.
    mean
        Only consulted when ``select`` is false: return ``(index, mean)``
        pairs ordered by decreasing mean.
    missing_rate
        Scale each importance by ``utils.notNanProportion`` of its column.

    Returns
    -------
    list
        Depending on ``select`` / ``mean``: selected column indices,
        ``(index, mean importance)`` pairs, or ``(index, [importances])``
        pairs ordered by index.
    """
    n_rows, n_features = X.shape[0], X.shape[1]
    vi = {col: [] for col in range(n_features)}

    # The number of fits has always been bounded by the row count as well as
    # by ``ntimes``; that cap is kept for backward compatibility.
    for _ in range(min(ntimes, n_rows)):
        seed = np.random.randint(0, 10000)
        clf = rf.RandomForest(ntrees=ntrees, oob_error=True,
                              random_state=seed, mtry=mtry,
                              missing_branch=missing_branch,
                              prob_answer=False, max_depth=max_depth,
                              replace=replace, balance=balance,
                              random_subspace=random_subspace)
        clf.fit(X, y)
        # Forward the caller's choices: previously ``vimissing`` was passed
        # as ``vitype`` and ``vimissing`` was hard-coded to True, so the
        # ``vitype`` argument was silently ignored.
        varimps = clf.variable_importance(vitype=vitype, vimissing=vimissing)
        for var, importance in varimps.items():
            if missing_rate:
                importance = importance * utils.notNanProportion(
                    X[X.columns[var]])
            vi[var].append(importance)

    # Mean importance per column; NaN when no fit was performed (avoids the
    # "mean of empty slice" warning).
    vimean = {var: (np.mean(values) if values else np.nan)
              for var, values in vi.items()}
    ranked = sorted(vimean.items(), key=lambda item: item[1], reverse=True)

    if printvi:
        for var, importance in ranked:
            print('feature: %r importance: %r' % (X.columns[var], importance))

    if plotvi:
        print(cutpoint)
        kept = [var for var in sorted(vi) if vimean[var] >= cutpoint]
        importance_values = [vi[var] for var in kept]
        features = [X.columns[var] for var in kept]
        import plot
        plot.boxplot(importance_values, features, title)

    if select:
        return sorted(var for var, importance in ranked
                      if importance >= cutpoint)
    if mean:
        return ranked
    return sorted(vi.items(), key=lambda item: item[0])
RejectOptionsLogisticLearner([privileged_group], [unprivileged_group], exclude_protected=False)), ("average odds", RejectOptionsLogisticLearner([privileged_group], [unprivileged_group], metric_name='Average odds difference', exclude_protected=False)) ] if C_EXECUTE: # execute rss = list(map(lambda x: (x[0], do_sim(x[1], max_it=50)), learners)) # save save(rss, "notions_of_fairness_lr_1_bf_" + str(COST_CONST)) # - plot.boxplot(rss, unprivileged_group, privileged_group, name='sdffsdfdsfd') #display(rss[0][1].feature_table([unprivileged_group, privileged_group])) # + filename = "notions_of_fairness_bf_8" rss = load(filename) plot.boxplot(rss, unprivileged_group, privileged_group, name=filename) plot.plot_all_mutable_features_combined(rss, unprivileged_group, privileged_group, dataset, 'purpose', filename=filename, kind='cdf', select_group='1',