def remove_exs(dataset, hyps, err_hyp, alpha, split_name, one_v_all):
    """Keep only the examples that the best hypothesis predicts correctly.

    The dataset dict is filtered in place (and also returned): the entries
    'X', 'Y', 'im_files', 'explain_files' and 'explain_interp' are indexed
    down to the retained examples, as are 'X_density' and both axes of
    'X_distance' when 'X_density' is present.

    Args:
        dataset: dict holding the arrays listed above plus 'class_names'.
        hyps: indexable collection of hypotheses.
        err_hyp: per-hypothesis errors. When ``one_v_all`` is true this is
            indexed as a sequence of error vectors (``err_hyp[0]`` in the
            binary case, ``err_hyp[cc]`` per class otherwise); when false it
            is passed to ``np.argmin`` directly.
        alpha: forwarded unchanged to the ``teach`` user model.
        split_name: label used only in the printed summary.
        one_v_all: selects ``teach.user_model_binary`` (true) or
            ``teach.user_model`` (false).

    Returns:
        The same ``dataset`` object, after filtering.
    """
    labels = dataset['Y']
    num_exs = labels.shape[0]

    if one_v_all:
        # Count the distinct labels. The previous check applied np.unique to
        # the number of examples, so it was only true for a 2-example dataset.
        if np.unique(labels).shape[0] == 2:
            # binary
            optimal_index = np.argmin(err_hyp[0])
            _, pred_class = teach.user_model_binary(
                hyps[optimal_index], dataset['X'], labels, alpha)
            inds = np.where(labels == pred_class)[0]
        else:
            # multi class: an example is kept only if every one-vs-all
            # problem (one per entry of err_hyp) gets it right
            correctly_predicted = np.zeros(num_exs)
            for cc in range(len(err_hyp)):
                optimal_index = np.argmin(err_hyp[cc])
                # builtin int instead of np.int, which NumPy 1.24 removed
                Y_bin = np.zeros(num_exs).astype(int)
                Y_bin[np.where(labels == cc)[0]] = 1
                _, pred_class = teach.user_model_binary(
                    hyps[optimal_index], dataset['X'], Y_bin, alpha)
                correctly_predicted[np.where(Y_bin == pred_class)[0]] += 1
            inds = np.where(correctly_predicted == len(err_hyp))[0]
    else:
        optimal_index = np.argmin(err_hyp)
        _, pred_class = teach.user_model(
            hyps[optimal_index], dataset['X'], labels, alpha)
        inds = np.where(labels == pred_class)[0]

    print(dataset['X'].shape[0] - inds.shape[0], split_name, 'examples removed')

    # remove the examples
    dataset['X'] = dataset['X'][inds, :]
    dataset['Y'] = dataset['Y'][inds]
    dataset['im_files'] = dataset['im_files'][inds]
    dataset['explain_files'] = dataset['explain_files'][inds]
    dataset['explain_interp'] = dataset['explain_interp'][inds]
    cls_un, cls_cnt = np.unique(dataset['Y'], return_counts=True)

    if 'X_density' in dataset:
        dataset['X_density'] = dataset['X_density'][inds]
        # X_distance is filtered along both axes, rows first then columns
        dataset['X_distance'] = dataset['X_distance'][inds, :]
        dataset['X_distance'] = dataset['X_distance'][:, inds]

    # per-class count of the examples that remain
    print('\n', split_name)
    for cls_id, cls_count in zip(cls_un, cls_cnt):
        print(cls_id, dataset['class_names'][cls_id].ljust(30), '\t', cls_count)

    return dataset
def compute_hyps_error(hyps, X, Y, alpha, one_v_all=False):
    """Return the misclassification rate of every hypothesis.

    This is err(h, h*): a vector of length H (one entry per hypothesis)
    holding the fraction of entries of ``Y`` that differ from the class
    predicted by the ``teach`` user model for that hypothesis.

    Args:
        hyps: sequence of hypotheses.
        X: examples, forwarded to the user model.
        Y: labels, forwarded to the user model and compared to its output.
        alpha: forwarded unchanged to the user model.
        one_v_all: use ``teach.user_model_binary`` instead of
            ``teach.user_model``.

    Returns:
        Float array of length ``len(hyps)``.
    """
    errors = np.zeros(len(hyps))
    for idx, hypothesis in enumerate(hyps):
        if not one_v_all:
            _, predicted = teach.user_model(hypothesis, X, Y, alpha)
        else:
            _, predicted = teach.user_model_binary(hypothesis, X, Y, alpha)

        num_wrong = (Y != predicted).sum()
        errors[idx] = num_wrong / float(Y.shape[0])

    return errors
def compute_likelihood(hyps, X, Y, alpha, one_v_all=False):
    """Compute P(y|h,x) for every hypothesis/example pair.

    The result has size HxN (hypotheses by examples). Entries stay at one
    where the hypothesis' predicted class equals the label (a correct
    guess); where they differ, the entry is the ``prob_agree`` value
    returned by the ``teach`` user model for that example.

    Args:
        hyps: sequence of hypotheses. With ``one_v_all`` each one is assumed
            to be a D dim vector, otherwise a CxD dim matrix.
        X: examples; ``X.shape[0]`` fixes the number of columns.
        Y: labels, forwarded to the user model and compared to its output.
        alpha: forwarded unchanged to the user model.
        one_v_all: use ``teach.user_model_binary`` instead of
            ``teach.user_model``.

    Returns:
        Float array of shape ``(len(hyps), X.shape[0])``.
    """
    likelihood = np.ones((len(hyps), X.shape[0]))

    for hh, hyp in enumerate(hyps):
        if one_v_all:
            # assumes that hyp is a D dim vector
            prob_agree, pred_class = teach.user_model_binary(hyp, X, Y, alpha)
        else:
            # assumes that hyp is a CxD dim matrix
            prob_agree, pred_class = teach.user_model(hyp, X, Y, alpha)

        # only the wrongly predicted examples move away from one
        inds = np.where(pred_class != Y)[0]
        likelihood[hh, inds] = prob_agree[inds]

    return likelihood