コード例 #1
0
ファイル: datasets.py プロジェクト: Brickkkkkk/ALTA_AAAI21
def remove_exs(dataset, hyps, err_hyp, alpha, split_name, one_v_all):
    """Keep only the examples that the lowest-error hypothesis gets right.

    The dataset dict is filtered IN PLACE and the same object is returned.

    Args:
        dataset: dict with the keys 'X', 'Y', 'im_files', 'explain_files',
            'explain_interp', 'X_distance' and 'class_names', plus an
            optional 'X_density'.  All per-example arrays are indexed with
            the surviving example indices; 'X_distance' is indexed on both
            axes (presumably an NxN pairwise matrix -- confirm with caller).
        hyps: indexable collection of hypotheses passed to the `teach` user
            models.
        err_hyp: per-hypothesis errors.  With `one_v_all` it is indexed per
            class (`err_hyp[cc]` holds the errors for class `cc`); otherwise
            it is a single vector.
        alpha: forwarded unchanged to the `teach` user model.
        split_name: label used only in the printed summary.
        one_v_all: if True use `teach.user_model_binary`, else
            `teach.user_model`.

    Returns:
        The same `dataset` dict, now restricted to the kept examples.
    """
    labels = dataset['Y']
    features = dataset['X']

    # only keep examples that we can predict with the best hypothesis
    if one_v_all:
        # Count the distinct LABELS.  The previous code took np.unique of the
        # number of examples (a scalar), so this branch only ever ran when
        # the split happened to contain exactly two examples.
        if np.unique(labels).shape[0] == 2:
            # binary
            optimal_index = np.argmin(err_hyp[0])
            _, pred_class = teach.user_model_binary(
                hyps[optimal_index], features, labels, alpha)
            inds = np.where(labels == pred_class)[0]
        else:
            # multi class: an example survives only if every per-class
            # binary model classifies it correctly
            correctly_predicted = np.zeros(labels.shape[0])
            for cc, class_err in enumerate(err_hyp):
                optimal_index = np.argmin(class_err)
                # builtin int: the np.int alias was removed in NumPy 1.24
                Y_bin = np.zeros(labels.shape[0]).astype(int)
                Y_bin[np.where(labels == cc)[0]] = 1
                _, pred_class = teach.user_model_binary(
                    hyps[optimal_index], features, Y_bin, alpha)
                correctly_predicted[np.where(Y_bin == pred_class)[0]] += 1
            inds = np.where(correctly_predicted == len(err_hyp))[0]
    else:
        optimal_index = np.argmin(err_hyp)
        _, pred_class = teach.user_model(hyps[optimal_index], features,
                                         labels, alpha)
        inds = np.where(labels == pred_class)[0]
    print(features.shape[0] - inds.shape[0], split_name,
          'examples removed')

    # remove the examples
    dataset['X'] = features[inds, :]
    dataset['Y'] = labels[inds]
    dataset['im_files'] = dataset['im_files'][inds]
    dataset['explain_files'] = dataset['explain_files'][inds]
    dataset['explain_interp'] = dataset['explain_interp'][inds]
    cls_un, cls_cnt = np.unique(dataset['Y'], return_counts=True)
    if 'X_density' in dataset:
        dataset['X_density'] = dataset['X_density'][inds]
    # subset rows first, then columns, so both axes keep the same examples
    dataset['X_distance'] = dataset['X_distance'][inds, :]
    dataset['X_distance'] = dataset['X_distance'][:, inds]

    # per-class summary of what is left in this split
    print('\n', split_name)
    for cls_id, cls_total in zip(cls_un, cls_cnt):
        print(cls_id, dataset['class_names'][cls_id].ljust(30), '\t',
              cls_total)

    return dataset
コード例 #2
0
ファイル: hypothesis.py プロジェクト: Brickkkkkk/ALTA_AAAI21
def compute_hyps_error(hyps, X, Y, alpha, one_v_all=False):
    """Return the misclassification rate of every hypothesis.

    For each hypothesis the predicted classes are obtained from
    `teach.user_model_binary` (when `one_v_all` is set) or `teach.user_model`
    and compared against `Y`.

    Args:
        hyps: sequence of hypotheses.
        X: examples, forwarded unchanged to the user model.
        Y: ground-truth labels; `Y.shape[0]` is used as the denominator.
        alpha: forwarded unchanged to the user model.
        one_v_all: selects the binary user model when True.

    Returns:
        Array of length `len(hyps)` holding, per hypothesis, the fraction of
        entries where the prediction differs from `Y`.
    """
    # err(h, h*) - one entry per hypothesis
    error_rates = np.zeros(len(hyps))

    for position, hypothesis in enumerate(hyps):
        if not one_v_all:
            _, predicted = teach.user_model(hypothesis, X, Y, alpha)
        else:
            _, predicted = teach.user_model_binary(hypothesis, X, Y, alpha)

        num_wrong = (Y != predicted).sum()
        error_rates[position] = num_wrong / float(Y.shape[0])

    return error_rates
コード例 #3
0
ファイル: utils.py プロジェクト: macaodha/explain_teach
def compute_likelihood(hyps, X, Y, alpha, one_v_all=False):
    """Build the HxN likelihood matrix P(y|h,x).

    Every entry starts at one.  For each hypothesis, the entries of the
    examples whose predicted class differs from `Y` are overwritten with the
    corresponding `prob_agree` value returned by the `teach` user model
    (presumably the probability that the learner agrees with the label --
    confirm against `teach`).

    Args:
        hyps: sequence of H hypotheses.
        X: examples; `X.shape[0]` gives N.
        Y: ground-truth labels, compared element-wise with the predictions.
        alpha: forwarded unchanged to the user model.
        one_v_all: if True each hypothesis is assumed to be a D dim vector
            and `teach.user_model_binary` is used; otherwise a CxD dim
            matrix handled by `teach.user_model`.

    Returns:
        Array of shape (len(hyps), X.shape[0]).
    """
    # is set to one where h(x) = y i.e. correct guess
    likelihood = np.ones((len(hyps), X.shape[0]))

    for hh, hyp in enumerate(hyps):
        if one_v_all:
            # assumes that hyp is a D dim vector
            prob_agree, pred_class = teach.user_model_binary(hyp, X, Y, alpha)
        else:
            # assumes that hyp is a CxD dim matrix
            prob_agree, pred_class = teach.user_model(hyp, X, Y, alpha)
        # only the wrongly predicted examples move away from one
        inds = np.where(pred_class != Y)[0]
        likelihood[hh, inds] = prob_agree[inds]

    return likelihood