def run_experiment(dataset, algorithm, explainer):
  evaluator = ExplanationEvaluator(classifier_names=[algorithm])
  evaluator.load_datasets([dataset])
  evaluator.vectorize_and_train()
  explain_fn = None


  print('Explainer:', explainer)
  if explainer == 'lime':
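    # Exponentially decaying kernel of width rho, used to weight perturbed
    # samples by their distance from the instance being explained.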
    rho = 25
    kernel = lambda d: np.sqrt(np.exp(-(d**2) / rho ** 2))
    explainer = explainers.GeneralizedLocalExplainer(
        kernel,
        explainers.data_labels_distances_mapping_text,
        num_samples=15000,
        return_mean=False,
        verbose=False,
        return_mapped=True)
    explain_fn = explainer.explain_instance
  elif explainer == 'parzen':
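    # Parzen kernel widths per dataset and classifier, found through cross validation.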
    sigmas = {'multi_polarity_electronics': {'tree': 0.5, 'l1logreg': 1},
              'multi_polarity_kitchen': {'tree': 0.75, 'l1logreg': 2.0},
              'multi_polarity_dvd': {'tree': 8.0, 'l1logreg': 1},
              'multi_polarity_books': {'tree': 2.0, 'l1logreg': 2.0}}

    explainer = parzen_windows.ParzenWindowClassifier()
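    # Fit the Parzen window model on the classifier's cross-validated predictions
    # so it approximates the black box rather than the ground-truth labels.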
    cv_preds = cross_val_predict(evaluator.classifiers[dataset][algorithm],
                                 evaluator.train_vectors[dataset],
                                 evaluator.train_labels[dataset])
    explainer.fit(evaluator.train_vectors[dataset], cv_preds)
    explainer.sigma = sigmas[dataset][algorithm]
    explain_fn = explainer.explain_instance
  elif explainer == 'greedy':
    explain_fn = explainers.explain_greedy
  elif explainer == 'random':
    explainer = explainers.RandomExplainer()
    explain_fn = explainer.explain_instance
  train_results, test_results = evaluator.measure_explanation_hability(explain_fn)
  out = {'train': train_results[dataset][algorithm], 'test': test_results[dataset][algorithm]}
  # Return mean of recalls and individual train and test recalls
  recall = np.mean(test_results[dataset][algorithm])
  print('Recall:', recall)
  return recall, out


def main():
    parser = argparse.ArgumentParser(description='Evaluate some explanations')
    parser.add_argument('--dataset',
                        '-d',
                        type=str,
                        required=True,
                        help='dataset name')
    parser.add_argument('--algorithm',
                        '-a',
                        type=str,
                        required=True,
                        help='algorithm_name')
    parser.add_argument('--explainer',
                        '-e',
                        type=str,
                        required=True,
                        help='explainer name')
    args = parser.parse_args()
    dataset = args.dataset
    algorithm = args.algorithm
    evaluator = ExplanationEvaluator(classifier_names=[algorithm])
    evaluator.load_datasets([dataset])
    evaluator.vectorize_and_train()
    explain_fn = None
    if args.explainer == 'lime':
        rho = 25
        kernel = lambda d: np.sqrt(np.exp(-(d**2) / rho**2))
        explainer = explainers.GeneralizedLocalExplainer(
            kernel,
            explainers.data_labels_distances_mapping_text,
            num_samples=15000,
            return_mean=False,
            verbose=False,
            return_mapped=True)
        explain_fn = explainer.explain_instance
    elif args.explainer == 'parzen':
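        # Parzen kernel widths per dataset and classifier, found through cross validation.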
        sigmas = {
            'multi_polarity_electronics': {
                'tree': 0.5,
                'l1logreg': 1
            },
            'multi_polarity_kitchen': {
                'tree': 0.75,
                'l1logreg': 2.0
            },
            'multi_polarity_dvd': {
                'tree': 8.0,
                'l1logreg': 1
            },
            'multi_polarity_books': {
                'tree': 2.0,
                'l1logreg': 2.0
            }
        }

        explainer = parzen_windows.ParzenWindowClassifier()
        cv_preds = sklearn.cross_validation.cross_val_predict(
            evaluator.classifiers[dataset][algorithm],
            evaluator.train_vectors[dataset], evaluator.train_labels[dataset])
        explainer.fit(evaluator.train_vectors[dataset], cv_preds)
        explainer.sigma = sigmas[dataset][algorithm]
        explain_fn = explainer.explain_instance
    elif args.explainer == 'greedy':
        explain_fn = explainers.explain_greedy
    elif args.explainer == 'random':
        explainer = explainers.RandomExplainer()
        explain_fn = explainer.explain_instance
    train_results, test_results = evaluator.measure_explanation_hability(
        explain_fn)
    print('Average test: ', np.mean(test_results[dataset][algorithm]))
    out = {
        'train': train_results[dataset][algorithm],
        'test': test_results[dataset][algorithm]
    }
    print(out)


def run_experiment(df, dataset, algorithm, num_features, percent_untrustworthy,
                   num_rounds):
    train_data, train_labels, test_data, test_labels, class_names = LoadDataset(
        dataset)
    vectorizer = CountVectorizer(lowercase=False, binary=True)
    train_vectors = vectorizer.fit_transform(train_data)
    test_vectors = vectorizer.transform(test_data)
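    # Build an index-to-term lookup so feature column indices map back to words.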
    terms = np.array(list(vectorizer.vocabulary_.keys()))
    indices = np.array(list(vectorizer.vocabulary_.values()))
    inverse_vocabulary = terms[np.argsort(indices)]

    np.random.seed(1)
    classifier = get_classifier(algorithm, vectorizer)
    classifier.fit(train_vectors, train_labels)

    np.random.seed(1)
    untrustworthy_rounds = []
    all_features = range(train_vectors.shape[1])
    num_untrustworthy = int(train_vectors.shape[1] * percent_untrustworthy)
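    # Each round marks a fresh random subset of features as untrustworthy.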
    for _ in range(num_rounds):
        untrustworthy_rounds.append(
            np.random.choice(all_features, num_untrustworthy, replace=False))

    rho = 25
    kernel = lambda d: np.sqrt(np.exp(-(d**2) / rho**2))
    LIME = explainers.GeneralizedLocalExplainer(
        kernel,
        explainers.data_labels_distances_mapping_text,
        num_samples=15000,
        return_mean=True,
        verbose=False,
        return_mapped=True)

    parzen = parzen_windows.ParzenWindowClassifier()
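    # Fit the Parzen window explainer to the classifier's cross-validated predictions
    # so it mimics the black box rather than the ground-truth labels.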
    cv_preds = sklearn.cross_validation.cross_val_predict(classifier,
                                                          train_vectors,
                                                          train_labels,
                                                          cv=5)
    parzen.fit(train_vectors, cv_preds)
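    # Kernel widths per dataset and classifier, found through cross validation.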
    sigmas = {
        'multi_polarity_electronics': {
            'neighbors': 0.75,
            'svm': 10.0,
            'tree': 0.5,
            'logreg': 0.5,
            'random_forest': 0.5,
            'embforest': 0.75
        },
        'multi_polarity_kitchen': {
            'neighbors': 1.0,
            'svm': 6.0,
            'tree': 0.75,
            'logreg': 0.25,
            'random_forest': 6.0,
            'embforest': 1.0
        },
        'multi_polarity_dvd': {
            'neighbors': 0.5,
            'svm': 0.75,
            'tree': 8.0,
            'logreg': 0.75,
            'random_forest': 0.5,
            'embforest': 5.0
        },
        'multi_polarity_books': {
            'neighbors': 0.5,
            'svm': 7.0,
            'tree': 2.0,
            'logreg': 1.0,
            'random_forest': 1.0,
            'embforest': 3.0
        }
    }
    parzen.sigma = sigmas[dataset][algorithm]

    explainer_names = ['LIME', 'random', 'greedy', 'parzen']

    # This will store the partial results so later it can be saved in "df"
    res = {k: '' for k in ['classifier'] + explainer_names}
    res['classifier'] = algorithm

    random = explainers.RandomExplainer()
    exps = {}
    for expl in explainer_names:
        exps[expl] = []

    predictions = classifier.predict(test_vectors)
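    # Keep only column 1 of predict_proba: the probability of the positive class.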
    predict_probas = classifier.predict_proba(test_vectors)[:, 1]
    for i in range(test_vectors.shape[0]):
        print(i)
        sys.stdout.flush()
        exp, mean = LIME.explain_instance(test_vectors[i], 1,
                                          classifier.predict_proba,
                                          num_features)
        exps['LIME'].append((exp, mean))
        exp = parzen.explain_instance(test_vectors[i], 1,
                                      classifier.predict_proba, num_features,
                                      None)
        mean = parzen.predict_proba(test_vectors[i])[1]
        exps['parzen'].append((exp, mean))

        exp = random.explain_instance(test_vectors[i], 1, None, num_features,
                                      None)
        exps['random'].append(exp)

        exp = explainers.explain_greedy_martens(test_vectors[i],
                                                predictions[i],
                                                classifier.predict_proba,
                                                num_features)
        exps['greedy'].append(exp)

    precision = {}
    recall = {}
    f1 = {}
    for name in explainer_names:
        precision[name] = []
        recall[name] = []
        f1[name] = []
    flipped_preds_size = []
    for untrustworthy in untrustworthy_rounds:
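        # Zero out the untrustworthy features; test predictions that flip as a
        # result are the ones a user should not have trusted.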
        t = test_vectors.copy()
        t[:, untrustworthy] = 0
        mistrust_idx = np.argwhere(
            classifier.predict(t) != classifier.predict(test_vectors)).flatten(
            )
        print('Number of suspect predictions', len(mistrust_idx))
        shouldnt_trust = set(mistrust_idx)
        flipped_preds_size.append(len(shouldnt_trust))
        mistrust = collections.defaultdict(lambda: set())
        trust = collections.defaultdict(lambda: set())
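        # trust_fn: still trust a prediction if removing the untrustworthy features'
        # weights keeps the estimate on the same side of 0.5; trust_fn_all: trust
        # only if the explanation uses none of the untrustworthy features.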
        trust_fn = lambda prev, curr: ((prev > 0.5 and curr > 0.5) or
                                       (prev <= 0.5 and curr <= 0.5))
        trust_fn_all = lambda exp, unt: len(
            [x[0] for x in exp if x[0] in unt]) == 0
        for i in range(test_vectors.shape[0]):
            exp, mean = exps['LIME'][i]
            prev_tot = predict_probas[i]
            prev_tot2 = sum([x[1] for x in exp]) + mean
            tot = prev_tot2 - sum([x[1] for x in exp if x[0] in untrustworthy])
            trust['LIME'].add(i) if trust_fn(
                tot, prev_tot) else mistrust['LIME'].add(i)

            exp, mean = exps['parzen'][i]
            prev_tot = mean
            tot = mean - sum([x[1] for x in exp if x[0] in untrustworthy])
            trust['parzen'].add(i) if trust_fn(
                tot, prev_tot) else mistrust['parzen'].add(i)
            exp = exps['random'][i]
            trust['random'].add(i) if trust_fn_all(
                exp, untrustworthy) else mistrust['random'].add(i)

            exp = exps['greedy'][i]
            trust['greedy'].add(i) if trust_fn_all(
                exp, untrustworthy) else mistrust['greedy'].add(i)

        for expl in explainer_names:
            # 'Positive' here means a prediction the explainer deems trustworthy,
            # so a flipped prediction that was trusted counts as a false positive.
            false_positives = set(trust[expl]).intersection(shouldnt_trust)
            true_positives = set(trust[expl]).difference(shouldnt_trust)
            false_negatives = set(mistrust[expl]).difference(shouldnt_trust)
            true_negatives = set(mistrust[expl]).intersection(shouldnt_trust)

            try:
                prec = len(true_positives) / float(
                    len(true_positives) + len(false_positives))
            except ZeroDivisionError:
                prec = 0
            try:
                rec = float(len(true_positives)) / (len(true_positives) +
                                                    len(false_negatives))
            except ZeroDivisionError:
                rec = 0
            precision[expl].append(prec)
            recall[expl].append(rec)
            f1z = 2 * (prec * rec) / (prec + rec) if (prec and rec) else 0
            f1[expl].append(f1z)

    print('Average number of flipped predictions:', np.mean(flipped_preds_size),
          '+-', np.std(flipped_preds_size))
    print('Precision:')
    for expl in explainer_names:
        print(expl, np.mean(precision[expl]), '+-', np.std(precision[expl]),
              'pvalue', sp.stats.ttest_ind(precision[expl],
                                           precision['LIME'])[1].round(4))
    print()
    print('Recall:')
    for expl in explainer_names:
        print(expl, np.mean(recall[expl]), '+-', np.std(recall[expl]),
              'pvalue', sp.stats.ttest_ind(recall[expl],
                                           recall['LIME'])[1].round(4))
    print()
    print('F1:')
    for expl in explainer_names:
        print(expl, np.mean(f1[expl]), '+-', np.std(f1[expl]), 'pvalue',
              sp.stats.ttest_ind(f1[expl], f1['LIME'])[1].round(4))
        res[expl] = str('%.2f' % np.mean(f1[expl])) + '+-' + str(
            '%.2f' % np.std(f1[expl]))

    df = df.append(res, ignore_index=True)
    return df


def main():
    parser = argparse.ArgumentParser(description='Evaluate some explanations')
    parser.add_argument('--dataset',
                        '-d',
                        type=str,
                        required=True,
                        help='dataset name')
    parser.add_argument('--output_folder',
                        '-o',
                        type=str,
                        required=True,
                        help='output folder')
    parser.add_argument('--num_features',
                        '-k',
                        type=int,
                        required=True,
                        help='num features')
    parser.add_argument('--num_rounds',
                        '-r',
                        type=int,
                        required=True,
                        help='num rounds')
    parser.add_argument('--start_id',
                        '-i',
                        type=int,
                        default=0,
                        required=False,
                        help='output start id')
    args = parser.parse_args()
    dataset = args.dataset
    train_data, train_labels, test_data, test_labels, class_names = LoadDataset(
        dataset)
    rho = 25
    kernel = lambda d: np.sqrt(np.exp(-(d**2) / rho**2))
    local = explainers.GeneralizedLocalExplainer(
        kernel,
        explainers.data_labels_distances_mapping_text,
        num_samples=15000,
        return_mean=True,
        verbose=False,
        return_mapped=True)
    # Found through cross validation
    sigmas = {
        'multi_polarity_electronics': {
            'neighbors': 0.75,
            'svm': 10.0,
            'tree': 0.5,
            'logreg': 0.5,
            'random_forest': 0.5,
            'embforest': 0.75
        },
        'multi_polarity_kitchen': {
            'neighbors': 1.0,
            'svm': 6.0,
            'tree': 0.75,
            'logreg': 0.25,
            'random_forest': 6.0,
            'embforest': 1.0
        },
        'multi_polarity_dvd': {
            'neighbors': 0.5,
            'svm': 0.75,
            'tree': 8.0,
            'logreg': 0.75,
            'random_forest': 0.5,
            'embforest': 5.0
        },
        'multi_polarity_books': {
            'neighbors': 0.5,
            'svm': 7.0,
            'tree': 2.0,
            'logreg': 1.0,
            'random_forest': 1.0,
            'embforest': 3.0
        }
    }
    parzen1 = parzen_windows.ParzenWindowClassifier()
    parzen1.sigma = sigmas[dataset]['random_forest']
    parzen2 = parzen_windows.ParzenWindowClassifier()
    parzen2.sigma = sigmas[dataset]['random_forest']
    random = explainers.RandomExplainer()

    for Z in range(args.num_rounds):
        exps1 = {}
        exps2 = {}
        explainer_names = ['lime', 'parzen', 'random', 'greedy', 'mutual']
        for expl in explainer_names:
            exps1[expl] = []
            exps2[expl] = []
        print('Round', Z)
        sys.stdout.flush()
        fake_features_z = [([.1, .2], [.1, .1], 10)]  # , ([.2, .1], [.1, .1], 10)]
        clean_train, dirty_train, clean_test = corrupt_dataset(
            fake_features_z, train_data, train_labels, test_data, test_labels)
        vectorizer = CountVectorizer(lowercase=False, binary=True)
        dirty_train_vectors = vectorizer.fit_transform(dirty_train)
        clean_train_vectors = vectorizer.transform(clean_train)
        test_vectors = vectorizer.transform(clean_test)
        terms = np.array(list(vectorizer.vocabulary_.keys()))
        indices = np.array(list(vectorizer.vocabulary_.values()))
        inverse_vocabulary = terms[np.argsort(indices)]
        tokenizer = vectorizer.build_tokenizer()
        c1 = ensemble.RandomForestClassifier(n_estimators=30, max_depth=5)
        c2 = ensemble.RandomForestClassifier(n_estimators=30, max_depth=5)
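        # Features injected by corrupt_dataset carry a 'FAKE' prefix, so their
        # column indices are the known-untrustworthy features.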
        untrustworthy = [
            i for i, x in enumerate(inverse_vocabulary) if x.startswith('FAKE')
        ]
        train_idx, test_idx = tuple(
            cross_validation.ShuffleSplit(dirty_train_vectors.shape[0], 1,
                                          0.2))[0]
        train_acc1 = train_acc2 = test_acc1 = test_acc2 = 0
        print('Trying to find trees:')
        sys.stdout.flush()
        iteration = 0
        found_tree = True
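        # Retrain until the two forests have nearly identical held-out accuracy
        # (within 0.001) but test accuracies that differ by at least 0.05, or
        # give up after 3000 attempts.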
        while (np.abs(train_acc1 - train_acc2) > 0.001
               or np.abs(test_acc1 - test_acc2) < 0.05):
            iteration += 1
            c1.fit(dirty_train_vectors[train_idx], train_labels[train_idx])
            c2.fit(dirty_train_vectors[train_idx], train_labels[train_idx])
            train_acc1 = accuracy_score(
                train_labels[test_idx],
                c1.predict(dirty_train_vectors[test_idx]))
            train_acc2 = accuracy_score(
                train_labels[test_idx],
                c2.predict(dirty_train_vectors[test_idx]))
            test_acc1 = accuracy_score(test_labels, c1.predict(test_vectors))
            test_acc2 = accuracy_score(test_labels, c2.predict(test_vectors))
            if iteration == 3000:
                found_tree = False
                break
        if not found_tree:
            print('skipping iteration', Z)
            continue
        print('done')
        print('Train acc1:', train_acc1, 'Train acc2:', train_acc2)
        print('Test acc1:', test_acc1, 'Test acc2:', test_acc2)
        sys.stdout.flush()
        predictions = c1.predict(dirty_train_vectors)
        predictions2 = c2.predict(dirty_train_vectors)
        predict_probas = c1.predict_proba(dirty_train_vectors)[:, 1]
        predict_probas2 = c2.predict_proba(dirty_train_vectors)[:, 1]
        cv_preds1 = cross_validation.cross_val_predict(
            c1, dirty_train_vectors[train_idx], train_labels[train_idx], cv=5)
        cv_preds2 = cross_validation.cross_val_predict(
            c2, dirty_train_vectors[train_idx], train_labels[train_idx], cv=5)
        parzen1.fit(dirty_train_vectors[train_idx], cv_preds1)
        parzen2.fit(dirty_train_vectors[train_idx], cv_preds2)
        pp = []
        pp2 = []
        true_labels = []
        iteration = 0
        for i in test_idx:
            if iteration % 50 == 0:
                print(iteration)
                sys.stdout.flush()
            iteration += 1
            pp.append(predict_probas[i])
            pp2.append(predict_probas2[i])
            true_labels.append(train_labels[i])
            exp, mean = local.explain_instance(dirty_train_vectors[i], 1,
                                               c1.predict_proba,
                                               args.num_features)
            exps1['lime'].append((exp, mean))

            exp = parzen1.explain_instance(dirty_train_vectors[i], 1,
                                           c1.predict_proba, args.num_features,
                                           None)
            mean = parzen1.predict_proba(dirty_train_vectors[i])[1]
            exps1['parzen'].append((exp, mean))

            exp = random.explain_instance(dirty_train_vectors[i], 1, None,
                                          args.num_features, None)
            exps1['random'].append(exp)

            exp = explainers.explain_greedy_martens(dirty_train_vectors[i],
                                                    predictions[i],
                                                    c1.predict_proba,
                                                    args.num_features)
            exps1['greedy'].append(exp)

            # Classifier 2
            exp, mean = local.explain_instance(dirty_train_vectors[i], 1,
                                               c2.predict_proba,
                                               args.num_features)
            exps2['lime'].append((exp, mean))

            exp = parzen2.explain_instance(dirty_train_vectors[i], 1,
                                           c2.predict_proba, args.num_features,
                                           None)
            mean = parzen2.predict_proba(dirty_train_vectors[i])[1]
            exps2['parzen'].append((exp, mean))

            exp = random.explain_instance(dirty_train_vectors[i], 1, None,
                                          args.num_features, None)
            exps2['random'].append(exp)

            exp = explainers.explain_greedy_martens(dirty_train_vectors[i],
                                                    predictions2[i],
                                                    c2.predict_proba,
                                                    args.num_features)
            exps2['greedy'].append(exp)

        out = {
            'true_labels': true_labels,
            'untrustworthy': untrustworthy,
            'train_acc1': train_acc1,
            'train_acc2': train_acc2,
            'test_acc1': test_acc1,
            'test_acc2': test_acc2,
            'exps1': exps1,
            'exps2': exps2,
            'predict_probas1': pp,
            'predict_probas2': pp2
        }
        pickle.dump(
            out,
            open(
                os.path.join(
                    args.output_folder, 'comparing_%s_%s_%d.pickle' %
                    (dataset, args.num_features, Z + args.start_id)), 'wb'))
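

# Entry point (assumed): run the experiment rounds when invoked as a script.
if __name__ == '__main__':
    main()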