def main():
    """Explain one adult-dataset record with LORE and print the outcome.

    A 20-tree random forest is fitted on 80% of the data; the first record of
    the remaining 20% is explained. The function prints the record, the rule,
    every counterfactual delta, the records of the held-out split covered by
    the rule, and a per-record mismatch indicator against the rule outcome
    together with its mean and standard deviation.
    """
    # Loaders that were swapped in here previously: prepare_german_dataset
    # ('german_credit.csv') and prepare_compass_dataset
    # ('compas-scores-two-years.csv').
    path_data = '/Users/Harry/Desktop/LORE-master/datasets/'
    dataset_name = 'adult.csv'
    dataset = prepare_adult_dataset(dataset_name, path_data)

    X, y = dataset['X'], dataset['y']
    X_train, X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)

    blackbox = RandomForestClassifier(n_estimators=20)
    blackbox.fit(X_train, y_train)

    # Black-box predictions on the records to explain, mapped through
    # dataset['possible_outcomes'].
    X2E = X_test
    outcome_names = dataset['possible_outcomes']
    y2E = np.asarray([outcome_names[code] for code in blackbox.predict(X2E)])

    idx_record2explain = 0
    explanation, infos = lore.explain(
        idx_record2explain, X2E, dataset, blackbox,
        ng_function=genetic_neighborhood,
        discrete_use_probabilities=True,
        continuous_function_estimation=False,
        returns_infos=True,
        path=path_data, sep=';', log=False)

    records = build_df2explain(blackbox, X2E, dataset).to_dict('records')
    dfx = records[idx_record2explain]

    rule = explanation[0]
    rule_outcome = rule[0]
    rule_premise = rule[1]

    print('x = %s' % dfx)
    print('r = %s --> %s' % (rule_premise, rule_outcome))
    for delta in explanation[1]:
        print('delta', delta)

    covered = lore.get_covered(rule_premise, records, dataset)
    print(len(covered))
    print(covered)

    predicted_class = rule_outcome[dataset['class_name']]
    print(predicted_class, '<<<<')

    # 0 when the black-box label equals the rule outcome, 1 otherwise.
    precision = [0 if label == predicted_class else 1
                 for label in y2E[covered]]
    print(precision)
    print(np.mean(precision), np.std(precision))
def main():
    """Explain one german-credit record with LORE and print its evaluation.

    A 20-tree random forest is fitted on 80% of the data and the record at
    index 1 of the held-out split is explained. The record, the rule and the
    counterfactual deltas are printed, followed by whatever
    ``evaluate_explanation`` returns for the explanation.
    """
    dataset_name = 'german_credit.csv'
    path_data = './datasets/'
    dataset = prepare_german_dataset(dataset_name, path_data)

    X, y = dataset['X'], dataset['y']
    X_train, X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.2, random_state=0)

    blackbox = RandomForestClassifier(n_estimators=20)
    blackbox.fit(X_train, y_train)

    X2E = X_test
    idx_record2explain = 1

    explain_options = dict(
        ng_function=genetic_neighborhood,
        discrete_use_probabilities=True,
        continuous_function_estimation=True,
        returns_infos=True,
    )
    explanation, infos = lore.explain(
        idx_record2explain, X2E, dataset, blackbox, **explain_options)

    # Single-row frame for the explained record, converted to a plain dict.
    single_row = X2E[idx_record2explain].reshape(1, -1)
    x = build_df2explain(blackbox, single_row, dataset).to_dict('records')[0]

    rule = explanation[0]
    print('x = %s' % x)
    print('r = %s --> %s' % (rule[1], rule[0]))
    for delta in explanation[1]:
        print('delta', delta)

    print('Evaluation')
    info_keys = ('bb_outcome', 'cc_outcome', 'y_pred_bb', 'y_pred_cc',
                 'dfZ', 'dt', 'tree_path', 'leaf_nodes', 'diff_outcome')
    (bb_outcome, cc_outcome, y_pred_bb, y_pred_cc,
     dfZ, dt, tree_path, leaf_nodes, diff_outcome) = (
        infos[key] for key in info_keys)

    evaluation = evaluate_explanation(
        x, blackbox, dfZ, dt, tree_path, leaf_nodes,
        bb_outcome, cc_outcome, y_pred_bb, y_pred_cc,
        diff_outcome, dataset, explanation[1])
    print(evaluation)
def run_experiment(blackbox, X2E, y2E, idx_record2explain, dataset, anchor_explainer, path_data, verbose=False):
    """Measure how stable LORE and Anchor rules are for one record.

    The record ``X2E[idx_record2explain]`` is explained repeatedly with both
    methods; the collected rules are then compared pairwise (first ten rules
    per method) on three measures: Jaccard similarity of the feature sets,
    exact equality of the rules, and absolute difference in rule length.

    Args:
        blackbox: Fitted classifier; its ``predict`` is handed to Anchor.
        X2E: Array of records to explain.
        y2E: Unused here; kept for interface compatibility.
        idx_record2explain: Index into ``X2E`` of the record to explain.
        dataset: Dataset description forwarded to ``lore.explain``.
        anchor_explainer: Object exposing ``explain_instance``.
        path_data: Forwarded to ``lore.explain`` as ``path``.
        verbose: Forwarded to ``lore.explain`` as ``log``.

    Returns:
        A comma-separated string of twelve ``%.6f`` values: mean and std of
        Jaccard, rule equality and length deviation for LORE, then the same
        six values for Anchor. A method that produced no rule at all yields
        ``nan`` entries (``np.mean`` of an empty list).
    """
    import logging

    logger = logging.getLogger(__name__)

    nbr_run = 3
    # The Anchor loop originally ran attempts 0..3 for every run and kept
    # every successful rule; the LORE loop had its exit condition commented
    # out and therefore never terminated. Both now share this bound.
    attempts_per_run = 4
    max_compared = 10

    def explain_with_lore():
        lore_explanation, _lore_info = lore.explain(
            idx_record2explain, X2E, dataset, blackbox,
            ng_function=genetic_neighborhood,
            discrete_use_probabilities=True,
            continuous_function_estimation=False,
            returns_infos=True,
            path=path_data, sep=';', log=verbose)
        return lore_explanation[0][1]

    def explain_with_anchor():
        anchor_explanation, _anchor_info = anchor_explainer.explain_instance(
            X2E[idx_record2explain].reshape(1, -1), blackbox.predict,
            threshold=0.95)
        return anchor2arule(anchor_explanation)

    def collect_rules(label, explain_once):
        """Run one explainer repeatedly and keep every rule it produces."""
        print(datetime.datetime.now(), '\t' + label)
        features, rules, sizes = [], [], []
        for k in range(nbr_run):
            print('%d, ' % k, end='')
            for attempt in range(attempts_per_run):
                try:
                    rule = explain_once()
                    names = list(rule.keys())
                except Exception:
                    # Best effort: a failed attempt contributes no rule.
                    logger.warning('%s explanation failed (run %d, attempt %d)',
                                   label, k, attempt, exc_info=True)
                    continue
                # Append to all three lists together so they stay aligned.
                features.append(names)
                rules.append(rule)
                sizes.append(len(names))
        print('')
        return features, rules, sizes

    def jaccard(left, right):
        union = left | right
        # Two empty rules are identical; avoid dividing by zero.
        return len(left & right) / len(union) if union else 1.0

    def pairwise_stability(features, rules, sizes):
        """Compare the first ``max_compared`` rules pairwise."""
        jaccards, same_rule, size_gap = [], [], []
        limit = min(len(features), max_compared)
        # NOTE(review): pairs include i1 == i2 (a rule compared with itself),
        # as in the original loop bounds -- confirm this is intended.
        for i1 in range(limit):
            for i2 in range(i1, limit):
                jaccards.append(jaccard(set(features[i1]), set(features[i2])))
                same_rule.append(1 if rules[i1] == rules[i2] else 0)
                size_gap.append(np.abs(sizes[i1] - sizes[i2]))
        return jaccards, same_rule, size_gap

    lore_rules = collect_rules('LORE', explain_with_lore)
    anchor_rules = collect_rules('Anchor', explain_with_anchor)

    summary = []
    for collected in (lore_rules, anchor_rules):
        for measure in pairwise_stability(*collected):
            summary.append(np.mean(measure))
            summary.append(np.std(measure))

    res = ','.join('%.6f' % value for value in summary)
    return res
def run_experiment(blackbox, X2E, y2E, idx_record2explain, dataset, anchor_explainer, path_data, verbose=False):
    """Compare LORE and Anchor explanations of one record on several metrics.

    For each method the record ``X2E[idx_record2explain]`` is explained (with
    up to six attempts) and hit, fidelity, coverage and a per-record mismatch
    indicator are computed. Metrics of a method that never succeeds keep
    their initial value of 0.

    Args:
        blackbox: Fitted classifier used by both explainers.
        X2E: Array of records to explain.
        y2E: Array of black-box outcomes for ``X2E``; indexed by position and
            by lists of covered indices.
        idx_record2explain: Index into ``X2E`` of the record to explain.
        dataset: Dataset description; ``dataset['class_name']`` is read here.
        anchor_explainer: Object exposing ``explain_instance``.
        path_data: Forwarded to ``lore.explain`` as ``path``.
        verbose: Forwarded to ``lore.explain`` as ``log``.

    Returns:
        A comma-separated string with, for LORE and then for Anchor:
        individual hit, fidelity accuracy, fidelity F1, coverage on ``X2E``,
        mean of the mismatch indicator, coverage on the neighbourhood.
    """
    import logging

    logger = logging.getLogger(__name__)

    # The original loops ran attempts 0..5.
    max_attempts = 6

    # Black-box outcome of the record being explained.
    bb_outcome = y2E[idx_record2explain]

    dfX2E = build_df2explain(blackbox, X2E, dataset).to_dict('records')

    individual_hit_lore = 0
    fidelity_acc_lore = fidelity_f1_lore = coverage_lore = coverage_Z_lore = 0
    precision_lore = [0]
    individual_hit_anchor = fidelity_acc_anchor = fidelity_f1_anchor = coverage_anchor = coverage_Z_anchor = 0
    precision_anchor = [0]

    def mismatches(labels, outcome):
        # NOTE(review): this yields 1 for a label that DIFFERS from the rule
        # outcome, so the reported "precision" is a mismatch rate -- confirm
        # that this is the intended definition.
        return [0 if label == outcome else 1 for label in labels]

    print(datetime.datetime.now(), '\tLORE')
    for attempt in range(max_attempts):
        try:
            lore_explanation, lore_info = lore.explain(
                idx_record2explain, X2E, dataset, blackbox,
                ng_function=genetic_neighborhood,
                discrete_use_probabilities=True,
                continuous_function_estimation=False,
                returns_infos=True,
                path=path_data, sep=';', log=verbose)

            cc_outcome_lore = lore_explanation[0][0][dataset['class_name']]
            individual_hit_lore = hit_outcome(bb_outcome, cc_outcome_lore)

            y_pred_bb_lore = lore_info['y_pred_bb']
            y_pred_cc_lore = lore_info['y_pred_cc']
            fidelity_acc_lore = accuracy_score(y_pred_bb_lore, y_pred_cc_lore)
            fidelity_f1_lore = f1_score(y_pred_bb_lore, y_pred_cc_lore)

            lrule = lore_explanation[0][1]
            covered_lore = lore.get_covered(lrule, dfX2E, dataset)
            coverage_lore = len(covered_lore) / len(dfX2E)
            precision_lore = mismatches(y2E[covered_lore], cc_outcome_lore)

            covered_Z_lore = lore.get_covered(
                lrule, lore_info['dfZ'].to_dict('records'), dataset)
            coverage_Z_lore = len(covered_Z_lore) / len(lore_info['dfZ'])

            # Retry while the rule covers nothing in either data set.
            if coverage_lore > 0.0 and coverage_Z_lore > 0.0:
                break
        except Exception:
            # Best effort: keep whatever was computed and try again.
            logger.warning('LORE explanation failed (attempt %d)', attempt,
                           exc_info=True)

    print(datetime.datetime.now(), '\tAnchor')
    for attempt in range(max_attempts):
        try:
            anchor_explanation, anchor_info = anchor_explainer.explain_instance(
                X2E[idx_record2explain].reshape(1, -1), blackbox.predict,
                threshold=0.95)

            Zanchor = anchor_info['state']['raw_data']
            # NOTE(review): both arrays below are black-box predictions on
            # the same data, so the fidelity scores compare the black box
            # with itself -- confirm what should stand in for the explainer.
            y_pred_bb_anchor = blackbox.predict(Zanchor)
            y_pred_cc_anchor = blackbox.predict(Zanchor)
            fidelity_acc_anchor = accuracy_score(y_pred_bb_anchor, y_pred_cc_anchor)
            fidelity_f1_anchor = f1_score(y_pred_bb_anchor, y_pred_cc_anchor)

            arule = anchor2arule(anchor_explanation)
            covered_anchor = lore.get_covered(arule, dfX2E, dataset)
            coverage_anchor = len(covered_anchor) / len(dfX2E)

            if len(covered_anchor) > 0:
                if isinstance(y2E[0], str):
                    cc_outcome_anchor = mode(y2E[covered_anchor])
                else:
                    cc_outcome_anchor = int(np.round(y2E[covered_anchor].mean()))
            else:
                cc_outcome_anchor = bb_outcome

            individual_hit_anchor = hit_outcome(bb_outcome, cc_outcome_anchor)
            precision_anchor = mismatches(y2E[covered_anchor], cc_outcome_anchor)

            # Only the first 1000 neighbourhood records are checked, so the
            # coverage is computed relative to that same truncated sample.
            dfZanchor = build_df2explain(blackbox, Zanchor, dataset).to_dict('records')[:1000]
            covered_Z_anchor = lore.get_covered(arule, dfZanchor, dataset)
            coverage_Z_anchor = len(covered_Z_anchor) / len(dfZanchor)
        except Exception:
            # Best effort: keep whatever was computed and try again.
            logger.warning('Anchor explanation failed (attempt %d)', attempt,
                           exc_info=True)
        else:
            # A complete attempt succeeded; do not recompute and overwrite it.
            break

    res = '%d,%.6f,%.6f,%.6f,%.6f,%.6f,%d,%.6f,%.6f,%.6f,%.6f,%.6f' % (
        individual_hit_lore,
        fidelity_acc_lore,
        fidelity_f1_lore,
        coverage_lore,
        np.mean(precision_lore),
        coverage_Z_lore,
        individual_hit_anchor,
        fidelity_acc_anchor,
        fidelity_f1_anchor,
        coverage_anchor,
        np.mean(precision_anchor),
        coverage_Z_anchor,
    )
    return res