import random as rn

import numpy as np
import pyAgrum as gum
import pyAgrum.lib.bn2graph
import pyAgrum.lib.notebook as gnb


def generate_BN_explanations(instance, label_lst, feature_names, class_var,
                             encoder, scaler, model, path, dataset_name):
    # Seed NumPy's RNG so the generated permutations are reproducible
    np.random.seed(515)
    # Seed core Python's RNG as well
    rn.seed(515)

    indx = instance['index']
    prediction_type = instance['prediction_type'].lower() + "s"
    prediction_type = prediction_type.replace(" ", "_")

    # generate permutations
    df = generate_permutations(instance, label_lst, feature_names, class_var,
                               encoder, scaler, model)

    # discretize data
    df_discr = discretize_dataframe(df, class_var, num_bins=4)

    # save discretised dataframe (for debugging and reproducibility purposes)
    path_to_permutations = path + "feature_permutations/" + dataset_name.replace(
        ".csv", "") + "/" + prediction_type + "/" + str(indx) + ".csv"
    df_discr.to_csv(path_to_permutations, index=False)

    # normalise dataframe
    normalise_dataframe(path_to_permutations)

    # learn BN
    bn, infoBN, essencGraph = learnBN(
        path_to_permutations.replace(".csv", "_norm.csv"))

    # perform inference
    inference = gnb.getInference(bn, evs={},
                                 targets=df_discr.columns.to_list(), size='12')

    # show networks
    gnb.sideBySide(*[bn, inference, infoBN],
                   captions=["Bayesian Network", "Inference", "Information Network"])

    # save to file
    path_to_explanation = path + "explanations/" + dataset_name.replace(
        ".csv", "") + "/BN/" + prediction_type + "/"
    gum.lib.bn2graph.dotize(bn, path_to_explanation + str(indx) + "_BN")
    gum.saveBN(bn, path_to_explanation + str(indx) + "_BN.net")

    return [bn, inference, infoBN]
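# For context, a minimal usage sketch (not from the source): the instance
# layout, class labels, paths, and fitted objects below are illustrative
# assumptions about what generate_permutations() and the caller expect.
instance = {
    'index': 42,                         # row id of the instance being explained
    'prediction_type': 'True Positive',  # becomes the "true_positives/" subfolder
    # ...plus the feature values that generate_permutations() reads
}
bn, inference, infoBN = generate_BN_explanations(
    instance,
    label_lst=['<=50K', '>50K'],   # class labels (assumed)
    feature_names=feature_names,   # list of feature column names (assumed defined)
    class_var='target',
    encoder=encoder,               # fitted encoder (assumed defined)
    scaler=scaler,                 # fitted scaler (assumed defined)
    model=model,                   # trained classifier (assumed defined)
    path='results/',               # output root; subfolders must already exist
    dataset_name='adult.csv',
)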
template.add(gum.LabelizedVariable("occupation", "occupation",['Tech-support', 'Craft-repair', 'Other-service', 'Sales', 'Exec-managerial', 'Prof-specialty', 'Handlers-cleaners', 'Machine-op-inspct', 'Adm-clerical', 'Farming-fishing', 'Transport-moving', 'Priv-house-serv', 'Protective-serv', 'Armed-Forces'])) gnb.showBN(template) train_df.to_csv(os.path.join('/content/gdrive/My Drive/train_data2.csv'), index=False) file = os.path.join('res', 'titanic', '/content/gdrive/My Drive/train_data2.csv') learner = gum.BNLearner(file, template) bn = learner.learnBN() bn gnb.showInformation(bn,{},size="20") gnb.showInference(bn) gnb.showPosterior(bn,evs={"sex": "Male", "age_range": '21-30'},target='target') gnb.sideBySide(bn, gum.MarkovBlanket(bn, 'target'), captions=["Learned Bayesian Network", "Markov blanket of 'target'"]) ie=gum.LazyPropagation(bn) init_belief(ie) ie.addTarget('target') result = testdf.apply(lambda x: is_well_predicted(ie, bn, 0.157935, x), axis=1) result.value_counts(True) positives = sum(result.map(lambda x: 1 if x.startswith("True") else 0 )) total = result.count() print("{0:.2f}% good predictions".format(positives/total*100)) showROC(bn,file, 'target', "True", True, True)
# # $\color{red}{\text{TODO: understand these algorithms}}$
# # **Using:** LocalSearchWithTabuList
# %% codecell
learner = gum.BNLearner(outPath, asiaBN)  # using asiaBN as the template for variables

# Learn the structure of the BN
learner.useLocalSearchWithTabuList()
asiaBN_learnedStructure_localSearchAlgo = learner.learnBN()
print("Learned in {}ms".format(1000 * learner.currentTime()))

htmlInfo: str = gnb.getInformation(asiaBN_learnedStructure_localSearchAlgo)
gnb.sideBySide(asiaBN_learnedStructure_localSearchAlgo, htmlInfo)
# %% markdown
# Notice how the original .bif BN, the parameter-learned BN, and the structure-learned BN all differ:
# %% codecell
asiaBN
# %% codecell
asiaBN_learnedParams
# %% markdown
# [`ExactBNdistance`](https://hyp.is/1OhsSKy4EeqyemuIJO85ew/pyagrum.readthedocs.io/en/0.18.0/BNToolsCompar.html) is a class for exact computation of divergences and distances between BNs.
# %% codecell
from pyAgrum import ExactBNdistance

exact: ExactBNdistance = gum.ExactBNdistance(asiaBN,
                                             asiaBN_learnedStructure_localSearchAlgo)
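# %% markdown
# A hedged sketch of reading off the distance measures: `BNdistance` subclasses
# such as `ExactBNdistance` expose a `compute()` method returning a dict of
# divergences; the exact keys (e.g. `klPQ`, `klQP`, `hellinger`, `bhattacharya`)
# depend on the pyAgrum version.
# %% codecell
measures = exact.compute()
for name, value in measures.items():
    print("{}: {}".format(name, value))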