def test_model_fitting(self):
    """Smoke-test mining, fitting and AUC scoring on the Titanic data.

    Nothing is asserted about the resulting score: the test passes as long
    as the whole pipeline runs without raising.
    """
    # NOTE(review): absolute, machine-specific path -- the CSV has to exist
    # at exactly this location for the test to run.
    titanic_csv = "C:/code/python/interpretable_decision_sets/data/titanic.csv"
    titanic = pd.read_csv(titanic_csv)

    mined_rules = mine_CARs(titanic, rule_cutoff=40)
    quant_titanic = QuantitativeDataFrame(titanic)

    model = IDS()
    model.fit(quant_titanic, mined_rules, debug=False)

    # Computed on the same data the model was fitted on; value is not checked.
    auc_score = model.score_auc(quant_titanic)
def test_random_seed(self):
    """Check that rule mining and seeded IDS fitting are repeatable.

    First mines rules from the iris data several times and asserts that
    every run matches the first one. Then fits IDS several times with
    ``random_seed=2`` and asserts that every fitted rule list matches the
    first one.
    """
    runs = 10
    rules_per_run = 10

    iris = pd.read_csv("data/iris0.csv")
    quant_iris = QuantitativeDataFrame(iris)

    # --- Part 1: repeated mining -------------------------------------
    mined_runs = []
    for _ in range(runs):
        # Start every run from the same ClassAssocationRule.id value.
        ClassAssocationRule.id = 0
        cars = mine_CARs(iris, rules_per_run)
        mined_runs.append(cars)

    mining_matches = [
        _all_rules_same(mined_runs[0], mined_runs[run_idx])
        for run_idx in range(runs)
    ]
    self.assertTrue(np.all(mining_matches))

    # --- Part 2: repeated fitting with a fixed seed -------------------
    # `cars` still refers to the rules from the last mining run above.
    fitted_rule_lists = []
    for _ in range(runs):
        model = IDS().fit(
            quant_dataframe=quant_iris,
            class_association_rules=cars,
            debug=False,
            random_seed=2,
        )
        fitted_rule_lists.append(model.clf.rules)

    fitting_matches = [
        _all_rules_same(fitted_rule_lists[0], fitted_rule_lists[run_idx])
        for run_idx in range(runs)
    ]
    self.assertTrue(np.all(fitting_matches))
import random

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import accuracy_score, auc, roc_auc_score

from pyids.ids_rule import IDSRule
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_objective_function import ObjectiveFunctionParameters, IDSObjectiveFunction
from pyids.ids_optimizer import RSOptimizer, SLSOptimizer
from pyids.ids_cacher import IDSCacher
from pyids.ids_classifier import IDS, mine_CARs
from pyids.visualization import IDSVisualization
# Wildcard import kept last, as in the original ordering.
from pyarc.qcba import *

# Script: fit an IDS model on the iris data and visualize it.
# NOTE(review): absolute, machine-specific path.
iris_csv_path = "C:/code/python/interpretable_decision_sets/data/iris0.csv"
df = pd.read_csv(iris_csv_path)

# Mine candidate rules and wrap the frame for pyarc/pyids.
cars = mine_CARs(df, 20)
quant_df = QuantitativeDataFrame(df)

# Fit quietly (debug=False).
ids = IDS()
ids.fit(quant_df, cars, debug=False)

# Visualize the fitted model against the same data.
viz = IDSVisualization(ids, quant_df)
viz.visualize_dataframe()
# Benchmark: average IDS fitting time for each sampled data size.
#
# For every entry of `data_size_quantiles`, a random sample of that many rows
# is drawn from `df`, IDS is fitted PY_IDS_DURATION_ITERATIONS times on it,
# and the mean duration is recorded together with the data size.

# Not passed to `fit` below; kept defined outside the timed region so it does
# not contribute to the measured duration.
lambda_array = [1.0] * 7  # use separate hyperparameter search routine

# The rule subset does not depend on the data size, so slice it once.
current_cars = cars[:RULE_COUNT_TO_USE]

benchmark_data = []

for data_size in data_size_quantiles:
    current_df = df.sample(data_size)
    current_quant_df = QuantitativeDataFrame(current_df)

    times = []

    for _ in range(PY_IDS_DURATION_ITERATIONS):
        # perf_counter is monotonic and high-resolution, unlike time.time(),
        # which follows the (adjustable, possibly coarse) wall clock.
        time1 = time.perf_counter()

        ids = IDS()
        ids.fit(
            class_association_rules=current_cars,
            quant_dataframe=current_quant_df,
            debug=False,
        )

        time2 = time.perf_counter()

        duration = time2 - time1
        times.append(duration)

    benchmark_data.append(dict(duration=np.mean(times), data_size=data_size))


# In[32]:


benchmark_dataframe_pyids = pd.DataFrame(benchmark_data)
benchmark_dataframe_pyids
from collections import Counter

import pandas as pd

from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyids.ids_classifier import IDS, mine_CARs

# Script: fit IDS on the Titanic data and print its AUC twice, once with
# confidence_based=False and once with confidence_based=True.
df = pd.read_csv("./data/titanic.csv")

cars = mine_CARs(df, 20)
quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(quant_dataframe, class_association_rules=cars, debug=False)

# Same order as before: the non-confidence-based score is printed first.
for confidence_based in (False, True):
    print(ids.score_auc(quant_dataframe, confidence_based=confidence_based))
# Compare three lambda settings: values labelled "coordinate ascent", values
# labelled "random search", and a plain all-ones array. Each model is fitted
# on the training data and its rule set is evaluated on the test data.
quant_df_train = QuantitativeDataFrame(df_train)
quant_df_test = QuantitativeDataFrame(df_test)

lambda_array_coordinate_ascent = [18] * 7
lambda_array_random_search = [
    510.6938775510204,
    694.1836734693878,
    918.4489795918367,
    286.42857142857144,
    490.30612244897964,
    694.1836734693878,
    62.163265306122454,
]

ids_ascent = IDS()
ids_random = IDS()
ids_basic = IDS()

ids_ascent.fit(
    quant_df_train,
    cars,
    debug=False,
    lambda_array=lambda_array_coordinate_ascent,
)
ids_random.fit(
    quant_df_train,
    cars,
    debug=False,
    lambda_array=lambda_array_random_search,
)
ids_basic.fit(quant_df_train, cars, debug=False, lambda_array=[1] * 7)

# Rule-set statistics on the held-out frame, one model at a time.
ascent_ruleset = IDSRuleSet(ids_ascent.clf.rules)
ascent_metrics = calculate_ruleset_statistics(ascent_ruleset, quant_df_test)

random_ruleset = IDSRuleSet(ids_random.clf.rules)
random_metrics = calculate_ruleset_statistics(random_ruleset, quant_df_test)

basic_ruleset = IDSRuleSet(ids_basic.clf.rules)
basic_metrics = calculate_ruleset_statistics(basic_ruleset, quant_df_test)

# Attach the test-set AUC to the coordinate-ascent metrics.
ascent_auc = ids_ascent.score_auc(quant_df_test)
ascent_metrics.update({"auc": ascent_auc})
import pandas as pd

from pyids.ids_classifier import IDS, mine_CARs
from pyarc.qcba.data_structures import QuantitativeDataFrame

# Script: fit IDS on the Titanic data with debug output enabled and print
# the AUC measured on the same data.
titanic_csv_path = "./data/titanic.csv"
df = pd.read_csv(titanic_csv_path)

cars = mine_CARs(df, rule_cutoff=80)
quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(
    class_association_rules=cars,
    quant_dataframe=quant_dataframe,
    debug=True,
)

auc = ids.score_auc(quant_dataframe)
print(auc)
import random
from collections import Counter

from pyarc.qcba import *
from pyarc.algorithms import createCARs, top_rules
from pyarc import TransactionDB

from pyids.ids_classifier import IDS, mine_IDS_ruleset, mine_CARs
from pyids.ids_cacher import IDSCacher
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_rule import IDSRule
from pyids.model_selection import encode_label, mode, RandomSearchOptimizer

# Script: fit IDS on the Titanic data with algorithm="DLS", then print the
# AUC and the selected rules.
# NOTE(review): `pd` is not imported in the lines shown here; presumably it
# is imported above this chunk or provided by the wildcard import -- confirm.
df = pd.read_csv("./data/titanic.csv")

# Turn the "Died" column into strings with a trailing underscore.
df["Died"] = df["Died"].astype(str) + "_"

cars = mine_CARs(df, 100)
quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(
    class_association_rules=cars,
    quant_dataframe=quant_dataframe,
    algorithm="DLS",
)

# Despite its name, `acc` holds the result of score_auc.
acc = ids.score_auc(quant_dataframe)
print(acc)

print(ids.clf.rules)
import random

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import accuracy_score, auc, roc_auc_score

from pyids.ids_rule import IDSRule
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_objective_function import ObjectiveFunctionParameters, IDSObjectiveFunction
from pyids.ids_optimizer import RSOptimizer, SLSOptimizer
from pyids.ids_cacher import IDSCacher
from pyids.ids_classifier import IDS, mine_CARs
from pyids.visualization import IDSVisualization
# Wildcard import kept last, as in the original ordering.
from pyarc.qcba import *

# Script: fit an IDS model on the iris data with debug output enabled, then
# visualize it.
# NOTE(review): absolute, machine-specific path.
iris_csv_path = "C:/code/python/interpretable_decision_sets/data/iris0.csv"
df = pd.read_csv(iris_csv_path)

cars = mine_CARs(df, 20)
quant_df = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(quant_df, cars, debug=True)

viz = IDSVisualization(ids, quant_df)
viz.visualize_dataframe()
from collections import Counter

import pandas as pd

from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyids.ids_classifier import IDS, mine_CARs

# Script: fit IDS on the Titanic data with algorithm="SLS" and score it on
# the same data.
titanic_csv_path = "./data/titanic.csv"
df = pd.read_csv(titanic_csv_path)

cars = mine_CARs(df, 20)
quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(
    quant_dataframe,
    class_association_rules=cars,
    debug=False,
    algorithm="SLS",
)

# Result of IDS.score on the fitting data; not printed.
acc = ids.score(quant_dataframe)