def test_model_fitting(self):
    """Smoke-test mining rules, fitting an IDS model and scoring its AUC.

    Nothing is asserted about the score itself; the test passes as long as
    mining, ``fit`` and ``score_auc`` complete without raising.
    """
    # Relative path (same convention as test_random_seed) instead of a
    # machine-specific absolute "C:/..." path, so the test is portable.
    # Assumes the tests are run from the directory that contains "data/".
    df = pd.read_csv("data/titanic.csv")

    cars = mine_CARs(df, rule_cutoff=40)
    quant_df = QuantitativeDataFrame(df)

    ids = IDS()
    ids.fit(quant_df, cars, debug=False)

    # Scored on the same data the model was fitted on.
    auc = ids.score_auc(quant_df)
def test_optimization(self):
    """Smoke-test a coordinate-ascent search over the IDS lambda parameters.

    Nothing is asserted about the result; the test passes as long as rule
    mining and ``CoordinateAscentOptimizer.fit`` complete without raising.
    """
    # Relative path (same convention as test_random_seed) instead of a
    # machine-specific absolute "C:/..." path, so the test is portable.
    # Assumes the tests are run from the directory that contains "data/".
    df = pd.read_csv("data/titanic.csv")

    ids_ruleset = mine_IDS_ruleset(df, rule_cutoff=40)
    quant_df = QuantitativeDataFrame(df)

    ascent = CoordinateAscentOptimizer(IDS(), maximum_consecutive_iterations=1)

    # The same dataframe is passed for both data arguments of fit().
    lambdas = ascent.fit(ids_ruleset, quant_df, quant_df)
def test_random_seed(self):
    """Check that repeated rule mining and repeated seeded fits agree.

    First, rules are mined ``replications_n`` times and every run is compared
    with the first one. Then ``replications_n`` IDS models are fitted with
    ``random_seed=2`` and their rule lists are compared the same way.
    """
    replications_n = 10
    cars_to_mine = 10

    df = pd.read_csv("data/iris0.csv")
    quant_df = QuantitativeDataFrame(df)

    # --- repeated mining -------------------------------------------------
    mined_runs = []
    for _ in range(replications_n):
        # Reset the class-level id counter before every mining run.
        ClassAssocationRule.id = 0
        cars = mine_CARs(df, cars_to_mine)
        mined_runs.append(cars)

    mining_matches = [
        _all_rules_same(mined_runs[0], run) for run in mined_runs
    ]
    self.assertTrue(np.all(mining_matches))

    # --- repeated fitting with a fixed seed ------------------------------
    # `cars` is the rule list from the last mining run above.
    fitted_rule_lists = []
    for _ in range(replications_n):
        model = IDS().fit(
            quant_dataframe=quant_df,
            class_association_rules=cars,
            debug=False,
            random_seed=2,
        )
        fitted_rule_lists.append(model.clf.rules)

    fitting_matches = [
        _all_rules_same(fitted_rule_lists[0], rules)
        for rules in fitted_rule_lists
    ]
    self.assertTrue(np.all(fitting_matches))
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
from sklearn.metrics import accuracy_score, auc, roc_auc_score
from pyids.ids_rule import IDSRule
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_objective_function import ObjectiveFunctionParameters, IDSObjectiveFunction
from pyids.ids_optimizer import RSOptimizer, SLSOptimizer
from pyids.ids_cacher import IDSCacher
from pyids.ids_classifier import IDS, mine_CARs
from pyids.visualization import IDSVisualization
from pyarc.qcba import *

# Script: fit an IDS model on the iris0 data and visualise it.
# NOTE(review): the CSV path is absolute and machine-specific.
df = pd.read_csv("C:/code/python/interpretable_decision_sets/data/iris0.csv")

cars = mine_CARs(df, 20)
quant_df = QuantitativeDataFrame(df)

# Fit quietly (debug=False).
ids = IDS()
ids.fit(quant_df, cars, debug=False)

# Build the visualisation from the fitted model and the same dataframe.
viz = IDSVisualization(ids, quant_df)
viz.visualize_dataframe()
# In[31]:
# Benchmark IDS fitting time for each sample size in `data_size_quantiles`.
# Each size is fitted PY_IDS_DURATION_ITERATIONS times on one random sample
# of `df`, and the mean duration is recorded.
benchmark_data = []

for data_size in data_size_quantiles:
    current_cars = cars[:RULE_COUNT_TO_USE]
    current_df = df.sample(data_size)
    current_quant_df = QuantitativeDataFrame(current_df)

    times = []

    for _ in range(PY_IDS_DURATION_ITERATIONS):
        # perf_counter() is a monotonic, high-resolution clock intended for
        # measuring short durations; time.time() is wall-clock time and can
        # be coarse or jump when the system clock is adjusted.
        time1 = time.perf_counter()

        # NOTE(review): lambda_array is assigned but never passed to fit().
        lambda_array = [1.0] * 7  # use separate hyperparameter search routine

        ids = IDS()
        ids.fit(
            class_association_rules=current_cars,
            quant_dataframe=current_quant_df,
            debug=False,
        )

        time2 = time.perf_counter()

        duration = time2 - time1
        times.append(duration)

    benchmark_data.append({"duration": np.mean(times), "data_size": data_size})


# In[32]:
benchmark_dataframe_pyids = pd.DataFrame(benchmark_data)
from pyarc.qcba import *
from pyarc.algorithms import createCARs, top_rules
from pyarc import TransactionDB
from pyids.ids_classifier import IDS, mine_IDS_ruleset
from pyids.ids_cacher import IDSCacher
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_rule import IDSRule
from pyids.model_selection import encode_label, mode, RandomSearchOptimizer

# Script: random search over IDS parameters on the Titanic data.
df = pd.read_csv("./data/titanic.csv")

# Convert the "Died" column to strings and append "_" to every value.
# NOTE(review): presumably forces the label to be treated as text - confirm.
df["Died"] = df["Died"].astype(str) + "_"

ids_ruleset = mine_IDS_ruleset(df, 40)
quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()
random_search = RandomSearchOptimizer(
    ids,
    debug=False,
    maximum_score_estimation_iterations=3,
    maximum_iterations=500,
)

# The same dataframe is passed for both data arguments of fit().
result = random_search.fit(ids_ruleset, quant_dataframe, quant_dataframe)
print(result)
import pandas as pd
from collections import Counter
from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyids.ids_classifier import IDS, mine_CARs

# Script: fit IDS on the Titanic data and print score_auc() for both
# settings of `confidence_based`.
df = pd.read_csv("./data/titanic.csv")

cars = mine_CARs(df, 20)
quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(quant_dataframe, class_association_rules=cars, debug=False)

# Scored on the same data the model was fitted on.
print(ids.score_auc(quant_dataframe, confidence_based=False))
print(ids.score_auc(quant_dataframe, confidence_based=True))
import os

from pyids.ids_classifier import IDS, mine_IDS_ruleset
from pyids.ids_ruleset import IDSRuleSet
from pyids.model_selection import RandomSearchOptimizer, train_test_split_pd
from pyids.rule_mining import RuleMiner

# Script: random search over IDS lambda parameters on a train/test split of
# the Titanic data; the best lambda array and all scored parameter sets are
# written to text files under "results/".

# Create the output directory before the long-running search, so a missing
# directory cannot make the final writes fail and discard the results.
os.makedirs("results", exist_ok=True)

df = pd.read_csv("../../data/titanic.csv")

rm = RuleMiner()
cars = rm.mine_rules(df, minsup=0.005)
ids_ruleset = IDSRuleSet.from_cba_rules(cars)

df_train, df_test = train_test_split_pd(df, prop=0.25)
quant_df_train, quant_df_test = QuantitativeDataFrame(df_train), QuantitativeDataFrame(df_test)

random_optimizer = RandomSearchOptimizer(
    IDS(),
    maximum_score_estimation_iterations=5,
    maximum_iterations=500,
)
lambda_array = random_optimizer.fit(ids_ruleset, quant_df_train, quant_df_test)
all_params = random_optimizer.score_params_dict

print(lambda_array)

with open("results/random_search_lambda_array.txt", "w", encoding="utf-8") as file:
    file.write(str(lambda_array))

with open("results/random_search_all_score_params.txt", "w", encoding="utf-8") as file:
    file.write(str(all_params))
# Script: fit three IDS models that differ only in their lambda array and
# compute ruleset statistics for the coordinate-ascent variant on the test
# split.
df = pd.read_csv("../../data/titanic.csv")
cars = mine_CARs(df, rule_cutoff=40)

df_train, df_test = train_test_split_pd(df, prop=0.25)
quant_df_train = QuantitativeDataFrame(df_train)
quant_df_test = QuantitativeDataFrame(df_test)

# Hard-coded lambda arrays; the variable names indicate which search they
# came from.
lambda_array_coordinate_ascent = [18] * 7
lambda_array_random_search = [
    510.6938775510204,
    694.1836734693878,
    918.4489795918367,
    286.42857142857144,
    490.30612244897964,
    694.1836734693878,
    62.163265306122454,
]

ids_ascent = IDS()
ids_random = IDS()
ids_basic = IDS()

ids_ascent.fit(
    quant_df_train,
    cars,
    debug=False,
    lambda_array=lambda_array_coordinate_ascent,
)
ids_random.fit(
    quant_df_train,
    cars,
    debug=False,
    lambda_array=lambda_array_random_search,
)
# Baseline: all seven lambdas set to 1.
ids_basic.fit(quant_df_train, cars, debug=False, lambda_array=[1] * 7)

ascent_ruleset = IDSRuleSet(ids_ascent.clf.rules)
ascent_metrics = calculate_ruleset_statistics(ascent_ruleset, quant_df_test)
import pandas as pd
from pyids.ids_classifier import IDS, mine_CARs
from pyarc.qcba.data_structures import QuantitativeDataFrame

# Script: fit IDS on the Titanic data with debug output enabled and print
# the score_auc() result.
df = pd.read_csv("./data/titanic.csv")

cars = mine_CARs(df, rule_cutoff=80)
quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(
    class_association_rules=cars,
    quant_dataframe=quant_dataframe,
    debug=True,
)

# Scored on the same data the model was fitted on.
auc = ids.score_auc(quant_dataframe)
print(auc)
from pyids.rule_mining import RuleMiner
import pandas as pd
from pyids.ids_classifier import IDS
from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyids.ids_ruleset import IDSRuleSet

# Script: mine rules with RuleMiner, fit IDS on the Titanic data and print
# the result of score_interpretable_metrics().
rm = RuleMiner()

df = pd.read_csv("./data/titanic.csv")

cars = rm.mine_rules(df, minsup=0.001)
# Report how many rules were mined.
print(len(cars))

quant_df = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(quant_df, cars, debug=False)

# Computed on the same data the model was fitted on.
metrics = ids.score_interpretable_metrics(quant_df)
print(metrics)
import random
from collections import Counter
from pyarc.qcba import *
from pyarc.algorithms import createCARs, top_rules
from pyarc import TransactionDB
from pyids.ids_classifier import IDS, mine_IDS_ruleset, mine_CARs
from pyids.ids_cacher import IDSCacher
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_rule import IDSRule
from pyids.model_selection import encode_label, mode, RandomSearchOptimizer

# Script: fit IDS with algorithm="DLS" on the Titanic data, then print the
# score_auc() result and the selected rules.
df = pd.read_csv("./data/titanic.csv")

# Convert the "Died" column to strings and append "_" to every value.
# NOTE(review): presumably forces the label to be treated as text - confirm.
df["Died"] = df["Died"].astype(str) + "_"

cars = mine_CARs(df, 100)
quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(
    class_association_rules=cars,
    quant_dataframe=quant_dataframe,
    algorithm="DLS",
)

# Despite its name, `acc` holds the value returned by score_auc().
acc = ids.score_auc(quant_dataframe)

print(acc)
print(ids.clf.rules)
import os

from pyids.ids_classifier import IDS, mine_IDS_ruleset
from pyids.ids_ruleset import IDSRuleSet
from pyids.model_selection import RandomSearchOptimizer, train_test_split_pd
from pyids.rule_mining import RuleMiner

# Script: random search over IDS lambda parameters on a train/test split of
# the Titanic data; the best lambda array and all scored parameter sets are
# written to text files under "results/".

# Create the output directory before the long-running search, so a missing
# directory cannot make the final writes fail and discard the results.
os.makedirs("results", exist_ok=True)

df = pd.read_csv("../../data/titanic.csv")

rm = RuleMiner()
cars = rm.mine_rules(df, minsup=0.001)
ids_ruleset = IDSRuleSet.from_cba_rules(cars)

df_train, df_test = train_test_split_pd(df, prop=0.25)
quant_df_train, quant_df_test = QuantitativeDataFrame(
    df_train), QuantitativeDataFrame(df_test)

random_optimizer = RandomSearchOptimizer(
    IDS(),
    maximum_score_estimation_iterations=5,
    maximum_iterations=500,
)
lambda_array = random_optimizer.fit(ids_ruleset, quant_df_train, quant_df_test)
all_params = random_optimizer.score_params_dict

print(lambda_array)

with open("results/random_search_lambda_array.txt", "w", encoding="utf-8") as file:
    file.write(str(lambda_array))

with open("results/random_search_all_score_params.txt", "w", encoding="utf-8") as file:
    file.write(str(all_params))
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
from collections import Counter
from pyarc.qcba import *
from pyarc.algorithms import createCARs, top_rules
from pyarc import TransactionDB
from pyids.ids_classifier import IDS, mine_IDS_ruleset
from pyids.ids_cacher import IDSCacher
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_rule import IDSRule
from pyids.model_selection import encode_label, mode, CoordinateAscentOptimizer

# Script: coordinate-ascent search over IDS parameters on the Titanic data,
# with debug output enabled.
df = pd.read_csv("./data/titanic.csv")

ids_ruleset = mine_IDS_ruleset(df, rule_cutoff=40)
quant_dataframe = QuantitativeDataFrame(df)

coordinate_ascent = CoordinateAscentOptimizer(
    IDS(),
    debug=True,
    maximum_delta_between_iterations=200,
    maximum_score_estimation_iterations=3,
    ternary_search_precision=20,
)

# The same dataframe is passed for both data arguments of fit().
coordinate_ascent.fit(ids_ruleset, quant_dataframe, quant_dataframe)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
from sklearn.metrics import accuracy_score, auc, roc_auc_score
from pyids.ids_rule import IDSRule
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_objective_function import ObjectiveFunctionParameters, IDSObjectiveFunction
from pyids.ids_optimizer import RSOptimizer, SLSOptimizer
from pyids.ids_cacher import IDSCacher
from pyids.ids_classifier import IDS, mine_CARs
from pyids.visualization import IDSVisualization
from pyarc.qcba import *

# Script: fit an IDS model on the iris0 data with debug output enabled and
# visualise it.
# NOTE(review): the CSV path is absolute and machine-specific.
df = pd.read_csv("C:/code/python/interpretable_decision_sets/data/iris0.csv")

cars = mine_CARs(df, 20)
quant_df = QuantitativeDataFrame(df)

# Fit with debug=True.
ids = IDS()
ids.fit(quant_df, cars, debug=True)

# Build the visualisation from the fitted model and the same dataframe.
viz = IDSVisualization(ids, quant_df)
viz.visualize_dataframe()
import pandas as pd
from collections import Counter
from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyids.ids_classifier import IDS, mine_CARs

# Script: fit IDS with algorithm="SLS" on the Titanic data and score it.
df = pd.read_csv("./data/titanic.csv")

cars = mine_CARs(df, 20)
quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(
    quant_dataframe,
    class_association_rules=cars,
    debug=False,
    algorithm="SLS",
)

# Scored on the same data the model was fitted on; the value is not printed.
acc = ids.score(quant_dataframe)