def fmax(param_dict):
    print(param_dict)
    # the optimizer works on integer parameters, so support/confidence
    # arrive scaled by 1000
    support, confidence = param_dict["support"] / 1000, param_dict["confidence"] / 1000
    print(dict(support=support, confidence=confidence))

    cba = CBA(support=support, confidence=confidence)
    cba.fit(txns)
    cba_clf = cba.clf

    # wrap the CBA rules in an IDS classifier so the IDS scoring functions apply
    ids = IDS()
    ids_clf = IDSClassifier(IDSRuleSet.from_cba_rules(cba_clf.rules).ruleset)
    ids_clf.quant_dataframe_train = quant_df
    ids_clf.calculate_default_class()
    ids.clf = ids_clf

    # uninterpretable solutions are penalized by how far the metrics
    # exceed the interpretability thresholds
    metrics = ids.score_interpretability_metrics(quant_df_test)
    if not is_solution_interpretable(metrics):
        distance = solution_interpretability_distance(metrics)
        print(-distance)
        return -distance

    auc = ids.score_auc(quant_df_test)
    print(auc)
    return auc
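# `is_solution_interpretable` and `solution_interpretability_distance` are not
# defined in this excerpt. A minimal sketch of what they might look like,
# assuming the metric names returned by score_interpretability_metrics and
# illustrative threshold values (both are assumptions, not the original code):
INTERPRETABILITY_THRESHOLDS = dict(
    fraction_overlap=0.10,    # assumed upper bound
    fraction_uncovered=0.15,  # assumed upper bound
    average_rule_width=7,     # assumed upper bound
    ruleset_length=20,        # assumed upper bound
)

def is_solution_interpretable(metrics):
    # interpretable iff every tracked metric stays within its threshold
    return all(metrics[name] <= bound
               for name, bound in INTERPRETABILITY_THRESHOLDS.items())

def solution_interpretability_distance(metrics):
    # total amount by which the metrics exceed their thresholds;
    # 0 when the solution is interpretable
    return sum(max(0, metrics[name] - bound)
               for name, bound in INTERPRETABILITY_THRESHOLDS.items())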
def fmax(lambda_dict):
    print(lambda_dict)

    ids = IDS(algorithm=algorithm)
    ids.fit(class_association_rules=cars, quant_dataframe=quant_df,
            lambda_array=list(lambda_dict.values()))

    auc = ids.score_auc(quant_df_test)
    print(auc)
    return auc
def fmax(param_dict):
    print(param_dict)
    support, confidence = param_dict["support"] / 1000, param_dict["confidence"] / 1000
    print(dict(support=support, confidence=confidence))

    cba = CBA(support=support, confidence=confidence)
    cba.fit(txns)
    cba_clf = cba.clf

    ids = IDS()
    ids_clf = IDSClassifier(IDSRuleSet.from_cba_rules(cba_clf.rules).ruleset)
    ids_clf.quant_dataframe_train = quant_df
    ids_clf.calculate_default_class()
    ids.clf = ids_clf

    auc = ids.score_auc(quant_df_test)
    print(auc)
    return auc
def fmax(lambda_dict): print(lambda_dict) ids = IDS(algorithm="RUSM") ids.fit(class_association_rules=cars, quant_dataframe=quant_df, lambda_array=list(lambda_dict.values())) metrics = ids.score_interpretability_metrics(quant_df) if not is_solution_interpretable(metrics): return 0 auc = ids.score_auc(quant_df) print(auc) return auc
quant_df = QuantitativeDataFrame(df)

for algorithm in ["DLS", "SLS", "DUSM", "RUSM"]:
    durations = []
    for _ in range(time_estimation_iterations):
        print(_)
        lambda_array = generate_lambda_array()
        print(f"data count: {data_count}")
        print(f"algorithm: {algorithm}")
        print(f"using lambda: {lambda_array}")

        cars = mine_CARs(df, rule_cutoff=rule_cutoff)
        ids = IDS(algorithm=algorithm)

        start = time.time()
        ids.fit(class_association_rules=cars, quant_dataframe=quant_df,
                lambda_array=lambda_array)
        duration = time.time() - start
        print(f"duration: {duration}")
        durations.append(duration)

    duration = np.mean(durations)
    print(f"avg duration: {duration}")

    benchmark_data.append(
        # the appended record was truncated in the source; these fields are assumed
        dict(algorithm=algorithm, data_count=data_count, duration=duration)
    )
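# `generate_lambda_array` is not defined in this excerpt. A minimal sketch,
# assuming seven uniformly random weights, one per IDS sub-objective, matching
# the seven-element lambda arrays used elsewhere in these scripts:
import random

def generate_lambda_array():
    # hypothetical helper: one random weight per sub-objective f1..f7
    return [random.uniform(1, 1000) for _ in range(7)]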
    ternary_search_precision=5,
    max_iterations=1,
    extension_precision=-1,
    func_args_extension=dict(support=0, confidence=0))

best_pars = coord_asc.fit()
print("best_pars:", best_pars)

support, confidence = best_pars[0] / 1000, best_pars[1] / 1000

cba = CBA(support=support, confidence=confidence)
cba.fit(txns)
cba_clf = cba.clf

ids = IDS()
ids_clf = IDSClassifier(IDSRuleSet.from_cba_rules(cba_clf.rules).ruleset)
ids_clf.quant_dataframe_train = quant_df
ids_clf.calculate_default_class()
ids.clf = ids_clf

data = dict(dataset_name=dataset_name,
            algorithm="pyARC",
            auc=ids.score_auc(quant_df_test, order_type="cba"),
            rule_cutoff=rule_cutoff)
data.update(ids.score_interpretability_metrics(quant_df_test))
print(data)
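# The head of the CoordinateAscent call above was truncated in the source.
# Given the visible keyword tail and the func_args_extension keys, it
# presumably resembled the following hypothetical reconstruction (the
# objective fmax and the per-mille parameter ranges are assumptions):
coord_asc = CoordinateAscent(
    func=fmax,
    func_args_ranges=dict(support=(1, 1000), confidence=(1, 1000)),
    ternary_search_precision=5,
    max_iterations=1,
    extension_precision=-1,
    func_args_extension=dict(support=0, confidence=0),
)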
rule_count = i

quant_df = QuantitativeDataFrame(df)
cars = mine_CARs(df, rule_count)
print(len(cars))

for algorithm in ["DLS", "SLS"]:
    times = []
    for _ in range(10):
        start_time = time.time()
        ids = IDS(algorithm=algorithm)
        ids.fit(quant_dataframe=quant_df, class_association_rules=cars)
        total_time = time.time() - start_time
        times.append(total_time)

    benchmark_data = dict(rule_count=rule_count,
                          duration=np.mean(times),
                          algorithm=algorithm)
    print(benchmark_data)
    iris_benchmark.append(benchmark_data)
import pandas as pd
import numpy as np
from pyids.algorithms.ids import IDS
from pyids.algorithms import mine_CARs
from pyarc.qcba.data_structures import QuantitativeDataFrame
import random
import time

df = pd.read_csv("../../../data/iris0.csv")
cars = mine_CARs(df, 15, sample=False)
quant_dataframe = QuantitativeDataFrame(df)

start = time.time()
ids = IDS(algorithm="RUSM")
# the huge seventh lambda makes the last sub-objective dominate the others
ids.fit(class_association_rules=cars,
        quant_dataframe=quant_dataframe,
        random_seed=None,
        lambda_array=[1, 1, 1, 1, 1, 1, 1000000000])
end = time.time()
print(end - start)

for r in ids.clf.rules:
    print(r)

auc = ids.score_auc(quant_dataframe)
print("AUC", auc)
    func_args_ranges=dict(l1=(1, 1000), l2=(1, 1000), l3=(1, 1000),
                          l4=(1, 1000), l5=(1, 1000), l6=(1, 1000),
                          l7=(1, 1000)),
    ternary_search_precision=50,
    max_iterations=2)

best_lambda = coord_asc.fit()

scores = []
for _ in range(10):
    ids = IDS(algorithm=algorithm)
    ids.fit(quant_df, cars, lambda_array=best_lambda)

    auc = ids.score_auc(quant_df_test)
    metrics = ids.score_interpretability_metrics(quant_df_test)
    interpretable = is_solution_interpretable(metrics)

    scores.append(dict(auc=auc, metrics=metrics, interpretable=interpretable))

# prefer interpretable solutions first, then higher AUC
scores.sort(key=lambda x: (x["interpretable"], x["auc"]), reverse=True)

data = dict(dataset_name=dataset_name, algorithm=algorithm,
df_train = pd.read_csv(os.path.join(dataset_path_train, dataset_filename))
df_test = pd.read_csv(os.path.join(dataset_path_test, dataset_filename))

txns_train = TransactionDB.from_DataFrame(df_train)
txns_test = TransactionDB.from_DataFrame(df_test)

quant_df_train = QuantitativeDataFrame(df_train)
quant_df_test = QuantitativeDataFrame(df_test)

cba = CBA(support=0.1, confidence=0.1)
cba.fit(txns_train)
rules = cba.clf.rules

ids_ruleset = IDSRuleSet.from_cba_rules(rules)
ids = IDS()
ids.clf = IDSClassifier(ids_ruleset.ruleset)
ids.clf.default_class = cba.clf.default_class

metrics_dict = ids.score_interpretability_metrics(quant_df_test)

benchmark_dict = dict(dataset_filename=dataset_filename, algorithm="cba")
benchmark_dict.update(metrics_dict)
print(benchmark_dict)
benchmark_list.append(benchmark_dict)

benchmark_df = pd.DataFrame(benchmark_list)
benchmark_df.to_csv("output_data/cba_interpretability_benchmark.csv", index=False)
acc_score_dict = dict()
acc_score_dict["file"] = train_file
acc_score_dict["rule_count"] = rule_count
acc_score_dict["algorithm"] = algorithm
auc_score_dict = dict(acc_score_dict)

df_train = pd.read_csv(os.path.join(train_directory, train_file))
quant_df_train = QuantitativeDataFrame(df_train)
df_test = pd.read_csv(os.path.join(test_directory, train_file))
quant_df_test = QuantitativeDataFrame(df_test)

cars = mine_CARs(df_train, rule_count)

ids = IDS(algorithm=algorithm)
ids.fit(quant_dataframe=quant_df_train, class_association_rules=cars)

acc = ids.score(quant_df_test)
accuracy_dict[train_file] = acc
acc_score_dict["accuracy"] = acc

print("training multi")
ids_multi = IDSOneVsAll(algorithm=algorithm)
ids_multi.fit(quant_dataframe=quant_df_train, rule_cutoff=rule_count)
auc = ids_multi.score_auc(quant_df_test)
auc_score_dict["auc"] = auc
train_files = get_dataset_files(train_path, dataset_name)
test_files = get_dataset_files(test_path, dataset_name)

for algorithm in ["SLS", "DLS", "DUSM", "RUSM"]:
    for train_file, test_file in zip(train_files, test_files):
        dataset_path = os.path.join(train_path, train_file)
        dataset_test_path = os.path.join(test_path, test_file)

        df = pd.read_csv(dataset_path)
        quant_df = QuantitativeDataFrame(df)

        df_test = pd.read_csv(dataset_test_path)
        quant_df_test = QuantitativeDataFrame(df_test)

        cars = mine_CARs(df, rule_cutoff)

        ids = IDS(algorithm=algorithm)
        ids.fit(quant_df, cars)

        auc = ids.score_auc(quant_df_test)
        metrics = ids.score_interpretability_metrics(quant_df_test)

        data = dict(dataset_name=dataset_name, algorithm=algorithm,
                    auc=auc, rule_cutoff=rule_cutoff)
        data.update(metrics)
        print(data)
        benchmark_data.append(data)
import pandas as pd

from pyids.algorithms.ids import IDS
from pyids.algorithms import mine_CARs
from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyids.model_selection.coordinate_ascent import CoordinateAscent

lambda_dict = {
    'l1': 124.16415180612711,
    'l2': 38.896662094192955,
    'l3': 557.0996799268405,
    'l4': 638.188385916781,
    'l5': 136.48056698673983,
    'l6': 432.1760402377687,
    'l7': 452.1563786008231
}

lambda_array = [
    665.9341563786008, 271.7242798353909, 212.34156378600824,
    20.489711934156375, 648.5761316872428, 911, 560
]

df = pd.read_csv("C:/code/python/machine_learning/assoc_rules/train/iris0.csv")
quant_df = QuantitativeDataFrame(df)
quant_df_test = QuantitativeDataFrame(
    pd.read_csv("C:/code/python/machine_learning/assoc_rules/test/iris0.csv"))

cars = mine_CARs(df, 20)

ids = IDS(algorithm="DUSM")
ids.fit(quant_df, cars, lambda_array=lambda_array)

# note: this scores on the training split; quant_df_test is available
# for held-out evaluation
print(ids.score_auc(quant_df))