import json
import logging
import random
import time

import numpy as np
import pandas as pd
from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyids import IDS
from pyids.algorithms import mine_CARs
from pyids.data_structures import IDSRuleSet

logging.basicConfig(level=logging.INFO)

# Load the data set, mine class association rules from it and wrap both in
# the structures consumed further down the script.
df = pd.read_csv("../../../data/iris0.csv")
cars = mine_CARs(df, 10, sample=False)
ids_ruleset = IDSRuleSet.from_cba_rules(cars).ruleset
quant_dataframe = QuantitativeDataFrame(df)


def generate_lambda_arr(one_idx):
    """Prepare the zero padding around position ``one_idx`` of a 7-slot array.

    ``start_arr`` holds ``one_idx`` zeros (the slots before the position) and
    ``end_arr`` holds the ``7 - one_idx - 1`` zeros after it.

    NOTE(review): the visible source stops after these two assignments, so
    neither list is combined or returned here -- presumably the remainder of
    the function was cut off; confirm against the full file.
    """
    total_params = 7
    start_arr = [] if one_idx == 0 else one_idx * [0]
    end_arr = (total_params - one_idx - 1) * [0]
# Benchmark how long IDS.fit takes for each optimisation algorithm.
# `df`, `time_estimation_iterations`, `rule_cutoff` and
# `generate_lambda_array` are not defined in this fragment; they must be
# provided by the code preceding it.
data_count = df.shape[0]
quant_df = QuantitativeDataFrame(df)

for algorithm in ["DLS", "SLS", "DUSM", "RUSM"]:
    # Collected fit times (seconds) for the current algorithm only.
    durations = []

    for iteration in range(time_estimation_iterations):
        print(iteration)

        lambda_array = generate_lambda_array()

        print(f"data count: {data_count}")
        print(f"algorithm: {algorithm}")
        print(f"using lambda: {lambda_array}")

        # Rules are re-mined on every run, exactly as in the original loop;
        # only the fit call below is timed.
        cars = mine_CARs(df, rule_cutoff=rule_cutoff)
        ids = IDS(algorithm=algorithm)

        # perf_counter is monotonic and high resolution, so the measurement
        # cannot be skewed by system clock adjustments the way time.time can.
        start = time.perf_counter()
        ids.fit(
            class_association_rules=cars,
            quant_dataframe=quant_df,
            lambda_array=lambda_array,
        )
        duration = time.perf_counter() - start

        # This is a single run, not an average -- label it accordingly.
        print(f"duration: {duration}")
        durations.append(duration)

    # Mean fit time over all runs of this algorithm.
    duration = np.mean(durations)
    print(f"avg duration: {duration}")
import numpy as np
import pandas as pd
from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyids.algorithms import mine_CARs
from pyids.algorithms.ids import IDS
from pyids.model_selection.random_search import RandomSearch

# Data set, its quantitative wrapper and the mined class association rules.
df_iris = pd.read_csv("../../../data/iris0.csv")
quant_df = QuantitativeDataFrame(df_iris)
cars = mine_CARs(df_iris, 40)


def is_solution_interpretable(metrics):
    """Return whether ``metrics`` passes every interpretability threshold.

    ``metrics`` is indexed by the keys ``fraction_overlap``,
    ``fraction_classes``, ``fraction_uncovered``, ``average_rule_width`` and
    ``ruleset_length``. The mapping is printed before it is evaluated.
    """
    print(metrics)

    # NOTE(review): the strict `> 1.0` test on "fraction_classes" can never
    # hold if that metric is bounded by 1 -- confirm whether `>=` was meant.
    return (
        metrics["fraction_overlap"] <= 0.3
        and metrics["fraction_classes"] > 1.0
        and metrics["fraction_uncovered"] <= 0.3
        and metrics["average_rule_width"] < 8
        and metrics["ruleset_length"] <= 10
    )


def solution_interpretability_distance(metrics):
    """Build the vector of per-metric threshold violations for ``metrics``.

    Each component is clamped at zero, so a metric that satisfies its bound
    contributes nothing.

    NOTE(review): no return statement is visible after the vector is built;
    presumably the rest of the function was cut off -- confirm against the
    full file.
    """
    violations = [
        max(metrics["fraction_overlap"] - 0.1, 0),
        max(1 - metrics["fraction_classes"], 0),
        max(metrics["fraction_uncovered"] - 0.15, 0),
        max(metrics["average_rule_width"] - 8, 0),
        max(metrics["ruleset_length"] - 10, 0),
    ]
    distance_vector = np.array(violations)
from pyids.model_selection import CoordinateAscent from pyids.algorithms.ids import IDS from pyids.algorithms import mine_CARs, mine_IDS_ruleset from pyarc.qcba.data_structures import QuantitativeDataFrame import pandas as pd import numpy as np df_iris = pd.read_csv("../../../data/iris0.csv") quant_df = QuantitativeDataFrame(df_iris) cars = mine_CARs(df_iris, 30) interpretability_bounds = dict(fraction_overlap=0.1, fraction_classes=1, fraction_uncovered=0.35, average_rule_width=8, ruleset_length=10) def is_solution_interpretable(metrics): print(metrics) return (metrics["fraction_overlap"] <= interpretability_bounds["fraction_overlap"] and metrics["fraction_classes"] >= interpretability_bounds["fraction_classes"] and metrics["fraction_uncovered"] <= interpretability_bounds["fraction_uncovered"] and metrics["average_rule_width"] <= interpretability_bounds["average_rule_width"] and metrics["ruleset_length"] <=
from pyids.algorithms import mine_CARs from pyids.algorithms.ids_multiclass import IDSOneVsAll from pyids.data_structures import IDSRuleSet from pyarc.qcba.data_structures import QuantitativeDataFrame from pyarc.data_structures import TransactionDB from pyarc import CBA import random import logging import time import matplotlib.pyplot as plt df = pd.read_csv("c:/code/python/machine_learning/assoc_rules/train/iris0.csv") cars = mine_CARs(df, rule_cutoff=40) quant_dataframe = QuantitativeDataFrame(df) def is_solution_interpretable(metrics): print(metrics) return ( metrics["fraction_overlap"] <= 0.10 and metrics["fraction_classes"] > 1.0 and metrics["fraction_uncovered"] <= 0.15 and metrics["average_rule_width"] < 8 and metrics["ruleset_length"] <= 10 ) def solution_interpretability_distance(metrics): distance_vector = np.array([
from pyids.model_selection import CoordinateAscent from pyids.algorithms.ids import IDS from pyids.algorithms import mine_CARs, mine_IDS_ruleset from pyarc.qcba.data_structures import QuantitativeDataFrame import pandas as pd import numpy as np df_iris = pd.read_csv("../../../data/iris0.csv") quant_df = QuantitativeDataFrame(df_iris) cars = mine_CARs(df_iris, 20) interpretability_bounds = dict(fraction_overlap=0.1, fraction_classes=1, fraction_uncovered=0.35, average_rule_width=8, ruleset_length=10) def is_solution_interpretable(metrics): print(metrics) return (metrics["fraction_overlap"] <= interpretability_bounds["fraction_overlap"] and metrics["fraction_classes"] >= interpretability_bounds["fraction_classes"] and metrics["fraction_uncovered"] <= interpretability_bounds["fraction_uncovered"] and metrics["average_rule_width"] <= interpretability_bounds["average_rule_width"] and metrics["ruleset_length"] <=
from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyids.algorithms import mine_CARs
from pyids.algorithms.ids import IDS
from pyids.model_selection.coordinate_ascent import CoordinateAscent

# Lambda values keyed by name; not read anywhere in the visible script.
lambda_dict = {
    'l1': 124.16415180612711,
    'l2': 38.896662094192955,
    'l3': 557.0996799268405,
    'l4': 638.188385916781,
    'l5': 136.48056698673983,
    'l6': 432.1760402377687,
    'l7': 452.1563786008231,
}

# Lambda values actually handed to IDS.fit below.
lambda_array = [
    665.9341563786008,
    271.7242798353909,
    212.34156378600824,
    20.489711934156375,
    648.5761316872428,
    911,
    560,
]

# Training data and its quantitative wrapper.
df = pd.read_csv("C:/code/python/machine_learning/assoc_rules/train/iris0.csv")
quant_df = QuantitativeDataFrame(df)

# Held-out data. NOTE(review): quant_df_test is built but never used in the
# visible script -- the AUC below is computed on the training frame; confirm
# whether scoring on the test frame was intended.
df_test = pd.read_csv("C:/code/python/machine_learning/assoc_rules/test/iris0.csv")
quant_df_test = QuantitativeDataFrame(df_test)

# Mine the candidate rules, fit IDS with the DUSM optimiser and report AUC.
cars = mine_CARs(df, 20)

ids = IDS(algorithm="DUSM")
ids.fit(quant_df, cars, lambda_array=lambda_array)

auc = ids.score_auc(quant_df)
print(auc)