def test_top_rules(self):
    """Check that every rule mined from a tiny dataset is an expected one.

    The dataset has two identical binary features (``A`` and ``B``) and a
    binary target ``Y``. The test only verifies that each mined rule is a
    member of the expected list; it does not require that every expected
    rule is actually produced.
    """
    columns = ["A", "B", "Y"]
    data = [
        [1, 1, 0],
        [1, 1, 0],
        [1, 1, 1],
        [0, 0, 0],
        [0, 0, 1],
        [0, 0, 1],
    ]
    txn_db = TransactionDB(data, columns)

    mined = None
    # The mining call runs inside HiddenPrints (presumably to silence its
    # console output -- confirm against the helper's definition).
    with HiddenPrints():
        mined = top_rules(
            txn_db.string_representation,
            appearance=txn_db.appeardict,
        )

    # Each tuple is (consequent, antecedent items, number, number). The two
    # trailing numbers are consistent with support and confidence computed
    # from the six rows above (e.g. A=1 and Y=1 occurs in 1 of 6 rows and in
    # 1 of the 3 rows where A=1).
    sixth = 1/6
    third = 1/3
    two_thirds = 2/3
    expected_rules = [
        ('Y:=:1', ('A:=:1',), sixth, third),
        ('Y:=:0', ('A:=:1',), third, two_thirds),
        ('Y:=:1', ('B:=:1',), sixth, third),
        ('Y:=:0', ('B:=:1',), third, two_thirds),
        ('Y:=:1', ('B:=:1', 'A:=:1'), sixth, third),
        ('Y:=:0', ('B:=:1', 'A:=:1'), third, two_thirds),
        ('Y:=:1', ('A:=:0',), third, two_thirds),
        ('Y:=:0', ('A:=:0',), sixth, third),
        ('Y:=:1', ('B:=:0',), third, two_thirds),
        ('Y:=:0', ('B:=:0',), sixth, third),
        ('Y:=:1', ('B:=:0', 'A:=:0'), third, two_thirds),
        ('Y:=:0', ('B:=:0', 'A:=:0'), sixth, third),
    ]

    assert all(rule in expected_rules for rule in mined)
def fit(self, rule_cutoff):
    """Train and score ``self.classifier`` on every train/test pair.

    For each pair returned by ``self._prepare_dataframes()``, rules are
    mined from the training frame, turned into CARs, truncated to the
    first ``rule_cutoff`` entries and used to fit the classifier. The
    fitted classifier is then scored on the test frame.

    Args:
        rule_cutoff: Maximum number of CARs handed to the classifier.

    Returns:
        A list with one score per train/test pair. The AUC score is used
        when ``self.score_auc`` is truthy, the plain score otherwise.
    """
    fold_scores = []

    for train_df, test_df in self._prepare_dataframes():
        # Mine candidate rules from the training part only.
        train_txns = TransactionDB.from_DataFrame(train_df)
        mined_rules = top_rules(
            train_txns.string_representation,
            appearance=train_txns.appeardict,
        )
        fold_cars = createCARs(mined_rules)[:rule_cutoff]

        quant_train = QuantitativeDataFrame(train_df)
        quant_test = QuantitativeDataFrame(test_df)

        self.classifier.fit(quant_train, fold_cars, debug=self.debug)

        # Pick the scoring method once, then apply it to the test frame.
        if self.score_auc:
            scorer = self.classifier.score_auc
        else:
            scorer = self.classifier.score
        fold_scores.append(scorer(quant_test))

    return fold_scores
def fit(self, quant_dataframe, cars=None, rule_cutoff=30, lambda_array=None,
        class_name=None, debug=False, algorithm="SLS"):
    """Fit one underlying classifier per entry of ``self.ids_classifiers``.

    The training frame is stored on the instance and passed to
    ``self._prepare``. Then, for every class entry, rules are mined from
    that entry's own dataframe, converted to CARs, sorted in descending
    order and the first ``rule_cutoff`` are used to fit that entry's
    classifier.

    Args:
        quant_dataframe: Training data; stored as
            ``self.quant_dataframe_train``.
        cars: Accepted for interface compatibility but not used; rules
            are always mined again for each class.
        rule_cutoff: Maximum number of CARs passed to each classifier.
        lambda_array: Passed through to each classifier's ``fit``.
            Defaults to a fresh ``7 * [1]`` list per call.
        class_name: Forwarded to ``self._prepare``.
        debug: Forwarded to each classifier's ``fit``.
        algorithm: Forwarded to each classifier's ``fit``.
    """
    # A None sentinel avoids sharing one mutable default list between calls.
    if lambda_array is None:
        lambda_array = 7 * [1]

    self.quant_dataframe_train = quant_dataframe
    self._prepare(quant_dataframe, class_name)

    for class_, clf_dict in self.ids_classifiers.items():
        print("training class:", class_)

        clf = clf_dict["clf"]
        # Use a separate name so the method's parameter is not rebound.
        class_quant_dataframe = clf_dict["quant_dataframe"]

        txns = TransactionDB.from_DataFrame(class_quant_dataframe.dataframe)
        rules = top_rules(txns.string_representation, appearance=txns.appeardict)

        class_cars = createCARs(rules)
        class_cars.sort(reverse=True)

        clf.fit(
            class_quant_dataframe,
            class_cars[:rule_cutoff],
            lambda_array=lambda_array,
            debug=debug,
            algorithm=algorithm,
        )
def mine_CARs(df, rule_cutoff, sample=False):
    """Mine class association rules from ``df`` and return a subset.

    Args:
        df: DataFrame converted to transactions via
            ``TransactionDB.from_DataFrame``.
        rule_cutoff: Maximum number of rules to return.
        sample: When false, return the first ``rule_cutoff`` rules in the
            order produced by ``createCARs``. When true, return a random
            sample drawn without replacement from all mined rules.

    Returns:
        A list of at most ``rule_cutoff`` rules.
    """
    txns = TransactionDB.from_DataFrame(df)
    rules = top_rules(txns.string_representation, appearance=txns.appeardict)
    cars = createCARs(rules)

    if sample:
        # random.sample raises ValueError when asked for more items than
        # exist; clamp so this path truncates like the slice below.
        return random.sample(cars, min(rule_cutoff, len(cars)))

    return cars[:rule_cutoff]
def mine_CARs(df, rule_cutoff, sample=False, random_seed=None, **top_rules_kwargs):
    """Mine class association rules from ``df`` and return a subset.

    Args:
        df: DataFrame converted to transactions via
            ``TransactionDB.from_DataFrame``.
        rule_cutoff: Maximum number of rules to return.
        sample: When false, return the first ``rule_cutoff`` rules in the
            order produced by ``createCARs``. When true, return a random
            sample drawn without replacement from all mined rules.
        random_seed: If not ``None``, seeds both the ``random`` module and
            NumPy's global random state before mining. ``0`` is a valid
            seed.
        **top_rules_kwargs: Extra keyword arguments forwarded to
            ``top_rules``.

    Returns:
        A list of at most ``rule_cutoff`` rules.
    """
    # Compare against None explicitly: a truthiness check would silently
    # skip seeding when the caller asks for seed 0.
    if random_seed is not None:
        random.seed(random_seed)
        np.random.seed(random_seed)

    txns = TransactionDB.from_DataFrame(df)
    rules = top_rules(
        txns.string_representation,
        appearance=txns.appeardict,
        **top_rules_kwargs
    )
    cars = createCARs(rules)

    if sample:
        # random.sample raises ValueError when asked for more items than
        # exist; clamp so this path truncates like the slice below.
        return random.sample(cars, min(rule_cutoff, len(cars)))

    return cars[:rule_cutoff]
import pandas as pd
import numpy as np

from pyarc.data_structures import TransactionDB
from pyarc.algorithms import top_rules, createCARs

# Script: mine class association rules from a dataset in which some rows
# have been blanked out, then print the first ten rules.
df = pd.read_csv("./data/iris0.csv")

# Boolean-mask assignment replaces every column of the matching rows with
# NaN. `np.nan` is used because the `np.NaN` alias was removed in NumPy 2.0.
# NOTE(review): if only the `sepallength` cell was meant to be blanked, this
# should be `df.loc[mask, "sepallength"] = np.nan` -- confirm intent.
df[df["sepallength"] == "-inf_to_5.55"] = np.nan
print(df)

txns = TransactionDB.from_DataFrame(df)
rules = top_rules(txns.string_representation, appearance=txns.appeardict)
cars = createCARs(rules)

for car in cars[:10]:
    print(car)