def test_top_rules(self):
        """Check that rules mined from a small three-column table are all expected.

        The table has two attribute columns (A, B) that always agree and a
        target column Y.  Each expected tuple is
        ``(consequent, antecedent, x, y)``; the two numbers match the
        support and confidence of that rule computed by hand from the six
        rows (e.g. ``A=1 -> Y=1`` holds in 1 of 6 rows and in 1 of the 3
        rows where ``A=1``).

        Note: the check is one-directional.  Every mined rule must be in the
        expected list, but not every expected rule has to be mined.
        """
        sixth, third, two_thirds = 1/6, 1/3, 2/3

        # Antecedents built from the rows where A == B == 1 ...
        antecedents_ones = [('A:=:1',), ('B:=:1',), ('B:=:1', 'A:=:1')]
        # ... and from the rows where A == B == 0.
        antecedents_zeros = [('A:=:0',), ('B:=:0',), ('B:=:0', 'A:=:0')]

        allowed_rules = []
        for antecedent in antecedents_ones:
            allowed_rules.append(('Y:=:1', antecedent, sixth, third))
            allowed_rules.append(('Y:=:0', antecedent, third, two_thirds))
        for antecedent in antecedents_zeros:
            allowed_rules.append(('Y:=:1', antecedent, third, two_thirds))
            allowed_rules.append(('Y:=:0', antecedent, sixth, third))

        column_names = ["A", "B", "Y"]
        table = [
            [1, 1, 0],
            [1, 1, 0],
            [1, 1, 1],
            [0, 0, 0],
            [0, 0, 1],
            [0, 0, 1],
        ]
        txns = TransactionDB(table, column_names)

        # Mining runs inside HiddenPrints (presumably to silence console
        # output from the miner -- confirm against its definition).
        with HiddenPrints():
            mined_rules = top_rules(txns.string_representation,
                                    appearance=txns.appeardict)

        for mined_rule in mined_rules:
            assert mined_rule in allowed_rules
Example #2
0
    def fit(self, rule_cutoff):
        """Fit and score ``self.classifier`` once per train/test pair.

        For every ``(train, test)`` pair yielded by
        ``self._prepare_dataframes()``, rules are mined from the training
        frame with ``top_rules``, converted with ``createCARs`` and
        truncated to the first ``rule_cutoff`` entries.  The classifier is
        then fitted on the training frame and scored on the test frame.

        Parameters
        ----------
        rule_cutoff : int
            Upper bound (slice end) on the number of CARs handed to the
            classifier for each pair.

        Returns
        -------
        list
            One score per pair, in iteration order.  ``score_auc`` is used
            when ``self.score_auc`` is truthy, otherwise ``score``.
        """
        fold_scores = []

        for train_df, test_df in self._prepare_dataframes():
            train_txns = TransactionDB.from_DataFrame(train_df)
            mined_rules = top_rules(train_txns.string_representation,
                                    appearance=train_txns.appeardict)
            fold_cars = createCARs(mined_rules)[:rule_cutoff]

            quant_train = QuantitativeDataFrame(train_df)
            quant_test = QuantitativeDataFrame(test_df)

            self.classifier.fit(quant_train, fold_cars, debug=self.debug)

            # Choose the scoring method only after fitting, as the flag is
            # read at this point in the loop.
            if self.score_auc:
                scorer = self.classifier.score_auc
            else:
                scorer = self.classifier.score

            fold_scores.append(scorer(quant_test))

        return fold_scores
Example #3
0
    def fit(self,
            quant_dataframe,
            cars=None,
            rule_cutoff=30,
            lambda_array=None,
            class_name=None,
            debug=False,
            algorithm="SLS"):
        """Train one underlying classifier per entry of ``self.ids_classifiers``.

        The training frame is stored on ``self.quant_dataframe_train`` and
        passed to ``self._prepare`` together with ``class_name``.  Then, for
        each ``(class, clf_dict)`` entry, rules are mined from that entry's
        own ``clf_dict["quant_dataframe"]``, converted to CARs, sorted in
        descending order, truncated to ``rule_cutoff`` and used to fit
        ``clf_dict["clf"]``.

        Parameters
        ----------
        quant_dataframe
            Training data; forwarded to ``self._prepare``.
        cars
            Accepted for interface compatibility but not used: rules are
            always mined afresh for every class.
        rule_cutoff : int
            Number of top-sorted CARs given to each classifier.
        lambda_array : list, optional
            Forwarded to each classifier's ``fit``.  Defaults to
            ``7 * [1]``; the list is created per call so that no state is
            shared between calls through a mutable default argument.
        class_name
            Forwarded to ``self._prepare``.
        debug
            Forwarded to each classifier's ``fit``.
        algorithm : str
            Forwarded to each classifier's ``fit``.
        """
        if lambda_array is None:
            lambda_array = 7 * [1]

        self.quant_dataframe_train = quant_dataframe

        self._prepare(quant_dataframe, class_name)

        for class_, clf_dict in self.ids_classifiers.items():
            print("training class:", class_)

            clf = clf_dict["clf"]
            # Use distinct local names so the method parameters
            # (quant_dataframe, cars) are not shadowed inside the loop.
            class_quant_dataframe = clf_dict["quant_dataframe"]
            pandas_dataframe = class_quant_dataframe.dataframe

            txns = TransactionDB.from_DataFrame(pandas_dataframe)
            rules = top_rules(txns.string_representation,
                              appearance=txns.appeardict)
            class_cars = createCARs(rules)
            class_cars.sort(reverse=True)

            clf.fit(class_quant_dataframe,
                    class_cars[:rule_cutoff],
                    lambda_array=lambda_array,
                    debug=debug,
                    algorithm=algorithm)
Example #4
0
def mine_CARs(df, rule_cutoff, sample=False):
    """Mine class association rules from ``df`` and return at most ``rule_cutoff``.

    Parameters
    ----------
    df
        Data frame passed to ``TransactionDB.from_DataFrame``.
    rule_cutoff : int
        Maximum number of CARs to return.
    sample : bool
        When false (default) the first ``rule_cutoff`` CARs are returned in
        the order produced by ``createCARs``.  When true, a random sample
        of CARs is drawn without replacement using the ``random`` module.

    Returns
    -------
    list
        The selected CARs.  Fewer than ``rule_cutoff`` are returned when
        fewer were mined, in both modes.
    """
    txns = TransactionDB.from_DataFrame(df)
    rules = top_rules(txns.string_representation, appearance=txns.appeardict)
    cars = createCARs(rules)

    if sample:
        # random.sample raises ValueError when asked for more items than
        # the population holds; clamp so sampling is as tolerant of a
        # large cutoff as slicing is.
        return random.sample(cars, min(rule_cutoff, len(cars)))

    return cars[:rule_cutoff]
Example #5
0
def mine_CARs(df,
              rule_cutoff,
              sample=False,
              random_seed=None,
              **top_rules_kwargs):
    """Mine class association rules from ``df`` and return at most ``rule_cutoff``.

    Parameters
    ----------
    df
        Data frame passed to ``TransactionDB.from_DataFrame``.
    rule_cutoff : int
        Maximum number of CARs to return.
    sample : bool
        When false (default) the first ``rule_cutoff`` CARs are returned in
        the order produced by ``createCARs``.  When true, a random sample
        of CARs is drawn without replacement using the ``random`` module.
    random_seed : int, optional
        When not ``None``, seeds both ``random`` and ``numpy.random`` before
        mining.  Any integer is honoured, including ``0``.
    **top_rules_kwargs
        Extra keyword arguments forwarded unchanged to ``top_rules``.

    Returns
    -------
    list
        The selected CARs.  Fewer than ``rule_cutoff`` are returned when
        fewer were mined, in both modes.
    """
    # Compare against None explicitly: a truthiness test would silently
    # skip seeding for the perfectly valid seed 0.
    if random_seed is not None:
        random.seed(random_seed)
        np.random.seed(random_seed)

    txns = TransactionDB.from_DataFrame(df)
    rules = top_rules(txns.string_representation,
                      appearance=txns.appeardict,
                      **top_rules_kwargs)
    cars = createCARs(rules)

    if sample:
        # random.sample raises ValueError when asked for more items than
        # the population holds; clamp so sampling is as tolerant of a
        # large cutoff as slicing is.
        return random.sample(cars, min(rule_cutoff, len(cars)))

    return cars[:rule_cutoff]
Example #6
0
import pandas as pd
import numpy as np

from pyarc.data_structures import TransactionDB
from pyarc.algorithms import top_rules, createCARs

# Demo script: mine rules from a data set after blanking out some rows,
# then print the first ten resulting CARs.
df = pd.read_csv("./data/iris0.csv")

# Overwrite every row whose "sepallength" equals the string "-inf_to_5.55"
# with NaN (all columns of those rows).  Use np.nan: the np.NaN alias was
# removed in NumPy 2.0, while np.nan works on every NumPy version.
df[df["sepallength"] == "-inf_to_5.55"] = np.nan

print(df)

txns = TransactionDB.from_DataFrame(df)

rules = top_rules(txns.string_representation, appearance=txns.appeardict)

cars = createCARs(rules)

for car in cars[:10]:
    print(car)