Beispiel #1
0
    def fit(self, rule_cutoff):
        """Fit and score the classifier once per prepared train/test split.

        For every (train, test) pair yielded by ``self._prepare_dataframes()``
        rules are mined from the training frame, converted to CARs and cut
        down to the first ``rule_cutoff`` entries. ``self.classifier`` is then
        fitted on the training data and scored on the test data.

        Args:
            rule_cutoff: Upper bound (slice end) on the number of CARs handed
                to the classifier.

        Returns:
            list: One score per split, in iteration order. The score comes
            from ``classifier.score_auc`` when ``self.score_auc`` is truthy
            and from ``classifier.score`` otherwise.
        """
        fold_scores = []

        for train_df, test_df in self._prepare_dataframes():
            train_txns = TransactionDB.from_DataFrame(train_df)
            mined_rules = top_rules(
                train_txns.string_representation,
                appearance=train_txns.appeardict,
            )
            candidate_cars = createCARs(mined_rules)[:rule_cutoff]

            quant_train = QuantitativeDataFrame(train_df)
            quant_test = QuantitativeDataFrame(test_df)

            self.classifier.fit(quant_train, candidate_cars, debug=self.debug)

            # Select the configured metric, then evaluate on the held-out data.
            evaluate = (self.classifier.score_auc
                        if self.score_auc else self.classifier.score)
            fold_scores.append(evaluate(quant_test))

        return fold_scores
Beispiel #2
0
    def fit(self,
            quant_dataframe,
            cars=None,
            rule_cutoff=30,
            lambda_array=None,
            class_name=None,
            debug=False,
            algorithm="SLS"):
        """Fit one underlying classifier per entry of ``self.ids_classifiers``.

        The training frame is stored on the instance and handed to
        ``self._prepare`` (presumably the step that populates
        ``self.ids_classifiers`` -- confirm against ``_prepare``). For every
        class, rules are mined from that class's own dataframe, converted to
        CARs, sorted in descending order, and the first ``rule_cutoff`` of
        them are used to fit the class's classifier.

        Args:
            quant_dataframe: Training data; stored as
                ``self.quant_dataframe_train``.
            cars: Accepted for interface compatibility only. It is NOT used:
                rules are always re-mined from each class's dataframe.
            rule_cutoff: Maximum number of sorted CARs passed to each
                classifier.
            lambda_array: Forwarded unchanged to each classifier's ``fit``.
                Defaults to a fresh ``7 * [1]`` list on every call.
            class_name: Forwarded to ``self._prepare``.
            debug: Forwarded to each classifier's ``fit``.
            algorithm: Forwarded to each classifier's ``fit``.
        """
        # Build the default per call: a list in the signature would be one
        # shared object that any in-place modification leaks into later calls.
        if lambda_array is None:
            lambda_array = 7 * [1]

        self.quant_dataframe_train = quant_dataframe

        self._prepare(quant_dataframe, class_name)

        for class_, clf_dict in self.ids_classifiers.items():
            print("training class:", class_)

            clf = clf_dict["clf"]
            # Use dedicated names instead of rebinding the method's own
            # ``quant_dataframe`` / ``cars`` parameters inside the loop.
            class_quant_dataframe = clf_dict["quant_dataframe"]
            pandas_dataframe = class_quant_dataframe.dataframe

            txns = TransactionDB.from_DataFrame(pandas_dataframe)
            rules = top_rules(txns.string_representation,
                              appearance=txns.appeardict)
            class_cars = createCARs(rules)
            class_cars.sort(reverse=True)

            clf.fit(class_quant_dataframe,
                    class_cars[:rule_cutoff],
                    lambda_array=lambda_array,
                    debug=debug,
                    algorithm=algorithm)
Beispiel #3
0
def mine_CARs(df, rule_cutoff, sample=False):
    """Mine class association rules (CARs) from ``df``, at most ``rule_cutoff``.

    Args:
        df: DataFrame handed to ``TransactionDB.from_DataFrame``.
        rule_cutoff: Maximum number of CARs to return.
        sample: When true, return a random sample of the mined CARs instead
            of the leading ``rule_cutoff`` ones.

    Returns:
        list: Up to ``rule_cutoff`` CARs. Fewer are returned when fewer were
        mined, in both the sampling and the non-sampling mode.
    """
    txns = TransactionDB.from_DataFrame(df)
    rules = top_rules(txns.string_representation, appearance=txns.appeardict)
    cars = createCARs(rules)

    if sample:
        # random.sample raises ValueError when asked for more items than the
        # population holds; clamp so sampling degrades like the slice below.
        return random.sample(cars, min(rule_cutoff, len(cars)))

    return cars[:rule_cutoff]
Beispiel #4
0
    def mine_rules(self, pandas_df, minsup=0.2):
        """Mine CARs from ``pandas_df`` and recompute their confidence.

        Frequent itemsets are mined with ``self.mine_frequent_itemsets``,
        converted to rule tuples by ``self._convert_to_fim_rules`` using the
        last column of ``pandas_df`` as the class column, and turned into
        CARs. The resulting CARs are printed, and each one then has its
        ``confidence`` overwritten with ``self._calculate_rule_confidence``.

        Args:
            pandas_df: Input frame; its last column supplies the class values
                and the class column name.
            minsup: Minimum support forwarded to the itemset miner.

        Returns:
            The CARs with updated ``confidence`` attributes.
        """
        itemsets = self.mine_frequent_itemsets(pandas_df, minsup)

        # The last column provides both the class values and the class name.
        class_values = list(pandas_df.iloc[:, -1].unique())
        class_column = pandas_df.columns[-1]

        cars = createCARs(
            self._convert_to_fim_rules(itemsets, class_values, class_column))
        print(cars)

        for rule in cars:
            rule.confidence = self._calculate_rule_confidence(rule, pandas_df)

        return cars
    def test_createCARs(self):
        """Check that createCARs maps raw rule tuples onto matching CARs."""
        raw_rules = [
            ('Y:=:1', (), 0.5, 0.5),
            ('Y:=:0', (), 0.5, 0.5),
            ('Y:=:1', ('A:=:1', ), 0.5, 1 / 3),
        ]

        cars = createCARs(raw_rules)

        # One row per produced CAR:
        # (consequent, antecedent or None when not checked, confidence, support)
        expectations = [
            (Consequent("Y", 1), None, 0.5, 0.5),
            (Consequent("Y", 0), None, 0.5, 0.5),
            (Consequent("Y", 1), Antecedent([Item("A", 1)]), 1 / 3, 0.5),
        ]

        for position, expected in enumerate(expectations):
            consequent, antecedent, confidence, support = expected
            car = cars[position]

            assert car.consequent == consequent
            if antecedent is not None:
                assert car.antecedent == antecedent
            assert car.confidence == confidence
            assert car.support == support
Beispiel #6
0
def mine_CARs(df,
              rule_cutoff,
              sample=False,
              random_seed=None,
              **top_rules_kwargs):
    """Mine class association rules (CARs) from ``df``, at most ``rule_cutoff``.

    Args:
        df: DataFrame handed to ``TransactionDB.from_DataFrame``.
        rule_cutoff: Maximum number of CARs to return.
        sample: When true, return a random sample of the mined CARs instead
            of the leading ``rule_cutoff`` ones.
        random_seed: When not ``None``, seeds both ``random`` and
            ``numpy.random`` before mining. ``0`` is a valid seed.
        **top_rules_kwargs: Extra keyword arguments forwarded to
            ``top_rules``.

    Returns:
        list: Up to ``rule_cutoff`` CARs. Fewer are returned when fewer were
        mined, in both the sampling and the non-sampling mode.
    """
    # Compare against None explicitly: a truthiness test would silently skip
    # seeding for the perfectly valid seed 0.
    if random_seed is not None:
        random.seed(random_seed)
        np.random.seed(random_seed)

    txns = TransactionDB.from_DataFrame(df)
    rules = top_rules(txns.string_representation,
                      appearance=txns.appeardict,
                      **top_rules_kwargs)
    cars = createCARs(rules)

    if sample:
        # random.sample raises ValueError when asked for more items than the
        # population holds; clamp so sampling degrades like the slice below.
        return random.sample(cars, min(rule_cutoff, len(cars)))

    return cars[:rule_cutoff]
Beispiel #7
0
import pandas as pd
import numpy as np

from pyarc.data_structures import TransactionDB
from pyarc.algorithms import top_rules, createCARs

# Demo script: mine CARs from a dataset in which some rows were blanked out.
df = pd.read_csv("./data/iris0.csv")

# Overwrite every row whose "sepallength" equals the "-inf_to_5.55" label with
# NaN in all columns. ``np.nan`` is used because the ``np.NaN`` alias was
# removed in NumPy 2.0 and raises AttributeError there.
df[df["sepallength"] == "-inf_to_5.55"] = np.nan

print(df)

txns = TransactionDB.from_DataFrame(df)

rules = top_rules(txns.string_representation, appearance=txns.appeardict)

cars = createCARs(rules)

# Show only the first ten rules.
for car in cars[:10]:
    print(car)