Esempio n. 1
0
 def setUp(self):
     """Populate the fixture: reference conjunctions, qualities and the task.

     ``self.result`` and ``self.qualities`` hold ten hard-coded conjunctions
     and ten hard-coded quality values (presumably the expected output that
     the test methods, not shown here, compare against). ``self.task`` is a
     subgroup discovery task on the credit data with target ``class == b'bad'``
     over all nominal selectors except ``class``.
     """
     checking = ps.EqualitySelector("checking_status", b"<0")
     foreign = ps.EqualitySelector("foreign_worker", b"yes")
     parties = ps.EqualitySelector("other_parties", b"none")
     savings = ps.EqualitySelector("savings_status", b"<100")
     job = ps.EqualitySelector("job", b"skilled")

     # One inner list per reference subgroup; order matches self.qualities.
     selector_sets = [
         [checking, foreign],
         [checking],
         [checking, parties, foreign],
         [checking, parties],
         [checking, savings, foreign],
         [checking, savings],
         [checking, savings, parties, foreign],
         [checking, job, foreign],
         [checking, savings, parties],
         [checking, job],
     ]
     self.result = [ps.Conjunction(selectors) for selectors in selector_sets]
     self.qualities = [
         0.055299999999999995, 0.05280000000000001,
         0.052300000000000006, 0.05059999999999999,
         0.04959999999999999, 0.048299999999999996,
         0.04660000000000001, 0.04550000000000001,
         0.0452, 0.044399999999999995,
     ]

     credit = get_credit_data()
     bad_class = ps.BinaryTarget('class', b'bad')
     nominal_selectors = ps.create_nominal_selectors(credit, ignore=['class'])
     self.task = ps.SubgroupDiscoveryTask(
         credit,
         bad_class,
         nominal_selectors,
         result_set_size=10,
         depth=5,
         qf=ps.StandardQF(1.0),
     )
 def setUp(self):
     """Populate the fixture for a prediction-target discovery task.

     Stores two sets of hard-coded reference data: ``self.DFSresult`` /
     ``self.DFSqualities`` (whose first result entry is the literal ``True``)
     and ``self.result`` / ``self.qualities``. Then seeds NumPy's global RNG
     with 1111, draws 1000 random binary labels and 1000 uniform estimates,
     and builds ``self.task`` on the credit data with a ``PredictionTarget``
     scored by ``roc_auc_score``.
     """
     payment = ps.EqualitySelector("other_payment_plans",b"none")
     foreign = ps.EqualitySelector("foreign_worker", b"yes")
     parties = ps.EqualitySelector("other_parties", b"none")
     housing = ps.EqualitySelector("housing", b'own')
     good_class = ps.EqualitySelector("class", b"good")

     # Selector sets shared by both reference result lists.
     common_sets = [
         [foreign],
         [parties],
         [foreign, parties],
         [payment],
         [foreign, payment],
         [parties, payment],
         [housing],
         [good_class],
         [foreign, parties, payment],
     ]

     # DFS reference: the plain value True comes first, then the conjunctions.
     self.DFSresult = [True] + [ps.Conjunction(sels) for sels in common_sets]
     self.DFSqualities = [
         500.4980179286455, 483.3153195123844,
         459.2862838915471, 444.60343785358896,
         398.25539855072464, 384.0460358056267,
         362.090608537693, 355.0749649843413,
         355.010575658835, 349.8188702669149,
     ]

     # Second reference: the same nine sets (as fresh lists) plus one more.
     regular_sets = [list(sels) for sels in common_sets]
     regular_sets.append([foreign, housing])
     self.result = [ps.Conjunction(sels) for sels in regular_sets]
     self.qualities = [
         483.3153195123844, 459.2862838915471,
         444.60343785358896, 398.25539855072464,
         384.0460358056267, 362.090608537693,
         355.0749649843413, 355.010575658835,
         349.8188702669149, 342.20780439530444,
     ]

     # Seed first, then draw labels, then estimates: the order fixes the values.
     np.random.seed(1111)
     self.target_variables = np.random.randint(low=0, high=2, size=1000)
     self.target_estimates = np.random.uniform(size=1000)

     credit = get_credit_data()
     prediction_target = ps.PredictionTarget(
         self.target_variables, self.target_estimates, roc_auc_score)
     nominal_selectors = ps.create_nominal_selectors(
         credit, ignore=['credit_amount'])
     # Numeric selectors are switched off; the list is intentionally empty.
     numeric_selectors = []
     all_selectors = nominal_selectors + numeric_selectors
     self.task = ps.SubgroupDiscoveryTask(
         credit,
         prediction_target,
         all_selectors,
         result_set_size=10,
         depth=5,
         qf=ps.CountCallsInterestingMeasure(ps.PredictionQFNumeric(1, False)),
     )
Esempio n. 3
0
 def setUpClass(cls):
     """Build the credit-data task and store a SimpleDFS run on the class.

     Sets ``cls.task`` (target ``class == b'bad'``, nominal selectors without
     ``class``, result set size 10, depth 5, ``StandardQF(1.0)``) and
     ``cls.result`` (the return value of ``ps.SimpleDFS().execute``).
     """
     credit = get_credit_data()
     bad_class = ps.BinaryTarget('class', b'bad')
     nominal_selectors = ps.create_nominal_selectors(credit, ignore=['class'])
     cls.task = ps.SubgroupDiscoveryTask(
         credit, bad_class, nominal_selectors,
         result_set_size=10, depth=5, qf=ps.StandardQF(1.0))
     cls.result = ps.SimpleDFS().execute(cls.task)
    def setUp(self):
        """Populate the fixture with reference disjunctions and a depth-3 task.

        Every reference entry in ``self.result`` is a ``ps.Disjunction`` of the
        two ``checking_status`` selectors, optionally extended by one further
        selector. ``self.qualities`` holds the ten hard-coded quality values
        in the same order. ``self.task`` targets ``class == b'bad'`` on the
        credit data.
        """
        checking_lt0 = ps.EqualitySelector("checking_status", b"<0")
        checking_0_200 = ps.EqualitySelector("checking_status", b"0<=X<200")
        co_applicant = ps.EqualitySelector("other_parties", b"co applicant")
        purpose_other = ps.EqualitySelector("purpose", b'other')
        purpose_repairs = ps.EqualitySelector("purpose", b'repairs')
        purpose_business = ps.EqualitySelector("purpose", b'business')
        history_none = ps.EqualitySelector("credit_history",
                                           b"no credits/all paid")
        history_all_paid = ps.EqualitySelector("credit_history", b"all paid")
        unemployed = ps.EqualitySelector("employment", b"unemployed")
        job_unskilled = ps.EqualitySelector("job", b"unemp/unskilled non res")
        plans_bank = ps.EqualitySelector("other_payment_plans", b"bank")

        # Optional third selector per reference disjunction ([] = none).
        third_selectors = [
            [plans_bank],
            [history_none],
            [],
            [purpose_other],
            [purpose_repairs],
            [unemployed],
            [co_applicant],
            [history_all_paid],
            [purpose_business],
            [job_unskilled],
        ]
        self.result = [
            ps.Disjunction([checking_lt0, checking_0_200] + extra)
            for extra in third_selectors
        ]
        self.qualities = [
            0.0779,
            0.07740000000000002,
            0.0771,
            0.07680000000000001,
            0.07670000000000002,
            0.0767,
            0.07660000000000003,
            0.07650000000000003,
            0.07650000000000001,
            0.07600000000000001,
        ]

        credit = get_credit_data()
        bad_class = ps.BinaryTarget('class', b'bad')
        nominal_selectors = ps.create_nominal_selectors(credit,
                                                        ignore=['class'])
        self.task = ps.SubgroupDiscoveryTask(
            credit, bad_class, nominal_selectors,
            result_set_size=10, depth=3, qf=ps.StandardQF(1.0))
Esempio n. 5
0
    def setUp(self):
        """Populate the fixture with 12 reference conjunctions and the task.

        ``self.result`` and ``self.qualities`` are hard-coded, index-aligned
        lists of twelve entries. ``self.task`` targets ``class == b'bad'`` on
        the credit data, searches nominal plus numeric selectors (both created
        with ``class`` ignored), and uses ``StandardQF(0.5)`` with a result
        set size of 12 and depth 5.
        """
        checking = ps.EqualitySelector("checking_status", b"<0")
        foreign = ps.EqualitySelector("foreign_worker", b"yes")
        parties = ps.EqualitySelector("other_parties", b"none")
        savings = ps.EqualitySelector("savings_status", b"<100")
        job = ps.EqualitySelector("job", b"skilled")
        dependents = ps.EqualitySelector("num_dependents", 1.0)

        # One inner list per reference subgroup, in the order of self.qualities.
        selector_sets = [
            [checking, foreign, job, parties, savings],
            [checking, foreign, job, savings],
            [checking, foreign, job],
            [checking, job, parties, savings],
            [checking, foreign, job, parties],
            [checking, job, savings],
            [checking, foreign, parties, savings],
            [checking, foreign, parties],
            [checking, foreign, savings],
            [checking, foreign],
            [checking, foreign, job, dependents, savings],
            [checking, job, parties],
        ]
        self.result = [ps.Conjunction(sels) for sels in selector_sets]

        self.qualities = [
            0.11457431093955019, 0.113713540226172,
            0.11201325679119281, 0.1117538749727658,
            0.11161046793076415, 0.11145710640046322,
            0.11045259291161472, 0.10929088624672183,
            0.10875519439407161, 0.10866138825404954,
            0.10832735026213287, 0.10813405094128754,
        ]

        credit = get_credit_data()
        bad_class = ps.BinaryTarget('class', b'bad')
        nominal_selectors = ps.create_nominal_selectors(credit, ignore=['class'])
        numeric_selectors = ps.create_numeric_selectors(credit, ignore=['class'])
        all_selectors = nominal_selectors + numeric_selectors
        self.task = ps.SubgroupDiscoveryTask(
            credit,
            bad_class,
            all_selectors,
            result_set_size=12,
            depth=5,
            qf=ps.StandardQF(0.5),
        )
    def setUp(self):
        """Populate the fixture for a numeric-target task on ``credit_amount``.

        ``self.result`` holds ten hard-coded reference conjunctions and
        ``self.qualities`` the ten matching hard-coded values. ``self.task``
        uses ``ps.NumericTarget('credit_amount')``, nominal selectors only
        (created with ``credit_amount`` ignored), and a
        ``StandardQFNumeric(1, False, 'sum')`` wrapped in
        ``CountCallsInterestingMeasure``.
        """
        telephone = ps.EqualitySelector("own_telephone", b"yes")
        foreign = ps.EqualitySelector("foreign_worker", b"yes")
        parties = ps.EqualitySelector("other_parties", b"none")
        male_single = ps.EqualitySelector("personal_status", b'male single')
        job_high = ps.EqualitySelector("job", b'high qualif/self emp/mgmt')
        bad_class = ps.EqualitySelector("class", b"bad")

        # One inner list per reference subgroup, in the order of self.qualities.
        selector_sets = [
            [telephone],
            [foreign, telephone],
            [parties, telephone],
            [foreign, telephone, male_single],
            [telephone, male_single],
            [foreign, parties, telephone],
            [job_high],
            [bad_class, telephone],
            [foreign, job_high],
            [foreign, parties, telephone, male_single],
        ]
        self.result = [ps.Conjunction(sels) for sels in selector_sets]
        self.qualities = [
            383476.7679999999,
            361710.05800000014,
            345352.9920000001,
            338205.08,
            336857.8220000001,
            323586.28200000006,
            320306.81600000005,
            300963.84599999996,
            299447.332,
            297422.98200000013,
        ]

        credit = get_credit_data()
        amount_target = ps.NumericTarget('credit_amount')
        nominal_selectors = ps.create_nominal_selectors(
            credit, ignore=['credit_amount'])
        # Numeric selectors are switched off; the list is intentionally empty.
        numeric_selectors = []
        all_selectors = nominal_selectors + numeric_selectors
        counting_qf = ps.CountCallsInterestingMeasure(
            ps.StandardQFNumeric(1, False, 'sum'))
        self.task = ps.SubgroupDiscoveryTask(credit,
                                             amount_target,
                                             all_selectors,
                                             result_set_size=10,
                                             depth=5,
                                             qf=counting_qf)
Esempio n. 7
0
    def setUp(self):
        """Populate the fixture for an ``AreaQF`` task on the titanic data.

        ``self.qualities`` is hard-coded. ``self.qualities2`` is recomputed
        from the data as (number of rows covered) * (conjunction depth) for
        each reference conjunction, and the two lists are asserted equal
        right here in the fixture. ``self.task`` is passed ``ps.FITarget``
        itself (not an instance) together with all nominal selectors of the
        data, depth 2.
        """
        cabin_nan = ps.EqualitySelector("Cabin", np.nan)
        embarked_s = ps.EqualitySelector("Embarked", 'S')
        embarked_c = ps.EqualitySelector("Embarked", 'C')
        male = ps.EqualitySelector("Sex", 'male')
        female = ps.EqualitySelector("Sex", 'female')

        # One inner list per reference subgroup, in the order of self.qualities.
        selector_sets = [
            [cabin_nan, embarked_s],
            [cabin_nan, male],
            [embarked_s, male],
            [cabin_nan],
            [embarked_s],
            [male],
            [cabin_nan, female],
            [embarked_s, female],
            [female],
            [cabin_nan, embarked_c],
        ]
        self.result = [ps.Conjunction(sels) for sels in selector_sets]

        self.qualities = [178, 164, 146, 125, 110, 100, 86, 74, 56, 46]

        titanic = get_titanic_data()
        # Re-derive each quality from the data as a consistency check.
        recomputed = []
        for conjunction in self.result:
            covered_rows = np.count_nonzero(conjunction.covers(titanic))
            recomputed.append(covered_rows * conjunction.depth)
        self.qualities2 = recomputed
        self.assertEqual(self.qualities, self.qualities2)

        nominal_selectors = ps.create_nominal_selectors(titanic)
        self.task = ps.SubgroupDiscoveryTask(
            titanic,
            ps.FITarget,
            nominal_selectors,
            result_set_size=10,
            depth=2,
            qf=ps.AreaQF(),
        )
 def setUp(self):
     """Populate the fixture for a constrained discovery task.

     ``self.result`` / ``self.qualities`` are ten hard-coded, index-aligned
     reference entries. ``self.task`` targets ``class == b'bad'`` on the
     credit data and is created with ``ps.MinSupportConstraint(200)`` as its
     only constraint (presumably a minimum subgroup size; confirm against
     pysubgroup).
     """
     checking = ps.EqualitySelector("checking_status", b"<0")
     foreign = ps.EqualitySelector("foreign_worker", b"yes")
     parties = ps.EqualitySelector("other_parties", b"none")
     savings = ps.EqualitySelector("savings_status", b"<100")
     plans_none = ps.EqualitySelector("other_payment_plans", b"none")

     # One inner list per reference subgroup, in the order of self.qualities.
     selector_sets = [
         [checking, foreign],
         [checking],
         [checking, parties, foreign],
         [checking, parties],
         [checking, savings, foreign],
         [checking, savings],
         [checking, foreign, plans_none],
         [checking, plans_none],
         [foreign, savings],
         [foreign, parties, savings],
     ]
     self.result = [ps.Conjunction(sels) for sels in selector_sets]
     self.qualities = [
         0.055299999999999995,
         0.05280000000000001,
         0.052300000000000006,
         0.05059999999999999,
         0.04959999999999999,
         0.048299999999999996,
         0.0426,
         0.04,
         0.03869999999999999,
         0.03750000000000001,
     ]

     credit = get_credit_data()
     bad_class = ps.BinaryTarget('class', b'bad')
     nominal_selectors = ps.create_nominal_selectors(credit, ignore=['class'])
     self.task = ps.SubgroupDiscoveryTask(credit,
                                          bad_class,
                                          nominal_selectors,
                                          result_set_size=10,
                                          depth=5,
                                          qf=ps.StandardQF(1.0),
                                          constraints=[ps.MinSupportConstraint(200)])
Esempio n. 9
0
from scipy.io import arff

import pysubgroup as ps
import pandas as pd
from timeit import default_timer as timer

# Load the credit data from its ARFF file; loadarff returns a tuple whose
# first element is fed to the DataFrame constructor.
data = pd.DataFrame(arff.loadarff("../data/credit-g.arff")[0])

# Task: target class == b'bad', searched over all nominal selectors except
# the target column itself.
target = ps.NominalTarget('class', b'bad')
search_space = ps.create_nominal_selectors(data, ignore=['class'])
task = ps.SubgroupDiscoveryTask(
    data, target, search_space,
    result_set_size=10, depth=5, qf=ps.StandardQF(0.5))

# Disabled alternative run, kept for reference:
#   start = timer()
#   result = ps.BSD_Bitarray().execute(task)
#   end = timer()
#   print("Time elapsed: ", (end - start))
#   for (q, sg) in result:
#       print (str(q) + ":\t" + str(sg.subgroup_description))
#   print WRAccQF().evaluate_from_dataset(data, Subgroup(target, []))

# Time a single BSD run and print every (quality, subgroup) pair it returns.
start = timer()
result = ps.BSD().execute(task)
end = timer()
print("Time elapsed: ", end - start)
for q, sg in result:
    # print() applies str() to each argument, so the output is the same as
    # concatenating str(q), ":\t" and str(sg.subgroup_description).
    print(q, sg.subgroup_description, sep=":\t")
Esempio n. 10
0
# ROC AUC restricted to the rows where X.Age >= 38.0.
# NOTE(review): X, y and y_hat are defined above the visible part of this
# script; their exact types cannot be confirmed from here.
roc_auc_score(y[X.Age >= 38.0], y_hat[X.Age >= 38.0])

############################################################
## Toy example using the test dataset to generate answers ##
############################################################


from pysubgroup.tests.DataSets import get_credit_data
data = get_credit_data()

# Seed NumPy's global RNG, then draw 1000 random 0/1 labels followed by 1000
# uniform estimates; the call order determines the generated values.
np.random.seed(1111)
target_variables = np.random.randint(low=0, high=2, size=1000)
target_estimates = np.random.uniform(size=1000)
target = ps.PredictionTarget(target_variables, target_estimates, roc_auc_score)

# Only nominal selectors are searched; the numeric list is left empty on
# purpose (the numeric selector call is commented out).
searchSpace_Nominal = ps.create_nominal_selectors(data, ignore=['credit_amount'])
searchSpace_Numeric = [] #ps.create_numeric_selectors(data, ignore=['credit_amount'], nbins=10)
searchSpace = searchSpace_Nominal + searchSpace_Numeric

task = ps.SubgroupDiscoveryTask(data, target, searchSpace, result_set_size=10, depth=5, qf=ps.CountCallsInterestingMeasure(ps.PredictionQFNumeric(1, False)))

# Run the same task through several search algorithms.
resultBS = ps.BeamSearch().execute(task)
resultA = ps.Apriori(use_numba=False).execute(task)
resultA_numba = ps.Apriori(use_numba=True).execute(task)
resultSimpleDFS = ps.SimpleDFS().execute(task)
resultDFS = ps.DFS(ps.BitSetRepresentation).execute(task)
# The returned DataFrame is not assigned; presumably meant for interactive
# display (e.g. in a notebook) - TODO confirm.
resultDFS.to_dataframe()


############################################################
## Toy example using the default eval to generate answers ##
import pysubgroup as ps
import pandas as pd
from scipy.io import arff
from timeit import default_timer as timer

# Load the credit data from its ARFF file; loadarff returns a tuple whose
# first element is fed to the DataFrame constructor.
data = pd.DataFrame(arff.loadarff("../data/credit-g.arff")[0])

# Numeric target on credit_amount, searched over all nominal selectors except
# the target column itself.
target = ps.NumericTarget('credit_amount')
search_space = ps.create_nominal_selectors(data, ignore=['credit_amount'])
task = ps.SubgroupDiscoveryTask(
    data, target, search_space,
    result_set_size=10, depth=3, qf=ps.StandardQFNumeric(1, False))
print(search_space)

# Time a single SimpleDFS run and print every (quality, subgroup) pair.
start = timer()
result = ps.SimpleDFS().execute(task)
end = timer()
print(f"Time elapsed: {end - start}")
for q, sg in result:
    # print() applies str() to each argument, so the output is the same as
    # concatenating str(q), ":\t" and str(sg.subgroup_description).
    print(q, sg.subgroup_description, sep=":\t")