def setUp(self):
    """Prepare the expected results and the discovery task on the credit data.

    Sets three attributes:

    * ``self.result`` -- ten expected conjunctions, in ranking order.
    * ``self.qualities`` -- the expected quality value for each entry of
      ``self.result`` (same order).
    * ``self.task`` -- a task with binary target ``class == b'bad'``, nominal
      selectors only (``class`` ignored), ``StandardQF(1.0)``, result set
      size 10 and depth 5.
    """
    checking = ps.EqualitySelector("checking_status", b"<0")
    foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
    other_parties = ps.EqualitySelector("other_parties", b"none")
    savings = ps.EqualitySelector("savings_status", b"<100")
    job = ps.EqualitySelector("job", b"skilled")

    # One inner list per expected subgroup; the order lines up with
    # self.qualities below.
    expected_selector_sets = [
        [checking, foreign_worker],
        [checking],
        [checking, other_parties, foreign_worker],
        [checking, other_parties],
        [checking, savings, foreign_worker],
        [checking, savings],
        [checking, savings, other_parties, foreign_worker],
        [checking, job, foreign_worker],
        [checking, savings, other_parties],
        [checking, job],
    ]
    self.result = [
        ps.Conjunction(selectors) for selectors in expected_selector_sets
    ]
    self.qualities = [
        0.055299999999999995,
        0.05280000000000001,
        0.052300000000000006,
        0.05059999999999999,
        0.04959999999999999,
        0.048299999999999996,
        0.04660000000000001,
        0.04550000000000001,
        0.0452,
        0.044399999999999995,
    ]

    credit = get_credit_data()
    self.task = ps.SubgroupDiscoveryTask(
        credit,
        ps.BinaryTarget('class', b'bad'),
        ps.create_nominal_selectors(credit, ignore=['class']),
        result_set_size=10,
        depth=5,
        qf=ps.StandardQF(1.0),
    )
def setUp(self):
    """Prepare expectations and the task for a prediction-target search.

    Sets:

    * ``self.DFSresult`` / ``self.DFSqualities`` -- ten expected entries whose
      first result is the literal ``True`` followed by nine conjunctions.
    * ``self.result`` / ``self.qualities`` -- ten expected conjunctions and
      their quality values.
    * ``self.target_variables`` / ``self.target_estimates`` -- 1000 random
      values each, drawn after seeding NumPy with 1111.
    * ``self.task`` -- task over the credit data using a ``PredictionTarget``
      built from the random arrays and ``roc_auc_score``, nominal selectors
      only (``credit_amount`` ignored), result set size 10, depth 5, and a
      call-counting wrapper around ``PredictionQFNumeric(1, False)``.
    """
    payment = ps.EqualitySelector("other_payment_plans", b"none")
    foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
    other_parties = ps.EqualitySelector("other_parties", b"none")
    housing = ps.EqualitySelector("housing", b'own')
    good_class = ps.EqualitySelector("class", b"good")

    # Expected DFS output: the literal True comes first, then nine
    # conjunctions.
    dfs_selector_sets = [
        [foreign_worker],
        [other_parties],
        [foreign_worker, other_parties],
        [payment],
        [foreign_worker, payment],
        [other_parties, payment],
        [housing],
        [good_class],
        [foreign_worker, other_parties, payment],
    ]
    self.DFSresult = [True] + [
        ps.Conjunction(selectors) for selectors in dfs_selector_sets
    ]
    self.DFSqualities = [
        500.4980179286455,
        483.3153195123844,
        459.2862838915471,
        444.60343785358896,
        398.25539855072464,
        384.0460358056267,
        362.090608537693,
        355.0749649843413,
        355.010575658835,
        349.8188702669149,
    ]

    # Expected output without the leading True entry: ten conjunctions.
    selector_sets = [
        [foreign_worker],
        [other_parties],
        [foreign_worker, other_parties],
        [payment],
        [foreign_worker, payment],
        [other_parties, payment],
        [housing],
        [good_class],
        [foreign_worker, other_parties, payment],
        [foreign_worker, housing],
    ]
    self.result = [ps.Conjunction(selectors) for selectors in selector_sets]
    self.qualities = [
        483.3153195123844,
        459.2862838915471,
        444.60343785358896,
        398.25539855072464,
        384.0460358056267,
        362.090608537693,
        355.0749649843413,
        355.010575658835,
        349.8188702669149,
        342.20780439530444,
    ]

    # Seed first so both random arrays are reproducible; the draw order
    # (integers, then uniforms) must stay as is.
    np.random.seed(1111)
    self.target_variables = np.random.randint(low=0, high=2, size=1000)
    self.target_estimates = np.random.uniform(size=1000)

    credit = get_credit_data()
    prediction_target = ps.PredictionTarget(
        self.target_variables, self.target_estimates, roc_auc_score
    )
    nominal_selectors = ps.create_nominal_selectors(
        credit, ignore=['credit_amount']
    )
    # Numeric selectors are intentionally left out of the search space.
    numeric_selectors = []
    self.task = ps.SubgroupDiscoveryTask(
        credit,
        prediction_target,
        nominal_selectors + numeric_selectors,
        result_set_size=10,
        depth=5,
        qf=ps.CountCallsInterestingMeasure(ps.PredictionQFNumeric(1, False)),
    )
def setUpClass(cls):
    """Build the credit-data task once and store the SimpleDFS result.

    Sets ``cls.task`` (binary target ``class == b'bad'``, nominal selectors
    with ``class`` ignored, ``StandardQF(1.0)``, result set size 10, depth 5)
    and ``cls.result`` (what ``ps.SimpleDFS().execute`` returns for that
    task).
    """
    credit = get_credit_data()
    bad_class = ps.BinaryTarget('class', b'bad')
    selectors = ps.create_nominal_selectors(credit, ignore=['class'])
    task = ps.SubgroupDiscoveryTask(
        credit,
        bad_class,
        selectors,
        result_set_size=10,
        depth=5,
        qf=ps.StandardQF(1.0),
    )
    cls.task = task
    cls.result = ps.SimpleDFS().execute(task)
def setUp(self):
    """Prepare expected disjunctions and the depth-3 task on the credit data.

    Sets ``self.result`` (ten expected ``ps.Disjunction`` objects),
    ``self.qualities`` (their expected quality values, same order) and
    ``self.task`` (binary target ``class == b'bad'``, nominal selectors with
    ``class`` ignored, ``StandardQF(1.0)``, result set size 10, depth 3).
    """
    checking_neg = ps.EqualitySelector("checking_status", b"<0")
    checking_low = ps.EqualitySelector("checking_status", b"0<=X<200")
    co_applicant = ps.EqualitySelector("other_parties", b"co applicant")
    purpose_other = ps.EqualitySelector("purpose", b'other')
    purpose_repairs = ps.EqualitySelector("purpose", b'repairs')
    purpose_business = ps.EqualitySelector("purpose", b'business')
    history_none = ps.EqualitySelector("credit_history", b"no credits/all paid")
    history_all_paid = ps.EqualitySelector("credit_history", b"all paid")
    unemployed = ps.EqualitySelector("employment", b"unemployed")
    job_unskilled = ps.EqualitySelector("job", b"unemp/unskilled non res")
    bank_plan = ps.EqualitySelector("other_payment_plans", b"bank")

    # Every expected disjunction starts with the two checking_status
    # selectors; the tuple below lists what (if anything) follows them.
    trailing_selectors = (
        [bank_plan],
        [history_none],
        [],
        [purpose_other],
        [purpose_repairs],
        [unemployed],
        [co_applicant],
        [history_all_paid],
        [purpose_business],
        [job_unskilled],
    )
    self.result = [
        ps.Disjunction([checking_neg, checking_low, *extra])
        for extra in trailing_selectors
    ]
    self.qualities = [
        0.0779,
        0.07740000000000002,
        0.0771,
        0.07680000000000001,
        0.07670000000000002,
        0.0767,
        0.07660000000000003,
        0.07650000000000003,
        0.07650000000000001,
        0.07600000000000001,
    ]

    credit = get_credit_data()
    self.task = ps.SubgroupDiscoveryTask(
        credit,
        ps.BinaryTarget('class', b'bad'),
        ps.create_nominal_selectors(credit, ignore=['class']),
        result_set_size=10,
        depth=3,
        qf=ps.StandardQF(1.0),
    )
def setUp(self):
    """Prepare expectations and a task that uses nominal and numeric selectors.

    Sets ``self.result`` (twelve expected conjunctions), ``self.qualities``
    (their expected quality values, same order) and ``self.task`` (binary
    target ``class == b'bad'``, nominal plus numeric selectors with ``class``
    ignored, ``StandardQF(0.5)``, result set size 12, depth 5).
    """
    checking = ps.EqualitySelector("checking_status", b"<0")
    foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
    other_parties = ps.EqualitySelector("other_parties", b"none")
    savings = ps.EqualitySelector("savings_status", b"<100")
    job = ps.EqualitySelector("job", b"skilled")
    dependents = ps.EqualitySelector("num_dependents", 1.0)

    # One inner list per expected subgroup; order matches self.qualities.
    expected_selector_sets = [
        [checking, foreign_worker, job, other_parties, savings],
        [checking, foreign_worker, job, savings],
        [checking, foreign_worker, job],
        [checking, job, other_parties, savings],
        [checking, foreign_worker, job, other_parties],
        [checking, job, savings],
        [checking, foreign_worker, other_parties, savings],
        [checking, foreign_worker, other_parties],
        [checking, foreign_worker, savings],
        [checking, foreign_worker],
        [checking, foreign_worker, job, dependents, savings],
        [checking, job, other_parties],
    ]
    self.result = [
        ps.Conjunction(selectors) for selectors in expected_selector_sets
    ]
    self.qualities = [
        0.11457431093955019,
        0.113713540226172,
        0.11201325679119281,
        0.1117538749727658,
        0.11161046793076415,
        0.11145710640046322,
        0.11045259291161472,
        0.10929088624672183,
        0.10875519439407161,
        0.10866138825404954,
        0.10832735026213287,
        0.10813405094128754,
    ]

    credit = get_credit_data()
    bad_class = ps.BinaryTarget('class', b'bad')
    nominal_selectors = ps.create_nominal_selectors(credit, ignore=['class'])
    numeric_selectors = ps.create_numeric_selectors(credit, ignore=['class'])
    self.task = ps.SubgroupDiscoveryTask(
        credit,
        bad_class,
        nominal_selectors + numeric_selectors,
        result_set_size=12,
        depth=5,
        qf=ps.StandardQF(0.5),
    )
def setUp(self):
    """Prepare expectations and a numeric-target task on the credit data.

    Sets ``self.result`` (ten expected conjunctions), ``self.qualities``
    (their expected quality values, same order) and ``self.task`` (numeric
    target ``credit_amount``, nominal selectors with ``credit_amount``
    ignored, result set size 10, depth 5, and a call-counting wrapper around
    ``StandardQFNumeric(1, False, 'sum')``).
    """
    telephone = ps.EqualitySelector("own_telephone", b"yes")
    foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
    other_parties = ps.EqualitySelector("other_parties", b"none")
    male_single = ps.EqualitySelector("personal_status", b'male single')
    high_qualif = ps.EqualitySelector("job", b'high qualif/self emp/mgmt')
    bad_class = ps.EqualitySelector("class", b"bad")

    # One inner list per expected subgroup; order matches self.qualities.
    expected_selector_sets = [
        [telephone],
        [foreign_worker, telephone],
        [other_parties, telephone],
        [foreign_worker, telephone, male_single],
        [telephone, male_single],
        [foreign_worker, other_parties, telephone],
        [high_qualif],
        [bad_class, telephone],
        [foreign_worker, high_qualif],
        [foreign_worker, other_parties, telephone, male_single],
    ]
    self.result = [
        ps.Conjunction(selectors) for selectors in expected_selector_sets
    ]
    self.qualities = [
        383476.7679999999,
        361710.05800000014,
        345352.9920000001,
        338205.08,
        336857.8220000001,
        323586.28200000006,
        320306.81600000005,
        300963.84599999996,
        299447.332,
        297422.98200000013,
    ]

    credit = get_credit_data()
    amount_target = ps.NumericTarget('credit_amount')
    nominal_selectors = ps.create_nominal_selectors(
        credit, ignore=['credit_amount']
    )
    # Numeric selectors are intentionally left out of the search space.
    numeric_selectors = []
    counting_qf = ps.CountCallsInterestingMeasure(
        ps.StandardQFNumeric(1, False, 'sum')
    )
    self.task = ps.SubgroupDiscoveryTask(
        credit,
        amount_target,
        nominal_selectors + numeric_selectors,
        result_set_size=10,
        depth=5,
        qf=counting_qf,
    )
def setUp(self):
    """Prepare expectations and an ``AreaQF`` task on the titanic data.

    Sets ``self.result`` (ten expected conjunctions), ``self.qualities``
    (hard-coded expected values), ``self.qualities2`` (the same values
    recomputed as covered-row count times conjunction depth) and
    ``self.task`` (nominal selectors over all columns, result set size 10,
    depth 2, ``AreaQF``).  The hard-coded and recomputed qualities are
    asserted equal before the task is built.
    """
    cabin_missing = ps.EqualitySelector("Cabin", np.nan)
    embarked_s = ps.EqualitySelector("Embarked", 'S')
    embarked_c = ps.EqualitySelector("Embarked", 'C')
    male = ps.EqualitySelector("Sex", 'male')
    female = ps.EqualitySelector("Sex", 'female')

    # One inner list per expected subgroup; order matches self.qualities.
    expected_selector_sets = [
        [cabin_missing, embarked_s],
        [cabin_missing, male],
        [embarked_s, male],
        [cabin_missing],
        [embarked_s],
        [male],
        [cabin_missing, female],
        [embarked_s, female],
        [female],
        [cabin_missing, embarked_c],
    ]
    self.result = [
        ps.Conjunction(selectors) for selectors in expected_selector_sets
    ]
    self.qualities = [178, 164, 146, 125, 110, 100, 86, 74, 56, 46]

    titanic = get_titanic_data()

    # Cross-check the literals above against the data itself.
    recomputed = []
    for conj in self.result:
        covered_rows = np.count_nonzero(conj.covers(titanic))
        recomputed.append(covered_rows * conj.depth)
    self.qualities2 = recomputed
    self.assertEqual(self.qualities, self.qualities2)

    selectors = ps.create_nominal_selectors(titanic)
    # The target argument is the ps.FITarget class itself, not an instance.
    self.task = ps.SubgroupDiscoveryTask(
        titanic,
        ps.FITarget,
        selectors,
        result_set_size=10,
        depth=2,
        qf=ps.AreaQF(),
    )
def setUp(self):
    """Prepare expectations and a task with a minimum-support constraint.

    Sets ``self.result`` (ten expected conjunctions), ``self.qualities``
    (their expected quality values, same order) and ``self.task`` (binary
    target ``class == b'bad'``, nominal selectors with ``class`` ignored,
    ``StandardQF(1.0)``, result set size 10, depth 5, constrained by
    ``MinSupportConstraint(200)``).
    """
    checking = ps.EqualitySelector("checking_status", b"<0")
    foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
    other_parties = ps.EqualitySelector("other_parties", b"none")
    savings = ps.EqualitySelector("savings_status", b"<100")
    payment_plans = ps.EqualitySelector("other_payment_plans", b"none")

    # One inner list per expected subgroup; order matches self.qualities.
    expected_selector_sets = [
        [checking, foreign_worker],
        [checking],
        [checking, other_parties, foreign_worker],
        [checking, other_parties],
        [checking, savings, foreign_worker],
        [checking, savings],
        [checking, foreign_worker, payment_plans],
        [checking, payment_plans],
        [foreign_worker, savings],
        [foreign_worker, other_parties, savings],
    ]
    self.result = [
        ps.Conjunction(selectors) for selectors in expected_selector_sets
    ]
    self.qualities = [
        0.055299999999999995,
        0.05280000000000001,
        0.052300000000000006,
        0.05059999999999999,
        0.04959999999999999,
        0.048299999999999996,
        0.0426,
        0.04,
        0.03869999999999999,
        0.03750000000000001,
    ]

    credit = get_credit_data()
    bad_class = ps.BinaryTarget('class', b'bad')
    selectors = ps.create_nominal_selectors(credit, ignore=['class'])
    self.task = ps.SubgroupDiscoveryTask(
        credit,
        bad_class,
        selectors,
        result_set_size=10,
        depth=5,
        qf=ps.StandardQF(1.0),
        constraints=[ps.MinSupportConstraint(200)],
    )
# Example script: time ps.BSD on the credit-g data set and print each result.
from timeit import default_timer as timer

import pandas as pd
from scipy.io import arff

import pysubgroup as ps

# loadarff returns a (records, metadata) pair; only the records are used.
data = pd.DataFrame(arff.loadarff("../data/credit-g.arff")[0])

target = ps.NominalTarget('class', b'bad')
search_space = ps.create_nominal_selectors(data, ignore=['class'])
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    search_space,
    result_set_size=10,
    depth=5,
    qf=ps.StandardQF(0.5),
)

# An earlier variant of this script timed ps.BSD_Bitarray() in the same way;
# only the ps.BSD() run is active.
start = timer()
result = ps.BSD().execute(task)
end = timer()

elapsed = end - start
print("Time elapsed: ", elapsed)
for q, sg in result:
    # One line per result: quality, a tab-separated colon, the description.
    print(":\t".join((str(q), str(sg.subgroup_description))))
roc_auc_score(y[X.Age >= 38.0], y_hat[X.Age >= 38.0]) ############################################################ ## Toy example using the test dataset to generate answers ## ############################################################ from pysubgroup.tests.DataSets import get_credit_data data = get_credit_data() np.random.seed(1111) target_variables = np.random.randint(low=0, high=2, size=1000) target_estimates = np.random.uniform(size=1000) target = ps.PredictionTarget(target_variables, target_estimates, roc_auc_score) searchSpace_Nominal = ps.create_nominal_selectors(data, ignore=['credit_amount']) searchSpace_Numeric = [] #ps.create_numeric_selectors(data, ignore=['credit_amount'], nbins=10) searchSpace = searchSpace_Nominal + searchSpace_Numeric task = ps.SubgroupDiscoveryTask(data, target, searchSpace, result_set_size=10, depth=5, qf=ps.CountCallsInterestingMeasure(ps.PredictionQFNumeric(1, False))) resultBS = ps.BeamSearch().execute(task) resultA = ps.Apriori(use_numba=False).execute(task) resultA_numba = ps.Apriori(use_numba=True).execute(task) resultSimpleDFS = ps.SimpleDFS().execute(task) resultDFS = ps.DFS(ps.BitSetRepresentation).execute(task) resultDFS.to_dataframe() ############################################################ ## Toy example using the default eval to generate answers ##
# Example script: time ps.SimpleDFS on the credit-g data set with a numeric
# target and print the search space and each result.
from timeit import default_timer as timer

import pandas as pd
from scipy.io import arff

import pysubgroup as ps

# loadarff returns a (records, metadata) pair; only the records are used.
data = pd.DataFrame(arff.loadarff("../data/credit-g.arff")[0])

target = ps.NumericTarget('credit_amount')
search_space = ps.create_nominal_selectors(data, ignore=['credit_amount'])
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    search_space,
    result_set_size=10,
    depth=3,
    qf=ps.StandardQFNumeric(1, False),
)
print(search_space)

start = timer()
result = ps.SimpleDFS().execute(task)
end = timer()

elapsed = end - start
print(f"Time elapsed: {elapsed}")
for q, sg in result:
    # One line per result: quality, a tab-separated colon, the description.
    print(str(q), str(sg.subgroup_description), sep=":\t")