def setUp(self):
    """Prepare expected results and the discovery task for the 'sum' tests.

    Sets three attributes:

    * ``self.result``    -- ten ``ps.Conjunction`` objects, one per
      expected selector combination.
    * ``self.qualities`` -- ten quality values in descending order
      (presumably index-aligned with ``self.result``; confirm against
      the consumer).
    * ``self.task``      -- a ``ps.SubgroupDiscoveryTask`` over the credit
      data with the numeric target ``credit_amount``.
    """
    # Selectors that the expected subgroup descriptions are built from.
    telephone = ps.EqualitySelector("own_telephone", b"yes")
    foreign = ps.EqualitySelector("foreign_worker", b"yes")
    no_parties = ps.EqualitySelector("other_parties", b"none")
    male_single = ps.EqualitySelector("personal_status", b'male single')
    qualified_job = ps.EqualitySelector("job", b'high qualif/self emp/mgmt')
    bad_class = ps.EqualitySelector("class", b"bad")

    expected_descriptions = [
        [telephone],
        [foreign, telephone],
        [no_parties, telephone],
        [foreign, telephone, male_single],
        [telephone, male_single],
        [foreign, no_parties, telephone],
        [qualified_job],
        [bad_class, telephone],
        [foreign, qualified_job],
        [foreign, no_parties, telephone, male_single],
    ]
    self.result = [
        ps.Conjunction(selectors) for selectors in expected_descriptions
    ]
    self.qualities = [
        383476.7679999999,
        361710.05800000014,
        345352.9920000001,
        338205.08,
        336857.8220000001,
        323586.28200000006,
        320306.81600000005,
        300963.84599999996,
        299447.332,
        297422.98200000013,
    ]

    credit_data = get_credit_data()
    numeric_target = ps.NumericTarget('credit_amount')
    # The target column is excluded from the selector search space.
    nominal_selectors = ps.create_nominal_selectors(
        credit_data, ignore=['credit_amount'])
    # Numeric selectors are switched off; the disabled call was
    # ps.create_numeric_selectors(data, ignore=['credit_amount'], nbins=10)
    numeric_selectors = []
    quality_function = ps.CountCallsInterestingMeasure(
        ps.StandardQFNumeric(1, False, 'sum'))
    self.task = ps.SubgroupDiscoveryTask(
        credit_data,
        numeric_target,
        nominal_selectors + numeric_selectors,
        result_set_size=10,
        depth=5,
        qf=quality_function)
# Example no. 2
# 0
############################################################


# Demo script: builds a prediction-target subgroup discovery task on the
# credit data and runs it through several search algorithms.
# NOTE(review): `np`, `ps` and `roc_auc_score` are used below but are not
# imported in this part of the file -- they must be in scope already.
from pysubgroup.tests.DataSets import get_credit_data
data = get_credit_data()

# Seed NumPy's global RNG so the two random arrays below are reproducible.
np.random.seed(1111)
# 1000 random integer labels from {0, 1} (`high` is exclusive).
target_variables = np.random.randint(low=0, high=2, size=1000)
# 1000 uniform samples (default range [0.0, 1.0)) used as estimates.
target_estimates = np.random.uniform(size=1000)
# NOTE(review): size=1000 presumably equals the number of rows in `data` -- confirm.
target = ps.PredictionTarget(target_variables, target_estimates, roc_auc_score)

# Search space: nominal selectors only; 'credit_amount' is excluded.
searchSpace_Nominal = ps.create_nominal_selectors(data, ignore=['credit_amount'])
# Numeric selectors are disabled (empty list); the original call is kept in the comment.
searchSpace_Numeric = [] #ps.create_numeric_selectors(data, ignore=['credit_amount'], nbins=10)
searchSpace = searchSpace_Nominal + searchSpace_Numeric

# Task settings: result_set_size=10, depth=5, quality function
# PredictionQFNumeric(1, False) wrapped in CountCallsInterestingMeasure.
task = ps.SubgroupDiscoveryTask(data, target, searchSpace, result_set_size=10, depth=5, qf=ps.CountCallsInterestingMeasure(ps.PredictionQFNumeric(1, False)))

# Run the same task object through each algorithm; every result is kept
# in its own module-level variable.
resultBS = ps.BeamSearch().execute(task)
resultA = ps.Apriori(use_numba=False).execute(task)
resultA_numba = ps.Apriori(use_numba=True).execute(task)
resultSimpleDFS = ps.SimpleDFS().execute(task)
resultDFS = ps.DFS(ps.BitSetRepresentation).execute(task)
# The return value is not stored.
# NOTE(review): looks like notebook-style output of the last expression -- confirm.
resultDFS.to_dataframe()


############################################################
## Toy example using the default eval to generate answers ##
############################################################


# Re-seed the global RNG with the same value used at the top of the script.
np.random.seed(1111)
 def test_BeamSearch_sum(self):
     """Run ``ps.BeamSearch`` on the task using the 'sum' quality function.

     The task's quality function is replaced by a
     ``ps.CountCallsInterestingMeasure`` wrapping a new
     ``ps.StandardQFNumeric`` that reuses the current ``a`` value. The
     algorithm, the label "BeamSearch sum", ``self.result``,
     ``self.qualities`` and the task are then passed to
     ``self.runAlgorithm``.
     """
     task = self.task
     sum_qf = ps.StandardQFNumeric(task.qf.a, False, 'sum')
     task.qf = ps.CountCallsInterestingMeasure(sum_qf)
     searcher = ps.BeamSearch()
     self.runAlgorithm(
         searcher, "BeamSearch sum", self.result, self.qualities, task)
 def test_DFS_sum(self):
     """Run ``ps.DFS`` (bit-set representation) with the 'sum' quality function.

     The task's quality function is replaced by a
     ``ps.CountCallsInterestingMeasure`` wrapping a new
     ``ps.StandardQFNumeric`` that reuses the current ``a`` value. The
     algorithm, the label "DFS sum", ``self.result``, ``self.qualities``
     and the task are then passed to ``self.runAlgorithm``.
     """
     task = self.task
     sum_qf = ps.StandardQFNumeric(task.qf.a, False, 'sum')
     task.qf = ps.CountCallsInterestingMeasure(sum_qf)
     searcher = ps.DFS(ps.BitSetRepresentation)
     self.runAlgorithm(
         searcher, "DFS sum", self.result, self.qualities, task)
 def test_BeamSearch(self):
     """Run ``ps.BeamSearch`` on the task using ``ps.PredictionQFNumeric``.

     The task's quality function is replaced by a
     ``ps.CountCallsInterestingMeasure`` wrapping a new
     ``ps.PredictionQFNumeric`` that reuses the current ``a`` value. The
     algorithm, a run label, ``self.result``, ``self.qualities`` and the
     task are then passed to ``self.runAlgorithm``.
     """
     prediction_qf = ps.PredictionQFNumeric(self.task.qf.a, False)
     self.task.qf = ps.CountCallsInterestingMeasure(prediction_qf)
     # The label names the algorithm only, like the "DFS" label in
     # test_DFS: this test uses PredictionQFNumeric, not the 'sum'
     # quality function, so a "... sum" label would be misleading.
     # NOTE(review): assumes runAlgorithm treats the label as a
     # description only -- confirm.
     self.runAlgorithm(
         ps.BeamSearch(), "BeamSearch", self.result, self.qualities,
         self.task)
 def test_DFS(self):
     """Run ``ps.DFS`` (bit-set representation) with ``ps.PredictionQFNumeric``.

     The task's quality function is replaced by a
     ``ps.CountCallsInterestingMeasure`` wrapping a new
     ``ps.PredictionQFNumeric`` that reuses the current ``a`` value. The
     algorithm, the label "DFS", ``self.DFSresult``,
     ``self.DFSqualities`` and the task are then passed to
     ``self.runAlgorithm``.
     """
     task = self.task
     prediction_qf = ps.PredictionQFNumeric(task.qf.a, False)
     task.qf = ps.CountCallsInterestingMeasure(prediction_qf)
     searcher = ps.DFS(ps.BitSetRepresentation)
     self.runAlgorithm(
         searcher, "DFS", self.DFSresult, self.DFSqualities, task)
 def setUp(self):
     """Prepare expectations and the task for the prediction-target tests.

     Sets the following attributes:

     * ``self.DFSresult`` / ``self.DFSqualities`` -- ten entries each; the
       result list starts with ``True`` followed by nine
       ``ps.Conjunction`` objects.
     * ``self.result`` / ``self.qualities`` -- ten entries each; the same
       nine descriptions plus one additional conjunction.
     * ``self.target_variables`` / ``self.target_estimates`` -- seeded
       random arrays of length 1000.
     * ``self.task`` -- a ``ps.SubgroupDiscoveryTask`` over the credit
       data with a ``ps.PredictionTarget`` built from those arrays and
       ``roc_auc_score``.
     """
     no_payment_plans = ps.EqualitySelector("other_payment_plans", b"none")
     foreign = ps.EqualitySelector("foreign_worker", b"yes")
     no_parties = ps.EqualitySelector("other_parties", b"none")
     own_housing = ps.EqualitySelector("housing", b'own')
     good_class = ps.EqualitySelector("class", b"good")

     def shared_descriptions():
         # Returns fresh nested lists on every call, so the two expectation
         # lists never share list objects.
         return [
             [foreign],
             [no_parties],
             [foreign, no_parties],
             [no_payment_plans],
             [foreign, no_payment_plans],
             [no_parties, no_payment_plans],
             [own_housing],
             [good_class],
             [foreign, no_parties, no_payment_plans],
         ]

     # Quality values that appear in both expectation lists.
     shared_qualities = [
         483.3153195123844,
         459.2862838915471,
         444.60343785358896,
         398.25539855072464,
         384.0460358056267,
         362.090608537693,
         355.0749649843413,
         355.010575658835,
         349.8188702669149,
     ]

     # DFS expectations start with a bare True entry.
     # NOTE(review): True presumably stands for the unrestricted
     # description of the whole dataset -- confirm.
     self.DFSresult = [True] + [
         ps.Conjunction(selectors) for selectors in shared_descriptions()
     ]
     self.DFSqualities = [500.4980179286455] + shared_qualities

     # The other expectations have no True entry and end with one
     # additional conjunction instead.
     descriptions = shared_descriptions() + [[foreign, own_housing]]
     self.result = [ps.Conjunction(selectors) for selectors in descriptions]
     self.qualities = shared_qualities + [342.20780439530444]

     # Seed first, then draw labels, then estimates: this order fixes the
     # generated values.
     np.random.seed(1111)
     self.target_variables = np.random.randint(low=0, high=2, size=1000)
     self.target_estimates = np.random.uniform(size=1000)

     credit_data = get_credit_data()
     prediction_target = ps.PredictionTarget(
         self.target_variables, self.target_estimates, roc_auc_score)
     nominal_selectors = ps.create_nominal_selectors(
         credit_data, ignore=['credit_amount'])
     # Numeric selectors are switched off; the disabled call was
     # ps.create_numeric_selectors(data, ignore=['credit_amount'], nbins=10)
     numeric_selectors = []
     quality_function = ps.CountCallsInterestingMeasure(
         ps.PredictionQFNumeric(1, False))
     self.task = ps.SubgroupDiscoveryTask(
         credit_data,
         prediction_target,
         nominal_selectors + numeric_selectors,
         result_set_size=10,
         depth=5,
         qf=quality_function)