def test_constructor(self):
        """Exercise the argument validation of ``ps.StandardQFNumeric``.

        Numeric first arguments, both ``invert`` flags and the estimator
        names ``'sum'``, ``'average'`` and ``'order'`` must be accepted
        without raising.  A string as first argument and the estimator
        name ``'bla'`` must each raise ``ValueError``.
        """
        # Accepted: integer and float first argument.
        ps.StandardQFNumeric(0)
        ps.StandardQFNumeric(1.0)

        # Accepted: either value of the invert keyword.
        for invert_flag in (True, False):
            ps.StandardQFNumeric(0, invert=invert_flag)

        # Rejected: a non-numeric first argument.
        self.assertRaises(ValueError, ps.StandardQFNumeric, 'test')

        # Accepted: every estimator name listed here.
        for estimator_name in ('sum', 'average', 'order'):
            ps.StandardQFNumeric(0, estimator=estimator_name)

        # Rejected: an estimator name outside that list.
        self.assertRaises(ValueError, ps.StandardQFNumeric, 0,
                          estimator='bla')
Example #2
0
 def setUpClass(cls):
     """Create the shared discovery task and run ``SimpleDFS`` on it once.

     The task is stored as ``cls.task`` and the outcome of the search as
     ``cls.result``.  The data comes from ``get_credit_data()``; the
     numeric target is the ``credit_amount`` column.
     """
     credit_frame = get_credit_data()
     amount_target = ps.NumericTarget('credit_amount')
     # The target column is excluded from the selector candidates.
     nominal_selectors = ps.create_nominal_selectors(
         credit_frame, ignore=['credit_amount'])

     quality_function = ps.StandardQFNumeric(1.0)
     cls.task = ps.SubgroupDiscoveryTask(
         credit_frame,
         amount_target,
         nominal_selectors,
         result_set_size=10,
         depth=3,
         qf=quality_function,
     )

     searcher = ps.SimpleDFS()
     cls.result = searcher.execute(cls.task)
    def setUp(self):
        """Prepare the fixture: result conjunctions, qualities and the task.

        Sets three attributes:

        * ``self.result`` -- ten ``ps.Conjunction`` objects built from
          equality selectors on the credit data columns.
        * ``self.qualities`` -- ten float values, one per conjunction
          (presumably the expected quality of each; verify against the
          code that consumes them).
        * ``self.task`` -- a ``ps.SubgroupDiscoveryTask`` on the
          ``credit_amount`` target with a call-counting wrapper around
          ``StandardQFNumeric(1, False, 'sum')``.
        """
        # The selector values are byte strings.
        telephone = ps.EqualitySelector("own_telephone", b"yes")
        foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
        other_parties = ps.EqualitySelector("other_parties", b"none")
        personal = ps.EqualitySelector("personal_status", b'male single')
        job = ps.EqualitySelector("job", b'high qualif/self emp/mgmt')
        credit_class = ps.EqualitySelector("class", b"bad")

        selector_sets = [
            [telephone],
            [foreign_worker, telephone],
            [other_parties, telephone],
            [foreign_worker, telephone, personal],
            [telephone, personal],
            [foreign_worker, other_parties, telephone],
            [job],
            [credit_class, telephone],
            [foreign_worker, job],
            [foreign_worker, other_parties, telephone, personal],
        ]
        self.result = [ps.Conjunction(selectors) for selectors in selector_sets]
        self.qualities = [
            383476.7679999999,
            361710.05800000014,
            345352.9920000001,
            338205.08,
            336857.8220000001,
            323586.28200000006,
            320306.81600000005,
            300963.84599999996,
            299447.332,
            297422.98200000013,
        ]

        credit_frame = get_credit_data()
        amount_target = ps.NumericTarget('credit_amount')
        nominal_selectors = ps.create_nominal_selectors(
            credit_frame, ignore=['credit_amount'])
        # Numeric selectors are currently disabled; the call that was used is
        # ps.create_numeric_selectors(data, ignore=['credit_amount'], nbins=10)
        numeric_selectors = []
        all_selectors = nominal_selectors + numeric_selectors

        counting_qf = ps.CountCallsInterestingMeasure(
            ps.StandardQFNumeric(1, False, 'sum'))
        self.task = ps.SubgroupDiscoveryTask(
            credit_frame,
            amount_target,
            all_selectors,
            result_set_size=10,
            depth=5,
            qf=counting_qf,
        )
import pprint
import numpy as np
import pandas as pd
import pysubgroup as ps


# Pretty-printer configured with a four-space indent.
pp = pprint.PrettyPrinter(indent=4)

# Toy data set: five rows with a target column and one nominal attribute "A".
# Mixing ints and strings in one NumPy array yields a string array, so the
# "Target" column is converted back to numbers right after construction.
data = pd.DataFrame(
    np.array([[1, 2, 3, 4, 5], ["F", "F", "F", "Tr", "Tr"]]).T,
    columns=["Target", "A"],
)
data["Target"] = pd.to_numeric(data["Target"])

# Numeric target on the "Target" column; show the column it refers to.
target = ps.NumericTarget('Target')
print(data[target.target_variable])

# Subgroup description: rows where attribute "A" equals "Tr".
sgd = ps.EqualitySelector("A", "Tr")
target.calculate_statistics(sgd, data)

# Print the quality of that subgroup, then its optimistic estimate.
qf = ps.StandardQFNumeric(1.0)
quality = qf.evaluate(sgd, target, data)
print(quality)
estimate = qf.optimistic_estimate(sgd, target, data)
print(estimate)
 def test_BeamSearch_sum(self):
     """Run ``ps.BeamSearch`` on the task with a fresh 'sum' quality function.

     The task's quality function is replaced by a new call-counting
     wrapper around ``StandardQFNumeric`` that reuses the ``a`` value of
     the current one, then ``self.runAlgorithm`` is invoked with the
     fixture's ``result`` and ``qualities``.
     """
     weight = self.task.qf.a
     sum_qf = ps.StandardQFNumeric(weight, False, 'sum')
     self.task.qf = ps.CountCallsInterestingMeasure(sum_qf)

     self.runAlgorithm(
         ps.BeamSearch(),
         "BeamSearch sum",
         self.result,
         self.qualities,
         self.task,
     )
 def test_DFS_sum(self):
     """Run ``ps.DFS`` (bit-set representation) with a fresh 'sum' quality function.

     The task's quality function is replaced by a new call-counting
     wrapper around ``StandardQFNumeric`` that reuses the ``a`` value of
     the current one, then ``self.runAlgorithm`` is invoked with the
     fixture's ``result`` and ``qualities``.
     """
     weight = self.task.qf.a
     sum_qf = ps.StandardQFNumeric(weight, False, 'sum')
     self.task.qf = ps.CountCallsInterestingMeasure(sum_qf)

     self.runAlgorithm(
         ps.DFS(ps.BitSetRepresentation),
         "DFS sum",
         self.result,
         self.qualities,
         self.task,
     )
import pysubgroup as ps
import pandas as pd
from scipy.io import arff
from timeit import default_timer as timer

# Load the credit data; loadarff returns a tuple and only its first item
# (the records) is turned into a DataFrame.
data = pd.DataFrame(arff.loadarff("../data/credit-g.arff")[0])

# Numeric target on credit_amount; that column is kept out of the selectors.
target = ps.NumericTarget('credit_amount')
search_space = ps.create_nominal_selectors(data, ignore=['credit_amount'])

task = ps.SubgroupDiscoveryTask(
    data,
    target,
    search_space,
    result_set_size=10,
    depth=3,
    qf=ps.StandardQFNumeric(1, False),
)
print(search_space)

# Time only the search itself.
start = timer()
result = ps.SimpleDFS().execute(task)
end = timer()
print(f"Time elapsed: {end - start}")

# One line per result entry: quality, a tab, then the subgroup description.
for q, sg in result:
    print(f"{q!s}:\t{sg.subgroup_description!s}")