def setUpClass(cls):
    """Build the shared discovery task and run SimpleDFS on it once.

    Loads the data via ``get_credit_data()``, uses the numeric column
    ``'credit_amount'`` as target, and creates nominal selectors over the
    data while ignoring that same column.  The configured task
    (``result_set_size=10``, ``depth=3``, ``StandardQFNumeric(1.0)``) is
    stored on ``cls.task`` and the output of ``SimpleDFS().execute`` on
    ``cls.result``.

    NOTE(review): no ``@classmethod`` decorator is visible on this line;
    presumably it is applied where this method lives -- confirm.
    """
    credit_df = get_credit_data()
    numeric_target = ps.NumericTarget('credit_amount')

    # The target column is excluded from the selectors that form the
    # search space.
    selectors = ps.create_nominal_selectors(
        credit_df,
        ignore=['credit_amount'],
    )

    quality_function = ps.StandardQFNumeric(1.0)
    cls.task = ps.SubgroupDiscoveryTask(
        credit_df,
        numeric_target,
        selectors,
        result_set_size=10,
        depth=3,
        qf=quality_function,
    )
    cls.result = ps.SimpleDFS().execute(cls.task)
def setUp(self):
    """Prepare the reference conjunctions, reference qualities and the task.

    Sets three attributes:

    * ``self.result``    -- ten ``ps.Conjunction`` objects built from the
      selector combinations listed below.
    * ``self.qualities`` -- ten float quality values.
    * ``self.task``      -- a ``ps.SubgroupDiscoveryTask`` on the credit data
      with ``result_set_size=10``, ``depth=5`` and a call-counting wrapper
      around ``StandardQFNumeric(1, False, 'sum')``.

    NOTE(review): ``result`` and ``qualities`` presumably hold the expected
    algorithm output in matching order -- confirm against the test methods
    that consume them.
    """
    # Selector values are bytes literals, exactly as in the reference data.
    sel_telephone = ps.EqualitySelector("own_telephone", b"yes")
    sel_foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
    sel_other_parties = ps.EqualitySelector("other_parties", b"none")
    sel_personal = ps.EqualitySelector("personal_status", b'male single')
    sel_job = ps.EqualitySelector("job", b'high qualif/self emp/mgmt')
    sel_class = ps.EqualitySelector("class", b"bad")

    selector_groups = [
        [sel_telephone],
        [sel_foreign_worker, sel_telephone],
        [sel_other_parties, sel_telephone],
        [sel_foreign_worker, sel_telephone, sel_personal],
        [sel_telephone, sel_personal],
        [sel_foreign_worker, sel_other_parties, sel_telephone],
        [sel_job],
        [sel_class, sel_telephone],
        [sel_foreign_worker, sel_job],
        [sel_foreign_worker, sel_other_parties, sel_telephone, sel_personal],
    ]
    self.result = [ps.Conjunction(group) for group in selector_groups]

    self.qualities = [
        383476.7679999999,
        361710.05800000014,
        345352.9920000001,
        338205.08,
        336857.8220000001,
        323586.28200000006,
        320306.81600000005,
        300963.84599999996,
        299447.332,
        297422.98200000013,
    ]

    credit_df = get_credit_data()
    numeric_target = ps.NumericTarget('credit_amount')

    nominal_selectors = ps.create_nominal_selectors(
        credit_df, ignore=['credit_amount'])
    # Numeric selectors are switched off; the original call was
    # ps.create_numeric_selectors(data, ignore=['credit_amount'], nbins=10)
    numeric_selectors = []
    search_space = nominal_selectors + numeric_selectors

    # Wrap the quality function so that calls to it are counted.
    counting_qf = ps.CountCallsInterestingMeasure(
        ps.StandardQFNumeric(1, False, 'sum'))
    self.task = ps.SubgroupDiscoveryTask(
        credit_df,
        numeric_target,
        search_space,
        result_set_size=10,
        depth=5,
        qf=counting_qf,
    )
"""Toy example: evaluate a numeric-target quality function on five rows."""
import pprint

import numpy as np
import pandas as pd
import pysubgroup as ps

pp = pprint.PrettyPrinter(indent=4)

# Five rows with a numeric "Target" column and a nominal "A" column.  The
# values pass through a single mixed NumPy array first, so "Target" has to
# be converted back to a numeric dtype afterwards.
data = pd.DataFrame(
    np.array([[1, 2, 3, 4, 5], ["F", "F", "F", "Tr", "Tr"]]).T,
    columns=["Target", "A"],
)
data["Target"] = pd.to_numeric(data["Target"])

target = ps.NumericTarget('Target')
print(data[target.target_variable])

# Subgroup description: rows where A == "Tr".
sgd = ps.EqualitySelector("A", "Tr")
target.calculate_statistics(sgd, data)  # return value is not used here

qf = ps.StandardQFNumeric(1.0)

# Compute and print one value at a time to keep the original call order.
quality = qf.evaluate(sgd, target, data)
print(quality)
estimate = qf.optimistic_estimate(sgd, target, data)
print(estimate)
"""Example: quality of the subgroup purpose == b"other" on the credit-g data."""
import pprint

import pandas as pd
import pysubgroup as ps
from scipy.io import arff

pp = pprint.PrettyPrinter(indent=4)

# loadarff's result is indexed with [0]; only that first element is used.
arff_records = arff.loadarff("../data/credit-g.arff")[0]
data = pd.DataFrame(arff_records)

target = ps.NumericTarget('credit_amount')

# The selector value is a bytes literal -- presumably because the nominal
# ARFF values are loaded as bytes; confirm against the loaded frame.
purpose_is_other = ps.NominalSelector("purpose", b"other")
sg = ps.Subgroup(target, purpose_is_other)

base_statistics = target.get_base_statistics(data, sg)
print(base_statistics)

sg.calculateStatistics(data)
# Optional debugging output: pp.pprint(sg.statistics)

# NOTE(review): StandardQF_numeric / calculateStatistics / evaluateFromDataset
# differ from the naming seen elsewhere (e.g. StandardQFNumeric, evaluate);
# confirm they exist in the installed pysubgroup version.
qf = ps.StandardQF_numeric(1.0)
print(qf.evaluateFromDataset(data, sg))