Ejemplo n.º 1
0
 def setUpClass(cls):
     """Run one SimpleDFS search on the credit data and keep it on the class.

     Stores the configured discovery task in ``cls.task`` and the outcome of
     ``ps.SimpleDFS().execute`` in ``cls.result``.
     """
     credit_data = get_credit_data()
     numeric_target = ps.NumericTarget('credit_amount')
     # The target column itself must not appear among the describing selectors.
     search_space = ps.create_nominal_selectors(
         credit_data, ignore=['credit_amount'])
     quality_function = ps.StandardQFNumeric(1.0)

     cls.task = ps.SubgroupDiscoveryTask(
         credit_data,
         numeric_target,
         search_space,
         result_set_size=10,
         depth=3,
         qf=quality_function)
     cls.result = ps.SimpleDFS().execute(cls.task)
    def setUp(self):
        """Prepare the reference results and the discovery task for a test.

        Sets three attributes:

        * ``self.result`` -- ten ``ps.Conjunction`` objects built from
          equality selectors on the credit data columns.
        * ``self.qualities`` -- ten floats; presumably the expected quality
          of each conjunction in the same order (confirm against the
          assertions that consume them).
        * ``self.task`` -- a depth-5 ``ps.SubgroupDiscoveryTask`` over the
          nominal selectors, targeting ``credit_amount``.
        """
        telephone = ps.EqualitySelector("own_telephone", b"yes")
        foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
        other_parties = ps.EqualitySelector("other_parties", b"none")
        male_single = ps.EqualitySelector("personal_status", b'male single')
        high_qualified_job = ps.EqualitySelector(
            "job", b'high qualif/self emp/mgmt')
        bad_class = ps.EqualitySelector("class", b"bad")

        # One entry per reference subgroup; order matches self.qualities.
        selector_groups = [
            [telephone],
            [foreign_worker, telephone],
            [other_parties, telephone],
            [foreign_worker, telephone, male_single],
            [telephone, male_single],
            [foreign_worker, other_parties, telephone],
            [high_qualified_job],
            [bad_class, telephone],
            [foreign_worker, high_qualified_job],
            [foreign_worker, other_parties, telephone, male_single],
        ]
        self.result = [ps.Conjunction(group) for group in selector_groups]
        self.qualities = [
            383476.7679999999,
            361710.05800000014,
            345352.9920000001,
            338205.08,
            336857.8220000001,
            323586.28200000006,
            320306.81600000005,
            300963.84599999996,
            299447.332,
            297422.98200000013,
        ]

        credit_data = get_credit_data()
        numeric_target = ps.NumericTarget('credit_amount')
        nominal_selectors = ps.create_nominal_selectors(
            credit_data, ignore=['credit_amount'])
        # Numeric selectors are intentionally disabled here; the original
        # call was ps.create_numeric_selectors(data,
        # ignore=['credit_amount'], nbins=10).
        numeric_selectors = []
        search_space = nominal_selectors + numeric_selectors

        # The quality function is wrapped in CountCallsInterestingMeasure
        # (presumably so tests can inspect how often it is evaluated).
        quality_function = ps.CountCallsInterestingMeasure(
            ps.StandardQFNumeric(1, False, 'sum'))
        self.task = ps.SubgroupDiscoveryTask(
            credit_data,
            numeric_target,
            search_space,
            result_set_size=10,
            depth=5,
            qf=quality_function)
Ejemplo n.º 3
0
import pprint
import numpy as np
import pandas as pd
import pysubgroup as ps


pp = pprint.PrettyPrinter(indent=4)

# Toy data set: five rows with a numeric "Target" and a nominal column "A".
# The mixed int/str literal is stored by NumPy as strings, so "Target" is
# converted back to a numeric dtype right after the frame is built.
data = pd.DataFrame(
    np.array([[1, 2, 3, 4, 5], ["F", "F", "F", "Tr", "Tr"]]).T,
    columns=["Target", "A"],
)
data["Target"] = pd.to_numeric(data["Target"])

# Show the raw target column before any subgroup is applied.
target = ps.NumericTarget('Target')
target_column = data[target.target_variable]
print(target_column)

# Subgroup description: rows where A == "Tr".
sgd = ps.EqualitySelector("A", "Tr")
target.calculate_statistics(sgd, data)

# Print the quality of the subgroup, then its optimistic estimate.
qf = ps.StandardQFNumeric(1.0)
quality = qf.evaluate(sgd, target, data)
print(quality)
estimate = qf.optimistic_estimate(sgd, target, data)
print(estimate)
Ejemplo n.º 4
0
from scipy.io import arff
import pysubgroup as ps
import pandas as pd

import pprint
pp = pprint.PrettyPrinter(indent=4)

# loadarff returns a (records, metadata) pair; only the records are needed.
arff_records = arff.loadarff("../data/credit-g.arff")[0]
data = pd.DataFrame(arff_records)

# Subgroup of interest: rows whose "purpose" is b"other" (the nominal values
# are compared as bytes literals here).
target = ps.NumericTarget('credit_amount')
purpose_is_other = ps.NominalSelector("purpose", b"other")
sg = ps.Subgroup(target, purpose_is_other)

base_statistics = target.get_base_statistics(data, sg)
print(base_statistics)

sg.calculateStatistics(data)
# The statistics can be inspected with: pp.pprint(sg.statistics)

# Score the subgroup with the numeric standard quality function.
qf = ps.StandardQF_numeric(1.0)
quality = qf.evaluateFromDataset(data, sg)
print(quality)