Exemple #1
0
 def setUpClass(cls):
     """Create the discovery task on the credit data and run SimpleDFS once.

     The task is stored on ``cls.task`` and the output of
     ``SimpleDFS().execute`` on ``cls.result``.
     """
     credit_data = get_credit_data()
     bad_class = ps.BinaryTarget('class', b'bad')
     # The target column 'class' is handed to ``ignore`` when the selectors
     # are created.
     selectors = ps.create_nominal_selectors(credit_data, ignore=['class'])
     task_options = dict(result_set_size=10, depth=5, qf=ps.StandardQF(1.0))
     cls.task = ps.SubgroupDiscoveryTask(credit_data, bad_class, selectors,
                                         **task_options)
     dfs = ps.SimpleDFS()
     cls.result = dfs.execute(cls.task)
Exemple #2
0
import pandas as pd
import pysubgroup as ps


# Subgroup discovery on the Titanic table: the target is the 'Survived'
# column with value 0, and candidates are scored by two quality functions
# wrapped in a CombinedInterestingnessMeasure.
data = pd.read_table("../data/titanic.csv")
target = ps.BinaryTarget('Survived', 0)

# The target column is passed via ``ignore`` when the selectors are created.
search_space = ps.create_selectors(data, ignore=['Survived'])

quality_measures = [ps.StandardQF(1), ps.GeneralizationAware_StandardQF(1)]
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    search_space,
    result_set_size=5,
    depth=2,
    qf=ps.CombinedInterestingnessMeasure(quality_measures),
)

dfs = ps.SimpleDFS()
result = dfs.execute(task, use_optimistic_estimates=False)

print(result.to_dataframe())
Exemple #3
0
from scipy.io import arff

import pysubgroup as ps
import pandas as pd

# loadarff returns a (records, metadata) pair; only the records (index 0)
# are turned into a DataFrame.
records = arff.loadarff("../data/credit-g.arff")[0]
data = pd.DataFrame(records)

target = ps.NominalTarget('class', b'bad')
searchSpace = ps.createNominalSelectors(data, ignore=['class'])
task = ps.SubgroupDiscoveryTask(data, target, searchSpace,
                                resultSetSize=10, depth=3,
                                qf=ps.StandardQF(1.0))


def _print_results(results):
    """Print each (quality, subgroup) pair as quality, colon, tab, description."""
    for quality, subgroup in results:
        # print() applies str() to both values and joins them with ``sep``.
        print(quality, subgroup.subgroupDescription, sep=":\t")


# Run the same task with beam search first, then with SimpleDFS, printing
# both result lists separated by a row of asterisks.
result = ps.BeamSearch(beamWidth=10).execute(task)
_print_results(result)

print("******")
result = ps.SimpleDFS().execute(task)
_print_results(result)

# print WRAccQF().evaluateFromDataset(data, Subgroup(target, []))
 def test_SimpleDFS(self):
     # Hand a fresh SimpleDFS instance, labelled "SimpleDFS", to the
     # runAlgorithm helper together with self.result, self.qualities and
     # self.task (presumably the expected values -- confirm in runAlgorithm).
     algorithm = ps.SimpleDFS()
     self.runAlgorithm(algorithm, "SimpleDFS", self.result,
                       self.qualities, self.task)
 def test_StandardQF_GA_SimpleDFS(self):
     # Replace the task's quality function with
     # GeneralizationAware_StandardQF(0.5) before handing the task to
     # runAlgorithm under the label "Standard_SimpleDFS".
     # NOTE(review): self.task.qf is reassigned and never restored; if
     # self.task is shared between tests this may leak into them -- confirm.
     ga_qf = ps.GeneralizationAware_StandardQF(0.5)
     self.task.qf = ga_qf
     algorithm = ps.SimpleDFS()
     self.runAlgorithm(algorithm, "Standard_SimpleDFS", self.result,
                       self.qualities, self.task)
     # Print the quality function's ``cache`` attribute after the run.
     print(self.task.qf.cache)
 def test_DFS(self):
     # Hand a SimpleDFS instance, labelled "DFS", to runAlgorithm with the
     # last entry of both self.result and self.qualities dropped.
     trimmed_result = self.result[:-1]
     trimmed_qualities = self.qualities[:-1]
     self.runAlgorithm(ps.SimpleDFS(), "DFS", trimmed_result,
                       trimmed_qualities, self.task)
Exemple #7
0
import pysubgroup as ps
import pandas as pd

# Subgroup discovery on the Titanic CSV: the target is the 'survived' column
# with value 0, and that column is passed via ``ignore`` when the selectors
# are created.
data = pd.read_csv("~/datasets/titanic.csv")
target = ps.NominalTarget('survived', 0)
searchSpace = ps.createSelectors(data, ignore=['survived'])

# Two quality functions wrapped in one CombinedInterestingnessMeasure.
combined_qf = ps.CombinedInterestingnessMeasure(
    [ps.StandardQF(1), ps.GAStandardQF(1)])
task = ps.SubgroupDiscoveryTask(data, target, searchSpace,
                                resultSetSize=5, depth=2, qf=combined_qf)

result = ps.SimpleDFS().execute(task, useOptimisticEstimates=False)

# One line per (quality, subgroup) pair: quality, colon, tab, description.
for quality, subgroup in result:
    print(quality, subgroup.subgroupDescription, sep=":\t")
Exemple #8
0
                                    searchSpace,
                                    result_set_size=200,
                                    depth=4,
                                    qf=QF)
    GpGrowth(mode='b_u').to_file(task, 'E:/tmp/gp_credit.txt')

    import time
    start_time = time.time()
    gp = GpGrowth(mode='b_u').execute(task)
    print("--- %s seconds ---" % (time.time() - start_time))
    #gp = [(qual, sg) for qual, sg in gp if sg.depth <= task.depth]
    gp = sorted(gp)
    quit()

    start_time = time.time()
    dfs1 = ps.SimpleDFS().execute(task)
    print("--- %s seconds ---" % (time.time() - start_time))
    dfs = [(qual, sg.subgroup_description) for qual, sg in dfs1]
    dfs = sorted(dfs, reverse=True)
    gp = sorted(gp, reverse=True)

    def better_sorted(l):
        the_dict = defaultdict(list)
        prev_key = l[0][0]
        for key, val in l:

            if abs(prev_key - key) < 10**-11:
                the_dict[prev_key].append(val)
            else:
                the_dict[key].append(val)
                prev_key = key