def setUpClass(cls):
    """Prepare the class-level discovery task and a SimpleDFS run on it.

    Sets ``cls.task`` to a ``SubgroupDiscoveryTask`` built from the data
    returned by ``get_credit_data()`` and ``cls.result`` to the value
    returned by ``SimpleDFS().execute`` for that task.
    """
    credit_frame = get_credit_data()
    bad_target = ps.BinaryTarget('class', b'bad')
    # The target column 'class' is excluded when creating the selectors.
    selectors = ps.create_nominal_selectors(credit_frame, ignore=['class'])
    quality_function = ps.StandardQF(1.0)

    cls.task = ps.SubgroupDiscoveryTask(
        credit_frame,
        bad_target,
        selectors,
        result_set_size=10,
        depth=5,
        qf=quality_function,
    )
    dfs = ps.SimpleDFS()
    cls.result = dfs.execute(cls.task)
import pandas as pd

import pysubgroup as ps

# Read the table and build a binary target on column 'Survived' with value 0.
data = pd.read_table("../data/titanic.csv")
target = ps.BinaryTarget('Survived', 0)

# The target column itself is excluded from the selectors.
search_space = ps.create_selectors(data, ignore=['Survived'])

# Quality function: StandardQF(1) combined with GeneralizationAware_StandardQF(1).
combined_qf = ps.CombinedInterestingnessMeasure(
    [ps.StandardQF(1), ps.GeneralizationAware_StandardQF(1)]
)
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    search_space,
    result_set_size=5,
    depth=2,
    qf=combined_qf,
)

# Run SimpleDFS with optimistic estimates switched off and print the result table.
dfs = ps.SimpleDFS()
result = dfs.execute(task, use_optimistic_estimates=False)
print(result.to_dataframe())
import pandas as pd
from scipy.io import arff

import pysubgroup as ps


def _print_results(pairs):
    """Print one ``quality:<TAB>description`` line per (quality, subgroup) pair."""
    for quality, subgroup in pairs:
        print(str(quality), str(subgroup.subgroupDescription), sep=":\t")


# loadarff returns (records, metadata); only the records go into the frame.
records, _meta = arff.loadarff("../data/credit-g.arff")
data = pd.DataFrame(records)

target = ps.NominalTarget('class', b'bad')
# The target column 'class' is excluded when creating the selectors.
searchSpace = ps.createNominalSelectors(data, ignore=['class'])
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    searchSpace,
    resultSetSize=10,
    depth=3,
    qf=ps.StandardQF(1.0),
)

# First run: beam search with beam width 10.
result = ps.BeamSearch(beamWidth=10).execute(task)
_print_results(result)

print("******")

# Second run: SimpleDFS on the same task.
result = ps.SimpleDFS().execute(task)
_print_results(result)

# print WRAccQF().evaluateFromDataset(data, Subgroup(target, []))
def test_SimpleDFS(self):
    """Pass a SimpleDFS instance to ``runAlgorithm`` under the name "SimpleDFS".

    The stored ``self.result``, ``self.qualities`` and ``self.task`` are
    forwarded unchanged.
    """
    algorithm = ps.SimpleDFS()
    self.runAlgorithm(
        algorithm,
        "SimpleDFS",
        self.result,
        self.qualities,
        self.task,
    )
def test_StandardQF_GA_SimpleDFS(self):
    """Run SimpleDFS through ``runAlgorithm`` with a generalization-aware QF.

    Replaces the task's quality function with
    ``GeneralizationAware_StandardQF(0.5)``, forwards the stored result,
    qualities and task to ``runAlgorithm``, then prints the quality
    function's ``cache`` attribute.
    """
    # NOTE(review): this rebinds ``qf`` on ``self.task`` in place; if the task
    # object is shared between tests the change stays visible afterwards
    # -- confirm that is intended.
    ga_quality = ps.GeneralizationAware_StandardQF(0.5)
    self.task.qf = ga_quality

    algorithm = ps.SimpleDFS()
    self.runAlgorithm(
        algorithm,
        "Standard_SimpleDFS",
        self.result,
        self.qualities,
        self.task,
    )
    print(self.task.qf.cache)
def test_DFS(self):
    """Pass a SimpleDFS instance to ``runAlgorithm`` under the name "DFS".

    ``self.result`` and ``self.qualities`` are each forwarded without their
    last element; ``self.task`` is forwarded unchanged.
    """
    algorithm = ps.SimpleDFS()
    trimmed_result = self.result[:-1]
    trimmed_qualities = self.qualities[:-1]
    self.runAlgorithm(
        algorithm,
        "DFS",
        trimmed_result,
        trimmed_qualities,
        self.task,
    )
import pandas as pd

import pysubgroup as ps

# Read the table and build a nominal target on column 'survived' with value 0.
data = pd.read_csv("~/datasets/titanic.csv")
target = ps.NominalTarget('survived', 0)

# The target column itself is excluded from the selectors.
searchSpace = ps.createSelectors(data, ignore=['survived'])

# Quality function: StandardQF(1) combined with GAStandardQF(1).
combined_qf = ps.CombinedInterestingnessMeasure(
    [ps.StandardQF(1), ps.GAStandardQF(1)]
)
task = ps.SubgroupDiscoveryTask(
    data,
    target,
    searchSpace,
    resultSetSize=5,
    depth=2,
    qf=combined_qf,
)

# Run SimpleDFS with optimistic estimates switched off.
dfs = ps.SimpleDFS()
result = dfs.execute(task, useOptimisticEstimates=False)

# One "quality:<TAB>description" line per returned (quality, subgroup) pair.
for quality, subgroup in result:
    print(str(quality), str(subgroup.subgroupDescription), sep=":\t")
searchSpace, result_set_size=200, depth=4, qf=QF) GpGrowth(mode='b_u').to_file(task, 'E:/tmp/gp_credit.txt') import time start_time = time.time() gp = GpGrowth(mode='b_u').execute(task) print("--- %s seconds ---" % (time.time() - start_time)) #gp = [(qual, sg) for qual, sg in gp if sg.depth <= task.depth] gp = sorted(gp) quit() start_time = time.time() dfs1 = ps.SimpleDFS().execute(task) print("--- %s seconds ---" % (time.time() - start_time)) dfs = [(qual, sg.subgroup_description) for qual, sg in dfs1] dfs = sorted(dfs, reverse=True) gp = sorted(gp, reverse=True) def better_sorted(l): the_dict = defaultdict(list) prev_key = l[0][0] for key, val in l: if abs(prev_key - key) < 10**-11: the_dict[prev_key].append(val) else: the_dict[key].append(val) prev_key = key