Esempio n. 1
0
import pysubgroup as ps
import pandas as pd
import numpy as np

import pprint

# Pretty-printer kept for the optional statistics dump further down.
pp = pprint.PrettyPrinter(indent=4)

# Build the toy dataset column by column so "Target" is numeric from the
# start. Stacking ints and strings into a single NumPy array would coerce
# every value to a string and require re-parsing the column afterwards.
data = pd.DataFrame({
    "Target": [1, 2, 3, 4, 5],
    "A": ["F", "F", "F", "Tr", "Tr"],
})

# Numeric target on the "Target" column; print the raw target values.
target = ps.NumericTarget('Target')
print(data[target.target_variable])

# Subgroup described by the nominal selector on column "A" with value "Tr".
sg = ps.Subgroup(target, ps.NominalSelector("A", "Tr"))
print(target.get_base_statistics(data, sg))
sg.calculateStatistics(data)
# Uncomment to dump the statistics computed by the call above:
# pp.pprint (sg.statistics)

# Standard numeric quality function with parameter 1.0; print its
# optimistic estimate for the subgroup on this dataset.
qf = ps.StandardQF_numeric(1.0)
print(qf.optimisticEstimateFromDataset(data, sg))
Esempio n. 2
0
from scipy.io import arff
import pysubgroup as ps
import pandas as pd

import pprint
# Pretty-printer kept for the optional statistics dump further down.
pp = pprint.PrettyPrinter(indent=4)

# loadarff returns a (records, metadata) pair; only the records are used.
arff_records, _arff_meta = arff.loadarff("../data/credit-g.arff")
data = pd.DataFrame(arff_records)

# Numeric target on "credit_amount", restricted by a nominal selector on
# "purpose". The selector value is a bytes literal -- presumably because the
# ARFF loader yields nominal attributes as byte strings (TODO confirm).
target = ps.NumericTarget('credit_amount')
purpose_selector = ps.NominalSelector("purpose", b"other")
sg = ps.Subgroup(target, purpose_selector)

base_statistics = target.get_base_statistics(data, sg)
print(base_statistics)

sg.calculateStatistics(data)
# Uncomment to dump the statistics computed by the call above:
# pp.pprint (sg.statistics)

# Standard numeric quality function with parameter 1.0, evaluated for the
# subgroup on the full dataset.
qf = ps.StandardQF_numeric(1.0)
quality = qf.evaluateFromDataset(data, sg)
print(quality)
Esempio n. 3
0
'''
Created on 10.05.2017

@author: lemmerfn
'''
import pandas as pd
import pysubgroup as ps

if __name__ == '__main__':
    # Load the Titanic CSV; the resulting frame is not used by the
    # comparison below.
    data = pd.read_csv("~/datasets/titanic.csv")

    # Both subgroups share the same target object and each gets its own
    # empty selector list.
    target = ps.NominalSelector('survived', 0)
    first = ps.Subgroup(target, [])
    second = ps.Subgroup(target, [])

    # Print whether the two separately constructed subgroups compare equal.
    are_equal = first == second
    print(are_equal)