# Toy example: build a five-row data set, describe one subgroup with a nominal
# selector on attribute "A", and print statistics / an optimistic estimate for
# a numeric target.
import pysubgroup as ps
import pandas as pd
import numpy as np
import pprint

pp = pprint.PrettyPrinter(indent=4)

# Build the frame column by column so "Target" is numeric from the start.
# Stacking both columns into one NumPy array would coerce the numbers to
# strings and force a conversion back afterwards.
data = pd.DataFrame({
    "Target": [1, 2, 3, 4, 5],
    "A": ["F", "F", "F", "Tr", "Tr"],
})

target = ps.NumericTarget('Target')
print(data[target.target_variable])

# Subgroup under inspection: nominal selector on attribute "A" with value "Tr".
sg = ps.Subgroup(target, ps.NominalSelector("A", "Tr"))
print(target.get_base_statistics(data, sg))

sg.calculateStatistics(data)
# Debugging aid, disabled by default (presumably calculateStatistics fills
# sg.statistics -- confirm against the pysubgroup version in use):
# pp.pprint(sg.statistics)

# NOTE(review): the meaning of the 1.0 argument is not visible here; check the
# StandardQF_numeric documentation before changing it.
qf = ps.StandardQF_numeric(1.0)
print(qf.optimisticEstimateFromDataset(data, sg))
# Example: load the credit-g ARFF file and print statistics and the quality of
# one subgroup (nominal selector on "purpose") for the numeric target
# 'credit_amount'.
from scipy.io import arff
import pysubgroup as ps
import pandas as pd
import pprint

pp = pprint.PrettyPrinter(indent=4)

# loadarff returns a (records, metadata) pair; only the records are used.
records, _meta = arff.loadarff("../data/credit-g.arff")
data = pd.DataFrame(records)

target = ps.NumericTarget('credit_amount')

# The selector value is a bytes literal -- presumably because the loaded
# nominal values are bytes; confirm before switching it to str.
purpose_is_other = ps.NominalSelector("purpose", b"other")
sg = ps.Subgroup(target, purpose_is_other)

base_statistics = target.get_base_statistics(data, sg)
print(base_statistics)

sg.calculateStatistics(data)
# Debugging aid, disabled by default:
# pp.pprint(sg.statistics)

qf = ps.StandardQF_numeric(1.0)
quality = qf.evaluateFromDataset(data, sg)
print(quality)
'''
Created on 10.05.2017

@author: lemmerfn
'''
import pandas as pd
import pysubgroup as ps


if __name__ == '__main__':
    # Build two subgroups from the same target selector and the same (empty)
    # selector list, then print the result of comparing them with ==.
    #
    # No data set is loaded: nothing below reads one, so the check no longer
    # depends on a CSV file being present in the user's home directory.
    target = ps.NominalSelector('survived', 0)
    s1 = ps.Subgroup(target, [])
    s2 = ps.Subgroup(target, [])
    print(s1 == s2)