def test_DNF(self):
        """Check DNF construction variants and the rows each DNF selects.

        Equivalent ways of building a DNF (constructor argument versus
        ``append_or`` / ``append_and``) must compare equal, and querying
        the small data frame stored in ``self.df`` must yield the expected
        0/1 row masks passed to ``self.check_dataframe_query``.
        """
        A1 = ps.EqualitySelector("A1", 1)
        A2 = ps.EqualitySelector("A2", 1, "AA")
        B1 = ps.EqualitySelector("B1", 1)
        B2 = ps.EqualitySelector("B2", "1")

        # OR of the two selectors, built incrementally and via the constructor.
        dnf1 = ps.DNF()
        dnf1.append_or([A1, A2])
        dnf2 = ps.DNF([A1, A2])
        self.assertEqual(dnf1, dnf2)

        # AND of the two selectors, built in three different ways.
        dnf3 = ps.DNF(ps.Conjunction([A1, A2]))
        dnf4 = ps.DNF()
        dnf4.append_and([A1, A2])
        dnf5 = ps.DNF()
        dnf5.append_and(A1)
        dnf5.append_and(A2)
        self.assertEqual(dnf3, dnf4)
        self.assertEqual(dnf4, dnf5)

        dnf6 = ps.DNF([])
        dnf6.append_and([B1, B2])
        dnf7 = ps.DNF([])
        dnf7.append_and([A1, A2])
        dnf7.append_or(ps.Conjunction([B1, B2]))

        columns = {
            "A1": [1, 1, 1, 2, 2, 2, 2, 0, 0, 0],
            "A2": [0, 1, 1, 1, 2, 2, 2, 0, 0, 0],
            "B1": [0, 0, 0, 0, 1, 1, 1, 0, 1, 1],
            "B2": ["0", "0", "0", "0", "1", "1", "2", "0", "0", "1"],
        }
        # Single-line assignment so the pragma sits on the reported line.
        self.df = pd.DataFrame.from_dict(columns)  # pylint: disable=attribute-defined-outside-init

        # Expected masks: A1 or A2; A1 and A2; B1 and B2; (A1 and A2) or (B1 and B2).
        self.check_dataframe_query(dnf1, [1, 1, 1, 1, 0, 0, 0, 0, 0, 0])
        self.check_dataframe_query(dnf3, [0, 1, 1, 0, 0, 0, 0, 0, 0, 0])
        self.check_dataframe_query(dnf6, [0, 0, 0, 0, 1, 1, 0, 0, 0, 1])
        self.check_dataframe_query(dnf7, [0, 1, 1, 0, 1, 1, 0, 0, 0, 1])
Esempio n. 2
0
 def setUp(self):
     """Prepare the reference conjunctions, their qualities and the task.

     Stores ten conjunctions in ``self.result``, ten quality values in
     ``self.qualities`` and a ``SubgroupDiscoveryTask`` on the credit data
     (binary target ``class == b'bad'``) in ``self.task``.
     """
     checking = ps.EqualitySelector("checking_status", b"<0")
     foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
     other_parties = ps.EqualitySelector("other_parties", b"none")
     savings = ps.EqualitySelector("savings_status", b"<100")
     job = ps.EqualitySelector("job", b"skilled")

     # One inner list per reference subgroup, in the same order as the qualities.
     selector_sets = [
         [checking, foreign_worker],
         [checking],
         [checking, other_parties, foreign_worker],
         [checking, other_parties],
         [checking, savings, foreign_worker],
         [checking, savings],
         [checking, savings, other_parties, foreign_worker],
         [checking, job, foreign_worker],
         [checking, savings, other_parties],
         [checking, job],
     ]
     self.result = [ps.Conjunction(selectors) for selectors in selector_sets]
     self.qualities = [
         0.055299999999999995,
         0.05280000000000001,
         0.052300000000000006,
         0.05059999999999999,
         0.04959999999999999,
         0.048299999999999996,
         0.04660000000000001,
         0.04550000000000001,
         0.0452,
         0.044399999999999995,
     ]

     credit = get_credit_data()
     bad_credit = ps.BinaryTarget('class', b'bad')
     # The target column is excluded from the search space.
     nominal_selectors = ps.create_nominal_selectors(credit, ignore=['class'])
     self.task = ps.SubgroupDiscoveryTask(
         credit,
         bad_credit,
         nominal_selectors,
         result_set_size=10,
         depth=5,
         qf=ps.StandardQF(1.0),
     )
Esempio n. 3
0
    def test_str_representations(self):
        """Verify ``str()`` and ``repr()`` of equality, negated and interval selectors."""

        def check(selector, expected_str, expected_repr):
            # str() is asserted before repr(), one selector at a time.
            self.assertEqual(str(selector), expected_str)
            self.assertEqual(repr(selector), expected_repr)

        check(ps.EqualitySelector("A", 1), "A==1", "A==1")
        check(ps.EqualitySelector("BB", 2), "BB==2", "BB==2")
        check(ps.EqualitySelector("CCC", True), "CCC==True", "CCC==True")

        # Negation uses a different notation for str() and repr().
        check(ps.NegatedSelector(ps.EqualitySelector("CCC", True)),
              "NOT CCC==True",
              "(not CCC==True)")

        check(ps.IntervalSelector("test", 10, 15),
              "test: [10:15[",
              "test: [10:15[")

        # Float bounds: str() shows two decimals, repr() the full value.
        check(ps.IntervalSelector("test2", np.sqrt(2), np.sqrt(3)),
              "test2: [1.41:1.73[",
              "test2: [1.4142135623730951:1.7320508075688772[")
    def test_equality_expressions(self):
        """Check ``==`` and ``hash`` consistency of Disjunction and Conjunction.

        Expressions built from the same selectors must be equal and hash
        alike, also after extending a clone with ``append_or`` /
        ``append_and``. A Conjunction and a Disjunction over the same
        selectors must differ in both equality and hash.
        """
        A1 = ps.EqualitySelector("A", 1)
        A2 = ps.EqualitySelector("A", 2, "AA")
        B1 = ps.EqualitySelector("B", 1)

        D1 = ps.Disjunction([A1, A2])
        D1_clone = ps.Disjunction([A1, A2])
        self.assertEqual(D1, D1_clone)
        self.assertEqual(hash(D1), hash(D1_clone))

        D_all = ps.Disjunction([A1, A2, B1])
        D1_clone.append_or(B1)
        self.assertEqual(D_all, D1_clone)
        self.assertEqual(hash(D_all), hash(D1_clone))

        C1 = ps.Conjunction([A1, A2])
        C1_clone = ps.Conjunction([A1, A2])
        self.assertEqual(C1, C1_clone)
        self.assertEqual(hash(C1), hash(C1_clone))

        C_all = ps.Conjunction([A1, A2, B1])
        C1_clone.append_and(B1)
        self.assertEqual(C_all, C1_clone)
        self.assertEqual(hash(C_all), hash(C1_clone))

        # Kept as an explicit ``==`` check: assertNotEqual would exercise
        # ``!=`` instead of the ``==`` operator this test is about.
        self.assertFalse(C1 == D1)
        self.assertNotEqual(hash(C1), hash(D1))
 def setUp(self):
     """Prepare reference results, random prediction data and the task.

     Sets ``self.DFSresult`` / ``self.DFSqualities`` and ``self.result`` /
     ``self.qualities`` as reference values, draws seeded random target
     variables and estimates, and builds ``self.task`` with a
     ``PredictionTarget`` scored by ``roc_auc_score``.
     """
     payment = ps.EqualitySelector("other_payment_plans",b"none")
     foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
     other_parties = ps.EqualitySelector("other_parties", b"none")
     housing = ps.EqualitySelector("housing", b'own')
     good_class = ps.EqualitySelector("class", b"good")

     # Reference list with a leading ``True`` entry ahead of the conjunctions.
     dfs_selector_sets = [
         [foreign_worker],
         [other_parties],
         [foreign_worker, other_parties],
         [payment],
         [foreign_worker, payment],
         [other_parties, payment],
         [housing],
         [good_class],
         [foreign_worker, other_parties, payment],
     ]
     self.DFSresult = [True] + [
         ps.Conjunction(selectors) for selectors in dfs_selector_sets
     ]
     self.DFSqualities = [
         500.4980179286455,
         483.3153195123844,
         459.2862838915471,
         444.60343785358896,
         398.25539855072464,
         384.0460358056267,
         362.090608537693,
         355.0749649843413,
         355.010575658835,
         349.8188702669149,
     ]

     # Second reference list: no leading ``True``, one extra conjunction at the end.
     selector_sets = [
         [foreign_worker],
         [other_parties],
         [foreign_worker, other_parties],
         [payment],
         [foreign_worker, payment],
         [other_parties, payment],
         [housing],
         [good_class],
         [foreign_worker, other_parties, payment],
         [foreign_worker, housing],
     ]
     self.result = [ps.Conjunction(selectors) for selectors in selector_sets]
     self.qualities = [
         483.3153195123844,
         459.2862838915471,
         444.60343785358896,
         398.25539855072464,
         384.0460358056267,
         362.090608537693,
         355.0749649843413,
         355.010575658835,
         349.8188702669149,
         342.20780439530444,
     ]

     # Fixed seed so the random labels and estimates are reproducible.
     np.random.seed(1111)
     self.target_variables = np.random.randint(low=0, high=2, size=1000)
     self.target_estimates = np.random.uniform(size=1000)

     credit = get_credit_data()
     prediction_target = ps.PredictionTarget(
         self.target_variables, self.target_estimates, roc_auc_score)
     nominal_selectors = ps.create_nominal_selectors(
         credit, ignore=['credit_amount'])
     # Numeric selectors are intentionally left out of the search space.
     numeric_selectors = []
     search_space = nominal_selectors + numeric_selectors
     self.task = ps.SubgroupDiscoveryTask(
         credit,
         prediction_target,
         search_space,
         result_set_size=10,
         depth=5,
         qf=ps.CountCallsInterestingMeasure(ps.PredictionQFNumeric(1, False)),
     )
Esempio n. 6
0
 def test_create_selectors_with_nan(self):
     """Check that ``create_selectors`` yields NaN and value selectors.

     Column ``A`` holds only NaN; column ``B`` holds 10 and NaN. Selectors
     for ``A == nan``, ``B == nan`` and ``B == 10.0`` must all be present.
     """
     frame = pd.DataFrame.from_dict({
         'A': np.array([np.nan, np.nan, np.nan]),
         'B': np.array([10, np.nan, np.nan]),
     })
     created = ps.create_selectors(frame)

     # Expected selectors are built only after the call under test.
     expected = (
         ps.EqualitySelector('A', np.nan),
         ps.EqualitySelector('B', np.nan),
         ps.EqualitySelector('B', 10.),
     )
     for selector in expected:
         assert selector in created
    def prepare_df(self):
        """Create the shared selectors and the three-column frame ``self.df``.

        ``columnA`` is boolean, ``columnB`` holds single-letter strings and
        ``columnC`` alternates 0 and 1 over ten rows.
        """
        bool_column = np.array([0, 0, 1, 1, 0, 0, 1, 1, 1, 1], dtype=bool)
        self.A1 = ps.EqualitySelector("columnA", True)
        self.A0 = ps.EqualitySelector("columnA", False)

        letter_column = np.array(
            ["A", "B", "C", "C", "B", "A", "D", "A", "A", "A"])
        self.BA = ps.EqualitySelector("columnB", "A")
        self.BC = ps.EqualitySelector("columnB", "C")

        # Five repetitions of the pair (0, 1) as one flat array.
        alternating_column = np.tile([0, 1], 5)

        self.df = pd.DataFrame.from_dict({
            'columnA': bool_column,
            'columnB': letter_column,
            'columnC': alternating_column,
        })
Esempio n. 8
0
    def test_EqualitySelector_ordering(self):
        """Check identity, ordering, equality and hashing of EqualitySelector.

        Uses the dedicated unittest assertions so that a failure reports
        the operands involved rather than ``False is not true``.
        """
        A1 = ps.EqualitySelector("A", 1)
        A1_clone = ps.EqualitySelector("A", 1)
        A2 = ps.EqualitySelector("A", 2, "AA")
        B1 = ps.EqualitySelector("B", 1)

        # Constructing a selector twice with the same arguments must give
        # back the very same object.
        self.assertIs(A1_clone, A1)

        B1_clone = ps.EqualitySelector("B", 1)
        self.assertLess(A1, B1)
        self.assertLess(A1, A2)
        self.assertLess(A2, B1)
        self.assertEqual(B1, B1_clone)
        self.assertEqual(hash(B1), hash(B1_clone))

        # Same checks for selectors holding bytes values.
        C1 = ps.EqualitySelector("checking_status", b"<0")
        C2 = ps.EqualitySelector("checking_status", b"<0")

        self.assertEqual(C1, C2)
        self.assertEqual(hash(C1), hash(C2))

        # list.index relies on ``==``; each selector must only match itself.
        selectors = [A1, A2, B1]
        self.assertEqual(selectors.index(A1), 0)
        self.assertEqual(selectors.index(A2), 1)
        self.assertEqual(selectors.index(B1), 2)
Esempio n. 9
0
    def assert_class_ordering(self, cls):
        """Assert equality, hashing and ordering for expressions of type ``cls``.

        Args:
            cls: Callable that builds an expression from a list of
                selectors (called here with lists of EqualitySelector).

        Expressions built from the same selectors must be equal and hash
        alike regardless of the order in which the selectors are given.
        """
        A1 = ps.EqualitySelector("A", 1)
        A2 = ps.EqualitySelector("A", 2, "AA")
        B1 = ps.EqualitySelector("B", 1)

        SGD1 = cls([A1, A2])
        SGD1_clone = cls([A1, A2])
        SGD1_order = cls([A2, A1])

        self.assertEqual(SGD1, SGD1_clone)
        self.assertEqual(hash(SGD1), hash(SGD1_clone))
        # Selector order must not influence equality or the hash.
        self.assertEqual(SGD1, SGD1_order)
        self.assertEqual(hash(SGD1), hash(SGD1_order))

        SGD2 = cls([A1, A2, B1])
        SGD3 = cls([B1])
        self.assertGreater(SGD1, SGD2)
        self.assertLess(SGD2, SGD3)
Esempio n. 10
0
 def test_get_cover_array_and_size(self):
     """Check the size returned by ``ps.get_cover_array_and_size``.

     Covers four kinds of subgroup description: a selector evaluated on
     ``self.data``, a full slice, a bounded slice and an index array.
     """
     selector = ps.EqualitySelector('checking_status', b'no checking')
     # (positional arguments, expected size) for every call variant.
     cases = [
         ((selector, None, self.data), 394),
         ((slice(None), len(self.data), None), len(self.data)),
         ((slice(0, 10), len(self.data)), 10),
         ((np.array([1, 3, 5, 7, 11], dtype=int),), 5),
     ]
     for arguments, expected_size in cases:
         _, actual_size = ps.get_cover_array_and_size(*arguments)
         self.assertEqual(actual_size, expected_size)
Esempio n. 11
0
    def test_nominal_selector_covers(self):
        """Check ``covers`` of EqualitySelector on bool, string and float/NaN columns."""
        bool_values = np.array([0, 0, 1, 1, 0, 0, 1, 1, 1, 1], dtype=bool)
        is_true = ps.EqualitySelector("columnA", True)
        is_false = ps.EqualitySelector("columnA", False)

        letters = np.array(["A", "B", "C", "C", "B", "A", "D", "A", "A", "A"])
        letter_a = ps.EqualitySelector("columnB", "A")
        letter_c = ps.EqualitySelector("columnB", "C")

        floats = np.array([np.nan, np.nan, 1.1, 1.1, 2, 2, 2, 2, 2, 2])
        equals_1_1 = ps.EqualitySelector("columnC", 1.1)
        is_nan = ps.EqualitySelector("columnC", np.nan)

        frame = pd.DataFrame.from_dict({
            "columnA": bool_values,
            "columnB": letters,
            "columnC": floats,
        })

        # Each selector paired with the row mask it has to produce.
        expectations = [
            (is_true, bool_values),
            (is_false, np.logical_not(bool_values)),
            (letter_a, [1, 0, 0, 0, 0, 1, 0, 1, 1, 1]),
            (letter_c, [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]),
            (equals_1_1, [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]),
            # The NaN selector must match exactly the NaN rows.
            (is_nan, [1, 1, 0, 0, 0, 0, 0, 0, 0, 0]),
        ]
        for selector, expected_mask in expectations:
            np.testing.assert_array_equal(selector.covers(frame), expected_mask)
Esempio n. 12
0
    def setUp(self):
        """Prepare reference conjunctions, their area qualities and the task.

        ``self.qualities`` holds hard-coded values; ``self.qualities2``
        recomputes them from the titanic data as covered rows times
        conjunction depth, and both lists are asserted to agree.
        """
        cabin_missing = ps.EqualitySelector("Cabin", np.nan)
        embarked_s = ps.EqualitySelector("Embarked", 'S')
        embarked_c = ps.EqualitySelector("Embarked", 'C')
        male = ps.EqualitySelector("Sex", 'male')
        female = ps.EqualitySelector("Sex", 'female')

        selector_sets = [
            [cabin_missing, embarked_s],
            [cabin_missing, male],
            [embarked_s, male],
            [cabin_missing],
            [embarked_s],
            [male],
            [cabin_missing, female],
            [embarked_s, female],
            [female],
            [cabin_missing, embarked_c],
        ]
        self.result = [ps.Conjunction(selectors) for selectors in selector_sets]

        self.qualities = [178, 164, 146, 125, 110, 100, 86, 74, 56, 46]

        titanic = get_titanic_data()
        # Sanity check of the hard-coded numbers: covered rows * depth.
        self.qualities2 = [
            np.count_nonzero(conjunction.covers(titanic)) * conjunction.depth
            for conjunction in self.result
        ]
        self.assertEqual(self.qualities, self.qualities2)

        nominal_selectors = ps.create_nominal_selectors(titanic)
        # NOTE(review): the FITarget class itself is passed, not an
        # instance - confirm this is what SubgroupDiscoveryTask expects.
        self.task = ps.SubgroupDiscoveryTask(
            titanic,
            ps.FITarget,
            nominal_selectors,
            result_set_size=10,
            depth=2,
            qf=ps.AreaQF(),
        )
 def setUp(self):
     """Prepare reference conjunctions, qualities and a constrained task.

     The task runs on the credit data with binary target
     ``class == b'bad'`` and a ``MinSupportConstraint(200)``.
     """
     checking = ps.EqualitySelector("checking_status", b"<0")
     foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
     other_parties = ps.EqualitySelector("other_parties", b"none")
     savings = ps.EqualitySelector("savings_status", b"<100")
     payment_plans = ps.EqualitySelector("other_payment_plans", b"none")

     selector_sets = [
         [checking, foreign_worker],
         [checking],
         [checking, other_parties, foreign_worker],
         [checking, other_parties],
         [checking, savings, foreign_worker],
         [checking, savings],
         [checking, foreign_worker, payment_plans],
         [checking, payment_plans],
         [foreign_worker, savings],
         [foreign_worker, other_parties, savings],
     ]
     self.result = [ps.Conjunction(selectors) for selectors in selector_sets]
     self.qualities = [
         0.055299999999999995,
         0.05280000000000001,
         0.052300000000000006,
         0.05059999999999999,
         0.04959999999999999,
         0.048299999999999996,
         0.0426,
         0.04,
         0.03869999999999999,
         0.03750000000000001,
     ]

     credit = get_credit_data()
     bad_credit = ps.BinaryTarget('class', b'bad')
     # The target column is excluded from the search space.
     nominal_selectors = ps.create_nominal_selectors(credit, ignore=['class'])
     self.task = ps.SubgroupDiscoveryTask(
         credit, bad_credit, nominal_selectors,
         result_set_size=10, depth=5,
         qf=ps.StandardQF(1.0),
         constraints=[ps.MinSupportConstraint(200)])
Esempio n. 14
0
 def test_CountQf(self):
     """Check that ``ps.CountQF`` returns the number of covered rows.

     The quality function is evaluated for a selector on ``self.data``,
     for a full slice, a bounded slice and an explicit index array.
     """
     task = ps.SubgroupDiscoveryTask(self.data, ps.FITarget, None, None)
     qf = ps.CountQF()
     qf.calculate_constant_statistics(task)

     # Leftover debug prints of the columns and value counts were removed;
     # they only added noise to the test output.
     sel = ps.EqualitySelector('checking_status', b'no checking')
     size = qf.evaluate(sel, self.data)
     self.assertEqual(size, 394)

     size = qf.evaluate(slice(None))
     self.assertEqual(size, len(self.data))

     size = qf.evaluate(slice(0, 10))
     self.assertEqual(size, 10)

     size = qf.evaluate(np.array([1, 3, 5, 7, 11], dtype=int))
     self.assertEqual(size, 5)
Esempio n. 15
0
    def setUp(self):
        """Create three example columns, matching selectors and ``self.df``.

        ``columnA`` is boolean, ``columnB`` holds letters and ``columnC``
        holds floats including two NaN entries.
        """
        # Boolean column and selectors for both of its values.
        self.A = np.array([0, 0, 1, 1, 0, 0, 1, 1, 1, 1], dtype=bool)
        self.A1 = ps.EqualitySelector("columnA", True)
        self.A0 = ps.EqualitySelector("columnA", False)

        # String column and selectors for two of its letters.
        self.B = np.array(["A", "B", "C", "C", "B", "A", "D", "A", "A", "A"])
        self.BA = ps.EqualitySelector("columnB", "A")
        self.BC = ps.EqualitySelector("columnB", "C")

        # Float column with NaN; one value selector and one NaN selector.
        self.C = np.array([np.nan, np.nan, 1.1, 1.1, 2, 2, 2, 2, 2, 2])
        self.CA = ps.EqualitySelector("columnC", 1.1)
        self.CNan = ps.EqualitySelector("columnC", np.nan)

        columns = dict(columnA=self.A, columnB=self.B, columnC=self.C)
        self.df = pd.DataFrame.from_dict(columns)
Esempio n. 16
0
    def setUp(self):
        """Prepare twelve reference conjunctions, their qualities and the task.

        The search space combines nominal and numeric selectors of the
        credit data; the target is ``class == b'bad'``.
        """
        checking = ps.EqualitySelector("checking_status", b"<0")
        foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
        other_parties = ps.EqualitySelector("other_parties", b"none")
        savings = ps.EqualitySelector("savings_status", b"<100")
        job = ps.EqualitySelector("job", b"skilled")
        dependents = ps.EqualitySelector("num_dependents", 1.0)

        # Listed in the same order as the qualities below.
        selector_sets = [
            [checking, foreign_worker, job, other_parties, savings],
            [checking, foreign_worker, job, savings],
            [checking, foreign_worker, job],
            [checking, job, other_parties, savings],
            [checking, foreign_worker, job, other_parties],
            [checking, job, savings],
            [checking, foreign_worker, other_parties, savings],
            [checking, foreign_worker, other_parties],
            [checking, foreign_worker, savings],
            [checking, foreign_worker],
            [checking, foreign_worker, job, dependents, savings],
            [checking, job, other_parties],
        ]
        self.result = [ps.Conjunction(selectors) for selectors in selector_sets]

        self.qualities = [
            0.11457431093955019,
            0.113713540226172,
            0.11201325679119281,
            0.1117538749727658,
            0.11161046793076415,
            0.11145710640046322,
            0.11045259291161472,
            0.10929088624672183,
            0.10875519439407161,
            0.10866138825404954,
            0.10832735026213287,
            0.10813405094128754,
        ]

        credit = get_credit_data()
        bad_credit = ps.BinaryTarget('class', b'bad')
        # The target column is excluded from both selector families.
        nominal_selectors = ps.create_nominal_selectors(credit, ignore=['class'])
        numeric_selectors = ps.create_numeric_selectors(credit, ignore=['class'])
        search_space = nominal_selectors + numeric_selectors
        self.task = ps.SubgroupDiscoveryTask(
            credit,
            bad_credit,
            search_space,
            result_set_size=12,
            depth=5,
            qf=ps.StandardQF(0.5),
        )
    def setUp(self):
        """Prepare reference conjunctions, qualities and a numeric-target task.

        The target is the numeric column ``credit_amount`` of the credit
        data; the quality function is wrapped in
        ``CountCallsInterestingMeasure``.
        """
        telephone = ps.EqualitySelector("own_telephone", b"yes")
        foreign_worker = ps.EqualitySelector("foreign_worker", b"yes")
        other_parties = ps.EqualitySelector("other_parties", b"none")
        male_single = ps.EqualitySelector("personal_status", b'male single')
        high_job = ps.EqualitySelector("job", b'high qualif/self emp/mgmt')
        bad_class = ps.EqualitySelector("class", b"bad")

        selector_sets = [
            [telephone],
            [foreign_worker, telephone],
            [other_parties, telephone],
            [foreign_worker, telephone, male_single],
            [telephone, male_single],
            [foreign_worker, other_parties, telephone],
            [high_job],
            [bad_class, telephone],
            [foreign_worker, high_job],
            [foreign_worker, other_parties, telephone, male_single],
        ]
        self.result = [ps.Conjunction(selectors) for selectors in selector_sets]
        self.qualities = [
            383476.7679999999,
            361710.05800000014,
            345352.9920000001,
            338205.08,
            336857.8220000001,
            323586.28200000006,
            320306.81600000005,
            300963.84599999996,
            299447.332,
            297422.98200000013,
        ]

        credit = get_credit_data()
        amount_target = ps.NumericTarget('credit_amount')
        nominal_selectors = ps.create_nominal_selectors(
            credit, ignore=['credit_amount'])
        # Numeric selectors are intentionally left out of the search space.
        numeric_selectors = []
        search_space = nominal_selectors + numeric_selectors
        self.task = ps.SubgroupDiscoveryTask(
            credit, amount_target, search_space,
            result_set_size=10, depth=5,
            qf=ps.CountCallsInterestingMeasure(
                ps.StandardQFNumeric(1, False, 'sum')))
Esempio n. 18
0
    def setUp(self):
        """Prepare reference disjunctions, their qualities and the task.

        Every reference disjunction contains the two ``checking_status``
        selectors plus at most one further selector.
        """
        checking_lt_0 = ps.EqualitySelector("checking_status", b"<0")
        checking_0_to_200 = ps.EqualitySelector("checking_status", b"0<=X<200")
        co_applicant = ps.EqualitySelector("other_parties",
                                           b"co applicant")
        purpose_other = ps.EqualitySelector("purpose", b'other')
        purpose_repairs = ps.EqualitySelector("purpose", b'repairs')
        purpose_business = ps.EqualitySelector("purpose", b'business')

        no_credits = ps.EqualitySelector("credit_history",
                                         b"no credits/all paid")
        all_paid = ps.EqualitySelector("credit_history", b"all paid")
        unemployed = ps.EqualitySelector("employment", b"unemployed")
        unskilled_job = ps.EqualitySelector("job", b"unemp/unskilled non res")
        bank_plan = ps.EqualitySelector("other_payment_plans", b"bank")

        # Common prefix of every disjunction, followed by the optional
        # third selector (an empty list means "prefix only").
        shared = [checking_lt_0, checking_0_to_200]
        extras = [
            [bank_plan],
            [no_credits],
            [],
            [purpose_other],
            [purpose_repairs],
            [unemployed],
            [co_applicant],
            [all_paid],
            [purpose_business],
            [unskilled_job],
        ]
        self.result = [ps.Disjunction(shared + extra) for extra in extras]
        self.qualities = [
            0.0779,
            0.07740000000000002,
            0.0771,
            0.07680000000000001,
            0.07670000000000002,
            0.0767,
            0.07660000000000003,
            0.07650000000000003,
            0.07650000000000001,
            0.07600000000000001,
        ]

        credit = get_credit_data()
        bad_credit = ps.BinaryTarget('class', b'bad')
        # The target column is excluded from the search space.
        nominal_selectors = ps.create_nominal_selectors(credit, ignore=['class'])
        self.task = ps.SubgroupDiscoveryTask(
            credit,
            bad_credit,
            nominal_selectors,
            result_set_size=10,
            depth=3,
            qf=ps.StandardQF(1.0),
        )
Esempio n. 19
0
import pprint
import numpy as np
import pandas as pd
import pysubgroup as ps


# Pretty-printer instance; nothing further down in this script uses it.
pp = pprint.PrettyPrinter(indent=4)

# Five-row toy frame with a numeric "Target" and a categorical "A" column.
# Both columns come out of one mixed int/str array (presumably stored as
# strings), which is why "Target" is converted to numbers afterwards.
data = pd.DataFrame(
    np.array([[1, 2, 3, 4, 5], ["F", "F", "F", "Tr", "Tr"]]).T,
    columns=["Target", "A"],
)
data["Target"] = pd.to_numeric(data["Target"])


# Numeric target on "Target" and the subgroup A == "Tr".
target = ps.NumericTarget("Target")
print(data[target.target_variable])
sgd = ps.EqualitySelector("A", "Tr")
target.calculate_statistics(sgd, data)

# Print the quality of the subgroup, then its optimistic estimate.
qf = ps.StandardQFNumeric(1.0)
print(qf.evaluate(sgd, target, data))
print(qf.optimistic_estimate(sgd, target, data))
Esempio n. 20
0
import unittest
import pysubgroup as ps
from pysubgroup.tests.DataSets import get_credit_data

from pysubgroup.tests.algorithms_testing import TestAlgorithmsBase
# Load the credit data set through the test-suite helper.
data = get_credit_data()

# Numeric target on 'credit_amount' and the subgroup purpose == b"other".
target = ps.NumericTarget('credit_amount')
sgd = ps.EqualitySelector("purpose", b"other")

# Target statistics for the subgroup described by `sgd`.
stats = target.calculate_statistics(sgd, data)
print(stats)

# First evaluation: no statistics are passed in.
qf = ps.StandardQFNumeric(1.0)
score = qf.evaluate(sgd, target, data)
print(score)

# Second evaluation: the statistics computed above are handed in.
# NOTE(review): presumably both calls print the same score - confirm.
score = qf.evaluate(sgd, target, data, stats)
print(score)