Beispiel #1
0
    def test_operator_eq(self):
        """Exercise ``rule`` with lists of ``eq`` conditions on a 100-row frame.

        Row ``i`` of the frame holds ``chr(i)`` in ``c1``, ``i`` in ``c2`` and
        ``float(i)`` in ``c3``. Each case passes a list of conditions to
        ``rule`` and compares the first returned score with a fixed value.
        NOTE(review): the expected values look like "percentage of rows
        matching all conditions" -- confirm against ``rule`` itself.
        """
        df = pd.DataFrame()
        df["c1"] = list(map(chr, range(100)))
        df["c2"] = list(range(100))
        df["c3"] = list(map(float, range(100)))

        def eq(column, value):
            # Build a fresh condition dict so no two conditions share an object.
            return {"column": column, "operator": "eq", "value": value}

        cases = [
            # The same condition given twice.
            ([eq("c1", chr(50)), eq("c1", chr(50))], 1.),
            # Two columns that agree on row 50.
            ([eq("c1", chr(50)), eq("c2", 50)], 1.),
            # Three columns that agree on row 50.
            ([eq("c1", chr(50)), eq("c2", 50), eq("c3", 50.)], 1.),
            # The third condition points at a different row (51).
            ([eq("c1", chr(50)), eq("c2", 50), eq("c3", 51.)], 0.),
        ]
        for conditions, expected in cases:
            r = rule(conditions, df)[0]
            self.assertEqual(r, expected)
Beispiel #2
0
    def test_operators_mixed(self):
        """Exercise ``rule`` with ``eq``, ``lt`` and ``gt`` conditions combined.

        Row ``i`` of the 100-row frame holds ``chr(i)`` in ``c1``, ``i`` in
        ``c2`` and ``float(i)`` in ``c3``. Each case passes a list of
        conditions to ``rule`` and compares the first returned score with a
        fixed value.
        NOTE(review): the expected values look like "percentage of rows
        matching all conditions" -- confirm against ``rule`` itself.
        """
        df = pd.DataFrame()
        df["c1"] = list(map(chr, range(100)))
        df["c2"] = list(range(100))
        df["c3"] = list(map(float, range(100)))

        def cond(column, operator, value):
            # Build a fresh condition dict for every use.
            return {"column": column, "operator": operator, "value": value}

        def range_conditions(eighth):
            # The two ten-condition cases differ only in their eighth entry.
            return [
                cond("c2", "gt", 10),
                cond("c2", "lt", 100),
                cond("c2", "gt", 0),
                cond("c2", "lt", 50),
                cond("c2", "lt", 40),
                cond("c3", "gt", 20.),
                cond("c3", "lt", 100),
                eighth,
                cond("c3", "lt", 25),
                cond("c3", "lt", 23),
            ]

        cases = [
            ([cond("c1", "eq", chr(50)),
              cond("c2", "lt", 100.),
              cond("c3", "lt", 100.)], 1.),
            # chr(100) is not present in c1, which only goes up to chr(99).
            ([cond("c1", "eq", chr(100)),
              cond("c2", "lt", 10),
              cond("c3", "lt", 10.)], 0.),
            ([cond("c2", "gt", 22),
              cond("c3", "lt", 31.)], 8.),
            (range_conditions(cond("c3", "gt", 0)), 2.),
            # No value in c3 equals -1.
            (range_conditions(cond("c3", "eq", -1)), 0.),
        ]
        for conditions, expected in cases:
            r = rule(conditions, df)[0]
            self.assertEqual(r, expected)
Beispiel #3
0
    def test_empty(self):
        """Check the score ``rule`` reports for a frame that has no rows.

        Both columns are created from empty lists, and the test expects the
        first score returned for the two ``c1`` bounds to be ``100.``.
        """
        df = pd.DataFrame()
        for column in ("c1", "c2"):
            df[column] = []

        conditions = [
            {"column": "c1", "operator": "lt", "value": 1000},
            {"column": "c1", "operator": "gt", "value": 0},
        ]
        scores = rule(conditions, df)
        self.assertEqual(scores[0], 100.)
Beispiel #4
0
    def test_operator_lt(self):
        """Exercise ``rule`` with lists of ``lt`` conditions on a 100-row frame.

        Row ``i`` of the frame holds ``chr(i)`` in ``c1``, ``i`` in ``c2`` and
        ``float(i)`` in ``c3``. Each case passes a list of conditions to
        ``rule`` and compares the first returned score with a fixed value.
        NOTE(review): the expected values look like "percentage of rows
        matching all conditions" -- confirm against ``rule`` itself.
        """
        df = pd.DataFrame()
        df["c1"] = list(map(chr, range(100)))
        df["c2"] = list(range(100))
        df["c3"] = list(map(float, range(100)))

        def lt(column, value):
            # Build a fresh condition dict for every use.
            return {"column": column, "operator": "lt", "value": value}

        cases = [
            # Nothing in c2 is below 0.
            ([lt("c2", 0)], 0.),
            ([lt("c2", 50), lt("c2", 49), lt("c3", 100.)], 49.),
            ([lt("c2", 500), lt("c2", 10), lt("c3", 10.)], 10.),
            # Nothing in c3 is below 0.
            ([lt("c2", 21), lt("c2", 22), lt("c3", 0.)], 0.),
            # Every value is below 100.
            ([lt("c2", 100), lt("c2", 100), lt("c3", 100.)], 100.),
        ]
        for conditions, expected in cases:
            r = rule(conditions, df)[0]
            self.assertEqual(r, expected)
Beispiel #5
0
    def test_operators_mixed_and_nulls_inconditions(self):
        """Exercise ``rule`` on columns whose first ten rows are null.

        Row ``i`` of the 100-row frame holds ``chr(i)`` in ``c1``, ``i`` in
        ``c2`` and ``float(i)`` in ``c3``, except that rows 0-9 hold ``None``
        (``c1``, ``c2``) or NaN (``c3``). Each case passes a list of
        conditions to ``rule`` and compares the first returned score with a
        fixed value.
        NOTE(review): the expected values imply null rows never satisfy a
        comparison -- confirm against ``rule`` itself.
        """
        null_rows = 10  # number of leading rows blanked out in every column

        df = pd.DataFrame()
        df["c1"] = [None if i < null_rows else chr(i) for i in range(100)]
        df["c2"] = [None if i < null_rows else i for i in range(100)]
        # Use np.nan: the np.NaN alias was removed in NumPy 2.0 and raises
        # AttributeError there.
        df["c3"] = [np.nan if i < null_rows else float(i) for i in range(100)]

        def cond(column, operator, value):
            # Build a fresh condition dict for every use.
            return {"column": column, "operator": operator, "value": value}

        cases = [
            # Row 10 is the first row that is not null.
            ([cond("c3", "eq", 10.0)], 1.),
            # Bounds below every value: only the 90 non-null rows are expected.
            ([cond("c2", "gt", -1),
              cond("c2", "gt", -1),
              cond("c3", "gt", -1)], 90.),
            ([cond("c2", "gt", -1),
              cond("c2", "gt", 50),
              cond("c3", "gt", 50)], 49.),
            ([cond("c2", "lt", 100),
              cond("c2", "gt", 50),
              cond("c3", "gt", 20)], 49.),
            # Values 10..19 are the only non-null ones below 20.
            ([cond("c2", "lt", 100),
              cond("c3", "lt", 20)], 10.),
            # Everything below 10 has been nulled out.
            ([cond("c2", "lt", 100),
              cond("c3", "lt", 10)], 0.),
        ]
        for conditions, expected in cases:
            r = rule(conditions, df)[0]
            self.assertEqual(r, expected)
Beispiel #6
0
    def test_groping_multile_columns(self):
        """Run several metrics through one ``Task`` and check every score.

        Nine metrics are added to the task in a fixed order and ``task.run``
        is called once on a 13-row frame; ``result[k]["scores"]`` is then
        checked for the k-th metric added. The last metric is a ``grouprule``
        grouping on two columns (given as index ``0`` and name ``"c2"``).

        Scores that are ratios over 13 rows are compared with
        ``assertAlmostEqual``: these values are not exactly representable as
        floats, so an exact comparison would depend on the order of
        operations used inside the library.
        """
        df = pd.DataFrame()
        df["c1"] = [0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5]
        df["c2"] = ["a", "a", "b", "b", "c", "c", "d", "d", "d", "d", "a", "a", "a"]
        df["c3"] = [0.0, 0.0, 0.1, 0.1, 2.2, 2.2, 2.2, 3.1, 3.2, 3.3, 40, 40, 50]
        df["c4"] = [10.0, 20.0, 10.0, 20.0, 10.0, 20.0, 10.0, 20.0, 10.0, 20.0, 10.0, 20.0, 10.0]
        df["c5"] = ["09:10:10"] * 10 + ["00:11:10"] * 3

        task = Task()

        # result[0]: completeness with default arguments.
        task.add(completeness())
        # result[1]: completeness of the columns at positions 0, 1 and 2.
        task.add(completeness([0, 1, 2]))
        # result[2]: deduplication of the columns at positions 0 and 1.
        task.add(deduplication([0, 1]))
        # result[3]: deduplication with default arguments.
        task.add(deduplication())
        # result[4]: timeliness of c5; note the format lists seconds first.
        task.add(timeliness(["c5"], value="10:10:10", timeFormat="%S:%M:%H"))
        # result[5]: completeness again, after other metrics were queued.
        task.add(completeness())
        # result[6]: rule 1 < c3 < 50.
        task.add(rule([
            {"column": "c3", "operator": "lt", "value": 50},
            {"column": "c3", "operator": "gt", "value": 1.0},
        ]))
        # result[7]: rule c5 == "00:11:10".
        task.add(rule([
            {"column": "c5", "operator": "eq", "value": "00:11:10"},
        ]))

        # result[8]: grouped rule over (c1, c2) with the same c3 range filter.
        conditions = [
            {"column": "c3", "operator": "lt", "value": 50},
            {"column": "c3", "operator": "gt", "value": 1.0},
        ]
        havings = [
            {"column": "*", "operator": "gt", "value": 1, "aggregator": "count"},
            {"column": "c4", "operator": "eq", "value": 50 / 3, "aggregator": "avg"},
        ]
        task.add(grouprule([0, "c2"], havings, conditions))

        result = task.run(df)

        # completeness() -- no nulls anywhere in the frame
        r = result[0]["scores"][0]
        self.assertEqual(r, 100.)

        # completeness([0, 1, 2]) -- one score per requested column
        r1, r2, r3 = result[1]["scores"]
        self.assertEqual(r1, 100.)
        self.assertEqual(r2, 100.)
        self.assertEqual(r3, 100.)

        # deduplication([0, 1]) -- c1 has 6 distinct values, c2 has 4
        r1, r2 = result[2]["scores"]
        self.assertAlmostEqual(r1, (6 / 13) * 100)
        self.assertAlmostEqual(r2, (4 / 13) * 100)

        # deduplication()
        r = result[3]["scores"][0]
        self.assertEqual(r, 100.)

        # timeliness -- the ten "09:10:10" rows are the ones expected to pass
        r = result[4]["scores"][0]
        self.assertAlmostEqual(r, (10 / 13) * 100)

        # second completeness()
        r = result[5]["scores"][0]
        self.assertEqual(r, 100.)

        # rule 1 < c3 < 50 -- eight rows fall inside the range
        r = result[6]["scores"][0]
        self.assertAlmostEqual(r, (8 / 13) * 100)

        # rule c5 == "00:11:10" -- the last three rows
        r = result[7]["scores"][0]
        self.assertAlmostEqual(r, (3 / 13) * 100)

        # grouprule -- NOTE(review): 25.0 is consistent with one of four
        # filtered groups satisfying both havings; confirm against grouprule.
        r = result[8]["scores"][0]
        self.assertEqual(r, 25.0)