Example #1
    def test_evaluation_get_eval_id(self):
        """
        get_eval_id() should return the eval_id specified at initialization
        """
        df = pd.DataFrame([{"a": 1, "b": 2}, {"a": 3, "b": 4}])
        result = Evaluation(df, eval_id=1000)
        assert result.get_eval_id() == 1000

        result = Evaluation(df, eval_id=0)
        assert result.get_eval_id() == 0
Example #2
 def process(self, input_eval: Evaluation) -> Evaluation:
     if self._sort_col_names is None:
         new_df = input_eval.get_df().drop_duplicates(
             subset=self._duplicate_col_names
         )
         return Evaluation(new_df, input_eval.get_eval_id())
     else:
         # Sort first so drop_duplicates (which keeps the first occurrence)
         # retains the preferred row of each duplicate group.
         new_df = (
             input_eval.get_df()
             .sort_values(by=self._sort_col_names, ascending=self._reverse_sort)
             .drop_duplicates(subset=self._duplicate_col_names)
         )
         return Evaluation(new_df, input_eval.get_eval_id())
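For context, this processor leans on a standard pandas idiom: sorting before drop_duplicates decides which row of each duplicate group survives, since drop_duplicates keeps the first occurrence by default. A small pandas-only sketch with made-up column names:

import pandas as pd

df = pd.DataFrame([
    {"name": "x", "score": 3},
    {"name": "x", "score": 7},
    {"name": "y", "score": 5},
])

# Sorting by "score" descending first means drop_duplicates keeps the
# highest-scoring row for each "name".
best = df.sort_values(by="score", ascending=False).drop_duplicates(subset=["name"])
print(best)  # rows ("x", 7) and ("y", 5)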
Example #3
 def process(self, input_eval: Evaluation):
     old_df = input_eval.get_df()
     if isinstance(old_df.index, pd.MultiIndex):
         new_df = old_df.xs(self.index_value, level=self.index_name)
     elif isinstance(old_df.index, pd.Index):
         # slicing instead of indexing to maintain shape
         new_df = old_df.loc[self.index_value:self.index_value]
     else:
         raise ValueError("Incompatible dataframe index.")
     return Evaluation(new_df, input_eval.get_eval_id())
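The two branches above use different pandas selection idioms: xs() picks rows matching a value on a named MultiIndex level, while label slicing with .loc on a flat index returns a one-row DataFrame instead of collapsing to a Series. A standalone illustration (index and column names are made up):

import pandas as pd

midx = pd.MultiIndex.from_tuples(
    [("cfg1", "run1"), ("cfg2", "run1")], names=["config", "run"]
)
mdf = pd.DataFrame({"score": [1.0, 2.0]}, index=midx)
print(mdf.xs("cfg1", level="config"))   # rows where the "config" level equals "cfg1"

sdf = pd.DataFrame({"score": [1.0, 2.0]}, index=["cfg1", "cfg2"])
print(sdf.loc["cfg1":"cfg1"])           # slicing keeps a 1-row DataFrame
print(sdf.loc["cfg1"])                  # plain indexing would return a Series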
Example #4
    def test_comparetofirst_dir_subset(self):
        """ Test if CompareToFirst works with different direction and subset"""
        df = pd.DataFrame([
            {
                "a": 1,
                "b": 5
            },
            {
                "a": 2,
                "b": 4
            },
            {
                "a": 3,
                "b": 3
            },
            {
                "a": 4,
                "b": 2
            },
            {
                "a": 5,
                "b": 1
            },
        ])
        eval1 = Evaluation(df, eval_id=20)

        direction = {"a": Direction.MINIMIZE}

        pipeline = [CompareToFirst(direction)]
        eval1 = eval1.process(pipeline)

        expected_df = pd.DataFrame([
            {
                "a": 1,
                "a.relative": 1.0 / 1
            },
            {
                "a": 2,
                "a.relative": 1.0 / 2
            },
            {
                "a": 3,
                "a.relative": 1.0 / 3
            },
            {
                "a": 4,
                "a.relative": 1.0 / 4
            },
            {
                "a": 5,
                "a.relative": 1.0 / 5
            },
        ])
        assert_frame_equal(eval1.get_df(), expected_df)
        assert eval1.get_eval_id() == 20
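Judging from the expected frame, Direction.MINIMIZE produces first_value / current_value, so the first row scores 1.0 and smaller (better) values score higher. A pandas-only sketch of that arithmetic, not the library's implementation:

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
# MINIMIZE: compare every row against the first row's value.
df["a.relative"] = df["a"].iloc[0] / df["a"]
print(df["a.relative"].tolist())  # [1.0, 0.5, 0.333..., 0.25, 0.2]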
Example #5
    def process(self, input_eval: Evaluation) -> Evaluation:
        input_df = input_eval.get_df()

        try:
            # if int, we might need to convert to float first
            # (e.g. int(float("6.0")))
            if int in self.types.values():
                # create dictionary replacing int with float
                pre_df_types = {
                    k: (v if v != int else float) for k, v in self.types.items()
                }
                input_df = input_df.astype(pre_df_types)

            new_df = input_df.astype(self.types)
        except KeyError as err:
            raise KeyError(
                "A key in the types parameter does not exist in the evaluation."
            ) from err
        return Evaluation(new_df, input_eval.get_eval_id())
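The intermediate float cast exists because a string such as "6.0" cannot be converted straight to an integer dtype. A self-contained pandas sketch of the two-step conversion:

import pandas as pd

df = pd.DataFrame({"count": ["6.0", "7.0"]})
# df.astype({"count": int}) would fail: "6.0" is not a valid integer literal.
df = df.astype({"count": float}).astype({"count": int})
print(df["count"].tolist())  # [6, 7]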
Example #6
    def test_aggregate(self):
        """ Test aggregate processor with custom aggregator functions """
        df = pd.DataFrame([
            {
                "a": 1,
                "b": 1,
                "c": 5
            },
            {
                "a": 1,
                "b": 2,
                "c": 4
            },
            {
                "a": 3,
                "b": 3,
                "c": 3
            },
            {
                "a": 4,
                "b": 4,
                "c": 2
            },
            {
                "a": 5,
                "b": 5,
                "c": 1
            },
        ])
        eval1 = Evaluation(df, eval_id=20)

        pipeline = [Aggregate(lambda x: x.sum())]
        result = eval1.process(pipeline)

        expected_df = pd.DataFrame([{"a": 14, "b": 15, "c": 15}])
        assert_frame_equal(result.get_df(), expected_df)
        assert result.get_eval_id() == 20

        pipeline2 = [Aggregate(lambda x: x.product())]
        result2 = eval1.process(pipeline2)

        expected_df2 = pd.DataFrame([{"a": 60, "b": 120, "c": 120}])
        assert_frame_equal(result2.get_df(), expected_df2)
        assert result2.get_eval_id() == 20
Example #7
    def test_geomean_aggregate(self):
        """ Test built-in geomean aggregator """
        df = pd.DataFrame([
            {
                "a": 1,
                "b": 1,
                "c": 5
            },
            {
                "a": 1,
                "b": 2,
                "c": 4
            },
            {
                "a": 3,
                "b": 3,
                "c": 3
            },
            {
                "a": 4,
                "b": 4,
                "c": 2
            },
            {
                "a": 5,
                "b": 5,
                "c": 1
            },
        ])
        eval1 = Evaluation(df, eval_id=20)

        pipeline = [GeomeanAggregate()]
        eval1 = eval1.process(pipeline)

        expected_a = (1 * 1 * 3 * 4 * 5)**(1 / 5)
        expected_b = expected_c = (1 * 2 * 3 * 4 * 5)**(1 / 5)
        expected_df = pd.DataFrame([{
            "a": expected_a,
            "b": expected_b,
            "c": expected_c
        }])
        assert_frame_equal(eval1.get_df(), expected_df)
        assert eval1.get_eval_id() == 20
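The expected values are ordinary per-column geometric means. A quick NumPy/pandas cross-check of the arithmetic (independent of GeomeanAggregate):

import numpy as np
import pandas as pd

df = pd.DataFrame({"a": [1, 1, 3, 4, 5], "b": [1, 2, 3, 4, 5], "c": [5, 4, 3, 2, 1]})
geomean = np.exp(np.log(df).mean())  # exp(mean(log(x))) per column
print(geomean["a"], (1 * 1 * 3 * 4 * 5) ** (1 / 5))  # both roughly 2.268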
Example #8
    def test_aggregate_exclude_nonnumeric(self):
        """ Check if aggregate processor excludes fields that are non-numeric """
        df = pd.DataFrame([
            {
                "a": 1,
                "b": 1,
                "c": "a"
            },
            {
                "a": 1,
                "b": 2,
                "c": "b"
            },
            {
                "a": 3,
                "b": 3,
                "c": "c"
            },
            {
                "a": 4,
                "b": 4,
                "c": "d"
            },
            {
                "a": 5,
                "b": 5,
                "c": "e"
            },
        ])
        eval1 = Evaluation(df, eval_id=20)

        pipeline = [Aggregate(lambda x: x.sum())]
        result = eval1.process(pipeline)

        expected_df = pd.DataFrame([{"a": 14, "b": 15}])
        assert_frame_equal(result.get_df(), expected_df)
        assert result.get_eval_id() == 20
Example #9
 def process(self, input_eval: Evaluation) -> Evaluation:
     input_df = input_eval.get_df()
     new_df = input_df.sort_index(level=self._sort_names)
     return Evaluation(new_df, input_eval.get_eval_id())
Example #10
 def process(self, input_eval: Evaluation) -> Evaluation:
     input_df = input_eval.get_df()
     new_df = self._compare_to_first(input_df)
     return Evaluation(new_df, input_eval.get_eval_id())
Example #11
 def process(self, input_eval: Evaluation):
     old_df = input_eval.get_df()
     # Aggregate only the numeric columns that contain no missing values,
     # then wrap the per-column results in a one-row DataFrame.
     numeric_columns = old_df.select_dtypes(include=['number']).dropna(axis=1).columns
     new_df = pd.DataFrame([old_df[numeric_columns].agg(self.func)])
     return Evaluation(new_df, input_eval.get_eval_id())
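For reference, select_dtypes(include=['number']) is what keeps text columns out of the aggregation, and wrapping the per-column result in a list yields a one-row DataFrame. A self-contained sketch with illustrative column names:

import pandas as pd

df = pd.DataFrame({"a": [1, 3, 5], "b": [2, 4, 6], "label": ["x", "y", "z"]})
numeric_cols = df.select_dtypes(include=["number"]).columns
summary = pd.DataFrame([df[numeric_cols].agg(lambda col: col.sum())])
print(summary)  # one row with a=9 and b=12; "label" is excluded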
Example #12
 def process(self, input_eval: Evaluation) -> Evaluation:
     input_df = input_eval.get_df()
     new_df = self._normalize(input_df)
     return Evaluation(new_df, input_eval.get_eval_id())
Example #13
 def process(self, input_eval: Evaluation) -> Evaluation:
     return Evaluation(input_eval.get_df() - 1, input_eval.get_eval_id())
Example #14
 def process(self, input_eval: Evaluation) -> Evaluation:
     input_df = input_eval.get_df()
     new_df = input_df.groupby(self._groupby).apply(self._normalize_around)
     return Evaluation(new_df, input_eval.get_eval_id())
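The groupby(...).apply(...) pattern runs the normalization callback once per group and concatenates the results. A minimal pandas-only illustration; the grouping column and the normalization rule here are made up and are not the library's _normalize_around:

import pandas as pd

df = pd.DataFrame({"group": ["g1", "g1", "g2", "g2"],
                   "value": [1.0, 3.0, 10.0, 30.0]})

def normalize(group_df):
    # Scale each group's values by that group's first row.
    group_df = group_df.copy()
    group_df["value"] = group_df["value"] / group_df["value"].iloc[0]
    return group_df

print(df.groupby("group", group_keys=False).apply(normalize))  # each group starts at 1.0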
Example #15
    def test_comparetofirst_suffix(self):
        """ Test if CompareToFirst works with different suffix """
        df = pd.DataFrame([
            {
                "a": 1,
                "b": 5
            },
            {
                "a": 2,
                "b": 4
            },
            {
                "a": 3,
                "b": 3
            },
            {
                "a": 4,
                "b": 2
            },
            {
                "a": 5,
                "b": 1
            },
        ])
        eval1 = Evaluation(df, eval_id=20)

        direction = {"a": Direction.MAXIMIZE, "b": Direction.MAXIMIZE}

        pipeline = [CompareToFirst(direction, suffix=".diff")]
        eval1 = eval1.process(pipeline)

        expected_df = pd.DataFrame([
            {
                "a": 1,
                "a.diff": 1.0 / 1,
                "b": 5,
                "b.diff": 5.0 / 5
            },
            {
                "a": 2,
                "a.diff": 2.0 / 1,
                "b": 4,
                "b.diff": 4.0 / 5
            },
            {
                "a": 3,
                "a.diff": 3.0 / 1,
                "b": 3,
                "b.diff": 3.0 / 5
            },
            {
                "a": 4,
                "a.diff": 4.0 / 1,
                "b": 2,
                "b.diff": 2.0 / 5
            },
            {
                "a": 5,
                "a.diff": 5.0 / 1,
                "b": 1,
                "b.diff": 1.0 / 5
            },
        ])
        assert_frame_equal(eval1.get_df(), expected_df)
        assert eval1.get_eval_id() == 20
Example #16
 def process(self, input_eval: Evaluation) -> Evaluation:
     input_df = input_eval.get_df()
     new_df = input_df.set_index(self._reindex_names)
     return Evaluation(new_df, input_eval.get_eval_id())
Example #17
    def process(self, input_eval: Evaluation) -> Evaluation:
        input_df = input_eval.get_df()
        new_df = input_df.groupby(self._input_col_name).apply(self._expansion)

        return Evaluation(new_df, input_eval.get_eval_id())