Example #1
0
 def process(self, input_eval: Evaluation) -> Evaluation:
     """Return a new Evaluation with duplicate rows removed.

     Rows are considered duplicates when they match on
     ``self._duplicate_col_names``. When ``self._sort_col_names`` is set,
     the dataframe is sorted by those columns before duplicates are
     dropped. The eval id of ``input_eval`` is carried over.
     """
     frame = input_eval.get_df()
     if self._sort_col_names is not None:
         # NOTE(review): ``_reverse_sort`` is forwarded as ``ascending`` --
         # confirm the attribute name matches the intended sort direction.
         frame = frame.sort_values(
             by=self._sort_col_names, ascending=self._reverse_sort
         )
     deduplicated = frame.drop_duplicates(subset=self._duplicate_col_names)
     return Evaluation(deduplicated, input_eval.get_eval_id())
Example #2
0
    def process(self, b: Evaluation) -> Evaluation:
        """Return the relative difference of ``b`` with respect to ``self.a``.

        Only numeric columns of both dataframes take part; the result is
        ``(b - a) / a`` wrapped in a new Evaluation.
        """
        numeric_kinds = [np.number]
        baseline = self.a.get_df().select_dtypes(include=numeric_kinds)
        candidate = b.get_df().select_dtypes(include=numeric_kinds)
        relative_change = (candidate - baseline) / baseline
        return Evaluation(relative_change)
Example #3
0
 def process(self, input_eval: Evaluation):
     """Select the rows whose index matches ``self.index_value``.

     For a MultiIndex the cross-section at level ``self.index_name`` is
     taken; for any other index a label slice is used. The eval id of
     ``input_eval`` is carried over.

     Raises:
         ValueError: if the dataframe index is not a pandas index.
     """
     frame = input_eval.get_df()
     row_index = frame.index

     # MultiIndex is itself a pd.Index, so one guard covers both cases.
     if not isinstance(row_index, pd.Index):
         raise ValueError("Incompatible dataframe index.")

     if isinstance(row_index, pd.MultiIndex):
         selected = frame.xs(self.index_value, level=self.index_name)
     else:
         # A slice (rather than a scalar lookup) keeps the result a
         # dataframe instead of collapsing it.
         selected = frame.loc[self.index_value:self.index_value]
     return Evaluation(selected, input_eval.get_eval_id())
Example #4
0
    def test_comparetofirst_dir_subset(self):
        """ CompareToFirst with a MINIMIZE direction given for column "a" only """
        ascending = [1, 2, 3, 4, 5]
        df = pd.DataFrame({"a": ascending, "b": ascending[::-1]})
        evaluation = Evaluation(df, eval_id=20)

        # Only "a" gets a direction; the expected frame below has no "b".
        pipeline = [CompareToFirst({"a": Direction.MINIMIZE})]
        processed = evaluation.process(pipeline)

        expected_df = pd.DataFrame({
            "a": ascending,
            "a.relative": [1.0 / value for value in ascending],
        })
        assert_frame_equal(processed.get_df(), expected_df)
        assert processed.get_eval_id() == 20
Example #5
0
    def test_standardizetypes(self):
        """ StandardizeTypes casts column "a" from int to float, keeping the eval id """
        source = Evaluation(pd.DataFrame({"a": [1, 2, 3, 4, 5]}), eval_id=10)

        # Precondition: the column starts out as an integer column.
        assert source.get_df().dtypes["a"] == int

        converted = source.process([StandardizeTypes({"a": float})])

        assert converted.get_df().dtypes["a"] == float
        assert converted.get_eval_id() == 10
Example #6
0
    def process(self, input_eval: Evaluation) -> Evaluation:
        """Cast the evaluation's columns to the dtypes in ``self.types``.

        ``self.types`` maps column names to target types. The eval id of
        ``input_eval`` is carried over to the returned Evaluation.

        Raises:
            KeyError: if a key of ``self.types`` is not present in the
                dataframe. The underlying pandas error is chained as the
                cause.
        """
        input_df = input_eval.get_df()

        try:
            # if int, we might need to convert to float first
            # (e.g. int(float("6.0")))
            if int in self.types.values():
                # Same mapping, but with every int target replaced by float
                # for the intermediate cast.
                pre_df_types = {
                    k: (v if v != int else float) for k, v in self.types.items()
                }
                input_df = input_df.astype(pre_df_types)

            new_df = input_df.astype(self.types)
        except KeyError as err:
            # Chain explicitly so the original error is kept as __cause__.
            raise KeyError(
                "A key in the types parameter does not exist in the evaluation."
            ) from err
        return Evaluation(new_df, input_eval.get_eval_id())
Example #7
0
    def test_geomean_aggregate(self):
        """ GeomeanAggregate reduces each column to its geometric mean """
        df = pd.DataFrame({
            "a": [1, 1, 3, 4, 5],
            "b": [1, 2, 3, 4, 5],
            "c": [5, 4, 3, 2, 1],
        })
        evaluation = Evaluation(df, eval_id=20)

        aggregated = evaluation.process([GeomeanAggregate()])

        # Five rows per column, so the geomean is the fifth root of the product.
        fifth_root = 1 / 5
        geomean_a = (1 * 1 * 3 * 4 * 5)**fifth_root
        # "b" and "c" hold the same values in opposite order.
        geomean_bc = (1 * 2 * 3 * 4 * 5)**fifth_root
        expected_df = pd.DataFrame({
            "a": [geomean_a],
            "b": [geomean_bc],
            "c": [geomean_bc],
        })
        assert_frame_equal(aggregated.get_df(), expected_df)
        assert aggregated.get_eval_id() == 20
Example #8
0
    def test_minusone(self):
        """ MinusOne lowers every value by one and keeps the eval id """
        source = Evaluation(pd.DataFrame({"a": [1, 2, 3, 4, 5]}), eval_id=10)

        # Sanity check: the column is unchanged before the pipeline runs.
        assert_series_equal(
            source.get_df()["a"], pd.Series([1, 2, 3, 4, 5], name="a")
        )

        shifted = source.process([MinusOne()])

        assert_series_equal(
            shifted.get_df()["a"], pd.Series([0, 1, 2, 3, 4], name="a")
        )
        assert shifted.get_eval_id() == 10
Example #9
0
 def process(self, input_eval: Evaluation):
     """Aggregate the numeric columns of the evaluation with ``self.func``.

     Numeric columns that contain missing values are left out. The eval id
     of ``input_eval`` is carried over.
     """
     frame = input_eval.get_df()
     # Keep only numeric columns, then drop any column containing NaN.
     complete_numeric = frame.select_dtypes(include=['number']).dropna(axis=1)
     aggregated = frame[complete_numeric.columns].agg(self.func)
     # The aggregation result is wrapped in a list before building the frame.
     return Evaluation(pd.DataFrame([aggregated]), input_eval.get_eval_id())
Example #10
0
 def process(self, input_eval: Evaluation) -> Evaluation:
     """Apply ``self._normalize`` to the dataframe and keep the eval id."""
     normalized = self._normalize(input_eval.get_df())
     eval_id = input_eval.get_eval_id()
     return Evaluation(normalized, eval_id)
Example #11
0
 def process(self, input_eval: Evaluation) -> Evaluation:
     """Subtract one from the evaluation's dataframe and keep the eval id."""
     decremented = input_eval.get_df() - 1
     eval_id = input_eval.get_eval_id()
     return Evaluation(decremented, eval_id)
Example #12
0
 def process(self, input_eval: Evaluation) -> Evaluation:
     """Group by ``self._groupby`` and apply ``self._normalize_around`` per group.

     The eval id of ``input_eval`` is carried over.
     """
     grouped = input_eval.get_df().groupby(self._groupby)
     normalized = grouped.apply(self._normalize_around)
     return Evaluation(normalized, input_eval.get_eval_id())
Example #13
0
    def test_comparetofirst_suffix(self):
        """ CompareToFirst names its result columns with a custom suffix """
        ascending = [1, 2, 3, 4, 5]
        descending = ascending[::-1]
        df = pd.DataFrame({"a": ascending, "b": descending})
        evaluation = Evaluation(df, eval_id=20)

        direction = {"a": Direction.MAXIMIZE, "b": Direction.MAXIMIZE}
        processed = evaluation.process([CompareToFirst(direction, suffix=".diff")])

        # Each value is divided by the first row of its column (1 for "a", 5 for "b").
        expected_df = pd.DataFrame({
            "a": ascending,
            "a.diff": [float(value) / 1 for value in ascending],
            "b": descending,
            "b.diff": [float(value) / 5 for value in descending],
        })
        assert_frame_equal(processed.get_df(), expected_df)
        assert processed.get_eval_id() == 20
Example #14
0
 def process(self, input_eval: Evaluation) -> Evaluation:
     """Re-index the dataframe on ``self._reindex_names`` and keep the eval id."""
     reindexed = input_eval.get_df().set_index(self._reindex_names)
     eval_id = input_eval.get_eval_id()
     return Evaluation(reindexed, eval_id)
Example #15
0
    def process(self, input_eval: Evaluation) -> Evaluation:
        """Group by ``self._input_col_name`` and apply ``self._expansion`` per group.

        The eval id of ``input_eval`` is carried over.
        """
        grouped = input_eval.get_df().groupby(self._input_col_name)
        expanded = grouped.apply(self._expansion)
        return Evaluation(expanded, input_eval.get_eval_id())
Example #16
0
 def process(self, input_eval: Evaluation) -> Evaluation:
     """Apply ``self._compare_to_first`` to the dataframe and keep the eval id."""
     compared = self._compare_to_first(input_eval.get_df())
     eval_id = input_eval.get_eval_id()
     return Evaluation(compared, eval_id)
Example #17
0
 def process(self, input_eval: Evaluation) -> Evaluation:
     """Sort the dataframe by the index level(s) in ``self._sort_names``.

     The eval id of ``input_eval`` is carried over.
     """
     ordered = input_eval.get_df().sort_index(level=self._sort_names)
     eval_id = input_eval.get_eval_id()
     return Evaluation(ordered, eval_id)