Ejemplo n.º 1
0
    def test_recommend_method_wrong_type(self):
        """
        Negative test

        data: plain Python list (unsupported type)

        Verifies that recommend_method raises a TypeError when the data is
        handed over as a list.
        """
        # 1. Arrange: a list that includes a missing value
        unsupported_input = [2, 4, np.nan, 1]
        # 2. Act & 3. Assert
        self.assertRaises(TypeError, recommend_method, unsupported_input)
Ejemplo n.º 2
0
    def test_recommend_method_df_wrong_column(self):
        """
        Negative test

        data: Correct data frame (df_breast_cancer)
        column: 'z' (not a column of df_breast_cancer)

        Verifies that recommend_method raises a ValueError when the
        requested column does not exist in the data.
        """
        # 1. Arrange
        breast_cancer_df = generate_df_breast_cancer()
        # 2. Act & 3. Assert
        self.assertRaises(
            ValueError, recommend_method, breast_cancer_df, column='z')
Ejemplo n.º 3
0
    def test_recommend_method_col_for_series(self):
        """
        Negative test

        data: Correct series (ts_cat)
        column: 'a' (a series can't have columns)

        Verifies that recommend_method raises a ValueError when a column is
        given together with a series.
        """
        # 1. Arrange
        cat_series = generate_ts_cat()
        # 2. Act & 3. Assert
        self.assertRaises(ValueError, recommend_method, cat_series, 'a')
Ejemplo n.º 4
0
    def test_recommend_method_df_num(self):
        """
        Positive test

        data: Correct dataframe (df_sales)

        The df_sales fixture is expected to hold no categorical values, so
        the recommended method should be imputation using k-NN.
        """
        # 1. Arrange
        sales_df = generate_df_sales()
        expected = 'imputation using k-NN'
        # 2. Act
        recommendation = recommend_method(sales_df, title_only=True)
        # 3. Assert
        self.assertEqual(recommendation, expected)
Ejemplo n.º 5
0
    def test_recommend_method_series_num_no_ts(self):
        """
        Positive test

        data: Correct series (example_series)

        The example_series fixture is expected to be numerical without a
        datetime index, so the recommended method should be mean
        substitution.
        """
        # 1. Arrange
        numeric_series = generate_example_series()
        expected = 'mean substitution'
        # 2. Act
        recommendation = recommend_method(numeric_series, title_only=True)
        # 3. Assert
        self.assertEqual(recommendation, expected)
Ejemplo n.º 6
0
    def test_recommend_method_series_cat(self):
        """
        Positive test

        data: Correct series (ts_cat)

        The ts_cat fixture is expected to hold categorical values, so the
        recommended method should be random sample imputation.
        """
        # 1. Arrange
        cat_series = generate_ts_cat()
        expected = 'random sample imputation'
        # 2. Act
        recommendation = recommend_method(cat_series, title_only=True)
        # 3. Assert
        self.assertEqual(recommendation, expected)
Ejemplo n.º 7
0
    def test_recommend_method_df_cat(self):
        """
        Positive test

        data: Correct dataframe (df_breast_cancer)

        The df_breast_cancer fixture is expected to hold categorical values,
        so the recommended method should be most-frequent substitution.
        """
        # 1. Arrange
        breast_cancer_df = generate_df_breast_cancer()
        expected = 'most-frequent substitution'
        # 2. Act
        recommendation = recommend_method(breast_cancer_df, title_only=True)
        # 3. Assert
        self.assertEqual(recommendation, expected)
Ejemplo n.º 8
0
    def test_recommend_method_df_col_cat(self):
        """
        Positive test

        data: Correct dataframe (df_breast_cancer)
        column: 'class'

        The 'class' column is expected to hold categorical values, so the
        recommended method should be logistic regression imputation.
        """
        # 1. Arrange
        breast_cancer_df = generate_df_breast_cancer()
        expected = 'logistic regression imputation'
        # 2. Act
        recommendation = recommend_method(
            breast_cancer_df, 'class', title_only=True)
        # 3. Assert
        self.assertEqual(recommendation, expected)
Ejemplo n.º 9
0
    def test_recommend_method_series_timeseries(self):
        """
        Positive test

        data: Correct series (ts_airgap)

        The ts_airgap fixture is expected to be numerical with a datetime
        index, so the recommended method should be interpolation with
        seasonal adjustment.
        """
        # 1. Arrange
        airgap_series = generate_ts_airgap()
        expected = 'interpolation with seasonal adjustment'
        # 2. Act
        recommendation = recommend_method(airgap_series, title_only=True)
        # 3. Assert
        self.assertEqual(recommendation, expected)
Ejemplo n.º 10
0
    def test_recommend_method_df_process_description(self):
        """
        Positive test

        data: Correct dataframe (df_breast_cancer)
        title_only: not passed (the original description gives it as False)

        Without title_only the full message should be returned; for this
        data frame it is expected to span 4 lines.
        """
        # 1. Arrange
        breast_cancer_df = generate_df_breast_cancer()
        # 2. Act
        full_message = recommend_method(breast_cancer_df)
        # n newline characters separate n + 1 lines
        line_count = full_message.count('\n') + 1
        # 3. Assert
        self.assertEqual(line_count, 4)
Ejemplo n.º 11
0
    def test_recommend_method_df_col_little_na(self):
        """
        Positive test

        data: Correct dataframe (example_df_divcols)
        column: 'd'

        Column 'd' is expected to be numerical with less than 10% of its
        values missing, and the dataframe to have no datetime index, so the
        recommended method should be mean substitution.
        """
        # 1. Arrange
        divcols_df = generate_example_df_divcols()
        expected = 'mean substitution'
        # 2. Act
        recommendation = recommend_method(divcols_df, 'd', title_only=True)
        # 3. Assert
        self.assertEqual(recommendation, expected)
Ejemplo n.º 12
0
    def test_recommend_method_df_col_low_corr(self):
        """
        Positive test

        data: Correct dataframe (example_df_divcols)
        column: 'h'

        Column 'h' is expected to be numerical with more than 10% of its
        values missing and only low (<= 0.8) correlation with the other
        columns, and the dataframe to have no datetime index. The test
        expects imputation using k-NN to be recommended.
        """
        # 1. Arrange
        divcols_df = generate_example_df_divcols()
        expected = 'imputation using k-NN'
        # 2. Act
        recommendation = recommend_method(divcols_df, 'h', title_only=True)
        # 3. Assert
        self.assertEqual(recommendation, expected)
Ejemplo n.º 13
0
    def test_recommend_method_df_col_ts(self):
        """
        Positive test

        data: Correct dataframe (example_df_ts)
        column: 'airgap'

        The 'airgap' column is expected to be numerical and the dataframe to
        have a datetime index, so the recommended method should be
        interpolation with seasonal adjustment.
        """
        # 1. Arrange
        ts_df = generate_example_df_ts()
        expected = 'interpolation with seasonal adjustment'
        # 2. Act
        recommendation = recommend_method(ts_df, 'airgap', title_only=True)
        # 3. Assert
        self.assertEqual(recommendation, expected)