def test_fit_with_df_input_without_column_arg(self, example_train_df):
        """
        Fitting on a DataFrame without a ``column`` argument must fail.

        When the initializer receives no column argument, the input passed
        to ``fit`` should be a pd.Series; handing it a DataFrame is expected
        to raise a TypeError.
        """
        vectorizer = PandasTfidfVectorizer()

        with pytest.raises(TypeError):
            vectorizer.fit(example_train_df)
    def test_missing_values_fit(self, example_missing_values_df):
        """
        Fitting on training data that contains missing values must fail.

        The transformer is expected to raise a ValueError in that case.
        """
        vectorizer = PandasTfidfVectorizer(column="text")

        with pytest.raises(ValueError):
            vectorizer.fit(example_missing_values_df)
    def test_fit_with_series_input_with_column_arg(self, example_series):
        """
        Fitting on a Series while a ``column`` argument is set must fail.

        When a value is given for the column keyword argument, the input
        passed to ``fit`` should be a pd.DataFrame; handing it a Series is
        expected to raise a TypeError.
        """
        vectorizer = PandasTfidfVectorizer(column="text")

        with pytest.raises(TypeError):
            vectorizer.fit(example_series)
    def test_missing_column(
        self, example_train_df, example_test_df_diff_column
    ):
        """
        Transforming data that lacks the required column must fail.

        The transformer is fitted on the training frame first; transforming
        a test frame without the required column is then expected to raise
        a KeyError.
        """
        vectorizer = PandasTfidfVectorizer(column="text")
        vectorizer.fit(example_train_df)

        with pytest.raises(KeyError):
            vectorizer.transform(example_test_df_diff_column)
    def test_example(self, example_train_df):
        """
        Fit and transform the training frame and check the result.

        The transformer is configured with ``column="text"``; its output is
        compared against a hard-coded expected frame.
        """
        vectorizer = PandasTfidfVectorizer(column="text")
        vectorizer.fit(example_train_df)
        actual = vectorizer.transform(example_train_df)

        expected_columns = {
            "num": pd.Series([3, 4, 4]),
            "animal": pd.Series([0.0, 1.0, 0.0]),
            "house": pd.Series([1.0, 0.0, 1.0]),
        }
        expected = pd.DataFrame(expected_columns)

        # Column order is irrelevant, so both frames are sorted by column
        # label before they are compared.
        actual_sorted = actual.sort_index(axis=1)
        expected_sorted = expected.sort_index(axis=1)
        pd.testing.assert_frame_equal(actual_sorted, expected_sorted)
    def test_series_input(self, example_series):
        """
        Fit and transform a Series when no ``column`` argument is given.

        Without a value for the column keyword argument the transformer is
        fed the series directly; its output is compared against a hard-coded
        expected frame.
        """
        vectorizer = PandasTfidfVectorizer()
        vectorizer.fit(example_series)
        actual = vectorizer.transform(example_series)

        expected_columns = {
            "animal": pd.Series([0.0, 1.0, 0.0]),
            "house": pd.Series([1.0, 0.0, 1.0]),
        }
        expected = pd.DataFrame(expected_columns)

        # Sort both frames by column label so column order does not matter.
        actual_sorted = actual.sort_index(axis=1)
        expected_sorted = expected.sort_index(axis=1)
        pd.testing.assert_frame_equal(actual_sorted, expected_sorted)