def test_get_groups_single_valued_series(self):
    """Check that a single-valued input Series yields single-valued output.

    Regression test, created in response to a bug discovered by George
    Walker.  A one-element Series is passed to group_similar_strings() and
    to match_most_similar(); each is expected to return a one-row DataFrame,
    or a one-element Series when ignore_index=True is given.
    """
    # group_similar_strings() called without ignore_index -> DataFrame
    expected_groups_df = pd.DataFrame(
        [(0, "hello")],
        columns=['group_rep_index', 'group_rep'])
    actual_groups_df = group_similar_strings(
        pd.Series(["hello"]),
        min_similarity=0.6)
    pd.testing.assert_frame_equal(expected_groups_df, actual_groups_df)

    # group_similar_strings() with ignore_index=True -> Series
    expected_groups = pd.Series(["hello"], name='group_rep')
    actual_groups = group_similar_strings(
        pd.Series(["hello"]),
        min_similarity=0.6,
        ignore_index=True)
    pd.testing.assert_series_equal(expected_groups, actual_groups)

    # match_most_similar() called without ignore_index -> DataFrame
    expected_matches_df = pd.DataFrame(
        [(0, "hello")],
        columns=['most_similar_index', 'most_similar_master'])
    actual_matches_df = match_most_similar(
        pd.Series(["hello"]),
        pd.Series(["hello"]),
        min_similarity=0.6)
    pd.testing.assert_frame_equal(expected_matches_df, actual_matches_df)

    # match_most_similar() with ignore_index=True -> Series
    expected_matches = pd.Series(["hello"], name='most_similar_master')
    actual_matches = match_most_similar(
        pd.Series(["hello"]),
        pd.Series(["hello"]),
        min_similarity=0.6,
        ignore_index=True)
    pd.testing.assert_series_equal(expected_matches, actual_matches)
 def test_get_groups_single_df_group_rep_bad_option_value(self):
     """Should raise an exception when group_rep value given is neither 'centroid' nor 'first'.

     The input column is looked up before entering the assertRaises context
     so that only the group_similar_strings() call itself can satisfy the
     expected exception.  A failure while preparing the input is then
     reported as a test error instead of being counted as a pass.
     """
     customer_names = SimpleExample().customers_df['Customer Name']
     with self.assertRaises(Exception):
         group_similar_strings(customer_names,
                               group_rep='nonsense',
                               min_similarity=0.6)
 def test_get_groups_single_df_keep_index(self):
     """Should return a DataFrame equal to SimpleExample's expected_result_centroid_with_index_col
     when group_similar_strings() is called on the customer names with ignore_index=False"""
     example = SimpleExample()
     customer_names = example.customers_df['Customer Name']
     expected = example.expected_result_centroid_with_index_col
     actual = group_similar_strings(customer_names,
                                    min_similarity=0.6,
                                    ignore_index=False)
     pd.testing.assert_frame_equal(expected, actual)
 def test_get_groups_single_df_group_rep_default(self):
     """Should return a Series equal to SimpleExample's expected_result_centroid when
     group_similar_strings() is called with ignore_index=True and no explicit group_rep"""
     example = SimpleExample()
     customer_names = example.customers_df['Customer Name']
     expected = example.expected_result_centroid
     actual = group_similar_strings(customer_names,
                                    min_similarity=0.6,
                                    ignore_index=True)
     pd.testing.assert_series_equal(expected, actual)
    def test_group_similar_strings(self, mock_StringGouper):
        """Mocks StringGrouper to check that group_similar_strings uses it as expected.

        The mocked instance's fit() is set up to return the instance itself and
        its get_groups() to return a sentinel.  The test then expects fit() and
        get_groups() to each be called exactly once, and the sentinel to be
        what group_similar_strings returns.
        """
        sentinel = 'whatever'
        grouper = mock_StringGouper.return_value
        grouper.fit.return_value = grouper
        grouper.get_groups.return_value = sentinel

        # None placeholders stand in for the input series and its ids
        series = None
        series_ids = None
        result = group_similar_strings(series, string_ids=series_ids)

        grouper.fit.assert_called_once()
        grouper.get_groups.assert_called_once()
        self.assertEqual(result, sentinel)