def test_clean_phone_numbers_valid_numbers_with_incorrect_regionfield_but_correct_region_string(self):
        """Verify that a default ``region_string`` rescues numbers whose region field is empty.

        The region field used here is missing or wrong for the Canadian rows.
        With ``region_string='CA'`` supplied as a fallback, rows that have no
        country code are still expected to validate.  The row carrying an
        incorrect country code (GB) is still expected to come out as NaN,
        because the fallback is not substituted for a region that is present
        but wrong.  No ``newField`` is given, so the cleaned values replace
        the ``phones_by_country`` column in place.
        """
        mydf = self.mydf.copy(deep=True)

        expected = pd.DataFrame({
            'key': ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'],
            'phones_by_country': [
                # no country code, so it would not be viewed as valid, except
                # that in this case we specify the default region
                self.starbucks_vancouver[2],
                self.london_eye_ticketoff[2],
                np.nan,  # input phone was NaN
                self.boston_mikes_pastry[2],
                # no country code, so it would not be viewed as valid, except
                # that in this case we specify the default region
                self.starbucks_toronto[2],
                # has an incorrect country code (GB), so the (in this case
                # correct) substitute region_string is not used
                np.nan,
                self.madrid_laMallorquina_bakery[2],
                self.london_eye_ticketoff2[2],
            ],
        }).set_index('key')

        # replace in place
        clean_phone_numbers(mydf, phonenum_field='phones_by_country', region_string='CA',
                            region_field='regions_valid_but_missing_or_invalid_canada', use_orig_on_error=False)

        # the original phone field itself should now hold the formatted + validated phones
        assert_series_equal(mydf['phones_by_country'], expected['phones_by_country'])
    def test_clean_phone_numbers_some_bad_phonenums_and_put_into_new_field_using_region_string_use_orig_on_error(self):
        """Check cleaning of the ``badphones`` column into ``correctedPhones`` with ``use_orig_on_error=True``.

        A fixed ``region_string`` of 'CA' is used and no region field.  The
        first two rows are expected to become the formatted Toronto number;
        the remaining expected entries are NaN or non-phone strings, which
        presumably mirror the unparseable inputs being carried over as-is
        (the input column is not visible here -- confirm against the fixture).
        """
        untouched = self.mydf
        working = self.mydf.copy(deep=True)

        toronto_formatted = self.starbucks_toronto[2]
        row_keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
        wanted_values = [
            toronto_formatted,
            toronto_formatted,
            np.nan,
            'BILL_TO',
            '  ',
            '',
            'SHIP_TO',
            'BILL_TO',
        ]
        wanted = pd.DataFrame({'key': row_keys, 'correctedPhones': wanted_values}).set_index('key')

        clean_phone_numbers(
            working,
            phonenum_field='badphones',
            newField='correctedPhones',
            region_string='CA',
            region_field=None,
            use_orig_on_error=True,
        )

        # the new column carries the result ...
        assert_series_equal(working['correctedPhones'], wanted['correctedPhones'])
        # ... and the 'goodphones' column must be exactly as it was before the call
        assert_series_equal(working['goodphones'], untouched['goodphones'])
    def test_clean_phone_numbers_all_valid_phonenums_and_replace_orig_field_using_region_field(self):
        """Check that cleaned numbers overwrite ``phones_by_country`` when no ``newField`` is given.

        Regions come from the ``regions_all_valid`` column, so every valid
        phone number has a valid region code to be parsed with.
        """
        working = self.mydf.copy(deep=True)

        row_keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
        wanted_values = [
            self.starbucks_vancouver[2],
            self.london_eye_ticketoff[2],
            np.nan,
            self.boston_mikes_pastry[2],
            self.starbucks_toronto[2],
            self.ontario_tim_hortons[2],
            self.madrid_laMallorquina_bakery[2],
            self.london_eye_ticketoff2[2],
        ]
        wanted = pd.DataFrame({'key': row_keys, 'phones_by_country': wanted_values}).set_index('key')

        clean_phone_numbers(
            working,
            phonenum_field='phones_by_country',
            region_field='regions_all_valid',
            use_orig_on_error=False,
        )

        # the source column itself now holds the formatted + validated numbers
        assert_series_equal(working['phones_by_country'], wanted['phones_by_country'])
    def test_clean_phone_numbers_valid_numbers_but_no_region_field_values_are_nullified(self):
        """Check that valid numbers become NaN when their region value is missing or wrong.

        The region field used here lacks or mis-states the region for the
        Canadian rows, and no fallback ``region_string`` is supplied, so those
        rows are expected to be treated as invalid and nullified in the new
        ``correctedPhones`` column.
        """
        untouched = self.mydf
        working = self.mydf.copy(deep=True)

        row_keys = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
        wanted_values = [
            np.nan,  # no country code, so not viewed as valid
            self.london_eye_ticketoff[2],
            np.nan,  # input phone was NaN
            self.boston_mikes_pastry[2],
            np.nan,  # no country code
            np.nan,  # incorrect country code (GB)
            self.madrid_laMallorquina_bakery[2],
            self.london_eye_ticketoff2[2],
        ]
        wanted = pd.DataFrame({'key': row_keys, 'correctedPhones': wanted_values}).set_index('key')

        # deliberately no region_string fallback (e.g. 'CA') in this call
        clean_phone_numbers(
            working,
            phonenum_field='phones_by_country',
            newField='correctedPhones',
            region_field='regions_valid_but_missing_or_invalid_canada',
            use_orig_on_error=False,
        )

        # the new column carries the result ...
        assert_series_equal(working['correctedPhones'], wanted['correctedPhones'])
        # ... and the 'goodphones' column must be exactly as it was before the call
        assert_series_equal(working['goodphones'], untouched['goodphones'])
 def test_input_df_works_with_pandas_generated_keys(self):
     """Run ``clean_phone_numbers`` on a deep copy of ``self.mydf_generatedkeys``.

     The call cleans the 'goodphones' column into a new 'correctedPhones'
     column using the fixed region string 'CA' and no region field.

     NOTE(review): the lines visible here make no assertion on the result,
     so the check appears to be only that the call completes without
     raising -- confirm against the rest of the file.
     """
     # any dataframe with pandas generated keys will work because they are unique
     mydf = self.mydf_generatedkeys.copy(deep=True)
     clean_phone_numbers(mydf, phonenum_field='goodphones', newField='correctedPhones', region_string='CA',
                         region_field=None,
                         use_orig_on_error=False)