def test_generalize_names_duplcheck():
    """Check that generalize_names_duplcheck removes duplicate names.

    Applying ``generalize_names`` directly to the ``'Name'`` column must
    produce at least one duplicate, while the frame returned by
    ``generalize_names_duplcheck`` must have none in that column.
    """
    # `csv` is defined outside this block; presumably the CSV text fixture.
    frame = pd.read_csv(StringIO(csv))

    # Baseline: plain generalization yields colliding names.
    generalized = frame['Name'].apply(generalize_names)
    assert any(generalized.duplicated())

    # After the duplicate-aware variant, every name is unique.
    deduplicated = generalize_names_duplcheck(df=frame, col_name='Name')
    assert not any(deduplicated['Name'].duplicated())
def test_generalize_names_duplcheck():
    """Check that generalize_names_duplcheck removes duplicate names.

    Loads ``tests/data/csv/DKSalaries.csv`` and verifies that applying
    ``generalize_names`` directly to the ``'Name'`` column produces at
    least one duplicate, while the frame returned by
    ``generalize_names_duplcheck`` has none in that column.
    """
    # NOTE: the fixture path is built from the current working directory,
    # so the test only finds the file when run from the directory that
    # contains `tests/`.
    path = os.path.join(os.getcwd(), 'tests', 'data', 'csv', 'DKSalaries.csv')
    df = pd.read_csv(path)

    # duplicates before
    dupl = any(df['Name'].apply(generalize_names).duplicated())
    assert dupl

    # no duplicates
    df_new = generalize_names_duplcheck(df=df, col_name='Name')
    no_dupl = any(df_new['Name'].duplicated())
    assert not no_dupl
def test_generalize_names_duplcheck():
    """Verify duplicate handling of generalize_names_duplcheck.

    Reads ``tests/data/csv/DKSalaries.csv``. The test first confirms that
    ``generalize_names`` applied to the ``'Name'`` column yields duplicated
    values, then confirms that the ``'Name'`` column of the frame returned
    by ``generalize_names_duplcheck`` contains no duplicates.
    """
    # NOTE: the path is resolved against os.getcwd(), so the working
    # directory must be the one that holds the `tests/` folder.
    path = os.path.join(os.getcwd(), 'tests', 'data', 'csv', 'DKSalaries.csv')
    df = pd.read_csv(path)

    # Before: plain generalization must collide on at least one name.
    has_duplicates = any(df['Name'].apply(generalize_names).duplicated())
    assert has_duplicates

    # After: the duplicate-checking variant must leave every name unique.
    df_new = generalize_names_duplcheck(df=df, col_name='Name')
    still_duplicated = any(df_new['Name'].duplicated())
    assert not still_duplicated