Esempio n. 1
0
def test_generalize_names_duplcheck():
    """Check that ``generalize_names_duplcheck`` yields a duplicate-free 'Name' column.

    The frame is parsed from ``csv`` wrapped in a ``StringIO`` (presumably a
    module-level CSV string -- defined outside this function). The test first
    confirms that applying ``generalize_names`` alone produces at least one
    duplicated value, then confirms that the frame returned by
    ``generalize_names_duplcheck`` has no duplicated 'Name' values.
    """
    frame = pd.read_csv(StringIO(csv))

    # Precondition: plain generalization must collide for at least one row,
    # otherwise the second half of the test would prove nothing.
    generalized = frame['Name'].apply(generalize_names)
    assert any(generalized.duplicated())

    # After the duplicate-aware pass, no 'Name' value may repeat.
    deduplicated = generalize_names_duplcheck(df=frame, col_name='Name')
    repeated_flags = deduplicated['Name'].duplicated()
    assert not any(repeated_flags)
def test_generalize_names_duplcheck():
    """Verify ``generalize_names_duplcheck`` removes duplicated generalized names.

    Reads ``tests/data/csv/DKSalaries.csv`` resolved against the current
    working directory, so the test must be launched from the directory that
    contains ``tests``. It asserts that:

    1. applying ``generalize_names`` to the 'Name' column produces at least
       one duplicate, and
    2. the 'Name' column of the frame returned by
       ``generalize_names_duplcheck`` contains no duplicates.
    """
    path = os.path.join(os.getcwd(), 'tests', 'data', 'csv', 'DKSalaries.csv')
    df = pd.read_csv(path)

    # duplicates before: the fixture is only meaningful if naive
    # generalization actually collides.
    has_duplicates = df['Name'].apply(generalize_names).duplicated().any()
    assert has_duplicates

    # no duplicates once the duplicate-aware variant has been applied
    df_new = generalize_names_duplcheck(df=df, col_name='Name')
    still_duplicated = df_new['Name'].duplicated().any()
    assert not still_duplicated
def test_generalize_names_duplcheck():
    """Ensure the duplicate check leaves every generalized name unique.

    The input frame comes from ``tests/data/csv/DKSalaries.csv`` under the
    current working directory (the path is built from ``os.getcwd()``, so
    running the test from another directory will fail to find the file).

    First the test establishes that ``generalize_names`` on its own maps at
    least two entries of the 'Name' column to the same value; then it checks
    that ``generalize_names_duplcheck`` returns a frame whose 'Name' column
    has no repeated values.
    """
    path = os.path.join(os.getcwd(), 'tests', 'data', 'csv', 'DKSalaries.csv')
    df = pd.read_csv(path)

    # duplicates before
    generalized = df['Name'].apply(generalize_names)
    assert generalized.duplicated().any()

    # no duplicates
    df_new = generalize_names_duplcheck(df=df, col_name='Name')
    assert not df_new['Name'].duplicated().any()