Esempio n. 1
0
def save_dataframes_unique(file):
    """Export the branch and headquarters tables of *file* to an Excel file.

    Loads columns ``A:O`` of worksheet ``0`` through ``get_dataframe``,
    extracts the branch and headquarters frames, passes each one through
    ``find_duplicates`` and hands both to ``create_excel_file`` under the
    sheet keys ``'filiais'`` and ``'matrizes'``.  The writer returned by
    ``create_excel_file`` is then saved.

    Args:
        file: Source spreadsheet, forwarded unchanged to ``get_dataframe``.
    """
    source = get_dataframe(file, worksheet=0, usecols="A:O")
    # NOTE(review): given this function's name, find_duplicates presumably
    # returns the de-duplicated rows -- confirm against its definition.
    unique_branches = find_duplicates(find_branches(source))
    unique_headquarters = find_duplicates(find_headquarters(source))
    writer = create_excel_file(
        {'filiais': unique_branches, 'matrizes': unique_headquarters},
        filename='petroleo_empresas',
    )
    # NOTE(review): if this writer is a pandas.ExcelWriter, ``save()`` was
    # removed in pandas 2.0 in favour of ``close()`` -- confirm before upgrading.
    writer.save()
def test_unique_headquarters():
    """Check the shape of the headquarters table after ``find_duplicates``.

    Worksheet ``0`` of ``FILE`` is expected to yield a 782 x 15 result.
    """
    expected_shape = (782, 15)
    frame = get_dataframe(FILE, worksheet=0, usecols=USECOLS)
    unique_headquarters = find_duplicates(find_headquarters(frame))
    assert unique_headquarters.shape == expected_shape
def test_get_headquarters():
    """Check that every row from ``find_headquarters`` has '/0001' in its CNPJ.

    The regex uses a negative lookahead, so it matches only CNPJ strings
    that do NOT contain ``/0001``; no row may match it.
    """
    frame = get_dataframe(FILE, worksheet=0, usecols=USECOLS)
    headquarters = find_headquarters(frame)
    # NOTE(review): '/0001' is presumably the headquarters marker inside a
    # CNPJ number -- confirm with the data owner.
    lacks_marker = headquarters['CNPJ'].str.contains(
        '^(?!.*/0001).*$', regex=True)
    offending_rows = headquarters[lacks_marker]
    assert len(offending_rows) == 0
def test_get_branches(worksheet, result):
    """Check the shape of the branch table extracted from one worksheet.

    Args:
        worksheet: Worksheet selector forwarded to ``get_dataframe``.
        result: Expected number of rows in the ``find_branches`` output;
            the column count is expected to be 15.

    NOTE(review): both arguments are presumably supplied by a parametrize
    decorator that is not visible here -- confirm.
    """
    frame = get_dataframe(FILE, worksheet=worksheet, usecols=USECOLS)
    branches = find_branches(frame)
    assert branches.shape == (result, 15)
def test_read_file(file=FILE, usecols=USECOLS):
    """Check that ``get_dataframe`` loads worksheet ``0`` as expected.

    The result must be a pandas DataFrame with 15 columns whose first row
    holds ``'Produtos'`` in the ``'Categoria'`` column.

    Args:
        file: Spreadsheet to read; defaults to ``FILE``.
        usecols: Column selection forwarded to ``get_dataframe``; defaults
            to ``USECOLS``.
    """
    loaded = get_dataframe(file, worksheet=0, usecols=usecols)
    # pd.DataFrame is the public alias of pd.core.frame.DataFrame.
    assert isinstance(loaded, pd.DataFrame)
    first_row = loaded.iloc[0]
    assert first_row['Categoria'] == 'Produtos'
    column_count = loaded.shape[1]
    assert column_count == 15