def save_dataframes_unique(file):
    """Build the branch and headquarters tables from ``file`` and save them.

    The input is loaded with ``get_dataframe`` (worksheet 0, columns "A:O"),
    split with ``find_branches`` and ``find_headquarters``, and each part is
    passed through ``find_duplicates`` before both are handed to
    ``create_excel_file`` under the keys 'filiais' and 'matrizes'.

    Args:
        file: Input forwarded unchanged to ``get_dataframe``.
    """
    source = get_dataframe(file, worksheet=0, usecols="A:O")

    # NOTE(review): find_duplicates is defined elsewhere; given this
    # function's name it presumably yields de-duplicated rows -- confirm.
    unique_branches = find_duplicates(find_branches(source))
    unique_headquarters = find_duplicates(find_headquarters(source))

    writer = create_excel_file(
        {'filiais': unique_branches, 'matrizes': unique_headquarters},
        filename='petroleo_empresas',
    )
    # NOTE(review): if ``writer`` is a pandas ExcelWriter, ``save()`` was
    # removed in pandas 2.0 in favour of ``close()`` -- confirm before
    # upgrading pandas.
    writer.save()
def test_unique_headquarters():
    """Check the shape of the headquarters table after ``find_duplicates``.

    Loads worksheet 0 of ``FILE`` and expects the processed headquarters
    frame to have exactly 782 rows and 15 columns.
    """
    expected_shape = (782, 15)
    source = get_dataframe(FILE, worksheet=0, usecols=USECOLS)
    unique_headquarters = find_duplicates(find_headquarters(source))
    assert unique_headquarters.shape == expected_shape
def test_get_headquarters():
    """Check that every row from ``find_headquarters`` has '/0001' in CNPJ.

    The regex uses a negative lookahead, so it matches only CNPJ values that
    do *not* contain '/0001'; the test passes when no such row exists.
    """
    source = get_dataframe(FILE, worksheet=0, usecols=USECOLS)
    headquarters = find_headquarters(source)

    # True for rows whose CNPJ lacks the '/0001' marker.
    lacks_marker = headquarters['CNPJ'].str.contains(
        '^(?!.*/0001).*$', regex=True)
    non_headquarters = headquarters[lacks_marker]

    assert len(non_headquarters) == 0
def test_get_branches(worksheet, result):
    """Check the shape of the branches table for a given worksheet.

    Args:
        worksheet: Worksheet selector forwarded to ``get_dataframe``.
        result: Expected number of rows returned by ``find_branches``.

    NOTE(review): the source of ``worksheet`` and ``result`` (parametrization
    or fixtures) is not visible in this block -- confirm.
    """
    source = get_dataframe(FILE, worksheet=worksheet, usecols=USECOLS)
    branches = find_branches(source)
    expected_shape = (result, 15)
    assert branches.shape == expected_shape
def test_read_file(file=FILE, usecols=USECOLS):
    """Check that ``get_dataframe`` returns the expected DataFrame layout.

    Verifies, in order, that the result is a pandas DataFrame, that the
    'Categoria' value of the first row is 'Produtos', and that the frame has
    15 columns.

    Args:
        file: Input forwarded to ``get_dataframe`` (defaults to ``FILE``).
        usecols: Column selection forwarded to ``get_dataframe``
            (defaults to ``USECOLS``).
    """
    frame = get_dataframe(file, worksheet=0, usecols=usecols)

    # pd.DataFrame is the public alias of pd.core.frame.DataFrame.
    assert isinstance(frame, pd.DataFrame)

    first_row = frame.iloc[0]
    assert first_row['Categoria'] == 'Produtos'

    column_count = frame.shape[1]
    assert column_count == 15