def test_no_date_format(caplog, mixed_excel):
    """A sheet with no date in its name should raise ValueError and log a warning."""
    caplog.set_level(logging.WARNING, logger='report')
    with pytest.raises(ValueError):
        read_bucket_sheet('Tester_CAN31_no_date', mixed_excel)
    # Every captured record is at WARNING or above, and the failure was logged.
    assert all(rec.levelno >= logging.WARNING for rec in caplog.records)
    assert 'Read buckets failed' in caplog.text
def test_empty_sheets(sheetname, caplog, mixed_excel):
    """An empty sheet is tolerated but logs a 'no account numbers' warning.

    `sheetname` is supplied by a parametrize decorator (defined elsewhere).
    """
    caplog.set_level(logging.WARNING, logger='report')
    read_bucket_sheet(sheetname, mixed_excel)
    # Only WARNING-or-higher records expected, mentioning the empty sheet.
    assert all(rec.levelno >= logging.WARNING for rec in caplog.records)
    assert 'no account numbers' in caplog.text
def test_bad_col_name(caplog, mixed_excel):
    """A misspelled column name should raise KeyError and name the bad column."""
    caplog.set_level(logging.WARNING, logger='report')
    with pytest.raises(KeyError):
        read_bucket_sheet('Tester_CAN31_Bad_Col_name', mixed_excel)
    # All log output is WARNING or above and identifies the offending column.
    assert all(rec.levelno >= logging.WARNING for rec in caplog.records)
    assert 'specified col' in caplog.text
    assert 'Current Dat' in caplog.text
def test_bad_date_format(caplog, mixed_excel):
    """A sheet whose name has an unparseable date still yields a datetime column.

    The date-from-sheetname parse fails (and warns), but the 'Date' column
    of the returned frame is still a datetime64 dtype.
    """
    caplog.set_level(logging.WARNING, logger='report')
    frame = read_bucket_sheet('Tester_CAN31_dateformat', mixed_excel)
    assert all(rec.levelno >= logging.WARNING for rec in caplog.records)
    assert 'Read buckets failed' in caplog.text
    assert np.issubdtype(frame['Date'], np.datetime64)
def test_non_standard_sheet(caplog, mixed_excel):
    """A sheet name that misses the expected pattern still parses.

    The mismatch is logged (mentioning the sheet name and 'pattern'), but the
    result has a datetime 'Date' column and the sheet name as its 'Bucket'.
    """
    caplog.set_level(logging.WARNING, logger='report')
    frame = read_bucket_sheet('Tester_CAN31', mixed_excel)
    assert all(rec.levelno >= logging.WARNING for rec in caplog.records)
    assert 'Sheetname Tester_CAN31' in caplog.text
    assert 'pattern' in caplog.text
    assert np.issubdtype(frame['Date'], np.datetime64)
    assert (frame['Bucket'] == 'Tester_CAN31').all()
# end%%
# %% Get inputs
rpc_fn = os.path.join('Real_Reports', 'ALL_RPC-7-3_2018.xlsx')
bucket_fn = os.path.join('Real_Reports', 'Daily_Queues_by_Bucket.7.3.18.xls')
# end%%

# %%
# Account ids are read as strings so leading zeros survive.
rpc = pd.read_excel(rpc_fn, skiprows=3, converters={'Acct Id Acc': str})
# end%%

# %% Get Bucket file
excel_bucket = pd.ExcelFile(bucket_fn)
# One parsed frame per sheet, stacked into a single bucket table.
bucket_dfs = [read_bucket_sheet(name, excel_bucket)
              for name in excel_bucket.sheet_names]
buckets = pd.concat(bucket_dfs, ignore_index=True)
buckets.dropna(subset=['Acct_Num'], inplace=True)
# buckets['Acct_Num_new'] = buckets.index.map(lambda x: 'Acct {}'.format(x))
# acct_dict = buckets[['Acct_Num','Acct_Num_new']].set_index('Acct_Num')['Acct_Num_new'].to_dict()
buckets
# end%%

# %% Now we need to get all acct numbers and replace them
# Union of account numbers from both sources, de-duplicated, then assigned
# an anonymized replacement id derived from the row index.
all_act = pd.concat([buckets['Acct_Num'], rpc['Acct Id Acc']],
                    ignore_index=True)
all_act.dropna(inplace=True)
all_act.drop_duplicates(inplace=True)
all_act = all_act.to_frame('Acct_Num')
all_act['Acct_Num_new'] = all_act.index.map(lambda x: 'Acct {}'.format(x))