def __init__(self):
    """Set up the source-file list, the cell fills and the reporting constants.

    The value returned by ``Common.change_location(p.DATA)`` is handed to
    ``FileService`` and the files it reports are kept in ``self.ric_files``.
    NOTE(review): ``change_location`` presumably also switches the working
    directory -- confirm against ``Common``.
    """

    def solid_fill(foreground):
        # Every fill shares the same background colour and the solid pattern;
        # only the foreground colour differs.
        return PatternFill(fgColor=foreground, bgColor='C00000',
                           fill_type='solid')

    data_location = Common.change_location(p.DATA)
    self.ric_files = FileService(data_location).get_source_file()

    # Fills, named after the role/colour encoded in each attribute name.
    self.okay = solid_fill('E1F7DC')
    self.amber = solid_fill('F4B042')
    self.header = solid_fill('218c04')
    self.empty = solid_fill('f9462a')
    self.red = solid_fill('f72f11')

    # Installs a process-wide "ignore" filter for all warnings.
    warnings.filterwarnings("ignore")

    # Reporting period and fixed labels.
    self.quarter = 'Q1'
    self.year = '2019'
    self.youth = 'Youth'
    self.all_youth = 'ALL incl. youth'

    self.month_names = [
        'january', 'february', 'march', 'april',
        'may', 'june', 'july', 'august',
        'september', 'october', 'november', 'december',
    ]
    # Strings collected under "no value" (placeholders such as 'n/a').
    self.no_value = ['na', 'n/a', '', '0000-00-00', '*****']
def check_columns_completeness(self):
    """Gather the column summary of every RIC workbook into one Excel report.

    For each file in ``self.ric_files`` the four BAP worksheets (program,
    program youth, quarterly company, annual company) are passed to
    ``self.sheet_columns`` together with the file name minus its last five
    characters. The results are stacked per worksheet kind and written to
    ``00 ALL_RIC_BAP_COLUMNS_FY19_Q1.xlsx``, one tab per kind, without the
    DataFrame index.

    The report is created after ``Common.change_location(p.QA)`` so that a
    relative output path resolves the same way regardless of whether the
    installed pandas opens the file when the writer is constructed or when
    it is saved. NOTE(review): this assumes ``change_location`` switches
    the working directory -- confirm against ``Common``.
    """
    # (worksheet enum member, tab name in the report), in output order.
    sheets = (
        (WS.bap_program, 'Program'),
        (WS.bap_program_youth, 'Program Youth'),
        (WS.bap_company, 'Quarterly Company'),
        (WS.bap_company_annual, 'Annual Company'),
    )
    # One list of per-file frames for each entry of ``sheets``.
    collected = [[] for _ in sheets]

    for fl in self.ric_files:
        Common.change_location(p.DATA)
        wb = openpyxl.load_workbook(fl, data_only=True)
        # Strips a five-character extension such as '.xlsx'.
        ric_file_name = fl[:-5]
        print('-' * 250)
        for (sheet, _), frames in zip(sheets, collected):
            sheet_name = sheet.value
            # ``wb[name]`` replaces the deprecated ``get_sheet_by_name``;
            # both raise KeyError when the worksheet is missing.
            frame = self.sheet_columns(wb[sheet_name], ric_file_name,
                                       sheet_name)
            # ``pd.concat`` silently drops ``None`` entries; skipping them
            # here keeps that tolerance when everything is concatenated
            # once at the end.
            if frame is not None:
                frames.append(frame)

    Common.change_location(p.QA)
    print(os.getcwd())
    # The context manager saves and closes the file on exit;
    # ``ExcelWriter.save()`` is no longer available in pandas 2.x.
    with pd.ExcelWriter('00 ALL_RIC_BAP_COLUMNS_FY19_Q1.xlsx') as writer:
        for (_, tab), frames in zip(sheets, collected):
            # A single concat per tab instead of one per file; an empty
            # frame is still written when there was nothing to collect.
            combined = pd.concat(frames) if frames else pd.DataFrame()
            combined.to_excel(writer, sheet_name=tab, index=False)
def generate_company_matching_result(self):
    """Match new companies to existing ones on ``BasicName`` and save the result.

    ``self.get_company()`` supplies the new and the old company frames;
    both are passed through ``self.generate_basic_name``. Every row of the
    new frame yields one output record carrying its 1-based position, its
    ``FileName``, ``Name`` and ``BasicName``, plus the ``BasicName``,
    ``CompanyID`` and ``Name`` of the first old row with an equal
    ``BasicName`` -- or ``'-'`` for those three fields when there is none.

    A row whose processing raises is reported on stdout and left out of
    the output; its position number is still consumed. The records are
    handed to ``self.file.save_as_csv`` after
    ``CM.change_location(PATH.MATCH)``.
    """
    df_new, df_old = self.get_company()
    new_company = self.generate_basic_name(df_new)
    old_company = self.generate_basic_name(df_old)

    values = []
    for index, (_, company) in enumerate(new_company.iterrows(), start=1):
        company_name = company['BasicName']
        try:
            # Filter once per row instead of once per looked-up field.
            matches = old_company[old_company.BasicName == company_name]
            if len(matches) > 0:
                cid = matches.CompanyID.values[0]
                cname = matches.Name.values[0]
                bname = matches.BasicName.values[0]
            else:
                cid = cname = bname = '-'
            values.append({
                'Basic Index': index,
                'RIC': company.FileName,
                'Basic Index Company Name': company.Name,
                'Basic Name': company.BasicName,
                # The trailing space is part of the existing column name.
                'DC Basic Name ': bname,
                'DimCompany ID': cid,
                'DimCompany Name': cname,
            })
        except Exception as ex:
            # Best effort: report the failing row and carry on.
            print('EXCEPTION >>>{}'.format(ex))

    df = pd.DataFrame(values)
    print(os.getcwd())
    CM.change_location(PATH.MATCH)
    self.file.save_as_csv(df, 'Company_Matching_FY18_Q3.xlsx', self.path,
                          'Company Matched')