def upload_previous_year_from_file(file_upload, year):
    """Validate and process an uploaded previous-year actuals spreadsheet.

    Opens the workbook (worksheet name must start with VALID_WS_NAME),
    runs the upload, and records any fatal error on ``file_upload``
    before re-raising it to the caller.

    Raises:
        UploadFileFormatError: the file is not a valid .xlsx or lacks
            the expected worksheet.
        UploadFileDataError, ArchiveYearError: the data or the target
            year is invalid.
    """
    try:
        workbook, worksheet = validate_excel_file(file_upload, VALID_WS_NAME)
    except UploadFileFormatError as ex:
        set_file_upload_fatal_error(
            file_upload,
            str(ex),
            str(ex),
        )
        # Bare raise preserves the original traceback.
        raise
    try:
        upload_previous_year(worksheet, year, file_upload)
    except (UploadFileDataError, ArchiveYearError) as ex:
        set_file_upload_fatal_error(
            file_upload,
            str(ex),
            str(ex),
        )
        raise
    finally:
        # BUG FIX: the original wrote ``workbook.close`` (attribute access,
        # never invoked), so the workbook was never actually closed.
        # ``finally`` also guarantees closure on unexpected exceptions.
        workbook.close()
def validate_excel_file(file_upload, worksheet_title_pattern=""):
    """Open the uploaded .xlsx and return ``(workbook, worksheet)``.

    If ``worksheet_title_pattern`` is given, the first worksheet whose
    title starts with that pattern is returned; otherwise the active
    worksheet is returned. The caller is responsible for closing the
    returned workbook.

    Raises:
        BadZipFile: the file is not a valid .xlsx archive.
        UploadFileFormatError: no worksheet matches the pattern.
    """
    try:
        # read_only=True makes the opening process much faster.
        # data_only=True reads values from cells with formulas;
        # otherwise the formula is returned instead of the value.
        if file_upload.file_location == FileUpload.LOCALFILE:
            excelname = file_upload.document_file_name
        else:
            excelname = file_upload.s3_document_file
        workbook = load_workbook(
            excelname,
            read_only=True,
            data_only=True,
        )
    except BadZipFile:
        set_file_upload_fatal_error(
            file_upload,
            "The file is not in the correct format (.xlsx)",
            "BadZipFile (user file is not .xlsx)",
        )
        raise
    # Open the first worksheet if no name pattern was provided.
    # There are several checks when the file is uploaded, so if the
    # workbook is wrong it will be spotted later on.
    if not worksheet_title_pattern:
        return workbook, workbook.active
    for ws in workbook:
        # str.startswith replaces the original slice comparison.
        if ws.title.startswith(worksheet_title_pattern):
            return workbook, ws
    # Wrong file: close the workbook before raising so the read-only
    # file handle is not leaked (the caller never sees the workbook).
    workbook.close()
    raise UploadFileFormatError(
        f"File appears to be incorrect: "
        f"it does not contain a worksheet "
        f"with name starting by {worksheet_title_pattern}")
def upload_budget_from_file(file_upload, year):
    """Validate and process an uploaded budget spreadsheet.

    Finds the worksheet whose name starts with "Budgets", checks its
    header row against EXPECTED_BUDGET_HEADERS, and uploads the budget
    figures. Any fatal error is recorded on ``file_upload`` before
    being re-raised.

    Raises:
        UploadFileFormatError: bad file, missing worksheet or bad headers.
        UploadFileDataError: invalid data while uploading.
    """
    try:
        workbook, worksheet = validate_excel_file(file_upload, "Budgets")
    except UploadFileFormatError as ex:
        set_file_upload_fatal_error(
            file_upload,
            str(ex),
            str(ex),
        )
        raise
    try:
        header_dict = xslx_header_to_dict(worksheet[1])
        try:
            check_header(header_dict, EXPECTED_BUDGET_HEADERS)
        except UploadFileFormatError as ex:
            set_file_upload_fatal_error(
                file_upload,
                str(ex),
                str(ex),
            )
            raise
        try:
            upload_budget(worksheet, year, header_dict, file_upload)
        except UploadFileDataError as ex:
            set_file_upload_fatal_error(
                file_upload,
                str(ex),
                str(ex),
            )
            raise
    finally:
        # BUG FIX: the original wrote ``workbook.close`` without calling
        # it, so the workbook was never closed on any path. ``finally``
        # also covers exceptions raised by xslx_header_to_dict.
        workbook.close()
def validate_trial_balance_report(file_upload, month_number, year):
    """Open an uploaded trial-balance report and verify its format.

    Returns ``(workbook, worksheet)`` on success; the caller is
    responsible for closing the workbook. Any fatal error is recorded
    on ``file_upload`` before being re-raised.

    Raises:
        UploadFileFormatError: bad file, or the worksheet does not match
            the expected trial-balance layout for month/year.
    """
    try:
        workbook, worksheet = validate_excel_file(file_upload)
    except UploadFileFormatError as ex:
        set_file_upload_fatal_error(
            file_upload,
            str(ex),
            str(ex),
        )
        raise
    try:
        check_trial_balance_format(worksheet, month_number, year)
    except UploadFileFormatError as ex:
        set_file_upload_fatal_error(
            file_upload,
            str(ex),
            str(ex),
        )
        # BUG FIX: the original wrote ``workbook.close`` without calling
        # it, leaking the workbook on the error path.
        workbook.close()
        raise
    return workbook, worksheet
def archive_current_year():
    """Archive the currently selected financial year's forecast figures.

    Validates the year, copies the annotated forecast rows into a
    temporary archive table (with financial-code validation per row),
    and, if no errors were found, copies them to the permanent
    previous-year storage. Progress and errors are logged against a
    dummy FileUpload record.

    Raises:
        ArchiveYearError: the selected year cannot be archived.
        UploadFileDataError: validation errors were found; nothing was
            archived (details are in the file-upload log).
    """
    # Get the latest period for archiving
    fields = ForecastQueryFields()
    financial_year = fields.selected_year
    # Raises ArchiveYearError if the year is not valid for archiving.
    # (The original wrapped this in a no-op catch-and-reraise.)
    validate_year_for_archiving_actuals(financial_year, False)
    datamodel = fields.datamodel
    data_to_archive_list = datamodel.view_data.raw_data_annotated(
        fields.archive_forecast_columns,
        {},
        year=financial_year,
    )
    financial_year_obj = FinancialYear.objects.get(pk=financial_year)
    # Clear the table used to upload the previous_years.
    # The previous_years are uploaded to a temporary storage,
    # and copied when the upload is completed successfully.
    # This means that we always have a full upload.
    ArchivedForecastDataUpload.objects.filter(
        financial_year=financial_year,
    ).delete()
    rows_to_process = data_to_archive_list.count()
    # Create an entry in the file upload table, even if it is not a file.
    # It is useful for keeping the log of errors.
    file_upload = FileUpload(
        document_file_name="dummy",
        document_type=FileUpload.PREVIOUSYEAR,
        file_location=FileUpload.LOCALFILE,
        status=FileUpload.PROCESSING,
    )
    check_financial_code = CheckArchivedFinancialCode(financial_year, file_upload)
    row_number = 0
    # Hoist the field-name lookups out of the row loop.
    cost_centre_field = fields.cost_centre_code_field
    nac_field = fields.nac_code_field
    programme_field = fields.programme_code_field
    analysis1_field = fields.analysis1_code_field
    analysis2_field = fields.analysis2_code_field
    project_code_field = fields.project_code_field
    for row_to_archive in data_to_archive_list:
        row_number += 1
        if not row_number % 100:
            # Display the number of rows processed every 100 rows
            set_file_upload_feedback(
                file_upload,
                f"Processing row {row_number} of {rows_to_process}.")
            logger.info(f"Processing row {row_number} of {rows_to_process}.")
        cost_centre = row_to_archive[cost_centre_field]
        nac = row_to_archive[nac_field]
        programme_code = row_to_archive[programme_field]
        analysis1 = row_to_archive[analysis1_field]
        analysis2 = row_to_archive[analysis2_field]
        project_code = row_to_archive[project_code_field]
        check_financial_code.validate(
            cost_centre,
            nac,
            programme_code,
            analysis1,
            analysis2,
            project_code,
            row_number,
        )
        if not check_financial_code.error_found:
            financialcode_obj = check_financial_code.get_financial_code()
            try:
                archive_to_temp_previous_year_figures(
                    row_to_archive,
                    financial_year_obj,
                    financialcode_obj,
                )
            except (UploadFileFormatError, ArchiveYearError) as ex:
                set_file_upload_fatal_error(
                    file_upload,
                    str(ex),
                    str(ex),
                )
                # Bare raise preserves the original traceback.
                raise
    final_status = set_final_status(check_financial_code)
    if final_status != FileUpload.PROCESSEDWITHERROR:
        # No errors, so we can copy the figures
        # from the temporary table to the previous_years
        copy_previous_year_figure_from_temp_table(financial_year)
    set_file_upload_feedback(
        file_upload,
        f"Processed {rows_to_process} rows.",
        final_status,
    )
    if final_status == FileUpload.PROCESSEDWITHERROR:
        raise UploadFileDataError(
            "No data archived. Check the log in the file upload record.")
def upload_previous_year(worksheet, financial_year, file_upload):  # noqa
    """Upload previous-year actuals from an open worksheet.

    Checks the header row, validates the year, loads rows into the
    temporary archive table (validating each row's financial code),
    and copies them to the permanent storage only if no errors occur.

    Raises:
        UploadFileFormatError: the headers or a row's format is wrong.
        ArchiveYearError: the year is not valid for archiving.
        UploadFileDataError: row errors were found; nothing was uploaded.
    """
    header_dict = xslx_header_to_dict(worksheet[1])
    expected_headers = [
        COST_CENTRE_HEADER,
        NAC_HEADER,
        PROGRAMME_HEADER,
        ANALYSIS_HEADER,
        ANALYSIS2_HEADER,
        PROJECT_HEADER,
    ]
    expected_headers.extend(DATA_HEADERS)
    check_header(header_dict, expected_headers)
    try:
        validate_year_for_archiving_actuals(financial_year)
    except ArchiveYearError as ex:
        set_file_upload_fatal_error(
            file_upload,
            str(ex),
            str(ex),
        )
        # Bare raise preserves the original traceback.
        raise
    financial_year_obj = FinancialYear.objects.get(pk=financial_year)
    # Clear the table used to upload the previous_years.
    # The previous_years are uploaded to a temporary storage, and copied
    # when the upload is completed successfully.
    # This means that we always have a full upload.
    ArchivedForecastDataUpload.objects.filter(
        financial_year=financial_year,
    ).delete()
    rows_to_process = worksheet.max_row + 1
    check_financial_code = CheckArchivedFinancialCode(financial_year, file_upload)
    cc_index = header_dict[COST_CENTRE_HEADER]
    nac_index = header_dict[NAC_HEADER]
    prog_index = header_dict[PROGRAMME_HEADER]
    a1_index = header_dict[ANALYSIS_HEADER]
    a2_index = header_dict[ANALYSIS2_HEADER]
    proj_index = header_dict[PROJECT_HEADER]
    row_number = 0
    # There is a terrible performance hit accessing the individual cells:
    # The cell is found starting from cell A0, and continuing until the
    # required cell is found
    # The rows in worksheet.rows are accessed sequentially, so there is no
    # performance problem.
    # A typical file took over 2 hours to read using the cell access method
    # and 10 minutes with the row access.
    for previous_year_row in worksheet.rows:
        row_number += 1
        if row_number == 1:
            # There is no way to start reading rows from a specific place.
            # Ignore first row, the headers have been processed already
            continue
        if not row_number % 100:
            # Display the number of rows processed every 100 rows
            set_file_upload_feedback(
                file_upload,
                f"Processing row {row_number} of {rows_to_process}."
            )
            logger.info(f"Processing row {row_number} of {rows_to_process}.")
        cost_centre = previous_year_row[cc_index].value
        if not cost_centre:
            # protection against empty rows
            break
        nac = previous_year_row[nac_index].value
        programme_code = previous_year_row[prog_index].value
        analysis1 = previous_year_row[a1_index].value
        analysis2 = previous_year_row[a2_index].value
        project_code = previous_year_row[proj_index].value
        check_financial_code.validate(
            cost_centre,
            nac,
            programme_code,
            analysis1,
            analysis2,
            project_code,
            row_number,
        )
        if not check_financial_code.error_found:
            financialcode_obj = check_financial_code.get_financial_code()
            try:
                upload_previous_year_figures(
                    previous_year_row,
                    financial_year_obj,
                    financialcode_obj,
                    header_dict,
                )
            except (UploadFileFormatError, ArchiveYearError) as ex:
                set_file_upload_fatal_error(
                    file_upload,
                    str(ex),
                    str(ex),
                )
                raise
    final_status = FileUpload.PROCESSED
    if check_financial_code.error_found:
        final_status = FileUpload.PROCESSEDWITHERROR
    else:
        # No errors, so we can copy the figures
        # from the temporary table to the previous_years
        copy_previous_year_figure_from_temp_table(financial_year)
        if check_financial_code.warning_found:
            final_status = FileUpload.PROCESSEDWITHWARNING
    set_file_upload_feedback(
        file_upload,
        f"Processed {rows_to_process} rows.",
        final_status
    )
    if check_financial_code.error_found:
        raise UploadFileDataError(
            "No data uploaded. Check the log in the file upload record."
        )
def upload_budget(worksheet, year, header_dict, file_upload):  # noqa
    """Upload budget figures from an open worksheet.

    Loads rows into the temporary budget table (validating each row's
    financial code) and copies them to the budgets only if no errors
    were found.

    Returns:
        bool: True if no validation errors were found.

    Raises:
        UploadFileFormatError: a row's figures are in the wrong format.
    """
    year_obj, created = FinancialYear.objects.get_or_create(
        financial_year=year)
    if created:
        year_obj.financial_year_display = create_financial_year_display(year)
        year_obj.save()
    forecast_months = get_forecast_month_dict()
    # Map the column index of each month header to its month value.
    month_dict = {header_dict[k]: v for (k, v) in forecast_months.items()}
    # Clear the table used to upload the budgets.
    # The budgets are uploaded to a temporary storage, and copied
    # when the upload is completed successfully.
    # This means that we always have a full upload.
    BudgetUploadMonthlyFigure.objects.filter(
        financial_year=year,
    ).delete()
    rows_to_process = worksheet.max_row + 1
    check_financial_code = CheckFinancialCode(file_upload)
    cc_index = header_dict["cost centre"]
    nac_index = header_dict["natural account"]
    prog_index = header_dict["programme"]
    a1_index = header_dict["analysis"]
    a2_index = header_dict["analysis2"]
    proj_index = header_dict["project"]
    row_number = 0
    # There is a terrible performance hit accessing the individual cells:
    # The cell is found starting from cell A0, and continuing until the
    # required cell is found
    # The rows in worksheet.rows are accessed sequentially, so there is no
    # performance problem.
    # A typical file took over 2 hours to read using the cell access method
    # and 10 minutes with the row access.
    for budget_row in worksheet.rows:
        row_number += 1
        if row_number == 1:
            # There is no way to start reading rows from a specific place.
            # Ignore first row, the headers have been processed already
            continue
        if not row_number % 100:
            # Display the number of rows processed every 100 rows
            set_file_upload_feedback(
                file_upload,
                f"Processing row {row_number} of {rows_to_process}.")
        cost_centre = budget_row[cc_index].value
        if not cost_centre:
            # protection against empty rows
            break
        nac = budget_row[nac_index].value
        programme_code = budget_row[prog_index].value
        analysis1 = budget_row[a1_index].value
        analysis2 = budget_row[a2_index].value
        project_code = budget_row[proj_index].value
        check_financial_code.validate(
            cost_centre,
            nac,
            programme_code,
            analysis1,
            analysis2,
            project_code,
            row_number,
        )
        if not check_financial_code.error_found:
            financialcode_obj = check_financial_code.get_financial_code()
            try:
                upload_budget_figures(
                    budget_row,
                    year_obj,
                    financialcode_obj,
                    month_dict,
                )
            except UploadFileFormatError as ex:
                set_file_upload_fatal_error(
                    file_upload,
                    str(ex),
                    str(ex),
                )
                # Bare raise preserves the original traceback.
                raise
    final_status = FileUpload.PROCESSED
    if check_financial_code.error_found:
        final_status = FileUpload.PROCESSEDWITHERROR
    else:
        # No errors, so we can copy the figures from the temporary table
        # to the budgets
        copy_uploaded_budget(year, month_dict)
        if check_financial_code.warning_found:
            final_status = FileUpload.PROCESSEDWITHWARNING
    set_file_upload_feedback(
        file_upload,
        f"Processed {rows_to_process} rows.",
        final_status,
    )
    return not check_financial_code.error_found