def run_sql_validations(self, job, file_type, short_colnames, writer, warning_writer, row_number, error_list):
    """ Execute the SQL-based rules for this job and report every failure they produce

    Fatal failures are written to the error report, warnings to the warning report, and
    every failure is recorded in error_list.

    Args:
        job: Job being validated (its job_id, file_type_id and filename are read)
        file_type: File type handed through to validate_file_by_sql
        short_colnames: Dict translating short column names into long ones
        writer: Writer that receives one row per fatal failure
        warning_writer: Writer that receives one row per warning failure
        row_number: Row number stored with each recorded error
        error_list: Object whose record_row_error method keeps track of the errors

    Returns:
        List holding the row number of each fatal failure (one entry per failure)
    """
    current_job_id = job.job_id
    failed_rows = []

    long_names = self.short_to_long_dict[job.file_type_id]
    for failure in validate_file_by_sql(job, file_type, long_names):
        # The SQL rules work with short, machine friendly column names;
        # report the long name whenever a translation exists
        field_name = short_colnames.get(failure.field_name, failure.field_name)

        is_fatal = failure.severity_id == RULE_SEVERITY_DICT['fatal']
        if is_fatal:
            failed_rows.append(failure.row)

        try:
            # An integer error refers to one of the prestored messages
            error_msg = ValidationError.get_error_message(int(failure.error))
        except ValueError:
            # Anything else already is the message
            error_msg = failure.error

        # Select the report that matches the severity; other severities go to neither
        if is_fatal:
            report = writer
        elif failure.severity_id == RULE_SEVERITY_DICT['warning']:
            report = warning_writer
        else:
            report = None

        if report is not None:
            report.writerow([field_name, error_msg, str(failure.row), failure.failed_value,
                             failure.original_label])

        error_list.record_row_error(current_job_id, job.filename, field_name, failure.error, row_number,
                                    failure.original_label, failure.file_type_id, failure.target_file_id,
                                    failure.severity_id)

    return failed_rows
def write_all_row_errors(error_list, job_id):
    """ Store the recorded errors of a single job in the database

    Args:
        error_list: Dict whose values are the error metadata dicts to be written
        job_id: ID of the job to write errors for; entries of other jobs are skipped
    """
    sess = GlobalDB.db().session

    for error_dict in error_list.values():
        entry_job_id = error_dict["jobId"]
        if int(job_id) != int(entry_job_id):
            # Entry belongs to another job
            continue

        field_name = error_dict["fieldName"]
        raw_error_type = error_dict["errorType"]

        try:
            # An integer error type refers to one of the prestored messages
            error_type = int(raw_error_type)
        except ValueError:
            # Rule failures carry the error message itself instead of a number
            message = raw_error_type
            too_long = "Field must be no longer than specified limit" in message
            type_id = ERROR_TYPE_DICT['length_error' if too_long else 'rule_failed']
        else:
            # The cast worked, so look up the prestored type and message
            type_id = ERROR_TYPE_DICT[ValidationError.get_error_type_string(error_type)]
            message = ValidationError.get_error_message(error_type)

        sess.add(ErrorMetadata(
            job_id=entry_job_id,
            filename=error_dict["filename"],
            field_name=field_name,
            error_type_id=type_id,
            rule_failed=message,
            occurrences=error_dict["numErrors"],
            first_row=error_dict["firstRow"],
            original_rule_label=error_dict["originalRuleLabel"],
            file_type_id=error_dict["fileTypeId"],
            target_file_type_id=error_dict["targetFileId"],
            severity_id=error_dict["severity"]))

    # A single commit writes every row added above
    sess.commit()
def write_all_row_errors(self, job_id):
    """ Store the errors recorded in self.rowErrors for one job, then reset the record

    Args:
        job_id: ID of the job to write errors for; entries of other jobs are skipped
    """
    sess = GlobalDB.db().session

    for error_dict in self.rowErrors.values():
        entry_job_id = error_dict["jobId"]
        if int(job_id) != int(entry_job_id):
            # Entry belongs to another job
            continue

        field_name = error_dict["fieldName"]
        raw_error_type = error_dict["errorType"]

        try:
            # An integer error type refers to one of the prestored messages
            error_type = int(raw_error_type)
        except ValueError:
            # Rule failures carry the error message itself instead of a number
            message = raw_error_type
            too_long = "Field must be no longer than specified limit" in message
            type_id = ERROR_TYPE_DICT['length_error' if too_long else 'rule_failed']
        else:
            # The cast worked, so look up the prestored type and message
            type_id = ERROR_TYPE_DICT[ValidationError.get_error_type_string(error_type)]
            message = ValidationError.get_error_message(error_type)

        sess.add(ErrorMetadata(
            job_id=entry_job_id,
            filename=error_dict["filename"],
            field_name=field_name,
            error_type_id=type_id,
            rule_failed=message,
            occurrences=error_dict["numErrors"],
            first_row=error_dict["firstRow"],
            original_rule_label=error_dict["originalRuleLabel"],
            file_type_id=error_dict["fileTypeId"],
            target_file_type_id=error_dict["targetFileId"],
            severity_id=error_dict["severity"]))

    # A single commit writes every row added above
    sess.commit()

    # Start over with an empty record (entries of other jobs are dropped as well)
    self.rowErrors = {}
def run_sql_validations(self, job, file_type, short_colnames, writer, warning_writer, row_number, error_list):
    """ Execute the SQL-based rules for this job and report every failure they produce

    Fatal failures are written to the error report, warnings to the warning report, and
    every failure is recorded in error_list.

    Args:
        job: Job being validated (its job_id and filename are read)
        file_type: File type handed through to validate_file_by_sql
        short_colnames: Dict translating short column names into long ones
        writer: Writer that receives one row per fatal failure
        warning_writer: Writer that receives one row per warning failure
        row_number: Row number stored with each recorded error
        error_list: Object whose record_row_error method keeps track of the errors

    Returns:
        List holding the row number of each fatal failure (one entry per failure)
    """
    current_job_id = job.job_id
    failed_rows = []

    for failure in validate_file_by_sql(job, file_type, self.short_to_long_dict):
        # The SQL rules work with short, machine friendly column names;
        # report the long name whenever a translation exists
        field_name = short_colnames.get(failure.field_name, failure.field_name)

        is_fatal = failure.severity_id == RULE_SEVERITY_DICT['fatal']
        if is_fatal:
            failed_rows.append(failure.row)

        try:
            # An integer error refers to one of the prestored messages
            error_msg = ValidationError.get_error_message(int(failure.error))
        except ValueError:
            # Anything else already is the message
            error_msg = failure.error

        # Select the report that matches the severity; other severities go to neither
        if is_fatal:
            report = writer
        elif failure.severity_id == RULE_SEVERITY_DICT['warning']:
            report = warning_writer
        else:
            report = None

        if report is not None:
            report.write([field_name, error_msg, str(failure.row), failure.failed_value,
                          failure.original_label])

        error_list.record_row_error(current_job_id, job.filename, field_name, failure.error, row_number,
                                    failure.original_label, failure.file_type_id, failure.target_file_id,
                                    failure.severity_id)

    return failed_rows
def run_sql_validations(self, short_colnames, writer, warning_writer):
    """ Execute the SQL-based rules for the current job and report every failure they produce

    The job, file type, file name, row total and error list are all taken from this
    instance. Fatal failures are written to the error file, warnings to the warning
    file, and every failure is recorded in self.error_list.

    Args:
        short_colnames: Dict translating short column names into long ones
        writer: Writer for the error file
        warning_writer: Writer for the warning file

    Returns:
        List holding the row number of each fatal failure (one entry per failure)
    """
    failed_rows = []

    long_names = self.short_to_long_dict[self.file_type.file_type_id]
    for failure in validate_file_by_sql(self.job, self.file_type.name, long_names):
        # The SQL rules work with short, machine friendly column names;
        # report the long name whenever a translation exists
        field_name = short_colnames.get(failure.field_name, failure.field_name)

        is_fatal = failure.severity_id == RULE_SEVERITY_DICT['fatal']
        if is_fatal:
            failed_rows.append(failure.row)

        try:
            # An integer error refers to one of the prestored messages
            error_msg = ValidationError.get_error_message(int(failure.error))
        except ValueError:
            # Anything else already is the message
            error_msg = failure.error

        # Select the file that matches the severity; other severities go to neither
        if is_fatal:
            report = writer
        elif failure.severity_id == RULE_SEVERITY_DICT['warning']:
            report = warning_writer
        else:
            report = None

        if report is not None:
            report.writerow([failure.unique_id, field_name, error_msg, failure.failed_value,
                             failure.expected_value, failure.difference, failure.flex_fields,
                             str(failure.row), failure.original_label])

        # Every failure is tracked in the error list, whatever its severity
        self.error_list.record_row_error(self.job.job_id, self.file_name, field_name, failure.error,
                                         self.total_rows, failure.original_label, failure.file_type_id,
                                         failure.target_file_id, failure.severity_id)

    return failed_rows
def write_errors(failures, job, short_colnames, writer, warning_writer, row_number, error_list):
    """ Report the failures of one row in the error/warning files and in the error list

    Args:
        failures: Iterable of failures found for the row
        job: Current job (its job_id and filename are read)
        short_colnames: Dict translating short column names into long ones
        writer: Writer that receives fatal failures
        warning_writer: Writer that receives warnings
        row_number: Number of the row the failures belong to
        error_list: Object whose record_row_error method keeps track of the errors

    Returns:
        True when at least one failure was fatal, False otherwise
    """
    saw_fatal = False

    for failure in failures:
        # Report the long column name whenever a translation exists
        field_name = short_colnames.get(failure.field, failure.field)
        severity_id = RULE_SEVERITY_DICT[failure.severity]

        try:
            # An integer description refers to one of the prestored messages
            error_msg = ValidationError.get_error_message(int(failure.description))
        except ValueError:
            # Anything else already is the message
            error_msg = failure.description

        # Select the file that matches the severity; other severities go to neither
        if failure.severity == 'fatal':
            saw_fatal = True
            report = writer
        elif failure.severity == 'warning':
            report = warning_writer
        else:
            report = None

        if report is not None:
            report.write([field_name, error_msg, str(row_number), failure.value, failure.label])

        error_list.record_row_error(job.job_id, job.filename, field_name, failure.description, row_number,
                                    failure.label, severity_id=severity_id)

    return saw_fatal
def write_errors(failures, job, short_colnames, writer, warning_writer, row_number, error_list, flex_cols):
    """ Report the failures of one row, including the row's flex columns, in the error/warning
    files and in the error list

    Args:
        failures: Iterable of failures found for the row
        job: Current job (its job_id and filename are read)
        short_colnames: Dict translating short column names into long ones
        writer: Writer that receives fatal failures
        warning_writer: Writer that receives warnings
        row_number: Number of the row the failures belong to
        error_list: Object whose record_row_error method keeps track of the errors
        flex_cols: Flex columns of the row (each with header and cell), may be empty or None

    Returns:
        True when at least one failure was fatal, False otherwise
    """
    saw_fatal = False

    # The flex columns are the same for every failure of this row, so format them once
    flex_headers = []
    flex_cells = []
    for flex_col in flex_cols or ():
        flex_headers.append(flex_col.header)
        flex_cells.append(flex_col.header + ": " + (flex_col.cell or ""))

    for failure in failures:
        # Report the long column name whenever a translation exists
        field_name = short_colnames.get(failure.field, failure.field)
        severity_id = RULE_SEVERITY_DICT[failure.severity]

        try:
            # An integer description refers to one of the prestored messages
            error_msg = ValidationError.get_error_message(int(failure.description))
        except ValueError:
            # Anything else already is the message
            error_msg = failure.description

        # Leave the failing field out of the values when it has no value,
        # otherwise the joined text would contain a dangling label
        value_parts = [field_name + ": " + failure.value] if failure.value else []

        # The failing field comes first, followed by the flex columns
        combined_field_names = ", ".join([field_name] + flex_headers)
        fail_value = ", ".join(value_parts + flex_cells)

        # Select the file that matches the severity; other severities go to neither
        if failure.severity == 'fatal':
            saw_fatal = True
            report = writer
        elif failure.severity == 'warning':
            report = warning_writer
        else:
            report = None

        if report is not None:
            report.writerow([combined_field_names, error_msg, str(row_number), fail_value, failure.label])

        error_list.record_row_error(job.job_id, job.filename, combined_field_names, failure.description,
                                    row_number, failure.label, severity_id=severity_id)

    return saw_fatal