    def run_sql_validations(self, job, file_type, short_colnames, writer,
                            warning_writer, row_number, error_list):
        """ Run all SQL rules for this file type

        Args:
            job: Current job
            file_type: Type of file for current job
            short_colnames: Dict mapping short field names to long
            writer: CsvWriter object
            warning_writer: CsvWriter for warnings
            row_number: Current row number
            error_list: instance of ErrorInterface to keep track of errors

        Returns:
            A list of the row numbers that failed one of the SQL-based validations
        """
        job_id = job.job_id
        error_rows = []
        sql_failures = validate_file_by_sql(
            job, file_type, self.short_to_long_dict[job.file_type_id])
        for failure in sql_failures:
            # convert the shorter, machine-friendly column names used in the
            # SQL validation queries back to their long names
            if failure.field_name in short_colnames:
                field_name = short_colnames[failure.field_name]
            else:
                field_name = failure.field_name

            if failure.severity_id == RULE_SEVERITY_DICT['fatal']:
                error_rows.append(failure.row)

            try:
                # If error is an int, it's one of our prestored messages
                error_type = int(failure.error)
                error_msg = ValidationError.get_error_message(error_type)
            except ValueError:
                # If not, treat it literally
                error_msg = failure.error

            if failure.severity_id == RULE_SEVERITY_DICT['fatal']:
                writer.writerow([
                    field_name, error_msg,
                    str(failure.row), failure.failed_value,
                    failure.original_label
                ])
            elif failure.severity_id == RULE_SEVERITY_DICT['warning']:
                # write to warnings file
                warning_writer.writerow([
                    field_name, error_msg,
                    str(failure.row), failure.failed_value,
                    failure.original_label
                ])
            error_list.record_row_error(job_id, job.filename, field_name,
                                        failure.error, row_number,
                                        failure.original_label,
                                        failure.file_type_id,
                                        failure.target_file_id,
                                        failure.severity_id)
        return error_rows
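
# A minimal, self-contained sketch of the error-message resolution used in the
# try/except above. The message table and helper name are hypothetical; in the
# real code the lookup goes through ValidationError.get_error_message.
_ASSUMED_PRESTORED_MESSAGES = {
    1: "Required field is missing",          # hypothetical code/message pair
    2: "Value is not of the expected type"   # hypothetical code/message pair
}

def resolve_error_message(raw_error):
    """ Return a prestored message when raw_error is an integer code, otherwise the text itself """
    try:
        error_type = int(raw_error)
    except ValueError:
        # Not an integer code, so treat the value as a literal rule-failure message
        return raw_error
    return _ASSUMED_PRESTORED_MESSAGES.get(error_type, raw_error)
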
def write_all_row_errors(error_list, job_id):
    """ Writes all recorded errors to database

    Args:
        error_list: dict keeping track of error metadata to be updated
        job_id: ID to write errors for
    """
    sess = GlobalDB.db().session
    for key in error_list.keys():
        error_dict = error_list[key]
        # Set info for this error
        this_job = error_dict["jobId"]
        if int(job_id) != int(this_job):
            # This row is for a different job, skip it
            continue
        field_name = error_dict["fieldName"]
        try:
            # If the last part of the key is an int, it's one of our prestored messages
            error_type = int(error_dict["errorType"])
        except ValueError:
            # For rule failures, it will hold the error message
            error_msg = error_dict["errorType"]
            if "Field must be no longer than specified limit" in error_msg:
                rule_failed_id = ERROR_TYPE_DICT['length_error']
            else:
                rule_failed_id = ERROR_TYPE_DICT['rule_failed']
            error_row = ErrorMetadata(
                job_id=this_job,
                filename=error_dict["filename"],
                field_name=field_name,
                error_type_id=rule_failed_id,
                rule_failed=error_msg,
                occurrences=error_dict["numErrors"],
                first_row=error_dict["firstRow"],
                original_rule_label=error_dict["originalRuleLabel"],
                file_type_id=error_dict["fileTypeId"],
                target_file_type_id=error_dict["targetFileId"],
                severity_id=error_dict["severity"])
        else:
            # This happens if the cast to int was successful
            error_string = ValidationError.get_error_type_string(error_type)
            error_id = ERROR_TYPE_DICT[error_string]
            # Create error metadata
            error_row = ErrorMetadata(
                job_id=this_job,
                filename=error_dict["filename"],
                field_name=field_name,
                error_type_id=error_id,
                occurrences=error_dict["numErrors"],
                first_row=error_dict["firstRow"],
                rule_failed=ValidationError.get_error_message(error_type),
                original_rule_label=error_dict["originalRuleLabel"],
                file_type_id=error_dict["fileTypeId"],
                target_file_type_id=error_dict["targetFileId"],
                severity_id=error_dict["severity"])

        sess.add(error_row)
    # Commit the session to write all rows
    sess.commit()
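
# For reference, a sketch of the per-error dict shape that write_all_row_errors
# expects to find in error_list, inferred from the keys read above. All of the
# values shown here are hypothetical.
example_error_entry = {
    "jobId": 42,
    "filename": "award_financial.csv",
    "fieldName": "fain",
    "errorType": "1",            # integer string for prestored messages, free text for rule failures
    "numErrors": 3,
    "firstRow": 7,
    "originalRuleLabel": "C11",
    "fileTypeId": 2,
    "targetFileId": None,
    "severity": 1,
}
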
    def write_all_row_errors(self, job_id):
        """ Writes all recorded errors to database

        Args:
            job_id: ID to write errors for
        """
        sess = GlobalDB.db().session
        for key in self.rowErrors.keys():
            error_dict = self.rowErrors[key]
            # Set info for this error
            this_job = error_dict["jobId"]
            if int(job_id) != int(this_job):
                # This row is for a different job, skip it
                continue
            field_name = error_dict["fieldName"]
            try:
                # If the last part of the key is an int, it's one of our prestored messages
                error_type = int(error_dict["errorType"])
            except ValueError:
                # For rule failures, it will hold the error message
                error_msg = error_dict["errorType"]
                if "Field must be no longer than specified limit" in error_msg:
                    rule_failed_id = ERROR_TYPE_DICT['length_error']
                else:
                    rule_failed_id = ERROR_TYPE_DICT['rule_failed']
                error_row = ErrorMetadata(job_id=this_job, filename=error_dict["filename"], field_name=field_name,
                                          error_type_id=rule_failed_id, rule_failed=error_msg,
                                          occurrences=error_dict["numErrors"], first_row=error_dict["firstRow"],
                                          original_rule_label=error_dict["originalRuleLabel"],
                                          file_type_id=error_dict["fileTypeId"],
                                          target_file_type_id=error_dict["targetFileId"],
                                          severity_id=error_dict["severity"])
            else:
                # This happens if the cast to int was successful
                error_string = ValidationError.get_error_type_string(error_type)
                error_id = ERROR_TYPE_DICT[error_string]
                # Create error metadata
                error_row = ErrorMetadata(job_id=this_job, filename=error_dict["filename"], field_name=field_name,
                                          error_type_id=error_id, occurrences=error_dict["numErrors"],
                                          first_row=error_dict["firstRow"],
                                          rule_failed=ValidationError.get_error_message(error_type),
                                          original_rule_label=error_dict["originalRuleLabel"],
                                          file_type_id=error_dict["fileTypeId"],
                                          target_file_type_id=error_dict["targetFileId"],
                                          severity_id=error_dict["severity"])

            sess.add(error_row)

        # Commit the session to write all rows
        sess.commit()
        # Clear the dictionary
        self.rowErrors = {}
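
# A small sketch isolating the rule-failure classification used in the except
# branches above. The dict here is a stand-in with assumed ids; the real mapping
# comes from ERROR_TYPE_DICT in the broker's lookups.
_ASSUMED_ERROR_TYPE_DICT = {"length_error": 1, "rule_failed": 2}

def classify_rule_failure(error_msg):
    """ Map a free-text rule-failure message to an error type id """
    if "Field must be no longer than specified limit" in error_msg:
        return _ASSUMED_ERROR_TYPE_DICT['length_error']
    return _ASSUMED_ERROR_TYPE_DICT['rule_failed']
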
    def run_sql_validations(self, job, file_type, short_colnames, writer, warning_writer, row_number, error_list):
        """ Run all SQL rules for this file type

        Args:
            job: Current job
            file_type: Type of file for current job
            short_colnames: Dict mapping short field names to long
            writer: CsvWriter object
            warning_writer: CsvWriter for warnings
            row_number: Current row number
            error_list: instance of ErrorInterface to keep track of errors

        Returns:
            A list of the row numbers that failed one of the SQL-based validations
        """
        job_id = job.job_id
        error_rows = []
        sql_failures = validate_file_by_sql(job, file_type, self.short_to_long_dict)
        for failure in sql_failures:
            # convert the shorter, machine-friendly column names used in the
            # SQL validation queries back to their long names
            if failure.field_name in short_colnames:
                field_name = short_colnames[failure.field_name]
            else:
                field_name = failure.field_name

            if failure.severity_id == RULE_SEVERITY_DICT['fatal']:
                error_rows.append(failure.row)

            try:
                # If error is an int, it's one of our prestored messages
                error_type = int(failure.error)
                error_msg = ValidationError.get_error_message(error_type)
            except ValueError:
                # If not, treat it literally
                error_msg = failure.error

            if failure.severity_id == RULE_SEVERITY_DICT['fatal']:
                writer.write([field_name, error_msg, str(failure.row), failure.failed_value, failure.original_label])
            elif failure.severity_id == RULE_SEVERITY_DICT['warning']:
                # write to warnings file
                warning_writer.write([field_name, error_msg, str(failure.row), failure.failed_value,
                                      failure.original_label])
            error_list.record_row_error(job_id, job.filename, field_name, failure.error, row_number,
                                        failure.original_label, failure.file_type_id, failure.target_file_id,
                                        failure.severity_id)
        return error_rows
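
# Design note: the short-to-long name lookup at the top of the loop above can be
# written more compactly with dict.get, which falls back to the short name when
# no long name is registered:
#
#     field_name = short_colnames.get(failure.field_name, failure.field_name)
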
    def run_sql_validations(self, short_colnames, writer, warning_writer):
        """ Run all SQL rules for this file type

        Args:
            short_colnames: Dict mapping short field names to long
            writer: CsvWriter object for error file
            warning_writer: CsvWriter object for warning file

        Returns:
            A list of the row numbers that failed one of the SQL-based validations
        """
        error_rows = []
        sql_failures = validate_file_by_sql(self.job, self.file_type.name,
                                            self.short_to_long_dict[self.file_type.file_type_id])
        for failure in sql_failures:
            # convert the shorter, machine-friendly column names used in the
            # SQL validation queries back to their long names
            if failure.field_name in short_colnames:
                field_name = short_colnames[failure.field_name]
            else:
                field_name = failure.field_name

            if failure.severity_id == RULE_SEVERITY_DICT['fatal']:
                error_rows.append(failure.row)

            try:
                # If error is an int, it's one of our prestored messages
                error_type = int(failure.error)
                error_msg = ValidationError.get_error_message(error_type)
            except ValueError:
                # If not, treat it literally
                error_msg = failure.error

            if failure.severity_id == RULE_SEVERITY_DICT['fatal']:
                writer.writerow([failure.unique_id, field_name, error_msg, failure.failed_value, failure.expected_value,
                                 failure.difference, failure.flex_fields, str(failure.row), failure.original_label])
            elif failure.severity_id == RULE_SEVERITY_DICT['warning']:
                # write to warnings file
                warning_writer.writerow([failure.unique_id, field_name, error_msg, failure.failed_value,
                                         failure.expected_value, failure.difference, failure.flex_fields,
                                         str(failure.row), failure.original_label])
            # record this failure (with its rule label) in the error metadata
            self.error_list.record_row_error(self.job.job_id, self.file_name, field_name, failure.error,
                                             self.total_rows, failure.original_label, failure.file_type_id,
                                             failure.target_file_id, failure.severity_id)
        return error_rows
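
# The widened report rows written above carry nine columns. A hypothetical
# header row that lines up with them, for orientation only (the actual header
# text used by the broker may differ):
ASSUMED_REPORT_COLUMNS = ["Unique ID", "Field Name", "Error Message", "Value Provided", "Expected Value",
                          "Difference", "Flex Fields", "Row Number", "Rule Label"]
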
def write_errors(failures, job, short_colnames, writer, warning_writer, row_number, error_list):
    """ Write errors to error database

    Args:
        failures: List of Failures to be written
        job: Current job
        short_colnames: Dict mapping short names to long names
        writer: CsvWriter object
        warning_writer: CsvWriter object
        row_number: Current row number
        error_list: instance of ErrorInterface to keep track of errors
    Returns:
        True if any fatal errors were found, False if only warnings are present
    """
    fatal_error_found = False
    # For each failure, record it in error report and metadata
    for failure in failures:
        # map short column names back to long names
        if failure.field in short_colnames:
            field_name = short_colnames[failure.field]
        else:
            field_name = failure.field

        severity_id = RULE_SEVERITY_DICT[failure.severity]
        try:
            # If error is an int, it's one of our prestored messages
            error_type = int(failure.description)
            error_msg = ValidationError.get_error_message(error_type)
        except ValueError:
            # If not, treat it literally
            error_msg = failure.description
        if failure.severity == 'fatal':
            fatal_error_found = True
            writer.write([field_name, error_msg, str(row_number), failure.value, failure.label])
        elif failure.severity == 'warning':
            # write to warnings file
            warning_writer.write([field_name, error_msg, str(row_number), failure.value, failure.label])
        error_list.record_row_error(job.job_id, job.filename, field_name, failure.description, row_number,
                                    failure.label, severity_id=severity_id)
    return fatal_error_found
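
# A minimal stand-in for the Failure records consumed by write_errors, using the
# attribute names accessed above. The namedtuple itself and the sample values
# are assumptions for illustration only.
from collections import namedtuple

Failure = namedtuple("Failure", ["field", "description", "value", "label", "severity"])

example_failure = Failure(field="fain", description="Required field is missing",
                          value="", label="A1", severity="fatal")
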
def write_errors(failures, job, short_colnames, writer, warning_writer,
                 row_number, error_list, flex_cols):
    """ Write errors to error database

    Args:
        failures: List of Failures to be written
        job: Current job
        short_colnames: Dict mapping short names to long names
        writer: CsvWriter object
        warning_writer: CsvWriter object
        row_number: Current row number
        error_list: instance of ErrorInterface to keep track of errors
        flex_cols: all flex columns for this row
    Returns:
        True if any fatal errors were found, False if only warnings are present
    """
    fatal_error_found = False
    # prepare flex cols for all the errors for this row
    flex_col_headers = []
    flex_col_cells = []
    if flex_cols:
        for flex_col in flex_cols:
            flex_col_headers.append(flex_col.header)
            flex_val = flex_col.cell if flex_col.cell else ""
            flex_col_cells.append(flex_col.header + ": " + flex_val)
    # For each failure, record it in error report and metadata
    for failure in failures:
        # map short column names back to long names
        if failure.field in short_colnames:
            field_name = short_colnames[failure.field]
        else:
            field_name = failure.field

        severity_id = RULE_SEVERITY_DICT[failure.severity]
        try:
            # If error is an int, it's one of our prestored messages
            error_type = int(failure.description)
            error_msg = ValidationError.get_error_message(error_type)
        except ValueError:
            # If not, treat it literally
            error_msg = failure.description
        # get flex fields
        field_names = [field_name]
        flex_list = []
        # only add the value if there's something to add, otherwise our join will look bad
        if failure.value:
            flex_list = [field_name + ": " + failure.value]

        # append whatever list we made of flex columns to our existing field names and content list
        field_names.extend(flex_col_headers)
        flex_list.extend(flex_col_cells)

        # join the field names and flex column values so we have a single value instead of a list
        combined_field_names = ", ".join(field_names)
        fail_value = ", ".join(flex_list)
        if failure.severity == 'fatal':
            fatal_error_found = True
            writer.writerow([
                combined_field_names, error_msg,
                str(row_number), fail_value, failure.label
            ])
        elif failure.severity == 'warning':
            # write to warnings file
            warning_writer.writerow([
                combined_field_names, error_msg,
                str(row_number), fail_value, failure.label
            ])
        error_list.record_row_error(job.job_id,
                                    job.filename,
                                    combined_field_names,
                                    failure.description,
                                    row_number,
                                    failure.label,
                                    severity_id=severity_id)
    return fatal_error_found
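
# Worked example of the flex-column concatenation performed above, with
# hypothetical headers and values: the failing field plus every flex column are
# joined into one "field names" cell and one "value" cell for the report row.
flex_col_headers = ["flex_a", "flex_b"]
flex_col_cells = ["flex_a: 123", "flex_b: "]

field_names = ["fain"] + flex_col_headers
flex_list = ["fain: ABC-1"] + flex_col_cells

combined_field_names = ", ".join(field_names)   # "fain, flex_a, flex_b"
fail_value = ", ".join(flex_list)               # "fain: ABC-1, flex_a: 123, flex_b: "
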