Ejemplo n.º 1
0
def write_all_row_errors(error_list, job_id):
    """ Write the recorded row errors of a single job to the database

    Each entry of error_list whose "jobId" equals job_id (both compared as ints) is turned into one
    ErrorMetadata row and added to the session; entries recorded for other jobs are skipped. All rows
    are written with one commit after the loop. error_list itself is left untouched.

    Args:
        error_list: dict keeping track of error metadata to be updated; each value is a dict read for
            the keys "jobId", "fieldName", "errorType", "filename", "numErrors", "firstRow",
            "originalRuleLabel", "fileTypeId", "targetFileId" and "severity"
        job_id: ID to write errors for
    """
    sess = GlobalDB.db().session
    for entry in error_list.values():
        entry_job = entry["jobId"]
        if int(job_id) != int(entry_job):
            # Recorded for a different job, nothing to write
            continue
        column = entry["fieldName"]
        raw_type = entry["errorType"]
        try:
            # A numeric error type refers to one of our prestored messages
            type_code = int(raw_type)
        except ValueError:
            # Anything else is a rule failure, and the value is the message itself
            if "Field must be no longer than specified limit" in raw_type:
                type_id = ERROR_TYPE_DICT['length_error']
            else:
                type_id = ERROR_TYPE_DICT['rule_failed']
            message = raw_type
        else:
            type_id = ERROR_TYPE_DICT[ValidationError.get_error_type_string(type_code)]
            message = ValidationError.get_error_message(type_code)

        # Both kinds of error share every other column
        sess.add(ErrorMetadata(
            job_id=entry_job,
            filename=entry["filename"],
            field_name=column,
            error_type_id=type_id,
            rule_failed=message,
            occurrences=entry["numErrors"],
            first_row=entry["firstRow"],
            original_rule_label=entry["originalRuleLabel"],
            file_type_id=entry["fileTypeId"],
            target_file_type_id=entry["targetFileId"],
            severity_id=entry["severity"]))
    # A single commit writes every row added above
    sess.commit()
    def write_all_row_errors(self, job_id):
        """ Write the recorded row errors of a single job to the database

        Entries of self.rowErrors whose "jobId" equals job_id (both compared as ints) are each stored as
        an ErrorMetadata row; entries recorded for other jobs are skipped. After the commit
        self.rowErrors is replaced by an empty dict, so the skipped entries are dropped as well.

        Args:
            job_id: ID to write errors for
        """
        sess = GlobalDB.db().session
        for details in self.rowErrors.values():
            owner_job = details["jobId"]
            if int(job_id) != int(owner_job):
                # Belongs to another job, leave it out
                continue
            column = details["fieldName"]
            raw_type = details["errorType"]
            try:
                # Numeric error types refer to our prestored messages
                type_code = int(raw_type)
            except ValueError:
                # Rule failures carry the error message in place of a number
                is_length_error = "Field must be no longer than specified limit" in raw_type
                type_id = ERROR_TYPE_DICT['length_error' if is_length_error else 'rule_failed']
                message = raw_type
            else:
                type_id = ERROR_TYPE_DICT[ValidationError.get_error_type_string(type_code)]
                message = ValidationError.get_error_message(type_code)

            # Everything except the type ID and the message is built the same way for both kinds
            sess.add(ErrorMetadata(job_id=owner_job, filename=details["filename"], field_name=column,
                                   error_type_id=type_id, rule_failed=message,
                                   occurrences=details["numErrors"], first_row=details["firstRow"],
                                   original_rule_label=details["originalRuleLabel"],
                                   file_type_id=details["fileTypeId"],
                                   target_file_type_id=details["targetFileId"],
                                   severity_id=details["severity"]))

        # One commit writes every row added above
        sess.commit()
        # Start over with an empty error dictionary
        self.rowErrors = {}
Ejemplo n.º 3
0
    def writeAllRowErrors(self, jobId):
        """ Write the recorded row errors of one job to the database

        Entries of self.rowErrors whose "jobId" equals jobId (both compared as ints) are each stored as
        an ErrorData row; entries recorded for other jobs are skipped. After the commit self.rowErrors
        is reset to an empty dict, which drops the skipped entries as well.

        Args:
            jobId: ID to write errors for

        Returns:
            None
        """
        for record in self.rowErrors.values():
            recordJob = record["jobId"]
            if int(jobId) != int(recordJob):
                # Recorded for a different job, nothing to write
                continue
            column = record["fieldName"]
            rawType = record["errorType"]
            try:
                # A numeric error type refers to one of our prestored messages
                typeCode = int(rawType)
            except ValueError:
                # Rule failures hold the error message in place of a number
                typeId = self.getTypeId("rule_failed")
                message = rawType
            else:
                typeId = self.getTypeId(ValidationError.getErrorTypeString(typeCode))
                message = ValidationError.getErrorMessage(typeCode)

            self.session.add(ErrorData(job_id=recordJob,
                                       filename=record["filename"],
                                       field_name=column,
                                       error_type_id=typeId,
                                       rule_failed=message,
                                       occurrences=record["numErrors"],
                                       first_row=record["firstRow"]))

        # One commit writes every row added above
        self.session.commit()
        # Start over with an empty error dictionary
        self.rowErrors = {}
def write_file_error(job_id, filename, error_type, extra_info=None):
    """ Record a file-level error in the file table

    Args:
        job_id: ID of job in job tracker, must be convertible to int
        filename: name of error report in S3
        error_type: type of error, value will be mapped to ValidationError class
        extra_info: optional mapping of extra information; its 'missing_headers' and
            'duplicated_headers' entries, when present, are stored on the file record

    Raises:
        ValueError: if job_id cannot be converted to int (the problem is logged first)
    """
    sess = GlobalDB.db().session
    try:
        int(job_id)
    except Exception:
        bad_id_message = 'Bad job_id: {}'.format(job_id)
        logger.error({
            'message': bad_id_message,
            'message_type': 'CoreError',
            'job_id': job_id,
            'function': 'write_file_error'
        })
        raise ValueError(bad_id_message)

    # Fetch the File object of this job, creating it when there is none yet
    record = create_file_if_needed(job_id, filename)

    # The error type decides the file status
    status_name = ValidationError.get_error_type_string(error_type)
    record.file_status_id = FILE_STATUS_DICT[status_name]

    # Copy over whichever header details the caller supplied
    if extra_info is not None:
        header_fields = (('missing_headers', 'headers_missing'),
                         ('duplicated_headers', 'headers_duplicated'))
        for info_key, attribute in header_fields:
            if info_key in extra_info:
                setattr(record, attribute, extra_info[info_key])

    sess.add(record)
    sess.commit()
def writeFileError(job_id, filename, error_type, extra_info=None):
    """ Write a file-level error to the file table

    Args:
        job_id: ID of job in job tracker, must be convertible to int
        filename: name of error report in S3
        error_type: type of error, value will be mapped to ValidationError class
        extra_info: optional dict of extra information; its "missing_headers" and
            "duplicated_headers" entries, when present, are stored on the file record

    Raises:
        ValueError: if job_id cannot be converted to int
    """
    # Reject a bad job ID before any database work is started
    try:
        int(job_id)
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must not be turned into ValueError
        raise ValueError("Bad jobId: " + str(job_id))

    sess = GlobalDB.db().session

    # Get File object for this job ID or create it if it doesn't exist
    fileRec = createFileIfNeeded(job_id, filename)

    # Mark error type and add header info if present
    fileRec.file_status_id = FILE_STATUS_DICT[ValidationError.getErrorTypeString(error_type)]
    if extra_info is not None:
        if "missing_headers" in extra_info:
            fileRec.headers_missing = extra_info["missing_headers"]
        if "duplicated_headers" in extra_info:
            fileRec.headers_duplicated = extra_info["duplicated_headers"]

    sess.add(fileRec)
    sess.commit()
    def writeFileError(self, jobId, filename, errorType, extraInfo = None):
        """ Write a file-level error to the file table

        Args:
            jobId: ID of job in job tracker, must be convertible to int
            filename: name of error report in S3
            errorType: type of error, value will be mapped to ValidationError class
            extraInfo: optional dict of extra information; its "missing_headers" and
                "duplicated_headers" entries, when present, are stored on the file record

        Returns:
            True if successful

        Raises:
            ValueError: if jobId cannot be converted to int
        """
        try:
            int(jobId)
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must not be turned into ValueError
            raise ValueError("Bad jobId: " + str(jobId))

        # Get File object for this job ID or create it if it doesn't exist
        fileRec = self.createFileIfNeeded(jobId, filename)

        # Mark error type and add header info if present
        fileRec.file_status_id = self.getFileStatusId(
            ValidationError.getErrorTypeString(errorType))
        if extraInfo is not None:
            if "missing_headers" in extraInfo:
                fileRec.headers_missing = extraInfo["missing_headers"]
            if "duplicated_headers" in extraInfo:
                fileRec.headers_duplicated = extraInfo["duplicated_headers"]

        self.session.add(fileRec)
        self.session.commit()
        return True
def write_file_error(job_id, filename, error_type, extra_info=None):
    """ Write a file-level error to the file table

    Args:
        job_id: ID of job in job tracker, must be convertible to int
        filename: name of error report in S3
        error_type: type of error, value will be mapped to ValidationError class
        extra_info: optional dict of extra information; its "missing_headers" and
            "duplicated_headers" entries, when present, are stored on the file record

    Raises:
        ValueError: if job_id cannot be converted to int
    """
    # Reject a bad job ID before any database work is started
    try:
        int(job_id)
    except Exception:
        # Not a bare except: KeyboardInterrupt and SystemExit must not be turned into ValueError
        raise ValueError("Bad jobId: " + str(job_id))

    sess = GlobalDB.db().session

    # Get File object for this job ID or create it if it doesn't exist
    file_rec = create_file_if_needed(job_id, filename)

    # Mark error type and add header info if present
    file_rec.file_status_id = FILE_STATUS_DICT[ValidationError.get_error_type_string(error_type)]
    if extra_info is not None:
        if "missing_headers" in extra_info:
            file_rec.headers_missing = extra_info["missing_headers"]
        if "duplicated_headers" in extra_info:
            file_rec.headers_duplicated = extra_info["duplicated_headers"]

    sess.add(file_rec)
    sess.commit()
Ejemplo n.º 8
0
    def writeFileError(self, jobId, filename, errorType, extraInfo=None):
        """ Write a file-level error to the file status table

        Args:
            jobId: ID of job in job tracker, must be convertible to int
            filename: name of error report in S3
            errorType: type of error, value will be mapped to ValidationError class
            extraInfo: optional dict of extra information; its "missing_headers" and
                "duplicated_headers" entries, when present, are stored on the file status

        Returns:
            True if successful

        Raises:
            ValueError: if jobId cannot be converted to int
        """
        try:
            int(jobId)
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must not be turned into ValueError
            raise ValueError("Bad jobId: " + str(jobId))

        # Get File Status for this job ID or create it if it doesn't exist
        fileStatus = self.createFileStatusIfNeeded(jobId, filename)

        # Mark error type and add header info if present
        fileStatus.status_id = self.getStatusId(
            ValidationError.getErrorTypeString(errorType))
        if extraInfo is not None:
            if "missing_headers" in extraInfo:
                fileStatus.headers_missing = extraInfo["missing_headers"]
            if "duplicated_headers" in extraInfo:
                fileStatus.headers_duplicated = extraInfo["duplicated_headers"]

        self.session.add(fileStatus)
        self.session.commit()
        return True
    def run_sql_validations(self, job, file_type, short_colnames, writer,
                            warning_writer, row_number, error_list):
        """ Run all SQL rules for this file type and report every failure

        Fatal failures are written to the error file, warnings to the warning file, and every
        failure is recorded in error_list.

        Args:
            job: Current job
            file_type: Type of file for current job
            short_colnames: Dict mapping short field names to long
            writer: CsvWriter object
            warning_writer: CsvWriter for warnings
            row_number: Current row number; this value (not the failure's own row) is what gets
                passed to error_list.record_row_error
            error_list: instance of ErrorInterface to keep track of errors

        Returns:
            a list of the row numbers of the failures with fatal severity
        """
        job_id = job.job_id
        fatal_rows = []
        short_to_long = self.short_to_long_dict[job.file_type_id]
        for failure in validate_file_by_sql(job, file_type, short_to_long):
            # The SQL queries use short, machine friendly column names;
            # reports show the long name whenever one is known
            short_name = failure.field_name
            field_name = short_colnames[short_name] if short_name in short_colnames else short_name

            is_fatal = failure.severity_id == RULE_SEVERITY_DICT['fatal']
            if is_fatal:
                fatal_rows.append(failure.row)

            try:
                # A numeric error is the code of one of our prestored messages
                error_msg = ValidationError.get_error_message(int(failure.error))
            except ValueError:
                # Anything else is used literally
                error_msg = failure.error

            if is_fatal:
                writer.writerow([
                    field_name, error_msg,
                    str(failure.row), failure.failed_value,
                    failure.original_label
                ])
            elif failure.severity_id == RULE_SEVERITY_DICT['warning']:
                # Warnings go to their own file
                warning_writer.writerow([
                    field_name, error_msg,
                    str(failure.row), failure.failed_value,
                    failure.original_label
                ])
            error_list.record_row_error(job_id, job.filename, field_name,
                                        failure.error, row_number,
                                        failure.original_label,
                                        failure.file_type_id,
                                        failure.target_file_id,
                                        failure.severity_id)
        return fatal_rows
    def writeErrors(self, failures, job, short_colnames, writer,
                    warning_writer, row_number, error_list):
        """ Report the failures of one row in the error/warning files and in error_list

        Args:
            failures: List of errors to be written; each one is indexed as
                (field name, error, failed value, original rule label, severity name)
            job: Current job
            short_colnames: Dict mapping short names to long names
            writer: CsvWriter object
            warning_writer: CsvWriter object
            row_number: Current row number
            error_list: instance of ErrorInterface to keep track of errors
        Returns:
            True if any fatal errors were found, False if only warnings are present
        """
        job_id = job.job_id
        saw_fatal = False
        for failure in failures:
            # Reports show the long column name whenever one is known
            short_name = failure[0]
            field_name = short_colnames[short_name] if short_name in short_colnames else short_name
            error, failed_value, rule_label = failure[1], failure[2], failure[3]

            severity_id = RULE_SEVERITY_DICT[failure[4]]
            try:
                # A numeric error is the code of one of our prestored messages
                error_msg = ValidationError.getErrorMessage(int(error))
            except ValueError:
                # Anything else is used literally
                error_msg = error

            severity = failure[4]
            if severity == "fatal":
                saw_fatal = True
                writer.write([
                    field_name, error_msg,
                    str(row_number), failed_value, rule_label
                ])
            elif severity == "warning":
                # Warnings go to their own file
                warning_writer.write([
                    field_name, error_msg,
                    str(row_number), failed_value, rule_label
                ])
            error_list.recordRowError(job_id,
                                      job.filename,
                                      field_name,
                                      error,
                                      row_number,
                                      rule_label,
                                      severity_id=severity_id)
        return saw_fatal
Ejemplo n.º 11
0
    def writeErrors(self, failures, interfaces, jobId, shortColnames, writer,
                    warningWriter, rowNumber):
        """ Write errors to error database

        Args:
            failures: List of errors to be written
            interfaces: InterfaceHolder object
            jobId: ID of current job
            shortColnames: Dict mapping short names to long names
            writer: CsvWriter object
            rowNumber: Current row number
        Returns:
            True if any fatal errors were found, False if only warnings are present
        """
        fatalErrorFound = False
        errorInterface = interfaces.errorDb
        # For each failure, record it in error report and metadata
        for failure in failures:
            # map short column names back to long names
            if failure[0] in shortColnames:
                fieldName = shortColnames[failure[0]]
            else:
                fieldName = failure[0]
            error = failure[1]
            failedValue = failure[2]
            originalRuleLabel = failure[3]

            severityId = interfaces.validationDb.getRuleSeverityId(failure[4])
            try:
                # If error is an int, it's one of our prestored messages
                errorType = int(error)
                errorMsg = ValidationError.getErrorMessage(errorType)
            except ValueError:
                # If not, treat it literally
                errorMsg = error
            if failure[4] == "fatal":
                fatalErrorFound = True
                writer.write([
                    fieldName, errorMsg,
                    str(rowNumber), failedValue, originalRuleLabel
                ])
            elif failure[4] == "warning":
                # write to warnings file
                warningWriter.write([
                    fieldName, errorMsg,
                    str(rowNumber), failedValue, originalRuleLabel
                ])
            errorInterface.recordRowError(jobId,
                                          self.filename,
                                          fieldName,
                                          error,
                                          rowNumber,
                                          originalRuleLabel,
                                          severity_id=severityId)
        return fatalErrorFound
    def run_sql_validations(self, job, file_type, short_colnames, writer, warning_writer, row_number, error_list):
        """ Run all SQL rules for this file type and report every failure

        Fatal failures are written to the error file, warnings to the warning file, and every failure
        is recorded in error_list.

        Args:
            job: Current job
            file_type: Type of file for current job
            short_colnames: Dict mapping short field names to long
            writer: CsvWriter object
            warning_writer: CsvWriter for warnings
            row_number: Current row number; this value (not the failure's own row) is what gets passed
                to error_list.record_row_error
            error_list: instance of ErrorInterface to keep track of errors

        Returns:
            a list of the row numbers of the failures with fatal severity
        """
        job_id = job.job_id
        fatal_rows = []
        for failure in validate_file_by_sql(job, file_type, self.short_to_long_dict):
            # The SQL queries use short, machine friendly column names;
            # reports show the long name whenever one is known
            short_name = failure.field_name
            field_name = short_colnames[short_name] if short_name in short_colnames else short_name

            is_fatal = failure.severity_id == RULE_SEVERITY_DICT['fatal']
            if is_fatal:
                fatal_rows.append(failure.row)

            try:
                # A numeric error is the code of one of our prestored messages
                error_msg = ValidationError.get_error_message(int(failure.error))
            except ValueError:
                # Anything else is used literally
                error_msg = failure.error

            if is_fatal:
                writer.write([field_name, error_msg, str(failure.row), failure.failed_value, failure.original_label])
            elif failure.severity_id == RULE_SEVERITY_DICT['warning']:
                # Warnings go to their own file
                warning_writer.write([field_name, error_msg, str(failure.row), failure.failed_value,
                                      failure.original_label])
            error_list.record_row_error(job_id, job.filename, field_name, failure.error, row_number,
                                        failure.original_label, failure.file_type_id, failure.target_file_id,
                                        failure.severity_id)
        return fatal_rows
    def run_sql_validations(self, short_colnames, writer, warning_writer):
        """ Run all SQL rules for this file type and report every failure

        Fatal failures are written to the error file, warnings to the warning file, and every failure
        is recorded in self.error_list (with self.total_rows passed as the row argument).

        Args:
            short_colnames: Dict mapping short field names to long
            writer: CsvWriter object for error file
            warning_writer: CsvWriter object for warning file

        Returns:
            a list of the row numbers of the failures with fatal severity
        """
        fatal_rows = []
        short_to_long = self.short_to_long_dict[self.file_type.file_type_id]
        for failure in validate_file_by_sql(self.job, self.file_type.name, short_to_long):
            # The SQL queries use short, machine friendly column names;
            # reports show the long name whenever one is known
            short_name = failure.field_name
            field_name = short_colnames[short_name] if short_name in short_colnames else short_name

            is_fatal = failure.severity_id == RULE_SEVERITY_DICT['fatal']
            if is_fatal:
                fatal_rows.append(failure.row)

            try:
                # A numeric error is the code of one of our prestored messages
                error_msg = ValidationError.get_error_message(int(failure.error))
            except ValueError:
                # Anything else is used literally
                error_msg = failure.error

            if is_fatal:
                writer.writerow([failure.unique_id, field_name, error_msg, failure.failed_value,
                                 failure.expected_value, failure.difference, failure.flex_fields,
                                 str(failure.row), failure.original_label])
            elif failure.severity_id == RULE_SEVERITY_DICT['warning']:
                # Warnings go to their own file
                warning_writer.writerow([failure.unique_id, field_name, error_msg, failure.failed_value,
                                         failure.expected_value, failure.difference, failure.flex_fields,
                                         str(failure.row), failure.original_label])
            # Keep the labeled error in the running error list
            self.error_list.record_row_error(self.job.job_id, self.file_name, field_name, failure.error,
                                             self.total_rows, failure.original_label, failure.file_type_id,
                                             failure.target_file_id, failure.severity_id)
        return fatal_rows
def write_errors(failures, job, short_colnames, writer, warning_writer, row_number, error_list):
    """ Report the failures of one row in the error/warning files and in error_list

    Args:
        failures: List of Failures to be written; each one is read for its field, description, value,
            label and severity attributes
        job: Current job
        short_colnames: Dict mapping short names to long names
        writer: CsvWriter object
        warning_writer: CsvWriter object
        row_number: Current row number
        error_list: instance of ErrorInterface to keep track of errors
    Returns:
        True if any fatal errors were found, False if only warnings are present
    """
    saw_fatal = False
    for failure in failures:
        # Reports show the long column name whenever one is known
        short_name = failure.field
        field_name = short_colnames[short_name] if short_name in short_colnames else short_name

        severity_id = RULE_SEVERITY_DICT[failure.severity]
        try:
            # A numeric description is the code of one of our prestored messages
            error_msg = ValidationError.get_error_message(int(failure.description))
        except ValueError:
            # Anything else is used literally
            error_msg = failure.description

        if failure.severity == 'fatal':
            saw_fatal = True
            writer.write([field_name, error_msg, str(row_number), failure.value, failure.label])
        elif failure.severity == 'warning':
            # Warnings go to their own file
            warning_writer.write([field_name, error_msg, str(row_number), failure.value, failure.label])
        error_list.record_row_error(job.job_id, job.filename, field_name, failure.description, row_number,
                                    failure.label, severity_id=severity_id)
    return saw_fatal
    def writeAllRowErrors(self, jobId):
        """ Write the recorded row errors of one job to the database

        Entries of self.rowErrors whose "jobId" equals jobId (both compared as ints) are each stored as
        an ErrorMetadata row; entries recorded for other jobs are skipped. After the commit
        self.rowErrors is reset to an empty dict, which drops the skipped entries as well.

        Args:
            jobId: ID to write errors for

        Returns:
            None
        """
        for record in self.rowErrors.values():
            recordJob = record["jobId"]
            if int(jobId) != int(recordJob):
                # Recorded for a different job, nothing to write
                continue
            column = record["fieldName"]
            rawType = record["errorType"]
            try:
                # A numeric error type refers to one of our prestored messages
                typeCode = int(rawType)
            except ValueError:
                # Rule failures hold the error message in place of a number
                typeId = self.getTypeId("rule_failed")
                message = rawType
            else:
                typeId = self.getTypeId(ValidationError.getErrorTypeString(typeCode))
                message = ValidationError.getErrorMessage(typeCode)

            self.session.add(ErrorMetadata(job_id=recordJob,
                                           filename=record["filename"],
                                           field_name=column,
                                           error_type_id=typeId,
                                           rule_failed=message,
                                           occurrences=record["numErrors"],
                                           first_row=record["firstRow"]))

        # One commit writes every row added above
        self.session.commit()
        # Start over with an empty error dictionary
        self.rowErrors = {}
def write_errors(failures, job, short_colnames, writer, warning_writer,
                 row_number, error_list, flex_cols):
    """ Report one row's failures to the CSV reports and the error tracker

    Fatal failures are written to the error report, warnings to the warning
    report; failures of any other severity are written to neither. Every
    failure, whatever its severity, is recorded on error_list.

    Args:
        failures: List of Failures to be written
        job: Current job
        short_colnames: Dict mapping short names to long names
        writer: CsvWriter object
        warning_writer: CsvWriter object
        row_number: Current row number
        error_list: instance of ErrorInterface to keep track of errors
        flex_cols: all flex columns for this row
    Returns:
        True if any fatal errors were found, False otherwise
    """
    # Walk the flex columns once; they are appended to every failure below
    flex_pairs = [
        (col.header, col.header + ": " + (col.cell if col.cell else ""))
        for col in (flex_cols if flex_cols else ())
    ]
    flex_col_headers = [header for header, _ in flex_pairs]
    flex_col_cells = [cell for _, cell in flex_pairs]

    # Which report receives a failure of a given severity
    report_for = {'fatal': writer, 'warning': warning_writer}

    saw_fatal = False
    for failure in failures:
        # Prefer the long column name when a mapping exists
        field_name = (short_colnames[failure.field]
                      if failure.field in short_colnames else failure.field)
        severity_id = RULE_SEVERITY_DICT[failure.severity]

        try:
            # A numeric description refers to one of the prestored messages
            error_msg = ValidationError.get_error_message(
                int(failure.description))
        except ValueError:
            # Anything else is used as the message itself
            error_msg = failure.description

        # The failing value is listed only when it is non-empty, so the
        # joined string never starts with a bare "name: "
        own_value = [field_name + ": " + failure.value] if failure.value else []
        combined_field_names = ", ".join([field_name] + flex_col_headers)
        fail_value = ", ".join(own_value + flex_col_cells)

        severity = failure.severity
        if severity == 'fatal':
            saw_fatal = True
        if severity in report_for:
            report_for[severity].writerow([
                combined_field_names, error_msg,
                str(row_number), fail_value, failure.label
            ])

        error_list.record_row_error(job.job_id,
                                    job.filename,
                                    combined_field_names,
                                    failure.description,
                                    row_number,
                                    failure.label,
                                    severity_id=severity_id)
    return saw_fatal
    def runSqlValidations(self, job, file_type, short_colnames, writer,
                          warning_writer, row_number, error_list):
        """ Apply the SQL-based rules for this file type and report failures

        Each failure returned by Validator.validateFileBySql is written to
        the error report (fatal) or the warning report (warning) and is
        always recorded on error_list.

        Args:
            job: Current job
            file_type: Type of file for current job
            short_colnames: Dict mapping short field names to long
            writer: CsvWriter object
            warning_writer: CsvWriter for warnings
            row_number: Current row number
            error_list: instance of ErrorInterface to keep track of errors

        Returns:
            a list of the row numbers that failed a fatal sql-based validation
        """
        # The session handle is not used below; the lookup is left in place
        sess = GlobalDB.db().session
        job_id = job.job_id
        error_rows = []
        sql_failures = Validator.validateFileBySql(
            job.submission_id, file_type, self.short_to_long_dict)

        for failure in sql_failures:
            # SQL rules use short, machine friendly column names; report the
            # long name whenever a mapping is known
            short_name = failure[0]
            field_name = (short_colnames[short_name]
                          if short_name in short_colnames else short_name)
            error = failure[1]
            failed_value = failure[2]
            row = failure[3]
            original_label = failure[4]
            file_type_id = failure[5]
            target_file_id = failure[6]
            severity_id = failure[7]

            is_fatal = severity_id == RULE_SEVERITY_DICT['fatal']
            if is_fatal:
                error_rows.append(row)

            try:
                # A numeric error refers to one of the prestored messages
                error_msg = ValidationError.getErrorMessage(int(error))
            except ValueError:
                # Anything else is used as the message itself
                error_msg = error

            if is_fatal:
                writer.write([
                    field_name, error_msg,
                    str(row), failed_value, original_label
                ])
            elif severity_id == RULE_SEVERITY_DICT['warning']:
                # Warnings go to their own report
                warning_writer.write([
                    field_name, error_msg,
                    str(row), failed_value, original_label
                ])

            # NOTE(review): the caller-supplied row_number is recorded here,
            # not this failure's own row - confirm that is intended
            error_list.recordRowError(job_id,
                                      job.filename,
                                      field_name,
                                      error,
                                      row_number,
                                      original_label,
                                      file_type_id=file_type_id,
                                      target_file_id=target_file_id,
                                      severity_id=severity_id)
        return error_rows
    def runValidation(self, jobId, interfaces):
        """ Run validations for specified job

        Reads the job's file record by record, inserts each record that
        passes the Validator into a staging table, and writes every failure
        to the error report and to the error database interface.

        Args:
            jobId: Job to be validated
            interfaces: holder object; its jobDb, errorDb, validationDb and
                stagingDb attributes are used here
        Returns:
            True if successful (exceptions raised along the way propagate)
        """
        jobTracker = interfaces.jobDb
        # Incremented before each read, so the first record read is row 2
        # (presumably row 1 is the header line - TODO confirm)
        rowNumber = 1
        fileType = jobTracker.getFileType(jobId)
        # If local, make the error report directory
        if(self.isLocal and not os.path.exists(self.directory)):
            os.makedirs(self.directory)
        # Get bucket name and file name
        fileName = jobTracker.getFileName(jobId)
        self.filename = fileName
        bucketName = CONFIG_BROKER['aws_bucket']
        regionName = CONFIG_BROKER['aws_region']

        errorFileName = self.getFileName(jobTracker.getReportPath(jobId))

        # Create File Status object
        interfaces.errorDb.createFileIfNeeded(jobId,fileName)

        # Schema and rules that apply to this file type
        validationDB = interfaces.validationDb
        fieldList = validationDB.getFieldsByFileList(fileType)
        csvSchema  = validationDB.getFieldsByFile(fileType)
        rules = validationDB.getRulesByFile(fileType)

        reader = self.getReader()

        # Get file size and write to jobs table
        # NOTE(review): on AWS the size is taken from "errors/" + report path,
        # locally from the job's input file - confirm this is intended
        if(CONFIG_BROKER["use_aws"]):
            fileSize =  s3UrlHandler.getFileSize("errors/"+jobTracker.getReportPath(jobId))
        else:
            fileSize = os.path.getsize(jobTracker.getFileName(jobId))
        jobTracker.setFileSizeById(jobId, fileSize)


        try:
            # Pull file
            reader.openFile(regionName, bucketName, fileName,fieldList,bucketName,errorFileName)

            # Look up the staging table name for this job
            tableName = interfaces.stagingDb.getTableName(jobId)
            # Create staging table
            tableObject = StagingTable(interfaces)
            tableObject.createTable(fileType,fileName,jobId,tableName)
            errorInterface = interfaces.errorDb

            # While not done, pull one row and put it into staging if it passes
            # the Validator
            with self.getWriter(regionName, bucketName, errorFileName, self.reportHeaders) as writer:
                while(not reader.isFinished):
                    rowNumber += 1
                    #if (rowNumber % 1000) == 0:
                    #    print("Validating row " + str(rowNumber))
                    try :
                        record = FieldCleaner.cleanRow(reader.getNextRecord(), fileType, validationDB)
                        record["row"] = rowNumber
                        # "row" was just added, so fewer than 2 entries means
                        # the record itself held no fields
                        if(reader.isFinished and len(record) < 2):
                            # This is the last line and is empty, don't record an error
                            rowNumber -= 1 # Don't count this row
                            break
                    except ResponseException as e:
                        if reader.isFinished and reader.extraLine:
                            #Last line may be blank don't record an error, reader.extraLine indicates a case where the last valid line has extra line breaks
                            # Don't count last row if empty
                            rowNumber -= 1
                        else:
                            writer.write(["Formatting Error", ValidationError.readErrorMsg, str(rowNumber), ""])
                            errorInterface.recordRowError(jobId,self.filename,"Formatting Error",ValidationError.readError,rowNumber)
                            errorInterface.setRowErrorsPresent(jobId, True)
                        # Either way there is no record to validate
                        continue
                    valid, failures = Validator.validate(record,rules,csvSchema,fileType,interfaces)
                    if(valid) :
                        try:
                            tableObject.insert(record,fileType)
                        except ResponseException as e:
                            # Write failed, move to next record
                            writer.write(["Formatting Error", ValidationError.writeErrorMsg, str(rowNumber),""])
                            errorInterface.recordRowError(jobId,self.filename,"Formatting Error",ValidationError.writeError,rowNumber)
                            errorInterface.setRowErrorsPresent(jobId, True)
                            continue

                    else:
                        # For each failure, record it in error report and metadata
                        if failures:
                            errorInterface.setRowErrorsPresent(jobId, True)
                        for failure in failures:
                            # failure is indexed as (field name, error, failed value)
                            fieldName = failure[0]
                            error = failure[1]
                            failedValue = failure[2]
                            try:
                                # If error is an int, it's one of our prestored messages
                                errorType = int(error)
                                errorMsg = ValidationError.getErrorMessage(errorType)
                            except ValueError:
                                # If not, treat it literally
                                errorMsg = error
                            # The report gets the readable message, the
                            # error interface gets the raw error value
                            writer.write([fieldName,errorMsg,str(rowNumber),failedValue])
                            errorInterface.recordRowError(jobId,self.filename,fieldName,error,rowNumber)
                # Write unfinished batch
                writer.finishBatch()

            # The steps below are skipped if anything above raised
            # Write number of rows to job table
            jobTracker.setNumberOfRowsById(jobId,rowNumber)
            # Write leftover records
            tableObject.endBatch()
            # Mark validation as finished in job tracker
            jobTracker.markJobStatus(jobId,"finished")
            errorInterface.writeAllRowErrors(jobId)
        finally:
            #ensure the file always closes
            reader.close()
        return True
    def runValidation(self, jobId, interfaces):
        """ Run validations for specified job

        Reads the job's file record by record, inserts each record that
        passes the Validator into a staging table, and writes every failure
        to the error report and to the error database interface.

        Args:
            jobId: Job to be validated
            interfaces: holder object; its jobDb, errorDb, validationDb and
                stagingDb attributes are used here
        Returns:
            True if successful (exceptions raised along the way propagate)
        """
        jobTracker = interfaces.jobDb
        # Incremented before each read, so the first record read is row 2
        # (presumably row 1 is the header line - TODO confirm)
        rowNumber = 1
        fileType = jobTracker.getFileType(jobId)
        # If local, make the error report directory
        if (self.isLocal and not os.path.exists(self.directory)):
            os.makedirs(self.directory)
        # Get bucket name and file name
        fileName = jobTracker.getFileName(jobId)
        self.filename = fileName
        bucketName = CONFIG_BROKER['aws_bucket']
        regionName = CONFIG_BROKER['aws_region']

        errorFileName = self.getFileName(jobTracker.getReportPath(jobId))

        # Create File Status object
        interfaces.errorDb.createFileIfNeeded(jobId, fileName)

        # Schema and rules that apply to this file type
        validationDB = interfaces.validationDb
        fieldList = validationDB.getFieldsByFileList(fileType)
        csvSchema = validationDB.getFieldsByFile(fileType)
        rules = validationDB.getRulesByFile(fileType)

        reader = self.getReader()

        # Get file size and write to jobs table
        # NOTE(review): on AWS the size is taken from "errors/" + report path,
        # locally from the job's input file - confirm this is intended
        if (CONFIG_BROKER["use_aws"]):
            fileSize = s3UrlHandler.getFileSize(
                "errors/" + jobTracker.getReportPath(jobId))
        else:
            fileSize = os.path.getsize(jobTracker.getFileName(jobId))
        jobTracker.setFileSizeById(jobId, fileSize)

        try:
            # Pull file
            reader.openFile(regionName, bucketName, fileName, fieldList,
                            bucketName, errorFileName)

            # Look up the staging table name for this job
            tableName = interfaces.stagingDb.getTableName(jobId)
            # Create staging table
            tableObject = StagingTable(interfaces)
            tableObject.createTable(fileType, fileName, jobId, tableName)
            errorInterface = interfaces.errorDb

            # While not done, pull one row and put it into staging if it passes
            # the Validator
            with self.getWriter(regionName, bucketName, errorFileName,
                                self.reportHeaders) as writer:
                while (not reader.isFinished):
                    rowNumber += 1
                    #if (rowNumber % 1000) == 0:
                    #    print("Validating row " + str(rowNumber))
                    try:
                        record = FieldCleaner.cleanRow(reader.getNextRecord(),
                                                       fileType, validationDB)
                        record["row"] = rowNumber
                        # "row" was just added, so fewer than 2 entries means
                        # the record itself held no fields
                        if (reader.isFinished and len(record) < 2):
                            # This is the last line and is empty, don't record an error
                            rowNumber -= 1  # Don't count this row
                            break
                    except ResponseException as e:
                        if reader.isFinished and reader.extraLine:
                            #Last line may be blank don't record an error, reader.extraLine indicates a case where the last valid line has extra line breaks
                            # Don't count last row if empty
                            rowNumber -= 1
                        else:
                            writer.write([
                                "Formatting Error",
                                ValidationError.readErrorMsg,
                                str(rowNumber), ""
                            ])
                            errorInterface.recordRowError(
                                jobId, self.filename, "Formatting Error",
                                ValidationError.readError, rowNumber)
                            errorInterface.setRowErrorsPresent(jobId, True)
                        # Either way there is no record to validate
                        continue
                    valid, failures = Validator.validate(
                        record, rules, csvSchema, fileType, interfaces)
                    if (valid):
                        try:
                            tableObject.insert(record, fileType)
                        except ResponseException as e:
                            # Write failed, move to next record
                            writer.write([
                                "Formatting Error",
                                ValidationError.writeErrorMsg,
                                str(rowNumber), ""
                            ])
                            errorInterface.recordRowError(
                                jobId, self.filename, "Formatting Error",
                                ValidationError.writeError, rowNumber)
                            errorInterface.setRowErrorsPresent(jobId, True)
                            continue

                    else:
                        # For each failure, record it in error report and metadata
                        if failures:
                            errorInterface.setRowErrorsPresent(jobId, True)
                        for failure in failures:
                            # failure is indexed as (field name, error, failed value)
                            fieldName = failure[0]
                            error = failure[1]
                            failedValue = failure[2]
                            try:
                                # If error is an int, it's one of our prestored messages
                                errorType = int(error)
                                errorMsg = ValidationError.getErrorMessage(
                                    errorType)
                            except ValueError:
                                # If not, treat it literally
                                errorMsg = error
                            # The report gets the readable message, the
                            # error interface gets the raw error value
                            writer.write([
                                fieldName, errorMsg,
                                str(rowNumber), failedValue
                            ])
                            errorInterface.recordRowError(
                                jobId, self.filename, fieldName, error,
                                rowNumber)
                # Write unfinished batch
                writer.finishBatch()

            # The steps below are skipped if anything above raised
            # Write number of rows to job table
            jobTracker.setNumberOfRowsById(jobId, rowNumber)
            # Write leftover records
            tableObject.endBatch()
            # Mark validation as finished in job tracker
            jobTracker.markJobStatus(jobId, "finished")
            errorInterface.writeAllRowErrors(jobId)
        finally:
            #ensure the file always closes
            reader.close()
        return True
Ejemplo n.º 20
0
    def runSqlValidations(self, interfaces, jobId, fileType, shortColnames,
                          writer, warningWriter, rowNumber):
        """ Run all SQL rules for this file type

        Each failure returned by Validator.validateFileBySql is written to
        the error report (fatal) or the warning report (warning) and is
        always recorded on the error database interface.

        Args:
            interfaces: InterfaceHolder object
            jobId: ID of current job
            fileType: Type of file for current job
            shortColnames: Dict mapping short field names to long
            writer: CsvWriter object
            warningWriter: CsvWriter for warnings
            rowNumber: Current row number

        Returns:
            a list of the row numbers that failed a fatal sql-based validation
        """
        errorInterface = interfaces.errorDb
        errorRows = []
        sqlFailures = Validator.validateFileBySql(
            interfaces.jobDb.getSubmissionId(jobId), fileType, interfaces)

        # Severity ids are looked up at most once per name instead of up to
        # three times per failure; nothing is looked up until it is needed.
        # Assumes the ids do not change while this method runs.
        severityIds = {}

        def severityIdFor(name):
            if name not in severityIds:
                severityIds[name] = \
                    interfaces.validationDb.getRuleSeverityId(name)
            return severityIds[name]

        for failure in sqlFailures:
            # convert shorter, machine friendly column names used in the
            # SQL validation queries back to their long names
            shortName = failure[0]
            if shortName in shortColnames:
                fieldName = shortColnames[shortName]
            else:
                fieldName = shortName
            error = failure[1]
            failedValue = failure[2]
            row = failure[3]
            original_label = failure[4]
            fileTypeId = failure[5]
            targetFileId = failure[6]
            severityId = failure[7]

            isFatal = severityId == severityIdFor("fatal")
            if isFatal:
                errorRows.append(row)
            try:
                # If error is an int, it's one of our prestored messages
                errorType = int(error)
                errorMsg = ValidationError.getErrorMessage(errorType)
            except ValueError:
                # If not, treat it literally
                errorMsg = error
            if isFatal:
                writer.write([
                    fieldName, errorMsg,
                    str(row), failedValue, original_label
                ])
            elif severityId == severityIdFor("warning"):
                # write to warnings file
                warningWriter.write([
                    fieldName, errorMsg,
                    str(row), failedValue, original_label
                ])
            # NOTE(review): the caller-supplied rowNumber is recorded here,
            # not this failure's own row - confirm that is intended
            errorInterface.recordRowError(jobId,
                                          self.filename,
                                          fieldName,
                                          error,
                                          rowNumber,
                                          original_label,
                                          file_type_id=fileTypeId,
                                          target_file_id=targetFileId,
                                          severity_id=severityId)
        return errorRows