def runCrossValidation(self, jobId, interfaces):
    """ Cross file validation job, test all rules with matching rule_timing

    For every pair of file types (the first having the lower file_order),
    runs the cross-file SQL rules that link the two files, writes the
    failures to an error report and a warning report, and records each
    failure in the error DB. Afterwards the job is marked finished, the
    submission's error info is refreshed and the file is marked complete.

    Args:
        jobId: ID of the cross-file validation job
        interfaces: object exposing the errorDb, validationDb and jobDb
            interfaces used below
    """
    errorDb = interfaces.errorDb
    # Create File Status object
    errorDb.createFileIfNeeded(jobId)
    validationDb = interfaces.validationDb
    jobDb = interfaces.jobDb
    submissionId = jobDb.getSubmissionId(jobId)
    bucketName = CONFIG_BROKER['aws_bucket']
    regionName = CONFIG_BROKER['aws_region']
    CloudLogger.logError(
        "VALIDATOR_INFO: ",
        "Beginning runCrossValidation on submissionID: " + str(submissionId),
        "")

    # Delete existing cross file errors for this submission
    errorDb.resetErrorsByJobId(jobId)

    # use db to get a list of the cross-file combinations
    targetFiles = validationDb.session.query(FileTypeValidation).subquery()
    crossFileCombos = validationDb.session.query(
        FileTypeValidation.name.label('first_file_name'),
        FileTypeValidation.file_id.label('first_file_id'),
        targetFiles.c.name.label('second_file_name'),
        targetFiles.c.file_id.label('second_file_id')).filter(
            FileTypeValidation.file_order < targetFiles.c.file_order)

    # get all cross file rules from db
    # (`== True` is deliberate: it builds a SQL filter expression)
    crossFileRules = validationDb.session.query(RuleSql).filter(
        RuleSql.rule_cross_file_flag == True)

    # The severity ids do not change between failures, so look them up once
    # instead of once per failure row.
    fatalSeverityId = validationDb.getRuleSeverityId("fatal")
    warningSeverityId = validationDb.getRuleSeverityId("warning")

    # for each cross-file combo, run associated rules and create error report
    for row in crossFileCombos:
        # rules may be declared in either direction between the two files
        comboRules = crossFileRules.filter(
            or_(
                and_(RuleSql.file_id == row.first_file_id,
                     RuleSql.target_file_id == row.second_file_id),
                and_(RuleSql.file_id == row.second_file_id,
                     RuleSql.target_file_id == row.first_file_id)))
        # send comboRules to validator.crossValidate sql
        failures = Validator.crossValidateSql(comboRules.all(), submissionId)
        # get error file name
        reportFilename = self.getFileName(
            getCrossReportName(submissionId, row.first_file_name,
                               row.second_file_name))
        warningReportFilename = self.getFileName(
            getCrossWarningReportName(submissionId, row.first_file_name,
                                      row.second_file_name))

        # loop through failures to create the error report
        with self.getWriter(regionName, bucketName, reportFilename,
                            self.crossFileReportHeaders) as writer, \
                self.getWriter(regionName, bucketName, warningReportFilename,
                               self.crossFileReportHeaders) as warningWriter:
            for failure in failures:
                # failure[9] holds the severity id; the first seven fields
                # make up the report row
                severityId = failure[9]
                if severityId == fatalSeverityId:
                    writer.write(failure[0:7])
                if severityId == warningSeverityId:
                    warningWriter.write(failure[0:7])
                errorDb.recordRowError(jobId, "cross_file", failure[0],
                                       failure[3], failure[5], failure[6],
                                       failure[7], failure[8],
                                       severity_id=severityId)
            writer.finishBatch()
            warningWriter.finishBatch()

    errorDb.writeAllRowErrors(jobId)
    jobDb.markJobStatus(jobId, "finished")
    CloudLogger.logError(
        "VALIDATOR_INFO: ",
        "Completed runCrossValidation on submissionID: " + str(submissionId),
        "")
    # Update error info for submission
    jobDb.populateSubmissionErrorInfo(submissionId)
    # TODO: Remove temporary step below
    # Temporarily set publishable flag at end of cross file, remove this once
    # users are able to mark their submissions as publishable
    # Publish only if no errors are present
    if jobDb.getSubmissionById(submissionId).number_of_errors == 0:
        jobDb.setPublishableFlag(submissionId, True)

    # Mark validation complete
    errorDb.markFileComplete(jobId)
def runCrossValidation(self, job):
    """ Cross file validation job, test all rules with matching rule_timing

    Clears the job's existing error metadata, then for each pair returned by
    get_cross_file_pairs() runs the cross-file SQL rules linking the two
    files, writes fatal failures to the error report and warnings to the
    warning report, and records every failure. Finally marks the job
    finished, refreshes the submission's error and warning counts, flags the
    submission publishable when it has no errors, and marks the file complete.

    Args:
        job: job record; its job_id and submission_id attributes are read
    """
    db_session = GlobalDB.db().session
    job_id = job.job_id
    # Create File Status object
    createFileIfNeeded(job_id)
    row_errors = ErrorInterface()
    submission_id = job.submission_id
    bucket = CONFIG_BROKER['aws_bucket']
    region = CONFIG_BROKER['aws_region']
    _exception_logger.info(
        'VALIDATOR_INFO: Beginning runCrossValidation on submission_id: '
        '%s', submission_id)

    # Delete existing cross file errors for this submission
    stale_errors = db_session.query(ErrorMetadata).filter(
        ErrorMetadata.job_id == job_id)
    stale_errors.delete()
    db_session.commit()

    # get all cross file rules from db
    cross_rules = db_session.query(RuleSql).filter(
        RuleSql.rule_cross_file_flag == True)

    # for each cross-file combo, run associated rules and create error report
    for file_pair in get_cross_file_pairs():
        first_file = file_pair[0]
        second_file = file_pair[1]

        # a rule may point from either file of the pair to the other
        first_to_second = and_(RuleSql.file_id == first_file.id,
                               RuleSql.target_file_id == second_file.id)
        second_to_first = and_(RuleSql.file_id == second_file.id,
                               RuleSql.target_file_id == first_file.id)
        pair_rules = cross_rules.filter(or_(first_to_second, second_to_first))

        # send the pair's rules to validator.crossValidate sql
        failures = Validator.crossValidateSql(
            pair_rules.all(), submission_id, self.short_to_long_dict)

        # get error file names
        error_report = self.getFileName(
            get_cross_report_name(submission_id, first_file.name,
                                  second_file.name))
        warning_report = self.getFileName(
            get_cross_warning_report_name(submission_id, first_file.name,
                                          second_file.name))

        # loop through failures to create the error report
        with self.getWriter(region, bucket, error_report,
                            self.crossFileReportHeaders) as error_writer, \
                self.getWriter(region, bucket, warning_report,
                               self.crossFileReportHeaders) as warning_writer:
            for failure in failures:
                # the first seven fields form the report row; index 9 is
                # the severity id
                if failure[9] == RULE_SEVERITY_DICT['fatal']:
                    error_writer.write(failure[0:7])
                if failure[9] == RULE_SEVERITY_DICT['warning']:
                    warning_writer.write(failure[0:7])
                row_errors.recordRowError(
                    job_id, "cross_file", failure[0], failure[3],
                    failure[5], failure[6], failure[7], failure[8],
                    severity_id=failure[9])
            error_writer.finishBatch()
            warning_writer.finishBatch()

    row_errors.writeAllRowErrors(job_id)
    mark_job_status(job_id, "finished")
    _exception_logger.info(
        'VALIDATOR_INFO: Completed runCrossValidation on submission_id: '
        '%s', submission_id)

    submission_query = db_session.query(Submission).filter_by(
        submission_id=submission_id)
    submission = submission_query.one()
    # Update error info for submission
    submission.number_of_errors = sumNumberOfErrorsForJobList(submission_id)
    submission.number_of_warnings = sumNumberOfErrorsForJobList(
        submission_id, errorType="warning")
    # TODO: Remove temporary step below
    # Temporarily set publishable flag at end of cross file, remove this once
    # users are able to mark their submissions as publishable
    # Publish only if no errors are present
    if submission.number_of_errors == 0:
        submission.publishable = True
    db_session.commit()

    # Mark validation complete
    markFileComplete(job_id)