def closeOne(interface):
    """ Close the session held by one interface

    Nothing is done when the interface is None, has no ``session`` attribute,
    or its session is None. Otherwise the session is closed; if that fails,
    a rollback followed by a second close is attempted.

    Args:
        interface: object that may expose a ``session`` attribute providing
            ``close()`` and ``rollback()``

    Raises:
        Exception: whatever the rollback or the second close raised,
            re-raised after it has been logged through CloudLogger
    """
    if interface is None:
        # No need to close a nonexistent connection
        return
    try:
        session = interface.session
    except AttributeError:
        # If interface has no session, skip closing
        return
    if session is None:
        # If session is None, skip closing
        return

    # Try to close the session and connection, on error try a rollback
    try:
        session.close()
    except Exception:
        # Exception (not a bare except) so KeyboardInterrupt/SystemExit
        # are not intercepted by the retry
        try:
            session.rollback()
            session.close()
        except Exception as e:
            exc_tb = sys.exc_info()[2]
            trace = traceback.extract_tb(exc_tb, 10)
            CloudLogger.logError('Broker DB Interface Error: ', e, trace)
            # Release the traceback reference before re-raising
            del exc_tb
            raise
def closeOne(interface):
    """ Close the session held by one interface

    Returns immediately when the interface is None, lacks a ``session``
    attribute, or carries a None session. Otherwise closes the session and,
    should that fail, rolls back and closes once more.

    Args:
        interface: object that may expose a ``session`` attribute providing
            ``close()`` and ``rollback()``

    Raises:
        Exception: the error raised by the rollback or the second close,
            re-raised after being logged through CloudLogger
    """
    if interface is None:
        # No need to close a nonexistent connection
        return
    try:
        session = interface.session
    except AttributeError:
        # If interface has no session, skip closing
        return
    if session is None:
        # If session is None, skip closing
        return

    # Try to close the session and connection, on error try a rollback
    try:
        session.close()
    except Exception:
        # Catch Exception rather than everything, so interpreter-exit
        # signals (KeyboardInterrupt, SystemExit) propagate untouched
        try:
            session.rollback()
            session.close()
        except Exception as e:
            exc_tb = sys.exc_info()[2]
            trace = traceback.extract_tb(exc_tb, 10)
            CloudLogger.logError('Broker DB Interface Error: ', e, trace)
            # Release the traceback reference before re-raising
            del exc_tb
            raise
def validate_threaded():
    """Start the validation process on a new thread.

    Reads the job ID from the current request, checks it with the
    validation manager, marks the job as "running" and hands it to a
    background thread.

    Returns:
        A JsonResponse: status OK with the key "table" set to "job<id>" when
        the thread was started, otherwise an error response.
    """
    def writeToErrorLog(error):
        """Append the error text to the local errorLog file and close it."""
        with open("errorLog", "a") as errorLog:
            errorLog.write(str(error) + "\n")

    @copy_current_request_context
    def ThreadedFunction(arg):
        """The new thread."""
        threadedManager = ValidationManager(local, error_report_path)
        threadedManager.threadedValidateJob(arg)

    try:
        interfaces = InterfaceHolder()
        jobTracker = interfaces.jobDb
    except ResponseException as e:
        writeToErrorLog(e)
        return JsonResponse.error(e, e.status, table="cannot connect to job database")
    except Exception as e:
        writeToErrorLog(e)
        exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
        return JsonResponse.error(exc, exc.status, table="cannot connect to job database")

    # NOTE(review): the error returns below leave `interfaces` open; only the
    # success path calls interfaces.close() -- confirm whether that is intended

    jobId = None
    manager = ValidationManager(local, error_report_path)

    try:
        jobId = manager.getJobID(request)
    except ResponseException as e:
        # jobId is still None here because getJobID did not return
        manager.markJob(jobId, jobTracker, "invalid", interfaces.errorDb, manager.filename)
        CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
        return JsonResponse.error(e, e.status, table="")
    except Exception as e:
        exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, type(e))
        manager.markJob(jobId, jobTracker, "invalid", interfaces.errorDb, manager.filename)
        CloudLogger.logError(str(e), exc, traceback.extract_tb(sys.exc_info()[2]))
        return JsonResponse.error(exc, exc.status, table="")

    try:
        manager.testJobID(jobId, interfaces)
    except ResponseException as e:
        writeToErrorLog(e)
        # Job is not ready to run according to job tracker, do not change status of job in job tracker
        interfaces.errorDb.writeFileError(jobId, manager.filename, ValidationError.jobError)
        return JsonResponse.error(e, e.status, table="")
    except Exception as e:
        writeToErrorLog(e)
        exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, type(e))
        interfaces.errorDb.writeFileError(jobId, manager.filename, ValidationError.jobError)
        return JsonResponse.error(exc, exc.status, table="")

    thread = Thread(target=ThreadedFunction, args=(jobId,))

    try:
        jobTracker.markJobStatus(jobId, "running")
    except Exception as e:
        writeToErrorLog(e)
        exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
        return JsonResponse.error(exc, exc.status, table="could not start job")

    # Release this request's interfaces before the worker thread starts
    interfaces.close()
    thread.start()

    return JsonResponse.create(StatusCode.OK, {"table": "job" + str(jobId)})
def checkJobDependencies(self, jobId):
    """ For a finished job, mark its ready dependent jobs and queue them

    A dependent job is queued when it is in the 'waiting' state, all of its
    prerequisite jobs are 'finished', and its type is
    'csv_record_validation' or 'validation'. Dependents that are not
    'waiting' are logged as errors and skipped.

    Args:
        jobId: ID of the job whose dependents should be checked

    Raises:
        ValueError: if the job identified by jobId is not 'finished'
    """
    # Status IDs do not change inside the loops, so look them up once
    finishedId = self.getStatusId('finished')
    waitingId = self.getStatusId('waiting')

    # raise exception if current job is not actually finished
    if self.getStatus(jobId) != finishedId:
        raise ValueError(
            'Current job not finished, unable to check dependencies')

    # check if dependent jobs are finished
    for depJobId in self.getDependentJobs(jobId):
        if self.getStatus(depJobId) != waitingId:
            CloudLogger.logError(
                "Job dependency is not in a 'waiting' state",
                ResponseException(
                    "Job dependency is not in a 'waiting' state",
                    StatusCode.CLIENT_ERROR, ValueError),
                traceback.extract_stack())
            continue

        # the dependent is ready only if every job it depends on is finished;
        # all() stops at the first unfinished prerequisite
        isReady = all(
            self.getStatus(preReqJobId) == finishedId
            for preReqJobId in self.getPrerequisiteJobs(depJobId))

        # The type check here is temporary and needs to be removed once the validator is able
        # to handle cross-file validation job
        if isReady and self.getJobType(depJobId) in (
                'csv_record_validation', 'validation'):
            # mark job as ready
            self.markStatus(depJobId, 'ready')
            # add to the job queue
            self.jobQueue.enqueue.delay(depJobId)
def checkJobDependencies(self, jobId):
    """ For specified job, check which of its dependencies are ready to be started, and add them to the queue

    A dependent job is started when it is in the 'waiting' state, all of its
    prerequisite jobs are 'finished', and its type is
    'csv_record_validation' or 'validation'. Dependents that are not
    'waiting' are logged as errors and skipped.

    Args:
        jobId: ID of the job whose dependents should be checked

    Raises:
        ValueError: if the job identified by jobId is not 'finished'
    """
    # Status IDs do not change inside the loops, so look them up once
    finishedId = self.getJobStatusId('finished')
    waitingId = self.getJobStatusId('waiting')

    # raise exception if current job is not actually finished
    if self.getJobStatus(jobId) != finishedId:
        raise ValueError('Current job not finished, unable to check dependencies')

    # check if dependent jobs are finished
    for depJobId in self.getDependentJobs(jobId):
        if self.getJobStatus(depJobId) != waitingId:
            CloudLogger.logError(
                "Job dependency is not in a 'waiting' state",
                ResponseException(
                    "Job dependency is not in a 'waiting' state",
                    StatusCode.CLIENT_ERROR, ValueError),
                traceback.extract_stack())
            continue

        # the dependent is ready only if every job it depends on is finished;
        # all() stops at the first unfinished prerequisite
        isReady = all(
            self.getJobStatus(preReqJobId) == finishedId
            for preReqJobId in self.getPrerequisiteJobs(depJobId))

        # The type check here is temporary and needs to be removed once the validator is able
        # to handle cross-file validation job
        if isReady and self.getJobType(depJobId) in (
                'csv_record_validation', 'validation'):
            # mark job as ready
            self.markJobStatus(depJobId, 'ready')
            # add to the job queue
            self.jobQueue.enqueue.delay(depJobId)
def startGenerationJob(self, submission_id, file_type):
    """ Initiates a file generation job

    Args:
        submission_id: ID of submission to start job for
        file_type: Type of file to be generated ("D1", "D2", "E" or "F")

    Returns:
        Tuple of boolean indicating successful start, and error response if False
    """
    jobDb = self.interfaces.jobDb

    if file_type in ["D1", "D2"]:
        # Populate start and end dates, these should be provided in MM/DD/YYYY format, using calendar year (not fiscal year)
        requestDict = RequestDictionary(self.request)
        start_date = requestDict.getValue("start")
        end_date = requestDict.getValue("end")

        if not (StringCleaner.isDate(start_date) and StringCleaner.isDate(end_date)):
            exc = ResponseException("Start or end date cannot be parsed into a date", StatusCode.CLIENT_ERROR)
            return False, JsonResponse.error(exc, exc.status, start="", end="", file_type=file_type, status="failed")
    elif file_type not in ["E", "F"]:
        exc = ResponseException("File type must be either D1, D2, E or F", StatusCode.CLIENT_ERROR)
        return False, JsonResponse.error(exc, exc.status, file_type=file_type, status="failed")

    # Looked up only after file_type has been validated, so an unsupported
    # type produces the client error above instead of a failed lookup
    file_type_name = self.fileTypeMap[file_type]

    cgac_code = self.jobManager.getSubmissionById(submission_id).cgac_code

    # Generate and upload file to S3
    user_id = LoginSession.getName(session)
    timestamped_name = s3UrlHandler.getTimestampedFilename(CONFIG_BROKER["".join([str(file_type_name), "_file_name"])])
    if self.isLocal:
        upload_file_name = "".join([CONFIG_BROKER['broker_files'], timestamped_name])
    else:
        upload_file_name = "".join([str(user_id), "/", timestamped_name])

    # Record the target filenames on the upload job and mark it as running
    job = jobDb.getJobBySubmissionFileTypeAndJobType(submission_id, file_type_name, "file_upload")
    job.filename = upload_file_name
    job.original_filename = timestamped_name
    job.job_status_id = jobDb.getJobStatusId("running")
    jobDb.session.commit()

    if file_type in ["D1", "D2"]:
        CloudLogger.log("DEBUG: Adding job info for job id of " + str(job.job_id), log_type="debug", file_name=self.debug_file_name)
        return self.addJobInfoForDFile(upload_file_name, timestamped_name, submission_id, file_type, file_type_name, start_date, end_date, cgac_code, job)
    elif file_type == 'E':
        generate_e_file.delay(
            submission_id, job.job_id, InterfaceHolder, timestamped_name,
            upload_file_name, self.isLocal)
    elif file_type == 'F':
        generate_f_file.delay(
            submission_id, job.job_id, InterfaceHolder, timestamped_name,
            upload_file_name, self.isLocal)

    return True, None
def addJobInfoForDFile(self, upload_file_name, timestamped_name, submission_id, file_type, file_type_name, start_date, end_date, cgac_code, job):
    """ Populates upload and validation job objects with start and end dates, filenames, and status

    Also stores a FileGenerationTask with a fresh UUID key and calls the
    D file API with a callback URL containing that key.

    Args:
        upload_file_name - Filename to use on S3
        timestamped_name - Version of filename without user ID
        submission_id - Submission to add D files to
        file_type - File type as either "D1" or "D2"
        file_type_name - Full name of file type
        start_date - Beginning of period for D file, as MM/DD/YYYY
        end_date - End of period for D file, as MM/DD/YYYY
        cgac_code - Agency to generate D file for
        job - Job object for upload job

    Returns:
        Tuple of boolean indicating success, and error response if False
    """
    jobDb = self.interfaces.jobDb
    try:
        valJob = jobDb.getJobBySubmissionFileTypeAndJobType(submission_id, file_type_name, "csv_record_validation")
        valJob.filename = upload_file_name
        valJob.original_filename = timestamped_name
        valJob.job_status_id = jobDb.getJobStatusId("waiting")

        # Parse each date once and share the result between both jobs
        period_start = datetime.strptime(start_date, "%m/%d/%Y").date()
        period_end = datetime.strptime(end_date, "%m/%d/%Y").date()
        job.start_date = period_start
        job.end_date = period_end
        valJob.start_date = period_start
        valJob.end_date = period_end

        # Generate random uuid and store generation task
        task_key = uuid4()
        task = FileGenerationTask(generation_task_key=task_key, submission_id=submission_id,
                                  file_type_id=jobDb.getFileTypeId(file_type_name), job_id=job.job_id)
        jobDb.session.add(task)

        jobDb.session.commit()
    except ValueError as e:
        # Date was not in expected format
        exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, ValueError)
        return False, JsonResponse.error(exc, exc.status, url="", start="", end="", file_type=file_type)

    # Create file D API URL with dates and callback URL
    callback = "{}://{}:{}/v1/complete_generation/{}/".format(
        CONFIG_SERVICES["protocol"], CONFIG_SERVICES["broker_api_host"],
        CONFIG_SERVICES["broker_api_port"], task_key)
    CloudLogger.log(
        'DEBUG: Callback URL for {}: {}'.format(file_type, callback),
        log_type='debug',
        file_name=self.debug_file_name)
    get_url = CONFIG_BROKER["".join([file_type_name, "_url"])].format(cgac_code, start_date, end_date, callback)

    CloudLogger.log("DEBUG: Calling D file API => " + str(get_url), log_type="debug", file_name=self.debug_file_name)
    try:
        if not self.call_d_file_api(get_url):
            self.handleEmptyResponse(job, valJob)
    except Timeout as e:
        exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, Timeout)
        # Report the wrapped exception, as the ValueError handler above does
        return False, JsonResponse.error(exc, exc.status, url="", start="", end="", file_type=file_type)

    return True, None
def error(exception, errorCode, **kwargs):
    """ Create an http response object for specified error

    The exception is always logged through CloudLogger. Exception details
    (message, type, wrapped exception, trace) are only added to the
    response when JsonResponse.debugMode is set; otherwise the response
    carries a generic message.

    Args:
        exception: Exception to be represented by response object
        errorCode: Status code to be used in response
        kwargs: Extra fields and values to be included in response

    Returns:
        Http response object containing specified error
    """
    responseDict = dict(kwargs)

    # Trace of the exception currently being handled, limited to 10 entries
    exc_tb = sys.exc_info()[2]
    trace = traceback.extract_tb(exc_tb, 10)
    # Only the extracted trace is needed from here on
    del exc_tb
    CloudLogger.logError('Route Error : ', exception, trace)

    if JsonResponse.debugMode:
        responseDict["message"] = str(exception)
        responseDict["errorType"] = str(type(exception))
        if isinstance(exception, ResponseException) and exception.wrappedException is not None:
            responseDict["wrappedType"] = str(type(exception.wrappedException))
            responseDict["wrappedMessage"] = str(exception.wrappedException)
        # Store the trace entries as strings in the response
        trace = [str(entry) for entry in trace]
        responseDict["trace"] = trace
        if JsonResponse.printDebug:
            print(str(type(exception)))
            print(str(exception))
            print(str(trace))
        if JsonResponse.logDebug:
            # One handle for all three writes, closed when the block exits
            with open("responseErrorLog", "a") as errorLog:
                errorLog.write(str(type(exception)) + ": ")
                errorLog.write(str(exception) + "\n")
                errorLog.write(str(trace) + "\n")
    else:
        responseDict["message"] = "An error has occurred"

    return JsonResponse.create(errorCode, responseDict)
def write_csv(file_name, upload_name, is_local, header, body):
    """Write a header and body rows as a CSV, either locally or to S3.

    When is_local is true the file is written under
    CONFIG_BROKER['broker_files'] using file_name; otherwise it is written
    to the configured AWS bucket under upload_name.

    :param file_name: name of the file for a local write
    :param upload_name: name of the file for an S3 write
    :param is_local: selects the local writer over the S3 writer
    :param header: header row passed to the writer
    :param body: iterable of rows to write
    :return: None
    """
    if not is_local:
        s3_bucket = CONFIG_BROKER['aws_bucket']
        s3_region = CONFIG_BROKER['aws_region']
        target = CsvS3Writer(s3_region, s3_bucket, upload_name, header)
        debug_text = 'DEBUG: Writing file to S3...'
    else:
        local_path = CONFIG_BROKER['broker_files'] + file_name
        target = CsvLocalWriter(local_path, header)
        debug_text = 'DEBUG: Writing file locally...'

    CloudLogger.log(debug_text, log_type="debug", file_name='smx_request.log')

    with target as out:
        for row in body:
            out.write(row)
        # Called once after every row has been written
        out.finishBatch()
def generateFile(self):
    """ Start a file generation job for the file type named in the request

    Returns:
        The error response when the submission check, the prerequisite
        check, or the generation start fails; otherwise the response of
        checkGeneration for the same submission and file type.
    """
    self.debug_file_name = "debug.log"
    CloudLogger.log("DEBUG: Starting D file generation", log_type="debug", file_name=self.debug_file_name)

    submission_id, file_type = self.getRequestParamsForGenerate()
    params_message = "".join(
        ["DEBUG: Submission ID = ", str(submission_id), " / File type = ", str(file_type)])
    CloudLogger.log(params_message, log_type="debug", file_name=self.debug_file_name)

    # Check permission to submission
    permitted, denied_response = self.checkSubmissionById(submission_id, file_type)
    if not permitted:
        return denied_response

    upload_job = self.interfaces.jobDb.getJobBySubmissionFileTypeAndJobType(
        submission_id, self.fileTypeMap[file_type], "file_upload")

    # Check prerequisites on upload job
    prerequisites_met = self.interfaces.jobDb.runChecks(upload_job.job_id)
    if not prerequisites_met:
        exc = ResponseException("Must wait for completion of prerequisite validation job", StatusCode.CLIENT_ERROR)
        return JsonResponse.error(exc, exc.status)

    started, start_error = self.startGenerationJob(submission_id, file_type)
    CloudLogger.log("DEBUG: Finished startGenerationJob method", log_type="debug", file_name=self.debug_file_name)

    if started:
        # Return same response as check generation route
        return self.checkGeneration(submission_id, file_type)

    # If not successful, set job status as "failed"
    self.interfaces.jobDb.markJobStatus(upload_job.job_id, "failed")
    return start_error
def completeGeneration(self, generationId):
    """ Callback route used by the API for files D1 and D2 to load the generated file

    The request must carry an 'href' key holding the URL of the file to be
    downloaded.

    Args:
        generationId - Unique key stored in file_generation_task table, used in callback to
            identify which submission this file is for.

    Returns:
        A JsonResponse: status OK once the file was loaded, or a client
        error when the ID or href is missing or the task key is unknown.
    """
    if generationId is None:
        missing_id = ResponseException("Must include a generation ID",StatusCode.CLIENT_ERROR)
        return JsonResponse.error(missing_id, StatusCode.CLIENT_ERROR)

    self.smx_log_file_name = "smx_request.log"

    def debug(message):
        """Send one debug message to the smx request log."""
        CloudLogger.log(message, log_type="debug", file_name=self.smx_log_file_name)

    # Pull url from request
    requestValues = RequestDictionary(self.request)
    debug("DEBUG: Request content => " + requestValues.to_string())

    if not requestValues.exists("href"):
        missing_href = ResponseException("Request must include href key with URL of D file", StatusCode.CLIENT_ERROR)
        return JsonResponse.error(missing_href, StatusCode.CLIENT_ERROR)

    downloadUrl = requestValues.getValue("href")
    debug("DEBUG: Download URL => " + downloadUrl)

    # Pull information based on task key
    try:
        debug("DEBUG: Pulling information based on task key...")
        taskQuery = self.interfaces.jobDb.session.query(FileGenerationTask)
        taskQuery = taskQuery.options(joinedload(FileGenerationTask.file_type))
        taskQuery = taskQuery.filter(FileGenerationTask.generation_task_key == generationId)
        generationTask = taskQuery.one()
        uploadJob = self.interfaces.jobDb.getJobById(generationTask.job_id)

        debug("DEBUG: Loading D file...")
        loadResult = self.load_d_file(
            downloadUrl, uploadJob.filename, uploadJob.original_filename,
            uploadJob.job_id, self.isLocal)
        debug("DEBUG: Load D file result => " + str(loadResult))
        return JsonResponse.create(StatusCode.OK,{"message":"File loaded successfully"})
    except ResponseException as e:
        return JsonResponse.error(e, e.status)
    except NoResultFound as e:
        # Did not find file generation task
        unknown_key = ResponseException("Generation task key not found", StatusCode.CLIENT_ERROR)
        return JsonResponse.error(unknown_key, StatusCode.CLIENT_ERROR)
def checkJobDependencies(self, jobId):
    """ For specified job, check which of its dependencies are ready to be started, and add them to the queue

    A dependent job is sent to the job manager when it is in the 'waiting'
    state, all of its prerequisite jobs are 'finished', and its type is
    'csv_record_validation' or 'validation'. Dependents that are not
    'waiting' are logged as errors and skipped.

    Args:
        jobId: ID of the job whose dependents should be checked

    Raises:
        ValueError: if the job identified by jobId is not 'finished'
    """
    # Status IDs do not change inside the loops, so look them up once
    finishedId = self.getJobStatusId('finished')
    waitingId = self.getJobStatusId('waiting')

    # raise exception if current job is not actually finished
    if self.getJobStatus(jobId) != finishedId:
        raise ValueError(
            'Current job not finished, unable to check dependencies')

    # check if dependent jobs are finished
    for depJobId in self.getDependentJobs(jobId):
        if self.getJobStatus(depJobId) != waitingId:
            CloudLogger.logError(
                "Job dependency is not in a 'waiting' state",
                ResponseException(
                    "Job dependency is not in a 'waiting' state",
                    StatusCode.CLIENT_ERROR, ValueError),
                traceback.extract_stack())
            continue

        # the dependent is ready only if every job it depends on is finished;
        # all() stops at the first unfinished prerequisite
        isReady = all(
            self.getJobStatus(preReqJobId) == finishedId
            for preReqJobId in self.getPrerequisiteJobs(depJobId))

        # The type check here is temporary and needs to be removed once the validator is able
        # to handle cross-file validation job
        if isReady and self.getJobType(depJobId) in (
                'csv_record_validation', 'validation'):
            # mark job as ready
            self.markJobStatus(depJobId, 'ready')
            # add to the job queue
            CloudLogger.log("Sending job {} to the job manager".format(
                str(depJobId)))
            mp = ManagerProxy()
            mp.sendJobRequest(depJobId)
def error(exception, errorCode, **kwargs):
    """ Create an http response object for specified error

    The exception is always logged through CloudLogger. Exception details
    (message, type, wrapped exception, trace) are only added to the
    response when JsonResponse.debugMode is set; otherwise the response
    carries a generic message.

    Args:
        exception: Exception to be represented by response object
        errorCode: Status code to be used in response
        kwargs: Extra fields and values to be included in response

    Returns:
        Http response object containing specified error
    """
    responseDict = dict(kwargs)

    # Trace of the exception currently being handled, limited to 10 entries
    exc_tb = sys.exc_info()[2]
    trace = traceback.extract_tb(exc_tb, 10)
    # Only the extracted trace is needed from here on
    del exc_tb
    CloudLogger.logError("Route Error : ", exception, trace)

    if JsonResponse.debugMode:
        responseDict["message"] = str(exception)
        responseDict["errorType"] = str(type(exception))
        if isinstance(exception, ResponseException) and exception.wrappedException is not None:
            responseDict["wrappedType"] = str(type(exception.wrappedException))
            responseDict["wrappedMessage"] = str(exception.wrappedException)
        responseDict["trace"] = trace
        if JsonResponse.printDebug:
            print(str(type(exception)))
            print(str(exception))
            print(str(trace))
        if JsonResponse.logDebug:
            # One handle for all three writes, closed when the block exits
            with open("responseErrorLog", "a") as errorLog:
                errorLog.write(str(type(exception)) + ": ")
                errorLog.write(str(exception) + "\n")
                errorLog.write(str(trace) + "\n")
    else:
        responseDict["message"] = "An error has occurred"

    return JsonResponse.create(errorCode, responseDict)
def enqueue(jobID):
    """POST a job to the validator

    The validator address is built from CONFIG_SERVICES['validator_host']
    and CONFIG_SERVICES['validator_port']; 'http://' is prepended when the
    host does not already carry an http or https scheme.

    Args:
        jobID: ID of the job to validate, sent as the 'job_id' parameter

    Returns:
        The validator's response body decoded from JSON
    """
    CloudLogger.log("Adding job {} to the queue".format(str(jobID)))
    validatorUrl = '{validator_host}:{validator_port}'.format(
        **CONFIG_SERVICES)
    # A substring test for 'http://' would also prefix https:// hosts
    if not validatorUrl.startswith(('http://', 'https://')):
        validatorUrl = 'http://' + validatorUrl
    validatorUrl += '/validate/'
    params = {
        'job_id': jobID
    }
    response = requests.post(validatorUrl, params)
    CloudLogger.log("Job {} has completed validation".format(str(jobID)))
    # Decode the body once and reuse it for the log and the return value
    result = response.json()
    CloudLogger.log("Validator response: {}".format(str(result)))
    return result
def load_d_file(self, url, upload_name, timestamped_name, job_id, isLocal):
    """ Pull D file from specified URL and write it out through write_csv

    Args:
        url: URL the D file is downloaded from
        upload_name: file name passed to write_csv as the upload name
        timestamped_name: file name used for the downloaded copy and
            reported back in the result
        job_id: ID of the job to mark as finished or failed
        isLocal: passed to write_csv to select a local write

    Returns:
        Dict with "message" set to "Success" and "file_name" set to
        timestamped_name

    Raises:
        ResponseException: when the download fails
        Exception: any other error is re-raised after the job has been
            marked as failed and the error message stored on it
    """
    # Sources named in the error message, keyed by the job's file type
    sources_by_file_type = {"award": "ASP", "award_procurement": "FPDS"}

    job_manager = self.interfaces.jobDb

    try:
        full_file_path = "".join(
            [CONFIG_BROKER['d_file_storage_path'], timestamped_name])

        CloudLogger.log("DEBUG: Downloading file...", log_type="debug", file_name=self.smx_log_file_name)
        if not self.download_file(full_file_path, url):
            # Error occurred while downloading file, mark job as failed and record error message
            job_manager.markJobStatus(job_id, "failed")
            job = job_manager.getJobById(job_id)
            file_type = job_manager.getFileType(job_id)
            source = sources_by_file_type.get(file_type, "unknown source")
            job.error_message = "A problem occurred receiving data from {}".format(
                source)

            raise ResponseException(job.error_message, StatusCode.CLIENT_ERROR)

        # First line is passed as the header, the rest as the body
        lines = self.get_lines_from_csv(full_file_path)
        write_csv(timestamped_name, upload_name, isLocal, lines[0], lines[1:])

        CloudLogger.log("DEBUG: Marking job id of " + str(job_id) + " as finished", log_type="debug", file_name=self.smx_log_file_name)
        job_manager.markJobStatus(job_id, "finished")
        return {"message": "Success", "file_name": timestamped_name}
    except Exception as e:
        CloudLogger.log("ERROR: Exception caught => " + str(e), log_type="debug", file_name=self.smx_log_file_name)
        # Log the error
        JsonResponse.error(e, 500)
        job_manager.getJobById(job_id).error_message = str(e)
        job_manager.markJobStatus(job_id, "failed")
        job_manager.session.commit()
        # Bare raise keeps the original traceback
        raise
def threadedValidateJob(self, jobId):
    """ Run the validation that matches the job's type

    This method runs on a new thread, so no error is returned to a caller:
    failures are logged and recorded through markJob.

    Args:
        jobId -- (Integer) a valid jobId
    """
    # As this is the start of a new thread, first generate new connections to the databases
    interfaces = InterfaceHolder()

    self.filename = ""
    jobTracker = interfaces.jobDb
    errorDb = interfaces.errorDb
    try:
        # Each job type runs exactly one validation routine
        jobType = interfaces.jobDb.checkJobType(jobId)
        if jobType == interfaces.jobDb.getJobTypeId(
                "csv_record_validation"):
            self.runValidation(jobId, interfaces)
        elif jobType == interfaces.jobDb.getJobTypeId("validation"):
            self.runCrossValidation(jobId, interfaces)
        else:
            raise ResponseException("Bad job type for validator",
                                    StatusCode.INTERNAL_ERROR)
        errorDb.markFileComplete(jobId, self.filename)
        return
    except ResponseException as e:
        CloudLogger.logError(str(e), e,
                             traceback.extract_tb(sys.exc_info()[2]))
        self.markJob(jobId, jobTracker, "invalid", errorDb, self.filename,
                     e.errorType, e.extraInfo)
    except ValueError as e:
        CloudLogger.logError(str(e), e,
                             traceback.extract_tb(sys.exc_info()[2]))
        self.markJob(jobId, jobTracker, "invalid", errorDb, self.filename,
                     ValidationError.unknownError)
    except Exception as e:
        # Something unknown happened we may need to try again!
        CloudLogger.logError(str(e), e,
                             traceback.extract_tb(sys.exc_info()[2]))
        self.markJob(jobId, jobTracker, "failed", errorDb, self.filename,
                     ValidationError.unknownError)
    finally:
        # Always release the interfaces opened for this thread
        interfaces.close()
def threadedValidateJob(self, jobId):
    """ Run the validation that matches the job's type

    This method runs on a new thread, so no error is returned to a caller:
    failures are logged and recorded through markJob.

    Args:
        jobId -- (Integer) a valid jobId
    """
    # As this is the start of a new thread, first generate new connections to the databases
    interfaces = InterfaceHolder()

    self.filename = ""
    jobTracker = interfaces.jobDb
    errorDb = interfaces.errorDb
    try:
        # Each job type runs exactly one validation routine
        jobType = interfaces.jobDb.checkJobType(jobId)
        if jobType == interfaces.jobDb.getJobTypeId("csv_record_validation"):
            self.runValidation(jobId, interfaces)
        elif jobType == interfaces.jobDb.getJobTypeId("validation"):
            self.runCrossValidation(jobId, interfaces)
        else:
            raise ResponseException("Bad job type for validator", StatusCode.INTERNAL_ERROR)
        errorDb.markFileComplete(jobId, self.filename)
        return
    except ResponseException as e:
        CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
        self.markJob(jobId, jobTracker, "invalid", errorDb, self.filename, e.errorType, e.extraInfo)
    except ValueError as e:
        CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
        self.markJob(jobId, jobTracker, "invalid", errorDb, self.filename, ValidationError.unknownError)
    except Exception as e:
        # Something unknown happened we may need to try again!
        CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
        self.markJob(jobId, jobTracker, "failed", errorDb, self.filename, ValidationError.unknownError)
    finally:
        # Always release the interfaces opened for this thread
        interfaces.close()
# we want to allow access to, e.g. config.CONFIG_BROKER from dataactcore.read_config import ( # noqa CONFIG_BROKER, CONFIG_LOGGING, CONFIG_SERVICES, CONFIG_DB, CONFIG_JOB_QUEUE, CONFIG_PATH, ALEMBIC_PATH, MIGRATION_PATH, log_message) from dataactcore.utils.cloudLogger import CloudLogger # Log config values along with warnings for missing files if log_message: CloudLogger.log(log_message)
def createApp():
    """Create the Flask app.

    Registers three routes: "/" (health check), "/validate_threaded/"
    (validation on a background thread) and "/validate/" (validation within
    the request).

    Returns:
        The configured Flask application

    Raises:
        Exception: any error raised during setup, re-raised after being
            logged through CloudLogger
    """
    def writeToErrorLog(error):
        """Append the error text to the local errorLog file and close it."""
        with open("errorLog", "a") as errorLog:
            errorLog.write(str(error) + "\n")

    try:
        app = Flask(__name__)
        local = CONFIG_BROKER['local']
        error_report_path = CONFIG_SERVICES['error_report_path']
        app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        validationManager = ValidationManager(local, error_report_path)

        @app.route("/", methods=["GET"])
        def testApp():
            """Confirm server running."""
            return "Validator is running"

        @app.route("/validate_threaded/", methods=["POST"])
        def validate_threaded():
            """Start the validation process on a new thread."""
            @copy_current_request_context
            def ThreadedFunction(arg):
                """The new thread."""
                threadedManager = ValidationManager(local, error_report_path)
                threadedManager.threadedValidateJob(arg)

            try:
                interfaces = InterfaceHolder()
                jobTracker = interfaces.jobDb
            except ResponseException as e:
                writeToErrorLog(e)
                return JsonResponse.error(
                    e, e.status, table="cannot connect to job database")
            except Exception as e:
                writeToErrorLog(e)
                exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR,
                                        type(e))
                return JsonResponse.error(
                    exc, exc.status, table="cannot connect to job database")

            # NOTE(review): the error returns below leave `interfaces` open;
            # only the success path calls interfaces.close() -- confirm
            # whether that is intended

            jobId = None
            manager = ValidationManager(local, error_report_path)

            try:
                jobId = manager.getJobID(request)
            except ResponseException as e:
                # jobId is still None here because getJobID did not return
                manager.markJob(jobId, jobTracker, "invalid",
                                interfaces.errorDb, manager.filename)
                CloudLogger.logError(str(e), e,
                                     traceback.extract_tb(sys.exc_info()[2]))
                return JsonResponse.error(e, e.status, table="")
            except Exception as e:
                exc = ResponseException(str(e), StatusCode.CLIENT_ERROR,
                                        type(e))
                manager.markJob(jobId, jobTracker, "invalid",
                                interfaces.errorDb, manager.filename)
                CloudLogger.logError(str(e), exc,
                                     traceback.extract_tb(sys.exc_info()[2]))
                return JsonResponse.error(exc, exc.status, table="")

            try:
                manager.testJobID(jobId, interfaces)
            except ResponseException as e:
                writeToErrorLog(e)
                # Job is not ready to run according to job tracker, do not change status of job in job tracker
                interfaces.errorDb.writeFileError(jobId, manager.filename,
                                                  ValidationError.jobError)
                return JsonResponse.error(e, e.status, table="")
            except Exception as e:
                writeToErrorLog(e)
                exc = ResponseException(str(e), StatusCode.CLIENT_ERROR,
                                        type(e))
                interfaces.errorDb.writeFileError(jobId, manager.filename,
                                                  ValidationError.jobError)
                return JsonResponse.error(exc, exc.status, table="")

            thread = Thread(target=ThreadedFunction, args=(jobId, ))

            try:
                jobTracker.markStatus(jobId, "running")
            except Exception as e:
                writeToErrorLog(e)
                exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR,
                                        type(e))
                return JsonResponse.error(exc, exc.status,
                                          table="could not start job")

            # Release this request's interfaces before the worker thread starts
            interfaces.close()
            thread.start()

            return JsonResponse.create(StatusCode.OK,
                                       {"table": "job" + str(jobId)})

        @app.route("/validate/", methods=["POST"])
        def validate():
            """Start the validation process on the same threads."""
            interfaces = InterfaceHolder()  # Create sessions for this route
            try:
                return validationManager.validateJob(request, interfaces)
            except Exception as e:
                # Something went wrong getting the flask request
                writeToErrorLog(e)
                exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR,
                                        type(e))
                return JsonResponse.error(exc, exc.status, table="")
            finally:
                interfaces.close()

        JsonResponse.debugMode = CONFIG_SERVICES['rest_trace']

        return app

    except Exception as e:
        trace = traceback.extract_tb(sys.exc_info()[2], 10)
        CloudLogger.logError('Validator App Level Error: ', e, trace)
        raise
def createApp():
    """Create the Flask app.

    Registers three routes: "/" (health check), "/validate_threaded/"
    (validation on a background thread) and "/validate/" (validation within
    the request).

    Returns:
        The configured Flask application

    Raises:
        Exception: any error raised during setup, re-raised after being
            logged through CloudLogger
    """
    def writeToErrorLog(error):
        """Append the error text to the local errorLog file and close it."""
        with open("errorLog", "a") as errorLog:
            errorLog.write(str(error) + "\n")

    try:
        app = Flask(__name__)
        local = CONFIG_BROKER['local']
        error_report_path = CONFIG_SERVICES['error_report_path']
        app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        validationManager = ValidationManager(local, error_report_path)

        @app.route("/", methods=["GET"])
        def testApp():
            """Confirm server running."""
            return "Validator is running"

        @app.route("/validate_threaded/", methods=["POST"])
        def validate_threaded():
            """Start the validation process on a new thread."""
            @copy_current_request_context
            def ThreadedFunction(arg):
                """The new thread."""
                threadedManager = ValidationManager(local, error_report_path)
                threadedManager.threadedValidateJob(arg)

            try:
                interfaces = InterfaceHolder()
                jobTracker = interfaces.jobDb
            except ResponseException as e:
                writeToErrorLog(e)
                return JsonResponse.error(e, e.status, table="cannot connect to job database")
            except Exception as e:
                writeToErrorLog(e)
                exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
                return JsonResponse.error(exc, exc.status, table="cannot connect to job database")

            # NOTE(review): the error returns below leave `interfaces` open;
            # only the success path calls interfaces.close() -- confirm
            # whether that is intended

            jobId = None
            manager = ValidationManager(local, error_report_path)

            try:
                jobId = manager.getJobID(request)
            except ResponseException as e:
                # jobId is still None here because getJobID did not return
                manager.markJob(jobId, jobTracker, "invalid", interfaces.errorDb, manager.filename)
                CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
                return JsonResponse.error(e, e.status, table="")
            except Exception as e:
                exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, type(e))
                manager.markJob(jobId, jobTracker, "invalid", interfaces.errorDb, manager.filename)
                CloudLogger.logError(str(e), exc, traceback.extract_tb(sys.exc_info()[2]))
                return JsonResponse.error(exc, exc.status, table="")

            try:
                manager.testJobID(jobId, interfaces)
            except ResponseException as e:
                writeToErrorLog(e)
                # Job is not ready to run according to job tracker, do not change status of job in job tracker
                interfaces.errorDb.writeFileError(jobId, manager.filename, ValidationError.jobError)
                return JsonResponse.error(e, e.status, table="")
            except Exception as e:
                writeToErrorLog(e)
                exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, type(e))
                interfaces.errorDb.writeFileError(jobId, manager.filename, ValidationError.jobError)
                return JsonResponse.error(exc, exc.status, table="")

            thread = Thread(target=ThreadedFunction, args=(jobId,))

            try:
                jobTracker.markJobStatus(jobId, "running")
            except Exception as e:
                writeToErrorLog(e)
                exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
                return JsonResponse.error(exc, exc.status, table="could not start job")

            # Release this request's interfaces before the worker thread starts
            interfaces.close()
            thread.start()

            return JsonResponse.create(StatusCode.OK, {"table": "job" + str(jobId)})

        @app.route("/validate/", methods=["POST"])
        def validate():
            """Start the validation process on the same threads."""
            interfaces = InterfaceHolder()  # Create sessions for this route
            try:
                return validationManager.validateJob(request, interfaces)
            except Exception as e:
                # Something went wrong getting the flask request
                writeToErrorLog(e)
                exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
                return JsonResponse.error(exc, exc.status, table="")
            finally:
                interfaces.close()

        JsonResponse.debugMode = CONFIG_SERVICES['rest_trace']

        return app

    except Exception as e:
        trace = traceback.extract_tb(sys.exc_info()[2], 10)
        CloudLogger.logError('Validator App Level Error: ', e, trace)
        raise
def validateFileBySql(cls, submissionId, fileType, interfaces):
    """ Check all SQL rules

    Runs every non-cross-file RuleSql rule registered for the file type
    against the staging database and collects one entry per failing row.

    Args:
        submissionId: submission to be checked
        fileType: file type being checked
        interfaces: database interface objects

    Returns:
        List of errors found, each element has:
             field names
             error message
             values in fields involved
             row number
             rule label
             source file id
             target file id
             severity id
    """
    CloudLogger.logError(
        "VALIDATOR_INFO: ",
        "Beginning SQL validation rules on submissionID: " + str(submissionId) + " fileType: " + fileType,
        "")

    # Pull all SQL rules for this file type
    fileId = interfaces.validationDb.getFileTypeIdByName(fileType)
    rules = interfaces.validationDb.session.query(RuleSql).filter(
        RuleSql.file_id == fileId).filter(
        RuleSql.rule_cross_file_flag == False).all()
    errors = []

    # Get short to long colname dictionary
    shortColnames = interfaces.validationDb.getShortToLongColname()

    # For each rule, execute sql for rule
    for rule in rules:
        # Log the name of the rule being run (the instance value, not the
        # RuleSql class attribute)
        CloudLogger.logError(
            "VALIDATOR_INFO: ",
            "Running query: " + str(rule.query_name) + " on submissionID: " + str(submissionId) +
            " fileType: " + fileType,
            "")
        # NOTE(review): the rule SQL is completed with str.format; this
        # assumes submissionId is a trusted internal id - confirm with callers
        failures = interfaces.stagingDb.connection.execute(
            rule.rule_sql.format(submissionId))
        if failures.rowcount:
            # Create column list (exclude row_number); copy into a list first
            # so remove() works even if keys() does not return a plain list
            cols = list(failures.keys())
            cols.remove("row_number")

            # The field names depend only on the columns, so build that
            # string once per rule instead of once per failing row
            longNames = [
                shortColnames[field] if field in shortColnames else field
                for field in cols
            ]
            fieldString = ", ".join(longNames)

            # Build error list
            for failure in failures:
                errorMsg = rule.rule_error_message
                row = failure["row_number"]
                # Create string of "name: value" pairs for the fields involved
                valueString = ", ".join(
                    "{}: {}".format(longName, str(failure[field]))
                    for longName, field in zip(longNames, cols))
                errors.append([
                    fieldString, errorMsg, valueString, row, rule.rule_label,
                    fileId, rule.target_file_id, rule.rule_severity_id
                ])

    CloudLogger.logError(
        "VALIDATOR_INFO: ",
        "Completed SQL validation rules on submissionID: " + str(submissionId) + " fileType: " + fileType,
        "")

    return errors
def validateJob(self, request, interfaces):
    """ Gets file for job, validates each row, and sends valid rows to staging database

    Reads "job_id" from the request, checks the job with the job tracker,
    marks it "running" and dispatches to runValidation or runCrossValidation
    depending on the job type. Every failure is logged through CloudLogger
    and turned into a JSON error response instead of being raised.

    Args:
        request -- HTTP request containing the jobId
        interfaces -- InterfaceHolder object to the databases

    Returns:
        Http response object
    """
    # Create connection to job tracker database
    self.filename = None
    tableName = ""
    jobId = None
    jobTracker = None

    try:
        jobTracker = interfaces.jobDb
        requestDict = RequestDictionary(request)
        if requestDict.exists("job_id"):
            jobId = requestDict.getValue("job_id")
        else:
            # Request does not have a job ID, can't validate
            raise ResponseException("No job ID specified in request", StatusCode.CLIENT_ERROR)

        # Check that job exists and is ready
        if not jobTracker.runChecks(jobId):
            raise ResponseException("Checks failed on Job ID", StatusCode.CLIENT_ERROR)
        tableName = interfaces.stagingDb.getTableName(jobId)
        jobType = interfaces.jobDb.checkJobType(jobId)

    except ResponseException as e:
        CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
        if e.errorType is None:
            # Error occurred while trying to get and check job ID
            e.errorType = ValidationError.jobError
        # Only the file error is recorded here; the job status is left as is
        interfaces.errorDb.writeFileError(jobId, self.filename, e.errorType, e.extraInfo)
        return JsonResponse.error(e, e.status, table=tableName)
    except Exception as e:
        exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
        CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
        self.markJob(jobId, jobTracker, "failed", interfaces.errorDb, self.filename,
                     ValidationError.unknownError)
        return JsonResponse.error(exc, exc.status, table=tableName)

    try:
        jobTracker.markJobStatus(jobId, "running")
        if jobType == interfaces.jobDb.getJobTypeId("csv_record_validation"):
            self.runValidation(jobId, interfaces)
        elif jobType == interfaces.jobDb.getJobTypeId("validation"):
            self.runCrossValidation(jobId, interfaces)
        else:
            raise ResponseException("Bad job type for validator", StatusCode.INTERNAL_ERROR)
        interfaces.errorDb.markFileComplete(jobId, self.filename)
        return JsonResponse.create(StatusCode.OK, {"table": tableName})
    except ResponseException as e:
        CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
        self.markJob(jobId, jobTracker, "invalid", interfaces.errorDb, self.filename,
                     e.errorType, e.extraInfo)
        return JsonResponse.error(e, e.status, table=tableName)
    except ValueError as e:
        CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
        # Problem with CSV headers
        exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, type(e),
                                ValidationError.unknownError)  # "Internal value error"
        self.markJob(jobId, jobTracker, "invalid", interfaces.errorDb, self.filename,
                     ValidationError.unknownError)
        return JsonResponse.error(exc, exc.status, table=tableName)
    except Error as e:
        CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
        # CSV file not properly formatted (usually too much in one field)
        exc = ResponseException("Internal error", StatusCode.CLIENT_ERROR, type(e),
                                ValidationError.unknownError)
        self.markJob(jobId, jobTracker, "invalid", interfaces.errorDb, self.filename,
                     ValidationError.unknownError)
        return JsonResponse.error(exc, exc.status, table=tableName)
    except Exception as e:
        CloudLogger.logError(str(e), e, traceback.extract_tb(sys.exc_info()[2]))
        exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e),
                                ValidationError.unknownError)
        self.markJob(jobId, jobTracker, "failed", interfaces.errorDb, self.filename,
                     ValidationError.unknownError)
        return JsonResponse.error(exc, exc.status, table=tableName)
def createApp():
    """Set up the application.

    Creates the broker Flask app, copies settings from CONFIG_BROKER and
    CONFIG_SERVICES into it, enables CORS, Dynamo-backed sessions and bcrypt,
    and registers the root, login, file and user routes. A route serving
    files out of the broker file directory is added only for local installs.

    Returns:
        The configured Flask application object

    Raises:
        Exception: any error hit during setup is logged through CloudLogger
        and re-raised unchanged
    """
    try:
        # Create application
        app = Flask(__name__, instance_path=CONFIG_PATH)
        local = CONFIG_BROKER['local']
        app.config.from_object(__name__)
        app.config['LOCAL'] = local
        app.config['REST_TRACE'] = CONFIG_SERVICES['rest_trace']
        app.config['SYSTEM_EMAIL'] = CONFIG_BROKER['reply_to_email']

        # Future: Override config w/ environment variable, if set
        app.config.from_envvar('BROKER_SETTINGS', silent=True)

        # Set parameters
        broker_file_path = CONFIG_BROKER['broker_files']
        AccountHandler.FRONT_END = CONFIG_BROKER['full_url']
        sesEmail.SIGNING_KEY = CONFIG_BROKER['email_token_key']
        sesEmail.isLocal = local
        if sesEmail.isLocal:
            sesEmail.emailLog = os.path.join(broker_file_path, 'email.log')
        # If local, make the email directory if needed
        if local and not os.path.exists(broker_file_path):
            os.makedirs(broker_file_path)

        # Error responses follow the REST_TRACE setting
        JsonResponse.debugMode = app.config['REST_TRACE']

        # CORS(...) is called for its effect on app; the result is not needed
        if CONFIG_SERVICES['cross_origin_url'] == "*":
            CORS(app, supports_credentials=True)
        else:
            CORS(app, supports_credentials=True,
                 origins=CONFIG_SERVICES['cross_origin_url'])
        # Enable AWS Sessions
        app.session_interface = DynamoInterface()
        # Set up bcrypt
        bcrypt = Bcrypt(app)

        # Root will point to index.html
        @app.route("/", methods=["GET"])
        def root():
            return "Broker is running"

        if local:
            localFiles = os.path.join(broker_file_path, "<path:filename>")

            # Only define this route when running locally
            @app.route(localFiles)
            def sendFile(filename):
                # Check the enclosing `local` flag: no `config` name is bound
                # anywhere in this function, so the previous config["local"]
                # lookup depended on a name defined outside it
                # NOTE(review): confirm no module-level `config` was intended
                if local:
                    return send_from_directory(broker_file_path, filename)
        else:
            # For non-local installs, set Dynamo Region
            SessionTable.DYNAMO_REGION = CONFIG_BROKER['aws_region']

        # Add routes for modules here
        add_login_routes(app, bcrypt)
        add_file_routes(app, CONFIG_BROKER['aws_create_temp_credentials'],
                        local, broker_file_path)
        add_user_routes(app, app.config['SYSTEM_EMAIL'], bcrypt)

        SessionTable.LOCAL_PORT = CONFIG_DB['dynamo_port']
        SessionTable.setup(app, local)

        return app

    except Exception as e:
        # Read the traceback inline so no local keeps a reference to it
        trace = traceback.extract_tb(sys.exc_info()[2], 10)
        CloudLogger.logError('Broker App Level Error: ', e, trace)
        raise
def validateJob(self, request, interfaces):
    """ Gets file for job, validates each row, and sends valid rows to staging database

    Reads "job_id" from the request, checks the job with the job tracker,
    marks it "running" and dispatches to runValidation or runCrossValidation
    depending on the job type. Every failure is logged through CloudLogger
    and turned into a JSON error response instead of being raised.

    Args:
        request -- HTTP request containing the jobId
        interfaces -- InterfaceHolder object to the databases

    Returns:
        Http response object
    """
    # Create connection to job tracker database
    self.filename = None
    tableName = ""
    jobId = None
    jobTracker = None

    try:
        jobTracker = interfaces.jobDb
        requestDict = RequestDictionary(request)
        if requestDict.exists("job_id"):
            jobId = requestDict.getValue("job_id")
        else:
            # Request does not have a job ID, can't validate
            raise ResponseException("No job ID specified in request",
                                    StatusCode.CLIENT_ERROR)

        # Check that job exists and is ready
        if not jobTracker.runChecks(jobId):
            raise ResponseException("Checks failed on Job ID",
                                    StatusCode.CLIENT_ERROR)
        tableName = interfaces.stagingDb.getTableName(jobId)
        jobType = interfaces.jobDb.checkJobType(jobId)

    except ResponseException as e:
        CloudLogger.logError(str(e), e,
                             traceback.extract_tb(sys.exc_info()[2]))
        if e.errorType is None:
            # Error occurred while trying to get and check job ID
            e.errorType = ValidationError.jobError
        # Only the file error is recorded here; the job status is left as is
        interfaces.errorDb.writeFileError(jobId, self.filename, e.errorType,
                                          e.extraInfo)
        return JsonResponse.error(e, e.status, table=tableName)
    except Exception as e:
        exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
        CloudLogger.logError(str(e), e,
                             traceback.extract_tb(sys.exc_info()[2]))
        self.markJob(jobId, jobTracker, "failed", interfaces.errorDb,
                     self.filename, ValidationError.unknownError)
        return JsonResponse.error(exc, exc.status, table=tableName)

    try:
        jobTracker.markJobStatus(jobId, "running")
        if jobType == interfaces.jobDb.getJobTypeId("csv_record_validation"):
            self.runValidation(jobId, interfaces)
        elif jobType == interfaces.jobDb.getJobTypeId("validation"):
            self.runCrossValidation(jobId, interfaces)
        else:
            raise ResponseException("Bad job type for validator",
                                    StatusCode.INTERNAL_ERROR)
        interfaces.errorDb.markFileComplete(jobId, self.filename)
        return JsonResponse.create(StatusCode.OK, {"table": tableName})
    except ResponseException as e:
        CloudLogger.logError(str(e), e,
                             traceback.extract_tb(sys.exc_info()[2]))
        self.markJob(jobId, jobTracker, "invalid", interfaces.errorDb,
                     self.filename, e.errorType, e.extraInfo)
        return JsonResponse.error(e, e.status, table=tableName)
    except ValueError as e:
        CloudLogger.logError(str(e), e,
                             traceback.extract_tb(sys.exc_info()[2]))
        # Problem with CSV headers
        exc = ResponseException(
            str(e), StatusCode.CLIENT_ERROR, type(e),
            ValidationError.unknownError)  # "Internal value error"
        self.markJob(jobId, jobTracker, "invalid", interfaces.errorDb,
                     self.filename, ValidationError.unknownError)
        return JsonResponse.error(exc, exc.status, table=tableName)
    except Error as e:
        CloudLogger.logError(str(e), e,
                             traceback.extract_tb(sys.exc_info()[2]))
        # CSV file not properly formatted (usually too much in one field)
        exc = ResponseException("Internal error", StatusCode.CLIENT_ERROR,
                                type(e), ValidationError.unknownError)
        self.markJob(jobId, jobTracker, "invalid", interfaces.errorDb,
                     self.filename, ValidationError.unknownError)
        return JsonResponse.error(exc, exc.status, table=tableName)
    except Exception as e:
        CloudLogger.logError(str(e), e,
                             traceback.extract_tb(sys.exc_info()[2]))
        exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e),
                                ValidationError.unknownError)
        self.markJob(jobId, jobTracker, "failed", interfaces.errorDb,
                     self.filename, ValidationError.unknownError)
        return JsonResponse.error(exc, exc.status, table=tableName)
def runCrossValidation(self, jobId, interfaces):
    """ Cross file validation job, runs every cross-file SQL rule for the submission

    For each ordered pair of file types, the RuleSql rules flagged as
    cross-file that link the two files are run, fatal and warning failures
    are written to their respective reports and every failure is recorded
    in the error database. The job is then marked "finished".

    Args:
        jobId: cross-file validation job to run
        interfaces: InterfaceHolder object to the databases
    """
    # Create File Status object
    interfaces.errorDb.createFileIfNeeded(jobId)

    validationDb = interfaces.validationDb
    errorDb = interfaces.errorDb
    submissionId = interfaces.jobDb.getSubmissionId(jobId)
    bucketName = CONFIG_BROKER['aws_bucket']
    regionName = CONFIG_BROKER['aws_region']
    CloudLogger.logError(
        "VALIDATOR_INFO: ",
        "Beginning runCrossValidation on submissionID: " + str(submissionId),
        "")

    # Delete existing cross file errors for this submission
    errorDb.resetErrorsByJobId(jobId)

    # use db to get a list of the cross-file combinations
    targetFiles = validationDb.session.query(FileTypeValidation).subquery()
    crossFileCombos = validationDb.session.query(
        FileTypeValidation.name.label('first_file_name'),
        FileTypeValidation.file_id.label('first_file_id'),
        targetFiles.c.name.label('second_file_name'),
        targetFiles.c.file_id.label('second_file_id')).filter(
            FileTypeValidation.file_order < targetFiles.c.file_order)

    # get all cross file rules from db
    crossFileRules = validationDb.session.query(RuleSql).filter(
        RuleSql.rule_cross_file_flag == True)

    # Look the severity ids up once rather than twice per failure inside
    # the innermost loop
    fatalSeverityId = validationDb.getRuleSeverityId("fatal")
    warningSeverityId = validationDb.getRuleSeverityId("warning")

    # for each cross-file combo, run associated rules and create error report
    for row in crossFileCombos:
        comboRules = crossFileRules.filter(
            or_(
                and_(RuleSql.file_id == row.first_file_id,
                     RuleSql.target_file_id == row.second_file_id),
                and_(RuleSql.file_id == row.second_file_id,
                     RuleSql.target_file_id == row.first_file_id)))
        # send comboRules to validator.crossValidate sql
        failures = Validator.crossValidateSql(comboRules.all(), submissionId)
        # get error file name
        reportFilename = self.getFileName(
            getCrossReportName(submissionId, row.first_file_name,
                               row.second_file_name))
        warningReportFilename = self.getFileName(
            getCrossWarningReportName(submissionId, row.first_file_name,
                                      row.second_file_name))

        # loop through failures to create the error report
        with self.getWriter(regionName, bucketName, reportFilename,
                            self.crossFileReportHeaders) as writer, \
                self.getWriter(regionName, bucketName, warningReportFilename,
                               self.crossFileReportHeaders) as warningWriter:
            for failure in failures:
                # failure[9] is passed on below as severity_id
                if failure[9] == fatalSeverityId:
                    writer.write(failure[0:7])
                if failure[9] == warningSeverityId:
                    warningWriter.write(failure[0:7])
                # Every failure is recorded, whatever its severity
                errorDb.recordRowError(jobId, "cross_file", failure[0],
                                       failure[3], failure[5], failure[6],
                                       failure[7], failure[8],
                                       severity_id=failure[9])
            writer.finishBatch()
            warningWriter.finishBatch()

    errorDb.writeAllRowErrors(jobId)
    interfaces.jobDb.markJobStatus(jobId, "finished")
    CloudLogger.logError(
        "VALIDATOR_INFO: ",
        "Completed runCrossValidation on submissionID: " + str(submissionId),
        "")
    # Update error info for submission
    interfaces.jobDb.populateSubmissionErrorInfo(submissionId)
    # TODO: Remove temporary step below
    # Temporarily set publishable flag at end of cross file, remove this once users are able to mark their submissions
    # as publishable
    # Publish only if no errors are present
    if interfaces.jobDb.getSubmissionById(submissionId).number_of_errors == 0:
        interfaces.jobDb.setPublishableFlag(submissionId, True)

    # Mark validation complete
    interfaces.errorDb.markFileComplete(jobId)
def runValidation(self, jobId, interfaces):
    """ Run validations for specified job

    Reads the job's file row by row, applies the basic schema checks, loads
    rows into staging, runs the SQL validation rules, writes the error and
    warning reports and updates the job's row counts and status.

    Args:
        jobId: Job to be validated
        interfaces: InterfaceHolder object to the databases

    Returns:
        True if successful
    """
    sess = GlobalDB.db().session
    # get the job object here so we can call the refactored getReportPath
    # todo: replace other db access functions with job object attributes
    job = sess.query(Job).filter(Job.job_id == jobId).one()

    CloudLogger.logError("VALIDATOR_INFO: ",
                         "Beginning runValidation on jobID: " + str(jobId), "")

    jobTracker = interfaces.jobDb
    submissionId = jobTracker.getSubmissionId(jobId)

    # NOTE(review): starts at 1 and is bumped before each read, so the first
    # record is row 2 - presumably row 1 is the header; confirm
    rowNumber = 1
    fileType = jobTracker.getFileType(jobId)
    # Clear existing records for this submission
    interfaces.stagingDb.clearFileBySubmission(submissionId, fileType)

    # Get short to long colname dictionary
    shortColnames = interfaces.validationDb.getShortToLongColname()

    # If local, make the error report directory
    if self.isLocal and not os.path.exists(self.directory):
        os.makedirs(self.directory)
    # Get bucket name and file name
    fileName = jobTracker.getFileName(jobId)
    self.filename = fileName
    bucketName = CONFIG_BROKER['aws_bucket']
    regionName = CONFIG_BROKER['aws_region']

    errorFileName = self.getFileName(getReportPath(job, 'error'))
    warningFileName = self.getFileName(getReportPath(job, 'warning'))

    # Create File Status object
    interfaces.errorDb.createFileIfNeeded(jobId, fileName)

    validationDB = interfaces.validationDb
    fieldList = validationDB.getFieldsByFileList(fileType)
    csvSchema = validationDB.getFieldsByFile(fileType, shortCols=True)

    reader = self.getReader()

    # Get file size and write to jobs table. Both branches measure the job's
    # own file: the AWS branch used to ask for the size of the error report,
    # which has not been written yet at this point
    if CONFIG_BROKER["use_aws"]:
        fileSize = s3UrlHandler.getFileSize(fileName)
    else:
        fileSize = os.path.getsize(fileName)
    jobTracker.setFileSizeById(jobId, fileSize)

    fields = validationDB.getFileColumnsByFile(fileType)
    try:
        # Pull file and return info on whether it's using short or long col headers
        reader.openFile(regionName, bucketName, fileName, fieldList,
                        bucketName, errorFileName)

        errorInterface = interfaces.errorDb
        self.longToShortDict = validationDB.getLongToShortColname()
        # list to keep track of rows that fail validations
        errorRows = []

        # While not done, pull one row and put it into staging table if it passes
        # the Validator

        with self.getWriter(regionName, bucketName, errorFileName, self.reportHeaders) as writer, \
                self.getWriter(regionName, bucketName, warningFileName, self.reportHeaders) as warningWriter:
            while not reader.isFinished:
                rowNumber += 1

                if (rowNumber % 100) == 0:
                    CloudLogger.logError(
                        "VALIDATOR_INFO: ",
                        "JobId: " + str(jobId) + " loading row " + str(rowNumber),
                        "")

                #
                # first phase of validations: read record and record a
                # formatting error if there's a problem
                #
                (record, reduceRow, skipRow, doneReading, rowErrorHere) = self.readRecord(
                    reader, writer, fileType, interfaces, rowNumber, jobId, fields)
                if reduceRow:
                    rowNumber -= 1
                if rowErrorHere:
                    errorRows.append(rowNumber)
                if doneReading:
                    # Stop reading from input file
                    break
                elif skipRow:
                    # Do not write this row to staging, but continue processing future rows
                    continue

                #
                # second phase of validations: do basic schema checks
                # (e.g., require fields, field length, data type)
                #
                # D files are obtained from upstream systems (ASP and FPDS) that perform their own basic validations,
                # so these validations are not repeated here
                if fileType in ["award", "award_procurement"]:
                    # Skip basic validations for D files, set as valid to trigger write to staging
                    passedValidations = True
                    valid = True
                else:
                    passedValidations, failures, valid = Validator.validate(
                        record, csvSchema)
                if valid:
                    skipRow = self.writeToStaging(record, jobId, submissionId,
                                                  passedValidations, interfaces,
                                                  writer, rowNumber, fileType)
                    if skipRow:
                        errorRows.append(rowNumber)
                        continue

                # `failures` is only bound on the non-D-file path, which is
                # also the only path where passedValidations can be False
                if not passedValidations:
                    if self.writeErrors(failures, interfaces, jobId,
                                        shortColnames, writer, warningWriter,
                                        rowNumber):
                        errorRows.append(rowNumber)

            CloudLogger.logError(
                "VALIDATOR_INFO: ",
                "Loading complete on jobID: " + str(jobId) +
                ". Total rows added to staging: " + str(rowNumber),
                "")

            #
            # third phase of validations: run validation rules as specified
            # in the schema guidance. these validations are sql-based.
            #
            sqlErrorRows = self.runSqlValidations(interfaces, jobId, fileType,
                                                  shortColnames, writer,
                                                  warningWriter, rowNumber)
            errorRows.extend(sqlErrorRows)

            # Write unfinished batch
            writer.finishBatch()
            warningWriter.finishBatch()

        # Calculate total number of rows in file
        # that passed validations
        errorRowsUnique = set(errorRows)
        totalRowsExcludingHeader = rowNumber - 1
        validRows = totalRowsExcludingHeader - len(errorRowsUnique)

        # Update job metadata
        jobTracker.setJobRowcounts(jobId, rowNumber, validRows)

        errorInterface.writeAllRowErrors(jobId)
        # Update error info for submission
        jobTracker.populateSubmissionErrorInfo(submissionId)
        # Mark validation as finished in job tracker
        jobTracker.markJobStatus(jobId, "finished")
        interfaces.errorDb.markFileComplete(jobId, self.filename)
    finally:
        # Ensure the file always closes
        reader.close()
        CloudLogger.logError(
            "VALIDATOR_INFO: ",
            "Completed L1 and SQL rule validations on jobID: " + str(jobId),
            "")
    return True
def get_xml_response_content(self, api_url):
    """ Fetch the given API url with an HTTP GET and return the response body as text

    Args:
        api_url: url to request

    Returns:
        Text of the response
    """
    CloudLogger.log("DEBUG: Getting XML response",
                    log_type="debug",
                    file_name=self.debug_file_name)
    # NOTE(review): verify=False switches off TLS certificate verification
    # for this request - confirm that is intended for this endpoint
    response = requests.get(api_url, verify=False, timeout=120)
    return response.text