def closeOne(interface):
        """ Close all aspects of one interface """
        if interface is None:
            # No need to close a nonexistent connection
            return
        try:
            if interface.session is None:
                # If session is None, skip closing
                return
        except AttributeError:
            # If interface has no session, skip closing
            return

        # Try to close the session; if that fails, roll back and retry the close
        try:
            interface.session.close()
        except Exception:
            try:
                interface.session.rollback()
                interface.session.close()
            except Exception as e:
                exc_type, exc_obj, exc_tb = sys.exc_info()
                trace = traceback.extract_tb(exc_tb, 10)
                CloudLogger.logError('Broker DB Interface Error: ', e, trace)
                del exc_tb
                raise
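Calls like interfaces.close() elsewhere in these examples suggest a holder object that closes each of its interfaces in turn. A minimal sketch of such a method, assuming a hypothetical InterfaceHolder whose attributes (jobDb, errorDb, stagingDb, validationDb) are interface objects like the one handled above:

    def close(self):
        """ Close all database interfaces held by this object """
        # Hypothetical holder method; each attribute is assumed to be an
        # interface object with a .session, exactly what closeOne expects.
        for interface in [self.jobDb, self.errorDb, self.stagingDb, self.validationDb]:
            closeOne(interface)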
    def checkJobDependencies(self, jobId):
        """ For specified job, check which of its dependencies are ready to be started, and add them to the queue """

        # raise exception if current job is not actually finished
        if self.getJobStatus(jobId) != self.getJobStatusId('finished'):
            raise ValueError('Current job not finished, unable to check dependencies')

        # check if dependent jobs are finished
        for depJobId in self.getDependentJobs(jobId):
            isReady = True
            if self.getJobStatus(depJobId) != self.getJobStatusId('waiting'):
                CloudLogger.logError("Job dependency is not in a 'waiting' state",
                                     ResponseException("Job dependency is not in a 'waiting' state",StatusCode.CLIENT_ERROR, ValueError),
                                     traceback.extract_stack())
                continue
            # check whether all other prerequisite jobs of this dependent job are finished
            for preReqJobId in self.getPrerequisiteJobs(depJobId):
                if self.getJobStatus(preReqJobId) != self.getJobStatusId('finished'):
                    # At least one prerequisite is unfinished, so this job is not ready
                    isReady = False
                    break
            # The type check here is temporary and should be removed once the validator
            # can handle cross-file validation jobs
            if isReady and (self.getJobType(depJobId) == 'csv_record_validation' or self.getJobType(depJobId) == 'validation'):
                # mark job as ready
                self.markJobStatus(depJobId, 'ready')
                # add to the job queue
                jobQueueResult = self.jobQueue.enqueue.delay(depJobId)
    def startGenerationJob(self, submission_id, file_type):
        """ Initiates a file generation job

        Args:
            submission_id: ID of submission to start job for
            file_type: Type of file to be generated

        Returns:
            Tuple of boolean indicating successful start, and error response if False

        """
        jobDb = self.interfaces.jobDb
        file_type_name = self.fileTypeMap[file_type]

        if file_type in ["D1", "D2"]:
            # Populate start and end dates, these should be provided in MM/DD/YYYY format, using calendar year (not fiscal year)
            requestDict = RequestDictionary(self.request)
            start_date = requestDict.getValue("start")
            end_date = requestDict.getValue("end")

            if not (StringCleaner.isDate(start_date) and StringCleaner.isDate(end_date)):
                exc = ResponseException("Start or end date cannot be parsed into a date", StatusCode.CLIENT_ERROR)
                return False, JsonResponse.error(exc, exc.status, start = "", end = "", file_type = file_type, status = "failed")
        elif file_type not in ["E","F"]:
            exc = ResponseException("File type must be either D1, D2, E or F", StatusCode.CLIENT_ERROR)
            return False, JsonResponse.error(exc, exc.status, file_type = file_type, status = "failed")

        cgac_code = self.jobManager.getSubmissionById(submission_id).cgac_code

        # Generate and upload file to S3
        user_id = LoginSession.getName(session)
        timestamped_name = s3UrlHandler.getTimestampedFilename(CONFIG_BROKER["".join([str(file_type_name),"_file_name"])])
        if self.isLocal:
            upload_file_name = "".join([CONFIG_BROKER['broker_files'], timestamped_name])
        else:
            upload_file_name = "".join([str(user_id), "/", timestamped_name])

        job = jobDb.getJobBySubmissionFileTypeAndJobType(submission_id, file_type_name, "file_upload")
        job.filename = upload_file_name
        job.original_filename = timestamped_name
        job.job_status_id = jobDb.getJobStatusId("running")
        jobDb.session.commit()
        if file_type in ["D1", "D2"]:
            CloudLogger.log("DEBUG: Adding job info for job id of " + str(job.job_id),
                            log_type="debug",
                            file_name=self.debug_file_name)
            return self.addJobInfoForDFile(upload_file_name, timestamped_name, submission_id, file_type, file_type_name, start_date, end_date, cgac_code, job)
        elif file_type == 'E':
            generate_e_file.delay(
                submission_id, job.job_id, InterfaceHolder, timestamped_name,
                upload_file_name, self.isLocal)
        elif file_type == 'F':
            generate_f_file.delay(
                submission_id, job.job_id, InterfaceHolder, timestamped_name,
                upload_file_name, self.isLocal)

        return True, None
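For D1/D2 generation the route reads "start" and "end" from the request and later parses them with "%m/%d/%Y", so a hypothetical request payload might look like the following (all values are illustrative):

# Hypothetical payload for a D1 generation request; "start" and "end"
# must be calendar-year dates in MM/DD/YYYY format.
generate_request = {
    "submission_id": 1234,
    "file_type": "D1",
    "start": "01/01/2016",
    "end": "03/31/2016"
}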
    def addJobInfoForDFile(self, upload_file_name, timestamped_name, submission_id, file_type, file_type_name, start_date, end_date, cgac_code, job):
        """ Populates upload and validation job objects with start and end dates, filenames, and status

        Args:
            upload_file_name - Filename to use on S3
            timestamped_name - Version of filename without user ID
            submission_id - Submission to add D files to
            file_type - File type as either "D1" or "D2"
            file_type_name - Full name of file type
            start_date - Beginning of period for D file
            end_date - End of period for D file
            cgac_code - Agency to generate D file for
            job - Job object for upload job
        """
        jobDb = self.interfaces.jobDb
        try:
            valJob = jobDb.getJobBySubmissionFileTypeAndJobType(submission_id, file_type_name, "csv_record_validation")
            valJob.filename = upload_file_name
            valJob.original_filename = timestamped_name
            valJob.job_status_id = jobDb.getJobStatusId("waiting")
            job.start_date = datetime.strptime(start_date,"%m/%d/%Y").date()
            job.end_date = datetime.strptime(end_date,"%m/%d/%Y").date()
            valJob.start_date = datetime.strptime(start_date,"%m/%d/%Y").date()
            valJob.end_date = datetime.strptime(end_date,"%m/%d/%Y").date()
            # Generate random uuid and store generation task
            task_key = uuid4()
            task = FileGenerationTask(generation_task_key = task_key, submission_id = submission_id, file_type_id = jobDb.getFileTypeId(file_type_name), job_id = job.job_id)
            jobDb.session.add(task)

            jobDb.session.commit()
        except ValueError as e:
            # Date was not in expected format
            exc = ResponseException(str(e),StatusCode.CLIENT_ERROR,ValueError)
            return False, JsonResponse.error(exc, exc.status, url = "", start = "", end = "",  file_type = file_type)
        # Create file D API URL with dates and callback URL
        callback = "{}://{}:{}/v1/complete_generation/{}/".format(CONFIG_SERVICES["protocol"],CONFIG_SERVICES["broker_api_host"], CONFIG_SERVICES["broker_api_port"],task_key)
        CloudLogger.log(
            'DEBUG: Callback URL for {}: {}'.format(file_type, callback),
            log_type='debug', file_name=self.debug_file_name)
        get_url = CONFIG_BROKER["".join([file_type_name, "_url"])].format(cgac_code, start_date, end_date, callback)

        CloudLogger.log("DEBUG: Calling D file API => " + str(get_url),
                        log_type="debug",
                        file_name=self.debug_file_name)
        try:
            if not self.call_d_file_api(get_url):
                self.handleEmptyResponse(job, valJob)
        except Timeout as e:
            exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, Timeout)
            return False, JsonResponse.error(exc, exc.status, url="", start="", end="", file_type=file_type)

        return True, None
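With illustrative config values, the callback URL assembled above resolves as follows (host, port, and task key are made up for the example):

# Illustrative values only; the real ones come from CONFIG_SERVICES and uuid4().
protocol, host, port = "http", "broker.example.com", "9999"
task_key = "123e4567-e89b-12d3-a456-426655440000"
callback = "{}://{}:{}/v1/complete_generation/{}/".format(protocol, host, port, task_key)
# -> "http://broker.example.com:9999/v1/complete_generation/123e4567-e89b-12d3-a456-426655440000/"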
    def error(exception, errorCode, **kwargs):
        """ Create an http response object for specified error

        Args:
            exception: Exception to be represented by response object
            errorCode: Status code to be used in response
            kwargs: Extra fields and values to be included in response

        Returns:
            Http response object containing specified error
        """
        responseDict = dict(kwargs)

        exc_type, exc_obj, exc_tb = sys.exc_info()
        trace = traceback.extract_tb(exc_tb, 10)
        CloudLogger.logError('Route Error : ', exception, trace)
        if JsonResponse.debugMode:
            responseDict["message"] = str(exception)
            responseDict["errorType"] = str(type(exception))
            if (type(exception) == type(ResponseException(""))
                    and exception.wrappedException != None):
                responseDict["wrappedType"] = str(
                    type(exception.wrappedException))
                responseDict["wrappedMessage"] = str(
                    exception.wrappedException)
            trace = [str(entry) for entry in trace]
            responseDict["trace"] = trace
            if JsonResponse.printDebug:
                print(str(type(exception)))
                print(str(exception))
                print(str(trace))
            if JsonResponse.logDebug:
                with open("responseErrorLog", "a") as log_file:
                    log_file.write(str(type(exception)) + ": ")
                    log_file.write(str(exception) + "\n")
                    log_file.write(str(trace) + "\n")
            del exc_tb
            return JsonResponse.create(errorCode, responseDict)
        else:
            responseDict["message"] = "An error has occurred"
            del exc_tb
            return JsonResponse.create(errorCode, responseDict)
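A typical route-level use of this helper, following the pattern seen throughout these examples, wraps the caught exception in a ResponseException and passes any extra response fields through kwargs (the route and do_route_work are hypothetical stand-ins):

@app.route("/example/", methods=["POST"])
def example_route():
    try:
        result = do_route_work()  # hypothetical stand-in for real route logic
        return JsonResponse.create(StatusCode.OK, {"result": result})
    except Exception as e:
        exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
        # "table" becomes an extra key on the JSON error body
        return JsonResponse.error(exc, exc.status, table="")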
Example #9
def write_csv(file_name, upload_name, is_local, header, body):
    """Derive the relevant location and write a CSV to it.
    :return: the final file name (complete with prefix)"""
    if is_local:
        file_name = CONFIG_BROKER['broker_files'] + file_name
        csv_writer = CsvLocalWriter(file_name, header)
        message = 'DEBUG: Writing file locally...'
    else:
        bucket = CONFIG_BROKER['aws_bucket']
        region = CONFIG_BROKER['aws_region']
        csv_writer = CsvS3Writer(region, bucket, upload_name, header)
        message = 'DEBUG: Writing file to S3...'

    CloudLogger.log(message, log_type="debug", file_name='smx_request.log')

    with csv_writer as writer:
        for line in body:
            writer.write(line)
        writer.finishBatch()
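A minimal local usage sketch, assuming CONFIG_BROKER['broker_files'] points at a writable directory; the file name, header, and rows are illustrative:

header = ["Field name", "Error message", "Row number"]
body = [
    ["fain", "Field cannot be blank", "3"],
    ["uri", "Value not in valid set", "7"],
]
# With is_local=True the rows go to the broker_files directory and upload_name
# is unused; with is_local=False the same rows would be streamed to S3.
write_csv("submission_1_errors.csv", "user_1/submission_1_errors.csv", True, header, body)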
    def generateFile(self):
        """ Start a file generation job for the specified file type """
        self.debug_file_name = "debug.log"
        CloudLogger.log("DEBUG: Starting D file generation", log_type="debug",
                        file_name=self.debug_file_name)
        submission_id, file_type = self.getRequestParamsForGenerate()

        CloudLogger.log("DEBUG: Submission ID = " + str(submission_id) + " / File type = " + str(file_type), log_type="debug",
                        file_name=self.debug_file_name)
        # Check permission to submission
        success, error_response = self.checkSubmissionById(submission_id, file_type)
        if not success:
            return error_response

        job = self.interfaces.jobDb.getJobBySubmissionFileTypeAndJobType(submission_id, self.fileTypeMap[file_type], "file_upload")
        # Check prerequisites on upload job
        if not self.interfaces.jobDb.runChecks(job.job_id):
            exc = ResponseException("Must wait for completion of prerequisite validation job", StatusCode.CLIENT_ERROR)
            return JsonResponse.error(exc, exc.status)

        success, error_response = self.startGenerationJob(submission_id, file_type)

        CloudLogger.log("DEBUG: Finished startGenerationJob method",
                        log_type="debug",
                        file_name=self.debug_file_name)
        if not success:
            # If not successful, set job status as "failed"
            self.interfaces.jobDb.markJobStatus(job.job_id, "failed")
            return error_response

        # Return same response as check generation route
        return self.checkGeneration(submission_id, file_type)
    def completeGeneration(self, generationId):
        """ For files D1 and D2, the API uses this route as a callback to load the generated file.
        Requires an 'href' key in the request that specifies the URL of the file to be downloaded

        Args:
            generationId - Unique key stored in file_generation_task table, used in callback to identify which submission
            this file is for.

        """
        if generationId is None:
            return JsonResponse.error(ResponseException("Must include a generation ID",StatusCode.CLIENT_ERROR), StatusCode.CLIENT_ERROR)

        self.smx_log_file_name = "smx_request.log"

        # Pull url from request
        safeDictionary = RequestDictionary(self.request)
        CloudLogger.log("DEBUG: Request content => " + safeDictionary.to_string(), log_type="debug", file_name=self.smx_log_file_name)


        if not safeDictionary.exists("href"):
            return JsonResponse.error(ResponseException("Request must include href key with URL of D file", StatusCode.CLIENT_ERROR), StatusCode.CLIENT_ERROR)
        url =  safeDictionary.getValue("href")
        CloudLogger.log("DEBUG: Download URL => " + url, log_type="debug", file_name=self.smx_log_file_name)

        #Pull information based on task key
        try:
            CloudLogger.log("DEBUG: Pulling information based on task key...", log_type="debug",
                            file_name=self.smx_log_file_name)
            task = self.interfaces.jobDb.session.query(FileGenerationTask).options(joinedload(FileGenerationTask.file_type)).filter(FileGenerationTask.generation_task_key == generationId).one()
            job = self.interfaces.jobDb.getJobById(task.job_id)
            CloudLogger.log("DEBUG: Loading D file...", log_type="debug", file_name=self.smx_log_file_name)
            result = self.load_d_file(url,job.filename,job.original_filename,job.job_id,self.isLocal)
            CloudLogger.log("DEBUG: Load D file result => " + str(result), log_type="debug",
                            file_name=self.smx_log_file_name)
            return JsonResponse.create(StatusCode.OK,{"message":"File loaded successfully"})
        except ResponseException as e:
            return JsonResponse.error(e, e.status)
        except NoResultFound:
            # Did not find file generation task
            return JsonResponse.error(ResponseException("Generation task key not found", StatusCode.CLIENT_ERROR), StatusCode.CLIENT_ERROR)
    def checkJobDependencies(self, jobId):
        """ For specified job, check which of its dependencies are ready to be started, and add them to the queue """

        # raise exception if current job is not actually finished
        if self.getJobStatus(jobId) != self.getJobStatusId('finished'):
            raise ValueError(
                'Current job not finished, unable to check dependencies')

        # check if dependent jobs are finished
        for depJobId in self.getDependentJobs(jobId):
            isReady = True
            if self.getJobStatus(depJobId) != self.getJobStatusId('waiting'):
                CloudLogger.logError(
                    "Job dependency is not in a 'waiting' state",
                    ResponseException(
                        "Job dependency is not in a 'waiting' state",
                        StatusCode.CLIENT_ERROR, ValueError),
                    traceback.extract_stack())
                continue
            # check whether all other prerequisite jobs of this dependent job are finished
            for preReqJobId in self.getPrerequisiteJobs(depJobId):
                if self.getJobStatus(preReqJobId) != self.getJobStatusId('finished'):
                    # At least one prerequisite is unfinished, so this job is not ready
                    isReady = False
                    break
            # The type check here is temporary and should be removed once the validator
            # can handle cross-file validation jobs
            if isReady and (self.getJobType(depJobId)
                            == 'csv_record_validation'
                            or self.getJobType(depJobId) == 'validation'):
                # mark job as ready
                self.markJobStatus(depJobId, 'ready')
                # add to the job queue
                CloudLogger.log("Sending job {} to the job manager".format(
                    str(depJobId)))
                mp = ManagerProxy()
                mp.sendJobRequest(depJobId)
Example #14
def enqueue(jobID):
    """POST a job to the validator"""
    CloudLogger.log("Adding job {} to the queue".format(str(jobID)))
    validatorUrl = '{validator_host}:{validator_port}'.format(
        **CONFIG_SERVICES)
    if not validatorUrl.startswith('http://'):
        validatorUrl = 'http://' + validatorUrl
    validatorUrl += '/validate/'
    params = {
        'job_id': jobID
    }
    response = requests.post(validatorUrl, data=params)
    CloudLogger.log("Job {} has completed validation".format(str(jobID)))
    CloudLogger.log("Validator response: {}".format(str(response.json())))
    return response.json()
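A hedged usage sketch: with CONFIG_SERVICES supplying validator_host and validator_port, calling enqueue POSTs the job ID to the validator's /validate/ route and blocks until validation completes:

# Illustrative: with validator_host "localhost" and validator_port "8080",
# this POSTs job_id=42 to http://localhost:8080/validate/ and returns the
# validator's JSON response.
result = enqueue(42)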
Example #15
    def load_d_file(self, url, upload_name, timestamped_name, job_id, isLocal):
        """ Pull D file from specified URL and write to S3 """
        job_manager = self.interfaces.jobDb
        try:
            full_file_path = "".join(
                [CONFIG_BROKER['d_file_storage_path'], timestamped_name])

            CloudLogger.log("DEBUG: Downloading file...",
                            log_type="debug",
                            file_name=self.smx_log_file_name)
            if not self.download_file(full_file_path, url):
                # Error occurred while downloading file, mark job as failed and record error message
                job_manager.markJobStatus(job_id, "failed")
                job = job_manager.getJobById(job_id)
                file_type = job_manager.getFileType(job_id)
                if file_type == "award":
                    source = "ASP"
                elif file_type == "award_procurement":
                    source = "FPDS"
                else:
                    source = "unknown source"
                job.error_message = "A problem occurred receiving data from {}".format(
                    source)

                raise ResponseException(job.error_message,
                                        StatusCode.CLIENT_ERROR)
            lines = self.get_lines_from_csv(full_file_path)

            write_csv(timestamped_name, upload_name, isLocal, lines[0],
                      lines[1:])

            CloudLogger.log("DEBUG: Marking job id of " + str(job_id) +
                            " as finished",
                            log_type="debug",
                            file_name=self.smx_log_file_name)
            job_manager.markJobStatus(job_id, "finished")
            return {"message": "Success", "file_name": timestamped_name}
        except Exception as e:
            CloudLogger.log("ERROR: Exception caught => " + str(e),
                            log_type="debug",
                            file_name=self.smx_log_file_name)
            # Log the error
            JsonResponse.error(e, 500)
            job_manager.getJobById(job_id).error_message = str(e)
            job_manager.markJobStatus(job_id, "failed")
            job_manager.session.commit()
            raise e
    def threadedValidateJob(self, jobId):
        """
        args
        jobId -- (Integer) a valid jobId
        This method runs on a new thread thus
        there are zero error messages other then the
        job status being updated
        """

        # As this is the start of a new thread, first generate new connections to the databases
        interfaces = InterfaceHolder()

        self.filename = ""
        jobTracker = interfaces.jobDb
        errorDb = interfaces.errorDb
        try:
            jobType = interfaces.jobDb.checkJobType(jobId)
            if jobType == interfaces.jobDb.getJobTypeId(
                    "csv_record_validation"):
                self.runValidation(jobId, interfaces)
            elif jobType == interfaces.jobDb.getJobTypeId("validation"):
                self.runCrossValidation(jobId, interfaces)
            else:
                raise ResponseException("Bad job type for validator",
                                        StatusCode.INTERNAL_ERROR)
            errorDb.markFileComplete(jobId, self.filename)
            return
        except ResponseException as e:
            CloudLogger.logError(str(e), e,
                                 traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId, jobTracker, "invalid", errorDb, self.filename,
                         e.errorType, e.extraInfo)
        except ValueError as e:
            CloudLogger.logError(str(e), e,
                                 traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId, jobTracker, "invalid", errorDb, self.filename,
                         ValidationError.unknownError)
        except Exception as e:
            # Something unknown happened; we may need to try again
            CloudLogger.logError(str(e), e,
                                 traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId, jobTracker, "failed", errorDb, self.filename,
                         ValidationError.unknownError)
        finally:
            interfaces.close()
Example #18
# we want to allow access to, e.g. config.CONFIG_BROKER
from dataactcore.read_config import (  # noqa
    CONFIG_BROKER, CONFIG_LOGGING, CONFIG_SERVICES, CONFIG_DB,
    CONFIG_JOB_QUEUE, CONFIG_PATH, ALEMBIC_PATH, MIGRATION_PATH, log_message)
from dataactcore.utils.cloudLogger import CloudLogger

# Log config values along with warnings for missing files
if log_message:
    CloudLogger.log(log_message)
Example #19
def createApp():
    """Create the Flask app."""
    try:
        app = Flask(__name__)
        local = CONFIG_BROKER['local']
        error_report_path = CONFIG_SERVICES['error_report_path']
        app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        validationManager = ValidationManager(local, error_report_path)

        @app.route("/", methods=["GET"])
        def testApp():
            """Confirm server running."""
            return "Validator is running"

        @app.route("/validate_threaded/", methods=["POST"])
        def validate_threaded():
            """Start the validation process on a new thread."""
            @copy_current_request_context
            def ThreadedFunction(arg):
                """The new thread."""
                threadedManager = ValidationManager(local, error_report_path)
                threadedManager.threadedValidateJob(arg)

            try:
                interfaces = InterfaceHolder()
                jobTracker = interfaces.jobDb
            except ResponseException as e:
                open("errorLog", "a").write(str(e) + "\n")
                return JsonResponse.error(
                    e, e.status, table="cannot connect to job database")
            except Exception as e:
                open("errorLog", "a").write(str(e) + "\n")
                exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR,
                                        type(e))
                return JsonResponse.error(
                    exc, exc.status, table="cannot connect to job database")

            jobId = None
            manager = ValidationManager(local, error_report_path)

            try:
                jobId = manager.getJobID(request)
            except ResponseException as e:
                manager.markJob(jobId, jobTracker, "invalid",
                                interfaces.errorDb, manager.filename)
                CloudLogger.logError(str(e), e,
                                     traceback.extract_tb(sys.exc_info()[2]))
                return JsonResponse.error(e, e.status, table="")
            except Exception as e:
                exc = ResponseException(str(e), StatusCode.CLIENT_ERROR,
                                        type(e))
                manager.markJob(jobId, jobTracker, "invalid",
                                interfaces.errorDb, manager.filename)
                CloudLogger.logError(str(e), exc,
                                     traceback.extract_tb(sys.exc_info()[2]))
                return JsonResponse.error(exc, exc.status, table="")

            try:
                manager.testJobID(jobId, interfaces)
            except ResponseException as e:
                open("errorLog", "a").write(str(e) + "\n")
                # Job is not ready to run according to job tracker, do not change status of job in job tracker
                interfaces.errorDb.writeFileError(jobId, manager.filename,
                                                  ValidationError.jobError)
                return JsonResponse.error(e, e.status, table="")
            except Exception as e:
                open("errorLog", "a").write(str(e) + "\n")
                exc = ResponseException(str(e), StatusCode.CLIENT_ERROR,
                                        type(e))
                interfaces.errorDb.writeFileError(jobId, manager.filename,
                                                  ValidationError.jobError)
                return JsonResponse.error(exc, exc.status, table="")

            thread = Thread(target=ThreadedFunction, args=(jobId,))

            try:
                jobTracker.markStatus(jobId, "running")
            except Exception as e:
                open("errorLog", "a").write(str(e) + "\n")
                exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR,
                                        type(e))
                return JsonResponse.error(exc,
                                          exc.status,
                                          table="could not start job")

            interfaces.close()
            thread.start()

            return JsonResponse.create(StatusCode.OK,
                                       {"table": "job" + str(jobId)})

        @app.route("/validate/", methods=["POST"])
        def validate():
            """Start the validation process on the same threads."""
            interfaces = InterfaceHolder()  # Create sessions for this route
            try:
                return validationManager.validateJob(request, interfaces)
            except Exception as e:
                # Something went wrong getting the flask request
                open("errorLog", "a").write(str(e) + "\n")
                exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR,
                                        type(e))
                return JsonResponse.error(exc, exc.status, table="")
            finally:
                interfaces.close()

        JsonResponse.debugMode = CONFIG_SERVICES['rest_trace']

        return app

    except Exception as e:
        trace = traceback.extract_tb(sys.exc_info()[2], 10)
        CloudLogger.logError('Validator App Level Error: ', e, trace)
        raise
    def validateFileBySql(cls, submissionId, fileType, interfaces):
        """ Check all SQL rules

        Args:
            submissionId: submission to be checked
            fileType: file type being checked
            interfaces: database interface objects

        Returns:
            List of errors found, each element has:
             field names
             error message
             values in fields involved
             row number
             rule label
             source file id
             target file id
             severity id
        """

        CloudLogger.logError(
            "VALIDATOR_INFO: ",
            "Beginning SQL validation rules on submissionID: " +
            str(submissionId) + " fileType: " + fileType, "")

        # Pull all SQL rules for this file type
        fileId = interfaces.validationDb.getFileTypeIdByName(fileType)
        rules = interfaces.validationDb.session.query(RuleSql).filter(
            RuleSql.file_id == fileId).filter(
                RuleSql.rule_cross_file_flag == False).all()
        errors = []

        # Get short to long colname dictionary
        shortColnames = interfaces.validationDb.getShortToLongColname()

        # For each rule, execute sql for rule
        for rule in rules:
            CloudLogger.logError(
                "VALIDATOR_INFO: ", "Running query: " +
                str(rule.query_name) + " on submissionID: " +
                str(submissionId) + " fileType: " + fileType, "")
            failures = interfaces.stagingDb.connection.execute(
                rule.rule_sql.format(submissionId))
            if failures.rowcount:
                # Create column list (exclude row_number)
                cols = [col for col in failures.keys() if col != "row_number"]
                # Build error list
                for failure in failures:
                    errorMsg = rule.rule_error_message
                    row = failure["row_number"]
                    # Create strings for fields and values
                    valueList = [
                        "{}: {}".format(shortColnames[field],
                                        str(failure[field]))
                        if field in shortColnames else "{}: {}".format(
                            field, str(failure[field])) for field in cols
                    ]
                    valueString = ", ".join(valueList)
                    fieldList = [
                        shortColnames[field]
                        if field in shortColnames else field for field in cols
                    ]
                    fieldString = ", ".join(fieldList)
                    errors.append([
                        fieldString, errorMsg, valueString, row,
                        rule.rule_label, fileId, rule.target_file_id,
                        rule.rule_severity_id
                    ])

        # Log completion once, after all rules have run
        CloudLogger.logError(
            "VALIDATOR_INFO: ",
            "Completed SQL validation rules on submissionID: " +
            str(submissionId) + " fileType: " + fileType, "")

        return errors
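Each rule_sql entry appears to be a SQL template that receives the submission ID via str.format and must return a row_number column plus the fields involved. A hypothetical rule, reusing the names from validateFileBySql above (the table and column names are made up):

# Hypothetical rule_sql template; real templates live in the rule_sql table.
rule_sql = (
    "SELECT row_number, fain, uri "
    "FROM award_financial "
    "WHERE submission_id = {} AND fain IS NULL AND uri IS NULL"
)
failures = interfaces.stagingDb.connection.execute(rule_sql.format(submissionId))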
    def validateJob(self, request, interfaces):
        """ Gets file for job, validates each row, and sends valid rows to staging database

        Args:
            request -- HTTP request containing the jobId
            interfaces -- InterfaceHolder object to the databases

        Returns:
            Http response object
        """
        # Create connection to job tracker database
        self.filename = None
        tableName = ""
        jobId = None
        jobTracker = None

        try:
            jobTracker = interfaces.jobDb
            requestDict = RequestDictionary(request)
            tableName = ""
            if(requestDict.exists("job_id")):
                jobId = requestDict.getValue("job_id")
            else:
                # Request does not have a job ID, can't validate
                raise ResponseException("No job ID specified in request",StatusCode.CLIENT_ERROR)

            # Check that job exists and is ready
            if not jobTracker.runChecks(jobId):
                raise ResponseException("Checks failed on Job ID",StatusCode.CLIENT_ERROR)
            tableName = interfaces.stagingDb.getTableName(jobId)
            jobType = interfaces.jobDb.checkJobType(jobId)

        except ResponseException as e:
            CloudLogger.logError(str(e),e,traceback.extract_tb(sys.exc_info()[2]))
            if e.errorType is None:
                # Error occurred while trying to get and check job ID
                e.errorType = ValidationError.jobError
            interfaces.errorDb.writeFileError(jobId,self.filename,e.errorType,e.extraInfo)
            return JsonResponse.error(e,e.status,table=tableName)
        except Exception as e:
            exc = ResponseException(str(e),StatusCode.INTERNAL_ERROR,type(e))
            CloudLogger.logError(str(e),e,traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId,jobTracker,"failed",interfaces.errorDb,self.filename,ValidationError.unknownError)
            return JsonResponse.error(exc,exc.status,table=tableName)

        try:
            jobTracker.markJobStatus(jobId,"running")
            if jobType == interfaces.jobDb.getJobTypeId("csv_record_validation"):
                self.runValidation(jobId,interfaces)
            elif jobType == interfaces.jobDb.getJobTypeId("validation"):
                self.runCrossValidation(jobId, interfaces)
            else:
                raise ResponseException("Bad job type for validator", StatusCode.INTERNAL_ERROR)
            interfaces.errorDb.markFileComplete(jobId,self.filename)
            return  JsonResponse.create(StatusCode.OK,{"table":tableName})
        except ResponseException as e:
            CloudLogger.logError(str(e),e,traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId,jobTracker,"invalid",interfaces.errorDb,self.filename,e.errorType,e.extraInfo)
            return JsonResponse.error(e,e.status,table=tableName)
        except ValueError as e:
            CloudLogger.logError(str(e),e,traceback.extract_tb(sys.exc_info()[2]))
            # Problem with CSV headers
            exc = ResponseException(str(e),StatusCode.CLIENT_ERROR,type(e),ValidationError.unknownError) #"Internal value error"
            self.markJob(jobId,jobTracker,"invalid",interfaces.errorDb,self.filename,ValidationError.unknownError)
            return JsonResponse.error(exc,exc.status,table=tableName)
        except Error as e:
            CloudLogger.logError(str(e),e,traceback.extract_tb(sys.exc_info()[2]))
            # CSV file not properly formatted (usually too much in one field)
            exc = ResponseException("Internal error",StatusCode.CLIENT_ERROR,type(e),ValidationError.unknownError)
            self.markJob(jobId,jobTracker,"invalid",interfaces.errorDb,self.filename,ValidationError.unknownError)
            return JsonResponse.error(exc,exc.status,table=tableName)
        except Exception as e:
            CloudLogger.logError(str(e),e,traceback.extract_tb(sys.exc_info()[2]))
            exc = ResponseException(str(e),StatusCode.INTERNAL_ERROR,type(e),ValidationError.unknownError)
            self.markJob(jobId,jobTracker,"failed",interfaces.errorDb,self.filename,ValidationError.unknownError)
            return JsonResponse.error(exc,exc.status,table=tableName)
def createApp():
    """Set up the application."""
    try:
        # Create application
        app = Flask(__name__, instance_path=CONFIG_PATH)
        local = CONFIG_BROKER['local']
        app.config.from_object(__name__)
        app.config['LOCAL'] = local
        app.config['REST_TRACE'] = CONFIG_SERVICES['rest_trace']
        app.config['SYSTEM_EMAIL'] = CONFIG_BROKER['reply_to_email']

        # Future: Override config w/ environment variable, if set
        app.config.from_envvar('BROKER_SETTINGS', silent=True)

        # Set parameters
        broker_file_path = CONFIG_BROKER['broker_files']
        AccountHandler.FRONT_END = CONFIG_BROKER['full_url']
        sesEmail.SIGNING_KEY = CONFIG_BROKER['email_token_key']
        sesEmail.isLocal = local
        if sesEmail.isLocal:
            sesEmail.emailLog = os.path.join(broker_file_path, 'email.log')
        # If local, make the email directory if needed
        if local and not os.path.exists(broker_file_path):
            os.makedirs(broker_file_path)

        # When runlocal is true, assume Dynamo is on the same server
        # (should be false for prod)
        JsonResponse.debugMode = app.config['REST_TRACE']

        if CONFIG_SERVICES['cross_origin_url'] == "*":
            cors = CORS(app, supports_credentials=True)
        else:
            cors = CORS(app,
                        supports_credentials=True,
                        origins=CONFIG_SERVICES['cross_origin_url'])
        # Enable AWS Sessions
        app.session_interface = DynamoInterface()
        # Set up bcrypt
        bcrypt = Bcrypt(app)
        # Root will point to index.html
        @app.route("/", methods=["GET"])
        def root():
            return "Broker is running"

        if local:
            localFiles = os.path.join(broker_file_path, "<path:filename>")
            # Only define this route when running locally
            @app.route(localFiles)
            def sendFile(filename):
                if (config["local"]):
                    return send_from_directory(broker_file_path, filename)
        else:
            # For non-local installs, set Dynamo Region
            SessionTable.DYNAMO_REGION = CONFIG_BROKER['aws_region']

        # Add routes for modules here
        add_login_routes(app, bcrypt)

        add_file_routes(app, CONFIG_BROKER['aws_create_temp_credentials'],
                        local, broker_file_path)
        add_user_routes(app, app.config['SYSTEM_EMAIL'], bcrypt)

        SessionTable.LOCAL_PORT = CONFIG_DB['dynamo_port']

        SessionTable.setup(app, local)

        return app

    except Exception as e:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        trace = traceback.extract_tb(exc_tb, 10)
        CloudLogger.logError('Broker App Level Error: ', e, trace)

        del exc_tb
        raise
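A minimal sketch of serving the app, with illustrative host and port values:

# Hypothetical entry point; host/port are illustrative.
if __name__ == "__main__":
    app = createApp()
    app.run(host="0.0.0.0", port=9999)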
    def validateJob(self, request, interfaces):
        """ Gets file for job, validates each row, and sends valid rows to staging database
        Args:
        request -- HTTP request containing the jobId
        interfaces -- InterfaceHolder object to the databases
        Returns:
        Http response object
        """
        # Create connection to job tracker database
        self.filename = None
        tableName = ""
        jobId = None
        jobTracker = None

        try:
            jobTracker = interfaces.jobDb
            requestDict = RequestDictionary(request)
            tableName = ""
            if (requestDict.exists("job_id")):
                jobId = requestDict.getValue("job_id")
            else:
                # Request does not have a job ID, can't validate
                raise ResponseException("No job ID specified in request",
                                        StatusCode.CLIENT_ERROR)

            # Check that job exists and is ready
            if (not (jobTracker.runChecks(jobId))):
                raise ResponseException("Checks failed on Job ID",
                                        StatusCode.CLIENT_ERROR)
            tableName = interfaces.stagingDb.getTableName(jobId)
            jobType = interfaces.jobDb.checkJobType(jobId)

        except ResponseException as e:
            CloudLogger.logError(str(e), e,
                                 traceback.extract_tb(sys.exc_info()[2]))
            if (e.errorType == None):
                # Error occurred while trying to get and check job ID
                e.errorType = ValidationError.jobError
            interfaces.errorDb.writeFileError(jobId, self.filename,
                                              e.errorType, e.extraInfo)
            return JsonResponse.error(e, e.status, table=tableName)
        except Exception as e:
            exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e))
            CloudLogger.logError(str(e), e,
                                 traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId, jobTracker, "failed", interfaces.errorDb,
                         self.filename, ValidationError.unknownError)
            return JsonResponse.error(exc, exc.status, table=tableName)

        try:
            jobTracker.markJobStatus(jobId, "running")
            if jobType == interfaces.jobDb.getJobTypeId(
                    "csv_record_validation"):
                self.runValidation(jobId, interfaces)
            elif jobType == interfaces.jobDb.getJobTypeId("validation"):
                self.runCrossValidation(jobId, interfaces)
            else:
                raise ResponseException("Bad job type for validator",
                                        StatusCode.INTERNAL_ERROR)
            interfaces.errorDb.markFileComplete(jobId, self.filename)
            return JsonResponse.create(StatusCode.OK, {"table": tableName})
        except ResponseException as e:
            CloudLogger.logError(str(e), e,
                                 traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId, jobTracker, "invalid", interfaces.errorDb,
                         self.filename, e.errorType, e.extraInfo)
            return JsonResponse.error(e, e.status, table=tableName)
        except ValueError as e:
            CloudLogger.logError(str(e), e,
                                 traceback.extract_tb(sys.exc_info()[2]))
            # Problem with CSV headers
            exc = ResponseException(
                str(e), StatusCode.CLIENT_ERROR, type(e),
                ValidationError.unknownError)  #"Internal value error"
            self.markJob(jobId, jobTracker, "invalid", interfaces.errorDb,
                         self.filename, ValidationError.unknownError)
            return JsonResponse.error(exc, exc.status, table=tableName)
        except Error as e:
            CloudLogger.logError(str(e), e,
                                 traceback.extract_tb(sys.exc_info()[2]))
            # CSV file not properly formatted (usually too much in one field)
            exc = ResponseException("Internal error", StatusCode.CLIENT_ERROR,
                                    type(e), ValidationError.unknownError)
            self.markJob(jobId, jobTracker, "invalid", interfaces.errorDb,
                         self.filename, ValidationError.unknownError)
            return JsonResponse.error(exc, exc.status, table=tableName)
        except Exception as e:
            CloudLogger.logError(str(e), e,
                                 traceback.extract_tb(sys.exc_info()[2]))
            exc = ResponseException(str(e), StatusCode.INTERNAL_ERROR, type(e),
                                    ValidationError.unknownError)
            self.markJob(jobId, jobTracker, "failed", interfaces.errorDb,
                         self.filename, ValidationError.unknownError)
            return JsonResponse.error(exc, exc.status, table=tableName)
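The code above is the tail of a job-validation entry point. For orientation, here is a minimal, hypothetical sketch of how a route might hand a request off to it; the route path, the app object, the method name validateJob, and the module-level local / error_report_path values are assumptions for illustration, not part of the example:

@app.route("/validate/", methods=["POST"])
def validate():
    """Hypothetical route wrapping the validation handler above."""
    interfaces = InterfaceHolder()  # assumed: wires up the broker databases
    manager = ValidationManager(local, error_report_path)
    # validateJob is the assumed name of the method the except blocks above belong to
    return manager.validateJob(request, interfaces)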
Example #25
    def runCrossValidation(self, jobId, interfaces):
        """ Cross file validation job, test all rules with matching rule_timing """
        # Create File Status object
        interfaces.errorDb.createFileIfNeeded(jobId)

        validationDb = interfaces.validationDb
        errorDb = interfaces.errorDb
        submissionId = interfaces.jobDb.getSubmissionId(jobId)
        bucketName = CONFIG_BROKER['aws_bucket']
        regionName = CONFIG_BROKER['aws_region']
        CloudLogger.logError(
            "VALIDATOR_INFO: ",
            "Beginning runCrossValidation on submissionID: " +
            str(submissionId), "")

        # Delete existing cross file errors for this submission
        errorDb.resetErrorsByJobId(jobId)

        # use db to get a list of the cross-file combinations
        targetFiles = validationDb.session.query(FileTypeValidation).subquery()
        crossFileCombos = validationDb.session.query(
            FileTypeValidation.name.label('first_file_name'),
            FileTypeValidation.file_id.label('first_file_id'),
            targetFiles.c.name.label('second_file_name'),
            targetFiles.c.file_id.label('second_file_id')).filter(
                FileTypeValidation.file_order < targetFiles.c.file_order)
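        # The file_order inequality pairs each file type with every
        # higher-ordered one, so each combination appears exactly once,
        # e.g. ordered files (A, B, C) -> (A, B), (A, C), (B, C)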

        # get all cross file rules from db
        crossFileRules = validationDb.session.query(RuleSql).filter(
            RuleSql.rule_cross_file_flag == True)

        # for each cross-file combo, run associated rules and create error report
        for row in crossFileCombos:
            comboRules = crossFileRules.filter(
                or_(
                    and_(RuleSql.file_id == row.first_file_id,
                         RuleSql.target_file_id == row.second_file_id),
                    and_(RuleSql.file_id == row.second_file_id,
                         RuleSql.target_file_id == row.first_file_id)))
            # run this combination's rules through Validator.crossValidateSql
            failures = Validator.crossValidateSql(comboRules.all(),
                                                  submissionId)
            # get the error and warning report file names
            reportFilename = self.getFileName(
                getCrossReportName(submissionId, row.first_file_name,
                                   row.second_file_name))
            warningReportFilename = self.getFileName(
                getCrossWarningReportName(submissionId, row.first_file_name,
                                          row.second_file_name))

            # loop through failures to create the error report
            with self.getWriter(regionName, bucketName, reportFilename, self.crossFileReportHeaders) as writer, \
                 self.getWriter(regionName, bucketName, warningReportFilename, self.crossFileReportHeaders) as warningWriter:
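                # Each failure is a positional tuple: columns 0-6 form the
                # report row and column 9 holds the rule severity id (layout
                # inferred from the indexing below)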
                for failure in failures:
                    if failure[9] == interfaces.validationDb.getRuleSeverityId(
                            "fatal"):
                        writer.write(failure[0:7])
                    if failure[9] == interfaces.validationDb.getRuleSeverityId(
                            "warning"):
                        warningWriter.write(failure[0:7])
                    errorDb.recordRowError(jobId,
                                           "cross_file",
                                           failure[0],
                                           failure[3],
                                           failure[5],
                                           failure[6],
                                           failure[7],
                                           failure[8],
                                           severity_id=failure[9])
                writer.finishBatch()
                warningWriter.finishBatch()

        errorDb.writeAllRowErrors(jobId)
        interfaces.jobDb.markJobStatus(jobId, "finished")
        CloudLogger.logError(
            "VALIDATOR_INFO: ",
            "Completed runCrossValidation on submissionID: " +
            str(submissionId), "")
        # Update error info for submission
        interfaces.jobDb.populateSubmissionErrorInfo(submissionId)
        # TODO: Remove temporary step below
        # Temporarily set publishable flag at end of cross file, remove this once users are able to mark their submissions
        # as publishable
        # Publish only if no errors are present
        if interfaces.jobDb.getSubmissionById(
                submissionId).number_of_errors == 0:
            interfaces.jobDb.setPublishableFlag(submissionId, True)

        # Mark validation complete
        interfaces.errorDb.markFileComplete(jobId)
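To situate runCrossValidation, a hedged driver sketch follows; the job lookup helper getJobBySubmission and the surrounding wiring are hypothetical, assumed only for illustration:

interfaces = InterfaceHolder()
manager = ValidationManager(local, error_report_path)
# hypothetical helper returning the cross-file ("validation") job for a submission
crossFileJobId = interfaces.jobDb.getJobBySubmission(submissionId, "validation")
manager.runCrossValidation(crossFileJobId, interfaces)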
Example #26
    def runValidation(self, jobId, interfaces):
        """ Run validations for specified job
        Args:
            jobId: Job to be validated
            interfaces: InterfaceHolder providing the broker database interfaces
        Returns:
            True if successful
        """

        sess = GlobalDB.db().session
        # get the job object here so we can call the refactored getReportPath
        # todo: replace other db access functions with job object attributes
        job = sess.query(Job).filter(Job.job_id == jobId).one()

        CloudLogger.logError("VALIDATOR_INFO: ",
                             "Beginning runValidation on jobID: " + str(jobId),
                             "")

        jobTracker = interfaces.jobDb
        submissionId = jobTracker.getSubmissionId(jobId)

        rowNumber = 1
        fileType = jobTracker.getFileType(jobId)
        # Clear existing records for this submission
        interfaces.stagingDb.clearFileBySubmission(submissionId, fileType)

        # Get short to long colname dictionary
        shortColnames = interfaces.validationDb.getShortToLongColname()

        # If local, make the error report directory
        if self.isLocal and not os.path.exists(self.directory):
            os.makedirs(self.directory)
        # Get bucket name and file name
        fileName = jobTracker.getFileName(jobId)
        self.filename = fileName
        bucketName = CONFIG_BROKER['aws_bucket']
        regionName = CONFIG_BROKER['aws_region']

        errorFileName = self.getFileName(getReportPath(job, 'error'))
        warningFileName = self.getFileName(getReportPath(job, 'warning'))

        # Create File Status object
        interfaces.errorDb.createFileIfNeeded(jobId, fileName)

        validationDB = interfaces.validationDb
        fieldList = validationDB.getFieldsByFileList(fileType)
        csvSchema = validationDB.getFieldsByFile(fileType, shortCols=True)

        reader = self.getReader()

        # Get file size and write to jobs table
        if CONFIG_BROKER["use_aws"]:
            fileSize = s3UrlHandler.getFileSize(errorFileName)
        else:
            fileSize = os.path.getsize(jobTracker.getFileName(jobId))
        jobTracker.setFileSizeById(jobId, fileSize)

        fields = interfaces.validationDb.getFileColumnsByFile(fileType)
        try:
            # Pull file and return info on whether it's using short or long col headers
            reader.openFile(regionName, bucketName, fileName, fieldList,
                            bucketName, errorFileName)

            errorInterface = interfaces.errorDb
            self.longToShortDict = interfaces.validationDb.getLongToShortColname()
            # rowErrorPresent becomes true if any row error occurs, used for determining file status
            rowErrorPresent = False
            # list to keep track of rows that fail validations
            errorRows = []

            # While not done, pull one row and put it into staging table if it passes
            # the Validator

            with self.getWriter(regionName, bucketName, errorFileName, self.reportHeaders) as writer, \
                 self.getWriter(regionName, bucketName, warningFileName, self.reportHeaders) as warningWriter:
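                # Writer contract (inferred from use here): write() takes one
                # report row as a list and finishBatch() flushes buffered rows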
                while not reader.isFinished:
                    rowNumber += 1

                    if (rowNumber % 100) == 0:
                        CloudLogger.logError(
                            "VALIDATOR_INFO: ", "JobId: " + str(jobId) +
                            " loading row " + str(rowNumber), "")

                    #
                    # first phase of validations: read record and record a
                    # formatting error if there's a problem
                    #
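                    # readRecord is expected to return a 5-tuple (inferred
                    # from the unpacking below): the parsed record plus flags
                    # to undo the row count, skip the row, stop reading, and
                    # note a row-level error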
                    (record, reduceRow, skipRow, doneReading,
                     rowErrorHere) = self.readRecord(reader, writer, fileType,
                                                     interfaces, rowNumber,
                                                     jobId, fields)
                    if reduceRow:
                        rowNumber -= 1
                    if rowErrorHere:
                        rowErrorPresent = True
                        errorRows.append(rowNumber)
                    if doneReading:
                        # Stop reading from input file
                        break
                    elif skipRow:
                        # Do not write this row to staging, but continue processing future rows
                        continue

                    #
                    # second phase of validations: do basic schema checks
                    # (e.g., require fields, field length, data type)
                    #
                    # D files are obtained from upstream systems (ASP and
                    # FPDS) that perform their own basic validations, so those
                    # validations are not repeated here
                    if fileType in ["award", "award_procurement"]:
                        # Skip basic validations for D files, set as valid to trigger write to staging
                        passedValidations = True
                        valid = True
                    else:
                        passedValidations, failures, valid = Validator.validate(
                            record, csvSchema)
                    if valid:
                        skipRow = self.writeToStaging(record, jobId,
                                                      submissionId,
                                                      passedValidations,
                                                      interfaces, writer,
                                                      rowNumber, fileType)
                        if skipRow:
                            errorRows.append(rowNumber)
                            continue

                    if not passedValidations:
                        if self.writeErrors(failures, interfaces, jobId,
                                            shortColnames, writer,
                                            warningWriter, rowNumber):
                            errorRows.append(rowNumber)

                CloudLogger.logError(
                    "VALIDATOR_INFO: ",
                    "Loading complete on jobID: " + str(jobId) +
                    ". Total rows added to staging: " + str(rowNumber), "")

                #
                # third phase of validations: run validation rules as specified
                # in the schema guidance. these validations are sql-based.
                #
                sqlErrorRows = self.runSqlValidations(interfaces, jobId,
                                                      fileType, shortColnames,
                                                      writer, warningWriter,
                                                      rowNumber)
                errorRows.extend(sqlErrorRows)

                # Write unfinished batch
                writer.finishBatch()
                warningWriter.finishBatch()

            # Calculate the number of rows in the file that passed validations
            errorRowsUnique = set(errorRows)
            totalRowsExcludingHeader = rowNumber - 1
            validRows = totalRowsExcludingHeader - len(errorRowsUnique)
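            # Worked example: a clean 100-row file leaves rowNumber at 101, so
            # totalRowsExcludingHeader is 100; with 5 distinct error rows,
            # validRows = 100 - 5 = 95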

            # Update job metadata
            jobTracker.setJobRowcounts(jobId, rowNumber, validRows)

            errorInterface.writeAllRowErrors(jobId)
            # Update error info for submission
            jobTracker.populateSubmissionErrorInfo(submissionId)
            # Mark validation as finished in job tracker
            jobTracker.markJobStatus(jobId, "finished")
            interfaces.errorDb.markFileComplete(jobId, self.filename)
        finally:
            # Ensure the file always closes
            reader.close()
            CloudLogger.logError(
                "VALIDATOR_INFO: ",
                "Completed L1 and SQL rule validations on jobID: " +
                str(jobId), "")
        return True
Example #27
    def get_xml_response_content(self, api_url):
        """ Retrieve the XML response from the provided API URL """
        CloudLogger.log("DEBUG: Getting XML response",
                        log_type="debug",
                        file_name=self.debug_file_name)
        # Note: verify=False disables TLS certificate verification for this call
        return requests.get(api_url, verify=False, timeout=120).text
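A short usage sketch for the method above; the loader instance and the feed URL are assumptions for illustration:

import xml.etree.ElementTree as ET

# 'loader' is a hypothetical instance of the class defining the method above
xml_text = loader.get_xml_response_content("https://example.com/feed.xml")
root = ET.fromstring(xml_text)
for element in root:
    print(element.tag, element.attrib)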