Example #1
0
        def validate_threaded():
            """Start the validation process on a new thread.

            Reads the job ID from the current request, checks it with
            manager.testJobID, marks the job "running" in the job tracker and
            then starts a background thread that runs threadedValidateJob.

            Returns:
                JsonResponse.create(StatusCode.OK, {"table": "job<jobId>"}) once
                the thread has been started, otherwise the JsonResponse.error(...)
                result for the first step that failed.
            """
            @copy_current_request_context
            def ThreadedFunction(arg):
                """The new thread: validate job `arg` with its own ValidationManager."""
                threadedManager = ValidationManager(local, error_report_path)
                threadedManager.threadedValidateJob(arg)

            def logToErrorFile(error):
                """Append str(error) to the errorLog file and close the handle."""
                with open("errorLog", "a") as errorLog:
                    errorLog.write(str(error) + "\n")

            try:
                interfaces = InterfaceHolder()
                jobTracker = interfaces.jobDb
            except ResponseException as e:
                logToErrorFile(e)
                return JsonResponse.error(e,e.status,table = "cannot connect to job database")
            except Exception as e:
                logToErrorFile(e)
                exc = ResponseException(str(e),StatusCode.INTERNAL_ERROR,type(e))
                return JsonResponse.error(exc,exc.status,table= "cannot connect to job database")

            # Everything that uses `interfaces` lives in this try so that the
            # finally clause closes it on every exit path, including the early
            # error returns below.
            try:
                jobId = None
                manager = ValidationManager(local, error_report_path)

                try:
                    jobId = manager.getJobID(request)
                except ResponseException as e:
                    # NOTE: jobId is still None here because getJobID raised
                    manager.markJob(jobId,jobTracker,"invalid",interfaces.errorDb,manager.filename)
                    CloudLogger.logError(str(e),e,traceback.extract_tb(sys.exc_info()[2]))
                    return JsonResponse.error(e,e.status,table ="")
                except Exception as e:
                    exc = ResponseException(str(e),StatusCode.CLIENT_ERROR,type(e))
                    # NOTE: jobId is still None here because getJobID raised
                    manager.markJob(jobId,jobTracker,"invalid",interfaces.errorDb,manager.filename)
                    CloudLogger.logError(str(e),exc,traceback.extract_tb(sys.exc_info()[2]))
                    return JsonResponse.error(exc,exc.status,table="")

                try:
                    manager.testJobID(jobId,interfaces)
                except ResponseException as e:
                    logToErrorFile(e)
                    # Job is not ready to run according to job tracker, do not change status of job in job tracker
                    interfaces.errorDb.writeFileError(jobId,manager.filename,ValidationError.jobError)
                    return JsonResponse.error(e,e.status,table="")
                except Exception as e:
                    logToErrorFile(e)
                    exc = ResponseException(str(e),StatusCode.CLIENT_ERROR,type(e))
                    interfaces.errorDb.writeFileError(jobId,manager.filename,ValidationError.jobError)
                    return JsonResponse.error(exc,exc.status,table="")

                try:
                    jobTracker.markJobStatus(jobId,"running")
                except Exception as e:
                    logToErrorFile(e)
                    exc = ResponseException(str(e),StatusCode.INTERNAL_ERROR,type(e))
                    return JsonResponse.error(exc,exc.status,table="could not start job")
            finally:
                interfaces.close()

            # The route's connections are closed before the thread starts; the
            # thread builds its own ValidationManager in ThreadedFunction.
            thread = Thread(target=ThreadedFunction, args=(jobId,))
            thread.start()

            return JsonResponse.create(StatusCode.OK,{"table":"job"+str(jobId)})
Example #2
0
 def validate():
     """Start the validation process on the same threads.

     Returns whatever validationManager.validateJob returns for the current
     request. If that call raises, the error text is appended to the errorLog
     file and a JsonResponse.error with StatusCode.INTERNAL_ERROR is returned.
     The InterfaceHolder created for this route is closed in every case.
     """
     interfaces = InterfaceHolder() # Create sessions for this route
     try:
         return validationManager.validateJob(request,interfaces)
     except Exception as e:
         # Something went wrong getting the flask request
         # Use a context manager so the log file handle is closed right away
         with open("errorLog","a") as errorLog:
             errorLog.write(str(e) + "\n")
         exc = ResponseException(str(e),StatusCode.INTERNAL_ERROR,type(e))
         return JsonResponse.error(exc,exc.status,table="")
     finally:
         interfaces.close()
    def threadedValidateJob(self, jobId):
        """
        Run the validation that matches the job's type, then mark its file
        complete in the error database.

        args
        jobId -- (Integer) a valid jobId

        This method runs on a new thread, thus there are zero error messages
        other than the job status being updated: every exception is logged
        through CloudLogger and recorded with self.markJob instead of being
        raised to the caller.
        """

        # As this is the start of a new thread, first generate new connections to the databases
        interfaces = InterfaceHolder()

        self.filename = ""
        jobTracker = interfaces.jobDb
        errorDb = interfaces.errorDb
        try:
            jobType = interfaces.jobDb.checkJobType(jobId)
            # Each job type gets exactly one validation pass. A second,
            # unconditional runValidation call used to follow this dispatch,
            # which validated CSV jobs twice and also ran single-file
            # validation after cross-file validation.
            if jobType == interfaces.jobDb.getJobTypeId(
                    "csv_record_validation"):
                self.runValidation(jobId, interfaces)
            elif jobType == interfaces.jobDb.getJobTypeId("validation"):
                self.runCrossValidation(jobId, interfaces)
            else:
                raise ResponseException("Bad job type for validator",
                                        StatusCode.INTERNAL_ERROR)
            errorDb.markFileComplete(jobId, self.filename)
            return
        except ResponseException as e:
            CloudLogger.logError(str(e), e,
                                 traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId, jobTracker, "invalid", errorDb, self.filename,
                         e.errorType, e.extraInfo)
        except ValueError as e:
            CloudLogger.logError(str(e), e,
                                 traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId, jobTracker, "invalid", errorDb, self.filename,
                         ValidationError.unknownError)
        except Exception as e:
            #Something unknown happened we may need to try again!
            CloudLogger.logError(str(e), e,
                                 traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId, jobTracker, "failed", errorDb, self.filename,
                         ValidationError.unknownError)
        finally:
            # Always release this thread's database connections
            interfaces.close()
    def setUpClass(cls):
        """Create the databases, test app and interfaces shared by a test class"""
        #TODO: refactor into a pytest class fixtures and inject as necessary

        # Point every database name at a per-class, per-run test database so
        # unittests never touch the regular databases
        suiteName = cls.__name__.lower()
        dbConfig = dataactcore.config.CONFIG_DB
        cls.num = randint(1, 9999)
        dbNameTemplates = (
            ('error_db_name', 'unittest{}_{}_error_data'),
            ('job_db_name', 'unittest{}_{}_job_tracker'),
            ('user_db_name', 'unittest{}_{}_user_manager'),
            ('validator_db_name', 'unittest{}_{}_validator'),
            ('staging_db_name', 'unittest{}_{}_staging'),
        )
        for configKey, nameTemplate in dbNameTemplates:
            dbConfig[configKey] = nameTemplate.format(cls.num, suiteName)
        dataactcore.config.CONFIG_DB = dbConfig

        application = createApp()
        application.config['TESTING'] = True
        cls.app = TestApp(application)

        # Let assertion failures carry the extra detail supplied by tests
        cls.longMessage = True
        # Whether each route call should launch a new thread
        cls.useThreads = False
        # Upload files to S3 (False = skip re-uploading on subsequent runs)
        cls.uploadFiles = True
        # Whether the tests run against a local broker
        cls.local = CONFIG_BROKER['local']
        # Local directory for error reports; must be set when local is True
        cls.local_file_directory = CONFIG_SERVICES['error_report_path']

        # Silence log records at WARN level and below while the test
        # databases are dropped and re-created
        logging.disable(logging.WARN)
        setupJobTrackerDB()      # job tracker db/tables
        setupErrorDB()           # error db/tables
        setupStagingDB()         # staging db
        setupValidationDB(True)  # validation db
        # Restore normal logging
        logging.disable(logging.NOTSET)

        holder = InterfaceHolder()
        cls.interfaces = holder
        cls.jobTracker = holder.jobDb
        cls.stagingDb = holder.stagingDb
        cls.errorInterface = holder.errorDb
        cls.validationDb = holder.validationDb
        cls.userId = 1
    def threadedValidateJob(self,jobId) :
        """
        Run the validation matching the job's type and mark its file complete.

        args
        jobId -- (Integer) a valid jobId

        This method runs on a new thread, thus there are zero error messages
        other than the job status being updated: exceptions are logged with
        CloudLogger and recorded through self.markJob rather than raised.
        """

        # As this is the start of a new thread, first generate new connections to the databases
        interfaces = InterfaceHolder()

        self.filename = ""
        jobTracker = interfaces.jobDb
        errorDb = interfaces.errorDb
        try:
            jobType = interfaces.jobDb.checkJobType(jobId)
            # Run exactly one validation per job type; the extra unconditional
            # runValidation call that used to follow this dispatch repeated CSV
            # validation and ran it after cross-file validation as well.
            if jobType == interfaces.jobDb.getJobTypeId("csv_record_validation"):
                self.runValidation(jobId,interfaces)
            elif jobType == interfaces.jobDb.getJobTypeId("validation"):
                self.runCrossValidation(jobId, interfaces)
            else:
                raise ResponseException("Bad job type for validator", StatusCode.INTERNAL_ERROR)
            errorDb.markFileComplete(jobId,self.filename)
            return
        except ResponseException as e:
            CloudLogger.logError(str(e),e,traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId,jobTracker,"invalid",errorDb,self.filename,e.errorType,e.extraInfo)
        except ValueError as e:
            CloudLogger.logError(str(e),e,traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId,jobTracker,"invalid",errorDb,self.filename,ValidationError.unknownError)
        except Exception as e:
            #Something unknown happened we may need to try again!
            CloudLogger.logError(str(e),e,traceback.extract_tb(sys.exc_info()[2]))
            self.markJob(jobId,jobTracker,"failed",errorDb,self.filename,ValidationError.unknownError)
        finally:
            # Always release the connections opened for this thread
            interfaces.close()
Example #6
0
    def evaluateCrossFileRule(cls, rule, submissionId, record = None):
        """ Apply one multi-field rule to the records it covers and collect failures

        Arguments:
        rule -- rule object; its multi_field_rule_type.name selects the check
                ("field_match", "rule_if" or "greater", case-insensitive)
        submissionId -- passed to cls.getTable to find the staging tables
        record -- optional single record; handed to cls.getRecordsIfNone for
                  "field_match" and "rule_if", and required for "greater"

        Returns:
        Tuple (passed, failures). failures holds one
        [field names, rule description, checked values] entry per record that
        failed a "field_match"; "rule_if" gathers the entries reported by its
        conditional rule; "greater" adds none. Any other rule type returns
        (True, []).

        Raises:
        ValueError -- a "greater" rule was given no record

        NOTE(review): the InterfaceHolder created here is never closed in
        this method - confirm whether that is intentional.
        """
        failureRows = [] # A rule of these types can fail for many records
        passed = True # Flipped to False at the first failure
        typeName = rule.multi_field_rule_type.name.lower()
        sourceType = rule.file_type.name
        interfaces = InterfaceHolder()
        stagingDb = interfaces.stagingDb

        if typeName == "field_match":
            targetType = rule.rule_text_2
            # ORM objects for the source and target staging tables
            sourceTable = cls.getTable(submissionId, sourceType, stagingDb)
            targetTable = cls.getTable(submissionId, targetType, stagingDb)
            # TODO Could try to do a join and see what doesn't match, or otherwise improve performance by avoiding a
            # TODO new query against second table for every record in first table, possibly index second table at start
            # Either the supplied record or every record of the source table
            candidates = cls.getRecordsIfNone(sourceTable,stagingDb,record)
            matchFields = cls.cleanSplit(rule.rule_text_1,True)
            for candidate in candidates:
                # Narrow the target query by one filter per field to match
                wanted = {}
                targetQuery = stagingDb.session.query(targetTable)
                for fieldName in matchFields:
                    # Staging columns are addressed by file column ID
                    targetColId = interfaces.validationDb.getColumnId(fieldName,targetType)
                    if isinstance(candidate,dict):
                        value = candidate[str(fieldName)]
                    else:
                        sourceColId = interfaces.validationDb.getColumnId(fieldName,sourceType)
                        value = getattr(candidate,str(sourceColId))
                    wanted[fieldName] = value
                    targetColumn = getattr(targetTable.c,str(targetColId))
                    targetQuery = targetQuery.filter(targetColumn == value)
                if targetQuery.first():
                    # At least one target row matches every field
                    continue
                # No target row matches, so record the failure
                passed = False
                valuesText = str(wanted)[1:-1] # Drop the surrounding braces
                failureRows.append([", ".join(matchFields),rule.description,valuesText])

        elif typeName == "rule_if":
            sourceTable = cls.getTable(submissionId, sourceType, stagingDb)

            # Build the field name for each staging column, in column order
            fieldNames = []
            for column in list(sourceTable.columns):
                columnName = column.name
                try:
                    int(columnName)
                except ValueError:
                    # Non-integer names (the primary key field) are used directly
                    fieldNames.append(columnName)
                else:
                    # Integer names are column IDs and get translated
                    fieldNames.append(interfaces.validationDb.getFieldNameByColId(columnName))

            # Either the supplied record or every record of the source table
            candidates = cls.getRecordsIfNone(sourceTable,stagingDb,record)
            # rule_text_2 labels the condition, rule_text_1 the rule applied when it holds
            condition = interfaces.validationDb.getMultiFieldRuleByLabel(rule.rule_text_2)
            conditionalRule = interfaces.validationDb.getMultiFieldRuleByLabel(rule.rule_text_1)
            for row in candidates:
                # Rows arrive as tuples; the nested rules need field-name keys
                rowAsDict = dict(zip(fieldNames,list(row)))
                conditionHolds = cls.evaluateCrossFileRule(condition,submissionId,rowAsDict)[0]
                if not conditionHolds:
                    continue
                outcome = cls.evaluateCrossFileRule(conditionalRule,submissionId,rowAsDict)
                if not outcome[0]:
                    # Remember that a failure was seen and keep its details
                    passed = False
                    failureRows.extend(outcome[1])

        elif typeName == "greater":
            if not record:
                # This rule only makes sense for a specific record
                raise ValueError("Cannot apply greater rule without a record")
            actual = int(record[rule.rule_text_2])
            threshold = int(rule.rule_text_1)
            passed = actual > threshold

        return passed,failureRows