Esempio n. 1
0
def validator_process_job(job_id, agency_code, is_retry=False):
    """ Retrieves a Job based on its ID, and kicks off a validation. Handles errors by ensuring the Job (if exists) is
        no longer running.

        Args:
            job_id: ID of a Job
            agency_code: CGAC or FREC code for agency, only required for file generations by Job
            is_retry: If this is not the very first time handling execution of this job. If True, cleanup is
                      performed before proceeding to retry the job

        Raises:
            Any Exceptions raised by the GenerationManager or ValidationManager, excluding those explicitly handled
    """
    if is_retry:
        if cleanup_validation(job_id):
            log_job_message(
                logger=logger,
                message=
                "Attempting a retry of {} after successful retry-cleanup.".
                format(inspect.stack()[0][3]),
                job_id=job_id,
                is_debug=True)
        else:
            log_job_message(
                logger=logger,
                message="Retry of {} found to be not necessary after cleanup. "
                "Returning from job with success.".format(
                    inspect.stack()[0][3]),
                job_id=job_id,
                is_debug=True)
            return

    sess = GlobalDB.db().session
    job = None

    try:
        # Get the job
        job = sess.query(Job).filter_by(job_id=job_id).one_or_none()
        if job is None:
            validation_error_type = ValidationError.jobError
            write_file_error(job_id, None, validation_error_type)
            raise ResponseException(
                'Job ID {} not found in database'.format(job_id),
                StatusCode.CLIENT_ERROR, None, validation_error_type)

        mark_job_status(job_id, 'ready')

        # We can either validate or generate a file based on Job ID
        if job.job_type.name == 'file_upload':
            # Generate A, E, or F file
            file_generation_manager = FileGenerationManager(sess,
                                                            g.is_local,
                                                            job=job)
            file_generation_manager.generate_file(agency_code)
        else:
            # Run validations
            validation_manager = ValidationManager(
                g.is_local, CONFIG_SERVICES['error_report_path'])
            validation_manager.validate_job(job.job_id)

    except (ResponseException, csv.Error, UnicodeDecodeError, ValueError) as e:
        # Handle exceptions explicitly raised during validation
        error_data = {
            'message': 'An exception occurred in the Validator',
            'message_type': 'ValidatorInfo',
            'job_id': job_id,
            'traceback': traceback.format_exc()
        }

        if job:
            error_data.update({
                'submission_id': job.submission_id,
                'file_type': job.file_type.name
            })
            logger.error(error_data)

            sess.refresh(job)
            job.error_message = str(e)
            if job.filename is not None:
                error_type = ValidationError.unknownError
                if isinstance(e, UnicodeDecodeError):
                    error_type = ValidationError.encodingError
                elif isinstance(e, ResponseException):
                    error_type = e.errorType

                write_file_error(job.job_id, job.filename, error_type)

            mark_job_status(job.job_id, 'invalid')
        else:
            logger.error(error_data)
            raise e

    except Exception as e:
        # Log uncaught exceptions and fail the Job
        error_data = {
            'message': 'An unhandled exception occurred in the Validator',
            'message_type': 'ValidatorInfo',
            'job_id': job_id,
            'traceback': traceback.format_exc()
        }
        if job:
            error_data.update({
                'submission_id': job.submission_id,
                'file_type': job.file_type.name
            })
        logger.error(error_data)

        # Try to mark the Job as failed, but continue raising the original Exception if not possible
        try:
            mark_job_status(job_id, 'failed')

            sess.refresh(job)
            job.error_message = str(e)
            sess.commit()
        except:
            pass

        raise e
Esempio n. 2
0
 def validate():
     """Start the validation process."""
     if request.is_json:
         g.job_id = request.json.get('job_id')
     validation_manager = ValidationManager(local, error_report_path)
     return validation_manager.validate_job(request)
Esempio n. 3
0
def run_app():
    """Run the application."""
    app = create_app()

    # This is for DataDog (Do Not Delete)
    if USE_DATADOG:
        TraceMiddleware(app,
                        tracer,
                        service="broker-dd",
                        distributed_tracing=False)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Create connection to job tracker database
        sess = GlobalDB.db().session

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()
        messages = []

        logger.info("Starting SQS polling")
        while True:
            # Set current_message to None before every loop to ensure it's never set to the previous message
            current_message = None
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(
                    WaitTimeSeconds=10, MessageAttributeNames=['All'])
                for message in messages:
                    logger.info("Message received: %s", message.body)

                    # Retrieve the job_id from the message body
                    current_message = message
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")

                    # Get the job
                    job = sess.query(Job).filter_by(
                        job_id=g.job_id).one_or_none()
                    if job is None:
                        validation_error_type = ValidationError.jobError
                        write_file_error(g.job_id, None, validation_error_type)
                        raise ResponseException(
                            'Job ID {} not found in database'.format(g.job_id),
                            StatusCode.CLIENT_ERROR, None,
                            validation_error_type)

                    # We have two major functionalities in the Validator: validation and file generation
                    if (not job.file_type or job.file_type.letter_name
                            in ['A', 'B', 'C', 'FABS'] or job.job_type.name !=
                            'file_upload') and job.submission_id:
                        # Run validations
                        validation_manager = ValidationManager(
                            local, error_report_path)
                        validation_manager.validate_job(job.job_id)
                    else:
                        # Retrieve the agency code data from the message attributes
                        msg_attr = current_message.message_attributes
                        agency_code = msg_attr['agency_code']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_code') else None
                        agency_type = msg_attr['agency_type']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_type') else None

                        file_generation_manager = FileGenerationManager(
                            job, agency_code, agency_type, local)
                        file_generation_manager.generate_from_job()
                        sess.commit()
                        sess.refresh(job)

                    # Delete from SQS once processed
                    message.delete()

            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(traceback.format_exc())

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # Insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType,
                                         e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # Job passed prerequisites for validation but an error happened somewhere: mark job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                        if current_message:
                            if e.errorType in [
                                    ValidationError.rowCountError,
                                    ValidationError.headerError,
                                    ValidationError.fileTypeError
                            ]:
                                current_message.delete()
            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(traceback.format_exc())

                # csv-specific errors get a different job status and response code
                if isinstance(e, ValueError) or isinstance(
                        e, csv.Error) or isinstance(e, UnicodeDecodeError):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'
                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)
            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    try:
                        message.change_visibility(VisibilityTimeout=0)
                    except ClientError:
                        # Deleted messages will throw errors, which is fine because they are handled
                        pass
def run_app():
    """Run the application."""
    app = Flask(__name__)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()

        logger.info("Starting SQS polling")
        current_message = None
        while True:
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(WaitTimeSeconds=10)
                for message in messages:
                    logger.info("Message received: %s", message.body)
                    current_message = message
                    GlobalDB.db()
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")
                    validation_manager = ValidationManager(local, error_report_path)
                    validation_manager.validate_job(g.job_id)

                    # delete from SQS once processed
                    message.delete()
            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(str(e))

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType, e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # job pass prerequisites for validation, but an error
                        # happened somewhere. mark job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                        if current_message:
                            if e.errorType in [ValidationError.rowCountError, ValidationError.headerError,
                                               ValidationError.fileTypeError]:
                                current_message.delete()
            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(str(e))

                # csv-specific errors get a different job status and response code
                if isinstance(e, ValueError) or isinstance(e, csv.Error) or isinstance(e, UnicodeDecodeError):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'
                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)
            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    message.change_visibility(VisibilityTimeout=0)