def start_e_f_generation(job):
    """ Passes the Job ID for an E or F generation Job to SQS

        Args:
            job: File generation job to start
    """
    mark_job_status(job.job_id, "waiting")

    file_type = job.file_type.letter_name
    log_data = {'message': 'Sending {} file generation job {} to Validator in SQS'.format(file_type, job.job_id),
                'message_type': 'BrokerInfo', 'submission_id': job.submission_id, 'job_id': job.job_id,
                'file_type': file_type}
    logger.info(log_data)

    # Add job_id to the SQS job queue
    queue = sqs_queue()
    msg_response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes={})

    log_data['message'] = 'SQS message response: {}'.format(msg_response)
    logger.debug(log_data)
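Every function in this section relies on a shared sqs_queue() helper that is not shown here. A minimal sketch of what such a helper could look like on boto3 follows; the 'sqs_region' and 'sqs_queue_name' config keys are assumptions for illustration, not the Broker's actual configuration.

import boto3

def sqs_queue():
    # Assumed helper: resolve the job queue by name via boto3's SQS resource API.
    # Both CONFIG_BROKER keys below are hypothetical.
    sqs = boto3.resource('sqs', region_name=CONFIG_BROKER['sqs_region'])
    return sqs.get_queue_by_name(QueueName=CONFIG_BROKER['sqs_queue_name'])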
def start_a_generation(job, start_date, end_date, agency_code):
    """ Validates the start and end dates of the generation and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_code: Agency code for A file generations
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    mark_job_status(job.job_id, "waiting")

    file_type = job.file_type.letter_name
    log_data = {'message': 'Sending {} file generation job {} to Validator in SQS'.format(file_type, job.job_id),
                'message_type': 'BrokerInfo', 'job_id': job.job_id, 'file_type': file_type}
    logger.info(log_data)

    # Set SQS message attributes
    message_attr = {'agency_code': {'DataType': 'String', 'StringValue': agency_code}}

    # Add job_id to the SQS job queue
    queue = sqs_queue()
    msg_response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes=message_attr)

    log_data['message'] = 'SQS message response: {}'.format(msg_response)
    logger.debug(log_data)
def check_job_dependencies(job_id):
    """ For specified job, check which of its dependencies are ready to be started and add them to the queue

        Args:
            job_id: the ID of the job that was just finished

        Raises:
            ValueError: If the job provided is not finished
    """
    sess = GlobalDB.db().session
    log_data = {'message_type': 'CoreError', 'job_id': job_id}

    # raise exception if current job is not actually finished
    job = sess.query(Job).filter(Job.job_id == job_id).one()
    if job.job_status_id != JOB_STATUS_DICT['finished']:
        log_data['message'] = 'Current job not finished, unable to check dependencies'
        logger.error(log_data)
        raise ValueError('Current job not finished, unable to check dependencies')

    # get the jobs that are dependent on job_id being finished
    dependencies = sess.query(JobDependency).filter_by(prerequisite_id=job_id).all()
    for dependency in dependencies:
        dep_job_id = dependency.job_id
        if dependency.dependent_job.job_status_id != JOB_STATUS_DICT['waiting']:
            log_data['message_type'] = 'CoreError'
            log_data['message'] = "{} (dependency of {}) is not in a 'waiting' state".format(dep_job_id, job_id)
            logger.error(log_data)
        else:
            # find the number of this job's prerequisites that do not have a status of 'finished' or have errors.
            unfinished_prerequisites = sess.query(JobDependency).\
                join(Job, JobDependency.prerequisite_job).\
                filter(or_(Job.job_status_id != JOB_STATUS_DICT['finished'], Job.number_of_errors > 0),
                       JobDependency.job_id == dep_job_id).\
                count()
            if unfinished_prerequisites == 0:
                # this job has no unfinished prerequisite jobs, so it is eligible to be set to a 'ready' status and
                # added to the queue
                mark_job_status(dep_job_id, 'ready')

                # Only want to send validation jobs to the queue, other job types should be forwarded
                if dependency.dependent_job.job_type_name in ['csv_record_validation', 'validation']:
                    # add dep_job_id to the SQS job queue
                    log_data['message_type'] = 'CoreInfo'
                    log_data['message'] = 'Sending job {} to job manager in sqs'.format(dep_job_id)
                    logger.info(log_data)
                    queue = sqs_queue()

                    response = queue.send_message(MessageBody=str(dep_job_id))
                    log_data['message'] = 'Send message response: {}'.format(response)
                    logger.info(log_data)
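A typical call site, sketched under the assumption that a worker has just completed a job and wants its dependents queued; both functions called here are the ones defined in this section.

# Hypothetical call site: mark the job finished, then queue any dependents
# whose prerequisites are now all satisfied.
mark_job_status(job.job_id, 'finished')
check_job_dependencies(job.job_id)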
def test_push_poll_queue(self):
    """ Adds a single message to the queue then retrieves it immediately. Default number of messages for retrieval
        is 1 message.
    """
    queue = sqs_queue()
    response = queue.send_message(MessageBody="1234")
    self.assertEqual(response['ResponseMetadata']['HTTPStatusCode'], 200)
    messages = queue.receive_messages(WaitTimeSeconds=10)
    self.assertNotEqual(messages, [])
    for message in messages:
        message.delete()
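As written, this test talks to whatever queue sqs_queue() resolves. A hedged sketch of running the same assertions hermetically with moto's SQS mock follows; the decorator name assumes moto < 5 (newer releases expose mock_aws instead), and the queue name is illustrative.

import boto3
import unittest
from moto import mock_sqs  # assumption: moto < 5; use mock_aws on moto >= 5


class TestQueuePushPoll(unittest.TestCase):
    @mock_sqs
    def test_push_poll_queue(self):
        # Create an in-memory queue so no AWS credentials or network are needed.
        sqs = boto3.resource('sqs', region_name='us-east-1')
        queue = sqs.create_queue(QueueName='test-job-queue')  # hypothetical name

        response = queue.send_message(MessageBody="1234")
        self.assertEqual(response['ResponseMetadata']['HTTPStatusCode'], 200)

        messages = queue.receive_messages(WaitTimeSeconds=0)
        self.assertNotEqual(messages, [])
        for message in messages:
            message.delete()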
def start_d_generation(job, start_date, end_date, agency_type, agency_code=None):
    """ Validates the start and end dates of the generation, updates the submission's publish status and progress
        (if it's not a detached generation), and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code for detached D file generations

        Returns:
            SQS send_message response
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    # Update submission
    if job.submission_id:
        agency_code = update_generation_submission(sess, job)

    mark_job_status(job.job_id, "waiting")

    log_data = {'message': 'Sending {} file generation job {} to SQS'.format(job.file_type.letter_name, job.job_id),
                'message_type': 'BrokerInfo', 'submission_id': job.submission_id, 'job_id': job.job_id,
                'file_type': job.file_type.letter_name}
    logger.info(log_data)

    file_request = retrieve_cached_file_request(job, agency_type, agency_code, g.is_local)
    if file_request:
        log_data['message'] = 'No new file generated, used FileRequest with ID {}'.format(file_request.file_request_id)
        logger.info(log_data)
    else:
        # Set SQS message attributes
        message_attr = {'agency_type': {'DataType': 'String', 'StringValue': agency_type}}
        if not job.submission_id:
            message_attr['agency_code'] = {'DataType': 'String', 'StringValue': agency_code}

        # Add job_id to the SQS job queue
        queue = sqs_queue()
        msg_response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes=message_attr)

        log_data['message'] = 'SQS message response: {}'.format(msg_response)
        logger.debug(log_data)
def run_app():
    """ Run the application. """
    app = create_app()

    # This is for DataDog (Do Not Delete)
    if USE_DATADOG:
        TraceMiddleware(app, tracer, service="broker-dd", distributed_tracing=False)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        current_app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()

        logger.info("Starting SQS polling")
        while True:
            # Grabs one (or more) messages from the queue
            messages = queue.receive_messages(WaitTimeSeconds=10, MessageAttributeNames=['All'])
            for message in messages:
                logger.info("Message received: %s", message.body)

                msg_attr = message.message_attributes
                if msg_attr and msg_attr.get('validation_type', {}).get('StringValue') == 'generation':
                    # Generating a file
                    validator_process_file_generation(message.body)
                else:
                    # Running validations (or generating a file from a Job)
                    a_agency_code = msg_attr.get('agency_code', {}).get('StringValue') if msg_attr else None
                    validator_process_job(message.body, a_agency_code)

                # Delete from SQS once processed
                message.delete()

            # When you receive an empty response from the queue, wait a second before trying again
            if len(messages) == 0:
                time.sleep(1)
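For reference, the two message shapes this loop distinguishes, with illustrative IDs; the generation shape mirrors what the FileGeneration-based start_d_generation later in this section publishes, and the plain shape mirrors what check_job_dependencies publishes.

queue = sqs_queue()

# 1) A generation request: routed to validator_process_file_generation,
#    with the FileGeneration ID as the message body.
queue.send_message(
    MessageBody='42',  # illustrative FileGeneration ID
    MessageAttributes={'validation_type': {'DataType': 'String', 'StringValue': 'generation'}})

# 2) A plain validation job: routed to validator_process_job,
#    with the Job ID as the message body.
queue.send_message(MessageBody='1234')  # illustrative Job ID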
def check_job_dependencies(job_id):
    """ For specified job, check which of its dependencies are ready to be started and add them to the queue """
    sess = GlobalDB.db().session

    # raise exception if current job is not actually finished
    job = sess.query(Job).filter(Job.job_id == job_id).one()
    if job.job_status_id != JOB_STATUS_DICT['finished']:
        raise ValueError('Current job not finished, unable to check dependencies')

    # get the jobs that are dependent on job_id being finished
    dependencies = sess.query(JobDependency).filter_by(prerequisite_id=job_id).all()
    for dependency in dependencies:
        dep_job_id = dependency.job_id
        if dependency.dependent_job.job_status_id != JOB_STATUS_DICT['waiting']:
            logger.error("%s (dependency of %s) is not in a 'waiting' state", dep_job_id, job_id)
        else:
            # find the number of this job's prerequisites that do not have a status of 'finished'.
            unfinished_prerequisites = sess.query(JobDependency).\
                join(Job, JobDependency.prerequisite_job).\
                filter(Job.job_status_id != JOB_STATUS_DICT['finished'], JobDependency.job_id == dep_job_id).\
                count()
            if unfinished_prerequisites == 0:
                # this job has no unfinished prerequisite jobs, so it is eligible to be set to a 'ready' status
                # and added to the queue
                mark_job_status(dep_job_id, 'ready')

                # Only want to send validation jobs to the queue, other job types should be forwarded
                if dependency.dependent_job.job_type_name in ['csv_record_validation', 'validation']:
                    # add dep_job_id to the SQS job queue
                    logger.info('Sending job %s to job manager in sqs', dep_job_id)
                    queue = sqs_queue()
                    response = queue.send_message(MessageBody=str(dep_job_id))
                    logger.info('Send message response: %s', response)
def start_generation_job(job, start_date, end_date, agency_code=None):
    """ Validates the dates for a D file generation job and passes the Job ID to SQS

        Args:
            job: File generation job to start
            start_date: Start date of the file generation
            end_date: End date of the file generation
            agency_code: Agency code for detached D file generations

        Returns:
            Tuple of boolean indicating successful start, and error response if False
    """
    sess = GlobalDB.db().session
    file_type = job.file_type.letter_name
    try:
        if file_type in ['D1', 'D2']:
            # Validate and set Job's start and end dates
            if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
                raise ResponseException("Start or end date cannot be parsed into a date", StatusCode.CLIENT_ERROR)
            job.start_date = start_date
            job.end_date = end_date
            sess.commit()
        elif file_type not in ["E", "F"]:
            raise ResponseException("File type must be either D1, D2, E or F", StatusCode.CLIENT_ERROR)
    except ResponseException as e:
        return False, JsonResponse.error(e, e.status, file_type=file_type, status='failed')

    mark_job_status(job.job_id, "waiting")

    # Add job_id to the SQS job queue
    logger.info({'message_type': 'ValidatorInfo', 'job_id': job.job_id,
                 'message': 'Sending file generation job {} to Validator in SQS'.format(job.job_id)})
    queue = sqs_queue()

    message_attr = {'agency_code': {'DataType': 'String', 'StringValue': agency_code}} if agency_code else {}
    response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes=message_attr)
    logger.debug({'message_type': 'ValidatorInfo', 'job_id': job.job_id,
                  'message': 'Send message response: {}'.format(response)})

    return True, None
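Because this variant reports failure through its return value rather than raising, a call site has to branch on the flag. A minimal sketch; the dates and agency code are illustrative values.

# Hypothetical call site: start the generation and surface the error response on failure.
success, error_response = start_generation_job(job, '01/01/2017', '03/31/2017', agency_code='020')
if not success:
    return error_response  # the JsonResponse.error(...) built inside start_generation_job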
def run_app():
    """Run the application."""
    app = create_app()

    # This is for DataDog (Do Not Delete)
    if USE_DATADOG:
        TraceMiddleware(app, tracer, service="broker-dd", distributed_tracing=False)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Create connection to job tracker database
        sess = GlobalDB.db().session

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()
        messages = []

        logger.info("Starting SQS polling")
        while True:
            # Set current_message to None before every loop to ensure it's never set to the previous message
            current_message = None
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(WaitTimeSeconds=10, MessageAttributeNames=['All'])
                for message in messages:
                    logger.info("Message received: %s", message.body)

                    # Retrieve the job_id from the message body
                    current_message = message
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")

                    # Get the job
                    job = sess.query(Job).filter_by(job_id=g.job_id).one_or_none()
                    if job is None:
                        validation_error_type = ValidationError.jobError
                        write_file_error(g.job_id, None, validation_error_type)
                        raise ResponseException('Job ID {} not found in database'.format(g.job_id),
                                                StatusCode.CLIENT_ERROR, None, validation_error_type)

                    # We have two major functionalities in the Validator: validation and file generation
                    if (not job.file_type or job.file_type.letter_name in ['A', 'B', 'C', 'FABS']
                            or job.job_type.name != 'file_upload') and job.submission_id:
                        # Run validations
                        validation_manager = ValidationManager(local, error_report_path)
                        validation_manager.validate_job(job.job_id)
                    else:
                        # Retrieve the agency code data from the message attributes
                        msg_attr = current_message.message_attributes
                        agency_code = msg_attr['agency_code']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_code') else None
                        agency_type = msg_attr['agency_type']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_type') else None
                        file_generation_manager = FileGenerationManager(job, agency_code, agency_type, local)
                        file_generation_manager.generate_from_job()
                        sess.commit()
                        sess.refresh(job)

                    # Delete from SQS once processed
                    message.delete()
            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(traceback.format_exc())

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # Insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType, e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # Job passed prerequisites for validation but an error happened somewhere:
                        # mark job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                        if current_message:
                            if e.errorType in [ValidationError.rowCountError, ValidationError.headerError,
                                               ValidationError.fileTypeError]:
                                current_message.delete()
            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(traceback.format_exc())

                # csv-specific errors get a different job status and response code
                if isinstance(e, ValueError) or isinstance(e, csv.Error) or isinstance(e, UnicodeDecodeError):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)
            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    try:
                        message.change_visibility(VisibilityTimeout=0)
                    except ClientError:
                        # Deleted messages will throw errors, which is fine because they are handled
                        pass
def start_d_generation(job, start_date, end_date, agency_type, agency_code=None):
    """ Validates the start and end dates of the generation, updates the submission's publish status and progress
        (if it's not a detached generation), and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code for detached D file generations
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    # Update submission
    if job.submission_id:
        agency_code = update_generation_submission(sess, job)

    mark_job_status(job.job_id, 'waiting')

    file_generation = retrieve_cached_file_generation(job, agency_type, agency_code)
    if file_generation:
        try:
            copy_file_generation_to_job(job, file_generation, g.is_local)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            sess.commit()
    else:
        # Create new FileGeneration and reset Jobs
        file_generation = FileGeneration(request_date=datetime.now().date(), start_date=job.start_date,
                                         end_date=job.end_date, file_type=job.file_type.letter_name,
                                         agency_code=agency_code, agency_type=agency_type, is_cached_file=True)
        sess.add(file_generation)
        sess.commit()

        try:
            job.file_generation_id = file_generation.file_generation_id
            sess.commit()
            reset_generation_jobs(sess, job)
            logger.info({'message': 'Sending new FileGeneration {} to SQS'.format(file_generation.file_generation_id),
                         'message_type': 'BrokerInfo', 'file_type': job.file_type.letter_name, 'job_id': job.job_id,
                         'submission_id': job.submission_id,
                         'file_generation_id': file_generation.file_generation_id})

            # Add file_generation_id to the SQS job queue
            queue = sqs_queue()
            message_attr = {"validation_type": {"DataType": "String", "StringValue": "generation"}}
            queue.send_message(MessageBody=str(file_generation.file_generation_id), MessageAttributes=message_attr)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            file_generation.is_cached_file = False
            sess.commit()
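The caching behavior above hinges on retrieve_cached_file_generation, which is not shown in this section. A plausible sketch of the lookup it would perform, matching the columns set on FileGeneration above, follows; this is an illustration under assumptions, not the Broker's actual implementation.

def retrieve_cached_file_generation(job, agency_type, agency_code):
    # Illustrative sketch: find a still-cached generation covering the same date
    # window, agency, and file type as the requested job, if one exists.
    sess = GlobalDB.db().session
    return sess.query(FileGeneration).filter(
        FileGeneration.start_date == job.start_date,
        FileGeneration.end_date == job.end_date,
        FileGeneration.file_type == job.file_type.letter_name,
        FileGeneration.agency_code == agency_code,
        FileGeneration.agency_type == agency_type,
        FileGeneration.is_cached_file.is_(True)).one_or_none()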
def run_app():
    """ Run the application. """
    app = create_app()

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        current_app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()

        logger.info("Starting SQS polling")
        keep_polling = True
        while keep_polling:
            # Start a Datadog Trace for this poll iter to capture activity in APM
            with tracer.trace(name="job.{}".format(JOB_TYPE), service=JOB_TYPE.lower(), resource=queue.url,
                              span_type=SpanTypes.WORKER) as span:
                # With cleanup handling engaged, allowing retries
                dispatcher = SQSWorkDispatcher(queue)

                def choose_job_by_message_attributes(message):
                    # Determine if this is a retry of this message, in which case job execution should know so it
                    # can do cleanup before proceeding with the job
                    q_msg_attr = message.attributes  # the non-user-defined (queue-defined) attributes on the message
                    is_retry = False
                    if q_msg_attr.get('ApproximateReceiveCount') is not None:
                        is_retry = int(q_msg_attr.get('ApproximateReceiveCount')) > 1

                    msg_attr = message.message_attributes
                    if msg_attr and msg_attr.get('validation_type', {}).get('StringValue') == 'generation':
                        # Generating a file
                        job_signature = {"_job": validator_process_file_generation, "file_gen_id": message.body,
                                         "is_retry": is_retry}
                    else:
                        # Running validations (or generating a file from a Job)
                        a_agency_code = msg_attr.get('agency_code', {}).get('StringValue') if msg_attr else None
                        job_signature = {"_job": validator_process_job, "job_id": message.body,
                                         "agency_code": a_agency_code, "is_retry": is_retry}
                    return job_signature

                found_message = dispatcher.dispatch_by_message_attribute(choose_job_by_message_attributes)

                if not found_message:
                    # Drop the Datadog trace, since no trace-worthy activity happened on this poll
                    tracer.context_provider.active().sampling_priority = USER_REJECT
                    span.set_tag(DatadogEagerlyDropTraceFilter.EAGERLY_DROP_TRACE_KEY, True)

                    # When you receive an empty response from the queue, wait before trying again
                    time.sleep(1)

                # If this process is exiting, don't poll for more work
                keep_polling = not dispatcher.is_exiting
def start_d_generation(job, start_date, end_date, agency_type, agency_code=None, file_format='csv'):
    """ Validates the start and end dates of the generation, updates the submission's publish status and progress
        (if it's not a detached generation), and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code for detached D file generations
            file_format: determines if the file generated is a txt or a csv
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    # Update submission
    if job.submission_id:
        agency_code = update_generation_submission(sess, job)

    mark_job_status(job.job_id, 'waiting')

    file_generation = retrieve_cached_file_generation(job, agency_type, agency_code, file_format)
    if file_generation:
        try:
            copy_file_generation_to_job(job, file_generation, g.is_local)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            sess.commit()
    else:
        # Create new FileGeneration and reset Jobs
        file_generation = FileGeneration(request_date=datetime.now().date(), start_date=job.start_date,
                                         end_date=job.end_date, file_type=job.file_type.letter_name,
                                         agency_code=agency_code, agency_type=agency_type, file_format=file_format,
                                         is_cached_file=True)
        sess.add(file_generation)
        sess.commit()

        try:
            job.file_generation_id = file_generation.file_generation_id
            sess.commit()
            reset_generation_jobs(sess, job)
            logger.info({'message': 'Sending new FileGeneration {} to SQS'.format(file_generation.file_generation_id),
                         'message_type': 'BrokerInfo', 'file_type': job.file_type.letter_name, 'job_id': job.job_id,
                         'submission_id': job.submission_id,
                         'file_generation_id': file_generation.file_generation_id})

            # Add file_generation_id to the SQS job queue
            queue = sqs_queue()
            message_attr = {"validation_type": {"DataType": "String", "StringValue": "generation"}}
            queue.send_message(MessageBody=str(file_generation.file_generation_id), MessageAttributes=message_attr)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            file_generation.is_cached_file = False
            sess.commit()
def run_app():
    """ Run the application. """
    app = create_app()

    # This is for DataDog (Do Not Delete)
    if USE_DATADOG:
        TraceMiddleware(app, tracer, service="broker-dd", distributed_tracing=False)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        current_app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()

        logger.info("Starting SQS polling")
        keep_polling = True
        while keep_polling:
            # With cleanup handling engaged, allowing retries
            dispatcher = SQSWorkDispatcher(queue)

            def choose_job_by_message_attributes(message):
                # Determine if this is a retry of this message, in which case job execution should know so it can
                # do cleanup before proceeding with the job
                q_msg_attr = message.attributes  # the non-user-defined (queue-defined) attributes on the message
                is_retry = False
                if q_msg_attr.get('ApproximateReceiveCount') is not None:
                    is_retry = int(q_msg_attr.get('ApproximateReceiveCount')) > 1

                msg_attr = message.message_attributes
                if msg_attr and msg_attr.get('validation_type', {}).get('StringValue') == 'generation':
                    # Generating a file
                    job_signature = {"_job": validator_process_file_generation, "file_gen_id": message.body,
                                     "is_retry": is_retry}
                else:
                    # Running validations (or generating a file from a Job)
                    a_agency_code = msg_attr.get('agency_code', {}).get('StringValue') if msg_attr else None
                    job_signature = {"_job": validator_process_job, "job_id": message.body,
                                     "agency_code": a_agency_code, "is_retry": is_retry}
                return job_signature

            found_message = dispatcher.dispatch_by_message_attribute(choose_job_by_message_attributes)

            # When you receive an empty response from the queue, wait before trying again
            if not found_message:
                time.sleep(1)

            # If this process is exiting, don't poll for more work
            keep_polling = not dispatcher.is_exiting
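The job_signature dict returned by choose_job_by_message_attributes is a contract with SQSWorkDispatcher. A hedged sketch of how a dispatcher could consume that dict is below; it illustrates the assumed contract (pop the "_job" callable, pass the remaining keys as keyword arguments), not the dispatcher's actual code.

# Assumed contract between the chooser callback and the dispatcher:
# '_job' names the callable, every other key becomes a keyword argument.
def run_job_signature(job_signature):
    job = job_signature.pop("_job")
    job(**job_signature)

# e.g. a generation message would end up invoking something like:
# validator_process_file_generation(file_gen_id='42', is_retry=False)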
def run_app():
    """Run the application."""
    app = Flask(__name__)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()

        logger.info("Starting SQS polling")
        current_message = None
        messages = []  # initialized here so the finally block can't hit a NameError on the first poll
        while True:
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(WaitTimeSeconds=10)
                for message in messages:
                    logger.info("Message received: %s", message.body)
                    current_message = message
                    GlobalDB.db()
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")
                    validation_manager = ValidationManager(local, error_report_path)
                    validation_manager.validate_job(g.job_id)

                    # delete from SQS once processed
                    message.delete()
            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(str(e))

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType, e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # job passed prerequisites for validation, but an error
                        # happened somewhere. mark job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                        if current_message:
                            if e.errorType in [ValidationError.rowCountError, ValidationError.headerError,
                                               ValidationError.fileTypeError]:
                                current_message.delete()
            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(str(e))

                # csv-specific errors get a different job status and response code
                if isinstance(e, ValueError) or isinstance(e, csv.Error) or isinstance(e, UnicodeDecodeError):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)
            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    message.change_visibility(VisibilityTimeout=0)