# Example 1
def test_generate_new_d1_file_success(monkeypatch, mock_broker_config_paths,
                                      database):
    """ A waiting D1 file_upload job produces a fresh file plus a cached FileRequest """
    session = database.session
    storage = mock_broker_config_paths['d_file_storage_path']

    upload_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['waiting'],
        job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award_procurement'],
        filename=str(storage.join('original')),
        start_date='01/01/2017',
        end_date='01/31/2017',
        original_filename='original',
        from_cached=True)
    session.add(upload_job)
    session.commit()

    generator = FileGenerationManager(upload_job, '123', 'awarding',
                                      CONFIG_BROKER['local'])
    generator.generate_from_job()

    session.refresh(upload_job)
    request = session.query(FileRequest).filter(
        FileRequest.job_id == upload_job.job_id).one_or_none()

    # A cached FileRequest row matching the job's parameters should now exist
    assert request is not None
    assert request.is_cached_file is True
    assert request.start_date == upload_job.start_date
    assert request.end_date == upload_job.end_date
    assert request.agency_code == '123'
    assert request.request_date == datetime.now().date()

    # The job points at a newly generated file and has finished
    assert upload_job.original_filename != 'original'
    assert upload_job.from_cached is False
    assert upload_job.job_status_id == JOB_STATUS_DICT['finished']
# Example 2
def test_generate_new_d1_file_keep_old_job_files_success(
        monkeypatch, mock_broker_config_paths, database):
    """ Generating for a parent job leaves a child job's existing file untouched """
    session = database.session
    storage = mock_broker_config_paths['d_file_storage_path']

    def build_job(status_id):
        # Both jobs are identical D1 uploads except for their status.
        return JobFactory(
            job_status_id=status_id,
            job_type_id=JOB_TYPE_DICT['file_upload'],
            file_type_id=FILE_TYPE_DICT['award_procurement'],
            filename=str(storage.join('original')),
            start_date='01/01/2017',
            end_date='01/31/2017',
            original_filename='original',
            from_cached=False)

    parent_job = build_job(JOB_STATUS_DICT['waiting'])
    child_job = build_job(JOB_STATUS_DICT['finished'])
    session.add_all([parent_job, child_job])
    session.commit()

    parent_request = FileRequestFactory(job=parent_job,
                                        parent_job_id=None,
                                        is_cached_file=False,
                                        agency_code='123',
                                        agency_type='awarding',
                                        start_date=parent_job.start_date,
                                        end_date=parent_job.end_date,
                                        file_type='D1',
                                        request_date=datetime.now().date())
    child_request = FileRequestFactory(job=child_job,
                                       parent_job_id=parent_job.job_id,
                                       is_cached_file=False,
                                       agency_code='123',
                                       agency_type='awarding',
                                       start_date=child_job.start_date,
                                       end_date=child_job.end_date,
                                       file_type='D1',
                                       request_date=datetime.now().date())
    session.add_all([parent_request, child_request])
    session.commit()

    generator = FileGenerationManager(parent_job, '123', 'awarding',
                                      CONFIG_BROKER['local'])
    generator.generate_from_job()

    session.refresh(parent_job)
    session.refresh(child_job)

    # Parent job got a freshly generated file...
    assert parent_job.original_filename != 'original'

    # ...while the child job's file is unchanged
    assert child_job.original_filename == 'original'
# Example 3
def test_generate_new_d1_file_different_dates_success(monkeypatch,
                                                      mock_broker_config_paths,
                                                      database):
    """ Regenerating after a date change creates a new cached FileRequest and
        demotes the previously cached one """
    session = database.session
    storage = mock_broker_config_paths['d_file_storage_path']

    d1_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['waiting'],
        job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award_procurement'],
        filename=str(storage.join('original')),
        start_date='01/01/2017',
        end_date='01/31/2017',
        original_filename='original',
        from_cached=True)
    session.add(d1_job)
    session.commit()

    cached_request = FileRequestFactory(job=d1_job,
                                        is_cached_file=True,
                                        agency_code='123',
                                        agency_type='awarding',
                                        start_date=d1_job.start_date,
                                        end_date=d1_job.end_date,
                                        file_type='D1',
                                        request_date=datetime.now().date())
    session.add(cached_request)
    session.commit()

    # Shift the job's start date so the cached request no longer matches
    previous_start = d1_job.start_date
    d1_job.start_date = '01/02/2017'
    session.commit()

    generator = FileGenerationManager(d1_job, '123', 'awarding',
                                      CONFIG_BROKER['local'])
    generator.generate_from_job()

    session.refresh(d1_job)

    # The request for the new date range is now the cached one
    fresh_request = session.query(FileRequest).filter(
        FileRequest.job_id == d1_job.job_id,
        FileRequest.is_cached_file.is_(True)).one_or_none()
    assert fresh_request is not None
    assert fresh_request.is_cached_file is True
    assert fresh_request.start_date == d1_job.start_date
    assert fresh_request.end_date == d1_job.end_date
    assert fresh_request.agency_code == '123'
    assert fresh_request.agency_type == 'awarding'

    # The request for the old date range remains, flagged non-cached
    stale_request = session.query(FileRequest).filter(
        FileRequest.job_id == d1_job.job_id,
        FileRequest.is_cached_file.is_(False)).one_or_none()
    assert stale_request is not None
    assert stale_request.is_cached_file is False
    assert stale_request.start_date == previous_start
    assert stale_request.end_date == d1_job.end_date
    assert stale_request.agency_code == '123'
    assert stale_request.agency_type == 'awarding'
# Example 4
def test_uncache_new_d1_file_fpds_success(monkeypatch,
                                          mock_broker_config_paths, database):
    """ A cached file from a previous day is not reused: a new file is
        generated and a fresh cached FileRequest is created for today """
    session = database.session
    storage = mock_broker_config_paths['d_file_storage_path']

    finished_job = JobFactory(
        job_status_id=JOB_STATUS_DICT['finished'],
        job_type_id=JOB_TYPE_DICT['file_upload'],
        file_type_id=FILE_TYPE_DICT['award_procurement'],
        filename=str(storage.join('original')),
        start_date='01/01/2017',
        end_date='01/31/2017',
        original_filename='original',
        from_cached=True)
    session.add(finished_job)
    session.commit()

    # The cached request is dated yesterday, so it is stale for today's run
    yesterday = datetime.now().date() - timedelta(1)
    stale_request = FileRequestFactory(job=finished_job,
                                       is_cached_file=True,
                                       agency_code='123',
                                       agency_type='awarding',
                                       start_date='01/01/2017',
                                       end_date='01/31/2017',
                                       file_type='D1',
                                       request_date=yesterday)
    pending_job = JobFactory(job_status_id=JOB_STATUS_DICT['waiting'],
                             job_type_id=JOB_TYPE_DICT['file_upload'],
                             file_type_id=FILE_TYPE_DICT['award_procurement'],
                             start_date='01/01/2017',
                             end_date='01/31/2017')
    session.add_all([stale_request, pending_job])
    session.commit()

    generator = FileGenerationManager(pending_job, '123', 'awarding',
                                      CONFIG_BROKER['local'])
    generator.generate_from_job()

    session.refresh(pending_job)
    request = session.query(FileRequest).filter(
        FileRequest.job_id == pending_job.job_id).one_or_none()

    # Today's generation created its own cached request
    assert request is not None
    assert request.is_cached_file is True
    assert request.start_date == pending_job.start_date
    assert request.end_date == pending_job.end_date
    assert request.agency_code == '123'
    assert request.request_date == datetime.now().date()

    # The new job got a brand-new file rather than the old one
    assert pending_job.original_filename != 'original'
    assert pending_job.from_cached is False
    assert pending_job.job_status_id == JOB_STATUS_DICT['finished']
# Example 5
def test_generate_noncached_d2_file_success(monkeypatch,
                                            mock_broker_config_paths,
                                            database):
    """ Cached requests that differ in agency, start date, or end date do not
        satisfy a new job: a fresh file is generated """
    session = database.session
    storage = mock_broker_config_paths['d_file_storage_path']

    # Three pre-existing jobs, each a near-miss on exactly one parameter:
    # (original_filename, file_type_id, start_date, end_date)
    job_specs = [
        ('diff_agency', FILE_TYPE_DICT['award_procurement'],
         '01/01/2017', '01/31/2017'),
        ('diff_start_date', FILE_TYPE_DICT['award'],
         '01/02/2017', '01/31/2017'),
        ('diff_end_date', FILE_TYPE_DICT['award_procurement'],
         '01/01/2017', '01/30/2017'),
    ]
    near_miss_jobs = [
        JobFactory(job_status_id=JOB_STATUS_DICT['waiting'],
                   job_type_id=JOB_TYPE_DICT['file_upload'],
                   file_type_id=type_id,
                   filename=str(storage.join(name)),
                   start_date=start,
                   end_date=end,
                   original_filename=name,
                   from_cached=False)
        for name, type_id, start, end in job_specs]
    session.add_all(near_miss_jobs)
    session.commit()

    # Cached D2 requests that each mismatch the new job on one field:
    # (agency_code, start_date, end_date)
    request_specs = [
        ('124', '01/01/2017', '01/31/2017'),
        ('123', '01/02/2017', '01/31/2017'),
        ('123', '01/01/2017', '01/30/2017'),
    ]
    cached_requests = [
        FileRequestFactory(job=near_miss_jobs[0],
                           is_cached_file=True,
                           agency_code=code,
                           agency_type='awarding',
                           start_date=start,
                           end_date=end,
                           file_type='D2',
                           request_date=datetime.now().date())
        for code, start, end in request_specs]
    new_job = JobFactory(job_status_id=JOB_STATUS_DICT['waiting'],
                         job_type_id=JOB_TYPE_DICT['file_upload'],
                         file_type_id=FILE_TYPE_DICT['award_procurement'],
                         start_date='01/01/2017',
                         end_date='01/31/2017')
    session.add_all([new_job] + cached_requests)
    session.commit()

    generator = FileGenerationManager(new_job, '123', 'awarding',
                                      CONFIG_BROKER['local'])
    generator.generate_from_job()

    session.refresh(new_job)
    request = session.query(FileRequest).filter(
        FileRequest.job_id == new_job.job_id).one_or_none()

    # A new cached request was created for the new job's exact parameters
    assert request is not None
    assert request.is_cached_file is True
    assert request.start_date == new_job.start_date
    assert request.end_date == new_job.end_date
    assert request.agency_code == '123'
    assert request.request_date == datetime.now().date()

    # None of the near-miss files were reused
    for existing in near_miss_jobs:
        assert new_job.original_filename != existing.original_filename
    assert new_job.from_cached is False
    assert new_job.job_status_id == JOB_STATUS_DICT['finished']
# Example 6
def run_app():
    """Run the validator application: poll SQS forever and process each job.

    Each SQS message body is a job_id. Depending on the job's type, the
    message either triggers validation (ValidationManager) or D-file
    generation (FileGenerationManager). Errors are logged, written to the
    file-error table where applicable, and non-retryable messages are
    deleted; all other messages have their visibility reset so they can be
    retried immediately.
    """
    app = create_app()

    # This is for DataDog (Do Not Delete)
    if USE_DATADOG:
        TraceMiddleware(app,
                        tracer,
                        service="broker-dd",
                        distributed_tracing=False)

    with app.app_context():
        current_app.debug = CONFIG_SERVICES['debug']
        local = CONFIG_BROKER['local']
        g.is_local = local
        error_report_path = CONFIG_SERVICES['error_report_path']
        current_app.config.from_object(__name__)

        # Create connection to job tracker database
        sess = GlobalDB.db().session

        # Future: Override config w/ environment variable, if set
        current_app.config.from_envvar('VALIDATOR_SETTINGS', silent=True)

        queue = sqs_queue()
        messages = []

        logger.info("Starting SQS polling")
        while True:
            # Set current_message to None before every loop to ensure it's never set to the previous message
            current_message = None
            try:
                # Grabs one (or more) messages from the queue
                messages = queue.receive_messages(
                    WaitTimeSeconds=10, MessageAttributeNames=['All'])
                for message in messages:
                    logger.info("Message received: %s", message.body)

                    # Retrieve the job_id from the message body
                    current_message = message
                    g.job_id = message.body
                    mark_job_status(g.job_id, "ready")

                    # Get the job
                    job = sess.query(Job).filter_by(
                        job_id=g.job_id).one_or_none()
                    if job is None:
                        # Unknown job id: record a file-level job error, then raise so the
                        # ResponseException handler below takes over for this message
                        validation_error_type = ValidationError.jobError
                        write_file_error(g.job_id, None, validation_error_type)
                        raise ResponseException(
                            'Job ID {} not found in database'.format(g.job_id),
                            StatusCode.CLIENT_ERROR, None,
                            validation_error_type)

                    # We have two major functionalities in the Validator: validation and file generation.
                    # Validation runs for A/B/C/FABS files (or non-upload jobs) tied to a submission;
                    # everything else is treated as a D-file generation request.
                    if (not job.file_type or job.file_type.letter_name
                            in ['A', 'B', 'C', 'FABS'] or job.job_type.name !=
                            'file_upload') and job.submission_id:
                        # Run validations
                        validation_manager = ValidationManager(
                            local, error_report_path)
                        validation_manager.validate_job(job.job_id)
                    else:
                        # Retrieve the agency code data from the message attributes
                        # (attributes may be absent entirely, hence the guards)
                        msg_attr = current_message.message_attributes
                        agency_code = msg_attr['agency_code']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_code') else None
                        agency_type = msg_attr['agency_type']['StringValue'] if msg_attr and \
                            msg_attr.get('agency_type') else None

                        file_generation_manager = FileGenerationManager(
                            job, agency_code, agency_type, local)
                        file_generation_manager.generate_from_job()
                        sess.commit()
                        sess.refresh(job)

                    # Delete from SQS once processed
                    message.delete()

            except ResponseException as e:
                # Handle exceptions explicitly raised during validation.
                logger.error(traceback.format_exc())

                job = get_current_job()
                if job:
                    if job.filename is not None:
                        # Insert file-level error info to the database
                        write_file_error(job.job_id, job.filename, e.errorType,
                                         e.extraInfo)
                    if e.errorType != ValidationError.jobError:
                        # Job passed prerequisites for validation but an error happened somewhere: mark job as 'invalid'
                        mark_job_status(job.job_id, 'invalid')
                        if current_message:
                            # These error types will fail identically on retry, so drop the message
                            if e.errorType in [
                                    ValidationError.rowCountError,
                                    ValidationError.headerError,
                                    ValidationError.fileTypeError
                            ]:
                                current_message.delete()
            except Exception as e:
                # Handle uncaught exceptions in validation process.
                logger.error(traceback.format_exc())

                # csv-specific errors get a different job status and response code
                if isinstance(e, ValueError) or isinstance(
                        e, csv.Error) or isinstance(e, UnicodeDecodeError):
                    job_status = 'invalid'
                else:
                    job_status = 'failed'
                job = get_current_job()
                if job:
                    if job.filename is not None:
                        error_type = ValidationError.unknownError
                        if isinstance(e, UnicodeDecodeError):
                            error_type = ValidationError.encodingError
                            # TODO Is this really the only case where the message should be deleted?
                            if current_message:
                                current_message.delete()
                        write_file_error(job.job_id, job.filename, error_type)
                    mark_job_status(job.job_id, job_status)
            finally:
                GlobalDB.close()
                # Set visibility to 0 so that another attempt can be made to process in SQS immediately,
                # instead of waiting for the timeout window to expire
                for message in messages:
                    try:
                        message.change_visibility(VisibilityTimeout=0)
                    except ClientError:
                        # Deleted messages will throw errors, which is fine because they are handled
                        pass