def start_a_generation(job, start_date, end_date, agency_code):
    """ Validates the start and end dates of the generation and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_code: Agency code for A file generations
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    mark_job_status(job.job_id, "waiting")

    file_type = job.file_type.letter_name
    log_data = {'message': 'Sending {} file generation job {} to Validator in SQS'.format(file_type, job.job_id),
                'message_type': 'BrokerInfo', 'job_id': job.job_id, 'file_type': file_type}
    logger.info(log_data)

    # Set SQS message attributes
    message_attr = {'agency_code': {'DataType': 'String', 'StringValue': agency_code}}

    # Add job_id to the SQS job queue
    queue = sqs_queue()
    msg_response = queue.send_message(MessageBody=str(job.job_id), MessageAttributes=message_attr)

    log_data['message'] = 'SQS message response: {}'.format(msg_response)
    logger.debug(log_data)
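
# A minimal usage sketch for start_a_generation, assuming the Job model is
# importable here and a row with hypothetical job_id 1234 exists; '020' stands
# in for a real CGAC agency code.
def _example_start_a_generation(sess):
    job = sess.query(Job).filter_by(job_id=1234).one()
    # Marks the job 'waiting' and queues its job_id in SQS for the Validator;
    # a malformed MM/DD/YYYY string raises ResponseException(CLIENT_ERROR).
    start_a_generation(job, '01/01/2017', '03/31/2017', '020')
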
def job_to_dict(job):
    """ Convert a Job model into a dictionary, ready to be serialized as JSON

        Args:
            job: job to convert into a dictionary

        Returns:
            A dictionary of job information
    """
    sess = GlobalDB.db().session

    job_info = {
        'job_id': job.job_id,
        'job_status': job.job_status_name,
        'job_type': job.job_type_name,
        'filename': job.original_filename,
        'file_size': job.file_size,
        'number_of_rows': job.number_of_rows,
        'file_type': job.file_type_name or ''
    }

    # @todo replace with relationships
    file_results = sess.query(File).filter_by(job_id=job.job_id).one_or_none()
    if file_results is None:
        # Job ID not in error database, probably did not make it to validation, or has not yet been validated
        job_info.update({
            'file_status': "",
            'error_type': "",
            'error_data': [],
            'warning_data': [],
            'missing_headers': [],
            'duplicated_headers': []
        })
    else:
        # If the job ID was found in the error database, we should be able to get the header error lists and file
        # data. Get the string of missing headers and parse it as a list
        job_info['file_status'] = file_results.file_status_name
        job_info['missing_headers'] = StringCleaner.split_csv(file_results.headers_missing)
        job_info["duplicated_headers"] = StringCleaner.split_csv(file_results.headers_duplicated)
        job_info["error_type"] = get_error_type(job.job_id)
        job_info["error_data"] = get_error_metrics_by_job_id(job.job_id, job.job_type_name == 'validation',
                                                             severity_id=RULE_SEVERITY_DICT['fatal'])
        job_info["warning_data"] = get_error_metrics_by_job_id(job.job_id, job.job_type_name == 'validation',
                                                               severity_id=RULE_SEVERITY_DICT['warning'])
    return job_info
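
# For reference, job_to_dict on a validated upload job yields a payload shaped
# roughly like the sketch below (all values are illustrative only):
#
#     {
#         'job_id': 1234,
#         'job_status': 'finished',
#         'job_type': 'csv_record_validation',
#         'filename': 'appropriations.csv',
#         'file_size': 52428,
#         'number_of_rows': 500,
#         'file_type': 'A',
#         'file_status': 'complete',
#         'missing_headers': [],
#         'duplicated_headers': [],
#         'error_type': 'row_errors',
#         'error_data': [...],
#         'warning_data': [...]
#     }
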
def start_d_generation(job, start_date, end_date, agency_type, agency_code=None, file_format='csv',
                       element_numbers=False):
    """ Validates the start and end dates of the generation, updates the submission's publish status and progress (if
        its not detached generation), and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code for detached D file generations
            file_format: determines if the file generated is a txt or a csv
            element_numbers: determines if the alternate headers with FPDS element numbers should be used
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    # Update submission
    if job.submission_id:
        agency_code = update_generation_submission(sess, job)

    mark_job_status(job.job_id, 'waiting')

    file_generation = retrieve_cached_file_generation(job, agency_type, agency_code, file_format, element_numbers)
    if file_generation:
        try:
            copy_file_generation_to_job(job, file_generation, g.is_local)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            sess.commit()
    else:
        # Create new FileGeneration and reset Jobs
        file_generation = FileGeneration(
            request_date=datetime.now().date(), start_date=job.start_date, end_date=job.end_date,
            file_type=job.file_type.letter_name, agency_code=agency_code, agency_type=agency_type,
            file_format=file_format, is_cached_file=True, element_numbers=element_numbers)
        sess.add(file_generation)
        sess.commit()

        try:
            job.file_generation_id = file_generation.file_generation_id
            sess.commit()
            reset_generation_jobs(sess, job)
            logger.info({'message': 'Sending new FileGeneration {} to SQS'.format(file_generation.file_generation_id),
                         'message_type': 'BrokerInfo', 'file_type': job.file_type.letter_name, 'job_id': job.job_id,
                         'submission_id': job.submission_id, 'file_generation_id': file_generation.file_generation_id})

            # Add file_generation_id to the SQS job queue
            queue = sqs_queue()
            message_attr = {"validation_type": {"DataType": "String", "StringValue": "generation"}}
            queue.send_message(MessageBody=str(file_generation.file_generation_id), MessageAttributes=message_attr)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            file_generation.is_cached_file = False
            sess.commit()
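
# A minimal usage sketch for a detached D1 generation, assuming a hypothetical
# Job row with job_id 5678. When a cached FileGeneration matches the (dates,
# agency, file_format, element_numbers) combination its file is copied onto the
# job; otherwise a new FileGeneration row is created, the related generation
# jobs are reset, and the generation is queued in SQS.
def _example_start_d_generation(sess):
    job = sess.query(Job).filter_by(job_id=5678).one()
    start_d_generation(job, '10/01/2020', '12/31/2020', 'awarding',
                       agency_code='097', file_format='csv')
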
def start_generation_job(job, start_date, end_date, agency_code=None):
    """ Validates the dates for a D file generation job and passes the Job ID to SQS

        Args:
            job: File generation job to start
            start_date: Start date of the file generation
            end_date: End date of the file generation
            agency_code: Agency code for detached D file generations

        Returns:
            Tuple of boolean indicating successful start, and error response if False
    """
    sess = GlobalDB.db().session
    file_type = job.file_type.letter_name
    try:
        if file_type in ['D1', 'D2']:
            # Validate and set Job's start and end dates
            if not (StringCleaner.is_date(start_date)
                    and StringCleaner.is_date(end_date)):
                raise ResponseException(
                    "Start or end date cannot be parsed into a date",
                    StatusCode.CLIENT_ERROR)
            job.start_date = start_date
            job.end_date = end_date
            sess.commit()
        elif file_type not in ["E", "F"]:
            raise ResponseException("File type must be either D1, D2, E or F",
                                    StatusCode.CLIENT_ERROR)

    except ResponseException as e:
        return False, JsonResponse.error(e,
                                         e.status,
                                         file_type=file_type,
                                         status='failed')

    mark_job_status(job.job_id, "waiting")

    # Add job_id to the SQS job queue
    logger.info({
        'message_type': 'ValidatorInfo',
        'job_id': job.job_id,
        'message': 'Sending file generation job {} to Validator in SQS'.format(job.job_id)
    })
    queue = sqs_queue()

    message_attr = {
        'agency_code': {
            'DataType': 'String',
            'StringValue': agency_code
        }
    } if agency_code else {}
    response = queue.send_message(MessageBody=str(job.job_id),
                                  MessageAttributes=message_attr)
    logger.debug({
        'message_type': 'ValidatorInfo',
        'job_id': job.job_id,
        'message': 'Send message response: {}'.format(response)
    })

    return True, None
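
# start_generation_job reports failure through its return value rather than by
# raising, so a hypothetical caller unpacks the (success, error_response) tuple:
#
#     success, error_response = start_generation_job(job, start, end)
#     if not success:
#         return error_response  # a JsonResponse already marked status 'failed'
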
def generate_detached_file(file_type, cgac_code, frec_code, start, end,
                           quarter, agency_type):
    """ Start a file generation job for the specified file type not connected to a submission

        Args:
            file_type: type of file to be generated
            cgac_code: the code of a CGAC agency if generating for a CGAC agency
            frec_code: the code of a FREC agency if generating for a FREC agency
            start: start date in a string, formatted MM/DD/YYYY
            end: end date in a string, formatted MM/DD/YYYY
            quarter: quarter to generate for, formatted Q#/YYYY
            agency_type: The type of agency (awarding or funding) to generate the file for

        Returns:
            JSONResponse object with keys job_id, status, file_type, url, message, start, and end.

        Raises:
            ResponseException: if the start and end Strings cannot be parsed into dates
    """
    # Make sure it's a valid request
    if not cgac_code and not frec_code:
        return JsonResponse.error(
            ValueError(
                "Detached file generation requires CGAC or FR Entity Code"),
            StatusCode.CLIENT_ERROR)

    if file_type in ['D1', 'D2']:
        # Make sure we have a start and end date for D1/D2 generation
        if not start or not end:
            return JsonResponse.error(
                ValueError(
                    "Must have a start and end date for D file generation."),
                StatusCode.CLIENT_ERROR)
        # Check if date format is MM/DD/YYYY
        if not (StringCleaner.is_date(start) and StringCleaner.is_date(end)):
            raise ResponseException(
                'Start or end date cannot be parsed into a date',
                StatusCode.CLIENT_ERROR)

        if agency_type not in ('awarding', 'funding'):
            return JsonResponse.error(
                ValueError("agency_type must be either awarding or funding."),
                StatusCode.CLIENT_ERROR)
    else:
        # Check if date format is Q#/YYYY
        if not quarter:
            return JsonResponse.error(
                ValueError("Must have a quarter for A file generation."),
                StatusCode.CLIENT_ERROR)

        try:
            start, end = generic_helper.quarter_to_dates(quarter)
        except ResponseException as e:
            return JsonResponse.error(e, StatusCode.CLIENT_ERROR)

    # Add job info
    file_type_name = lookups.FILE_TYPE_DICT_LETTER_NAME[file_type]
    new_job = generation_helper.add_generation_job_info(
        file_type_name=file_type_name, start_date=start, end_date=end)

    agency_code = frec_code if frec_code else cgac_code
    log_data = {
        'message': 'Starting detached {} file generation'.format(file_type),
        'message_type': 'BrokerInfo',
        'job_id': new_job.job_id,
        'file_type': file_type,
        'agency_code': agency_code,
        'start_date': start,
        'end_date': end
    }
    logger.info(log_data)

    try:
        if file_type in ['D1', 'D2']:
            generation_helper.start_d_generation(new_job,
                                                 start,
                                                 end,
                                                 agency_type,
                                                 agency_code=agency_code)
        else:
            generation_helper.start_a_generation(new_job, start, end,
                                                 agency_code)
    except Exception as e:
        mark_job_status(new_job.job_id, 'failed')
        new_job.error_message = str(e)
        GlobalDB.db().session.commit()
        return JsonResponse.error(e, StatusCode.INTERNAL_ERROR)

    # Return same response as check generation route
    return check_detached_generation(new_job.job_id)
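
# The quarter argument is a Q#/YYYY string; generic_helper.quarter_to_dates is
# expected to expand it into the start/end date strings used for the A file job,
# along the lines of:
#
#     start, end = generic_helper.quarter_to_dates('Q2/2018')
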
def start_d_generation(job, start_date, end_date, agency_type, agency_code=None):
    """ Validates the start and end dates of the generation, updates the submission's publish status and progress (if
        its not detached generation), and sends the job information to SQS.

        Args:
            job: File generation job to start
            start_date: String to parse as the start date of the generation
            end_date: String to parse as the end date of the generation
            agency_type: Type of Agency to generate files by: "awarding" or "funding"
            agency_code: Agency code for detached D file generations
    """
    if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
        raise ResponseException("Start or end date cannot be parsed into a date of format MM/DD/YYYY",
                                StatusCode.CLIENT_ERROR)

    # Update the Job's start and end dates
    sess = GlobalDB.db().session
    job.start_date = start_date
    job.end_date = end_date
    sess.commit()

    # Update submission
    if job.submission_id:
        agency_code = update_generation_submission(sess, job)

    mark_job_status(job.job_id, 'waiting')

    file_generation = retrieve_cached_file_generation(job, agency_type, agency_code)
    if file_generation:
        try:
            copy_file_generation_to_job(job, file_generation, g.is_local)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            sess.commit()
    else:
        # Create new FileGeneration and reset Jobs
        file_generation = FileGeneration(
            request_date=datetime.now().date(), start_date=job.start_date, end_date=job.end_date,
            file_type=job.file_type.letter_name, agency_code=agency_code, agency_type=agency_type, is_cached_file=True)
        sess.add(file_generation)
        sess.commit()

        try:
            job.file_generation_id = file_generation.file_generation_id
            sess.commit()
            reset_generation_jobs(sess, job)
            logger.info({'message': 'Sending new FileGeneration {} to SQS'.format(file_generation.file_generation_id),
                         'message_type': 'BrokerInfo', 'file_type': job.file_type.letter_name, 'job_id': job.job_id,
                         'submission_id': job.submission_id, 'file_generation_id': file_generation.file_generation_id})

            # Add file_generation_id to the SQS job queue
            queue = sqs_queue()
            message_attr = {"validation_type": {"DataType": "String", "StringValue": "generation"}}
            queue.send_message(MessageBody=str(file_generation.file_generation_id), MessageAttributes=message_attr)
        except Exception as e:
            logger.error(traceback.format_exc())

            mark_job_status(job.job_id, 'failed')
            job.error_message = str(e)
            file_generation.is_cached_file = False
            sess.commit()
def generate_detached_file(file_type, cgac_code, frec_code, start_date,
                           end_date, year, period, agency_type, file_format):
    """ Start a file generation job for the specified file type not connected to a submission

        Args:
            file_type: type of file to be generated
            cgac_code: the code of a CGAC agency if generating for a CGAC agency
            frec_code: the code of a FREC agency if generating for a FREC agency
            start_date: start date in a string, formatted MM/DD/YYYY
            end_date: end date in a string, formatted MM/DD/YYYY
            year: year to generate for, integer 4 digits
            period: period to generate for, integer (2-12)
            agency_type: The type of agency (awarding or funding) to generate the file for
            file_format: determines if the file generated is a txt or a csv (only used for D file generation)

        Returns:
            JSONResponse object with keys job_id, status, file_type, url, message, start_date, and end_date.

        Raises:
            ResponseException: if the start_date and end_date Strings cannot be parsed into dates
    """
    # Make sure it's a valid request
    if not cgac_code and not frec_code:
        return JsonResponse.error(
            ValueError(
                "Detached file generation requires CGAC or FR Entity Code"),
            StatusCode.CLIENT_ERROR)

    if file_type in ['D1', 'D2']:
        # Make sure we have a start and end date for D1/D2 generation
        if not start_date or not end_date:
            return JsonResponse.error(
                ValueError(
                    'Must have a start and end date for D file generation.'),
                StatusCode.CLIENT_ERROR)

        # Check if date format is MM/DD/YYYY
        if not (StringCleaner.is_date(start_date)
                and StringCleaner.is_date(end_date)):
            raise ResponseException(
                'Start or end date cannot be parsed into a date',
                StatusCode.CLIENT_ERROR)

        if agency_type not in ['awarding', 'funding']:
            return JsonResponse.error(
                ValueError('agency_type must be either awarding or funding.'),
                StatusCode.CLIENT_ERROR)

        if file_format not in ['csv', 'txt']:
            return JsonResponse.error(
                ValueError('file_format must be either csv or txt.'),
                StatusCode.CLIENT_ERROR)
    else:
        # Make sure both year and period are provided
        if not (year and period):
            return JsonResponse.error(
                ValueError(
                    "Must have a year and period for A file generation."),
                StatusCode.CLIENT_ERROR)

        try:
            # Convert to real start and end dates
            start_date, end_date = generic_helper.year_period_to_dates(
                year, period)
        except ResponseException as e:
            return JsonResponse.error(e, StatusCode.CLIENT_ERROR)

    # Add job info
    file_type_name = lookups.FILE_TYPE_DICT_LETTER_NAME[file_type]
    new_job = generation_helper.create_generation_job(file_type_name,
                                                      start_date, end_date)

    agency_code = frec_code if frec_code else cgac_code
    logger.info({
        'message': 'Starting detached {} file generation'.format(file_type),
        'message_type': 'BrokerInfo',
        'job_id': new_job.job_id,
        'file_type': file_type,
        'agency_code': agency_code,
        'start_date': start_date,
        'end_date': end_date
    })

    try:
        if file_type in ['D1', 'D2']:
            generation_helper.start_d_generation(new_job,
                                                 start_date,
                                                 end_date,
                                                 agency_type,
                                                 agency_code=agency_code,
                                                 file_format=file_format)
        else:
            generation_helper.start_a_generation(new_job, start_date, end_date,
                                                 agency_code)
    except Exception as e:
        mark_job_status(new_job.job_id, 'failed')
        new_job.error_message = str(e)
        GlobalDB.db().session.commit()
        return JsonResponse.error(e, StatusCode.INTERNAL_ERROR)

    # Return same response as check generation route
    return check_detached_generation(new_job.job_id)
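
# This newer signature replaces the quarter string with an explicit year and
# period (an integer from 2 to 12); generic_helper.year_period_to_dates converts
# the pair into start/end date strings before the A file job is created:
#
#     start_date, end_date = generic_helper.year_period_to_dates(2021, 6)
#
# D1/D2 requests skip that conversion and must instead supply start_date,
# end_date, agency_type, and a csv/txt file_format directly.
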
    def startGenerationJob(self, submission_id, file_type):
        """ Initiates a file generation job

        Args:
            submission_id: ID of submission to start job for
            file_type: Type of file to be generated

        Returns:
            Tuple of boolean indicating successful start, and error response if False

        """
        jobDb = self.interfaces.jobDb
        file_type_name = self.fileTypeMap[file_type]

        if file_type in ["D1", "D2"]:
            # Populate start and end dates; these should be provided in MM/DD/YYYY format, using the calendar year
            # (not the fiscal year)
            requestDict = RequestDictionary(self.request)
            start_date = requestDict.getValue("start")
            end_date = requestDict.getValue("end")

            if not (StringCleaner.isDate(start_date)
                    and StringCleaner.isDate(end_date)):
                exc = ResponseException(
                    "Start or end date cannot be parsed into a date",
                    StatusCode.CLIENT_ERROR)
                return False, JsonResponse.error(exc,
                                                 exc.status,
                                                 start="",
                                                 end="",
                                                 file_type=file_type,
                                                 status="failed")
        elif file_type not in ["E", "F"]:
            exc = ResponseException("File type must be either D1, D2, E or F",
                                    StatusCode.CLIENT_ERROR)
            return False, JsonResponse.error(exc,
                                             exc.status,
                                             file_type=file_type,
                                             status="failed")

        cgac_code = self.jobManager.getSubmissionById(submission_id).cgac_code

        # Generate and upload file to S3
        user_id = LoginSession.getName(session)
        timestamped_name = s3UrlHandler.getTimestampedFilename(
            CONFIG_BROKER["".join([str(file_type_name), "_file_name"])])
        if self.isLocal:
            upload_file_name = "".join(
                [CONFIG_BROKER['broker_files'], timestamped_name])
        else:
            upload_file_name = "".join([str(user_id), "/", timestamped_name])

        job = jobDb.getJobBySubmissionFileTypeAndJobType(
            submission_id, file_type_name, "file_upload")
        job.filename = upload_file_name
        job.original_filename = timestamped_name
        job.job_status_id = jobDb.getJobStatusId("running")
        jobDb.session.commit()
        if file_type in ["D1", "D2"]:
            CloudLogger.log("DEBUG: Adding job info for job id of " +
                            str(job.job_id),
                            log_type="debug",
                            file_name=self.debug_file_name)
            return self.addJobInfoForDFile(upload_file_name, timestamped_name,
                                           submission_id, file_type,
                                           file_type_name, start_date,
                                           end_date, cgac_code, job)
        elif file_type == 'E':
            generate_e_file.delay(submission_id, job.job_id, InterfaceHolder,
                                  timestamped_name, upload_file_name,
                                  self.isLocal)
        elif file_type == 'F':
            generate_f_file.delay(submission_id, job.job_id, InterfaceHolder,
                                  timestamped_name, upload_file_name,
                                  self.isLocal)

        return True, None
def generate_detached_file(file_type, cgac_code, frec_code, start_date, end_date, year, period, agency_type):
    """ Start a file generation job for the specified file type not connected to a submission

        Args:
            file_type: type of file to be generated
            cgac_code: the code of a CGAC agency if generating for a CGAC agency
            frec_code: the code of a FREC agency if generating for a FREC agency
            start_date: start date in a string, formatted MM/DD/YYYY
            end_date: end date in a string, formatted MM/DD/YYYY
            year: year to generate for, integer 4 digits
            period: period to generate for, integer (2-12)
            agency_type: The type of agency (awarding or funding) to generate the file for

        Returns:
            JSONResponse object with keys job_id, status, file_type, url, message, start_date, and end_date.

        Raises:
            ResponseException: if the start_date and end_date Strings cannot be parsed into dates
    """
    # Make sure it's a valid request
    if not cgac_code and not frec_code:
        return JsonResponse.error(ValueError("Detached file generation requires CGAC or FR Entity Code"),
                                  StatusCode.CLIENT_ERROR)

    if file_type in ['D1', 'D2']:
        # Make sure we have a start and end date for D1/D2 generation
        if not start_date or not end_date:
            return JsonResponse.error(ValueError("Must have a start and end date for D file generation."),
                                      StatusCode.CLIENT_ERROR)

        # Check if date format is MM/DD/YYYY
        if not (StringCleaner.is_date(start_date) and StringCleaner.is_date(end_date)):
            raise ResponseException('Start or end date cannot be parsed into a date', StatusCode.CLIENT_ERROR)

        if agency_type not in ('awarding', 'funding'):
            return JsonResponse.error(ValueError("agency_type must be either awarding or funding."),
                                      StatusCode.CLIENT_ERROR)
    else:
        # Make sure both year and period are provided
        if not (year and period):
            return JsonResponse.error(ValueError("Must have a year and period for A file generation."),
                                      StatusCode.CLIENT_ERROR)

        try:
            # Convert to real start and end dates
            start_date, end_date = generic_helper.year_period_to_dates(year, period)
        except ResponseException as e:
            return JsonResponse.error(e, StatusCode.CLIENT_ERROR)

    # Add job info
    file_type_name = lookups.FILE_TYPE_DICT_LETTER_NAME[file_type]
    new_job = generation_helper.create_generation_job(file_type_name, start_date, end_date)

    agency_code = frec_code if frec_code else cgac_code
    logger.info({'message': 'Starting detached {} file generation'.format(file_type), 'message_type': 'BrokerInfo',
                 'job_id': new_job.job_id, 'file_type': file_type, 'agency_code': agency_code, 'start_date': start_date,
                 'end_date': end_date})

    try:
        if file_type in ['D1', 'D2']:
            generation_helper.start_d_generation(new_job, start_date, end_date, agency_type, agency_code=agency_code)
        else:
            generation_helper.start_a_generation(new_job, start_date, end_date, agency_code)
    except Exception as e:
        mark_job_status(new_job.job_id, 'failed')
        new_job.error_message = str(e)
        GlobalDB.db().session.commit()
        return JsonResponse.error(e, StatusCode.INTERNAL_ERROR)

    # Return same response as check generation route
    return check_detached_generation(new_job.job_id)