    def startGenerationJob(self, submission_id, file_type):
        """ Initiates a file generation job

        Args:
            submission_id: ID of submission to start job for
            file_type: Type of file to be generated

        Returns:
            Tuple of a boolean indicating a successful start, and an error response if that boolean is False

        """
        jobDb = self.interfaces.jobDb
        file_type_name = self.fileTypeMap[file_type]

        if file_type in ["D1", "D2"]:
            # Populate start and end dates; these should be provided in MM/DD/YYYY format, using the calendar year (not the fiscal year)
            requestDict = RequestDictionary(self.request)
            start_date = requestDict.getValue("start")
            end_date = requestDict.getValue("end")

            if not (StringCleaner.isDate(start_date) and StringCleaner.isDate(end_date)):
                exc = ResponseException("Start or end date cannot be parsed into a date", StatusCode.CLIENT_ERROR)
                return False, JsonResponse.error(exc, exc.status, start = "", end = "", file_type = file_type, status = "failed")
        elif file_type not in ["E", "F"]:
            exc = ResponseException("File type must be either D1, D2, E or F", StatusCode.CLIENT_ERROR)
            return False, JsonResponse.error(exc, exc.status, file_type=file_type, status="failed")

        cgac_code = self.jobManager.getSubmissionById(submission_id).cgac_code

        # Generate and upload file to S3
        user_id = LoginSession.getName(session)
        timestamped_name = s3UrlHandler.getTimestampedFilename(
            CONFIG_BROKER[file_type_name + "_file_name"])
        if self.isLocal:
            upload_file_name = "".join([CONFIG_BROKER['broker_files'], timestamped_name])
        else:
            upload_file_name = "".join([str(user_id), "/", timestamped_name])

        job = jobDb.getJobBySubmissionFileTypeAndJobType(
            submission_id, file_type_name, "file_upload")
        job.filename = upload_file_name
        job.original_filename = timestamped_name
        job.job_status_id = jobDb.getJobStatusId("running")
        jobDb.session.commit()
        if file_type in ["D1", "D2"]:
            CloudLogger.log("DEBUG: Adding job info for job id of " + str(job.job_id),
                            log_type="debug",
                            file_name=self.debug_file_name)
            return self.addJobInfoForDFile(
                upload_file_name, timestamped_name, submission_id, file_type,
                file_type_name, start_date, end_date, cgac_code, job)
        elif file_type == 'E':
            generate_e_file.delay(
                submission_id, job.job_id, InterfaceHolder, timestamped_name,
                upload_file_name, self.isLocal)
        elif file_type == 'F':
            generate_f_file.delay(
                submission_id, job.job_id, InterfaceHolder, timestamped_name,
                upload_file_name, self.isLocal)

        return True, None

    def addJobInfoForDFile(self, upload_file_name, timestamped_name, submission_id,
                           file_type, file_type_name, start_date, end_date, cgac_code, job):
        """ Populates upload and validation job objects with start and end dates, filenames, and status

        Args:
            upload_file_name - Filename to use on S3
            timestamped_name - Version of filename without user ID
            submission_id - Submission to add D files to
            file_type - File type as either "D1" or "D2"
            file_type_name - Full name of file type
            start_date - Beginning of period for D file
            end_date - End of period for D file
            cgac_code - Agency to generate D file for
            job - Job object for upload job
        """
        jobDb = self.interfaces.jobDb
        try:
            valJob = jobDb.getJobBySubmissionFileTypeAndJobType(
                submission_id, file_type_name, "csv_record_validation")
            valJob.filename = upload_file_name
            valJob.original_filename = timestamped_name
            valJob.job_status_id = jobDb.getJobStatusId("waiting")
            start = datetime.strptime(start_date, "%m/%d/%Y").date()
            end = datetime.strptime(end_date, "%m/%d/%Y").date()
            job.start_date = start
            job.end_date = end
            valJob.start_date = start
            valJob.end_date = end
            # Generate random uuid and store generation task
            task_key = uuid4()
            task = FileGenerationTask(
                generation_task_key=task_key,
                submission_id=submission_id,
                file_type_id=jobDb.getFileTypeId(file_type_name),
                job_id=job.job_id)
            jobDb.session.add(task)
            jobDb.session.commit()
        except ValueError as e:
            # Date was not in expected format
            exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, ValueError)
            return False, JsonResponse.error(exc, exc.status, url="", start="", end="", file_type=file_type)
        # Create file D API URL with dates and callback URL
        callback = "{}://{}:{}/v1/complete_generation/{}/".format(CONFIG_SERVICES["protocol"],CONFIG_SERVICES["broker_api_host"], CONFIG_SERVICES["broker_api_port"],task_key)
        CloudLogger.log(
            'DEBUG: Callback URL for {}: {}'.format(file_type, callback),
            log_type='debug', file_name=self.debug_file_name)
        get_url = CONFIG_BROKER[file_type_name + "_url"].format(
            cgac_code, start_date, end_date, callback)

        CloudLogger.log("DEBUG: Calling D file API => " + str(get_url),
                        log_type="debug",
                        file_name=self.debug_file_name)
        try:
            if not self.call_d_file_api(get_url):
                self.handleEmptyResponse(job, valJob)
        except Timeout as e:
            exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, Timeout)
            return False, JsonResponse.error(exc, exc.status, url="", start="", end="", file_type=file_type)

        return True, None
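
The MM/DD/YYYY handling above is the crux of the D file date logic; here is a minimal standalone sketch of the same validation using only the standard library (the helper name is hypothetical):

from datetime import datetime

def parse_d_file_dates(start, end):
    # Mirrors the strptime calls in addJobInfoForDFile: both values must be
    # MM/DD/YYYY calendar dates, otherwise ValueError propagates to the caller.
    fmt = "%m/%d/%Y"
    return datetime.strptime(start, fmt).date(), datetime.strptime(end, fmt).date()

parse_d_file_dates("01/01/2017", "03/31/2017")  # -> (date(2017, 1, 1), date(2017, 3, 31))
try:
    parse_d_file_dates("2017-01-01", "03/31/2017")
except ValueError:
    pass  # rejected, as in the CLIENT_ERROR branch above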
Example #3
def write_csv(file_name, upload_name, is_local, header, body):
    """Derive the relevant location and write a CSV to it.
    :return: the final file name (complete with prefix)"""
    if is_local:
        file_name = CONFIG_BROKER['broker_files'] + file_name
        csv_writer = CsvLocalWriter(file_name, header)
        message = 'DEBUG: Writing file locally...'
    else:
        bucket = CONFIG_BROKER['aws_bucket']
        region = CONFIG_BROKER['aws_region']
        csv_writer = CsvS3Writer(region, bucket, upload_name, header)
        message = 'DEBUG: Writing file to S3...'

    CloudLogger.log(message, log_type="debug", file_name='smx_request.log')

    with csv_writer as writer:
        for line in body:
            writer.write(line)
        writer.finishBatch()
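
A hypothetical local invocation of write_csv; the file names and rows are illustrative only, and is_local=True assumes CONFIG_BROKER['broker_files'] points at a writable directory:

header = ["award_id", "amount"]
body = [["1", "100"], ["2", "250"]]
# With is_local=False the same rows would be written to the configured S3
# bucket under upload_name instead of the local broker_files directory.
write_csv("d1_20170101_120000.csv", "user42/d1_20170101_120000.csv", True, header, body)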
    def completeGeneration(self, generationId):
        """ For files D1 and D2, the API uses this route as a callback to load the generated file.
        Requires an 'href' key in the request that specifies the URL of the file to be downloaded

        Args:
            generationId - Unique key stored in the file_generation_task table, used by this
                callback to identify which submission the file belongs to

        """
        if generationId is None:
            exc = ResponseException("Must include a generation ID", StatusCode.CLIENT_ERROR)
            return JsonResponse.error(exc, exc.status)

        self.smx_log_file_name = "smx_request.log"

        # Pull url from request
        safeDictionary = RequestDictionary(self.request)
        CloudLogger.log("DEBUG: Request content => " + safeDictionary.to_string(), log_type="debug", file_name=self.smx_log_file_name)

        if not safeDictionary.exists("href"):
            exc = ResponseException("Request must include href key with URL of D file", StatusCode.CLIENT_ERROR)
            return JsonResponse.error(exc, exc.status)
        url = safeDictionary.getValue("href")
        CloudLogger.log("DEBUG: Download URL => " + url, log_type="debug", file_name=self.smx_log_file_name)

        # Pull information based on task key
        try:
            CloudLogger.log("DEBUG: Pulling information based on task key...", log_type="debug",
                            file_name=self.smx_log_file_name)
            task = self.interfaces.jobDb.session.query(FileGenerationTask).\
                options(joinedload(FileGenerationTask.file_type)).\
                filter(FileGenerationTask.generation_task_key == generationId).one()
            job = self.interfaces.jobDb.getJobById(task.job_id)
            CloudLogger.log("DEBUG: Loading D file...", log_type="debug", file_name=self.smx_log_file_name)
            result = self.load_d_file(url, job.filename, job.original_filename, job.job_id, self.isLocal)
            CloudLogger.log("DEBUG: Load D file result => " + str(result), log_type="debug",
                            file_name=self.smx_log_file_name)
            return JsonResponse.create(StatusCode.OK, {"message": "File loaded successfully"})
        except ResponseException as e:
            return JsonResponse.error(e, e.status)
        except NoResultFound:
            # Did not find file generation task
            exc = ResponseException("Generation task key not found", StatusCode.CLIENT_ERROR)
            return JsonResponse.error(exc, exc.status)

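The callback URL built in addJobInfoForDFile points at this route. A sketch of the request the external D file API is expected to make; the host, task key, and href are placeholders, and a JSON body is assumed:

import requests

task_key = "0b1fa9df-..."  # generation_task_key stored in the file_generation_task table
requests.post(
    "http://broker-api.example.com:80/v1/complete_generation/{}/".format(task_key),
    json={"href": "https://example.com/generated/d1_file.csv"})
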
    def generateFile(self):
        """ Start a file generation job for the specified file type """
        self.debug_file_name = "debug.log"
        CloudLogger.log("DEBUG: Starting D file generation", log_type="debug",
                        file_name=self.debug_file_name)
        submission_id, file_type = self.getRequestParamsForGenerate()

        CloudLogger.log("DEBUG: Submission ID = " + str(submission_id) + " / File type = " + str(file_type), log_type="debug",
                        file_name=self.debug_file_name)
        # Check permission to submission
        success, error_response = self.checkSubmissionById(submission_id, file_type)
        if not success:
            return error_response

        job = self.interfaces.jobDb.getJobBySubmissionFileTypeAndJobType(
            submission_id, self.fileTypeMap[file_type], "file_upload")
        # Check prerequisites on upload job
        if not self.interfaces.jobDb.runChecks(job.job_id):
            exc = ResponseException("Must wait for completion of prerequisite validation job", StatusCode.CLIENT_ERROR)
            return JsonResponse.error(exc, exc.status)

        success, error_response = self.startGenerationJob(submission_id, file_type)

        CloudLogger.log("DEBUG: Finished startGenerationJob method",
                        log_type="debug",
                        file_name=self.debug_file_name)
        if not success:
            # If not successful, set job status as "failed"
            self.interfaces.jobDb.markJobStatus(job.job_id, "failed")
            return error_response

        # Return same response as check generation route
        return self.checkGeneration(submission_id, file_type)

    def checkJobDependencies(self, jobId):
        """ For specified job, check which of its dependencies are ready to be started, and add them to the queue """

        # raise exception if current job is not actually finished
        if self.getJobStatus(jobId) != self.getJobStatusId('finished'):
            raise ValueError(
                'Current job not finished, unable to check dependencies')

        # check if dependent jobs are finished
        for depJobId in self.getDependentJobs(jobId):
            isReady = True
            if self.getJobStatus(depJobId) != self.getJobStatusId('waiting'):
                CloudLogger.logError(
                    "Job dependency is not in a 'waiting' state",
                    ResponseException(
                        "Job dependency is not in a 'waiting' state",
                        StatusCode.CLIENT_ERROR, ValueError),
                    traceback.extract_stack())
                continue
            # check whether all of the dependent job's prerequisites are finished
            for preReqJobId in self.getPrerequisiteJobs(depJobId):
                if self.getJobStatus(preReqJobId) != self.getJobStatusId('finished'):
                    # a prerequisite is still outstanding, so this job is not ready
                    isReady = False
                    break
            # The type check here is temporary and needs to be removed once the validator
            # is able to handle cross-file validation jobs
            if isReady and self.getJobType(depJobId) in ('csv_record_validation', 'validation'):
                # mark job as ready
                self.markJobStatus(depJobId, 'ready')
                # add to the job queue
                CloudLogger.log("Sending job {} to the job manager".format(
                    str(depJobId)))
                mp = ManagerProxy()
                mp.sendJobRequest(depJobId)
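
A minimal sketch of how this method is driven, assuming a job handler exposing the interface above: once a job is marked finished, its waiting dependents can be released.

job_id = 123  # hypothetical finished job
jobDb.markJobStatus(job_id, 'finished')  # satisfy the guard at the top of checkJobDependencies
jobDb.checkJobDependencies(job_id)       # 'waiting' dependents with no unfinished prerequisites
                                         # are marked 'ready' and sent to the job manager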
Example #7
def enqueue(jobID):
    """POST a job to the validator"""
    CloudLogger.log("Adding job {} to the queue".format(str(jobID)))
    validatorUrl = '{validator_host}:{validator_port}'.format(
        **CONFIG_SERVICES)
    if not validatorUrl.startswith('http://'):
        validatorUrl = 'http://' + validatorUrl
    validatorUrl += '/validate/'
    params = {
        'job_id': jobID
    }
    response = requests.post(validatorUrl, data=params)
    CloudLogger.log("Job {} has completed validation".format(str(jobID)))
    CloudLogger.log("Validator response: {}".format(str(response.json())))
    return response.json()
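
Usage is a single blocking call; the job ID is hypothetical:

result = enqueue(42)  # POSTs job_id=42 to http://<validator_host>:<validator_port>/validate/
print(result)         # the validator's JSON response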
Example #8
    def load_d_file(self, url, upload_name, timestamped_name, job_id, isLocal):
        """ Pull D file from specified URL and write to S3 """
        job_manager = self.interfaces.jobDb
        try:
            full_file_path = "".join(
                [CONFIG_BROKER['d_file_storage_path'], timestamped_name])

            CloudLogger.log("DEBUG: Downloading file...",
                            log_type="debug",
                            file_name=self.smx_log_file_name)
            if not self.download_file(full_file_path, url):
                # Error occurred while downloading file, mark job as failed and record error message
                job_manager.markJobStatus(job_id, "failed")
                job = job_manager.getJobById(job_id)
                file_type = job_manager.getFileType(job_id)
                if file_type == "award":
                    source = "ASP"
                elif file_type == "award_procurement":
                    source = "FPDS"
                else:
                    source = "unknown source"
                job.error_message = "A problem occurred receiving data from {}".format(
                    source)

                raise ResponseException(job.error_message,
                                        StatusCode.CLIENT_ERROR)
            lines = self.get_lines_from_csv(full_file_path)

            write_csv(timestamped_name, upload_name, isLocal, lines[0],
                      lines[1:])

            CloudLogger.log("DEBUG: Marking job id of " + str(job_id) +
                            " as finished",
                            log_type="debug",
                            file_name=self.smx_log_file_name)
            job_manager.markJobStatus(job_id, "finished")
            return {"message": "Success", "file_name": timestamped_name}
        except Exception as e:
            CloudLogger.log("ERROR: Exception caught => " + str(e),
                            log_type="debug",
                            file_name=self.smx_log_file_name)
            # Log the error
            JsonResponse.error(e, StatusCode.INTERNAL_ERROR)
            job_manager.getJobById(job_id).error_message = str(e)
            job_manager.markJobStatus(job_id, "failed")
            job_manager.session.commit()
            raise
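
The write_csv call above splits the downloaded rows purely by position; a toy illustration of that split (values are made up):

lines = [["award_id", "amount"], ["1", "100"], ["2", "250"]]
header, body = lines[0], lines[1:]  # header row vs. data rows, as passed to write_csv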
Example #9
# we want to allow access to, e.g. config.CONFIG_BROKER
from dataactcore.read_config import (  # noqa
    CONFIG_BROKER, CONFIG_LOGGING, CONFIG_SERVICES, CONFIG_DB,
    CONFIG_JOB_QUEUE, CONFIG_PATH, ALEMBIC_PATH, MIGRATION_PATH, log_message)
from dataactcore.utils.cloudLogger import CloudLogger

# Log config values along with warnings for missing files
if log_message:
    CloudLogger.log(log_message)
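
With the re-export above, callers can read settings directly from the config dictionaries; the keys shown here are ones used elsewhere in these snippets:

from dataactcore.read_config import CONFIG_BROKER, CONFIG_SERVICES

bucket = CONFIG_BROKER['aws_bucket']       # S3 bucket used by CsvS3Writer
host = CONFIG_SERVICES['broker_api_host']  # host baked into the D file callback URL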
Example #10
    def get_xml_response_content(self, api_url):
        """ Retrieve the XML response from the provided API URL """
        CloudLogger.log("DEBUG: Getting XML response",
                        log_type="debug",
                        file_name=self.debug_file_name)
        return requests.get(api_url, verify=False, timeout=120).text
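
The raw text returned above can be fed straight into the standard-library XML parser; handler and api_url are placeholders:

import xml.etree.ElementTree as ET

xml_text = handler.get_xml_response_content(api_url)
root = ET.fromstring(xml_text)  # parse the XML payload for further processing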