def startGenerationJob(self, submission_id, file_type):
    """ Initiates a file generation job

    Args:
        submission_id: ID of submission to start job for
        file_type: Type of file to be generated ("D1", "D2", "E" or "F")

    Returns:
        Tuple of boolean indicating successful start, and error response if False.
        For D1/D2 this is whatever addJobInfoForDFile returns.
    """
    # Validate the file type before it is used as a key into fileTypeMap, so an
    # unsupported type produces the client-error response below rather than a KeyError.
    if file_type not in ("D1", "D2", "E", "F"):
        exc = ResponseException("File type must be either D1, D2, E or F", StatusCode.CLIENT_ERROR)
        return False, JsonResponse.error(exc, exc.status, file_type=file_type, status="failed")

    jobDb = self.interfaces.jobDb
    file_type_name = self.fileTypeMap[file_type]

    if file_type in ("D1", "D2"):
        # Populate start and end dates, these should be provided in MM/DD/YYYY format,
        # using calendar year (not fiscal year)
        requestDict = RequestDictionary(self.request)
        start_date = requestDict.getValue("start")
        end_date = requestDict.getValue("end")

        if not (StringCleaner.isDate(start_date) and StringCleaner.isDate(end_date)):
            exc = ResponseException("Start or end date cannot be parsed into a date", StatusCode.CLIENT_ERROR)
            return False, JsonResponse.error(exc, exc.status, start="", end="", file_type=file_type, status="failed")

    cgac_code = self.jobManager.getSubmissionById(submission_id).cgac_code

    # Generate and upload file to S3
    user_id = LoginSession.getName(session)
    timestamped_name = s3UrlHandler.getTimestampedFilename(CONFIG_BROKER["".join([str(file_type_name), "_file_name"])])
    if self.isLocal:
        # Local runs write under the configured broker_files prefix
        upload_file_name = "".join([CONFIG_BROKER['broker_files'], timestamped_name])
    else:
        # Otherwise the upload name is "<user id>/<timestamped name>"
        upload_file_name = "".join([str(user_id), "/", timestamped_name])

    # Record the target filenames on the upload job and mark it as running
    job = jobDb.getJobBySubmissionFileTypeAndJobType(submission_id, file_type_name, "file_upload")
    job.filename = upload_file_name
    job.original_filename = timestamped_name
    job.job_status_id = jobDb.getJobStatusId("running")
    jobDb.session.commit()

    if file_type in ("D1", "D2"):
        CloudLogger.log("DEBUG: Adding job info for job id of " + str(job.job_id),
                        log_type="debug",
                        file_name=self.debug_file_name)
        return self.addJobInfoForDFile(upload_file_name, timestamped_name, submission_id, file_type,
                                       file_type_name, start_date, end_date, cgac_code, job)
    elif file_type == 'E':
        generate_e_file.delay(
            submission_id, job.job_id, InterfaceHolder, timestamped_name,
            upload_file_name, self.isLocal)
    elif file_type == 'F':
        generate_f_file.delay(
            submission_id, job.job_id, InterfaceHolder, timestamped_name,
            upload_file_name, self.isLocal)

    return True, None
def addJobInfoForDFile(self, upload_file_name, timestamped_name, submission_id, file_type, file_type_name, start_date, end_date, cgac_code, job):
    """ Populates upload and validation job objects with start and end dates, filenames, and status,
    stores a file generation task, then calls the D file API.

    Args:
        upload_file_name - Filename to use on S3
        timestamped_name - Version of filename without user ID
        submission_id - Submission to add D files to
        file_type - File type as either "D1" or "D2"
        file_type_name - Full name of file type
        start_date - Beginning of period for D file, as MM/DD/YYYY
        end_date - End of period for D file, as MM/DD/YYYY
        cgac_code - Agency to generate D file for
        job - Job object for upload job

    Returns:
        (True, None) on success, or (False, error response) when a date cannot be
        parsed or the D file API call times out.
    """
    jobDb = self.interfaces.jobDb
    try:
        valJob = jobDb.getJobBySubmissionFileTypeAndJobType(submission_id, file_type_name, "csv_record_validation")
        valJob.filename = upload_file_name
        valJob.original_filename = timestamped_name
        valJob.job_status_id = jobDb.getJobStatusId("waiting")

        # Parse each date once; both jobs receive the same values
        parsed_start = datetime.strptime(start_date, "%m/%d/%Y").date()
        parsed_end = datetime.strptime(end_date, "%m/%d/%Y").date()
        job.start_date = parsed_start
        job.end_date = parsed_end
        valJob.start_date = parsed_start
        valJob.end_date = parsed_end

        # Generate random uuid and store generation task
        task_key = uuid4()
        task = FileGenerationTask(generation_task_key=task_key, submission_id=submission_id,
                                  file_type_id=jobDb.getFileTypeId(file_type_name), job_id=job.job_id)
        jobDb.session.add(task)

        jobDb.session.commit()
    except ValueError as e:
        # Date was not in expected format
        exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, ValueError)
        return False, JsonResponse.error(exc, exc.status, url="", start="", end="", file_type=file_type)

    # Create file D API URL with dates and callback URL
    callback = "{}://{}:{}/v1/complete_generation/{}/".format(CONFIG_SERVICES["protocol"],
                                                              CONFIG_SERVICES["broker_api_host"],
                                                              CONFIG_SERVICES["broker_api_port"],
                                                              task_key)
    CloudLogger.log(
        'DEBUG: Callback URL for {}: {}'.format(file_type, callback),
        log_type='debug', file_name=self.debug_file_name)
    get_url = CONFIG_BROKER["".join([file_type_name, "_url"])].format(cgac_code, start_date, end_date, callback)

    CloudLogger.log("DEBUG: Calling D file API => " + str(get_url),
                    log_type="debug",
                    file_name=self.debug_file_name)
    try:
        # A falsy result from the API call is handed to handleEmptyResponse
        if not self.call_d_file_api(get_url):
            self.handleEmptyResponse(job, valJob)
    except Timeout as e:
        exc = ResponseException(str(e), StatusCode.CLIENT_ERROR, Timeout)
        # Report the wrapped ResponseException, consistent with the ValueError path above
        return False, JsonResponse.error(exc, exc.status, url="", start="", end="", file_type=file_type)

    return True, None
def write_csv(file_name, upload_name, is_local, header, body):
    """Pick a destination (local disk or S3) and write a CSV to it.

    :param file_name: bare file name; only used for local writes, where it is
        prefixed with CONFIG_BROKER['broker_files']
    :param upload_name: name handed to the S3 writer for non-local writes
    :param is_local: truthy to write locally, falsy to write to S3
    :param header: header row passed to the writer's constructor
    :param body: iterable of rows, each passed to ``writer.write``
    :return: None -- this function does not return the file name
    """
    if not is_local:
        target_bucket = CONFIG_BROKER['aws_bucket']
        target_region = CONFIG_BROKER['aws_region']
        csv_writer = CsvS3Writer(target_region, target_bucket, upload_name, header)
        message = 'DEBUG: Writing file to S3...'
    else:
        file_name = CONFIG_BROKER['broker_files'] + file_name
        csv_writer = CsvLocalWriter(file_name, header)
        message = 'DEBUG: Writing file locally...'

    CloudLogger.log(message, log_type="debug", file_name='smx_request.log')

    with csv_writer as writer:
        for row in body:
            writer.write(row)
        # Called once, after every row has been written
        writer.finishBatch()
def completeGeneration(self, generationId):
    """ Callback route used by the D1/D2 API to hand back a generated file.

    The request must carry an 'href' key holding the URL of the file to download.

    Args:
        generationId - Unique key stored in file_generation_task table, used in callback to
            identify which submission this file is for.

    Returns:
        A JSON response: OK with a success message, or a client error when the
        generation ID or 'href' is missing, the task key is unknown, or loading
        the file raised a ResponseException.
    """
    if generationId is None:
        missing_id = ResponseException("Must include a generation ID",StatusCode.CLIENT_ERROR)
        return JsonResponse.error(missing_id, StatusCode.CLIENT_ERROR)

    self.smx_log_file_name = "smx_request.log"

    def debug(message):
        # Every debug line in this route goes to the same log file
        CloudLogger.log(message, log_type="debug", file_name=self.smx_log_file_name)

    # Pull url from request
    safeDictionary = RequestDictionary(self.request)
    debug("DEBUG: Request content => " + safeDictionary.to_string())

    if not safeDictionary.exists("href"):
        missing_href = ResponseException("Request must include href key with URL of D file", StatusCode.CLIENT_ERROR)
        return JsonResponse.error(missing_href, StatusCode.CLIENT_ERROR)

    url = safeDictionary.getValue("href")
    debug("DEBUG: Download URL => " + url)

    # Pull information based on task key
    try:
        debug("DEBUG: Pulling information based on task key...")
        task_query = self.interfaces.jobDb.session.query(FileGenerationTask)
        task_query = task_query.options(joinedload(FileGenerationTask.file_type))
        task_query = task_query.filter(FileGenerationTask.generation_task_key == generationId)
        task = task_query.one()
        job = self.interfaces.jobDb.getJobById(task.job_id)

        debug("DEBUG: Loading D file...")
        result = self.load_d_file(url,job.filename,job.original_filename,job.job_id,self.isLocal)
        debug("DEBUG: Load D file result => " + str(result))

        return JsonResponse.create(StatusCode.OK,{"message":"File loaded successfully"})
    except ResponseException as e:
        return JsonResponse.error(e, e.status)
    except NoResultFound as e:
        # Did not find file generation task
        not_found = ResponseException("Generation task key not found", StatusCode.CLIENT_ERROR)
        return JsonResponse.error(not_found, StatusCode.CLIENT_ERROR)
def generateFile(self):
    """ Start a file generation job for the file type named in the request.

    Returns:
        An error response when the permission check, the prerequisite check or
        the job start fails; otherwise the same response as checkGeneration.
    """
    self.debug_file_name = "debug.log"

    def debug(message):
        # All debug output for this route shares one log file
        CloudLogger.log(message, log_type="debug", file_name=self.debug_file_name)

    debug("DEBUG: Starting D file generation")
    submission_id, file_type = self.getRequestParamsForGenerate()
    debug("DEBUG: Submission ID = " + str(submission_id) + " / File type = " + str(file_type))

    # Check permission to submission
    permitted, permission_error = self.checkSubmissionById(submission_id, file_type)
    if not permitted:
        return permission_error

    job_db = self.interfaces.jobDb
    job = job_db.getJobBySubmissionFileTypeAndJobType(submission_id, self.fileTypeMap[file_type], "file_upload")

    # Check prerequisites on upload job
    if not self.interfaces.jobDb.runChecks(job.job_id):
        exc = ResponseException("Must wait for completion of prerequisite validation job", StatusCode.CLIENT_ERROR)
        return JsonResponse.error(exc, exc.status)

    started, start_error = self.startGenerationJob(submission_id,file_type)
    debug("DEBUG: Finished startGenerationJob method")

    if not started:
        # If not successful, set job status as "failed"
        self.interfaces.jobDb.markJobStatus(job.job_id, "failed")
        return start_error

    # Return same response as check generation route
    return self.checkGeneration(submission_id, file_type)
def checkJobDependencies(self, jobId):
    """ Queue every dependent of a finished job whose prerequisites are all finished.

    Dependents that are not in the 'waiting' state are logged as errors and skipped.
    Only dependents of type 'csv_record_validation' or 'validation' are marked
    'ready' and sent to the job manager.

    Raises:
        ValueError: if the job identified by jobId is not in the 'finished' state.
    """
    # Refuse to proceed unless the current job has actually finished
    if self.getJobStatus(jobId) != self.getJobStatusId('finished'):
        raise ValueError(
            'Current job not finished, unable to check dependencies')

    for depJobId in self.getDependentJobs(jobId):
        # A dependent must still be waiting to be eligible for queuing
        if not (self.getJobStatus(depJobId) == self.getJobStatusId('waiting')):
            CloudLogger.logError(
                "Job dependency is not in a 'waiting' state",
                ResponseException(
                    "Job dependency is not in a 'waiting' state",
                    StatusCode.CLIENT_ERROR, ValueError),
                traceback.extract_stack())
            continue

        # Every prerequisite of the dependent must be finished; all() stops at
        # the first unfinished one.
        prerequisites_done = all(
            self.getJobStatus(preReqJobId) == self.getJobStatusId('finished')
            for preReqJobId in self.getPrerequisiteJobs(depJobId))
        if not prerequisites_done:
            continue

        # The type check here is temporary and needs to be removed once the validator is able
        # to handle cross-file validation job
        is_validation_job = (self.getJobType(depJobId) == 'csv_record_validation'
                             or self.getJobType(depJobId) == 'validation')
        if not is_validation_job:
            continue

        # mark job as ready
        self.markJobStatus(depJobId, 'ready')
        # add to the job queue
        CloudLogger.log("Sending job {} to the job manager".format(
            str(depJobId)))
        ManagerProxy().sendJobRequest(depJobId)
def enqueue(jobID):
    """POST a job to the validator.

    The validator address is built from CONFIG_SERVICES['validator_host'] and
    CONFIG_SERVICES['validator_port']; 'http://' is prepended only when the
    configured host does not already start with a scheme.

    :param jobID: ID of the job to validate, sent as the 'job_id' form field
    :return: the validator's response body decoded from JSON
    """
    CloudLogger.log("Adding job {} to the queue".format(str(jobID)))
    validatorUrl = '{validator_host}:{validator_port}'.format(
        **CONFIG_SERVICES)
    # Check the prefix rather than substring membership, so a host configured
    # with 'https://' is not turned into 'http://https://...'.
    if not validatorUrl.startswith(('http://', 'https://')):
        validatorUrl = 'http://' + validatorUrl
    validatorUrl += '/validate/'
    params = {
        'job_id': jobID
    }
    response = requests.post(validatorUrl, params)
    CloudLogger.log("Job {} has completed validation".format(str(jobID)))
    # Decode the body once and reuse it for both the log line and the return value
    payload = response.json()
    CloudLogger.log("Validator response: {}".format(str(payload)))
    return payload
def load_d_file(self, url, upload_name, timestamped_name, job_id, isLocal):
    """ Download a D file from ``url`` and re-write it through write_csv.

    On success the job is marked "finished" and a dict holding a success message
    and the file name is returned. On any exception the error is logged, stored
    on the job, the job is marked "failed", the session is committed and the
    exception is re-raised.
    """
    job_manager = self.interfaces.jobDb

    def debug(message):
        # All log lines in this method share the smx request log file
        CloudLogger.log(message, log_type="debug", file_name=self.smx_log_file_name)

    try:
        full_file_path = "".join(
            [CONFIG_BROKER['d_file_storage_path'], timestamped_name])

        debug("DEBUG: Downloading file...")
        downloaded = self.download_file(full_file_path, url)
        if not downloaded:
            # Error occurred while downloading file, mark job as failed and record error message
            job_manager.markJobStatus(job_id, "failed")
            job = job_manager.getJobById(job_id)
            file_type = job_manager.getFileType(job_id)
            # Map the file type onto the name used in the error message
            source_by_type = {"award": "ASP", "award_procurement": "FPDS"}
            source = source_by_type.get(file_type, "unknown source")
            job.error_message = "A problem occurred receiving data from {}".format(
                source)

            raise ResponseException(job.error_message,
                                    StatusCode.CLIENT_ERROR)

        lines = self.get_lines_from_csv(full_file_path)

        # First line is passed as the header, the remainder as the body
        write_csv(timestamped_name, upload_name, isLocal, lines[0], lines[1:])

        debug("DEBUG: Marking job id of " + str(job_id) + " as finished")
        job_manager.markJobStatus(job_id, "finished")
        return {"message": "Success", "file_name": timestamped_name}
    except Exception as e:
        debug("ERROR: Exception caught => " + str(e))
        # Log the error
        JsonResponse.error(e, 500)
        failed_job = job_manager.getJobById(job_id)
        failed_job.error_message = str(e)
        job_manager.markJobStatus(job_id, "failed")
        job_manager.session.commit()
        raise e
# we want to allow access to, e.g. config.CONFIG_BROKER from dataactcore.read_config import ( # noqa CONFIG_BROKER, CONFIG_LOGGING, CONFIG_SERVICES, CONFIG_DB, CONFIG_JOB_QUEUE, CONFIG_PATH, ALEMBIC_PATH, MIGRATION_PATH, log_message) from dataactcore.utils.cloudLogger import CloudLogger # Log config values along with warnings for missing files if log_message: CloudLogger.log(log_message)
def get_xml_response_content(self, api_url):
    """ Fetch ``api_url`` with an HTTP GET and return the response body as text.

    The request uses a 120 second timeout.
    """
    CloudLogger.log("DEBUG: Getting XML response",
                    log_type="debug",
                    file_name=self.debug_file_name)
    # NOTE(review): verify=False disables TLS certificate verification for this
    # call -- confirm this is still required for the target API.
    response = requests.get(api_url, verify=False, timeout=120)
    return response.text