def test_check_job_dependencies_has_unfinished_dependencies(database):
    """ Tests check_job_dependencies with a job that isn't finished """
    sess = database.session
    sub = SubmissionFactory(submission_id=1)
    job = JobFactory(submission_id=sub.submission_id, job_status_id=JOB_STATUS_DICT['finished'],
                     job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['award'],
                     number_of_errors=0)
    job_2 = JobFactory(submission_id=sub.submission_id, job_status_id=JOB_STATUS_DICT['waiting'],
                       job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['award'])
    job_3 = JobFactory(submission_id=sub.submission_id, job_status_id=JOB_STATUS_DICT['waiting'],
                       job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['award'],
                       number_of_errors=0)
    sess.add_all([sub, job, job_2, job_3])
    sess.commit()

    # Job 1 is finished and is a prerequisite for job 2 (waiting)
    job_dep = JobDependency(job_id=job_2.job_id, prerequisite_id=job.job_id)
    # Job 3 is also a prerequisite of job 2; it isn't done, so job 2 should stay in "waiting"
    job_dep_2 = JobDependency(job_id=job_2.job_id, prerequisite_id=job_3.job_id)
    sess.add_all([job_dep, job_dep_2])
    sess.commit()

    check_job_dependencies(job.job_id)

    assert job_2.job_status_id == JOB_STATUS_DICT['waiting']
def createJobs(self, filenames, submissionId, existingSubmission=False):
    """ Given the filenames to be uploaded, create the set of jobs needing to be completed for this submission

    Arguments:
        filenames -- List of tuples containing (file type, upload path, original filename)
        submissionId -- Submission ID to be linked to jobs
        existingSubmission -- True if we should update jobs in an existing submission rather than creating new jobs

    Returns:
        Dictionary of upload ids by file type to return to client, used for calling finalize_submission route
    """
    jobsRequired, uploadDict = self.addUploadJobs(filenames, submissionId, existingSubmission)

    if existingSubmission:
        # Find cross-file and external validation jobs and mark them as waiting
        valQuery = self.session.query(Job).filter(
            Job.submission_id == submissionId).filter(
            Job.job_type_id == self.getJobTypeId("validation"))
        valJob = self.runUniqueQuery(valQuery, "No cross-file validation job found", "Conflicting jobs found")
        valJob.job_status_id = self.getJobStatusId("waiting")
        extQuery = self.session.query(Job).filter(
            Job.submission_id == submissionId).filter(
            Job.job_type_id == self.getJobTypeId("external_validation"))
        extJob = self.runUniqueQuery(extQuery, "No external validation job found", "Conflicting jobs found")
        extJob.job_status_id = self.getJobStatusId("waiting")
        self.session.commit()
    else:
        # Create cross-file validation job
        validationJob = Job(job_status_id=self.getJobStatusId("waiting"),
                            job_type_id=self.getJobTypeId("validation"), submission_id=submissionId)
        self.session.add(validationJob)
        # Create external validation job
        externalJob = Job(job_status_id=self.getJobStatusId("waiting"),
                          job_type_id=self.getJobTypeId("external_validation"), submission_id=submissionId)
        self.session.add(externalJob)
        self.session.flush()
        # Create dependencies for validation jobs
        for job_id in jobsRequired:
            valDependency = JobDependency(job_id=validationJob.job_id, prerequisite_id=job_id)
            self.session.add(valDependency)
            extDependency = JobDependency(job_id=externalJob.job_id, prerequisite_id=job_id)
            self.session.add(extDependency)

    # Commit all changes
    self.session.commit()
    uploadDict["submission_id"] = submissionId
    return uploadDict
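# A minimal usage sketch for createJobs (hypothetical driver code, not part of the
# original module): `handler` stands in for whatever object carries these methods,
# and the tuples follow the (file type, upload path, original filename) format
# described in the docstring.
filenames = [
    ("appropriations", "s3-key/approp_upload.csv", "approp.csv"),
    ("award_financial", "s3-key/awardfin_upload.csv", "award_fin.csv"),
]
uploadDict = handler.createJobs(filenames, submissionId=1)
# uploadDict maps each file type to its upload job id, plus a "submission_id" key;
# the client passes these back when calling the finalize_submission route.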
def test_check_job_dependencies_ready(mock_sqs_queue, database):
    """ Tests check_job_dependencies with a job that can be set to ready """
    # Mock so it always returns the mock queue for the test
    mock_sqs_queue.return_value = SQSMockQueue
    sess = database.session
    sub = SubmissionFactory(submission_id=1)
    job = JobFactory(submission_id=sub.submission_id, job_status_id=JOB_STATUS_DICT['finished'],
                     job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['award'],
                     number_of_errors=0)
    job_2 = JobFactory(submission_id=sub.submission_id, job_status_id=JOB_STATUS_DICT['waiting'],
                       job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['award'])
    sess.add_all([sub, job, job_2])
    sess.commit()

    # Job 1 is finished with no errors and is the only prerequisite for job 2 (waiting)
    job_dep = JobDependency(job_id=job_2.job_id, prerequisite_id=job.job_id)
    sess.add(job_dep)
    sess.commit()

    check_job_dependencies(job.job_id)

    assert job_2.job_status_id == JOB_STATUS_DICT['ready']
def test_check_job_dependencies_prior_dependency_has_errors(database):
    """ Tests check_job_dependencies with a job that is finished but has errors """
    sess = database.session
    sub = SubmissionFactory(submission_id=1)
    job = JobFactory(
        submission_id=sub.submission_id,
        job_status=sess.query(JobStatus).filter_by(name='finished').one(),
        job_type=sess.query(JobType).filter_by(name='csv_record_validation').one(),
        file_type=sess.query(FileType).filter_by(name='award').one(),
        number_of_errors=3)
    job_2 = JobFactory(
        submission_id=sub.submission_id,
        job_status=sess.query(JobStatus).filter_by(name='waiting').one(),
        job_type=sess.query(JobType).filter_by(name='csv_record_validation').one(),
        file_type=sess.query(FileType).filter_by(name='award').one())
    sess.add_all([sub, job, job_2])
    sess.commit()

    # Job 1 is finished and is a prerequisite for job 2 (waiting), but it has errors
    job_dep = JobDependency(job_id=job_2.job_id, prerequisite_id=job.job_id)
    sess.add(job_dep)
    sess.commit()

    check_job_dependencies(job.job_id)

    assert job_2.job_status_id == JOB_STATUS_DICT['waiting']
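# Taken together, the three tests above pin down the rule check_job_dependencies
# enforces: a dependent job moves from "waiting" to "ready" (and gets queued) only
# when every prerequisite is finished with zero errors. Below is a standalone
# sketch of that rule using plain dicts instead of the ORM models -- an
# illustration of the tested behavior, not the actual implementation.
jobs = {
    1: {"status": "finished", "errors": 0},
    2: {"status": "waiting", "errors": 0},
    3: {"status": "waiting", "errors": 0},
}
dependencies = [(2, 1), (2, 3)]  # (job_id, prerequisite_id) pairs

def check_job_dependencies_sketch(finished_job_id):
    # Find jobs that list the finished job as a prerequisite
    dependents = {job_id for job_id, prereq_id in dependencies if prereq_id == finished_job_id}
    for dep_id in dependents:
        prereq_ids = [prereq_id for job_id, prereq_id in dependencies if job_id == dep_id]
        # Ready only if every prerequisite finished cleanly
        if all(jobs[p]["status"] == "finished" and jobs[p]["errors"] == 0 for p in prereq_ids):
            jobs[dep_id]["status"] = "ready"  # the real code also enqueues the job to SQS

check_job_dependencies_sketch(1)
assert jobs[2]["status"] == "waiting"  # job 3 is an unfinished prerequisite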
def create_jobs(upload_files, submission, existing_submission=False):
    """Create the set of jobs associated with the specified submission

    Arguments:
        upload_files -- list of named tuples that describe files uploaded to the broker
        submission -- submission
        existing_submission -- true if we should update jobs in an existing submission
            rather than creating new jobs

    Returns:
        Dictionary of upload ids by file type to return to client, used for calling finalize_submission route
    """
    sess = GlobalDB.db().session
    submission_id = submission.submission_id

    # Create the file upload and single-file validation jobs and set up the
    # dependencies between them. Before starting, sort the incoming list of jobs
    # by letter so that jobs dependent on the awards jobs being present are
    # processed last.
    jobs_required = []
    upload_dict = {}
    sorted_uploads = sorted(upload_files, key=attrgetter('file_letter'))

    for upload_file in sorted_uploads:
        validation_job_id, upload_job_id = add_jobs_for_uploaded_file(upload_file, submission_id,
                                                                      existing_submission)
        if validation_job_id:
            jobs_required.append(validation_job_id)
        upload_dict[upload_file.file_type] = upload_job_id

    # Once the single-file upload/validation jobs are created, create the
    # cross-file validation job and its dependencies
    if existing_submission and not submission.d2_submission:
        # Find the cross-file job and mark it as waiting
        # (note: job_type of 'validation' is a cross-file job)
        val_job = sess.query(Job).\
            filter_by(submission_id=submission_id, job_type_id=JOB_TYPE_DICT["validation"]).\
            one()
        val_job.job_status_id = JOB_STATUS_DICT["waiting"]
        submission.updated_at = time.strftime("%c")
    # todo: add these back in for detached_d2 when we have actual validations
    elif not submission.d2_submission:
        # Create cross-file validation job
        validation_job = Job(job_status_id=JOB_STATUS_DICT["waiting"],
                             job_type_id=JOB_TYPE_DICT["validation"], submission_id=submission_id)
        sess.add(validation_job)
        sess.flush()
        # Create dependencies for the cross-file validation job
        for job_id in jobs_required:
            val_dependency = JobDependency(job_id=validation_job.job_id, prerequisite_id=job_id)
            sess.add(val_dependency)

    sess.commit()
    upload_dict["submission_id"] = submission_id
    return upload_dict
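# The upload_files argument above is a list of named tuples. A minimal sketch of
# building one and calling create_jobs; the tuple name "UploadFile" and the call
# itself are illustrative assumptions, but the four fields are the ones the
# job-creation code actually reads (file_letter for sort order, file_type,
# file_name, upload_name).
from collections import namedtuple

UploadFile = namedtuple('UploadFile', ['file_letter', 'file_type', 'file_name', 'upload_name'])

upload_files = [
    UploadFile('B', 'program_activity', 'prog_act.csv', 's3-key/prog_act_upload.csv'),
    UploadFile('A', 'appropriations', 'approp.csv', 's3-key/approp_upload.csv'),
]
# Sorting by file_letter ensures awards-dependent jobs are processed last:
# upload_dict = create_jobs(upload_files, submission)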
def setup_file_generation_submission(cls, sess, submission_id=None):
    """Create jobs for D, E, and F files."""
    submission_id = submission_id or cls.generation_submission_id
    submission = sess.query(Submission).filter(Submission.submission_id == submission_id).one()

    # Create D1 jobs ready for the generation route to be called
    insert_job(
        sess,
        FILE_TYPE_DICT['award_procurement'],
        JOB_STATUS_DICT['ready'],
        JOB_TYPE_DICT['file_upload'],
        submission.submission_id
    )
    award_proc_val_job = insert_job(
        sess,
        FILE_TYPE_DICT['award_procurement'],
        JOB_STATUS_DICT['waiting'],
        JOB_TYPE_DICT['csv_record_validation'],
        submission.submission_id
    )

    # Create E and F jobs ready for the check route
    exec_comp_job = insert_job(
        sess,
        FILE_TYPE_DICT['executive_compensation'],
        JOB_STATUS_DICT['finished'],
        JOB_TYPE_DICT['file_upload'],
        submission.submission_id
    )
    sub_award_job = insert_job(
        sess,
        FILE_TYPE_DICT['sub_award'],
        JOB_STATUS_DICT['invalid'],
        JOB_TYPE_DICT['file_upload'],
        submission.submission_id
    )
    sub_award_job.error_message = "File was invalid"

    # Create D2 jobs
    insert_job(
        sess,
        FILE_TYPE_DICT['award'],
        JOB_STATUS_DICT['finished'],
        JOB_TYPE_DICT['file_upload'],
        submission.submission_id
    )
    insert_job(
        sess,
        FILE_TYPE_DICT['award'],
        JOB_STATUS_DICT['invalid'],
        JOB_TYPE_DICT['csv_record_validation'],
        submission.submission_id
    )

    # Create the dependency: the E job waits on the D1 validation job
    exec_comp_dep = JobDependency(
        job_id=exec_comp_job.job_id,
        prerequisite_id=award_proc_val_job.job_id
    )
    sess.add(exec_comp_dep)

    sess.commit()
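# insert_job is used above but not shown in this section. A minimal sketch of
# what it presumably does, with the signature inferred from the call sites
# (an assumption, not the actual helper): build a Job row, flush so job_id is
# populated for dependency rows, and return it.
def insert_job(sess, filetype, status, type_id, submission_id):
    job = Job(
        file_type_id=filetype,
        job_status_id=status,
        job_type_id=type_id,
        submission_id=submission_id
    )
    sess.add(job)
    sess.flush()  # assigns job.job_id without committing the transaction
    return job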
def setupFileGenerationSubmission(cls, sess):
    """Create jobs for D, E, and F files."""
    submission = sess.query(Submission).filter(
        Submission.submission_id == cls.generation_submission_id).one()

    # Create D1 jobs ready for the generation route to be called
    cls.insertJob(sess, cls.fileTypeDict['award_procurement'], cls.jobStatusDict['ready'],
                  cls.jobTypeDict['file_upload'], submission.submission_id)
    awardProcValJob = cls.insertJob(sess, cls.fileTypeDict['award_procurement'],
                                    cls.jobStatusDict['waiting'], cls.jobTypeDict['csv_record_validation'],
                                    submission.submission_id)

    # Create E and F jobs ready for the check route
    awardeeAttJob = cls.insertJob(sess, cls.fileTypeDict['awardee_attributes'],
                                  cls.jobStatusDict['finished'], cls.jobTypeDict['file_upload'],
                                  submission.submission_id)
    subAwardJob = cls.insertJob(sess, cls.fileTypeDict['sub_award'], cls.jobStatusDict['invalid'],
                                cls.jobTypeDict['file_upload'], submission.submission_id)
    subAwardJob.error_message = "File was invalid"

    # Create D2 jobs
    cls.insertJob(sess, cls.fileTypeDict['award'], cls.jobStatusDict['finished'],
                  cls.jobTypeDict['file_upload'], submission.submission_id)
    cls.insertJob(sess, cls.fileTypeDict['award'], cls.jobStatusDict['invalid'],
                  cls.jobTypeDict['csv_record_validation'], submission.submission_id)

    # Create the dependency: the E job waits on the D1 validation job
    awardeeAttDep = JobDependency(job_id=awardeeAttJob.job_id, prerequisite_id=awardProcValJob.job_id)
    sess.add(awardeeAttDep)
    sess.commit()
def setUpClass(cls):
    """Set up class-wide resources (test data)"""
    super(JobTests, cls).setUpClass()
    user = cls.userId

    # Flag for testing a million+ errors (can take ~30 min to run)
    cls.includeLongTests = False

    with createApp().app_context():
        # Get the database session
        sess = GlobalDB.db().session

        # Create test submissions and jobs, also uploading
        # the files needed for each job.
        jobDict = {}

        submissionId = cls.insertSubmission(sess, user)
        job_info = Job(job_status_id=JOB_STATUS_DICT['ready'],
                       job_type_id=JOB_TYPE_DICT['file_upload'],
                       file_type_id=FILE_TYPE_DICT['appropriations'],
                       submission_id=submissionId)
        sess.add(job_info)
        sess.flush()
        jobDict['bad_upload'] = job_info.job_id

        submissionId = cls.insertSubmission(sess, user)
        job_info = Job(job_status_id=JOB_STATUS_DICT['ready'],
                       job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                       file_type_id=FILE_TYPE_DICT['appropriations'],
                       submission_id=submissionId)
        sess.add(job_info)
        sess.flush()
        jobDict['bad_prereq'] = job_info.job_id

        submissionId = cls.insertSubmission(sess, user)
        job_info = Job(job_status_id=JOB_STATUS_DICT['ready'],
                       job_type_id=JOB_TYPE_DICT['external_validation'],
                       file_type_id=FILE_TYPE_DICT['appropriations'],
                       submission_id=submissionId)
        sess.add(job_info)
        sess.flush()
        jobDict['wrong_type'] = job_info.job_id

        submissionId = cls.insertSubmission(sess, user)
        job_info = Job(job_status_id=JOB_STATUS_DICT['finished'],
                       job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                       file_type_id=FILE_TYPE_DICT['appropriations'],
                       submission_id=submissionId)
        sess.add(job_info)
        sess.flush()
        jobDict['not_ready'] = job_info.job_id

        submissionId = cls.insertSubmission(sess, user)
        job_info = Job(filename=cls.uploadFile('testEmpty.csv', user),
                       job_status_id=JOB_STATUS_DICT['ready'],
                       job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                       file_type_id=FILE_TYPE_DICT['appropriations'],
                       submission_id=submissionId)
        sess.add(job_info)
        sess.flush()
        jobDict['empty'] = job_info.job_id

        # Create dependency
        dependency = JobDependency(job_id=jobDict["bad_prereq"], prerequisite_id=jobDict["bad_upload"])
        sess.add(dependency)

        colIdDict = {}
        for fileId in range(1, 5):
            for columnId in range(1, 6):
                if columnId < 3:
                    fieldType = FIELD_TYPE_DICT['INT']
                else:
                    fieldType = FIELD_TYPE_DICT['STRING']
                columnName = "header_{}".format(columnId)
                fileCol = FileColumn(
                    file_id=fileId,
                    field_types_id=fieldType,
                    name=columnName,
                    required=(columnId != FIELD_TYPE_DICT['STRING']))
                sess.add(fileCol)
                sess.flush()
                colIdDict["header_{}_file_type_{}".format(columnId, fileId)] = fileCol.file_column_id

        # Commit submissions/jobs and output IDs
        sess.commit()
        for job_type, job_id in jobDict.items():
            print('{}: {}'.format(job_type, job_id))

        cls.jobDict = jobDict
def add_jobs_for_uploaded_file(upload_file, submission_id, existing_submission):
    """ Add upload and validation jobs for a single filetype

    Arguments:
        upload_file: UploadFile named tuple
        submission_id: submission ID to attach to jobs
        existing_submission: true if we should update existing jobs rather than creating new ones

    Returns:
        the validation job id for this file type (if any)
        the upload job id for this file type
    """
    sess = GlobalDB.db().session
    file_type_id = FILE_TYPE_DICT[upload_file.file_type]
    validation_job_id = None

    # Create a file upload job or, for an existing submission, modify the
    # existing upload job.
    if existing_submission:
        # Mark the existing upload job as running and set the new file name and path
        upload_job = sess.query(Job).filter_by(
            submission_id=submission_id,
            file_type_id=file_type_id,
            job_type_id=JOB_TYPE_DICT['file_upload']
        ).one()
        upload_job.job_status_id = JOB_STATUS_DICT['running']
        upload_job.original_filename = upload_file.file_name
        upload_job.filename = upload_file.upload_name
    else:
        if upload_file.file_type in ["award", "award_procurement"]:
            # File generation is handled on the backend, so mark as ready
            upload_status = JOB_STATUS_DICT['ready']
        elif upload_file.file_type in ["awardee_attributes", "sub_award"]:
            # These are dependent on file D2 validation
            upload_status = JOB_STATUS_DICT['waiting']
        else:
            # Mark as running since the frontend should be doing this upload
            upload_status = JOB_STATUS_DICT['running']
        upload_job = Job(
            original_filename=upload_file.file_name,
            filename=upload_file.upload_name,
            file_type_id=file_type_id,
            job_status_id=upload_status,
            job_type_id=JOB_TYPE_DICT['file_upload'],
            submission_id=submission_id)
        sess.add(upload_job)

    sess.flush()

    if existing_submission:
        # If the file's validation job is attached to an existing submission,
        # reset its status and delete any validation artifacts (e.g., error
        # metadata) that might exist from a previous run.
        val_job = sess.query(Job).filter_by(
            submission_id=submission_id,
            file_type_id=file_type_id,
            job_type_id=JOB_TYPE_DICT['csv_record_validation']
        ).one()
        val_job.job_status_id = JOB_STATUS_DICT['waiting']
        val_job.original_filename = upload_file.file_name
        val_job.filename = upload_file.upload_name
        # Reset file size and number of rows to be set during validation of the new file
        val_job.file_size = None
        val_job.number_of_rows = None
        # Delete error metadata that might exist from a previous run of this validation job
        sess.query(ErrorMetadata).\
            filter(ErrorMetadata.job_id == val_job.job_id).\
            delete(synchronize_session='fetch')
        # Delete file error information that might exist from a previous run of this validation job
        sess.query(File).filter(File.job_id == val_job.job_id).delete(synchronize_session='fetch')
    else:
        # Create a new record validation job and add dependencies if necessary
        if upload_file.file_type == "awardee_attributes":
            d1_val_job = sess.query(Job).\
                filter(Job.submission_id == submission_id,
                       Job.file_type_id == FILE_TYPE_DICT['award_procurement'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                one_or_none()
            if d1_val_job is None:
                raise Exception("Cannot create E job without a D1 job")
            # Add a dependency on the D1 validation job
            d1_dependency = JobDependency(job_id=upload_job.job_id, prerequisite_id=d1_val_job.job_id)
            sess.add(d1_dependency)
        elif upload_file.file_type == "sub_award":
            # todo: check for C validation job
            c_val_job = sess.query(Job).\
                filter(Job.submission_id == submission_id,
                       Job.file_type_id == FILE_TYPE_DICT['award_financial'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                one_or_none()
            if c_val_job is None:
                raise Exception("Cannot create F job without a C job")
            # Add a dependency on the C validation job
            c_dependency = JobDependency(job_id=upload_job.job_id, prerequisite_id=c_val_job.job_id)
            sess.add(c_dependency)
        else:
            # E and F don't get validation jobs
            val_job = Job(
                original_filename=upload_file.file_name,
                filename=upload_file.upload_name,
                file_type_id=file_type_id,
                job_status_id=JOB_STATUS_DICT['waiting'],
                job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                submission_id=submission_id)
            sess.add(val_job)
            sess.flush()
            # Add a dependency between the file upload job and the file validation job
            upload_dependency = JobDependency(job_id=val_job.job_id, prerequisite_id=upload_job.job_id)
            sess.add(upload_dependency)
            validation_job_id = val_job.job_id

    sess.commit()
    return validation_job_id, upload_job.job_id
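# The dependency edges created by add_jobs_for_uploaded_file, summarized as
# (dependent, prerequisite) pairs; the labels are illustrative shorthand for the
# file letters, not identifiers from the code above.
DEPENDENCY_EDGES = [
    ("E (awardee_attributes) upload", "D1 (award_procurement) csv_record_validation"),
    ("F (sub_award) upload", "C (award_financial) csv_record_validation"),
    ("csv_record_validation for other file types", "file_upload for the same file type"),
]
for dependent, prerequisite in DEPENDENCY_EDGES:
    print("{} waits on {}".format(dependent, prerequisite))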
def setUpClass(cls):
    """Set up class-wide resources (test data)"""
    super(JobTests, cls).setUpClass()
    # TODO: refactor into a pytest fixture

    # Flag for testing a million+ errors (can take ~30 min to run)
    cls.includeLongTests = False

    validationDb = cls.validationDb
    jobTracker = cls.jobTracker

    # Clear validation rules
    for fileType in ["award", "award_financial", "appropriations", "program_activity"]:
        validationDb.removeRulesByFileType(fileType)
        validationDb.removeColumnsByFileType(fileType)

    # Create submissions and get IDs back
    submissionIDs = {}
    for i in range(1, 17):
        submissionIDs[i] = cls.insertSubmission(jobTracker, userId=cls.userId)

    csvFiles = {
        "valid": {"filename": "testValid.csv", "status": "ready", "type": "csv_record_validation",
                  "submissionLocalId": 1, "fileType": 1},
        "bad_upload": {"filename": "", "status": "ready", "type": "file_upload",
                       "submissionLocalId": 2, "fileType": 1},
        "bad_prereq": {"filename": "", "status": "ready", "type": "csv_record_validation",
                       "submissionLocalId": 2, "fileType": 1},
        "wrong_type": {"filename": "", "status": "ready", "type": "external_validation",
                       "submissionLocalId": 4, "fileType": 1},
        "not_ready": {"filename": "", "status": "finished", "type": "csv_record_validation",
                      "submissionLocalId": 5, "fileType": 1},
        "valid_upload": {"filename": "", "status": "finished", "type": "file_upload",
                         "submissionLocalId": 6, "fileType": 1},
        "valid_prereq": {"filename": "testPrereq.csv", "status": "ready", "type": "csv_record_validation",
                         "submissionLocalId": 6, "fileType": 1},
        "bad_values": {"filename": "testBadValues.csv", "status": "ready", "type": "csv_record_validation",
                       "submissionLocalId": 8, "fileType": 1},
        "mixed": {"filename": "testMixed.csv", "status": "ready", "type": "csv_record_validation",
                  "submissionLocalId": 9, "fileType": 1},
        "empty": {"filename": "testEmpty.csv", "status": "ready", "type": "csv_record_validation",
                  "submissionLocalId": 10, "fileType": 1},
        "missing_header": {"filename": "testMissingHeader.csv", "status": "ready",
                           "type": "csv_record_validation", "submissionLocalId": 11, "fileType": 1},
        "bad_header": {"filename": "testBadHeader.csv", "status": "ready", "type": "csv_record_validation",
                       "submissionLocalId": 12, "fileType": 2},
        "many": {"filename": "testMany.csv", "status": "ready", "type": "csv_record_validation",
                 "submissionLocalId": 11, "fileType": 3},
        "odd_characters": {"filename": "testOddCharacters.csv", "status": "ready",
                           "type": "csv_record_validation", "submissionLocalId": 14, "fileType": 2},
        "many_bad": {"filename": "testManyBadValues.csv", "status": "ready", "type": "csv_record_validation",
                     "submissionLocalId": 11, "fileType": 4},
        "rules": {"filename": "testRules.csv", "status": "ready", "type": "csv_record_validation",
                  "submissionLocalId": 16, "fileType": 3}
    }

    # Upload needed files to S3
    for key in csvFiles.keys():
        csvFiles[key]["s3Filename"] = cls.uploadFile(csvFiles[key]["filename"], cls.userId)

    jobIdDict = {}
    for key in csvFiles.keys():
        file = csvFiles[key]
        job = cls.addJob(str(jobTracker.getStatusId(file["status"])),
                         str(jobTracker.getTypeId(file["type"])),
                         str(submissionIDs[file["submissionLocalId"]]),
                         file["s3Filename"],
                         str(file["fileType"]),
                         jobTracker.session)
        # TODO: fix the check below--does this error really happen?
        if job.job_id is None:
            # Failed to commit job correctly
            raise Exception("Job for {} did not get an id back".format(key))
        jobIdDict[key] = job.job_id
        # Print submission IDs for error report checking
        print("{}: {}, ".format(key, jobTracker.getSubmissionId(job.job_id)), end="")

    # Create dependencies
    dependencies = [
        JobDependency(job_id=str(jobIdDict["bad_prereq"]), prerequisite_id=str(jobIdDict["bad_upload"])),
        JobDependency(job_id=str(jobIdDict["valid_prereq"]), prerequisite_id=str(jobIdDict["valid_upload"]))
    ]
    for dependency in dependencies:
        jobTracker.session.add(dependency)
    jobTracker.session.commit()

    colIdDict = {}
    for fileId in range(1, 5):
        for columnId in range(1, 6):
            # TODO: get rid of hard-coded surrogate keys
            if columnId < 3:
                fieldType = 1
            else:
                fieldType = 4
            columnName = "header_{}".format(columnId)
            column = cls.addFileColumn(fileId, fieldType, columnName, "", (columnId != 3),
                                       validationDb.session)
            colIdDict["header_{}_file_type_{}".format(columnId, fileId)] = column.file_column_id

    rules = [
        Rule(file_column_id=str(colIdDict["header_1_file_type_3"]), rule_type_id=5, rule_text_1=0,
             description='value 1 must be greater than zero', rule_timing_id=1),
        Rule(file_column_id=str(colIdDict["header_1_file_type_3"]), rule_type_id=3, rule_text_1=13,
             description='value 1 may not be 13', rule_timing_id=1),
        Rule(file_column_id=str(colIdDict["header_5_file_type_3"]), rule_type_id=1, rule_text_1="INT",
             description='value 5 must be an integer', rule_timing_id=1),
        Rule(file_column_id=str(colIdDict["header_3_file_type_3"]), rule_type_id=2, rule_text_1=42,
             description='value 3 must be equal to 42 if present', rule_timing_id=1),
        Rule(file_column_id=str(colIdDict["header_1_file_type_3"]), rule_type_id=4, rule_text_1=100,
             description='value 1 must be less than 100', rule_timing_id=1),
        # This rule should never be checked because its rule_timing is 2
        Rule(file_column_id=str(colIdDict["header_1_file_type_3"]), rule_type_id=2, rule_text_1=" ",
             description='None shall pass', rule_timing_id=2)
    ]
    for rule in rules:
        validationDb.session.add(rule)
    validationDb.session.commit()

    # If staging already has corresponding job tables, drop them
    for k, v in jobIdDict.items():
        try:
            cls.stagingDb.dropTable("job{}".format(v))
        except Exception:
            cls.stagingDb.session.close()
            cls.stagingDb.session = cls.stagingDb.Session()

    cls.jobIdDict = jobIdDict
def addUploadJobs(self, filenames, submissionId, existingSubmission):
    """ Add upload jobs to the job tracker database

    Arguments:
        filenames -- List of tuples containing (file type, upload path, original filename)
        submissionId -- Submission ID to attach to jobs
        existingSubmission -- True if we should update existing jobs rather than creating new ones

    Returns:
        jobsRequired -- List of job ids required for validation jobs, used to populate the prerequisite table
        uploadDict -- Dictionary of upload ids by file type to return to client, used for calling
            finalize_submission route
    """
    # Keep a list of job ids required for validation jobs
    jobsRequired = []
    # Dictionary of upload ids by file type to return to client
    uploadDict = {}

    for fileType, filePath, filename in filenames:
        fileTypeQuery = self.session.query(FileType.file_type_id).filter(FileType.name == fileType)
        fileTypeResult = self.runUniqueQuery(fileTypeQuery, "No matching file type",
                                             "Multiple matching file types")
        fileTypeId = fileTypeResult.file_type_id

        if existingSubmission:
            # Find the existing upload job and mark it as running
            uploadQuery = self.session.query(Job).filter(
                Job.submission_id == submissionId).filter(
                Job.file_type_id == fileTypeId).filter(
                Job.job_type_id == self.getJobTypeId("file_upload"))
            uploadJob = self.runUniqueQuery(uploadQuery, "No upload job found for this file",
                                            "Conflicting jobs found")
            # Mark as running and set the new file name and path
            uploadJob.job_status_id = self.getJobStatusId("running")
            uploadJob.original_filename = filename
            uploadJob.filename = filePath
            self.session.commit()
        else:
            # Create an upload job, marked as running since the frontend should be doing this upload
            uploadJob = Job(original_filename=filename, filename=filePath, file_type_id=fileTypeId,
                            job_status_id=self.getJobStatusId("running"),
                            job_type_id=self.getJobTypeId("file_upload"),
                            submission_id=submissionId)
            self.session.add(uploadJob)

        if existingSubmission:
            valQuery = self.session.query(Job).filter(
                Job.submission_id == submissionId).filter(
                Job.file_type_id == fileTypeId).filter(
                Job.job_type_id == self.getJobTypeId("csv_record_validation"))
            valJob = self.runUniqueQuery(valQuery, "No validation job found for this file",
                                         "Conflicting jobs found")
            valJob.job_status_id = self.getJobStatusId("waiting")
            valJob.original_filename = filename
            valJob.filename = filePath
            # Reset file size and number of rows to be set during validation of the new file
            valJob.file_size = None
            valJob.number_of_rows = None
            # Reset number of errors
            errorDb = ErrorHandler()
            errorDb.resetErrorsByJobId(valJob.job_id)
            errorDb.resetFileByJobId(valJob.job_id)
            self.session.commit()
        else:
            # Create the parse-into-DB job
            valJob = Job(original_filename=filename, filename=filePath, file_type_id=fileTypeId,
                         job_status_id=self.getJobStatusId("waiting"),
                         job_type_id=self.getJobTypeId("csv_record_validation"),
                         submission_id=submissionId)
            self.session.add(valJob)
            self.session.flush()

        if not existingSubmission:
            # Add a dependency between the file upload job and the db upload job
            uploadDependency = JobDependency(job_id=valJob.job_id, prerequisite_id=uploadJob.job_id)
            self.session.add(uploadDependency)

        # Later validation jobs are dependent only on record-level validation, not upload jobs
        jobsRequired.append(valJob.job_id)
        uploadDict[fileType] = uploadJob.job_id

    # Return the validation job IDs required and the upload job IDs
    return jobsRequired, uploadDict
# Check whether an upload job already exists for this file type
uploadJob = session.query(Job).filter(
    Job.submission_id == submissionId).filter(
    Job.file_type_id == fileTypeId).filter(
    Job.job_type_id == fileUpload).all()
if uploadJob is None or len(uploadJob) == 0:
    # Create an upload job with ready status
    newUploadJob = Job(job_status_id=ready, job_type_id=fileUpload,
                       submission_id=submissionId, file_type_id=fileTypeId)
    session.add(newUploadJob)
    session.commit()
    uploadId = newUploadJob.job_id
else:
    uploadId = uploadJob[0].job_id

# If the type is D1 or D2, also create a validation job (ready status) that depends on the upload job
if fileTypeId in [awardTypeId, awardProcTypeId]:
    # Check whether a validation job already exists
    existingValJob = session.query(Job).filter(
        Job.submission_id == submissionId).filter(
        Job.file_type_id == fileTypeId).filter(
        Job.job_type_id == validation).all()
    if existingValJob is None or len(existingValJob) == 0:
        validationJob = Job(job_status_id=ready, job_type_id=validation,
                            submission_id=submissionId, file_type_id=fileTypeId)
        session.add(validationJob)
        session.commit()
        dependency = JobDependency(job_id=validationJob.job_id, prerequisite_id=uploadId)
        session.add(dependency)
        session.commit()
def addJobsForFileType(fileType, filePath, filename, submissionId, existingSubmission, jobsRequired, uploadDict):
    """ Add upload and validation jobs for a single filetype

    Args:
        fileType: What type of file to add jobs for
        filePath: Path to upload the file to
        filename: Original filename
        submissionId: Submission ID to attach to jobs
        existingSubmission: True if we should update existing jobs rather than creating new ones
        jobsRequired: List of job ids that will be prerequisites for the cross-file job
        uploadDict: Dictionary of upload ids by file type to return to client, used for calling
            finalize_submission route

    Returns:
        jobsRequired: List of job ids that will be prerequisites for the cross-file job
        uploadDict: Dictionary of upload ids by file type to return to client, used for calling
            finalize_submission route
    """
    sess = GlobalDB.db().session
    fileTypeId = FILE_TYPE_DICT[fileType]

    # Create a file upload job or, for an existing submission, modify the
    # existing upload job.
    if existingSubmission:
        # Mark the existing upload job as running and set the new file name and path
        uploadJob = sess.query(Job).filter_by(
            submission_id=submissionId,
            file_type_id=fileTypeId,
            job_type_id=JOB_TYPE_DICT['file_upload']).one()
        uploadJob.job_status_id = JOB_STATUS_DICT['running']
        uploadJob.original_filename = filename
        uploadJob.filename = filePath
    else:
        if fileType in ["award", "award_procurement"]:
            # File generation is handled on the backend, so mark as ready
            uploadStatus = JOB_STATUS_DICT['ready']
        elif fileType in ["awardee_attributes", "sub_award"]:
            # These are dependent on file D2 validation
            uploadStatus = JOB_STATUS_DICT['waiting']
        else:
            # Mark as running since the frontend should be doing this upload
            uploadStatus = JOB_STATUS_DICT['running']
        uploadJob = Job(original_filename=filename, filename=filePath, file_type_id=fileTypeId,
                        job_status_id=uploadStatus, job_type_id=JOB_TYPE_DICT['file_upload'],
                        submission_id=submissionId)
        sess.add(uploadJob)

    sess.flush()

    # Create a file validation job or, for an existing submission, modify the
    # existing validation job.
    if existingSubmission:
        # If the file's validation job is attached to an existing submission,
        # reset its status and delete any validation artifacts (e.g., error
        # metadata) that might exist from a previous run.
        valJob = sess.query(Job).filter_by(
            submission_id=submissionId,
            file_type_id=fileTypeId,
            job_type_id=JOB_TYPE_DICT['csv_record_validation']).one()
        valJob.job_status_id = JOB_STATUS_DICT['waiting']
        valJob.original_filename = filename
        valJob.filename = filePath
        # Reset file size and number of rows to be set during validation of the new file
        valJob.file_size = None
        valJob.number_of_rows = None
        # Delete error metadata that might exist from a previous run of this validation job
        sess.query(ErrorMetadata).\
            filter(ErrorMetadata.job_id == valJob.job_id).\
            delete(synchronize_session='fetch')
        # Delete file error information that might exist from a previous run of this validation job
        sess.query(File).filter(File.job_id == valJob.job_id).delete(synchronize_session='fetch')
    else:
        # Create a new record validation job and add dependencies if necessary
        if fileType == "awardee_attributes":
            d1ValJob = sess.query(Job).\
                filter(Job.submission_id == submissionId,
                       Job.file_type_id == FILE_TYPE_DICT['award_procurement'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                first()
            if d1ValJob is None:
                raise Exception("Cannot create E job without a D1 job")
            # Add a dependency on the D1 validation job
            d1Dependency = JobDependency(job_id=uploadJob.job_id, prerequisite_id=d1ValJob.job_id)
            sess.add(d1Dependency)
        elif fileType == "sub_award":
            # todo: check for C validation job
            cValJob = sess.query(Job).\
                filter(Job.submission_id == submissionId,
                       Job.file_type_id == FILE_TYPE_DICT['award_financial'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                first()
            if cValJob is None:
                raise Exception("Cannot create F job without a C job")
            # Add a dependency on the C validation job
            cDependency = JobDependency(job_id=uploadJob.job_id, prerequisite_id=cValJob.job_id)
            sess.add(cDependency)
        else:
            # E and F don't get validation jobs
            valJob = Job(original_filename=filename, filename=filePath, file_type_id=fileTypeId,
                         job_status_id=JOB_STATUS_DICT['waiting'],
                         job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                         submission_id=submissionId)
            sess.add(valJob)
            sess.flush()
            # Add a dependency between the file upload job and the db upload job
            uploadDependency = JobDependency(job_id=valJob.job_id, prerequisite_id=uploadJob.job_id)
            sess.add(uploadDependency)
            jobsRequired.append(valJob.job_id)

    sess.commit()
    uploadDict[fileType] = uploadJob.job_id
    return jobsRequired, uploadDict