def create_generation_job(file_type_name, start_date, end_date):
    """ Create a new file generation job.

        Args:
            file_type_name: the name of the file type being generated
            start_date: the start date for the generation job, only used for detached files
            end_date: the end date for the generation job, only used for detached files

        Returns:
            the file generation job
    """
    sess = GlobalDB.db().session

    # Create a new job for a detached generation
    job = Job(job_type_id=lookups.JOB_TYPE_DICT['file_upload'], user_id=g.user.user_id,
              file_type_id=lookups.FILE_TYPE_DICT[file_type_name], start_date=start_date, end_date=end_date)
    sess.add(job)

    # Update the job details
    job.message = None
    job.job_status_id = lookups.JOB_STATUS_DICT["ready"]
    sess.commit()
    sess.refresh(job)

    return job

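# Usage sketch for create_generation_job above (an illustration, not confirmed
# by the source: assumes a Flask request context where g.user is populated and
# the lookup dicts are loaded; the date strings are placeholder values):
#
#     job = create_generation_job('award', '01/01/2017', '03/31/2017')
#     assert job.job_status_id == lookups.JOB_STATUS_DICT['ready']
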
def insert_job(sess, filetype, status, type_id, submission, job_id=None, filename=None, original_filename=None,
               file_size=None, num_rows=None, num_valid_rows=0, num_errors=0, updated_at=None):
    """Insert one job into job tracker and get ID back."""
    if not updated_at:
        updated_at = datetime.utcnow()
    job = Job(created_at=datetime.utcnow(), updated_at=updated_at, file_type_id=filetype, job_status_id=status,
              job_type_id=type_id, submission_id=submission, filename=filename, original_filename=original_filename,
              file_size=file_size, number_of_rows=num_rows, number_of_rows_valid=num_valid_rows,
              number_of_errors=num_errors)
    if job_id:
        job.job_id = job_id
    sess.add(job)
    sess.commit()
    return job

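# Example use of the insert_job helper above (a sketch: assumes datetime and
# timedelta are imported and the lookup dicts are loaded). updated_at is the
# only timestamp the caller can override; created_at is always "now", which
# makes the override useful for backdating jobs in staleness tests:
#
#     a_month_ago = datetime.utcnow() - timedelta(days=30)
#     job = insert_job(sess, FILE_TYPE_DICT['appropriations'], JOB_STATUS_DICT['finished'],
#                      JOB_TYPE_DICT['csv_record_validation'], submission.submission_id,
#                      updated_at=a_month_ago)
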
def insert_job(sess, filetype, status, type_id, submission, job_id=None, filename=None, original_filename=None,
               file_size=None, num_rows=None, num_errors=0):
    """Insert one job into job tracker and get ID back."""
    job = Job(file_type_id=filetype, job_status_id=status, job_type_id=type_id, submission_id=submission,
              filename=filename, original_filename=original_filename, file_size=file_size,
              number_of_rows=num_rows, number_of_errors=num_errors)
    if job_id:
        job.job_id = job_id
    sess.add(job)
    sess.commit()
    return job

def createJobs(self, filenames, submissionId, existingSubmission=False):
    """ Given the filenames to be uploaded, create the set of jobs needing to be completed for this submission

    Arguments:
        filenames -- List of tuples containing (file type, upload path, original filenames)
        submissionId -- Submission ID to be linked to jobs
        existingSubmission -- True if we should update jobs in an existing submission rather than creating new jobs

    Returns:
        Dictionary of upload ids by filename to return to client, used for calling finalize_submission route
    """
    jobsRequired, uploadDict = self.addUploadJobs(filenames, submissionId, existingSubmission)

    if existingSubmission:
        # Find cross-file and external validation jobs and mark them as waiting
        valQuery = self.session.query(Job).filter(Job.submission_id == submissionId).filter(
            Job.job_type_id == self.getJobTypeId("validation"))
        valJob = self.runUniqueQuery(valQuery, "No cross-file validation job found", "Conflicting jobs found")
        valJob.job_status_id = self.getJobStatusId("waiting")
        extQuery = self.session.query(Job).filter(Job.submission_id == submissionId).filter(
            Job.job_type_id == self.getJobTypeId("external_validation"))
        extJob = self.runUniqueQuery(extQuery, "No external validation job found", "Conflicting jobs found")
        extJob.job_status_id = self.getJobStatusId("waiting")
        self.session.commit()
    else:
        # Create validation job
        validationJob = Job(job_status_id=self.getJobStatusId("waiting"),
                            job_type_id=self.getJobTypeId("validation"), submission_id=submissionId)
        self.session.add(validationJob)
        # Create external validation job
        externalJob = Job(job_status_id=self.getJobStatusId("waiting"),
                          job_type_id=self.getJobTypeId("external_validation"), submission_id=submissionId)
        self.session.add(externalJob)
        self.session.flush()
        # Create dependencies for validation jobs
        for job_id in jobsRequired:
            valDependency = JobDependency(job_id=validationJob.job_id, prerequisite_id=job_id)
            self.session.add(valDependency)
            extDependency = JobDependency(job_id=externalJob.job_id, prerequisite_id=job_id)
            self.session.add(extDependency)

    # Commit all changes
    self.session.commit()
    uploadDict["submission_id"] = submissionId
    return uploadDict

def insertJob(jobTracker, filetype, status, type_id, submission, job_id=None, filename=None, file_size=None,
              num_rows=None):
    """Insert one job into job tracker and get ID back."""
    job = Job(
        file_type_id=filetype,
        job_status_id=status,
        job_type_id=type_id,
        submission_id=submission,
        original_filename=filename,
        file_size=file_size,
        number_of_rows=num_rows
    )
    if job_id:
        job.job_id = job_id
    jobTracker.session.add(job)
    jobTracker.session.commit()
    return job.job_id

def setUpClass(cls): """Set up class-wide resources like submissions and jobs.""" super(UserTests, cls).setUpClass() # Add submissions to one of the users jobDb = cls.jobTracker # Delete existing submissions for approved user jobDb.deleteSubmissionsForUserId(cls.approved_user_id) for i in range(0, 5): sub = Submission(user_id=cls.approved_user_id) jobDb.session.add(sub) jobDb.session.commit() if i == 0: cls.submission_id = sub.submission_id # Add submissions for agency user jobDb.deleteSubmissionsForUserId(cls.agency_user_id) for i in range(0, 6): sub = Submission(user_id=cls.agency_user_id) sub.cgac_code = "SYS" jobDb.session.add(sub) jobDb.session.commit() # Add job to first submission job = Job(submission_id=cls.submission_id, job_status_id=3, job_type_id=1, file_type_id=1) jobDb.session.add(job) jobDb.session.commit() cls.uploadId = job.job_id
def setUpClass(cls): """Set up class-wide resources like submissions and jobs.""" super(UserTests, cls).setUpClass() with create_app().app_context(): sess = GlobalDB.db().session # Add submissions for agency user sess.query(Submission).filter(Submission.user_id == cls.agency_user_id).delete() sess.commit() for i in range(0, 6): sub = Submission(user_id=cls.agency_user_id) sub.reporting_start_date = datetime(2015, 10, 1) sub.reporting_end_date = datetime(2015, 12, 31) sub.cgac_code = cls.admin_cgac_code sess.add(sub) sess.commit() if i == 0: cls.submission_id = sub.submission_id # Add job to first submission job = Job( submission_id=cls.submission_id, job_status_id=JOB_STATUS_DICT['running'], job_type_id=JOB_TYPE_DICT['file_upload'], file_type_id=FILE_TYPE_DICT['appropriations'] ) sess.add(job) sess.commit() cls.uploadId = job.job_id
def insert_agency_user_submission_data(sess, submission_id):
    """Insert jobs for the submission, and create a CGAC, FREC, and SubTierAgency"""
    for job_type in ['file_upload', 'csv_record_validation', 'validation']:
        sess.add(Job(file_type_id=FILE_TYPE_DICT['fabs'],
                     job_status_id=JOB_STATUS_DICT['finished'],
                     job_type_id=JOB_TYPE_DICT[job_type],
                     submission_id=submission_id,
                     original_filename=None,
                     file_size=None,
                     number_of_rows=None))
    sess.commit()

    cgac = CGAC(cgac_code="NOT")
    sess.add(cgac)
    sess.commit()
    frec = FREC(cgac_id=cgac.cgac_id, frec_code="BLAH")
    sess.add(frec)
    sess.commit()
    sub = SubTierAgency(sub_tier_agency_code="WRONG", cgac_id=cgac.cgac_id, frec_id=frec.frec_id, is_frec=False)
    sess.add(sub)
    sess.commit()

def create_jobs(upload_files, submission, existing_submission=False):
    """Create the set of jobs associated with the specified submission

    Arguments:
        upload_files -- list of named tuples that describe files uploaded to the broker
        submission -- the submission the jobs are attached to
        existing_submission -- True if we should update jobs in an existing submission rather than creating new jobs

    Returns:
        Dictionary of upload ids by filename to return to client, used for calling finalize_submission route
    """
    sess = GlobalDB.db().session
    submission_id = submission.submission_id

    # Create the file upload and single-file validation jobs and set up the
    # dependencies between them. Before starting, sort the incoming list of
    # jobs by letter to ensure that jobs dependent on the awards jobs being
    # present are processed last.
    jobs_required = []
    upload_dict = {}
    sorted_uploads = sorted(upload_files, key=attrgetter('file_letter'))

    for upload_file in sorted_uploads:
        validation_job_id, upload_job_id = add_jobs_for_uploaded_file(upload_file, submission_id,
                                                                      existing_submission)
        if validation_job_id:
            jobs_required.append(validation_job_id)
        upload_dict[upload_file.file_type] = upload_job_id

    # Once single-file upload/validation jobs are created, create the
    # cross-file validation job and dependencies
    if existing_submission and not submission.d2_submission:
        # Find cross-file jobs and mark them as waiting
        # (note: job_type of 'validation' is a cross-file job)
        val_job = sess.query(Job).\
            filter_by(submission_id=submission_id, job_type_id=JOB_TYPE_DICT["validation"]).\
            one()
        val_job.job_status_id = JOB_STATUS_DICT["waiting"]
        submission.updated_at = time.strftime("%c")
    # todo: add these back in for detached_d2 when we have actual validations
    elif not submission.d2_submission:
        # Create cross-file validation job
        validation_job = Job(job_status_id=JOB_STATUS_DICT["waiting"], job_type_id=JOB_TYPE_DICT["validation"],
                             submission_id=submission_id)
        sess.add(validation_job)
        sess.flush()
        # Create dependencies for validation jobs
        for job_id in jobs_required:
            val_dependency = JobDependency(job_id=validation_job.job_id, prerequisite_id=job_id)
            sess.add(val_dependency)

    sess.commit()
    upload_dict["submission_id"] = submission_id
    return upload_dict

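# The upload_files argument above is a list of named tuples. The exact tuple
# definition lives elsewhere in the broker; this sketch infers the required
# fields from the attributes accessed here (file_type, file_letter) and in
# add_jobs_for_uploaded_file (file_name, upload_name):
#
#     from collections import namedtuple
#     UploadFile = namedtuple('UploadFile', ['file_type', 'file_letter', 'file_name', 'upload_name'])
#     files = [UploadFile('appropriations', 'A', 'approp.csv', '<bucket-key>/approp.csv')]
#     upload_dict = create_jobs(files, submission)
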
def insert_job(sess, filetype, status, type_id, submission, job_id=None, filename=None, original_filename=None,
               file_size=None, num_rows=None, num_errors=0):
    """Insert one job into job tracker and get ID back."""
    job = Job(
        file_type_id=filetype,
        job_status_id=status,
        job_type_id=type_id,
        submission_id=submission,
        filename=filename,
        original_filename=original_filename,
        file_size=file_size,
        number_of_rows=num_rows,
        number_of_errors=num_errors
    )
    if job_id:
        job.job_id = job_id
    sess.add(job)
    sess.commit()
    return job

def addJob(status, jobType, submissionId, s3Filename, fileType, session):
    """ Create a job model and add it to the session """
    job = Job(job_status_id=status, job_type_id=jobType, submission_id=submissionId,
              filename=s3Filename, file_type_id=fileType)
    session.add(job)
    session.commit()
    return job

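# Minimal usage sketch for addJob (assumes the lookup dicts used elsewhere in
# this section are loaded; the filename is an illustrative placeholder):
#
#     job = addJob(JOB_STATUS_DICT['ready'], JOB_TYPE_DICT['file_upload'],
#                  submission.submission_id, 'bucket/path/file.csv',
#                  FILE_TYPE_DICT['appropriations'], sess)
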
def insertJob(jobTracker, filetype, status, type_id, submission, job_id=None, filename=None, file_size=None,
              num_rows=None):
    """Insert one job into job tracker and get ID back."""
    job = Job(file_type_id=filetype, job_status_id=status, job_type_id=type_id, submission_id=submission,
              original_filename=filename, file_size=file_size, number_of_rows=num_rows)
    if job_id:
        job.job_id = job_id
    jobTracker.session.add(job)
    jobTracker.session.commit()
    return job.job_id

def setUpClass(cls): """Set up class-wide resources like submissions and jobs.""" super(UserTests, cls).setUpClass() with createApp().app_context(): sess = GlobalDB.db().session # Add submissions to one of the users # Delete existing submissions for approved user sess.query(Submission).filter( Submission.user_id == cls.approved_user_id).delete() sess.commit() for i in range(0, 5): sub = Submission(user_id=cls.approved_user_id) sub.reporting_start_date = datetime(2015, 10, 1) sub.reporting_end_date = datetime(2015, 12, 31) sess.add(sub) sess.commit() # Add submissions for agency user sess.query(Submission).filter( Submission.user_id == cls.agency_user_id).delete() sess.commit() for i in range(0, 6): sub = Submission(user_id=cls.agency_user_id) sub.reporting_start_date = datetime(2015, 10, 1) sub.reporting_end_date = datetime(2015, 12, 31) sub.cgac_code = "SYS" sess.add(sub) sess.commit() if i == 0: cls.submission_id = sub.submission_id # Add job to first submission job = Job(submission_id=cls.submission_id, job_status_id=cls.jobStatusDict['running'], job_type_id=cls.jobTypeDict['file_upload'], file_type_id=cls.fileTypeDict['appropriations']) sess.add(job) sess.commit() cls.uploadId = job.job_id
def insert_job(sess, submission_id, job_status_id, job_type_id):
    """ Insert one job into job tracker and get job ID back.

        Args:
            sess: the current session
            submission_id: the ID of the submission the job is attached to
            job_status_id: the status of the job
            job_type_id: the type of the job

        Returns:
            the job ID of the created job
    """
    job = Job(file_type_id=FILE_TYPE_DICT['fabs'],
              job_status_id=job_status_id,
              job_type_id=job_type_id,
              submission_id=submission_id,
              original_filename=None,
              file_size=None,
              number_of_rows=None)
    sess.add(job)
    sess.commit()
    return job.job_id

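# Example call in a test (a sketch: assumes an open session and an existing
# submission; the file type is fixed to FABS by the helper itself):
#
#     job_id = insert_job(sess, submission.submission_id,
#                         JOB_STATUS_DICT['finished'],
#                         JOB_TYPE_DICT['csv_record_validation'])
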
def test_upload_fabs_duplicate_running(self):
    """ Test file submissions for when the job is already running """
    # Mark a job as already running
    self.session.add(Job(file_type_id=FILE_TYPE_DICT['fabs'],
                         job_status_id=JOB_STATUS_DICT['running'],
                         job_type_id=JOB_TYPE_DICT['file_upload'],
                         submission_id=self.d2_submission,
                         original_filename=None,
                         file_size=None,
                         number_of_rows=None))
    self.session.commit()

    response = self.app.post(
        "/v1/upload_fabs_file/",
        {"existing_submission_id": str(self.d2_submission)},
        upload_files=[('fabs', 'fabs.csv', open('tests/integration/data/fabs.csv', 'rb').read())],
        headers={"x-session-id": self.session_id},
        expect_errors=True)
    self.assertEqual(response.status_code, 400)
    self.assertEqual(response.json['message'], 'Submission already has a running job')

externalIds = []
for fileType in EXTERNAL_FILE_TYPES:
    externalIds.append(interfaces.jobDb.getFileTypeId(fileType))

# For each submission ID, check that all jobs are present and create any missing
print("external IDs: " + str(externalIds))
with databaseSession() as session:
    for submissionId in submissionIds:
        for fileTypeId in externalIds:
            # If job does not exist, create it
            uploadJob = session.query(Job).filter(Job.submission_id == submissionId).filter(
                Job.file_type_id == fileTypeId).filter(Job.job_type_id == fileUpload).all()
            if not uploadJob:
                # Create upload job with ready status
                newUploadJob = Job(job_status_id=ready, job_type_id=fileUpload,
                                   submission_id=submissionId, file_type_id=fileTypeId)
                session.add(newUploadJob)
                session.commit()
                uploadId = newUploadJob.job_id
            else:
                uploadId = uploadJob[0].job_id
            # If type is D1 or D2, also create a validation job with waiting status and dependency
            if fileTypeId in [awardTypeId, awardProcTypeId]:
                # Check that validation job exists
                existingValJob = session.query(Job).filter(Job.submission_id == submissionId).filter(
                    Job.file_type_id == fileTypeId).filter(Job.job_type_id == validation).all()
                if not existingValJob:
                    validationJob = Job(job_status_id=waiting, job_type_id=validation,
                                        submission_id=submissionId, file_type_id=fileTypeId)
                    session.add(validationJob)
                    session.flush()
                    # Make the validation job depend on the upload job
                    valDependency = JobDependency(job_id=validationJob.job_id, prerequisite_id=uploadId)
                    session.add(valDependency)
                    session.commit()

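# Context note for the fragment above (an inference, not confirmed by the
# source): fileUpload and validation are job type IDs, ready and waiting are
# job status IDs, and awardTypeId / awardProcTypeId are the file type IDs for
# D2 and D1, all resolved earlier in the surrounding script.
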
def test_revert_submission(database, monkeypatch):
    """ Tests reverting an updated DABS certification """
    sess = database.session

    sub = Submission(publish_status_id=PUBLISH_STATUS_DICT['updated'], is_quarter_format=True, d2_submission=False,
                     publishable=False, number_of_errors=20, number_of_warnings=15)
    sess.add(sub)
    sess.commit()

    job = Job(submission_id=sub.submission_id, job_status_id=JOB_STATUS_DICT['finished'],
              job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['appropriations'],
              number_of_warnings=0, number_of_errors=10, filename='new/test/file.csv', number_of_rows=5,
              number_of_rows_valid=0)
    cert_history = CertifyHistory(submission_id=sub.submission_id)
    sess.add_all([job, cert_history])
    sess.commit()

    cert_approp = CertifiedAppropriation(submission_id=sub.submission_id, job_id=job.job_id, row_number=1,
                                         spending_authority_from_of_cpe=2, tas='test')
    approp = Appropriation(submission_id=sub.submission_id, job_id=job.job_id, row_number=1,
                           spending_authority_from_of_cpe=15, tas='test')
    cert_files = CertifiedFilesHistory(certify_history_id=cert_history.certify_history_id,
                                       submission_id=sub.submission_id, filename='old/test/file2.csv',
                                       file_type_id=FILE_TYPE_DICT['appropriations'],
                                       warning_filename='a/warning.csv')
    cert_meta1 = CertifiedErrorMetadata(job_id=job.job_id, file_type_id=FILE_TYPE_DICT['appropriations'],
                                        target_file_type_id=None, occurrences=15)
    cert_meta2 = CertifiedErrorMetadata(job_id=job.job_id, file_type_id=FILE_TYPE_DICT['appropriations'],
                                        target_file_type_id=None, occurrences=10)
    file_entry = File(file_id=FILE_TYPE_DICT['appropriations'], job_id=job.job_id,
                      file_status_id=FILE_STATUS_DICT['incomplete'], headers_missing='something')
    sess.add_all([cert_approp, approp, cert_files, cert_meta1, cert_meta2, file_entry])
    sess.commit()

    file_handler = fileHandler.FileHandler({}, is_local=True)
    monkeypatch.setattr(file_handler, 'revert_certified_error_files', Mock())
    revert_to_certified(sub, file_handler)

    # Test that certified data is moved back
    approp_query = sess.query(Appropriation).filter_by(submission_id=sub.submission_id).all()
    assert len(approp_query) == 1
    assert approp_query[0].spending_authority_from_of_cpe == 2

    # Test that the job got updated
    job_query = sess.query(Job).filter_by(submission_id=sub.submission_id).all()
    assert len(job_query) == 1
    assert job_query[0].filename == CONFIG_BROKER['broker_files'] + 'file2.csv'
    assert job_query[0].number_of_warnings == 25
    assert job_query[0].number_of_errors == 0
    assert job_query[0].number_of_rows == 2
    assert job_query[0].number_of_rows_valid == 1

    # Test that File got updated
    file_query = sess.query(File).filter_by(job_id=job.job_id).all()
    assert len(file_query) == 1
    assert file_query[0].headers_missing is None
    assert file_query[0].file_status_id == FILE_STATUS_DICT['complete']

    # Make sure submission got updated
    sub_query = sess.query(Submission).filter_by(submission_id=sub.submission_id).all()
    assert len(sub_query) == 1
    assert sub_query[0].publishable is True
    assert sub_query[0].number_of_errors == 0
    assert sub_query[0].number_of_warnings == 25

def addUploadJobs(self, filenames, submissionId, existingSubmission):
    """ Add upload jobs to job tracker database

    Arguments:
        filenames -- List of tuples containing (file type, upload path, original filenames)
        submissionId -- Submission ID to attach to jobs
        existingSubmission -- True if we should update existing jobs rather than creating new ones

    Returns:
        jobsRequired -- List of job ids required for validation jobs, used to populate the prerequisite table
        uploadDict -- Dictionary of upload ids by filename to return to client, used for calling
            finalize_submission route
    """
    # Keep list of job ids required for validation jobs
    jobsRequired = []
    # Dictionary of upload ids by filename to return to client
    uploadDict = {}

    for fileType, filePath, filename in filenames:
        fileTypeQuery = self.session.query(FileType.file_type_id).filter(FileType.name == fileType)
        fileTypeResult = self.runUniqueQuery(fileTypeQuery, "No matching file type",
                                             "Multiple matching file types")
        fileTypeId = fileTypeResult.file_type_id

        if existingSubmission:
            # Find existing upload job and mark as running
            uploadQuery = self.session.query(Job).filter(Job.submission_id == submissionId).filter(
                Job.file_type_id == fileTypeId).filter(Job.job_type_id == self.getJobTypeId("file_upload"))
            uploadJob = self.runUniqueQuery(uploadQuery, "No upload job found for this file",
                                            "Conflicting jobs found")
            # Mark as running and set new file name and path
            uploadJob.job_status_id = self.getJobStatusId("running")
            uploadJob.original_filename = filename
            uploadJob.filename = filePath
            self.session.commit()
        else:
            # Create upload job, mark as running since frontend should be doing this upload
            uploadJob = Job(original_filename=filename, filename=filePath, file_type_id=fileTypeId,
                            job_status_id=self.getJobStatusId("running"),
                            job_type_id=self.getJobTypeId("file_upload"), submission_id=submissionId)
            self.session.add(uploadJob)

        if existingSubmission:
            valQuery = self.session.query(Job).filter(Job.submission_id == submissionId).filter(
                Job.file_type_id == fileTypeId).filter(
                Job.job_type_id == self.getJobTypeId("csv_record_validation"))
            valJob = self.runUniqueQuery(valQuery, "No validation job found for this file",
                                         "Conflicting jobs found")
            valJob.job_status_id = self.getJobStatusId("waiting")
            valJob.original_filename = filename
            valJob.filename = filePath
            # Reset file size and number of rows to be set during validation of new file
            valJob.file_size = None
            valJob.number_of_rows = None
            # Reset number of errors
            errorDb = ErrorHandler()
            errorDb.resetErrorsByJobId(valJob.job_id)
            errorDb.resetFileByJobId(valJob.job_id)
            self.session.commit()
        else:
            # Create parse into DB job
            valJob = Job(original_filename=filename, filename=filePath, file_type_id=fileTypeId,
                         job_status_id=self.getJobStatusId("waiting"),
                         job_type_id=self.getJobTypeId("csv_record_validation"), submission_id=submissionId)
            self.session.add(valJob)
            self.session.flush()

        if not existingSubmission:
            # Add dependency between file upload and db upload
            uploadDependency = JobDependency(job_id=valJob.job_id, prerequisite_id=uploadJob.job_id)
            self.session.add(uploadDependency)
            # Later validation jobs are dependent only on record level validation, not upload jobs
            jobsRequired.append(valJob.job_id)

        uploadDict[fileType] = uploadJob.job_id

    # Return list of upload jobs
    return jobsRequired, uploadDict

def setUpClass(cls): """Set up class-wide resources.""" super(FileTypeTests, cls).setUpClass() #TODO: refactor into a pytest fixture user = cls.userId # TODO: get rid of this flag once we're using a tempdb for test fixtures force_tas_load = False with createApp().app_context(): sess = GlobalDB.db().session # Create submissions and jobs, also uploading # the files needed for each job. statusReadyId = JOB_STATUS_DICT['ready'] jobTypeCsvId = JOB_TYPE_DICT['csv_record_validation'] jobDict = {} submissionId = cls.insertSubmission(sess, user) job_info = Job(filename=cls.uploadFile("appropValid.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['valid'] = job_info.job_id submissionId = cls.insertSubmission(sess, user) job_info = Job(filename=cls.uploadFile("programActivityValid.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['program_activity'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['programValid'] = job_info.job_id submissionId = cls.insertSubmission(sess, user) job_info = Job(filename=cls.uploadFile("awardFinancialValid.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['award_financial'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['awardFinValid'] = job_info.job_id # next two jobs have the same submission id submissionId = cls.insertSubmission(sess, user) job_info = Job(filename=cls.uploadFile("awardValid.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['award'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['awardValid'] = job_info.job_id job_info = Job(filename=cls.uploadFile("awardProcValid.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['award_procurement'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['awardProcValid'] = job_info.job_id # commit submissions/jobs and output IDs sess.commit() for job_type, job_id in jobDict.items(): print('{}: {}'.format(job_type, job_id)) # Load fields and rules FileTypeTests.load_definitions(sess, force_tas_load) cls.jobDict = jobDict
def add_jobs_for_uploaded_file(upload_file, submission_id, existing_submission):
    """ Add upload and validation jobs for a single filetype

    Arguments:
        upload_file: UploadFile named tuple
        submission_id: submission ID to attach to jobs
        existing_submission: True if we should update existing jobs rather than creating new ones

    Returns:
        the validation job id for this file type (if any)
        the upload job id for this file type
    """
    sess = GlobalDB.db().session

    file_type_id = FILE_TYPE_DICT[upload_file.file_type]
    validation_job_id = None

    # Create a file upload job or, for an existing submission, modify the
    # existing upload job.
    if existing_submission:
        # mark existing upload job as running
        upload_job = sess.query(Job).filter_by(
            submission_id=submission_id,
            file_type_id=file_type_id,
            job_type_id=JOB_TYPE_DICT['file_upload']
        ).one()
        # mark as running and set new file name and path
        upload_job.job_status_id = JOB_STATUS_DICT['running']
        upload_job.original_filename = upload_file.file_name
        upload_job.filename = upload_file.upload_name
    else:
        if upload_file.file_type in ["award", "award_procurement"]:
            # file generation handled on backend, mark as ready
            upload_status = JOB_STATUS_DICT['ready']
        elif upload_file.file_type in ["awardee_attributes", "sub_award"]:
            # these are dependent on file D2 validation
            upload_status = JOB_STATUS_DICT['waiting']
        else:
            # mark as running since frontend should be doing this upload
            upload_status = JOB_STATUS_DICT['running']
        upload_job = Job(
            original_filename=upload_file.file_name,
            filename=upload_file.upload_name,
            file_type_id=file_type_id,
            job_status_id=upload_status,
            job_type_id=JOB_TYPE_DICT['file_upload'],
            submission_id=submission_id)
        sess.add(upload_job)
        sess.flush()

    if existing_submission:
        # if the file's validation job is attached to an existing submission,
        # reset its status and delete any validation artifacts (e.g., error metadata)
        # that might exist from a previous run.
        val_job = sess.query(Job).filter_by(
            submission_id=submission_id,
            file_type_id=file_type_id,
            job_type_id=JOB_TYPE_DICT['csv_record_validation']
        ).one()
        val_job.job_status_id = JOB_STATUS_DICT['waiting']
        val_job.original_filename = upload_file.file_name
        val_job.filename = upload_file.upload_name
        # reset file size and number of rows to be set during validation of new file
        val_job.file_size = None
        val_job.number_of_rows = None
        # delete error metadata that might exist from a previous run of this validation job
        sess.query(ErrorMetadata).\
            filter(ErrorMetadata.job_id == val_job.job_id).\
            delete(synchronize_session='fetch')
        # delete file error information that might exist from a previous run of this validation job
        sess.query(File).filter(File.job_id == val_job.job_id).delete(synchronize_session='fetch')
    else:
        # create a new record validation job and add dependencies if necessary
        if upload_file.file_type == "awardee_attributes":
            d1_val_job = sess.query(Job).\
                filter(Job.submission_id == submission_id,
                       Job.file_type_id == FILE_TYPE_DICT['award_procurement'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                one_or_none()
            if d1_val_job is None:
                raise Exception("Cannot create E job without a D1 job")
            # add dependency on D1 validation job
            d1_dependency = JobDependency(job_id=upload_job.job_id, prerequisite_id=d1_val_job.job_id)
            sess.add(d1_dependency)
        elif upload_file.file_type == "sub_award":
            # todo: check for C validation job
            c_val_job = sess.query(Job).\
                filter(Job.submission_id == submission_id,
                       Job.file_type_id == FILE_TYPE_DICT['award_financial'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                one_or_none()
            if c_val_job is None:
                raise Exception("Cannot create F job without a C job")
            # add dependency on C validation job
            c_dependency = JobDependency(job_id=upload_job.job_id, prerequisite_id=c_val_job.job_id)
            sess.add(c_dependency)
        else:
            # E and F don't get validation jobs
            val_job = Job(
                original_filename=upload_file.file_name,
                filename=upload_file.upload_name,
                file_type_id=file_type_id,
                job_status_id=JOB_STATUS_DICT['waiting'],
                job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                submission_id=submission_id)
            sess.add(val_job)
            sess.flush()
            # add dependency between file upload job and file validation job
            upload_dependency = JobDependency(job_id=val_job.job_id, prerequisite_id=upload_job.job_id)
            sess.add(upload_dependency)
            validation_job_id = val_job.job_id

    sess.commit()
    return validation_job_id, upload_job.job_id

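# Resulting dependency graph from add_jobs_for_uploaded_file (a summary of the
# creation logic above, for new submissions): each file's csv_record_validation
# job depends on its file_upload job; the E (awardee_attributes) upload depends
# on the D1 (award_procurement) validation job; the F (sub_award) upload
# depends on the C (award_financial) validation job. E and F get no validation
# jobs of their own, so they contribute no prerequisites to the cross-file
# validation job created in create_jobs.
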
def addJobsForFileType(fileType, filePath, filename, submissionId, existingSubmission, jobsRequired, uploadDict):
    """ Add upload and validation jobs for a single filetype

    Args:
        fileType: What type of file to add jobs for
        filePath: Path to upload the file to
        filename: Original filename
        submissionId: Submission ID to attach to jobs
        existingSubmission: True if we should update existing jobs rather than creating new ones
        jobsRequired: List of job ids that will be prerequisites for cross-file job
        uploadDict: Dictionary of upload ids by filename to return to client, used for calling
            finalize_submission route

    Returns:
        jobsRequired: List of job ids that will be prerequisites for cross-file job
        uploadDict: Dictionary of upload ids by filename to return to client, used for calling
            finalize_submission route
    """
    sess = GlobalDB.db().session
    fileTypeId = FILE_TYPE_DICT[fileType]

    # Create a file upload job or, for an existing submission, modify the
    # existing upload job.
    if existingSubmission:
        # Mark existing upload job as running and set new file name and path
        uploadJob = sess.query(Job).filter_by(
            submission_id=submissionId,
            file_type_id=fileTypeId,
            job_type_id=JOB_TYPE_DICT['file_upload']).one()
        uploadJob.job_status_id = JOB_STATUS_DICT['running']
        uploadJob.original_filename = filename
        uploadJob.filename = filePath
    else:
        if fileType in ["award", "award_procurement"]:
            # file generation handled on backend, mark as ready
            uploadStatus = JOB_STATUS_DICT['ready']
        elif fileType in ["awardee_attributes", "sub_award"]:
            # these are dependent on file D2 validation
            uploadStatus = JOB_STATUS_DICT['waiting']
        else:
            # mark as running since frontend should be doing this upload
            uploadStatus = JOB_STATUS_DICT['running']
        uploadJob = Job(original_filename=filename, filename=filePath, file_type_id=fileTypeId,
                        job_status_id=uploadStatus, job_type_id=JOB_TYPE_DICT['file_upload'],
                        submission_id=submissionId)
        sess.add(uploadJob)
        sess.flush()

    # Create a file validation job or, for an existing submission, modify the
    # existing validation job.
    if existingSubmission:
        # If the file's validation job is attached to an existing submission,
        # reset its status and delete any validation artifacts (e.g., error metadata)
        # that might exist from a previous run.
        valJob = sess.query(Job).filter_by(
            submission_id=submissionId,
            file_type_id=fileTypeId,
            job_type_id=JOB_TYPE_DICT['csv_record_validation']).one()
        valJob.job_status_id = JOB_STATUS_DICT['waiting']
        valJob.original_filename = filename
        valJob.filename = filePath
        # Reset file size and number of rows to be set during validation of new file
        valJob.file_size = None
        valJob.number_of_rows = None
        # Delete error metadata that might exist from a previous run of this validation job
        sess.query(ErrorMetadata).\
            filter(ErrorMetadata.job_id == valJob.job_id).\
            delete(synchronize_session='fetch')
        # Delete file error information that might exist from a previous run of this validation job
        sess.query(File).filter(File.job_id == valJob.job_id).delete(synchronize_session='fetch')
    else:
        # Create a new record validation job and add dependencies if necessary
        if fileType == "awardee_attributes":
            d1ValJob = sess.query(Job).\
                filter(Job.submission_id == submissionId,
                       Job.file_type_id == FILE_TYPE_DICT['award_procurement'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                first()
            if d1ValJob is None:
                raise Exception("Cannot create E job without a D1 job")
            # Add dependency on D1 validation job
            d1Dependency = JobDependency(job_id=uploadJob.job_id, prerequisite_id=d1ValJob.job_id)
            sess.add(d1Dependency)
        elif fileType == "sub_award":
            # todo: check for C validation job
            cValJob = sess.query(Job).\
                filter(Job.submission_id == submissionId,
                       Job.file_type_id == FILE_TYPE_DICT['award_financial'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                first()
            if cValJob is None:
                raise Exception("Cannot create F job without a C job")
            # Add dependency on C validation job
            cDependency = JobDependency(job_id=uploadJob.job_id, prerequisite_id=cValJob.job_id)
            sess.add(cDependency)
        else:
            # E and F don't get validation jobs
            valJob = Job(original_filename=filename, filename=filePath, file_type_id=fileTypeId,
                         job_status_id=JOB_STATUS_DICT['waiting'],
                         job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                         submission_id=submissionId)
            sess.add(valJob)
            sess.flush()
            # Add dependency between file upload and db upload
            uploadDependency = JobDependency(job_id=valJob.job_id, prerequisite_id=uploadJob.job_id)
            sess.add(uploadDependency)
            jobsRequired.append(valJob.job_id)

    sess.commit()
    uploadDict[fileType] = uploadJob.job_id
    return jobsRequired, uploadDict

def setUpClass(cls): """Set up class-wide resources (test data)""" super(JobTests, cls).setUpClass() user = cls.userId # Flag for testing a million+ errors (can take ~30 min to run) cls.includeLongTests = False with createApp().app_context(): # get the submission test user sess = GlobalDB.db().session # Create test submissions and jobs, also uploading # the files needed for each job. jobDict = {} submissionId = cls.insertSubmission(sess, user) job_info = Job(job_status_id=JOB_STATUS_DICT['ready'], job_type_id=JOB_TYPE_DICT['file_upload'], file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['bad_upload'] = job_info.job_id submissionId = cls.insertSubmission(sess, user) job_info = Job(job_status_id=JOB_STATUS_DICT['ready'], job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['bad_prereq'] = job_info.job_id submissionId = cls.insertSubmission(sess, user) job_info = Job(job_status_id=JOB_STATUS_DICT['ready'], job_type_id=JOB_TYPE_DICT['external_validation'], file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['wrong_type'] = job_info.job_id submissionId = cls.insertSubmission(sess, user) job_info = Job(job_status_id=JOB_STATUS_DICT['finished'], job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['not_ready'] = job_info.job_id submissionId = cls.insertSubmission(sess, user) job_info = Job(filename=cls.uploadFile('testEmpty.csv', user), job_status_id=JOB_STATUS_DICT['ready'], job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['empty'] = job_info.job_id # create dependency dependency = JobDependency(job_id=jobDict["bad_prereq"], prerequisite_id=jobDict["bad_upload"]) sess.add(dependency) colIdDict = {} for fileId in range(1, 5): for columnId in range(1, 6): if columnId < 3: fieldType = FIELD_TYPE_DICT['INT'] else: fieldType = FIELD_TYPE_DICT['STRING'] columnName = "header_{}".format(columnId) fileCol = FileColumn( file_id=fileId, field_types_id=fieldType, name=columnName, required=(columnId != FIELD_TYPE_DICT['STRING'])) sess.add(fileCol) sess.flush() colIdDict["header_{}_file_type_{}".format( columnId, fileId)] = fileCol.file_column_id # commit submissions/jobs and output IDs sess.commit() for job_type, job_id in jobDict.items(): print('{}: {}'.format(job_type, job_id)) cls.jobDict = jobDict
def setUpClass(cls): """Set up class-wide resources.""" super(MixedFileTests, cls).setUpClass() user = cls.userId force_tas_load = False with createApp().app_context(): # get the submission test user sess = GlobalDB.db().session # Create test submissions and jobs, also uploading # the files needed for each job. statusReadyId = JOB_STATUS_DICT['ready'] jobTypeCsvId = JOB_TYPE_DICT['csv_record_validation'] jobDict = {} # next three jobs belong to the same submission and are tests # for single-file validations that contain failing rows submissionId = cls.insertSubmission(sess, user) job_info = Job(filename=cls.uploadFile("appropMixed.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['mixed'] = job_info.job_id job_info = Job(filename=cls.uploadFile("programActivityMixed.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['program_activity'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['programMixed'] = job_info.job_id job_info = Job(filename=cls.uploadFile("awardMixed.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['award'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['awardMixed'] = job_info.job_id # next job tests single-file validations for award_financial # (submission has a non-Q1 end date) submissionId = cls.insertSubmission(sess, user, datetime(2015, 3, 15)) job_info = Job(filename=cls.uploadFile("awardFinancialMixed.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['award_financial'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['awardFinMixed'] = job_info.job_id # job below tests a file that has a mixed-delimiter heading submissionId = cls.insertSubmission(sess, user) job_info = Job(filename=cls.uploadFile("awardMixedDelimiter.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['award'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['awardMixedDelimiter'] = job_info.job_id # next five jobs are cross-file and belong to the same submission submissionId = cls.insertSubmission(sess, user) job_info = Job(filename=cls.uploadFile("cross_file_A.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['crossApprop'] = job_info.job_id job_info = Job(filename=cls.uploadFile("cross_file_B.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['program_activity'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['crossPgmAct'] = job_info.job_id job_info = Job(filename=cls.uploadFile("cross_file_C.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['award_financial'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['crossAwardFin'] = job_info.job_id job_info = Job(filename=cls.uploadFile("cross_file_D2.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['award'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['crossAward'] = job_info.job_id job_info = Job(job_status_id=statusReadyId, job_type_id=JOB_TYPE_DICT['validation'], submission_id=submissionId) sess.add(job_info) sess.flush() 
jobDict['crossFile'] = job_info.job_id # next four jobs test short columns names and belong to the same submission submissionId = cls.insertSubmission(sess, user) job_info = Job(filename=cls.uploadFile("appropValidShortcols.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['appropriations'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['appropValidShortcols'] = job_info.job_id job_info = Job(filename=cls.uploadFile( "programActivityMixedShortcols.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['program_activity'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['programMixedShortcols'] = job_info.job_id job_info = Job(filename=cls.uploadFile( "awardFinancialMixedShortcols.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['award_financial'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['awardFinMixedShortcols'] = job_info.job_id job_info = Job(filename=cls.uploadFile("awardValidShortcols.csv", user), job_status_id=statusReadyId, job_type_id=jobTypeCsvId, file_type_id=FILE_TYPE_DICT['award'], submission_id=submissionId) sess.add(job_info) sess.flush() jobDict['awardValidShortcols'] = job_info.job_id # commit submissions/jobs and output IDs sess.commit() for job_type, job_id in jobDict.items(): print('{}: {}'.format(job_type, job_id)) # Load fields and rules FileTypeTests.load_definitions(sess, force_tas_load, cls.RULES_TO_APPLY) cls.jobDict = jobDict