def createJobs(self, filenames, submissionId, existingSubmission=False):
    """ Given the filenames to be uploaded, create the set of jobs needing to be completed for this submission

    Arguments:
    filenames -- List of tuples containing (file type, upload path, original filenames)
    submissionId -- Submission ID to be linked to jobs
    existingSubmission -- True if we should update jobs in an existing submission rather than creating new jobs

    Returns:
    Dictionary of upload ids by filename to return to client, used for calling finalize_submission route
    """
    # Create (or update) the per-file upload/validation jobs first; they are
    # the prerequisites for the cross-file and external validation jobs below.
    jobsRequired, uploadDict = self.addUploadJobs(filenames, submissionId, existingSubmission)

    if existingSubmission:
        # Find cross-file and external validation jobs and mark them as waiting
        valQuery = self.session.query(JobStatus).filter(
            JobStatus.submission_id == submissionId).filter(
            JobStatus.type_id == self.getTypeId("validation"))
        valJob = self.runUniqueQuery(valQuery, "No cross-file validation job found", "Conflicting jobs found")
        valJob.status_id = self.getStatusId("waiting")
        extQuery = self.session.query(JobStatus).filter(
            JobStatus.submission_id == submissionId).filter(
            JobStatus.type_id == self.getTypeId("external_validation"))
        # BUG FIX: previously ran valQuery here, so the external validation job
        # was never actually looked up or reset; use extQuery instead.
        extJob = self.runUniqueQuery(extQuery, "No external validation job found", "Conflicting jobs found")
        extJob.status_id = self.getStatusId("waiting")
        self.session.commit()
    else:
        # Create validation job
        validationJob = JobStatus(status_id=self.getStatusId("waiting"),
                                  type_id=self.getTypeId("validation"),
                                  submission_id=submissionId)
        self.session.add(validationJob)
        # Create external validation job
        externalJob = JobStatus(status_id=self.getStatusId("waiting"),
                                type_id=self.getTypeId("external_validation"),
                                submission_id=submissionId)
        self.session.add(externalJob)
        # Flush so the new jobs receive primary keys for the dependency rows
        self.session.flush()
        # Create dependencies for validation jobs: each depends on every
        # record-level validation job returned by addUploadJobs
        for job_id in jobsRequired:
            valDependency = JobDependency(job_id=validationJob.job_id, prerequisite_id=job_id)
            self.session.add(valDependency)
            extDependency = JobDependency(job_id=externalJob.job_id, prerequisite_id=job_id)
            self.session.add(extDependency)

    # Commit all changes
    self.session.commit()
    uploadDict["submission_id"] = submissionId
    return uploadDict
def insertCodes(sess):
    """Create job tracker tables from model metadata."""
    # TODO: define these codes as enums in the data model?
    # insert status types
    for status_entry in lookups.JOB_STATUS:
        sess.merge(JobStatus(job_status_id=status_entry.id,
                             name=status_entry.name,
                             description=status_entry.desc))
    # insert job types
    for type_entry in lookups.JOB_TYPE:
        sess.merge(JobType(job_type_id=type_entry.id,
                           name=type_entry.name,
                           description=type_entry.desc))
    # insert publish status
    for publish_entry in lookups.PUBLISH_STATUS:
        sess.merge(PublishStatus(publish_status_id=publish_entry.id,
                                 name=publish_entry.name,
                                 description=publish_entry.desc))
    # insert file types
    for file_entry in lookups.FILE_TYPE:
        sess.merge(FileType(file_type_id=file_entry.id,
                            name=file_entry.name,
                            description=file_entry.desc,
                            letter_name=file_entry.letter,
                            file_order=file_entry.order))
def setUpClass(cls):
    """Set up class-wide resources like submissions and jobs."""
    super(UserTests, cls).setUpClass()
    # Add submissions to one of the users
    tracker = cls.jobTracker
    # Delete existing submissions for approved user
    tracker.deleteSubmissionsForUserId(cls.approved_user_id)
    for index in range(5):
        submission = Submission(user_id=cls.approved_user_id)
        tracker.session.add(submission)
        tracker.session.commit()
        # Remember the first submission's id for the upload job below
        if index == 0:
            cls.submission_id = submission.submission_id
    # Add job to first submission
    upload_job = JobStatus(submission_id=cls.submission_id, status_id=3,
                           type_id=1, file_type_id=1)
    tracker.session.add(upload_job)
    tracker.session.commit()
    cls.uploadId = upload_job.job_id
def insertCodes():
    """Create job tracker tables from model metadata."""
    jobDb = JobTrackerInterface()
    # TODO: define these codes as enums in the data model?
    # insert status types
    statusList = [
        (1, 'waiting', 'check dependency table'),
        (2, 'ready', 'can be assigned'),
        (3, 'running', 'job is currently in progress'),
        (4, 'finished', 'job is complete'),
        (5, 'invalid', 'job is invalid'),
        (6, 'failed', 'job failed to complete'),
    ]
    for code, label, note in statusList:
        jobDb.session.merge(JobStatus(job_status_id=code, name=label, description=note))
    # insert job types
    typeList = [
        (1, 'file_upload', 'file must be uploaded to S3'),
        (2, 'csv_record_validation', 'do record level validation and add to staging DB'),
        (3, 'db_transfer', 'information must be moved from production DB to staging DB'),
        (4, 'validation', 'new information must be validated'),
        (5, 'external_validation', 'new information must be validated against external sources'),
    ]
    for code, label, note in typeList:
        jobDb.session.merge(JobType(job_type_id=code, name=label, description=note))
    # insert file types
    fileTypeList = [
        (1, 'award', ''),
        (2, 'award_financial', ''),
        (3, 'appropriations', ''),
        (4, 'program_activity', ''),
    ]
    for code, label, note in fileTypeList:
        jobDb.session.merge(FileType(file_type_id=code, name=label, description=note))
    jobDb.session.commit()
    jobDb.session.close()
def insert_codes(sess):
    """Create job tracker tables from model metadata."""
    # TODO: define these codes as enums in the data model?
    # insert status types
    for status_entry in lookups.JOB_STATUS:
        sess.merge(JobStatus(job_status_id=status_entry.id,
                             name=status_entry.name,
                             description=status_entry.desc))
    # insert job types
    for type_entry in lookups.JOB_TYPE:
        sess.merge(JobType(job_type_id=type_entry.id,
                           name=type_entry.name,
                           description=type_entry.desc))
    # Delete unused job types if they exist
    stale_types = sess.query(JobType).filter(
        JobType.name.in_(['db_transfer', 'external_validation']))
    if stale_types.count() > 0:
        logger.info('Deleting unused job types db_transfer and external_validation')
        delete_unused_job_types(sess)
    # insert publish status
    for publish_entry in lookups.PUBLISH_STATUS:
        sess.merge(PublishStatus(publish_status_id=publish_entry.id,
                                 name=publish_entry.name,
                                 description=publish_entry.desc))
    # insert file types
    for file_entry in lookups.FILE_TYPE:
        sess.merge(FileType(file_type_id=file_entry.id,
                            name=file_entry.name,
                            description=file_entry.desc,
                            letter_name=file_entry.letter,
                            file_order=file_entry.order))
def addJob(status, jobType, submissionId, s3Filename, fileType, session):
    """ Create a job model and add it to the session """
    newJob = JobStatus(
        status_id=status,
        type_id=jobType,
        submission_id=submissionId,
        filename=s3Filename,
        file_type_id=fileType,
    )
    session.add(newJob)
    session.commit()
    return newJob
def insertJob(jobTracker, filetype, status, type_id, submission, job_id=None,
              filename=None, file_size=None, num_rows=None):
    """Insert one job into job tracker and get ID back.

    Arguments:
    jobTracker -- interface whose session the new job is added to
    filetype -- file type id for the job
    status -- status id for the job
    type_id -- job type id
    submission -- submission id the job belongs to
    job_id -- optional explicit job id to assign before inserting
    filename -- optional original filename recorded on the job
    file_size -- optional file size recorded on the job
    num_rows -- optional row count recorded on the job

    Returns:
    The job_id of the newly inserted job.
    """
    job = JobStatus(
        file_type_id=filetype,
        status_id=status,
        type_id=type_id,
        submission_id=submission,
        original_filename=filename,
        file_size=file_size,
        number_of_rows=num_rows
    )
    # Allow callers (e.g. tests) to pin a specific primary key
    if job_id:
        job.job_id = job_id
    jobTracker.session.add(job)
    jobTracker.session.commit()
    return job.job_id
def addUploadJobs(self, filenames, submissionId, existingSubmission):
    """ Add upload jobs to job tracker database

    Arguments:
    filenames -- List of tuples containing (file type, upload path, original filenames)
    submissionId -- Submission ID to attach to jobs
    existingSubmission -- True if we should update existing jobs rather than creating new ones

    Returns:
    jobsRequired -- List of job ids required for validation jobs, used to populate the prerequisite table
    uploadDict -- Dictionary of upload ids by filename to return to client, used for calling finalize_submission route
    """
    # Keep list of job ids required for validation jobs
    jobsRequired = []
    # Dictionary of upload ids by filename to return to client
    uploadDict = {}
    # One upload job + one record-level validation job per submitted file
    for fileType, filePath, filename in filenames:
        # Resolve the file type name to its database id
        fileTypeQuery = self.session.query(
            FileType.file_type_id).filter(FileType.name == fileType)
        fileTypeResult = self.runUniqueQuery(
            fileTypeQuery, "No matching file type", "Multiple matching file types")
        fileTypeId = fileTypeResult.file_type_id

        if existingSubmission:
            # Find existing upload job and mark as running
            uploadQuery = self.session.query(JobStatus).filter(
                JobStatus.submission_id == submissionId).filter(
                JobStatus.file_type_id == fileTypeId).filter(
                JobStatus.type_id == self.getTypeId("file_upload"))
            uploadJob = self.runUniqueQuery(
                uploadQuery, "No upload job found for this file", "Conflicting jobs found")
            # Mark as running and set new file name and path
            uploadJob.status_id = self.getStatusId("running")
            uploadJob.original_filename = filename
            uploadJob.filename = filePath
            self.session.commit()
        else:
            # Create upload job, mark as running since frontend should be doing this upload
            uploadJob = JobStatus(original_filename=filename, filename=filePath,
                                  file_type_id=fileTypeId,
                                  status_id=self.getStatusId("running"),
                                  type_id=self.getTypeId("file_upload"),
                                  submission_id=submissionId)
            self.session.add(uploadJob)

        if existingSubmission:
            # Find the existing record-level validation job for this file type
            valQuery = self.session.query(JobStatus).filter(
                JobStatus.submission_id == submissionId).filter(
                JobStatus.file_type_id == fileTypeId).filter(
                JobStatus.type_id == self.getTypeId("csv_record_validation"))
            valJob = self.runUniqueQuery(
                valQuery, "No validation job found for this file", "Conflicting jobs found")
            valJob.status_id = self.getStatusId("waiting")
            valJob.original_filename = filename
            valJob.filename = filePath
            # Reset file size and number of rows to be set during validation of new file
            valJob.file_size = None
            valJob.number_of_rows = None
            # Reset number of errors
            errorDb = ErrorHandler()
            errorDb.resetErrorsByJobId(valJob.job_id)
            errorDb.resetFileStatusByJobId(valJob.job_id)
            self.session.commit()
        else:
            # Create parse into DB job
            valJob = JobStatus(original_filename=filename, filename=filePath,
                               file_type_id=fileTypeId,
                               status_id=self.getStatusId("waiting"),
                               type_id=self.getTypeId("csv_record_validation"),
                               submission_id=submissionId)
            self.session.add(valJob)
            # Flush so the new jobs get primary keys for the dependency row below
            self.session.flush()
        if not existingSubmission:
            # Add dependency between file upload and db upload
            uploadDependency = JobDependency(
                job_id=valJob.job_id, prerequisite_id=uploadJob.job_id)
            self.session.add(uploadDependency)
        # Later validation jobs are dependent only on record level validation, not upload jobs
        jobsRequired.append(valJob.job_id)
        uploadDict[fileType] = uploadJob.job_id
    # Return list of upload jobs
    return jobsRequired, uploadDict