Esempio n. 1
0
def test_check_job_dependencies_has_unfinished_dependencies(database):
    """ check_job_dependencies must leave a job waiting while one of its prerequisites is still unfinished """
    session = database.session
    submission = SubmissionFactory(submission_id=1)

    # Every job in this test is an award-file record validation on the same submission
    shared = {
        'submission_id': submission.submission_id,
        'job_type_id': JOB_TYPE_DICT['csv_record_validation'],
        'file_type_id': FILE_TYPE_DICT['award'],
    }
    finished_prereq = JobFactory(job_status_id=JOB_STATUS_DICT['finished'],
                                 number_of_errors=0,
                                 **shared)
    dependent = JobFactory(job_status_id=JOB_STATUS_DICT['waiting'], **shared)
    waiting_prereq = JobFactory(job_status_id=JOB_STATUS_DICT['waiting'],
                                number_of_errors=0,
                                **shared)
    session.add_all([submission, finished_prereq, dependent, waiting_prereq])
    session.commit()

    # The dependent job has two prerequisites: one already finished and one
    # still waiting, so the dependent job is expected to stay in "waiting"
    session.add_all([
        JobDependency(job_id=dependent.job_id,
                      prerequisite_id=finished_prereq.job_id),
        JobDependency(job_id=dependent.job_id,
                      prerequisite_id=waiting_prereq.job_id),
    ])
    session.commit()

    check_job_dependencies(finished_prereq.job_id)

    assert dependent.job_status_id == JOB_STATUS_DICT['waiting']
    def createJobs(self, filenames, submissionId, existingSubmission=False):
        """  Given the filenames to be uploaded, create the set of jobs needing to be completed for this submission

        Upload jobs are delegated to addUploadJobs.  For an existing submission the
        cross-file ("validation") and "external_validation" jobs are looked up and
        reset to "waiting"; for a new submission both jobs are created and made
        dependent on every job id returned by addUploadJobs.

        Arguments:
        filenames -- List of tuples containing (file type, upload path, original filenames)
        submissionId -- Submission ID to be linked to jobs
        existingSubmission -- True if we should update jobs in an existing submission rather than creating new jobs

        Returns:
        Dictionary of upload ids by filename to return to client, used for calling finalize_submission route
        (also carries the submission id under the "submission_id" key)
        """

        jobsRequired, uploadDict = self.addUploadJobs(filenames, submissionId,
                                                      existingSubmission)

        if existingSubmission:
            # Find cross-file and external validation jobs and mark them as waiting
            valQuery = self.session.query(Job).filter(
                Job.submission_id == submissionId).filter(
                    Job.job_type_id == self.getJobTypeId("validation"))
            # NOTE(review): runUniqueQuery presumably fails with the given messages
            # when zero or several rows match -- confirm against its definition
            valJob = self.runUniqueQuery(valQuery,
                                         "No cross-file validation job found",
                                         "Conflicting jobs found")
            valJob.job_status_id = self.getJobStatusId("waiting")

            extQuery = self.session.query(Job).filter(
                Job.submission_id == submissionId).filter(
                    Job.job_type_id == self.getJobTypeId(
                        "external_validation"))
            # Run the external-validation query here; running valQuery a second
            # time would only reset the cross-file job again and leave the
            # external validation job untouched
            extJob = self.runUniqueQuery(extQuery,
                                         "No external validation job found",
                                         "Conflicting jobs found")
            extJob.job_status_id = self.getJobStatusId("waiting")
        else:
            # Create validation job
            validationJob = Job(job_status_id=self.getJobStatusId("waiting"),
                                job_type_id=self.getJobTypeId("validation"),
                                submission_id=submissionId)
            self.session.add(validationJob)
            # Create external validation job
            externalJob = Job(
                job_status_id=self.getJobStatusId("waiting"),
                job_type_id=self.getJobTypeId("external_validation"),
                submission_id=submissionId)
            self.session.add(externalJob)
            # Flush so both new jobs have job_id values for the dependency rows
            self.session.flush()
            # Create dependencies for validation jobs
            for job_id in jobsRequired:
                valDependency = JobDependency(job_id=validationJob.job_id,
                                              prerequisite_id=job_id)
                self.session.add(valDependency)
                extDependency = JobDependency(job_id=externalJob.job_id,
                                              prerequisite_id=job_id)
                self.session.add(extDependency)

        # Commit all changes (covers both branches above)
        self.session.commit()
        uploadDict["submission_id"] = submissionId
        return uploadDict
Esempio n. 3
0
def test_check_job_dependencies_ready(mock_sqs_queue, database):
    """ A waiting job is moved to ready once its only prerequisite has finished without errors """
    # Make the mocked queue getter hand back the mock queue for this test
    mock_sqs_queue.return_value = SQSMockQueue
    session = database.session
    submission = SubmissionFactory(submission_id=1)

    # Both jobs are award-file record validations on the same submission
    shared = {
        'submission_id': submission.submission_id,
        'job_type_id': JOB_TYPE_DICT['csv_record_validation'],
        'file_type_id': FILE_TYPE_DICT['award'],
    }
    finished_prereq = JobFactory(job_status_id=JOB_STATUS_DICT['finished'],
                                 number_of_errors=0,
                                 **shared)
    dependent = JobFactory(job_status_id=JOB_STATUS_DICT['waiting'], **shared)
    session.add_all([submission, finished_prereq, dependent])
    session.commit()

    # The finished, error-free job is the sole prerequisite of the waiting job
    session.add(JobDependency(job_id=dependent.job_id,
                              prerequisite_id=finished_prereq.job_id))
    session.commit()

    check_job_dependencies(finished_prereq.job_id)

    assert dependent.job_status_id == JOB_STATUS_DICT['ready']
def test_check_job_dependencies_prior_dependency_has_errors(database):
    """ A waiting job must stay waiting when its prerequisite finished but reported errors """
    session = database.session

    def lookup(model, name):
        # Fetch the single lookup-table row of `model` with the given name
        return session.query(model).filter_by(name=name).one()

    submission = SubmissionFactory(submission_id=1)
    errored_prereq = JobFactory(
        submission_id=submission.submission_id,
        job_status=lookup(JobStatus, 'finished'),
        job_type=lookup(JobType, 'csv_record_validation'),
        file_type=lookup(FileType, 'award'),
        number_of_errors=3)
    dependent = JobFactory(
        submission_id=submission.submission_id,
        job_status=lookup(JobStatus, 'waiting'),
        job_type=lookup(JobType, 'csv_record_validation'),
        file_type=lookup(FileType, 'award'))
    session.add_all([submission, errored_prereq, dependent])
    session.commit()

    # The prerequisite is finished, but its 3 errors should keep the
    # dependent job from being released
    session.add(JobDependency(job_id=dependent.job_id,
                              prerequisite_id=errored_prereq.job_id))
    session.commit()

    check_job_dependencies(errored_prereq.job_id)

    assert dependent.job_status_id == JOB_STATUS_DICT['waiting']
Esempio n. 5
0
def create_jobs(upload_files, submission, existing_submission=False):
    """Build (or reset) every job belonging to the given submission

    Arguments:
    upload_files -- list of named tuples that describe files uploaded to the broker
    submission -- submission the jobs belong to
    existing_submission -- true to update the jobs of an existing submission instead of creating new ones

    Returns:
    Dictionary of upload job ids keyed by file type, plus the submission id under
    "submission_id"; returned to the client for calling the finalize_submission route
    """
    sess = GlobalDB.db().session
    submission_id = submission.submission_id

    # Per-file upload/validation jobs come first.  Files are handled in
    # file-letter order so that jobs relying on the awards jobs already
    # being present are processed last.
    prerequisite_ids = []
    upload_ids = {}
    for uploaded in sorted(upload_files, key=attrgetter('file_letter')):
        val_id, upload_id = add_jobs_for_uploaded_file(
            uploaded, submission_id, existing_submission)
        if val_id:
            prerequisite_ids.append(val_id)
        upload_ids[uploaded.file_type] = upload_id

    # Cross-file validation (job_type 'validation') is skipped entirely for
    # d2 submissions.
    # todo: add these back in for detached_d2 when we have actual validations
    if not submission.d2_submission:
        if existing_submission:
            # the cross-file job already exists: put it back to waiting
            cross_file_job = sess.query(Job).filter_by(
                submission_id=submission_id,
                job_type_id=JOB_TYPE_DICT["validation"]).one()
            cross_file_job.job_status_id = JOB_STATUS_DICT["waiting"]
            submission.updated_at = time.strftime("%c")
        else:
            # new submission: create the cross-file job ...
            cross_file_job = Job(job_status_id=JOB_STATUS_DICT["waiting"],
                                 job_type_id=JOB_TYPE_DICT["validation"],
                                 submission_id=submission_id)
            sess.add(cross_file_job)
            sess.flush()
            # ... and make it wait on every single-file validation job
            sess.add_all([
                JobDependency(job_id=cross_file_job.job_id,
                              prerequisite_id=prerequisite_id)
                for prerequisite_id in prerequisite_ids
            ])

    sess.commit()
    upload_ids["submission_id"] = submission_id
    return upload_ids
    def setup_file_generation_submission(cls, sess, submission_id=None):
        """Create jobs for D, E, and F files.

        Falls back to cls.generation_submission_id when no submission_id is given.
        """
        submission_id = submission_id or cls.generation_submission_id
        submission = sess.query(Submission).filter(Submission.submission_id == submission_id).one()

        def make_job(file_type, status, job_type):
            # Insert one job for this submission, translating names to lookup ids
            return insert_job(
                sess,
                FILE_TYPE_DICT[file_type],
                JOB_STATUS_DICT[status],
                JOB_TYPE_DICT[job_type],
                submission.submission_id
            )

        # D1 jobs, ready for the generation route to be called
        make_job('award_procurement', 'ready', 'file_upload')
        d1_validation_job = make_job('award_procurement', 'waiting', 'csv_record_validation')

        # E and F jobs, ready for the check route
        exec_comp_job = make_job('executive_compensation', 'finished', 'file_upload')
        sub_award_job = make_job('sub_award', 'invalid', 'file_upload')
        sub_award_job.error_message = "File was invalid"

        # D2 jobs
        make_job('award', 'finished', 'file_upload')
        make_job('award', 'invalid', 'csv_record_validation')

        # The executive compensation upload depends on the D1 validation job
        sess.add(JobDependency(
            job_id=exec_comp_job.job_id,
            prerequisite_id=d1_validation_job.job_id
        ))
        sess.commit()
Esempio n. 7
0
    def setupFileGenerationSubmission(cls, sess):
        """Create jobs for D, E, and F files on the class's generation submission."""
        submissionQuery = sess.query(Submission).filter(
            Submission.submission_id == cls.generation_submission_id)
        submission = submissionQuery.one()

        def makeJob(fileType, status, jobType):
            # Insert one job for this submission, translating names to lookup ids
            return cls.insertJob(sess, cls.fileTypeDict[fileType],
                                 cls.jobStatusDict[status],
                                 cls.jobTypeDict[jobType],
                                 submission.submission_id)

        # D1 jobs, ready for the generation route to be called
        makeJob('award_procurement', 'ready', 'file_upload')
        d1ValidationJob = makeJob('award_procurement', 'waiting',
                                  'csv_record_validation')

        # E and F jobs, ready for the check route
        awardeeJob = makeJob('awardee_attributes', 'finished', 'file_upload')
        subAwardUpload = makeJob('sub_award', 'invalid', 'file_upload')
        subAwardUpload.error_message = "File was invalid"

        # D2 jobs
        makeJob('award', 'finished', 'file_upload')
        makeJob('award', 'invalid', 'csv_record_validation')

        # The awardee attributes upload depends on the D1 validation job
        sess.add(JobDependency(job_id=awardeeJob.job_id,
                               prerequisite_id=d1ValidationJob.job_id))
        sess.commit()
    def setUpClass(cls):
        """Set up class-wide resources (test data)

        Creates one submission per test job, a dependency between two of the
        jobs, and a grid of file columns, then stores the job ids on cls.jobDict.
        """
        super(JobTests, cls).setUpClass()
        user = cls.userId

        # Flag for testing a million+ errors (can take ~30 min to run)
        cls.includeLongTests = False

        with createApp().app_context():
            sess = GlobalDB.db().session
            jobDict = {}

            def addJob(key, status, jobType, uploadName=None):
                # Each job gets its own freshly inserted submission; the file
                # (if any) is uploaded after the submission has been created
                submissionId = cls.insertSubmission(sess, user)
                extra = {}
                if uploadName is not None:
                    extra['filename'] = cls.uploadFile(uploadName, user)
                newJob = Job(job_status_id=JOB_STATUS_DICT[status],
                             job_type_id=JOB_TYPE_DICT[jobType],
                             file_type_id=FILE_TYPE_DICT['appropriations'],
                             submission_id=submissionId,
                             **extra)
                sess.add(newJob)
                # flush so the generated job_id is available immediately
                sess.flush()
                jobDict[key] = newJob.job_id

            addJob('bad_upload', 'ready', 'file_upload')
            addJob('bad_prereq', 'ready', 'csv_record_validation')
            addJob('wrong_type', 'ready', 'external_validation')
            addJob('not_ready', 'finished', 'csv_record_validation')
            addJob('empty', 'ready', 'csv_record_validation',
                   uploadName='testEmpty.csv')

            # bad_prereq waits on bad_upload
            sess.add(JobDependency(job_id=jobDict["bad_prereq"],
                                   prerequisite_id=jobDict["bad_upload"]))

            # Five columns for each of four file ids: the first two columns are
            # INT, the rest STRING.  columnIds is built but not read again here.
            columnIds = {}
            for fileId in range(1, 5):
                for columnId in range(1, 6):
                    typeKey = 'INT' if columnId < 3 else 'STRING'
                    # NOTE(review): `required` compares the column index with the
                    # STRING field-type id -- confirm this is the intended column
                    column = FileColumn(
                        file_id=fileId,
                        field_types_id=FIELD_TYPE_DICT[typeKey],
                        name="header_{}".format(columnId),
                        required=(columnId != FIELD_TYPE_DICT['STRING']))
                    sess.add(column)
                    sess.flush()
                    lookupKey = "header_{}_file_type_{}".format(columnId, fileId)
                    columnIds[lookupKey] = column.file_column_id

            # commit submissions/jobs and output IDs
            sess.commit()
            for jobName, jobId in jobDict.items():
                print('{}: {}'.format(jobName, jobId))

            cls.jobDict = jobDict
def add_jobs_for_uploaded_file(upload_file, submission_id, existing_submission):
    """ Add (or reset) the upload and validation jobs for a single file type

    Arguments:
        upload_file: UploadFile named tuple
        submission_id: submission ID to attach to jobs
        existing_submission: true if we should update existing jobs rather than creating new ones

    Returns:
        the validation job id for this file type (None unless a new validation job was created)
        the upload job id for this file type

    Raises:
        Exception: when a new awardee_attributes / sub_award upload is requested but the
            validation job it depends on does not exist for the submission
    """
    sess = GlobalDB.db().session

    file_type = upload_file.file_type
    file_type_id = FILE_TYPE_DICT[file_type]
    validation_job_id = None

    if existing_submission:
        # Re-use the existing upload job: mark it running and point it at the new file
        upload_job = sess.query(Job).filter_by(
            submission_id=submission_id,
            file_type_id=file_type_id,
            job_type_id=JOB_TYPE_DICT['file_upload']
        ).one()
        upload_job.job_status_id = JOB_STATUS_DICT['running']
        upload_job.original_filename = upload_file.file_name
        upload_job.filename = upload_file.upload_name

        # Re-use the existing validation job as well: back to waiting, new file
        # names, and size/row count cleared so validation of the new file sets them
        val_job = sess.query(Job).filter_by(
            submission_id=submission_id,
            file_type_id=file_type_id,
            job_type_id=JOB_TYPE_DICT['csv_record_validation']
        ).one()
        val_job.job_status_id = JOB_STATUS_DICT['waiting']
        val_job.original_filename = upload_file.file_name
        val_job.filename = upload_file.upload_name
        val_job.file_size = None
        val_job.number_of_rows = None

        # Drop error metadata and file error records left by a previous run
        stale_errors = sess.query(ErrorMetadata).filter(ErrorMetadata.job_id == val_job.job_id)
        stale_errors.delete(synchronize_session='fetch')
        stale_files = sess.query(File).filter(File.job_id == val_job.job_id)
        stale_files.delete(synchronize_session='fetch')

    else:
        # Initial upload status by file type:
        #   ready   -- file generation handled on backend
        #   waiting -- upload depends on another file's validation job (see below)
        #   running -- frontend should be doing this upload
        initial_status = {
            "award": 'ready',
            "award_procurement": 'ready',
            "awardee_attributes": 'waiting',
            "sub_award": 'waiting',
        }.get(file_type, 'running')

        upload_job = Job(
            original_filename=upload_file.file_name,
            filename=upload_file.upload_name,
            file_type_id=file_type_id,
            job_status_id=JOB_STATUS_DICT[initial_status],
            job_type_id=JOB_TYPE_DICT['file_upload'],
            submission_id=submission_id)
        sess.add(upload_job)
        sess.flush()

        # File types whose upload job depends on another file's validation job
        # (E on D1, F on C): prerequisite file type and the error raised if it is missing.
        # These file types get no validation job of their own.
        prerequisite = {
            "awardee_attributes": ('award_procurement', "Cannot create E job without a D1 job"),
            "sub_award": ('award_financial', "Cannot create F job without a C job"),
        }.get(file_type)

        if prerequisite is not None:
            prerequisite_file_type, missing_message = prerequisite
            prerequisite_job = sess.query(Job).\
                filter(Job.submission_id == submission_id,
                       Job.file_type_id == FILE_TYPE_DICT[prerequisite_file_type],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                one_or_none()
            if prerequisite_job is None:
                raise Exception(missing_message)
            sess.add(JobDependency(job_id=upload_job.job_id, prerequisite_id=prerequisite_job.job_id))

        else:
            # Every other file type gets a record validation job that waits on its upload job
            val_job = Job(
                original_filename=upload_file.file_name,
                filename=upload_file.upload_name,
                file_type_id=file_type_id,
                job_status_id=JOB_STATUS_DICT['waiting'],
                job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                submission_id=submission_id)
            sess.add(val_job)
            sess.flush()
            sess.add(JobDependency(job_id=val_job.job_id, prerequisite_id=upload_job.job_id))
            validation_job_id = val_job.job_id

    sess.commit()

    return validation_job_id, upload_job.job_id
Esempio n. 10
0
    def setUpClass(cls):
        """Set up class-wide resources (test data)

        Clears existing rules/columns for the file types under test, creates 16
        submissions, uploads each test CSV and creates a job for it, wires up the
        job dependencies, creates file columns and rules, drops any staging tables
        left over for the new job ids, and stores the job ids on cls.jobIdDict.

        Raises:
            Exception: if a created job comes back without a job_id
        """
        super(JobTests, cls).setUpClass()
        #TODO: refactor into a pytest fixture

        # Flag for testing a million+ errors (can take ~30 min to run)
        cls.includeLongTests = False

        validationDb = cls.validationDb
        jobTracker = cls.jobTracker

        # Clear validation rules
        for fileType in [
                "award", "award_financial", "appropriations",
                "program_activity"
        ]:
            validationDb.removeRulesByFileType(fileType)
            validationDb.removeColumnsByFileType(fileType)

        # Create submissions and get IDs back
        submissionIDs = {}
        for i in range(1, 17):
            submissionIDs[i] = cls.insertSubmission(jobTracker,
                                                    userId=cls.userId)

        # One entry per test job; "submissionLocalId" indexes submissionIDs
        csvFiles = {
            "valid": {
                "filename": "testValid.csv",
                "status": "ready",
                "type": "csv_record_validation",
                "submissionLocalId": 1,
                "fileType": 1
            },
            "bad_upload": {
                "filename": "",
                "status": "ready",
                "type": "file_upload",
                "submissionLocalId": 2,
                "fileType": 1
            },
            "bad_prereq": {
                "filename": "",
                "status": "ready",
                "type": "csv_record_validation",
                "submissionLocalId": 2,
                "fileType": 1
            },
            "wrong_type": {
                "filename": "",
                "status": "ready",
                "type": "external_validation",
                "submissionLocalId": 4,
                "fileType": 1
            },
            "not_ready": {
                "filename": "",
                "status": "finished",
                "type": "csv_record_validation",
                "submissionLocalId": 5,
                "fileType": 1
            },
            "valid_upload": {
                "filename": "",
                "status": "finished",
                "type": "file_upload",
                "submissionLocalId": 6,
                "fileType": 1
            },
            "valid_prereq": {
                "filename": "testPrereq.csv",
                "status": "ready",
                "type": "csv_record_validation",
                "submissionLocalId": 6,
                "fileType": 1
            },
            "bad_values": {
                "filename": "testBadValues.csv",
                "status": "ready",
                "type": "csv_record_validation",
                "submissionLocalId": 8,
                "fileType": 1
            },
            "mixed": {
                "filename": "testMixed.csv",
                "status": "ready",
                "type": "csv_record_validation",
                "submissionLocalId": 9,
                "fileType": 1
            },
            "empty": {
                "filename": "testEmpty.csv",
                "status": "ready",
                "type": "csv_record_validation",
                "submissionLocalId": 10,
                "fileType": 1
            },
            "missing_header": {
                "filename": "testMissingHeader.csv",
                "status": "ready",
                "type": "csv_record_validation",
                "submissionLocalId": 11,
                "fileType": 1
            },
            "bad_header": {
                "filename": "testBadHeader.csv",
                "status": "ready",
                "type": "csv_record_validation",
                "submissionLocalId": 12,
                "fileType": 2
            },
            "many": {
                "filename": "testMany.csv",
                "status": "ready",
                "type": "csv_record_validation",
                "submissionLocalId": 11,
                "fileType": 3
            },
            "odd_characters": {
                "filename": "testOddCharacters.csv",
                "status": "ready",
                "type": "csv_record_validation",
                "submissionLocalId": 14,
                "fileType": 2
            },
            "many_bad": {
                "filename": "testManyBadValues.csv",
                "status": "ready",
                "type": "csv_record_validation",
                "submissionLocalId": 11,
                "fileType": 4
            },
            "rules": {
                "filename": "testRules.csv",
                "status": "ready",
                "type": "csv_record_validation",
                "submissionLocalId": 16,
                "fileType": 3
            }
        }

        # Upload needed files to S3
        for fileSpec in csvFiles.values():
            fileSpec["s3Filename"] = cls.uploadFile(fileSpec["filename"],
                                                    cls.userId)
        jobIdDict = {}

        for key, fileSpec in csvFiles.items():
            job = cls.addJob(str(jobTracker.getStatusId(fileSpec["status"])),
                             str(jobTracker.getTypeId(fileSpec["type"])),
                             str(submissionIDs[fileSpec["submissionLocalId"]]),
                             fileSpec["s3Filename"], str(fileSpec["fileType"]),
                             jobTracker.session)
            # TODO: fix statement below--does this error really happen?
            if job.job_id is None:
                # Failed to commit job correctly
                raise Exception(
                    "Job for {} did not get an id back".format(key))
            jobIdDict[key] = job.job_id
            # Print submission IDs for error report checking
            print("{}: {}, ".format(key,
                                    jobTracker.getSubmissionId(job.job_id)),
                  end="")

        # Create dependencies
        dependencies = [
            JobDependency(job_id=str(jobIdDict["bad_prereq"]),
                          prerequisite_id=str(jobIdDict["bad_upload"])),
            JobDependency(job_id=str(jobIdDict["valid_prereq"]),
                          prerequisite_id=str(jobIdDict["valid_upload"]))
        ]

        for dependency in dependencies:
            jobTracker.session.add(dependency)
        jobTracker.session.commit()

        colIdDict = {}
        for fileId in range(1, 5):
            for columnId in range(1, 6):
                #TODO: get rid of hard-coded surrogate keys
                if columnId < 3:
                    fieldType = 1
                else:
                    fieldType = 4
                columnName = "header_{}".format(columnId)
                column = cls.addFileColumn(fileId, fieldType, columnName, "",
                                           (columnId != 3),
                                           validationDb.session)
                colIdDict["header_{}_file_type_{}".format(
                    columnId, fileId)] = column.file_column_id

        def columnIdFor(columnId, fileId):
            # Stringified file_column_id of the given header within the given file type
            return str(colIdDict["header_{}_file_type_{}".format(
                columnId, fileId)])

        rules = [
            Rule(file_column_id=columnIdFor(1, 3),
                 rule_type_id=5,
                 rule_text_1=0,
                 description='value 1 must be greater than zero',
                 rule_timing_id=1),
            Rule(file_column_id=columnIdFor(1, 3),
                 rule_type_id=3,
                 rule_text_1=13,
                 description='value 1 may not be 13',
                 rule_timing_id=1),
            Rule(file_column_id=columnIdFor(5, 3),
                 rule_type_id=1,
                 rule_text_1="INT",
                 description='value 5 must be an integer',
                 rule_timing_id=1),
            Rule(file_column_id=columnIdFor(3, 3),
                 rule_type_id=2,
                 rule_text_1=42,
                 description='value 3 must be equal to 42 if present',
                 rule_timing_id=1),
            Rule(file_column_id=columnIdFor(1, 3),
                 rule_type_id=4,
                 rule_text_1=100,
                 description='value 1 must be less than 100',
                 rule_timing_id=1),
            Rule(file_column_id=columnIdFor(1, 3),
                 rule_type_id=2,
                 rule_text_1="  ",
                 description='None shall pass',
                 rule_timing_id=2
                 )  #This rule should never be checked with rule_timing 2
        ]

        for rule in rules:
            validationDb.session.add(rule)
        validationDb.session.commit()

        # If staging already has corresponding job tables, drop them
        for jobId in jobIdDict.values():
            try:
                cls.stagingDb.dropTable("job{}".format(jobId))
            except Exception:
                # Best effort: a failed drop is ignored, and the staging
                # session is replaced with a fresh one before continuing
                cls.stagingDb.session.close()
                cls.stagingDb.session = cls.stagingDb.Session()

        cls.jobIdDict = jobIdDict
Esempio n. 11
0
    def addUploadJobs(self, filenames, submissionId, existingSubmission):
        """  Set up the upload job and record validation job for each file

        For a new submission a running upload job and a waiting validation job
        are created per file, with the validation job depending on the upload
        job. For an existing submission the jobs already stored for the file
        type are reset to point at the new file instead.

        Arguments:
        filenames -- List of tuples containing (file type, upload path, original filename)
        submissionId -- Submission ID to attach to jobs
        existingSubmission -- True if we should update existing jobs rather than creating new ones

        Returns:
        jobsRequired -- List of ids of the validation jobs created here (stays empty when
                        updating an existing submission), used to populate the prerequisite table
        uploadDict -- Dictionary of upload job ids keyed by file type to return to client,
                      used for calling finalize_submission route
        """
        validationJobIds = []
        uploadIdsByType = {}

        for fileType, filePath, filename in filenames:
            # Resolve the file type name to its id; exactly one match is expected
            typeLookup = self.session.query(FileType.file_type_id).filter(
                FileType.name == fileType)
            fileTypeId = self.runUniqueQuery(
                typeLookup, "No matching file type",
                "Multiple matching file types").file_type_id

            if existingSubmission:
                # Re-point the stored upload job at the new file and restart it
                uploadQuery = self.session.query(Job).filter(
                    Job.submission_id == submissionId)
                uploadQuery = uploadQuery.filter(
                    Job.file_type_id == fileTypeId)
                uploadQuery = uploadQuery.filter(
                    Job.job_type_id == self.getJobTypeId("file_upload"))
                uploadJob = self.runUniqueQuery(
                    uploadQuery, "No upload job found for this file",
                    "Conflicting jobs found")
                uploadJob.job_status_id = self.getJobStatusId("running")
                uploadJob.original_filename = filename
                uploadJob.filename = filePath
                self.session.commit()

                # Put the stored validation job back into its pre-run state
                valQuery = self.session.query(Job).filter(
                    Job.submission_id == submissionId)
                valQuery = valQuery.filter(Job.file_type_id == fileTypeId)
                valQuery = valQuery.filter(
                    Job.job_type_id == self.getJobTypeId(
                        "csv_record_validation"))
                valJob = self.runUniqueQuery(
                    valQuery, "No validation job found for this file",
                    "Conflicting jobs found")
                valJob.job_status_id = self.getJobStatusId("waiting")
                valJob.original_filename = filename
                valJob.filename = filePath
                # Size and row count get filled in again when the new file is validated
                valJob.file_size = None
                valJob.number_of_rows = None
                # Clear whatever the error database still holds for this job
                errorDb = ErrorHandler()
                errorDb.resetErrorsByJobId(valJob.job_id)
                errorDb.resetFileByJobId(valJob.job_id)
                self.session.commit()
            else:
                # Fields shared by the two new jobs for this file
                fileFields = dict(original_filename=filename,
                                  filename=filePath,
                                  file_type_id=fileTypeId,
                                  submission_id=submissionId)

                # Upload starts as running since frontend should be doing this upload
                uploadJob = Job(
                    job_status_id=self.getJobStatusId("running"),
                    job_type_id=self.getJobTypeId("file_upload"),
                    **fileFields)
                self.session.add(uploadJob)

                # The job that parses the file into the DB waits on the upload
                valJob = Job(
                    job_status_id=self.getJobStatusId("waiting"),
                    job_type_id=self.getJobTypeId("csv_record_validation"),
                    **fileFields)
                self.session.add(valJob)
                # Flush before the job ids are read below
                self.session.flush()

                self.session.add(JobDependency(
                    job_id=valJob.job_id, prerequisite_id=uploadJob.job_id))
                # Later validation jobs are dependent only on record level validation, not upload jobs
                validationJobIds.append(valJob.job_id)

            uploadIdsByType[fileType] = uploadJob.job_id

        return validationJobIds, uploadIdsByType
Esempio n. 12
0
         Job.file_type_id == fileTypeId).filter(
             Job.job_type_id == fileUpload).all()
 if uploadJob is None or len(uploadJob) == 0:
     # Create upload job with ready status
     newUploadJob = Job(job_status_id=ready,
                        job_type_id=fileUpload,
                        submission_id=submissionId,
                        file_type_id=fileTypeId)
     session.add(newUploadJob)
     session.commit()
     uploadId = newUploadJob.job_id
 else:
     uploadId = uploadJob[0].job_id
 # If type is D1 or D2, also create a validation job with waiting status and dependency
 if fileTypeId in [awardTypeId, awardProcTypeId]:
     # Check that validation job exists
     existingValJob = session.query(Job).filter(
         Job.submission_id == submissionId).filter(
             Job.file_type_id == fileTypeId).filter(
                 Job.job_type_id == validation).all()
     if existingValJob is None or len(existingValJob) == 0:
         validationJob = Job(job_status_id=ready,
                             job_type_id=validation,
                             submission_id=submissionId,
                             file_type_id=fileTypeId)
         session.add(validationJob)
         session.commit()
         dependency = JobDependency(job_id=validationJob.job_id,
                                    prerequisite_id=uploadId)
         session.add(dependency)
         session.commit()
Esempio n. 13
0
def addJobsForFileType(fileType, filePath, filename, submissionId,
                       existingSubmission, jobsRequired, uploadDict):
    """ Set up the upload job and, where one applies, the validation job for a single file type

    For an existing submission the stored upload and validation jobs are looked
    up and reset. Otherwise a new upload job is created and then either a
    dependency on another file's validation job (awardee_attributes, sub_award)
    or a new validation job that depends on the upload job (all other types).

    Args:
        fileType: What type of file to add jobs for
        filePath: Path to upload the file to
        filename: Original filename
        submissionId: Submission ID to attach to jobs
        existingSubmission: True if we should update existing jobs rather than creating new ones
        jobsRequired: List of job ids that will be prerequisites for cross-file job;
            the id of a newly created validation job is appended to it in place
        uploadDict: Dictionary of upload job ids keyed by file type to return to client,
            used for calling finalize_submission route; updated in place

    Returns:
        jobsRequired: The same list that was passed in
        uploadDict: The same dictionary that was passed in

    Raises:
        Exception: if a new awardee_attributes / sub_award upload job is requested
            but the validation job it has to depend on does not exist
    """
    sess = GlobalDB.db().session
    fileTypeId = FILE_TYPE_DICT[fileType]

    if existingSubmission:
        # Reuse the stored upload job: restart it and point it at the new file
        uploadJob = sess.query(Job).filter_by(
            submission_id=submissionId,
            file_type_id=fileTypeId,
            job_type_id=JOB_TYPE_DICT['file_upload']).one()
        uploadJob.job_status_id = JOB_STATUS_DICT['running']
        uploadJob.original_filename = filename
        uploadJob.filename = filePath

        sess.flush()

        # Reset the stored validation job and wipe anything a previous run
        # of it may have left behind
        valJob = sess.query(Job).filter_by(
            submission_id=submissionId,
            file_type_id=fileTypeId,
            job_type_id=JOB_TYPE_DICT['csv_record_validation']).one()
        valJob.job_status_id = JOB_STATUS_DICT['waiting']
        valJob.original_filename = filename
        valJob.filename = filePath
        # File size and row count are set again while the new file is validated
        valJob.file_size = None
        valJob.number_of_rows = None
        # Error metadata from a previous run of this validation job
        staleErrors = sess.query(ErrorMetadata).filter(
            ErrorMetadata.job_id == valJob.job_id)
        staleErrors.delete(synchronize_session='fetch')
        # File error information from a previous run of this validation job
        staleFiles = sess.query(File).filter(File.job_id == valJob.job_id)
        staleFiles.delete(synchronize_session='fetch')

    else:
        # Starting status of a new upload job, by file type. Types not listed
        # start as running since the frontend should be doing the upload.
        startingStatus = {
            # file generation handled on backend, mark as ready
            "award": 'ready',
            "award_procurement": 'ready',
            # these wait on another file's validation job (dependency added below)
            "awardee_attributes": 'waiting',
            "sub_award": 'waiting',
        }
        uploadJob = Job(
            original_filename=filename,
            filename=filePath,
            file_type_id=fileTypeId,
            job_status_id=JOB_STATUS_DICT[startingStatus.get(fileType,
                                                             'running')],
            job_type_id=JOB_TYPE_DICT['file_upload'],
            submission_id=submissionId)
        sess.add(uploadJob)

        sess.flush()

        # File types whose upload job hangs off another file's validation job:
        # (file type of the prerequisite job, error raised when it is missing)
        prerequisiteFor = {
            "awardee_attributes": ('award_procurement',
                                   "Cannot create E job without a D1 job"),
            "sub_award": ('award_financial',
                          "Cannot create F job without a C job"),
        }

        if fileType in prerequisiteFor:
            # These types get no validation job of their own, only a dependency
            neededType, missingMessage = prerequisiteFor[fileType]
            neededJob = sess.query(Job).\
                filter(Job.submission_id == submissionId,
                       Job.file_type_id == FILE_TYPE_DICT[neededType],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                first()
            if neededJob is None:
                raise Exception(missingMessage)
            sess.add(JobDependency(job_id=uploadJob.job_id,
                                   prerequisite_id=neededJob.job_id))
        else:
            # Every other type gets a waiting record validation job
            valJob = Job(original_filename=filename,
                         filename=filePath,
                         file_type_id=fileTypeId,
                         job_status_id=JOB_STATUS_DICT['waiting'],
                         job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                         submission_id=submissionId)
            sess.add(valJob)
            sess.flush()
            # Validation cannot start until the file upload has finished
            sess.add(JobDependency(job_id=valJob.job_id,
                                   prerequisite_id=uploadJob.job_id))
            jobsRequired.append(valJob.job_id)

    sess.commit()

    uploadDict[fileType] = uploadJob.job_id
    return jobsRequired, uploadDict