def create_generation_job(file_type_name, start_date, end_date):
    """ Add details to jobs for generating files

        Args:
            file_type_name: the name of the file type being generated
            job: the generation job, None if it is a detached generation
            start_date: The start date for the generation job, only used for detached files
            end_date: The end date for the generation job, only used for detached files

        Returns:
            the file generation job
    """
    sess = GlobalDB.db().session

    # Create a new job for a detached generation
    job = Job(job_type_id=lookups.JOB_TYPE_DICT['file_upload'], user_id=g.user.user_id,
              file_type_id=lookups.FILE_TYPE_DICT[file_type_name], start_date=start_date, end_date=end_date)
    sess.add(job)

    # Update the job details
    job.message = None
    job.job_status_id = lookups.JOB_STATUS_DICT["ready"]
    sess.commit()
    sess.refresh(job)

    return job
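A minimal usage sketch, assuming a Flask request context where g.user is set and the broker lookup tables are loaded; the file type name and dates are illustrative:

from datetime import date

# Hypothetical call site; 'award_procurement' must be a key of FILE_TYPE_DICT.
job = create_generation_job('award_procurement', date(2017, 10, 1), date(2017, 12, 31))
print(job.job_id, job.job_status_id)  # committed with status 'ready'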
Example #2
def insert_job(sess,
               filetype,
               status,
               type_id,
               submission,
               job_id=None,
               filename=None,
               original_filename=None,
               file_size=None,
               num_rows=None,
               num_valid_rows=0,
               num_errors=0,
               updated_at=None):
    """Insert one job into job tracker and get ID back."""
    if not updated_at:
        updated_at = datetime.utcnow()

    job = Job(created_at=datetime.utcnow(),
              updated_at=updated_at,
              file_type_id=filetype,
              job_status_id=status,
              job_type_id=type_id,
              submission_id=submission,
              filename=filename,
              original_filename=original_filename,
              file_size=file_size,
              number_of_rows=num_rows,
              number_of_rows_valid=num_valid_rows,
              number_of_errors=num_errors)
    if job_id:
        job.job_id = job_id
    sess.add(job)
    sess.commit()
    return job
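A sketch of a call, assuming an open SQLAlchemy session and the broker's lookup dictionaries; names and values are illustrative:

# Hypothetical test usage; the lookup dicts map names to lookup-table IDs.
job = insert_job(sess,
                 filetype=FILE_TYPE_DICT['appropriations'],
                 status=JOB_STATUS_DICT['ready'],
                 type_id=JOB_TYPE_DICT['csv_record_validation'],
                 submission=submission.submission_id,
                 num_rows=10)
assert job.job_id is not None  # the commit above populates the primary key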
Example #3
def create_generation_job(file_type_name, start_date, end_date):
    """ Add details to jobs for generating files

        Args:
            file_type_name: the name of the file type being generated
            job: the generation job, None if it is a detached generation
            start_date: The start date for the generation job, only used for detached files
            end_date: The end date for the generation job, only used for detached files

        Returns:
            the file generation job
    """
    sess = GlobalDB.db().session

    # Create a new job for a detached generation
    job = Job(job_type_id=lookups.JOB_TYPE_DICT['file_upload'],
              user_id=g.user.user_id,
              file_type_id=lookups.FILE_TYPE_DICT[file_type_name],
              start_date=start_date,
              end_date=end_date)
    sess.add(job)

    # Update the job details
    job.message = None
    job.job_status_id = lookups.JOB_STATUS_DICT["ready"]
    sess.commit()
    sess.refresh(job)

    return job
Example #4
def insert_job(sess,
               filetype,
               status,
               type_id,
               submission,
               job_id=None,
               filename=None,
               original_filename=None,
               file_size=None,
               num_rows=None,
               num_errors=0):
    """Insert one job into job tracker and get ID back."""
    job = Job(file_type_id=filetype,
              job_status_id=status,
              job_type_id=type_id,
              submission_id=submission,
              filename=filename,
              original_filename=original_filename,
              file_size=file_size,
              number_of_rows=num_rows,
              number_of_errors=num_errors)
    if job_id:
        job.job_id = job_id
    sess.add(job)
    sess.commit()
    return job
Example #5
    def createJobs(self, filenames, submissionId, existingSubmission=False):
        """  Given the filenames to be uploaded, create the set of jobs needing to be completed for this submission

        Arguments:
        filenames -- List of tuples containing (file type, upload path, original filenames)
        submissionId -- Submission ID to be linked to jobs
        existingSubmission -- True if we should update jobs in an existing submission rather than creating new jobs

        Returns:
        Dictionary of upload ids by filename to return to client, used for calling finalize_submission route
        """

        jobsRequired, uploadDict = self.addUploadJobs(filenames, submissionId,
                                                      existingSubmission)

        if existingSubmission:
            # Find cross-file and external validation jobs and mark them as waiting
            valQuery = self.session.query(Job).filter(
                Job.submission_id == submissionId).filter(
                    Job.job_type_id == self.getJobTypeId("validation"))
            valJob = self.runUniqueQuery(valQuery,
                                         "No cross-file validation job found",
                                         "Conflicting jobs found")
            valJob.job_status_id = self.getJobStatusId("waiting")
            extQuery = self.session.query(Job).filter(
                Job.submission_id == submissionId).filter(
                    Job.job_type_id == self.getJobTypeId(
                        "external_validation"))
            extJob = self.runUniqueQuery(extQuery,
                                         "No external validation job found",
                                         "Conflicting jobs found")
            extJob.job_status_id = self.getJobStatusId("waiting")
            self.session.commit()
        else:
            # Create validation job
            validationJob = Job(job_status_id=self.getJobStatusId("waiting"),
                                job_type_id=self.getJobTypeId("validation"),
                                submission_id=submissionId)
            self.session.add(validationJob)
            # Create external validation job
            externalJob = Job(
                job_status_id=self.getJobStatusId("waiting"),
                job_type_id=self.getJobTypeId("external_validation"),
                submission_id=submissionId)
            self.session.add(externalJob)
            self.session.flush()
            # Create dependencies for validation jobs
            for job_id in jobsRequired:
                valDependency = JobDependency(job_id=validationJob.job_id,
                                              prerequisite_id=job_id)
                self.session.add(valDependency)
                extDependency = JobDependency(job_id=externalJob.job_id,
                                              prerequisite_id=job_id)
                self.session.add(extDependency)

        # Commit all changes
        self.session.commit()
        uploadDict["submission_id"] = submissionId
        return uploadDict
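A hypothetical call site, following the (file type, upload path, original filename) tuple shape the docstring describes:

# `handler` stands in for whatever object owns createJobs and its session.
filenames = [('appropriations', 's3://bucket/approp.csv', 'approp.csv'),
             ('award_financial', 's3://bucket/award_fin.csv', 'award_fin.csv')]
uploadDict = handler.createJobs(filenames, submissionId=42)
# uploadDict maps each file type to its upload job ID, plus 'submission_id'.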
Example #6
def insertJob(jobTracker, filetype, status, type_id, submission, job_id=None, filename=None, file_size=None,
              num_rows=None):
    """Insert one job into job tracker and get ID back."""
    job = Job(
        file_type_id=filetype,
        job_status_id=status,
        job_type_id=type_id,
        submission_id=submission,
        original_filename=filename,
        file_size=file_size,
        number_of_rows=num_rows
    )
    if job_id:
        job.job_id = job_id
    jobTracker.session.add(job)
    jobTracker.session.commit()
    return job.job_id
Example #7
    def setUpClass(cls):
        """Set up class-wide resources like submissions and jobs."""
        super(UserTests, cls).setUpClass()

        # Add submissions to one of the users
        jobDb = cls.jobTracker

        # Delete existing submissions for approved user
        jobDb.deleteSubmissionsForUserId(cls.approved_user_id)

        for i in range(0, 5):
            sub = Submission(user_id=cls.approved_user_id)
            jobDb.session.add(sub)
            jobDb.session.commit()
            if i == 0:
                cls.submission_id = sub.submission_id

        # Add submissions for agency user
        jobDb.deleteSubmissionsForUserId(cls.agency_user_id)
        for i in range(0, 6):
            sub = Submission(user_id=cls.agency_user_id)
            sub.cgac_code = "SYS"
            jobDb.session.add(sub)
            jobDb.session.commit()

        # Add job to first submission
        # Hard-coded IDs below presumably correspond to the lookups used in
        # Example #8: 3 = 'running', 1 = 'file_upload', 1 = 'appropriations'.
        job = Job(submission_id=cls.submission_id,
                  job_status_id=3,
                  job_type_id=1,
                  file_type_id=1)
        jobDb.session.add(job)
        jobDb.session.commit()
        cls.uploadId = job.job_id
Example #8
    def setUpClass(cls):
        """Set up class-wide resources like submissions and jobs."""
        super(UserTests, cls).setUpClass()

        with create_app().app_context():
            sess = GlobalDB.db().session

            # Add submissions for agency user
            sess.query(Submission).filter(Submission.user_id == cls.agency_user_id).delete()
            sess.commit()
            for i in range(0, 6):
                sub = Submission(user_id=cls.agency_user_id)
                sub.reporting_start_date = datetime(2015, 10, 1)
                sub.reporting_end_date = datetime(2015, 12, 31)
                sub.cgac_code = cls.admin_cgac_code
                sess.add(sub)
                sess.commit()
                if i == 0:
                    cls.submission_id = sub.submission_id

            # Add job to first submission
            job = Job(
                submission_id=cls.submission_id,
                job_status_id=JOB_STATUS_DICT['running'],
                job_type_id=JOB_TYPE_DICT['file_upload'],
                file_type_id=FILE_TYPE_DICT['appropriations']
            )
            sess.add(job)
            sess.commit()
            cls.uploadId = job.job_id
Example #9
    def insert_agency_user_submission_data(sess, submission_id):
        """Insert jobs for the submission, and create a CGAC, FREC, and SubTierAgency"""
        for job_type in ['file_upload', 'csv_record_validation', 'validation']:
            sess.add(
                Job(file_type_id=FILE_TYPE_DICT['fabs'],
                    job_status_id=JOB_STATUS_DICT['finished'],
                    job_type_id=JOB_TYPE_DICT[job_type],
                    submission_id=submission_id,
                    original_filename=None,
                    file_size=None,
                    number_of_rows=None))
            sess.commit()

        cgac = CGAC(cgac_code="NOT")
        sess.add(cgac)
        sess.commit()
        frec = FREC(cgac_id=cgac.cgac_id, frec_code="BLAH")
        sess.add(frec)
        sess.commit()
        sub = SubTierAgency(sub_tier_agency_code="WRONG",
                            cgac_id=cgac.cgac_id,
                            frec_id=frec.frec_id,
                            is_frec=False)
        sess.add(sub)
        sess.commit()
Example #10
def create_jobs(upload_files, submission, existing_submission=False):
    """Create the set of jobs associated with the specified submission

    Arguments:
    upload_files -- list of named tuples that describe files uploaded to the broker
    submission -- submission
    existing_submission -- true if we should update jobs in an existing submission rather than creating new jobs

    Returns:
    Dictionary of upload ids by filename to return to client, used for calling finalize_submission route
    """
    sess = GlobalDB.db().session
    submission_id = submission.submission_id

    # create the file upload and single-file validation jobs and
    # set up the dependencies between them
    # before starting, sort the incoming list of jobs by letter
    # to ensure that jobs dependent on the awards jobs being present
    # are processed last.
    jobs_required = []
    upload_dict = {}
    sorted_uploads = sorted(upload_files, key=attrgetter('file_letter'))

    for upload_file in sorted_uploads:
        validation_job_id, upload_job_id = add_jobs_for_uploaded_file(
            upload_file, submission_id, existing_submission)
        if validation_job_id:
            jobs_required.append(validation_job_id)
        upload_dict[upload_file.file_type] = upload_job_id

    # once single-file upload/validation jobs are created, create the cross-file
    # validation job and dependencies
    if existing_submission and not submission.d2_submission:
        # find cross-file jobs and mark them as waiting
        # (note: job_type of 'validation' is a cross-file job)
        val_job = sess.query(Job).\
            filter_by(
                submission_id=submission_id,
                job_type_id=JOB_TYPE_DICT["validation"]).\
            one()
        val_job.job_status_id = JOB_STATUS_DICT["waiting"]
        submission.updated_at = time.strftime("%c")
    # todo: add these back in for detached_d2 when we have actual validations
    elif not submission.d2_submission:
        # create cross-file validation job
        validation_job = Job(job_status_id=JOB_STATUS_DICT["waiting"],
                             job_type_id=JOB_TYPE_DICT["validation"],
                             submission_id=submission_id)
        sess.add(validation_job)
        sess.flush()
        # create dependencies for validation jobs
        for job_id in jobs_required:
            val_dependency = JobDependency(job_id=validation_job.job_id,
                                           prerequisite_id=job_id)
            sess.add(val_dependency)

    sess.commit()
    upload_dict["submission_id"] = submission_id
    return upload_dict
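The upload_files argument is only described as a list of named tuples; below is a sketch of the shape this function relies on, with field names inferred from the attributes read here (file_type, file_letter) and in add_jobs_for_uploaded_file (upload_name, file_name):

from collections import namedtuple

# Hypothetical definition; the real broker type may differ.
UploadFile = namedtuple('UploadFile', ['file_type', 'upload_name', 'file_name', 'file_letter'])

# `submission` is assumed to be a Submission model instance.
files = [UploadFile('appropriations', 's3://bucket/approp.csv', 'approp.csv', 'A'),
         UploadFile('award_financial', 's3://bucket/award_fin.csv', 'award_fin.csv', 'C')]
upload_dict = create_jobs(files, submission)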
Example #11
def insert_job(sess, filetype, status, type_id, submission, job_id=None, filename=None, original_filename=None,
               file_size=None, num_rows=None, num_errors=0):
    """Insert one job into job tracker and get ID back."""
    job = Job(
        file_type_id=filetype,
        job_status_id=status,
        job_type_id=type_id,
        submission_id=submission,
        filename=filename,
        original_filename=original_filename,
        file_size=file_size,
        number_of_rows=num_rows,
        number_of_errors=num_errors
    )
    if job_id:
        job.job_id = job_id
    sess.add(job)
    sess.commit()
    return job
Example #12
def addJob(status, jobType, submissionId, s3Filename, fileType, session):
    """ Create a job model and add it to the session """
    job = Job(job_status_id=status,
              job_type_id=jobType,
              submission_id=submissionId,
              filename=s3Filename,
              file_type_id=fileType)
    session.add(job)
    session.commit()
    return job
Example #13
def insertJob(jobTracker,
              filetype,
              status,
              type_id,
              submission,
              job_id=None,
              filename=None,
              file_size=None,
              num_rows=None):
    """Insert one job into job tracker and get ID back."""
    job = Job(file_type_id=filetype,
              job_status_id=status,
              job_type_id=type_id,
              submission_id=submission,
              original_filename=filename,
              file_size=file_size,
              number_of_rows=num_rows)
    if job_id:
        job.job_id = job_id
    jobTracker.session.add(job)
    jobTracker.session.commit()
    return job.job_id
Example #14
    def setUpClass(cls):
        """Set up class-wide resources like submissions and jobs."""
        super(UserTests, cls).setUpClass()

        with createApp().app_context():
            sess = GlobalDB.db().session

            # Add submissions to one of the users

            # Delete existing submissions for approved user
            sess.query(Submission).filter(
                Submission.user_id == cls.approved_user_id).delete()
            sess.commit()

            for i in range(0, 5):
                sub = Submission(user_id=cls.approved_user_id)
                sub.reporting_start_date = datetime(2015, 10, 1)
                sub.reporting_end_date = datetime(2015, 12, 31)
                sess.add(sub)
            sess.commit()

            # Add submissions for agency user
            sess.query(Submission).filter(
                Submission.user_id == cls.agency_user_id).delete()
            sess.commit()
            for i in range(0, 6):
                sub = Submission(user_id=cls.agency_user_id)
                sub.reporting_start_date = datetime(2015, 10, 1)
                sub.reporting_end_date = datetime(2015, 12, 31)
                sub.cgac_code = "SYS"
                sess.add(sub)
                sess.commit()
                if i == 0:
                    cls.submission_id = sub.submission_id

            # Add job to first submission
            job = Job(submission_id=cls.submission_id,
                      job_status_id=cls.jobStatusDict['running'],
                      job_type_id=cls.jobTypeDict['file_upload'],
                      file_type_id=cls.fileTypeDict['appropriations'])
            sess.add(job)
            sess.commit()
            cls.uploadId = job.job_id
Example #15
    def insert_job(sess, submission_id, job_status_id, job_type_id):
        """ Insert one job into job tracker and get job ID back.

            Args:
                sess: the current session
                submission_id: the ID of the submission the job is attached to
                job_status_id: the status of the job
                job_type_id: the type of the job

            Returns:
                the job ID of the created job
        """
        job = Job(file_type_id=FILE_TYPE_DICT['fabs'],
                  job_status_id=job_status_id,
                  job_type_id=job_type_id,
                  submission_id=submission_id,
                  original_filename=None,
                  file_size=None,
                  number_of_rows=None)
        sess.add(job)
        sess.commit()
        return job.job_id
Example #16
    def test_upload_fabs_duplicate_running(self):
        """ Test file submissions for when the job is already running """
        # Mark a job as already running
        self.session.add(
            Job(file_type_id=FILE_TYPE_DICT['fabs'],
                job_status_id=JOB_STATUS_DICT['running'],
                job_type_id=JOB_TYPE_DICT['file_upload'],
                submission_id=str(self.d2_submission),
                original_filename=None,
                file_size=None,
                number_of_rows=None))
        self.session.commit()

        response = self.app.post(
            "/v1/upload_fabs_file/",
            {"existing_submission_id": str(self.d2_submission)},
            upload_files=[('fabs', 'fabs.csv',
                           open('tests/integration/data/fabs.csv',
                                'rb').read())],
            headers={"x-session-id": self.session_id},
            expect_errors=True)
        self.assertEqual(response.status_code, 400)
        self.assertEqual(response.json['message'],
                         'Submission already has a running job')
Example #17
for fileType in EXTERNAL_FILE_TYPES:
    externalIds.append(interfaces.jobDb.getFileTypeId(fileType))
# For each submission ID, check that all jobs are present and create any missing
print("external IDs: " + str(externalIds))
with databaseSession() as session:
    for submissionId in submissionIds:
        for fileTypeId in externalIds:
            # If job does not exist, create it
            uploadJob = session.query(Job).filter(
                Job.submission_id == submissionId).filter(
                    Job.file_type_id == fileTypeId).filter(
                        Job.job_type_id == fileUpload).all()
            if not uploadJob:
                # Create upload job with ready status
                newUploadJob = Job(job_status_id=ready,
                                   job_type_id=fileUpload,
                                   submission_id=submissionId,
                                   file_type_id=fileTypeId)
                session.add(newUploadJob)
                session.commit()
                uploadId = newUploadJob.job_id
            else:
                uploadId = uploadJob[0].job_id
            # If type is D1 or D2, also create a validation job with waiting status and dependency
            if fileTypeId in [awardTypeId, awardProcTypeId]:
                # Check that validation job exists
                existingValJob = session.query(Job).filter(
                    Job.submission_id == submissionId).filter(
                        Job.file_type_id == fileTypeId).filter(
                            Job.job_type_id == validation).all()
                if not existingValJob:
                    validationJob = Job(job_status_id=ready,
                                        job_type_id=validation,
                                        submission_id=submissionId,
                                        file_type_id=fileTypeId)
                    session.add(validationJob)
                    session.flush()
                    # Add dependency on the upload job for this file type
                    valDependency = JobDependency(job_id=validationJob.job_id,
                                                  prerequisite_id=uploadId)
                    session.add(valDependency)
                    session.commit()
Example #18
def test_revert_submission(database, monkeypatch):
    """ Tests reverting an updated DABS certification """
    sess = database.session

    sub = Submission(publish_status_id=PUBLISH_STATUS_DICT['updated'], is_quarter_format=True, d2_submission=False,
                     publishable=False, number_of_errors=20, number_of_warnings=15)
    sess.add(sub)
    sess.commit()

    job = Job(submission_id=sub.submission_id, job_status_id=JOB_STATUS_DICT['finished'],
              job_type_id=JOB_TYPE_DICT['csv_record_validation'], file_type_id=FILE_TYPE_DICT['appropriations'],
              number_of_warnings=0, number_of_errors=10, filename='new/test/file.csv', number_of_rows=5,
              number_of_rows_valid=0)
    cert_history = CertifyHistory(submission_id=sub.submission_id)
    sess.add_all([job, cert_history])
    sess.commit()

    cert_approp = CertifiedAppropriation(submission_id=sub.submission_id, job_id=job.job_id, row_number=1,
                                         spending_authority_from_of_cpe=2, tas='test')
    approp = Appropriation(submission_id=sub.submission_id, job_id=job.job_id, row_number=1,
                           spending_authority_from_of_cpe=15, tas='test')
    cert_files = CertifiedFilesHistory(certify_history_id=cert_history.certify_history_id,
                                       submission_id=sub.submission_id, filename='old/test/file2.csv',
                                       file_type_id=FILE_TYPE_DICT['appropriations'], warning_filename='a/warning.csv')
    cert_meta1 = CertifiedErrorMetadata(job_id=job.job_id, file_type_id=FILE_TYPE_DICT['appropriations'],
                                        target_file_type_id=None, occurrences=15)
    cert_meta2 = CertifiedErrorMetadata(job_id=job.job_id, file_type_id=FILE_TYPE_DICT['appropriations'],
                                        target_file_type_id=None, occurrences=10)
    file_entry = File(file_id=FILE_TYPE_DICT['appropriations'], job_id=job.job_id,
                      file_status_id=FILE_STATUS_DICT['incomplete'], headers_missing='something')
    sess.add_all([cert_approp, approp, cert_files, cert_meta1, cert_meta2, file_entry])
    sess.commit()

    file_handler = fileHandler.FileHandler({}, is_local=True)
    monkeypatch.setattr(file_handler, 'revert_certified_error_files', Mock())
    revert_to_certified(sub, file_handler)

    # Test that certified data is moved back
    approp_query = sess.query(Appropriation).filter_by(submission_id=sub.submission_id).all()
    assert len(approp_query) == 1
    assert approp_query[0].spending_authority_from_of_cpe == 2

    # Test that the job got updated
    job_query = sess.query(Job).filter_by(submission_id=sub.submission_id).all()
    assert len(job_query) == 1
    assert job_query[0].filename == CONFIG_BROKER['broker_files'] + 'file2.csv'
    assert job_query[0].number_of_warnings == 25
    assert job_query[0].number_of_errors == 0
    assert job_query[0].number_of_rows == 2
    assert job_query[0].number_of_rows_valid == 1

    # Test that File got updated
    file_query = sess.query(File).filter_by(job_id=job.job_id).all()
    assert len(file_query) == 1
    assert file_query[0].headers_missing is None
    assert file_query[0].file_status_id == FILE_STATUS_DICT['complete']

    # Make sure submission got updated
    sub_query = sess.query(Submission).filter_by(submission_id=sub.submission_id).all()
    assert len(sub_query) == 1
    assert sub_query[0].publishable is True
    assert sub_query[0].number_of_errors == 0
    assert sub_query[0].number_of_warnings == 25
Example #19
    def addUploadJobs(self, filenames, submissionId, existingSubmission):
        """  Add upload jobs to job tracker database

        Arguments:
        filenames -- List of tuples containing (file type, upload path, original filenames)
        submissionId -- Submission ID to attach to jobs
        existingSubmission -- True if we should update existing jobs rather than creating new ones

        Returns:
        jobsRequired -- List of job ids required for validation jobs, used to populate the prerequisite table
        uploadDict -- Dictionary of upload ids by filename to return to client, used for calling finalize_submission route
        """

        # Keep list of job ids required for validation jobs
        jobsRequired = []
        # Dictionary of upload ids by filename to return to client
        uploadDict = {}

        for fileType, filePath, filename in filenames:
            fileTypeQuery = self.session.query(
                FileType.file_type_id).filter(FileType.name == fileType)
            fileTypeResult = self.runUniqueQuery(
                fileTypeQuery, "No matching file type",
                "Multiple matching file types")
            fileTypeId = fileTypeResult.file_type_id

            if existingSubmission:
                # Find existing upload job and mark as running
                uploadQuery = self.session.query(Job).filter(
                    Job.submission_id == submissionId
                ).filter(Job.file_type_id == fileTypeId).filter(
                    Job.job_type_id == self.getJobTypeId("file_upload"))
                uploadJob = self.runUniqueQuery(
                    uploadQuery, "No upload job found for this file",
                    "Conflicting jobs found")
                # Mark as running and set new file name and path
                uploadJob.job_status_id = self.getJobStatusId("running")
                uploadJob.original_filename = filename
                uploadJob.filename = filePath
                self.session.commit()
            else:
                # Create upload job, mark as running since frontend should be doing this upload
                uploadJob = Job(original_filename=filename,
                                filename=filePath,
                                file_type_id=fileTypeId,
                                job_status_id=self.getJobStatusId("running"),
                                job_type_id=self.getJobTypeId("file_upload"),
                                submission_id=submissionId)
                self.session.add(uploadJob)

            if existingSubmission:
                valQuery = self.session.query(Job).filter(
                    Job.submission_id == submissionId).filter(
                        Job.file_type_id == fileTypeId).filter(
                            Job.job_type_id == self.getJobTypeId(
                                "csv_record_validation"))
                valJob = self.runUniqueQuery(
                    valQuery, "No validation job found for this file",
                    "Conflicting jobs found")
                valJob.job_status_id = self.getJobStatusId("waiting")
                valJob.original_filename = filename
                valJob.filename = filePath
                # Reset file size and number of rows to be set during validation of new file
                valJob.file_size = None
                valJob.number_of_rows = None
                # Reset number of errors
                errorDb = ErrorHandler()
                errorDb.resetErrorsByJobId(valJob.job_id)
                errorDb.resetFileByJobId(valJob.job_id)
                self.session.commit()
            else:
                # Create parse into DB job
                valJob = Job(
                    original_filename=filename,
                    filename=filePath,
                    file_type_id=fileTypeId,
                    job_status_id=self.getJobStatusId("waiting"),
                    job_type_id=self.getJobTypeId("csv_record_validation"),
                    submission_id=submissionId)
                self.session.add(valJob)
                self.session.flush()
            if not existingSubmission:
                # Add dependency between file upload and db upload
                uploadDependency = JobDependency(
                    job_id=valJob.job_id, prerequisite_id=uploadJob.job_id)
                self.session.add(uploadDependency)
                # Later validation jobs are dependent only on record level validation, not upload jobs
                jobsRequired.append(valJob.job_id)
            uploadDict[fileType] = uploadJob.job_id

        # Return list of upload jobs
        return jobsRequired, uploadDict
Example #20
    def setUpClass(cls):
        """Set up class-wide resources."""
        super(FileTypeTests, cls).setUpClass()
        # TODO: refactor into a pytest fixture

        user = cls.userId
        # TODO: get rid of this flag once we're using a tempdb for test fixtures
        force_tas_load = False

        with createApp().app_context():
            sess = GlobalDB.db().session

            # Create submissions and jobs, also uploading
            # the files needed for each job.
            statusReadyId = JOB_STATUS_DICT['ready']
            jobTypeCsvId = JOB_TYPE_DICT['csv_record_validation']
            jobDict = {}

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile("appropValid.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['valid'] = job_info.job_id

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile("programActivityValid.csv",
                                                   user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['program_activity'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['programValid'] = job_info.job_id

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile("awardFinancialValid.csv",
                                                   user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award_financial'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardFinValid'] = job_info.job_id

            # next two jobs have the same submission id
            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile("awardValid.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardValid'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("awardProcValid.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award_procurement'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardProcValid'] = job_info.job_id

            # commit submissions/jobs and output IDs
            sess.commit()
            for job_type, job_id in jobDict.items():
                print('{}: {}'.format(job_type, job_id))

            # Load fields and rules
            FileTypeTests.load_definitions(sess, force_tas_load)

            cls.jobDict = jobDict
Example #21
def add_jobs_for_uploaded_file(upload_file, submission_id, existing_submission):
    """ Add upload and validation jobs for a single filetype

    Arguments:
        upload_file: UploadFile named tuple
        submission_id: submission ID to attach to jobs
        existing_submission: true if we should update existing jobs rather than creating new ones

    Returns:
        the validation job id for this file type (if any)
        the upload job id for this file type
    """
    sess = GlobalDB.db().session

    file_type_id = FILE_TYPE_DICT[upload_file.file_type]
    validation_job_id = None

    # Create a file upload job or, for an existing submission, modify the
    # existing upload job.

    if existing_submission:
        # mark existing upload job as running
        upload_job = sess.query(Job).filter_by(
            submission_id=submission_id,
            file_type_id=file_type_id,
            job_type_id=JOB_TYPE_DICT['file_upload']
        ).one()
        # mark as running and set new file name and path
        upload_job.job_status_id = JOB_STATUS_DICT['running']
        upload_job.original_filename = upload_file.file_name
        upload_job.filename = upload_file.upload_name

    else:
        if upload_file.file_type in ["award", "award_procurement"]:
            # file generation handled on backend, mark as ready
            upload_status = JOB_STATUS_DICT['ready']
        elif upload_file.file_type in ["awardee_attributes", "sub_award"]:
            # these are dependent on file D2 validation
            upload_status = JOB_STATUS_DICT['waiting']
        else:
            # mark as running since frontend should be doing this upload
            upload_status = JOB_STATUS_DICT['running']

        upload_job = Job(
            original_filename=upload_file.file_name,
            filename=upload_file.upload_name,
            file_type_id=file_type_id,
            job_status_id=upload_status,
            job_type_id=JOB_TYPE_DICT['file_upload'],
            submission_id=submission_id)
        sess.add(upload_job)
        sess.flush()

    if existing_submission:
        # if the file's validation job is attached to an existing submission,
        # reset its status and delete any validation artifacts (e.g., error metadata) that
        # might exist from a previous run.
        val_job = sess.query(Job).filter_by(
            submission_id=submission_id,
            file_type_id=file_type_id,
            job_type_id=JOB_TYPE_DICT['csv_record_validation']
        ).one()
        val_job.job_status_id = JOB_STATUS_DICT['waiting']
        val_job.original_filename = upload_file.file_name
        val_job.filename = upload_file.upload_name
        # reset file size and number of rows to be set during validation of new file
        val_job.file_size = None
        val_job.number_of_rows = None
        # delete error metadata that might exist from a previous run of this validation job
        sess.query(ErrorMetadata).\
            filter(ErrorMetadata.job_id == val_job.job_id).\
            delete(synchronize_session='fetch')
        # delete file error information that might exist from a previous run of this validation job
        sess.query(File).filter(File.job_id == val_job.job_id).delete(synchronize_session='fetch')

    else:
        # create a new record validation job and add dependencies if necessary
        if upload_file.file_type == "awardee_attributes":
            d1_val_job = sess.query(Job).\
                filter(Job.submission_id == submission_id,
                       Job.file_type_id == FILE_TYPE_DICT['award_procurement'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                one_or_none()
            if d1_val_job is None:
                raise Exception("Cannot create E job without a D1 job")
            # Add dependency on D1 validation job
            d1_dependency = JobDependency(job_id=upload_job.job_id, prerequisite_id=d1_val_job.job_id)
            sess.add(d1_dependency)

        elif upload_file.file_type == "sub_award":
            # todo: check for C validation job
            c_val_job = sess.query(Job).\
                filter(Job.submission_id == submission_id,
                       Job.file_type_id == FILE_TYPE_DICT['award_financial'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                one_or_none()
            if c_val_job is None:
                raise Exception("Cannot create F job without a C job")
            # add dependency on C validation job
            c_dependency = JobDependency(job_id=upload_job.job_id, prerequisite_id=c_val_job.job_id)
            sess.add(c_dependency)

        else:
            # all other file types get a record-level validation job (E and F don't)
            val_job = Job(
                original_filename=upload_file.file_name,
                filename=upload_file.upload_name,
                file_type_id=file_type_id,
                job_status_id=JOB_STATUS_DICT['waiting'],
                job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                submission_id=submission_id)
            sess.add(val_job)
            sess.flush()
            # add dependency between file upload job and file validation job
            upload_dependency = JobDependency(job_id=val_job.job_id, prerequisite_id=upload_job.job_id)
            sess.add(upload_dependency)
            validation_job_id = val_job.job_id

    sess.commit()

    return validation_job_id, upload_job.job_id
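A hypothetical single-file call, reusing the UploadFile shape sketched after create_jobs above:

f = UploadFile('program_activity', 's3://bucket/pgm.csv', 'pgm.csv', 'B')
validation_job_id, upload_job_id = add_jobs_for_uploaded_file(f, submission_id=42, existing_submission=False)
# For E/F file types, validation_job_id comes back None: those files get
# dependencies on other validation jobs instead of their own.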
Example #22
def addJobsForFileType(fileType, filePath, filename, submissionId,
                       existingSubmission, jobsRequired, uploadDict):
    """ Add upload and validation jobs for a single filetype

    Args:
        fileType: What type of file to add jobs for
        filePath: Path to upload the file to
        filename: Original filename
        submissionId: Submission ID to attach to jobs
        existingSubmission: True if we should update existing jobs rather than creating new ones
        jobsRequired: List of job ids that will be prerequisites for cross-file job
        uploadDict: Dictionary of upload ids by filename to return to client, used for calling finalize_submission route

    Returns:
        jobsRequired: List of job ids that will be prerequisites for cross-file job
        uploadDict: Dictionary of upload ids by filename to return to client, used for calling finalize_submission route
    """
    sess = GlobalDB.db().session

    fileTypeId = FILE_TYPE_DICT[fileType]

    # Create a file upload job or, for an existing submission, modify the
    # existing upload job.

    if existingSubmission:
        # mark existing upload job as running
        uploadJob = sess.query(Job).filter_by(
            submission_id=submissionId,
            file_type_id=fileTypeId,
            job_type_id=JOB_TYPE_DICT['file_upload']).one()
        # Mark as running and set new file name and path
        uploadJob.job_status_id = JOB_STATUS_DICT['running']
        uploadJob.original_filename = filename
        uploadJob.filename = filePath

    else:
        if fileType in ["award", "award_procurement"]:
            # file generation handled on backend, mark as ready
            uploadStatus = JOB_STATUS_DICT['ready']
        elif fileType in ["awardee_attributes", "sub_award"]:
            # these are dependent on file D2 validation
            uploadStatus = JOB_STATUS_DICT['waiting']
        else:
            # mark as running since frontend should be doing this upload
            uploadStatus = JOB_STATUS_DICT['running']

        uploadJob = Job(original_filename=filename,
                        filename=filePath,
                        file_type_id=fileTypeId,
                        job_status_id=uploadStatus,
                        job_type_id=JOB_TYPE_DICT['file_upload'],
                        submission_id=submissionId)
        sess.add(uploadJob)

    sess.flush()

    # Create a file validation job or, for an existing submission, modify the
    # existing validation job.

    if existingSubmission:
        # if the file's validation job is attached to an existing submission,
        # reset its status and delete any validation artifacts (e.g., error metadata) that
        # might exist from a previous run.
        valJob = sess.query(Job).filter_by(
            submission_id=submissionId,
            file_type_id=fileTypeId,
            job_type_id=JOB_TYPE_DICT['csv_record_validation']).one()
        valJob.job_status_id = JOB_STATUS_DICT['waiting']
        valJob.original_filename = filename
        valJob.filename = filePath
        # Reset file size and number of rows to be set during validation of new file
        valJob.file_size = None
        valJob.number_of_rows = None
        # Delete error metadata that might exist from a previous run of this validation job
        sess.query(ErrorMetadata).\
            filter(ErrorMetadata.job_id == valJob.job_id).\
            delete(synchronize_session='fetch')
        # Delete file error information that might exist from a previous run of this validation job
        sess.query(File).filter(File.job_id == valJob.job_id).delete(
            synchronize_session='fetch')

    else:
        # create a new record validation job and add dependencies if necessary
        if fileType == "awardee_attributes":
            d1ValJob = sess.query(Job).\
                filter(Job.submission_id == submissionId,
                       Job.file_type_id == FILE_TYPE_DICT['award_procurement'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']).\
                first()
            if d1ValJob is None:
                raise Exception("Cannot create E job without a D1 job")
            # Add dependency on D1 validation job
            d1Dependency = JobDependency(job_id=uploadJob.job_id,
                                         prerequisite_id=d1ValJob.job_id)
            sess.add(d1Dependency)

        elif fileType == "sub_award":
            # todo: check for C validation job
            cValJob = sess.query(Job). \
                filter(Job.submission_id == submissionId,
                       Job.file_type_id == FILE_TYPE_DICT['award_financial'],
                       Job.job_type_id == JOB_TYPE_DICT['csv_record_validation']). \
                first()
            if cValJob is None:
                raise Exception("Cannot create F job without a C job")
            # Add dependency on C validation job
            cDependency = JobDependency(job_id=uploadJob.job_id,
                                        prerequisite_id=cValJob.job_id)
            sess.add(cDependency)

        else:
            # all other file types get a record-level validation job (E and F don't)
            valJob = Job(original_filename=filename,
                         filename=filePath,
                         file_type_id=fileTypeId,
                         job_status_id=JOB_STATUS_DICT['waiting'],
                         job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                         submission_id=submissionId)
            sess.add(valJob)
            sess.flush()
            # Add dependency between file upload and db upload
            uploadDependency = JobDependency(job_id=valJob.job_id,
                                             prerequisite_id=uploadJob.job_id)
            sess.add(uploadDependency)
            jobsRequired.append(valJob.job_id)

    sess.commit()

    uploadDict[fileType] = uploadJob.job_id
    return jobsRequired, uploadDict
Example #23
    def setUpClass(cls):
        """Set up class-wide resources (test data)"""
        super(JobTests, cls).setUpClass()
        user = cls.userId

        # Flag for testing a million+ errors (can take ~30 min to run)
        cls.includeLongTests = False

        with createApp().app_context():
            # get the submission test user
            sess = GlobalDB.db().session

            # Create test submissions and jobs, also uploading
            # the files needed for each job.
            jobDict = {}

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(job_status_id=JOB_STATUS_DICT['ready'],
                           job_type_id=JOB_TYPE_DICT['file_upload'],
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['bad_upload'] = job_info.job_id

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(job_status_id=JOB_STATUS_DICT['ready'],
                           job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['bad_prereq'] = job_info.job_id

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(job_status_id=JOB_STATUS_DICT['ready'],
                           job_type_id=JOB_TYPE_DICT['external_validation'],
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['wrong_type'] = job_info.job_id

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(job_status_id=JOB_STATUS_DICT['finished'],
                           job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['not_ready'] = job_info.job_id

            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile('testEmpty.csv', user),
                           job_status_id=JOB_STATUS_DICT['ready'],
                           job_type_id=JOB_TYPE_DICT['csv_record_validation'],
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['empty'] = job_info.job_id

            # create dependency
            dependency = JobDependency(job_id=jobDict["bad_prereq"],
                                       prerequisite_id=jobDict["bad_upload"])
            sess.add(dependency)

            colIdDict = {}
            for fileId in range(1, 5):
                for columnId in range(1, 6):
                    if columnId < 3:
                        fieldType = FIELD_TYPE_DICT['INT']
                    else:
                        fieldType = FIELD_TYPE_DICT['STRING']
                    columnName = "header_{}".format(columnId)

                    fileCol = FileColumn(
                        file_id=fileId,
                        field_types_id=fieldType,
                        name=columnName,
                        required=(columnId != FIELD_TYPE_DICT['STRING']))
                    sess.add(fileCol)
                    sess.flush()
                    colIdDict["header_{}_file_type_{}".format(
                        columnId, fileId)] = fileCol.file_column_id

            # commit submissions/jobs and output IDs
            sess.commit()
            for job_type, job_id in jobDict.items():
                print('{}: {}'.format(job_type, job_id))

            cls.jobDict = jobDict
Example #24
    def setUpClass(cls):
        """Set up class-wide resources."""
        super(MixedFileTests, cls).setUpClass()
        user = cls.userId
        force_tas_load = False

        with createApp().app_context():
            # get the submission test user
            sess = GlobalDB.db().session

            # Create test submissions and jobs, also uploading
            # the files needed for each job.
            statusReadyId = JOB_STATUS_DICT['ready']
            jobTypeCsvId = JOB_TYPE_DICT['csv_record_validation']
            jobDict = {}

            # next three jobs belong to the same submission and are tests
            # for single-file validations that contain failing rows
            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile("appropMixed.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['mixed'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("programActivityMixed.csv",
                                                   user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['program_activity'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['programMixed'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("awardMixed.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardMixed'] = job_info.job_id

            # next job tests single-file validations for award_financial
            # (submission has a non-Q1 end date)
            submissionId = cls.insertSubmission(sess, user,
                                                datetime(2015, 3, 15))
            job_info = Job(filename=cls.uploadFile("awardFinancialMixed.csv",
                                                   user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award_financial'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardFinMixed'] = job_info.job_id

            # job below tests a file that has a mixed-delimiter heading
            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile("awardMixedDelimiter.csv",
                                                   user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardMixedDelimiter'] = job_info.job_id

            # next five jobs are cross-file and belong to the same submission
            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile("cross_file_A.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['crossApprop'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("cross_file_B.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['program_activity'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['crossPgmAct'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("cross_file_C.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award_financial'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['crossAwardFin'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("cross_file_D2.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['crossAward'] = job_info.job_id

            job_info = Job(job_status_id=statusReadyId,
                           job_type_id=JOB_TYPE_DICT['validation'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['crossFile'] = job_info.job_id

            # next four jobs test short columns names and belong to the same submission
            submissionId = cls.insertSubmission(sess, user)
            job_info = Job(filename=cls.uploadFile("appropValidShortcols.csv",
                                                   user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['appropriations'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['appropValidShortcols'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile(
                "programActivityMixedShortcols.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['program_activity'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['programMixedShortcols'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile(
                "awardFinancialMixedShortcols.csv", user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award_financial'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardFinMixedShortcols'] = job_info.job_id

            job_info = Job(filename=cls.uploadFile("awardValidShortcols.csv",
                                                   user),
                           job_status_id=statusReadyId,
                           job_type_id=jobTypeCsvId,
                           file_type_id=FILE_TYPE_DICT['award'],
                           submission_id=submissionId)
            sess.add(job_info)
            sess.flush()
            jobDict['awardValidShortcols'] = job_info.job_id

            # commit submissions/jobs and output IDs
            sess.commit()
            for job_type, job_id in jobDict.items():
                print('{}: {}'.format(job_type, job_id))

            # Load fields and rules
            FileTypeTests.load_definitions(sess, force_tas_load,
                                           cls.RULES_TO_APPLY)

            cls.jobDict = jobDict