Code Example #1
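Copies the file generated for a parent FileRequest job into the child job's S3 location. The destination bucket is listed first so that an already-cached file is never overwritten; judging by CONFIG_BROKER and S3Handler, the code appears to come from the USASpending DATA Act Broker backend.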
# The module paths below assume the DATA Act Broker backend layout; adjust if
# your checkout differs.
import logging

import boto3
from flask import g

from dataactcore.aws.s3Handler import S3Handler
from dataactcore.config import CONFIG_BROKER

logger = logging.getLogger(__name__)


def copy_file_from_parent_to_child(child_job, parent_job, is_local):
    """ Copy the file from the parent job's bucket to the child job's bucket.

        Args:
            child_job: Job object for the child FileRequest
            parent_job: Job object for the parent FileRequest
            is_local: A boolean flag indicating whether the application is being run locally or not

    """
    file_type = parent_job.file_type.letter_name
    log_data = {'message': 'Copying data from parent job with job_id:{}'.format(parent_job.job_id),
                'message_type': 'ValidatorInfo', 'job_id': child_job.job_id, 'file_type': parent_job.file_type.name}

    # Fall back to the Flask application's flag when no truthy value was passed in
    if not is_local:
        is_local = g.is_local
    if not is_local and parent_job.filename != child_job.filename:
        # Check to see if the same file exists in the child bucket
        s3 = boto3.client('s3', region_name=CONFIG_BROKER["aws_region"])
        response = s3.list_objects_v2(Bucket=CONFIG_BROKER['aws_bucket'], Prefix=child_job.filename)
        for obj in response.get('Contents', []):
            if obj['Key'] == child_job.filename:
                # The file already exists in this location
                log_data['message'] = 'Cached {} file CSV already exists in this location'.format(file_type)
                logger.info(log_data)
                return

        # Copy the parent file into the child's S3 location
        log_data['message'] = 'Copying the cached {} file from job {}'.format(file_type, parent_job.job_id)
        logger.info(log_data)
        S3Handler.copy_file(CONFIG_BROKER['aws_bucket'], CONFIG_BROKER['aws_bucket'], parent_job.filename,
                            child_job.filename)
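
The guard above lists the destination prefix and checks for an exact key match before copying. Below is a minimal, standalone sketch of the same copy-only-if-absent idea in plain boto3, using head_object instead of a list scan (the bucket and key names, and the copy_if_absent helper itself, are hypothetical illustrations, not Broker code):

import boto3
from botocore.exceptions import ClientError

def copy_if_absent(bucket, source_key, dest_key, region_name='us-east-1'):
    """ Copy source_key to dest_key within a bucket unless dest_key already exists. """
    s3 = boto3.client('s3', region_name=region_name)
    try:
        # head_object raises a ClientError with a '404' code for a missing key
        s3.head_object(Bucket=bucket, Key=dest_key)
        return False  # destination already present; skip the copy
    except ClientError as exc:
        if exc.response['Error']['Code'] != '404':
            raise
    s3.copy_object(Bucket=bucket, Key=dest_key,
                   CopySource={'Bucket': bucket, 'Key': source_key})
    return True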
Code Example #2
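A related helper operating at the FileGeneration level: it attaches a cached FileGeneration to a requesting Job, copies the generated file into the submission's S3 location when one exists, and marks the job 'finished' only after all other bookkeeping is committed so the validation job cannot start early.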
# The module paths below assume the DATA Act Broker backend layout; adjust if
# your checkout differs.
import logging

import boto3
from flask import g

from dataactcore.aws.s3Handler import S3Handler
from dataactcore.config import CONFIG_BROKER
from dataactcore.interfaces.db import GlobalDB
from dataactcore.interfaces.function_bag import mark_job_status
from dataactcore.models import lookups
from dataactcore.models.jobModels import Job

logger = logging.getLogger(__name__)


def copy_file_generation_to_job(job, file_generation, is_local):
    """ Copy cached FileGeneration data to a Job requesting a file.

        Args:
            job: Job object to copy the data to
            file_generation: Cached FileGeneration object to copy the data from
            is_local: A boolean flag indicating whether the application is being run locally or not
    """
    sess = GlobalDB.db().session
    log_data = {
        'message': 'Copying FileGeneration {} data to Job {}'.format(file_generation.file_generation_id, job.job_id),
        'message_type': 'BrokerInfo', 'job_id': job.job_id, 'file_type': job.file_type.name,
        'file_generation_id': file_generation.file_generation_id}
    logger.info(log_data)

    # Do not edit submissions that have already successfully completed
    sess.refresh(job)
    if job.job_status_id == lookups.JOB_STATUS_DICT['finished']:
        return

    job.file_generation_id = file_generation.file_generation_id

    # File is still being generated, just mark the FileGeneration ID in the Job and wait
    # FileGeneration will update all child Jobs when it finishes
    if not file_generation.file_path:
        sess.commit()
        return

    # Generate file path for child Job's filename. Note that the Flask app-level
    # g.is_local flag is consulted here; the is_local argument goes unused.
    filepath = CONFIG_BROKER['broker_files'] if g.is_local else "{}/".format(str(job.submission_id))
    original_filename = file_generation.file_path.split('/')[-1]
    filename = '{}{}'.format(filepath, original_filename)

    # Copy parent job's data
    job.filename = filename
    job.original_filename = original_filename
    job.number_of_errors = 0
    job.number_of_warnings = 0

    # Change the validation job's file data when within a submission
    if job.submission_id is not None:
        val_job = sess.query(Job).filter(Job.submission_id == job.submission_id,
                                         Job.file_type_id == job.file_type_id,
                                         Job.job_type_id == lookups.JOB_TYPE_DICT['csv_record_validation']).one()
        val_job.filename = filename
        val_job.original_filename = original_filename

        # Copy the data to the Submission's bucket
        if not g.is_local and file_generation.file_path != job.filename:
            # Check to see if the same file exists in the child bucket
            s3 = boto3.client('s3', region_name=CONFIG_BROKER["aws_region"])
            bucket = CONFIG_BROKER['aws_bucket']
            response = s3.list_objects_v2(Bucket=bucket, Prefix=job.filename)
            for obj in response.get('Contents', []):
                if obj['Key'] == job.filename:
                    # The file already exists in this location
                    log_data['message'] = '{} file already exists in this location: {}; not overwriting.'.format(
                        job.file_type.name, job.filename)
                    logger.info(log_data)
                    mark_job_status(job.job_id, 'finished')
                    return

            S3Handler.copy_file(bucket, bucket, file_generation.file_path, job.filename)
    sess.commit()

    # Mark Job status last so the validation job doesn't start until everything is done
    mark_job_status(job.job_id, 'finished')
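
The sess.refresh(job) call above is what keeps the copy idempotent when several workers race: the row is re-read before anything is mutated, and the flip to 'finished' is the last write committed. Below is a minimal sketch of that refresh-then-check pattern in plain SQLAlchemy (the Job model and finish_job_once helper are simplified stand-ins, not the Broker's):

from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Job(Base):
    """ Simplified stand-in for the Broker's Job model. """
    __tablename__ = 'job'
    job_id = Column(Integer, primary_key=True)
    job_status = Column(String, default='waiting')

def finish_job_once(sess, job):
    """ Re-read the row before acting, mirroring the guard above. """
    sess.refresh(job)               # drop stale in-memory state
    if job.job_status == 'finished':
        return False                # another worker already finished it
    # ... file copy and bookkeeping would happen here ...
    job.job_status = 'finished'     # the status change is persisted last
    sess.commit()
    return True

if __name__ == '__main__':
    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    sess = sessionmaker(bind=engine)()
    sess.add(Job(job_id=1))
    sess.commit()
    job = sess.query(Job).one()
    print(finish_job_once(sess, job))  # True: first caller does the work
    print(finish_job_once(sess, job))  # False: already finished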