import logging

import boto3
from flask import g

# Imports assumed from the Broker backend's module layout
from dataactcore.aws.s3Handler import S3Handler
from dataactcore.config import CONFIG_BROKER
from dataactcore.interfaces.db import GlobalDB
from dataactcore.interfaces.function_bag import mark_job_status
from dataactcore.models import lookups
from dataactcore.models.jobModels import Job

logger = logging.getLogger(__name__)


def copy_file_from_parent_to_child(child_job, parent_job, is_local):
    """ Copy the file from the parent job's bucket to the child job's bucket.

        Args:
            child_job: Job object for the child FileRequest
            parent_job: Job object for the parent FileRequest
            is_local: A boolean flag indicating whether the application is being run locally or not
    """
    file_type = parent_job.file_type.letter_name
    log_data = {'message': 'Copying data from parent job with job_id:{}'.format(parent_job.job_id),
                'message_type': 'ValidatorInfo', 'job_id': child_job.job_id, 'file_type': parent_job.file_type.name}

    # Fall back to the application-level flag only when the caller did not provide one;
    # an explicit False must not be overridden
    if is_local is None:
        is_local = g.is_local

    if not is_local and parent_job.filename != child_job.filename:
        # Check to see if the same file already exists in the child bucket
        s3 = boto3.client('s3', region_name=CONFIG_BROKER['aws_region'])
        response = s3.list_objects_v2(Bucket=CONFIG_BROKER['aws_bucket'], Prefix=child_job.filename)
        for obj in response.get('Contents', []):
            if obj['Key'] == child_job.filename:
                # The file already exists in this location
                log_data['message'] = 'Cached {} file CSV already exists in this location'.format(file_type)
                logger.info(log_data)
                return

        # Copy the parent file into the child's S3 location
        log_data['message'] = 'Copying the cached {} file from job {}'.format(file_type, parent_job.job_id)
        logger.info(log_data)
        S3Handler.copy_file(CONFIG_BROKER['aws_bucket'], CONFIG_BROKER['aws_bucket'], parent_job.filename,
                            child_job.filename)
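# A minimal usage sketch, assuming a caller that has already matched a requesting child Job
# to a cached parent Job. The job lookups and the CONFIG_BROKER['local'] flag below are
# illustrative assumptions, not part of this module.
def _example_copy_from_parent(child_job_id, parent_job_id):
    """ Hypothetical example (not part of the module's API): reuse a parent job's file. """
    sess = GlobalDB.db().session
    child_job = sess.query(Job).filter(Job.job_id == child_job_id).one()
    parent_job = sess.query(Job).filter(Job.job_id == parent_job_id).one()
    copy_file_from_parent_to_child(child_job, parent_job, is_local=CONFIG_BROKER['local'])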
def copy_file_generation_to_job(job, file_generation, is_local):
    """ Copy cached FileGeneration data to a Job requesting a file.

        Args:
            job: Job object to copy the data to
            file_generation: Cached FileGeneration object to copy the data from
            is_local: A boolean flag indicating whether the application is being run locally or not
    """
    sess = GlobalDB.db().session
    log_data = {'message': 'Copying FileGeneration {} data to Job {}'.format(file_generation.file_generation_id,
                                                                             job.job_id),
                'message_type': 'BrokerInfo', 'job_id': job.job_id, 'file_type': job.file_type.name,
                'file_generation_id': file_generation.file_generation_id}
    logger.info(log_data)

    # Do not edit submissions that have already successfully completed
    sess.refresh(job)
    if job.job_status_id == lookups.JOB_STATUS_DICT['finished']:
        return

    job.file_generation_id = file_generation.file_generation_id

    # File is still being generated; just mark the FileGeneration ID in the Job and wait.
    # The FileGeneration will update all child Jobs when it finishes.
    if not file_generation.file_path:
        sess.commit()
        return

    # Generate the file path for the child Job's filename; use the caller-provided flag,
    # since the Flask global g is only available in a request context
    filepath = CONFIG_BROKER['broker_files'] if is_local else '{}/'.format(str(job.submission_id))
    original_filename = file_generation.file_path.split('/')[-1]
    filename = '{}{}'.format(filepath, original_filename)

    # Copy the parent job's data
    job.filename = filename
    job.original_filename = original_filename
    job.number_of_errors = 0
    job.number_of_warnings = 0

    # Change the validation job's file data when within a submission
    if job.submission_id is not None:
        val_job = sess.query(Job).filter(Job.submission_id == job.submission_id,
                                         Job.file_type_id == job.file_type_id,
                                         Job.job_type_id == lookups.JOB_TYPE_DICT['csv_record_validation']).one()
        val_job.filename = filename
        val_job.original_filename = original_filename

    # Copy the data to the Submission's bucket
    if not is_local and file_generation.file_path != job.filename:
        # Check to see if the same file already exists in the child bucket
        s3 = boto3.client('s3', region_name=CONFIG_BROKER['aws_region'])
        bucket = CONFIG_BROKER['aws_bucket']
        response = s3.list_objects_v2(Bucket=bucket, Prefix=job.filename)
        for obj in response.get('Contents', []):
            if obj['Key'] == job.filename:
                # The file already exists in this location
                log_data['message'] = '{} file already exists in this location: {}; not overwriting.'.format(
                    job.file_type.name, job.filename)
                logger.info(log_data)
                mark_job_status(job.job_id, 'finished')
                return

        S3Handler.copy_file(bucket, bucket, file_generation.file_path, job.filename)
    sess.commit()

    # Mark the Job status last so the validation job doesn't start until everything is done
    mark_job_status(job.job_id, 'finished')
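# A minimal usage sketch, assuming the caller has already found a cached FileGeneration row
# matching the Job's generation parameters. The FileGeneration import path and the
# CONFIG_BROKER['local'] flag are assumptions for illustration, not this module's code.
def _example_reuse_cached_generation(job_id, file_generation_id):
    """ Hypothetical example (not part of the module's API): attach cached generation data. """
    from dataactcore.models.jobModels import FileGeneration  # assumed model location
    sess = GlobalDB.db().session
    job = sess.query(Job).filter(Job.job_id == job_id).one()
    file_generation = sess.query(FileGeneration).filter(
        FileGeneration.file_generation_id == file_generation_id).one_or_none()
    if file_generation is not None:
        copy_file_generation_to_job(job, file_generation, CONFIG_BROKER['local'])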