def copy_to_s3(args):
    """Tar up the vis_<min>~<max> measurement set and copy it to S3.

    Returns 0 when the measurement set does not exist, otherwise the
    return code of the failing step or of the S3 copy.
    """
    # Does the measurement set directory exist?
    directory_name = 'vis_{0}~{1}'.format(args.min_frequency, args.max_frequency)
    measurement_set = os.path.join(args.directory, directory_name)
    LOG.info('check {0} exists'.format(measurement_set))
    if not os.path.exists(measurement_set) or not os.path.isdir(measurement_set):
        LOG.info('Measurement_set: {0} does not exist'.format(measurement_set))
        return 0

    # Make the tar file
    tar_filename = os.path.join(args.directory, 'vis.tar')
    os.chdir(args.directory)
    bash = 'tar -cvf {0} {1}'.format(tar_filename, directory_name)
    return_code = run_command(bash)

    path_exists = os.path.exists(tar_filename)
    if return_code != 0 or not path_exists:
        LOG.error('tar return_code: {0}, exists: {1}'.format(return_code, path_exists))
        # Bug fix: previously execution fell through and uploaded a
        # missing/partial tar file. Abort with a non-zero code instead.
        return return_code if return_code != 0 else 1

    bash = 'java -classpath /opt/chiles02/aws-chiles02/java/build/awsChiles02.jar org.icrar.awsChiles02.copyS3.CopyFileToS3' \
           ' -aws_profile aws-chiles02 {0} vis.tar'.format(
               args.s3_url,
           )
    return_code = run_command(bash)

    # Clean up the local copy regardless of the upload outcome
    shutil.rmtree(args.directory, ignore_errors=True)

    return return_code
def copy_to_s3(args):
    """Tar the vis_<min>~<max> measurement set and upload the tar to S3.

    Returns 0 when the measurement set is absent, otherwise a non-zero
    code on failure or the S3 copy's return code.
    """
    # Does the measurement set directory exist?
    directory_name = 'vis_{0}~{1}'.format(args.min_frequency, args.max_frequency)
    measurement_set = os.path.join(args.directory, directory_name)
    LOG.info('check {0} exists'.format(measurement_set))
    if not os.path.exists(measurement_set) or not os.path.isdir(measurement_set):
        LOG.info('Measurement_set: {0} does not exist'.format(measurement_set))
        return 0

    # Make the tar file
    tar_filename = os.path.join(args.directory, 'vis.tar')
    os.chdir(args.directory)
    bash = 'tar -cvf {0} {1}'.format(tar_filename, directory_name)
    return_code = run_command(bash)

    path_exists = os.path.exists(tar_filename)
    if return_code != 0 or not path_exists:
        LOG.error('tar return_code: {0}, exists: {1}'.format(return_code, path_exists))
        # Bug fix: previously fell through and tried to upload a
        # missing/partial tar file. Abort with a non-zero code instead.
        return return_code if return_code != 0 else 1

    bash = 'java -classpath /opt/chiles02/aws-chiles02/java/build/awsChiles02.jar org.icrar.awsChiles02.copyS3.CopyFileToS3' \
           ' -aws_profile aws-chiles02 {0} vis.tar'.format(
               args.s3_url,
           )
    return_code = run_command(bash)

    # Clean up
    shutil.rmtree(args.directory, ignore_errors=True)

    return return_code
def copy_from_s3(args):
    """Download the measurement-set tar from S3, verify its size and unpack it.

    Returns 0 on success (or when the measurement set is already on disk),
    1 on any failure.
    """
    # Already extracted? Then there is nothing to do.
    head, tail = os.path.split(args.s3_url)
    measurement_set = os.path.join(args.directory, tail)[:-4]
    LOG.info('Checking {0} exists'.format(measurement_set))
    if os.path.exists(measurement_set) and os.path.isdir(measurement_set):
        LOG.info('Measurement Set: {0} exists'.format(measurement_set))
        return 0

    # Make sure the target directory is there
    if not os.path.exists(args.directory):
        os.makedirs(args.directory)

    # The following will need (16 + 1) * 262144000 bytes of heap space, ie approximately 4.5G.
    # Note setting minimum as well as maximum heap results in OutOfMemory errors at times!
    # The -d64 is to make sure we are using a 64bit JVM.
    # When extracting to the tar we need even more
    full_path_tar_file = os.path.join(args.directory, TAR_FILE)
    LOG.info('Tar: {0}'.format(full_path_tar_file))
    bash = 'java -d64 -Xms10g -Xmx10g -classpath /opt/chiles02/aws-chiles02/java/build/awsChiles02.jar org.icrar.awsChiles02.copyS3.CopyFileFromS3' \
           ' -thread_buffer 262144000 -thread_pool 16 -aws_profile aws-chiles02' \
           ' {0} {1}'.format(
               args.s3_url,
               full_path_tar_file,
           )
    run_command(bash)

    if not os.path.exists(full_path_tar_file):
        LOG.error('The tar file {0} does not exist'.format(full_path_tar_file))
        return 1

    # The local and S3 sizes must agree before we bother extracting
    s3_size = get_s3_size(args.s3_url)
    tar_size = os.path.getsize(full_path_tar_file)
    if s3_size != tar_size:
        LOG.error('The sizes for {0} differ S3: {1}, local FS: {2}'.format(full_path_tar_file, s3_size, tar_size))
        return 1

    # The tar file exists and is the same size
    bash = 'tar -xvf {0} -C {1}'.format(full_path_tar_file, args.directory)
    return_code = run_command(bash)
    path_exists = os.path.exists(measurement_set)
    if return_code != 0 or not path_exists:
        LOG.error('tar return_code: {0}, exists: {1}'.format(return_code, path_exists))
        return 1

    os.remove(full_path_tar_file)
    return 0
def copy_from_s3(args):
    """Fetch the measurement-set tar from S3, check its size and extract it.

    Returns 0 on success or when the measurement set already exists;
    1 on any failure.
    """
    # Does the measurement set already exist on disk?
    head, tail = os.path.split(args.s3_url)
    measurement_set = os.path.join(args.directory, tail)[:-4]
    LOG.info('Checking {0} exists'.format(measurement_set))
    if os.path.exists(measurement_set) and os.path.isdir(measurement_set):
        LOG.info('Measurement Set: {0} exists'.format(measurement_set))
        return 0

    # Create the destination directory if needed
    if not os.path.exists(args.directory):
        os.makedirs(args.directory)

    # The following will need (16 + 1) * 262144000 bytes of heap space, ie approximately 4.5G.
    # Note setting minimum as well as maximum heap results in OutOfMemory errors at times!
    # The -d64 is to make sure we are using a 64bit JVM.
    # When extracting to the tar we need even more
    full_path_tar_file = os.path.join(args.directory, TAR_FILE)
    LOG.info('Tar: {0}'.format(full_path_tar_file))
    bash = 'java -d64 -Xms10g -Xmx10g -classpath /opt/chiles02/aws-chiles02/java/build/awsChiles02.jar org.icrar.awsChiles02.copyS3.CopyFileFromS3' \
           ' -thread_buffer 262144000 -thread_pool 16 -aws_profile aws-chiles02' \
           ' {0} {1}'.format(
               args.s3_url,
               full_path_tar_file,
           )
    run_command(bash)

    if not os.path.exists(full_path_tar_file):
        LOG.error('The tar file {0} does not exist'.format(full_path_tar_file))
        return 1

    # Check the sizes match
    s3_size = get_s3_size(args.s3_url)
    tar_size = os.path.getsize(full_path_tar_file)
    if s3_size != tar_size:
        LOG.error('The sizes for {0} differ S3: {1}, local FS: {2}'.format(full_path_tar_file, s3_size, tar_size))
        return 1

    # The tar file exists and is the same size
    bash = 'tar -xvf {0} -C {1}'.format(full_path_tar_file, args.directory)
    return_code = run_command(bash)
    path_exists = os.path.exists(measurement_set)
    if return_code != 0 or not path_exists:
        LOG.error('tar return_code: {0}, exists: {1}'.format(return_code, path_exists))
        return 1

    os.remove(full_path_tar_file)
    return 0
def run(self):
    """Collect the .image product from each measurement set and feed them
    to the CASA concatenate script; return its exit code."""
    # Because of the lifecycle the drop isn't attached when the command is
    # created so we have to do it later
    image_names = []
    for ms_path in self._measurement_sets:
        LOG.debug('measurement_set: {0}'.format(ms_path))
        # Only the first entry ending in .image is used for each set
        for entry in os.listdir(ms_path):
            if entry.endswith(".image"):
                dfms_name = '{0}/{1}'.format(ms_path, entry)
                LOG.info('dfms_name: {0}'.format(dfms_name))
                image_names.append(dfms_name)
                break

    measurement_set_output = self.outputs[0]
    measurement_set_dir = measurement_set_output.path
    if os.path.exists(measurement_set_dir):
        LOG.info('Directory {0} exists'.format(measurement_set_dir))
    else:
        # Make the directory
        os.makedirs(measurement_set_dir)

    command = 'cd {0} && casa --nologger --log2term -c /home/ec2-user/aws-chiles02/pipeline/casa_code/concatenate.py /tmp image_{1}_{2}.cube {3}'.format(
        measurement_set_dir,
        self._width,
        self._iterations,
        ' '.join(image_names),
    )
    return run_command(command)
def run(self):
    """Tar the stats CSV for this frequency range and upload it to S3.

    Returns 1 when the stats file is missing or the tar fails, 0 on success.
    """
    measurement_set_output = self.inputs[0]
    measurement_set_dir = measurement_set_output.path
    s3_output = self.outputs[0]
    bucket_name = s3_output.bucket
    key = s3_output.key
    LOG.info('dir: {2}, bucket: {0}, key: {1}'.format(bucket_name, key, measurement_set_dir))

    # Does the stats file exist?
    stem_name = 'stats_{0}~{1}.csv'.format(self._min_frequency, self._max_frequency)
    file_name = os.path.join(measurement_set_dir, stem_name)
    LOG.debug('checking {0} exists'.format(file_name))
    if not os.path.exists(file_name) or not os.path.isfile(file_name):
        message = 'Stats: {0} does not exist'.format(file_name)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    # Make the tar file
    tar_filename = os.path.join(measurement_set_dir, 'stats_{0}~{1}.tar.gz'.format(self._min_frequency, self._max_frequency))
    os.chdir(measurement_set_dir)
    bash = 'tar -cvzf {0} {1}'.format(
        tar_filename,
        stem_name,
    )
    return_code = run_command(bash)

    path_exists = os.path.exists(tar_filename)
    if return_code != 0 or not path_exists:
        message = 'tar return_code: {0}, exists: {1}'.format(return_code, path_exists)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        # Bug fix: previously fell through and uploaded a missing/partial tar
        return 1

    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)

    s3_client = s3.meta.client
    transfer = S3Transfer(s3_client)
    transfer.upload_file(
        tar_filename,
        bucket_name,
        key,
        callback=ProgressPercentage(
            key,
            float(os.path.getsize(tar_filename))
        ),
        extra_args={
            'StorageClass': 'REDUCED_REDUNDANCY',
        }
    )
    return 0
def run(self):
    """Tar the uvsub measurement set for this frequency range and upload it to S3.

    Returns 0 when the measurement set is missing, non-zero on tar failure,
    otherwise the tar return code (0) after a successful upload.
    """
    measurement_set_output = self.inputs[0]
    measurement_set_dir = measurement_set_output.path
    s3_output = self.outputs[0]
    bucket_name = s3_output.bucket
    key = s3_output.key
    LOG.info('dir: {2}, bucket: {0}, key: {1}'.format(bucket_name, key, measurement_set_dir))

    # Does the measurement set exist?
    stem_name = 'uvsub_{0}~{1}'.format(self._min_frequency, self._max_frequency)
    measurement_set = os.path.join(measurement_set_dir, stem_name)
    LOG.debug('checking {0} exists'.format(measurement_set))
    if not os.path.exists(measurement_set) or not os.path.isdir(measurement_set):
        message = 'Measurement_set: {0} does not exist'.format(measurement_set)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 0

    # Make the tar file
    tar_filename = os.path.join(measurement_set_dir, 'uvsub_{0}~{1}.tar'.format(self._min_frequency, self._max_frequency))
    os.chdir(measurement_set_dir)
    bash = 'tar -cvf {0} {1}'.format(
        tar_filename,
        stem_name,
    )
    return_code = run_command(bash)

    path_exists = os.path.exists(tar_filename)
    if return_code != 0 or not path_exists:
        message = 'tar return_code: {0}, exists: {1}'.format(return_code, path_exists)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        # Bug fix: previously fell through and uploaded a missing/partial tar
        return return_code if return_code != 0 else 1

    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)

    s3_client = s3.meta.client
    transfer = S3Transfer(s3_client)
    transfer.upload_file(
        tar_filename,
        bucket_name,
        key,
        callback=ProgressPercentage(
            key,
            float(os.path.getsize(tar_filename))
        ),
        extra_args={
            'StorageClass': 'REDUCED_REDUNDANCY',
        }
    )
    return return_code
def run(self):
    """Tar the vis_<min>~<max> measurement set, upload it to S3 and clean up.

    Returns 0 when the measurement set is missing, non-zero on tar failure,
    otherwise the tar return code (0) after a successful upload.
    """
    measurement_set_output = self.inputs[0]
    measurement_set_dir = measurement_set_output.path
    s3_output = self.outputs[0]
    bucket_name = s3_output.bucket
    key = s3_output.key
    LOG.info('dir: {2}, bucket: {0}, key: {1}'.format(bucket_name, key, measurement_set_dir))

    directory_name = 'vis_{0}~{1}'.format(self._min_frequency, self._max_frequency)
    measurement_set = os.path.join(measurement_set_dir, directory_name)
    LOG.info('check {0} exists'.format(measurement_set))
    if not os.path.exists(measurement_set) or not os.path.isdir(measurement_set):
        message = 'Measurement_set: {0} does not exist'.format(measurement_set)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 0

    # Make the tar file
    tar_filename = os.path.join(measurement_set_dir, 'vis.tar')
    os.chdir(measurement_set_dir)
    bash = 'tar -cvf {0} {1}'.format(tar_filename, directory_name)
    return_code = run_command(bash)

    path_exists = os.path.exists(tar_filename)
    if return_code != 0 or not path_exists:
        message = 'tar return_code: {0}, exists: {1}'.format(return_code, path_exists)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        # Bug fix: previously fell through and uploaded a missing/partial tar
        return return_code if return_code != 0 else 1

    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)

    s3_client = s3.meta.client
    transfer = S3Transfer(s3_client)
    transfer.upload_file(
        tar_filename,
        bucket_name,
        key,
        callback=ProgressPercentage(
            key,
            float(os.path.getsize(tar_filename))
        ),
        extra_args={
            'StorageClass': 'REDUCED_REDUNDANCY',
        }
    )

    # Clean up
    shutil.rmtree(measurement_set_dir, ignore_errors=True)

    return return_code
def copy_logs_to_s3(args):
    """Tar every *.log file in args.directory and copy the tar to S3.

    NOTE(review): on tar failure this deliberately returns 0 (best effort,
    so missing logs do not fail the pipeline) - confirm callers rely on that.
    """
    # Make the tar file
    tar_filename = os.path.join(args.directory, 'logs.tar')
    os.chdir(args.directory)
    bash = 'tar -cvf {0} *.log'.format(tar_filename)
    return_code = run_command(bash)

    path_exists = os.path.exists(tar_filename)
    if return_code != 0 or not path_exists:
        LOG.error('tar return_code: {0}, exists: {1}'.format(return_code, path_exists))
        return 0

    # Copy the tar to S3 using the project's Java uploader
    bash = 'java -classpath /opt/chiles02/aws-chiles02/java/build/awsChiles02.jar org.icrar.awsChiles02.copyS3.CopyFileToS3' \
           ' -aws_profile aws-chiles02 {0} {1}'.format(
               args.s3_url,
               tar_filename
           )
    return run_command(bash)
def copy_measurement_set(measurement_set, directory_out, bucket_name):
    """Tar a measurement set and upload it to observation_data/ in the bucket.

    Skips the upload when the object already exists in S3; removes the
    local tar afterwards when one was created.
    """
    LOG.info('measurement_set: {0}, bucket_name: {1}'.format(measurement_set, bucket_name))
    (measurement_set_directory, measurement_set_filename) = split(measurement_set)
    key = 'observation_data/{0}.tar'.format(measurement_set_filename)

    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)
    bucket = s3.Bucket(bucket_name)
    objs = list(bucket.objects.filter(Prefix=key))
    if len(objs) > 0 and objs[0].key == key:
        # Already in S3 - nothing to do
        LOG.info('The measurement set {0} exists in {1}'.format(key, bucket_name))
        return

    tar_filename = os.path.join(directory_out, '{0}.tar'.format(measurement_set_filename))
    bash = 'cd {0} && tar -cvf {1} {2}'.format(measurement_set_directory, tar_filename, measurement_set_filename)
    return_code = run_command(bash)

    path_exists = os.path.exists(tar_filename)
    if return_code != 0 or not path_exists:
        LOG.error('tar return_code: {0}, exists: {1}'.format(return_code, path_exists))
    else:
        tar_size = float(os.path.getsize(tar_filename))
        # Chunk size chosen so the upload stays under 9999 parts,
        # presumably to respect S3's 10,000-part multipart limit
        chunk_size = int(tar_size / 9999)
        LOG.info(
            'tar_filename: {0}, bucket: {1}, key: {2}, size: {3}, chunk_size: {4}'.format(
                tar_filename, bucket_name, key, bytes2human(tar_size), chunk_size
            )
        )
        transfer_config = TransferConfig(multipart_chunksize=chunk_size)
        s3_client = s3.meta.client
        transfer = S3Transfer(s3_client, transfer_config)
        transfer.upload_file(
            tar_filename,
            bucket_name,
            key,
            callback=ProgressPercentage(key, tar_size),
            extra_args={
                'StorageClass': 'REDUCED_REDUNDANCY',
            }
        )

    # Clean up
    if path_exists:
        os.remove(tar_filename)
def copy_measurement_set(measurement_set, directory_out, bucket_name):
    """Tar a measurement set into directory_out and push it to S3 under
    observation_data/, unless the object is already present in the bucket."""
    LOG.info('measurement_set: {0}, bucket_name: {1}'.format(measurement_set, bucket_name))
    (measurement_set_directory, measurement_set_filename) = split(measurement_set)
    key = 'observation_data/{0}.tar'.format(measurement_set_filename)

    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)
    bucket = s3.Bucket(bucket_name)
    matching = list(bucket.objects.filter(Prefix=key))
    if len(matching) > 0 and matching[0].key == key:
        # The object is already in the bucket - skip the upload entirely
        LOG.info('The measurement set {0} exists in {1}'.format(key, bucket_name))
        return

    tar_filename = os.path.join(directory_out, '{0}.tar'.format(measurement_set_filename))
    bash = 'cd {0} && tar -cvf {1} {2}'.format(measurement_set_directory, tar_filename, measurement_set_filename)
    return_code = run_command(bash)

    path_exists = os.path.exists(tar_filename)
    if return_code != 0 or not path_exists:
        LOG.error('tar return_code: {0}, exists: {1}'.format(return_code, path_exists))
    else:
        tar_size = float(os.path.getsize(tar_filename))
        # Keep the part count under 9999, presumably for S3's multipart limit
        chunk_size = int(tar_size / 9999)
        LOG.info(
            'tar_filename: {0}, bucket: {1}, key: {2}, size: {3}, chunk_size: {4}'.format(
                tar_filename, bucket_name, key, bytes2human(tar_size), chunk_size
            )
        )
        transfer_config = TransferConfig(multipart_chunksize=chunk_size)
        s3_client = s3.meta.client
        transfer = S3Transfer(s3_client, transfer_config)
        transfer.upload_file(
            tar_filename,
            bucket_name,
            key,
            callback=ProgressPercentage(key, tar_size),
            extra_args={
                'StorageClass': 'REDUCED_REDUNDANCY',
            }
        )

    # Clean up
    if path_exists:
        os.remove(tar_filename)
def run(self):
    """Tar the dfms log files and upload the tar to S3.

    Returns the tar return code on failure, 0 after a successful upload.
    """
    log_file_dir = '/mnt/dfms/dfms_root'
    s3_output = self.outputs[0]
    bucket_name = s3_output.bucket
    key = s3_output.key
    LOG.info('dir: {2}, bucket: {0}, key: {1}'.format(bucket_name, key, log_file_dir))

    # Make the tar file
    tar_filename = os.path.join(log_file_dir, 'log.tar')
    os.chdir(log_file_dir)
    bash = 'tar -cvf {0} {1}'.format(tar_filename, 'dfms*.log')
    return_code = run_command(bash)

    path_exists = os.path.exists(tar_filename)
    if return_code != 0 or not path_exists:
        message = 'tar return_code: {0}, exists: {1}'.format(return_code, path_exists)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return return_code

    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)

    s3_client = s3.meta.client
    transfer = S3Transfer(s3_client)
    transfer.upload_file(
        tar_filename,
        bucket_name,
        key,
        callback=ProgressPercentage(
            key,
            float(os.path.getsize(tar_filename))
        ),
        extra_args={
            'StorageClass': 'REDUCED_REDUNDANCY',
        }
    )
    # Bug fix: previously fell off the end and implicitly returned None;
    # return 0 for success to match the other tasks.
    return 0
def run(self):
    """Download the vis_<min>~<max> measurement-set tar from S3, verify its
    size, and extract it into the output directory.

    Returns 0 on success (or when the set is already present), 1 on failure.
    """
    s3_input = self.inputs[0]
    bucket_name = s3_input.bucket
    key = s3_input.key
    measurement_set_output = self.outputs[0]
    measurement_set_dir = measurement_set_output.path
    LOG.info('bucket: {0}, key: {1}, dir: {2}'.format(bucket_name, key, measurement_set_dir))

    measurement_set = os.path.join(measurement_set_dir, 'vis_{0}~{1}'.format(self._min_frequency, self._max_frequency))
    LOG.debug('Checking {0} exists'.format(measurement_set))
    if os.path.exists(measurement_set) and os.path.isdir(measurement_set):
        # Fix: Logger.warn is a deprecated alias of Logger.warning
        LOG.warning('Measurement Set: {0} exists'.format(measurement_set))
        return 0

    # Make the directory
    if not os.path.exists(measurement_set_dir):
        os.makedirs(measurement_set_dir)

    full_path_tar_file = os.path.join(measurement_set_dir, TAR_FILE)
    LOG.info('Tar: {0}'.format(full_path_tar_file))

    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)
    s3_object = s3.Object(bucket_name, key)
    s3_size = s3_object.content_length
    s3_client = s3.meta.client
    transfer = S3Transfer(s3_client)
    transfer.download_file(
        bucket_name,
        key,
        full_path_tar_file,
        callback=ProgressPercentage(key, s3_size)
    )

    if not os.path.exists(full_path_tar_file):
        message = 'The tar file {0} does not exist'.format(full_path_tar_file)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    # Check the sizes match
    tar_size = os.path.getsize(full_path_tar_file)
    if s3_size != tar_size:
        message = 'The sizes for {0} differ S3: {1}, local FS: {2}'.format(full_path_tar_file, s3_size, tar_size)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    # The tar file exists and is the same size
    bash = 'tar -xvf {0} -C {1}'.format(full_path_tar_file, measurement_set_dir)
    return_code = run_command(bash)
    path_exists = os.path.exists(measurement_set)
    if return_code != 0 or not path_exists:
        message = 'tar return_code: {0}, exists: {1}-{2}'.format(return_code, measurement_set, path_exists)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    os.remove(full_path_tar_file)
    return 0
def run(self):
    """Download the uvsub_<min>~<max> measurement-set tar from S3, verify its
    size, and extract it into the output directory.

    Returns 0 on success (or when the set is already present), 1 on failure.
    """
    s3_input = self.inputs[0]
    bucket_name = s3_input.bucket
    key = s3_input.key
    measurement_set_output = self.outputs[0]
    measurement_set_dir = measurement_set_output.path
    LOG.info('bucket: {0}, key: {1}, dir: {2}'.format(bucket_name, key, measurement_set_dir))

    measurement_set = os.path.join(measurement_set_dir, 'uvsub_{0}~{1}'.format(self._min_frequency, self._max_frequency))
    LOG.debug('Checking {0} exists'.format(measurement_set))
    if os.path.exists(measurement_set) and os.path.isdir(measurement_set):
        # Fix: Logger.warn is a deprecated alias of Logger.warning
        LOG.warning('Measurement Set: {0} exists'.format(measurement_set))
        return 0

    # Make the directory
    if not os.path.exists(measurement_set_dir):
        os.makedirs(measurement_set_dir)

    full_path_tar_file = os.path.join(measurement_set_dir, TAR_FILE)
    LOG.info('Tar: {0}'.format(full_path_tar_file))

    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)
    s3_object = s3.Object(bucket_name, key)
    s3_size = s3_object.content_length
    s3_client = s3.meta.client
    transfer = S3Transfer(s3_client)
    transfer.download_file(
        bucket_name,
        key,
        full_path_tar_file,
        callback=ProgressPercentage(
            key,
            s3_size
        )
    )

    if not os.path.exists(full_path_tar_file):
        message = 'The tar file {0} does not exist'.format(full_path_tar_file)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    # Check the sizes match
    tar_size = os.path.getsize(full_path_tar_file)
    if s3_size != tar_size:
        message = 'The sizes for {0} differ S3: {1}, local FS: {2}'.format(full_path_tar_file, s3_size, tar_size)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    # The tar file exists and is the same size
    bash = 'tar -xvf {0} -C {1}'.format(full_path_tar_file, measurement_set_dir)
    return_code = run_command(bash)
    path_exists = os.path.exists(measurement_set)
    if return_code != 0 or not path_exists:
        message = 'tar return_code: {0}, exists: {1}-{2}'.format(return_code, measurement_set, path_exists)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    os.remove(full_path_tar_file)
    return 0
def run(self):
    """Download a measurement-set tar from S3, verify its size and unpack it.

    Returns 0 on success (or when the set is already present), 1 on failure.
    """
    s3_input = self.inputs[0]
    bucket_name = s3_input.bucket
    key = s3_input.key
    measurement_set_output = self.outputs[0]
    measurement_set_dir = measurement_set_output.path
    LOG.info('bucket: {0}, key: {1}, dir: {2}'.format(bucket_name, key, measurement_set_dir))

    # NOTE(review): this checks the S3 key (minus '.tar') as a *local* path;
    # it is not joined with measurement_set_dir - confirm this is intended.
    measurement_set = key[:-4]
    LOG.info('Checking {0} exists'.format(measurement_set))
    if os.path.exists(measurement_set) and os.path.isdir(measurement_set):
        LOG.info('Measurement Set: {0} exists'.format(measurement_set))
        return 0

    # Create the output directory if needed
    if not os.path.exists(measurement_set_dir):
        os.makedirs(measurement_set_dir)

    # The following will need (16 + 1) * 262144000 bytes of heap space, ie approximately 4.5G.
    # Note setting minimum as well as maximum heap results in OutOfMemory errors at times!
    # The -d64 is to make sure we are using a 64bit JVM.
    # When extracting to the tar we need even more
    full_path_tar_file = os.path.join(measurement_set_dir, TAR_FILE)
    LOG.info('Tar: {0}'.format(full_path_tar_file))

    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)
    s3_object = s3.Object(bucket_name, key)
    s3_size = s3_object.content_length
    s3_client = s3.meta.client
    transfer = S3Transfer(s3_client)
    transfer.download_file(
        bucket_name,
        key,
        full_path_tar_file,
        callback=ProgressPercentage(key, s3_size)
    )

    if not os.path.exists(full_path_tar_file):
        message = 'The tar file {0} does not exist'.format(full_path_tar_file)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    # The downloaded size must match the S3 object size
    tar_size = os.path.getsize(full_path_tar_file)
    if s3_size != tar_size:
        message = 'The sizes for {0} differ S3: {1}, local FS: {2}'.format(full_path_tar_file, s3_size, tar_size)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    # The tar file exists and is the same size
    bash = 'tar -xvf {0} -C {1}'.format(full_path_tar_file, measurement_set_dir)
    return_code = run_command(bash)
    path_exists = os.path.exists(measurement_set)
    if return_code != 0 or not path_exists:
        message = 'tar return_code: {0}, exists: {1}, measurement_set: {2}'.format(return_code, path_exists, measurement_set)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    os.remove(full_path_tar_file)
    return 0
def run(self):
    """Download the clean-image tar from S3, unpack it, and delete the
    intermediate CASA products that are not needed.

    Returns 0 on success (or when an image is already unpacked), 1 on failure.
    """
    s3_input = self.inputs[0]
    bucket_name = s3_input.bucket
    key = s3_input.key
    measurement_set_output = self.outputs[0]
    measurement_set_dir = measurement_set_output.path
    LOG.info('bucket: {0}, key: {1}, dir: {2}'.format(bucket_name, key, measurement_set_dir))

    # Does the directory exist with an image already unpacked?
    if os.path.exists(measurement_set_dir):
        for filename in os.listdir(measurement_set_dir):
            LOG.debug('filename: {0}'.format(filename))
            if filename.endswith('.image'):
                # Fix: Logger.warn is a deprecated alias of Logger.warning
                LOG.warning('Measurement Set: {0} exists'.format(filename))
                return 0
    else:
        # Make the directory
        os.makedirs(measurement_set_dir)

    full_path_tar_file = os.path.join(measurement_set_dir, TAR_FILE)
    LOG.info('Tar: {0}'.format(full_path_tar_file))

    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)
    s3_object = s3.Object(bucket_name, key)
    s3_size = s3_object.content_length
    s3_client = s3.meta.client
    transfer = S3Transfer(s3_client)
    transfer.download_file(
        bucket_name,
        key,
        full_path_tar_file,
        callback=ProgressPercentage(key, s3_size)
    )

    if not os.path.exists(full_path_tar_file):
        message = 'The tar file {0} does not exist'.format(full_path_tar_file)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    # Check the sizes match
    tar_size = os.path.getsize(full_path_tar_file)
    if s3_size != tar_size:
        message = 'The sizes for {0} differ S3: {1}, local FS: {2}'.format(full_path_tar_file, s3_size, tar_size)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    # The tar file exists and is the same size
    bash = 'tar -xvf {0} -C {1}'.format(full_path_tar_file, measurement_set_dir)
    return_code = run_command(bash)
    if return_code != 0:
        message = 'tar return_code: {0}'.format(return_code)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    os.remove(full_path_tar_file)

    # Remove the stuff we don't need
    LOG.info('measurement_set_dir: {0}'.format(measurement_set_dir))
    for filename in os.listdir(measurement_set_dir):
        LOG.debug('filename: {0}'.format(filename))
        # endswith takes a tuple directly - no need to wrap a list
        if filename.endswith(('.flux', '.model', '.residual', '.psf')):
            full_name = os.path.join(measurement_set_dir, filename)
            LOG.info('full_name: {0}'.format(full_name))
            shutil.rmtree(full_name, ignore_errors=True)

    return 0
def run(self):
    """Download a measurement-set tar from S3, verify its size and unpack it.

    Returns 0 on success (or when the set is already present), 1 on failure.
    """
    s3_input = self.inputs[0]
    bucket_name = s3_input.bucket
    key = s3_input.key
    measurement_set_output = self.outputs[0]
    measurement_set_dir = measurement_set_output.path
    LOG.info('bucket: {0}, key: {1}, dir: {2}'.format(bucket_name, key, measurement_set_dir))

    # NOTE(review): this checks the S3 key (minus '.tar') as a *local* path;
    # it is not joined with measurement_set_dir - confirm this is intended.
    measurement_set = key[:-4]
    LOG.debug('Checking {0} exists'.format(measurement_set))
    if os.path.exists(measurement_set) and os.path.isdir(measurement_set):
        # Fix: Logger.warn is a deprecated alias of Logger.warning
        LOG.warning('Measurement Set: {0} exists'.format(measurement_set))
        return 0

    # Make the directory
    if not os.path.exists(measurement_set_dir):
        os.makedirs(measurement_set_dir)

    # The following will need (16 + 1) * 262144000 bytes of heap space, ie approximately 4.5G.
    # Note setting minimum as well as maximum heap results in OutOfMemory errors at times!
    # The -d64 is to make sure we are using a 64bit JVM.
    # When extracting to the tar we need even more
    full_path_tar_file = os.path.join(measurement_set_dir, TAR_FILE)
    LOG.info('Tar: {0}'.format(full_path_tar_file))

    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)
    s3_object = s3.Object(bucket_name, key)
    s3_size = s3_object.content_length
    s3_client = s3.meta.client
    transfer = S3Transfer(s3_client)
    transfer.download_file(
        bucket_name,
        key,
        full_path_tar_file,
        callback=ProgressPercentage(key, s3_size)
    )

    if not os.path.exists(full_path_tar_file):
        message = 'The tar file {0} does not exist'.format(full_path_tar_file)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    # Check the sizes match
    tar_size = os.path.getsize(full_path_tar_file)
    if s3_size != tar_size:
        message = 'The sizes for {0} differ S3: {1}, local FS: {2}'.format(full_path_tar_file, s3_size, tar_size)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    # The tar file exists and is the same size
    bash = 'tar -xvf {0} -C {1}'.format(full_path_tar_file, measurement_set_dir)
    return_code = run_command(bash)
    path_exists = os.path.exists(measurement_set)
    if return_code != 0 or not path_exists:
        message = 'tar return_code: {0}, exists: {1}, measurement_set: {2}'.format(return_code, path_exists, measurement_set)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 1

    os.remove(full_path_tar_file)
    return 0
def run(self):
    """Tar the CASA clean products for this frequency range and upload them
    to S3; also upload the centred images and QA products when the centred
    image exists on disk.

    Returns 0 when the image is missing, non-zero on tar failure, otherwise
    the last tar return code (0) after the uploads.
    """
    measurement_set_output = self.inputs[0]
    measurement_set_dir = measurement_set_output.path
    s3_output = self.outputs[0]
    bucket_name = s3_output.bucket
    key = s3_output.key
    LOG.info('dir: {2}, bucket: {0}, key: {1}'.format(bucket_name, key, measurement_set_dir))

    # Does the cleaned image exist?
    stem_name = 'clean_{0}~{1}'.format(self._min_frequency, self._max_frequency)
    measurement_set = os.path.join(measurement_set_dir, stem_name)
    LOG.debug('checking {0}.image exists'.format(measurement_set))
    if not os.path.exists(measurement_set + '.image') or not os.path.isdir(measurement_set + '.image'):
        message = 'Measurement_set: {0}.image does not exist'.format(measurement_set)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        return 0

    # Make the tar file
    tar_filename = os.path.join(measurement_set_dir, 'clean_{0}~{1}.tar'.format(self._min_frequency, self._max_frequency))
    os.chdir(measurement_set_dir)
    if self._only_image:
        bash = 'tar -cvf {0} {1}.image {1}.psf.centre'.format(
            tar_filename,
            stem_name,
        )
    else:
        bash = 'tar -cvf {0} {1}.flux {1}.image {1}.model {1}.residual {1}.psf'.format(
            tar_filename,
            stem_name,
        )
    return_code = run_command(bash)

    path_exists = os.path.exists(tar_filename)
    if return_code != 0 or not path_exists:
        message = 'tar return_code: {0}, exists: {1}'.format(return_code, path_exists)
        LOG.error(message)
        self.send_error_message(message, self.oid, self.uid)
        # Bug fix: previously fell through and uploaded a missing/partial tar
        return return_code if return_code != 0 else 1

    session = boto3.Session(profile_name='aws-chiles02')
    s3 = session.resource('s3', use_ssl=False)

    s3_client = s3.meta.client
    transfer = S3Transfer(s3_client)
    transfer.upload_file(
        tar_filename,
        bucket_name,
        key,
        callback=ProgressPercentage(
            key,
            float(os.path.getsize(tar_filename))
        ),
        extra_args={
            'StorageClass': 'REDUCED_REDUNDANCY',
        }
    )

    # Centred images
    if os.path.exists(measurement_set + '.image.centre'):
        tar_filename = os.path.join(measurement_set_dir, 'clean_{0}~{1}.centre.tar'.format(self._min_frequency, self._max_frequency))
        bash = 'tar -cvf {0} {1}.image.centre {1}.psf.centre'.format(
            tar_filename,
            stem_name,
        )
        return_code = run_command(bash)

        path_exists = os.path.exists(tar_filename)
        if return_code != 0 or not path_exists:
            message = 'tar return_code: {0}, exists: {1}'.format(return_code, path_exists)
            LOG.error(message)
            self.send_error_message(message, self.oid, self.uid)
            # Bug fix: abort instead of uploading a broken tar
            return return_code if return_code != 0 else 1

        transfer.upload_file(
            tar_filename,
            bucket_name,
            key + '.centre',
            callback=ProgressPercentage(
                key,
                float(os.path.getsize(tar_filename))
            ),
            extra_args={
                'StorageClass': 'REDUCED_REDUNDANCY',
            }
        )

        # QA products
        # NOTE(review): original indentation is ambiguous - confirm whether
        # the QA upload should run only when the centred image exists.
        tar_filename = os.path.join(measurement_set_dir, 'clean_{0}~{1}.qa.tar'.format(self._min_frequency, self._max_frequency))
        bash = 'tar -cvf {0} {1}.image.mom.mean_freq {1}.image.mom.mean_ra {1}.image.mom.slice_ra ' \
               '{1}.image.slice.txt {1}.image.slice.svg ' \
               '{1}.image.onsource_centre.txt {1}.image.onsource_centre.svg ' \
               '{1}.image.onsource_south.txt {1}.image.onsource_south.svg ' \
               '{1}.image.boresight.txt {1}.image.boresight.svg'.format(
                   tar_filename,
                   stem_name,
               )
        return_code = run_command(bash)

        path_exists = os.path.exists(tar_filename)
        if return_code != 0 or not path_exists:
            message = 'tar return_code: {0}, exists: {1}'.format(return_code, path_exists)
            LOG.error(message)
            self.send_error_message(message, self.oid, self.uid)
            # Bug fix: abort instead of uploading a broken tar
            return return_code if return_code != 0 else 1

        transfer.upload_file(
            tar_filename,
            bucket_name,
            key + '.qa',
            callback=ProgressPercentage(
                key,
                float(os.path.getsize(tar_filename))
            ),
            extra_args={
                'StorageClass': 'REDUCED_REDUNDANCY',
            }
        )

    return return_code