def copy_files(args):
    """Walk args.product_dir for calibrated measurement sets and queue each for upload."""
    # Work queue feeding the consumer processes.
    queue = multiprocessing.JoinableQueue()

    # Spin up the consumer pool.
    for _ in range(PROCESSES):
        Consumer(queue).start()

    # Scan the output tree for matching measurement-set directories.
    for root, dir_names, filenames in os.walk(args.product_dir):
        LOGGER.debug('root: {0}, dir_names: {1}, filenames: {2}'.format(
            root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, '13B-266*calibrated_deepfield.ms'):
            result_dir = join(root, match)
            LOGGER.info('Queuing result_dir: {0}'.format(result_dir))
            copy_task = CopyTask(
                args.bucket,
                match,
                result_dir,
                args.aws_access_key_id,
                args.aws_secret_access_key)
            queue.put(copy_task)

    # One poison pill per consumer shuts the pool down.
    for _ in range(PROCESSES):
        queue.put(None)

    # Block until every queued task has been processed.
    queue.join()
def _upload(retries_left=amount_of_retries):
    """Upload one file chunk of the multipart upload, retrying on failure.

    Recurses with a decremented retry budget; when the budget is exhausted
    the last exception propagates to the caller.
    """
    try:
        LOGGER.info(
            'Start uploading part: #{0}, source_path: {1}'.format(
                part_num, source_path))
        conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
        bucket = conn.get_bucket(bucket_name)
        # Find our multipart upload by id and push this chunk into it.
        for mp in bucket.get_all_multipart_uploads():
            if mp.id == multipart_id:
                with FileChunkIO(source_path, 'r', offset=offset,
                                 bytes=bytes_to_copy) as fp:
                    mp.upload_part_from_file(fp=fp, part_num=part_num)
                break
    except Exception:
        # 'except Exception, exc' is Python-2-only syntax; this form works on
        # Python 2.6+ and 3.x.
        if retries_left:
            _upload(retries_left=retries_left - 1)
        else:
            LOGGER.info(
                'Failed uploading part: #{0}, source_path: {1}'.format(
                    part_num, source_path))
            # Bare raise preserves the original traceback ('raise exc' loses it
            # on Python 2).
            raise
def copy_files(frequency_id, processes, days):
    """Queue downloads of the CVEL data tarballs for frequency_id on the given days."""
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}, frequency_id: {1}'.format(
        bucket, frequency_id))

    # Work queue plus a pool of consumer processes.
    queue = multiprocessing.JoinableQueue()
    for _ in range(processes):
        Consumer(queue).start()

    for key in bucket.list(prefix='CVEL/{0}'.format(frequency_id)):
        LOGGER.info('Checking {0}'.format(key.key))
        # Only the data tarballs are of interest; everything else is ignored.
        if key.key.endswith('/data.tar.gz') or key.key.endswith('/data.tar'):
            elements = key.key.split('/')
            if elements[2] in days:
                directory = '/mnt/output/Chiles/split_vis/{0}/'.format(
                    elements[2])
                # Queue the copy of the file
                tar_name = 'data.tar.gz' if key.key.endswith('/data.tar.gz') else 'data.tar'
                queue.put(Task(key, os.path.join(directory, tar_name),
                               directory, frequency_id))

    # Poison pills: one per consumer.
    for _ in range(processes):
        queue.put(None)

    # Wait until everything queued has been handled.
    queue.join()
def create_users(emails):
    """Register an account for each email address.

    The username is the part of the email before '@'; the password comes from
    config['default_password'].

    Returns:
        (users, failed): the JSON payloads of successfully created accounts,
        and the email addresses whose registration did not succeed.
    """
    failed = []
    users = []
    for email in emails:
        username = email.split('@')[0]
        data = {
            'username': username,
            'password': config['default_password'],
            'email': email
        }
        dumped = json.dumps(data, ensure_ascii=False).encode('utf-8')
        resp = requests.post(
            url="{}/register/".format(base_url),
            data=dumped,
            headers={'Content-Type': 'application/json;charset=UTF-8'})
        if resp.status_code == 201:
            users.append(resp.json())
            LOGGER.info("Account successfully created.")
        else:
            # Previously only 400 responses were recorded as failures; any
            # other non-201 status (e.g. 5xx) was silently dropped, leaving the
            # account in neither list. Record every unsuccessful registration.
            failed.append(email)
            try:
                body = str(resp.json())
            except ValueError:
                # Error responses are not guaranteed to be JSON.
                body = resp.text
            LOGGER.error(
                "Failed to create account for email {0}. Response: {1}".format(
                    email, body))
    return users, failed
def __call__(self):
    """
    Actually run the job
    """
    LOGGER.info('frequency_id: {0}'.format(self._frequency_id))
    ec2_helper = EC2Helper()
    zone = ec2_helper.get_cheapest_spot_price(self._instance_type, self._spot_price)

    # No zone means no acceptable spot price was found.
    if zone is None:
        LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(
            self._instance_type, self._spot_price))
        return

    # Build the MIME user-data payload and start the spot instance.
    user_data_mime = self.get_mime_encoded_user_data()
    LOGGER.info('{0}'.format(user_data_mime))
    ec2_helper.run_spot_instance(
        self._ami_id,
        self._spot_price,
        user_data_mime,
        self._instance_type,
        None,
        self._created_by,
        '{0}-{1}'.format(self._frequency_id, self._name),
        instance_details=self._instance_details,
        zone=zone,
        ephemeral=True)
def add_file_to_bucket_multipart(self, bucket_name, key_name, source_path, parallel_processes=2, reduced_redundancy=True):
    """
    Parallel multipart upload.
    """
    LOGGER.info('bucket_name: {0}, key_name: {1}, filename: {2}, parallel_processes: {3}, reduced_redundancy: {4}'.format(
        bucket_name, key_name, source_path, parallel_processes, reduced_redundancy))

    source_size = os.stat(source_path).st_size
    bytes_per_chunk = 10 * 1024 * 1024
    chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))

    # S3 caps a multipart upload at 10,000 parts.
    if chunk_amount >= 10000:
        raise S3UploadException('Too many chunks')

    bucket = self.get_bucket(bucket_name)
    content_type = mimetypes.guess_type(key_name)[0] or 'application/octet-stream'
    mp = bucket.initiate_multipart_upload(
        key_name,
        headers={'Content-Type': content_type},
        reduced_redundancy=reduced_redundancy)
    LOGGER.info('bytes_per_chunk: {0}, chunk_amount: {1}'.format(bytes_per_chunk, chunk_amount))

    # Farm the chunks out to a pool of worker processes.
    pool = Pool(processes=parallel_processes)
    for part_index in range(chunk_amount):
        offset = part_index * bytes_per_chunk
        bytes_to_copy = min(bytes_per_chunk, source_size - offset)
        pool.apply_async(
            upload_part,
            [self._aws_access_key_id, self._aws_secret_access_key, bucket_name,
             mp.id, part_index + 1, source_path, offset, bytes_to_copy])
    pool.close()
    pool.join()

    # Complete only if every part arrived; otherwise abort the upload.
    if len(mp.get_all_parts()) == chunk_amount:
        mp.complete_upload()
    else:
        mp.cancel_upload()
def copy_files(frequency_id, processes, days):
    """Queue the CVEL tarballs for frequency_id, limited to the requested days."""
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}, frequency_id: {1}'.format(bucket, frequency_id))

    # Create the queue
    queue = multiprocessing.JoinableQueue()
    # Start the consumers
    for _ in range(processes):
        Consumer(queue).start()

    for key in bucket.list(prefix='CVEL/{0}'.format(frequency_id)):
        LOGGER.info('Checking {0}'.format(key.key))
        is_gzipped = key.key.endswith('/data.tar.gz')
        # Ignore the key unless it is one of the data tarballs.
        if not (is_gzipped or key.key.endswith('/data.tar')):
            continue
        elements = key.key.split('/')
        if elements[2] not in days:
            continue
        directory = '/mnt/output/Chiles/split_vis/{0}/'.format(elements[2])
        # Queue the copy of the file
        temp_file = os.path.join(directory, 'data.tar.gz' if is_gzipped else 'data.tar')
        queue.put(Task(key, temp_file, directory, frequency_id))

    # Add a poison pill to shut things down
    for _ in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
def __call__(self):
    """
    Actually run the job
    """
    # Get the name of the volume
    ec2_helper = EC2Helper()
    iops = 500 if self._instance_details.iops_support else None

    zone = ec2_helper.get_cheapest_spot_price(self._instance_type, self._spot_price)
    if zone is not None:
        # Materialise the snapshot as a volume in the chosen zone.
        volume, snapshot_name = ec2_helper.create_volume(self._snapshot_id, zone, iops=iops)
        LOGGER.info('obs_id: {0}, volume_name: {1}'.format(self._obs_id, snapshot_name))
        user_data_mime = self.get_mime_encoded_user_data(volume.id)

        if self._spot_price is not None:
            ec2_helper.run_spot_instance(
                self._ami_id,
                self._spot_price,
                user_data_mime,
                self._instance_type,
                volume.id,
                self._created_by,
                '{1}-{2}-{0}'.format(self._name, snapshot_name, self._counter),
                self._instance_details,
                zone,
                ephemeral=True)
    else:
        LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(
            self._instance_type, self._spot_price))
def copy_files(frequency_id): s3_helper = S3Helper() # Look in the output directory LOGGER.info('directory_data: {0}'.format(CHILES_CLEAN_OUTPUT)) for dir_name in os.listdir(CHILES_CLEAN_OUTPUT): LOGGER.info('dir_name: {0}'.format(dir_name)) result_dir = join(CHILES_CLEAN_OUTPUT, dir_name) if isdir(result_dir) and dir_name.startswith( 'cube_') and dir_name.endswith('.image'): LOGGER.info('dir_name: {0}'.format(dir_name)) output_tar_filename = join(CHILES_CLEAN_OUTPUT, dir_name + '.tar') if can_be_multipart_tar(result_dir): LOGGER.info('Using add_tar_to_bucket_multipart') s3_helper.add_tar_to_bucket_multipart( CHILES_BUCKET_NAME, '/CLEAN/{0}/{1}'.format(frequency_id, basename(output_tar_filename)), result_dir) else: LOGGER.info('Using make_tarfile, then adding file to bucket') make_tarfile(output_tar_filename, result_dir) s3_helper.add_file_to_bucket( CHILES_BUCKET_NAME, 'CVEL/{0}/{1}/data.tar'.format( frequency_id, basename(output_tar_filename)), output_tar_filename) # Clean up os.remove(output_tar_filename)
def copy_files(frequency_id):
    """Upload every cleaned cube_*.image directory as a tar to the CHILES bucket."""
    s3_helper = S3Helper()
    # Look in the output directory
    LOGGER.info('directory_data: {0}'.format(CHILES_CLEAN_OUTPUT))
    for dir_name in os.listdir(CHILES_CLEAN_OUTPUT):
        LOGGER.info('dir_name: {0}'.format(dir_name))
        result_dir = join(CHILES_CLEAN_OUTPUT, dir_name)

        # Skip anything that is not a cube_*.image result directory.
        looks_like_cube = dir_name.startswith('cube_') and dir_name.endswith('.image')
        if not (isdir(result_dir) and looks_like_cube):
            continue

        LOGGER.info('dir_name: {0}'.format(dir_name))
        output_tar_filename = join(CHILES_CLEAN_OUTPUT, dir_name + '.tar')
        tar_basename = basename(output_tar_filename)

        if can_be_multipart_tar(result_dir):
            LOGGER.info('Using add_tar_to_bucket_multipart')
            s3_helper.add_tar_to_bucket_multipart(
                CHILES_BUCKET_NAME,
                '/CLEAN/{0}/{1}'.format(frequency_id, tar_basename),
                result_dir)
        else:
            LOGGER.info('Using make_tarfile, then adding file to bucket')
            make_tarfile(output_tar_filename, result_dir)
            s3_helper.add_file_to_bucket(
                CHILES_BUCKET_NAME,
                'CVEL/{0}/{1}/data.tar'.format(frequency_id, tar_basename),
                output_tar_filename)
            # Clean up
            os.remove(output_tar_filename)
def copy_files(args):
    """Queue every calibrated deep-field measurement set under args.product_dir for upload."""
    # Create the queue
    queue = multiprocessing.JoinableQueue()

    # Start the consumers
    consumers = [Consumer(queue) for _ in range(PROCESSES)]
    for consumer in consumers:
        consumer.start()

    # Look in the output directory
    for root, dir_names, filenames in os.walk(args.product_dir):
        LOGGER.debug('root: {0}, dir_names: {1}, filenames: {2}'.format(root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, '13B-266*calibrated_deepfield.ms'):
            result_dir = join(root, match)
            LOGGER.info('Queuing result_dir: {0}'.format(result_dir))
            queue.put(CopyTask(args.bucket,
                               match,
                               result_dir,
                               args.aws_access_key_id,
                               args.aws_secret_access_key))

    # Add a poison pill to shut things down
    for _ in range(PROCESSES):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
def __call__(self):
    """
    Actually run the job
    """
    # Derive the extraction directory name by stripping the tar extension.
    tar_file = self._tar_file
    if tar_file.endswith('.tar.gz'):
        image_name = basename(tar_file).replace('.tar.gz', '')
        tar_mode = "r:gz"
    else:
        image_name = basename(tar_file).replace('.tar', '')
        tar_mode = "r:"
    directory = join(self._directory, image_name)

    # noinspection PyBroadException
    try:
        LOGGER.info('key: {0}, tar_file: {1}, directory: {2}'.format(
            self._key.key, tar_file, directory))
        if not os.path.exists(directory):
            os.makedirs(directory)

        # Download the tarball, unpack it, then discard the archive.
        self._key.get_contents_to_filename(tar_file)
        with closing(tarfile.open(tar_file, tar_mode)) as tar:
            tar.extractall(path=directory)
        os.remove(tar_file)
    except Exception:
        LOGGER.exception('Task died')
        # Remove whatever was partially extracted.
        shutil.rmtree(directory, ignore_errors=True)
def add_file_to_bucket(self, bucket_name, key_name, filename, reduced_redundancy=True):
    """
    Add file to a bucket

    :param bucket_name:
    :param key_name:
    :param filename:
    """
    LOGGER.info(
        'bucket_name: {0}, key_name: {1}, filename: {2}, reduced_redundancy: {3}'
        .format(bucket_name, key_name, filename, reduced_redundancy))

    # Retry up to three times on transient socket errors, pausing in between.
    for _ in range(3):
        try:
            bucket = self.get_bucket(bucket_name)
            key = Key(bucket)
            key.key = key_name
            key.set_contents_from_filename(
                filename, reduced_redundancy=reduced_redundancy)
            return
        except socket.error:
            LOGGER.exception('Error')
            time.sleep(10)
def copy_files(processes, bottom_frequency, frequency_range):
    """Queue downloads of the CLEAN image tarballs within the requested frequency range."""
    # Create the directory
    if not exists(DIRECTORY):
        os.makedirs(DIRECTORY)

    # Scan the bucket
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}/CLEAN'.format(bucket))

    # Work queue plus consumer processes.
    queue = multiprocessing.JoinableQueue()
    for _ in range(processes):
        Consumer(queue).start()

    for key in bucket.list(prefix='CLEAN/'):
        LOGGER.info('Checking {0}'.format(key.key))
        # Ignore anything that is not an image tarball.
        if not (key.key.endswith('.image.tar.gz') or key.key.endswith('.image.tar')):
            continue
        # Do we need this file?
        basename_key = basename(key.key)
        if in_frequency_range(basename_key, bottom_frequency, frequency_range):
            # Queue the copy of the file
            queue.put(Task(key, os.path.join(DIRECTORY, basename_key), DIRECTORY))

    # One poison pill per consumer.
    for _ in range(processes):
        queue.put(None)

    # Wait for everything to drain.
    queue.join()
def start_pong(self, xxx_id: int) -> None:
    """Begin 'pong' processing for xxx_id, guarded by a redis is-processing flag."""
    global g_processing_xxx_id
    r = get_redis_client()
    # Per-id lock key; value '1' marks an in-flight processing run.
    processing_key = (f"{PROJECT_NAME}.{APP_NAME}.xxx.is_processing.{xxx_id}")
    if r.get(processing_key) == '1':
        LOGGER.info(f"ignore processing xxx {xxx_id}")
        return
    # Claim the flag with a 30-second expiry so a crashed worker releases it.
    r.set(processing_key, '1', ex=30)
    g_processing_xxx_id = xxx_id
    list(range(10000))  # result discarded — presumably simulated work; confirm

    def start() -> None:
        # Emit a 'processed' event roughly once per second, refreshing the
        # flag's TTL each tick; the flag is deleted however the loop exits.
        LOGGER.info(f"start_work with {xxx_id}")
        list(range(20000))
        try:
            while True:
                list(range(3000))
                self.dispatch("processed", {
                    'xxx_id': xxx_id,
                    'info': {},
                })
                r.expire(processing_key, 3)
                time.sleep(1)
        finally:
            r.delete(processing_key)
    # NOTE(review): 'start' is defined but never invoked in this visible body —
    # confirm whether a call (or a thread start) is missing here.
def __call__(self):
    # noinspection PyBroadException
    try:
        # Push the local file up to its S3 location; failures are logged, not raised.
        LOGGER.info('Copying {0} to s3:{1}'.format(self._filename, self._bucket_location))
        S3Helper().add_file_to_bucket(CHILES_BUCKET_NAME, self._bucket_location, self._filename)
    except Exception:
        LOGGER.exception('CopyTask died')
def add_tar_to_bucket_multipart(self, bucket_name, key_name, source_path, gzip=False, parallel_processes=2, reduced_redundancy=True, bufsize=10 * 1024 * 1024):
    """
    Parallel multipart upload.

    Streams a tar of source_path's entries through S3Feeder into a multipart
    upload, completing it on success and cancelling it on any failure.
    """
    LOGGER.info(
        'bucket_name: {0}, key_name: {1}, source_path: {2}, parallel_processes: {3}, reduced_redundancy: {4}, bufsize: {5}'
        .format(bucket_name, key_name, source_path, parallel_processes,
                reduced_redundancy, bufsize))
    bucket = self.get_bucket(bucket_name)
    headers = {
        'Content-Type':
        mimetypes.guess_type(key_name)[0] or 'application/octet-stream'
    }
    mp = bucket.initiate_multipart_upload(
        key_name, headers=headers, reduced_redundancy=reduced_redundancy)
    s3_feeder = S3Feeder(mp.id, bufsize, bucket_name, parallel_processes,
                         self._aws_access_key_id, self._aws_secret_access_key)

    # Streaming tar mode: gzip-compressed or plain.
    if gzip:
        mode = "w|gz"
    else:
        mode = "w|"
    tar = tarfile.open(mode=mode, fileobj=s3_feeder, bufsize=int(bufsize / 10))

    complete = True
    # noinspection PyBroadException
    try:
        for entry in os.listdir(source_path):
            full_filename = join(source_path, entry)
            LOGGER.info('tar: [full_filename: {0}, entry: {1}]'.format(
                full_filename, entry))
            tar.add(full_filename, arcname=entry)

        tar.close()
        s3_feeder.close()
    except Exception:
        # Previously this failure path was completely silent; log it so a
        # cancelled upload can be diagnosed.
        LOGGER.exception('Error streaming tar for key: {0}'.format(key_name))
        complete = False
        s3_feeder.close()

    # Finish the upload
    if complete:
        mp.complete_upload()
    else:
        mp.cancel_upload()
def get_mime_encoded_user_data(volume_id, setup_disks, in_user_data, now):
    """
    AWS allows for a multipart m
    """
    mime_message = MIMEMultipart()
    mime_message.attach(get_cloud_init())

    # Substitute the volume id, timestamp and pip package list into the template.
    formatted = in_user_data.format(volume_id, now, PIP_PACKAGES)
    LOGGER.info(formatted)
    mime_message.attach(MIMEText(setup_disks + formatted))
    return mime_message.as_string()
def get(self) -> t.Dict[str, t.Any]:
    """Ping the pong health service over Pyro4 and nameko and report the outcome."""
    try:
        rpc_proxy: Pyro4.Proxy = get_health_service_rpc_proxy('pong')
    except LookupError as e:
        # Service lookup failed; surface the error in the response body.
        LOGGER.error(f"{e}: {traceback.format_exc()}")
        return {'hello': f'error {e}'}

    LOGGER.info(rpc_proxy.dev_pyro4_ping(1, src='duang'))
    with make_service_proxy() as service_proxy:
        LOGGER.info(service_proxy.pong.dev_nameko_ping(1, src='duang'))
    return {'hello': 'world'}
def __call__(self):
    # noinspection PyBroadException
    try:
        LOGGER.info('Copying {0} to s3:{1}'.format(self._filename, self._bucket_location))
        helper = S3Helper()
        # Single-file upload to the configured bucket location.
        helper.add_file_to_bucket(
            CHILES_BUCKET_NAME,
            self._bucket_location,
            self._filename)
    except Exception:
        LOGGER.exception('CopyTask died')
def add_file_to_bucket_multipart(self, bucket_name, key_name, source_path, parallel_processes=2, reduced_redundancy=True):
    """
    Parallel multipart upload.
    """
    LOGGER.info(
        'bucket_name: {0}, key_name: {1}, filename: {2}, parallel_processes: {3}, reduced_redundancy: {4}'
        .format(bucket_name, key_name, source_path, parallel_processes,
                reduced_redundancy))

    chunk_size = 10 * 1024 * 1024
    total_size = os.stat(source_path).st_size
    total_chunks = int(math.ceil(total_size / float(chunk_size)))

    if total_chunks < 10000:
        bucket = self.get_bucket(bucket_name)
        headers = {
            'Content-Type':
            mimetypes.guess_type(key_name)[0] or 'application/octet-stream'
        }
        mp = bucket.initiate_multipart_upload(
            key_name, headers=headers, reduced_redundancy=reduced_redundancy)
        LOGGER.info('bytes_per_chunk: {0}, chunk_amount: {1}'.format(
            chunk_size, total_chunks))

        # You can only upload 10,000 chunks
        pool = Pool(processes=parallel_processes)
        for index in range(total_chunks):
            start = index * chunk_size
            length = min([chunk_size, total_size - start])
            pool.apply_async(upload_part, [
                self._aws_access_key_id, self._aws_secret_access_key,
                bucket_name, mp.id, index + 1, source_path, start, length
            ])
        pool.close()
        pool.join()

        # Only complete when every part made it; otherwise abort the upload.
        if len(mp.get_all_parts()) == total_chunks:
            mp.complete_upload()
        else:
            mp.cancel_upload()
    else:
        raise S3UploadException('Too many chunks')
def handle_stop_pong_event(self, event_data: t.Dict[str, t.Any]) -> None:
    """Terminate this worker if the stop event targets the xxx it is processing."""
    global g_processing_xxx_id
    xxx_id = event_data['xxx_id']
    # Stop requests aimed at other workers are ignored.
    if xxx_id != g_processing_xxx_id:
        return

    LOGGER.info("stop self")

    def _terminate() -> None:
        # Brief pause lets this handler return before the signal lands.
        time.sleep(0.1)
        os.kill(os.getpid(), signal.SIGQUIT)

    threading.Thread(target=_terminate, daemon=True).start()
def get_mime_encoded_user_data(self, volume_id):
    """
    AWS allows for a multipart m
    """
    mime_message = MIMEMultipart()
    mime_message.attach(get_cloud_init())

    # Build the strings we need
    cvel_pipeline = self.build_cvel_pipeline()
    script_body = self._user_data.format(
        cvel_pipeline,
        self._obs_id,
        volume_id,
        self._now,
        self._counter,
        PIP_PACKAGES)
    LOGGER.info(script_body)
    mime_message.attach(MIMEText(self._setup_disks + script_body))
    return mime_message.as_string()
def copy_files(date, vis_file):
    """Tar each matching CVEL result directory, upload it, then remove it locally."""
    s3_helper = S3Helper()
    # Look in the output directory
    for root, dir_names, filenames in os.walk(CHILES_CVEL_OUTPUT):
        LOGGER.info('root: {0}, dir_names: {1}, filenames: {2}'.format(root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, vis_file):
            result_dir = join(root, match)
            LOGGER.info('Working on: {0}'.format(result_dir))
            bucket_key = 'CVEL/{0}/{1}/data.tar'.format(vis_file, date)

            if can_be_multipart_tar(result_dir):
                LOGGER.info('Using add_tar_to_bucket_multipart')
                s3_helper.add_tar_to_bucket_multipart(
                    CHILES_BUCKET_NAME, bucket_key, result_dir)
            else:
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                output_tar_filename = join(root, match + '.tar')
                make_tarfile(output_tar_filename, result_dir)
                s3_helper.add_file_to_bucket(
                    CHILES_BUCKET_NAME, bucket_key, output_tar_filename)
                # Clean up
                os.remove(output_tar_filename)

            # The uploaded result directory is no longer needed locally.
            shutil.rmtree(result_dir, ignore_errors=True)
def copy_files(date, vis_file):
    """Walk CHILES_CVEL_OUTPUT for vis_file directories and upload each as a tar."""
    s3_helper = S3Helper()
    # Look in the output directory
    for root, dir_names, filenames in os.walk(CHILES_CVEL_OUTPUT):
        LOGGER.info('root: {0}, dir_names: {1}, filenames: {2}'.format(
            root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, vis_file):
            result_dir = join(root, match)
            LOGGER.info('Working on: {0}'.format(result_dir))

            if can_be_multipart_tar(result_dir):
                LOGGER.info('Using add_tar_to_bucket_multipart')
                s3_helper.add_tar_to_bucket_multipart(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(vis_file, date),
                    result_dir)
            else:
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                output_tar_filename = join(root, match + '.tar')
                make_tarfile(output_tar_filename, result_dir)
                s3_helper.add_file_to_bucket(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(vis_file, date),
                    output_tar_filename)
                # Clean up
                os.remove(output_tar_filename)

            # Drop the local copy once it has been uploaded.
            shutil.rmtree(result_dir, ignore_errors=True)
def start() -> None:
    """Dispatch a 'processed' event for xxx_id roughly once per second, forever."""
    # NOTE(review): xxx_id, self, r and processing_key are free variables here —
    # this reads like a closure defined inside an enclosing handler; confirm.
    LOGGER.info(f"start_work with {xxx_id}")
    list(range(20000))  # result discarded — presumably simulated work; confirm
    try:
        while True:
            list(range(3000))  # presumably simulated per-tick work; confirm
            self.dispatch("processed", {
                'xxx_id': xxx_id,
                'info': {},
            })
            # Keep the redis processing flag alive a little longer each tick.
            r.expire(processing_key, 3)
            time.sleep(1)
    finally:
        # However the loop ends, release the processing flag.
        r.delete(processing_key)
def add_tar_to_bucket_multipart(self, bucket_name, key_name, source_path, gzip=False, parallel_processes=2, reduced_redundancy=True, bufsize=10*1024*1024):
    """
    Parallel multipart upload.

    Tars the entries of source_path into a stream fed through S3Feeder to a
    multipart upload; completes on success, cancels on failure.
    """
    LOGGER.info(
        'bucket_name: {0}, key_name: {1}, source_path: {2}, parallel_processes: {3}, reduced_redundancy: {4}, bufsize: {5}'.format(
            bucket_name,
            key_name,
            source_path,
            parallel_processes,
            reduced_redundancy,
            bufsize
        )
    )
    bucket = self.get_bucket(bucket_name)
    headers = {'Content-Type': mimetypes.guess_type(key_name)[0] or 'application/octet-stream'}
    mp = bucket.initiate_multipart_upload(key_name, headers=headers, reduced_redundancy=reduced_redundancy)
    s3_feeder = S3Feeder(mp.id, bufsize, bucket_name, parallel_processes, self._aws_access_key_id, self._aws_secret_access_key)

    # Streaming tar mode: gzip-compressed or plain.
    if gzip:
        mode = "w|gz"
    else:
        mode = "w|"
    tar = tarfile.open(mode=mode, fileobj=s3_feeder, bufsize=int(bufsize / 10))

    complete = True
    # noinspection PyBroadException
    try:
        for entry in os.listdir(source_path):
            full_filename = join(source_path, entry)
            LOGGER.info(
                'tar: [full_filename: {0}, entry: {1}]'.format(
                    full_filename,
                    entry
                )
            )
            tar.add(full_filename, arcname=entry)

        tar.close()
        s3_feeder.close()
    except Exception:
        # This used to swallow the exception without a trace, making cancelled
        # uploads impossible to diagnose — log it before cancelling.
        LOGGER.exception('Error streaming tar for key: {0}'.format(key_name))
        complete = False
        s3_feeder.close()

    # Finish the upload
    if complete:
        mp.complete_upload()
    else:
        mp.cancel_upload()
def start_servers(
        ami_id, user_data, setup_disks, instance_type, obs_id, created_by,
        name, instance_details, spot_price, ebs, bottom_frequency,
        frequency_range):
    """Launch a spot instance to process obs_id over the given frequency range."""
    LOGGER.info('obs_id: {0}, bottom_frequency: {1}, frequency_range: {2}'.format(obs_id, bottom_frequency, frequency_range))
    ec2_helper = EC2Helper()
    zone = ec2_helper.get_cheapest_spot_price(instance_type, spot_price)

    if zone is None:
        LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(instance_type, spot_price))
        return

    # Swap size
    if ebs is None:
        swap_size = 1
    else:
        # Size swap from the ephemeral storage, capped at 16GB.
        ephemeral_size = instance_details.number_disks * instance_details.size
        swap_size = min(int(ephemeral_size * 0.75), 16)

    user_data_mime = get_mime_encoded_user_data(
        user_data,
        obs_id,
        setup_disks,
        bottom_frequency,
        frequency_range,
        swap_size)
    LOGGER.info('{0}'.format(user_data_mime))

    ec2_helper.run_spot_instance(
        ami_id,
        spot_price,
        user_data_mime,
        instance_type,
        None,
        created_by,
        name + '- {0}'.format(obs_id),
        instance_details=instance_details,
        zone=zone,
        ebs_size=ebs,
        number_ebs_volumes=4,
        ephemeral=True)
def check_args(args):
    """
    Check the arguments and prompt for new ones
    """
    map_args = {}

    # obs_ids is mandatory; '*' selects every known observation.
    if args['obs_ids'] is None:
        return None
    if len(args['obs_ids']) == 1 and args['obs_ids'][0] == '*':
        map_args['obs_ids'] = OBS_IDS.keys()
    else:
        map_args['obs_ids'] = args['obs_ids']

    if args['instance_type'] is None:
        return None
    if args['name'] is None:
        return None

    instance_details = AWS_INSTANCES.get(args['instance_type'])
    if instance_details is None:
        LOGGER.error('The instance type {0} is not supported.'.format(
            args['instance_type']))
        return None

    LOGGER.info(
        'instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}'
        .format(args['instance_type'], instance_details.vCPU,
                instance_details.memory, instance_details.number_disks,
                instance_details.size, instance_details.iops_support))

    # Fill in defaults for anything the caller left unset.
    map_args.update({
        'ami_id': args['ami_id'] if args['ami_id'] is not None else AWS_AMI_ID,
        'created_by': args['created_by'] if args['created_by'] is not None else getpass.getuser(),
        'spot_price': args['spot_price'] if args['spot_price'] is not None else None,
        'user_data': get_script(args['bash_script'] if args['bash_script'] is not None else BASH_SCRIPT_CVEL),
        'setup_disks': get_script(BASH_SCRIPT_SETUP_DISKS),
        'instance_details': instance_details,
    })
    return map_args
def get_cvel():
    """Map each CVEL frequency id to the list of day ids that have data in S3.

    Scans keys of the form 'CVEL/<frequency>/<day>/data.tar[.gz]'.

    Returns:
        dict mapping frequency-id strings to lists of day-id strings.
    """
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    cvel_data = {}
    for key in bucket.list(prefix='CVEL/'):
        LOGGER.info('Checking {0}'.format(key.key))
        if key.key.endswith('data.tar.gz') or key.key.endswith('data.tar'):
            elements = key.key.split('/')
            # setdefault replaces the manual get / create / insert dance.
            cvel_data.setdefault(str(elements[1]), []).append(str(elements[2]))
    return cvel_data
def get_mime_encoded_user_data(instance_details, setup_disks, user_data):
    """
    AWS allows for a multipart m
    """
    # Split the frequencies
    min_freq = 940
    max_freq = 1424
    LOGGER.info("min_freq: {0}, max_freq: {1}".format(min_freq, max_freq))

    # Build the mime message
    mime_data = MIMEMultipart()
    mime_data.attach(get_cloud_init())

    swap_size = get_swap_size(instance_details)
    script_text = user_data.format("TODO", min_freq, max_freq, swap_size, PIP_PACKAGES)
    mime_data.attach(MIMEText(setup_disks + script_text))
    return mime_data.as_string()
def _upload(retries_left=amount_of_retries):
    """Upload one in-memory part, retrying on failure until the budget is spent."""
    try:
        LOGGER.info('Start uploading part #{0} ...'.format(part_num))
        conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
        bucket = conn.get_bucket(bucket_name)
        # Locate our multipart upload by id and send this part from memory.
        for mp in bucket.get_all_multipart_uploads():
            if mp.id == multipart_id:
                fp = StringIO(data_to_store)
                mp.upload_part_from_file(fp=fp, part_num=part_num, replace=True)
                fp.close()  # Tidy up after ourselves
                break
    except Exception:
        # 'except Exception, exc' is Python-2-only syntax; this form works on
        # Python 2.6+ and 3.x.
        if retries_left:
            LOGGER.exception('... Failed uploading part #{0} retries left {1}'.format(part_num, retries_left))
            _upload(retries_left=retries_left - 1)
        else:
            LOGGER.exception('... Failed uploading part #{0}'.format(part_num))
            # Bare raise preserves the original traceback ('raise exc' loses it
            # on Python 2).
            raise
def __call__(self):
    # noinspection PyBroadException
    try:
        s3_helper = S3Helper(self._aws_access_key_id, self._aws_secret_access_key)
        LOGGER.info('Copying to: {0}/{1}/measurement_set.tar'.format(self._bucket, self._bucket_location))
        # We can have 10,000 parts
        # The biggest file from Semester 1 is 803GB
        # So 100 MB
        key_name = '{0}/measurement_set.tar'.format(self._bucket_location)
        s3_helper.add_tar_to_bucket_multipart(
            self._bucket,
            key_name,
            self._filename,
            parallel_processes=2,
            bufsize=100 * 1024 * 1024)
    except Exception:
        LOGGER.exception('CopyTask died')
def check_args(args):
    """
    Check the arguments and prompt for new ones
    """
    map_args = {}

    if args['obs_ids'] is None:
        return None
    elif len(args['obs_ids']) == 1 and args['obs_ids'][0] == '*':
        # '*' means every observation we know about.
        map_args['obs_ids'] = OBS_IDS.keys()
    else:
        map_args['obs_ids'] = args['obs_ids']

    if args['instance_type'] is None or args['name'] is None:
        return None

    instance_details = AWS_INSTANCES.get(args['instance_type'])
    if instance_details is None:
        LOGGER.error('The instance type {0} is not supported.'.format(args['instance_type']))
        return None

    LOGGER.info(
        'instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}'.format(
            args['instance_type'],
            instance_details.vCPU,
            instance_details.memory,
            instance_details.number_disks,
            instance_details.size,
            instance_details.iops_support))

    # Fall back to sensible defaults for anything not supplied.
    bash_script = args['bash_script'] if args['bash_script'] is not None else BASH_SCRIPT_CVEL
    map_args['ami_id'] = args['ami_id'] if args['ami_id'] is not None else AWS_AMI_ID
    map_args['created_by'] = args['created_by'] if args['created_by'] is not None else getpass.getuser()
    map_args['spot_price'] = args['spot_price'] if args['spot_price'] is not None else None
    map_args['user_data'] = get_script(bash_script)
    map_args['setup_disks'] = get_script(BASH_SCRIPT_SETUP_DISKS)
    map_args['instance_details'] = instance_details
    return map_args
def get_mime_encoded_user_data(self): """ AWS allows for a multipart m """ # Split the frequencies index_underscore = find(self._frequency_id, '_') index_tilde = find(self._frequency_id, '~') min_freq = self._frequency_id[index_underscore + 1:index_tilde] max_freq = self._frequency_id[index_tilde + 1:] LOGGER.info('min_freq: {0}, max_freq: {1}'.format(min_freq, max_freq)) # Build the mime message user_data = MIMEMultipart() user_data.attach(get_cloud_init()) swap_size = self.get_swap_size() data_formatted = self._user_data.format(self._frequency_id, swap_size, PIP_PACKAGES) user_data.attach(MIMEText(self._setup_disks + data_formatted)) return user_data.as_string()
def get_mime_encoded_user_data(self):
    """
    AWS allows for a multipart m
    """
    # Split the frequencies
    underscore_at = find(self._frequency_id, '_')
    tilde_at = find(self._frequency_id, '~')
    min_freq = self._frequency_id[underscore_at + 1:tilde_at]
    max_freq = self._frequency_id[tilde_at + 1:]
    LOGGER.info('min_freq: {0}, max_freq: {1}'.format(min_freq, max_freq))

    # Build the mime message
    mime_message = MIMEMultipart()
    mime_message.attach(get_cloud_init())

    swap_size = self.get_swap_size()
    script_text = self._user_data.format(
        self._frequency_id, min_freq, max_freq, swap_size, PIP_PACKAGES)
    mime_message.attach(MIMEText(self._setup_disks + script_text))
    return mime_message.as_string()
def check_args(args):
    """
    Check the arguments and prompt for new ones
    """
    map_args = {}

    # All three of these are mandatory.
    if args["snapshots"] is None:
        return None
    if args["instance_type"] is None:
        return None
    if args["name"] is None:
        return None

    instance_details = AWS_INSTANCES.get(args["instance_type"])
    if instance_details is None:
        LOGGER.error("The instance type {0} is not supported.".format(args["instance_type"]))
        return None

    LOGGER.info(
        "instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}".format(
            args["instance_type"],
            instance_details.vCPU,
            instance_details.memory,
            instance_details.number_disks,
            instance_details.size,
            instance_details.iops_support,
        )
    )

    # Defaults for anything the caller left unset.
    bash_script = args["bash_script"] if args["bash_script"] is not None else BASH_SCRIPT_CLEAN_ALL
    map_args.update(
        {
            "ami_id": args["ami_id"] if args["ami_id"] is not None else AWS_AMI_ID,
            "created_by": args["created_by"] if args["created_by"] is not None else getpass.getuser(),
            "spot_price": args["spot_price"] if args["spot_price"] is not None else None,
            "user_data": get_script(bash_script),
            "setup_disks": get_script(BASH_SCRIPT_SETUP_DISKS),
            "instance_details": instance_details,
        }
    )
    return map_args
def start_servers(
        ami_id, user_data, setup_disks, instance_type, obs_id, created_by,
        name, instance_details, spot_price):
    """Create a volume from the observation's snapshot and launch a spot instance on it."""
    snapshot_id = OBS_IDS.get(obs_id)
    if snapshot_id is None:
        # BUG FIX: the message had an unfilled {0} placeholder — obs_id was
        # never substituted in.
        LOGGER.warning('The obs-id: {0} does not exist in the settings file'.format(obs_id))
    else:
        ec2_helper = EC2Helper()
        iops = None
        if instance_details.iops_support:
            iops = 500
        zone = ec2_helper.get_cheapest_spot_price(instance_type, spot_price)
        if zone is not None:
            # Materialise the snapshot as a volume in the chosen zone.
            volume, snapshot_name = ec2_helper.create_volume(snapshot_id, zone, iops=iops)
            LOGGER.info('obs_id: {0}, volume_name: {1}'.format(obs_id, snapshot_name))
            now = datetime.datetime.now()
            user_data_mime = get_mime_encoded_user_data(volume.id, setup_disks, user_data, now.strftime('%Y-%m-%dT%H-%M-%S'))

            if spot_price is not None:
                ec2_helper.run_spot_instance(
                    ami_id,
                    spot_price,
                    user_data_mime,
                    instance_type,
                    volume.id,
                    created_by,
                    '{1}-{0}'.format(name, snapshot_name),
                    instance_details,
                    zone,
                    ephemeral=True)
        else:
            LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(instance_type, spot_price))
def run_instance(self, ami_id, user_data, instance_type, volume_id, created_by, name, zone, ephemeral=False):
    """
    Run up an instance
    """
    bdm = self.build_block_device_map(ephemeral)
    LOGGER.info('Running instance: ami: {0}'.format(ami_id))
    reservations = self.ec2_connection.run_instances(
        ami_id,
        instance_type=instance_type,
        instance_initiated_shutdown_behavior='terminate',
        subnet_id=AWS_SUBNETS[zone],
        key_name=AWS_KEY_NAME,
        security_group_ids=AWS_SECURITY_GROUPS,
        user_data=user_data,
        block_device_map=bdm)
    instance = reservations.instances[0]

    # Poll until EC2 reports the instance as running.
    time.sleep(5)
    while instance.update() != 'running':
        LOGGER.info('Not running yet')
        time.sleep(5)

    if volume_id:
        # Now we have an instance id we can attach the disk
        self.ec2_connection.attach_volume(volume_id, instance.id, '/dev/xvdf')

    LOGGER.info('Assigning the tags')
    self.ec2_connection.create_tags(
        [instance.id],
        {'AMI': '{0}'.format(ami_id),
         'Name': '{0}'.format(name),
         'Volume_id': '{0}'.format(volume_id),
         'Created By': '{0}'.format(created_by)})
    return instance
def copy_files(cube):
    """Tar the imgconcat cube directory and upload it to the CHILES bucket."""
    s3_helper = S3Helper()
    # Look in the output directory
    directory_to_save = join(CHILES_IMGCONCAT_OUTPUT, cube) + '.cube'
    if not isdir(directory_to_save):
        return

    LOGGER.info('dir_name: {0}'.format(directory_to_save))
    output_tar_filename = directory_to_save + '.tar'
    bucket_key = 'IMGCONCAT/{0}'.format(basename(output_tar_filename))

    if can_be_multipart_tar(directory_to_save):
        LOGGER.info('Using add_tar_to_bucket_multipart')
        s3_helper.add_tar_to_bucket_multipart(
            CHILES_BUCKET_NAME,
            bucket_key,
            directory_to_save,
            bufsize=20 * 1024 * 1024)
    else:
        LOGGER.info('Using make_tarfile, then adding file to bucket')
        make_tarfile(output_tar_filename, directory_to_save)
        s3_helper.add_file_to_bucket(
            CHILES_BUCKET_NAME,
            bucket_key,
            output_tar_filename)
        # Clean up
        os.remove(output_tar_filename)
def copy_files(s3_tag, processes):
    """
    Queue the CHILES and benchmarking log files for upload to S3 under a
    date-stamped prefix, then wait for the consumer processes to drain it.

    :param s3_tag: prefix under which the files are stored in the bucket
    :param processes: number of Consumer processes to start
    """
    # Create the queue
    queue = multiprocessing.JoinableQueue()

    # Start the consumers
    for _ in range(processes):
        consumer = Consumer(queue)
        consumer.start()

    # Look in the output directory
    today = datetime.date.today()

    def _queue_copy(root_dir, file_name):
        # One CopyTask per matched file, keyed by today's date
        queue.put(CopyTask(join(root_dir, file_name),
                           '{0}/{1}{2:02d}{3:02d}/{4}'.format(s3_tag,
                                                              today.year,
                                                              today.month,
                                                              today.day,
                                                              file_name)))

    for root, dir_names, filenames in os.walk(CHILES_LOGS):
        for match in fnmatch.filter(filenames, '*.log'):
            LOGGER.info('Looking at: {0}'.format(join(root, match)))
            _queue_copy(root, match)

    for root, dir_names, filenames in os.walk(BENCHMARKING_LOGS):
        # Both the CSV benchmark output and any logs are copied
        for pattern in ('*.csv', '*.log'):
            for match in fnmatch.filter(filenames, pattern):
                LOGGER.info('Looking at: {0}'.format(join(root, match)))
                _queue_copy(root, match)

    queue.put(CopyTask('/var/log/chiles-output.log',
                       '{0}/{1}{2:02d}{3:02d}/chiles-output.log'.format(s3_tag,
                                                                        today.year,
                                                                        today.month,
                                                                        today.day)))

    # Add a poison pill to shut things down
    for _ in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()
def start_servers(ami_id,
                  user_data,
                  setup_disks,
                  instance_type,
                  obs_id,
                  created_by,
                  name,
                  instance_details,
                  spot_price):
    """
    Build a volume from the snapshot registered for obs_id and launch a
    spot instance on it.

    :param ami_id: the AMI to boot
    :param user_data: user-data template for the instance
    :param setup_disks: disk-setup fragment for the MIME user data
    :param instance_type: the EC2 instance type to request
    :param obs_id: observation id; must be a key of OBS_IDS
    :param created_by: value for the "Created By" tag
    :param name: base name used when tagging the instance
    :param instance_details: per-type details (read for iops_support)
    :param spot_price: maximum spot price to bid
    """
    snapshot_id = OBS_IDS.get(obs_id)
    if snapshot_id is None:
        # Fix: the message had a {0} placeholder but format() was never
        # called, so the obs-id was never interpolated.
        LOGGER.warning(
            'The obs-id: {0} does not exist in the settings file'.format(obs_id))
    else:
        ec2_helper = EC2Helper()
        iops = None
        if instance_details.iops_support:
            iops = 500
        zone = ec2_helper.get_cheapest_spot_price(instance_type, spot_price)
        if zone is not None:
            volume, snapshot_name = ec2_helper.create_volume(
                snapshot_id, zone, iops=iops)
            LOGGER.info('obs_id: {0}, volume_name: {1}'.format(
                obs_id, snapshot_name))
            now = datetime.datetime.now()
            user_data_mime = get_mime_encoded_user_data(
                volume.id,
                setup_disks,
                user_data,
                now.strftime('%Y-%m-%dT%H-%M-%S'))
            if spot_price is not None:
                ec2_helper.run_spot_instance(
                    ami_id,
                    spot_price,
                    user_data_mime,
                    instance_type,
                    volume.id,
                    created_by,
                    '{1}-{0}'.format(name, snapshot_name),
                    instance_details,
                    zone,
                    ephemeral=True)
        else:
            LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(
                instance_type, spot_price))
def _upload(retries_left=amount_of_retries):
    # Upload one file chunk as a part of a multipart S3 upload.
    # NOTE: part_num, source_path, offset, bytes_to_copy, bucket_name,
    # multipart_id and the AWS keys are closed over from the enclosing
    # scope — this is a nested helper; confirm against the outer function.
    try:
        LOGGER.info('Start uploading part: #{0}, source_path: {1}'.format(
            part_num, source_path))
        conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
        bucket = conn.get_bucket(bucket_name)
        # Locate our upload amongst all in-flight multipart uploads by id
        for mp in bucket.get_all_multipart_uploads():
            if mp.id == multipart_id:
                # FileChunkIO exposes just this chunk of the file as a
                # file-like object, so only bytes_to_copy bytes are sent
                with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes_to_copy) as fp:
                    mp.upload_part_from_file(fp=fp, part_num=part_num)
                break
    except Exception, exc:  # Python 2 except syntax; file targets Python 2
        if retries_left:
            # Retry recursively until the retry budget is exhausted
            _upload(retries_left=retries_left - 1)
        else:
            LOGGER.info(
                'Failed uploading part: #{0}, source_path: {1}'.format(
                    part_num, source_path))
            raise exc
def __call__(self):
    """
    Actually run the job.

    Downloads the tar held by self._key to self._tar_file, extracts it
    into a directory named after the image, and removes the tar on
    success. On any failure the partially-extracted directory is removed.
    """
    # Derive the image name from the tar file name (strip the extension)
    if self._tar_file.endswith('.tar.gz'):
        image_name = basename(self._tar_file).replace('.tar.gz', '')
    else:
        image_name = basename(self._tar_file).replace('.tar', '')
    directory = join(self._directory, image_name)
    # noinspection PyBroadException
    try:
        LOGGER.info('key: {0}, tar_file: {1}, directory: {2}'.format(self._key.key, self._tar_file, directory))
        if not os.path.exists(directory):
            os.makedirs(directory)
        # Pull the object from S3 to the local tar path
        self._key.get_contents_to_filename(self._tar_file)
        # Choose gzip vs plain tar mode from the file extension
        with closing(tarfile.open(self._tar_file, "r:gz" if self._tar_file.endswith('.tar.gz') else "r:")) as tar:
            tar.extractall(path=directory)
        os.remove(self._tar_file)
    except Exception:
        # Broad on purpose: a worker task must not kill the consumer;
        # log and clean up whatever was extracted
        LOGGER.exception('Task died')
        shutil.rmtree(directory, ignore_errors=True)
def add_file_to_bucket(self, bucket_name, key_name, filename, reduced_redundancy=True):
    """
    Add file to a bucket, retrying up to three times on socket errors.

    NOTE(review): after three socket errors the method returns without
    raising — callers are not told the upload failed. Other exception
    types propagate immediately.

    :param bucket_name: the bucket to upload into
    :param key_name: the key to store the file under
    :param filename: the local file to upload
    """
    LOGGER.info('bucket_name: {0}, key_name: {1}, filename: {2}, reduced_redundancy: {3}'.format(bucket_name, key_name, filename, reduced_redundancy))
    for _ in range(3):
        try:
            bucket = self.get_bucket(bucket_name)
            key = Key(bucket)
            key.key = key_name
            key.set_contents_from_filename(filename, reduced_redundancy=reduced_redundancy)
            return
        except socket.error:
            LOGGER.exception('Error')
            time.sleep(10)
def __call__(self):
    """
    Fetch the tar held by this task's key and unpack it into the
    frequency directory; on any failure remove the partial extraction.
    """
    extract_dir = join(self._directory, self._frequency_id)
    # noinspection PyBroadException
    try:
        LOGGER.info('key: {0}, tar_file: {1}, directory: {2}, frequency_id: {3}'.format(
            self._key.key, self._tar_file, self._directory, self._frequency_id))
        if not os.path.exists(extract_dir):
            os.makedirs(extract_dir)
        # Download the S3 object to the local tar path
        self._key.get_contents_to_filename(self._tar_file)
        # Pick the tarfile mode from the extension
        tar_mode = "r:gz" if self._tar_file.endswith('tar.gz') else "r:"
        with closing(tarfile.open(self._tar_file, tar_mode)) as tar:
            tar.extractall(path=extract_dir)
        os.remove(self._tar_file)
    except Exception:
        # A task must never kill its consumer process; log and clean up
        LOGGER.exception('Task died')
        shutil.rmtree(extract_dir, ignore_errors=True)
def process_entry(self, dn, entry):
    """
    Process one LDAP entry: for mail accounts locked with 'submit', look
    up the locking date in MariaDB and append it to the output file.

    :param dn: the entry's distinguished name (unused, kept for the caller's protocol)
    :param entry: dict of LDAP attributes; values are lists of bytes
    """
    object_class = entry['objectClass'][-1].decode("utf-8")
    if object_class != 'mailaccount':
        LOGGER.debug('Skipping object of class {}'.format(object_class))
        return
    address = entry['cn'][0].decode("utf-8")
    if 'forwardto' in entry:
        # Alias accounts only forward mail; nothing to check for them
        LOGGER.debug('Skipping alias account {}'.format(address))
        return
    if 'lock' in entry:
        lock = entry['lock'][0].decode("utf-8")
        if lock == 'submit':
            # Fix: the original passed self.mariadb_connection as a stray,
            # unused second argument to format()
            LOGGER.info('Address {} has lock = submit!'.format(address))
            locking_date = check_entry_mariadb(address,
                                               self.mariadb_connection,
                                               self.query)
            self.output_file.write(address + ' ' + object_class +
                                   ' locking date in MariaDB: ' +
                                   locking_date + '\n')
def _upload(retries_left=amount_of_retries):
    # Upload one in-memory chunk as a part of a multipart S3 upload.
    # NOTE: part_num, data_to_store, bucket_name, multipart_id and the AWS
    # keys are closed over from the enclosing scope — this is a nested
    # helper; confirm against the outer function.
    try:
        LOGGER.info('Start uploading part #{0} ...'.format(part_num))
        conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
        bucket = conn.get_bucket(bucket_name)
        # Locate our upload amongst all in-flight multipart uploads by id
        for mp in bucket.get_all_multipart_uploads():
            if mp.id == multipart_id:
                # Wrap the buffered bytes in a file-like object for boto
                fp = StringIO(data_to_store)
                mp.upload_part_from_file(fp=fp, part_num=part_num, replace=True)
                fp.close()  # Tidy up after ourselves
                break
    except Exception, exc:  # Python 2 except syntax; file targets Python 2
        if retries_left:
            LOGGER.exception(
                '... Failed uploading part #{0} retries left {1}'.format(
                    part_num, retries_left))
            # Retry recursively until the retry budget is exhausted
            _upload(retries_left=retries_left - 1)
        else:
            LOGGER.exception(
                '... Failed uploading part #{0}'.format(part_num))
            raise exc
def __call__(self):
    """
    Create a volume from this task's snapshot and launch a spot instance
    attached to it; log an error if no zone offers the bid price.
    """
    # Get the name of the volume
    ec2_helper = EC2Helper()
    # Provisioned IOPS only when the instance type supports it
    iops = 500 if self._instance_details.iops_support else None
    zone = ec2_helper.get_cheapest_spot_price(self._instance_type,
                                              self._spot_price)
    if zone is None:
        LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(
            self._instance_type, self._spot_price))
        return
    volume, snapshot_name = ec2_helper.create_volume(self._snapshot_id,
                                                     zone,
                                                     iops=iops)
    LOGGER.info('obs_id: {0}, volume_name: {1}'.format(
        self._obs_id, snapshot_name))
    user_data_mime = self.get_mime_encoded_user_data(volume.id)
    if self._spot_price is not None:
        ec2_helper.run_spot_instance(
            self._ami_id,
            self._spot_price,
            user_data_mime,
            self._instance_type,
            volume.id,
            self._created_by,
            '{1}-{2}-{0}'.format(self._name, snapshot_name, self._counter),
            self._instance_details,
            zone,
            ephemeral=True)
def __call__(self):
    """
    Download this task's tar from S3 and extract it under the frequency
    directory, removing the partial extraction if anything goes wrong.
    """
    target_dir = join(self._directory, self._frequency_id)
    # noinspection PyBroadException
    try:
        LOGGER.info(
            'key: {0}, tar_file: {1}, directory: {2}, frequency_id: {3}'.
            format(self._key.key, self._tar_file, self._directory,
                   self._frequency_id))
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        self._key.get_contents_to_filename(self._tar_file)
        # gzip mode only when the file name says so
        open_mode = "r:gz" if self._tar_file.endswith('tar.gz') else "r:"
        with closing(tarfile.open(self._tar_file, open_mode)) as tar:
            tar.extractall(path=target_dir)
        os.remove(self._tar_file)
    except Exception:
        # Keep the consumer process alive; log and discard partial output
        LOGGER.exception('Task died')
        shutil.rmtree(target_dir, ignore_errors=True)
def __call__(self):
    """
    Launch a spot instance for this frequency band, or log an error if no
    zone can supply the instance type at the bid price.
    """
    LOGGER.info('frequency_id: {0}'.format(self._frequency_id))
    ec2_helper = EC2Helper()
    zone = ec2_helper.get_cheapest_spot_price(self._instance_type,
                                              self._spot_price)
    if zone is None:
        LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(self._instance_type, self._spot_price))
        return
    user_data_mime = self.get_mime_encoded_user_data()
    LOGGER.info('{0}'.format(user_data_mime))
    # No volume to attach for this task, hence volume_id=None
    ec2_helper.run_spot_instance(
        self._ami_id,
        self._spot_price,
        user_data_mime,
        self._instance_type,
        None,
        self._created_by,
        '{0}-{1}'.format(self._frequency_id, self._name),
        instance_details=self._instance_details,
        zone=zone,
        ephemeral=True)
def get_s3_connection(aws_access_key_id=None, aws_secret_access_key=None):
    """
    Open an S3 connection, preferring explicitly supplied keys, then the
    'chiles' profile in ~/.aws/credentials, then boto's own config files.
    """
    if aws_access_key_id is not None and aws_secret_access_key is not None:
        LOGGER.info("Using user provided keys")
        return boto.connect_s3(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key)
    if exists(join(expanduser('~'), '.aws/credentials')):
        # This relies on a ~/.aws/credentials file holding the '<aws access key>', '<aws secret key>'
        LOGGER.info("Using ~/.aws/credentials")
        return boto.connect_s3(profile_name='chiles')
    # This relies on a ~/.boto or /etc/boto.cfg file holding the '<aws access key>', '<aws secret key>'
    LOGGER.info("Using ~/.boto or /etc/boto.cfg")
    return boto.connect_s3()
def __init__(self, aws_access_key_id=None, aws_secret_access_key=None):
    """
    Get an EC2 connection.

    Credential preference order: explicitly supplied keys, then the
    'chiles' profile in ~/.aws/credentials, then boto's config files.
    """
    if aws_access_key_id is not None and aws_secret_access_key is not None:
        LOGGER.info("Using user provided keys")
        connection = boto.ec2.connect_to_region(
            AWS_REGION,
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key)
    elif exists(join(expanduser('~'), '.aws/credentials')):
        # This relies on a ~/.aws/credentials file holding the '<aws access key>', '<aws secret key>'
        LOGGER.info("Using ~/.aws/credentials")
        connection = boto.ec2.connect_to_region(AWS_REGION,
                                                profile_name='chiles')
    else:
        # This relies on a ~/.boto or /etc/boto.cfg file holding the '<aws access key>', '<aws secret key>'
        LOGGER.info("Using ~/.boto or /etc/boto.cfg")
        connection = boto.ec2.connect_to_region(AWS_REGION)
    self.ec2_connection = connection