def add_file_to_bucket_multipart(self, bucket_name, key_name, source_path, parallel_processes=2, reduced_redundancy=True):
    """
    Parallel multipart upload.
    """
    LOGGER.info('bucket_name: {0}, key_name: {1}, filename: {2}, parallel_processes: {3}, reduced_redundancy: {4}'.format(
        bucket_name, key_name, source_path, parallel_processes, reduced_redundancy))

    source_size = os.stat(source_path).st_size
    bytes_per_chunk = 10 * 1024 * 1024
    chunk_amount = int(math.ceil(source_size / float(bytes_per_chunk)))

    # You can only upload 10,000 chunks
    if chunk_amount < 10000:
        bucket = self.get_bucket(bucket_name)
        headers = {'Content-Type': mimetypes.guess_type(key_name)[0] or 'application/octet-stream'}
        mp = bucket.initiate_multipart_upload(key_name, headers=headers, reduced_redundancy=reduced_redundancy)

        LOGGER.info('bytes_per_chunk: {0}, chunk_amount: {1}'.format(bytes_per_chunk, chunk_amount))

        pool = Pool(processes=parallel_processes)
        for i in range(chunk_amount):
            offset = i * bytes_per_chunk
            remaining_bytes = source_size - offset
            bytes_to_copy = min([bytes_per_chunk, remaining_bytes])
            part_num = i + 1
            pool.apply_async(upload_part, [self._aws_access_key_id, self._aws_secret_access_key, bucket_name, mp.id,
                                           part_num, source_path, offset, bytes_to_copy])
        pool.close()
        pool.join()

        if len(mp.get_all_parts()) == chunk_amount:
            mp.complete_upload()
        else:
            mp.cancel_upload()
    else:
        raise S3UploadException('Too many chunks')

def __call__(self):
    """
    Actually run the job
    """
    # Get the name of the volume
    ec2_helper = EC2Helper()
    iops = None
    if self._instance_details.iops_support:
        iops = 500

    zone = ec2_helper.get_cheapest_spot_price(self._instance_type, self._spot_price)
    if zone is not None:
        volume, snapshot_name = ec2_helper.create_volume(self._snapshot_id, zone, iops=iops)
        LOGGER.info('obs_id: {0}, volume_name: {1}'.format(self._obs_id, snapshot_name))
        user_data_mime = self.get_mime_encoded_user_data(volume.id)

        if self._spot_price is not None:
            ec2_helper.run_spot_instance(
                self._ami_id,
                self._spot_price,
                user_data_mime,
                self._instance_type,
                volume.id,
                self._created_by,
                '{1}-{2}-{0}'.format(self._name, snapshot_name, self._counter),
                self._instance_details,
                zone,
                ephemeral=True)
    else:
        LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(self._instance_type, self._spot_price))

def __init__(self):
    try:
        with open(DIR_NAME + "/config.yml", 'r') as config_file:
            self.cfg = yaml.load(config_file, Loader=yaml.FullLoader)
    except Exception:
        LOGGER.exception("Failed to open config file.")
        sys.exit(1)

def copy_files(args):
    # Create the queue
    queue = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(PROCESSES):
        consumer = Consumer(queue)
        consumer.start()

    # Look in the output directory
    for root, dir_names, filenames in os.walk(args.product_dir):
        LOGGER.debug('root: {0}, dir_names: {1}, filenames: {2}'.format(root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, '13B-266*calibrated_deepfield.ms'):
            result_dir = join(root, match)
            LOGGER.info('Queuing result_dir: {0}'.format(result_dir))
            queue.put(
                CopyTask(args.bucket, match, result_dir, args.aws_access_key_id, args.aws_secret_access_key))

    # Add a poison pill to shut things down
    for x in range(PROCESSES):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()

def _upload(retries_left=amount_of_retries):
    try:
        LOGGER.info('Start uploading part: #{0}, source_path: {1}'.format(part_num, source_path))
        conn = get_s3_connection(aws_access_key_id, aws_secret_access_key)
        bucket = conn.get_bucket(bucket_name)
        for mp in bucket.get_all_multipart_uploads():
            if mp.id == multipart_id:
                with FileChunkIO(source_path, 'r', offset=offset, bytes=bytes_to_copy) as fp:
                    mp.upload_part_from_file(fp=fp, part_num=part_num)
                break
    except Exception as exc:
        if retries_left:
            _upload(retries_left=retries_left - 1)
        else:
            LOGGER.info('Failed uploading part: #{0}, source_path: {1}'.format(part_num, source_path))
            raise exc

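# A minimal sketch, not the original code: add_file_to_bucket_multipart schedules a
# module-level upload_part helper via pool.apply_async, and the retrying _upload
# closure above lives inside it. The boto S3Connection behind get_s3_connection and
# the default retry count are assumptions.
from boto.s3.connection import S3Connection


def get_s3_connection(aws_access_key_id, aws_secret_access_key):
    return S3Connection(aws_access_key_id, aws_secret_access_key)


def upload_part(aws_access_key_id, aws_secret_access_key, bucket_name, multipart_id,
                part_num, source_path, offset, bytes_to_copy, amount_of_retries=10):
    # Defines the names captured by the _upload closure shown above, then runs it once.
    def _upload(retries_left=amount_of_retries):
        ...  # retrying upload body as defined above

    _upload()
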
def copy_files(processes, bottom_frequency, frequency_range):
    # Create the directory
    if not exists(DIRECTORY):
        os.makedirs(DIRECTORY)

    # Scan the bucket
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}/CLEAN'.format(bucket))

    # Create the queue
    queue = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(processes):
        consumer = Consumer(queue)
        consumer.start()

    for key in bucket.list(prefix='CLEAN/'):
        LOGGER.info('Checking {0}'.format(key.key))
        # Ignore the key
        if key.key.endswith('.image.tar.gz') or key.key.endswith('.image.tar'):
            # Do we need this file?
            basename_key = basename(key.key)
            if in_frequency_range(basename_key, bottom_frequency, frequency_range):
                # Queue the copy of the file
                temp_file = os.path.join(DIRECTORY, basename_key)
                queue.put(Task(key, temp_file, DIRECTORY))

    # Add a poison pill to shut things down
    for x in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()

def start_servers(
        processes, ami_id, user_data, setup_disks, instance_type, obs_ids,
        created_by, name, instance_details, spot_price, frequency_channels, force):
    cvel_data = get_cvel()

    # Create the queue
    tasks = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(processes):
        consumer = Consumer(tasks)
        consumer.start()

    counter = 1
    for obs_id in obs_ids:
        snapshot_id = OBS_IDS.get(obs_id)
        if snapshot_id is None:
            LOGGER.warning('The obs-id: {0} does not exist in the settings file'.format(obs_id))
        else:
            obs_id_dashes = obs_id.replace('_', '-')
            for frequency_groups in get_frequency_groups(frequency_channels, obs_id_dashes, cvel_data, force):
                tasks.put(
                    Task(ami_id, user_data, setup_disks, instance_type, obs_id, snapshot_id,
                         created_by, name, spot_price, instance_details, frequency_groups, counter))
                counter += 1

    # Add a poison pill to shut things down
    for x in range(processes):
        tasks.put(None)

    # Wait for the queue to terminate
    tasks.join()

def process_entry(self, dn, entry):
    try:
        address = entry['cn'][0].decode("utf-8")
    except KeyError:
        try:
            address = entry['dn'][0].decode("utf-8")
        except KeyError:
            return
    else:
        for field in entry:
            if self.handler.check_field(field) is False:
                continue
            for value in entry[field]:
                try:
                    decode_value = value.decode("utf-8")
                except UnicodeDecodeError:
                    decode_value = value.decode('latin-1')
                try:
                    if self.handler.execute(decode_value) is False:
                        continue
                except IndexError:
                    continue
                else:
                    LOGGER.debug(
                        'Address {} has wrong field: {}: {}'.format(address, field, decode_value))
                    self.output_file.write(address + ' ' + field + ': ' + decode_value + '\n')

def create_users(emails):
    failed = []
    users = []
    for email in emails:
        username = email.split('@')[0]
        data = {
            'username': username,
            'password': config['default_password'],
            'email': email
        }
        dumped = json.dumps(data, ensure_ascii=False).encode('utf-8')
        resp = requests.post(
            url="{}/register/".format(base_url),
            data=dumped,
            headers={'Content-Type': 'application/json;charset=UTF-8'})
        if resp.status_code == 400:
            failed.append(email)
            LOGGER.error(
                "Failed to create account for email {0}. Response: {1}".format(email, str(resp.json())))
        if resp.status_code == 201:
            users.append(resp.json())
            LOGGER.info("Account successfully created.")
    return users, failed

def create_posts(user):
    access_token, refresh_token = obtain_token_pair(user['username'], config['default_password'])
    num_posts = random.randint(1, config['max_number_of_posts'])
    headers = {
        'Authorization': 'Bearer {}'.format(access_token),
    }
    for i in range(num_posts):
        title = "{0} {1}".format(user['username'], str(i))
        content = "{0} says: {1}".format(user['username'], config['post_content'])
        payload = {
            "title": title,
            "content": content
        }
        resp = requests.post(url="{}/posts/".format(base_url), json=payload, headers=headers)
        if resp.status_code == 401 and resp.json()['code'] == 'token_not_valid':
            LOGGER.warning("{}. Obtaining new token pair...".format(resp.json()['messages'][0]['message']))
            access_token, refresh_token = obtain_token_pair(user['username'], config['default_password'])
            headers = {
                'Authorization': 'Bearer {}'.format(access_token)
            }
            resp = requests.post(url="{}/posts/".format(base_url), json=payload, headers=headers)
        if resp.status_code != 201:
            LOGGER.error("Failed to create post. Response code: {}".format(resp.status_code))

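# Hypothetical sketch of obtain_token_pair, which create_posts and like_em_all call.
# Assumptions: a SimpleJWT-style token endpoint under base_url that returns
# "access" and "refresh" fields; the real route name may differ.
def obtain_token_pair(username, password):
    resp = requests.post(
        url="{}/token/".format(base_url),
        json={'username': username, 'password': password})
    body = resp.json()
    return body['access'], body['refresh']
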
def copy_files(frequency_id, processes, days):
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    LOGGER.info('Scanning bucket: {0}, frequency_id: {1}'.format(bucket, frequency_id))

    # Create the queue
    queue = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(processes):
        consumer = Consumer(queue)
        consumer.start()

    for key in bucket.list(prefix='CVEL/{0}'.format(frequency_id)):
        LOGGER.info('Checking {0}'.format(key.key))
        # Ignore the key
        if key.key.endswith('/data.tar.gz') or key.key.endswith('/data.tar'):
            elements = key.key.split('/')
            if elements[2] in days:
                directory = '/mnt/output/Chiles/split_vis/{0}/'.format(elements[2])
                # Queue the copy of the file
                temp_file = os.path.join(directory, 'data.tar.gz' if key.key.endswith('/data.tar.gz') else 'data.tar')
                queue.put(Task(key, temp_file, directory, frequency_id))

    # Add a poison pill to shut things down
    for x in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()

def main():
    parser = argparse.ArgumentParser('Start a number of CLEAN servers')
    parser.add_argument('-a', '--ami_id', help='the AMI id to use')
    parser.add_argument('-i', '--instance_type', required=True, help='the instance type to use')
    parser.add_argument('-c', '--created_by', help='the username to use')
    parser.add_argument('-n', '--name', required=True, help='the instance name to use')
    parser.add_argument('-s', '--spot_price', type=float, help='the spot price to use')
    parser.add_argument('-b', '--bash_script', help='the bash script to use')
    parser.add_argument('-p', '--processes', type=int, default=1, help='the number of processes to run')
    parser.add_argument('frequencies', nargs='+', help='the frequencies to use (vis_14XX~14YY)')

    args = vars(parser.parse_args())
    corrected_args = check_args(args)
    if corrected_args is None:
        LOGGER.error('The arguments are incorrect: {0}'.format(args))
    else:
        start_servers(
            args['processes'],
            corrected_args['ami_id'],
            corrected_args['user_data'],
            corrected_args['setup_disks'],
            args['instance_type'],
            args['frequencies'],
            corrected_args['created_by'],
            args['name'],
            corrected_args['instance_details'],
            corrected_args['spot_price'])

def add_file_to_bucket(self, bucket_name, key_name, filename, reduced_redundancy=True):
    """
    Add file to a bucket

    :param bucket_name: the bucket to write to
    :param key_name: the key to write to
    :param filename: the local file to upload
    :param reduced_redundancy: use S3 reduced redundancy storage
    """
    LOGGER.info(
        'bucket_name: {0}, key_name: {1}, filename: {2}, reduced_redundancy: {3}'.format(
            bucket_name, key_name, filename, reduced_redundancy))

    retry_count = 0
    done = False
    while retry_count < 3 and not done:
        try:
            bucket = self.get_bucket(bucket_name)
            key = Key(bucket)
            key.key = key_name
            key.set_contents_from_filename(filename, reduced_redundancy=reduced_redundancy)
            done = True
        except socket.error:
            LOGGER.exception('Error')
            retry_count += 1
            time.sleep(10)

def main():
    parser = argparse.ArgumentParser("Start a number of CLEAN servers")
    parser.add_argument("-a", "--ami_id", help="the AMI id to use")
    parser.add_argument("-i", "--instance_type", required=True, help="the instance type to use")
    parser.add_argument("-c", "--created_by", help="the username to use")
    parser.add_argument("-n", "--name", required=True, help="the instance name to use")
    parser.add_argument("-s", "--spot_price", type=float, help="the spot price to use")
    parser.add_argument("-b", "--bash_script", help="the bash script to use")
    parser.add_argument("-p", "--processes", type=int, default=1, help="the number of processes to run")
    parser.add_argument("snapshots", nargs="+", help="the snapshots to use")

    args = vars(parser.parse_args())
    corrected_args = check_args(args)
    if corrected_args is None:
        LOGGER.error("The arguments are incorrect: {0}".format(args))
    else:
        start_server(
            args["processes"],
            corrected_args["ami_id"],
            corrected_args["user_data"],
            corrected_args["setup_disks"],
            args["instance_type"],
            args["snapshots"],
            corrected_args["created_by"],
            args["name"],
            corrected_args["instance_details"],
            corrected_args["spot_price"],
        )

def detect_wrong_format(pattern_dict: dict, dump_file):
    try:
        pattern_type = pattern_dict['type']
    except KeyError:
        LOGGER.error("Provide a type in the configuration file!")
        sys.exit(1)
    try:
        regex = pattern_dict['regex']
    except KeyError:
        LOGGER.error("Specify whether the pattern is a regex in the configuration file!")
        sys.exit(1)

    if regex:
        handler = PatternHandlerFactoryRegEx(pattern_dict)
    else:
        handler = PatternHandlerFactoryPattern(pattern_dict)

    if dump_file.endswith('.gz'):
        input_file = gzip.open(dump_file, 'rb')
    else:
        input_file = open(dump_file, 'rb')
    output_file = open('ldap_wrong_format.txt', 'w')

    processing_object = processing_object_builder(pattern_type, output_file, handler)
    parser = ldap_parser.ParseLDIF(input_file, processing_object)
    parser.parse()

def copy_files(frequency_id):
    s3_helper = S3Helper()
    # Look in the output directory
    LOGGER.info('directory_data: {0}'.format(CHILES_CLEAN_OUTPUT))
    for dir_name in os.listdir(CHILES_CLEAN_OUTPUT):
        LOGGER.info('dir_name: {0}'.format(dir_name))
        result_dir = join(CHILES_CLEAN_OUTPUT, dir_name)
        if isdir(result_dir) and dir_name.startswith('cube_') and dir_name.endswith('.image'):
            LOGGER.info('dir_name: {0}'.format(dir_name))
            output_tar_filename = join(CHILES_CLEAN_OUTPUT, dir_name + '.tar')

            if can_be_multipart_tar(result_dir):
                LOGGER.info('Using add_tar_to_bucket_multipart')
                s3_helper.add_tar_to_bucket_multipart(
                    CHILES_BUCKET_NAME,
                    '/CLEAN/{0}/{1}'.format(frequency_id, basename(output_tar_filename)),
                    result_dir)
            else:
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                make_tarfile(output_tar_filename, result_dir)
                s3_helper.add_file_to_bucket(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(frequency_id, basename(output_tar_filename)),
                    output_tar_filename)
                # Clean up
                os.remove(output_tar_filename)

def start_pong(self, xxx_id: int) -> None:
    global g_processing_xxx_id
    r = get_redis_client()
    processing_key = f"{PROJECT_NAME}.{APP_NAME}.xxx.is_processing.{xxx_id}"
    if r.get(processing_key) == '1':
        LOGGER.info(f"ignore processing xxx {xxx_id}")
        return
    r.set(processing_key, '1', ex=30)
    g_processing_xxx_id = xxx_id
    list(range(10000))

    def start() -> None:
        LOGGER.info(f"start_work with {xxx_id}")
        list(range(20000))
        try:
            while True:
                list(range(3000))
                self.dispatch("processed", {
                    'xxx_id': xxx_id,
                    'info': {},
                })
                r.expire(processing_key, 3)
                time.sleep(1)
        finally:
            r.delete(processing_key)

def main():
    parser = argparse.ArgumentParser('Start a number of CLEAN servers')
    parser.add_argument('-a', '--ami_id', help='the AMI id to use')
    parser.add_argument('-i', '--instance_type', required=True, help='the instance type to use')
    parser.add_argument('-c', '--created_by', help='the username to use')
    parser.add_argument('-n', '--name', required=True, help='the instance name to use')
    parser.add_argument('-s', '--spot_price', type=float, help='the spot price to use')
    parser.add_argument('-b', '--bash_script', help='the bash script to use')
    parser.add_argument('-e', '--ebs', type=int, help='the size in GB of any EBS volume')
    parser.add_argument('bottom_frequency', help='The bottom frequency')
    parser.add_argument('frequency_range', help='the range of frequencies')
    parser.add_argument('obs_id', help='the observation id')

    args = vars(parser.parse_args())
    corrected_args = check_args(args)
    if corrected_args is None:
        LOGGER.error('The arguments are incorrect: {0}'.format(args))
    else:
        start_servers(
            corrected_args['ami_id'],
            corrected_args['user_data'],
            corrected_args['setup_disks'],
            args['instance_type'],
            make_safe_filename(args['obs_id']),
            corrected_args['created_by'],
            args['name'],
            corrected_args['instance_details'],
            corrected_args['spot_price'],
            args['ebs'],
            args['bottom_frequency'],
            args['frequency_range'])

def main():
    parser = argparse.ArgumentParser('Start a number of CVEL servers')
    parser.add_argument('-a', '--ami_id', help='the AMI id to use')
    parser.add_argument('-i', '--instance_type', required=True, help='the instance type to use')
    parser.add_argument('-c', '--created_by', help='the username to use')
    parser.add_argument('-n', '--name', required=True, help='the instance name to use')
    parser.add_argument('-s', '--spot_price', type=float, help='the spot price to use')
    parser.add_argument('-b', '--bash_script', help='the bash script to use')
    parser.add_argument('-p', '--processes', type=int, default=1, help='the number of processes to run')
    parser.add_argument('-fc', '--frequency_channels', type=int, default=28, help='how many frequency channels per AWS instance')
    parser.add_argument('--force', action='store_true', default=False, help='proceed with a frequency band even if we already have it')
    parser.add_argument('obs_ids', nargs='+', help='the ids of the observation')

    args = vars(parser.parse_args())
    corrected_args = check_args(args)
    if corrected_args is None:
        LOGGER.error('The arguments are incorrect: {0}'.format(args))
    else:
        start_servers(
            args['processes'],
            corrected_args['ami_id'],
            corrected_args['user_data'],
            corrected_args['setup_disks'],
            args['instance_type'],
            corrected_args['obs_ids'],
            corrected_args['created_by'],
            args['name'],
            corrected_args['instance_details'],
            corrected_args['spot_price'],
            args['frequency_channels'],
            args['force'])

def __call__(self):
    """
    Actually run the job
    """
    if self._tar_file.endswith('.tar.gz'):
        image_name = basename(self._tar_file).replace('.tar.gz', '')
    else:
        image_name = basename(self._tar_file).replace('.tar', '')
    directory = join(self._directory, image_name)

    # noinspection PyBroadException
    try:
        LOGGER.info('key: {0}, tar_file: {1}, directory: {2}'.format(self._key.key, self._tar_file, directory))
        if not os.path.exists(directory):
            os.makedirs(directory)
        self._key.get_contents_to_filename(self._tar_file)
        with closing(tarfile.open(self._tar_file, "r:gz" if self._tar_file.endswith('.tar.gz') else "r:")) as tar:
            tar.extractall(path=directory)
        os.remove(self._tar_file)
    except Exception:
        LOGGER.exception('Task died')
        shutil.rmtree(directory, ignore_errors=True)

def processing_object_builder(pattern_type, output_file, handler):
    if pattern_type == 'general':
        return ProblemsDetectorGeneral(output_file, handler)
    elif pattern_type == 'lock_submit':
        return SubmitLockDetector(output_file, handler, mariadb_connection=open_mdb_connection())
    else:
        LOGGER.error("Provide a defined type in the configuration file!")
        sys.exit(1)

def __init__(self, output_file, handler: PatternHandlerFactory, mariadb_connection):
    super().__init__(output_file, handler)
    self.mariadb_connection = mariadb_connection
    try:
        self.query = self.handler.pattern_dict['mariadb_query']
    except KeyError:
        LOGGER.error('Provide a MariaDB query!')
        sys.exit(1)

def __call__(self):
    # noinspection PyBroadException
    try:
        LOGGER.info('Copying {0} to s3:{1}'.format(self._filename, self._bucket_location))
        s3_helper = S3Helper()
        s3_helper.add_file_to_bucket(CHILES_BUCKET_NAME, self._bucket_location, self._filename)
    except Exception:
        LOGGER.exception('CopyTask died')

def add_tar_to_bucket_multipart(self, bucket_name, key_name, source_path, gzip=False,
                                parallel_processes=2, reduced_redundancy=True, bufsize=10 * 1024 * 1024):
    """
    Parallel multipart upload.
    """
    LOGGER.info(
        'bucket_name: {0}, key_name: {1}, source_path: {2}, parallel_processes: {3}, '
        'reduced_redundancy: {4}, bufsize: {5}'.format(
            bucket_name, key_name, source_path, parallel_processes, reduced_redundancy, bufsize))

    bucket = self.get_bucket(bucket_name)
    headers = {'Content-Type': mimetypes.guess_type(key_name)[0] or 'application/octet-stream'}
    mp = bucket.initiate_multipart_upload(key_name, headers=headers, reduced_redundancy=reduced_redundancy)
    s3_feeder = S3Feeder(mp.id, bufsize, bucket_name, parallel_processes,
                         self._aws_access_key_id, self._aws_secret_access_key)

    if gzip:
        mode = "w|gz"
    else:
        mode = "w|"
    tar = tarfile.open(mode=mode, fileobj=s3_feeder, bufsize=int(bufsize / 10))

    complete = True
    # noinspection PyBroadException
    try:
        for entry in os.listdir(source_path):
            full_filename = join(source_path, entry)
            LOGGER.info('tar: [full_filename: {0}, entry: {1}]'.format(full_filename, entry))
            tar.add(full_filename, arcname=entry)

        tar.close()
        s3_feeder.close()
    except Exception:
        complete = False
        s3_feeder.close()

    # Finish the upload
    if complete:
        mp.complete_upload()
    else:
        mp.cancel_upload()

def get_mime_encoded_user_data(volume_id, setup_disks, in_user_data, now):
    """
    AWS allows the user data to be a multipart MIME message
    """
    user_data = MIMEMultipart()
    user_data.attach(get_cloud_init())

    data_formatted = in_user_data.format(volume_id, now, PIP_PACKAGES)
    LOGGER.info(data_formatted)
    user_data.attach(MIMEText(setup_disks + data_formatted))
    return user_data.as_string()

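# Illustrative sketch only: every get_mime_encoded_user_data variant attaches
# get_cloud_init() first. The assumption is that it wraps a cloud-config document
# as a text/cloud-config MIME part so cloud-init merges it with the shell script
# attached afterwards; the cloud-config content below is invented.
def get_cloud_init():
    return MIMEText(
        '#cloud-config\n'
        'repo_update: true\n'
        'repo_upgrade: all\n',
        'cloud-config')
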
def handle_stop_pong_event(self, event_data: t.Dict[str, t.Any]) -> None:
    global g_processing_xxx_id
    xxx_id = event_data['xxx_id']
    if xxx_id != g_processing_xxx_id:
        return
    LOGGER.info("stop self")

    def kill_self() -> None:
        time.sleep(0.1)
        os.kill(os.getpid(), signal.SIGQUIT)

    threading.Thread(target=kill_self, daemon=True).start()

def detect_wrong_format(input_file, pattern):
    """Detect LDAP entries whose values do not match the configured pattern

    :param input_file: LDAP dump file to parse
    :param pattern: Pattern to be detected, it must be defined in config.yml!
    """
    cfg = load_config()
    try:
        cfg[pattern]
    except KeyError:
        LOGGER.error("Pattern not found in the config.yml file!")
        sys.exit(1)
    else:
        detect_ldap_problems.detect_wrong_format(cfg[pattern], input_file)

def open_mdb_connection():
    for host in MARIA_DB_CONFIGURATION['hosts']:
        try:
            connection = pymysql.connect(
                user=MARIA_DB_CONFIGURATION['user'],
                password=MARIA_DB_CONFIGURATION['password'],
                database=MARIA_DB_CONFIGURATION['database'],
                host=host,
                cursorclass=pymysql.cursors.DictCursor,
                autocommit=True,
            )
        except Exception:
            if host == MARIA_DB_CONFIGURATION['hosts'][-1]:
                log_str = "Failed to connect to any MariaDB host"
                LOGGER.exception(log_str)
                raise DBError(log_str)
            else:
                LOGGER.warning('Failed to connect to MariaDB on host {}. Trying next host.'.format(host))
        else:
            if connection.open:
                LOGGER.debug('mariadb connection to host {} successful.'.format(host))
                return connection
            else:
                err_str = 'Connection to MariaDB failed.'
                LOGGER.error(err_str)
                raise DBError(err_str)

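# Example use of open_mdb_connection (the query is a placeholder); DictCursor and
# autocommit are already configured above, so rows come back as dictionaries.
connection = open_mdb_connection()
with connection.cursor() as cursor:
    cursor.execute("SELECT 1 AS alive")
    LOGGER.debug(cursor.fetchone())
connection.close()
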
def get_mime_encoded_user_data(self, volume_id):
    """
    AWS allows the user data to be a multipart MIME message
    """
    user_data = MIMEMultipart()
    user_data.attach(get_cloud_init())

    # Build the strings we need
    cvel_pipeline = self.build_cvel_pipeline()

    data_formatted = self._user_data.format(cvel_pipeline, self._obs_id, volume_id, self._now, self._counter, PIP_PACKAGES)
    LOGGER.info(data_formatted)
    user_data.attach(MIMEText(self._setup_disks + data_formatted))
    return user_data.as_string()

def copy_files(date, vis_file):
    s3_helper = S3Helper()
    # Look in the output directory
    for root, dir_names, filenames in os.walk(CHILES_CVEL_OUTPUT):
        LOGGER.info('root: {0}, dir_names: {1}, filenames: {2}'.format(root, dir_names, filenames))
        for match in fnmatch.filter(dir_names, vis_file):
            result_dir = join(root, match)
            LOGGER.info('Working on: {0}'.format(result_dir))

            if can_be_multipart_tar(result_dir):
                LOGGER.info('Using add_tar_to_bucket_multipart')
                s3_helper.add_tar_to_bucket_multipart(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(vis_file, date),
                    result_dir)
            else:
                LOGGER.info('Using make_tarfile, then adding file to bucket')
                output_tar_filename = join(root, match + '.tar')
                make_tarfile(output_tar_filename, result_dir)
                s3_helper.add_file_to_bucket(
                    CHILES_BUCKET_NAME,
                    'CVEL/{0}/{1}/data.tar'.format(vis_file, date),
                    output_tar_filename)
                # Clean up
                os.remove(output_tar_filename)
                shutil.rmtree(result_dir, ignore_errors=True)

def check_args(args):
    """
    Check the arguments and prompt for new ones
    """
    map_args = {}

    if args['obs_ids'] is None:
        return None
    elif len(args['obs_ids']) == 1 and args['obs_ids'][0] == '*':
        map_args['obs_ids'] = OBS_IDS.keys()
    else:
        map_args['obs_ids'] = args['obs_ids']

    if args['instance_type'] is None:
        return None
    if args['name'] is None:
        return None

    instance_details = AWS_INSTANCES.get(args['instance_type'])
    if instance_details is None:
        LOGGER.error('The instance type {0} is not supported.'.format(args['instance_type']))
        return None
    else:
        LOGGER.info(
            'instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}'.format(
                args['instance_type'],
                instance_details.vCPU,
                instance_details.memory,
                instance_details.number_disks,
                instance_details.size,
                instance_details.iops_support))

    map_args.update({
        'ami_id': args['ami_id'] if args['ami_id'] is not None else AWS_AMI_ID,
        'created_by': args['created_by'] if args['created_by'] is not None else getpass.getuser(),
        'spot_price': args['spot_price'] if args['spot_price'] is not None else None,
        'user_data': get_script(args['bash_script'] if args['bash_script'] is not None else BASH_SCRIPT_CVEL),
        'setup_disks': get_script(BASH_SCRIPT_SETUP_DISKS),
        'instance_details': instance_details,
    })

    return map_args

def get_cvel():
    s3_helper = S3Helper()
    bucket = s3_helper.get_bucket(CHILES_BUCKET_NAME)
    cvel_data = {}
    for key in bucket.list(prefix='CVEL/'):
        LOGGER.info('Checking {0}'.format(key.key))
        if key.key.endswith('data.tar.gz') or key.key.endswith('data.tar'):
            elements = key.key.split('/')
            data_list = cvel_data.get(str(elements[1]))
            if data_list is None:
                data_list = []
                cvel_data[str(elements[1])] = data_list
            data_list.append(str(elements[2]))

    return cvel_data

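# Illustrative only: get_cvel() maps each CVEL frequency directory (elements[1]) to
# the observation/day directories (elements[2]) found beneath it, for example
#   {'vis_1400~1404': ['13B-266-day1', '13B-266-day2']}
# where the key and list values shown here are made-up names.
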
def get_mime_encoded_user_data(instance_details, setup_disks, user_data):
    """
    AWS allows the user data to be a multipart MIME message
    """
    # Split the frequencies
    min_freq = 940
    max_freq = 1424
    LOGGER.info("min_freq: {0}, max_freq: {1}".format(min_freq, max_freq))

    # Build the mime message
    mime_data = MIMEMultipart()
    mime_data.attach(get_cloud_init())

    swap_size = get_swap_size(instance_details)
    data_formatted = user_data.format("TODO", min_freq, max_freq, swap_size, PIP_PACKAGES)
    mime_data.attach(MIMEText(setup_disks + data_formatted))
    return mime_data.as_string()

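# A sketch of get_swap_size, assuming it mirrors the inline calculation that
# start_servers performs elsewhere in this code: 75% of the ephemeral storage,
# capped at 16 GB.
def get_swap_size(instance_details):
    ephemeral_size = instance_details.number_disks * instance_details.size
    return min(int(ephemeral_size * 0.75), 16)
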
def like_em_all(users):
    iterator = PostIterator()
    for user in users:
        access, refresh = obtain_token_pair(user['username'], config['default_password'])
        iterator.set_auth(access, refresh)
        iterator.set_user(user)
        iterator.set_posts(get_all_posts())
        while True:
            try:
                post_to_like = next(iterator)
                perform_like(post=post_to_like, user=user, token=access)
            except StopIteration:
                break
            except BotClientError as e:
                LOGGER.warning("Failed to perform a like: {}".format(str(e)))

def __call__(self):
    # noinspection PyBroadException
    try:
        s3_helper = S3Helper(self._aws_access_key_id, self._aws_secret_access_key)
        LOGGER.info('Copying to: {0}/{1}/measurement_set.tar'.format(self._bucket, self._bucket_location))

        # We can have 10,000 parts
        # The biggest file from Semester 1 is 803GB
        # So 100 MB
        s3_helper.add_tar_to_bucket_multipart(
            self._bucket,
            '{0}/measurement_set.tar'.format(self._bucket_location),
            self._filename,
            parallel_processes=2,
            bufsize=100 * 1024 * 1024)
    except Exception:
        LOGGER.exception('CopyTask died')

def get_mime_encoded_user_data(self):
    """
    AWS allows the user data to be a multipart MIME message
    """
    # Split the frequencies
    index_underscore = find(self._frequency_id, '_')
    index_tilde = find(self._frequency_id, '~')
    min_freq = self._frequency_id[index_underscore + 1:index_tilde]
    max_freq = self._frequency_id[index_tilde + 1:]
    LOGGER.info('min_freq: {0}, max_freq: {1}'.format(min_freq, max_freq))

    # Build the mime message
    user_data = MIMEMultipart()
    user_data.attach(get_cloud_init())

    swap_size = self.get_swap_size()
    data_formatted = self._user_data.format(self._frequency_id, min_freq, max_freq, swap_size, PIP_PACKAGES)
    user_data.attach(MIMEText(self._setup_disks + data_formatted))
    return user_data.as_string()

def check_args(args):
    """
    Check the arguments and prompt for new ones
    """
    map_args = {}

    if args["snapshots"] is None:
        return None
    if args["instance_type"] is None:
        return None
    if args["name"] is None:
        return None

    instance_details = AWS_INSTANCES.get(args["instance_type"])
    if instance_details is None:
        LOGGER.error("The instance type {0} is not supported.".format(args["instance_type"]))
        return None
    else:
        LOGGER.info(
            "instance: {0}, vCPU: {1}, RAM: {2}GB, Disks: {3}x{4}GB, IOPS: {5}".format(
                args["instance_type"],
                instance_details.vCPU,
                instance_details.memory,
                instance_details.number_disks,
                instance_details.size,
                instance_details.iops_support,
            )
        )

    map_args.update(
        {
            "ami_id": args["ami_id"] if args["ami_id"] is not None else AWS_AMI_ID,
            "created_by": args["created_by"] if args["created_by"] is not None else getpass.getuser(),
            "spot_price": args["spot_price"] if args["spot_price"] is not None else None,
            "user_data": get_script(args["bash_script"] if args["bash_script"] is not None else BASH_SCRIPT_CLEAN_ALL),
            "setup_disks": get_script(BASH_SCRIPT_SETUP_DISKS),
            "instance_details": instance_details,
        }
    )

    return map_args

def copy_files(cube):
    s3_helper = S3Helper()
    # Look in the output directory
    directory_to_save = join(CHILES_IMGCONCAT_OUTPUT, cube) + '.cube'
    if isdir(directory_to_save):
        LOGGER.info('dir_name: {0}'.format(directory_to_save))
        output_tar_filename = directory_to_save + '.tar'

        if can_be_multipart_tar(directory_to_save):
            LOGGER.info('Using add_tar_to_bucket_multipart')
            s3_helper.add_tar_to_bucket_multipart(
                CHILES_BUCKET_NAME,
                'IMGCONCAT/{0}'.format(basename(output_tar_filename)),
                directory_to_save,
                bufsize=20 * 1024 * 1024)
        else:
            LOGGER.info('Using make_tarfile, then adding file to bucket')
            make_tarfile(output_tar_filename, directory_to_save)
            s3_helper.add_file_to_bucket(
                CHILES_BUCKET_NAME,
                'IMGCONCAT/{0}'.format(basename(output_tar_filename)),
                output_tar_filename)
            # Clean up
            os.remove(output_tar_filename)

def copy_files(s3_tag, processes):
    # Create the queue
    queue = multiprocessing.JoinableQueue()

    # Start the consumers
    for x in range(processes):
        consumer = Consumer(queue)
        consumer.start()

    # Look in the output directory
    today = datetime.date.today()
    for root, dir_names, filenames in os.walk(CHILES_LOGS):
        for match in fnmatch.filter(filenames, '*.log'):
            LOGGER.info('Looking at: {0}'.format(join(root, match)))
            queue.put(CopyTask(join(root, match), '{0}/{1}{2:02d}{3:02d}/{4}'.format(s3_tag, today.year, today.month, today.day, match)))

    for root, dir_names, filenames in os.walk(BENCHMARKING_LOGS):
        for match in fnmatch.filter(filenames, '*.csv'):
            LOGGER.info('Looking at: {0}'.format(join(root, match)))
            queue.put(CopyTask(join(root, match), '{0}/{1}{2:02d}{3:02d}/{4}'.format(s3_tag, today.year, today.month, today.day, match)))
        for match in fnmatch.filter(filenames, '*.log'):
            LOGGER.info('Looking at: {0}'.format(join(root, match)))
            queue.put(CopyTask(join(root, match), '{0}/{1}{2:02d}{3:02d}/{4}'.format(s3_tag, today.year, today.month, today.day, match)))

    queue.put(CopyTask('/var/log/chiles-output.log', '{0}/{1}{2:02d}{3:02d}/chiles-output.log'.format(s3_tag, today.year, today.month, today.day)))

    # Add a poison pill to shut things down
    for x in range(processes):
        queue.put(None)

    # Wait for the queue to terminate
    queue.join()

def run_instance(self, ami_id, user_data, instance_type, volume_id, created_by, name, zone, ephemeral=False):
    """
    Run up an instance
    """
    bdm = self.build_block_device_map(ephemeral)

    LOGGER.info('Running instance: ami: {0}'.format(ami_id))
    reservations = self.ec2_connection.run_instances(
        ami_id,
        instance_type=instance_type,
        instance_initiated_shutdown_behavior='terminate',
        subnet_id=AWS_SUBNETS[zone],
        key_name=AWS_KEY_NAME,
        security_group_ids=AWS_SECURITY_GROUPS,
        user_data=user_data,
        block_device_map=bdm)
    instance = reservations.instances[0]
    time.sleep(5)

    while not instance.update() == 'running':
        LOGGER.info('Not running yet')
        time.sleep(5)

    if volume_id:
        # Now we have an instance id we can attach the disk
        self.ec2_connection.attach_volume(volume_id, instance.id, '/dev/xvdf')

    LOGGER.info('Assigning the tags')
    self.ec2_connection.create_tags(
        [instance.id],
        {'AMI': '{0}'.format(ami_id),
         'Name': '{0}'.format(name),
         'Volume_id': '{0}'.format(volume_id),
         'Created By': '{0}'.format(created_by)})
    return instance

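# Example call of run_instance (all values are placeholders); user_data_mime would
# come from one of the get_mime_encoded_user_data helpers, and the zone must be a
# key of AWS_SUBNETS. It mirrors how a non-spot worker would be started and an
# existing EBS volume attached as /dev/xvdf.
ec2_helper = EC2Helper()
instance = ec2_helper.run_instance(
    'ami-12345678',
    user_data_mime,
    'm3.xlarge',
    'vol-12345678',
    getpass.getuser(),
    'Chiles-worker',
    'us-east-1a',
    ephemeral=True)
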
def __call__(self):
    """
    Actually run the job
    """
    corrected_path = join(self._directory, self._frequency_id)
    # noinspection PyBroadException
    try:
        LOGGER.info('key: {0}, tar_file: {1}, directory: {2}, frequency_id: {3}'.format(
            self._key.key, self._tar_file, self._directory, self._frequency_id))
        if not os.path.exists(corrected_path):
            os.makedirs(corrected_path)
        self._key.get_contents_to_filename(self._tar_file)
        with closing(tarfile.open(self._tar_file, "r:gz" if self._tar_file.endswith('tar.gz') else "r:")) as tar:
            tar.extractall(path=corrected_path)
        os.remove(self._tar_file)
    except Exception:
        LOGGER.exception('Task died')
        shutil.rmtree(corrected_path, ignore_errors=True)

def start_servers(
        ami_id, user_data, setup_disks, instance_type, obs_id, created_by, name,
        instance_details, spot_price, ebs, bottom_frequency, frequency_range):
    LOGGER.info('obs_id: {0}, bottom_frequency: {1}, frequency_range: {2}'.format(obs_id, bottom_frequency, frequency_range))
    ec2_helper = EC2Helper()
    zone = ec2_helper.get_cheapest_spot_price(instance_type, spot_price)

    if zone is not None:
        # Swap size: 1 GB when no EBS size is given, otherwise 75% of the
        # ephemeral storage, capped at 16 GB
        if ebs is None:
            swap_size = 1
        else:
            ephemeral_size = instance_details.number_disks * instance_details.size
            swap_size = min(int(ephemeral_size * 0.75), 16)

        user_data_mime = get_mime_encoded_user_data(
            user_data,
            obs_id,
            setup_disks,
            bottom_frequency,
            frequency_range,
            swap_size
        )
        LOGGER.info('{0}'.format(user_data_mime))

        ec2_helper.run_spot_instance(
            ami_id,
            spot_price,
            user_data_mime,
            instance_type,
            None,
            created_by,
            name + '- {0}'.format(obs_id),
            instance_details=instance_details,
            zone=zone,
            ebs_size=ebs,
            number_ebs_volumes=4,
            ephemeral=True)
    else:
        LOGGER.error('Cannot get a spot instance of {0} for ${1}'.format(instance_type, spot_price))
