def clean_all(config=None):
    """Remove every PyWren artifact: deployed runtimes, stored job data,
    the local runtime-metadata cache and localhost temp directories.

    :param config: settings overriding the config file. Default None.
    """
    logger.info('Cleaning all PyWren information')
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    # Delete runtime metadata and job data kept in the object storage
    handler = internal_storage.storage_handler
    runtime_keys = handler.list_keys(storage_config['bucket'], RUNTIMES_PREFIX)
    if runtime_keys:
        handler.delete_objects(storage_config['bucket'], runtime_keys)
    compute_handler.delete_all_runtimes()
    clean_bucket(storage_config['bucket'], JOBS_PREFIX, internal_storage, sleep=1)

    # Drop the local runtime_meta cache
    if os.path.exists(CACHE_DIR):
        shutil.rmtree(CACHE_DIR)

    # Drop localhost temporary job/runtime directories
    for tmp_dir in (os.path.join(TEMP, JOBS_PREFIX),
                    os.path.join(TEMP, RUNTIMES_PREFIX)):
        if os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir)
def update_runtime(name, config=None):
    """Rebuild deployed runtime(s) and refresh their stored metadata.

    :param name: runtime name to update, or 'all' to update every runtime
    :param config: settings overriding the config file. Default None.
    :raises Exception: if the runtime metadata cannot be uploaded to storage.
    """
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)
    timeout = config['pywren']['runtime_timeout']
    logger.info('Updating runtime: {}'.format(name))

    # Metadata can only be regenerated for a single, named runtime
    if name != 'all':
        runtime_meta = compute_handler.generate_runtime_meta(name)
    else:
        runtime_meta = None

    runtimes = compute_handler.list_runtimes(name)
    for runtime in runtimes:
        compute_handler.create_runtime(runtime[0], runtime[1], timeout)
        if runtime_meta:
            try:
                runtime_key = compute_handler.get_runtime_key(runtime[0], runtime[1])
                internal_storage.put_runtime_meta(runtime_key, runtime_meta)
            except Exception as exc:
                # BUG FIX: the original did `raise ("…".format(...))`, which
                # raises a str and itself fails with TypeError. Raise a real
                # Exception and chain the original cause.
                raise Exception("Unable to upload 'preinstalled modules' file into {}"
                                .format(internal_storage.backend)) from exc
def clean_runtimes(config=None):
    """Delete all deployed runtimes plus every local/remote cache of their
    metadata (local cache dir, localhost temp dirs, object-storage keys).

    :param config: settings overriding the config file. Default None.
    """
    logger.info('Cleaning all runtimes and cache information')
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    # Local runtime_meta cache
    if os.path.exists(CACHE_DIR):
        shutil.rmtree(CACHE_DIR)

    # Localhost temporary directories
    for local_dir in (os.path.join(TEMP, STORAGE_PREFIX_DEFAULT),
                      os.path.join(TEMP, RUNTIMES_PREFIX_DEFAULT)):
        if os.path.exists(local_dir):
            shutil.rmtree(local_dir)

    # Runtime metadata kept in the object storage
    handler = internal_storage.storage_handler
    runtime_keys = handler.list_keys(storage_config['bucket'], RUNTIMES_PREFIX_DEFAULT)
    if runtime_keys:
        handler.delete_objects(storage_config['bucket'], runtime_keys)

    compute_handler.delete_all_runtimes()
def build_runtime(name, file, config=None):
    """Build a runtime image from *file*, then deploy and register it.

    :param name: name of the runtime to build
    :param file: build recipe (e.g. Dockerfile) for the runtime image
    :param config: settings overriding the config file. Default None.
    """
    config = default_config(config)
    backend = Compute(extract_compute_config(config))
    backend.build_runtime(name, file)
    # Deploy the freshly built image and refresh its stored metadata
    create_runtime(name, config=config)
    update_runtime(name, config=config)
def build_runtime(name, file, config=None):
    """Build a runtime image from *file* and then create/update it.

    :param name: name of the runtime to build
    :param file: build recipe (e.g. Dockerfile) for the runtime image
    :param config: settings overriding the config file. Default None.
    """
    config = default_config(config)
    compute_cfg = extract_compute_config(config)
    handler = Compute(compute_cfg)
    handler.build_runtime(name, file)
    # Once built, deploy it and regenerate its metadata
    create_runtime(name, config=config)
    update_runtime(name, config=config)
def __init__(self, config=None, runtime=None, runtime_memory=None, compute_backend=None,
             compute_backend_region=None, log_level=None, rabbitmq_monitor=False):
    """
    Initialize and return a ServerlessExecutor class.

    :param config: Settings passed in here will override those in config file. Default None.
    :param runtime: Runtime name to use. Default None.
    :param runtime_memory: memory to use in the runtime
    :param compute_backend: name of the compute backend to use
    :param compute_backend_region: region of the compute backend to use
    :param log_level: log level to use during the execution
    :param rabbitmq_monitor: use rabbitmq as monitoring system
    :return `ServerlessExecutor` object.
    """
    self.start_time = time.time()
    self._state = ExecutorState.new
    self.config = default_config(config)
    self.is_cf_cluster = is_cf_cluster()
    self.data_cleaner = self.config['pywren']['data_cleaner']

    # Apply per-executor runtime overrides on top of the loaded config
    if runtime:
        self.config['pywren']['runtime'] = runtime
    if runtime_memory:
        self.config['pywren']['runtime_memory'] = int(runtime_memory)
    if compute_backend:
        self.config['pywren']['compute_backend'] = compute_backend
    if compute_backend_region:
        self.config['pywren']['compute_backend_region'] = compute_backend_region

    # Logging: fall back to the logger's effective level when none was given
    self.log_level = log_level
    if not self.log_level and logger.getEffectiveLevel() != logging.WARNING:
        self.log_level = logging.getLevelName(logger.getEffectiveLevel())
    if self.log_level:
        os.environ["CB_LOG_LEVEL"] = self.log_level
        if not self.is_cf_cluster:
            default_logging_config(self.log_level)

    # Reuse the executor id exported by a parent process, if any
    if 'CB_EXECUTOR_ID' in os.environ:
        self.executor_id = os.environ['CB_EXECUTOR_ID']
    else:
        self.executor_id = create_executor_id()
    logger.debug('ServerlessExecutor created with ID: {}'.format(self.executor_id))

    # RabbitMQ monitor configuration: only enabled when an amqp_url exists
    self.rabbitmq_monitor = rabbitmq_monitor
    if self.rabbitmq_monitor:
        if self.config['rabbitmq']['amqp_url']:
            os.environ["CB_RABBITMQ_MONITOR"] = 'True'
        else:
            self.rabbitmq_monitor = False
    else:
        self.config['rabbitmq']['amqp_url'] = None

    storage_config = extract_storage_config(self.config)
    self.internal_storage = InternalStorage(storage_config)
    self.invoker = Invoker(self.config, self.executor_id)
    self.jobs = {}
def delete_runtime(name, config=None):
    """Delete the deployed runtime(s) matching *name* and their stored metadata.

    :param name: runtime name to delete
    :param config: settings overriding the config file. Default None.
    """
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    for rt_name, rt_memory in compute_handler.list_runtimes(name):
        # Remove the deployed runtime and its metadata entry in storage
        compute_handler.delete_runtime(rt_name, rt_memory)
        runtime_key = compute_handler.get_runtime_key(rt_name, rt_memory)
        internal_storage.delete_runtime_meta(runtime_key)
def clean_runtimes(config=None):
    """Delete all deployed runtimes, the local metadata cache and the
    runtime metadata stored in the object storage bucket.

    :param config: settings overriding the config file. Default None.
    """
    logger.info('Cleaning all runtimes and cache information')
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    # Local runtime_meta cache
    if os.path.exists(CACHE_DIR):
        shutil.rmtree(CACHE_DIR)

    # Runtime metadata stored under the 'runtime' prefix in the bucket
    handler = internal_storage.storage_handler
    runtime_keys = handler.list_keys(storage_config['bucket'], 'runtime')
    if runtime_keys:
        handler.delete_objects(storage_config['bucket'], runtime_keys)

    compute_handler.delete_all_runtimes()
def create_runtime(name, memory=None, config=None):
    """Deploy runtime *name* and store its metadata in the object storage.

    :param name: runtime name to deploy
    :param memory: memory (MB) for the runtime; defaults to the configured
        'runtime_memory' when None.
    :param config: settings overriding the config file. Default None.
    :raises Exception: if the runtime metadata cannot be uploaded to storage.
    """
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    memory = config['pywren']['runtime_memory'] if not memory else memory
    timeout = config['pywren']['runtime_timeout']
    logger.info('Creating runtime: {}, memory: {}'.format(name, memory))

    runtime_key = compute_handler.get_runtime_key(name, memory)
    runtime_meta = compute_handler.create_runtime(name, memory, timeout=timeout)

    try:
        internal_storage.put_runtime_meta(runtime_key, runtime_meta)
    except Exception as exc:
        # BUG FIX: the original did `raise("…".format(...))`, which raises a
        # str and itself fails with TypeError. Raise a real Exception and
        # chain the original cause.
        raise Exception("Unable to upload 'preinstalled-modules' file into {}"
                        .format(internal_storage.backend)) from exc
def run_tests(test_to_run, config=None):
    """Run the PyWren test suite (or a single named test).

    :param test_to_run: test method name to run, or 'all' for the full suite
    :param config: open file object with a JSON config; when None the
        default configuration is loaded.
    """
    global CONFIG, STORAGE_CONFIG, STORAGE
    # BUG FIX: the original tested the `config` parameter but then read the
    # module-level `args.config`, so a caller-supplied config was ignored
    # (and could crash when argparse was run without -c). Load from the
    # parameter itself.
    CONFIG = json.load(config) if config else default_config()
    STORAGE_CONFIG = extract_storage_config(CONFIG)
    STORAGE = InternalStorage(STORAGE_CONFIG).storage

    suite = unittest.TestSuite()
    if test_to_run == 'all':
        suite.addTest(unittest.makeSuite(TestPywren))
    else:
        try:
            suite.addTest(TestPywren(test_to_run))
        except ValueError:
            print("unknown test, use: --help")
            sys.exit()

    runner = unittest.TextTestRunner()
    runner.run(suite)
def clean_runtimes(config=None):
    """Delete all deployed runtimes, the ~/.cloudbutton metadata cache and
    the runtime metadata stored in the object storage bucket.

    :param config: settings overriding the config file. Default None.
    """
    logger.info('Cleaning all runtimes')
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    # Local runtime_meta cache lives under ~/.cloudbutton
    cache_dir = os.path.join(os.path.expanduser('~'), '.cloudbutton')
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir)

    # Runtime metadata stored under the 'runtime' prefix in the bucket
    handler = internal_storage.storage_handler
    runtime_keys = handler.list_keys(storage_config['bucket'], 'runtime')
    if runtime_keys:
        handler.delete_objects(storage_config['bucket'], runtime_keys)

    compute_handler.delete_all_runtimes()
def clean_all(config=None):
    """Remove every PyWren artifact: localhost executor temp dirs, deployed
    runtimes, runtime/job data in the object storage, and the local cache.

    :param config: settings overriding the config file. Default None.
    """
    logger.info('Cleaning all PyWren information')
    config = default_config(config)
    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)
    compute_config = extract_compute_config(config)
    compute_handler = Compute(compute_config)

    # Localhost executor temporary directories
    shutil.rmtree(STORAGE_FOLDER, ignore_errors=True)
    shutil.rmtree(DOCKER_FOLDER, ignore_errors=True)

    # Deployed runtimes and the runtime/job data kept in the object storage
    compute_handler.delete_all_runtimes()
    storage = internal_storage.storage
    for prefix in (RUNTIMES_PREFIX, JOBS_PREFIX):
        clean_bucket(storage, storage_config['bucket'], prefix, sleep=1)

    # Local pywren cache
    shutil.rmtree(CACHE_DIR, ignore_errors=True)
def __init__(self, config=None, runtime=None, runtime_memory=None, compute_backend=None,
             compute_backend_region=None, storage_backend=None, storage_backend_region=None,
             rabbitmq_monitor=None, log_level=None):
    """
    Initialize a FunctionExecutor class.

    :param config: Settings passed in here will override those in config file. Default None.
    :param runtime: Runtime name to use. Default None.
    :param runtime_memory: memory to use in the runtime. Default None.
    :param compute_backend: Name of the compute backend to use. Default None.
    :param compute_backend_region: Name of the compute backend region to use. Default None.
    :param storage_backend: Name of the storage backend to use. Default None.
    :param storage_backend_region: Name of the storage backend region to use. Default None.
    :param log_level: log level to use during the execution. Default None.
    :param rabbitmq_monitor: use rabbitmq as the monitoring system. Default None.
    :raises Exception: when rabbitmq monitoring is enabled without an 'amqp_url'.

    :return `FunctionExecutor` object.
    """
    self.start_time = time.time()
    self._state = FunctionExecutor.State.New
    self.is_remote_cluster = is_remote_cluster()

    # Log level Configuration: inherit the logger's effective level if unset
    self.log_level = log_level
    if not self.log_level:
        if (logger.getEffectiveLevel() != logging.WARNING):
            self.log_level = logging.getLevelName(logger.getEffectiveLevel())
    if self.log_level:
        os.environ["PYWREN_LOGLEVEL"] = self.log_level
        if not self.is_remote_cluster:
            default_logging_config(self.log_level)

    # Overwrite pywren config parameters with any constructor arguments
    config_ow = {'pywren': {}}
    if runtime is not None:
        config_ow['pywren']['runtime'] = runtime
    if runtime_memory is not None:
        config_ow['pywren']['runtime_memory'] = int(runtime_memory)
    if compute_backend is not None:
        config_ow['pywren']['compute_backend'] = compute_backend
    if compute_backend_region is not None:
        config_ow['pywren']['compute_backend_region'] = compute_backend_region
    if storage_backend is not None:
        config_ow['pywren']['storage_backend'] = storage_backend
    if storage_backend_region is not None:
        config_ow['pywren']['storage_backend_region'] = storage_backend_region
    if rabbitmq_monitor is not None:
        config_ow['pywren']['rabbitmq_monitor'] = rabbitmq_monitor

    self.config = default_config(config, config_ow)
    self.executor_id = create_executor_id()
    logger.debug('FunctionExecutor created with ID: {}'.format(self.executor_id))

    # RabbitMQ monitor configuration: requires rabbitmq.amqp_url in config
    self.rabbitmq_monitor = self.config['pywren'].get('rabbitmq_monitor', False)
    if self.rabbitmq_monitor:
        if 'rabbitmq' in self.config and 'amqp_url' in self.config['rabbitmq']:
            self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
        else:
            # BUG FIX: the original message misspelled "rabbitmq_monitor"
            # as "rabbitmq_mnonitor".
            raise Exception("You cannot use rabbitmq_monitor since 'amqp_url'"
                            " is not present in configuration")

    self.data_cleaner = self.config['pywren']['data_cleaner']

    storage_config = extract_storage_config(self.config)
    self.internal_storage = InternalStorage(storage_config)
    self.invoker = FunctionInvoker(self.config, self.executor_id, self.internal_storage)
    self.jobs = {}
import unittest import pywren_ibm_cloud as pywren import urllib.request from pywren_ibm_cloud.storage import InternalStorage from pywren_ibm_cloud.config import default_config, extract_storage_config from multiprocessing.pool import ThreadPool import logging # logging.basicConfig(level=logging.DEBUG) parser = argparse.ArgumentParser(description="test all PyWren's functionality", usage='python -m pywren_ibm_cloud.tests [-c CONFIG] [-f TESTNAME]') parser.add_argument('-c', '--config', type=argparse.FileType('r'), metavar='', default=None, help="use json config file") parser.add_argument('-t', '--test', metavar='', default='all', help='run a specific test, type "-t help" for tests list') args = parser.parse_args() CONFIG = default_config() STORAGE_CONFIG = extract_storage_config(CONFIG) STORAGE = InternalStorage(STORAGE_CONFIG).storage_handler PREFIX = '__pywren.test' TEST_FILES_URLS = ["http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.enron.txt", "http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.kos.txt", "http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.nips.txt", "http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.nytimes.txt", "http://archive.ics.uci.edu/ml/machine-learning-databases/bag-of-words/vocab.pubmed.txt"] def initTests(): print('Uploading test files...') def up(param): i, url = param
from shutil import rmtree
from concurrent.futures.thread import ThreadPoolExecutor

# Dataset locations and segmentation parameters for the metabolomics DB
CENTROIDS_BUCKET = 'omeruseast'
MOL_DB_PREFIX = 'metabolomics/db/centroids_chunks'
CENTROIDS_SEGMENTS_PREFIX = 'metabolomics/vm_db_segments'
CENTR_SEGM_PATH = '/data/metabolomics/db/segms'
MZ_MIN = 79.99708557
MZ_MAX = 499.97909546
DS_SEGMENTS_N = 15
DS_SEGM_SIZE_MB = 100

from pywren_ibm_cloud.storage import InternalStorage
from pywren_ibm_cloud.config import default_config, extract_storage_config

# Module-level storage handler built from the default PyWren configuration
PYWREN_CONFIG = default_config()
STORAGE_CONFIG = extract_storage_config(PYWREN_CONFIG)
STORAGE = InternalStorage(STORAGE_CONFIG).storage_handler


def download_database(storage, bucket, prefix):
    """Download every centroids chunk under *prefix* in parallel and return
    them concatenated as a single DataFrame.

    :param storage: storage handler exposing list_keys/get_object
    :param bucket: bucket holding the database chunks
    :param prefix: key prefix of the chunks to fetch
    :return: pandas DataFrame with all chunks, each sorted by 'mz'
    """
    chunk_keys = storage.list_keys(bucket, prefix)

    def _fetch(key):
        # Stream the chunk and keep each one sorted by m/z before concatenation
        data_stream = storage.get_object(bucket, key, stream=True)
        return pd.read_msgpack(data_stream).sort_values('mz')

    with ThreadPoolExecutor() as executor:
        return pd.concat(list(executor.map(_fetch, chunk_keys)))