Esempio n. 1
0
def delete_runtime(image_name, config=None):
    logger.info('Deleting runtime: {}'.format(image_name))

    if config is None:
        config = wrenconfig.default()
    else:
        config = wrenconfig.default(config)

    storage_config = wrenconfig.extract_storage_config(config)
    storage_client = storage.InternalStorage(storage_config)
    cf_config = wrenconfig.extract_cf_config(config)
    cf_client = CloudFunctions(cf_config)

    if image_name == 'default':
        image_name = _get_default_image_name()

    image_name_formated = create_action_name(image_name)
    actions = cf_client.list_actions(PACKAGE)
    region = cf_client.endpoint.split('//')[1].split('.')[0]
    namespace = cf_client.namespace

    for action in actions:
        action_name, memory = action['name'].rsplit('-', 1)
        if image_name_formated == action_name:
            memory = int(memory.replace('MB', ''))
            runtime_name = create_runtime_name(image_name, memory)
            storage_client.delete_runtime_info(region, namespace, runtime_name)
            action_name = create_action_name(runtime_name)
            cf_client.delete_action(action_name)
Esempio n. 2
0
    def __init__(self,
                 config=None,
                 runtime=None,
                 log_level=None,
                 runtime_timeout=wrenconfig.CF_RUNTIME_TIMEOUT):
        """
        Initialize and return an executor class.

        :param config: Settings passed in here will override those in `pywren_config`. Default None.
        :param runtime: Runtime name to use. Default None.
        :param runtime_timeout: Max time per action. Default 600
        :return `executor` object.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self._state = ExecutorState.new

        if config is None:
            self.config = wrenconfig.default()
        else:
            self.config = wrenconfig.default(config)

        if runtime:
            self.config['ibm_cf']['action_name'] = runtime

        if log_level:
            wrenlogging.default_config(log_level)

        ibm_cf_config = self.config['ibm_cf']
        self.runtime = ibm_cf_config['action_name']
        self.cf_cluster = ibm_cf_config['is_cf_cluster']
        self.data_cleaner = self.config['pywren']['data_cleaner']

        retry_config = {}
        retry_config['invocation_retry'] = self.config['pywren'][
            'invocation_retry']
        retry_config['retry_sleeps'] = self.config['pywren']['retry_sleeps']
        retry_config['retries'] = self.config['pywren']['retries']

        invoker = invokers.IBMCloudFunctionsInvoker(ibm_cf_config,
                                                    retry_config)

        self.storage_config = wrenconfig.extract_storage_config(self.config)
        self.internal_storage = storage.InternalStorage(self.storage_config)
        self.executor = Executor(invoker, self.config, self.internal_storage,
                                 runtime_timeout)
        self.executor_id = self.executor.executor_id

        self.futures = []
        self.reduce_future = None
Esempio n. 3
0
    def __init__(self, config=None, runtime=None, log_level=None, job_max_runtime=JOB_MAX_RUNTIME):
        """
        Initialize and return an executor class.
    
        :param config: Settings passed in here will override those in `pywren_config`. Default None.
        :param runtime: Runtime name to use. Default None.
        :param job_max_runtime: Max time per lambda. Default 300
        :return `executor` object.
    
        Usage
          >>> import pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self._state = ExecutorState.new
        
        if config is None:
            self.config = wrenconfig.default()
        else:
            self.config = wrenconfig.default(config)

        if runtime:
            self.config['ibm_cf']['action_name'] = runtime
            
        if log_level:
            wrenlogging.default_config(log_level)
        
        self._openwhisk = False
        if any([k.startswith('__OW_') for k in os.environ.keys()]):
            # OpenWhisk execution
            self._openwhisk = True
            wrenlogging.ow_config(logging.INFO)
    
        self.runtime = self.config['ibm_cf']['action_name']
    
        ibm_cf_config = self.config['ibm_cf']
        invoker = invokers.IBMCloudFunctionsInvoker(ibm_cf_config)
        self.storage_config = wrenconfig.extract_storage_config(self.config)
        self.storage_handler = storage.Storage(self.storage_config)
        self.executor = Executor(invoker, self.config, self.storage_handler, job_max_runtime)
        self.executor_id = self.executor.executor_id

        self.futures = None
        self.reduce_future = None
        
        log_msg='IBM Cloud Functions executor created with ID {}'.format(self.executor_id)
        logger.info(log_msg)
        if(logger.getEffectiveLevel() == logging.WARNING):
            print(log_msg)
Esempio n. 4
0
    def __init__(self, jr_config, result_queue):
        super().__init__()
        start_time = time.time()
        self.result_queue = result_queue
        self.config = jr_config
        self.stats = stats(self.config['stats_filename'])
        self.stats.write('jobrunner_start', start_time)
        pw_config = json.loads(os.environ.get('PYWREN_CONFIG', ''))
        self.storage_config = extract_storage_config(pw_config)

        if 'SHOW_MEMORY_USAGE' in os.environ:
            self.show_memory = eval(os.environ['SHOW_MEMORY_USAGE'])
        else:
            self.show_memory = False

        log_level = self.config['log_level']
        wrenlogging.ow_config(log_level)
        self.func_key = self.config['func_key']
        self.data_key = self.config['data_key']
        self.data_byte_range = self.config['data_byte_range']
        self.output_key = self.config['output_key']
Esempio n. 5
0
def extract_modules(image_name, config=None, pywren_location=None):
    # Extract installed Python modules from docker image
    # And store them into storage

    # Create runtime_name from image_name
    username, appname = image_name.split('/')
    runtime_name = appname.replace(':', '_')

    # Load PyWren config from ~/.pywren_config
    if config is None:
        config = wrenconfig.default()
    else:
        config = wrenconfig.default(config)

    # Create storage_handler to upload modules file
    storage_config = wrenconfig.extract_storage_config(config)
    internal_storage = storage.InternalStorage(storage_config)

    # sys.stdout = open(os.devnull, 'w')
    if pywren_location is None:
        action_location = "extract_modules.py"
    else:
        action_location = os.path.join(pywren_location, "runtime",
                                       "extract_modules.py")

    with open(action_location, "r") as action_py:
        action_code = action_py.read()
    cf_client = CloudFunctions(config['ibm_cf'])
    action_name = runtime_name + '_modules'
    cf_client.create_action(action_name,
                            code=action_code,
                            kind='blackbox',
                            image=image_name,
                            is_binary=False)
    runtime_meta = cf_client.invoke_with_result(action_name)
    internal_storage.put_runtime_info(runtime_name, runtime_meta)
    cf_client.delete_action(action_name)
Esempio n. 6
0
def _extract_modules(image_name, memory, cf_client, config):
    # Extract installed Python modules from docker image
    # And store them into storage
    # Create storage_handler to upload modules file
    storage_config = wrenconfig.extract_storage_config(config)
    internal_storage = storage.InternalStorage(storage_config)

    pywren_location = _get_pywren_location()
    action_location = os.path.join(pywren_location, "runtime",
                                   "extract_modules.py")

    with open(action_location, "r") as action_py:
        action_code = action_py.read()

    modules_action_name = '{}-modules'.format(create_action_name(image_name))

    # old_stdout = sys.stdout
    # sys.stdout = open(os.devnull, 'w')
    logger.debug(
        "Creating action for extracting Python modules list: {}".format(
            modules_action_name))
    cf_client.create_action(modules_action_name,
                            image_name,
                            code=action_code,
                            is_binary=False)
    # sys.stdout = old_stdout

    region = cf_client.endpoint.split('//')[1].split('.')[0]
    namespace = cf_client.namespace
    memory = cf_client.default_runtime_memory if not memory else memory
    runtime_name = create_runtime_name(image_name, memory)
    logger.debug(
        "Going to extract Python modules list from: {}".format(image_name))
    runtime_meta = cf_client.invoke_with_result(modules_action_name)
    internal_storage.put_runtime_info(region, namespace, runtime_name,
                                      runtime_meta)
    cf_client.delete_action(modules_action_name)
def run(config=None):
    global CONFIG
    global STORAGE_CONFIG
    global STORAGE

    if config is None:
        CONFIG = wrenconfig.default()
    else:
        CONFIG = wrenconfig.default(config)

    STORAGE_CONFIG = wrenconfig.extract_storage_config(CONFIG)
    STORAGE = cos.COSBackend(STORAGE_CONFIG['ibm_cos'])

    if len(sys.argv) <= 1:
        task = 'full'
    else:
        task = sys.argv[1]

    suite = unittest.TestSuite()
    if task == 'pywren':
        suite.addTest(unittest.makeSuite(TestPywren))
    elif task == 'pywren_cos':
        suite.addTest(unittest.makeSuite(TestPywrenCos))
    elif task == 'full':
        suite.addTest(unittest.makeSuite(TestPywren))
        suite.addTest(unittest.makeSuite(TestPywrenCos))
    elif task == 'test_call_async':
        suite.addTest(TestPywren('test_call_async'))
    elif task == 'test_map':
        suite.addTest(TestPywren('test_map'))
    elif task == 'test_map_reduce':
        suite.addTest(TestPywren('test_map_reduce'))
    elif task == 'test_multiple_executions':
        suite.addTest(TestPywren('test_multiple_executions'))
    elif task == 'test_map_reduce_cos_bucket':
        suite.addTest(TestPywrenCos('test_map_reduce_cos_bucket'))
    elif task == 'test_map_reduce_cos_bucket_one_reducer_per_object':
        suite.addTest(
            TestPywrenCos('test_map_reduce_cos_bucket_one_reducer_per_object'))
    elif task == 'test_map_reduce_cos_key':
        suite.addTest(TestPywrenCos('test_map_reduce_cos_key'))
    elif task == 'test_map_reduce_cos_key_one_reducer_per_object':
        suite.addTest(
            TestPywrenCos('test_map_reduce_cos_key_one_reducer_per_object'))
    elif task == 'test_map_reduce_url':
        suite.addTest(TestPywrenCos('test_map_reduce_url'))
    elif task == 'test_storage_handler':
        suite.addTest(TestPywrenCos('test_storage_handler'))
    elif task == 'test_chunks_bucket':
        suite.addTest(TestPywrenCos('test_chunks_bucket'))
    elif task == 'test_chunks_bucket_one_reducer_per_object':
        suite.addTest(
            TestPywrenCos('test_chunks_bucket_one_reducer_per_object'))
    else:
        print(
            'Unknown Command... use: "init", "pywren", "pywren_cos", "clean" or a test function name.'
        )
        sys.exit()

    initTests()
    runner = unittest.TextTestRunner()
    runner.run(suite)
    cleanTests()
Esempio n. 8
0
def ibm_cloud_function_handler(event):
    start_time = time.time()
    logger.info("Starting handler")
    response_status = {'exception': None}
    response_status['start_time'] = start_time

    context_dict = {
        'ibm_cf_request_id': os.environ.get("__OW_ACTIVATION_ID"),
        'ibm_cf_hostname': os.environ.get("HOSTNAME"),
        'ibm_cf_python_version': os.environ.get("PYTHON_VERSION"),
    }

    config = event['config']
    storage_config = wrenconfig.extract_storage_config(config)
    custom_handler_env = {'PYWREN_CONFIG': json.dumps(config),
                          'STORAGE_CONFIG': json.dumps(storage_config),
                          'PYWREN_EXECUTOR_ID':  event['executor_id']}
    os.environ.update(custom_handler_env)

    try:
        status_key = event['status_key']
        func_key = event['func_key']
        data_key = event['data_key']
        data_byte_range = event['data_byte_range']
        output_key = event['output_key']

        if version.__version__ != event['pywren_version']:
            raise Exception("WRONGVERSION", "Pywren version mismatch",
                            version.__version__, event['pywren_version'])

        job_max_runtime = event.get("job_max_runtime", 550)  # default for CF

        response_status['func_key'] = func_key
        response_status['data_key'] = data_key
        response_status['output_key'] = output_key
        response_status['status_key'] = status_key

        #free_disk_bytes = free_disk_space("/tmp")
        #response_status['free_disk_bytes'] = free_disk_bytes

        extra_env = event.get('extra_env', {})
        extra_env['PYTHONPATH'] = "{}:{}".format(os.getcwd(), PYWREN_LIBS_PATH)
        extra_env['PYTHONUNBUFFERED'] = 'True'

        call_id = event['call_id']
        callgroup_id = event['callgroup_id']
        executor_id = event['executor_id']
        response_status['call_id'] = call_id
        response_status['callgroup_id'] = callgroup_id
        response_status['executor_id'] = executor_id

        # pass a full json blob
        jobrunner_config = {'func_key': func_key,
                            'data_key': data_key,
                            'data_byte_range': data_byte_range,
                            'python_module_path': PYTHON_MODULE_PATH,
                            'output_key': output_key,
                            'stats_filename': JOBRUNNER_STATS_FILENAME}

        with open(JOBRUNNER_CONFIG_FILENAME, 'w') as jobrunner_fid:
            json.dump(jobrunner_config, jobrunner_fid)

        if os.path.exists(JOBRUNNER_STATS_FILENAME):
            os.remove(JOBRUNNER_STATS_FILENAME)

        cmdstr = "python {} {}".format(JOBRUNNER_PATH, JOBRUNNER_CONFIG_FILENAME)

        logger.info("About to execute '{}'".format(cmdstr))
        setup_time = time.time()
        response_status['setup_time'] = setup_time - start_time

        local_env = os.environ.copy()
        local_env.update(extra_env)

        """
        stdout = os.popen(cmdstr).read()
        print(stdout)
        process = subprocess.run(cmdstr, shell=True, env=local_env, bufsize=1,
                                 stdout=subprocess.PIPE, preexec_fn=os.setsid,
                                 universal_newlines=True, timeout=job_max_runtime)

        print(process.stdout)
        """
        # This is copied from http://stackoverflow.com/a/17698359/4577954
        # reasons for setting process group: http://stackoverflow.com/a/4791612
        process = subprocess.Popen(cmdstr, shell=True, env=local_env, bufsize=1,
                                   stdout=subprocess.PIPE, preexec_fn=os.setsid,
                                   universal_newlines=True)

        logger.info("launched process")

        def consume_stdout(stdout, queue):
            with stdout:
                for line in stdout:
                    print(line, end='')
                    queue.put(line)

        q = Queue()

        t = Thread(target=consume_stdout, args=(process.stdout, q))
        t.daemon = True
        t.start()
        t.join(job_max_runtime)

        if t.isAlive():
            # If process is still alive after t.join(job_max_runtime), kill it
            logger.error("Process exceeded maximum runtime of {} sec".format(job_max_runtime))
            # Send the signal to all the process groups
            os.killpg(os.getpgid(process.pid), signal.SIGTERM)
            raise Exception("OUTATIME",  "Process executed for too long and was killed")

        if not q.empty():
            if 'Jobrunner finished' not in q.queue[q.qsize()-1].strip():
                raise Exception("OUTOFMEMORY",  "Process exceeded maximum memory and was killed")

        logger.info("Command execution finished")
        #print(subprocess.check_output("find {}".format(PYTHON_MODULE_PATH), shell=True))
        #print(subprocess.check_output("find {}".format(os.getcwd()), shell=True))

        if os.path.exists(JOBRUNNER_STATS_FILENAME):
            with open(JOBRUNNER_STATS_FILENAME, 'r') as fid:
                for l in fid.readlines():
                    key, value = l.strip().split(" ")
                    float_value = float(value)
                    response_status[key] = float_value

        response_status['exec_time'] = time.time() - setup_time
        response_status['host_submit_time'] = event['host_submit_time']
        #response_status['server_info'] = get_server_info()
        response_status.update(context_dict)
        response_status['end_time'] = time.time()

    except Exception as e:
        # internal runtime exceptions
        logger.error("There was an exception: {}".format(str(e)))
        response_status['end_time'] = time.time()
        response_status['exception'] = str(e)
        response_status['exception_args'] = e.args
        response_status['exception_traceback'] = traceback.format_exc()

    finally:
        store_status = True
        if 'STORE_STATUS' in extra_env:
            store_status = eval(extra_env['STORE_STATUS'])

        if store_status:
            internal_storage = storage.InternalStorage(storage_config)
            internal_storage.put_data(status_key, json.dumps(response_status))
Esempio n. 9
0
    def __init__(self,
                 config=None,
                 runtime=None,
                 runtime_memory=None,
                 log_level=None,
                 rabbitmq_monitor=False):
        """
        Initialize and return an executor class.

        :param config: Settings passed in here will override those in `pywren_config`. Default None.
        :param runtime: Runtime name to use. Default None.
        :param runtime_memory: memory to use in the runtime
        :param log_level: log level to use during the execution
        :param rabbitmq_monitor: use rabbitmq as monitoring system
        :return `executor` object.

        Usage
          >>> import pywren_ibm_cloud as pywren
          >>> pw = pywren.ibm_cf_executor()
        """
        self.start_time = time.time()
        self._state = ExecutorState.new

        if config is None:
            self.config = wrenconfig.default()
        else:
            self.config = wrenconfig.default(config)

        self.is_cf_cluster = is_cf_cluster()
        self.data_cleaner = self.config['pywren']['data_cleaner']

        # Overwrite runtime variables
        if runtime:
            self.config['pywren']['runtime'] = runtime
        if runtime_memory:
            self.config['pywren']['runtime_memory'] = int(runtime_memory)

        # Log level Configuration
        self.log_level = log_level
        if not self.log_level:
            if (logger.getEffectiveLevel() != logging.WARNING):
                self.log_level = logging.getLevelName(
                    logger.getEffectiveLevel())
        if self.log_level:
            os.environ["PYWREN_LOG_LEVEL"] = self.log_level
            if not self.is_cf_cluster:
                wrenlogging.default_config(self.log_level)

        # RabbitMQ monitor configuration
        self.rabbitmq_monitor = rabbitmq_monitor
        if self.rabbitmq_monitor:
            if self.config['rabbitmq']['amqp_url']:
                os.environ["PYWREN_RABBITMQ_MONITOR"] = 'True'
            else:
                self.rabbitmq_monitor = False
        else:
            self.config['rabbitmq']['amqp_url'] = None

        storage_config = wrenconfig.extract_storage_config(self.config)
        self.internal_storage = storage.InternalStorage(storage_config)

        invoker = invokers.IBMCloudFunctionsInvoker(self.config)
        self.executor = Executor(invoker, self.config, self.internal_storage)
        self.executor_id = self.executor.executor_id

        self.futures = []
Esempio n. 10
0
def function_handler(event):
    start_time = time.time()
    logger.debug("Action handler started")
    response_status = {'exception': False}
    response_status['host_submit_time'] = event['host_submit_time']
    response_status['start_time'] = start_time

    context_dict = {
        'ibm_cf_request_id': os.environ.get("__OW_ACTIVATION_ID"),
        'ibm_cf_python_version': os.environ.get("PYTHON_VERSION"),
    }

    config = event['config']
    storage_config = wrenconfig.extract_storage_config(config)

    log_level = event['log_level']
    wrenlogging.ow_config(log_level)

    call_id = event['call_id']
    callgroup_id = event['callgroup_id']
    executor_id = event['executor_id']
    logger.info("Execution ID: {}/{}/{}".format(executor_id, callgroup_id, call_id))
    job_max_runtime = event.get("job_max_runtime", 590)  # default for CF
    status_key = event['status_key']
    func_key = event['func_key']
    data_key = event['data_key']
    data_byte_range = event['data_byte_range']
    output_key = event['output_key']
    extra_env = event.get('extra_env', {})

    response_status['call_id'] = call_id
    response_status['callgroup_id'] = callgroup_id
    response_status['executor_id'] = executor_id
    # response_status['func_key'] = func_key
    # response_status['data_key'] = data_key
    # response_status['output_key'] = output_key
    # response_status['status_key'] = status_key

    try:
        if version.__version__ != event['pywren_version']:
            raise Exception("WRONGVERSION", "PyWren version mismatch",
                            version.__version__, event['pywren_version'])

        # response_status['free_disk_bytes'] = free_disk_space("/tmp")

        custom_env = {'PYWREN_CONFIG': json.dumps(config),
                      'PYWREN_EXECUTOR_ID':  executor_id,
                      'PYTHONPATH': "{}:{}".format(os.getcwd(), PYWREN_LIBS_PATH),
                      'PYTHONUNBUFFERED': 'True'}

        os.environ.update(custom_env)
        os.environ.update(extra_env)

        # pass a full json blob
        jobrunner_config = {'func_key': func_key,
                            'data_key': data_key,
                            'log_level': log_level,
                            'data_byte_range': data_byte_range,
                            'python_module_path': PYTHON_MODULE_PATH,
                            'output_key': output_key,
                            'stats_filename': JOBRUNNER_STATS_FILENAME}

        if os.path.exists(JOBRUNNER_STATS_FILENAME):
            os.remove(JOBRUNNER_STATS_FILENAME)

        setup_time = time.time()
        response_status['setup_time'] = round(setup_time - start_time, 8)

        result_queue = multiprocessing.Queue()
        jr = jobrunner(jobrunner_config, result_queue)
        jr.daemon = True
        logger.info("Starting jobrunner process")
        jr.start()
        jr.join(job_max_runtime)
        response_status['exec_time'] = round(time.time() - setup_time, 8)

        if jr.is_alive():
            # If process is still alive after jr.join(job_max_runtime), kill it
            logger.error("Process exceeded maximum runtime of {} seconds".format(job_max_runtime))
            # Send the signal to all the process groups
            jr.terminate()
            raise Exception("OUTATIME",  "Process executed for too long and was killed")

        try:
            # Only 1 message is returned by jobrunner
            result_queue.get(block=False)
        except Exception:
            # If no message, this means that the process was killed due memory usage
            raise Exception("OUTOFMEMORY",  "Process exceeded maximum memory and was killed")

        # print(subprocess.check_output("find {}".format(PYTHON_MODULE_PATH), shell=True))
        # print(subprocess.check_output("find {}".format(os.getcwd()), shell=True))

        if os.path.exists(JOBRUNNER_STATS_FILENAME):
            with open(JOBRUNNER_STATS_FILENAME, 'r') as fid:
                for l in fid.readlines():
                    key, value = l.strip().split(" ", 1)
                    try:
                        response_status[key] = float(value)
                    except Exception:
                        response_status[key] = value
                    if key == 'exception' or key == 'exc_pickle_fail' \
                       or key == 'result':
                        response_status[key] = eval(value)

        # response_status['server_info'] = get_server_info()
        response_status.update(context_dict)
        response_status['end_time'] = time.time()

    except Exception as e:
        # internal runtime exceptions
        logger.error("There was an exception: {}".format(str(e)))
        response_status['end_time'] = time.time()
        response_status['exception'] = True

        pickled_exc = pickle.dumps(sys.exc_info())
        pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
        response_status['exc_info'] = str(pickled_exc)

    finally:
        store_status = strtobool(os.environ.get('STORE_STATUS', 'True'))
        rabbit_amqp_url = config['rabbitmq'].get('amqp_url')
        dmpd_response_status = json.dumps(response_status)
        drs = sizeof_fmt(len(dmpd_response_status))

        if rabbit_amqp_url and store_status:
            status_sent = False
            output_query_count = 0
            while not status_sent and output_query_count < 5:
                output_query_count = output_query_count + 1
                try:
                    params = pika.URLParameters(rabbit_amqp_url)
                    connection = pika.BlockingConnection(params)
                    channel = connection.channel()
                    channel.queue_declare(queue=executor_id, auto_delete=True)
                    channel.basic_publish(exchange='', routing_key=executor_id,
                                          body=dmpd_response_status)
                    connection.close()
                    logger.info("Execution stats sent to rabbitmq - Size: {}".format(drs))
                    status_sent = True
                except Exception as e:
                    logger.error("Unable to send status to rabbitmq")
                    logger.error(str(e))
                    logger.info('Retrying to send stats to rabbitmq...')
                    time.sleep(0.2)
        if store_status:
            internal_storage = storage.InternalStorage(storage_config)
            logger.info("Storing execution stats - status.json - Size: {}".format(drs))
            internal_storage.put_data(status_key, dmpd_response_status)