Example 1
# Standard-library and boto3 imports used by this snippet; logger, version,
# wrenutil, the helper functions (key_exists, get_key_size, free_disk_space,
# download_runtime_if_necessary, get_server_info) and the *_LOC/*_FILENAME/
# *_SECS constants are defined elsewhere in the surrounding pywren module.
import json
import os
import shutil
import signal
import subprocess
import time
import traceback
from queue import Queue, Empty
from threading import Thread

import boto3


def generic_handler(event, context_dict, custom_handler_env=None):
    """
    event comes from the invoker and contains the job information.

    context_dict is generic information about the context
    we are running in, provided by the scheduler.

    custom_handler_env is an optional dict of environment variables
    to set based on the platform we are running on.
    """
    pid = os.getpid()

    response_status = {'exception': None}
    try:
        if event['storage_config']['storage_backend'] != 's3':
            raise NotImplementedError(("Using {} as storage backend is not supported " +
                                       "yet.").format(event['storage_config']['storage_backend']))
        s3_client = boto3.client("s3")
        s3_bucket = event['storage_config']['backend_config']['bucket']

        logger.info("invocation started")

        # download the input
        status_key = event['status_key']
        func_key = event['func_key']
        data_key = event['data_key']
        cancel_key = event['cancel_key']

        # Check for cancel
        if key_exists(s3_client, s3_bucket, cancel_key):
            logger.info("invocation cancelled")
            raise Exception("CANCELLED", "Function cancelled")
        time_of_last_cancel_check = time.time()

        data_byte_range = event['data_byte_range']
        output_key = event['output_key']

        if version.__version__ != event['pywren_version']:
            raise Exception("WRONGVERSION", "Pywren version mismatch",
                            version.__version__, event['pywren_version'])

        start_time = time.time()
        response_status['start_time'] = start_time

        runtime_s3_bucket = event['runtime']['s3_bucket']
        runtime_s3_key = event['runtime']['s3_key']
        if event.get('runtime_url'):
            # NOTE(shivaram): Right now we only support S3 urls.
            runtime_s3_bucket_used, runtime_s3_key_used = wrenutil.split_s3_url(
                event['runtime_url'])
        else:
            runtime_s3_bucket_used = runtime_s3_bucket
            runtime_s3_key_used = runtime_s3_key

        job_max_runtime = event.get("job_max_runtime", 290) # default for lambda

        response_status['func_key'] = func_key
        response_status['data_key'] = data_key
        response_status['output_key'] = output_key
        response_status['status_key'] = status_key

        data_key_size = get_key_size(s3_client, s3_bucket, data_key)
        #logger.info("bucket=", s3_bucket, "key=", data_key,  "status: ", data_key_size, "bytes" )
        while data_key_size is None:
            logger.warning("WARNING COULD NOT GET FIRST KEY")
            time.sleep(1)  # brief backoff before polling S3 again
            data_key_size = get_key_size(s3_client, s3_bucket, data_key)
        if not event['use_cached_runtime']:
            shutil.rmtree(RUNTIME_LOC, True)
            os.mkdir(RUNTIME_LOC)


        free_disk_bytes = free_disk_space(TEMP)
        response_status['free_disk_bytes'] = free_disk_bytes

        response_status['runtime_s3_key_used'] = runtime_s3_key_used
        response_status['runtime_s3_bucket_used'] = runtime_s3_bucket_used
        if custom_handler_env is not None:
            delete_old_runtimes = custom_handler_env.get('delete_old_runtimes', False)
        else:
            delete_old_runtimes = False


        runtime_cached = download_runtime_if_necessary(s3_client, runtime_s3_bucket_used,
                                                       runtime_s3_key_used, delete_old_runtimes)
        logger.info("Runtime ready, cached={}".format(runtime_cached))
        response_status['runtime_cached'] = runtime_cached

        cwd = os.getcwd()
        jobrunner_path = os.path.join(cwd, "jobrunner.py")

        extra_env = event.get('extra_env', {})
        extra_env['PYTHONPATH'] = "{}".format(os.getcwd())

        call_id = event['call_id']
        callset_id = event['callset_id']
        response_status['call_id'] = call_id
        response_status['callset_id'] = callset_id
        runtime_meta = s3_client.head_object(Bucket=runtime_s3_bucket_used,
                                             Key=runtime_s3_key_used)
        ETag = str(runtime_meta['ETag'])[1:-1]
        conda_runtime_dir = CONDA_RUNTIME_DIR.format(ETag)
        conda_python_path = os.path.join(conda_runtime_dir, "bin")
        conda_python_runtime = os.path.join(conda_python_path, "python")

        # pass a full json blob
        jobrunner_config_filename = JOBRUNNER_CONFIG_FILENAME.format(pid)
        jobrunner_stats_filename = JOBRUNNER_STATS_FILENAME.format(pid)
        python_module_path = PYTHON_MODULE_PATH.format(pid)

        jobrunner_config = {'func_bucket' : s3_bucket,
                            'func_key' : func_key,
                            'data_bucket' : s3_bucket,
                            'data_key' : data_key,
                            'data_byte_range' : data_byte_range,
                            'python_module_path' : python_module_path,
                            'output_bucket' : s3_bucket,
                            'output_key' : output_key,
                            'stats_filename' : jobrunner_stats_filename}

        with open(jobrunner_config_filename, 'w') as jobrunner_fid:
            json.dump(jobrunner_config, jobrunner_fid)

        if os.path.exists(jobrunner_stats_filename):
            os.remove(jobrunner_stats_filename)

        cmdstr = "{} {} {}".format(conda_python_runtime,
                                   jobrunner_path,
                                   jobrunner_config_filename)

        setup_time = time.time()
        response_status['setup_time'] = setup_time - start_time

        local_env = os.environ.copy()
        if custom_handler_env is not None:
            local_env.update(custom_handler_env)

        local_env.update(extra_env)

        local_env['PATH'] = "{}{}{}".format(conda_python_path, os.pathsep,
                                            local_env.get("PATH", ""))

        logger.debug("command str=%s", cmdstr)
        # This is copied from http://stackoverflow.com/a/17698359/4577954
        # reasons for setting process group: http://stackoverflow.com/a/4791612

        if os.name == 'nt':
            process = subprocess.Popen(cmdstr, shell=True, env=local_env,
                                       bufsize=1, stdout=subprocess.PIPE,
                                       creationflags=subprocess.CREATE_NEW_PROCESS_GROUP)
        else:
            process = subprocess.Popen(cmdstr, # pylint: disable=subprocess-popen-preexec-fn
                                       shell=True, env=local_env, bufsize=1,
                                       stdout=subprocess.PIPE, preexec_fn=os.setsid)
        logger.info("launched process")

        def kill_process(process):
            if os.name == 'nt':
                subprocess.call(['taskkill', '/F', '/T', '/PID', str(process.pid)]) # pylint: disable=no-member
            else:
                os.killpg(os.getpgid(process.pid), signal.SIGTERM)

        def consume_stdout(stdout, queue):
            with stdout:
                for line in iter(stdout.readline, b''):
                    queue.put(line)

        q = Queue()

        t = Thread(target=consume_stdout, args=(process.stdout, q))
        t.daemon = True
        t.start()

        stdout = b""
        while t.is_alive() or process.returncode is None:
            logger.info("Running {} {}".format(time.time(), process.returncode))
            try:
                line = q.get_nowait()
                stdout += line
                logger.info(line)
            except Empty:
                time.sleep(PROCESS_STDOUT_SLEEP_SECS)
            process.poll() # this updates returncode but does not block
            if not t.is_alive() and process.returncode is None:
                time.sleep(PROCESS_STDOUT_SLEEP_SECS)

            total_runtime = time.time() - start_time
            time_since_cancel_check = time.time() - time_of_last_cancel_check
            if time_since_cancel_check > CANCEL_CHECK_EVERY_SECS:

                if key_exists(s3_client, s3_bucket, cancel_key):
                    logger.info("invocation cancelled")
                    # kill the process
                    kill_process(process)
                    raise Exception("CANCELLED",
                                    "Function cancelled")
                time_of_last_cancel_check = time.time()

            if total_runtime > job_max_runtime:
                logger.warning("Process exceeded maximum runtime of {} sec".format(job_max_runtime))
                # Send the signal to all the process groups
                kill_process(process)
                raise Exception("OUTATIME",
                                "Process executed for too long and was killed")


        response_status['retcode'] = process.returncode
        logger.info("command execution finished, retcode= {}".format(process.returncode))
        if process.returncode != 0:
            logger.warning("process returned non-zero retcode {}".format(process.returncode))
            logger.info(stdout.decode('ascii'))
            raise Exception("RETCODE",
                            "Python process returned a non-zero return code")

        if os.path.exists(jobrunner_stats_filename):
            with open(jobrunner_stats_filename, 'r') as fid:
                for line in fid:
                    key, value = line.strip().split(" ")
                    response_status[key] = float(value)

        end_time = time.time()

        response_status['stdout'] = stdout.decode("ascii")



        response_status['exec_time'] = time.time() - setup_time
        response_status['end_time'] = end_time

        response_status['host_submit_time'] = event['host_submit_time']
        response_status['server_info'] = get_server_info()

        response_status.update(context_dict)
    except Exception as e:
        # internal runtime exceptions
        response_status['exception'] = str(e)
        response_status['exception_args'] = e.args
        response_status['exception_traceback'] = traceback.format_exc()
    finally:
        # create a fresh client in case the earlier client construction failed;
        # note this still assumes s3_bucket and status_key were read from the event
        boto3.client("s3").put_object(Bucket=s3_bucket, Key=status_key,
                                      Body=json.dumps(response_status))
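
For context, handlers like the one above are typically driven by a thin
per-platform entry point. A minimal sketch of such a wrapper for AWS Lambda
follows; the wrapper name and the choice of context fields copied into
context_dict are illustrative assumptions, not part of the example above:

def lambda_handler(event, context):
    # Forward a few generic fields from the Lambda context object into the
    # context_dict that generic_handler merges into its status blob; which
    # fields to forward is an assumption here.
    context_dict = {
        'aws_request_id': context.aws_request_id,
        'log_group_name': context.log_group_name,
        'log_stream_name': context.log_stream_name,
    }
    return generic_handler(event, context_dict)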
Example 2
def generic_handler(event, context_dict):
    """
    context_dict is generic information about the context
    we are running in, provided by the scheduler.
    """

    try:
        response_status = {'exception': None}
        s3 = boto3.resource('s3')

        logger.info("invocation started")

        # download the input
        status_key = event['status_key']
        func_key = event['func_key']
        data_key = event['data_key']
        data_byte_range = event['data_byte_range']
        output_key = event['output_key']

        if version.__version__ != event['pywren_version']:
            raise Exception("WRONGVERSION", "Pywren version mismatch",
                            version.__version__, event['pywren_version'])

        start_time = time.time()
        response_status['start_time'] = start_time

        func_filename = "/tmp/func.pickle"
        data_filename = "/tmp/data.pickle"
        output_filename = "/tmp/output.pickle"

        runtime_s3_bucket = event['runtime_s3_bucket']
        runtime_s3_key = event['runtime_s3_key']
        if event.get('runtime_url'):
            # NOTE(shivaram): Right now we only support S3 urls.
            runtime_s3_bucket_used, runtime_s3_key_used = wrenutil.split_s3_url(
                event['runtime_url'])
        else:
            runtime_s3_bucket_used = runtime_s3_bucket
            runtime_s3_key_used = runtime_s3_key

        job_max_runtime = event.get("job_max_runtime",
                                    290)  # default for lambda

        response_status['func_key'] = func_key
        response_status['data_key'] = data_key
        response_status['output_key'] = output_key
        response_status['status_key'] = status_key

        b, k = data_key
        KS = s3util.key_size(b, k)
        #logger.info("bucket=", b, "key=", k,  "status: ", KS, "bytes" )
        while KS is None:
            logger.warning("WARNING COULD NOT GET FIRST KEY")
            time.sleep(1)  # brief backoff before polling S3 again
            KS = s3util.key_size(b, k)
        if not event['use_cached_runtime']:
            subprocess.check_output("rm -Rf {}/*".format(RUNTIME_LOC),
                                    shell=True)

        # get the input and save to disk
        # FIXME: here is where we would attach the "cancelled" metadata
        s3.meta.client.download_file(func_key[0], func_key[1], func_filename)
        func_download_time = time.time() - start_time
        response_status['func_download_time'] = func_download_time

        logger.info("func download complete, took {:3.2f} sec".format(
            func_download_time))

        if data_byte_range is None:
            s3.meta.client.download_file(data_key[0], data_key[1],
                                         data_filename)
        else:
            range_str = 'bytes={}-{}'.format(*data_byte_range)
            dres = s3.meta.client.get_object(Bucket=data_key[0],
                                             Key=data_key[1],
                                             Range=range_str)
            with open(data_filename, 'wb') as data_fid:
                data_fid.write(dres['Body'].read())

        data_download_time = time.time() - start_time
        logger.info("data download complete, took {:3.2f} sec".format(
            data_download_time))
        response_status['data_download_time'] = data_download_time

        # now load the function blob and unpack its bundled modules
        with open(func_filename, 'r') as fid:
            d = json.load(fid)
        shutil.rmtree(PYTHON_MODULE_PATH, True)  # delete old modules
        os.mkdir(PYTHON_MODULE_PATH)
        # get modules and save
        for m_filename, m_data in d['module_data'].items():
            m_path = os.path.dirname(m_filename)

            if len(m_path) > 0 and m_path[0] == "/":
                m_path = m_path[1:]
            to_make = os.path.join(PYTHON_MODULE_PATH, m_path)
            #print "to_make=", to_make, "m_path=", m_path
            try:
                os.makedirs(to_make)
            except OSError as e:
                if e.errno == 17:  # EEXIST: directory already exists
                    pass
                else:
                    raise e
            full_filename = os.path.join(to_make, os.path.basename(m_filename))
            #print "creating", full_filename
            with open(full_filename, 'wb') as fid:
                fid.write(b64str_to_bytes(m_data))
        logger.info("Finished writing {} module files".format(
            len(d['module_data'])))
        logger.debug(
            subprocess.check_output("find {}".format(PYTHON_MODULE_PATH),
                                    shell=True))
        logger.debug(
            subprocess.check_output("find {}".format(os.getcwd()), shell=True))

        response_status['runtime_s3_key_used'] = runtime_s3_key_used
        response_status['runtime_s3_bucket_used'] = runtime_s3_bucket_used

        runtime_cached = download_runtime_if_necessary(s3,
                                                       runtime_s3_bucket_used,
                                                       runtime_s3_key_used)
        logger.info("Runtime ready, cached={}".format(runtime_cached))
        response_status['runtime_cached'] = runtime_cached

        cwd = os.getcwd()
        jobrunner_path = os.path.join(cwd, "jobrunner.py")

        extra_env = event.get('extra_env', {})
        extra_env['PYTHONPATH'] = "{}:{}".format(os.getcwd(),
                                                 PYTHON_MODULE_PATH)

        call_id = event['call_id']
        callset_id = event['callset_id']
        response_status['call_id'] = call_id
        response_status['callset_id'] = callset_id

        CONDA_PYTHON_PATH = "/tmp/condaruntime/bin"
        CONDA_PYTHON_RUNTIME = os.path.join(CONDA_PYTHON_PATH, "python")

        cmdstr = "{} {} {} {} {}".format(CONDA_PYTHON_RUNTIME, jobrunner_path,
                                         func_filename, data_filename,
                                         output_filename)

        setup_time = time.time()
        response_status['setup_time'] = setup_time - start_time

        local_env = os.environ.copy()

        local_env["OMP_NUM_THREADS"] = "1"
        local_env.update(extra_env)

        local_env['PATH'] = "{}:{}".format(CONDA_PYTHON_PATH,
                                           local_env.get("PATH", ""))

        logger.debug("command str=%s", cmdstr)
        # This is copied from http://stackoverflow.com/a/17698359/4577954
        # reasons for setting process group: http://stackoverflow.com/a/4791612
        process = subprocess.Popen(cmdstr,
                                   shell=True,
                                   env=local_env,
                                   bufsize=1,
                                   stdout=subprocess.PIPE,
                                   preexec_fn=os.setsid)

        logger.info("launched process")

        def consume_stdout(stdout, queue):
            with stdout:
                for line in iter(stdout.readline, b''):
                    queue.put(line)

        q = Queue()

        t = Thread(target=consume_stdout, args=(process.stdout, q))
        t.daemon = True
        t.start()

        stdout = b""
        while t.is_alive():
            try:
                line = q.get_nowait()
                stdout += line
                logger.info(line)
            except Empty:
                time.sleep(PROCESS_STDOUT_SLEEP_SECS)
            total_runtime = time.time() - start_time
            if total_runtime > job_max_runtime:
                logger.warning(
                    "Process exceeded maximum runtime of {} sec".format(
                        job_max_runtime))
                # Send the signal to all the process groups
                os.killpg(os.getpgid(process.pid), signal.SIGTERM)
                raise Exception(
                    "OUTATIME", "Process executed for too long and was killed")

        logger.info("command execution finished")

        s3.meta.client.upload_file(output_filename, output_key[0],
                                   output_key[1])
        logger.debug("output uploaded to %s %s", output_key[0], output_key[1])

        end_time = time.time()

        response_status['stdout'] = stdout.decode("ascii")

        response_status['exec_time'] = time.time() - setup_time
        response_status['end_time'] = end_time

        response_status['host_submit_time'] = event['host_submit_time']
        response_status['server_info'] = get_server_info()

        response_status.update(context_dict)
    except Exception as e:
        # internal runtime exceptions
        response_status['exception'] = str(e)
        response_status['exception_args'] = e.args
        response_status['exception_traceback'] = traceback.format_exc()
    finally:
        # upload the status blob; note this assumes s3 and status_key were
        # initialized before any failure occurred
        s3.meta.client.put_object(Bucket=status_key[0],
                                  Key=status_key[1],
                                  Body=json.dumps(response_status))
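
The stdout-draining pattern shared by these handlers (a daemon thread feeding
a Queue, per the Stack Overflow links in the comments) can be exercised on its
own. A minimal self-contained sketch, assuming a POSIX shell for the child
command:

import subprocess
import time
from queue import Queue, Empty
from threading import Thread

def consume_stdout(stdout, queue):
    # drain the pipe line by line until the child closes it
    with stdout:
        for line in iter(stdout.readline, b''):
            queue.put(line)

process = subprocess.Popen("echo one; sleep 1; echo two",
                           shell=True, stdout=subprocess.PIPE)
q = Queue()
t = Thread(target=consume_stdout, args=(process.stdout, q), daemon=True)
t.start()

captured = b""
while t.is_alive() or not q.empty():
    try:
        captured += q.get_nowait()
    except Empty:
        time.sleep(0.1)  # plays the role of PROCESS_STDOUT_SLEEP_SECS
print(captured.decode("ascii"))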
Example 3
def generic_handler(event, context_dict, custom_handler_env=None):
    """
    event comes from the invoker and contains the job information.

    context_dict is generic information about the context
    we are running in, provided by the scheduler.

    custom_handler_env is an optional dict of environment variables
    to set based on the platform we are running on.
    """

    response_status = {'exception': None}
    try:
        if event['storage_config']['storage_backend'] != 's3':
            raise NotImplementedError(("Using {} as storage backend is not supported " +
                                       "yet.").format(event['storage_config']['storage_backend']))
        s3_client = boto3.client("s3")
        s3_bucket = event['storage_config']['backend_config']['bucket']

        logger.info("invocation started")

        # download the input
        status_key = event['status_key']
        func_key = event['func_key']
        data_key = event['data_key']
        data_byte_range = event['data_byte_range']
        output_key = event['output_key']

        if version.__version__ != event['pywren_version']:
            raise Exception("WRONGVERSION", "Pywren version mismatch",
                            version.__version__, event['pywren_version'])

        start_time = time.time()
        response_status['start_time'] = start_time

        runtime_s3_bucket = event['runtime']['s3_bucket']
        runtime_s3_key = event['runtime']['s3_key']
        if event.get('runtime_url'):
            # NOTE(shivaram): Right now we only support S3 urls.
            runtime_s3_bucket_used, runtime_s3_key_used = wrenutil.split_s3_url(
                event['runtime_url'])
        else:
            runtime_s3_bucket_used = runtime_s3_bucket
            runtime_s3_key_used = runtime_s3_key

        job_max_runtime = event.get("job_max_runtime", 290) # default for lambda

        response_status['func_key'] = func_key
        response_status['data_key'] = data_key
        response_status['output_key'] = output_key
        response_status['status_key'] = status_key

        data_key_size = get_key_size(s3_client, s3_bucket, data_key)
        #logger.info("bucket=", s3_bucket, "key=", data_key,  "status: ", data_key_size, "bytes" )
        while data_key_size is None:
            logger.warning("WARNING COULD NOT GET FIRST KEY")
            time.sleep(1)  # brief backoff before polling S3 again
            data_key_size = get_key_size(s3_client, s3_bucket, data_key)
        if not event['use_cached_runtime']:
            subprocess.check_output("rm -Rf {}/*".format(RUNTIME_LOC), shell=True)


        free_disk_bytes = free_disk_space("/tmp")
        response_status['free_disk_bytes'] = free_disk_bytes

        response_status['runtime_s3_key_used'] = runtime_s3_key_used
        response_status['runtime_s3_bucket_used'] = runtime_s3_bucket_used

        runtime_cached = download_runtime_if_necessary(s3_client, runtime_s3_bucket_used,
                                                       runtime_s3_key_used)
        logger.info("Runtime ready, cached={}".format(runtime_cached))
        response_status['runtime_cached'] = runtime_cached

        cwd = os.getcwd()
        jobrunner_path = os.path.join(cwd, "jobrunner.py")

        extra_env = event.get('extra_env', {})
        extra_env['PYTHONPATH'] = "{}".format(os.getcwd())

        call_id = event['call_id']
        callset_id = event['callset_id']
        response_status['call_id'] = call_id
        response_status['callset_id'] = callset_id

        CONDA_PYTHON_PATH = "/tmp/condaruntime/bin"
        CONDA_PYTHON_RUNTIME = os.path.join(CONDA_PYTHON_PATH, "python")

        # pass a full json blob

        jobrunner_config = {'func_bucket' : s3_bucket,
                            'func_key' : func_key,
                            'data_bucket' : s3_bucket,
                            'data_key' : data_key,
                            'data_byte_range' : data_byte_range,
                            'python_module_path' : PYTHON_MODULE_PATH,
                            'output_bucket' : s3_bucket,
                            'output_key' : output_key,
                            'stats_filename' : JOBRUNNER_STATS_FILENAME}

        with open(JOBRUNNER_CONFIG_FILENAME, 'w') as jobrunner_fid:
            json.dump(jobrunner_config, jobrunner_fid)

        if os.path.exists(JOBRUNNER_STATS_FILENAME):
            os.remove(JOBRUNNER_STATS_FILENAME)

        cmdstr = "{} {} {}".format(CONDA_PYTHON_RUNTIME,
                                   jobrunner_path,
                                   JOBRUNNER_CONFIG_FILENAME)

        setup_time = time.time()
        response_status['setup_time'] = setup_time - start_time

        local_env = os.environ.copy()
        if custom_handler_env is not None:
            local_env.update(custom_handler_env)

        local_env.update(extra_env)

        local_env['PATH'] = "{}:{}".format(CONDA_PYTHON_PATH, local_env.get("PATH", ""))

        logger.debug("command str=%s", cmdstr)
        # This is copied from http://stackoverflow.com/a/17698359/4577954
        # reasons for setting process group: http://stackoverflow.com/a/4791612
        process = subprocess.Popen(cmdstr, shell=True, env=local_env, bufsize=1,
                                   stdout=subprocess.PIPE, preexec_fn=os.setsid)

        logger.info("launched process")
        def consume_stdout(stdout, queue):
            with stdout:
                for line in iter(stdout.readline, b''):
                    queue.put(line)

        q = Queue()

        t = Thread(target=consume_stdout, args=(process.stdout, q))
        t.daemon = True
        t.start()

        stdout = b""
        while t.is_alive():
            try:
                line = q.get_nowait()
                stdout += line
                logger.info(line)
            except Empty:
                time.sleep(PROCESS_STDOUT_SLEEP_SECS)
            total_runtime = time.time() - start_time
            if total_runtime > job_max_runtime:
                logger.warning("Process exceeded maximum runtime of {} sec".format(job_max_runtime))
                # Send the signal to all the process groups
                os.killpg(os.getpgid(process.pid), signal.SIGTERM)
                raise Exception("OUTATIME",
                                "Process executed for too long and was killed")


        logger.info("command execution finished")

        if os.path.exists(JOBRUNNER_STATS_FILENAME):
            with open(JOBRUNNER_STATS_FILENAME, 'r') as fid:
                for line in fid:
                    key, value = line.strip().split(" ")
                    response_status[key] = float(value)

        end_time = time.time()

        response_status['stdout'] = stdout.decode("ascii")


        response_status['exec_time'] = time.time() - setup_time
        response_status['end_time'] = end_time

        response_status['host_submit_time'] = event['host_submit_time']
        response_status['server_info'] = get_server_info()

        response_status.update(context_dict)
    except Exception as e:
        # internal runtime exceptions
        response_status['exception'] = str(e)
        response_status['exception_args'] = e.args
        response_status['exception_traceback'] = traceback.format_exc()
    finally:
        # create a fresh client in case the earlier client construction failed;
        # note this still assumes s3_bucket and status_key were read from the event
        boto3.client("s3").put_object(Bucket=s3_bucket, Key=status_key,
                                      Body=json.dumps(response_status))
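
Examples 1 and 3 recover per-call timing stats from a whitespace-separated
key/value file written by jobrunner.py. A minimal sketch of that round-trip,
with a hypothetical filename and invented stat names:

import json

stats_filename = "/tmp/jobrunner.stats.txt"  # hypothetical path
with open(stats_filename, 'w') as fid:
    fid.write("func_download_time 0.42\n")   # invented example stats
    fid.write("exec_time 1.87\n")

response_status = {'exception': None}
with open(stats_filename, 'r') as fid:
    for line in fid:
        key, value = line.strip().split(" ")
        response_status[key] = float(value)  # same parse as in the handlers

print(json.dumps(response_status))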