def load_library(signature, cache_params):
    """Load existing dynamic library from disk.

    Returns library module if found, otherwise None.

    If found, the module is placed in memory cache for
    later lookup_lib calls.
    """
    # Initialize up front so the final `lib is not None` check can never
    # hit an unbound local, even if error() were configured not to raise.
    lib = None
    lib_filename = create_lib_filename(signature, cache_params)
    if not os.path.exists(lib_filename):
        debug("File %s does not exist" % (lib_filename, ))
        return None
    debug("Loading %s from %s" % (signature, lib_filename))

    loader = cache_params["lib_loader"]
    if loader == "ctypes":
        try:
            lib = ctypes.cdll.LoadLibrary(lib_filename)
        except os.error as e:
            # Load failed: emit a diagnostic and fall through returning
            # None so the caller can decide how to proceed.
            lib = None
            emsg = analyse_load_error(e, lib_filename, cache_params)
            warning(emsg)
        else:
            debug("Loaded %s from %s" % (signature, lib_filename))
    elif loader == "import":
        # Make the cache dir importable, but avoid growing sys.path
        # without bound when many libraries load from the same dir.
        lib_dir = os.path.dirname(lib_filename)
        if lib_dir not in sys.path:
            sys.path.append(lib_dir)
        # Will raise an exception if it does not load correctly
        lib = __import__(signature)
        debug("Loaded %s from %s" % (signature, lib_filename))
    else:
        error("Invalid loader: %s" % loader)

    if lib is not None:
        # Disk loading succeeded, register loaded library in memory
        # cache for next time
        _lib_cache[signature] = lib
    return lib
def create_comms_and_role(comm, comm_dir, buildon):
    """Determine which role each process should take, and create
    the right copy_comm and wait_comm for the build strategy.

    buildon must be one of "root", "node", or "process".

    Returns (copy_comm, wait_comm, role).
    """
    # With no communicator there is nothing to coordinate: every
    # process is its own builder.
    if comm is None:
        return None, None, "builder"

    node_comm, node_root = create_node_comm(comm, comm_dir)

    # Dispatch to the strategy-specific factory for the chosen buildon.
    strategies = {
        "root": create_comms_and_role_root,
        "node": create_comms_and_role_node,
        "process": create_comms_and_role_process,
    }
    factory = strategies.get(buildon)
    if factory is None:
        error("Invalid parameter buildon=%s" % (buildon, ))
    copy_comm, wait_comm, role = factory(comm, node_comm, node_root)
    return copy_comm, wait_comm, role
def as_bool(value):
    """Convert value to a bool, accepting actual bools and the common
    string spellings "True"/"true"/"1" and "False"/"false"/"0"."""
    if isinstance(value, bool):
        return value
    truthy = ("True", "true", "1")
    falsy = ("False", "false", "0")
    if value in truthy:
        return True
    if value in falsy:
        return False
    error("Invalid boolean value %s" % (value, ))
def check_params_keys(default, params):
    "Check that keys in params exist in defaults."
    for category, names in params.items():
        # The "generator" category is free-form and never validated.
        if category == "generator":
            continue
        if category not in default:
            error("Invalid parameter category '%s'." % category)
        if names is not None:
            # Any key not present among the defaults is a user error.
            unknown = set(names) - set(default[category])
            if unknown:
                error("Invalid parameter names %s in category '%s'." %
                      (sorted(unknown), category))
def create_comms_and_role_process(comm, node_comm, node_root):
    """Approach: each process builds its own module, no communication.

    To ensure no race conditions in this case independently of cache
    dir setup, we include an error check on the size of the
    autodetected node_comm. This should always be 1, or we provide the
    user with an informative message.

    TODO: Append program uid and process rank to basedir instead?
    """
    # More than one process detected in the same cache dir means a
    # race is possible; refuse rather than risk corrupted builds.
    if node_comm.size > 1:
        error("Asking for per-process building but processes share cache dir."
              " Please configure dijitso dirs to be distinct per process.")
    # With a size-1 node comm, this process must be its own node root.
    assert node_comm.rank == 0
    assert comm.rank == node_root
    # No copy/wait communicators needed; everybody builds.
    return None, None, "builder"
def compress_source_code(src_filename, cache_params):
    """Keep, delete or compress source code based on value of cache
    parameter 'src_storage'.

    Can be "keep", "delete", or "compress".
    """
    src_storage = cache_params["src_storage"]
    if src_storage == "keep":
        # Leave the source file untouched on disk.
        return src_filename
    if src_storage == "delete":
        try_delete_file(src_filename)
        return None
    if src_storage == "compress":
        # Write the gzipped copy first, then remove the original.
        compressed = gzip_file(src_filename)
        try_delete_file(src_filename)
        return compressed
    error("Invalid src_storage parameter. Expecting 'keep', 'delete', or 'compress'.")
def jit(jitable, name, params, generate=None, send=None, receive=None, wait=None):
    """Just-in-time compile and import of a shared library with a cache mechanism.

    A signature is computed from the name, params["generator"], and
    params["build"]. The name should be a unique identifier for the
    jitable, preferably produced by a good hash function.

    The signature is used to identify if the library has already been
    compiled and cached. A two-level memory and disk cache ensures good
    performance for repeated lookups within a single program as well as
    persistence across program runs.

    If no library has been cached, the passed 'generate' function is
    called to generate the source code:

        header, source, dependencies = \
            generate(jitable, name, signature, params["generator"])

    It is expected to translate the 'jitable' object into C or C++
    (default) source code which will subsequently be compiled as a
    shared library and stored in the disk cache. The returned
    'dependencies' should be a tuple of signatures returned from other
    completed dijitso.jit calls, and are linked to when building.

    The compiled shared library is then loaded with ctypes and returned.

    For use in a parallel (MPI) context, three functions send, receive,
    and wait can be provided. Each process can take on a different role
    depending on whether generate, or receive, or neither is provided.

    * Every process that gets a generate function is called a 'builder',
      and will generate and compile code as described above on a cache
      miss. If the function send is provided, it will then send the
      shared library binary file as a binary blob by calling
      send(numpy_array).

    * Every process that gets a receive function is called a 'receiver',
      and will call 'numpy_array = receive()' expecting the binary blob
      with a compiled binary shared library which will subsequently be
      written to file in the local disk cache.

    * The rest of the processes are called 'waiters' and will do nothing.

    * If provided, all processes will call wait() before attempting to
      load the freshly compiled library from disk cache.

    The intention of the above pattern is to be flexible, allowing
    several different strategies for sharing build results. The user of
    dijitso can determine groups of processes that share a disk cache,
    and assign one process per physical disk cache directory to write
    to that directory, avoiding multiple processes writing to the same
    files.

    This forms the basis for three main strategies:

    * Build on every process.

    * Build on one process per physical cache directory.

    * Build on a single global root node and send a copy of the binary
      to one process per physical cache directory.

    It is highly recommended to avoid having multiple builder processes
    sharing a physical cache directory.
    """
    # TODO: Could simplify interface here and roll
    # (jitable, name, params["generator"]) into a single jitobject?

    # TODO: send/receive doesn't combine well with generate
    # triggering additional jit calls for dependencies.
    # It's possible that dependencies are hard to determine without
    # generate doing some analysis that we want to avoid.
    # Drop send/receive? Probably not that useful anyway.

    # Complete params with hardcoded defaults and config file defaults
    params = validate_params(params)

    # 0) Look for library in memory or disk cache
    # FIXME: use only name as signature for now
    # TODO: just remove one of signature or name from API?
    # signature = jit_signature(name, params)
    name = as_unicode(name)
    signature = name
    cache_params = params["cache"]
    lib = lookup_lib(signature, cache_params)
    err_info = None

    if lib is None:
        # Since we didn't find the library in cache, we must build it.

        if receive and generate:
            # We're not supposed to generate if we're receiving
            error("Please provide only one of generate or receive.")
        elif generate:
            # 1) Generate source code
            header, source, dependencies = generate(jitable, name, signature,
                                                    params["generator"])
            # Ensure we got unicode from generate
            header = as_unicode(header)
            source = as_unicode(source)
            dependencies = [as_unicode(dep) for dep in dependencies]

            # 2) Compile shared library and 3) store in dijitso
            # inc/src/lib dir on success
            # NB! It's important to not raise exception on compilation
            # failure, such that we can reach wait() together with
            # other processes if any.
            status, output, lib_filename, err_info = \
                build_shared_library(signature, header, source, dependencies,
                                     params)

            # 4a) Send library over network if we have a send function
            if send:
                if status == 0:
                    lib_data = read_library_binary(lib_filename)
                else:
                    # Size-1 array is the agreed-upon sentinel telling
                    # receivers that compilation failed on the builder.
                    lib_data = numpy.zeros((1, ))
                send(lib_data)
        elif receive:
            # 4b) Get library as binary blob from given receive
            # function and store in cache
            lib_data = receive()
            # Empty if compilation failed
            status = -1 if lib_data.shape == (1, ) else 0
            if status == 0:
                write_library_binary(lib_data, signature, cache_params)
        else:
            # Do nothing (we'll be waiting below for other process to
            # build)
            if not wait:
                error("Please provide wait if not providing one of generate or receive.")

        # 5) Notify waiters that we're done / wait for builder to
        # notify us
        if wait:
            wait()

        # Finally load library from disk cache (places in memory
        # cache)
        # NB! This returns None if the file does not exist,
        # i.e. if compilation failed on builder process
        lib = load_library(signature, cache_params)

    if err_info:
        # TODO: Parse output to find error(s) for better error messages
        raise DijitsoError("Dijitso JIT compilation failed, see '%s' for details"
                           % err_info['fail_dir'], err_info)

    # Return built library and its signature
    return lib, signature