def _init_globalops(self, pop): """ Generate the C++ codes for the initialization of global operations within the Population::init_population() method. """ if len(pop.global_operations) == 0: return "" code = "\n// Initialize global operations\n" for op in pop.global_operations: ids = { 'op': op['function'], 'var': op['variable'], 'type': Global.config['precision'] } if Global._check_paradigm("openmp"): code += """_%(op)s_%(var)s = 0.0; """ % ids elif Global._check_paradigm("cuda"): code += """_%(op)s_%(var)s = 0.0; cudaMalloc((void**)&_gpu_%(op)s_%(var)s, sizeof(%(type)s)); """ % ids else: raise NotImplementedError return tabify(code, 2)
def _generate(self):
    """
    Overrides the default code generation; called during the code
    generation procedure.

    Dispatches to ``_generate_omp()`` or ``_generate_cuda()`` depending on
    the configured paradigm and raises NotImplementedError for any other
    paradigm.
    """
    if Global._check_paradigm("openmp"):
        self._generate_omp()
        return

    if Global._check_paradigm("cuda"):
        self._generate_cuda()
        return

    raise NotImplementedError
def _body_def_glops(self):
    """
    Collect the pre-defined templates of all global operations used by the
    populations, for insertion into the ANNarchy body file.

    Return:

    * single thread / OpenMP: one string.
    * CUDA: a tuple (header, body) of strings.

    Both are empty if no population uses a global operation. Each used
    operation is emitted once, in sorted order.

    Raises NotImplementedError if the paradigm is neither openmp nor cuda.
    """
    # Every operation is needed only once, independent of how many
    # populations make use of it.
    used_ops = sorted({
        op['function']
        for pop in self._populations
        for op in pop.global_operations
    })

    # no global operations
    if not used_ops:
        if Global._check_paradigm("openmp"):
            return ""
        if Global._check_paradigm("cuda"):
            return "", ""
        raise NotImplementedError(
            "CodeGenerator._body_def_glops(): no implementation for " +
            Global.config["paradigm"])

    type_def = {'type': Global.config['precision']}

    # the computation kernel depends on the paradigm
    if Global._check_paradigm("openmp"):
        if Global.config["num_threads"] == 1:
            templates = global_operation_templates_st
        else:
            templates = global_operation_templates_openmp

        return "".join(templates[name] % type_def for name in used_ops)

    if Global._check_paradigm("cuda"):
        header = "".join(
            global_operation_templates_cuda[name]['header'] % type_def
            for name in used_ops)
        body = "".join(
            global_operation_templates_cuda[name]['body'] % type_def
            for name in used_ops)
        return header, body

    raise NotImplementedError(
        "CodeGenerator._body_def_glops(): no implementation for " +
        Global.config["paradigm"])
def _generate(self):
    """
    Overrides the default code generation.

    The pooling code is generated first and then handed to the
    paradigm-specific generator (``_generate_omp`` or ``_generate_cuda``).
    Any other paradigm is reported through ``Global._error``.
    """
    pooling_code = self._generate_pooling_code()
    convolve_code, sum_code = pooling_code

    if Global._check_paradigm("openmp"):
        self._generate_omp(convolve_code, sum_code)
        return

    if Global._check_paradigm("cuda"):
        self._generate_cuda(convolve_code, sum_code)
        return

    Global._error("PoolingProjection: not implemented for the configured paradigm")
def _generate(self):
    """
    Overrides the default code generation.

    Generates the pooling code and forwards it to ``_generate_omp`` or
    ``_generate_cuda`` according to the configured paradigm; other
    paradigms are reported through ``Global._error``.
    """
    convolve_code, sum_code = self._generate_pooling_code()

    use_openmp = Global._check_paradigm("openmp")
    if use_openmp:
        self._generate_omp(convolve_code, sum_code)
    elif Global._check_paradigm("cuda"):
        self._generate_cuda(convolve_code, sum_code)
    else:
        Global._error(
            "Pooling: not implemented for the configured paradigm")
def __init__(self, pre, post, target, window=0.0, name=None, copied=False):
    """
    :param pre: pre-synaptic population (must be spiking).
    :param post: post-synaptic population (must be rate-coded).
    :param target: type of the connection.
    :param window: duration of the time window to collect spikes (default: dt).
    """
    # The base projection is created without a synapse type
    SpecificProjection.__init__(self, pre, post, target, None, name, copied)

    # Only spiking -> rate-coded connections are allowed
    if self.pre.neuron_type.type != 'spike':
        Global._error(
            'The pre-synaptic population of a DecodingProjection must be spiking.'
        )

    if self.post.neuron_type.type != 'rate':
        Global._error(
            'The post-synaptic population of a DecodingProjection must be rate-coded.'
        )

    # A window of 0.0 stands for "one simulation step"
    self.window = Global.config['dt'] if window == 0.0 else window

    # Disable openMP post-synaptic matrix split
    self._no_split_matrix = True

    # Not on CUDA
    if Global._check_paradigm('cuda'):
        Global._error('DecodingProjections are not available on CUDA yet.')
def __init__(self, pre, post, target, name=None, copied=False):
    """
    :param pre: pre-synaptic population (must be rate-coded).
    :param post: post-synaptic population (must be spiking, same size as *pre*).
    :param target: type of the connection.
    """
    # The base projection is created without a synapse type
    SpecificProjection.__init__(self, pre, post, target, None, name, copied)

    # Only rate-coded -> spiking connections between equally sized
    # populations are allowed
    if self.pre.neuron_type.type != 'rate':
        Global._error(
            'The pre-synaptic population of a CurrentInjection must be rate-coded.'
        )

    if self.post.neuron_type.type != 'spike':
        Global._error(
            'The post-synaptic population of a CurrentInjection must be spiking.'
        )

    if self.post.size != self.pre.size:
        Global._error(
            'CurrentInjection: The pre- and post-synaptic populations must have the same size.'
        )

    # PopulationViews are rejected on GPUs
    if Global._check_paradigm("cuda") and any(
            isinstance(side, PopulationView) for side in (pre, post)):
        Global._error(
            "CurrentInjection on GPUs is not allowed for PopulationViews")

    # Prevent automatic split of matrices
    self._no_split_matrix = True
def compute_firing_rate(self, window):
    """
    Tells spiking neurons in the population to compute their mean firing rate over the given window and store the values in the variable `r`.

    This method has an effect on spiking neurons only.

    If this method is not called, `r` will always be 0.0. `r` can of course be accessed and recorded as any other variable.

    *Parameter*:

    * **window**: window in ms over which the spikes will be counted.
    """
    if Global._check_paradigm('cuda'):
        Global._error(
            'compute_firing_rate() is not supported on CUDA yet.')

    if self.neuron_type.type == 'rate':
        Global._error(
            'compute_firing_rate(): the neuron is already rate-coded...')

    # The window is remembered on the Python side; it is forwarded to the
    # C++ instance only once the population is initialized.
    window_length = float(window)
    self._compute_mean_fr = window_length

    if not self.initialized:
        return

    self.cyInstance.compute_firing_rate(self._compute_mean_fr)
def __init__(self, annarchy_dir, clean, compiler, compiler_flags,
             add_sources, extra_libs, path_to_json, silent, cuda_config,
             debug_build, profile_enabled, populations, projections,
             net_id):
    """
    Store the compilation settings and load the user configuration.

    The user configuration is read from *path_to_json* if it is non-empty,
    otherwise from ``~/.config/ANNarchy/annarchy.json`` if that file
    exists. A loaded file replaces the built-in defaults as a whole. If the
    CUDA paradigm is configured, the availability of the configured CUDA
    compiler is checked and the CUDA version is stored in
    ``Global.config['cuda_version']``.
    """
    # Store arguments
    self.annarchy_dir = annarchy_dir
    self.clean = clean
    self.compiler = compiler
    self.compiler_flags = compiler_flags
    self.add_sources = add_sources
    self.extra_libs = extra_libs
    self.silent = silent
    self.cuda_config = cuda_config
    self.debug_build = debug_build
    self.profile_enabled = profile_enabled
    self.populations = populations
    self.projections = projections
    self.net_id = net_id

    # Built-in defaults, used when no configuration file is found
    default_cpp_compiler = 'clang++' if sys.platform == "darwin" else 'g++'
    self.user_config = {
        'openmp': {
            'compiler': default_cpp_compiler,
            'flags': "-march=native -O2",
        },
        'cuda': {
            'compiler': "nvcc",
            'device': 0
        }
    }

    # Locate the configuration file: an explicit path wins, otherwise the
    # file in the home directory is used if present.
    if len(path_to_json) == 0:
        home_config = os.path.expanduser('~/.config/ANNarchy/annarchy.json')
        config_file = home_config if os.path.exists(home_config) else None
    else:
        config_file = path_to_json

    if config_file is not None:
        with open(config_file, 'r') as rfile:
            self.user_config = json.load(rfile)

    # Sanity check if the NVCC compiler is available
    if Global._check_paradigm("cuda"):
        version_cmd = self.user_config['cuda']['compiler'] + " --version 1> /dev/null"

        if os.system(version_cmd) != 0:
            Global._error(
                "CUDA is not available on your system. Please check the CUDA installation or the annarchy.json configuration."
            )

        Global.config['cuda_version'] = check_cuda_version(
            self.user_config['cuda']['compiler'])
def _generate(self):
    """
    Overrides default code generation. This function is called during the
    code generation procedure.

    Only the openmp paradigm is implemented: CUDA raises
    ``Global.ANNarchyException`` and any other paradigm raises
    NotImplementedError.
    """
    # Filter definition
    filter_definition, filter_pyx_definition = self._filter_definition()

    # Convolve_code: a bank of filters needs a different generator
    if self.multiple:
        convolve_code, sum_code = self._generate_bank_code()
    else:
        convolve_code, sum_code = self._generate_convolve_code()

    if Global._check_paradigm("openmp"):
        self._generate_omp(filter_definition, filter_pyx_definition,
                           convolve_code, sum_code)
        return

    if Global._check_paradigm("cuda"):
        raise Global.ANNarchyException(
            "Convolution is not available on CUDA devices yet.", True)

    raise NotImplementedError
def cpp_connector_available(connector_name, desired_format, storage_order):
    """
    Checks if a CPP implementation is available for the desired connection
    pattern (*connector_name*) and the target sparse matrix format
    (*desired_format*).

    Please note that not all formats are available for *pre_to_post* storage
    order. Combinations missing from the lookup table are reported as not
    available, so the caller falls back to the Python construction.
    """
    # The user disabled this feature
    if not Global.config["use_cpp_connectors"]:
        return False

    # paradigm -> storage order -> matrix format -> supported patterns
    random_patterns = ["Random", "Random Convergent"]
    cpp_patterns = {
        'st': {
            'post_to_pre': {
                "lil": list(random_patterns),
                "csr": list(random_patterns),
                "coo": [],
                "hyb": [],
                "ell": [],
                "dense": ["Random"]
            },
            'pre_to_post': {
                "csr": list(random_patterns)
            }
        },
        'omp': {
            "lil": [],
            "csr": [],
            "coo": [],
            "ell": []
        },
        'cuda': {
            'post_to_pre': {
                "csr": list(random_patterns),
                "coo": [],
                "ellr": list(random_patterns),
                "dense": ["Random"]
            }
        }
    }

    if not Global._check_paradigm("openmp"):
        paradigm = "cuda"
    elif Global.config["num_threads"] == 1:
        paradigm = "st"
    else:
        paradigm = "omp"

    try:
        supported = cpp_patterns[paradigm][storage_order][desired_format]
    except KeyError:
        # Fall back to Python construction
        return False

    return connector_name in supported
def _determine_size_in_bytes(self, pop): """ Generate code template to determine size in bytes for the C++ object *pop*. Please note, that this contain only default elements (parameters, variables). User defined elements, parallelization support data structures or similar are not considered. Consequently implementing generators should extent the resulting code template. This is done by filling the 'size_in_bytes' field in the _specific_template. """ if 'size_in_bytes' in pop._specific_template.keys(): return pop._specific_template['size_in_bytes'] from ANNarchy.generator.Utils import tabify code = "" # Parameters code += "// Parameters\n" for attr in pop.neuron_type.description['parameters']: ids = {'ctype': attr['ctype'], 'name': attr['name']} if attr['locality'] == "global": code += "size_in_bytes += sizeof(%(ctype)s);\t// %(name)s\n" % ids else: code += "size_in_bytes += sizeof(%(ctype)s) * %(name)s.capacity();\t// %(name)s\n" % ids # Variables code += "// Variables\n" for attr in pop.neuron_type.description['variables']: ids = {'ctype': attr['ctype'], 'name': attr['name']} if attr['locality'] == "global": code += "size_in_bytes += sizeof(%(ctype)s);\t// %(name)s\n" % ids else: code += "size_in_bytes += sizeof(%(ctype)s) * %(name)s.capacity();\t// %(name)s\n" % ids # Random variables code += "// RNGs\n" if Global._check_paradigm("openmp"): for dist in pop.neuron_type.description['random_distributions']: ids = {'ctype': dist['ctype'], 'name': dist['name']} if dist['locality'] == "local": code += "size_in_bytes += sizeof(%(ctype)s) * %(name)s.capacity();\t// %(name)s\n" % ids else: code += "size_in_bytes += sizeof(%(ctype)s);\t// %(name)s\n" % ids else: for dist in pop.neuron_type.description['random_distributions']: code += "size_in_bytes += sizeof(curandState*);\t// gpu_%(name)s\n" % { 'name': dist['name'] } code = tabify(code, 2) return code
def _header_custom_constants(self):
    """
    Generate the header code for the custom constants registered in
    ``Global._objects['constants']``.

    OpenMP declares the constant as extern together with its setter, CUDA
    only declares the setter. Returns an empty string if no constant is
    defined and raises NotImplementedError for other paradigms.
    """
    constants = Global._objects['constants']
    if len(constants) == 0:
        return ""

    openmp_decl = """ extern %(float_prec)s %(name)s; void set_%(name)s(%(float_prec)s value);"""
    cuda_decl = """ void set_%(name)s(%(float_prec)s value);"""

    declarations = []
    for constant in constants:
        fields = {
            'name': constant.name,
            'float_prec': Global.config['precision']
        }

        if Global._check_paradigm("openmp"):
            declarations.append(openmp_decl % fields)
        elif Global._check_paradigm("cuda"):
            declarations.append(cuda_decl % fields)
        else:
            raise NotImplementedError

    return "".join(declarations)
def _init_random_dist(self, pop): """ Initialize random distribution sources. Parameters: * *pop* Population object Return: * code piece to initialize contained random objects. """ target_container_code = "" dist_code = "" if len(pop.neuron_type.description['random_distributions']) > 0: for rd in pop.neuron_type.description['random_distributions']: if Global._check_paradigm("openmp"): # in principal only important for openmp rng_def = { 'id': pop.id, 'float_prec': Global.config['precision'], 'global_index': '' } # RNG declaration, only for openmp rng_ids = { 'id': pop.id, 'rd_name': rd['name'], 'type': rd['ctype'], 'rd_init': rd['definition'] % rng_def, 'template': rd['template'] % { 'float_prec': Global.config['precision'] } } target_container_code += self._templates['rng'][ rd['locality']]['init'] % rng_ids dist_code += self._templates['rng'][ rd['locality']]['init_dist'] % rng_ids else: # Nothing to do here: # CUDA initializes in his inherited function pass return dist_code, target_container_code
def __init__(self, pre, post, target, variable, synapse=None, name=None,
             copied=False):
    """
    :param pre: pre-synaptic population (must be spiking).
    :param post: post-synaptic population (must be spiking).
    :param target: type of the connection.
    :param variable: stored unchanged in ``self._variable``.
    :param synapse: synapse type forwarded to the base projection; custom
        synapses are rejected unless the projection is a copy.
    :param name: forwarded to the base projection.
    :param copied: forwarded to the base projection; True disables the
        custom synapse check.
    """
    # Instantiate the projection
    SpecificProjection.__init__(self,
                                pre,
                                post,
                                target,
                                synapse=synapse,
                                name=name,
                                copied=copied)
    self._variable = variable

    # Check populations
    if self.pre.neuron_type.type != 'spike':
        Global._error(
            'The pre-synaptic population of an NormProjection must be spiking.'
        )

    # The post-synaptic side has to be inspected here: testing self.pre a
    # second time would never reject a non-spiking post-synaptic population.
    if self.post.neuron_type.type != 'spike':
        Global._error(
            'The post-synaptic population of an NormProjection must be spiking.'
        )

    if synapse is not None and not copied:
        Global._error(
            'NormProjection does not allow the usage of customized spiking synapses yet.'
        )

    # Not on CUDA
    if Global._check_paradigm('cuda'):
        Global._error('NormProjections are not available on CUDA yet.')

    # Prevent automatic split of matrices
    self._no_split_matrix = True
def compute_firing_rate(self, window):
    """
    Tells spiking neurons in the population to compute their mean firing rate over the given window and store the values in the variable `r`.

    This method has an effect on spiking neurons only.

    If this method is not called, `r` will always be 0.0. `r` can of course be accessed and recorded as any other variable.

    *Parameter*:

    * **window**: window in ms over which the spikes will be counted.
    """
    on_cuda = Global._check_paradigm('cuda')
    if on_cuda:
        Global._error('compute_firing_rate() is not supported on CUDA yet.')

    already_rate_coded = self.neuron_type.type == 'rate'
    if already_rate_coded:
        Global._error('compute_firing_rate(): the neuron is already rate-coded...')

    # Keep the window on the Python side and forward it to the C++
    # instance if the latter already exists.
    self._compute_mean_fr = float(window)

    if self.initialized:
        set_window = self.cyInstance.compute_firing_rate
        set_window(self._compute_mean_fr)
def _init_random_dist(self, pop): """ Initialize random distribution sources. Parameters: * *pop* Population object Return: * code piece to initialize contained random objects. """ code = "" if len(pop.neuron_type.description['random_distributions']) > 0: code += """ // Random numbers""" for rd in pop.neuron_type.description['random_distributions']: if Global._check_paradigm("openmp"): # in principal only important for openmp rng_def = { 'id': pop.id, 'float_prec': Global.config['precision'], 'global_index': '' } # RNG declaration, only for openmp rng_ids = { 'id': pop.id, 'rd_name': rd['name'], 'type': rd['ctype'], 'rd_init': rd['definition'] % rng_def, } code += self._templates['rng'][rd['locality']]['init'] % rng_ids else: # Nothing to do here: # CUDA initializes in his inherited function pass return code
def determine_idx_type_for_projection(proj):
    """
    Determine the index and size data types of a projection.

    The suitable index type depends on the maximum number of neurons in the
    pre-synaptic and post-synaptic layer, the size type on the product of
    both sizes.

    Notice (8th June 2021): It appears to be a problem for the current
    Cython version to handle datatypes like "unsigned int". The unsigned
    datatypes are therefore replaced by own definitions, placed in
    *ANNarchy/generator/Template/PyxTemplate.py*

    Return:

    * tuple (cpp_idx_type, cython_idx_type, cpp_size_type, cython_size_type)

    Raises NotImplementedError if the larger population does not fit into
    the 4 byte index type.
    """
    int_types = ("int", "int", "int", "int")

    # The user disabled this optimization.
    if Global.config["only_int_idx_type"]:
        return int_types

    # Currently only implemented for some cases,
    # the others default to "old" configuration
    if proj.synapse_type.type == "spike":
        return int_types

    if Global._check_paradigm("cuda"):
        return int_types

    if proj._storage_format != "lil" and Global.config["num_threads"] > 1:
        return int_types

    # max_size is related to the population sizes. A PopulationView is
    # represented by the size of its underlying population. As one type is
    # used for both dimensions the maximum is decisive.
    def _population_size(pop):
        if isinstance(pop, PopulationView):
            return pop.population.size
        return pop.size

    pre_size = _population_size(proj.pre)
    post_size = _population_size(proj.post)

    max_size_one_dim = max(pre_size, post_size)
    max_size_both_dim = pre_size * post_size

    # (exclusive limit, C++ type, Cython type) in ascending order. The
    # limits are chosen below the C++ boundaries to allow the usage of
    # CSR-like formats without row overflow. The last entry only serves as
    # size type for the 4 byte index type.
    type_ladder = (
        (255, "unsigned char", "_ann_uint8"),  # 1 byte
        (65534, "unsigned short int", "_ann_uint16"),  # 2 byte
        (4294967294, "unsigned int", "_ann_uint32"),  # 4 byte
        (None, "unsigned long int", "_ann_uint64"),
    )

    for step, (limit, cpp_idx_type, cython_idx_type) in enumerate(type_ladder[:-1]):
        if max_size_one_dim >= limit:
            continue

        if max_size_both_dim < limit:
            # can use the same type (should be seldom ...)
            cpp_size_type, cython_size_type = cpp_idx_type, cython_idx_type
        else:
            # next higher data type
            _, cpp_size_type, cython_size_type = type_ladder[step + 1]

        return cpp_idx_type, cython_idx_type, cpp_size_type, cython_size_type

    # this is a hypothetical case I guess (HD: 4th June 2021)
    raise NotImplementedError(
        "The matrix dimension exceeded the representable size ...")
def generate_makefile(self):
    """
    Generate the Makefile.

    The makefile consists of two stages: compile the cython wrapper and
    compile the ANNarchy model files. Both are then linked together to a
    shared library usable in Python.

    The Makefile is written to
    ``<annarchy_dir>/generate/net<net_id>/Makefile``. On platforms other
    than Linux and macOS a warning is emitted and no Makefile is written.

    Side effects: ``self.compiler`` and ``self.compiler_flags`` are replaced
    by the values of the user configuration if they are set to "default".
    """
    # Compiler
    if self.compiler == "default":
        self.compiler = self.user_config['openmp']['compiler']
    if self.compiler_flags == "default":
        self.compiler_flags = self.user_config['openmp']['flags']

    # flags are common to all platforms
    if not self.debug_build:
        cpu_flags = self.compiler_flags
    else:
        cpu_flags = "-O0 -g -D_DEBUG -march=native"

    if self.profile_enabled:
        cpu_flags += " -g"
        #extra_libs.append("-lpapi")

    # OpenMP flag
    omp_flag = ""
    if Global.config['paradigm'] == "openmp":
        omp_flag = "-fopenmp"

    # Disable openMP parallel RNG?
    if Global.config['disable_parallel_rng'] and Global._check_paradigm(
            "openmp"):
        cpu_flags += " -D_DISABLE_PARALLEL_RNG "

    # Cuda Library and Compiler
    #
    # hdin (22.03.2016): we should verify in the future, if compute_35 remains as best
    # configuration for Keplar and upwards.
    cuda_gen = ""
    gpu_flags = ""
    gpu_compiler = "nvcc"
    gpu_ldpath = ""
    if sys.platform.startswith(
            'linux') and Global.config['paradigm'] == "cuda":
        cuda_gen = ""  # TODO: -arch sm_%(ver)s

        if self.debug_build:
            gpu_flags = "-g -G -D_DEBUG"

        # Read the config file for the cuda settings. Every key is
        # optional: the built-in default configuration defines neither
        # 'path' nor 'flags', so a plain lookup would raise a KeyError.
        cuda_settings = self.user_config.get('cuda')
        if cuda_settings is not None:
            gpu_compiler = cuda_settings.get('compiler', gpu_compiler)
            if 'path' in cuda_settings:
                gpu_ldpath = '-L' + cuda_settings['path'] + '/lib'
            user_gpu_flags = cuda_settings.get('flags', "")
            if user_gpu_flags:
                # keep a blank between the debug flags and the user flags
                gpu_flags = (gpu_flags + " " + user_gpu_flags).strip()

        # -Xcompiler expects the arguments separated by ','. Splitting on
        # arbitrary whitespace avoids empty arguments caused by repeated
        # or surrounding blanks.
        if len(cpu_flags.strip()) > 0:
            cpu_flags = ",".join(cpu_flags.split()) + ","

    # Extra libs from extensions such as opencv
    # NOTE(review): `extra_libs` is not defined inside this method;
    # presumably a module-level list filled by extensions - confirm.
    libs = self.extra_libs
    for lib in extra_libs:
        libs += str(lib) + ' '

    # Python environment
    py_version, py_major, python_include, python_lib, python_libpath, cython = python_environment(
    )

    # Include path to Numpy is not standard on all distributions
    numpy_include = np.get_include()

    # ANNarchy default header: sparse matrix formats
    annarchy_include = ANNarchy.__path__[0] + '/include'

    # The connector module needs to reload some header files,
    # ANNarchy.__path__ provides the installation directory
    path_to_cython_ext = "-I " + ANNarchy.__path__[
        0] + '/core/cython_ext/ -I ' + ANNarchy.__path__[0][:-8]

    # Create Makefiles depending on the target platform and parallel framework
    if sys.platform.startswith('linux'):  # Linux systems
        if Global.config['paradigm'] == "cuda":
            makefile_template = linux_cuda_template
        else:
            makefile_template = linux_omp_template

    elif sys.platform == "darwin":  # mac os
        if self.compiler == 'clang++':
            makefile_template = osx_clang_template
            if Global.config[
                    'num_threads'] == 1:  # clang should report that it does not support openmp
                omp_flag = ""
        else:
            makefile_template = osx_gcc_template

    else:  # Windows: to test....
        Global._warning("Compilation on windows is not supported yet.")
        # No template exists for this platform, so there is nothing to
        # write (continuing would fail on the unbound template name).
        return

    # Gather all Makefile flags
    makefile_flags = {
        'compiler': self.compiler,
        'add_sources': self.add_sources,
        'cpu_flags': cpu_flags,
        'cuda_gen': cuda_gen,
        'gpu_compiler': gpu_compiler,
        'gpu_flags': gpu_flags,
        'gpu_ldpath': gpu_ldpath,
        'openmp': omp_flag,
        'extra_libs': libs,
        'py_version': py_version,
        'py_major': py_major,
        'cython': cython,
        'python_include': python_include,
        'python_lib': python_lib,
        'python_libpath': python_libpath,
        'numpy_include': numpy_include,
        'annarchy_include': annarchy_include,
        'net_id': self.net_id,
        'cython_ext': path_to_cython_ext
    }

    # Write the Makefile to the disk
    with open(
            self.annarchy_dir + '/generate/net' + str(self.net_id) +
            '/Makefile', 'w') as wfile:
        wfile.write(makefile_template % makefile_flags)
def _instantiate(net_id, import_id=-1, cuda_config=None, user_config=None):
    """
    After everything is compiled, actually create the Cython objects and
    bind them to the Python ones.

    Parameters:

    * *net_id*: id of the network whose populations, projections and
      monitors are instantiated.
    * *import_id*: id of the network whose compiled library is loaded; a
      negative value (default) selects *net_id*.
    * *cuda_config*: optional dictionary; its 'device' entry selects the
      GPU and takes precedence over *user_config*.
    * *user_config*: optional dictionary; ``user_config['cuda']['device']``
      selects the GPU if *cuda_config* is not given.

    If neither configuration provides a device, device 0 is used. The
    device is set exactly once.
    """
    if Global._profiler:
        t0 = time.time()

    # parallel_run(number=x) defines multiple networks (net_id) but only network0 is compiled
    if import_id < 0:
        import_id = net_id

    # subdirectory where the library lies
    annarchy_dir = Global._network[import_id]['directory']
    libname = 'ANNarchyCore' + str(import_id)
    libpath = annarchy_dir + '/' + libname + '.so'

    cython_module = load_cython_lib(libname, libpath)
    Global._network[net_id]['instance'] = cython_module

    # Set the CUDA device: an explicit cuda_config wins over the user
    # configuration, device 0 is the fallback.
    if Global._check_paradigm("cuda"):
        device = 0
        if cuda_config:
            device = int(cuda_config['device'])
        elif user_config is not None:
            device = int(user_config.get('cuda', {}).get('device', 0))

        if Global.config['verbose']:
            Global._print('Setting GPU device', device)
        cython_module.set_device(device)

    # Sets the desired number of threads and execute thread placement.
    # This must be done before any other objects are initialized.
    if Global._check_paradigm("openmp"):
        if Global.config["num_threads"] > 1:
            core_list = Global.config['visible_cores']

            if core_list != []:
                # some sanity check
                if len(core_list) > multiprocessing.cpu_count():
                    Global._error(
                        "The length of core ids provided to setup() is larger than available number of cores"
                    )

                if len(core_list) < Global.config['num_threads']:
                    Global._error(
                        "The list of visible cores should be at least the number of cores."
                    )

                if np.amax(np.array(core_list)) > multiprocessing.cpu_count():
                    Global._error(
                        "At least one of the core ids provided to setup() is larger than available number of cores"
                    )

                cython_module.set_number_threads(Global.config['num_threads'],
                                                 core_list)
            else:
                # HD (26th Oct 2020): the current version of psutil only
                # considers one CPU socket, so the restriction to physical
                # cores (psutil.cpu_count(logical=False)) is not applied and
                # the thread placement is left open. There is a discussion
                # of adding multi-sockets, so this could be re-added later.
                cython_module.set_number_threads(Global.config['num_threads'],
                                                 [])

            if Global.config['verbose']:
                Global._print('Running simulation with',
                              Global.config['num_threads'], 'threads.')
        elif Global.config['verbose']:
            Global._print('Running simulation single-threaded.')

    # Configure seeds for random number generators
    # Required for state updates and also (in future) construction of connectivity
    if Global.config['seed'] == -1:
        seed = time.time()
    else:
        seed = Global.config['seed']

    if not Global.config['disable_parallel_rng']:
        cython_module.set_seed(seed, Global.config['num_threads'],
                               Global.config['use_seed_seq'])
    else:
        cython_module.set_seed(seed, 1, Global.config['use_seed_seq'])

    # Bind the py extensions to the corresponding python objects.
    # The per-object timers use their own variable, t0 is reserved for the
    # profiler entry written at the end.
    for pop in Global._network[net_id]['populations']:
        if Global.config['verbose']:
            Global._print('Creating population', pop.name)
        if Global.config['show_time']:
            t_start = time.time()

        # Instantiate the population
        pop._instantiate(cython_module)

        if Global.config['show_time']:
            Global._print('Creating', pop.name, 'took',
                          (time.time() - t_start) * 1000, 'milliseconds')

    # Instantiate projections
    for proj in Global._network[net_id]['projections']:
        if Global.config['verbose']:
            Global._print('Creating projection from', proj.pre.name, 'to',
                          proj.post.name, 'with target="', proj.target, '"')
        if Global.config['show_time']:
            t_start = time.time()

        # Create the projection
        proj._instantiate(cython_module)

        if Global.config['show_time']:
            Global._print('Creating the projection took',
                          (time.time() - t_start) * 1000, 'milliseconds')

    # Finish to initialize the network
    cython_module.pyx_create(Global.config['dt'])

    # Set the user-defined constants
    for obj in Global._objects['constants']:
        getattr(cython_module, '_set_' + obj.name)(obj.value)

    # Transfer initial values
    for pop in Global._network[net_id]['populations']:
        if Global.config['verbose']:
            Global._print('Initializing population', pop.name)
        pop._init_attributes()

    for proj in Global._network[net_id]['projections']:
        if Global.config['verbose']:
            Global._print('Initializing projection', proj.name, 'from',
                          proj.pre.name, 'to', proj.post.name,
                          'with target="', proj.target, '"')
        proj._init_attributes()

    # The rng dist must be initialized after the pops and projs are created!
    if Global._check_paradigm("openmp"):
        cython_module.pyx_init_rng_dist()

    # Start the monitors
    for monitor in Global._network[net_id]['monitors']:
        monitor._init_monitoring()

    if Global._profiler:
        t1 = time.time()
        Global._profiler.add_entry(t0, t1, "instantiate()", "compile")
def _generate_body(self):
    """
    Generate the codes of the 'main' library file.

    The generated code will be used in different files, dependent on the
    chosen target platform:

    * openmp: ANNarchy.cpp
    * cuda: ANNarchyHost.cu and ANNarchyDevice.cu

    Return:

    * openmp: the completed body template as one string.
    * cuda: tuple (device_code, host_code).

    Raises NotImplementedError for any other paradigm.
    """
    def _gather(descriptors, key):
        """Concatenate the field *key* which every descriptor must provide."""
        return "".join(desc[key] for desc in descriptors)

    def _gather_optional(descriptors, key):
        """Concatenate the field *key* of the descriptors providing it."""
        return "".join(desc[key] for desc in descriptors if key in desc)

    def _profiling_fields():
        """Template fields of the configured or a default profile generator."""
        if self._profgen:
            return self._profgen.generate_body_dict()
        return Profile.ProfileGenerator(
            self._annarchy_dir, self._net_id).generate_body_dict()

    # struct declaration for each population / projection
    pop_ptr = _gather(self._pop_desc, 'instance')
    proj_ptr = _gather(self._proj_desc, 'instance')

    # Code for the global operations
    glop_definition = self._body_def_glops()
    update_globalops = _gather_optional(self._pop_desc, 'gops_update')

    # Reset presynaptic sums
    reset_sums = self._body_resetcomputesum_pop()

    # Compute presynaptic sums: sum over all synapses
    compute_sums = ""
    if Global._check_paradigm("openmp"):
        compute_sums = _gather(self._proj_desc, "compute_psp")

    # Init rng dist
    init_rng_dist = "".join(
        """pop%(id)s.init_rng_dist();\n""" % {'id': pop.id}
        for pop in self._populations)

    # Update random distributions
    rd_update_code = _gather_optional(self._pop_desc + self._proj_desc,
                                      'rng_update')

    # Equations for the neural variables
    update_neuron = _gather_optional(self._pop_desc, 'update')

    # Enqueue delayed outputs
    delay_code = _gather_optional(self._pop_desc, 'delay_update')

    # Equations for the synaptic variables
    update_synapse = _gather_optional(self._proj_desc, 'update')

    # Equations for the post-events
    post_event = _gather_optional(self._proj_desc, 'post_event')

    # Structural plasticity
    structural_plasticity = self._body_structural_plasticity()

    # Early stopping
    run_until = self._body_run_until()

    # Profiling
    prof_dict = _profiling_fields()

    #
    # Generate the ANNarchy.cpp code, the corresponding template differs
    # greatly between the paradigms.
    #
    paradigm = Global.config['paradigm']

    if paradigm == "openmp":
        # custom constants
        custom_constant, _ = self._body_custom_constants()

        # code fields for openMP/single thread template
        base_dict = {
            'float_prec': Global.config['precision'],
            'pop_ptr': pop_ptr,
            'proj_ptr': proj_ptr,
            'glops_def': glop_definition,
            'initialize': self._body_initialize(),
            'init_rng_dist': init_rng_dist,
            'run_until': run_until,
            'compute_sums': compute_sums,
            'reset_sums': reset_sums,
            'update_neuron': update_neuron,
            'update_globalops': update_globalops,
            'update_synapse': update_synapse,
            'random_dist_update': rd_update_code,
            'delay_code': delay_code,
            'post_event': post_event,
            'structural_plasticity': structural_plasticity,
            'custom_constant': custom_constant,
        }

        # profiling
        base_dict.update(prof_dict)

        # complete code template
        if Global.config["num_threads"] == 1:
            body_template = BaseTemplate.st_body_template
        else:
            body_template = BaseTemplate.omp_body_template
        return body_template % base_dict

    if paradigm == "cuda":
        # Implementation notice ( HD: 10. June, 2015 )
        #
        # The CUDA linking process is a big problem for object oriented
        # approaches and the separation of implementation codes into
        # several files. Even in SDK 5.0 linking is only available for
        # small, independent code pieces, by far not sufficient for full
        # object-oriented approaches.
        #
        # Consequence: the implementation of objects cannot be separated
        # completely into several files. As workaround the c-structs
        # holding data fields and accessors are created as in OpenMP, the
        # kernels and their calls are created in the corresponding
        # generator objects and returned via the descriptor dictionary.
        #
        # This ensures a consistent interface in the generators and in the
        # generated codes, but sometimes requires additional overhead.
        psp_call = _gather(self._proj_desc, 'psp_call')

        # custom constants
        host_custom_constant, _, device_custom_constant = self._body_custom_constants(
        )

        # custom functions: populations, projections, then global functions
        custom_func = _gather(self._pop_desc, 'custom_func')
        custom_func += _gather(self._proj_desc, 'custom_func')
        custom_func += "".join(
            extract_functions(func, local_global=True)[0]['cpp'].replace(
                "inline", "__device__") + '\n'
            for _, func in Global._objects['functions'])

        # pre-defined/common available kernel
        common_kernel = self._cuda_common_kernel(self._projections)

        pop_kernel = _gather(self._pop_desc, 'update_body')
        pop_update_fr = _gather(self._pop_desc, 'update_FR')
        psp_kernel = _gather(self._proj_desc, 'psp_body')

        kernel_def = _gather(self._pop_desc, 'update_header')
        kernel_def += "".join(
            proj['psp_header'] + proj['update_synapse_header'] +
            proj['postevent_header'] for proj in self._proj_desc)

        # CUDA populations store the delay code under a different key
        delay_code = _gather_optional(self._pop_desc, 'update_delay')

        syn_kernel = _gather(self._proj_desc, 'update_synapse_body')
        syn_call = _gather(self._proj_desc, 'update_synapse_call')

        postevent_kernel = _gather(self._proj_desc, 'postevent_body')
        postevent_call = _gather(self._proj_desc, 'postevent_call')

        clear_sums = self._body_resetcomputesum_pop()

        # global operations
        glob_ops_header, glob_ops_body = self._body_def_glops()
        kernel_def += glob_ops_header

        # determine number of threads per kernel
        threads_per_kernel = self._cuda_kernel_config()

        # concurrent kernel execution
        stream_setup = self._cuda_stream_config()

        # memory transfers
        all_descriptors = self._pop_desc + self._proj_desc
        host_device_transfer = _gather(all_descriptors, 'host_to_device')
        device_host_transfer = _gather(all_descriptors, 'device_to_host')

        # Profiling
        prof_dict = _profiling_fields()

        #
        # HD ( 31.07.2016 ):
        #
        # It is unclear what exactly causes the problem with the atomicAdd
        # function. If it is moved into ANNarchyDevice.cu, the macro seems
        # to be evaluated wrongly and the atomicAdd() function appears
        # doubled or not at all.
        #
        # As "solution", the atomicAdd definition block resides in
        # ANNarchyHost and only the computation kernels are placed in
        # ANNarchyDevice. With SDK8 as lowest requirement this kernel could
        # be moved too.
        built_in = BaseTemplate.built_in_functions + BaseTemplate.integer_power_cuda % {
            'float_prec': Global.config['precision']
        }
        device_code = BaseTemplate.cuda_device_kernel_template % {
            #device stuff
            'common_kernel': common_kernel,
            'pop_kernel': pop_kernel,
            'psp_kernel': psp_kernel,
            'syn_kernel': syn_kernel,
            'glob_ops_kernel': glob_ops_body,
            'postevent_kernel': postevent_kernel,
            'custom_func': custom_func,
            'custom_constant': device_custom_constant,
            'built_in': built_in,
            'float_prec': Global.config['precision']
        }

        base_dict = {
            # network definitions
            'float_prec': Global.config['precision'],
            'pop_ptr': pop_ptr,
            'proj_ptr': proj_ptr,
            'run_until': run_until,
            'clear_sums': clear_sums,
            'compute_sums': psp_call,
            'update_neuron': update_neuron,
            'update_FR': pop_update_fr,
            'update_globalops': update_globalops,
            'update_synapse': syn_call,
            'post_event': postevent_call,
            'delay_code': delay_code,
            'initialize': self._body_initialize(),
            'structural_plasticity': structural_plasticity,

            # cuda host specific
            'stream_setup': stream_setup,
            'host_device_transfer': host_device_transfer,
            'device_host_transfer': device_host_transfer,
            'kernel_def': kernel_def,
            'kernel_config': threads_per_kernel,
            'custom_constant': host_custom_constant
        }
        base_dict.update(prof_dict)
        host_code = BaseTemplate.cuda_host_body_template % base_dict

        return device_code, host_code

    raise NotImplementedError
def _declaration_accessors(self, proj, single_matrix):
    """
    Generate declaration and accessor code for the variables/parameters of
    the projection *proj*.

    Parameters:

    * **proj**: projection for which the code is generated.
    * **single_matrix**: forwarded unchanged to
      ``_generate_default_get_set()``.

    Returns:

    (dict, str): the first value contains the declaration codes, the second
    one the accessor code. The declaration dictionary has the following
    fields:

        declare_delay, init_delay, event_driven, rng,
        parameters_variables, additional

    Raises:

    Global.CodeGeneratorException: if the projection uses non-uniform
    delays while the CUDA paradigm is selected.
    """
    # Defaults for non-specific projections
    declare_event_driven = ""
    declare_rng = ""
    declare_additional = ""

    # Delays
    if proj.max_delay > 1:
        if proj.uniform_delay > 1:
            key_delay = "uniform"
        else:
            if Global._check_paradigm("cuda"):
                # The exception was previously only constructed, never
                # raised, so the guard had no effect.
                raise Global.CodeGeneratorException(
                    "Non-uniform delays on rate-coded or spiking synapses are not available for CUDA devices.")

            if proj.synapse_type.type == "rate":
                key_delay = "nonuniform_rate_coded"
            else:
                key_delay = "nonuniform_spiking"

        declare_delay = self._templates['delay'][key_delay]['declare']
        init_delay = self._templates['delay'][key_delay]['init']
    else:
        declare_delay = ""
        init_delay = ""

    # Code for declarations and accessors
    declare_parameters_variables, accessor = self._generate_default_get_set(proj, single_matrix)

    # Event-driven integration needs an additional declaration
    has_event_driven = any(
        var['method'] == 'event-driven'
        for var in proj.synapse_type.description['variables']
    )
    if has_event_driven:
        declare_event_driven = self._templates['event_driven']['declare']

    # Arrays for the random numbers
    random_distributions = proj.synapse_type.description['random_distributions']
    if len(random_distributions) > 0:
        declare_rng += """
    // Random numbers
"""
        for rd in random_distributions:
            declare_rng += self._templates['rng'][rd['locality']]['decl'] % {
                'rd_name': rd['name'],
                'type': rd['ctype'],
                'float_prec': Global.config['precision'],
                'template': rd['template'] % {'float_prec': Global.config['precision']}
            }

    # Structural plasticity
    if Global.config['structural_plasticity']:
        declare_parameters_variables += self._header_structural_plasticity(proj)

    # Specific projections can overwrite the generated codes
    specific = proj._specific_template
    if 'declare_parameters_variables' in specific:
        declare_parameters_variables = specific['declare_parameters_variables']
    if 'access_parameters_variables' in specific:
        accessor = specific['access_parameters_variables']
    if 'declare_rng' in specific:
        declare_rng = specific['declare_rng']
    if 'declare_event_driven' in specific:
        declare_event_driven = specific['declare_event_driven']
    if 'declare_additional' in specific:
        declare_additional = specific['declare_additional']

    # Finalize the declarations
    declaration = {
        'declare_delay': declare_delay,
        'init_delay': init_delay,
        'event_driven': declare_event_driven,
        'rng': declare_rng,
        'parameters_variables': declare_parameters_variables,
        'additional': declare_additional
    }

    return declaration, accessor
def _check_storage_formats(projections): """ ANNarchy 4.7 introduced a set of sparse matrix formats. Some of them are not implemented for all paradigms or might not support specific optimizations. """ for proj in projections: # Most of the sparse matrix formats are not trivially invertable and therefore we can not implement # spiking models with them if proj.synapse_type.type == "spike" and proj._storage_format in [ "ell", "ellr", "coo", "hyb" ]: raise Global.ANNarchyException( "Using 'storage_format=" + proj._storage_format + "' is not allowed for spiking synapses.", True) # Dense format is not implemented for GPUs and spiking models if proj._storage_format == "dense" and proj.synapse_type.type == "spike" and Global._check_paradigm( "cuda"): raise Global.ANNarchyException( "Dense representation is not available for spiking models on GPUs yet.", True) # For some of the sparse matrix formats we don't implemented plasticity yet. if proj.synapse_type.type == "spike" and proj._storage_format in [ "dense" ] and not isinstance(proj.synapse_type, DefaultSpikingSynapse): raise Global.ANNarchyException( "Using 'storage_format=" + proj._storage_format + "' is only allowed for default spiking synapses yet.", True) # For some of the sparse matrix formats we don't implemented plasticity yet. if proj.synapse_type.type == "rate" and proj._storage_format in [ "coo", "hyb" ] and not isinstance(proj.synapse_type, DefaultRateCodedSynapse): raise Global.ANNarchyException( "Using 'storage_format=" + proj._storage_format + "' is only allowed for default rate-coded synapses yet.", True) # OpenMP disabled? if proj._storage_format in ["bsr" ] and Global.config["num_threads"] > 1: raise Global.ANNarchyException( "Using 'storage_format=" + proj._storage_format + "' is not available for OpenMP yet.", True) # Single weight optimization available? 
if proj._has_single_weight() and proj._storage_format in ["dense"]: raise Global.ANNarchyException( "Using 'storage_format=" + proj._storage_format + "' is not allowed for single weight projections.", True) # Slicing available? if isinstance(proj.post, PopulationView) and proj._storage_format in ["dense"]: raise Global.ANNarchyException( "Using 'storage_format=" + proj._storage_format + "' is not allowed for PopulationViews as target.", True) # In some cases we don't allow the usage of non-unifom delay if (proj.max_delay > 1 and proj.uniform_delay == -1): if Global._check_paradigm("cuda"): raise Global.ANNarchyException( "Using non-uniform delays is not available for CUDA devices.", True) else: if proj._storage_format == "ellr": raise Global.ANNarchyException( "Using 'storage_format=" + proj._storage_format + "' is and non-uniform delays is not implemented.", True) if Global._check_paradigm("cuda") and proj._storage_format == "lil": proj._storage_format = "csr" Global._info( "LIL-type projections are not available for GPU devices ... default to CSR" ) if Global._check_paradigm("cuda") and proj._storage_format == "ell": Global._info( "We would recommend to use ELLPACK-R (format=ellr) on GPUs.")
def _instantiate(net_id, import_id=-1, cuda_config=None, user_config=None):
    """
    After everything is compiled, actually create the Cython objects and
    bind them to the Python ones.

    Parameters:

    * **net_id**: id of the network whose populations, projections and
      monitors are instantiated.
    * **import_id**: id of the network whose compiled library is loaded.
      A negative value (default) means "same as *net_id*".
    * **cuda_config**: optional dictionary; if non-empty, its ``'device'``
      entry selects the GPU (CUDA paradigm only).
    * **user_config**: optional dictionary; its ``['cuda']['device']`` entry
      is used as GPU id when *cuda_config* is empty (CUDA paradigm only).
    """
    # parallel_run(number=x) defines multiple networks (net_id) but only network0 is compiled
    if import_id < 0:
        import_id = net_id

    # subdirectory where the library lies
    annarchy_dir = Global._network[import_id]['directory']

    if Global.config['verbose']:
        Global._print('Building network ...')

    # Import the Cython library
    try:
        cython_module = imp.load_dynamic(
            'ANNarchyCore' + str(import_id),  # Name of the network
            annarchy_dir + '/ANNarchyCore' + str(import_id) + '.so'  # Path to the library
        )
    except Exception as e:
        Global._print(e)
        Global._error('Something went wrong when importing the network. Force recompilation with --clean.')

    Global._network[net_id]['instance'] = cython_module

    # Set the CUDA device
    if Global._check_paradigm("cuda"):
        device = 0
        if cuda_config:
            device = int(cuda_config['device'])
        elif user_config and 'cuda' in user_config and 'device' in user_config['cuda']:
            # The former test ('cuda' in user_config['cuda']) looked up the
            # wrong key and failed with a TypeError for user_config=None.
            device = int(user_config['cuda']['device'])

        if Global.config['verbose']:
            Global._print('Setting GPU device', device)
        cython_module.set_device(device)

    # Bind the py extensions to the corresponding python objects
    for pop in Global._network[net_id]['populations']:
        if Global.config['verbose']:
            Global._print('Creating population', pop.name)
        if Global.config['show_time']:
            t0 = time.time()

        # Instantiate the population
        pop._instantiate(cython_module)

        if Global.config['show_time']:
            Global._print('Creating', pop.name, 'took', (time.time()-t0)*1000, 'milliseconds')

    # Instantiate projections
    for proj in Global._network[net_id]['projections']:
        if Global.config['verbose']:
            Global._print('Creating projection from', proj.pre.name, 'to', proj.post.name, 'with target="', proj.target, '"')
        if Global.config['show_time']:
            t0 = time.time()

        # Create the projection
        proj._instantiate(cython_module)

        if Global.config['show_time']:
            Global._print('Creating the projection took', (time.time()-t0)*1000, 'milliseconds')

    # Finish to initialize the network, especially the rng
    # Must be called after the pops and projs are created!
    cython_module.pyx_create(Global.config['dt'], Global.config['seed'])

    # Set the user-defined constants
    for obj in Global._objects['constants']:
        getattr(cython_module, '_set_'+obj.name)(obj.value)

    # Transfer initial values
    for pop in Global._network[net_id]['populations']:
        if Global.config['verbose']:
            Global._print('Initializing population', pop.name)
        pop._init_attributes()

    for proj in Global._network[net_id]['projections']:
        if Global.config['verbose']:
            Global._print('Initializing projection from', proj.pre.name, 'to', proj.post.name, 'with target="', proj.target, '"')
        proj._init_attributes()

    # Sets the desired number of threads
    if Global.config['num_threads'] > 1 and Global._check_paradigm("openmp"):
        cython_module.set_number_threads(Global.config['num_threads'])

    if Global.config['verbose']:
        Global._print('Running simulation with', Global.config['num_threads'], 'threads.')

    # Start the monitors
    for monitor in Global._network[net_id]['monitors']:
        monitor._init_monitoring()
def compile(
        directory='annarchy',
        clean=False,
        populations=None,
        projections=None,
        compiler="default",
        compiler_flags="default",
        cuda_config=None,
        silent=False,
        debug_build=False,
        profile_enabled=False,
        net_id=0
    ):
    """
    This method uses the network architecture to generate optimized C++ code and compile a shared library that will perform the simulation.

    *Parameters*:

    * **directory**: name of the subdirectory where the code will be generated and compiled. Must be a relative path. Default: "annarchy/".
    * **clean**: boolean to specifying if the library should be recompiled entirely or only the changes since last compilation (default: False). A rebuild is also enforced by the ``--clean`` command-line flag.
    * **populations**: list of populations which should be compiled. If set to None, all available populations will be used.
    * **projections**: list of projection which should be compiled. If set to None, all available projections will be used.
    * **compiler**: C++ compiler to use. Default: g++ on GNU/Linux, clang++ on OS X. Valid compilers are [g++, clang++].
    * **compiler_flags**: platform-specific flags to pass to the compiler. Default: "-march=native -O2". Warning: -O3 often generates slower code and can cause linking problems, so it is not recommended.
    * **cuda_config**: dictionary defining the CUDA configuration for each population and projection. If set to None (default), ``{'device': 0}`` is used. The dictionary passed by the caller is copied, not modified.
    * **silent**: defines if the "Compiling... OK" should be printed.

    The ``compiler``, ``compiler_flags`` and part of ``cuda_config`` take their default value from the configuration file ``~/.config/ANNarchy/annarchy.json``.

    The following arguments are for internal development use only:

    * **debug_build**: creates a debug version of ANNarchy, which logs the creation of objects and some other data (default: False).
    * **profile_enabled**: creates a profilable version of ANNarchy, which logs several computation timings (default: False).
    """
    # Check if the network has already been compiled
    if Global._network[net_id]['compiled']:
        Global._print("""compile(): the network has already been compiled, doing nothing.
    If you are re-running a Jupyter notebook, you should call `clear()` right after importing ANNarchy in order to reset everything.""")
        return

    # Work on a private dictionary: a shared mutable default (or the
    # caller's dictionary) must not be altered by the command-line handling.
    cuda_config = {'device': 0} if cuda_config is None else dict(cuda_config)

    # Get the command-line arguments
    parser = setup_parser()
    options, unknown = parser.parse_known_args()
    if unknown:
        Global._warning('unrecognized arguments:', unknown)

    # if the parameters set on command-line they overwrite Global.config
    if options.num_threads is not None:
        Global.config['num_threads'] = options.num_threads

    # Get CUDA configuration
    if options.gpu_device >= 0:
        Global.config['paradigm'] = "cuda"
        cuda_config['device'] = int(options.gpu_device)

        # Check that CUDA is enabled
        try:
            from .CudaCheck import CudaCheck
        except Exception:
            Global._error('CUDA is not installed on your system')

    # Check that a single backend is chosen
    if (options.num_threads is not None) and (options.gpu_device >= 0):
        Global._error('CUDA and openMP can not be active at the same time, please check your command line arguments.')

    # Verbose
    if options.verbose is not None:
        Global.config['verbose'] = options.verbose

    # Precision
    if options.precision is not None:
        Global.config['precision'] = options.precision

    # Profiling
    if options.profile is not None:
        profile_enabled = options.profile
        Global.config['profiling'] = options.profile
        Global.config['profile_out'] = options.profile_out

    # Debug
    if not debug_build:
        debug_build = options.debug  # debug build
    Global.config["debug"] = debug_build

    # Clean: either the argument or the command-line flag enforces a rebuild
    # (the argument was formerly overwritten by the command-line value).
    clean = clean or options.clean

    # Populations to compile
    if populations is None:  # Default network
        populations = Global._network[net_id]['populations']

    # Projections to compile
    if projections is None:  # Default network
        projections = Global._network[net_id]['projections']

    # Compiling directory
    annarchy_dir = os.getcwd() + '/' + directory
    if not annarchy_dir.endswith('/'):
        annarchy_dir += '/'

    Global._network[net_id]['directory'] = annarchy_dir

    # Turn OMP off for MacOS
    if (Global._check_paradigm("openmp") and Global.config['num_threads'] > 1 and sys.platform == "darwin"):
        Global._warning("OpenMP is not supported on Mac OS yet")
        Global.config['num_threads'] = 1

    # Test if the current ANNarchy version is newer than what was used to create the subfolder
    from pkg_resources import parse_version
    if os.path.isfile(annarchy_dir+'/release'):
        with open(annarchy_dir+'/release', 'r') as rfile:
            prev_release = rfile.read().strip()
        prev_paradigm = ''

        # HD (03.08.2016):
        # in ANNarchy 4.5.7b I added also the paradigm to the release tag.
        # This if clause can be removed in later releases (TODO)
        if prev_release.find(',') != -1:
            # tolerate a missing blank after the comma
            prev_paradigm, prev_release = [
                part.strip() for part in prev_release.split(',', 1)
            ]
        else:
            # old release tag
            clean = True

        if parse_version(prev_release) < parse_version(ANNarchy.__release__):
            clean = True
        elif prev_paradigm != Global.config['paradigm']:
            clean = True
    else:
        clean = True  # for very old versions

    # Check if the last compilation was successful
    if os.path.isfile(annarchy_dir+'/compilation'):
        with open(annarchy_dir + '/compilation', 'r') as rfile:
            res = rfile.read()
        if res.strip() == "0":  # the last compilation failed
            clean = True
    else:
        clean = True

    # Manage the compilation subfolder
    _folder_management(annarchy_dir, profile_enabled, clean, net_id)

    # Create a Compiler object
    compiler = Compiler(annarchy_dir=annarchy_dir,
                        clean=clean,
                        compiler=compiler,
                        compiler_flags=compiler_flags,
                        silent=silent,
                        cuda_config=cuda_config,
                        debug_build=debug_build,
                        profile_enabled=profile_enabled,
                        populations=populations,
                        projections=projections,
                        net_id=net_id)
    compiler.generate()
def _generate_decl_and_acc(self, pop): """ Data exchange between Python and ANNarchyCore library is done by specific get-/set-methods. This function creates for all variables and parameters the corresponding methods. """ # Parameters, Variables declaration, accessors, already_processed = self._generate_default_get_set( pop) # The conductance/current variables for spiking neurons are stored in # pop.neuron_type.description['variables'] but only if they are used. if pop.neuron_type.type == 'spike': try: all_targets = set(pop.neuron_type.description['targets'] + pop.targets) except TypeError: # The projection has multiple targets all_targets = set(pop.neuron_type.description['targets'] + pop.targets[0]) for target in sorted(list(all_targets)): attr_name = 'g_' + target if attr_name not in already_processed: # we assume here, that targets are local variables id_dict = { 'type': Global.config['precision'], 'name': attr_name, 'attr_type': 'variable' } declaration += self._templates['attr_decl'][ 'local'] % id_dict already_processed.append(attr_name) # Global operations if len(pop.global_operations) != 0: declaration += """ // Global operations """ for op in pop.global_operations: op_dict = { 'type': Global.config['precision'], 'op': op['function'], 'var': op['variable'] } if Global._check_paradigm("openmp"): declaration += """ %(type)s _%(op)s_%(var)s; """ % op_dict elif Global._check_paradigm("cuda"): declaration += """ %(type)s _%(op)s_%(var)s; %(type)s* _gpu_%(op)s_%(var)s; """ % op_dict else: raise NotImplementedError # Arrays for the random numbers declaration += """ // Random numbers """ for rd in pop.neuron_type.description['random_distributions']: declaration += self._templates['rng'][rd['locality']]['decl'] % { 'rd_name': rd['name'], 'type': rd['ctype'], 'template': rd['template'] % { 'float_prec': Global.config['precision'] } } return declaration, accessors
def _generate_default_get_set(self, pop): """ Generate a get/set template for all attributes in the given population """ # Pick basic template based on neuron type attr_template = self._templates['attr_decl'] acc_template = self._templates['attr_acc'] declaration = "" # member declarations accessors = "" # export member functions already_processed = [] code_ids_per_type = {} # Sort the parameters/variables per type for var in pop.neuron_type.description[ 'parameters'] + pop.neuron_type.description['variables']: # Avoid doublons if var['name'] in already_processed: continue # add an empty list for this type if needed if var['ctype'] not in code_ids_per_type.keys(): code_ids_per_type[var['ctype']] = [] # Important which template to choose locality = var['locality'] attr_type = 'parameter' if var in pop.neuron_type.description[ 'parameters'] else 'variable' # For GPUs we need to tell the host that this variable need to be updated if Global._check_paradigm("cuda"): if attr_type == "parameter" and locality == "global": read_dirty_flag = "" write_dirty_flag = "" else: write_dirty_flag = "%(name)s_host_to_device = true;" % { 'name': var['name'] } read_dirty_flag = "if ( %(name)s_device_to_host < t ) device_to_host();" % { 'name': var['name'] } else: read_dirty_flag = "" write_dirty_flag = "" # add to the processing list code_ids_per_type[var['ctype']].append({ 'type': var['ctype'], 'name': var['name'], 'locality': locality, 'attr_type': attr_type, 'write_dirty_flag': write_dirty_flag, 'read_dirty_flag': read_dirty_flag }) already_processed.append(var['name']) # For rate-coded models add _sum_target if pop.neuron_type.type == "rate": for target in sorted( list( set(pop.neuron_type.description['targets'] + pop.targets))): prec_type = Global.config['precision'] # add to the processing list code_ids_per_type[prec_type].append({ 'type': prec_type, 'name': "_sum_" + target, 'locality': 'local', 'attr_type': 'psp', 'write_dirty_flag': "_sum_" + target + "_host_to_device = 
true;", 'read_dirty_flag': "if ( _sum_" + target + "_device_to_host < t ) device_to_host();" }) # Final code, can contain of multiple sets of accessor functions accessors = "" for ctype in code_ids_per_type.keys(): local_attribute_get1 = "" local_attribute_get2 = "" local_attribute_set1 = "" local_attribute_set2 = "" global_attribute_get = "" global_attribute_set = "" for ids in code_ids_per_type[ctype]: locality = ids['locality'] if locality == "local": local_attribute_get1 += self._templates["attr_acc"][ "local_get_all"] % ids local_attribute_get2 += self._templates["attr_acc"][ "local_get_single"] % ids local_attribute_set1 += self._templates["attr_acc"][ "local_set_all"] % ids local_attribute_set2 += self._templates["attr_acc"][ "local_set_single"] % ids elif locality == "global": global_attribute_get += self._templates["attr_acc"][ "global_get"] % ids global_attribute_set += self._templates["attr_acc"][ "global_set"] % ids else: raise ValueError( "PopulationGenerator: invalild locality type for attribute" ) if Global._check_paradigm("cuda") and locality == "global": declaration += self._templates['attr_decl'][locality][ ids['attr_type']] % ids else: declaration += self._templates['attr_decl'][locality] % ids # build up the final codes if local_attribute_get1 != "": accessors += self._templates["accessor_template"]["local"] % { 'local_get1': local_attribute_get1, 'local_get2': local_attribute_get2, 'local_set1': local_attribute_set1, 'local_set2': local_attribute_set2, 'id': pop.id, 'ctype': ctype, 'ctype_name': ctype.replace(" ", "_") } if global_attribute_get != "": accessors += self._templates["accessor_template"]["global"] % { 'global_get': global_attribute_get, 'global_set': global_attribute_set, 'id': pop.id, 'ctype': ctype, 'ctype_name': ctype.replace(" ", "_") } return declaration, accessors, already_processed
def _body_custom_constants(self):
    """
    Generate the code for the user-defined constants, dependent on the
    target paradigm set in the global settings.

    Returns (openMP):

    * decl_code: declarations in header file
    * init_code: initialization code

    Returns (CUDA):

    * host_decl_code: declarations in header file (host side)
    * host_init_code: initialization code (host side)
    * device_decl_code: declarations in header file (device side)

    All returned strings are empty if no constant is defined. Any other
    paradigm raises NotImplementedError.
    """
    use_openmp = Global._check_paradigm("openmp")
    if not use_openmp and not Global._check_paradigm("cuda"):
        raise NotImplementedError

    # one substitution dictionary per constant
    constant_ids = [
        {
            'name': constant.name,
            'value': constant.value,
            'float_prec': Global.config['precision']
        }
        for constant in Global._objects['constants']
    ]

    init_tpl = """
        %(name)s = 0.0;"""

    if use_openmp:
        decl_tpl = """
%(float_prec)s %(name)s;
void set_%(name)s(%(float_prec)s value){%(name)s = value;};"""

        decl_code = "".join(decl_tpl % ids for ids in constant_ids)
        init_code = "".join(init_tpl % ids for ids in constant_ids)
        return decl_code, init_code

    # CUDA: host declaration/setter plus a device side declaration
    host_decl_tpl = """
__device__ __constant__ %(float_prec)s %(name)s;
void set_%(name)s(%(float_prec)s value){
    cudaError_t err = cudaMemcpyToSymbol(%(name)s, &value, sizeof(%(float_prec)s), 0, cudaMemcpyHostToDevice);
#ifdef _DEBUG
    std::cout << "set %(name)s " << value << std::endl;
    if ( err != cudaSuccess )
        std::cerr << cudaGetErrorString(err) << std::endl;
#endif
}"""
    device_decl_tpl = "__device__ __constant__ %(float_prec)s %(name)s;\n"

    host_decl_code = "".join(host_decl_tpl % ids for ids in constant_ids)
    host_init_code = "".join(init_tpl % ids for ids in constant_ids)
    device_decl_code = "".join(device_decl_tpl % ids for ids in constant_ids)

    return host_decl_code, host_init_code, device_decl_code
def _init_population(self, pop): """ Generate the codes for the C++ function Population::init_population() method. """ code = "" attr_tpl = self._templates['attribute_cpp_init'] already_processed = [] # Parameters for var in pop.neuron_type.description['parameters']: # Avoid doublons if var['name'] in already_processed: continue if Global._check_paradigm("cuda") and var['locality'] == "global": code += attr_tpl[var['locality']]['parameter'] % { 'name': var['name'] } else: init = 'false' if var['ctype'] == 'bool' else ( '0' if var['ctype'] == 'int' else '0.0') var_ids = { 'id': pop.id, 'name': var['name'], 'type': var['ctype'], 'init': init, 'attr_type': 'parameter' } code += attr_tpl[var['locality']] % var_ids already_processed.append(var['name']) # Variables for var in pop.neuron_type.description['variables']: # Avoid doublons if var['name'] in already_processed: continue init = 'false' if var['ctype'] == 'bool' else ( '0' if var['ctype'] == 'int' else '0.0') var_ids = { 'id': pop.id, 'name': var['name'], 'type': var['ctype'], 'init': init, 'attr_type': 'variable' } if Global._check_paradigm("cuda") and var['locality'] == "global": code += attr_tpl[var['locality']]['variable'] % var_ids else: code += attr_tpl[var['locality']] % var_ids already_processed.append(var['name']) # Random numbers code += self._init_random_dist(pop)[1] # Global operations code += self._init_globalops(pop) # rate-coded targets if pop.neuron_type.type == 'rate': for target in sorted( list( set(pop.neuron_type.description['targets'] + pop.targets))): ids = { 'id': pop.id, 'name': "_sum_" + target, 'attr_type': 'psp', 'type': Global.config['precision'], 'init': 0.0 } code += attr_tpl['local'] % ids # or unused synaptic spiking targets else: try: all_targets = set(pop.neuron_type.description['targets'] + pop.targets) except TypeError: # The projection has multiple targets all_targets = set(pop.neuron_type.description['targets'] + pop.targets[0]) for target in sorted(list(all_targets)): 
attr_name = 'g_' + target if attr_name not in already_processed: id_dict = { 'type': Global.config['precision'], 'name': attr_name, 'attr_type': 'variable', 'init': 0.0 } code += self._templates['attribute_cpp_init'][ 'local'] % id_dict already_processed.append(attr_name) return code
def _init_parameters_variables(self, proj, single_spmv_matrix): """ Generate initialization code for variables / parameters of the projection *proj*. Returns 3 values: ret1 (str): weight initialization ret2 (str): delay initialization ret3 (str): other initializations (e. g. event-driven) """ # Is it a specific projection? if 'init_parameters_variables' in proj._specific_template.keys(): return proj._specific_template['init_parameters_variables'] # Learning by default code = "" weight_code = "" # choose initialization templates based on chosen paradigm attr_init_tpl = self._templates['attribute_cpp_init'] attributes = [] # Initialize parameters for var in proj.synapse_type.description['parameters'] + proj.synapse_type.description['variables']: # Avoid doublons if var['name'] in attributes: continue # Important to select which template locality = var['locality'] attr_type = 'parameter' if var in proj.synapse_type.description['parameters'] else 'variable' # The synaptic weight if var['name'] == 'w': if var['locality'] == "global" or proj._has_single_weight(): if cpp_connector_available(proj.connector_name, proj._storage_format, proj._storage_order): weight_code = tabify("w = w_dist_arg1;", 2) else: weight_code = tabify("w = values[0][0];", 2) elif var['locality'] == "local": if cpp_connector_available(proj.connector_name, proj._storage_format, proj._storage_order): # Init weights in CPP if proj.connector_weight_dist == None: init_code = self._templates['attribute_cpp_init']['local'] % { 'init': 'w_dist_arg1', 'type': var['ctype'], 'attr_type': 'parameter' if var in proj.synapse_type.description['parameters'] else 'variable', 'name': var['name'] } elif isinstance(proj.connector_weight_dist, ANNRandom.Uniform): if single_spmv_matrix: init_code = "w = init_matrix_variable_uniform<%(float_prec)s>(w_dist_arg1, w_dist_arg2, rng[0]);" else: init_code = "w = init_matrix_variable_uniform<%(float_prec)s>(w_dist_arg1, w_dist_arg2, rng);" elif 
isinstance(proj.connector_weight_dist, ANNRandom.Normal): if single_spmv_matrix: init_code = "w = init_matrix_variable_normal<%(float_prec)s>(w_dist_arg1, w_dist_arg2, rng[0]);" else: init_code = "w = init_matrix_variable_normal<%(float_prec)s>(w_dist_arg1, w_dist_arg2, rng);" elif isinstance(proj.connector_weight_dist, ANNRandom.LogNormal): if proj.connector_weight_dist.min==None and proj.connector_weight_dist.max==None: if single_spmv_matrix: init_code = "w = init_matrix_variable_log_normal<%(float_prec)s>(w_dist_arg1, w_dist_arg2, rng[0]);" else: init_code = "w = init_matrix_variable_log_normal<%(float_prec)s>(w_dist_arg1, w_dist_arg2, rng);" else: min_code = "std::numeric_limits<%(float_prec)s>::min()" if proj.connector_weight_dist.min==None else str(proj.connector_weight_dist.min) max_code = "std::numeric_limits<%(float_prec)s>::max()" if proj.connector_weight_dist.max==None else str(proj.connector_weight_dist.max) if single_spmv_matrix: init_code = "w = init_matrix_variable_log_normal_clip<%(float_prec)s>(w_dist_arg1, w_dist_arg2, rng[0], "+min_code+", "+max_code+");" else: init_code = "w = init_matrix_variable_log_normal_clip<%(float_prec)s>(w_dist_arg1, w_dist_arg2, rng, "+min_code+", "+max_code+");" else: raise NotImplementedError( str(type(proj.connector_weight_dist)) + " is not available for CPP-side connection patterns.") if Global._check_paradigm("cuda"): init_code += "\ngpu_w = init_matrix_variable_gpu<%(float_prec)s>(w);" weight_code = tabify(init_code % {'float_prec': Global.config['precision']}, 2) # Init_from_lil else: init = 'false' if var['ctype'] == 'bool' else ('0' if var['ctype'] == 'int' else '0.0') weight_code = attr_init_tpl[locality] % { 'id': proj.id, 'id_post': proj.post.id, 'name': var['name'], 'type': var['ctype'], 'init': init, 'attr_type': attr_type, 'float_prec': Global.config['precision'] } weight_code += tabify("update_matrix_variable_all<%(float_prec)s>(w, values);" % {'float_prec': Global.config['precision']}, 2) if 
Global._check_paradigm("cuda"): weight_code += tabify("\nw_host_to_device = true;", 2) else: raise NotImplementedError # All other variables else: init = 'false' if var['ctype'] == 'bool' else ('0' if var['ctype'] == 'int' else '0.0') var_ids = { 'id': proj.id, 'id_post': proj.post.id, 'name': var['name'], 'type': var['ctype'], 'init': init, 'attr_type': attr_type, 'float_prec': Global.config['precision'] } if Global._check_paradigm("cuda") and locality == "global": code += attr_init_tpl[locality][attr_type] % var_ids else: code += attr_init_tpl[locality] % var_ids attributes.append(var['name']) # Initialize delays differs for construction from LIL or CPP inited patterns if proj.max_delay > 1: # Special case: we have non-uniform delays, but not determined by a RandomDistribution # This will caused most likely by custom connectivity pattern if proj.connector_delay_dist == None and proj.uniform_delay==-1: id_pre = proj.pre.id if not isinstance(proj.pre, PopulationView) else proj.pre.population.id if proj.synapse_type.type == "rate": delay_code = self._templates['delay']['nonuniform_rate_coded']['init'] % {'id_pre': id_pre} else: delay_code = self._templates['delay']['nonuniform_spiking']['init'] % {'id_pre': id_pre} # # uniform delay elif proj.connector_delay_dist == None: if cpp_connector_available(proj.connector_name, proj._storage_format, proj._storage_order): delay_code = tabify("delay = d_dist_arg1;", 2) else: delay_code = self._templates['delay']['uniform']['init'] # # non-uniform delay drawn from distribution elif isinstance(proj.connector_delay_dist, ANNRandom.RandomDistribution): if cpp_connector_available(proj.connector_name, proj._storage_format, proj._storage_order): rng_init = "rng[0]" if single_spmv_matrix else "rng" delay_code = tabify(""" delay = init_matrix_variable_discrete_uniform<int>(d_dist_arg1, d_dist_arg2, %(rng_init)s); max_delay = -1;""" % {'id_pre': proj.pre.id, 'rng_init': rng_init}, 2) else: id_pre = proj.pre.id if not 
isinstance(proj.pre, PopulationView) else proj.pre.population.id if proj.synapse_type.type == "rate": delay_code = self._templates['delay']['nonuniform_rate_coded']['init'] % {'id_pre': id_pre} else: delay_code = self._templates['delay']['nonuniform_spiking']['init'] % {'id_pre': id_pre} else: raise NotImplementedError( str(type(proj.connector_weight_dist)) + " is not available.") else: delay_code = "" # If no psp is defined, it's event-driven has_event_driven = False for var in proj.synapse_type.description['variables']: if var['method'] == 'event-driven': has_event_driven = True break if has_event_driven: code += self._templates['event_driven']['cpp_init'] # Pruning if Global.config['structural_plasticity']: if 'pruning' in proj.synapse_type.description.keys(): code += """ // Pruning _pruning = false; _pruning_period = 1; _pruning_offset = 0; """ if 'creating' in proj.synapse_type.description.keys(): code += """ // Creating _creating = false; _creating_period = 1; _creating_offset = 0; """ return weight_code, delay_code, code
def _generate_default_get_set(self, proj, single_matrix):
    """
    Generate the attribute declarations and the name-based get/set
    accessors of a projection.

    Instead of generating a code block with get/set for each variable we
    generate common C++ functions which receive the name of the variable
    and dispatch on it with a chain of ``name.compare(...)`` tests. One set
    of accessor functions is emitted per C type and per locality (local,
    semiglobal, global) that actually occurs in the synapse description.

    Parameters:

    * proj: the projection whose ``synapse_type.description`` lists the
      'parameters' and 'variables' to expose.
    * single_matrix: not used by this method; kept for the callers.

    Returns (str, str):

    * the declarations of all parameters/variables, built from
      ``self._templates['attribute_decl']``
    * the C++ code of the accessor functions

    Both strings are empty for a Transpose projection.
    """
    # The transpose projection contains no own synaptic parameters
    if isinstance(proj, Transpose):
        return "", ""

    local_accessor_template = """
    std::vector<std::vector<%(ctype)s>> get_local_attribute_all_%(ctype_name)s(std::string name) {
%(local_get1)s

        // should not happen
        std::cerr << "ProjStruct%(id_proj)s::get_local_attribute_all_%(ctype_name)s: " << name << " not found" << std::endl;
        return std::vector<std::vector<%(ctype)s>>();
    }

    std::vector<%(ctype)s> get_local_attribute_row_%(ctype_name)s(std::string name, int rk_post) {
%(local_get2)s

        // should not happen
        std::cerr << "ProjStruct%(id_proj)s::get_local_attribute_row_%(ctype_name)s: " << name << " not found" << std::endl;
        return std::vector<%(ctype)s>();
    }

    %(ctype)s get_local_attribute_%(ctype_name)s(std::string name, int rk_post, int rk_pre) {
%(local_get3)s

        // should not happen
        std::cerr << "ProjStruct%(id_proj)s::get_local_attribute_%(ctype_name)s: " << name << " not found" << std::endl;
        return 0.0;
    }

    void set_local_attribute_all_%(ctype_name)s(std::string name, std::vector<std::vector<%(ctype)s>> value) {
%(local_set1)s
    }

    void set_local_attribute_row_%(ctype_name)s(std::string name, int rk_post, std::vector<%(ctype)s> value) {
%(local_set2)s
    }

    void set_local_attribute_%(ctype_name)s(std::string name, int rk_post, int rk_pre, %(ctype)s value) {
%(local_set3)s
    }
"""

    semiglobal_accessor_template = """
    std::vector<%(ctype)s> get_semiglobal_attribute_all_%(ctype_name)s(std::string name) {
%(semiglobal_get1)s

        // should not happen
        std::cerr << "ProjStruct%(id_proj)s::get_semiglobal_attribute_all_%(ctype_name)s: " << name << " not found" << std::endl;
        return std::vector<%(ctype)s>();
    }

    %(ctype)s get_semiglobal_attribute_%(ctype_name)s(std::string name, int rk_post) {
%(semiglobal_get2)s

        // should not happen
        std::cerr << "ProjStruct%(id_proj)s::get_semiglobal_attribute_%(ctype_name)s: " << name << " not found" << std::endl;
        return 0.0;
    }

    void set_semiglobal_attribute_all_%(ctype_name)s(std::string name, std::vector<%(ctype)s> value) {
%(semiglobal_set1)s
    }

    void set_semiglobal_attribute_%(ctype_name)s(std::string name, int rk_post, %(ctype)s value) {
%(semiglobal_set2)s
    }
"""

    global_accessor_template = """
    %(ctype)s get_global_attribute_%(ctype_name)s(std::string name) {
%(global_get)s

        // should not happen
        std::cerr << "ProjStruct%(id_proj)s::get_global_attribute_%(ctype_name)s: " << name << " not found" << std::endl;
        return 0.0;
    }

    void set_global_attribute_%(ctype_name)s(std::string name, %(ctype)s value) {
%(global_set)s
    }
"""

    # One C++ "if the name matches" branch per accessor function. The outer
    # key is the locality, the inner key is the placeholder of the accessor
    # template the branch is inserted into.
    # NOTE(review): only the local getters emit the read_dirty_flag; the
    # semiglobal and global getters never did - confirm this is intended.
    branch_templates = {
        # Local variables can be vec[vec[d]], vec[d] or d
        'local': {
            'local_get1': """
        if ( name.compare("%(name)s") == 0 ) {
            %(read_dirty_flag)s
            return get_matrix_variable_all<%(type)s>(%(name)s);
        }
""",
            'local_set1': """
        if ( name.compare("%(name)s") == 0 ) {
            update_matrix_variable_all<%(type)s>(%(name)s, value);
            %(write_dirty_flag)s
            return;
        }
""",
            'local_get2': """
        if ( name.compare("%(name)s") == 0 ) {
            %(read_dirty_flag)s
            return get_matrix_variable_row<%(type)s>(%(name)s, rk_post);
        }
""",
            'local_set2': """
        if ( name.compare("%(name)s") == 0 ) {
            update_matrix_variable_row<%(type)s>(%(name)s, rk_post, value);
            %(write_dirty_flag)s
            return;
        }
""",
            'local_get3': """
        if ( name.compare("%(name)s") == 0 ) {
            %(read_dirty_flag)s
            return get_matrix_variable<%(type)s>(%(name)s, rk_post, rk_pre);
        }
""",
            'local_set3': """
        if ( name.compare("%(name)s") == 0 ) {
            update_matrix_variable<%(type)s>(%(name)s, rk_post, rk_pre, value);
            %(write_dirty_flag)s
            return;
        }
""",
        },
        # Semiglobal variables can be vec[d] or d
        'semiglobal': {
            'semiglobal_get1': """
        if ( name.compare("%(name)s") == 0 ) {
            return get_vector_variable_all<%(type)s>(%(name)s);
        }
""",
            'semiglobal_get2': """
        if ( name.compare("%(name)s") == 0 ) {
            return get_vector_variable<%(type)s>(%(name)s, rk_post);
        }
""",
            'semiglobal_set1': """
        if ( name.compare("%(name)s") == 0 ) {
            update_vector_variable_all<%(type)s>(%(name)s, value);
            %(write_dirty_flag)s
            return;
        }
""",
            'semiglobal_set2': """
        if ( name.compare("%(name)s") == 0 ) {
            update_vector_variable<%(type)s>(%(name)s, rk_post, value);
            %(write_dirty_flag)s
            return;
        }
""",
        },
        # Global variables are only d
        'global': {
            'global_get': """
        if ( name.compare("%(name)s") == 0 ) {
            return %(name)s;
        }
""",
            'global_set': """
        if ( name.compare("%(name)s") == 0 ) {
            %(name)s = value;
            %(write_dirty_flag)s
            return;
        }
""",
        },
    }

    # choose templates dependend on the paradigm
    decl_template = self._templates['attribute_decl']
    is_cuda = Global._check_paradigm("cuda")

    parameters = proj.synapse_type.description['parameters']
    seen_names = set()
    code_ids_per_type = {}

    # Sort the parameters/variables per type
    for var in parameters + proj.synapse_type.description['variables']:
        # Avoid doublons
        if var['name'] in seen_names:
            continue
        seen_names.add(var['name'])

        # important properties for code generation
        locality = var['locality']
        attr_type = 'parameter' if var in parameters else 'variable'

        # Special case for single weights
        if var['name'] == "w" and proj._has_single_weight():
            locality = 'global'

        # For GPUs we need to tell the host that this variable need to be
        # updated; global parameters are the only attributes without flags.
        if is_cuda and not (locality == "global" and attr_type == "parameter"):
            write_dirty_flag = "%(name)s_host_to_device = true;" % {'name': var['name']}
            read_dirty_flag = "if ( %(name)s_device_to_host < t ) device_to_host();" % {'name': var['name']}
        else:
            write_dirty_flag = ""
            read_dirty_flag = ""

        code_ids_per_type.setdefault(var['ctype'], []).append({
            'type': var['ctype'],
            'name': var['name'],
            'locality': locality,
            'attr_type': attr_type,
            'read_dirty_flag': read_dirty_flag,
            'write_dirty_flag': write_dirty_flag
        })

    declare_parameters_variables = ""

    # Final code, can contain of multiple sets of accessor functions
    final_code = ""
    for ctype, ids_of_type in code_ids_per_type.items():
        # Accumulated branches of this C type, one entry per placeholder
        branches = {}
        for per_locality in branch_templates.values():
            for placeholder in per_locality:
                branches[placeholder] = ""

        for ids in ids_of_type:
            # Locality of a variable detemines the correct template
            # In case of CUDA also the attribute type is important
            locality = ids['locality']
            attr_type = ids['attr_type']

            # Everything which is neither local nor semiglobal is handled
            # by the global accessors.
            group = locality if locality in ("local", "semiglobal") else "global"
            for placeholder, branch in branch_templates[group].items():
                branches[placeholder] += branch % ids

            if is_cuda and locality == "global":
                declare_parameters_variables += decl_template[locality][attr_type] % ids
            else:
                declare_parameters_variables += decl_template[locality] % ids

        # build up the final codes
        template_ids = dict(
            branches,
            id_proj=proj.id,
            ctype=ctype,
            ctype_name=ctype.replace(" ", "_")
        )
        if branches['local_get1'] != "":
            final_code += local_accessor_template % template_ids
        if branches['semiglobal_get1'] != "":
            final_code += semiglobal_accessor_template % template_ids
        if branches['global_get'] != "":
            final_code += global_accessor_template % template_ids

    return declare_parameters_variables, final_code
def _select_sparse_matrix_format(self, proj):
    """
    The sparse matrix format determines the fundamental structure for
    connectivity representation. It depends on the model type as well as
    hardware paradigm.

    Returns (str1, str2, str3, bool):

    * str1: include directive(s) for the selected sparse matrix format
    * str2: sparse matrix format declaration (C++ template instantiation)
    * str3: sparse matrix format arguments (post size, pre size and, for
      BSR, the block size)
    * bool: if the matrix is a complete (True) or sliced matrix (False)

    Raises:

    * Global.InvalidConfiguration: structural plasticity is enabled for a
      storage format other than LIL.
    * Global.CodeGeneratorException: the combination of synapse type,
      storage format, storage order and paradigm has no implementation.
    * NotImplementedError: unsupported order/paradigm combinations of the
      spiking CSR and dense formats.
    """
    if Global.config["structural_plasticity"] and proj._storage_format != "lil":
        raise Global.InvalidConfiguration("Structural plasticity is only allowed for LIL format.")

    # get preferred index type
    idx_type, _, size_type, _ = determine_idx_type_for_projection(proj)
    template_args = idx_type + ", " + size_type
    storage_format = proj._storage_format

    def plain_matrix(cpp_class, extra_args=""):
        # (format, include, single_matrix) for a non-partitioned matrix
        # whose header file is named like the C++ class.
        return (cpp_class + "<" + template_args + extra_args + ">",
                "#include \"" + cpp_class + ".hpp\"\n",
                True)

    def unsplit_matrix():
        # With OpenMP the matrix is only partitioned if several threads are
        # used and the projection did not opt out of the splitting.
        return Global.config['num_threads'] == 1 or proj._no_split_matrix

    def unsupported_paradigm(synapse_kind, format_label):
        # Only builds the exception, the caller is responsible to raise it.
        return Global.CodeGeneratorException(
            " No implementation assigned for " + synapse_kind + " synapses using " + format_label +
            " and paradigm=" + str(Global.config['paradigm']) + " (Projection: " + proj.name + ")")

    # Rate-coded formats which exist as "<class>" (OpenMP) and
    # "<class>CUDA" (CUDA) variant:
    # format -> (C++ class, additional OpenMP template args, name in errors)
    rate_formats = {
        "coo": ("COOMatrix", "", "COO"),
        "bsr": ("BSRMatrix", ", true", "BSR"),
        "csr": ("CSRMatrix", "", "CSR"),
        "ellr": ("ELLRMatrix", "", "ELLPACK-R"),
        "ell": ("ELLMatrix", "", "ELLPACK"),
        "hyb": ("HYBMatrix", ", true", "Hybrid (COO+ELL)"),
    }

    # ANNarchy supports a list of different formats to encode projections.
    # The general structure of the decision tree is:
    #
    # - rate-coded
    #     - formats
    #         - paradigm
    # - spike
    #     - formats
    #         - ordering
    #             - paradigm
    #
    # Every unsupported combination has to raise: a merely constructed
    # exception would be dropped and the code below would fail later on
    # with an UnboundLocalError.
    if proj.synapse_type.type == "rate":
        # Sanity check
        if proj._storage_order == "pre_to_post":
            raise Global.CodeGeneratorException(" The storage_order 'pre_to_post' is invalid for rate-coded synapses (Projection: "+proj.name+")")

        # Check for the provided format + paradigm combination if a suitable implementation is available.
        if storage_format == "lil":
            if not Global._check_paradigm("openmp"):
                raise unsupported_paradigm("rate-coded", "LIL")

            if unsplit_matrix():
                sparse_matrix_format, sparse_matrix_include, single_matrix = plain_matrix("LILMatrix")
            else:
                sparse_matrix_format = "PartitionedMatrix< LILMatrix<"+template_args+">, "+template_args+">"
                sparse_matrix_include = "#include \"PartitionedMatrix.hpp\"\n#include \"LILMatrix.hpp\"\n"
                single_matrix = False

        elif storage_format in rate_formats:
            cpp_class, openmp_args, format_label = rate_formats[storage_format]

            if Global._check_paradigm("openmp"):
                sparse_matrix_format, sparse_matrix_include, single_matrix = plain_matrix(cpp_class, openmp_args)
            elif Global._check_paradigm("cuda"):
                sparse_matrix_format, sparse_matrix_include, single_matrix = plain_matrix(cpp_class + "CUDA")
            else:
                raise unsupported_paradigm("rate-coded", format_label)

        elif storage_format == "dense":
            if Global._check_paradigm("openmp"):
                sparse_matrix_format, sparse_matrix_include, single_matrix = plain_matrix("DenseMatrix", ", true")
            else:
                # every paradigm other than OpenMP ends up with the CUDA variant
                sparse_matrix_format, sparse_matrix_include, single_matrix = plain_matrix("DenseMatrixCUDA", ", true")

        else:
            raise Global.CodeGeneratorException(" No implementation assigned for rate-coded synapses using '"+proj._storage_format+"' storage format (Projection: "+proj.name+")")

    elif proj.synapse_type.type == "spike":
        # Check for the provided format + paradigm
        # combination if it's availability
        if storage_format == "lil":
            if proj._storage_order == "pre_to_post":
                raise Global.CodeGeneratorException(" The storage_order 'pre_to_post' is invalid for LIL representations (Projection: "+proj.name+")")

            if not Global._check_paradigm("openmp"):
                raise unsupported_paradigm("spiking", "LIL")

            if unsplit_matrix():
                sparse_matrix_format, sparse_matrix_include, single_matrix = plain_matrix("LILInvMatrix")
            else:
                sparse_matrix_format = "PartitionedMatrix<LILInvMatrix<"+template_args+">, "+template_args+">"
                sparse_matrix_include = "#include \"PartitionedMatrix.hpp\"\n#include \"LILInvMatrix.hpp\"\n"
                single_matrix = False

        elif storage_format == "csr":
            if proj._storage_order == "post_to_pre":
                if Global._check_paradigm("openmp"):
                    sparse_matrix_format, sparse_matrix_include, single_matrix = plain_matrix("CSRCMatrix")
                elif Global._check_paradigm("cuda"):
                    sparse_matrix_format, sparse_matrix_include, single_matrix = plain_matrix("CSRCMatrixCUDA")
                else:
                    raise NotImplementedError

            else:
                if not Global._check_paradigm("openmp"):
                    raise NotImplementedError

                if unsplit_matrix():
                    sparse_matrix_format, sparse_matrix_include, single_matrix = plain_matrix("CSRCMatrixT")
                else:
                    sparse_matrix_format = "PartitionedMatrix<CSRCMatrixT<"+template_args+">, "+template_args+">"
                    sparse_matrix_include = "#include \"PartitionedMatrix.hpp\"\n#include \"CSRCMatrixT.hpp\"\n"
                    single_matrix = False

        elif storage_format == "dense":
            # only the post_to_pre ordering on OpenMP is available
            if proj._storage_order != "post_to_pre":
                raise NotImplementedError
            if not Global._check_paradigm("openmp"):
                raise NotImplementedError

            sparse_matrix_format, sparse_matrix_include, single_matrix = plain_matrix("DenseMatrix", ", false")

        else:
            raise Global.CodeGeneratorException(" No implementation assigned for spiking synapses using '"+proj._storage_format+"' storage format (Projection: "+proj.name+")")

    else:
        raise Global.CodeGeneratorException(" Invalid synapse type " + proj.synapse_type.type)

    # For population views the size of the underlying population is used
    pre_size = proj.pre.population.size if isinstance(proj.pre, PopulationView) else proj.pre.size
    post_size = proj.post.population.size if isinstance(proj.post, PopulationView) else proj.post.size

    # HD (6th Oct 2020)
    # Currently I unified this by flipping the dimensions in CSRCMatrixT in the C++ code
    sparse_matrix_args = " %(post_size)s, %(pre_size)s" % {
        'pre_size': pre_size,
        'post_size': post_size
    }

    # BSR additionally needs the block size: either the one attached to the
    # projection or one determined from the population sizes.
    if storage_format == "bsr":
        if hasattr(proj, "_bsr_size"):
            sparse_matrix_args += ", " + str(proj._bsr_size)
        else:
            sparse_matrix_args += ", " + str(determine_bsr_blocksize(pre_size, post_size))

    if Global.config['verbose']:
        print("Selected", sparse_matrix_format, "(", sparse_matrix_args, ")", "for projection ", proj.name, "and single_matrix =", single_matrix )

    return sparse_matrix_include, sparse_matrix_format, sparse_matrix_args, single_matrix