def _custom_functions(self):
    """
    Build the Cython export declarations and the Python-callable wrappers
    for the globally defined custom functions.

    :return: the pair (export, wrapper); both are empty strings when no
             custom functions were defined in ``Global._objects['functions']``.
    """
    if not Global._objects['functions']:
        return "", ""

    from ANNarchy.parser.Extraction import extract_functions

    export = ""
    wrapper = ""
    for _, func in Global._objects['functions']:
        # local_global=True extracts the function at global scope;
        # only the first (single) description is used.
        desc = extract_functions(func, local_global=True)[0]

        # Export: one indented C-level signature "ret name(type0, type1, ...)"
        type_list = ', '.join(desc['arg_types'])
        export += ' ' * 4 + desc['return_type'] + " " + desc['name'] + '(' + type_list + ')' + '\n'

        # Wrapper: a cpdef applying the scalar function element-wise.
        call_args = ', '.join(desc['args'])
        element_args = ', '.join(arg + "[i]" for arg in desc['args'])
        wrapper += "cpdef np.ndarray func_" + desc['name'] + '(' + call_args + '):'
        wrapper += """
    return np.array([%(funcname)s(%(args)s) for i in range(len(%(first_arg)s))])
""" % {'funcname': desc['name'], 'first_arg': desc['args'][0], 'args': element_args}

    return export, wrapper
def header_custom_functions(self):
    """
    Generate the C++ definitions of the user-defined global functions,
    to be inserted into the generated header.

    :return: concatenated C++ code, one definition per line; the empty
             string when no custom functions were defined.
    """
    if len(Global._objects['functions']) == 0:
        return ""

    code = ""
    from ANNarchy.parser.Extraction import extract_functions
    # Global._objects['functions'] stores (name, function) pairs, as
    # unpacked by the sibling generator methods; iterating the bare
    # entry would hand the whole tuple to extract_functions().
    for _, func in Global._objects['functions']:
        code += extract_functions(func, local_global=True)[0]['cpp'] + '\n'

    return code
def _header_custom_functions(self):
    """
    Generate code for custom functions which are defined globally and
    usable within neuron or synapse descriptions. These functions can
    only rely on the provided arguments.

    :return: concatenated C++ code, one definition per line; the empty
             string when no custom functions were defined.
    """
    if len(Global._objects['functions']) == 0:
        return ""

    # Import locally, consistent with the other generator methods
    # (extract_functions is not otherwise visible in this scope).
    from ANNarchy.parser.Extraction import extract_functions

    # Attention CUDA: this definition will work only on host side.
    code = ""
    for _, func in Global._objects['functions']:
        code += extract_functions(func, local_global=True)[0]['cpp'] + '\n'

    return code
def _custom_functions(self):
    """
    Generate the Cython export and wrapper code for the user-defined
    global functions stored in ``Global._objects['functions']``.

    :return: the pair (export, wrapper):

        * export: cdef-level declarations exposing each C++ function.
        * wrapper: one ``cpdef`` wrapper per function that applies the
          scalar C++ function element-wise over array arguments.

        Both are empty strings when no custom functions were defined.
    """
    if len(Global._objects['functions']) == 0:
        return "", ""

    from ANNarchy.parser.Extraction import extract_functions

    export = ""
    wrapper = ""
    for name, func in Global._objects['functions']:
        # local_global=True extracts at global scope; a single
        # description per function is expected.
        desc = extract_functions(func, local_global=True)[0]

        # Export: "ret name(type0, type1, ...)" with 4-space indent
        export += ' ' * 4 + desc['return_type'] + " " + desc['name'] + '('
        for idx, arg in enumerate(desc['arg_types']):
            export += arg
            if idx < len(desc['arg_types']) - 1:
                export += ', '
        export += ')' + '\n'

        # Wrapper: build signature and element-access expressions in one pass
        arguments = ""
        wrapper += "cpdef np.ndarray func_" + desc['name'] + '('
        for idx, arg in enumerate(desc['args']):
            # Function call
            wrapper += arg
            if idx < len(desc['args']) - 1:
                wrapper += ', '
            # Element access
            arguments += arg + "[i]"
            if idx < len(desc['args']) - 1:
                arguments += ', '
        wrapper += '):'
        # Body: vectorize over the length of the first argument
        wrapper += """
    return np.array([%(funcname)s(%(args)s) for i in range(len(%(first_arg)s))])
""" % {
            'funcname': desc['name'],
            'first_arg': desc['args'][0],
            'args': arguments
        }

    return export, wrapper
def _generate_body(self):
    """
    Generate the codes 'main' library file. The generated code will be
    used in different files, dependent on the chosen target platform:

    * openmp: ANNarchy.cpp
    * cuda: ANNarchyHost.cu and ANNarchyDevice.cu

    :return: a single code string for openMP/single thread, or the tuple
             (device_code, host_code) for CUDA.
    :raises NotImplementedError: for any other paradigm.
    """
    # struct declaration for each population
    pop_ptr = ""
    for pop in self._pop_desc:
        pop_ptr += pop['instance']

    # struct declaration for each projection
    proj_ptr = ""
    for proj in self._proj_desc:
        proj_ptr += proj['instance']

    # Code for the global operations
    glop_definition = self._body_def_glops()
    update_globalops = ""
    for pop in self._pop_desc:
        if 'gops_update' in pop.keys():
            update_globalops += pop['gops_update']

    # Reset presynaptic sums
    reset_sums = self._body_resetcomputesum_pop()

    # Compute presynaptic sums (host-side only for openMP; the CUDA
    # branch uses the per-projection psp_call/psp_body fields instead)
    compute_sums = ""
    # Sum over all synapses
    if Global._check_paradigm("openmp"):
        for proj in self._proj_desc:
            compute_sums += proj["compute_psp"]

    # Init rng dist: one init call per population
    init_rng_dist = ""
    for pop in self._populations:
        init_rng_dist += """pop%(id)s.init_rng_dist();\n""" % {'id': pop.id}

    # Update random distributions
    rd_update_code = ""
    for desc in self._pop_desc + self._proj_desc:
        if 'rng_update' in desc.keys():
            rd_update_code += desc['rng_update']

    # Equations for the neural variables
    update_neuron = ""
    for pop in self._pop_desc:
        if 'update' in pop.keys():
            update_neuron += pop['update']

    # Enque delayed outputs
    delay_code = ""
    for pop in self._pop_desc:
        if 'delay_update' in pop.keys():
            delay_code += pop['delay_update']

    # Equations for the synaptic variables
    update_synapse = ""
    for proj in self._proj_desc:
        if 'update' in proj.keys():
            update_synapse += proj['update']

    # Equations for the post-events
    post_event = ""
    for proj in self._proj_desc:
        if 'post_event' in proj.keys():
            post_event += proj['post_event']

    # Structural plasticity
    structural_plasticity = self._body_structural_plasticity()

    # Early stopping
    run_until = self._body_run_until()

    # Profiling: without a configured profiler, the default generator
    # fills the profiling placeholders with empty code
    if self._profgen:
        prof_dict = self._profgen.generate_body_dict()
    else:
        prof_dict = Profile.ProfileGenerator(self._annarchy_dir, self._net_id).generate_body_dict()

    #
    # Generate the ANNarchy.cpp code, the corresponding template differs
    # greatly. For further information take a look into the corresponding
    # branches.
    #
    if Global.config['paradigm'] == "openmp":
        # custom constants
        custom_constant, _ = self._body_custom_constants()

        # code fields for openMP/single thread template
        base_dict = {
            'float_prec': Global.config['precision'],
            'pop_ptr': pop_ptr,
            'proj_ptr': proj_ptr,
            'glops_def': glop_definition,
            'initialize': self._body_initialize(),
            'init_rng_dist': init_rng_dist,
            'run_until': run_until,
            'compute_sums': compute_sums,
            'reset_sums': reset_sums,
            'update_neuron': update_neuron,
            'update_globalops': update_globalops,
            'update_synapse': update_synapse,
            'random_dist_update': rd_update_code,
            'delay_code': delay_code,
            'post_event': post_event,
            'structural_plasticity': structural_plasticity,
            'custom_constant': custom_constant,
        }

        # profiling
        base_dict.update(prof_dict)

        # complete code template
        if Global.config["num_threads"] == 1:
            return BaseTemplate.st_body_template % base_dict
        else:
            return BaseTemplate.omp_body_template % base_dict

    elif Global.config['paradigm'] == "cuda":
        # Implementation notice ( HD: 10. June, 2015 )
        #
        # The CUDA linking process is a big problem for object oriented
        # approaches and the separation of implementation codes into several
        # files. Even in the current SDK 5.0 this problem is not fully
        # solved. Linking is available, but only for small, independent code
        # pieces, by far not sufficient for full object-oriented approaches.
        #
        # For us, this currently has one consequence: we cannot completely
        # separate the implementation of objects into several files. To hold
        # a certain equality between the structures of objects, the
        # following workaround is used:
        #
        # We create the c-structs holding data fields and accessors as in
        # OpenMP. We also create the kernels, call entity in the
        # corresponding generator objects, and return the codes via the
        # descriptor dictionary.
        #
        # This ensures a consistent interface in the generators and also in
        # the generated codes, but sometimes requires additional overhead.
        # Hopefully NVidia will improve their linker in the next releases,
        # so one could remove this overhead.
        psp_call = ""
        for proj in self._proj_desc:
            psp_call += proj['psp_call']

        # custom constants (host and device variants)
        host_custom_constant, _, device_custom_constant = self._body_custom_constants()

        # custom functions, rewritten as device functions
        custom_func = ""
        for pop in self._pop_desc:
            custom_func += pop['custom_func']
        for proj in self._proj_desc:
            custom_func += proj['custom_func']
        for _, func in Global._objects['functions']:
            custom_func += extract_functions(func, local_global=True)[0]['cpp'].replace("inline", "__device__") + '\n'

        # pre-defined/common available kernel
        common_kernel = self._cuda_common_kernel(self._projections)

        # neural update kernels
        pop_kernel = ""
        for pop in self._pop_desc:
            pop_kernel += pop['update_body']

        # mean firing rate computation
        pop_update_fr = ""
        for pop in self._pop_desc:
            pop_update_fr += pop['update_FR']

        # synaptic transmission kernels
        psp_kernel = ""
        for proj in self._proj_desc:
            psp_kernel += proj['psp_body']

        # kernel declarations
        kernel_def = ""
        for pop in self._pop_desc:
            kernel_def += pop['update_header']
        for proj in self._proj_desc:
            kernel_def += proj['psp_header']
            kernel_def += proj['update_synapse_header']
            kernel_def += proj['postevent_header']

        # delayed outputs
        delay_code = ""
        for pop in self._pop_desc:
            if 'update_delay' in pop.keys():
                delay_code += pop['update_delay']

        # synaptic plasticity kernels and their invocations
        syn_kernel = ""
        for proj in self._proj_desc:
            syn_kernel += proj['update_synapse_body']

        syn_call = ""
        for proj in self._proj_desc:
            syn_call += proj['update_synapse_call']

        # post-event kernels and their invocations
        postevent_kernel = ""
        for proj in self._proj_desc:
            postevent_kernel += proj['postevent_body']

        postevent_call = ""
        for proj in self._proj_desc:
            postevent_call += proj['postevent_call']

        clear_sums = self._body_resetcomputesum_pop()

        # global operations
        glob_ops_header, glob_ops_body = self._body_def_glops()
        kernel_def += glob_ops_header

        # determine number of threads per kernel
        threads_per_kernel = self._cuda_kernel_config()

        # concurrent kernel execution
        stream_setup = self._cuda_stream_config()

        # memory transfers
        host_device_transfer, device_host_transfer = "", ""
        for pop in self._pop_desc + self._proj_desc:
            host_device_transfer += pop['host_to_device']
            device_host_transfer += pop['device_to_host']

        # Profiling
        if self._profgen:
            prof_dict = self._profgen.generate_body_dict()
        else:
            prof_dict = Profile.ProfileGenerator(self._annarchy_dir, self._net_id).generate_body_dict()

        #
        # HD ( 31.07.2016 ):
        #
        # I'm not really sure, what exactly causes the problem with this
        # atomicAdd function. If we move it into ANNarchyDevice.cu, the
        # macro seems to be evaluated wrongly and the atomicAdd() function
        # appears doubled or appears not.
        #
        # So as "solution", the atomicAdd definition block resides in
        # ANNarchyHost and only the computation kernels are placed in
        # ANNarchyDevice. If we decide to use SDK8 as lowest requirement,
        # one can move this kernel too.
        device_code = BaseTemplate.cuda_device_kernel_template % {
            # device stuff
            'common_kernel': common_kernel,
            'pop_kernel': pop_kernel,
            'psp_kernel': psp_kernel,
            'syn_kernel': syn_kernel,
            'glob_ops_kernel': glob_ops_body,
            'postevent_kernel': postevent_kernel,
            'custom_func': custom_func,
            'custom_constant': device_custom_constant,
            'built_in': BaseTemplate.built_in_functions + BaseTemplate.integer_power_cuda % {'float_prec': Global.config['precision']},
            'float_prec': Global.config['precision']
        }

        base_dict = {
            # network definitions
            'float_prec': Global.config['precision'],
            'pop_ptr': pop_ptr,
            'proj_ptr': proj_ptr,
            'run_until': run_until,
            'clear_sums': clear_sums,
            'compute_sums': psp_call,
            'update_neuron': update_neuron,
            'update_FR': pop_update_fr,
            'update_globalops': update_globalops,
            'update_synapse': syn_call,
            'post_event': postevent_call,
            'delay_code': delay_code,
            'initialize': self._body_initialize(),
            'structural_plasticity': structural_plasticity,

            # cuda host specific
            'stream_setup': stream_setup,
            'host_device_transfer': host_device_transfer,
            'device_host_transfer': device_host_transfer,
            'kernel_def': kernel_def,
            'kernel_config': threads_per_kernel,
            'custom_constant': host_custom_constant
        }
        base_dict.update(prof_dict)

        host_code = BaseTemplate.cuda_host_body_template % base_dict
        return device_code, host_code

    else:
        raise NotImplementedError
def _generate_body(self):
    """
    Generate the codes 'main' library file. The generated code will be
    used in different files, dependent on the chosen target platform:

    * openmp: ANNarchy.cpp
    * cuda: ANNarchyHost.cu and ANNarchyDevice.cu

    :return: a single code string for openMP, or the tuple
             (device_code, host_code) for CUDA.
    :raises NotImplementedError: for any other paradigm.
    """
    # struct declaration for each population
    pop_ptr = ""
    for pop in self._pop_desc:
        pop_ptr += pop['instance']

    # struct declaration for each projection
    proj_ptr = ""
    for proj in self._proj_desc:
        proj_ptr += proj['instance']

    # Code for the global operations
    glop_definition = self._body_def_glops()
    update_globalops = ""
    for pop in self._pop_desc:
        if 'gops_update' in pop.keys():
            update_globalops += pop['gops_update']

    # Reset presynaptic sums
    reset_sums = self._body_resetcomputesum_pop()

    # Compute presynaptic sums
    compute_sums = self._body_computesum_proj()

    # Update random distributions
    rd_update_code = ""
    for desc in self._pop_desc + self._proj_desc:
        if 'rng_update' in desc.keys():
            rd_update_code += desc['rng_update']

    # Equations for the neural variables
    update_neuron = ""
    for pop in self._pop_desc:
        if 'update' in pop.keys():
            update_neuron += pop['update']

    # Enque delayed outputs
    delay_code = ""
    for pop in self._pop_desc:
        if 'delay_update' in pop.keys():
            delay_code += pop['delay_update']

    # Equations for the synaptic variables
    update_synapse = ""
    for proj in self._proj_desc:
        if 'update' in proj.keys():
            update_synapse += proj['update']

    # Equations for the post-events
    post_event = ""
    for proj in self._proj_desc:
        if 'post_event' in proj.keys():
            post_event += proj['post_event']

    # Structural plasticity
    structural_plasticity = self._body_structural_plasticity()

    # Early stopping
    run_until = self._body_run_until()

    # Number threads: only emit the omp call when several threads are used
    number_threads = "omp_set_num_threads(threads);" if Global.config['num_threads'] > 1 else ""

    # Profiling: without a configured profiler, the default generator
    # fills the profiling placeholders with empty code
    if self._profgen:
        prof_dict = self._profgen.generate_body_dict()
    else:
        from .Profile import ProfileGenerator
        prof_dict = ProfileGenerator(self._annarchy_dir, self._net_id).generate_body_dict()

    #
    # Generate the ANNarchy.cpp code, the corresponding template differs
    # greatly. For further information take a look into the corresponding
    # branches.
    #
    if Global.config['paradigm'] == "openmp":
        # custom constants
        custom_constant, _ = self._body_custom_constants()

        from .Template.BaseTemplate import omp_body_template
        base_dict = {
            'float_prec': Global.config['precision'],
            'pop_ptr': pop_ptr,
            'proj_ptr': proj_ptr,
            'glops_def': glop_definition,
            'initialize': self._body_initialize(),
            'run_until': run_until,
            'compute_sums': compute_sums,
            'reset_sums': reset_sums,
            'update_neuron': update_neuron,
            'update_globalops': update_globalops,
            'update_synapse': update_synapse,
            'random_dist_update': rd_update_code,
            'delay_code': delay_code,
            'post_event': post_event,
            'structural_plasticity': structural_plasticity,
            'set_number_threads': number_threads,
            'custom_constant': custom_constant,
        }
        base_dict.update(prof_dict)
        return omp_body_template % base_dict

    elif Global.config['paradigm'] == "cuda":
        # Implementation notice ( HD: 10. June, 2015 )
        #
        # The CUDA linking process is a big problem for object oriented
        # approaches and the separation of implementation codes into several
        # files. Even in the current SDK 5.0 this problem is not fully
        # solved. Linking is available, but only for small, independent code
        # pieces, by far not sufficient for full object-oriented approaches.
        #
        # For us, this currently has one consequence: we cannot completely
        # separate the implementation of objects into several files. To hold
        # a certain equality between the structures of objects, the
        # following workaround is used:
        #
        # We create the c-structs holding data fields and accessors as in
        # OpenMP. We also create the kernels, call entity in the
        # corresponding generator objects, and return the codes via the
        # descriptor dictionary.
        #
        # This ensures a consistent interface in the generators and also in
        # the generated codes, but sometimes requires additional overhead.
        # Hopefully NVidia will improve their linker in the next releases,
        # so one could remove this overhead.
        psp_call = ""
        for proj in self._proj_desc:
            psp_call += proj['psp_call']

        # custom constants (host and device variants)
        host_custom_constant, _, device_custom_constant = self._body_custom_constants()

        # custom functions, rewritten as device functions
        custom_func = ""
        for pop in self._pop_desc:
            custom_func += pop['custom_func']
        for proj in self._proj_desc:
            custom_func += proj['custom_func']
        for _, func in Global._objects['functions']:
            custom_func += extract_functions(func, local_global=True)[0]['cpp'].replace("inline", "__device__") + '\n'

        # neural update kernels
        pop_kernel = ""
        for pop in self._pop_desc:
            pop_kernel += pop['update_body']

        # mean firing rate computation
        pop_update_fr = ""
        for pop in self._pop_desc:
            pop_update_fr += pop['update_FR']

        # synaptic transmission kernels
        psp_kernel = ""
        for proj in self._proj_desc:
            psp_kernel += proj['psp_body']

        # kernel declarations
        kernel_def = ""
        for pop in self._pop_desc:
            kernel_def += pop['update_header']
        for proj in self._proj_desc:
            kernel_def += proj['psp_header']
            kernel_def += proj['update_synapse_header']
            kernel_def += proj['postevent_header']

        # delayed outputs
        delay_code = ""
        for pop in self._pop_desc:
            if 'update_delay' in pop.keys():
                delay_code += pop['update_delay']

        # synaptic plasticity kernels and their invocations
        syn_kernel = ""
        for proj in self._proj_desc:
            syn_kernel += proj['update_synapse_body']

        syn_call = ""
        for proj in self._proj_desc:
            syn_call += proj['update_synapse_call']

        # post-event kernels and their invocations
        postevent_kernel = ""
        for proj in self._proj_desc:
            postevent_kernel += proj['postevent_body']

        postevent_call = ""
        for proj in self._proj_desc:
            postevent_call += proj['postevent_call']

        clear_sums = self._body_resetcomputesum_pop()

        # global operations
        glob_ops_header, glob_ops_body = self._body_def_glops()
        kernel_def += glob_ops_header

        # determine number of threads per kernel
        threads_per_kernel = self._cuda_kernel_config()

        # concurrent kernel execution
        stream_setup = self._cuda_stream_config()

        # memory transfers
        host_device_transfer, device_host_transfer = "", ""
        for pop in self._pop_desc + self._proj_desc:
            host_device_transfer += pop['host_to_device']
            device_host_transfer += pop['device_to_host']

        # Profiling
        if self._profgen:
            prof_dict = self._profgen.generate_body_dict()
        else:
            from .Profile import ProfileGenerator
            prof_dict = ProfileGenerator(self._annarchy_dir, self._net_id).generate_body_dict()

        #
        # HD ( 31.07.2016 ):
        #
        # I'm not really sure, what exactly causes the problem with this
        # atomicAdd function. If we move it into ANNarchyDevice.cu, the
        # macro seems to be evaluated wrongly and the atomicAdd() function
        # appears doubled or appears not.
        #
        # So as "solution", the atomicAdd definition block resides in
        # ANNarchyHost and only the computation kernels are placed in
        # ANNarchyDevice. If we decide to use SDK8 as lowest requirement,
        # one can move this kernel too.
        from .Template.BaseTemplate import cuda_device_kernel_template, cuda_host_body_template, built_in_functions

        device_code = cuda_device_kernel_template % {
            # device stuff
            'pop_kernel': pop_kernel,
            'psp_kernel': psp_kernel,
            'syn_kernel': syn_kernel,
            'glob_ops_kernel': glob_ops_body,
            'postevent_kernel': postevent_kernel,
            'custom_func': custom_func,
            'custom_constant': device_custom_constant,
            'built_in': built_in_functions,
            'float_prec': Global.config['precision']
        }

        base_dict = {
            # network definitions
            'float_prec': Global.config['precision'],
            'pop_ptr': pop_ptr,
            'proj_ptr': proj_ptr,
            'run_until': run_until,
            'clear_sums': clear_sums,
            'compute_sums': psp_call,
            'update_neuron': update_neuron,
            'update_FR': pop_update_fr,
            'update_globalops': update_globalops,
            'update_synapse': syn_call,
            'post_event': postevent_call,
            'delay_code': delay_code,
            'initialize': self._body_initialize(),
            'structural_plasticity': structural_plasticity,

            # cuda host specific
            'stream_setup': stream_setup,
            'host_device_transfer': host_device_transfer,
            'device_host_transfer': device_host_transfer,
            'kernel_def': kernel_def,
            'kernel_config': threads_per_kernel,
            'custom_constant': host_custom_constant
        }
        base_dict.update(prof_dict)

        host_code = cuda_host_body_template % base_dict
        return device_code, host_code

    else:
        raise NotImplementedError